I am trying to set a word vector model using the NLPL repository (
http://vectors.nlpl.eu/repository/)
my code:
import gensim
import zipfile
nlpl_zip="C:/Users/PC/Documents/CS/semesterD/nlp/project/47.zip"
with zipfile.ZipFile(nlpl_zip, "r") as archive:
stream = archive.open("model.bin")
word_vectors = gensim.models.KeyedVectors.load_word2vec_format(stream, binary=True,unicode_errors='replace')
this is the error i get:
--------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [5], in <cell line: 2>()
2 with zipfile.ZipFile(nlpl_zip, "r") as archive:
3 stream = archive.open("model.bin")
----> 4 word_vectors = gensim.models.KeyedVectors.load_word2vec_format(stream, binary=True,
5 unicode_errors='replace')
File ~\anaconda3\envs\hebnlp\lib\site-packages\gensim\models\keyedvectors.py:
1723, in KeyedVectors.load_word2vec_format(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype, no_header)
1676 @classmethod
1677 def load_word2vec_format(
1678 cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict',
1679 limit=None, datatype=REAL, no_header=False,
1680 ):
1681 """Load KeyedVectors from a file produced by the original C word2vec-tool format.
1682
1683 Warnings
(...)
1721
1722 """
-> 1723 return _load_word2vec_format(
1724 cls, fname, fvocab=fvocab, binary=binary, encoding=encoding, unicode_errors=unicode_errors,
1725 limit=limit, datatype=datatype, no_header=no_header,
1726 )
File
~\anaconda3\envs\hebnlp\lib\site-packages\gensim\models\keyedvectors.py:2052, in _load_word2vec_format
(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype, no_header, binary_chunk_size)
2049 counts[word] = int(count)
2051
logger.info("loading projection weights from
%s", fname)
-> 2052 with utils.open(fname, 'rb')
as fin:
2053
if no_header:
2054 # deduce both vocab_size & vector_size from 1st pass over file
2055
if binary:
File
~\anaconda3\envs\hebnlp\lib\site-packages\smart_open\smart_open_lib.py:224, in open
(uri, mode, buffering, encoding, errors, newline, closefd, opener, compression, transport_params)
221
except ValueError as ve:
222
raise NotImplementedError(ve.args[0])
--> 224 binary = _open_binary_stream(uri, binary_mode, transport_params)
225 decompressed = so_compression.compression_wrapper(binary, binary_mode, compression)
227
if 'b'
not in mode
or explicit_encoding
is not None:
File
~\anaconda3\envs\hebnlp\lib\site-packages\smart_open\smart_open_lib.py:396, in _open_binary_stream
(uri, mode, transport_params)
393
return fobj
395
if not isinstance(uri, str):
--> 396 raise TypeError("don't know how to handle uri
%s" % repr(uri))
398 scheme = _sniff_scheme(uri)
399 submodule = transport.get_transport(scheme)
TypeError: don't know how to handle uri <zipfile.ZipExtFile name='model.bin' mode='r' compress_type=deflate>
does anyone know the reason for the error?