QueryText = "Guardiola moved Lionel Messi to the No 9 role so that he didn't have to come deep and I think Aguero drops back into deeper positions too often."
similarities.Similarity(indexpath, model,topics)
dictionary = Dictionary(QueryText )
corpus = Corpus(QueryText, dictionary)
LDAModel = ldaModel(corpus,dictionary)
existing_dictionary.add_document(dictionary)
existing_lda_model.update(corpus)
existing_index.add_dcoument(LDAModel[corpus])
gensim\models\ldamodel.py:535: RuntimeWarning: overflow encountered in exp2
perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words
vec_bow = dictionary.doc2bow(QueryText)
vec_model = existing_lda_model[vec_bow]
sims = existing_index[vec_model]
Similarity index with 723 documents in 1 shards (stored under ..\Files\models\lda_model)
Similarity index with 725 documents in 0 shards (stored under ..\Files\models\lda_model)
AssertionError Traceback (most recent call last)
<ipython-input-32-dd0e855dc48a> in <module>()
49
50
---> 51 sims = lda_index[vec_model]
52 sims = sorted(enumerate(sims), key=lambda item: -item[1])
53
~\Anaconda3\envs\lf\lib\site-packages\gensim\similarities\docsim.py in __getitem__(self, query)
317 efficient than computing the similarities one document after another.
318 """
--> 319 self.close_shard() # no-op if no documents added to index since last query
320
321 # reset num_best and normalize parameters, in case they were changed dynamically
~\Anaconda3\envs\lf\lib\site-packages\gensim\similarities\docsim.py in close_shard(self)
265 if issparse:
266 index = SparseMatrixSimilarity(
--> 267 self.fresh_docs, num_terms=self.num_features, num_docs=len(self.fresh_docs), num_nnz=self.fresh_nnz
268 )
269 else:
~\Anaconda3\envs\lf\lib\site-packages\gensim\similarities\docsim.py in __init__(self, corpus, num_features, num_terms, num_docs, num_nnz, num_best, chunksize, dtype, maintain_sparsity)
691 self.index = matutils.corpus2csc(
692 corpus, num_terms=num_terms, num_docs=num_docs, num_nnz=num_nnz,
--> 693 dtype=dtype, printprogress=10000
694 ).T
695
~\Anaconda3\envs\lf\lib\site-packages\gensim\matutils.py in corpus2csc(corpus, num_terms, dtype, num_docs, num_nnz, printprogress)
94 indptr.append(posnext)
95 posnow = posnext
---> 96 assert posnow == num_nnz, "mismatch between supplied and computed number of non-zeros"
97 result = scipy.sparse.csc_matrix((data, indices, indptr), shape=(num_terms, num_docs), dtype=dtype)
98 else:
AssertionError: mismatch between supplied and computed number of non-zeros

Here is a new error I am getting after I pass a big text.
Similarity index with 723 documents in 1 shards (stored under \Files\models\lda_model)
Similarity index with 725 documents in 0 shards (stored under \Files\models\lda_model)
\gensim\models\ldamodel.py:535: RuntimeWarning: overflow encountered in exp2
perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-3-8fe711724367> in <module>()
45 trigram = Trigram.apply_trigram_model(queryText, bigram, trigram)
46 vec_bow = dictionry.doc2bow(trigram)
---> 47 vec_model = lda_model[vec_bow]
48 print(vec_model)
49
~\Anaconda3\envs\lf\lib\site-packages\gensim\models\ldamodel.py in __getitem__(self, bow, eps)
1103 `(topic_id, topic_probability)` 2-tuples.
1104 """
-> 1105 return self.get_document_topics(bow, eps, self.minimum_phi_value, self.per_word_topics)
1106
1107 def save(self, fname, ignore=('state', 'dispatcher'), separately=None, *args, **kwargs):
~\Anaconda3\envs\lf\lib\site-packages\gensim\models\ldamodel.py in get_document_topics(self, bow, minimum_probability, minimum_phi_value, per_word_topics)
944 return self._apply(corpus, **kwargs)
945
--> 946 gamma, phis = self.inference([bow], collect_sstats=per_word_topics)
947 topic_dist = gamma[0] / sum(gamma[0]) # normalize distribution
948
~\Anaconda3\envs\lf\lib\site-packages\gensim\models\ldamodel.py in inference(self, chunk, collect_sstats)
442 Elogthetad = Elogtheta[d, :]
443 expElogthetad = expElogtheta[d, :]
--> 444 expElogbetad = self.expElogbeta[:, ids]
445
446 # The optimal phi_{dwk} is proportional to expElogthetad_k * expElogbetad_w.
IndexError: index 718 is out of bounds for axis 1 with size 713
existing_dictionary.add_document(dictionary)
existing_lda_model.update(corpus)
new_dictionary = Dictionary(QueryText )
new_corpus = Corpus(QueryText, dictionary)
new_modelLDA = lda.create_model(new_dictionary, new_corpus)
existing_dictionary.add_document(new_dictionary)
exiting_lda_index.add_documents(new_modelLDA[new_corpus])
vec_bow = existing_dictionary.doc2bow(trigram)
vec_model = exiting_lda_model[vec_bow]
sims = exiting_lda_index[vec_model]
gensim\utils.py:862: UserWarning: detected Windows; aliasing chunkize to chunkize_serial
warnings.warn("detected Windows; aliasing chunkize to chunkize_serial")
<gensim.interfaces.TransformedCorpus object at 0x000002A501258240>
Traceback (most recent call last):
File "NLPServer\test.py", line 46, in <module>
vec_model = exiting_lda_model[vec_bow]
File "\gensim\models\ldamodel.py", line 1105, in __getitem__
return self.get_document_topics(bow, eps, self.minimum_phi_value, self.per_word_topics)
File "\gensim\models\ldamodel.py", line 946, in get_document_topics
gamma, phis = self.inference([bow], collect_sstats=per_word_topics)
File "\gensim\models\ldamodel.py", line 444, in inference
expElogbetad = self.expElogbeta[:, ids]
IndexError: index 713 is out of bounds for axis 1 with size 713
Traceback (most recent call last):
File "NLPLive\server.py", line 48, in <module>
sims = lda_index[vec_model]
File "\gensim\similarities\docsim.py", line 319, in __getitem__
self.close_shard() # no-op if no documents added to index since last query
File "\gensim\similarities\docsim.py", line 267, in close_shard
self.fresh_docs, num_terms=self.num_features, num_docs=len(self.fresh_docs), num_nnz=self.fresh_nnz
File "\gensim\similarities\docsim.py", line 693, in __init__
dtype=dtype, printprogress=10000
File "\gensim\matutils.py", line 92, in corpus2csc
indices[posnow: posnext] = [feature_id for feature_id, _ in doc]
ValueError: cannot copy sequence with size 2 to array axis with dimension 1
Traceback (most recent call last):
File "server.py", line 34, in <module>
lda_index.add_documents(new_ldaModel[new_corpus])
File "/home/ivan/.virtualenvs/p3/lib/python3.6/site-packages/gensim/similarities/docsim.py", line 226, in add_documents
self.reopen_shard()
File "/home/ivan/.virtualenvs/p3/lib/python3.6/site-packages/gensim/similarities/docsim.py", line 283, in reopen_shard
last_index = last_shard.get_index()
File "/home/ivan/.virtualenvs/p3/lib/python3.6/site-packages/gensim/similarities/docsim.py", line 113, in get_index
self.index = self.cls.load(self.fullname(), mmap='r')
File "/home/ivan/.virtualenvs/p3/lib/python3.6/site-packages/gensim/utils.py", line 281, in load
obj = unpickle(fname)
File "/home/ivan/.virtualenvs/p3/lib/python3.6/site-packages/gensim/utils.py", line 930, in unpickle
with smart_open(fname, 'rb') as f:
File "/home/ivan/.virtualenvs/p3/lib/python3.6/site-packages/smart_open/smart_open_lib.py", line 169, in smart_open
parsed_uri = ParseUri(uri)
File "/home/ivan/.virtualenvs/p3/lib/python3.6/site-packages/smart_open/smart_open_lib.py", line 432, in __init__
raise NotImplementedError("unknown URI scheme %r in %r" % (self.scheme, uri))
NotImplementedError: unknown URI scheme 'e' in 'E:\\Leaflet\\Development\\Python\\NLPServer\\Files\\models/lda_model.0'