Hello, I loaded a word2vec model and attempted to continue training, but I'm getting errors. Any guidance would be much appreciated. Thank you!
mod1 = Word2Vec.load('path_to_model')
bigrams = Phrases.load('path_to_bigrams')
trigrams = Phrases.load('path_to_trigrams')
mod1.train(trigrams[bigrams[sentences]], total_words=676857300)
2015-07-18 19:41:11,887 : INFO : training model with 4 workers on 222306 vocabulary and 300 features, using sg=1 hs=1 sample=0.001 and negative=10
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/gensim/models/word2vec.py", line 705, in train
pushed_words += round((chunksize/self.corpus_count)/total_words)
AttributeError: 'Word2Vec' object has no attribute 'corpus_count'
>>> Exception in thread Thread-2:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 810, in __bootstrap_inner
self.run()
File "/usr/lib/python2.7/threading.py", line 763, in run
self.__target(*self.__args, **self.__kwargs)
File "/usr/local/lib/python2.7/dist-packages/gensim/models/word2vec.py", line 675, in worker_loop
if not worker_one_job(job, init):
File "/usr/local/lib/python2.7/dist-packages/gensim/models/word2vec.py", line 666, in worker_one_job
job_words = self._do_train_job(items, alpha, inits)
File "/usr/local/lib/python2.7/dist-packages/gensim/models/word2vec.py", line 623, in _do_train_job
tally += train_sentence_sg(self, sentence, alpha, work)
File "gensim/models/word2vec_inner.pyx", line 259, in gensim.models.word2vec_inner.train_sentence_sg (./gensim/models/word2vec_inner.c:3156)
cdef REAL_t *word_locks = <REAL_t *>(np.PyArray_DATA(model.syn0_lockf))
AttributeError: 'Word2Vec' object has no attribute 'syn0_lockf'
model = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz',binary = True)
model.train(sents) # sents is a list of sentences
/usr/local/lib/python2.7/dist-packages/gensim/models/word2vec.pyc in train(self, sentences, total_words, word_count, chunksize, total_examples, queue_factor, report_delay)
    683
    684         if total_words is None and total_examples is None:
--> 685             if self.corpus_count:
    686                 total_examples = self.corpus_count
    687                 logger.info("expecting %i examples, matching count from corpus used for vocabulary survey", total_examples)
d2vModel = Doc2Vec()
d2vModel.intersect_word2vec_format("models/" + model_dir + "/data/part-00000", binary=False)
print "Training .."
for epoch in range(10):
    d2vModel.train(labeled_line_sentence)
print "done."
File "/usr/local/lib/python2.7/dist-packages/gensim/models/word2vec.py", line 1098, in intersect_word2vec_format
logger.info("merged %d vectors into %s matrix from %s" % (overlap_count, self.syn0.shape, fname))
AttributeError: 'Doc2Vec' object has no attribute 'syn0'
data_list = ['data/prep/prep_data_train_twitter140.csv']
word2vec_path="model/GoogleNews-vectors-negative300.bin"
data_path = data_list[0]
word2vec_model = word2vec.Word2Vec.load_word2vec_format(word2vec_path, binary=True)
coreprepAPI_instance = CorePreprocessingAPI()
word2vec_model.syn0_lockf = np.ones(len(word2vec_model.syn0), dtype=np.float32)
df = pd.read_csv(data_path,sep=',')
print "Before dropna",len(df)
df = df[(df['content'] != '')]
print "After dropna",len(df)
print df.head()
df['tokens'] = df['content'].apply( lambda line : coreprepAPI_instance.text_to_tokens(line))
# drop null tokens list
# df = df[df['tokens'] != []] # bug here
print "After dropna tokens",len(df)
if word2vec_model: # updating word2vec model
    tokens = []
    print "Parsing tokens from updating set"
    for tk in df['tokens']:
        for e in tk:
            tokens.append(e)
    print "Number of tokens rows: ", len(tokens)
    word2vec_model.corpus_count = len(tokens)
    #word2vec_model.build_vocab(tokens)
    word2vec_model.train(tokens)
2016-12-08 15:36:50,036 : INFO : loading projection weights from model/GoogleNews-vectors-negative300.bin
2016-12-08 15:37:24,962 : INFO : loaded (3000000, 300) matrix from model/GoogleNews-vectors-negative300.bin
Before dropna 1048576
After dropna 1048576
   class  content
0  0      aww that's a bummer . you shoulda got david c...
1  0      is upset that he can't update his facebook by...
2  0      i dived many time for the ball . managed to s...
3  0      my whole_body feel itchy and like it on fire .
4  0      no it's not behaving at all . i'm mad . why a...
After dropna tokens 1048576
Parsing tokens from updating set
Number of tokens rows: 1916276
2016-12-08 15:38:09,099 : INFO : training model with 3 workers on 3000000 vocabulary and 300 features, using sg=0 hs=0 sample=0.001 negative=5
2016-12-08 15:38:09,099 : INFO : expecting 1916276 sentences, matching count from corpus used for vocabulary survey
Exception in thread Thread-164:
Traceback (most recent call last):
  File "/opt/miniconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/opt/miniconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/opt/miniconda2/lib/python2.7/site-packages/gensim/models/word2vec.py", line 735, in worker_loop
    tally, raw_tally = self._do_train_job(sentences, alpha, (work, neu1))
  File "/opt/miniconda2/lib/python2.7/site-packages/gensim/models/word2vec.py", line 671, in _do_train_job
    tally += train_batch_cbow(self, sentences, alpha, work, neu1)
  File "gensim/models/word2vec_inner.pyx", line 398, in gensim.models.word2vec_inner.train_batch_cbow (./gensim/models/word2vec_inner.c:4671)
    syn1neg = <REAL_t *>(np.PyArray_DATA(model.syn1neg))
AttributeError: 'Word2Vec' object has no attribute 'syn1neg'
Exception in thread Thread-163:
Traceback (most recent call last):
  File "/opt/miniconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/opt/miniconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/opt/miniconda2/lib/python2.7/site-packages/gensim/models/word2vec.py", line 735, in worker_loop
    tally, raw_tally = self._do_train_job(sentences, alpha, (work, neu1))
  File "/opt/miniconda2/lib/python2.7/site-packages/gensim/models/word2vec.py", line 671, in _do_train_job
    tally += train_batch_cbow(self, sentences, alpha, work, neu1)
  File "gensim/models/word2vec_inner.pyx", line 398, in gensim.models.word2vec_inner.train_batch_cbow (./gensim/models/word2vec_inner.c:4671)
    syn1neg = <REAL_t *>(np.PyArray_DATA(model.syn1neg))
AttributeError: 'Word2Vec' object has no attribute 'syn1neg'
Exception in thread Thread-162:
Traceback (most recent call last):
  File "/opt/miniconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/opt/miniconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/opt/miniconda2/lib/python2.7/site-packages/gensim/models/word2vec.py", line 735, in worker_loop
    tally, raw_tally = self._do_train_job(sentences, alpha, (work, neu1))
  File "/opt/miniconda2/lib/python2.7/site-packages/gensim/models/word2vec.py", line 671, in _do_train_job
    tally += train_batch_cbow(self, sentences, alpha, work, neu1)
  File "gensim/models/word2vec_inner.pyx", line 398, in gensim.models.word2vec_inner.train_batch_cbow (./gensim/models/word2vec_inner.c:4671)
    syn1neg = <REAL_t *>(np.PyArray_DATA(model.syn1neg))
AttributeError: 'Word2Vec' object has no attribute 'syn1neg'