201 check_output(cmd, stderr=PIPE)
Regards
Lev
# Time slices handed to DtmModel: two slices of 50 entries each.
# NOTE(review): the original note said "assuming a topic every 20 messages",
# which does not obviously match [50]*2 -- confirm the intended slicing.
time_seq = [50] * 2

# df['EmailBody'] is a pandas Series object containing lists of tuples per
# cell; keep only the first element of every tuple in each cell.
corpora = [[pair[0] for pair in cell] for cell in df['EmailBody']]


class DTMcorpus(gensim.corpora.textcorpus.TextCorpus):
    """TextCorpus subclass that serves the already-prepared token lists.

    ``self.input`` is returned as-is by ``get_texts`` and its length is
    reported by ``__len__``.
    """

    def get_texts(self):
        """Return the stored input unchanged."""
        return self.input

    def __len__(self):
        """Return the number of entries in the stored input."""
        return len(self.input)


corpus = DTMcorpus(corpora)
# Debugging aid: print(corpus.dictionary.token2id)

# Location of the DTM installation; the DTM_HOME environment variable
# overrides the hard-coded fallback directory.
dtm_home = os.environ.get('DTM_HOME', "/my/personal/directory/bin/dtm-master")

# The dtm-linux64 binary was chosen for this project and is expected at
# <dtm_home>/bin/dtm-linux64.
if dtm_home:
    dtm_path = os.path.join(dtm_home, 'bin', 'dtm-linux64')
else:
    dtm_path = None

model = gensim.models.wrappers.dtmmodel.DtmModel(
    dtm_path,
    corpus,
    time_seq,
    num_topics=2,
    id2word=corpus.dictionary,
    initialize_lda=True,
)
INFO:gensim.corpora.dictionary:adding document #0 to Dictionary(0 unique tokens: []) INFO:gensim.corpora.dictionary:built Dictionary(2715 unique tokens: ['holding', 'freshman', 'virtuous', 'aware', 'govt']...) from 100 documents (total 6171 corpus positions) INFO:gensim.models.wrappers.dtmmodel:serializing temporary corpus to /tmp/eb0661_train-mult.dat INFO:gensim.corpora.bleicorpus:no word id mapping provided; initializing from corpus INFO:gensim.corpora.bleicorpus:storing corpus in Blei's LDA-C format into /tmp/eb0661_train-mult.dat INFO:gensim.corpora.bleicorpus:saving vocabulary of 2715 words to /tmp/eb0661_train-mult.dat.vocab INFO:gensim.models.wrappers.dtmmodel:training DTM with args --ntopics=2 --model=dtm --mode=fit --initialize_lda=true --corpus_prefix=/tmp/eb0661_train --outname=/tmp/eb0661_train_out --alpha=0.01 --lda_max_em_iter=10 --lda_sequence_min_iter=6 --lda_sequence_max_iter=20 --top_chain_var=0.005 --rng_seed=0 INFO:gensim.models.wrappers.dtmmodel:Running command ['.../bin/dtm-master/bin/dtm-linux64', '--ntopics=2', '--model=dtm', '--mode=fit', '--initialize_lda=true', '--corpus_prefix=/tmp/eb0661_train', '--outname=/tmp/eb0661_train_out', '--alpha=0.01', '--lda_max_em_iter=10', '--lda_sequence_min_iter=6', '--lda_sequence_max_iter=20', '--top_chain_var=0.005', '--rng_seed=0']
PermissionError Traceback (most recent call last) <ipython-input-132-8a1f0e522247> in <module>() 38 dtm_path = os.path.join(dtm_home, 'bin','dtm-linux64') if dtm_home else None 39 ---> 40 model = gensim.models.wrappers.dtmmodel.DtmModel(dtm_path, corpus, time_seq, num_topics=2, id2word=corpus.dictionary, initialize_lda=True) 41 #model = gensim.models.wrappers.dtmmodel(dtm_path, df['EmailBody'], time_seq, num_topics=5, initialize_lda=True) 42 .../lib/python3.4/site-packages/gensim/models/wrappers/dtmmodel.py in __init__(self, dtm_path, corpus, time_slices, mode, model, num_topics, id2word, prefix, lda_sequence_min_iter, lda_sequence_max_iter, lda_max_em_iter, alpha, top_chain_var, rng_seed, initialize_lda) 126 127 if corpus is not None: --> 128 self.train(corpus, time_slices, mode, model) 129 130 def fout_liklihoods(self): .../lib/python3.4/site-packages/gensim/models/wrappers/dtmmodel.py in train(self, corpus, time_slices, mode, model) 199 cmd = [self.dtm_path] + arguments.split() 200 logger.info("Running command %s" % cmd) --> 201 check_output(cmd, stderr=PIPE) 202 203 self.em_steps = np.loadtxt(self.fem_steps()) .../lib/python3.4/site-packages/gensim/utils.py in check_output(*popenargs, **kwargs) 1140 """ 1141 try: -> 1142 process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) 1143 output, unused_err = process.communicate() 1144 retcode = process.poll() /usr/lib/python3.4/subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds) 857 c2pread, c2pwrite, 858 errread, errwrite, --> 859 restore_signals, start_new_session) 860 except: 861 # Cleanup if the child failed starting. 
/usr/lib/python3.4/subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session) 1455 else: 1456 err_msg += ': ' + repr(orig_executable) -> 1457 raise child_exception_type(errno_num, err_msg) 1458 raise child_exception_type(err_msg) 1459 PermissionError: [Errno 13] Permission denied
/usr/lib/python3.4/subprocess.py
What happens when you run this command yourself? Do you have write permissions in that folder?
you are welcome to check out our new pure python
Running the code directly from my terminal, I get a Permission Denied error (?).
Importing ldaseqmodel.LdaSeqModel should work even when it is missing from __init__.py.
Do you have write permissions in the output folder and in /tmp?
chmod +x dtm-darwin64
Here dtm-darwin64 is my executable. The permission denied error went away after I did this.
Let me know if this is helpful!