For installation, try the Dockerfile at https://hub.docker.com/r/ebuildy/echoprint/~/dockerfile/
cat echoprint-dump-1.json | jq -r '.[].code' | echoprint-inverted-index inverted_index_1.bin
Traceback (most recent call last):
File "./bin/echoprint-inverted-index", line 19, in <module>
create_inverted_index(streamer(sys.stdin), args.indexfile)
File "/Library/Python/2.7/site-packages/echoprint_server/lib.py", line 57, in create_inverted_index
for batch_index, batch in enumerate(split_seq(songs, 65535)):
File "/Library/Python/2.7/site-packages/echoprint_server/lib.py", line 30, in split_seq
item = list(itertools.islice(it, size))
File "/Library/Python/2.7/site-packages/echoprint_server/lib.py", line 78, in parsing_code_streamer
yield decode_echoprint(line.strip())[1]
File "/Library/Python/2.7/site-packages/echoprint_server/lib.py", line 42, in decode_echoprint
unzipped = zlib.decompress(zipped)
zlib.error: Error -5 while decompressing data: incomplete or truncated stream
However, it works when I index only a small part of the data, like this:
cat echoprint-dump-1.json | jq -r '.[0:100] | .[].code' | echoprint-inverted-index inverted_index_1.bin
Did anyone else run into this issue?
def makeAndLoadInvertedIndex():
    """Build an inverted index from every song in MongoDB, then load it.

    Reads all documents from the ``test.songs`` collection on
    ``localhost:27017``, feeds their ``code`` fields (one per line) through
    ``parsing_code_streamer`` into ``create_inverted_index``, and loads the
    resulting index file.

    Side effects:
        * ``app.gids`` is reset to a list of ``{"id": <str(_id)>}`` dicts,
          one per document, in cursor order.
        * ``app.inverted_index`` is set to the freshly loaded index.
        * The index file at ``args.indexfile`` is (re)written.
    """
    client = MongoClient('localhost', 27017)
    code_lines = []
    app.gids = []
    try:
        for doc in client.test.songs.find({}):
            # One code per line: the streamer is fed a file-like object and
            # is expected to consume it line by line.
            code_lines.append(str(doc['code']) + "\n")
            app.gids.append({"id": str(doc['_id'])})
    finally:
        # The cursor is fully consumed (or failed) at this point, so the
        # connection is no longer needed.
        client.close()

    # Join once instead of growing a string with += inside the loop.
    # NOTE(review): relies on Python 2, where str is a byte string accepted
    # by io.BytesIO.
    f = io.BytesIO("".join(code_lines))

    print("submiting ....")
    create_inverted_index(parsing_code_streamer(f), args.indexfile)
    # Load the file that was just written; the previous hard-coded
    # './index.bin' only matched when args.indexfile happened to be that path.
    app.inverted_index = load_inverted_index([args.indexfile])
    print("all song submited")