import getopt
import os
import struct
import sys
# Parse the command line. Recognised flags are -o/--output and -v/--vocab;
# whatever getopt leaves over ends up in ``args``.
try:
    opts, args = getopt.getopt(sys.argv[1:], 'o:v:', ['output=', 'vocab='])
except getopt.GetoptError as e:
    # Unknown flag or missing argument: report it and exit with status 2.
    print(e, file=sys.stderr)
    sys.exit(2)

# Defaults, used when the corresponding flag is not supplied.
opt_output = 'vecs.bin'
opt_vocab = 'vocab.txt'

for flag, value in opts:
    if flag in ('-o', '--output'):
        opt_output = value
    elif flag in ('-v', '--vocab'):
        opt_vocab = value
def go(fhs):
    """Sum row-aligned text embeddings and write a vocab file plus a binary file.

    Each input line is split on whitespace: the first field is the token and
    the remaining fields are parsed as floats. Row *i* of every input must
    carry the same token; the vectors of that row are summed element-wise
    across the inputs.

    Output goes to the module-level paths ``opt_vocab`` (one token per line)
    and ``opt_output`` (each summed vector packed with the ``struct`` format
    ``'<dim>f'``, where ``dim`` is the length of the first vector written).

    Args:
        fhs: A sequence of open text file objects, or a single open text
            file object (treated as a one-element sequence).

    Raises:
        IOError: If the tokens on a row differ between inputs, or a row is
            blank in some inputs but not in others.
        ValueError: If a vector component cannot be parsed as a float.
        struct.error: If a row's vector length differs from the first row's.

    Note:
        Reading stops at the end of the shortest input, and within a row the
        summed vector is truncated to the shortest input vector, because
        both are driven by ``zip``.
    """
    # A lone file object is accepted too; wrap it so ``zip(*fhs)`` walks its
    # lines instead of trying to unpack the file itself.
    if hasattr(fhs, 'readline'):
        fhs = [fhs]

    fmt = None
    with open(opt_vocab, 'w') as vocab_out:
        with open(opt_output, 'wb') as vecs_out:
            # zip(*fhs) yields one tuple per row holding that row's line from
            # every input, streaming rather than loading whole files.
            for lines in zip(*fhs):
                parts = [line.split() for line in lines]
                if not any(parts):
                    # Blank in every input (e.g. a trailing newline): skip.
                    continue

                token = parts[0][0] if parts[0] else None
                if not all(parts) or any(part[0] != token for part in parts[1:]):
                    raise IOError('vector files must be aligned')

                print(token, file=vocab_out)

                # Transpose the per-file components so each ``xs`` holds the
                # same dimension from every input, then sum across inputs.
                columns = zip(*(part[1:] for part in parts))
                vec = [sum(float(x) for x in xs) for xs in columns]

                if fmt is None:
                    # The first row fixes the record layout for the file.
                    fmt = struct.Struct('%df' % len(vec))

                # pack() takes one positional argument per float.
                vecs_out.write(fmt.pack(*vec))
# Script driver: convert the embedding files named on the command line, or a
# hard-coded default file when none are given.
if args:
    fhs = []
    try:
        # Open inside the try so that handles opened before a failing open()
        # (or a failing go()) are still closed.
        for filename in args:
            fhs.append(open(filename))
        go(fhs)
    finally:
        for fh in fhs:
            fh.close()
else:
    # Raw string: the original literal mixed escaped and unescaped
    # backslashes ('\D' is an invalid escape sequence); the path is unchanged.
    with open(r'C:\Users\jhan\Desktop\tensorflow\swivel\swivel output\col_embedding.tsv') as fhs:
        # A single file object is passed here, not a list of them.
        go(fhs)