I have a text classification model and I'm using TensorFlow 1.2.1. When I train and evaluate with the TensorFlow Estimator code below I only get 0.47 accuracy, but when I build the same model in Keras I reach 0.90. What is going wrong in my TensorFlow model?
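The Keras code I compared against isn't pasted here; roughly it is the same Embedding -> Flatten -> Dense stack built as a Sequential model. A sketch only (not the exact code I ran), assuming the same tokenized/padded inputs x_train and one-hot labels label_onehot as prepared below, and an illustrative number of epochs:

from tensorflow.contrib.keras.api.keras import models, layers

# Sketch of an equivalent Keras Sequential model for comparison
model = models.Sequential()
model.add(layers.Embedding(200, 100, input_length=maxlen))
model.add(layers.Flatten())
model.add(layers.Dense(50, activation='relu'))
model.add(layers.Dense(25, activation='relu'))
model.add(layers.Dense(9, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, label_onehot, batch_size=32, epochs=5)  # epochs value is illustrative

The TensorFlow Estimator version is below.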
import tensorflow as tf
# Model builder
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
import tensorflow.contrib.learn as tflearn
# Helpers for data processing
from tensorflow.contrib.keras.api.keras import preprocessing
from tensorflow.contrib.keras.api.keras import layers
import pandas as pd
print('TensorFlow version:', tf.__version__)
maxlen = 80
N_WORDS = 100
REVIEW_KEY = 'review'
CLASSIFICATION_KEY = 'target'
SEQUENCE_LENGTH_KEY = 'sequence_length'
token = preprocessing.text.Tokenizer(num_words=N_WORDS)

def create_dict():
    df = pd.read_csv('train.csv', header=None, sep=',', names=['target', 'review'])
    token.fit_on_texts(df['review'].values)
    word_index = token.word_index  # avoid shadowing the built-in dict
    import json
    with open('dict.json', 'w') as fp:
        json.dump(word_index, fp)

def get_train_input_fn():
    # load the training csv into memory
    df = pd.read_csv('train.csv')
    df = df.dropna()
    df.target = pd.Categorical(df.target)
    print(dict(enumerate(df.target.cat.categories)))
    df.target = df.target.cat.codes
    review_features = df['review'].values
    labels = df['target'].values
    import tensorflow.contrib.keras.api.keras as K
    label_onehot = K.utils.to_categorical(labels, 9)
    # tokenize the reviews and pad them to fixed-length integer sequences
    x_train = preprocessing.sequence.pad_sequences(token.texts_to_sequences(review_features), maxlen=maxlen)
    # wrap the padded sequences in the feature dict consumed by model_fn
    features = {REVIEW_KEY: x_train}
    from tensorflow.python.estimator.inputs import numpy_io
    return numpy_io.numpy_input_fn(features, label_onehot, batch_size=32, shuffle=True,
                                   num_epochs=None, num_threads=1, queue_capacity=1000)

def get_test_input_fn():
    # load the test csv into memory
    df = pd.read_csv('test.csv')
    df = df.dropna()
    df.target = pd.Categorical(df.target)
    print(dict(enumerate(df.target.cat.categories)))
    df.target = df.target.cat.codes
    review_features = df['review'].values
    labels = df['target'].values
    import tensorflow.contrib.keras.api.keras as K
    label_onehot = K.utils.to_categorical(labels, 9)
    # tokenize the reviews and pad them to fixed-length integer sequences
    x_test = preprocessing.sequence.pad_sequences(token.texts_to_sequences(review_features), maxlen=maxlen)
    # wrap the padded sequences in the feature dict consumed by model_fn
    features = {REVIEW_KEY: x_test}
    from tensorflow.python.estimator.inputs import numpy_io
    return numpy_io.numpy_input_fn(features, label_onehot, batch_size=32, shuffle=False,
                                   num_epochs=1, num_threads=1, queue_capacity=1000)

def model_fn(features, targets, mode):
    reviews = features[REVIEW_KEY]
    embed = layers.Embedding(200, 100, input_length=maxlen)(reviews)
    fl = layers.Flatten()(embed)
    d1 = layers.Dense(50, activation='relu')(fl)
    d2 = layers.Dense(25, activation='relu')(d1)
    logits = layers.Dense(9, activation='softmax')(d2)

    loss = None
    train_op = None
    if mode != tf.estimator.ModeKeys.PREDICT:
        loss = tf.losses.softmax_cross_entropy(onehot_labels=targets, logits=logits)
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer="Adam",
            learning_rate=0.01)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits)
    }
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            tf.argmax(input=logits, axis=1),
            tf.argmax(input=targets, axis=1))
    }
    return model_fn_lib.ModelFnOps(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)

create_dict()
estimator = tflearn.Estimator(model_fn=model_fn, model_dir='build/')
estimator.fit(input_fn=get_test_input_fn(), steps=1000)
evaluation = estimator.evaluate(input_fn=get_test_input_fn())
print("Loss: %s" % evaluation["loss"])
print("Accuracy: %f" % evaluation["accuracy"])