Trying to replicate the walkthrough code using the Transformer library but unable to finish it.


Matt Dixie

Feb 15, 2018, 1:22:55 PM
to tensor2tensor
Hey pals,



This is what I am trying to replicate...


pip install tensor2tensor

# See what problems, models, and hyperparameter sets are available.
# You can easily swap between them (and add new ones).
t2t-trainer --registry_help

PROBLEM=translate_ende_wmt32k
MODEL=transformer
HPARAMS=transformer_base_single_gpu

DATA_DIR=$HOME/t2t_data
TMP_DIR=/tmp/t2t_datagen
TRAIN_DIR=$HOME/t2t_train/$PROBLEM/$MODEL-$HPARAMS

mkdir -p $DATA_DIR $TMP_DIR $TRAIN_DIR

# Generate data
t2t-datagen \
  --data_dir=$DATA_DIR \
  --tmp_dir=$TMP_DIR \
  --problem=$PROBLEM

# Train
# *  If you run out of memory, add --hparams='batch_size=1024'.
t2t-trainer \
  --data_dir=$DATA_DIR \
  --problems=$PROBLEM \
  --model=$MODEL \
  --hparams_set=$HPARAMS \
  --output_dir=$TRAIN_DIR

# Decode

DECODE_FILE=$DATA_DIR/decode_this.txt
echo "Hello world" >> $DECODE_FILE
echo "Goodbye world" >> $DECODE_FILE
echo -e 'Hallo Welt\nAuf Wiedersehen Welt' > ref-translation.de

BEAM_SIZE=4
ALPHA=0.6

t2t-decoder \
  --data_dir=$DATA_DIR \
  --problems=$PROBLEM \
  --model=$MODEL \
  --hparams_set=$HPARAMS \
  --output_dir=$TRAIN_DIR \
  --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \
  --decode_from_file=$DECODE_FILE \
  --decode_to_file=translation.en

# See the translations
cat translation.en

# Evaluate the BLEU score
# Note: Report this BLEU score in papers, not the internal approx_bleu metric.
t2t-bleu --translation=translation.en --reference=ref-translation.de


This is my attempt...


import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import collections
os.environ['CUDA_VISIBLE_DEVICES'] = ''  # run on the CPU only
from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import registry
from tensor2tensor.utils import metrics

# Enable TF Eager execution
from tensorflow.contrib.eager.python import tfe
tfe.enable_eager_execution()

# Other setup
Modes = tf.estimator.ModeKeys

# Setup some directories
data_dir = os.path.expanduser("~/t2t/data")
tmp_dir = os.path.expanduser("~/t2t/tmp")
train_dir = os.path.expanduser("~/t2t/train")
checkpoint_dir = os.path.expanduser("~/t2t/checkpoints")
tf.gfile.MakeDirs(data_dir)
tf.gfile.MakeDirs(tmp_dir)
tf.gfile.MakeDirs(train_dir)
tf.gfile.MakeDirs(checkpoint_dir)
gs_data_dir = "gs://tensor2tensor-data"
gs_ckpt_dir = "gs://tensor2tensor-checkpoints/"

# List the registered problems.
print(problems.available())

enfr_problem = problems.problem("translate_enfr_wmt_small8k")
enfr_problem.generate_data(data_dir, tmp_dir)

# Setup the training data
BATCH_SIZE = 128
enfr_problem_train_dataset = enfr_problem.dataset(Modes.TRAIN, data_dir)
enfr_problem_train_dataset = enfr_problem_train_dataset.repeat(None).batch(BATCH_SIZE)

from tensor2tensor.models import transformer

model_name = "transformer"
hparams_set = "transformer_base"
hparams = trainer_lib.create_hparams(hparams_set, data_dir=data_dir, problem_name="translate_enfr_wmt_small8k")

VOCAB_SIZE = 8374
from tensor2tensor.data_generators import problem_hparams
# Stand-in problem hparams built from the vocab size.
p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE, VOCAB_SIZE)
hparams.problems = [p_hparams]
model = transformer.Transformer(hparams, Modes.TRAIN, p_hparams)


I don't know what to do next.
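
My best guess at the next step, going by the eager-execution examples, is a manual training loop along these lines, but I have not managed to get it running (NUM_STEPS is just a number I made up):

# In eager mode, wrap the loss so the optimizer gets gradients for the
# model's variables.
@tfe.implicit_value_and_gradients
def loss_fn(features):
  _, losses = model(features)
  return losses["training"]

optimizer = tf.train.AdamOptimizer()

NUM_STEPS = 500
for count, example in enumerate(tfe.Iterator(enfr_problem_train_dataset)):
  # T2T models expect 4D targets: [batch, timesteps, 1, 1].
  example["targets"] = tf.reshape(example["targets"], [BATCH_SIZE, -1, 1, 1])
  loss, gv = loss_fn(example)
  optimizer.apply_gradients(gv)
  if count % 50 == 0:
    print("Step: %d, Loss: %.3f" % (count, loss.numpy()))
  if count >= NUM_STEPS:
    break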


Dear lovely Google developers, please help me fix it :)

h.kara...@gmail.com

Oct 27, 2018, 8:34:47 AM
to tensor2tensor
Hi Matt,
Did your attempt ever succeed?
I'm looking for an answer to the same question.
Could you share your results with me?


 

Arben Sabani

Mar 10, 2019, 9:51:14 PM
to tensor2tensor
I am also experimenting with the framework, but
I would say something like this (not sure exactly what you are trying to achieve :-)):

problem_name = 'translate_enfr_wmt_small8k'

model_name = "transformer"
hparams_set = "transformer_base"
hparams = trainer_lib.create_hparams(hparams_set)

from tensor2tensor.utils.trainer_lib import create_run_config, create_experiment

# Init the RunConfig for model training
RUN_CONFIG = create_run_config(
      model_dir=train_dir, model_name=model_name
      # More params in this function control how often to save checkpoints, etc.
)

hparams.batch_size = BATCH_SIZE
# VOCAB_SIZE is a property of the problem and is defined there.
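# For example, I think you can check it after data generation like this
# (my guess at the API; see Problem.feature_encoders):
#   encoders = problems.problem(problem_name).feature_encoders(data_dir)
#   print(encoders["inputs"].vocab_size, encoders["targets"].vocab_size)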

# Create a TensorFlow Experiment object
tensorflow_exp_fn = create_experiment(
        run_config=RUN_CONFIG,
        hparams=hparams,
        model_name=model_name,
        problem_name=problem_name,
        data_dir=data_dir,
        train_steps=200000,  # Total number of training steps
        eval_steps=100  # Number of steps to perform for each evaluation
    )

# Kick off training
tensorflow_exp_fn.train_and_evaluate()
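
Once training finishes, decoding could look roughly like the eager example in the T2T Colab. This is a sketch I have not run end to end, reusing trainer_lib, registry, tfe, and Modes from your snippet; ckpt_path stands for whichever checkpoint ends up in train_dir:

import numpy as np

encoders = problems.problem(problem_name).feature_encoders(data_dir)

def encode(input_str):
  # String -> features dict ready for inference; 1 is the EOS id.
  inputs = encoders["inputs"].encode(input_str) + [1]
  batch_inputs = tf.reshape(inputs, [1, -1, 1])  # Make it 3D.
  return {"inputs": batch_inputs}

def decode(integers):
  # List of ids -> string, cutting at EOS (id 1) if present.
  integers = list(np.squeeze(integers))
  if 1 in integers:
    integers = integers[:integers.index(1)]
  return encoders["targets"].decode(integers)

# Rebuild hparams bound to the problem so the model knows the vocab sizes.
hparams = trainer_lib.create_hparams(hparams_set, data_dir=data_dir,
                                     problem_name=problem_name)
translate_model = registry.model(model_name)(hparams, Modes.EVAL)

def translate(inputs):
  encoded_inputs = encode(inputs)
  # ckpt_path: path to a checkpoint written during training (an assumption).
  with tfe.restore_variables_on_create(ckpt_path):
    model_output = translate_model.infer(encoded_inputs)["outputs"]
  return decode(model_output)

print(translate("Hello world"))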