alexryan@alex-tpu:~/tpu$ cat train-mnist.sh
# Prerequisite:
# source set-vars.sh
python /usr/share/models/official/mnist/mnist_tpu.py \
--tpu=$TPU_NAME \
--DATA_DIR=${STORAGE_BUCKET}/data \
--MODEL_DIR=${STORAGE_BUCKET}/output \
--use_tpu=True \
--iterations=500 \
--train_steps=2000
alexryan@alex-tpu:~/tpu$ cat set-vars.sh
# Usage:
# source train.sh
export PATH="$PATH:`python -m site --user-base`/bin"
export STORAGE_BUCKET=gs://my_bucket/
export TPU_NAME=alex-tpu
export MODEL_DIR=gs://my_bucket/models/
printf "PATH=|%s|\n" "${PATH}"
printf "STORAGE_BUCKET=|%s|\n" "${STORAGE_BUCKET}"
printf "TPU_NAME=|%s|\n" "${TPU_NAME}"
printf "MODEL_DIR=|%s|\n" "${MODEL_DIR}"
alexryan@alex-tpu:~/tpu$
alexryan@alex-tpu:~/tpu$ ./train-mnist.sh
W1024 23:26:43.083925 139753385273088 __init__.py:44] file_cache is unavailable when using oauth2client >= 4.0.0 or google-auth
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/discovery_cache/__init__.py", line 41, in autodetect
from . import file_cache
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/discovery_cache/file_cache.py", line 41, in <module>
'file_cache is unavailable when using oauth2client >= 4.0.0 or google-auth')
ImportError: file_cache is unavailable when using oauth2client >= 4.0.0 or google-auth
WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpAhMnVF
W1024 23:26:43.252521 139753385273088 tf_logging.py:125] Using temporary folder as model directory: /tmp/tmpAhMnVF
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1ac525b190>, '_model_dir': '/tmp/tmpAhMnVF', '_protocol': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_session_config': allow_soft_placement: true
log_device_placement: true
cluster_def {
job {
name: "worker"
tasks {
value: "10.240.1.2:8470"
}
}
}
, '_tpu_config': TPUConfig(iterations_per_loop=500, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=2, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None), '_tf_random_seed': None, '_save_summary_steps': 100, '_device_fn': None, '_cluster': <tensorflow.contrib.cluster_resolver.python.training.tpu_cluster_resolver.TPUClusterResolver object at 0x7f1ac8a43250>, '_experimental_distribute': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': None, '_evaluation_master': u'grpc://10.240.1.2:8470', '_eval_distribute': None, '_train_distribute': None, '_master': u'grpc://10.240.1.2:8470'}
I1024 23:26:43.253330 139753385273088 tf_logging.py:115] Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1ac525b190>, '_model_dir': '/tmp/tmpAhMnVF', '_protocol': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_session_config': allow_soft_placement: true
log_device_placement: true
cluster_def {
job {
name: "worker"
tasks {
value: "10.240.1.2:8470"
}
}
}
, '_tpu_config': TPUConfig(iterations_per_loop=500, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=2, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None), '_tf_random_seed': None, '_save_summary_steps': 100, '_device_fn': None, '_cluster': <tensorflow.contrib.cluster_resolver.python.training.tpu_cluster_resolver.TPUClusterResolver object at 0x7f1ac8a43250>, '_experimental_distribute': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': None, '_evaluation_master': u'grpc://10.240.1.2:8470', '_eval_distribute': None, '_train_distribute': None, '_master': u'grpc://10.240.1.2:8470'}
INFO:tensorflow:_TPUContext: eval_on_tpu True
I1024 23:26:43.253621 139753385273088 tf_logging.py:115] _TPUContext: eval_on_tpu True
INFO:tensorflow:Querying Tensorflow master (grpc://10.240.1.2:8470) for TPU system metadata.
I1024 23:26:43.254575 139753385273088 tf_logging.py:115] Querying Tensorflow master (grpc://10.240.1.2:8470) for TPU system metadata.
2018-10-24 23:26:43.256290: W tensorflow/core/distributed_runtime/rpc/grpc_session.cc:349] GrpcSession::ListDevices will initialize the session with an empty graph and other defaults because the session has not yet been created.
INFO:tensorflow:Found TPU system:
I1024 23:26:43.264735 139753385273088 tf_logging.py:115] Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
I1024 23:26:43.265052 139753385273088 tf_logging.py:115] *** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
I1024 23:26:43.265506 139753385273088 tf_logging.py:115] *** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
I1024 23:26:43.265603 139753385273088 tf_logging.py:115] *** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 865846129352832139)
I1024 23:26:43.265665 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 865846129352832139)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6209664364760083711)
I1024 23:26:43.265888 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6209664364760083711)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 8132107236002965762)
I1024 23:26:43.265969 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 8132107236002965762)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6371634624298757016)
I1024 23:26:43.266042 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6371634624298757016)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 16529588649011723323)
I1024 23:26:43.266118 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 16529588649011723323)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 8845596694644158772)
I1024 23:26:43.266195 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 8845596694644158772)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 16188940170106469130)
I1024 23:26:43.266266 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 16188940170106469130)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 2892269351017409009)
I1024 23:26:43.266346 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 2892269351017409009)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 6580697550209944324)
I1024 23:26:43.266416 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 6580697550209944324)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 17179869184, 4429603230658933492)
I1024 23:26:43.266491 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 17179869184, 4429603230658933492)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 17179869184, 16627530887292553106)
I1024 23:26:43.266560 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 17179869184, 16627530887292553106)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 17179869184, 5522461793500686494)
I1024 23:26:43.266627 139753385273088 tf_logging.py:115] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 17179869184, 5522461793500686494)
INFO:tensorflow:Calling model_fn.
I1024 23:26:43.281177 139753385273088 tf_logging.py:115] Calling model_fn.
WARNING:tensorflow:From /usr/share/models/official/mnist/mnist_tpu.py:123: batch_and_drop_remainder (from tensorflow.contrib.data.python.ops.batching) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.Dataset.batch(..., drop_remainder=True)`.
W1024 23:26:43.311116 139753385273088 tf_logging.py:125] From /usr/share/models/official/mnist/mnist_tpu.py:123: batch_and_drop_remainder (from tensorflow.contrib.data.python.ops.batching) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.Dataset.batch(..., drop_remainder=True)`.
ERROR:tensorflow:Operation of type Placeholder (reshape_input) is not supported on the TPU. Execution will fail if this op is used in the graph.
E1024 23:26:43.370778 139753385273088 tf_logging.py:105] Operation of type Placeholder (reshape_input) is not supported on the TPU. Execution will fail if this op is used in the graph.
INFO:tensorflow:Create CheckpointSaverHook.
I1024 23:26:43.729510 139753385273088 tf_logging.py:115] Create CheckpointSaverHook.
INFO:tensorflow:Done calling model_fn.
I1024 23:26:43.744988 139753385273088 tf_logging.py:115] Done calling model_fn.
INFO:tensorflow:TPU job name worker
I1024 23:26:43.913090 139753385273088 tf_logging.py:115] TPU job name worker
INFO:tensorflow:Graph was finalized.
I1024 23:26:43.992217 139753385273088 tf_logging.py:115] Graph was finalized.
INFO:tensorflow:Running local_init_op.
I1024 23:26:44.090034 139753385273088 tf_logging.py:115] Running local_init_op.
INFO:tensorflow:Done running local_init_op.
I1024 23:26:44.107808 139753385273088 tf_logging.py:115] Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpAhMnVF/model.ckpt.
I1024 23:26:44.444988 139753385273088 tf_logging.py:115] Saving checkpoints for 0 into /tmp/tmpAhMnVF/model.ckpt.
INFO:tensorflow:Error recorded from training_loop: File system scheme '[local]' not implemented (file: '/tmp/tmpAhMnVF/model.ckpt-0_temp_e50b126c1d464cbf961a9a0e2cdd0257')
[[{{node save/SaveV2}} = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64], _device="/job:worker/replica:0/task:0/device:CPU:0"](save/ShardedFilename, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, conv2d/bias/Read/ReadVariableOp, conv2d/kernel/Read/ReadVariableOp, conv2d_1/bias/Read/ReadVariableOp, conv2d_1/kernel/Read/ReadVariableOp, dense/bias/Read/ReadVariableOp, dense/kernel/Read/ReadVariableOp, dense_1/bias/Read/ReadVariableOp, dense_1/kernel/Read/ReadVariableOp, global_step/Read/ReadVariableOp)]]
Caused by op u'save/SaveV2', defined at:
File "/usr/share/models/official/mnist/mnist_tpu.py", line 173, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "/usr/share/models/official/mnist/mnist_tpu.py", line 163, in main
estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2394, in train
saving_listeners=saving_listeners
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 356, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1181, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1215, in _train_model_default
saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1406, in _train_with_estimator_spec
log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 504, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 921, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 643, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1107, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1112, in _create_session
return self._sess_creator.create_session()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 800, in create_session
self.tf_sess = self._session_creator.create_session()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 557, in create_session
self._scaffold.finalize()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 215, in finalize
self._saver.build()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1106, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1143, in _build
build_save=build_save, build_restore=build_restore)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 778, in _build_internal
save_tensor = self._AddShardedSaveOps(filename_tensor, per_device)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 369, in _AddShardedSaveOps
return self._AddShardedSaveOpsForV2(filename_tensor, per_device)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 343, in _AddShardedSaveOpsForV2
sharded_saves.append(self._AddSaveOps(sharded_filename, saveables))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 284, in _AddSaveOps
save = self.save_op(filename_tensor, saveables)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 202, in save_op
tensors)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 1690, in save_v2
shape_and_slices=shape_and_slices, tensors=tensors, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3272, in create_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1768, in __init__
self._traceback = tf_stack.extract_stack()
UnimplementedError (see above for traceback): File system scheme '[local]' not implemented (file: '/tmp/tmpAhMnVF/model.ckpt-0_temp_e50b126c1d464cbf961a9a0e2cdd0257')
[[{{node save/SaveV2}} = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64], _device="/job:worker/replica:0/task:0/device:CPU:0"](save/ShardedFilename, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, conv2d/bias/Read/ReadVariableOp, conv2d/kernel/Read/ReadVariableOp, conv2d_1/bias/Read/ReadVariableOp, conv2d_1/kernel/Read/ReadVariableOp, dense/bias/Read/ReadVariableOp, dense/kernel/Read/ReadVariableOp, dense_1/bias/Read/ReadVariableOp, dense_1/kernel/Read/ReadVariableOp, global_step/Read/ReadVariableOp)]]
I1024 23:26:44.459172 139753385273088 tf_logging.py:115] Error recorded from training_loop: File system scheme '[local]' not implemented (file: '/tmp/tmpAhMnVF/model.ckpt-0_temp_e50b126c1d464cbf961a9a0e2cdd0257')
[[{{node save/SaveV2}} = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64], _device="/job:worker/replica:0/task:0/device:CPU:0"](save/ShardedFilename, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, conv2d/bias/Read/ReadVariableOp, conv2d/kernel/Read/ReadVariableOp, conv2d_1/bias/Read/ReadVariableOp, conv2d_1/kernel/Read/ReadVariableOp, dense/bias/Read/ReadVariableOp, dense/kernel/Read/ReadVariableOp, dense_1/bias/Read/ReadVariableOp, dense_1/kernel/Read/ReadVariableOp, global_step/Read/ReadVariableOp)]]
Caused by op u'save/SaveV2', defined at:
File "/usr/share/models/official/mnist/mnist_tpu.py", line 173, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "/usr/share/models/official/mnist/mnist_tpu.py", line 163, in main
estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2394, in train
saving_listeners=saving_listeners
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 356, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1181, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1215, in _train_model_default
saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1406, in _train_with_estimator_spec
log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 504, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 921, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 643, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1107, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1112, in _create_session
return self._sess_creator.create_session()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 800, in create_session
self.tf_sess = self._session_creator.create_session()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 557, in create_session
self._scaffold.finalize()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 215, in finalize
self._saver.build()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1106, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1143, in _build
build_save=build_save, build_restore=build_restore)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 778, in _build_internal
save_tensor = self._AddShardedSaveOps(filename_tensor, per_device)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 369, in _AddShardedSaveOps
return self._AddShardedSaveOpsForV2(filename_tensor, per_device)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 343, in _AddShardedSaveOpsForV2
sharded_saves.append(self._AddSaveOps(sharded_filename, saveables))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 284, in _AddSaveOps
save = self.save_op(filename_tensor, saveables)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 202, in save_op
tensors)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 1690, in save_v2
shape_and_slices=shape_and_slices, tensors=tensors, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3272, in create_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1768, in __init__
self._traceback = tf_stack.extract_stack()
UnimplementedError (see above for traceback): File system scheme '[local]' not implemented (file: '/tmp/tmpAhMnVF/model.ckpt-0_temp_e50b126c1d464cbf961a9a0e2cdd0257')
[[{{node save/SaveV2}} = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64], _device="/job:worker/replica:0/task:0/device:CPU:0"](save/ShardedFilename, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, conv2d/bias/Read/ReadVariableOp, conv2d/kernel/Read/ReadVariableOp, conv2d_1/bias/Read/ReadVariableOp, conv2d_1/kernel/Read/ReadVariableOp, dense/bias/Read/ReadVariableOp, dense/kernel/Read/ReadVariableOp, dense_1/bias/Read/ReadVariableOp, dense_1/kernel/Read/ReadVariableOp, global_step/Read/ReadVariableOp)]]
INFO:tensorflow:training_loop marked as finished
I1024 23:26:44.461478 139753385273088 tf_logging.py:115] training_loop marked as finished
WARNING:tensorflow:Reraising captured error
W1024 23:26:44.461673 139753385273088 tf_logging.py:120] Reraising captured error
Traceback (most recent call last):
File "/usr/share/models/official/mnist/mnist_tpu.py", line 173, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "/usr/share/models/official/mnist/mnist_tpu.py", line 163, in main
estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2400, in train
rendezvous.raise_errors()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/error_handling.py", line 128, in raise_errors
six.reraise(typ, value, traceback)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2394, in train
saving_listeners=saving_listeners
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 356, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1181, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1215, in _train_model_default
saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1406, in _train_with_estimator_spec
log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 504, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 921, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 643, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1107, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1112, in _create_session
return self._sess_creator.create_session()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 807, in create_session
hook.after_create_session(self.tf_sess, self.coord)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/basic_session_run_hooks.py", line 567, in after_create_session
self._save(session, global_step)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/basic_session_run_hooks.py", line 598, in _save
self._get_saver().save(session, self._save_path, global_step=step)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1433, in save
{self.saver_def.filename_tensor_name: checkpoint_file})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 887, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1110, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1286, in _do_run
run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1308, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.UnimplementedError: File system scheme '[local]' not implemented (file: '/tmp/tmpAhMnVF/model.ckpt-0_temp_e50b126c1d464cbf961a9a0e2cdd0257')
[[{{node save/SaveV2}} = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64], _device="/job:worker/replica:0/task:0/device:CPU:0"](save/ShardedFilename, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, conv2d/bias/Read/ReadVariableOp, conv2d/kernel/Read/ReadVariableOp, conv2d_1/bias/Read/ReadVariableOp, conv2d_1/kernel/Read/ReadVariableOp, dense/bias/Read/ReadVariableOp, dense/kernel/Read/ReadVariableOp, dense_1/bias/Read/ReadVariableOp, dense_1/kernel/Read/ReadVariableOp, global_step/Read/ReadVariableOp)]]
Caused by op u'save/SaveV2', defined at:
File "/usr/share/models/official/mnist/mnist_tpu.py", line 173, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "/usr/share/models/official/mnist/mnist_tpu.py", line 163, in main
estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2394, in train
saving_listeners=saving_listeners
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 356, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1181, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1215, in _train_model_default
saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1406, in _train_with_estimator_spec
log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 504, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 921, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 643, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1107, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1112, in _create_session
return self._sess_creator.create_session()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 800, in create_session
self.tf_sess = self._session_creator.create_session()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 557, in create_session
self._scaffold.finalize()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 215, in finalize
self._saver.build()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1106, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1143, in _build
build_save=build_save, build_restore=build_restore)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 778, in _build_internal
save_tensor = self._AddShardedSaveOps(filename_tensor, per_device)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 369, in _AddShardedSaveOps
return self._AddShardedSaveOpsForV2(filename_tensor, per_device)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 343, in _AddShardedSaveOpsForV2
sharded_saves.append(self._AddSaveOps(sharded_filename, saveables))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 284, in _AddSaveOps
save = self.save_op(filename_tensor, saveables)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 202, in save_op
tensors)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 1690, in save_v2
shape_and_slices=shape_and_slices, tensors=tensors, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3272, in create_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1768, in __init__
self._traceback = tf_stack.extract_stack()
UnimplementedError (see above for traceback): File system scheme '[local]' not implemented (file: '/tmp/tmpAhMnVF/model.ckpt-0_temp_e50b126c1d464cbf961a9a0e2cdd0257')
[[{{node save/SaveV2}} = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64], _device="/job:worker/replica:0/task:0/device:CPU:0"](save/ShardedFilename, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, conv2d/bias/Read/ReadVariableOp, conv2d/kernel/Read/ReadVariableOp, conv2d_1/bias/Read/ReadVariableOp, conv2d_1/kernel/Read/ReadVariableOp, dense/bias/Read/ReadVariableOp, dense/kernel/Read/ReadVariableOp, dense_1/bias/Read/ReadVariableOp, dense_1/kernel/Read/ReadVariableOp, global_step/Read/ReadVariableOp)]]
alexryan@alex-tpu:~/tpu$
Apologies, here is the tutorial:
https://cloud.google.com/tpu/docs/tutorials/mnist