+++++++++++++++Training iteration 9+++++++++++++++++++
custom_metrics: {}
date: 2020-08-05_20-58-57
done: false
episode_len_mean: .nan
episode_reward_max: .nan
episode_reward_mean: .nan
episode_reward_min: .nan
episodes_this_iter: 0
episodes_total: 0
experiment_id: c8ec3950c1704719af5ffdfc54947987
hostname: ip-172-31-29-189
info:
learner:
policy_01:
allreduce_latency: 0.0
cur_kl_coeff: 0.5
cur_lr: 0.0002500000000000001
entropy: 2.8751889978136336
entropy_coeff: 0.009999999999999998
kl: 0.013963970422212566
policy_loss: -0.10596811185990061
total_loss: -0.12725649561200822
vf_explained_var: 0.9584423899650574
vf_loss: 0.0004815203574253246
num_steps_sampled: 36000
num_steps_trained: 36000
iterations_since_restore: 9
node_ip: 172.31.29.189
num_healthy_workers: 4
off_policy_estimator: {}
perf:
cpu_util_percent: 62.568965517241374
ram_util_percent: 30.829310344827576
pid: 1923
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf: {}
time_since_restore: 741.328654050827
time_this_iter_s: 80.76175713539124
time_total_s: 741.328654050827
timers:
learn_throughput: 102.519
learn_time_ms: 39016.975
sample_throughput: 92.416
sample_time_ms: 43282.315
update_time_ms: 37.503
timestamp: 1596661137
timesteps_since_restore: 0
timesteps_total: 36000
training_iteration: 9
+++++++++++++++++++++++++++++++++++++++++++++++++++