I think I got it:
tensorflow/ops/metrics_impl.py
def accuracy(values, weights=None, name=None):
    """Accumulate raw `values` into a fixed-size 1-D buffer variable.

    NOTE(review): despite the name, this does not compute an accuracy
    metric. It appends the (optionally weighted) values into a
    100000-element `total` buffer and advances a `count` cursor that
    marks how many entries have been written so far.

    Args:
      values: A `Tensor` of values to accumulate (converted to float32).
      weights: Optional `Tensor` of weights broadcastable to `values`.
        If `None`, every value counts with weight 1.
      name: Optional variable-scope name; defaults to 'acc'.

    Returns:
      total: The float32 buffer variable holding accumulated values.
      update_op: An op that writes the current `values` into `total`
        and then advances the `count` cursor.
    """
    with variable_scope.variable_scope(name, 'acc', (values, weights)):
        values = math_ops.to_float(values)
        # Fixed capacity: writes past 100000 accumulated entries will fail.
        # TODO(review): make the buffer size a parameter.
        total = metric_variable([100000], dtypes.float32, name='total')
        count = metric_variable([], dtypes.int32, name='count')
        if weights is None:
            num_values = math_ops.to_float(array_ops.size(values))
        else:
            values, _, weights = _remove_squeezable_dimensions(
                predictions=values, labels=None, weights=weights)
            weights = weights_broadcast_ops.broadcast_weights(
                math_ops.to_float(weights), values)
            values = math_ops.multiply(values, weights)
            # NOTE(review): with non-0/1 weights, reduce_sum(weights) can
            # differ from the number of entries actually scattered below,
            # so the cursor advance may not match the write width — confirm
            # the intended weighting semantics with the caller.
            num_values = math_ops.reduce_sum(weights)
        with ops.control_dependencies([values]):
            # Flatten so the scatter indices line up with the 1-D buffer
            # even when `values` is multi-dimensional.
            flat_values = array_ops.reshape(values, [-1])
            # BUG FIX: read `count` exactly once. The original computed
            # `to_int32(count)` twice inside range(); two independent reads
            # of a mutable variable can disagree in graph mode.
            start = math_ops.to_int32(count)
            indices = math_ops.range(
                start, start + array_ops.size(flat_values),
                dtype=dtypes.int32)
            update_total_op = state_ops.scatter_update(
                total, indices, flat_values)
        # BUG FIX: the original grouped the scatter and the count increment
        # with no ordering constraint, so `count` could be advanced before
        # the scatter read it, writing values at the wrong offset. Force the
        # scatter to complete before the cursor moves.
        with ops.control_dependencies([update_total_op]):
            update_count_op = state_ops.assign_add(
                count, math_ops.to_int32(num_values))
        return total, control_flow_ops.group(
            (update_total_op, update_count_op))
(had to repurpose metrics.accuracy, because it refused to see the function when I created my own)
tensor2tensor/utils/metrics.py, problem_metric_fn():
... return tf.metrics.accuracy(scores, weights) if metric_fn==bleu_hook.bleu_score else tf.metrics.mean(scores, weights)
It's really ugly, but seems to work.