Cox Proportional Hazards With TFP


Melinda Thielbar

Jul 24, 2020, 11:13:13 AM
to tfprob...@tensorflow.org
Hi, all. I’m working with a Cox Proportional Hazards model, and I’d like to implement it using tfp.glm.

Has anyone on the list tried this before? I’ve implemented Cox models in C++, and I’ve used an R package with a Cox model and read the notes on its implementation.

I’ve been using TFP for a while, but this is a fairly big undertaking. If there are examples of Cox models in TFP that I’ve missed, I’d really like to see what others have done.

Thanks!
—MFT


J C

Jul 27, 2020, 9:02:08 AM
to TensorFlow Probability
I was able to get AFT (accelerated failure time) working really easily by multiplying the scale term in Exponential/Weibull/other models by exp(X*beta). I imagine one could do coxph the same way, by writing out the complete likelihood.
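For instance, a minimal sketch of that scale trick with a Weibull wait-time distribution (tfd.Weibull is TFP's built-in; X, beta, scale0, and k are hypothetical placeholders, not code from an actual model):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

# Hypothetical design matrix (n x p), coefficients, and baseline parameters.
X = tf.constant([[1., 0.], [0., 1.], [1., 1.]])
beta = tf.Variable([0.1, -0.2])
scale0, k = 2.0, 1.5

# AFT: covariates accelerate/decelerate time by rescaling the scale term.
rv = tfd.Weibull(concentration=k,
                 scale=scale0 * tf.exp(tf.linalg.matvec(X, beta)))

times = tf.constant([1.3, 0.7, 2.1])
log_lik = tf.reduce_sum(rv.log_prob(times))  # maximize over beta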

J C

Jul 27, 2020, 9:30:46 AM
to TensorFlow Probability
Just in case you find it useful, here is a GeneralizedGamma distribution and a Weibull distribution:

# Assumed imports: the TFP-internal modules this code references,
# following the layout of the TFP source tree.
import tensorflow as tf
from tensorflow_probability.python.bijectors import softplus as softplus_bijector
from tensorflow_probability.python.distributions import distribution
from tensorflow_probability.python.internal import assert_util
from tensorflow_probability.python.internal import dtype_util
from tensorflow_probability.python.internal import prefer_static
from tensorflow_probability.python.internal import reparameterization
from tensorflow_probability.python.internal import tensor_util


class GeneralizedGamma(distribution.Distribution):
    """Generalized Gamma distribution.

    Following the Wikipedia parameterization:

    f(x; a=scale, d=shape, p=exponent) =
        \frac{(p/a^d) x^{d-1} e^{-(x/a)^p}}{\Gamma(d/p)}

    Arguments:
        scale -- positive scale parameter a
        shape -- positive shape parameter d
        exponent -- positive exponent parameter p
    """

    def __init__(self,
                 scale, shape, exponent,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='GeneralizedGamma'):
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype(
                [scale, shape, exponent], dtype_hint=tf.float32)
            self._scale = tensor_util.convert_nonref_to_tensor(
                scale, dtype=dtype, name='scale')
            self._shape = tensor_util.convert_nonref_to_tensor(
                shape, dtype=dtype, name='shape')
            self._exponent = tensor_util.convert_nonref_to_tensor(
                exponent, dtype=dtype, name='exponent')

            super(GeneralizedGamma, self).__init__(
                dtype=dtype,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                reparameterization_type=(
                    reparameterization.FULLY_REPARAMETERIZED),
                parameters=parameters,
                name=name)

    def _mean(self):
        return self.scale * tf.math.exp(
            tf.math.lgamma((self.shape + 1.) / self.exponent)
            - tf.math.lgamma(self.shape / self.exponent))

    def _variance(self):
        return self._scale**2 * (
            tf.math.exp(
                tf.math.lgamma((self.shape + 2.) / self.exponent)
                - tf.math.lgamma(self.shape / self.exponent))
            - tf.math.exp(
                2. * (tf.math.lgamma((self.shape + 1.) / self.exponent)
                      - tf.math.lgamma(self.shape / self.exponent))))

    def _cdf(self, x):
        # tf.math.igamma is the *regularized* lower incomplete gamma
        # function, so no extra 1/Gamma(d/p) factor is needed.
        return tf.math.igamma(self.shape / self.exponent,
                              (x / self.scale)**self.exponent)

    def _log_prob(self, x, scale=None, shape=None, exponent=None):
        scale = tf.convert_to_tensor(
            self.scale if scale is None else scale)
        shape = tf.convert_to_tensor(
            self.shape if shape is None else shape)
        exponent = tf.convert_to_tensor(
            self.exponent if exponent is None else exponent)
        log_unnormalized_prob = (
            tf.math.xlogy(shape - 1., x) - (x / scale)**exponent)
        log_prefactor = (
            tf.math.log(exponent) - tf.math.xlogy(shape, scale)
            - tf.math.lgamma(shape / exponent))
        return log_unnormalized_prob + log_prefactor

    def _entropy(self):
        scale = tf.convert_to_tensor(self.scale)
        shape = tf.convert_to_tensor(self.shape)
        exponent = tf.convert_to_tensor(self.exponent)
        return (
            tf.math.log(scale) + tf.math.lgamma(shape / exponent)
            - tf.math.log(exponent) + shape / exponent
            + (1. - shape) / exponent * tf.math.digamma(shape / exponent))

    def _stddev(self):
        return tf.math.sqrt(self._variance())

    def _default_event_space_bijector(self):
        # Support is the positive reals, so Softplus maps onto it.
        return softplus_bijector.Softplus(validate_args=self.validate_args)

    def _sample_control_dependencies(self, x):
        assertions = []
        if not self.validate_args:
            return assertions
        assertions.append(assert_util.assert_non_negative(
            x, message='Sample must be non-negative.'))
        return assertions

    @property
    def scale(self):
        return self._scale

    @property
    def shape(self):
        return self._shape

    @property
    def exponent(self):
        return self._exponent

    def _batch_shape_tensor(self, scale=None, shape=None, exponent=None):
        # broadcast_shape takes two shapes at a time, so nest the calls.
        return prefer_static.broadcast_shape(
            prefer_static.broadcast_shape(
                prefer_static.shape(self.scale if scale is None else scale),
                prefer_static.shape(self.shape if shape is None else shape)),
            prefer_static.shape(
                self.exponent if exponent is None else exponent))

    def _batch_shape(self):
        # Include all three parameters in the static broadcast.
        return tf.broadcast_static_shape(
            tf.broadcast_static_shape(self.scale.shape, self.shape.shape),
            self.exponent.shape)

    def _event_shape_tensor(self):
        return tf.constant([], dtype=tf.int32)

    def _sample_n(self, n, seed=None):
        """Sample by transforming Gamma draws.

        If G ~ Gamma(d/p, 1), then scale * G**(1/p) follows the
        generalized gamma distribution above.
        """
        gamma_samples = tf.random.gamma(
            shape=[n],
            alpha=self.shape / self.exponent,
            beta=1.,
            dtype=self.dtype,
            seed=seed)
        ggamma_samples = (
            self.scale * tf.math.exp(
                tf.math.log(gamma_samples) / self.exponent))
        return ggamma_samples

    def _event_shape(self):
        return tf.TensorShape([])

    def _parameter_control_dependencies(self, is_init):
        if not self.validate_args:
            return []
        assertions = []
        if is_init != tensor_util.is_ref(self.scale):
            assertions.append(assert_util.assert_positive(
                self.scale,
                message='Argument `scale` must be positive.'))
        if is_init != tensor_util.is_ref(self.shape):
            assertions.append(assert_util.assert_positive(
                self.shape,
                message='Argument `shape` must be positive.'))
        if is_init != tensor_util.is_ref(self.exponent):
            assertions.append(assert_util.assert_positive(
                self.exponent,
                message='Argument `exponent` must be positive.'))
        return assertions


class Weibull(GeneralizedGamma):
    """Weibull as the shape == exponent special case of GeneralizedGamma."""

    def __init__(self,
                 scale, shape,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='Weibull'):
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype(
                [scale, shape], dtype_hint=tf.float32)
            self._scale = tensor_util.convert_nonref_to_tensor(
                scale, dtype=dtype, name='scale')
            self._shape = tensor_util.convert_nonref_to_tensor(
                shape, dtype=dtype, name='shape')
            # Weibull ties the exponent to the shape parameter (d = p).
            self._exponent = tensor_util.convert_nonref_to_tensor(
                shape, dtype=dtype, name='exponent')

            # Deliberately skip GeneralizedGamma.__init__ (the parameters
            # are already set) and call Distribution.__init__ directly.
            super(GeneralizedGamma, self).__init__(
                dtype=dtype,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                reparameterization_type=(
                    reparameterization.FULLY_REPARAMETERIZED),
                parameters=parameters,
                name=name)

    @property
    def scale(self):
        return self._scale

    @property
    def shape(self):
        return self._shape

Melinda Thielbar

Jul 27, 2020, 9:40:25 AM
to J C, TensorFlow Probability
That’s a really good idea, JC. I will try that and let the group know.

Thank you for sharing your code. That’s a huge help.
—MFT



rif

Jul 27, 2020, 10:30:17 AM
to J C, TensorFlow Probability
JC, we do have a Weibull in the repo; is this different from what you shared? (I haven't looked at your code in any detail.)

rif



J C

Jul 27, 2020, 10:41:14 AM
to rif, TensorFlow Probability
Ah, thanks... I implemented mine last December, inheriting from a generalized gamma distribution. I noticed a Weibull bijector back then but not the distribution, though maybe I didn't look hard enough. My Weibull class was mainly used to test the GeneralizedGamma class.

rif

Jul 27, 2020, 10:46:07 AM
to J C, David Smalling, TensorFlow Probability

Hmm. If the generalized gamma is a generally useful distribution (what's the generalization?), perhaps we should consider adding it?

J C

Jul 27, 2020, 10:54:52 AM
to rif, David Smalling, TensorFlow Probability
It's a three-parameter distribution that has the exponential, Weibull, and gamma distributions as special cases. It's used in survival modelling.
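Concretely, in the parameterization from the class posted above (scale=a, shape=d, exponent=p), the special cases are just parameter settings. A sketch against that GeneralizedGamma class:

# Special cases of f(x; a, d, p) in the Wikipedia parameterization:
#   d = p = 1  ->  Exponential(rate = 1/a)
#   d = p = k  ->  Weibull(concentration = k, scale = a)
#   p = 1      ->  Gamma(concentration = d, rate = 1/a)
exponential_like = GeneralizedGamma(scale=2., shape=1., exponent=1.)
weibull_like = GeneralizedGamma(scale=2., shape=1.5, exponent=1.5)
gamma_like = GeneralizedGamma(scale=2., shape=3., exponent=1.)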

J C

Jul 28, 2020, 9:35:34 AM
to TensorFlow Probability, J C, David Smalling, Rif A. Saurous
The piecewise exponential model is also a very commonly used survival likelihood. I'm having someone on my team try to implement it as a learning problem for TFP, though we would appreciate a pre-existing implementation if one exists. This is something we could perhaps send upstream if there is interest.
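As a starting point, here is a minimal sketch of the piecewise-exponential log likelihood, assuming hypothetical precomputed tensors events[i, j] (event indicator for subject i in interval j) and exposure[i, j] (time at risk in that interval), plus a linear predictor eta = X @ beta. It leans on the standard equivalence with a Poisson likelihood in which log(exposure) acts as an offset:

import tensorflow as tf

def piecewise_exponential_log_lik(events, exposure, log_baseline, eta):
    # Log hazard for subject i in interval j: log(lambda_j) + x_i' beta.
    log_hazard = log_baseline[tf.newaxis, :] + eta[:, tf.newaxis]
    # Up to terms constant in the parameters, this is a Poisson
    # log likelihood with mean = hazard * exposure.
    return tf.reduce_sum(events * log_hazard - tf.exp(log_hazard) * exposure)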

Melinda Thielbar

Jul 28, 2020, 1:27:07 PM
to J C, TensorFlow Probability, David Smalling, Rif A. Saurous
Hi, JC. I was looking at the code today, and I don't see code that handles censored observations. Am I missing it? Or was it not needed for your implementation? (If I remember right, AFT experiments usually run until most components fail.)

Thank you again for the examples. They are very helpful. 
Best,
—MFT



--
Co-Founder, Research Triangle Analysts: www.rtpanalysts.org

J C

Jul 28, 2020, 1:33:32 PM
to Melinda Thielbar, TensorFlow Probability, David Smalling, Rif A. Saurous
For observed events, you use the point log likelihood, like so:

ll_observed = rv.log_prob(tf.squeeze(data['wait']))

For right-censored observations, you use the log tail probability, like so:

ll_censored = rv.log_survival_function(tf.squeeze(data['wait']))

If you have a boolean array observed that tells you which observations were observed rather than censored, then you combine them with

ll_survival = tf.where(observed, ll_observed, ll_censored)

I think left censoring is a bit trickier to treat, particularly for non-exponential wait-time distributions. You would need to condition/marginalize over the left timepoint.
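Putting those pieces together, a minimal sketch (times and observed are hypothetical placeholders; rv can be any TFP distribution with a log_survival_function):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

def censored_log_lik(rv, times, observed):
    ll_observed = rv.log_prob(times)               # exact event times
    ll_censored = rv.log_survival_function(times)  # right-censored times
    return tf.reduce_sum(tf.where(observed, ll_observed, ll_censored))

# For example:
# rv = tfd.Weibull(concentration=1.5, scale=2.0)
# censored_log_lik(rv, tf.constant([0.7, 1.3]), tf.constant([True, False]))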
--
Warm regards

J C

Jul 28, 2020, 1:36:26 PM
to Melinda Thielbar, TensorFlow Probability, David Smalling, Rif A. Saurous
To finish that example out for the case of AFT:

rv would be your random variable object for whatever distribution you are using. For AFT you would alter the scale parameter of that distribution by multiplying it by exp(X*beta), where X is n x p and beta is your vector of regression coefficients. Sorry about the tf.squeeze etc.; those are copy/pasted from our specific use of AFT, where we used the TF Dataset API.
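So in full, a hedged end-to-end sketch of that AFT setup (data['wait'], observed, X, beta, and k are placeholders standing in for the dataset pipeline):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

# AFT-adjusted Weibull: scale multiplied by exp(X @ beta).
rv = tfd.Weibull(concentration=k,
                 scale=tf.exp(tf.linalg.matvec(X, beta)))
times = tf.squeeze(data['wait'])
ll = tf.reduce_sum(tf.where(observed,
                            rv.log_prob(times),
                            rv.log_survival_function(times)))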
--
Warm regards