Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCHv5 04/20] thermal: cpu_cooling: introduce of_cpufreq_cooling_register

16 views
Skip to first unread message

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:02 PM11/12/13
to
This patch introduces an API to register cpufreq cooling device
based on device tree node.

The registration via device tree node differs from normal
registration due to the fact that it is needed to fill
the device_node structure in order to be able to match
the cooling devices with trip points.

Cc: Zhang Rui <rui....@intel.com>
Cc: linu...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
drivers/thermal/Kconfig | 1 +
drivers/thermal/cpu_cooling.c | 56 ++++++++++++++++++++++++++++++++++++++-----
include/linux/cpu_cooling.h | 25 +++++++++++++++++++
3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 596eeee..754c65d 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -92,6 +92,7 @@ config THERMAL_GOV_USER_SPACE
config CPU_THERMAL
bool "generic cpu cooling support"
depends on CPU_FREQ
+ depends on THERMAL_OF
select CPU_FREQ_TABLE
help
This implements the generic cpu cooling mechanism through frequency
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 02a46f2..a6cb553 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -417,18 +417,21 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
};

/**
- * cpufreq_cooling_register - function to create cpufreq cooling device.
+ * __cpufreq_cooling_register - helper function to create cpufreq cooling device
+ * @np: a valid struct device_node to the cooling device device tree node
* @clip_cpus: cpumask of cpus where the frequency constraints will happen.
*
* This interface function registers the cpufreq cooling device with the name
* "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
- * cooling devices.
+ * cooling devices. It also gives the opportunity to link the cooling device
+ * with a device tree node, in order to bind it via the thermal DT code.
*
* Return: a valid struct thermal_cooling_device pointer on success,
* on failure, it returns a corresponding ERR_PTR().
*/
-struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+static struct thermal_cooling_device *
+__cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus)
{
struct thermal_cooling_device *cool_dev;
struct cpufreq_cooling_device *cpufreq_dev = NULL;
@@ -467,8 +470,8 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus)
snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
cpufreq_dev->id);

- cool_dev = thermal_cooling_device_register(dev_name, cpufreq_dev,
- &cpufreq_cooling_ops);
+ cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
+ &cpufreq_cooling_ops);
if (IS_ERR(cool_dev)) {
release_idr(&cpufreq_idr, cpufreq_dev->id);
kfree(cpufreq_dev);
@@ -488,9 +491,50 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus)

return cool_dev;
}
+
+/**
+ * cpufreq_cooling_register - function to create cpufreq cooling device.
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ *
+ * This interface function registers the cpufreq cooling device with the name
+ * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
+ * cooling devices.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+cpufreq_cooling_register(const struct cpumask *clip_cpus)
+{
+ return __cpufreq_cooling_register(NULL, clip_cpus);
+}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);

/**
+ * of_cpufreq_cooling_register - function to create cpufreq cooling device.
+ * @np: a valid struct device_node to the cooling device device tree node
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ *
+ * This interface function registers the cpufreq cooling device with the name
+ * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
+ * cooling devices. Using this API, the cpufreq cooling device will be
+ * linked to the device tree node provided.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus)
+{
+ if (!np)
+ return ERR_PTR(-EINVAL);
+
+ return __cpufreq_cooling_register(np, clip_cpus);
+}
+EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
+
+/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
* @cdev: thermal cooling device pointer.
*
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index a5d52ee..c303d38 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -24,6 +24,7 @@
#ifndef __CPU_COOLING_H__
#define __CPU_COOLING_H__

+#include <linux/of.h>
#include <linux/thermal.h>
#include <linux/cpumask.h>

@@ -36,6 +37,24 @@ struct thermal_cooling_device *
cpufreq_cooling_register(const struct cpumask *clip_cpus);

/**
+ * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
+ * @np: a valid struct device_node to the cooling device device tree node.
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ */
+#ifdef CONFIG_THERMAL_OF
+struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus);
+#else
+static inline struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus)
+{
+ return NULL;
+}
+#endif
+
+/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
* @cdev: thermal cooling device pointer.
*/
@@ -48,6 +67,12 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus)
{
return NULL;
}
+static inline struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus)
+{
+ return NULL;
+}
static inline
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
--
1.8.2.1.342.gfa7285d

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:02 PM11/12/13
to
This patch adds to lm75 temperature sensor the possibility
to expose itself as thermal zone device, registered on the
thermal framework.

The thermal zone is built only if a device tree node
describing a thermal zone for this sensor is present
inside the lm75 DT node. Otherwise, the driver behavior
will be the same.

Cc: Jean Delvare <kh...@linux-fr.org>
Cc: lm-se...@lm-sensors.org
Cc: linux-...@vger.kernel.org
Acked-by: Guenter Roeck <li...@roeck-us.net>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
drivers/hwmon/lm75.c | 35 ++++++++++++++++++++++++++++++-----
1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index c03b490..1d3600a 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -27,6 +27,8 @@
#include <linux/hwmon-sysfs.h>
#include <linux/err.h>
#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/thermal.h>
#include "lm75.h"


@@ -70,6 +72,7 @@ static const u8 LM75_REG_TEMP[3] = {
/* Each client has this additional data */
struct lm75_data {
struct device *hwmon_dev;
+ struct thermal_zone_device *tz;
struct mutex update_lock;
u8 orig_conf;
u8 resolution; /* In bits, between 9 and 12 */
@@ -90,22 +93,36 @@ static struct lm75_data *lm75_update_device(struct device *dev);

/*-----------------------------------------------------------------------*/

+static inline long lm75_reg_to_mc(s16 temp, u8 resolution)
+{
+ return ((temp >> (16 - resolution)) * 1000) >> (resolution - 8);
+}
+
/* sysfs attributes for hwmon */

+static int lm75_read_temp(void *dev, long *temp)
+{
+ struct lm75_data *data = lm75_update_device(dev);
+
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ *temp = lm75_reg_to_mc(data->temp[0], data->resolution);
+
+ return 0;
+}
+
static ssize_t show_temp(struct device *dev, struct device_attribute *da,
char *buf)
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
struct lm75_data *data = lm75_update_device(dev);
- long temp;

if (IS_ERR(data))
return PTR_ERR(data);

- temp = ((data->temp[attr->index] >> (16 - data->resolution)) * 1000)
- >> (data->resolution - 8);
-
- return sprintf(buf, "%ld\n", temp);
+ return sprintf(buf, "%ld\n", lm75_reg_to_mc(data->temp[attr->index],
+ data->resolution));
}

static ssize_t set_temp(struct device *dev, struct device_attribute *da,
@@ -271,6 +288,13 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
goto exit_remove;
}

+ data->tz = thermal_zone_of_sensor_register(&client->dev,
+ 0,
+ &client->dev,
+ lm75_read_temp, NULL);
+ if (IS_ERR(data->tz))
+ data->tz = NULL;
+
dev_info(&client->dev, "%s: sensor '%s'\n",
dev_name(data->hwmon_dev), client->name);

@@ -285,6 +309,7 @@ static int lm75_remove(struct i2c_client *client)
{
struct lm75_data *data = i2c_get_clientdata(client);

+ thermal_zone_of_sensor_unregister(&client->dev, data->tz);
hwmon_device_unregister(data->hwmon_dev);
sysfs_remove_group(&client->dev.kobj, &lm75_group);
lm75_write_value(client, LM75_REG_CONF, data->orig_conf);

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:02 PM11/12/13
to
This patch changes the cpufreq-cpu0 driver to consider if
a cpu needs cooling (with cpufreq). In case the cooling is needed,
the cpu0 device tree node needs to be properly configured
with cooling device properties.

In case these properties are present,, the driver will
load a cpufreq cooling device in the system. The cpufreq-cpu0
driver is not interested in determining how the system should
be using the cooling device. The driver is responsible
only of loading the cooling device.

Describing how the cooling device will be used can be
accomplished by setting up a thermal zone that references
and is composed by the cpufreq cooling device.

Cc: "Rafael J. Wysocki" <r...@sisk.pl>
Cc: Viresh Kumar <viresh...@linaro.org>
Cc: Grant Likely <grant....@linaro.org>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: cpu...@vger.kernel.org
Cc: linu...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: devicetre...@lists.ozlabs.org
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
.../devicetree/bindings/cpufreq/cpufreq-cpu0.txt | 7 +++++++
drivers/cpufreq/Kconfig | 2 +-
drivers/cpufreq/cpufreq-cpu0.c | 16 ++++++++++++++++
3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
index 051f764..f055515 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
@@ -15,6 +15,10 @@ Optional properties:
- clock-latency: Specify the possible maximum transition latency for clock,
in unit of nanoseconds.
- voltage-tolerance: Specify the CPU voltage tolerance in percentage.
+- #cooling-cells:
+- cooling-min-level:
+- cooling-max-level:
+ Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.

Examples:

@@ -33,6 +37,9 @@ cpus {
198000 850000
>;
clock-latency = <61036>; /* two CLK32 periods */
+ #cooling-cells = <2>;
+ cooling-min-level = <0>;
+ cooling-max-level = <2>;
};

cpu@1 {
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 534fcb8..fc1e9a5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -186,7 +186,7 @@ config CPU_FREQ_GOV_CONSERVATIVE

config GENERIC_CPUFREQ_CPU0
tristate "Generic CPU0 cpufreq driver"
- depends on HAVE_CLK && REGULATOR && PM_OPP && OF
+ depends on HAVE_CLK && REGULATOR && PM_OPP && OF && THERMAL && CPU_THERMAL
select CPU_FREQ_TABLE
help
This adds a generic cpufreq driver for CPU0 frequency management.
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index c522a95..568aaf3 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -13,7 +13,9 @@

#include <linux/clk.h>
#include <linux/cpu.h>
+#include <linux/cpu_cooling.h>
#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/of.h>
@@ -21,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
+#include <linux/thermal.h>

static unsigned int transition_latency;
static unsigned int voltage_tolerance; /* in percentage */
@@ -29,6 +32,7 @@ static struct device *cpu_dev;
static struct clk *cpu_clk;
static struct regulator *cpu_reg;
static struct cpufreq_frequency_table *freq_table;
+static struct thermal_cooling_device *cdev;

static int cpu0_verify_speed(struct cpufreq_policy *policy)
{
@@ -260,6 +264,17 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
goto out_free_table;
}

+ /*
+ * For now, just loading the cooling device;
+ * thermal DT code takes care of matching them.
+ */
+ if (of_find_property(np, "#cooling-cells", NULL)) {
+ cdev = of_cpufreq_cooling_register(np, cpu_present_mask);
+ if (IS_ERR(cdev))
+ pr_err("running cpufreq without cooling device: %ld\n",
+ PTR_ERR(cdev));
+ }
+
of_node_put(np);
return 0;

@@ -272,6 +287,7 @@ out_put_node:

static int cpu0_cpufreq_remove(struct platform_device *pdev)
{
+ cpufreq_cooling_unregister(cdev);
cpufreq_unregister_driver(&cpu0_cpufreq_driver);
opp_free_cpufreq_table(cpu_dev, &freq_table);

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:02 PM11/12/13
to
Hello all,

Here is v5 of the work of representing hardware thermal properties
in device tree infrastructure. Previous versions can be
found here:
V4: https://lkml.org/lkml/2013/9/26/787
V3: https://lkml.org/lkml/2013/9/15/122
RFCv2: http://lkml.org/lkml/2013/8/23/594
RFCv1: http://lkml.org/lkml/2013/7/22/319

The differences from V4 of this series are pretty minor. Here is a shortlog:
1. Rephrased couple of sentences and improved description of a couple of properties.
2. Removed the macro constants for trip type from the binding header file.
3. Due to (3), patches 09/14/15 were adapted to use plain string constants
4. As per suggestion by Nishanth M. I also included in this series two
patches to move TI bandgap nodes to belong under OCP.
5. I volunteered to maintain the thermal binding, so added that to MAINTAINERS.

This specific series has been tested on OMAP4430, OMAP4460, OMAP5432 and DRA7.

All best,

Eduardo Valentin (20):
thermal: allow registering without .get_temp
thermal: introduce device tree parser
thermal: core: introduce thermal_of_cooling_device_register
thermal: cpu_cooling: introduce of_cpufreq_cooling_register
cpufreq: cpufreq-cpu0: add dt node parsing for cooling device
properties
hwmon: lm75: expose to thermal fw via DT nodes
hwmon: tmp102: expose to thermal fw via DT nodes
thermal: ti-soc-thermal: use thermal DT infrastructure
arm: dts: add omap4 CPU thermal data
arm: dts: add omap4430 thermal data
arm: dts: add omap4460 thermal data
arm: dts: add cooling properties on omap4430 cpu node
arm: dts: add cooling properties on omap4460 cpu node
arm: dts: add omap5 GPU thermal data
arm: dts: add omap5 CORE thermal data
arm: dts: add omap5 thermal data
arm: dts: add cooling properties on omap5 cpu node
arm: dts: make OMAP443x bandgap node to belong to OCP
arm: dts: make OMAP4460 bandgap node to belong to OCP
MAINTAINERS: add maintainer entry for thermal bindings

.../devicetree/bindings/cpufreq/cpufreq-cpu0.txt | 7 +
.../devicetree/bindings/thermal/thermal.txt | 586 ++++++++++++++
MAINTAINERS | 1 +
arch/arm/boot/dts/omap4-cpu-thermal.dtsi | 41 +
arch/arm/boot/dts/omap443x.dtsi | 23 +-
arch/arm/boot/dts/omap4460.dtsi | 29 +-
arch/arm/boot/dts/omap5-core-thermal.dtsi | 28 +
arch/arm/boot/dts/omap5-gpu-thermal.dtsi | 28 +
arch/arm/boot/dts/omap5.dtsi | 15 +-
drivers/cpufreq/Kconfig | 2 +-
drivers/cpufreq/cpufreq-cpu0.c | 16 +
drivers/hwmon/lm75.c | 35 +-
drivers/hwmon/tmp102.c | 19 +
drivers/thermal/Kconfig | 14 +
drivers/thermal/Makefile | 1 +
drivers/thermal/cpu_cooling.c | 56 +-
drivers/thermal/of-thermal.c | 849 +++++++++++++++++++++
drivers/thermal/thermal_core.c | 77 +-
drivers/thermal/thermal_core.h | 9 +
drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 77 +-
include/dt-bindings/thermal/thermal.h | 17 +
include/linux/cpu_cooling.h | 25 +
include/linux/thermal.h | 32 +-
23 files changed, 1936 insertions(+), 51 deletions(-)
create mode 100644 Documentation/devicetree/bindings/thermal/thermal.txt
create mode 100644 arch/arm/boot/dts/omap4-cpu-thermal.dtsi
create mode 100644 arch/arm/boot/dts/omap5-core-thermal.dtsi
create mode 100644 arch/arm/boot/dts/omap5-gpu-thermal.dtsi
create mode 100644 drivers/thermal/of-thermal.c
create mode 100644 include/dt-bindings/thermal/thermal.h

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:03 PM11/12/13
to
This patch adds a new API to allow registering cooling devices
in the thermal framework derived from device tree nodes.

This API links the cooling device with the device tree node
so that binding with thermal zones is possible, given
that thermal zones are pointing to cooling device
device tree nodes.
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
drivers/thermal/thermal_core.c | 58 +++++++++++++++++++++++++++++++++++++++---
include/linux/thermal.h | 4 +++
2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index aba68dc..3092bfb 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -34,6 +34,7 @@
#include <linux/thermal.h>
#include <linux/reboot.h>
#include <linux/string.h>
+#include <linux/of.h>
#include <net/netlink.h>
#include <net/genetlink.h>

@@ -1055,7 +1056,8 @@ static struct class thermal_class = {
};

/**
- * thermal_cooling_device_register() - register a new thermal cooling device
+ * __thermal_cooling_device_register() - register a new thermal cooling device
+ * @np: a pointer to a device tree node.
* @type: the thermal cooling device type.
* @devdata: device private data.
* @ops: standard thermal cooling devices callbacks.
@@ -1063,13 +1065,16 @@ static struct class thermal_class = {
* This interface function adds a new thermal cooling device (fan/processor/...)
* to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
* to all the thermal zone devices registered at the same time.
+ * It also gives the opportunity to link the cooling device to a device tree
+ * node, so that it can be bound to a thermal zone created out of device tree.
*
* Return: a pointer to the created struct thermal_cooling_device or an
* ERR_PTR. Caller must check return value with IS_ERR*() helpers.
*/
-struct thermal_cooling_device *
-thermal_cooling_device_register(char *type, void *devdata,
- const struct thermal_cooling_device_ops *ops)
+static struct thermal_cooling_device *
+__thermal_cooling_device_register(struct device_node *np,
+ char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
{
struct thermal_cooling_device *cdev;
int result;
@@ -1094,6 +1099,7 @@ thermal_cooling_device_register(char *type, void *devdata,
strlcpy(cdev->type, type ? : "", sizeof(cdev->type));
mutex_init(&cdev->lock);
INIT_LIST_HEAD(&cdev->thermal_instances);
+ cdev->np = np;
cdev->ops = ops;
cdev->updated = true;
cdev->device.class = &thermal_class;
@@ -1136,9 +1142,53 @@ unregister:
device_unregister(&cdev->device);
return ERR_PTR(result);
}
+
+/**
+ * thermal_cooling_device_register() - register a new thermal cooling device
+ * @type: the thermal cooling device type.
+ * @devdata: device private data.
+ * @ops: standard thermal cooling devices callbacks.
+ *
+ * This interface function adds a new thermal cooling device (fan/processor/...)
+ * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
+ * to all the thermal zone devices registered at the same time.
+ *
+ * Return: a pointer to the created struct thermal_cooling_device or an
+ * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
+ */
+struct thermal_cooling_device *
+thermal_cooling_device_register(char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
+{
+ return __thermal_cooling_device_register(NULL, type, devdata, ops);
+}
EXPORT_SYMBOL_GPL(thermal_cooling_device_register);

/**
+ * thermal_of_cooling_device_register() - register an OF thermal cooling device
+ * @np: a pointer to a device tree node.
+ * @type: the thermal cooling device type.
+ * @devdata: device private data.
+ * @ops: standard thermal cooling devices callbacks.
+ *
+ * This function will register a cooling device with device tree node reference.
+ * This interface function adds a new thermal cooling device (fan/processor/...)
+ * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
+ * to all the thermal zone devices registered at the same time.
+ *
+ * Return: a pointer to the created struct thermal_cooling_device or an
+ * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
+ */
+struct thermal_cooling_device *
+thermal_of_cooling_device_register(struct device_node *np,
+ char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
+{
+ return __thermal_cooling_device_register(np, type, devdata, ops);
+}
+EXPORT_SYMBOL_GPL(thermal_of_cooling_device_register);
+
+/**
* thermal_cooling_device_unregister - removes the registered thermal cooling device
* @cdev: the thermal cooling device to remove.
*
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index b780c5b..f7e11c7 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -25,6 +25,7 @@
#ifndef __THERMAL_H__
#define __THERMAL_H__

+#include <linux/of.h>
#include <linux/idr.h>
#include <linux/device.h>
#include <linux/workqueue.h>
@@ -280,6 +281,9 @@ void thermal_zone_device_update(struct thermal_zone_device *);

struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
const struct thermal_cooling_device_ops *);
+struct thermal_cooling_device *
+thermal_of_cooling_device_register(struct device_node *np, char *, void *,
+ const struct thermal_cooling_device_ops *);
void thermal_cooling_device_unregister(struct thermal_cooling_device *);
struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp);

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:03 PM11/12/13
to
This patch changes the thermal core driver to allow
registration of thermal zones without the .get_temp callback.

The idea behind this change is to allow lazy registration
of sensor callbacks.

The thermal zone will be disabled whenever the ops
does not contain a .get_temp callback. The sysfs interface
will be returning -EINVAL on any temperature read operation.

Cc: Zhang Rui <rui....@intel.com>
Cc: linu...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
drivers/thermal/thermal_core.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 03a5671..f4c9021 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -403,7 +403,7 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp)
enum thermal_trip_type type;
#endif

- if (!tz || IS_ERR(tz))
+ if (!tz || IS_ERR(tz) || !tz->ops->get_temp)
goto exit;

mutex_lock(&tz->lock);
@@ -456,6 +456,9 @@ void thermal_zone_device_update(struct thermal_zone_device *tz)
{
int count;

+ if (!tz->ops->get_temp)
+ return;
+
update_temperature(tz);

for (count = 0; count < tz->trips; count++)
@@ -1386,7 +1389,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips)
return ERR_PTR(-EINVAL);

- if (!ops || !ops->get_temp)
+ if (!ops)
return ERR_PTR(-EINVAL);

if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))
@@ -1490,6 +1493,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,

INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);

+ if (!tz->ops->get_temp)
+ thermal_zone_device_set_polling(tz, 0);
+
thermal_zone_device_update(tz);

if (!result)

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:03 PM11/12/13
to
This patch introduces a device tree bindings for
describing the hardware thermal behavior and limits.
Also a parser to read and interpret the data and feed
it in the thermal framework is presented.

This patch introduces a thermal data parser for device
tree. The parsed data is used to build thermal zones
and thermal binding parameters. The output data
can then be used to deploy thermal policies.

This patch adds also documentation regarding this
API and how to define tree nodes to use
this infrastructure.

Note that, in order to be able to have control
on the sensor registration on the DT thermal zone,
it was required to allow changing the thermal zone
.get_temp callback. For this reason, this patch
also removes the 'const' modifier from the .ops
field of thermal zone devices.
Cc: Mark Rutland <mark.r...@arm.com>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---

Hello all,

Very minor changes from v8 to v9.

Changelog:

- Rephrase a couple of sentences in the binding document
- Fixed a couple of property types in the binding document
- Removed the constant macro definitions for trip type. This
change also affected the bindings posted on patches 09/14/15.

.../devicetree/bindings/thermal/thermal.txt | 586 ++++++++++++++
drivers/thermal/Kconfig | 13 +
drivers/thermal/Makefile | 1 +
drivers/thermal/of-thermal.c | 849 +++++++++++++++++++++
drivers/thermal/thermal_core.c | 9 +-
drivers/thermal/thermal_core.h | 9 +
include/dt-bindings/thermal/thermal.h | 17 +
include/linux/thermal.h | 28 +-
8 files changed, 1509 insertions(+), 3 deletions(-)
create mode 100644 Documentation/devicetree/bindings/thermal/thermal.txt
create mode 100644 drivers/thermal/of-thermal.c
create mode 100644 include/dt-bindings/thermal/thermal.h

diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
new file mode 100644
index 0000000..59f5bd2
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -0,0 +1,586 @@
+* Thermal Framework Device Tree descriptor
+
+This file describes a generic binding to provide a way of
+defining hardware thermal structure using device tree.
+A thermal structure includes thermal zones and their components,
+such as trip points, polling intervals, sensors and cooling devices
+binding descriptors.
+
+The target of device tree thermal descriptors is to describe only
+the hardware thermal aspects. The thermal device tree bindings are
+not about how the system must control or which algorithm or policy
+must be taken in place.
+
+There are five types of nodes involved to describe thermal bindings:
+- thermal sensors: devices which may be used to take temperature
+ measurements.
+- cooling devices: devices which may be used to dissipate heat.
+- trip points: describe key temperatures at which cooling is recommended. The
+ set of points should be chosen based on hardware limits.
+- cooling maps: used to describe links between trip points and cooling devices;
+- thermal zones: used to describe thermal data within the hardware;
+
+The following is a description of each of these node types.
+
+* Thermal sensor devices
+
+Thermal sensor devices are nodes providing temperature sensing capabilities on
+thermal zones. Typical devices are I2C ADC converters and bandgaps. These are
+nodes providing temperature data to thermal zones. Thermal sensor devices may
+control one or more internal sensors.
+
+Required property:
+- #thermal-sensor-cells: Used to provide sensor device specific information
+ Type: unsigned while referring to it. Typically 0 on thermal sensor
+ Size: one cell nodes with only one sensor, and at least 1 on nodes
+ with several internal sensors, in order
+ to identify uniquely the sensor instances within
+ the IC. See thermal zone binding for more details
+ on how consumers refer to sensor devices.
+
+* Cooling device nodes
+
+Cooling devices are nodes providing control on power dissipation. There
+are essentially two ways to provide control on power dissipation. First
+is by means of regulating device performance, which is known as passive
+cooling. A typical passive cooling is a CPU that has dynamic voltage and
+frequency scaling (DVFS), and uses lower frequencies as cooling states.
+Second is by means of activating devices in order to remove
+the dissipated heat, which is known as active cooling, e.g. regulating
+fan speeds. In both cases, cooling devices shall have a way to determine
+the state of cooling in which the device is.
+
+Required properties:
+- cooling-min-state: An integer indicating the smallest
+ Type: unsigned cooling state accepted. Typically 0.
+ Size: one cell
+
+- cooling-max-state: An integer indicating the largest
+ Type: unsigned cooling state accepted.
+ Size: one cell
+
+- #cooling-cells: Used to provide cooling device specific information
+ Type: unsigned while referring to it. Must be at least 2, in order
+ Size: one cell to specify minimum and maximum cooling state used
+ in the reference. The first cell is the minimum
+ cooling state requested and the second cell is
+ the maximum cooling state requested in the reference.
+ See Cooling device maps section below for more details
+ on how consumers refer to cooling devices.
+
+* Trip points
+
+The trip node is a node to describe a point in the temperature domain
+in which the system takes an action. This node describes just the point,
+not the action.
+
+Required properties:
+- temperature: An integer indicating the trip temperature level,
+ Type: signed in millicelsius.
+ Size: one cell
+
+- hysteresis: A low hysteresis value on temperature property (above).
+ Type: unsigned This is a relative value, in millicelsius.
+ Size: one cell
+
+- type: a string containing the trip type. Supported values are:
+ "active": A trip point to enable active cooling
+ "passive": A trip point to enable passive cooling
+ "hot": A trip point to notify emergency
+ "critical": Hardware not reliable.
+ Type: string
+
+* Cooling device maps
+
+The cooling device maps node is a node to describe how cooling devices
+get assigned to trip points of the zone. The cooling devices are expected
+to be loaded in the target system.
+
+Required properties:
+- cooling-device: A phandle of a cooling device with its specifier,
+ Type: phandle + referring to which cooling device is used in this
+ cooling specifier binding. In the cooling specifier, the first cell
+ is the minimum cooling state and the second cell
+ is the maximum cooling state used in this map.
+- trip: A phandle of a trip point node within the same thermal
+ Type: phandle of zone.
+ trip point node
+
+Optional property:
+- contribution: The cooling contribution to the thermal zone of the
+ Type: unsigned referred cooling device at the referred trip point.
+ Size: one cell The contribution is a ratio of the sum
+ of all cooling contributions within a thermal zone.
+
+Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle
+limit specifier means:
+(i) - minimum state allowed for minimum cooling state used in the reference.
+(ii) - maximum state allowed for maximum cooling state used in the reference.
+Refer to include/dt-bindings/thermal/thermal.h for definition of this constant.
+
+* Thermal zone nodes
+
+The thermal zone node is the node containing all the required info
+for describing a thermal zone, including its cooling device bindings. The
+thermal zone node must contain, apart from its own properties, one sub-node
+containing trip nodes and one sub-node containing all the zone cooling maps.
+
+Required properties:
+- polling-delay: The maximum number of milliseconds to wait between polls
+ Type: unsigned when checking this thermal zone.
+ Size: one cell
+
+- polling-delay-passive: The maximum number of milliseconds to wait
+ Type: unsigned between polls when performing passive cooling.
+ Size: one cell
+
+- thermal-sensors: A list of thermal sensor phandles and sensor specifier
+ Type: list of used while monitoring the thermal zone.
+ phandles + sensor
+ specifier
+
+- trips: A sub-node which is a container of only trip point nodes
+ Type: sub-node required to describe the thermal zone.
+
+- cooling-maps: A sub-node which is a container of only cooling device
+ Type: sub-node map nodes, used to describe the relation between trips
+ and cooling devices.
+
+Optional property:
+- coefficients: An array of integers (one signed cell) containing
+ Type: array coefficients to compose a linear relation between
+ Elem size: one cell the sensors listed in the thermal-sensors property.
+ Elem type: signed Coefficients defaults to 1, in case this property
+ is not specified. A simple linear polynomial is used:
+ Z = c0 * x0 + c1 + x1 + ... + c(n-1) * x(n-1) + cn.
+
+ The coefficients are ordered and they match with sensors
+ by means of sensor ID. Additional coefficients are
+ interpreted as constant offset.
+
+Note: The delay properties are bound to the maximum dT/dt (temperature
+derivative over time) in two situations for a thermal zone:
+(i) - when passive cooling is activated (polling-delay-passive); and
+(ii) - when the zone just needs to be monitored (polling-delay) or
+when active cooling is activated.
+
+The maximum dT/dt is highly bound to hardware power consumption and dissipation
+capability. The delays should be chosen to account for said max dT/dt,
+such that a device does not cross several trip boundaries unexpectedly
+between polls. Choosing the right polling delays shall avoid having the
+device in temperature ranges that may damage the silicon structures and
+reduce silicon lifetime.
+
+* The thermal-zones node
+
+The "thermal-zones" node is a container for all thermal zone nodes. It shall
+contain only sub-nodes describing thermal zones as in the section
+"Thermal zone nodes". The "thermal-zones" node appears under "/".
+
+* Examples
+
+Below are several examples on how to use thermal data descriptors
+using device tree bindings:
+
+(a) - CPU thermal zone
+
+The CPU thermal zone example below describes how to setup one thermal zone
+using one single sensor as temperature source and many cooling devices and
+power dissipation control sources.
+
+#include <dt-bindings/thermal/thermal.h>
+
+cpus {
+ /*
+ * Here is an example of describing a cooling device for a DVFS
+ * capable CPU. The CPU node describes its four OPPs.
+ * The cooling states possible are 0..3, and they are
+ * used as OPP indexes. The minimum cooling state is 0, which means
+ * all four OPPs can be available to the system. The maximum
+ * cooling state is 3, which means only the lowest OPPs (198...@0.85V)
+ * can be available in the system.
+ */
+ cpu0: cpu@0 {
+ ...
+ operating-points = <
+ /* kHz uV */
+ 970000 1200000
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+ cooling-min-state = <0>;
+ cooling-max-state = <3>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
+ ...
+};
+
+&i2c1 {
+ ...
+ /*
+ * A simple fan controller which supports 10 speeds of operation
+ * (represented as 0-9).
+ */
+ fan0: fan@0x48 {
+ ...
+ cooling-min-state = <0>;
+ cooling-max-state = <9>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
+};
+
+ocp {
+ ...
+ /*
+ * A simple IC with a single bandgap temperature sensor.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ thermal-sensors = <&bandgap0>;
+
+ trips {
+ cpu-alert0: cpu-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "active";
+ };
+ cpu-alert1: cpu-alert {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ cpu-crit: cpu-crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu-alert0>;
+ cooling-device = <&fan0 THERMAL_NO_LIMITS 4>;
+ };
+ map1 {
+ trip = <&cpu-alert1>;
+ cooling-device = <&fan0 5 THERMAL_NO_LIMITS>;
+ };
+ map2 {
+ trip = <&cpu-alert1>;
+ cooling-device =
+ <&cpu0 THERMAL_NO_LIMITS THERMAL_NO_LIMITS>;
+ };
+ };
+ };
+};
+
+In the example above, the ADC sensor (bandgap0) at address 0x0000ED00 is
+used to monitor the zone 'cpu-thermal' using its sole sensor. A fan
+device (fan0) is controlled via I2C bus 1, at address 0x48, and has ten
+different cooling states 0-9. It is used to remove the heat out of
+the thermal zone 'cpu-thermal' using its cooling states
+from its minimum to 4, when it reaches trip point 'cpu-alert0'
+at 90C, as an example of active cooling. The same cooling device is used at
+'cpu-alert1', but from 5 to its maximum state. The cpu@0 device is also
+linked to the same thermal zone, 'cpu-thermal', as a passive cooling device,
+using all its cooling states at trip point 'cpu-alert1',
+which is a trip point at 100C. On the thermal zone 'cpu-thermal', at the
+temperature of 125C, represented by the trip point 'cpu-crit', the silicon
+is not reliable anymore.
+
+(b) - IC with several internal sensors
+
+The example below describes how to deploy several thermal zones based off a
+single sensor IC, assuming it has several internal sensors. This is a common
+case on SoC designs with several internal IPs that may need different thermal
+requirements, and thus may have their own sensor to monitor or detect internal
+hotspots in their silicon.
+
+#include <dt-bindings/thermal/thermal.h>
+
+ocp {
+ ...
+ /*
+ * A simple IC with several bandgap temperature sensors.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <1>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 0>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ cpu-alert: cpu-alert {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ cpu-crit: cpu-crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+
+ gpu-thermal: gpu-thermal {
+ polling-delay-passive = <120>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 1>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ gpu-alert: gpu-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ gpu-crit: gpu-crit {
+ temperature = <105000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+
+ dsp-thermal: dsp-thermal {
+ polling-delay-passive = <50>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 2>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ dsp-alert: gpu-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ dsp-crit: gpu-crit {
+ temperature = <135000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+};
+
+In the example above, there is one bandgap IC which has the capability to
+monitor three sensors. The hardware has been designed so that sensors are
+placed on different places in the DIE to monitor different temperature
+hotspots: one for CPU thermal zone, one for GPU thermal zone and the
+other to monitor a DSP thermal zone.
+
+Thus, there is a need to assign each sensor provided by the bandgap IC
+to different thermal zones. This is achieved by means of using the
+#thermal-sensor-cells property and using the first cell of the sensor
+specifier as sensor ID. In the example, then, <bandgap 0> is used to
+monitor CPU thermal zone, <bandgap 1> is used to monitor GPU thermal
+zone and <bandgap 2> is used to monitor DSP thermal zone. Each zone
+may be uncorrelated, having its own dT/dt requirements, trips
+and cooling maps.
+
+
+(c) - Several sensors within one single thermal zone
+
+The example below illustrates how to use more than one sensor within
+one thermal zone.
+
+#include <dt-bindings/thermal/thermal.h>
+
+&i2c1 {
+ ...
+ /*
+ * A simple IC with a single temperature sensor.
+ */
+ adc: sensor@0x49 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+ocp {
+ ...
+ /*
+ * A simple IC with a single bandgap temperature sensor.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ thermal-sensors = <&bandgap0>, /* cpu */
+ <&adc>; /* pcb north */
+
+ /* hotspot = 100 * bandgap - 120 * adc + 484 */
+ coefficients = <100 -120 484>;
+
+ trips {
+ ...
+ };
+
+ cooling-maps {
+ ...
+ };
+ };
+};
+
+In some cases, there is a need to use more than one sensor to extrapolate
+a thermal hotspot in the silicon. The above example illustrates this situation.
+For instance, it may be the case that a sensor external to CPU IP may be placed
+close to CPU hotspot and together with internal CPU sensor, it is used
+to determine the hotspot. Assuming this is the case for the above example,
+the hypothetical extrapolation rule would be:
+ hotspot = 100 * bandgap - 120 * adc + 484
+
+In other context, the same idea can be used to add fixed offset. For instance,
+consider the hotspot extrapolation rule below:
+ hotspot = 1 * adc + 6000
+
+In the above equation, the hotspot is always 6C higher than what is read
+from the ADC sensor. The binding would be then:
+ thermal-sensors = <&adc>;
+
+ /* hotspot = 1 * adc + 6000 */
+ coefficients = <1 6000>;
+
+(d) - Board thermal
+
+The board thermal example below illustrates how to setup one thermal zone
+with many sensors and many cooling devices.
+
+#include <dt-bindings/thermal/thermal.h>
+
+&i2c1 {
+ ...
+ /*
+ * An IC with several temperature sensor.
+ */
+ adc-dummy: sensor@0x50 {
+ ...
+ #thermal-sensor-cells = <1>; /* sensor internal ID */
+ };
+};
+
+thermal-zones {
+ batt-thermal {
+ polling-delay-passive = <500>; /* milliseconds */
+ polling-delay = <2500>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&adc-dummy 4>;
+
+ trips {
+ ...
+ };
+
+ cooling-maps {
+ ...
+ };
+ };
+
+ board-thermal: board-thermal {
+ polling-delay-passive = <1000>; /* milliseconds */
+ polling-delay = <2500>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&adc-dummy 0>, /* pcb top edge */
+ <&adc-dummy 1>, /* lcd */
+ <&adc-dymmy 2>; /* back cover */
+ /*
+ * An array of coefficients describing the sensor
+ * linear relation. E.g.:
+ * z = c1*x1 + c2*x2 + c3*x3
+ */
+ coefficients = <1200 -345 890>;
+
+ trips {
+ /* Trips are based on resulting linear equation */
+ cpu-trip: cpu-trip {
+ temperature = <60000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ gpu-trip: gpu-trip {
+ temperature = <55000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ }
+ lcd-trip: lcp-trip {
+ temperature = <53000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ crit-trip: crit-trip {
+ temperature = <68000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu-trip>;
+ cooling-device = <&cpu0 0 2>;
+ contribution = <55>;
+ };
+ map1 {
+ trip = <&gpu-trip>;
+ cooling-device = <&gpu0 0 2>;
+ contribution = <20>;
+ };
+ map2 {
+ trip = <&lcd-trip>;
+ cooling-device = <&lcd0 5 10>;
+ contribution = <15>;
+ };
+ };
+ };
+};
+
+The above example is a mix of previous examples, a sensor IP with several internal
+sensors used to monitor different zones, one of them is composed by several sensors and
+with different cooling devices.
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 3910b09..596eeee 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -29,6 +29,19 @@ config THERMAL_HWMON
Say 'Y' here if you want all thermal sensors to
have hwmon sysfs interface too.

+config THERMAL_OF
+ bool
+ prompt "APIs to parse thermal data out of device tree"
+ depends on OF
+ default y
+ help
+ This options provides helpers to add the support to
+ read and parse thermal data definitions out of the
+ device tree blob.
+
+ Say 'Y' here if you need to build thermal infrastructure
+ based on device tree.
+
choice
prompt "Default Thermal governor"
default THERMAL_DEFAULT_GOV_STEP_WISE
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 584b363..4b03956 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -7,6 +7,7 @@ thermal_sys-y += thermal_core.o

# interface to/from other layers providing sensors
thermal_sys-$(CONFIG_THERMAL_HWMON) += thermal_hwmon.o
+thermal_sys-$(CONFIG_THERMAL_OF) += of-thermal.o

# governors
thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
new file mode 100644
index 0000000..66f9eb2
--- /dev/null
+++ b/drivers/thermal/of-thermal.c
@@ -0,0 +1,849 @@
+/*
+ * of-thermal.c - Generic Thermal Management device tree support.
+ *
+ * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Eduardo Valentin <eduardo....@ti.com>
+ *
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <linux/thermal.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/string.h>
+
+#include "thermal_core.h"
+
+/*** Private data structures to represent thermal device tree data ***/
+
+/**
+ * struct __thermal_trip - representation of a point in temperature domain
+ * @np: pointer to struct device_node that this trip point was created from
+ * @temperature: temperature value in miliCelsius
+ * @hysteresis: relative hysteresis in miliCelsius
+ * @type: trip point type
+ */
+
+struct __thermal_trip {
+ struct device_node *np;
+ unsigned long int temperature;
+ unsigned long int hysteresis;
+ enum thermal_trip_type type;
+};
+
+/**
+ * struct __thermal_bind_param - a match between trip and cooling device
+ * @cooling_device: a pointer to identify the referred cooling device
+ * @trip_id: the trip point index
+ * @usage: the percentage (from 0 to 100) of cooling contribution
+ * @min: minimum cooling state used at this trip point
+ * @max: maximum cooling state used at this trip point
+ */
+
+struct __thermal_bind_params {
+ struct device_node *cooling_device;
+ unsigned int trip_id;
+ unsigned int usage;
+ unsigned long min;
+ unsigned long max;
+};
+
+/**
+ * struct __thermal_zone - internal representation of a thermal zone
+ * @mode: current thermal zone device mode (enabled/disabled)
+ * @passive_delay: polling interval while passive cooling is activated
+ * @polling_delay: zone polling interval
+ * @ntrips: number of trip points
+ * @trips: an array of trip points (0..ntrips - 1)
+ * @num_tbps: number of thermal bind params
+ * @tbps: an array of thermal bind params (0..num_tbps - 1)
+ * @sensor_data: sensor private data used while reading temperature and trend
+ * @get_temp: sensor callback to read temperature
+ * @get_trend: sensor callback to read temperature trend
+ */
+
+struct __thermal_zone {
+ enum thermal_device_mode mode;
+ int passive_delay;
+ int polling_delay;
+
+ /* trip data */
+ int ntrips;
+ struct __thermal_trip *trips;
+
+ /* cooling binding data */
+ int num_tbps;
+ struct __thermal_bind_params *tbps;
+
+ /* sensor interface */
+ void *sensor_data;
+ int (*get_temp)(void *, long *);
+ int (*get_trend)(void *, long *);
+};
+
+/*** DT thermal zone device callbacks ***/
+
+static int of_thermal_get_temp(struct thermal_zone_device *tz,
+ unsigned long *temp)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (!data->get_temp)
+ return -EINVAL;
+
+ return data->get_temp(data->sensor_data, temp);
+}
+
+static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
+ enum thermal_trend *trend)
+{
+ struct __thermal_zone *data = tz->devdata;
+ long dev_trend;
+ int r;
+
+ if (!data->get_trend)
+ return -EINVAL;
+
+ r = data->get_trend(data->sensor_data, &dev_trend);
+ if (r)
+ return r;
+
+ /* TODO: These intervals might have some thresholds, but in core code */
+ if (dev_trend > 0)
+ *trend = THERMAL_TREND_RAISING;
+ else if (dev_trend < 0)
+ *trend = THERMAL_TREND_DROPPING;
+ else
+ *trend = THERMAL_TREND_STABLE;
+
+ return 0;
+}
+
+static int of_thermal_bind(struct thermal_zone_device *thermal,
+ struct thermal_cooling_device *cdev)
+{
+ struct __thermal_zone *data = thermal->devdata;
+ int i;
+
+ if (!data || IS_ERR(data))
+ return -ENODEV;
+
+ /* find where to bind */
+ for (i = 0; i < data->num_tbps; i++) {
+ struct __thermal_bind_params *tbp = data->tbps + i;
+
+ if (tbp->cooling_device == cdev->np) {
+ int ret;
+
+ ret = thermal_zone_bind_cooling_device(thermal,
+ tbp->trip_id, cdev,
+ tbp->min,
+ tbp->max);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int of_thermal_unbind(struct thermal_zone_device *thermal,
+ struct thermal_cooling_device *cdev)
+{
+ struct __thermal_zone *data = thermal->devdata;
+ int i;
+
+ if (!data || IS_ERR(data))
+ return -ENODEV;
+
+ /* find where to unbind */
+ for (i = 0; i < data->num_tbps; i++) {
+ struct __thermal_bind_params *tbp = data->tbps + i;
+
+ if (tbp->cooling_device == cdev->np) {
+ int ret;
+
+ ret = thermal_zone_unbind_cooling_device(thermal,
+ tbp->trip_id, cdev);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int of_thermal_get_mode(struct thermal_zone_device *tz,
+ enum thermal_device_mode *mode)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ *mode = data->mode;
+
+ return 0;
+}
+
+static int of_thermal_set_mode(struct thermal_zone_device *tz,
+ enum thermal_device_mode mode)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ mutex_lock(&tz->lock);
+
+ if (mode == THERMAL_DEVICE_ENABLED)
+ tz->polling_delay = data->polling_delay;
+ else
+ tz->polling_delay = 0;
+
+ mutex_unlock(&tz->lock);
+
+ data->mode = mode;
+ thermal_zone_device_update(tz);
+
+ return 0;
+}
+
+static int of_thermal_get_trip_type(struct thermal_zone_device *tz, int trip,
+ enum thermal_trip_type *type)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ *type = data->trips[trip].type;
+
+ return 0;
+}
+
+static int of_thermal_get_trip_temp(struct thermal_zone_device *tz, int trip,
+ unsigned long *temp)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ *temp = data->trips[trip].temperature;
+
+ return 0;
+}
+
+static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip,
+ unsigned long temp)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ /* thermal framework should take care of data->mask & (1 << trip) */
+ data->trips[trip].temperature = temp;
+
+ return 0;
+}
+
+static int of_thermal_get_trip_hyst(struct thermal_zone_device *tz, int trip,
+ unsigned long *hyst)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ *hyst = data->trips[trip].hysteresis;
+
+ return 0;
+}
+
+static int of_thermal_set_trip_hyst(struct thermal_zone_device *tz, int trip,
+ unsigned long hyst)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ /* thermal framework should take care of data->mask & (1 << trip) */
+ data->trips[trip].hysteresis = hyst;
+
+ return 0;
+}
+
+static int of_thermal_get_crit_temp(struct thermal_zone_device *tz,
+ unsigned long *temp)
+{
+ struct __thermal_zone *data = tz->devdata;
+ int i;
+
+ for (i = 0; i < data->ntrips; i++)
+ if (data->trips[i].type == THERMAL_TRIP_CRITICAL) {
+ *temp = data->trips[i].temperature;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static struct thermal_zone_device_ops of_thermal_ops = {
+ .get_mode = of_thermal_get_mode,
+ .set_mode = of_thermal_set_mode,
+
+ .get_trip_type = of_thermal_get_trip_type,
+ .get_trip_temp = of_thermal_get_trip_temp,
+ .set_trip_temp = of_thermal_set_trip_temp,
+ .get_trip_hyst = of_thermal_get_trip_hyst,
+ .set_trip_hyst = of_thermal_set_trip_hyst,
+ .get_crit_temp = of_thermal_get_crit_temp,
+
+ .bind = of_thermal_bind,
+ .unbind = of_thermal_unbind,
+};
+
+/*** sensor API ***/
+
+static struct thermal_zone_device *
+thermal_zone_of_add_sensor(struct device_node *zone,
+ struct device_node *sensor, void *data,
+ int (*get_temp)(void *, long *),
+ int (*get_trend)(void *, long *))
+{
+ struct thermal_zone_device *tzd;
+ struct __thermal_zone *tz;
+
+ tzd = thermal_zone_get_zone_by_name(zone->name);
+ if (IS_ERR(tzd))
+ return ERR_PTR(-EPROBE_DEFER);
+
+ tz = tzd->devdata;
+
+ mutex_lock(&tzd->lock);
+ tz->get_temp = get_temp;
+ tz->get_trend = get_trend;
+ tz->sensor_data = data;
+
+ tzd->ops->get_temp = of_thermal_get_temp;
+ tzd->ops->get_trend = of_thermal_get_trend;
+ mutex_unlock(&tzd->lock);
+
+ return tzd;
+}
+
+/**
+ * thermal_zone_of_sensor_register - registers a sensor to a DT thermal zone
+ * @dev: a valid struct device pointer of a sensor device. Must contain
+ * a valid .of_node, for the sensor node.
+ * @sensor_id: a sensor identifier, in case the sensor IP has more
+ * than one sensors
+ * @data: a private pointer (owned by the caller) that will be passed
+ * back, when a temperature reading is needed.
+ * @get_temp: a pointer to a function that reads the sensor temperature.
+ * @get_trend: a pointer to a function that reads the sensor temperature trend.
+ *
+ * This function will search the list of thermal zones described in device
+ * tree and look for the zone that refer to the sensor device pointed by
+ * @dev->of_node as temperature providers. For the zone pointing to the
+ * sensor node, the sensor will be added to the DT thermal zone device.
+ *
+ * The thermal zone temperature is provided by the @get_temp function
+ * pointer. When called, it will have the private pointer @data back.
+ *
+ * The thermal zone temperature trend is provided by the @get_trend function
+ * pointer. When called, it will have the private pointer @data back.
+ *
+ * TODO:
+ * 01 - This function must enqueue the new sensor instead of using
+ * it as the only source of temperature values.
+ *
+ * 02 - There must be a way to match the sensor with all thermal zones
+ * that refer to it.
+ *
+ * Return: On success returns a valid struct thermal_zone_device,
+ * otherwise, it returns a corresponding ERR_PTR(). Caller must
+ * check the return value with help of IS_ERR() helper.
+ */
+struct thermal_zone_device *
+thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
+ void *data, int (*get_temp)(void *, long *),
+ int (*get_trend)(void *, long *))
+{
+ struct device_node *np, *child, *sensor_np;
+
+ np = of_find_node_by_name(NULL, "thermal-zones");
+ if (!np)
+ return ERR_PTR(-ENODEV);
+
+ if (!dev || !dev->of_node)
+ return ERR_PTR(-EINVAL);
+
+ sensor_np = dev->of_node;
+
+ for_each_child_of_node(np, child) {
+ struct of_phandle_args sensor_specs;
+ int ret, id;
+
+ /* For now, thermal framework supports only 1 sensor per zone */
+ ret = of_parse_phandle_with_args(child, "thermal-sensors",
+ "#thermal-sensor-cells",
+ 0, &sensor_specs);
+ if (ret)
+ continue;
+
+ if (sensor_specs.args_count >= 1) {
+ id = sensor_specs.args[0];
+ WARN(sensor_specs.args_count > 1,
+ "%s: too many cells in sensor specifier %d\n",
+ sensor_specs.np->name, sensor_specs.args_count);
+ } else {
+ id = 0;
+ }
+
+ if (sensor_specs.np == sensor_np && id == sensor_id) {
+ of_node_put(np);
+ return thermal_zone_of_add_sensor(child, sensor_np,
+ data,
+ get_temp,
+ get_trend);
+ }
+ }
+ of_node_put(np);
+
+ return ERR_PTR(-ENODEV);
+}
+EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_register);
+
+/**
+ * thermal_zone_of_sensor_unregister - unregisters a sensor from a DT thermal zone
+ * @dev: a valid struct device pointer of a sensor device. Must contain
+ * a valid .of_node, for the sensor node.
+ * @tzd: a pointer to struct thermal_zone_device where the sensor is registered.
+ *
+ * This function removes the sensor callbacks and private data from the
+ * thermal zone device registered with thermal_zone_of_sensor_register()
+ * API. It will also silent the zone by remove the .get_temp() and .get_trend()
+ * thermal zone device callbacks.
+ *
+ * TODO: When the support to several sensors per zone is added, this
+ * function must search the sensor list based on @dev parameter.
+ *
+ */
+void thermal_zone_of_sensor_unregister(struct device *dev,
+ struct thermal_zone_device *tzd)
+{
+ struct __thermal_zone *tz;
+
+ if (!dev || !tzd || !tzd->devdata)
+ return;
+
+ tz = tzd->devdata;
+
+ /* no __thermal_zone, nothing to be done */
+ if (!tz)
+ return;
+
+ mutex_lock(&tzd->lock);
+ tzd->ops->get_temp = NULL;
+ tzd->ops->get_trend = NULL;
+
+ tz->get_temp = NULL;
+ tz->get_trend = NULL;
+ tz->sensor_data = NULL;
+ mutex_unlock(&tzd->lock);
+}
+EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister);
+
+/*** functions parsing device tree nodes ***/
+
+/**
+ * thermal_of_populate_bind_params - parse and fill cooling map data
+ * @np: DT node containing a cooling-map node
+ * @__tbp: data structure to be filled with cooling map info
+ * @trips: array of thermal zone trip points
+ * @ntrips: number of trip points inside trips.
+ *
+ * This function parses a cooling-map type of node represented by
+ * @np parameter and fills the read data into @__tbp data structure.
+ * It needs the already parsed array of trip points of the thermal zone
+ * in consideration.
+ *
+ * Return: 0 on success, proper error code otherwise
+ */
+static int thermal_of_populate_bind_params(struct device_node *np,
+ struct __thermal_bind_params *__tbp,
+ struct __thermal_trip *trips,
+ int ntrips)
+{
+ struct of_phandle_args cooling_spec;
+ struct device_node *trip;
+ int ret, i;
+ u32 prop;
+
+ /* Default weight. Usage is optional */
+ __tbp->usage = 0;
+ ret = of_property_read_u32(np, "contribution", &prop);
+ if (ret == 0)
+ __tbp->usage = prop;
+
+ trip = of_parse_phandle(np, "trip", 0);
+ if (!trip) {
+ pr_err("missing trip property\n");
+ return -ENODEV;
+ }
+
+ /* match using device_node */
+ for (i = 0; i < ntrips; i++)
+ if (trip == trips[i].np) {
+ __tbp->trip_id = i;
+ break;
+ }
+
+ if (i == ntrips) {
+ ret = -ENODEV;
+ goto end;
+ }
+
+ ret = of_parse_phandle_with_args(np, "cooling-device", "#cooling-cells",
+ 0, &cooling_spec);
+ if (ret < 0) {
+ pr_err("missing cooling_device property\n");
+ goto end;
+ }
+ __tbp->cooling_device = cooling_spec.np;
+ if (cooling_spec.args_count >= 2) { /* at least min and max */
+ __tbp->min = cooling_spec.args[0];
+ __tbp->max = cooling_spec.args[1];
+ } else {
+ pr_err("wrong reference to cooling device, missing limits\n");
+ }
+
+end:
+ of_node_put(trip);
+
+ return ret;
+}
+
+/**
+ * It maps 'enum thermal_trip_type' found in include/linux/thermal.h
+ * into the device tree binding of 'trip', property type.
+ */
+static const char * const trip_types[] = {
+ [THERMAL_TRIP_ACTIVE] = "active",
+ [THERMAL_TRIP_PASSIVE] = "passive",
+ [THERMAL_TRIP_HOT] = "hot",
+ [THERMAL_TRIP_CRITICAL] = "critical",
+};
+
+/**
+ * thermal_of_get_trip_type - Get phy mode for given device_node
+ * @np: Pointer to the given device_node
+ * @type: Pointer to resulting trip type
+ *
+ * The function gets trip type string from property 'type',
+ * and store its index in trip_types table in @type,
+ *
+ * Return: 0 on success, or errno in error case.
+ */
+static int thermal_of_get_trip_type(struct device_node *np,
+ enum thermal_trip_type *type)
+{
+ const char *t;
+ int err, i;
+
+ err = of_property_read_string(np, "type", &t);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(trip_types); i++)
+ if (!strcasecmp(t, trip_types[i])) {
+ *type = i;
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+/**
+ * thermal_of_populate_trip - parse and fill one trip point data
+ * @np: DT node containing a trip point node
+ * @trip: trip point data structure to be filled up
+ *
+ * This function parses a trip point type of node represented by
+ * @np parameter and fills the read data into @trip data structure.
+ *
+ * Return: 0 on success, proper error code otherwise
+ */
+static int thermal_of_populate_trip(struct device_node *np,
+ struct __thermal_trip *trip)
+{
+ int prop;
+ int ret;
+
+ ret = of_property_read_u32(np, "temperature", &prop);
+ if (ret < 0) {
+ pr_err("missing temperature property\n");
+ return ret;
+ }
+ trip->temperature = prop;
+
+ ret = of_property_read_u32(np, "hysteresis", &prop);
+ if (ret < 0) {
+ pr_err("missing hysteresis property\n");
+ return ret;
+ }
+ trip->hysteresis = prop;
+
+ ret = thermal_of_get_trip_type(np, &trip->type);
+ if (ret < 0) {
+ pr_err("wrong trip type property\n");
+ return ret;
+ }
+
+ /* Required for cooling map matching */
+ trip->np = np;
+
+ return 0;
+}
+
+/**
+ * thermal_of_build_thermal_zone - parse and fill one thermal zone data
+ * @np: DT node containing a thermal zone node
+ *
+ * This function parses a thermal zone type of node represented by
+ * @np parameter and fills the read data into a __thermal_zone data structure
+ * and return this pointer.
+ *
+ * TODO: Missing properties to parse: thermal-sensor-names and coefficients
+ *
+ * Return: On success returns a valid struct __thermal_zone,
+ * otherwise, it returns a corresponding ERR_PTR(). Caller must
+ * check the return value with help of IS_ERR() helper.
+ */
+static struct __thermal_zone *
+thermal_of_build_thermal_zone(struct device_node *np)
+{
+ struct device_node *child = NULL, *gchild;
+ struct __thermal_zone *tz;
+ int ret, i;
+ u32 prop;
+
+ if (!np) {
+ pr_err("no thermal zone np\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ tz = kzalloc(sizeof(*tz), GFP_KERNEL);
+ if (!tz)
+ return ERR_PTR(-ENOMEM);
+
+ ret = of_property_read_u32(np, "polling-delay-passive", &prop);
+ if (ret < 0) {
+ pr_err("missing polling-delay-passive property\n");
+ goto free_tz;
+ }
+ tz->passive_delay = prop;
+
+ ret = of_property_read_u32(np, "polling-delay", &prop);
+ if (ret < 0) {
+ pr_err("missing polling-delay property\n");
+ goto free_tz;
+ }
+ tz->polling_delay = prop;
+
+ /* trips */
+ child = of_get_child_by_name(np, "trips");
+
+ /* No trips provided */
+ if (!child)
+ goto finish;
+
+ tz->ntrips = of_get_child_count(child);
+ if (tz->ntrips == 0) /* must have at least one child */
+ goto finish;
+
+ tz->trips = kzalloc(tz->ntrips * sizeof(*tz->trips), GFP_KERNEL);
+ if (!tz->trips) {
+ ret = -ENOMEM;
+ goto free_tz;
+ }
+
+ i = 0;
+ for_each_child_of_node(child, gchild) {
+ ret = thermal_of_populate_trip(gchild, &tz->trips[i++]);
+ if (ret)
+ goto free_trips;
+ }
+
+ of_node_put(child);
+
+ /* cooling-maps */
+ child = of_get_child_by_name(np, "cooling-maps");
+
+ /* cooling-maps not provided */
+ if (!child)
+ goto finish;
+
+ tz->num_tbps = of_get_child_count(child);
+ if (tz->num_tbps == 0)
+ goto finish;
+
+ tz->tbps = kzalloc(tz->num_tbps * sizeof(*tz->tbps), GFP_KERNEL);
+ if (!tz->tbps) {
+ ret = -ENOMEM;
+ goto free_trips;
+ }
+
+ i = 0;
+ for_each_child_of_node(child, gchild)
+ ret = thermal_of_populate_bind_params(gchild, &tz->tbps[i++],
+ tz->trips, tz->ntrips);
+ if (ret)
+ goto free_tbps;
+
+finish:
+ of_node_put(child);
+ tz->mode = THERMAL_DEVICE_DISABLED;
+
+ return tz;
+
+free_tbps:
+ kfree(tz->tbps);
+free_trips:
+ kfree(tz->trips);
+free_tz:
+ kfree(tz);
+ of_node_put(child);
+
+ return ERR_PTR(ret);
+}
+
+static inline void of_thermal_free_zone(struct __thermal_zone *tz)
+{
+ kfree(tz->tbps);
+ kfree(tz->trips);
+ kfree(tz);
+}
+
+/**
+ * of_parse_thermal_zones - parse device tree thermal data
+ *
+ * Initialization function that can be called by machine initialization
+ * code to parse thermal data and populate the thermal framework
+ * with hardware thermal zones info. This function only parses thermal zones.
+ * Cooling devices and sensor devices nodes are supposed to be parsed
+ * by their respective drivers.
+ *
+ * Return: 0 on success, proper error code otherwise
+ *
+ */
+int __init of_parse_thermal_zones(void)
+{
+ struct device_node *np, *child;
+ struct __thermal_zone *tz;
+ struct thermal_zone_device_ops *ops;
+
+ np = of_find_node_by_name(NULL, "thermal-zones");
+ if (!np) {
+ pr_debug("unable to find thermal zones\n");
+ return 0; /* Run successfully on systems without thermal DT */
+ }
+
+ for_each_child_of_node(np, child) {
+ struct thermal_zone_device *zone;
+ struct thermal_zone_params *tzp;
+
+ tz = thermal_of_build_thermal_zone(child);
+ if (IS_ERR(tz)) {
+ pr_err("failed to build thermal zone %s: %ld\n",
+ child->name,
+ PTR_ERR(tz));
+ continue;
+ }
+
+ ops = kmemdup(&of_thermal_ops, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ goto exit_free;
+
+ tzp = kzalloc(sizeof(*tzp), GFP_KERNEL);
+ if (!tzp) {
+ kfree(ops);
+ goto exit_free;
+ }
+
+ /* No hwmon because there might be hwmon drivers registering */
+ tzp->no_hwmon = true;
+
+ zone = thermal_zone_device_register(child->name, tz->ntrips,
+ 0, tz,
+ ops, tzp,
+ tz->passive_delay,
+ tz->polling_delay);
+ if (IS_ERR(zone)) {
+ pr_err("Failed to build %s zone %ld\n", child->name,
+ PTR_ERR(zone));
+ kfree(tzp);
+ kfree(ops);
+ of_thermal_free_zone(tz);
+ /* attempting to build remaining zones still */
+ }
+ }
+
+ return 0;
+
+exit_free:
+ of_thermal_free_zone(tz);
+
+ /* no memory available, so free what we have built */
+ of_thermal_destroy_zones();
+
+ return -ENOMEM;
+}
+
+/**
+ * of_thermal_destroy_zones - remove all zones parsed and allocated resources
+ *
+ * Finds all zones parsed and added to the thermal framework and remove them
+ * from the system, together with their resources.
+ *
+ */
+void __exit of_thermal_destroy_zones(void)
+{
+ struct device_node *np, *child;
+
+ np = of_find_node_by_name(NULL, "thermal-zones");
+ if (!np) {
+ pr_err("unable to find thermal zones\n");
+ return;
+ }
+
+ for_each_child_of_node(np, child) {
+ struct thermal_zone_device *zone;
+
+ zone = thermal_zone_get_zone_by_name(child->name);
+ if (IS_ERR(zone))
+ continue;
+
+ thermal_zone_device_unregister(zone);
+ kfree(zone->tzp);
+ kfree(zone->ops);
+ of_thermal_free_zone(zone->devdata);
+ }
+}
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index f4c9021..aba68dc 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1373,7 +1373,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
*/
struct thermal_zone_device *thermal_zone_device_register(const char *type,
int trips, int mask, void *devdata,
- const struct thermal_zone_device_ops *ops,
+ struct thermal_zone_device_ops *ops,
const struct thermal_zone_params *tzp,
int passive_delay, int polling_delay)
{
@@ -1753,8 +1753,14 @@ static int __init thermal_init(void)
if (result)
goto unregister_class;

+ result = of_parse_thermal_zones();
+ if (result)
+ goto exit_netlink;
+
return 0;

+exit_netlink:
+ genetlink_exit();
unregister_governors:
thermal_unregister_governors();
unregister_class:
@@ -1770,6 +1776,7 @@ error:

static void __exit thermal_exit(void)
{
+ of_thermal_destroy_zones();
genetlink_exit();
class_unregister(&thermal_class);
thermal_unregister_governors();
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 7cf2f66..3db339f 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -77,4 +77,13 @@ static inline int thermal_gov_user_space_register(void) { return 0; }
static inline void thermal_gov_user_space_unregister(void) {}
#endif /* CONFIG_THERMAL_GOV_USER_SPACE */

+/* device tree support */
+#ifdef CONFIG_THERMAL_OF
+int of_parse_thermal_zones(void);
+void of_thermal_destroy_zones(void);
+#else
+static inline int of_parse_thermal_zones(void) { return 0; }
+static inline void of_thermal_destroy_zones(void) { }
+#endif
+
#endif /* __THERMAL_CORE_H__ */
diff --git a/include/dt-bindings/thermal/thermal.h b/include/dt-bindings/thermal/thermal.h
new file mode 100644
index 0000000..59822a9
--- /dev/null
+++ b/include/dt-bindings/thermal/thermal.h
@@ -0,0 +1,17 @@
+/*
+ * This header provides constants for most thermal bindings.
+ *
+ * Copyright (C) 2013 Texas Instruments
+ * Eduardo Valentin <eduardo....@ti.com>
+ *
+ * GPLv2 only
+ */
+
+#ifndef _DT_BINDINGS_THERMAL_THERMAL_H
+#define _DT_BINDINGS_THERMAL_THERMAL_H
+
+/* On cooling devices upper and lower limits */
+#define THERMAL_NO_LIMIT (-1UL)
+
+#endif
+
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index b268d3c..b780c5b 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -143,6 +143,7 @@ struct thermal_cooling_device {
int id;
char type[THERMAL_NAME_LENGTH];
struct device device;
+ struct device_node *np;
void *devdata;
const struct thermal_cooling_device_ops *ops;
bool updated; /* true if the cooling device does not need update */
@@ -172,7 +173,7 @@ struct thermal_zone_device {
int emul_temperature;
int passive;
unsigned int forced_passive;
- const struct thermal_zone_device_ops *ops;
+ struct thermal_zone_device_ops *ops;
const struct thermal_zone_params *tzp;
struct thermal_governor *governor;
struct list_head thermal_instances;
@@ -242,8 +243,31 @@ struct thermal_genl_event {
};

/* Function declarations */
+#ifdef CONFIG_THERMAL_OF
+struct thermal_zone_device *
+thermal_zone_of_sensor_register(struct device *dev, int id,
+ void *data, int (*get_temp)(void *, long *),
+ int (*get_trend)(void *, long *));
+void thermal_zone_of_sensor_unregister(struct device *dev,
+ struct thermal_zone_device *tz);
+#else
+static inline struct thermal_zone_device *
+thermal_zone_of_sensor_register(struct device *dev, int id,
+ void *data, int (*get_temp)(void *, long *),
+ int (*get_trend)(void *, long *))
+{
+ return NULL;
+}
+
+static inline
+void thermal_zone_of_sensor_unregister(struct device *dev,
+ struct thermal_zone_device *tz)
+{
+}
+
+#endif
struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
- void *, const struct thermal_zone_device_ops *,
+ void *, struct thermal_zone_device_ops *,
const struct thermal_zone_params *, int, int);
void thermal_zone_device_unregister(struct thermal_zone_device *);

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:03 PM11/12/13
to
This patch improves the ti-soc-thermal driver by adding the
support to build the thermal zones based on DT nodes.

The driver will have two options now to build the thermal
zones. The first option is the zones originally coded
in this driver. So, the driver behavior will be same
if there is no DT node describing the zones. The second
option, when it is found a DT node with thermal data,
will used the common infrastructure to build the thermal
zone and bind its cooling devices.

In case the driver loads thermal data using the legacy
mode, this driver still adds to the system
a cpufreq cooling device. Loading the thermal data from
DT, the driver assumes someone else will add the cpufreq
cooling device, like the cpufreq driver.
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 77 +++++++++++++++++-----
1 file changed, 62 insertions(+), 15 deletions(-)

diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 5a47cc8..9eec26d 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -31,6 +31,7 @@
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/cpu_cooling.h>
+#include <linux/of.h>

#include "ti-thermal.h"
#include "ti-bandgap.h"
@@ -44,6 +45,7 @@ struct ti_thermal_data {
enum thermal_device_mode mode;
struct work_struct thermal_wq;
int sensor_id;
+ bool our_zone;
};

static void ti_thermal_work(struct work_struct *work)
@@ -75,11 +77,10 @@ static inline int ti_thermal_hotspot_temperature(int t, int s, int c)

/* thermal zone ops */
/* Get temperature callback function for thermal zone*/
-static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+static inline int __ti_thermal_get_temp(void *devdata, long *temp)
{
struct thermal_zone_device *pcb_tz = NULL;
- struct ti_thermal_data *data = thermal->devdata;
+ struct ti_thermal_data *data = devdata;
struct ti_bandgap *bgp;
const struct ti_temp_sensor *s;
int ret, tmp, slope, constant;
@@ -118,6 +119,14 @@ static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
return ret;
}

+static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
+ unsigned long *temp)
+{
+ struct ti_thermal_data *data = thermal->devdata;
+
+ return __ti_thermal_get_temp(data, temp);
+}
+
/* Bind callback functions for thermal zone */
static int ti_thermal_bind(struct thermal_zone_device *thermal,
struct thermal_cooling_device *cdev)
@@ -230,11 +239,9 @@ static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal,
return 0;
}

-/* Get the temperature trend callback functions for thermal zone */
-static int ti_thermal_get_trend(struct thermal_zone_device *thermal,
- int trip, enum thermal_trend *trend)
+static int __ti_thermal_get_trend(void *p, long *trend)
{
- struct ti_thermal_data *data = thermal->devdata;
+ struct ti_thermal_data *data = p;
struct ti_bandgap *bgp;
int id, tr, ret = 0;

@@ -245,6 +252,22 @@ static int ti_thermal_get_trend(struct thermal_zone_device *thermal,
if (ret)
return ret;

+ *trend = tr;
+
+ return 0;
+}
+
+/* Get the temperature trend callback functions for thermal zone */
+static int ti_thermal_get_trend(struct thermal_zone_device *thermal,
+ int trip, enum thermal_trend *trend)
+{
+ int ret;
+ long tr;
+
+ ret = __ti_thermal_get_trend(thermal->devdata, &tr);
+ if (ret)
+ return ret;
+
if (tr > 0)
*trend = THERMAL_TREND_RAISING;
else if (tr < 0)
@@ -308,16 +331,23 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
if (!data)
return -EINVAL;

- /* Create thermal zone */
- data->ti_thermal = thermal_zone_device_register(domain,
+ /* in case this is specified by DT */
+ data->ti_thermal = thermal_zone_of_sensor_register(bgp->dev, id,
+ data, __ti_thermal_get_temp,
+ __ti_thermal_get_trend);
+ if (IS_ERR(data->ti_thermal)) {
+ /* Create thermal zone */
+ data->ti_thermal = thermal_zone_device_register(domain,
OMAP_TRIP_NUMBER, 0, data, &ti_thermal_ops,
NULL, FAST_TEMP_MONITORING_RATE,
FAST_TEMP_MONITORING_RATE);
- if (IS_ERR(data->ti_thermal)) {
- dev_err(bgp->dev, "thermal zone device is NULL\n");
- return PTR_ERR(data->ti_thermal);
+ if (IS_ERR(data->ti_thermal)) {
+ dev_err(bgp->dev, "thermal zone device is NULL\n");
+ return PTR_ERR(data->ti_thermal);
+ }
+ data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
+ data->our_zone = true;
}
- data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
ti_bandgap_set_sensor_data(bgp, id, data);
ti_bandgap_write_update_interval(bgp, data->sensor_id,
data->ti_thermal->polling_delay);
@@ -331,7 +361,13 @@ int ti_thermal_remove_sensor(struct ti_bandgap *bgp, int id)

data = ti_bandgap_get_sensor_data(bgp, id);

- thermal_zone_device_unregister(data->ti_thermal);
+ if (data && data->ti_thermal) {
+ if (data->our_zone)
+ thermal_zone_device_unregister(data->ti_thermal);
+ else
+ thermal_zone_of_sensor_unregister(bgp->dev,
+ data->ti_thermal);
+ }

return 0;
}
@@ -350,6 +386,15 @@ int ti_thermal_report_sensor_temperature(struct ti_bandgap *bgp, int id)
int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id)
{
struct ti_thermal_data *data;
+ struct device_node *np = bgp->dev->of_node;
+
+ /*
+ * We are assuming here that if one deploys the zone
+ * using DT, then it must be aware that the cooling device
+ * loading has to happen via cpufreq driver.
+ */
+ if (of_find_property(np, "#thermal-sensor-cells", NULL))
+ return 0;

data = ti_bandgap_get_sensor_data(bgp, id);
if (!data || IS_ERR(data))
@@ -380,7 +425,9 @@ int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id)
struct ti_thermal_data *data;

data = ti_bandgap_get_sensor_data(bgp, id);
- cpufreq_cooling_unregister(data->cool_dev);
+
+ if (data && data->cool_dev)
+ cpufreq_cooling_unregister(data->cool_dev);

return 0;

Eduardo Valentin

unread,
Nov 12, 2013, 2:50:02 PM11/12/13
to
This patch adds to tmp102 temperature sensor the possibility
to expose itself as thermal zone device, registered on the
thermal framework.

The thermal zone is built only if a device tree node
describing a thermal zone for this sensor is present
inside the tmp102 DT node. Otherwise, the driver behavior
will be the same.

Cc: Jean Delvare <kh...@linux-fr.org>
Cc: lm-se...@lm-sensors.org
Cc: linux-...@vger.kernel.org
Acked-by: Guenter Roeck <li...@roeck-us.net>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
drivers/hwmon/tmp102.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index d7b47ab..6748b45 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -27,6 +27,8 @@
#include <linux/mutex.h>
#include <linux/device.h>
#include <linux/jiffies.h>
+#include <linux/thermal.h>
+#include <linux/of.h>

#define DRIVER_NAME "tmp102"

@@ -50,6 +52,7 @@

struct tmp102 {
struct device *hwmon_dev;
+ struct thermal_zone_device *tz;
struct mutex lock;
u16 config_orig;
unsigned long last_update;
@@ -93,6 +96,15 @@ static struct tmp102 *tmp102_update_device(struct i2c_client *client)
return tmp102;
}

+static int tmp102_read_temp(void *dev, long *temp)
+{
+ struct tmp102 *tmp102 = tmp102_update_device(to_i2c_client(dev));
+
+ *temp = tmp102->temp[0];
+
+ return 0;
+}
+
static ssize_t tmp102_show_temp(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -204,6 +216,12 @@ static int tmp102_probe(struct i2c_client *client,
goto fail_remove_sysfs;
}

+ tmp102->tz = thermal_zone_of_sensor_register(&client->dev, 0,
+ &client->dev,
+ tmp102_read_temp, NULL);
+ if (IS_ERR(tmp102->tz))
+ tmp102->tz = NULL;
+
dev_info(&client->dev, "initialized\n");

return 0;
@@ -220,6 +238,7 @@ static int tmp102_remove(struct i2c_client *client)
{
struct tmp102 *tmp102 = i2c_get_clientdata(client);

+ thermal_zone_of_sensor_unregister(&client->dev, tmp102->tz);
hwmon_device_unregister(tmp102->hwmon_dev);
sysfs_remove_group(&client->dev.kobj, &tmp102_attr_group);

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:01 PM11/12/13
to
This patch changes the dtsi entry on omap4430 to contain
the thermal data. This data will enable the passive
cooling with CPUfreq cooling device at 100C and the
system will do a thermal shutdown at 125C.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Russell King <li...@arm.linux.org.uk>
Cc: linux...@vger.kernel.org
Cc: devicetre...@lists.ozlabs.org
Cc: linux-ar...@lists.infradead.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
arch/arm/boot/dts/omap443x.dtsi | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi
index bcf455e..e9c97d6 100644
--- a/arch/arm/boot/dts/omap443x.dtsi
+++ b/arch/arm/boot/dts/omap443x.dtsi
@@ -12,7 +12,7 @@

/ {
cpus {
- cpu@0 {
+ cpu0: cpu@0 {
/* OMAP443x variants OPP50-OPPNT */
operating-points = <
/* kHz uV */
@@ -25,9 +25,15 @@
};
};

- bandgap {
+ thermal-zones{
+ #include "omap4-cpu-thermal.dtsi"
+ };
+
+ bandgap: bandgap {
reg = <0x4a002260 0x4
0x4a00232C 0x4>;
compatible = "ti,omap4430-bandgap";
+
+ #thermal-sensor-cells = <0>;
};
};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:01 PM11/12/13
to
Small fix on representation. Bandgap node belongs to OCP.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ijc+dev...@hellion.org.uk>
Cc: devic...@vger.kernel.org
Cc: linux-ar...@lists.infradead.org
Cc: linux-...@vger.kernel.org
Suggested-by: Nishanth Menon <n...@ti.com>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
arch/arm/boot/dts/omap4460.dtsi | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/arm/boot/dts/omap4460.dtsi b/arch/arm/boot/dts/omap4460.dtsi
index dc22719..def8183 100644
--- a/arch/arm/boot/dts/omap4460.dtsi
+++ b/arch/arm/boot/dts/omap4460.dtsi
@@ -39,14 +39,16 @@
#include "omap4-cpu-thermal.dtsi"
};

- bandgap: bandgap {
- reg = <0x4a002260 0x4
- 0x4a00232C 0x4
- 0x4a002378 0x18>;
- compatible = "ti,omap4460-bandgap";
- interrupts = <0 126 IRQ_TYPE_LEVEL_HIGH>; /* talert */
- gpios = <&gpio3 22 0>; /* tshut */
+ ocp {
+ bandgap: bandgap {
+ reg = <0x4a002260 0x4
+ 0x4a00232C 0x4
+ 0x4a002378 0x18>;
+ compatible = "ti,omap4460-bandgap";
+ interrupts = <0 126 IRQ_TYPE_LEVEL_HIGH>; /* talert */
+ gpios = <&gpio3 22 0>; /* tshut */

- #thermal-sensor-cells = <0>;
+ #thermal-sensor-cells = <0>;
+ };
};
};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:03 PM11/12/13
to
This patch changes a dtsi file to contain the thermal data
for GPU domain on OMAP5 and later SoCs. This data will
enable a thermal shutdown at 125C.

This thermal data can be reused across TI SoC devices.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ian.ca...@citrix.com>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
Change from v4:
- Using string constants instead of macros on trip types

arch/arm/boot/dts/omap5-gpu-thermal.dtsi | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100644 arch/arm/boot/dts/omap5-gpu-thermal.dtsi

diff --git a/arch/arm/boot/dts/omap5-gpu-thermal.dtsi b/arch/arm/boot/dts/omap5-gpu-thermal.dtsi
new file mode 100644
index 0000000..1b87aca
--- /dev/null
+++ b/arch/arm/boot/dts/omap5-gpu-thermal.dtsi
@@ -0,0 +1,28 @@
+/*
+ * Device Tree Source for OMAP543x SoC GPU thermal
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
+ * Contact: Eduardo Valentin <eduardo....@ti.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+gpu_thermal: gpu_thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap 1>;
+
+ trips {
+ gpu_crit: gpu_crit {
+ temperature = <125000>; /* milliCelsius */
+ hysteresis = <2000>; /* milliCelsius */
+ type = "critical";
+ };
+ };
+};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:03 PM11/12/13
to
This patch changes the dtsi entry on omap5 to contain
the thermal data. This data will enable the passive
cooling with CPUfreq cooling device at 100C. The
system will do a thermal shutdown at 125C whenever
any of its sensors sees this level.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ian.ca...@citrix.com>
Cc: Russell King <li...@arm.linux.org.uk>
Cc: linux...@vger.kernel.org
Cc: devic...@vger.kernel.org
Cc: linux-ar...@lists.infradead.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
arch/arm/boot/dts/omap5.dtsi | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 7cdea1b..187cb71 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -33,7 +33,7 @@
#address-cells = <1>;
#size-cells = <0>;

- cpu@0 {
+ cpu0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <0x0>;
@@ -45,6 +45,12 @@
};
};

+ thermal-zones {
+ #include "omap4-cpu-thermal.dtsi"
+ #include "omap5-gpu-thermal.dtsi"
+ #include "omap5-core-thermal.dtsi"
+ };
+
timer {
compatible = "arm,armv7-timer";
/* PPI secure/nonsecure IRQ */
@@ -705,13 +711,15 @@
};
};

- bandgap@4a0021e0 {
+ bandgap: bandgap@4a0021e0 {
reg = <0x4a0021e0 0xc
0x4a00232c 0xc
0x4a002380 0x2c
0x4a0023C0 0x3c>;
interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>;
compatible = "ti,omap5430-bandgap";
+
+ #thermal-sensor-cells = <1>;
};
};
};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:04 PM11/12/13
to
OMAP5 devices can reach high temperatures and thus
needs to have cpufreq-cooling on systems running on it.

This patch adds the required cooling device properties
so that cpufreq-cpu0 driver loads the cooling device.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ian.ca...@citrix.com>
Cc: Russell King <li...@arm.linux.org.uk>
Cc: linux...@vger.kernel.org
Cc: devic...@vger.kernel.org
Cc: linux-ar...@lists.infradead.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
arch/arm/boot/dts/omap5.dtsi | 3 +++
1 file changed, 3 insertions(+)

diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 187cb71..a35cd61 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -37,6 +37,9 @@
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <0x0>;
+
+ /* cooling options */
+ #cooling-cells = <2>; /* min followed by max */
};
cpu@1 {
device_type = "cpu";

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:03 PM11/12/13
to
This patch changes a dtsi file to contain the thermal data
for MPU domain on OMAP4 and later SoCs. This data will
enable the passive cooling with CPUfreq cooling device
at 100C and the system will do a thermal shutdown at 125C.

This thermal data can be reused across TI SoC devices.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ian.ca...@citrix.com>
Cc: Russell King <li...@arm.linux.org.uk>
Cc: linux...@vger.kernel.org
Cc: devic...@vger.kernel.org
Cc: linux-ar...@lists.infradead.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
Change from v4:
- Using string constants instead of macros on trip types

arch/arm/boot/dts/omap4-cpu-thermal.dtsi | 41 ++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
create mode 100644 arch/arm/boot/dts/omap4-cpu-thermal.dtsi

diff --git a/arch/arm/boot/dts/omap4-cpu-thermal.dtsi b/arch/arm/boot/dts/omap4-cpu-thermal.dtsi
new file mode 100644
index 0000000..cb9458f
--- /dev/null
+++ b/arch/arm/boot/dts/omap4-cpu-thermal.dtsi
@@ -0,0 +1,41 @@
+/*
+ * Device Tree Source for OMAP4/5 SoC CPU thermal
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
+ * Contact: Eduardo Valentin <eduardo....@ti.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+cpu_thermal: cpu_thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap 0>;
+
+ trips {
+ cpu_alert0: cpu_alert {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ cpu_crit: cpu_crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu_alert0>;
+ cooling-device =
+ <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:04 PM11/12/13
to
OMAP4430 devices can reach high temperatures and thus
needs to have cpufreq-cooling on systems running on it.

This patch adds the required cooling device properties
so that cpufreq-cpu0 driver loads the cooling device.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: devicetre...@lists.ozlabs.org
arch/arm/boot/dts/omap443x.dtsi | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi
index e9c97d6..930ab47 100644
--- a/arch/arm/boot/dts/omap443x.dtsi
+++ b/arch/arm/boot/dts/omap443x.dtsi
@@ -22,6 +22,11 @@
1008000 1375000
>;
clock-latency = <300000>; /* From legacy driver */
+
+ /* cooling options */
+ cooling-min-level = <0>;
+ cooling-max-level = <3>;
+ #cooling-cells = <2>; /* min followed by max */
};
};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:02 PM11/12/13
to
Small fix on representation. Bandgap node belongs to OCP.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ijc+dev...@hellion.org.uk>
Suggested-by: Nishanth Menon <n...@ti.com>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
arch/arm/boot/dts/omap443x.dtsi | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi
index 930ab47..2fca4e5 100644
--- a/arch/arm/boot/dts/omap443x.dtsi
+++ b/arch/arm/boot/dts/omap443x.dtsi
@@ -34,11 +34,13 @@
#include "omap4-cpu-thermal.dtsi"
};

- bandgap: bandgap {
- reg = <0x4a002260 0x4
- 0x4a00232C 0x4>;
- compatible = "ti,omap4430-bandgap";
+ ocp {
+ bandgap: bandgap {
+ reg = <0x4a002260 0x4
+ 0x4a00232C 0x4>;
+ compatible = "ti,omap4430-bandgap";

- #thermal-sensor-cells = <0>;
+ #thermal-sensor-cells = <0>;
+ };
};
};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:03 PM11/12/13
to
This patch changes the dtsi entry on omap4460 to contain
the thermal data. This data will enable the passive
cooling with CPUfreq cooling device at 100C and the
system will do a thermal shutdown at 125C.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ian.ca...@citrix.com>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
arch/arm/boot/dts/omap4460.dtsi | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/omap4460.dtsi b/arch/arm/boot/dts/omap4460.dtsi
index c2f0f39..4d93aba 100644
--- a/arch/arm/boot/dts/omap4460.dtsi
+++ b/arch/arm/boot/dts/omap4460.dtsi
@@ -12,7 +12,7 @@
/ {
cpus {
/* OMAP446x 'standard device' variants OPP50 to OPPTurbo */
- cpu@0 {
+ cpu0: cpu@0 {
operating-points = <
/* kHz uV */
350000 1025000
@@ -30,12 +30,18 @@
ti,hwmods = "debugss";
};

- bandgap {
+ thermal-zones{
+ #include "omap4-cpu-thermal.dtsi"
+ };
+
+ bandgap: bandgap {
reg = <0x4a002260 0x4
0x4a00232C 0x4
0x4a002378 0x18>;
compatible = "ti,omap4460-bandgap";
interrupts = <0 126 IRQ_TYPE_LEVEL_HIGH>; /* talert */
gpios = <&gpio3 22 0>; /* tshut */
+
+ #thermal-sensor-cells = <0>;
};
};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:03 PM11/12/13
to
After discussion and agreement of thermal device tree bindings,
it is desirable to properly maintain thermal bindings for
existing and upcoming devices.

As original author of thermal device tree bindings, I am
volunteering to maintain them.

Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ijc+dev...@hellion.org.uk>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
MAINTAINERS | 1 +
1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3438384..94bfc46 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6178,6 +6178,7 @@ M: Pawel Moll <pawel...@arm.com>
M: Mark Rutland <mark.r...@arm.com>
M: Stephen Warren <swa...@wwwdotorg.org>
M: Ian Campbell <ijc+dev...@hellion.org.uk>
+M: Eduardo Valentin <eduardo....@ti.com>
L: devic...@vger.kernel.org
S: Maintained
F: Documentation/devicetree/

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:03 PM11/12/13
to
This patch changes a dtsi file to contain the thermal data
for CORE domain on OMAP5 and later SoCs. This data will
enable a thermal shutdown at 125C.

This thermal data can be reused across TI SoC devices.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
Change from v4:
- Using string constants instead of macros on trip types

arch/arm/boot/dts/omap5-core-thermal.dtsi | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100644 arch/arm/boot/dts/omap5-core-thermal.dtsi

diff --git a/arch/arm/boot/dts/omap5-core-thermal.dtsi b/arch/arm/boot/dts/omap5-core-thermal.dtsi
new file mode 100644
index 0000000..19212ac
--- /dev/null
+++ b/arch/arm/boot/dts/omap5-core-thermal.dtsi
@@ -0,0 +1,28 @@
+/*
+ * Device Tree Source for OMAP543x SoC CORE thermal
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
+ * Contact: Eduardo Valentin <eduardo....@ti.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+core_thermal: core_thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap 2>;
+
+ trips {
+ core_crit: core_crit {
+ temperature = <125000>; /* milliCelsius */
+ hysteresis = <2000>; /* milliCelsius */
+ type = "critical";
+ };
+ };
+};

Eduardo Valentin

unread,
Nov 12, 2013, 3:00:03 PM11/12/13
to
OMAP4460 devices can reach high temperatures and thus
needs to have cpufreq-cooling on systems running on it.

This patch adds the required cooling device properties
so that cpufreq-cpu0 driver loads the cooling device.

Cc: "Benoît Cousson" <bcou...@baylibre.com>
Cc: Tony Lindgren <to...@atomide.com>
Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ian.ca...@citrix.com>
Cc: Russell King <li...@arm.linux.org.uk>
Cc: linux...@vger.kernel.org
Cc: devic...@vger.kernel.org
Cc: linux-ar...@lists.infradead.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
arch/arm/boot/dts/omap4460.dtsi | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/omap4460.dtsi b/arch/arm/boot/dts/omap4460.dtsi
index 4d93aba..dc22719 100644
--- a/arch/arm/boot/dts/omap4460.dtsi
+++ b/arch/arm/boot/dts/omap4460.dtsi
@@ -20,6 +20,11 @@
920000 1313000
>;
clock-latency = <300000>; /* From legacy driver */
+
+ /* cooling options */
+ cooling-min-level = <0>;
+ cooling-max-level = <2>;
+ #cooling-cells = <2>; /* min followed by max */
};
};

Olof Johansson

unread,
Nov 12, 2013, 3:10:02 PM11/12/13
to
The number of device-tree maintainers is getting to be somewhat silly.
You don't have to be a maintainer to be a reviewer, and the number is
getting large enough that it might get hard for one maintainer to know
what another is doing.

So, soft nack, but it's really up to the DT guys.


-Olof

Eduardo Valentin

unread,
Nov 12, 2013, 3:20:01 PM11/12/13
to
Hello Olof,

No issues on my side. As I stated in the patch description, I am
volunteering. The motivation comes from the fact that it's been hard to
get DT maintainers attention, so I am assuming they are not having
enough bandwidth to pay attention to an extra class of bindings.

I agree with you, this is up to the DT folks.


>
>
> -Olof
>
>


--
You have got to be excited about what you are doing. (L. Lamport)

Eduardo Valentin

signature.asc

Mark Rutland

unread,
Nov 13, 2013, 4:50:01 AM11/13/13
to
Hi,
When we discussed this in #devicetree, I was under the impression you
meant adding a line like:

F: Documentation/devicetree/bindings/thermal/

To your MAINTAINERS entry, rather than adding yourself to the general
devicetree bindings MAINTAINERS entry. Sorry for the confusion regarding
this.

Given we have enough difficulty organising ourselves at present, I agree
with Olof that it doesn't make much sense to throw more people into the
general devicetree bindings maintainer pile.

However, we would certainly appreciate any review you'd be willing to
provide.

Thanks,
Mark.

Eduardo Valentin

unread,
Nov 13, 2013, 7:20:02 AM11/13/13
to
No issues then folks. :-)

>
> Thanks,
> Mark.
signature.asc

Eduardo Valentin

unread,
Nov 13, 2013, 9:50:02 AM11/13/13
to
On 13-11-2013 05:42, Mark Rutland wrote:
This would work for me too. How about adding this to thermal subsys
entry in MAINTAINERS file?

>
> To your MAINTAINERS entry, rather than adding yourself to the general
> devicetree bindings MAINTAINERS entry. Sorry for the confusion regarding
> this.
>
> Given we have enough difficulty organising ourselves at present, I agree
> with Olof that it doesn't make much sense to throw more people into the
> general devicetree bindings maintainer pile.
>
> However, we would certainly appreciate any review you'd be willing to
> provide.
>
> Thanks,
> Mark.
>
>


--
signature.asc

Tomasz Figa

unread,
Nov 13, 2013, 12:00:02 PM11/13/13
to
Hi Eduardo,

On Tuesday 12 of November 2013 15:46:04 Eduardo Valentin wrote:
> This patch introduces a device tree bindings for
> describing the hardware thermal behavior and limits.
> Also a parser to read and interpret the data and feed
> it in the thermal framework is presented.
[snip]
> diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
> new file mode 100644
> index 0000000..59f5bd2
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/thermal/thermal.txt
> @@ -0,0 +1,586 @@
[snip]
> +* Cooling device nodes
> +
> +Cooling devices are nodes providing control on power dissipation. There
> +are essentially two ways to provide control on power dissipation. First
> +is by means of regulating device performance, which is known as passive
> +cooling. A typical passive cooling is a CPU that has dynamic voltage and
> +frequency scaling (DVFS), and uses lower frequencies as cooling states.
> +Second is by means of activating devices in order to remove
> +the dissipated heat, which is known as active cooling, e.g. regulating
> +fan speeds. In both cases, cooling devices shall have a way to determine
> +the state of cooling in which the device is.
> +
> +Required properties:
> +- cooling-min-state: An integer indicating the smallest
> + Type: unsigned cooling state accepted. Typically 0.
> + Size: one cell

Could you explain (just in a reply) what a cooling state is and what are
the min and max values used for?
What about replacing temperature and hysteresis with a single temperature
property that can be either one cell for 0 hysteresis or two cells to
specify lower and upper range of temperatures?

> +
> +- type: a string containing the trip type. Supported values are:
> + "active": A trip point to enable active cooling
> + "passive": A trip point to enable passive cooling

The two above seem to be implying action, as opposed to the general
comment about trip points.

> + "hot": A trip point to notify emergency
> + "critical": Hardware not reliable.
> + Type: string
> +
[snip]
I believe you don't need the min- and max-state properties here then. Your
thermal core can simply query the cpufreq driver (which would be a cooling
device here) about the range of states it supports.

> + };
> + ...
> +};
> +
> +&i2c1 {
> + ...
> + /*
> + * A simple fan controller which supports 10 speeds of operation
> + * (represented as 0-9).
> + */
> + fan0: fan@0x48 {
> + ...
> + cooling-min-state = <0>;
> + cooling-max-state = <9>;

This is similar. The fan driver will probaby know about available
fan speed levels and be able to report the range of states to thermal
core.

That's just for the binding. I will try to review the code when I find
a bit more time.

Best regards,
Tomasz

Eduardo Valentin

unread,
Nov 14, 2013, 6:40:01 AM11/14/13
to
On 13-11-2013 12:57, Tomasz Figa wrote:
> Hi Eduardo,
>

Hello Tomaz
Cooling state is an unsigned integer which represents heat control that
a cooling device implies.
What is the problem with using two properties? I think we loose
representation by gluing two properties into one just because one cell.

>
>> +
>> +- type: a string containing the trip type. Supported values are:
>> + "active": A trip point to enable active cooling
>> + "passive": A trip point to enable passive cooling
>
> The two above seem to be implying action, as opposed to the general
> comment about trip points.

They do not imply action, they specify type of trip.
This binding is not supposed to be aware of cpufreq, which is Linux
specific implementation.

Besides, the cpufreq layer is populated by data already specified in DT.
.
>
>> + };
>> + ...
>> +};
>> +
>> +&i2c1 {
>> + ...
>> + /*
>> + * A simple fan controller which supports 10 speeds of operation
>> + * (represented as 0-9).
>> + */
>> + fan0: fan@0x48 {
>> + ...
>> + cooling-min-state = <0>;
>> + cooling-max-state = <9>;
>
> This is similar. The fan driver will probaby know about available
> fan speed levels and be able to report the range of states to thermal
> core.

Then we loose also the flexibility to assign cooling states to trip
points, as described in this binding.

>
> That's just for the binding. I will try to review the code when I find
> a bit more time.

ok.

>
> Best regards,
> Tomasz
signature.asc

Eduardo Valentin

unread,
Nov 14, 2013, 8:20:01 AM11/14/13
to
On 12-11-2013 15:46, Eduardo Valentin wrote:
> This patch changes the cpufreq-cpu0 driver to consider if
> a cpu needs cooling (with cpufreq). In case the cooling is needed,
> the cpu0 device tree node needs to be properly configured
> with cooling device properties.
>
> In case these properties are present,, the driver will
> load a cpufreq cooling device in the system. The cpufreq-cpu0
> driver is not interested in determining how the system should
> be using the cooling device. The driver is responsible
> only of loading the cooling device.
>
> Describing how the cooling device will be used can be
> accomplished by setting up a thermal zone that references
> and is composed by the cpufreq cooling device.
>
> Cc: "Rafael J. Wysocki" <r...@sisk.pl>

Rafael, Can I still assume you are OK with this patch and add your
acked-by [1]?
http://www.spinics.net/lists/lm-sensors/msg39136.html

It has changed a few bits from V1 to here. The idea is still same though.

> Cc: Viresh Kumar <viresh...@linaro.org>
> Cc: Grant Likely <grant....@linaro.org>
> Cc: Rob Herring <rob.h...@calxeda.com>
> Cc: cpu...@vger.kernel.org
> Cc: linu...@vger.kernel.org
> Cc: linux-...@vger.kernel.org
> Cc: devicetre...@lists.ozlabs.org
> Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
> ---
> .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt | 7 +++++++
> drivers/cpufreq/Kconfig | 2 +-
> drivers/cpufreq/cpufreq-cpu0.c | 16 ++++++++++++++++
> 3 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> index 051f764..f055515 100644
> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> @@ -15,6 +15,10 @@ Optional properties:
> - clock-latency: Specify the possible maximum transition latency for clock,
> in unit of nanoseconds.
> - voltage-tolerance: Specify the CPU voltage tolerance in percentage.
> +- #cooling-cells:
> +- cooling-min-level:
> +- cooling-max-level:
> + Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
>
> Examples:
>
> @@ -33,6 +37,9 @@ cpus {
> 198000 850000
> >;
> clock-latency = <61036>; /* two CLK32 periods */
> + #cooling-cells = <2>;
> + cooling-min-level = <0>;
> + cooling-max-level = <2>;
> };
>
> cpu@1 {
> diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
> index 534fcb8..fc1e9a5 100644
> --- a/drivers/cpufreq/Kconfig
> +++ b/drivers/cpufreq/Kconfig
> @@ -186,7 +186,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
>
> config GENERIC_CPUFREQ_CPU0
> tristate "Generic CPU0 cpufreq driver"
> - depends on HAVE_CLK && REGULATOR && PM_OPP && OF
> + depends on HAVE_CLK && REGULATOR && PM_OPP && OF && THERMAL && CPU_THERMAL
> select CPU_FREQ_TABLE
> help
> This adds a generic cpufreq driver for CPU0 frequency management.
> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
> index c522a95..568aaf3 100644
> --- a/drivers/cpufreq/cpufreq-cpu0.c
> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> @@ -13,7 +13,9 @@
>
> #include <linux/clk.h>
> #include <linux/cpu.h>
> +#include <linux/cpu_cooling.h>
> #include <linux/cpufreq.h>
> +#include <linux/cpumask.h>
> #include <linux/err.h>
> #include <linux/module.h>
> #include <linux/of.h>
> @@ -21,6 +23,7 @@
> #include <linux/platform_device.h>
> #include <linux/regulator/consumer.h>
> #include <linux/slab.h>
> +#include <linux/thermal.h>
>
> static unsigned int transition_latency;
> static unsigned int voltage_tolerance; /* in percentage */
> @@ -29,6 +32,7 @@ static struct device *cpu_dev;
> static struct clk *cpu_clk;
> static struct regulator *cpu_reg;
> static struct cpufreq_frequency_table *freq_table;
> +static struct thermal_cooling_device *cdev;
>
> static int cpu0_verify_speed(struct cpufreq_policy *policy)
> {
> @@ -260,6 +264,17 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
> goto out_free_table;
> }
>
> + /*
> + * For now, just loading the cooling device;
> + * thermal DT code takes care of matching them.
> + */
> + if (of_find_property(np, "#cooling-cells", NULL)) {
> + cdev = of_cpufreq_cooling_register(np, cpu_present_mask);
> + if (IS_ERR(cdev))
> + pr_err("running cpufreq without cooling device: %ld\n",
> + PTR_ERR(cdev));
> + }
> +
> of_node_put(np);
> return 0;
>
> @@ -272,6 +287,7 @@ out_put_node:
>
> static int cpu0_cpufreq_remove(struct platform_device *pdev)
> {
> + cpufreq_cooling_unregister(cdev);
> cpufreq_unregister_driver(&cpu0_cpufreq_driver);
> opp_free_cpufreq_table(cpu_dev, &freq_table);
signature.asc

Eduardo Valentin

unread,
Nov 14, 2013, 8:40:02 AM11/14/13
to
After discussion and agreement of thermal device tree bindings,
it is desirable to properly maintain thermal bindings for
existing and upcoming devices.

As original author of thermal device tree bindings, I am
volunteering to maintain them. This then adds and entry
for device tree bindings under thermal domain.

Cc: Rob Herring <rob.h...@calxeda.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Mark Rutland <mark.r...@arm.com>
Cc: Stephen Warren <swa...@wwwdotorg.org>
Cc: Ian Campbell <ijc+dev...@hellion.org.uk>
Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
---
MAINTAINERS | 1 +
1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3438384..57fbf81 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8411,6 +8411,7 @@ S: Supported
F: drivers/thermal/
F: include/linux/thermal.h
F: include/linux/cpu_cooling.h
+F: Documentation/devicetree/bindings/thermal/

THINGM BLINK(1) USB RGB LED DRIVER
M: Vivien Didelot <vivien....@savoirfairelinux.com>
--
1.8.2.1.342.gfa7285d

Tomasz Figa

unread,
Nov 14, 2013, 8:50:02 AM11/14/13
to
OK. So you have a cooling device and it can have multiple cooling states,
like a cooling fan that has multiple speed levels. Did I understand this
correctly?

IMHO this should be also explained in the documentation above, possibly
with one or two examples.
Ranges is the representation widely used in other bindings. In addition
I believe a range is more representative - when reading a DTS you don't
need to think whether the hysteresis is in temperature units, percents or
something else and also is less ambiguous, because you have clearly
defined lower and upper bounds in one place.

>
> >
> >> +
> >> +- type: a string containing the trip type. Supported values are:
> >> + "active": A trip point to enable active cooling
> >> + "passive": A trip point to enable passive cooling
> >
> > The two above seem to be implying action, as opposed to the general
> > comment about trip points.
>
> They do not imply action, they specify type of trip.

For me "A trip point to enable active cooling" means that when this trip
point is crossed, active cooling must be enabled. What is it if not
implying action?
I didn't say anything about making the binding aware of cpufreq, but
instead about getting rid of redundancy of data, that can be provided
by respective drivers anyway.

>
> Besides, the cpufreq layer is populated by data already specified in DT.
> .
> >
> >> + };
> >> + ...
> >> +};
> >> +
> >> +&i2c1 {
> >> + ...
> >> + /*
> >> + * A simple fan controller which supports 10 speeds of operation
> >> + * (represented as 0-9).
> >> + */
> >> + fan0: fan@0x48 {
> >> + ...
> >> + cooling-min-state = <0>;
> >> + cooling-max-state = <9>;
> >
> > This is similar. The fan driver will probaby know about available
> > fan speed levels and be able to report the range of states to thermal
> > core.
>
> Then we loose also the flexibility to assign cooling states to trip
> points, as described in this binding.

I don't think you got my point correctly.

Let's say you have a CPU, which has 4 operating points. You don't need
to specify that min state is 0 and max state is 4, because it is implied
by the list of operating points.

Same goes for a fan controller that has, let's say, an 8-bit PWM duty
cycle register, which in turn allows 256 different speed states. This
implies that min state for it is 0 and max state 255 and you don't need
to specify this in DT.

Now, both a CPU and fan controller will have their thermal drivers, which
can report to the thermal core what ranges of cooling states they support,
which again makes it wrong to specify such data in DT.

Rafael J. Wysocki

unread,
Nov 14, 2013, 5:00:02 PM11/14/13
to
On Thursday, November 14, 2013 09:17:21 AM Eduardo Valentin wrote:
> On 12-11-2013 15:46, Eduardo Valentin wrote:
> > This patch changes the cpufreq-cpu0 driver to consider if
> > a cpu needs cooling (with cpufreq). In case the cooling is needed,
> > the cpu0 device tree node needs to be properly configured
> > with cooling device properties.
> >
> > In case these properties are present,, the driver will
> > load a cpufreq cooling device in the system. The cpufreq-cpu0
> > driver is not interested in determining how the system should
> > be using the cooling device. The driver is responsible
> > only of loading the cooling device.
> >
> > Describing how the cooling device will be used can be
> > accomplished by setting up a thermal zone that references
> > and is composed by the cpufreq cooling device.
> >
> > Cc: "Rafael J. Wysocki" <r...@sisk.pl>
>
> Rafael, Can I still assume you are OK with this patch and add your
> acked-by [1]?
> http://www.spinics.net/lists/lm-sensors/msg39136.html
>
> It has changed a few bits from V1 to here. The idea is still same though.

Well, this is fine by me, personally, but I'd like someone from the ARM camp to
ACK this to be honest.

Thanks!
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

viresh kumar

unread,
Nov 14, 2013, 11:50:01 PM11/14/13
to
On Wednesday 13 November 2013 01:16 AM, Eduardo Valentin wrote:
> This patch changes the cpufreq-cpu0 driver to consider if
> a cpu needs cooling (with cpufreq). In case the cooling is needed,
> the cpu0 device tree node needs to be properly configured
> with cooling device properties.
>
> In case these properties are present,, the driver will
> load a cpufreq cooling device in the system. The cpufreq-cpu0
> driver is not interested in determining how the system should
> be using the cooling device. The driver is responsible
> only of loading the cooling device.
>
> Describing how the cooling device will be used can be
> accomplished by setting up a thermal zone that references
> and is composed by the cpufreq cooling device.
>
> Cc: "Rafael J. Wysocki" <r...@sisk.pl>
> Cc: Viresh Kumar <viresh...@linaro.org>
> Cc: Grant Likely <grant....@linaro.org>
> Cc: Rob Herring <rob.h...@calxeda.com>
> Cc: cpu...@vger.kernel.org
> Cc: linu...@vger.kernel.org
> Cc: linux-...@vger.kernel.org
> Cc: devicetre...@lists.ozlabs.org
> Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
> ---
> .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt | 7 +++++++
> drivers/cpufreq/Kconfig | 2 +-
> drivers/cpufreq/cpufreq-cpu0.c | 16 ++++++++++++++++
> 3 files changed, 24 insertions(+), 1 deletion(-)

Acked-by: Viresh Kumar <viresh...@linaro.org>

Jean Delvare

unread,
Nov 15, 2013, 2:50:02 AM11/15/13
to
Hi Eduardo,

Sorry for joining the discussion a little late, I could never find the
time to look into this patch series so far.

On Tue, 12 Nov 2013 15:46:08 -0400, Eduardo Valentin wrote:
> This patch adds to lm75 temperature sensor the possibility
> to expose itself as thermal zone device, registered on the
> thermal framework.
>
> The thermal zone is built only if a device tree node
> describing a thermal zone for this sensor is present
> inside the lm75 DT node. Otherwise, the driver behavior
> will be the same.
>
> Cc: Jean Delvare <kh...@linux-fr.org>
> Cc: lm-se...@lm-sensors.org
> Cc: linux-...@vger.kernel.org
> Acked-by: Guenter Roeck <li...@roeck-us.net>
> Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
> ---
> drivers/hwmon/lm75.c | 35 ++++++++++++++++++++++++++++++-----
> 1 file changed, 30 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
> index c03b490..1d3600a 100644
> --- a/drivers/hwmon/lm75.c
> +++ b/drivers/hwmon/lm75.c
> @@ -27,6 +27,8 @@
> #include <linux/hwmon-sysfs.h>
> #include <linux/err.h>
> #include <linux/mutex.h>
> +#include <linux/of.h>
> +#include <linux/thermal.h>
> #include "lm75.h"
>
>
> @@ -70,6 +72,7 @@ static const u8 LM75_REG_TEMP[3] = {
> /* Each client has this additional data */
> struct lm75_data {
> struct device *hwmon_dev;
> + struct thermal_zone_device *tz;
> struct mutex update_lock;
> u8 orig_conf;
> u8 resolution; /* In bits, between 9 and 12 */
> @@ -90,22 +93,36 @@ static struct lm75_data *lm75_update_device(struct device *dev);
>
> /*-----------------------------------------------------------------------*/
>
> +static inline long lm75_reg_to_mc(s16 temp, u8 resolution)
> +{
> + return ((temp >> (16 - resolution)) * 1000) >> (resolution - 8);
> +}
> +
> /* sysfs attributes for hwmon */
>
> +static int lm75_read_temp(void *dev, long *temp)
> +{
> + struct lm75_data *data = lm75_update_device(dev);
> +
> + if (IS_ERR(data))
> + return PTR_ERR(data);
> +
> + *temp = lm75_reg_to_mc(data->temp[0], data->resolution);
> +
> + return 0;
> +}
> +
> static ssize_t show_temp(struct device *dev, struct device_attribute *da,
> char *buf)
> {
> struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
> struct lm75_data *data = lm75_update_device(dev);
> - long temp;
>
> if (IS_ERR(data))
> return PTR_ERR(data);
>
> - temp = ((data->temp[attr->index] >> (16 - data->resolution)) * 1000)
> - >> (data->resolution - 8);
> -
> - return sprintf(buf, "%ld\n", temp);
> + return sprintf(buf, "%ld\n", lm75_reg_to_mc(data->temp[attr->index],
> + data->resolution));
> }

I am a bit worried by this. I did expect that you'd have to split a
piece of show_temp() into one separate function, not two. Here you have
quite some redundancy between lm75_read_temp and show_temp. Sure these
are small functions so the duplicate code is limited, but if you
multiply by the number of hwmon drivers which are candidates for this
change, this all adds up.

>
> static ssize_t set_temp(struct device *dev, struct device_attribute *da,
> @@ -271,6 +288,13 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
> goto exit_remove;
> }
>
> + data->tz = thermal_zone_of_sensor_register(&client->dev,
> + 0,
> + &client->dev,
> + lm75_read_temp, NULL);

I am also worried by this interface. Basically a separate callback
function (here lm75_read_temp) is needed for every thermal input. Some
hwmon drivers have many of these! This will result in duplicate code
again. If you could just pass a sensor ID as an extra parameter to
lm75_read_temp, you could use the same callback for all sensors. This
would maybe also let you refactor the code above, as I believe
show_temp would then be able to call lm75_read_temp(dev, &temp, 0)
instead of reimplementing most of it.

I also note the lack of support for limits. Thermal zones can have
limits, and the various hwmon drivers do support that, yet your
interface offers no possibility to expose them. Wouldn't that be useful?

> + if (IS_ERR(data->tz))
> + data->tz = NULL;

If you are doing that in all drivers, I would question the point of
having thermal_zone_of_sensor_register() return a PTR_ERR in the first
place.

> +
> dev_info(&client->dev, "%s: sensor '%s'\n",
> dev_name(data->hwmon_dev), client->name);
>
> @@ -285,6 +309,7 @@ static int lm75_remove(struct i2c_client *client)
> {
> struct lm75_data *data = i2c_get_clientdata(client);
>
> + thermal_zone_of_sensor_unregister(&client->dev, data->tz);
> hwmon_device_unregister(data->hwmon_dev);
> sysfs_remove_group(&client->dev.kobj, &lm75_group);
> lm75_write_value(client, LM75_REG_CONF, data->orig_conf);


--
Jean Delvare

Jean Delvare

unread,
Nov 15, 2013, 3:10:02 AM11/15/13
to
Hi Eduardo,

On Tue, 12 Nov 2013 15:46:04 -0400, Eduardo Valentin wrote:
> This patch introduces a device tree bindings for
> describing the hardware thermal behavior and limits.
> Also a parser to read and interpret the data and feed
> it in the thermal framework is presented.
>
> This patch introduces a thermal data parser for device
> tree. The parsed data is used to build thermal zones
> and thermal binding parameters. The output data
> can then be used to deploy thermal policies.
>
> This patch adds also documentation regarding this
> API and how to define tree nodes to use
> this infrastructure.
>
> Note that, in order to be able to have control
> on the sensor registration on the DT thermal zone,
> it was required to allow changing the thermal zone
> .get_temp callback. For this reason, this patch
> also removes the 'const' modifier from the .ops
> field of thermal zone devices.
>
> Cc: Zhang Rui <rui....@intel.com>
> Cc: linu...@vger.kernel.org
> Cc: linux-...@vger.kernel.org
> Cc: Mark Rutland <mark.r...@arm.com>
> Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
> ---
> (...)
> +static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
> + enum thermal_trend *trend)
> +{
> + struct __thermal_zone *data = tz->devdata;
> + long dev_trend;
> + int r;
> +
> + if (!data->get_trend)
> + return -EINVAL;
> +
> + r = data->get_trend(data->sensor_data, &dev_trend);
> + if (r)
> + return r;
> +
> + /* TODO: These intervals might have some thresholds, but in core code */
> + if (dev_trend > 0)
> + *trend = THERMAL_TREND_RAISING;
> + else if (dev_trend < 0)
> + *trend = THERMAL_TREND_DROPPING;
> + else
> + *trend = THERMAL_TREND_STABLE;
> +
> + return 0;
> +}

I don't like the whole trend thing.

I've been writing hwmon drivers for the past decade and I've never seen
a thermal sensor device being able to report trends. And as a rule of
thumb, if the hardware can't do it then the (hardware-specific) drivers
should not report it.

Hwmon devices (and drivers) report temperature values, and sometimes
historical min/max. They can do that because these are facts that need
no interpretation.

Defining a trend, however, requires care and, more importantly,
decisions. For example, consider a thermal sensor which reports 50°C at
time t, then 47°C at time t+3s, then 48°C at time t+6s. At t+7s someone
asks for the trend, what should the driver reply? If you consider only
the last 5 seconds, temperature is raising. If you consider the last 10
seconds instead, temperature is dropping. How would the driver know
what time frame the caller is interested in?

Another example: "small" temperature variations. If temperatures varies
by 1°C in my kitchen's oven, I'd call it stable. If my body temperature
varies by 1°C, I'd call it non-stable, and my doctor for an appointment
also ;-)

My point is that only the caller, and not the driver, knows how to
convert a series of measurements into a trend. So I don't think drivers
should implement anything like get_trend(). Whatever piece of code
needs to establish a trend should call get_temp() repeatedly, store the
results and do its own analysis of the data.

Eduardo Valentin

unread,
Nov 15, 2013, 8:30:01 AM11/15/13
to
Hello Tomasz,
There are more than one example in this file. Even explaining why
cooling min and max are used, and the difference of min and max
properties that appear in cooling device and those present in a cooling
specifier. Cooling devices and cooling state are described in the
paragraph above.
Well that sentence is arguable. It is not like all properties in DT are
standardized as ranges, is it?

> I believe a range is more representative - when reading a DTS you don't
> need to think whether the hysteresis is in temperature units, percents or
> something else and also is less ambiguous, because you have clearly
> defined lower and upper bounds in one place.

It is the other way around. For a person designing a thermal
representation for a specific board it is intuitive to think about
hysteresis in this case. It is a better representation because we are
really talking about a hysteresis here in order to give some room for
the system to react on temporary temperature transitions around that
point. It is possible to use ranges as you are suggesting, but it
becomes confusing.


>
>>
>>>
>>>> +
>>>> +- type: a string containing the trip type. Supported values are:
>>>> + "active": A trip point to enable active cooling
>>>> + "passive": A trip point to enable passive cooling
>>>
>>> The two above seem to be implying action, as opposed to the general
>>> comment about trip points.
>>
>> They do not imply action, they specify type of trip.
>
> For me "A trip point to enable active cooling" means that when this trip
> point is crossed, active cooling must be enabled. What is it if not
> implying action?

But within a board there could be more than one active cooling actions
that could be done, it is not a 1 to 1 relation. Same thing applies to
passive cooling. The binding does not imply a specific action. Just the
trip type.
There are cases in which cooling devices don't need to use all states
for cooling, because either lower states does not provide cooling
effectiveness or higher states must be avoided at all. So, allowing
drivers to report this thermal info is possible, but questionable
design, as you would be spreading the thermal info. Besides, for the
example I gave, the driver would need to know board specifics, as the
range of states would vary anyway across board.
Please read my explanation above.

>
> Same goes for a fan controller that has, let's say, an 8-bit PWM duty
> cycle register, which in turn allows 256 different speed states. This
> implies that min state for it is 0 and max state 255 and you don't need
> to specify this in DT.

ditto.

>
> Now, both a CPU and fan controller will have their thermal drivers, which
> can report to the thermal core what ranges of cooling states they support,
> which again makes it wrong to specify such data in DT.


Please also read the examples I gave in the thermal binding. There are
case that the designer may want to assign a range of states to
temperature trip points. This binding is flexible enough to cover for
that situation. And without the representation of these limits it would
be hard to read the binding. It is not redundant data, please check the
documentation.
signature.asc

Zhang Rui

unread,
Nov 18, 2013, 1:10:01 AM11/18/13
to
I agree that a sensor device should not be able to report trends. But
currently, the thermal zone driver is not only a sensor driver, it also
owns the platform thermal cooling strategy.
And some platforms do have this kind of knowledge, right?
e.g. ACPI spec provides an equation for processor passive cooling (ACPI
Spec 5.0 11.1.5).

>
> Hwmon devices (and drivers) report temperature values, and sometimes
> historical min/max. They can do that because these are facts that need
> no interpretation.
>
> Defining a trend, however, requires care and, more importantly,
> decisions.

We had the assumption that we will get an interrupt when the firmware
thinks there is an temperature change that should be noticed by OS.

> For example, consider a thermal sensor which reports 50°C at
> time t, then 47°C at time t+3s,

does firmware thinks this should be noticed by OS?
If no, there is no interrupt and this temperature change is totally
transparent to the OS at all.
If yes, the thermal core will check if the system is overheating, if
yes, it throttles the devices, and if no, do nothing.

> then 48°C at time t+6s. At t+7s someone
> asks for the trend,

the trend is needed when platform thermal driver calls
thermal_zone_device_update(), and mostly following a temperature change
interrupt.

> what should the driver reply?
> If you consider only
> the last 5 seconds, temperature is raising. If you consider the last 10
> seconds instead, temperature is dropping. How would the driver know
> what time frame the caller is interested in?
>
> Another example: "small" temperature variations. If temperatures varies
> by 1°C in my kitchen's oven, I'd call it stable. If my body temperature
> varies by 1°C, I'd call it non-stable, and my doctor for an appointment
> also ;-)
>

> My point is that only the caller, and not the driver, knows how to
> convert a series of measurements into a trend. So I don't think drivers
> should implement anything like get_trend(). Whatever piece of code
> needs to establish a trend should call get_temp() repeatedly, store the
> results and do its own analysis of the data.
>
I think the key problem is that,
the thermal subsystem just provides a basic/minimum thermal control
framework in kernel, and it is totally platform independent. And any
platform driver can use this framework to do some basic thermal control
easily but just telling thermal core if the thermal core should take
some cooling actions (trip points + trend + current temperature) and how
(by choosing thermal governors).

So if you want to do more precise thermal control, you'd better disable
kernel thermal control and do it in userspace. In this case,
the .get_trend() will never be invoked at all.
Further more, I'm indeed considering introduce platform specific thermal
governors, so that platform thermal driver can tell thermal core what to
do at a certain point.

thanks,
rui

Eduardo Valentin

unread,
Nov 18, 2013, 9:30:02 AM11/18/13
to
On 15-11-2013 03:43, Jean Delvare wrote:
> Hi Eduardo,
>

Hello Jean!

> Sorry for joining the discussion a little late, I could never find the
> time to look into this patch series so far.

Well, better late than never, that's what it is said, isn't it? :-)
Thanks for arranging the time to look at these patches.

The patch series have been looked and contributed by several people and
a couple o maintainers now. Starts to be way better than the original
RFC. The core idea still holds though.

>
> On Tue, 12 Nov 2013 15:46:08 -0400, Eduardo Valentin wrote:
>> This patch adds to lm75 temperature sensor the possibility
>> to expose itself as thermal zone device, registered on the
>> thermal framework.
>>
>> The thermal zone is built only if a device tree node
>> describing a thermal zone for this sensor is present
>> inside the lm75 DT node. Otherwise, the driver behavior
>> will be the same.
>>
>> Cc: Jean Delvare <kh...@linux-fr.org>
>> Cc: lm-se...@lm-sensors.org
>> Cc: linux-...@vger.kernel.org
>> Acked-by: Guenter Roeck <li...@roeck-us.net>

On HWMON side, I got Guenter's solid contributions, as you can see.
Can you please elaborate a bit more on which way you think the code
above shall be improved? Are you suggesting we should make show_temp
call lm75_read_temp? On this specific case, I believe the code
duplication is minor, although I haven't measured.

>
>>
>> static ssize_t set_temp(struct device *dev, struct device_attribute *da,
>> @@ -271,6 +288,13 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
>> goto exit_remove;
>> }
>>
>> + data->tz = thermal_zone_of_sensor_register(&client->dev,
>> + 0,
>> + &client->dev,
>> + lm75_read_temp, NULL);
>
> I am also worried by this interface. Basically a separate callback
> function (here lm75_read_temp) is needed for every thermal input. Some
> hwmon drivers have many of these! This will result in duplicate code

The code registering a callback can actually have a private data, in
case you need to differentiate between inputs or in case you need to
derive specific data in order to access the correct input.

> again. If you could just pass a sensor ID as an extra parameter to
> lm75_read_temp, you could use the same callback for all sensors. This

Wouldn't the private pointer be enough?

I think it is a matter of checking what is the best way for the
candidates of users of this new API. In theory it is enough to hold the
ID on the data structure you pass through the void * parameter. However,
that would cause less impact if you have the users of the API already
structured in that way. Let me know what is best for your case.

> would maybe also let you refactor the code above, as I believe
> show_temp would then be able to call lm75_read_temp(dev, &temp, 0)
> instead of reimplementing most of it.
>
> I also note the lack of support for limits. Thermal zones can have
> limits, and the various hwmon drivers do support that, yet your
> interface offers no possibility to expose them. Wouldn't that be useful?
>
>> + if (IS_ERR(data->tz))
>> + data->tz = NULL;
>
> If you are doing that in all drivers, I would question the point of
> having thermal_zone_of_sensor_register() return a PTR_ERR in the first
> place.

Yeah, maybe it should simply return NULL in the fail path and drivers
would not need to bother about it. What do you think?

>
>> +
>> dev_info(&client->dev, "%s: sensor '%s'\n",
>> dev_name(data->hwmon_dev), client->name);
>>
>> @@ -285,6 +309,7 @@ static int lm75_remove(struct i2c_client *client)
>> {
>> struct lm75_data *data = i2c_get_clientdata(client);
>>
>> + thermal_zone_of_sensor_unregister(&client->dev, data->tz);
>> hwmon_device_unregister(data->hwmon_dev);
>> sysfs_remove_group(&client->dev.kobj, &lm75_group);
>> lm75_write_value(client, LM75_REG_CONF, data->orig_conf);
>
>


--
signature.asc

Eduardo Valentin

unread,
Nov 18, 2013, 9:50:01 AM11/18/13
to
Hello Jean,

I will try to complement what Rui's already commented.
In fact, I would agree with you that it is not common to see such
devices. What I've already seen is that HW provides is usually artifacts
that may help on computing such statistic data. And in that case it may
be the case that the data coming from the HW may have less noise, such
as system overhead, than that computed by the caller.

However, it is also the case that such artifacts do not work without a
proper platform dependent configuration step, such as update interval
and size of the history window.

>
> I agree that a sensor device should not be able to report trends. But
> currently, the thermal zone driver is not only a sensor driver, it also
> owns the platform thermal cooling strategy.
> And some platforms do have this kind of knowledge, right?
> e.g. ACPI spec provides an equation for processor passive cooling (ACPI
> Spec 5.0 11.1.5).
>

Just a complementary point here, The design of current thermal framework
is so that get_trend is zone specific, and thus, it has to consider the
cooling strategy owned by the driver, like the update interval. And
besides, in case the driver does not know how to compute the trend, the
framework, the caller of that callback, also provides default behavior.

The purpose of this series is to split the data that is platform
dependent and allow designers to write those data points in device tree.
That is why the "glue layer" still keeps the get_trend.


>>
>> Hwmon devices (and drivers) report temperature values, and sometimes
>> historical min/max. They can do that because these are facts that need
>> no interpretation.
>>
>> Defining a trend, however, requires care and, more importantly,
>> decisions.


Maybe your concern is the fact that it would be very hard to provide a
trend from a hwmon driver? And that I agree, unless the device provides
artifacts. But that is not the case for all users of this API. For
instance, some versions of TI SoC bandgap sensors may have historical
buffers and cumulative values. But obviously, these features would work
only if proper configuration is agreed.
Agreed here. However, the caller can also instruct the device to help on
the computation based on historical values. And historical values can be
configured also by the caller.

>> should implement anything like get_trend(). Whatever piece of code
>> needs to establish a trend should call get_temp() repeatedly, store the
>> results and do its own analysis of the data.
>>
> I think the key problem is that,
> the thermal subsystem just provides a basic/minimum thermal control
> framework in kernel, and it is totally platform independent. And any
> platform driver can use this framework to do some basic thermal control
> easily but just telling thermal core if the thermal core should take
> some cooling actions (trip points + trend + current temperature) and how
> (by choosing thermal governors).
>
> So if you want to do more precise thermal control, you'd better disable
> kernel thermal control and do it in userspace. In this case,
> the .get_trend() will never be invoked at all.
> Further more, I'm indeed considering introduce platform specific thermal
> governors, so that platform thermal driver can tell thermal core what to
> do at a certain point.
>
> thanks,
> rui
>
>
>


--
signature.asc

Guenter Roeck

unread,
Nov 18, 2013, 11:30:02 AM11/18/13
to
On Mon, Nov 18, 2013 at 10:27:41AM -0400, Eduardo Valentin wrote:
> On 15-11-2013 03:43, Jean Delvare wrote:
> > Hi Eduardo,
> >
>
> Hello Jean!
>
> > Sorry for joining the discussion a little late, I could never find the
> > time to look into this patch series so far.
>
> Well, better late than never, that's what it is said, isn't it? :-)
> Thanks for arranging the time to look at these patches.
>
> The patch series have been looked and contributed by several people and
> a couple o maintainers now. Starts to be way better than the original
> RFC. The core idea still holds though.
>
> >
> > On Tue, 12 Nov 2013 15:46:08 -0400, Eduardo Valentin wrote:
> >> This patch adds to lm75 temperature sensor the possibility
> >> to expose itself as thermal zone device, registered on the
> >> thermal framework.
> >>
> >> The thermal zone is built only if a device tree node
> >> describing a thermal zone for this sensor is present
> >> inside the lm75 DT node. Otherwise, the driver behavior
> >> will be the same.
> >>
> >> Cc: Jean Delvare <kh...@linux-fr.org>
> >> Cc: lm-se...@lm-sensors.org
> >> Cc: linux-...@vger.kernel.org
> >> Acked-by: Guenter Roeck <li...@roeck-us.net>
>
> On HWMON side, I got Guenter's solid contributions, as you can see.
>
Not an argument. I am not perfect, and Jean has a number of valid points.

Guenter

Eduardo Valentin

unread,
Nov 18, 2013, 11:50:02 AM11/18/13
to
In fact, this is not an argument, at least not on what Jean is bringing
now. The point here was just to mention that, even though he joined
late, you have already contributed on this patch series. ;-)

> Guenter
signature.asc

Jean Delvare

unread,
Nov 19, 2013, 4:50:02 AM11/19/13
to
Hi Eduardo,

On Mon, 18 Nov 2013 10:27:41 -0400, Eduardo Valentin wrote:
> The patch series have been looked and contributed by several people and
> a couple o maintainers now. Starts to be way better than the original
> RFC. The core idea still holds though.

I have no objection to the core idea, it's great :-)

> >> (...)
Yes, that's pretty much what I had in mind.

> On this specific case, I believe the code
> duplication is minor, although I haven't measured.

I do agree the duplication in a single driver is minor. However I
expect more hwmon drivers to be converted to cooperate with the thermal
subsystem in the future. Duplicated code adds up, in terms of build
time, storage space and run-time memory consumption. Plus it hurts CPU
caching even though I don't expect these code paths to be particularly
hot.

OTOH I suppose that the compiler will inline lm75_reg_to_mc() in this
specific case, saving one function call. It wouldn't possibly inline
lm75_read_temp() so it is quite possible that your implementation
actually performs better than what I am suggesting.

We're speaking of hardly measurable differences anyway. My other point
below is probably more important...

> >>
> >> static ssize_t set_temp(struct device *dev, struct device_attribute *da,
> >> @@ -271,6 +288,13 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
> >> goto exit_remove;
> >> }
> >>
> >> + data->tz = thermal_zone_of_sensor_register(&client->dev,
> >> + 0,
> >> + &client->dev,
> >> + lm75_read_temp, NULL);
> >
> > I am also worried by this interface. Basically a separate callback
> > function (here lm75_read_temp) is needed for every thermal input. Some
> > hwmon drivers have many of these! This will result in duplicate code
>
> The code registering a callback can actually have a private data, in
> case you need to differentiate between inputs or in case you need to
> derive specific data in order to access the correct input.

Are you talking about void *dev? OK, you can virtually pass anything in
that pointer, but...

> > again. If you could just pass a sensor ID as an extra parameter to
> > lm75_read_temp, you could use the same callback for all sensors. This
>
> Wouldn't the private pointer be enough?

I am worried that this won't be practical at all for the real world
cases. I expect that most drivers will need to pass the device plus one
index-like value. Having a single void pointer for that is certainly
flexible but it will also be inefficient, as every driver will have to
define its own custom structure to embed the needed information. You
_do_ pass a sensor_id to thermal_zone_of_sensor_register() so it
shouldn't be hard to pass it to the callback function(s) as well
(assuming the indexing in the device tree matches the one done by the
driver - I certainly hope so.) I'd even suggest passing the device
unconditionally too, as I just can't see how a driver could do without
it. If you pass the right parameters then you may not even need a void
pointer.

> I think it is a matter of checking what is the best way for the
> candidates of users of this new API.

I agree. But two drivers are probably too few to make an educated
decision, I suppose we'll need to convert some more to get a clearer
view. Note that most things can be changed later if needed, so my
comments aren't meant to block the integration of your patches.

> In theory it is enough to hold the
> ID on the data structure you pass through the void * parameter. However,
> that would cause less impact if you have the users of the API already
> structured in that way. Let me know what is best for your case.

We do not have that structure (struct device *, int index) ready yet,
so we'd have to add it to every driver with more than one input. We
could have a generic structure for that in some header file, to avoid
the duplicated work, but if we need to do that, I'd say this is a good
hint that we settled for the wrong API in the first place.

> > (...)
> > I also note the lack of support for limits. Thermal zones can have
> > limits, and the various hwmon drivers do support that, yet your
> > interface offers no possibility to expose them. Wouldn't that be useful?

No comment on that? I'd expect the get_temp() callback to take an extra
parameter so that it can return the value of a specific subfeature, for
example INPUT, HOT, CRITICAL etc. Or leave get_temp() as is but add a
get_trip_temp() callback to retrieve limit information (not sure if
thermal zones can have more than one limit?)

I think thermal zones support hysteresis as well, and so do many hwmon
drivers. It seems unfortunate that your proposed "bridge" between the
hwmon and thermal subsystems doesn't map all the features which are
supported on both sides.

> >> + if (IS_ERR(data->tz))
> >> + data->tz = NULL;
> >
> > If you are doing that in all drivers, I would question the point of
> > having thermal_zone_of_sensor_register() return a PTR_ERR in the first
> > place.
>
> Yeah, maybe it should simply return NULL in the fail path and drivers
> would not need to bother about it. What do you think?

I think exactly what I wrote above, no more no less. I don't think I
can add anything, and the decision is up to you.

Jean Delvare

unread,
Nov 19, 2013, 9:50:03 AM11/19/13
to
Eduardo, Rui,

I don't want to enter a long discussion here. I don't see get_trend()
being implemented by any currently existing hwmon driver. But if you
think future monitoring chips will be different and will be able to
implement get_trend(), well, you'll know better.

--
Jean Delvare

Pavel Machek

unread,
Nov 20, 2013, 7:40:01 AM11/20/13
to
HI!

> This patch changes the dtsi entry on omap4430 to contain
> the thermal data. This data will enable the passive
> cooling with CPUfreq cooling device at 100C and the
> system will do a thermal shutdown at 125C.
>
> Cc: "Benoīt Cousson" <bcou...@baylibre.com>
> Cc: Tony Lindgren <to...@atomide.com>
> Cc: devicetre...@lists.ozlabs.org
> Cc: linux-ar...@lists.infradead.org
> Cc: linux-...@vger.kernel.org
> Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
> ---
> arch/arm/boot/dts/omap443x.dtsi | 10 ++++++++--
> 1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi
> index bcf455e..e9c97d6 100644
> --- a/arch/arm/boot/dts/omap443x.dtsi
> +++ b/arch/arm/boot/dts/omap443x.dtsi
> @@ -12,7 +12,7 @@
>
> / {
> cpus {
> - cpu@0 {
> + cpu0: cpu@0 {
> /* OMAP443x variants OPP50-OPPNT */
> operating-points = <
> /* kHz uV */

I see you also add labels to various other entries...

> @@ -25,9 +25,15 @@
> };
> };
>
> - bandgap {
> + thermal-zones{

You may want to include space here.

> + #include "omap4-cpu-thermal.dtsi"
> + };
> +
> + bandgap: bandgap {
> reg = <0x4a002260 0x4
> 0x4a00232C 0x4>;
> compatible = "ti,omap4430-bandgap";
> +
> + #thermal-sensor-cells = <0>;
> };
> };

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

Tomasz Figa

unread,
Nov 21, 2013, 10:00:02 AM11/21/13
to
I mean, the definition I commented about is completely confusing. You
should rewrite it in a more readable way. For example, "Cooling state is
an unsigned integer which represents level of cooling performance of
a thermal device." would be much more meaningful, if I understood the
whole idea of "cooling state" correctly.

By example I mean simply listing one or two possible practical meanings,
like "(e.g. speed level of a cooling fan, performance throttling level of
CPU)".
No, they are not, but property range is a common concept of representing
range of some values.

Anyway, I won't insist on this, as it's just a minor detail. However I'd
like to see comments from more people on this.

>
> > I believe a range is more representative - when reading a DTS you don't
> > need to think whether the hysteresis is in temperature units, percents or
> > something else and also is less ambiguous, because you have clearly
> > defined lower and upper bounds in one place.
>
> It is the other way around. For a person designing a thermal
> representation for a specific board it is intuitive to think about
> hysteresis in this case. It is a better representation because we are
> really talking about a hysteresis here in order to give some room for
> the system to react on temporary temperature transitions around that
> point. It is possible to use ranges as you are suggesting, but it
> becomes confusing.
>

Probably it depends on what you are used to. I'd like to see opinion
of more people on this.

>
> >
> >>
> >>>
> >>>> +
> >>>> +- type: a string containing the trip type. Supported values are:
> >>>> + "active": A trip point to enable active cooling
> >>>> + "passive": A trip point to enable passive cooling
> >>>
> >>> The two above seem to be implying action, as opposed to the general
> >>> comment about trip points.
> >>
> >> They do not imply action, they specify type of trip.
> >
> > For me "A trip point to enable active cooling" means that when this trip
> > point is crossed, active cooling must be enabled. What is it if not
> > implying action?
>
> But within a board there could be more than one active cooling actions
> that could be done, it is not a 1 to 1 relation. Same thing applies to
> passive cooling. The binding does not imply a specific action. Just the
> trip type.

I'd prefer the "active" and "passive" states to be renamed an their
descriptions rephrased. In general, the idea of having just four trip
point types seems like bound to a single specific hardware platform.

That's a wild guess, but maybe having an unsigned integer to represent
trip point "attention level" would be a better idea?
One thing is the allowed range of cooling states supported by the
cooling device and another is the range of states that are usable on
given board. The former is a property of the cooling device that
in most cases is implied by its compatible string, so to eliminate the
bad data redundancy, you should not make the user respecify that. The
latter you have as a part of your cooling device specifier tuple.

As an example of this, see PWM, GPIO or IRQ bindings. You don't have
{pwm-channel,pwm-duty,interrupt,gpio}-{min,max} properties, because this
is not needed by respective PWM, GPIO or IRQ drivers, as they already
know these parameters.
I don't see any problem with just dropping the min and max properties.
All the use cases you listed above will still work, as you have the
cooling state limits included in cooling device specifier.

Eduardo Valentin

unread,
Nov 21, 2013, 10:40:02 AM11/21/13
to
On 20-11-2013 08:32, Pavel Machek wrote:
> HI!
>
>> This patch changes the dtsi entry on omap4430 to contain
>> the thermal data. This data will enable the passive
>> cooling with CPUfreq cooling device at 100C and the
>> system will do a thermal shutdown at 125C.
>>
>> Cc: "Benoît Cousson" <bcou...@baylibre.com>
>> Cc: Tony Lindgren <to...@atomide.com>
>> Cc: Russell King <li...@arm.linux.org.uk>
>> Cc: linux...@vger.kernel.org
>> Cc: devicetre...@lists.ozlabs.org
>> Cc: linux-ar...@lists.infradead.org
>> Cc: linux-...@vger.kernel.org
>> Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
>> ---
>> arch/arm/boot/dts/omap443x.dtsi | 10 ++++++++--
>> 1 file changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi
>> index bcf455e..e9c97d6 100644
>> --- a/arch/arm/boot/dts/omap443x.dtsi
>> +++ b/arch/arm/boot/dts/omap443x.dtsi
>> @@ -12,7 +12,7 @@
>>
>> / {
>> cpus {
>> - cpu@0 {
>> + cpu0: cpu@0 {
>> /* OMAP443x variants OPP50-OPPNT */
>> operating-points = <
>> /* kHz uV */
>
> I see you also add labels to various other entries...

I think I didn't quite get your point here. Can you please elaborate?

This label has been added because it will be used in thermal zone while
referencing the cooling device.

>
>> @@ -25,9 +25,15 @@
>> };
>> };
>>
>> - bandgap {
>> + thermal-zones{
>
> You may want to include space here.

OK.

>
>> + #include "omap4-cpu-thermal.dtsi"
>> + };
>> +
>> + bandgap: bandgap {
>> reg = <0x4a002260 0x4
>> 0x4a00232C 0x4>;
>> compatible = "ti,omap4430-bandgap";
>> +
>> + #thermal-sensor-cells = <0>;
>> };
>> };
>


--
signature.asc

Eduardo Valentin

unread,
Nov 21, 2013, 10:50:02 AM11/21/13
to
Yeah, I see your point. But I avoided describing cooling state as a
property, as you are requesting, because in fact it is not a property.
Its min and max values are properties, as you can see in the document.
Describing cooling state as a property in this document will confuse
people, because it won't be used in any part of the hardware description.

>
> By example I mean simply listing one or two possible practical meanings,
> like "(e.g. speed level of a cooling fan, performance throttling level of
> CPU)".

Mark R. has already requested this example and I already wrote it. There
is a comment in the CPU example session explaining exactly what you are
asking. Have you missed that one?
Not it is in fact not. These concepts are derived from an specification
that is there for decades actually, ACPI. I am not reinventing the wheel
here. There are several platforms based on that specification. The
concepts have been reused on top of non-ACPI platforms too.

> That's a wild guess, but maybe having an unsigned integer to represent
> trip point "attention level" would be a better idea?

It would be less representative and not standardized. Do you have a
class of boards, or at least a single board that is using the proposed
standard?
So, I still don't understand what is wrong with the binding, as it is
supposed to cover the situations you are talking about. I really don't
see to where this discussion is leading to.

> As an example of this, see PWM, GPIO or IRQ bindings. You don't have
> {pwm-channel,pwm-duty,interrupt,gpio}-{min,max} properties, because this
> is not needed by respective PWM, GPIO or IRQ drivers, as they already
> know these parameters.

Which are completely different devices and concepts.
Because it is not about making using cases to work, but describing the
hardware thermal limits.
signature.asc

Tomasz Figa

unread,
Nov 21, 2013, 11:40:02 AM11/21/13
to
Sorry, I'm not getting your point here. What kind of "property" are you
talking about? Is there anything wrong with my proposed alternative
description?

>
> >
> > By example I mean simply listing one or two possible practical meanings,
> > like "(e.g. speed level of a cooling fan, performance throttling level of
> > CPU)".
>
> Mark R. has already requested this example and I already wrote it. There
> is a comment in the CPU example session explaining exactly what you are
> asking. Have you missed that one?

No, I haven't. But a binding usage example is a separate thing from what
I mean. When you read some text from top to bottom, you would prefer
not to need to jump down and up to understand what you read. That's
why adding one additional sentence, just quickly showing the meaning
of something, is rather justified.
Well, ACPI seems to be designed primarily with PC-class hardware in mind,
such as desktops and notebooks, not small mobile devices, such as mobile
phones and it should not be considered as a reference design, especially
on such hardware, which it was not designed for.

>
> > That's a wild guess, but maybe having an unsigned integer to represent
> > trip point "attention level" would be a better idea?
>
> It would be less representative and not standardized. Do you have a
> class of boards, or at least a single board that is using the proposed
> standard?

Well, I don't have any particular board in mind, but an use case instead.
On a lot of mobile phones, there is also a low temperature trip point
(usually tens of degrees below zero) to shut down the phone when the
temperature falls down below its minimal operating temperature. Although,
one could use "critical" trip point for this, how would you distinguish
hot and cold critical points?

Similarly, there is often a threshold (also below 0C) under which battery
charging process should be disabled.

Does it mean that we also need "cold" and "freezing" trip point types?
The binding is wrong in the aspect that you should not specify in device
tree any information that can be inferred by software, either by using
hardware identity or querying the hardware itself.

>
> > As an example of this, see PWM, GPIO or IRQ bindings. You don't have
> > {pwm-channel,pwm-duty,interrupt,gpio}-{min,max} properties, because this
> > is not needed by respective PWM, GPIO or IRQ drivers, as they already
> > know these parameters.
>
> Which are completely different devices and concepts.

Not in the aspect I'm talking about, especially PWM duty cycle.
I'm not talking about user's use cases here. I mean the use cases that
you mentioned above ("There are case that the designer may want to assign
a range of states to temperature trip points."). This is possible without
those useless (and wrong) min and max properties of cooling devices.

By the way, half of the binding looks to me more like a description of
a single use case of particular board (users may want to have different
trip points, cooling state ranges and polling delays, depending on their
requirements) more than hardware thermal limits. However I'm not
complaining about this, because I believe we agreed on that DT can contain
safe default configuration values as well as pure hardware description,
assuming that these defaults can be changed by the user. Is it also the
case for these thermal bindings?

Eduardo Valentin

unread,
Nov 22, 2013, 7:40:01 AM11/22/13
to

Hello Tomasz,
Again, I simply mean describing cooling state the way you are proposing
may confuse (as in this discussing) that it is a device property, which
is not in this binding.

>
>>
>>>
>>> By example I mean simply listing one or two possible practical meanings,
>>> like "(e.g. speed level of a cooling fan, performance throttling level of
>>> CPU)".
>>
>> Mark R. has already requested this example and I already wrote it. There
>> is a comment in the CPU example session explaining exactly what you are
>> asking. Have you missed that one?
>
> No, I haven't. But a binding usage example is a separate thing from what
> I mean. When you read some text from top to bottom, you would prefer
> not to need to jump down and up to understand what you read. That's
> why adding one additional sentence, just quickly showing the meaning
> of something, is rather justified.

So, it is just a matter of taste on how you want the text organized?
Again, the info is there.
Looks like you missed my point. Let me try, again, to help you
understand. This binding is based on description and modeling that has
worked and discussed already, on different platforms.

Besides, I don't think the discussion is about applicability of ACPI on
mobile device or not. Besides, because it has been designed to a
population of devices it does not imply part of it is not applicable to
other. Just look to how linux ARM based PM support has evolved last
years and you understand what I mean.

>
>>
>>> That's a wild guess, but maybe having an unsigned integer to represent
>>> trip point "attention level" would be a better idea?
>>
>> It would be less representative and not standardized. Do you have a
>> class of boards, or at least a single board that is using the proposed
>> standard?
>
> Well, I don't have any particular board in mind, but an use case instead.

I see. How about if we spend our time on practical cases? Oh I see now,
maybe this is the main reason why this discussion is already taking too
long. Seriously, although I enjoy discussing hypothetical scenarios or
non-existing hardware or hardware from the future, the proposal of this
patch series is, if it is not clear to you: (a) be based on existing
problems (b) make sure we continue the support of existing hardware.
Once more, I am not reinventing the wheel.

Please, bring the hardware specification or board description so we can
be pragmatic. Even if it is not launched you can still describe the
scenario. But please, don't waste our time on some non-existing
problem/scenario.

> On a lot of mobile phones, there is also a low temperature trip point
> (usually tens of degrees below zero) to shut down the phone when the
> temperature falls down below its minimal operating temperature. Although,
> one could use "critical" trip point for this, how would you distinguish
> hot and cold critical points?
>

I think you already provided the answer.

> Similarly, there is often a threshold (also below 0C) under which battery
> charging process should be disabled.
>
> Does it mean that we also need "cold" and "freezing" trip point types?
>

What is the name of the board/phone/tablet/mobile device we are talking
about?
You miss the point that we are talking about thermal limits, not
hardware functionality.



>>
>>> As an example of this, see PWM, GPIO or IRQ bindings. You don't have
>>> {pwm-channel,pwm-duty,interrupt,gpio}-{min,max} properties, because this
>>> is not needed by respective PWM, GPIO or IRQ drivers, as they already
>>> know these parameters.
>>
>> Which are completely different devices and concepts.
>
> Not in the aspect I'm talking about, especially PWM duty cycle.
>

ditto.
No.
signature.asc

Eduardo Valentin

unread,
Nov 22, 2013, 9:40:02 AM11/22/13
to
Hello Jean,


On 19-11-2013 05:39, Jean Delvare wrote:
> Hi Eduardo,
>
> On Mon, 18 Nov 2013 10:27:41 -0400, Eduardo Valentin wrote:
>> The patch series have been looked and contributed by several people and
>> a couple o maintainers now. Starts to be way better than the original
>> RFC. The core idea still holds though.
>
> I have no objection to the core idea, it's great :-)
>

This is good, thanks for the support.
OK.

>> On this specific case, I believe the code
>> duplication is minor, although I haven't measured.
>
> I do agree the duplication in a single driver is minor. However I
> expect more hwmon drivers to be converted to cooperate with the thermal
> subsystem in the future. Duplicated code adds up, in terms of build
> time, storage space and run-time memory consumption. Plus it hurts CPU
> caching even though I don't expect these code paths to be particularly
> hot.
>
> OTOH I suppose that the compiler will inline lm75_reg_to_mc() in this
> specific case, saving one function call. It wouldn't possibly inline
> lm75_read_temp() so it is quite possible that your implementation
> actually performs better than what I am suggesting.
>
> We're speaking of hardly measurable differences anyway. My other point
> below is probably more important...
>

Yeah, but I see your point. You probably want to have first code changes
done properly so that if we have code duplication, it does not spread
like a disease, like usually does. It is unavoidable to base new drivers
on existing ones. So better to get first converted drivers to be done
properly. I like this approach.

>>>>
>>>> static ssize_t set_temp(struct device *dev, struct device_attribute *da,
>>>> @@ -271,6 +288,13 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
>>>> goto exit_remove;
>>>> }
>>>>
>>>> + data->tz = thermal_zone_of_sensor_register(&client->dev,
>>>> + 0,
>>>> + &client->dev,
>>>> + lm75_read_temp, NULL);
>>>
>>> I am also worried by this interface. Basically a separate callback
>>> function (here lm75_read_temp) is needed for every thermal input. Some
>>> hwmon drivers have many of these! This will result in duplicate code
>>
>> The code registering a callback can actually have a private data, in
>> case you need to differentiate between inputs or in case you need to
>> derive specific data in order to access the correct input.
>
> Are you talking about void *dev? OK, you can virtually pass anything in
> that pointer, but...

Yes.

>
>>> again. If you could just pass a sensor ID as an extra parameter to
>>> lm75_read_temp, you could use the same callback for all sensors. This
>>
>> Wouldn't the private pointer be enough?
>
> I am worried that this won't be practical at all for the real world
> cases. I expect that most drivers will need to pass the device plus one
> index-like value. Having a single void pointer for that is certainly
> flexible but it will also be inefficient, as every driver will have to
> define its own custom structure to embed the needed information. You

I see. Check patch 08/20 of this series. The driver had already
sensor/input id embedded in its own driver data. So, it was just a
matter of passing the complete driver data and then fetching id from the
passed driver data. Then again, that was only one case, if all other
potential API user drivers need to be modified to create their own
custom structure just for passing id, I definitely agree that it is
better to change the proposed API.

> _do_ pass a sensor_id to thermal_zone_of_sensor_register() so it
> shouldn't be hard to pass it to the callback function(s) as well
> (assuming the indexing in the device tree matches the one done by the
> driver - I certainly hope so.) I'd even suggest passing the device

Yes the ids from DT and driver layer must match.

> unconditionally too, as I just can't see how a driver could do without
> it. If you pass the right parameters then you may not even need a void
> pointer.

OK. So I assume you are proposing something like:
.get_temp(struct device *dev, int id, long *temp);

right?




>
>> I think it is a matter of checking what is the best way for the
>> candidates of users of this new API.
>
> I agree. But two drivers are probably too few to make an educated
> decision, I suppose we'll need to convert some more to get a clearer

Yeah, I agree. The driver I chose were obviously based on the needs I
have to support the boards I have available/access.


> view. Note that most things can be changed later if needed, so my
> comments aren't meant to block the integration of your patches.

OK. I agree that this patch series maybe does not take into
consideration the complete driver population. However, I must agree that
driver migration can be done latter. Actually on both sides that is
required, from thermal and from hwmon.

I was already prompted about the strategy on this subject. My plan is
still to settle first the DT bindings, then the layers and APIs, then
add support on driver side. In order to show that the proposal works on
all fronts, I picked a sample of drivers to make things working on
existing boards, like DRA7-evm, panda 4430 and 4460, OMAP5432-uevm,
where we have both SoC sensors and board sensors.

DT bindings seams to be the most hard part, as there is no well
settle/defined standard to follow. The closest we can have is ACPI,
which is not applicable fully, otherwise we wouldn't be having this
discussion. However, I believe now the binding is more or less defined.
But I didn't get an official ack from DT folks though.

In the end of the day, the APIs and driver support will be really well
defined when we convert most of the drivers. But I would like to this in
a separated patch series.

What is your view? Are you OK if we integrate this series as a first
step of the complete process?

>
>> In theory it is enough to hold the
>> ID on the data structure you pass through the void * parameter. However,
>> that would cause less impact if you have the users of the API already
>> structured in that way. Let me know what is best for your case.
>
> We do not have that structure (struct device *, int index) ready yet,
> so we'd have to add it to every driver with more than one input. We
> could have a generic structure for that in some header file, to avoid
> the duplicated work, but if we need to do that, I'd say this is a good
> hint that we settled for the wrong API in the first place.

I see. I think we need to take into account the driver needs while
converting them. Assuming you are seeing a need to have these
parameters, I would guess it is a good advice, based on the fact that
you know better the existing hwmon drivers.

>
>>> (...)
>>> I also note the lack of support for limits. Thermal zones can have
>>> limits, and the various hwmon drivers do support that, yet your
>>> interface offers no possibility to expose them. Wouldn't that be useful?
>
> No comment on that? I'd expect the get_temp() callback to take an extra
> parameter so that it can return the value of a specific subfeature, for
> example INPUT, HOT, CRITICAL etc. Or leave get_temp() as is but add a
> get_trip_temp() callback to retrieve limit information (not sure if
> thermal zones can have more than one limit?)
>
> I think thermal zones support hysteresis as well, and so do many hwmon
> drivers. It seems unfortunate that your proposed "bridge" between the
> hwmon and thermal subsystems doesn't map all the features which are
> supported on both sides.

Yeah, it is back to the point that I am taking small steps here.

The hysteresis, and other properties, like update rate, or limits, in
fact need to be propagated down to driver layer. This way, hardware
features like interrupts and counters can be properly configured. So
far, the code now handles this in the DT glue layer, which means, the
thermal framework will be polling the hardware.

>
>>>> + if (IS_ERR(data->tz))
>>>> + data->tz = NULL;
>>>
>>> If you are doing that in all drivers, I would question the point of
>>> having thermal_zone_of_sensor_register() return a PTR_ERR in the first
>>> place.
>>
>> Yeah, maybe it should simply return NULL in the fail path and drivers
>> would not need to bother about it. What do you think?
>
> I think exactly what I wrote above, no more no less. I don't think I
> can add anything, and the decision is up to you.
>

OK.

Thanks for the good feedback.
signature.asc

Jean Delvare

unread,
Nov 23, 2013, 1:40:01 PM11/23/13
to
On Fri, 22 Nov 2013 10:37:04 -0400, Eduardo Valentin wrote:
> Yeah, but I see your point. You probably want to have first code changes
> done properly so that if we have code duplication, it does not spread
> like a disease, like usually does. It is unavoidable to base new drivers
> on existing ones. So better to get first converted drivers to be done
> properly. I like this approach.

Not only that, but also getting things right (as much as we are able
to) at first avoids having to touch 20 already converted drivers when
you attempt to convert the 21st one and it doesn't fit in the API.

> (...)
> OK. So I assume you are proposing something like:
> .get_temp(struct device *dev, int id, long *temp);
>
> right?

Yes.

> (...)
> In the end of the day, the APIs and driver support will be really well
> defined when we convert most of the drivers. But I would like to this in
> a separated patch series.
>
> What is your view? Are you OK if we integrate this series as a first
> step of the complete process?

Yes we are.

> (...)
> Thanks for the good feedback.

You're welcome.

Mark Rutland

unread,
Nov 25, 2013, 10:20:02 AM11/25/13
to
[...]
I'm happy with having these as separate properties.

[...]

> > >>>> +
> > >>>> +- type: a string containing the trip type. Supported values are:
> > >>>> + "active": A trip point to enable active cooling
> > >>>> + "passive": A trip point to enable passive cooling
> > >>>
> > >>> The two above seem to be implying action, as opposed to the general
> > >>> comment about trip points.
> > >>
> > >> They do not imply action, they specify type of trip.
> > >
> > > For me "A trip point to enable active cooling" means that when this trip
> > > point is crossed, active cooling must be enabled. What is it if not
> > > implying action?
> >
> > But within a board there could be more than one active cooling actions
> > that could be done, it is not a 1 to 1 relation. Same thing applies to
> > passive cooling. The binding does not imply a specific action. Just the
> > trip type.
>
> I'd prefer the "active" and "passive" states to be renamed an their
> descriptions rephrased. In general, the idea of having just four trip
> point types seems like bound to a single specific hardware platform.
>
> That's a wild guess, but maybe having an unsigned integer to represent
> trip point "attention level" would be a better idea?

The set of names can be extended in future, and we can choose to have an
attention-level property in future, and map the existing types to them.
If we don't have something fundamantally better, I think this should be
OK for now.
This is arguable. The {min,max} levels associated with a cooling state
is essentially a recommendation of a sane set of values you might want
to use the cooling device with.

It could work using only the cooling device's information, but it might
not be all that useful. The range might not represent a linear scale,
and there might be secondary effects to consider (e.g. noise) that would
affect the decision and would likely need to be considered by the
particular drivers.

The key question is where do we make the trade off? How much platform
knowledge do we want each driver to have to handle, and how much are we
happy to place in DT?

It is possible to use the information from particular devices and ignore
the {min,max} cooling state properties in future if that turns out to be
better. I think that would be reasonable for now.

Thanks,
Mark.

Mark Rutland

unread,
Nov 25, 2013, 10:40:01 AM11/25/13
to
I think all that Tomasz is arguing for is an inline definition of what a
"cooling state" is. I don't think that will lead to that much confusion.

How about something like:

Any cooling device has a range of cooling states (i.e. different levels
of heat dissipation). For example a fan's cooling states correspond to
the different fan speeds possible. Cooling states are referred to by
single unsigned integers, where larger numbers mean greate heat
dissipation. The precise set of cooling states associated with a device
(as referred to be the cooling-min-state and cooling-max-state
properties) should be defined in a particular device's binding.


>
> >
> >>
> >>>
> >>> By example I mean simply listing one or two possible practical meanings,
> >>> like "(e.g. speed level of a cooling fan, performance throttling level of
> >>> CPU)".
> >>
> >> Mark R. has already requested this example and I already wrote it. There
> >> is a comment in the CPU example session explaining exactly what you are
> >> asking. Have you missed that one?
> >
> > No, I haven't. But a binding usage example is a separate thing from what
> > I mean. When you read some text from top to bottom, you would prefer
> > not to need to jump down and up to understand what you read. That's
> > why adding one additional sentence, just quickly showing the meaning
> > of something, is rather justified.
>
> So, it is just a matter of taste on how you want the text organized?
> Again, the info is there.

I think a brief inline description would be helpful. I am happy for it
to be in a later amendment.

[...]

> > On a lot of mobile phones, there is also a low temperature trip point
> > (usually tens of degrees below zero) to shut down the phone when the
> > temperature falls down below its minimal operating temperature. Although,
> > one could use "critical" trip point for this, how would you distinguish
> > hot and cold critical points?
> >
>
> I think you already provided the answer.
>
> > Similarly, there is often a threshold (also below 0C) under which battery
> > charging process should be disabled.
> >
> > Does it mean that we also need "cold" and "freezing" trip point types?
> >
>
> What is the name of the board/phone/tablet/mobile device we are talking
> about?

I think if these are needed the set of types can be extended, no?
It's not quite a probable property though. It's more of a
system-specific recommendation. A system intended for use as an
always-on HTPC might recommend always having a quiet fan on at a low
speed rather than suddenly bringing it up at high speed when the board
gets hot (and making a lot of noise that spoils the experience).

I think the question is more is this appropriate for DT? I can't see a
better place to put it without having board files. It's also worth
noting that we can add the appropriate knobs to override this if
preferred.
I would expect that at some point someone is going to want to mess with
these values, and I they should be allowed to (within reason). While
it's likely you can under-cool a device, it's not so likely that you can
over-cool it...

However, I think that can change in future.

Thanks,
Mark.

Eduardo Valentin

unread,
Nov 25, 2013, 10:40:02 AM11/25/13
to
I agree that we surely can rip off unused properties if they turn out to
be not helping at all. On the min/max specific case, I do have board
that investigation (by experimentation on real board and simulation)
showed that, for instance, allowing maximum power delta can cause burst
on temperature, and those bursts are hard to detect on higher levels of
temperature domain. Well, it turns out that this is essentially how
temperature behaves. Obviously, behavior on different boards is also
different, or at least produce different bursts. It would require
drivers to have board knowledge in order to properly report this.

As it has been already discussed during the development of this work,
one can also develop a PID controller to minimize the board dependency.
However, the board dependency can only be minimized, not avoided, as not
all board have needed sensors, say power, current sensor, or not enough
temperature sensors, and power estimation based only on cpu load is
known to not be strongly correlated to overall board power dissipation.

The proposal of using min max on trip point is not coming from nowhere.
>
> Thanks,
> Mark.
signature.asc

Mark Rutland

unread,
Nov 25, 2013, 10:40:02 AM11/25/13
to
On Tue, Nov 12, 2013 at 07:46:04PM +0000, Eduardo Valentin wrote:
> This patch introduces a device tree bindings for
> describing the hardware thermal behavior and limits.
> Also a parser to read and interpret the data and feed
> it in the thermal framework is presented.
>
> This patch introduces a thermal data parser for device
> tree. The parsed data is used to build thermal zones
> and thermal binding parameters. The output data
> can then be used to deploy thermal policies.
>
> This patch adds also documentation regarding this
> API and how to define tree nodes to use
> this infrastructure.
>
> Note that, in order to be able to have control
> on the sensor registration on the DT thermal zone,
> it was required to allow changing the thermal zone
> .get_temp callback. For this reason, this patch
> also removes the 'const' modifier from the .ops
> field of thermal zone devices.
>
> Cc: Zhang Rui <rui....@intel.com>
> Cc: linu...@vger.kernel.org
> Cc: linux-...@vger.kernel.org
> Cc: Mark Rutland <mark.r...@arm.com>
> Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
> ---
>
> Hello all,
>
> Very minor changes from v8 to v9.
>
> Changelog:
>
> - Rephrase a couple of sentences in the binding document
> - Fixed a couple of property types in the binding document
> - Removed the constant macro definitions for trip type. This
> change also affected the bindings posted on patches 09/14/15.
>
> .../devicetree/bindings/thermal/thermal.txt | 586 ++++++++++++++
> drivers/thermal/Kconfig | 13 +
> drivers/thermal/Makefile | 1 +
> drivers/thermal/of-thermal.c | 849 +++++++++++++++++++++
> drivers/thermal/thermal_core.c | 9 +-
> drivers/thermal/thermal_core.h | 9 +
> include/dt-bindings/thermal/thermal.h | 17 +
> include/linux/thermal.h | 28 +-
> 8 files changed, 1509 insertions(+), 3 deletions(-)
> create mode 100644 Documentation/devicetree/bindings/thermal/thermal.txt
> create mode 100644 drivers/thermal/of-thermal.c
> create mode 100644 include/dt-bindings/thermal/thermal.h
>
> diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
> new file mode 100644
> index 0000000..59f5bd2
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/thermal/thermal.txt
> @@ -0,0 +1,586 @@
> +* Thermal Framework Device Tree descriptor
> +
> +This file describes a generic binding to provide a way of
> +defining hardware thermal structure using device tree.
> +A thermal structure includes thermal zones and their components,
> +such as trip points, polling intervals, sensors and cooling devices
> +binding descriptors.
> +
> +The target of device tree thermal descriptors is to describe only
> +the hardware thermal aspects. The thermal device tree bindings are
> +not about how the system must control or which algorithm or policy
> +must be taken in place.
> +
> +There are five types of nodes involved to describe thermal bindings:
> +- thermal sensors: devices which may be used to take temperature
> + measurements.
> +- cooling devices: devices which may be used to dissipate heat.
> +- trip points: describe key temperatures at which cooling is recommended. The
> + set of points should be chosen based on hardware limits.
> +- cooling maps: used to describe links between trip points and cooling devices;
> +- thermal zones: used to describe thermal data within the hardware;
> +
> +The following is a description of each of these node types.
> +
> +* Thermal sensor devices
> +
> +Thermal sensor devices are nodes providing temperature sensing capabilities on
> +thermal zones. Typical devices are I2C ADC converters and bandgaps. These are
> +nodes providing temperature data to thermal zones. Thermal sensor devices may
> +control one or more internal sensors.
> +
> +Required property:
> +- #thermal-sensor-cells: Used to provide sensor device specific information
> + Type: unsigned while referring to it. Typically 0 on thermal sensor
> + Size: one cell nodes with only one sensor, and at least 1 on nodes
> + with several internal sensors, in order
> + to identify uniquely the sensor instances within
> + the IC. See thermal zone binding for more details
> + on how consumers refer to sensor devices.
> +
> +* Cooling device nodes
> +
> +Cooling devices are nodes providing control on power dissipation. There
> +are essentially two ways to provide control on power dissipation. First
> +is by means of regulating device performance, which is known as passive
> +cooling. A typical passive cooling is a CPU that has dynamic voltage and
> +frequency scaling (DVFS), and uses lower frequencies as cooling states.
> +Second is by means of activating devices in order to remove
> +the dissipated heat, which is known as active cooling, e.g. regulating
> +fan speeds. In both cases, cooling devices shall have a way to determine
> +the state of cooling in which the device is.
> +
> +Required properties:
> +- cooling-min-state: An integer indicating the smallest
> + Type: unsigned cooling state accepted. Typically 0.
> + Size: one cell
> +
> +- cooling-max-state: An integer indicating the largest
> + Type: unsigned cooling state accepted.
> + Size: one cell
> +
> +- #cooling-cells: Used to provide cooling device specific information
> + Type: unsigned while referring to it. Must be at least 2, in order
> + Size: one cell to specify minimum and maximum cooling state used
> + in the reference. The first cell is the minimum
> + cooling state requested and the second cell is
> + the maximum cooling state requested in the reference.
> + See Cooling device maps section below for more details
> + on how consumers refer to cooling devices.

This is the point I'm still most concerned with, as it provides a
definite meaning to an otherwise abstract property. However, if you
believe this is sane and unlikely to be problematic, then I am happy to
leave the decision to you.

> +
> +* Trip points
> +
> +The trip node is a node to describe a point in the temperature domain
> +in which the system takes an action. This node describes just the point,
> +not the action.
> +
> +Required properties:
> +- temperature: An integer indicating the trip temperature level,
> + Type: signed in millicelsius.
> + Size: one cell
> +
> +- hysteresis: A low hysteresis value on temperature property (above).
> + Type: unsigned This is a relative value, in millicelsius.
> + Size: one cell
> +
> +- type: a string containing the trip type. Supported values are:

Get rid of the "Supported", that's a Linux detail.

s/Supported/expected/ ?

Given the discussion has otherwise boiled down to bits that I think can
change later, I'm happy to give my ack:

Acked-by: Mark Rutland <mark.r...@arm.com>

I'll leave it to the others to fight against this if they still have
concerns.

Eduardo Valentin

unread,
Nov 25, 2013, 10:50:02 AM11/25/13
to
On 25-11-2013 11:31, Mark Rutland wrote:
> Any cooling device has a range of cooling states (i.e. different levels
> of heat dissipation). For example a fan's cooling states correspond to
> the different fan speeds possible. Cooling states are referred to by
> single unsigned integers, where larger numbers mean greate heat
> dissipation. The precise set of cooling states associated with a device
> (as referred to be the cooling-min-state and cooling-max-state
> properties) should be defined in a particular device's binding.

OK. Added this to the binding document.
signature.asc

Eduardo Valentin

unread,
Nov 25, 2013, 10:50:02 AM11/25/13
to
On 25-11-2013 11:31, Mark Rutland wrote:
> I think if these are needed the set of types can be extended, no?
>

As long as we have a real board with real use cases to be covered, yes,
why not.
signature.asc

Eduardo Valentin

unread,
Nov 25, 2013, 10:50:03 AM11/25/13
to
OK.

>
> Given the discussion has otherwise boiled down to bits that I think can
> change later, I'm happy to give my ack:
>
> Acked-by: Mark Rutland <mark.r...@arm.com>
>

Thanks for your valuable contributions to this work. For sure it would
not be at the state it is without your punctual and objective review.

> I'll leave it to the others to fight against this if they still have
> concerns.
>
> Thanks,
> Mark.
>
>


--
signature.asc

Wei Ni

unread,
Dec 31, 2013, 5:20:01 AM12/31/13
to
On 11/13/2013 03:46 AM, Eduardo Valentin wrote:
> This patch introduces a device tree bindings for
> describing the hardware thermal behavior and limits.
> Also a parser to read and interpret the data and feed
> it in the thermal framework is presented.
>
> This patch introduces a thermal data parser for device
> tree. The parsed data is used to build thermal zones
> and thermal binding parameters. The output data
> can then be used to deploy thermal policies.
>
> This patch adds also documentation regarding this
> API and how to define tree nodes to use
> this infrastructure.
>
> Note that, in order to be able to have control
> on the sensor registration on the DT thermal zone,
> it was required to allow changing the thermal zone
> .get_temp callback. For this reason, this patch
> also removes the 'const' modifier from the .ops
> field of thermal zone devices.
>
> ...
> +
> +static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
> + enum thermal_trend *trend)
> +{
> + struct __thermal_zone *data = tz->devdata;
> + long dev_trend;
> + int r;
> +
> + if (!data->get_trend)
> + return -EINVAL;
> +
> + r = data->get_trend(data->sensor_data, &dev_trend);
I think the ->get_trend should be defined as:
.get_trend(*dev, int, *long)
so that the "trip" can be passed into it, otherwise the "trip" can't be
used.
And the dev_trend should be returned as THERMAL_TREND_XXX directly.
because the THERM_TREND_xx is more than three states.

The code may be something like:
r = data->get_trend(data->sensor_data, trip, &dev_trend);
if (r)
return r;
*trend = dev_trend;
return 0;
> + if (r)
> + return r;
> +
> + /* TODO: These intervals might have some thresholds, but in core code */
> + if (dev_trend > 0)
> + *trend = THERMAL_TREND_RAISING;
> + else if (dev_trend < 0)
> + *trend = THERMAL_TREND_DROPPING;
> + else
> + *trend = THERMAL_TREND_STABLE;
> +
> + return 0;
> +}
> +
> .....
> +
> +/*** sensor API ***/
> +
> +static struct thermal_zone_device *
> +thermal_zone_of_add_sensor(struct device_node *zone,
> + struct device_node *sensor, void *data,
> + int (*get_temp)(void *, long *),
> + int (*get_trend)(void *, long *))
> +{
> + struct thermal_zone_device *tzd;
> + struct __thermal_zone *tz;
> +
> + tzd = thermal_zone_get_zone_by_name(zone->name);
I think we could get the thermal zone by node,
something like:
thermal_zone_get_zone_by_node(zone);
so that it can get unique zone.

I think we can add a member "struct device_node *np" in the
thermal_zone_device,
and set it after you call thermal_zone_device_register successfully.
Then add following codes:
thermal_zone_get_zone_by_node(struct device_node *node)
{
struct thermal_zone_device *pos = NULL, *ref = ERR_PTR(-ENODEV);
bool found = false;

if (!node)
return ERR_PTR(-EINVAL);

mutex_lock(&thermal_list_lock);
list_for_each_entry(pos, &thermal_tz_list, node)
if (node == pos->np) {
ref = pos;
found = true;
break;
}
mutex_unlock(&thermal_list_lock);

return ref;
}

Thanks.
Wei.
> + if (IS_ERR(tzd))
> + return ERR_PTR(-EPROBE_DEFER);
> +
> + tz = tzd->devdata;
> +
> + mutex_lock(&tzd->lock);
> + tz->get_temp = get_temp;
> + tz->get_trend = get_trend;
> + tz->sensor_data = data;
> +
> + tzd->ops->get_temp = of_thermal_get_temp;
> + tzd->ops->get_trend = of_thermal_get_trend;
> + mutex_unlock(&tzd->lock);
> +
> + return tzd;
> +}
> +

Wei Ni

unread,
Jan 1, 2014, 10:00:01 PM1/1/14
to
On 11/13/2013 03:46 AM, Eduardo Valentin wrote:
> ....
> +
> +/**
> + * of_parse_thermal_zones - parse device tree thermal data
> + *
> + * Initialization function that can be called by machine initialization
> + * code to parse thermal data and populate the thermal framework
> + * with hardware thermal zones info. This function only parses thermal zones.
> + * Cooling devices and sensor devices nodes are supposed to be parsed
> + * by their respective drivers.
> + *
> + * Return: 0 on success, proper error code otherwise
> + *
> + */
> +int __init of_parse_thermal_zones(void)
> +{
> + struct device_node *np, *child;
> + struct __thermal_zone *tz;
> + struct thermal_zone_device_ops *ops;
> +
> + np = of_find_node_by_name(NULL, "thermal-zones");
> + if (!np) {
> + pr_debug("unable to find thermal zones\n");
> + return 0; /* Run successfully on systems without thermal DT */
> + }
> +
> + for_each_child_of_node(np, child) {
> + struct thermal_zone_device *zone;
> + struct thermal_zone_params *tzp;
> +
> + tz = thermal_of_build_thermal_zone(child);
> + if (IS_ERR(tz)) {
> + pr_err("failed to build thermal zone %s: %ld\n",
> + child->name,
> + PTR_ERR(tz));
> + continue;
> + }
> +
> + ops = kmemdup(&of_thermal_ops, sizeof(*ops), GFP_KERNEL);
> + if (!ops)
> + goto exit_free;
> +
> + tzp = kzalloc(sizeof(*tzp), GFP_KERNEL);
> + if (!tzp) {
> + kfree(ops);
> + goto exit_free;
> + }
> +
> + /* No hwmon because there might be hwmon drivers registering */
> + tzp->no_hwmon = true;

I think the platform driver may set the governor for the thermal zone,
so how about to add a property named as "governor",
and parse it to tzp->governor_name ?
something like:
ret = of_property_read_string(child, "governor", &str);
if (ret == 0)
if (strlen(str) < THERMAL_NAME_LENGTH)
strcpy(tzp->governor_name, str);

Thanks.
Wei.

> +
> + zone = thermal_zone_device_register(child->name, tz->ntrips,
> + 0, tz,
> + ops, tzp,
> + tz->passive_delay,
> + tz->polling_delay);
> + if (IS_ERR(zone)) {
> + pr_err("Failed to build %s zone %ld\n", child->name,
> + PTR_ERR(zone));
> + kfree(tzp);
> + kfree(ops);
> + of_thermal_free_zone(tz);
> + /* attempting to build remaining zones still */
> + }
> + }
> +
> + return 0;
> +
> +exit_free:
> + of_thermal_free_zone(tz);
> +
> + /* no memory available, so free what we have built */
> + of_thermal_destroy_zones();
> +
> + return -ENOMEM;

Wei Ni

unread,
Jan 1, 2014, 10:00:01 PM1/1/14
to
On 11/13/2013 03:46 AM, Eduardo Valentin wrote:
> ...
I think the platform driver may set governor for the thermal zone,
so how about to add a property named as "governor",
and parse it to tzp->governor_name,
> +/**
> + * of_thermal_destroy_zones - remove all zones parsed and allocated resources
> + *
> + * Finds all zones parsed and added to the thermal framework and remove them
> + * from the system, together with their resources.
> + *
> + */
> +void __exit of_thermal_destroy_zones(void)
> +{
> + struct device_node *np, *child;
> +
> + np = of_find_node_by_name(NULL, "thermal-zones");
> + if (!np) {
> + pr_err("unable to find thermal zones\n");
> + return;
> + }
> +
> + for_each_child_of_node(np, child) {
> + struct thermal_zone_device *zone;
> +
> + zone = thermal_zone_get_zone_by_name(child->name);
> + if (IS_ERR(zone))
> + continue;
> +
> + thermal_zone_device_unregister(zone);
> + kfree(zone->tzp);
> + kfree(zone->ops);
> + of_thermal_free_zone(zone->devdata);
> + }
> +}
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index f4c9021..aba68dc 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -1373,7 +1373,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
> */
> struct thermal_zone_device *thermal_zone_device_register(const char *type,
> int trips, int mask, void *devdata,
> - const struct thermal_zone_device_ops *ops,
> + struct thermal_zone_device_ops *ops,
> const struct thermal_zone_params *tzp,
> int passive_delay, int polling_delay)
> {
> @@ -1753,8 +1753,14 @@ static int __init thermal_init(void)
> if (result)
> goto unregister_class;
>
> + result = of_parse_thermal_zones();
> + if (result)
> + goto exit_netlink;
> +
> return 0;
>
> +exit_netlink:
> + genetlink_exit();
> unregister_governors:
> thermal_unregister_governors();
> unregister_class:
> @@ -1770,6 +1776,7 @@ error:
>
> static void __exit thermal_exit(void)
> {
> + of_thermal_destroy_zones();
> genetlink_exit();
> class_unregister(&thermal_class);
> thermal_unregister_governors();
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index 7cf2f66..3db339f 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -77,4 +77,13 @@ static inline int thermal_gov_user_space_register(void) { return 0; }
> static inline void thermal_gov_user_space_unregister(void) {}
> #endif /* CONFIG_THERMAL_GOV_USER_SPACE */
>
> +/* device tree support */
> +#ifdef CONFIG_THERMAL_OF
> +int of_parse_thermal_zones(void);
> +void of_thermal_destroy_zones(void);
> +#else
> +static inline int of_parse_thermal_zones(void) { return 0; }
> +static inline void of_thermal_destroy_zones(void) { }
> +#endif
> +
> #endif /* __THERMAL_CORE_H__ */
> diff --git a/include/dt-bindings/thermal/thermal.h b/include/dt-bindings/thermal/thermal.h
> new file mode 100644
> index 0000000..59822a9
> --- /dev/null
> +++ b/include/dt-bindings/thermal/thermal.h
> @@ -0,0 +1,17 @@
> +/*
> + * This header provides constants for most thermal bindings.
> + *
> + * Copyright (C) 2013 Texas Instruments
> + * Eduardo Valentin <eduardo....@ti.com>
> + *
> + * GPLv2 only
> + */
> +
> +#ifndef _DT_BINDINGS_THERMAL_THERMAL_H
> +#define _DT_BINDINGS_THERMAL_THERMAL_H
> +
> +/* On cooling devices upper and lower limits */
> +#define THERMAL_NO_LIMIT (-1UL)
> +
> +#endif
> +
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index b268d3c..b780c5b 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -143,6 +143,7 @@ struct thermal_cooling_device {
> int id;
> char type[THERMAL_NAME_LENGTH];
> struct device device;
> + struct device_node *np;
> void *devdata;
> const struct thermal_cooling_device_ops *ops;
> bool updated; /* true if the cooling device does not need update */
> @@ -172,7 +173,7 @@ struct thermal_zone_device {
> int emul_temperature;
> int passive;
> unsigned int forced_passive;
> - const struct thermal_zone_device_ops *ops;
> + struct thermal_zone_device_ops *ops;
> const struct thermal_zone_params *tzp;
> struct thermal_governor *governor;
> struct list_head thermal_instances;
> @@ -242,8 +243,31 @@ struct thermal_genl_event {
> };
>
> /* Function declarations */
> +#ifdef CONFIG_THERMAL_OF
> +struct thermal_zone_device *
> +thermal_zone_of_sensor_register(struct device *dev, int id,
> + void *data, int (*get_temp)(void *, long *),
> + int (*get_trend)(void *, long *));
> +void thermal_zone_of_sensor_unregister(struct device *dev,
> + struct thermal_zone_device *tz);
> +#else
> +static inline struct thermal_zone_device *
> +thermal_zone_of_sensor_register(struct device *dev, int id,
> + void *data, int (*get_temp)(void *, long *),
> + int (*get_trend)(void *, long *))
> +{
> + return NULL;
> +}
> +
> +static inline
> +void thermal_zone_of_sensor_unregister(struct device *dev,
> + struct thermal_zone_device *tz)
> +{
> +}
> +
> +#endif
> struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
> - void *, const struct thermal_zone_device_ops *,
> + void *, struct thermal_zone_device_ops *,
> const struct thermal_zone_params *, int, int);
> void thermal_zone_device_unregister(struct thermal_zone_device *);

Wei Ni

unread,
Jan 1, 2014, 10:10:02 PM1/1/14
to
Sorry, please ignore this mail.
This is not my regularly mail address.

Thanks.
Wei.

Matthew Longnecker

unread,
Jan 2, 2014, 12:40:02 PM1/2/14
to
Eduardo,

For the most part, this binding is really well thought out. It makes a
lot of sense to me (as someone who has been working with thermal
management in Linux/Android-based mobile devices for a few years).

However, I have one substantive criticism.

On 11/12/2013 11:46 AM, Eduardo Valentin wrote:
> +* Thermal zone nodes
> +
> +The thermal zone node is the node containing all the required info
> +for describing a thermal zone, including its cooling device bindings. The
> +thermal zone node must contain, apart from its own properties, one sub-node
> +containing trip nodes and one sub-node containing all the zone cooling maps.
> +
> +Required properties:
...
> +- thermal-sensors: A list of thermal sensor phandles and sensor specifier
> + Type: list of used while monitoring the thermal zone.
> + phandles + sensor
> + specifier
...
> +Optional property:
> +- coefficients: An array of integers (one signed cell) containing
> + Type: array coefficients to compose a linear relation between
> + Elem size: one cell the sensors listed in the thermal-sensors property.
> + Elem type: signed Coefficients defaults to 1, in case this property
> + is not specified. A simple linear polynomial is used:
> + Z = c0 * x0 + c1 + x1 + ... + c(n-1) * x(n-1) + cn.
> +
> + The coefficients are ordered and they match with sensors
> + by means of sensor ID. Additional coefficients are
> + interpreted as constant offset.


"coefficients" is a problematic way of describing the relationship
between temperatures at various sensors and temperature at some other
location. It would make sense if heat flowed infinitely quickly.
However, in practice thermal capacitance means that we need to take into
account the _history_ of temperature at sensors in order to predict heat
coupled into a distant point.

For example, assuming that handset enclosure starts at ~25C, the CPU
could burst to 100C for many minutes before the handset enclosure
reaches ~40C. However, at steady-state, the CPU might only be able to
sustain 65C without pushing the enclosure above 40C.

I wouldn't be complaining except that you're proposing this as a DT
definition. In this case, the binding you've proposed is poor
abstraction of the hardware.

thanks,
Matt Longnecker

p.s. I apologize for chiming in without having read the entire history
of the patch set. Engineers on my team will be trying this out for Tegra
within the next few weeks.

Matthew Longnecker

unread,
Jan 2, 2014, 1:00:03 PM1/2/14
to

> I think the platform driver may set governor for the thermal zone,
> so how about to add a property named as "governor",
> and parse it to tzp->governor_name,
> something like:
> ret = of_property_read_string(child, "governor", &str);
> if (ret == 0)
> if (strlen(str) < THERMAL_NAME_LENGTH)
> strcpy(tzp->governor_name, str);
>
> Thanks.
> Wei.

DT is supposed to describe the hardware, right? The governor isn't
hardware -- it's a software control policy. On the other hand, that
control policy must be tuned according to the behaviors of the platform
hardware otherwise the system will be unstable.

Is it appropriate to be naming the governor in DT? If so, is it equally
appropriate to describe any governor-specific parameters in DT (even
though they are pure software constructs)?

-Matt

Mark Rutland

unread,
Jan 6, 2014, 9:00:02 AM1/6/14
to
On Thu, Jan 02, 2014 at 05:50:06PM +0000, Matthew Longnecker wrote:
>
> > I think the platform driver may set governor for the thermal zone,
> > so how about to add a property named as "governor",
> > and parse it to tzp->governor_name,
> > something like:
> > ret = of_property_read_string(child, "governor", &str);
> > if (ret == 0)
> > if (strlen(str) < THERMAL_NAME_LENGTH)
> > strcpy(tzp->governor_name, str);
> >
> > Thanks.
> > Wei.
>
> DT is supposed to describe the hardware, right? The governor isn't
> hardware -- it's a software control policy. On the other hand, that
> control policy must be tuned according to the behaviors of the platform
> hardware otherwise the system will be unstable.
>
> Is it appropriate to be naming the governor in DT? If so, is it equally
> appropriate to describe any governor-specific parameters in DT (even
> though they are pure software constructs)?

The dt should be relatively static -- if the hardware doesn't change the
dt shouldn't have to.

The governers are not static. We can introduce new ones and throw away
old ones at any time. Tuning parameters can also change at any time.

I'd prefer to not have governer details described in the dt, and the
choice of governer and configuration of its tuning parameters should be
made at runtime somehow.

Thanks,
Mark.

Eduardo Valentin

unread,
Jan 6, 2014, 10:00:02 AM1/6/14
to
Agreed.
signature.asc

Eduardo Valentin

unread,
Jan 6, 2014, 2:00:02 PM1/6/14
to
On 02-01-2014 13:35, Matthew Longnecker wrote:
> Eduardo,

Hello Matthew,

>
> For the most part, this binding is really well thought out. It makes a
> lot of sense to me (as someone who has been working with thermal
> management in Linux/Android-based mobile devices for a few years).

No issues, I also have been there.
I agree. But coefficients are targeting in fact the steady state cases.
As described in the DT binding documentation, the case of an offset due
to sensor location distance to hotspot is a perfect usage of this property.

The thermal capacitance behavior is described, so far, most based on the
requested time requirement for each zone. Of course, this point was a
major point for discussion during this thread. Hopefully I was able to
keep the minimal time behavior requirements in the DT definition.

>
> For example, assuming that handset enclosure starts at ~25C, the CPU
> could burst to 100C for many minutes before the handset enclosure
> reaches ~40C. However, at steady-state, the CPU might only be able to
> sustain 65C without pushing the enclosure above 40C.


Yeah, but here you are maybe confusing two different constraints, and
possibly the behavior of two different zones. The current binding do not
limit you in the usage of hierarchical description. Thus, you can and
should describe two different zones to cover for two different
constraints you have in your description, one to cover for your silicon
junction (>100C) and another to cover for your case / device skin
hotspot (<45C). And in each zone you may have different limits and
thermal capacitance requirements. Pay attention that it does not mean
that you CPU (cooling) device cannot appear in two different zone. Thus
its contribution to one zone may be different to the other (each zone
can assume different coefficients).

>
> I wouldn't be complaining except that you're proposing this as a DT
> definition. In this case, the binding you've proposed is poor
> abstraction of the hardware.

OK. The main reason I made this property optional is exactly because
different use case may require different usage of these relations. And
possibly people may have different experience and different solutions to
the same problem. But in the end what we want is to describe the
hardware (and possibly the physics) behind the use cases. And I do
believe we have been dealing with very similar cases here.

I don't think the property is a poor abstraction. Firstly because it is
not limiting you in anything. Secondly because you may be having a
different interpretation of it, as I explained above.

In fact the thermal behavior is much more dynamic than the steady state
situation. However, so far, I haven't seen a real case that we need to
describe these dynamics deeper in DT. Including for the example you gave
above. Dealing with the history can be derived by the OS based on the
already defined properties.

>
> thanks,

No problem, thanks for your contribution. Again, if we are not covering
your use cases properly, let's go through them and narrow a proper
solution down.

> Matt Longnecker
>
> p.s. I apologize for chiming in without having read the entire history
> of the patch set. Engineers on my team will be trying this out for Tegra
> within the next few weeks.
>

OK.
signature.asc

Wei Ni

unread,
Jan 6, 2014, 9:50:02 PM1/6/14
to
Hi, Eduardo
Will you consider my comments :)

Thanks.
Wei.

Wei Ni

unread,
Jan 6, 2014, 9:50:02 PM1/6/14
to
On 01/06/2014 10:54 PM, Eduardo Valentin wrote:
> * PGP Signed by an unknown key
Yes, I think so, but the of-thermal driver handle the
thermal_zone_device_register, and pass the "tzp" without governor_name,
so the created thermal_zone's governor will be NULL, then it can't run
into the governor->throttle() if needed. And currently there have no
interface to support updating governor and configuration at runtime.
I think it's better to initialize the governor_name when register the
thermal zone device in the of-thermal driver.

Thanks.

Eduardo Valentin

unread,
Jan 7, 2014, 6:20:01 AM1/7/14
to
On 06-01-2014 22:48, Wei Ni wrote:
> Hi, Eduardo
> Will you consider my comments :)

By now Wei, it is better if you start a new thread, by sending a patch
on top of it, as this thread has been already merged by Rui.
signature.asc

Mark Rutland

unread,
Jan 7, 2014, 7:10:02 AM1/7/14
to
Initialising it in the driver doesn't mean it has to come from the
device tree. That's still a run-time decision, even though it's made
immediately after parsing the DT.

Wei Ni

unread,
Jan 7, 2014, 10:20:01 PM1/7/14
to
On 01/07/2014 07:17 PM, Eduardo Valentin wrote:
> * PGP Signed by an unknown key
>
> On 06-01-2014 22:48, Wei Ni wrote:
>> Hi, Eduardo
>> Will you consider my comments :)
>
> By now Wei, it is better if you start a new thread, by sending a patch
> on top of it, as this thread has been already merged by Rui.

Ok, I will send it.

Thanks.
Wei.

Wei Ni

unread,
Jan 7, 2014, 11:20:02 PM1/7/14
to
On 01/08/2014 11:24 AM, Hu Yaohui wrote:
> I am new here. How can I could not mail a new message to this mail list?
> TIA

I use Thunderbird, it's a pretty good mail client :)
> <mailto:majo...@vger.kernel.org>

Zhang, Rui

unread,
Jan 12, 2014, 9:40:01 AM1/12/14
to


> -----Original Message-----
> From: Eduardo Valentin [mailto:eduardo....@ti.com]
> Sent: Wednesday, November 13, 2013 3:46 AM
> To: swa...@wwwdotorg.org; pawel...@arm.com; mark.r...@arm.com;
> ian.ca...@citrix.com; rob.h...@calxeda.com; li...@roeck-us.net;
> Zhang, Rui
> Cc: w...@nvidia.com; grant....@linaro.org; R, Durgadoss; linux-
> p...@vger.kernel.org; devic...@vger.kernel.org; lm-sensors@lm-
> sensors.org; linux-...@vger.kernel.org; Eduardo Valentin; Rafael J.
> Wysocki; Viresh Kumar; cpu...@vger.kernel.org; devicetree-
> dis...@lists.ozlabs.org
> Subject: [PATCHv5 05/20] cpufreq: cpufreq-cpu0: add dt node parsing for
> cooling device properties
> Importance: High
>
> This patch changes the cpufreq-cpu0 driver to consider if a cpu needs
> cooling (with cpufreq). In case the cooling is needed, the cpu0 device
> tree node needs to be properly configured with cooling device
> properties.
>
> In case these properties are present,, the driver will load a cpufreq
> cooling device in the system. The cpufreq-cpu0 driver is not interested
> in determining how the system should be using the cooling device. The
> driver is responsible only of loading the cooling device.
>
> Describing how the cooling device will be used can be accomplished by
> setting up a thermal zone that references and is composed by the
> cpufreq cooling device.
>
> Cc: "Rafael J. Wysocki" <r...@sisk.pl>
> Cc: Viresh Kumar <viresh...@linaro.org>
> Cc: Grant Likely <grant....@linaro.org>
> Cc: Rob Herring <rob.h...@calxeda.com>
> Cc: cpu...@vger.kernel.org
> Cc: linu...@vger.kernel.org
> Cc: linux-...@vger.kernel.org
> Cc: devicetre...@lists.ozlabs.org
> Signed-off-by: Eduardo Valentin <eduardo....@ti.com>
> ---
> .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt | 7 +++++++
> drivers/cpufreq/Kconfig | 2 +-
> drivers/cpufreq/cpufreq-cpu0.c | 16
> ++++++++++++++++
> 3 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> index 051f764..f055515 100644
> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> @@ -15,6 +15,10 @@ Optional properties:
> - clock-latency: Specify the possible maximum transition latency for
> clock,
> in unit of nanoseconds.
> - voltage-tolerance: Specify the CPU voltage tolerance in percentage.
> +- #cooling-cells:
> +- cooling-min-level:
> +- cooling-max-level:
> + Please refer to
> Documentation/devicetree/bindings/thermal/thermal.txt.
>
> Examples:
>
> @@ -33,6 +37,9 @@ cpus {
> 198000 850000
> >;
> clock-latency = <61036>; /* two CLK32 periods */
> + #cooling-cells = <2>;
> + cooling-min-level = <0>;
> + cooling-max-level = <2>;
> };
>
> cpu@1 {
> diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index
> 534fcb8..fc1e9a5 100644
> --- a/drivers/cpufreq/Kconfig
> +++ b/drivers/cpufreq/Kconfig
> @@ -186,7 +186,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
>
> config GENERIC_CPUFREQ_CPU0
> tristate "Generic CPU0 cpufreq driver"
> - depends on HAVE_CLK && REGULATOR && PM_OPP && OF
> + depends on HAVE_CLK && REGULATOR && PM_OPP && OF && THERMAL &&
> +CPU_THERMAL

config: make ARCH=arm multi_v7_defconfig
All warnings:
warning: (ARM_HIGHBANK_CPUFREQ) selects GENERIC_CPUFREQ_CPU0 which
has unmet direct dependencies (ARCH_HAS_CPUFREQ && CPU_FREQ &&
HAVE_CLK && REGULATOR && PM_OPP && OF && THERMAL && CPU_THERMAL)

I think you need to select THERMAL and CPU_THERMAL instead, right?

Thanks,
rui
> select CPU_FREQ_TABLE
> help
> This adds a generic cpufreq driver for CPU0 frequency
> management.
> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-
> cpu0.c index c522a95..568aaf3 100644
> --- a/drivers/cpufreq/cpufreq-cpu0.c
> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> @@ -13,7 +13,9 @@
>
> #include <linux/clk.h>
> #include <linux/cpu.h>
> +#include <linux/cpu_cooling.h>
> #include <linux/cpufreq.h>
> +#include <linux/cpumask.h>
> #include <linux/err.h>
> #include <linux/module.h>
> #include <linux/of.h>
> @@ -21,6 +23,7 @@
> #include <linux/platform_device.h>
> #include <linux/regulator/consumer.h>
> #include <linux/slab.h>
> +#include <linux/thermal.h>
>
> static unsigned int transition_latency; static unsigned int
> voltage_tolerance; /* in percentage */ @@ -29,6 +32,7 @@ static struct
> device *cpu_dev; static struct clk *cpu_clk; static struct regulator
> *cpu_reg; static struct cpufreq_frequency_table *freq_table;
> +static struct thermal_cooling_device *cdev;
>
> static int cpu0_verify_speed(struct cpufreq_policy *policy) { @@ -
> 260,6 +264,17 @@ static int cpu0_cpufreq_probe(struct platform_device
> *pdev)
> goto out_free_table;
> }
>
> + /*
> + * For now, just loading the cooling device;
> + * thermal DT code takes care of matching them.
> + */
> + if (of_find_property(np, "#cooling-cells", NULL)) {
> + cdev = of_cpufreq_cooling_register(np, cpu_present_mask);
> + if (IS_ERR(cdev))
> + pr_err("running cpufreq without cooling
> device: %ld\n",
> + PTR_ERR(cdev));
> + }
> +
> of_node_put(np);
> return 0;
>
> @@ -272,6 +287,7 @@ out_put_node:
>
> static int cpu0_cpufreq_remove(struct platform_device *pdev) {
> + cpufreq_cooling_unregister(cdev);
> cpufreq_unregister_driver(&cpu0_cpufreq_driver);
> opp_free_cpufreq_table(cpu_dev, &freq_table);
>
> --
> 1.8.2.1.342.gfa7285d

Eduardo Valentin

unread,
Jan 13, 2014, 10:20:01 AM1/13/14
to
On 12-01-2014 10:31, Zhang, Rui wrote:
>> +++ b/drivers/cpufreq/Kconfig
>> > @@ -186,7 +186,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
>> >
>> > config GENERIC_CPUFREQ_CPU0
>> > tristate "Generic CPU0 cpufreq driver"
>> > - depends on HAVE_CLK && REGULATOR && PM_OPP && OF
>> > + depends on HAVE_CLK && REGULATOR && PM_OPP && OF && THERMAL &&
>> > +CPU_THERMAL
> config: make ARCH=arm multi_v7_defconfig
> All warnings:
> warning: (ARM_HIGHBANK_CPUFREQ) selects GENERIC_CPUFREQ_CPU0 which
> has unmet direct dependencies (ARCH_HAS_CPUFREQ && CPU_FREQ &&
> HAVE_CLK && REGULATOR && PM_OPP && OF && THERMAL && CPU_THERMAL)
>
> I think you need to select THERMAL and CPU_THERMAL instead, right?
>

Yeah, I think that would be better. I will send you a fix for that.
signature.asc

Eduardo Valentin

unread,
Jan 13, 2014, 10:40:02 AM1/13/14
to
On 02-01-2014 13:50, Matthew Longnecker wrote:
>
>> I think the platform driver may set governor for the thermal zone,
>> so how about to add a property named as "governor",
>> and parse it to tzp->governor_name,
>> something like:
>> ret = of_property_read_string(child, "governor", &str);
>> if (ret == 0)
>> if (strlen(str) < THERMAL_NAME_LENGTH)
>> strcpy(tzp->governor_name, str);

The above is not applicable to DT. The very first version of my proposed
series did include some sort of governor treatment too, fetching such
data from DT.

>>
>> Thanks.
>> Wei.
>
> DT is supposed to describe the hardware, right? The governor isn't
> hardware -- it's a software control policy. On the other hand, that
> control policy must be tuned according to the behaviors of the platform
> hardware otherwise the system will be unstable.

Yes, this is the correct understanding. We can describe hardware,
including thermal constraints, as long as they do not include policy or
OS specific implementation.

>
> Is it appropriate to be naming the governor in DT? If so, is it equally
> appropriate to describe any governor-specific parameters in DT (even
> though they are pure software constructs)?

No for both questions.

However, for the parameters, as long as you can map the software
parameter as a hardware descriptor, then we can discuss if that can be
used as DT properties.

>
> -Matt
signature.asc

Eduardo Valentin

unread,
Jan 13, 2014, 4:40:02 PM1/13/14
to
Wei,
In fact, it will fall into the default governor, which is step_wise, by
default config.

> so the created thermal_zone's governor will be NULL, then it can't run
> into the governor->throttle() if needed. And currently there have no

Actually, no, the tz will be set to default governor, and its throttle
call will be called.

> interface to support updating governor and configuration at runtime.
> I think it's better to initialize the governor_name when register the
> thermal zone device in the of-thermal driver.

Still, why would you need to change the governor from a in kernel
decision? There is an ABI to change the thermal zone policy based on
user(land) request. If you need to change the policy from within the
kernel, which seams to be what you are trying to propose, you need to
explain why you need it, say, by giving at least one user of this API or
explaining its use case.

>
> Thanks.
signature.asc

Wei Ni

unread,
Jan 13, 2014, 10:00:02 PM1/13/14
to
On 01/14/2014 05:29 AM, Eduardo Valentin wrote:
> * PGP Signed by an unknown key
>
> Wei,
>
> On 06-01-2014 22:44, Wei Ni wrote:
>> On 01/06/2014 10:54 PM, Eduardo Valentin wrote:
>>>> Old Signed by an unknown key
In the thermal_zone_device_register(), it has following codes:
if (tz->tzp)
tz->governor = __find_governor(tz->tzp->governor_name);
else
tz->governor = __find_governor(DEFAULT_THERMAL_GOVERNOR);

It mean if the tz->tzp is not NULL, and the governor_name is NULL, then
the __find_governor() will return NULL, so the tz->governor is NULL, it
can't fall into the default governor. And in the of-thermal driver, it
call the thermal_zone_device_register(), and pass the "tzp" without
governor_name.
I think if we want to change the governor in user space, we need to fix
this first.

>
>> so the created thermal_zone's governor will be NULL, then it can't run
>> into the governor->throttle() if needed. And currently there have no
>
> Actually, no, the tz will be set to default governor, and its throttle
> call will be called.
>
>> interface to support updating governor and configuration at runtime.
>> I think it's better to initialize the governor_name when register the
>> thermal zone device in the of-thermal driver.
>
> Still, why would you need to change the governor from a in kernel
> decision? There is an ABI to change the thermal zone policy based on
> user(land) request. If you need to change the policy from within the
> kernel, which seams to be what you are trying to propose, you need to
> explain why you need it, say, by giving at least one user of this API or
> explaining its use case.

The thermal_zone_device_register() support to set the governor which you
want, but with the of-thermal framework, it only support to set to
default governor, if fix above issue. I think the driver which use the
of-thermal should be able to set to any governors which it want, in its
initialization. So I add the function thermal_update_governor().

Eduardo Valentin

unread,
Jan 14, 2014, 1:50:01 PM1/14/14
to
Hello Wei,
Yes, it is a bug in the thermal core, thanks for reporting. Something
like [1] should fix it.

> I think if we want to change the governor in user space, we need to fix
> this first.
>

Well no, the above bug does not prevent you to switch governors in
userspace.

[1] - https://patchwork.kernel.org/patch/3487491/

>>
>>> so the created thermal_zone's governor will be NULL, then it can't run
>>> into the governor->throttle() if needed. And currently there have no
>>
>> Actually, no, the tz will be set to default governor, and its throttle
>> call will be called.
>>
>>> interface to support updating governor and configuration at runtime.
>>> I think it's better to initialize the governor_name when register the
>>> thermal zone device in the of-thermal driver.
>>
>> Still, why would you need to change the governor from a in kernel
>> decision? There is an ABI to change the thermal zone policy based on
>> user(land) request. If you need to change the policy from within the
>> kernel, which seams to be what you are trying to propose, you need to
>> explain why you need it, say, by giving at least one user of this API or
>> explaining its use case.
>
> The thermal_zone_device_register() support to set the governor which you
> want, but with the of-thermal framework, it only support to set to
> default governor, if fix above issue. I think the driver which use the
> of-thermal should be able to set to any governors which it want, in its
> initialization. So I add the function thermal_update_governor().
>
>>
>>>
>>> Thanks.
>>>
>>>>
>>>>>
>>>>> Thanks,
>>>>> Mark.
>>>>>
>>>>>
>>>>
>>>>
>>>
>>>
>>>
>>
>>
>
>
>


--
signature.asc

Eduardo Valentin

unread,
Jan 14, 2014, 2:10:01 PM1/14/14
to
On 12-01-2014 10:31, Zhang, Rui wrote:
> config: make ARCH=arm multi_v7_defconfig
> All warnings:
> warning: (ARM_HIGHBANK_CPUFREQ) selects GENERIC_CPUFREQ_CPU0 which
> has unmet direct dependencies (ARCH_HAS_CPUFREQ && CPU_FREQ &&
> HAVE_CLK && REGULATOR && PM_OPP && OF && THERMAL && CPU_THERMAL)

Rui,

BTW, I am not able to reproduce the above warning :-(

Are you sure you simply do a
$ make ARCH=arm multi_v7_defconfig

??
signature.asc
0 new messages