[PATCH tip/sched/core] introduce task_times() to replace task

Hidetoshi Seto

unread,

Nov 19, 2009, 11:45:24 PM11/19/09

to linux-...@vger.kernel.org, Ingo Molnar, Peter Zijlstra, Spencer Candland, Stanislaw Gruszka, Oleg Nesterov, Balbir Singh, Américo Wang

Function task_[us]times() are called consecutively in almost all
cases. However task_stime() is implemented to call task_utime()
from its inside, so such paired calls run task_utime() twice.

It means we do heavy divisions (div_u64 + do_div) twice to get
stime and utime which can be obtained at same time by one set
of divisions.

This patch introduces task_times(*tsk, *utime, *stime) to get
stime and utime at once, in better, optimized way.

Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>
---
fs/proc/array.c | 3 +-
include/linux/sched.h | 1 +
kernel/exit.c | 7 ++++-
kernel/sched.c | 55 +++++++++++++++++++++++++++++++-----------------
kernel/sys.c | 3 +-
5 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index e209f64..330deda 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -535,8 +535,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
if (!whole) {
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- utime = task_utime(task);
- stime = task_stime(task);
+ task_times(task, &utime, &stime);
gtime = task_gtime(task);
}

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78ba664..fe6ae15 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1723,6 +1723,7 @@ static inline void put_task_struct(struct task_struct *t)
extern cputime_t task_utime(struct task_struct *p);
extern cputime_t task_stime(struct task_struct *p);
extern cputime_t task_gtime(struct task_struct *p);
+extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);

/*
* Per process flags
diff --git a/kernel/exit.c b/kernel/exit.c
index e61891f..a19e429 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -91,6 +91,8 @@ static void __exit_signal(struct task_struct *tsk)
if (atomic_dec_and_test(&sig->count))
posix_cpu_timers_exit_group(tsk);
else {
+ cputime_t utime, stime;
+
/*
* If there is any task waiting for the group exit
* then notify it:
@@ -110,8 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
- sig->utime = cputime_add(sig->utime, task_utime(tsk));
- sig->stime = cputime_add(sig->stime, task_stime(tsk));
+ task_times(tsk, &utime, &stime);
+ sig->utime = cputime_add(sig->utime, utime);
+ sig->stime = cputime_add(sig->stime, stime);
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
diff --git a/kernel/sched.c b/kernel/sched.c
index ab9a034..18c89e0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5155,6 +5155,14 @@ cputime_t task_stime(struct task_struct *p)
{
return p->stime;
}
+
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ if (ut)
+ *ut = task_utime(p);
+ if (st)
+ *st = task_stime(p);
+}
#else

#ifndef nsecs_to_cputime
@@ -5162,41 +5170,48 @@ cputime_t task_stime(struct task_struct *p)
msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
#endif

-cputime_t task_utime(struct task_struct *p)
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
- cputime_t utime = p->utime, total = utime + p->stime;
- u64 temp;
+ cputime_t rtime, utime = p->utime, total = utime + p->stime;

/*
* Use CFS's precise accounting:
*/
- temp = (u64)nsecs_to_cputime(p->se.sum_exec_runtime);
+ rtime = nsecs_to_cputime(p->se.sum_exec_runtime);

if (total) {
- temp *= utime;
+ u64 temp;
+
+ temp = (u64)(rtime * utime);
do_div(temp, total);
- }
- utime = (cputime_t)temp;
+ utime = (cputime_t)temp;
+ } else
+ utime = rtime;

+ /*
+ * Compare with previous values, to keep monotonicity:
+ */
p->prev_utime = max(p->prev_utime, utime);
- return p->prev_utime;
+ p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+
+ if (ut)
+ *ut = p->prev_utime;
+ if (st)
+ *st = p->prev_stime;
+}
+
+cputime_t task_utime(struct task_struct *p)
+{
+ cputime_t utime;
+ task_times(p, &utime, NULL);
+ return utime;
}

cputime_t task_stime(struct task_struct *p)
{
cputime_t stime;
-
- /*
- * Use CFS's precise accounting. (we subtract utime from
- * the total, to make sure the total observed by userspace
- * grows monotonically - apps rely on that):
- */
- stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p);
-
- if (stime >= 0)
- p->prev_stime = max(p->prev_stime, stime);
-
- return p->prev_stime;
+ task_times(p, NULL, &stime);
+ return stime;
}
#endif

diff --git a/kernel/sys.c b/kernel/sys.c
index 255475d..b6caf1e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1344,8 +1344,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
utime = stime = cputime_zero;

if (who == RUSAGE_THREAD) {
- utime = task_utime(current);
- stime = task_stime(current);
+ task_times(current, &utime, &stime);
accumulate_thread_rusage(p, r);
maxrss = p->signal->maxrss;
goto out;
--
1.6.5.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Stanislaw Gruszka

unread,

Nov 23, 2009, 5:30:50 AM11/23/09

to Hidetoshi Seto, linux-...@vger.kernel.org, Ingo Molnar, Peter Zijlstra, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

On Fri, Nov 20, 2009 at 01:44:10PM +0900, Hidetoshi Seto wrote:
> Function task_[us]times() are called consecutively in almost all
> cases. However task_stime() is implemented to call task_utime()
> from its inside, so such paired calls run task_utime() twice.
>
> It means we do heavy divisions (div_u64 + do_div) twice to get
> stime and utime which can be obtained at same time by one set
> of divisions.
>
> This patch introduces task_times(*tsk, *utime, *stime) to get
> stime and utime at once, in better, optimized way.
>
> Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>

[snip]

> @@ -5155,6 +5155,14 @@ cputime_t task_stime(struct task_struct *p)
> {
> return p->stime;
> }
> +
> +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
> +{
> + if (ut)
> + *ut = task_utime(p);
> + if (st)
> + *st = task_stime(p);
> +}
> #else

I think task_{u,s}time are not needed anymore. Can we just fully get
rid of them and only use task_times() ?

> #ifndef nsecs_to_cputime
> @@ -5162,41 +5170,48 @@ cputime_t task_stime(struct task_struct *p)
> msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
> #endif

Could we furhter optimize this? Perhaps we can use below code
(taken from timespec_to_jiffies()):

cputime = (nsec * NSEC_CONVERSION) >>
(NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;

Stanislaw

Hidetoshi Seto

unread,

Nov 24, 2009, 12:38:51 AM11/24/09

to Stanislaw Gruszka, linux-...@vger.kernel.org, Ingo Molnar, Peter Zijlstra, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

Yes, we can :-)

I was just afraid that there were other task_{u,s}time users I could
not find. So I separated it in another patch to remove the API, to be
posted later. But if it is OK, I can put them together in one patch.
(Or it is still better to be separated and incremental one?)

>> #ifndef nsecs_to_cputime
>> @@ -5162,41 +5170,48 @@ cputime_t task_stime(struct task_struct *p)
>> msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
>> #endif
>
> Could we furhter optimize this? Perhaps we can use below code
> (taken from timespec_to_jiffies()):
>
> cputime = (nsec * NSEC_CONVERSION) >>
> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;

I hope there were nsecs_to_jiffies().
It will be complex than:

cputime = (nsec * NSEC_CONVERSION) >> NSEC_JIFFIE_SC;

In timespec_to_jiffies(), nsec is never greater than NSEC_PER_SEC.
So above will work without any overflow (I confirmed it becomes wrong
if nsec > (LLONG_MAX / NSEC_CONVERSION) = about 8190ms).

But here in task_timers() the nsec can be greater than hours (or days),
we must be careful...

And just now I noticed that using msecs_to_cputime() is problematic,
since the type of its return value is "unsigned long" so not 64bit.
I'll make and post a patch to fix this asap.

..BTW, could anyone explain what the following (line 661) is doing?:

[kernel/time.c]
649 u64 nsec_to_clock_t(u64 x)
650 {
651 #if (NSEC_PER_SEC % USER_HZ) == 0
652 return div_u64(x, NSEC_PER_SEC / USER_HZ);
653 #elif (USER_HZ % 512) == 0
654 return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512);
655 #else
656 /*
657 * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
658 * overflow after 64.99 years.
659 * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
660 */
661 return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ);
662 #endif
663 }

Thanks,
H.Seto

Hidetoshi Seto

unread,

Nov 24, 2009, 2:09:23 AM11/24/09

to Stanislaw Gruszka, linux-...@vger.kernel.org, Ingo Molnar, Peter Zijlstra, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

Hidetoshi Seto wrote:
> And just now I noticed that using msecs_to_cputime() is problematic,
> since the type of its return value is "unsigned long" so not 64bit.
> I'll make and post a patch to fix this asap.

Sorry, I was confused. Please ignore the above.

The problem is that since the type of msecs_to_cputime() argument is
unsigned int, it would be wrong if calculated msecs is greater than
UINT_MAX.

Another problem is that msecs_to_jiffies() returns MAX_JIFFY_OFFSET
if msecs > INT_MAX.

5168 #ifndef nsecs_to_cputime
5169 # define nsecs_to_cputime(__nsecs) \
5170 msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
5171 #endif

452 unsigned long msecs_to_jiffies(const unsigned int m)
453 {
454 /*
455 * Negative value, means infinite timeout:
456 */
457 if ((int)m < 0)
458 return MAX_JIFFY_OFFSET;
:

What I need here is a simple function can convert 64bit nsecs to
cputime_t, which is unsigned long in asm-generic/cputime.h.
Humm...

Hidetoshi Seto

unread,

Nov 26, 2009, 12:49:18 AM11/26/09

to linux-...@vger.kernel.org, Stanislaw Gruszka, Ingo Molnar, Peter Zijlstra, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

Functions task_{u,s}time() are called in pair in almost all

cases. However task_stime() is implemented to call task_utime()
from its inside, so such paired calls run task_utime() twice.

It means we do heavy divisions (div_u64 + do_div) twice to get

utime and stime which can be obtained at same time by one set
of divisions.

This patch introduces a function task_times(*tsk, *utime, *stime)
to retrieve utime and stime at once in better, optimized way.

Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>

index a57c6ae..4d5e1a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c

@@ -5155,6 +5155,14 @@ cputime_t task_stime(struct task_struct *p)
{
return p->stime;
}
+
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ if (ut)
+ *ut = task_utime(p);
+ if (st)
+ *st = task_stime(p);
+}
#else

#ifndef nsecs_to_cputime
@@ -5162,41 +5170,48 @@ cputime_t task_stime(struct task_struct *p)
msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
#endif

-cputime_t task_utime(struct task_struct *p)

+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)

+
+ if (ut)
+ *ut = p->prev_utime;
+ if (st)

--

Hidetoshi Seto

unread,

Nov 26, 2009, 12:49:37 AM11/26/09

to linux-...@vger.kernel.org, Stanislaw Gruszka, Ingo Molnar, Peter Zijlstra, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

Now all task_{u,s}time() pairs are replaced by task_times().
And task_gtime() is too simple to be an inline function.

Cleanup them all.

Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>
---

fs/proc/array.c | 4 ++--
include/linux/sched.h | 3 ---
kernel/exit.c | 2 +-
kernel/sched.c | 33 ++-------------------------------
4 files changed, 5 insertions(+), 37 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 330deda..ca61a88 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
- gtime = cputime_add(gtime, task_gtime(t));
+ gtime = cputime_add(gtime, t->gtime);
t = next_thread(t);
} while (t != task);

@@ -536,7 +536,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,

min_flt = task->min_flt;
maj_flt = task->maj_flt;

task_times(task, &utime, &stime);
- gtime = task_gtime(task);
+ gtime = task->gtime;
}

/* scale priority and nice values from timeslices to -20..20 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe6ae15..0395b0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1720,9 +1720,6 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}

-extern cputime_t task_utime(struct task_struct *p);
-extern cputime_t task_stime(struct task_struct *p);
-extern cputime_t task_gtime(struct task_struct *p);
extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);

/*
diff --git a/kernel/exit.c b/kernel/exit.c
index a19e429..480de5b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -115,7 +115,7 @@ static void __exit_signal(struct task_struct *tsk)
task_times(tsk, &utime, &stime);

sig->utime = cputime_add(sig->utime, utime);

sig->stime = cputime_add(sig->stime, stime);

- sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+ sig->gtime = cputime_add(sig->gtime, tsk->gtime);

sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;

sig->nvcsw += tsk->nvcsw;
diff --git a/kernel/sched.c b/kernel/sched.c
index 4d5e1a2..a7093c1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5146,22 +5146,12 @@ void account_idle_ticks(unsigned long ticks)
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-cputime_t task_utime(struct task_struct *p)
-{
- return p->utime;
-}
-
-cputime_t task_stime(struct task_struct *p)
-{
- return p->stime;
-}
-

void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)

{
if (ut)
- *ut = task_utime(p);
+ *ut = p->utime;
if (st)
- *st = task_stime(p);
+ *st = p->stime;
}
#else

@@ -5199,27 +5189,8 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
if (st)
*st = p->prev_stime;
}
-
-cputime_t task_utime(struct task_struct *p)
-{
- cputime_t utime;
- task_times(p, &utime, NULL);
- return utime;
-}
-
-cputime_t task_stime(struct task_struct *p)
-{
- cputime_t stime;
- task_times(p, NULL, &stime);
- return stime;
-}
#endif

-inline cputime_t task_gtime(struct task_struct *p)
-{
- return p->gtime;
-}
-
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
--
1.6.5.3

Hidetoshi Seto

unread,

Nov 26, 2009, 12:50:07 AM11/26/09

to linux-...@vger.kernel.org, Stanislaw Gruszka, Ingo Molnar, Peter Zijlstra, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

Use of msecs_to_jiffies() for nsecs_to_cputime() have some problems:

- The type of msecs_to_jiffies()'s argument is unsigned int, so
it cannot convert msecs greater than UINT_MAX = about 49.7 days.
- msecs_to_jiffies() returns MAX_JIFFY_OFFSET if MSB of argument
is set, assuming that input was negative value. So it cannot
convert msecs greater than INT_MAX = about 24.8 days too.

This patch defines a new function nsecs_to_jiffies() that can deal
greater values, and that can deal all incoming values as unsigned.

Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>
---

include/linux/jiffies.h | 1 +
kernel/sched.c | 3 +--
kernel/time.c | 30 ++++++++++++++++++++++++++++++
3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 1a9cf78..6811f4b 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
+extern unsigned long nsecs_to_jiffies(u64 n);

#define TIMESTAMP_SIZE 30

diff --git a/kernel/sched.c b/kernel/sched.c
index a7093c1..792b25e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5156,8 +5156,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
#else

#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs) \
- msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
+# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
#endif

void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)

diff --git a/kernel/time.c b/kernel/time.c
index 2e2e469..b30016c 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -662,6 +662,36 @@ u64 nsec_to_clock_t(u64 x)
#endif
}

+/**
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ *
+ * @n: nsecs in u64
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * note:
+ * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+unsigned long nsecs_to_jiffies(u64 n)
+{
+#if (NSEC_PER_SEC % HZ) == 0
+ /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
+ return div_u64(n, NSEC_PER_SEC / HZ);
+#elif (HZ % 512) == 0
+ /* overflow after 292 years if HZ = 1024 */
+ return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
+#else
+ /*
+ * Generic case - optimized for cases where HZ is a multiple of 3.
+ * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
+ */
+ return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
+#endif
+}
+
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
{
--
1.6.5.3

Peter Zijlstra

unread,

Nov 26, 2009, 5:26:10 AM11/26/09

to Hidetoshi Seto, linux-...@vger.kernel.org, Stanislaw Gruszka, Ingo Molnar, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

On Thu, 2009-11-26 at 14:48 +0900, Hidetoshi Seto wrote:
> Functions task_{u,s}time() are called in pair in almost all
> cases. However task_stime() is implemented to call task_utime()
> from its inside, so such paired calls run task_utime() twice.
>
> It means we do heavy divisions (div_u64 + do_div) twice to get
> utime and stime which can be obtained at same time by one set
> of divisions.
>
> This patch introduces a function task_times(*tsk, *utime, *stime)
> to retrieve utime and stime at once in better, optimized way.
>
> Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>

OK, the patch looks good, but it does not solve those non monotonic
times funnies that started all this, right?

Peter Zijlstra

unread,

Nov 26, 2009, 5:26:50 AM11/26/09

to Hidetoshi Seto, linux-...@vger.kernel.org, Stanislaw Gruszka, Ingo Molnar, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

On Thu, 2009-11-26 at 14:49 +0900, Hidetoshi Seto wrote:
> Now all task_{u,s}time() pairs are replaced by task_times().
> And task_gtime() is too simple to be an inline function.
>
> Cleanup them all.

Cleanups are always nice!

tip-bot for Hidetoshi Seto

unread,

Nov 26, 2009, 7:34:42 AM11/26/09

to linux-ti...@vger.kernel.org, linux-...@vger.kernel.org, spe...@bluehost.com, h...@zytor.com, mi...@redhat.com, seto.hi...@jp.fujitsu.com, pet...@infradead.org, xiyou.w...@gmail.com, bal...@in.ibm.com, tg...@linutronix.de, ol...@redhat.com, sgru...@redhat.com, mi...@elte.hu

Commit-ID: d5b7c78e975302a1bab28263266c39ecb71acad4
Gitweb: http://git.kernel.org/tip/d5b7c78e975302a1bab28263266c39ecb71acad4
Author: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>
AuthorDate: Thu, 26 Nov 2009 14:49:05 +0900
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Thu, 26 Nov 2009 12:59:20 +0100

sched: Remove task_{u,s,g}time()

Now all task_{u,s}time() pairs are replaced by task_times().
And task_gtime() is too simple to be an inline function.

Cleanup them all.

Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>
Acked-by: Peter Zijlstra <pet...@infradead.org>
Cc: Stanislaw Gruszka <sgru...@redhat.com>
Cc: Spencer Candland <spe...@bluehost.com>
Cc: Oleg Nesterov <ol...@redhat.com>
Cc: Balbir Singh <bal...@in.ibm.com>
Cc: Americo Wang <xiyou.w...@gmail.com>
LKML-Reference: <4B0E16D...@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>

index 29068ab..2eaf68b 100644

--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -115,7 +115,7 @@ static void __exit_signal(struct task_struct *tsk)
task_times(tsk, &utime, &stime);
sig->utime = cputime_add(sig->utime, utime);
sig->stime = cputime_add(sig->stime, stime);
- sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+ sig->gtime = cputime_add(sig->gtime, tsk->gtime);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
diff --git a/kernel/sched.c b/kernel/sched.c

index 475a6f2..82251c2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5182,22 +5182,12 @@ void account_idle_ticks(unsigned long ticks)

* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-cputime_t task_utime(struct task_struct *p)
-{
- return p->utime;
-}
-
-cputime_t task_stime(struct task_struct *p)
-{
- return p->stime;
-}
-
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
if (ut)
- *ut = task_utime(p);
+ *ut = p->utime;
if (st)
- *st = task_stime(p);
+ *st = p->stime;
}
#else

@@ -5235,27 +5225,8 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)

tip-bot for Hidetoshi Seto

unread,

Nov 26, 2009, 7:34:48 AM11/26/09

to linux-ti...@vger.kernel.org, linux-...@vger.kernel.org, spe...@bluehost.com, h...@zytor.com, mi...@redhat.com, seto.hi...@jp.fujitsu.com, pet...@infradead.org, xiyou.w...@gmail.com, bal...@in.ibm.com, tg...@linutronix.de, ol...@redhat.com, sgru...@redhat.com, mi...@elte.hu

Commit-ID: d180c5bccec02612256fd8076ff3c1fac3429553
Gitweb: http://git.kernel.org/tip/d180c5bccec02612256fd8076ff3c1fac3429553
Author: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>
AuthorDate: Thu, 26 Nov 2009 14:48:30 +0900
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Thu, 26 Nov 2009 12:59:19 +0100

sched: Introduce task_times() to replace task_{u,s}time() pair

Functions task_{u,s}time() are called in pair in almost all
cases. However task_stime() is implemented to call task_utime()
from its inside, so such paired calls run task_utime() twice.

It means we do heavy divisions (div_u64 + do_div) twice to get
utime and stime which can be obtained at same time by one set
of divisions.

This patch introduces a function task_times(*tsk, *utime,
*stime) to retrieve utime and stime at once in better, optimized
way.

Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>

Acked-by: Peter Zijlstra <pet...@infradead.org>
Cc: Stanislaw Gruszka <sgru...@redhat.com>
Cc: Spencer Candland <spe...@bluehost.com>
Cc: Oleg Nesterov <ol...@redhat.com>
Cc: Balbir Singh <bal...@in.ibm.com>
Cc: Americo Wang <xiyou.w...@gmail.com>

LKML-Reference: <4B0E16...@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>

index f7864ac..29068ab 100644

--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -91,6 +91,8 @@ static void __exit_signal(struct task_struct *tsk)
if (atomic_dec_and_test(&sig->count))
posix_cpu_timers_exit_group(tsk);
else {
+ cputime_t utime, stime;
+
/*
* If there is any task waiting for the group exit
* then notify it:
@@ -110,8 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
- sig->utime = cputime_add(sig->utime, task_utime(tsk));
- sig->stime = cputime_add(sig->stime, task_stime(tsk));
+ task_times(tsk, &utime, &stime);
+ sig->utime = cputime_add(sig->utime, utime);
+ sig->stime = cputime_add(sig->stime, stime);
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
diff --git a/kernel/sched.c b/kernel/sched.c

index 315ba40..475a6f2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5191,6 +5191,14 @@ cputime_t task_stime(struct task_struct *p)

{
return p->stime;
}
+
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ if (ut)
+ *ut = task_utime(p);
+ if (st)
+ *st = task_stime(p);
+}
#else

#ifndef nsecs_to_cputime

@@ -5198,41 +5206,48 @@ cputime_t task_stime(struct task_struct *p)

index ce17760..bbdfce0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1346,8 +1346,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)

utime = stime = cputime_zero;

if (who == RUSAGE_THREAD) {
- utime = task_utime(current);
- stime = task_stime(current);
+ task_times(current, &utime, &stime);
accumulate_thread_rusage(p, r);
maxrss = p->signal->maxrss;
goto out;
--

tip-bot for Hidetoshi Seto

unread,

Nov 26, 2009, 7:34:56 AM11/26/09

to linux-ti...@vger.kernel.org, linux-...@vger.kernel.org, spe...@bluehost.com, h...@zytor.com, mi...@redhat.com, john...@linux.vnet.ibm.com, seto.hi...@jp.fujitsu.com, pet...@infradead.org, xiyou.w...@gmail.com, bal...@in.ibm.com, tg...@linutronix.de, ol...@redhat.com, sgru...@redhat.com, mi...@elte.hu

Commit-ID: b7b20df91d43d5e59578b8fc16e895c0c8cbd9b5
Gitweb: http://git.kernel.org/tip/b7b20df91d43d5e59578b8fc16e895c0c8cbd9b5
Author: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>
AuthorDate: Thu, 26 Nov 2009 14:49:27 +0900
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Thu, 26 Nov 2009 12:59:20 +0100

sched, time: Define nsecs_to_jiffies()

Use of msecs_to_jiffies() for nsecs_to_cputime() have some
problems:

- The type of msecs_to_jiffies()'s argument is unsigned int, so
it cannot convert msecs greater than UINT_MAX = about 49.7 days.

- msecs_to_jiffies() returns MAX_JIFFY_OFFSET if MSB of argument
is set, assuming that input was negative value. So it cannot
convert msecs greater than INT_MAX = about 24.8 days too.

This patch defines a new function nsecs_to_jiffies() that can
deal greater values, and that can deal all incoming values as
unsigned.

Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>

Acked-by: Peter Zijlstra <pet...@infradead.org>
Cc: Stanislaw Gruszka <sgru...@redhat.com>
Cc: Spencer Candland <spe...@bluehost.com>
Cc: Oleg Nesterov <ol...@redhat.com>
Cc: Balbir Singh <bal...@in.ibm.com>

Cc: Amrico Wang <xiyou.w...@gmail.com>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: John Stultz <john...@linux.vnet.ibm.com>
LKML-Reference: <4B0E16E7...@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>

---
include/linux/jiffies.h | 1 +
kernel/sched.c | 3 +--
kernel/time.c | 30 ++++++++++++++++++++++++++++++
3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 1a9cf78..6811f4b 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
+extern unsigned long nsecs_to_jiffies(u64 n);

#define TIMESTAMP_SIZE 30

diff --git a/kernel/sched.c b/kernel/sched.c

index 82251c2..b3d4e2b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5192,8 +5192,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)

#else

#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs) \
- msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
+# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
#endif

void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
diff --git a/kernel/time.c b/kernel/time.c

index 2e2e469..8047980 100644

Hidetoshi Seto

unread,

Nov 26, 2009, 7:38:08 PM11/26/09

to Peter Zijlstra, linux-...@vger.kernel.org, Stanislaw Gruszka, Ingo Molnar, Spencer Candland, Oleg Nesterov, Balbir Singh, Américo Wang

Peter Zijlstra wrote:
> On Thu, 2009-11-26 at 14:48 +0900, Hidetoshi Seto wrote:
>> This patch introduces a function task_times(*tsk, *utime, *stime)
>> to retrieve utime and stime at once in better, optimized way.
>>
>> Signed-off-by: Hidetoshi Seto <seto.hi...@jp.fujitsu.com>
>
> OK, the patch looks good, but it does not solve those non monotonic
> times funnies that started all this, right?

Yes, you are right.
These three patches are cleanup, not bugfix.

I don't have any clear idea to solve the original issue yet...

Thanks,
H.Seto

[PATCH tip/sched/core] introduce task_times() to replace task_[us]time() pair

Hidetoshi Seto

Stanislaw Gruszka

Hidetoshi Seto

Hidetoshi Seto

Hidetoshi Seto

Hidetoshi Seto

Hidetoshi Seto

Peter Zijlstra

Peter Zijlstra

tip-bot for Hidetoshi Seto

tip-bot for Hidetoshi Seto

tip-bot for Hidetoshi Seto

Hidetoshi Seto