Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[BUG] sched_setattr() SCHED_DEADLINE hangs system

176 views
Skip to first unread message

Michael Kerrisk (man-pages)

unread,
May 11, 2014, 11:00:01 AM5/11/14
to
[Dave: I wonder if there's anything trinity can add in the way of
a test here?]

Hi Peter,

This looks like another bug in sched_setattr(). Using the program
below (which you might find generally helpful for testing), I'm
able to reliably freeze up my x64 (Intel Core i7-3520M Processor)
system for up to about a minute when I run with the following
command line:

$ time sudo ./t_sched_setattr d 18446744072 18446744072 18446744073

'd' here means use SCHED_DEADLINE, then the remaining arguments
are the Runtime, Deadline, and Period, expressed in *seconds*.
(Those number by the way are just a little below 2^64.)

Aside from interpreting its command-line arguments, all that the
program does is call sched_setattr() and displays elapsed times.
(By the way, on my system I see some weird effects for time(2),
presumably VDSO effects.)

Here's sample run:

time sudo ./t_sched_setattr d 18446744072 18446744072 18446744073
Runtime = 18446744072000000000
Deadline = 18446744072000000000
Period = 18446744073000000000
About to call sched_setattr()
Successful return from sched_setattr() [6 seconds]

real 0m40.421s
user 0m3.097s
sys 0m30.804s

After unfreezing the machine is fine, while the program is running,
the machine is pretty unresponsive.

I'm on kernel 3.15-rc4.

Cheers,

Michael


=====
/*#* t_sched_setattr.c
*/
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <unistd.h>
#include <sys/syscall.h>
#include <inttypes.h>
#include <sched.h>
#include <time.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

struct sched_attr {
uint32_t size;

uint32_t sched_policy;
uint64_t sched_flags;

/* SCHED_NORMAL, SCHED_BATCH */
int32_t sched_nice;

/* SCHED_FIFO, SCHED_RR */
uint32_t sched_priority;

/* SCHED_DEADLINE */
uint64_t sched_runtime;
uint64_t sched_deadline;
uint64_t sched_period;
};

#ifdef __x86_64__
#define __NR_sched_setattr 314
#define __NR_sched_getattr 315
#endif

#ifdef __i386__
#define __NR_sched_setattr 351
#define __NR_sched_getattr 352
#endif

#ifdef __arm__
#define __NR_sched_setattr 380
#define __NR_sched_getattr 381
#endif

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6
#endif

#ifndef SCHED_FLAG_RESET_ON_FORK
#define SCHED_FLAG_RESET_ON_FORK 0x01
#endif

static int
sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags)
{
return syscall(__NR_sched_setattr, pid, attr, flags);
}

static void
usageError(char *pname)
{
fprintf(stderr, "Usage:\n");
fprintf(stderr, " Non-realtime:\n");
fprintf(stderr, " %s {o|b|i} <nice>\n", pname);
fprintf(stderr, " Realtime:\n");
fprintf(stderr, " %s {f|r} <prio>\n", pname);
fprintf(stderr, " Deadline:\n");
fprintf(stderr, " %s d <runtime> <deadline> <period>\n",
pname);
fprintf(stderr, " (runtime, deadline, and period are in "
"seconds,\n");
fprintf(stderr, " unless -m or -n specified)\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " -f SCHED_FLAG_RESET_ON_FORK\n");
fprintf(stderr, " -m Deadline times are in milliseconds\n");
fprintf(stderr, " -n Deadline times are in nanoseconds\n");
fprintf(stderr, " -s size Value for size argument\n");
fprintf(stderr, " -p pid PID of target process (default "
"is self)\n");
fprintf(stderr, " -v val Value for unused bytes of 'attr' "
"buffer\n");
fprintf(stderr, " -w nsecs Sleep time (only valid when setting "
"policy for self)\n");
exit(EXIT_FAILURE);
}

int
main(int argc, char *argv[])
{
size_t size, alloc_size;;
int opt, flags, val;
struct sched_attr *sa;
pid_t pid;
int sleepTime;
long long timeFactor;
time_t base;

/* Parse command-line arguments */

sleepTime = 0;
pid = 0;
flags = 0;
size = sizeof(struct sched_attr);
val = 0;
timeFactor = 1000 * 1000 * 1000;


while ((opt = getopt(argc, argv, "fmnp:s:v:w:")) != -1) {
switch (opt) {
case 'f': flags = SCHED_FLAG_RESET_ON_FORK; break;
case 'm': timeFactor = 1000 * 1000; break;
case 'n': timeFactor = 1; break;
case 'p': pid = atoi(optarg); break;
case 's': size = atoi(optarg); break;
case 'v': val = atoi(optarg); break;
case 'w': sleepTime = atoi(optarg); break;
default: usageError(argv[1]);
}
}

if (optind + 1 > argc)
usageError(argv[0]);

alloc_size = (size > sizeof(struct sched_attr)) ? size :
sizeof(struct sched_attr);
sa = malloc(alloc_size);
if (sa == NULL) {
perror("malloc");
exit(EXIT_FAILURE);
}

/* Initializing bytes in buffer to nonzero values allows us to test
the E2BIG error case */

memset(sa, val, alloc_size);

sa->size = size;
sa->sched_flags = flags;

switch (argv[optind][0]) {

case 'o':
sa->sched_policy = SCHED_OTHER;
sa->sched_nice = atoi(argv[optind + 1]);
break;

case 'b':
sa->sched_policy = SCHED_BATCH;
sa->sched_priority = 1;
sa->sched_nice = atoi(argv[optind + 1]);
break;

case 'i':
sa->sched_policy = SCHED_IDLE;
sa->sched_nice = atoi(argv[optind + 1]);
/* Yes, SCHED_IDLE doesn't use nice values, but this let's us
test what happen if a nonzero nice value is given */
break;

case 'f':
sa->sched_policy = SCHED_FIFO;
sa->sched_priority = atoi(argv[optind + 1]);
break;

case 'r':
sa->sched_policy = SCHED_RR;
sa->sched_priority = atoi(argv[optind + 1]);
break;

case 'd':
if (argc != optind + 4)
usageError(argv[0]);
sa->sched_policy = SCHED_DEADLINE;
sa->sched_runtime = atoll(argv[optind + 1]) * timeFactor;
sa->sched_deadline = atoll(argv[optind + 2]) * timeFactor;
sa->sched_period = atoll(argv[optind + 3]) * timeFactor;
printf("Runtime = %25" PRIu64 "\nDeadline = %25" PRIu64
"\nPeriod = %25" PRIu64 "\n",
sa->sched_runtime, sa->sched_deadline, sa->sched_period);
break;

default:
usageError(argv[0]);
}

printf("About to call sched_setattr()\n");

usleep(10000);

base = time(NULL);
if (sched_setattr(pid, sa, 0) == -1) {
perror("sched_setattr");
printf("sa->size = %" PRIu32 "\n", sa->size);
exit(EXIT_FAILURE);
}

usleep(10000); /* Without this small sleep, time() does not
seem to return an up-to-date time. Some VDSO
effect? */

printf("Successful return from sched_setattr() [%ld seconds]\n",
(long) time(NULL) - base);

/* If we set our own scheduling policy and attributes, then we
optionally sleep for a while, so we can inspect process
attributes */

if ((pid == 0 || pid == getpid()) && sleepTime > 0)
sleep(sleepTime);

exit(EXIT_SUCCESS);
}


--
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Michael Kerrisk (man-pages)

unread,
May 11, 2014, 1:50:01 PM5/11/14
to
On 05/11/2014 04:54 PM, Michael Kerrisk (man-pages) wrote:
> $ time sudo ./t_sched_setattr d 18446744072 18446744072 18446744073
>
> 'd' here means use SCHED_DEADLINE, then the remaining arguments
> are the Runtime, Deadline, and Period, expressed in *seconds*.
> (Those number by the way are just a little below 2^64.)

I wasn't being 100% clear there. I mean to say: Those numbers are,
when converted to nanoseconds, just a little below 2^64.

Michael Kerrisk (man-pages)

unread,
May 12, 2014, 3:00:02 AM5/12/14
to
On 05/11/2014 04:54 PM, Michael Kerrisk (man-pages) wrote:
Hi Peter,

I realize my speculation was completely off the mark. time(2) really
is reporting the truth, and the sched_setattr() call returns immediately.
But it looks like with these settings the deadline scheduler gets itself
into a confused state. The process chews up a vast amount of CPU time
for the few actions (including process teardown) that occur after
the sched_setattr() call, and since the SCHED_DEADLINE process has
priority over everything else, the system locks up.

Cheers,

Michael

Peter Zijlstra

unread,
May 12, 2014, 4:50:02 AM5/12/14
to
On Mon, May 12, 2014 at 08:53:59AM +0200, Michael Kerrisk (man-pages) wrote:
> On 05/11/2014 04:54 PM, Michael Kerrisk (man-pages) wrote:

> > $ time sudo ./t_sched_setattr d 18446744072 18446744072 18446744073
>
> I realize my speculation was completely off the mark. time(2) really
> is reporting the truth, and the sched_setattr() call returns immediately.
> But it looks like with these settings the deadline scheduler gets itself
> into a confused state. The process chews up a vast amount of CPU time
> for the few actions (including process teardown) that occur after
> the sched_setattr() call, and since the SCHED_DEADLINE process has
> priority over everything else, the system locks up.

Yeah, its doing something weird alright.. let me see if I can get
something useful out.

Btw, you do know about EX_USAGE from sysexits.h ?

Michael Kerrisk (man-pages)

unread,
May 12, 2014, 5:30:02 AM5/12/14
to
Hi Peter,
Thanks!

> Btw, you do know about EX_USAGE from sysexits.h ?

Yes, I'm peripherally aware of them, but have tended to avoid them
because they're not in POSIX, and don't seem to be all that widely
used.

Peter Zijlstra

unread,
May 12, 2014, 8:40:01 AM5/12/14
to
On Mon, May 12, 2014 at 11:19:39AM +0200, Michael Kerrisk (man-pages) wrote:
> Hi Peter,
>
> On Mon, May 12, 2014 at 10:47 AM, Peter Zijlstra <pet...@infradead.org> wrote:
> > On Mon, May 12, 2014 at 08:53:59AM +0200, Michael Kerrisk (man-pages) wrote:
> >> On 05/11/2014 04:54 PM, Michael Kerrisk (man-pages) wrote:
> >
> >> > $ time sudo ./t_sched_setattr d 18446744072 18446744072 18446744073
> >>
> >> I realize my speculation was completely off the mark. time(2) really
> >> is reporting the truth, and the sched_setattr() call returns immediately.
> >> But it looks like with these settings the deadline scheduler gets itself
> >> into a confused state. The process chews up a vast amount of CPU time
> >> for the few actions (including process teardown) that occur after
> >> the sched_setattr() call, and since the SCHED_DEADLINE process has
> >> priority over everything else, the system locks up.
> >
> > Yeah, its doing something weird alright.. let me see if I can get
> > something useful out.
>
> Thanks!

So I think its because the way we check wrapping

(s64)(a - b) < 0

This means that its impossible to tell if time went fwd or bwd with
64bit increments. I've not entirely pinpointed where this is wrecking
things, but it seems like a fair bet this is what's going wrong.

So I'm tempted to put a sanity check on all these values to make sure <=
2^63. That way the wrapping logic in the kernel keeps working.

And 2^63 [ns] should be plenty large enough for everyone (famous last
words of course).

> > Btw, you do know about EX_USAGE from sysexits.h ?
>
> Yes, I'm peripherally aware of them, but have tended to avoid them
> because they're not in POSIX, and don't seem to be all that widely
> used.

Ah, so then its just something weird I've picked up along the way :-)

Juri Lelli

unread,
May 13, 2014, 6:00:03 AM5/13/14
to
Hi all,

On Mon, 12 May 2014 14:30:32 +0200
Does the following fix the thing?

Thanks,

- Juri

---
From 90a7603a0b6b620c9d07e3f375906b436dcc2230 Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri....@gmail.com>
Date: Tue, 13 May 2014 10:15:59 +0200
Subject: [PATCH] sched/deadline: restrict user params max value to 2^63 ns

Michael Kerrisk noticed that creating SCHED_DEADLINE reservations
with certain parameters (e.g, a runtime of something near 2^64 ns)
can cause a system freeze for some amount of time.

The problem is that in the interface we have

u64 sched_runtime;

while internally we need to have a signed runtime (to cope with
budget overruns)

s64 runtime;

At the time we setup a new dl_entity we copy the first value in
the second. The cast turns out with negative values when
sched_runtime is too big, and this causes the scheduler to go crazy
right from the start.

Moreover, considering how we deal with deadlines wraparound

(s64)(a - b) < 0

we also have to restrict acceptable values for sched_{deadline,period}.

This patch fixes the thing checking that user parameters are always
below 2^63 ns (still large enough for everyone).

It also rewrites other conditions that we check, since in
__checkparam_dl we don't have to deal with deadline wraparounds
and what we have now erroneously fails when the difference between
values is too big.

Reported-by: Michael Kerrisk <mtk.ma...@gmail.com>
Suggested-by: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Juri Lelli <juri....@gmail.com>
---
kernel/sched/core.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d9d8ece..96ba59d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3188,17 +3188,21 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)
* We ask for the deadline not being zero, and greater or equal
* than the runtime, as well as the period of being zero or
* greater than deadline. Furthermore, we have to be sure that
- * user parameters are above the internal resolution (1us); we
- * check sched_runtime only since it is always the smaller one.
+ * user parameters are above the internal resolution of 1us (we
+ * check sched_runtime only since it is always the smaller one) and
+ * below 2^63 ns (we have to check both sched_deadline and
+ * sched_period, as the latter can be zero).
*/
static bool
__checkparam_dl(const struct sched_attr *attr)
{
return attr && attr->sched_deadline != 0 &&
(attr->sched_period == 0 ||
- (s64)(attr->sched_period - attr->sched_deadline) >= 0) &&
- (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 &&
- attr->sched_runtime >= (2 << (DL_SCALE - 1));
+ (attr->sched_period >= attr->sched_deadline)) &&
+ (attr->sched_deadline >= attr->sched_runtime) &&
+ attr->sched_runtime >= (1ULL << DL_SCALE) &&
+ (attr->sched_deadline < (1ULL << 63) &&
+ attr->sched_period < (1ULL << 63));
}

/*
--
1.7.9.5

Peter Zijlstra

unread,
May 13, 2014, 6:50:01 AM5/13/14
to
On Tue, May 13, 2014 at 11:57:49AM +0200, Juri Lelli wrote:
> static bool
> __checkparam_dl(const struct sched_attr *attr)
> {
> return attr && attr->sched_deadline != 0 &&
> (attr->sched_period == 0 ||
> - (s64)(attr->sched_period - attr->sched_deadline) >= 0) &&
> - (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 &&
> - attr->sched_runtime >= (2 << (DL_SCALE - 1));
> + (attr->sched_period >= attr->sched_deadline)) &&
> + (attr->sched_deadline >= attr->sched_runtime) &&
> + attr->sched_runtime >= (1ULL << DL_SCALE) &&
> + (attr->sched_deadline < (1ULL << 63) &&
> + attr->sched_period < (1ULL << 63));
> }

Could we maybe rewrite that function to look less like a ioccc.org
submission?

static bool
__checkparam_dl(const struct sched_attr *attr)
{
if (!attr) /* possible at all? */
return false;

/* runtime <= deadline <= period */
if (attr->sched_period < attr->sched_deadline ||
attr->sched_deadline < attr->sched_runtime)
return false;

/*
* Since we truncate DL_SCALE bits make sure we're at least that big,
* if runtime > (1 << DL_SCALE), so must the others be per the above
*/
if (attr->sched_runtime <= (1ULL << DL_SCALE))
return false;

/*
* Since we use the MSB for wrap-around and sign issues, make
* sure its not set, if period < 2^63, so must the others be.
*/
if (attr->sched_period & (1ULL << 63))
return false;

return true;
}

Did I miss anything?

Juri Lelli

unread,
May 13, 2014, 8:20:02 AM5/13/14
to
On Tue, 13 May 2014 12:43:07 +0200
Peter Zijlstra <pet...@infradead.org> wrote:

> On Tue, May 13, 2014 at 11:57:49AM +0200, Juri Lelli wrote:
> > static bool
> > __checkparam_dl(const struct sched_attr *attr)
> > {
> > return attr && attr->sched_deadline != 0 &&
> > (attr->sched_period == 0 ||
> > - (s64)(attr->sched_period - attr->sched_deadline) >= 0) &&
> > - (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 &&
> > - attr->sched_runtime >= (2 << (DL_SCALE - 1));
> > + (attr->sched_period >= attr->sched_deadline)) &&
> > + (attr->sched_deadline >= attr->sched_runtime) &&
> > + attr->sched_runtime >= (1ULL << DL_SCALE) &&
> > + (attr->sched_deadline < (1ULL << 63) &&
> > + attr->sched_period < (1ULL << 63));
> > }
>
> Could we maybe rewrite that function to look less like a ioccc.org
> submission?
>

Right.

> static bool
> __checkparam_dl(const struct sched_attr *attr)
> {
> if (!attr) /* possible at all? */
> return false;
>

I'd say no, removed.

> /* runtime <= deadline <= period */
> if (attr->sched_period < attr->sched_deadline ||
> attr->sched_deadline < attr->sched_runtime)
> return false;
>
> /*
> * Since we truncate DL_SCALE bits make sure we're at least that big,
> * if runtime > (1 << DL_SCALE), so must the others be per the above
> */
> if (attr->sched_runtime <= (1ULL << DL_SCALE))
> return false;
>
> /*
> * Since we use the MSB for wrap-around and sign issues, make
> * sure its not set, if period < 2^63, so must the others be.
> */
> if (attr->sched_period & (1ULL << 63))
> return false;
>
> return true;
> }
>
> Did I miss anything?

period can be 0, so we have to check also sched_deadline for MSB set.

Thanks,

- Juri

---
From e0c44d7614127f8dfbafc08c30681cb8098271e8 Mon Sep 17 00:00:00 2001
kernel/sched/core.c | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d9d8ece..682a986 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3188,17 +3188,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)
* We ask for the deadline not being zero, and greater or equal
* than the runtime, as well as the period of being zero or
* greater than deadline. Furthermore, we have to be sure that
- * user parameters are above the internal resolution (1us); we
- * check sched_runtime only since it is always the smaller one.
+ * user parameters are above the internal resolution of 1us (we
+ * check sched_runtime only since it is always the smaller one) and
+ * below 2^63 ns (we have to check both sched_deadline and
+ * sched_period, as the latter can be zero).
*/
static bool
__checkparam_dl(const struct sched_attr *attr)
{
- return attr && attr->sched_deadline != 0 &&
- (attr->sched_period == 0 ||
- (s64)(attr->sched_period - attr->sched_deadline) >= 0) &&
- (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 &&
- attr->sched_runtime >= (2 << (DL_SCALE - 1));
+ /* deadline != 0 */
+ if (attr->sched_deadline == 0)
+ return false;
+
+ /*
+ * Since we truncate DL_SCALE bits, make sure we're at least
+ * that big.
+ */
+ if (attr->sched_runtime < (1ULL << DL_SCALE))
+ return false;
+
+ /*
+ * Since we use the MSB for wrap-around and sign issues, make
+ * sure it's not set (mind that period can be equal to zero).
+ */
+ if (attr->sched_deadline & (1ULL << 63) ||
+ attr->sched_period & (1ULL << 63))
+ return false;
+
+ /* runtime <= deadline <= period (if period != 0) */
+ if ((attr->sched_period != 0 &&
+ attr->sched_period < attr->sched_deadline) ||
+ attr->sched_deadline < attr->sched_runtime)
+ return false;
+
+ return true;

Michael Kerrisk (man-pages)

unread,
May 13, 2014, 8:50:02 AM5/13/14
to
Thanks so much for suggesting the rewrite. It was rather impenetrable before.

> period can be 0, so we have to check also sched_deadline for MSB set.

Yes, I spotted that too as I tested the patch.

Anyway, I tested your version of the patch, Peter, and other than the
above problem, it works (and the error case I identified now fails with
EINVAL).

Cheers,

Michael
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/

tip-bot for Juri Lelli

unread,
May 19, 2014, 9:10:02 AM5/19/14
to
Commit-ID: 4c15961119dc58bab18f2e0600791f0476a511d4
Gitweb: http://git.kernel.org/tip/4c15961119dc58bab18f2e0600791f0476a511d4
Author: Juri Lelli <juri....@gmail.com>
AuthorDate: Tue, 13 May 2014 14:11:31 +0200
Committer: Thomas Gleixner <tg...@linutronix.de>
CommitDate: Mon, 19 May 2014 21:47:33 +0900

sched/deadline: Restrict user params max value to 2^63 ns

Michael Kerrisk noticed that creating SCHED_DEADLINE reservations
with certain parameters (e.g, a runtime of something near 2^64 ns)
can cause a system freeze for some amount of time.

The problem is that in the interface we have

u64 sched_runtime;

while internally we need to have a signed runtime (to cope with
budget overruns)

s64 runtime;

At the time we setup a new dl_entity we copy the first value in
the second. The cast turns out with negative values when
sched_runtime is too big, and this causes the scheduler to go crazy
right from the start.

Moreover, considering how we deal with deadlines wraparound

(s64)(a - b) < 0

we also have to restrict acceptable values for sched_{deadline,period}.

This patch fixes the thing checking that user parameters are always
below 2^63 ns (still large enough for everyone).

It also rewrites other conditions that we check, since in
__checkparam_dl we don't have to deal with deadline wraparounds
and what we have now erroneously fails when the difference between
values is too big.

Cc: sta...@vger.kernel.org
Cc: Dario Faggioli<rais...@linux.it>
Cc: Ingo Molnar <mi...@elte.hu>
Cc: Dave Jones <da...@redhat.com>
Reported-by: Michael Kerrisk <mtk.ma...@gmail.com>
Suggested-by: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Juri Lelli <juri....@gmail.com>
Signed-off-by: Peter Zijlstra <pet...@infradead.org>
Link: http://lkml.kernel.org/r/20140513141131.20d9...@gmail.com
Signed-off-by: Thomas Gleixner <tg...@linutronix.de>
---
kernel/sched/core.c | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f3f08bf..44e00ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3195,17 +3195,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)

tip-bot for Juri Lelli

unread,
May 22, 2014, 8:30:03 AM5/22/14
to
Commit-ID: b0827819b0da4acfbc1df1e05edcf50efd07cbd1
Gitweb: http://git.kernel.org/tip/b0827819b0da4acfbc1df1e05edcf50efd07cbd1
Author: Juri Lelli <juri....@gmail.com>
AuthorDate: Tue, 13 May 2014 14:11:31 +0200
Committer: Ingo Molnar <mi...@kernel.org>
CommitDate: Thu, 22 May 2014 10:21:27 +0200

sched/deadline: Restrict user params max value to 2^63 ns

Michael Kerrisk noticed that creating SCHED_DEADLINE reservations
with certain parameters (e.g, a runtime of something near 2^64 ns)
can cause a system freeze for some amount of time.

The problem is that in the interface we have

u64 sched_runtime;

while internally we need to have a signed runtime (to cope with
budget overruns)

s64 runtime;

At the time we setup a new dl_entity we copy the first value in
the second. The cast turns out with negative values when
sched_runtime is too big, and this causes the scheduler to go crazy
right from the start.

Moreover, considering how we deal with deadlines wraparound

(s64)(a - b) < 0

we also have to restrict acceptable values for sched_{deadline,period}.

This patch fixes the thing checking that user parameters are always
below 2^63 ns (still large enough for everyone).

It also rewrites other conditions that we check, since in
__checkparam_dl we don't have to deal with deadline wraparounds
and what we have now erroneously fails when the difference between
values is too big.

Reported-by: Michael Kerrisk <mtk.ma...@gmail.com>
Suggested-by: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Juri Lelli <juri....@gmail.com>
Signed-off-by: Peter Zijlstra <pet...@infradead.org>
Cc: <sta...@vger.kernel.org>
Cc: Dario Faggioli<rais...@linux.it>
Cc: Dave Jones <da...@redhat.com>
Cc: Linus Torvalds <torv...@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140513141131.20d9...@gmail.com
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
kernel/sched/core.c | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f3f08bf..44e00ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3195,17 +3195,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)
0 new messages