Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH] lib/vsprintf: add %pT[C012] format specifier

13 views
Skip to first unread message

Tetsuo Handa

unread,
Dec 25, 2013, 7:40:02 AM12/25/13
to
>From 545dae06c6690a0c937e082ed984f828a2ea7aa2 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
Date: Wed, 25 Dec 2013 18:16:17 +0900
Subject: [PATCH] lib/vsprintf: add %pT[C012] format specifier

Since task_struct->comm can be modified by other threads while the current
thread is reading it, it is recommended to use get_task_comm() for reading it.

However, since get_task_comm() holds task_struct->alloc_lock spinlock,
some users cannot use get_task_comm(). Also, a lot of users are directly
reading from task_struct->comm even if they can use get_task_comm().
Such users might obtain inconsistent comm name.

This patch introduces %pTC format specifier for reading task_struct->comm
and %pT0 %pT1 %pT2 format specifiers for reading task_struct->comm and
task_struct->pid.

Currently %pT does not hold task_struct->alloc_lock spinlock. This is because
I'm expecting that we will change to update task_struct->comm using RCU.

By using RCU, the comm name read from task_struct->comm will be guaranteed to
be consistent. But before modifying set_task_comm() to use RCU, we need to kill
direct ->comm users who do not use get_task_comm().

Although there might be arguments that whether to modify set_task_comm() to
use RCU, this patch will anyway serve as a cleanup.

Some examples for converting direct ->comm users are shown below.

pr_info("comm=%s\n", p->comm); => pr_info("comm=%pTC\n", p);
pr_info("%s[%u]\n", p->comm, p->pid); => pr_info("%pT0\n", p);
pr_info("%s[%u]\n", p->comm, task_pid_nr(p)); => pr_info("%pT0\n", p);
pr_info("%s/%u\n", p->comm, p->pid); => pr_info("%pT1\n", p);
pr_info("%s,%u\n", p->comm, p->pid); => pr_info("%pT2\n", p);

Since many debug printings use p == current, you can pass NULL instead of p
if p == current. That is, you can simplify like examples shown below.

pr_info("comm=%s\n", current->comm); => pr_info("comm=%pTC\n", NULL);
pr_info("(%s/%u)", current->comm, current->pid); => pr_info("(%pT1)", NULL);

Signed-off-by: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
---
Documentation/printk-formats.txt | 12 ++++++++++
lib/vsprintf.c | 44 +++++++++++++++++++++++++++++++++++++-
2 files changed, 55 insertions(+), 1 deletions(-)

diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 445ad74..c950c04 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -177,6 +177,18 @@ dentry names:
equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
n last components. %pD does the same thing for struct file.

+task_struct comm name:
+
+ format: meaning: example:
+ %pTC commname init
+ %pT0 commname[pid] init[1]
+ %pT1 commname/pid init/1
+ %pT2 commname,pid init,1
+
+ For printing task_struct->comm and optionally task_struct->pid.
+ Currently task_struct->alloc_lock is not held; might be replaced
+ by RCU in the future.
+
struct va_format:

%pV
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 10909c5..4968f57 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1155,6 +1155,38 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr,
return number(buf, end, *(const netdev_features_t *)addr, spec);
}

+static noinline_for_stack
+char *comm_name(char *buf, char *end, struct task_struct *tsk,
+ struct printf_spec spec, const char *fmt)
+{
+ char name[TASK_COMM_LEN + 20] = { };
+ const char c = *fmt;
+
+ /* Caller can pass NULL instead of current. */
+ if (!tsk)
+ tsk = current;
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ strncpy(name, tsk->comm, TASK_COMM_LEN - 1);
+ /* Optionally print pid value. */
+ if (c != 'C') {
+ struct printf_spec ps = {
+ .type = FORMAT_TYPE_UINT,
+ .base = 10,
+ };
+ char *p = name + strlen(name);
+ if (c == '0')
+ *p++ = '['; /* comm[pid] */
+ else if (c == '1')
+ *p++ = '/'; /* comm/pid */
+ else
+ *p++ = ','; /* comm,pid */
+ p = number(p, name + sizeof(name) - 2, task_pid_nr(tsk), ps);
+ if (c == '0' && p < name + sizeof(name) - 2)
+ *p++ = ']';
+ }
+ return string(buf, end, name, spec);
+}
+
int kptr_restrict __read_mostly;

/*
@@ -1221,6 +1253,7 @@ int kptr_restrict __read_mostly;
* - 'a' For a phys_addr_t type and its derivative types (passed by reference)
* - 'd[234]' For a dentry name (optionally 2-4 last components)
* - 'D[234]' Same as 'd' but for a struct file
+ * - 'T[C012]' task_struct->comm and optionally task_struct->pid
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1232,7 +1265,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
{
int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);

- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'T') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
@@ -1364,6 +1397,15 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return dentry_name(buf, end,
((const struct file *)ptr)->f_path.dentry,
spec, fmt);
+ case 'T':
+ switch (fmt[1]) {
+ case 'C':
+ case '0':
+ case '1':
+ case '2':
+ return comm_name(buf, end, ptr, spec, fmt + 1);
+ }
+ break;
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Joe Perches

unread,
Dec 26, 2013, 3:50:01 AM12/26/13
to
On Wed, 2013-12-25 at 21:37 +0900, Tetsuo Handa wrote:
> This patch introduces %pTC format specifier for reading task_struct->comm
> and %pT0 %pT1 %pT2 format specifiers for reading task_struct->comm and
> task_struct->pid.
[]
> pr_info("comm=%s\n", p->comm); => pr_info("comm=%pTC\n", p);
> pr_info("%s[%u]\n", p->comm, p->pid); => pr_info("%pT0\n", p);
> pr_info("%s[%u]\n", p->comm, task_pid_nr(p)); => pr_info("%pT0\n", p);
> pr_info("%s/%u\n", p->comm, p->pid); => pr_info("%pT1\n", p);
> pr_info("%s,%u\n", p->comm, p->pid); => pr_info("%pT2\n", p);

Perhaps the presentation form should be standardized as well,
so %pT[012] would be unnecessary.'

Perhaps using only %pt and %ptp to emit the pid if desired
would be better.

Andrew Morton

unread,
Dec 27, 2013, 6:10:01 PM12/27/13
to
On Wed, 25 Dec 2013 21:37:33 +0900 Tetsuo Handa <penguin...@i-love.sakura.ne.jp> wrote:

> Some examples for converting direct ->comm users are shown below.
>
> pr_info("comm=%s\n", p->comm); => pr_info("comm=%pTC\n", p);
> pr_info("%s[%u]\n", p->comm, p->pid); => pr_info("%pT0\n", p);
> pr_info("%s[%u]\n", p->comm, task_pid_nr(p)); => pr_info("%pT0\n", p);
> pr_info("%s/%u\n", p->comm, p->pid); => pr_info("%pT1\n", p);
> pr_info("%s,%u\n", p->comm, p->pid); => pr_info("%pT2\n", p);

Places where one task accesses a different tasks's ->comm are rare, and
those places damn well better have a lot of locking in place -
otherwise they are racy against much more serious things than prctl().

The vast majority of ->comm accesses are accessing current->comm, for
debug reasons. I'm counting 350-odd sites. At all these sites it's
pointless passing `current' to the printk function at all!

I wonder if there's some way in which we can invent a vsprintf token
which means "insert corrent->comm here" and which doesn't require that
the caller pass in the additional argument?



That being said.....

current->comm isn't a terribly good way of identifying a task - it's
unaware of namespaces, is non-unique, userspace can overwrite ->comm[]
to anything it wants and evil users can probably write silly stuff into
->comm[] to confuse sysadmin tools.

So perhaps the world would be a better place if we were to invent a
standard kernel-wide way of identifying a process within a debug
printk. Presumably it would include ->comm and the pid, but other
things can be added later if needed.

So a usage site would look like:

pr_warn("%s: hair on fire\n", this_task_id());

but we need storage so it's really

char b[THIS_TASK_ID_SIZE];

pr_warn("%s: hair on fire\n", this_task_id(b));

which is painful, so we also provide the new vsprintf token as a
convenience:

pr_warn("%|: hair on fire\n");

but I don't know what we can use in place of %|.

Tetsuo Handa

unread,
Dec 27, 2013, 10:50:01 PM12/27/13
to
Andrew Morton wrote:
> which is painful, so we also provide the new vsprintf token as a
> convenience:
>
> pr_warn("%|: hair on fire\n");
>
> but I don't know what we can use in place of %|.

We are using %pEXTENSION where EXTENSION is [A-Za-z0-9]* because compiler does
not need to understand what EXTENSION does; compiler needs to care about what
character follows the % character and check type of corresponding argument if
__printf() attribute is given.

If we introduce a character which compiler does not know that follows the %
character, compiler would be confused when checking type of corresponding
argument.

> I wonder if there's some way in which we can invent a vsprintf token
> which means "insert corrent->comm here" and which doesn't require that
> the caller pass in the additional argument?

Therefore, if we want to omit passing corresponding argument, we should not
introduce new character which compiler does not know that follows the %
character.

Also, % is the only character which everybody knows that it is reserved for the
beginning of format specifier and %% is the only characters which everybody
knows that it is reserved for literal % character.

Therefore, what we could do for printing current thread's attributes would be
either reserve a new character and add EXTENSION like

pr_warn("$comm$: hair on fire\n");
pr_warn("Process $pid$: hair on fire\n");

or add EXTENSION after the %% characters like

pr_warn("%%comm%%: hair on fire\n");
pr_warn("Process %%pid%%: hair on fire\n");

Geert Uytterhoeven

unread,
Dec 28, 2013, 2:00:02 PM12/28/13
to
On Sat, Dec 28, 2013 at 4:43 AM, Tetsuo Handa
<penguin...@i-love.sakura.ne.jp> wrote:
> Andrew Morton wrote:
>> which is painful, so we also provide the new vsprintf token as a
>> convenience:
>>
>> pr_warn("%|: hair on fire\n");
>>
>> but I don't know what we can use in place of %|.
>
> We are using %pEXTENSION where EXTENSION is [A-Za-z0-9]* because compiler does
> not need to understand what EXTENSION does; compiler needs to care about what
> character follows the % character and check type of corresponding argument if
> __printf() attribute is given.

Indeed,

> If we introduce a character which compiler does not know that follows the %
> character, compiler would be confused when checking type of corresponding
> argument.
>
>> I wonder if there's some way in which we can invent a vsprintf token
>> which means "insert corrent->comm here" and which doesn't require that
>> the caller pass in the additional argument?
>
> Therefore, if we want to omit passing corresponding argument, we should not
> introduce new character which compiler does not know that follows the %
> character.
>
> Also, % is the only character which everybody knows that it is reserved for the
> beginning of format specifier and %% is the only characters which everybody
> knows that it is reserved for literal % character.
>
> Therefore, what we could do for printing current thread's attributes would be
> either reserve a new character and add EXTENSION like
>
> pr_warn("$comm$: hair on fire\n");
> pr_warn("Process $pid$: hair on fire\n");
>
> or add EXTENSION after the %% characters like
>
> pr_warn("%%comm%%: hair on fire\n");
> pr_warn("Process %%pid%%: hair on fire\n");

ESC sequences? So far printk() doesn't parse them (a bit unfortunate, as I
always liked the idea of printing error messages in red, warnings in yellow,
etc.).

Is any of the "\x" (backslash + character) unused and thus available?

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

Andrew Morton

unread,
Dec 28, 2013, 2:30:02 PM12/28/13
to
On Sat, 28 Dec 2013 19:57:50 +0100 Geert Uytterhoeven <ge...@linux-m68k.org> wrote:

> > If we introduce a character which compiler does not know that follows the %
> > character, compiler would be confused when checking type of corresponding
> > argument.
> >
> >> I wonder if there's some way in which we can invent a vsprintf token
> >> which means "insert corrent->comm here" and which doesn't require that
> >> the caller pass in the additional argument?
> >
> > Therefore, if we want to omit passing corresponding argument, we should not
> > introduce new character which compiler does not know that follows the %
> > character.
> >
> > Also, % is the only character which everybody knows that it is reserved for the
> > beginning of format specifier and %% is the only characters which everybody
> > knows that it is reserved for literal % character.
> >
> > Therefore, what we could do for printing current thread's attributes would be
> > either reserve a new character and add EXTENSION like
> >
> > pr_warn("$comm$: hair on fire\n");
> > pr_warn("Process $pid$: hair on fire\n");
> >
> > or add EXTENSION after the %% characters like
> >
> > pr_warn("%%comm%%: hair on fire\n");
> > pr_warn("Process %%pid%%: hair on fire\n");
>
> ESC sequences? So far printk() doesn't parse them (a bit unfortunate, as I
> always liked the idea of printing error messages in red, warnings in yellow,
> etc.).
>
> Is any of the "\x" (backslash + character) unused and thus available?

I guess control characters would work.

#define PRINTK_COMM "\001"
#define PRINTK_PID "\002"
#define PRINTK_TASK_ID "\003" /* "comm:pid" */

printk(PRINTK_TASK_ID ": hair on fire\n");

It's certainly compact. I doubt if there's any existing code which
deliberately prints control chars?

Geert Uytterhoeven

unread,
Dec 28, 2013, 2:30:02 PM12/28/13
to
On Sat, Dec 28, 2013 at 8:25 PM, Andrew Morton
<ak...@linux-foundation.org> wrote:
>> Is any of the "\x" (backslash + character) unused and thus available?
>
> I guess control characters would work.
>
> #define PRINTK_COMM "\001"

Not that one, cfr. include/linux/kern_levels.h ;-)

> #define PRINTK_PID "\002"
> #define PRINTK_TASK_ID "\003" /* "comm:pid" */
>
> printk(PRINTK_TASK_ID ": hair on fire\n");
>
> It's certainly compact. I doubt if there's any existing code which
> deliberately prints control chars?

But the rest looks OK to me.

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

Joe Perches

unread,
Dec 28, 2013, 3:00:02 PM12/28/13
to
On Sat, 2013-12-28 at 20:25 +0100, Geert Uytterhoeven wrote:
> On Sat, Dec 28, 2013 at 8:25 PM, Andrew Morton
> <ak...@linux-foundation.org> wrote:
> >> Is any of the "\x" (backslash + character) unused and thus available?
> >
> > I guess control characters would work.
> >
> > #define PRINTK_COMM "\001"
>
> Not that one, cfr. include/linux/kern_levels.h ;-)

yup.

> > #define PRINTK_PID "\002"
> > #define PRINTK_TASK_ID "\003" /* "comm:pid" */
> >
> > printk(PRINTK_TASK_ID ": hair on fire\n");
> >
> > It's certainly compact. I doubt if there's any existing code which
> > deliberately prints control chars?
>
> But the rest looks OK to me.

Tell me again, what's wrong with using p or current?

printk("%pt", current);

Andrew Morton

unread,
Dec 28, 2013, 3:10:03 PM12/28/13
to
On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:

> > > #define PRINTK_PID "\002"
> > > #define PRINTK_TASK_ID "\003" /* "comm:pid" */
> > >
> > > printk(PRINTK_TASK_ID ": hair on fire\n");
> > >
> > > It's certainly compact. I doubt if there's any existing code which
> > > deliberately prints control chars?
> >
> > But the rest looks OK to me.
>
> Tell me again, what's wrong with using p or current?
>
> printk("%pt", current);

Nothing much. It's just that all these callsites are generating the
code to pass an argument which the callee already has access to.
Optimizing that will reduce text size a bit.

There's also the matter of providing a standard and abstracted way of
representing a task, instead of directly accessing its ->comm. But
that's a separate thing from new printk tokens.

Joe Perches

unread,
Dec 28, 2013, 3:30:02 PM12/28/13
to
On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
>
> > > > #define PRINTK_PID "\002"
> > > > #define PRINTK_TASK_ID "\003" /* "comm:pid" */

> > > >
> > > > printk(PRINTK_TASK_ID ": hair on fire\n");
> > > >
> > > > It's certainly compact. I doubt if there's any existing code which
> > > > deliberately prints control chars?
> > >
> > > But the rest looks OK to me.
> >
> > Tell me again, what's wrong with using p or current?
> >
> > printk("%pt", current);
>
> Nothing much. It's just that all these callsites are generating the
> code to pass an argument which the callee already has access to.
> Optimizing that will reduce text size a bit.

There have been smaller reductions in code size
than that...

> There's also the matter of providing a standard and abstracted way of
> representing a task, instead of directly accessing its ->comm. But
> that's a separate thing from new printk tokens.

OK then perhaps use KERN_<ATTR> or CURRENT_<ATTR>
rather than PRINTK_<ATTR>

Tetsuo Handa

unread,
Dec 28, 2013, 7:40:02 PM12/28/13
to
Joe Perches wrote:
> On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> > On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
> >
> > > > > #define PRINTK_PID "\002"
> > > > > #define PRINTK_TASK_ID "\003" /* "comm:pid" */
>
> > > > >
> > > > > printk(PRINTK_TASK_ID ": hair on fire\n");
> > > > >
> > > > > It's certainly compact. I doubt if there's any existing code which
> > > > > deliberately prints control chars?

What about using bytes from \x7F to \xFF ?

We are not passing multibyte characters like UTF-8 in the format string (are
we?) because the non-first byte of multibyte characters by error matching %
will cause security problem (format string bug) because the format string is
parsed as char array.

Then, we could do something like below.

pr_info("%s", current->comm); => pr_info("\x7F\x80");
pr_info("%d", current->pid); => pr_info("\x7F\x81");
pr_info("%10d", current->pid); => pr_info("\x7F10\x81");

If precision field support is unnecessary, we could use only \x80 to \xFF .

pr_info("%s", current->comm); => pr_info("\x80");
pr_info("%d", current->pid); => pr_info("\x81");

Joe Perches

unread,
Dec 28, 2013, 9:10:01 PM12/28/13
to
On Sun, 2013-12-29 at 09:32 +0900, Tetsuo Handa wrote:
> Joe Perches wrote:
> > On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> > > On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
> > >
> > > > > > #define PRINTK_PID "\002"
> > > > > > #define PRINTK_TASK_ID "\003" /* "comm:pid" */
> >
> > > > > >
> > > > > > printk(PRINTK_TASK_ID ": hair on fire\n");
> > > > > >
> > > > > > It's certainly compact. I doubt if there's any existing code which
> > > > > > deliberately prints control chars?
>
> What about using bytes from \x7F to \xFF ?
>
> We are not passing multibyte characters like UTF-8 in the format string (are
> we?) because the non-first byte of multibyte characters by error matching %
> will cause security problem (format string bug) because the format string is
> parsed as char array.
>
> Then, we could do something like below.
>
> pr_info("%s", current->comm); => pr_info("\x7F\x80");
> pr_info("%d", current->pid); => pr_info("\x7F\x81");
> pr_info("%10d", current->pid); => pr_info("\x7F10\x81");
>
> If precision field support is unnecessary, we could use only \x80 to \xFF .
>
> pr_info("%s", current->comm); => pr_info("\x80");
> pr_info("%d", current->pid); => pr_info("\x81");

My preference would be to do something like:

$ cat include/asm-generic/current.h
#ifndef __ASM_GENERIC_CURRENT_H
#define __ASM_GENERIC_CURRENT_H

#include <linux/thread_info.h>

#define get_current() (current_thread_info()->task)
#define current get_current()

#define CURRENT_SUB "\032"
#define CURRENT_SUB_ASCII '\032'

#define CURRENT_ID CURRENT_SUB "1"
#define CURRENT_COMM CURRENT_SUB "2"
#define CURRENT_TASK_STATE CURRENT_SUB "3"
#define CURRENT_TASK_PID CURRENT_SUB "4"
#define CURRENT_TASK_GID CURRENT_SUB "5"
#define CURRENT_TASK_UID CURRENT_SUB "6"
#define CURRENT_TASK_FSUID CURRENT_SUB "7"
#define CURRENT_TASK_FSGID CURRENT_SUB "8"
/* add more as necessary */

#endif /* __ASM_GENERIC_CURRENT_H */

Tetsuo Handa

unread,
Dec 29, 2013, 7:20:02 AM12/29/13
to
Tetsuo Handa wrote:
> Joe Perches wrote:
> > On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> > > On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
> > >
> > > > > > #define PRINTK_PID "\002"
> > > > > > #define PRINTK_TASK_ID "\003" /* "comm:pid" */
> >
> > > > > >
> > > > > > printk(PRINTK_TASK_ID ": hair on fire\n");
> > > > > >
> > > > > > It's certainly compact. I doubt if there's any existing code which
> > > > > > deliberately prints control chars?
>
> What about using bytes from \x7F to \xFF ?

Here is a draft patch. With this approach, we can embed whatever variables
vsnprintf() can access (not only current thread's attributes seen from init and
current namespace but also other variables like current time) into the format
string without passing corresponding argument.

Is this approach acceptable? (Of course, I'll add lines like

#define PRINT_FORMAT "\x7F"
#define PRINT_TASK_COMM "\x80"
#define PRINT_TASK_PID "\x81"

for final version.) What variables (e.g. current->comm, current->pid,
task_tgid_vnr(current)) do we want to pass using builtin tokens?
----------
>From 1d891b935efeab46cb1947d72659187c8e47c93c Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
Date: Sun, 29 Dec 2013 20:58:50 +0900
Subject: [PATCH] lib/vsprintf: support builtin tokens

The vast majority of ->comm accesses are accessing current->comm, for
debug reasons. I'm counting 350-odd sites. At all these sites it's
pointless passing `current' to the printk function at all!

This patch introduces a set of vsprintf tokens for embedding globally visible
arguments into the format string, so that we can reduce text size a bit (and
in the future make reading of task_struct->comm consistent) by stop generating
the code to pass an argument which the callee already has access to, with an
assumption that we can avoid using bytes >= 0x7F within the format string.

Some examples for converting direct ->comm users are shown below.

pr_info("comm=%s\n", current->comm); => pr_info("comm=\x80\n");
pr_info("pid=%d\n", current->pid); => pr_info("pid=\x81\n");
pr_info("%s(%d)\n", current->comm, current->pid); => pr_info("\x80(\x81)\n");
pr_info("[%-6.6s]\n", current->comm); => pr_info("\x7F-6.6\x80\n");

Since this patch does not use get_task_comm(), you should not convert from
get_task_comm(current) to builtin tokens if you want constsistent reading.

Suggested-by: Andrew Morton <ak...@linux-foundation.org>
Signed-off-by: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
---
lib/vsprintf.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 10909c5..50cdb39 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -366,7 +366,8 @@ enum format_type {
FORMAT_TYPE_INT,
FORMAT_TYPE_NRCHARS,
FORMAT_TYPE_SIZE_T,
- FORMAT_TYPE_PTRDIFF
+ FORMAT_TYPE_PTRDIFF,
+ FORMAT_TYPE_BUILTIN_TOKEN,
};

struct printf_spec {
@@ -1375,6 +1376,69 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return number(buf, end, (unsigned long) ptr, spec);
}

+static inline int format_decode_builtin(const char *fmt, const char *start,
+ struct printf_spec *spec)
+{
+ spec->type = FORMAT_TYPE_BUILTIN_TOKEN;
+ spec->flags = 0;
+ spec->field_width = -1;
+ spec->precision = -1;
+ if (*fmt == 0x7F) { /* Custom flags, field width and precision. */
+ if (*++fmt == '-') {
+ spec->flags = LEFT;
+ ++fmt;
+ }
+ if (isdigit(*fmt))
+ spec->field_width = skip_atoi(&fmt);
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt)) {
+ spec->precision = skip_atoi(&fmt);
+ if (spec->precision < 0)
+ spec->precision = 0;
+ }
+ }
+ }
+ if (*(unsigned char *) fmt & 0x80) {
+ spec->base = *fmt; /* Borrow base field for passing token. */
+ return ++fmt - start;
+ }
+ spec->type = FORMAT_TYPE_INVALID;
+ return fmt - start;
+}
+
+static noinline_for_stack
+char *builtin_token(char *buf, char *end, struct printf_spec spec)
+{
+ struct task_struct *const tsk = current;
+ union {
+ char str[TASK_COMM_LEN];
+ int num;
+ } arg;
+
+ switch (spec.base) { /* What the token is? */
+ case 0x80: /* current->comm */
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ memcpy(arg.str, tsk->comm, sizeof(arg.str));
+ /*
+ * Intentionally copied 16 bytes for compiler optimization.
+ * Make sure that str is '\0'-terminated in case ->comm was
+ * by error not '\0'-terminated.
+ */
+ arg.str[sizeof(arg.str) - 1] = '\0';
+ goto print_string;
+ case 0x81: /* task_pid_nr(current) or current->pid */
+ arg.num = task_pid_nr(tsk);
+ goto print_number;
+ }
+ memcpy(arg.str, "?", 2); /* This should not happen. */
+ print_string:
+ return string(buf, end, arg.str, spec);
+ print_number:
+ spec.base = 10; /* Assign base field. */
+ return number(buf, end, arg.num, spec);
+}
+
/*
* Helper function to decode printf style format.
* Each call decode a token from the format and return the
@@ -1423,7 +1487,7 @@ int format_decode(const char *fmt, struct printf_spec *spec)
spec->type = FORMAT_TYPE_NONE;

for (; *fmt ; ++fmt) {
- if (*fmt == '%')
+ if (*fmt == '%' || *(unsigned char *) fmt >= 0x7F)
break;
}

@@ -1431,6 +1495,10 @@ int format_decode(const char *fmt, struct printf_spec *spec)
if (fmt != start || !*fmt)
return fmt - start;

+ /* Process built-in tokens. */
+ if (*(unsigned char *) fmt >= 0x7F)
+ return format_decode_builtin(fmt, start, spec);
+
/* Process flags */
spec->flags = 0;

@@ -1725,6 +1793,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
break;
}

+ case FORMAT_TYPE_BUILTIN_TOKEN:
+ str = builtin_token(str, end, spec);
+ break;
+
default:
switch (spec.type) {
case FORMAT_TYPE_LONG_LONG:
--
1.7.1

Joe Perches

unread,
Dec 30, 2013, 12:00:02 PM12/30/13
to
On Sun, 2013-12-29 at 21:13 +0900, Tetsuo Handa wrote:
> Tetsuo Handa wrote:
> > Joe Perches wrote:
> > > On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> > > > On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
> > > >
> > > > > > > #define PRINTK_PID "\002"
> > > > > > > #define PRINTK_TASK_ID "\003" /* "comm:pid" */
> > >
> > > > > > >
> > > > > > > printk(PRINTK_TASK_ID ": hair on fire\n");
> > > > > > >
> > > > > > > It's certainly compact. I doubt if there's any existing code which
> > > > > > > deliberately prints control chars?
> >
> > What about using bytes from \x7F to \xFF ?
>
> Here is a draft patch. With this approach, we can embed whatever variables
> vsnprintf() can access (not only current thread's attributes seen from init and
> current namespace but also other variables like current time) into the format
> string without passing corresponding argument.
>
> Is this approach acceptable? (Of course, I'll add lines like
>
> #define PRINT_FORMAT "\x7F"
> #define PRINT_TASK_COMM "\x80"
> #define PRINT_TASK_PID "\x81"
>
> for final version.) What variables (e.g. current->comm, current->pid,
> task_tgid_vnr(current)) do we want to pass using builtin tokens?
[]
> The vast majority of ->comm accesses are accessing current->comm, for
> debug reasons. I'm counting 350-odd sites. At all these sites it's
> pointless passing `current' to the printk function at all!

I get:

$ grep-2.5.4 -rP --include=*.[ch] -oh \
"\b(?:printk|[a-z_]+_(?:printk|emerg|alert|crit|err|warn|notice|info|debug|dbg))[^;]*\bcurrent->[\w_]+" * | \
grep -P -oh "\bcurrent->[\w_]+"| sort | uniq -c | sort -rn
380 current->pid
267 current->comm
34 current->mm
25 current->thread
17 current->journal_info
8 current->flags
7 current->tgid
5 current->active_mm
2 current->sas_ss_sp
2 current->addr_limit
1 current->uid
1 current->signal
1 current->sas_ss_size
1 current->ret_stack
1 current->memcg_batch
1 current->kernel_stack_page
1 current->group_leader
1 current->exit_code
1 current->cred
1 current->blocked

> This patch introduces a set of vsprintf tokens for embedding globally visible
> arguments into the format string, so that we can reduce text size a bit (and
> in the future make reading of task_struct->comm consistent) by stop generating
> the code to pass an argument which the callee already has access to, with an
> assumption that we can avoid using bytes >= 0x7F within the format string.
>
> Some examples for converting direct ->comm users are shown below.
>
> pr_info("comm=%s\n", current->comm); => pr_info("comm=\x80\n");
> pr_info("pid=%d\n", current->pid); => pr_info("pid=\x81\n");
> pr_info("%s(%d)\n", current->comm, current->pid); => pr_info("\x80(\x81)\n");
> pr_info("[%-6.6s]\n", current->comm); => pr_info("\x7F-6.6\x80\n");

I too prefer using the #define CURRENT_<FOO> string approach
rather than direct embedding of string.

pr_info("comm=" CURRENT_COMM "\n");

Also I prefer using ASCII SUB (26 \x1a \032 ^z) or maybe
PU1 - 145 or PU2 - 146, as an initiator byte as it takes
up much less of the control word space instead of using
multiple values like \x80, \x81, \x82, etc. Using an
initiator byte seems more extensible too.

http://en.wikipedia.org/wiki/C0_and_C1_control_codes

Tetsuo Handa

unread,
Dec 31, 2013, 2:00:01 AM12/31/13
to
Joe Perches wrote:
> I get:
>
> $ grep-2.5.4 -rP --include=*.[ch] -oh \
> "\b(?:printk|[a-z_]+_(?:printk|emerg|alert|crit|err|warn|notice|info|debug|dbg))[^;]*\bcurrent->[\w_]+" * | \
> grep -P -oh "\bcurrent->[\w_]+"| sort | uniq -c | sort -rn
> 380 current->pid
> 267 current->comm

We also have cases like

struct task_struct *tsk = current;
printk("%s/%d", tsk->comm, tsk->pid);
printk("%d", task_pid_nr(tsk));
snprintf(buf, sizeof(buf), "%d", task_pid_nr(current));

which are not counted with your grep rule.
But anyway, let's start with ->comm and ->pid .

> Also I prefer using ASCII SUB (26 \x1a \032 ^z) or maybe
> PU1 - 145 or PU2 - 146, as an initiator byte as it takes
> up much less of the control word space instead of using
> multiple values like \x80, \x81, \x82, etc. Using an
> initiator byte seems more extensible too.

My format and rules are:

#define EMBED_FORMAT "0x7F"
#define EMBED_CURRENT_COMM "0x80"
#define EMBED_CURRENT_PID "0x81"

We need to use EMBED_FORMAT prefix only when you want to specify one (or
more) of flags, field width and precision options. That is,

pr_info("%s(%d)\n", current->comm, current->pid);
=> pr_info(EMBED_CURRENT_COMM "(" EMBED_CURRENT_PID ")\n");

pr_info("[%-6.6s]\n", current->comm);
=> pr_info(EMBED_FORMAT "-6.6" EMBED_CURRENT_COMM "\n");

But I can't imagine what your format and rules are because

#define CURRENT_SUB "\032"
#define CURRENT_SUB_ASCII '\032'

are ' ' character which is also used within the format string.
Also, if you assign one of ('0' to '9', '-', '.') for variable name like

#define CURRENT_ID CURRENT_SUB "1"
#define CURRENT_COMM CURRENT_SUB "2"

you will need a separating byte in order to distinguish end of
flags, field width and precision options.

Please describe your format and rules (e.g. what byte starts a built-in token;
what bytes are used for representing variable name, what separates flags, field
width and precision options from variable name if these options are specified,
what byte terminates a built-in token) using examples below.

pr_info("%s(%d)\n", current->comm, current->pid);

pr_info("[%-6.6s]\n", current->comm);

Joe Perches

unread,
Dec 31, 2013, 11:30:03 AM12/31/13
to
On Tue, 2013-12-31 at 15:53 +0900, Tetsuo Handa wrote:
> Joe Perches wrote:
The goal of a single leading char is simply resource
economy. Using fewer chars from the available pool may be
better than using more. Using a couple more bytes in the
format string doesn't seem an excessive overhead.

> Please describe your format and rules (e.g. what byte starts a built-in token;
> what bytes are used for representing variable name, what separates flags, field
> width and precision options from variable name if these options are specified,
> what byte terminates a built-in token) using examples below.
>
> pr_info("%s(%d)\n", current->comm, current->pid);

pr_info(CURRENT_COMM "(" CURRENT_PID ")\n");

> pr_info("[%-6.6s]\n", current->comm);

I think using formatting controls for field widths and
such shouldn't be supported but if it is, then using yet
another macro form like below is possible

either:
#define CURRENT_COMM_FORMAT(fmt) \
CURRENT_SUB fmt "2"
or
#define CURRENT_COMM_FORMATTED(fmt) \
CURRENT_SUB __stringify(fmt) "2"

So maybe,
pr_info(CURRENT_COMM_FORMAT("-6.6") "\n");
or
pr_info(CURRENT_COMM_FORMATTED(-6.6) "\n");

depending on taste could work.

"2" would need changing to something like "t2" so any
leading format directives like field width and precision
could be done properly.

In other words: using a separating byte may be necessary if
some formatting directive support is required.

This wouldn't/couldn't work with formats where field length
is specified via "*" with a separate int.

Perhaps that's a good enough reason not to support using
format directives.

Tetsuo Handa

unread,
Jan 1, 2014, 12:40:01 AM1/1/14
to
I think we want formatting directive support because we have users shown below.

fs/afs/internal.h: printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__)
fs/cachefiles/internal.h: printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
fs/fscache/internal.h: printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)

Though I think that

> This wouldn't/couldn't work with formats where field length
> is specified via "*" with a separate int.
>
> Perhaps that's a good enough reason not to support using
> format directives.

we don't need to support complete set like '*'.

> The goal of a single leading char is simply resource
> economy. Using fewer chars from the available pool may be
> better than using more. Using a couple more bytes in the
> format string doesn't seem an excessive overhead.

Then, I think we can choose as

Byte for beginning of built-in token:

'\xFF'

Bytes for specifying flags, field width, precision options:

'0' to '9', '-' and '.'

Bytes for specifying built-in variable:

All but '\xFF', '0' to '9', '-', '.', '%' and '\x00'

and define 241 built-in variables, while using only single leading char (i.e.
'\xFF') from the available pool (all but '%' and '\x00').

#define EMBED_FORMAT "\xFF"
#define EMBED_CURRENT_COMM "\xFF" "\x80"
#define EMBED_CURRENT_PID "\xFF" "\x81"

pr_info("%s(%d)\n", current->comm, current->pid);
=> pr_info(EMBED_CURRENT_COMM "(" EMBED_CURRENT_PID ")\n");

pr_info("[%-6.6s]\n", current->comm);
=> pr_info(EMBED_FORMAT "-6.6" EMBED_CURRENT_COMM "\n");

Since I assume that 241 built-in variables are sufficient, I'm laying
"Bytes for specifying built-in variable" under obligation of also acting as
"Bytes for ending of built-in token".

This choice works because once format_decode() encounters '\xFF' byte, it can
continue reading until it encounters one of "Bytes for ending of built-in
token", and split them into "Bytes for specifying flags, field width, precision
options" and "Bytes for specifying built-in variable" without any fuzziness.

\xFF\x80(\xFF\x80)\n
^ ^ ^ ^
| | | +- Decoder interprets as variable name and understands that this
| | | is a trailing byte.
| | +----- Decoder interprets as a leading byte because decoder sees
| | this byte for the first time.
| +---------- Decoder interprets as variable name and understands that this
| is a trailing byte.
+-------------- Decoder interprets as a leading byte because decoder sees
this byte for the first time.

[\xFF-6.6\xFF\x80]\n
^ ^ ^ ^
| | | +- Decoder interprets as variable name and understands that
| | | this is a trailing byte.
| | +----- Decoder ignores this byte because decoder already saw
| | this byte.
| +--------- Decoder interprets as format specifier.
+------------- Decoder interprets as leading byte because decoder sees
this byte for the first time.

This choice (i.e. reserve only '\xFF') is more resource economy than my
previous choice (i.e. reserve '\x7F' to '\xFF') at the cost of wasting only
one byte compared to my previous choice.

Joe Perches

unread,
Jan 1, 2014, 1:00:01 AM1/1/14
to
On Wed, 2014-01-01 at 14:34 +0900, Tetsuo Handa wrote:
> I think we want formatting directive support because we have users shown below.
>
> fs/afs/internal.h: printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__)
> fs/cachefiles/internal.h: printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
> fs/fscache/internal.h: printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)

Maybe, but I still think it's not particularly useful
and should not be possible/isn't necessary.

> This choice (i.e. reserve only '\xFF') is more resource economy than my
> previous choice (i.e. reserve '\x7F' to '\xFF') at the cost of wasting only
> one byte compared to my previous choice.

I supposed that's better.

Is there a particularly utility/reason to use 0xff
vs ascii SUB/PU1/PU2?

Tetsuo Handa

unread,
Jan 1, 2014, 5:10:03 AM1/1/14
to
Joe Perches wrote:
> > This choice (i.e. reserve only '\xFF') is more resource economy than my
> > previous choice (i.e. reserve '\x7F' to '\xFF') at the cost of wasting only
> > one byte compared to my previous choice.
>
> I supposed that's better.
>
> Is there a particularly utility/reason to use 0xff
> vs ascii SUB/PU1/PU2?
>
Nothing. Is 0x1A preferable to 0xFF?
----------
>From ebace41254ea02250c01c56a9e3ee08a34957830 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
Date: Wed, 1 Jan 2014 18:15:34 +0900
Subject: [PATCH v2] lib/vsprintf: support built-in tokens

The vast majority of ->comm accesses are accessing current->comm, for
debug reasons. I'm counting 350-odd sites. At all these sites it's
pointless passing `current' to the printk function at all!

This patch introduces a set of vsprintf tokens for embedding globally visible
arguments into the format string, so that we can reduce text size a bit (and
in the future make reading of task_struct->comm consistent) by stop generating
the code to pass an argument which the callee already has access to, with an
assumption that currently we are not using the 0xFF byte within the format
string.

Some examples are shown below.

pr_info("comm=%s\n", current->comm);
=> pr_info("comm=" EMBED_CURRENT_COMM "\n");

pr_info("pid=%d\n", current->pid);
=> pr_info("pid=" EMBED_CURRENT_PID "\n");

pr_info("%s(%d)\n", current->comm, current->pid);
=> pr_info(EMBED_CURRENT_COMM "(" EMBED_CURRENT_PID ")\n");

pr_info("[%-6.6s]\n", current->comm);
=> pr_info(EMBED_FORMAT "-6.6" EMBED_CURRENT_COMM "\n");

Since this patch does not use get_task_comm(), you should not convert from
get_task_comm(current) to built-in tokens if you want constsistent reading.

Suggested-by: Andrew Morton <ak...@linux-foundation.org>
Signed-off-by: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
---
include/linux/kernel.h | 17 ++++++++
lib/vsprintf.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ecb8754..5c78971 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -353,6 +353,23 @@ extern int num_to_str(char *buf, int size, unsigned long long num);

/* lib/printf utilities */

+/*
+ * Built-in tokens for __printf() functions.
+ *
+ * The __printf() functions interpret special bytes starting from a \xFF byte
+ * inside the format string as built-in tokens. The built-in tokens are used
+ * for saving text size (and centralizing pointer dereferences) by omitting
+ * arguments which are visible from __printf() functions. This means that
+ * the format string must not contain the \xFF byte. If the format string
+ * contains the \xFF byte which do not mean to be built-in tokens, you need to
+ * change from pr_info("\xFF\n") to pr_info("%c\n", '0xFF').
+ *
+ * See builtin_token() in lib/vsprintf.c for more information.
+ */
+#define EMBED_FORMAT "\xFF"
+#define EMBED_CURRENT_COMM "\xFF\xFE"
+#define EMBED_CURRENT_PID "\xFF\xFD"
+
extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...);
extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list);
extern __printf(3, 4)
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 10909c5..4b7c915 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -366,7 +366,8 @@ enum format_type {
FORMAT_TYPE_INT,
FORMAT_TYPE_NRCHARS,
FORMAT_TYPE_SIZE_T,
- FORMAT_TYPE_PTRDIFF
+ FORMAT_TYPE_PTRDIFF,
+ FORMAT_TYPE_BUILTIN_TOKEN,
};

struct printf_spec {
@@ -1375,6 +1376,92 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return number(buf, end, (unsigned long) ptr, spec);
}

+static inline const char *format_decode_builtin(const char *fmt,
+ struct printf_spec *spec)
+{
+ spec->type = FORMAT_TYPE_BUILTIN_TOKEN;
+ spec->flags = 0;
+ spec->field_width = -1;
+ spec->precision = -1;
+ /* Check custom flags, field width and precision. */
+ if (*fmt == '-') {
+ fmt++;
+ spec->flags = LEFT;
+ }
+ if (isdigit(*fmt))
+ spec->field_width = skip_atoi(&fmt);
+ if (*fmt == '.') {
+ fmt++;
+ if (isdigit(*fmt)) {
+ spec->precision = skip_atoi(&fmt);
+ if (spec->precision < 0)
+ spec->precision = 0;
+ }
+ }
+ /*
+ * Skip redundant byte if custom flags, field width or precision are
+ * specified.
+ */
+ if (*fmt == (char) 0xFF)
+ fmt++;
+ spec->base = *fmt; /* Borrow base field for passing token. */
+ if (spec->base)
+ fmt++;
+ return fmt;
+}
+
+/*
+ * Examples of using built-in tokens:
+ *
+ * pr_info("%s\n", current->comm) => pr_info(EMBED_CURRENT_COMM "\n");
+ *
+ * pr_info("%s(%d)\n", current->comm, current->pid);
+ * => pr_info(EMBED_CURRENT_COMM "(" EMBED_CURRENT_PID ")\n");
+ *
+ * pr_info("[%-6.6s]\n", current->comm);
+ * => pr_info(EMBED_FORMAT "-6.6" EMBED_CURRENT_COMM "\n");
+ *
+ * You can define other tokens using "\xFF\x??" format where ?? is a two hex
+ * digits other than 00 ('\0'), 25 ('%'), 2D ('-'), 2E ('.'), 30 ('0') to 39
+ * ('9') and FF ('\xFF').
+ */
+static noinline_for_stack
+char *builtin_token(char *buf, char *end, struct printf_spec spec)
+{
+ struct task_struct *const tsk = current;
+ union {
+ char str[TASK_COMM_LEN];
+ int num;
+ } arg;
+
+ switch (spec.base) {
+ case 0xFE: /* current->comm */
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ memcpy(arg.str, tsk->comm, sizeof(arg.str));
+ /*
+ * Intentionally copied 16 bytes for compiler optimization.
+ * Make sure that str is '\0'-terminated in case ->comm was
+ * by error not '\0'-terminated.
+ */
+ arg.str[sizeof(arg.str) - 1] = '\0';
+ goto print_string;
+ case 0xFD: /* task_pid_nr(current) or current->pid */
+ arg.num = task_pid_nr(tsk);
+ goto print_number;
+ default: /* Oops, this built-in token is not defined. */
+ spec.flags = 0;
+ spec.field_width = -1;
+ spec.precision = -1;
+ memcpy(arg.str, "?", 2);
+ goto print_string;
+ }
+ print_string:
+ return string(buf, end, arg.str, spec);
+ print_number:
+ spec.base = 10; /* Assign base field. */
+ return number(buf, end, arg.num, spec);
+}
+
/*
* Helper function to decode printf style format.
* Each call decode a token from the format and return the
@@ -1423,7 +1510,7 @@ int format_decode(const char *fmt, struct printf_spec *spec)
spec->type = FORMAT_TYPE_NONE;

for (; *fmt ; ++fmt) {
- if (*fmt == '%')
+ if (*fmt == '%' || *fmt == (char) 0xFF)
break;
}

@@ -1431,6 +1518,10 @@ int format_decode(const char *fmt, struct printf_spec *spec)
if (fmt != start || !*fmt)
return fmt - start;

+ /* Process built-in tokens. */
+ if (*fmt == (char) 0xFF)
+ return format_decode_builtin(fmt + 1, spec) - start;
+
/* Process flags */
spec->flags = 0;

@@ -1725,6 +1816,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
break;
}

+ case FORMAT_TYPE_BUILTIN_TOKEN:
+ str = builtin_token(str, end, spec);
+ break;
+
default:
switch (spec.type) {
case FORMAT_TYPE_LONG_LONG:
--
1.7.1

Joe Perches

unread,
Jan 1, 2014, 8:40:02 PM1/1/14
to
btw: Those -6.6 field width uses really are just for debugging
and I think should be removed. I didn't notice any other uses of
field widths and current-><value>. Are there any?

Joe Perches

unread,
Jan 1, 2014, 8:40:02 PM1/1/14
to
On Wed, 2014-01-01 at 19:02 +0900, Tetsuo Handa wrote:
> Joe Perches wrote:
> > > This choice (i.e. reserve only '\xFF') is more resource economy than my
> > > previous choice (i.e. reserve '\x7F' to '\xFF') at the cost of wasting only
> > > one byte compared to my previous choice.
> >
> > I supposed that's better.
> >
> > Is there a particularly utility/reason to use 0xff
> > vs ascii SUB/PU1/PU2?
> >
> Nothing. Is 0x1A preferable to 0xFF?

ASCII SUB seems to me to fit better for the purpose.
The "Private Use" PU1/PU2 also seemed appropriate,
but I suppose any non-printable character might be OK.

(from wikipedia)

The substitute character (SUB) was intended to request a translation of
the next character from a printable character to another value, usually
by setting bit 5 to zero. This is handy because some media (such as
sheets of paper produced by typewriters) can transmit only printable
characters. However, on MS-DOS systems with files opened in text mode,
"end of text" or "end of file" is marked by this Ctrl-Z character,
instead of the Ctrl-C or Ctrl-D, which are common on other operating
systems.

PU1/PU2:

Reserved for a function without standardized meaning for private use as
required, subject to the prior agreement of the sender and the recipient
of the data.

Tetsuo Handa

unread,
Jan 2, 2014, 12:00:02 AM1/2/14
to
Joe Perches wrote:
> On Wed, 2014-01-01 at 19:02 +0900, Tetsuo Handa wrote:
> > Joe Perches wrote:
> > > > This choice (i.e. reserve only '\xFF') is more resource economy than my
> > > > previous choice (i.e. reserve '\x7F' to '\xFF') at the cost of wasting only
> > > > one byte compared to my previous choice.
> > >
> > > I supposed that's better.
> > >
> > > Is there a particularly utility/reason to use 0xff
> > > vs ascii SUB/PU1/PU2?
> > >
> > Nothing. Is 0x1A preferable to 0xFF?
>
> ASCII SUB seems to me to fit better for the purpose.
> The "Private Use" PU1/PU2 also seemed appropriate,
> but I suppose any non-printable character might be OK.

OK. I changed to 0x1A.

Joe Perches wrote:
> btw: Those -6.6 field width uses really are just for debugging
> and I think should be removed. I didn't notice any other uses of
> field widths and current-><value>. Are there any?

I don't know. I'm keeping field widths support in case somebody uses field
widths and tsk-><value> (where tsk = current is done prior to reading) and
in the future somebody might want to print other globally accessible variables
(e.g. current time) with field widths. We can consider removing field widths
support after conversion is done.

I think this version is ready for testing.

Regards.
----------
>From 0fdc186b2c8eef8acff74705391eea69632f3d4a Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
Date: Thu, 2 Jan 2014 13:37:14 +0900
Subject: [PATCH v3] lib/vsprintf: support built-in tokens

The vast majority of ->comm accesses are accessing current->comm, for
debug reasons. I'm counting 350-odd sites. At all these sites it's
pointless passing `current' to the printk function at all!

This patch introduces a set of vsprintf tokens for embedding globally visible
arguments into the format string, so that we can reduce text size a bit (and
in the future make reading of task_struct->comm consistent) by stop generating
the code to pass an argument which the callee already has access to, with an
assumption that currently we are not using the 0x1A byte within the format
string.

Some examples are shown below.

pr_info("comm=%s\n", current->comm);
=> pr_info("comm=" EMBED_CURRENT_COMM "\n");

pr_info("pid=%d\n", current->pid);
=> pr_info("pid=" EMBED_CURRENT_PID "\n");

pr_info("%s(%d)\n", current->comm, current->pid);
=> pr_info(EMBED_CURRENT_COMM "(" EMBED_CURRENT_PID ")\n");

pr_info("[%-6.6s]\n", current->comm);
=> pr_info(EMBED_FORMAT "-6.6" EMBED_CURRENT_COMM "\n");

Since this patch does not use get_task_comm(), you should not convert from
get_task_comm(current) to built-in tokens if you want constsistent reading.

Suggested-by: Andrew Morton <ak...@linux-foundation.org>
Signed-off-by: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
---
include/linux/kernel.h | 17 ++++++++
lib/vsprintf.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ecb8754..8efd2dc 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -353,6 +353,23 @@ extern int num_to_str(char *buf, int size, unsigned long long num);

/* lib/printf utilities */

+/*
+ * Built-in tokens for __printf() functions.
+ *
+ * The __printf() functions interpret special bytes starting from a \x1A byte
+ * inside the format string as built-in tokens. The built-in tokens are used
+ * for saving text size (and centralizing pointer dereferences) by omitting
+ * arguments which are visible from __printf() functions. This means that
+ * the format string must not contain the \x1A byte. If the format string
+ * contains the \x1A byte which do not mean to be built-in tokens, you need to
+ * change from pr_info("\x1A\n") to pr_info("%c\n", '0x1A').
+ *
+ * See builtin_token() in lib/vsprintf.c for more information.
+ */
+#define EMBED_FORMAT "\x1A"
+#define EMBED_CURRENT_COMM "\x1A\xFF"
+#define EMBED_CURRENT_PID "\x1A\xFE"
+
extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...);
extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list);
extern __printf(3, 4)
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 10909c5..b05217d 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -366,7 +366,8 @@ enum format_type {
FORMAT_TYPE_INT,
FORMAT_TYPE_NRCHARS,
FORMAT_TYPE_SIZE_T,
- FORMAT_TYPE_PTRDIFF
+ FORMAT_TYPE_PTRDIFF,
+ FORMAT_TYPE_BUILTIN_TOKEN,
};

struct printf_spec {
@@ -1375,6 +1376,94 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return number(buf, end, (unsigned long) ptr, spec);
}

+static inline const char *format_decode_builtin(const char *fmt,
+ struct printf_spec *spec)
+{
+ u8 flags = 0;
+ u8 base;
+ s16 field_width = -1;
+ s16 precision = -1;
+ /* Check custom flags, field width and precision. */
+ if (*fmt == '-') {
+ fmt++;
+ flags = LEFT;
+ }
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ if (*fmt == '.') {
+ fmt++;
+ if (isdigit(*fmt)) {
+ precision = skip_atoi(&fmt);
+ if (precision < 0)
+ precision = 0;
+ }
+ }
+ /*
+ * Skip redundant byte if custom flags, field width or precision are
+ * specified.
+ */
+ if (*fmt == 0x1A)
+ fmt++;
+ /* Check whether this token byte is valid. */
+ base = *fmt;
+ if (base > 0xFD) {
+ fmt++;
+ spec->type = FORMAT_TYPE_BUILTIN_TOKEN;
+ spec->flags = flags;
+ spec->field_width = field_width;
+ spec->precision = precision;
+ spec->base = base; /* Borrow base field for passing token. */
+ }
+ return fmt;
+}
+
+/*
+ * Examples of using built-in tokens:
+ *
+ * pr_info("%s\n", current->comm) => pr_info(EMBED_CURRENT_COMM "\n");
+ *
+ * pr_info("%s(%d)\n", current->comm, current->pid);
+ * => pr_info(EMBED_CURRENT_COMM "(" EMBED_CURRENT_PID ")\n");
+ *
+ * pr_info("[%-6.6s]\n", current->comm);
+ * => pr_info(EMBED_FORMAT "-6.6" EMBED_CURRENT_COMM "\n");
+ *
+ * You can define other tokens using "\x1A\x??" format where ?? is a two hex
+ * digits other than 00 ('\0'), 1A ('\x1A'), 25 ('%'), 2D ('-'), 2E ('.') and
+ * 30 ('0') to 39 ('9').
+ */
+static noinline_for_stack
+char *builtin_token(char *buf, char *end, struct printf_spec spec)
+{
+ struct task_struct *const tsk = current;
+ union {
+ char str[TASK_COMM_LEN];
+ int num;
+ } arg;
+
+ switch (spec.base) {
+ case 0xFF: /* current->comm */
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ memcpy(arg.str, tsk->comm, sizeof(arg.str));
+ /*
+ * Intentionally copied 16 bytes for compiler optimization.
+ * Make sure that str is '\0'-terminated in case ->comm was
+ * by error not '\0'-terminated.
+ */
+ arg.str[sizeof(arg.str) - 1] = '\0';
+ goto print_string;
+ case 0xFE: /* task_pid_nr(current) or current->pid */
+ arg.num = task_pid_nr(tsk);
+ goto print_number;
+ }
+ memcpy(arg.str, "?", 2); /* This should not happen. */
+ print_string:
+ return string(buf, end, arg.str, spec);
+ print_number:
+ spec.base = 10; /* Assign base field. */
+ return number(buf, end, arg.num, spec);
+}
+
/*
* Helper function to decode printf style format.
* Each call decode a token from the format and return the
@@ -1423,7 +1512,7 @@ int format_decode(const char *fmt, struct printf_spec *spec)
spec->type = FORMAT_TYPE_NONE;

for (; *fmt ; ++fmt) {
- if (*fmt == '%')
+ if (*fmt == '%' || *fmt == 0x1A)
break;
}

@@ -1431,6 +1520,10 @@ int format_decode(const char *fmt, struct printf_spec *spec)
if (fmt != start || !*fmt)
return fmt - start;

+ /* Process built-in tokens. */
+ if (*fmt == 0x1A)
+ return format_decode_builtin(fmt + 1, spec) - start;
+
/* Process flags */
spec->flags = 0;

@@ -1725,6 +1818,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
break;
}

+ case FORMAT_TYPE_BUILTIN_TOKEN:
+ str = builtin_token(str, end, spec);
+ break;
+
default:
switch (spec.type) {
case FORMAT_TYPE_LONG_LONG:
--
1.7.1

Pavel Machek

unread,
Jan 2, 2014, 7:00:02 AM1/2/14
to
Hi!

> > > > #define PRINTK_PID "\002"
> > > > #define PRINTK_TASK_ID "\003" /* "comm:pid" */
> > > >
> > > > printk(PRINTK_TASK_ID ": hair on fire\n");
> > > >
> > > > It's certainly compact. I doubt if there's any existing code which
> > > > deliberately prints control chars?
> > >
> > > But the rest looks OK to me.
> >
> > Tell me again, what's wrong with using p or current?
> >
> > printk("%pt", current);
>
> Nothing much. It's just that all these callsites are generating the
> code to pass an argument which the callee already has access to.
> Optimizing that will reduce text size a bit.

Can we do "printk("%pt", 0)", then? Or just pass current and live with
few bytes of penalty. There are probably less ugly ways to make kernel
smaller. Inventing another escape character for printk is extremely
ugly.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

Kees Cook

unread,
Jan 3, 2014, 12:10:02 PM1/3/14
to
> [...]
> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> index 10909c5..b05217d 100644
> @@ -1423,7 +1512,7 @@ int format_decode(const char *fmt, struct printf_spec *spec)
> spec->type = FORMAT_TYPE_NONE;
>
> for (; *fmt ; ++fmt) {
> - if (*fmt == '%')
> + if (*fmt == '%' || *fmt == 0x1A)
> break;
> }
>

I'm on board with the idea of embedding comm/pid/whatever, but I
either missed or do not understand why a second format-start character
is being added. I think this will complicate audits and maybe trigger
weird info leaks (imagine printing a string that was %-escaped, but
not 0x1A-escaped?)

Why not use % followed by 0x1A to be the start code, instead of just 0x1A?

-Kees

--
Kees Cook
Chrome OS Security

Joe Perches

unread,
Jan 3, 2014, 12:40:03 PM1/3/14
to
On Fri, 2014-01-03 at 09:08 -0800, Kees Cook wrote:
> I'm on board with the idea of embedding comm/pid/whatever,

I still think the space reduction isn't worth the
complication.

> but I
> either missed or do not understand why a second format-start character
> is being added. I think this will complicate audits and maybe trigger
> weird info leaks (imagine printing a string that was %-escaped, but
> not 0x1A-escaped?)
> Why not use % followed by 0x1A to be the start code, instead of just 0x1A?

gcc would bleat an error for
"unknown conversion type character 0x1a in format".

Kees Cook

unread,
Jan 3, 2014, 12:50:03 PM1/3/14
to
On Fri, Jan 3, 2014 at 9:39 AM, Joe Perches <j...@perches.com> wrote:
> On Fri, 2014-01-03 at 09:08 -0800, Kees Cook wrote:
>> I'm on board with the idea of embedding comm/pid/whatever,
>
> I still think the space reduction isn't worth the
> complication.
>
>> but I
>> either missed or do not understand why a second format-start character
>> is being added. I think this will complicate audits and maybe trigger
>> weird info leaks (imagine printing a string that was %-escaped, but
>> not 0x1A-escaped?)
>> Why not use % followed by 0x1A to be the start code, instead of just 0x1A?
>
> gcc would bleat an error for
> "unknown conversion type character 0x1a in format".

Oh right. Bleh. Which gets me back around to the original patch which
overloaded %p.

Hrmpf. Yeah, on the fence about this.

-Kees

--
Kees Cook
Chrome OS Security

Tetsuo Handa

unread,
Jan 3, 2014, 9:30:01 PM1/3/14
to
I'm planning to convert task_struct->comm to use RCU so that they always get
consistent result. Inconsistent result (e.g. trailing '\0' byte is emitted when
printing string argument) is caused by breaking a rule that the string argument
must not change during the function when it is passed as "const char *" (e.g.
strcmp() and printf("%s")).

Although task_struct->comm is not modified so frequently, task_struct->comm
passed as a "const char *" argument can change at any moment unless we pass
task_struct->comm via get_task_comm().

Since there are a lot of current->comm readers, Andrew Morton suggested that
we might be able to omit passing the argument and I wrote a patch that adds a
second format-start character for omit passing the argument.

Kees Cook wrote:
> I'm on board with the idea of embedding comm/pid/whatever, but I
> either missed or do not understand why a second format-start character
> is being added. I think this will complicate audits and maybe trigger
> weird info leaks (imagine printing a string that was %-escaped, but
> not 0x1A-escaped?)

The second format-start character applies to only format string.
If we use a dynamically generated string that was %-escaped at run-time as a
format string, we will leak globally accessible variables. But are there such
users? Such users are rare because they are already problematic since compilers
cannot check for safety using __printf() attributes at build-time.

> Hrmpf. Yeah, on the fence about this.

But I'm fine even if we cannot agree with the second format-start character.
Since my purpose is to make reading of task_struct->comm consistent, %pT-like
extension is what I want for centralizing pointer dereferences.

Tetsuo Handa

unread,
Jan 4, 2014, 10:20:01 PM1/4/14
to
Tetsuo Handa wrote:
> Since my purpose is to make reading of task_struct->comm consistent, %pT-like
> extension is what I want for centralizing pointer dereferences.

If we have no objections for %pT[C012] patch, I'd like to propose a counterpart
patch for users reading/copying task_struct->comm for non-printing purposes.
With %pT[C012] patch and this patch, we can convert almost all
task_struct->comm users.
----------
>From 89e676ddce3e2a232fbb760085a1954cd53c8b63 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
Date: Sun, 5 Jan 2014 11:54:39 +0900
Subject: [PATCH] exec: Add wrapper functions for reading/copying task_struct->comm.

Since task_struct->comm can be modified by other threads while the current
thread is reading it, it is recommended to use get_task_comm() for reading it.

However, since get_task_comm() holds task_struct->alloc_lock spinlock,
some users cannot use get_task_comm(). Also, a lot of users are directly
reading from task_struct->comm even if they can use get_task_comm().
Such users might obtain inconsistent comm name.

This patch introduces wrapper functions for reading/copying task_struct->comm
so that in the future we need to modify only these wrapper functions to obtain
consistent comm name.

Users directly reading from task_struct->comm for printing purpose can use
%pT[C012] format specifiers rather than these wrapper functions.

Signed-off-by: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
---
include/linux/sched.h | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 53f97eb..ead439f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1665,6 +1665,40 @@ static inline cputime_t task_gtime(struct task_struct *t)
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);

+/**
+ * commcmp - Compare task_struct->comm .
+ *
+ * @tsk: Pointer to "struct task_struct".
+ * @comm: Comm name.
+ *
+ * Returns return value of strcmp(@tsk->comm, @comm).
+ *
+ * Please use this wrapper function which will be updated in the future to read
+ * @tsk->comm in a consistent way.
+ */
+static inline int commcmp(const struct task_struct *tsk, const char *comm)
+{
+ return strcmp(tsk->comm, comm);
+}
+
+/**
+ * commcpy - Copy task_struct->comm to buffer.
+ *
+ * @buf: Buffer to copy @tsk->comm which must be at least TASK_COMM_LEN bytes.
+ * @tsk: Pointer to "struct task_struct".
+ *
+ * Returns return value of memcpy(@buf, @tsk->comm, TASK_COMM_LEN).
+ *
+ * Please use get_task_comm(@buf, @tsk) if it is safe to call task_lock(@tsk),
+ * for get_task_comm() reads @tsk->comm in a consistent way. Otherwise, please
+ * use this wrapper function which will be updated in the future to read
+ * @tsk->comm in a consistent way.
+ */
+static inline void *commcpy(void *buf, const struct task_struct *tsk)
+{
+ return memcpy(buf, tsk->comm, TASK_COMM_LEN);
+}
+
/*
* Per process flags
*/
--
1.7.9.5

Joe Perches

unread,
Jan 5, 2014, 1:20:02 PM1/5/14
to
On Sun, 2014-01-05 at 12:15 +0900, Tetsuo Handa wrote:
> Tetsuo Handa wrote:
> > Since my purpose is to make reading of task_struct->comm consistent, %pT-like
> > extension is what I want for centralizing pointer dereferences.
>
> If we have no objections for %pT[C012] patch,

I still believe emitting different output styles using
%pT[012] is not ideal.

Is this really necessary?

I'd prefer that each element be described separately
using %pT<type> where T is a struct task_struct and
type is a member.

type 'c' -> tsk.comm
type 'p' -> tsk.pid
type 't' -> task.tgid

though if the real concern is simply comm consistency,
maybe these other member types aren't at all useful.

Pavel Machek

unread,
Jan 5, 2014, 5:30:02 PM1/5/14
to
Hi!

> + * Please use this wrapper function which will be updated in the future to read
> + * @tsk->comm in a consistent way.
> + */
> +static inline int commcmp(const struct task_struct *tsk, const char *comm)
> +{
> + return strcmp(tsk->comm, comm);
> +}

Is this useful to something? Printing command name is
useful. Comparing it...?

Tetsuo Handa

unread,
Jan 6, 2014, 9:10:02 AM1/6/14
to
Pavel Machek wrote:
> > + * Please use this wrapper function which will be updated in the future to read
> > + * @tsk->comm in a consistent way.
> > + */
> > +static inline int commcmp(const struct task_struct *tsk, const char *comm)
> > +{
> > + return strcmp(tsk->comm, comm);
> > +}
>
> Is this useful to something? Printing command name is
> useful. Comparing it...?
>

(a) Using tsk->comm for reducing duplicated printk() messages.

if (strcmp(p->comm, last_comm)) {
printk("hello\n");
strcpy(last_comm, p->comm);
}

(b) Using tsk->pid for reducing duplicated printk() messages.

if (p->pid != last_pid) {
printk("hello\n");
last_pid = p->pid;
}

(c) Using printk_ratelimit() for reducing printk() flooding.

printk_ratelimit("hello\n");

(d) Using plain printk().

printk("hello\n");

(e) Other purposes. (e.g. drivers/target/iscsi/iscsi_target_tq.c )

if (!strncmp(current->comm, ISCSI_RX_THREAD_NAME,
strlen(ISCSI_RX_THREAD_NAME)))
thread_called = ISCSI_RX_THREAD;
else if (!strncmp(current->comm, ISCSI_TX_THREAD_NAME,
strlen(ISCSI_TX_THREAD_NAME)))
thread_called = ISCSI_TX_THREAD;

commcmp() and commcpy() are for wrappring (a).
Though (a) should consider changing to (b) or (c).

(e) should be rewritten not to depend on current->comm .

Tetsuo Handa

unread,
Jan 6, 2014, 9:10:02 AM1/6/14
to
Joe Perches wrote:
> On Sun, 2014-01-05 at 12:15 +0900, Tetsuo Handa wrote:
> > > Since my purpose is to make reading of task_struct->comm consistent, %pT-like
> > > extension is what I want for centralizing pointer dereferences.
> >
> > If we have no objections for %pT[C012] patch,
>
> I still believe emitting different output styles using
> %pT[012] is not ideal.
>
> Is this really necessary?

No problem. %pT[012] are simply optimization (reducing number of function
arguments for saving text size) and therefore I can drop them.
What about below patch?
----------
>From f69a1db69a9ad6e14235edbb5a9b7d720530a2be Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
Date: Mon, 6 Jan 2014 22:22:00 +0900
Subject: [PATCH] lib/vsprintf: add %pT format specifier

Since task_struct->comm can be modified by other threads while the current
thread is reading it, it is recommended to use get_task_comm() for reading it.

However, since get_task_comm() holds task_struct->alloc_lock spinlock,
some users cannot use get_task_comm(). Also, a lot of users are directly
reading from task_struct->comm even if they can use get_task_comm().
Such users might obtain inconsistent result.

This patch introduces %pT format specifier for printing task_struct->comm.
Currently %pT does not provide consistency. I'm planning to change to use RCU
in the future. By using RCU, the comm name read from task_struct->comm will be
guaranteed to be consistent. But before modifying set_task_comm() to use RCU,
we need to kill direct ->comm users who do not use get_task_comm().

An example for converting direct ->comm users is shown below. Since many debug
printings use p == current, you can pass NULL instead of p if p == current.

pr_info("comm=%s\n", p->comm); => pr_info("comm=%pT\n", p);
pr_info("comm=%s\n", current->comm); => pr_info("comm=%pT\n", NULL);

Signed-off-by: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
---
Documentation/printk-formats.txt | 6 ++++++
lib/vsprintf.c | 20 +++++++++++++++++++-
2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 445ad74..bd96e34 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -177,6 +177,12 @@ dentry names:
equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
n last components. %pD does the same thing for struct file.

+task_struct comm name:
+
+ %pT
+
+ For printing task_struct->comm.
+
struct va_format:

%pV
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 10909c5..0300dae 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1155,6 +1155,21 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr,
return number(buf, end, *(const netdev_features_t *)addr, spec);
}

+static noinline_for_stack
+char *comm_name(char *buf, char *end, struct task_struct *tsk,
+ struct printf_spec spec, const char *fmt)
+{
+ char name[TASK_COMM_LEN];
+
+ /* Caller can pass NULL instead of current. */
+ if (!tsk)
+ tsk = current;
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ memcpy(name, tsk->comm, TASK_COMM_LEN);
+ name[sizeof(name) - 1] = '\0';
+ return string(buf, end, name, spec);
+}
+
int kptr_restrict __read_mostly;

/*
@@ -1221,6 +1236,7 @@ int kptr_restrict __read_mostly;
* - 'a' For a phys_addr_t type and its derivative types (passed by reference)
* - 'd[234]' For a dentry name (optionally 2-4 last components)
* - 'D[234]' Same as 'd' but for a struct file
+ * - 'T' task_struct->comm
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1232,7 +1248,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
{
int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);

- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'T') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
@@ -1364,6 +1380,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return dentry_name(buf, end,
((const struct file *)ptr)->f_path.dentry,
spec, fmt);
+ case 'T':
+ return comm_name(buf, end, ptr, spec, fmt);
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
--
1.7.9.5

Joe Perches

unread,
Jan 6, 2014, 12:40:02 PM1/6/14
to
On Mon, 2014-01-06 at 23:00 +0900, Tetsuo Handa wrote:
> Joe Perches wrote:
> > Is this really necessary?
> No problem. %pT[012] are simply optimization (reducing number of function
> arguments for saving text size) and therefore I can drop them.
> What about below patch?

Hi Tetsuo. Just a nit.

> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
[]
> @@ -1232,7 +1248,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
> {
> int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
>
> - if (!ptr && *fmt != 'K') {
> + if (!ptr && *fmt != 'K' && *fmt != 'T') {

I think this new 'T' comparison isn't necessary.

cheers, Joe

Pavel Machek

unread,
Jan 6, 2014, 4:40:03 PM1/6/14
to

Tetsuo Handa

unread,
Jan 6, 2014, 4:50:03 PM1/6/14
to
Joe Perches wrote:
> On Mon, 2014-01-06 at 23:00 +0900, Tetsuo Handa wrote:
> > Joe Perches wrote:
> > > Is this really necessary?
> > No problem. %pT[012] are simply optimization (reducing number of function
> > arguments for saving text size) and therefore I can drop them.
> > What about below patch?
>
> Hi Tetsuo. Just a nit.
>
> > diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> []
> > @@ -1232,7 +1248,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
> > {
> > int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
> >
> > - if (!ptr && *fmt != 'K') {
> > + if (!ptr && *fmt != 'K' && *fmt != 'T') {
>
> I think this new 'T' comparison isn't necessary.

This is needed for allowing comm_name() to accept NULL instead of current.

Joe Perches

unread,
Jan 6, 2014, 5:30:01 PM1/6/14
to
On Tue, 2014-01-07 at 06:41 +0900, Tetsuo Handa wrote:
> Joe Perches wrote:
> > On Mon, 2014-01-06 at 23:00 +0900, Tetsuo Handa wrote:
> > > Joe Perches wrote:
> > > > Is this really necessary?
> > > No problem. %pT[012] are simply optimization (reducing number of function
> > > arguments for saving text size) and therefore I can drop them.
> > > What about below patch?
> >
> > Hi Tetsuo. Just a nit.
> >
> > > diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> > []
> > > @@ -1232,7 +1248,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
> > > {
> > > int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
> > >
> > > - if (!ptr && *fmt != 'K') {
> > > + if (!ptr && *fmt != 'K' && *fmt != 'T') {
> >
> > I think this new 'T' comparison isn't necessary.
>
> This is needed for allowing comm_name() to accept NULL instead of current.

Yeah, that's what I think isn't necessary.

current is current_thread_info()->task.

I think it's pretty lightweight in all arches and
it'd be simpler/more intelligible to not use NULL.

Andrew? Any opinion? Anyone else?

cheers, Joe

Pavel Machek

unread,
Jan 6, 2014, 7:20:03 PM1/6/14
to
Hi!

> > > > diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> > > []
> > > > @@ -1232,7 +1248,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
> > > > {
> > > > int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
> > > >
> > > > - if (!ptr && *fmt != 'K') {
> > > > + if (!ptr && *fmt != 'K' && *fmt != 'T') {
> > >
> > > I think this new 'T' comparison isn't necessary.
> >
> > This is needed for allowing comm_name() to accept NULL instead of current.
>
> Yeah, that's what I think isn't necessary.
>
> current is current_thread_info()->task.
>
> I think it's pretty lightweight in all arches and
> it'd be simpler/more intelligible to not use NULL.
>
> Andrew? Any opinion? Anyone else?

Andrew was worried about all the "current" duplication, IIRC. It is in
the mail thread somewhere. And one condition in printk is price worth paying.

Joe Perches

unread,
Jan 6, 2014, 8:10:02 PM1/6/14
to
On Tue, 2014-01-07 at 01:16 +0100, Pavel Machek wrote:
> > > > > diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> > > > []
> > > > > @@ -1232,7 +1248,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
> > > > > {
> > > > > int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
> > > > >
> > > > > - if (!ptr && *fmt != 'K') {
> > > > > + if (!ptr && *fmt != 'K' && *fmt != 'T') {
> > > >
> > > > I think this new 'T' comparison isn't necessary.
> > >
> > > This is needed for allowing comm_name() to accept NULL instead of current.
> >
> > Yeah, that's what I think isn't necessary.
> >
> > current is current_thread_info()->task.
> >
> > I think it's pretty lightweight in all arches and
> > it'd be simpler/more intelligible to not use NULL.
> >
> > Andrew? Any opinion? Anyone else?
>
> Andrew was worried about all the "current" duplication, IIRC. It is in
> the mail thread somewhere. And one condition in printk is price worth paying.
> Pavel

Hi Pavel.

I'm not nacking this, just stating my view.

I believe I showed how many uses of vsprintf w/ current
there are. Passing NULL vs passing current as the %pT
argument is I think a negligible overall size delta too.

cheers, Joe

Pavel Machek

unread,
Jan 7, 2014, 3:40:02 AM1/7/14
to
On Mon 2014-01-06 17:03:55, Joe Perches wrote:
> On Tue, 2014-01-07 at 01:16 +0100, Pavel Machek wrote:
> > > > > > diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> > > > > []
> > > > > > @@ -1232,7 +1248,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
> > > > > > {
> > > > > > int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
> > > > > >
> > > > > > - if (!ptr && *fmt != 'K') {
> > > > > > + if (!ptr && *fmt != 'K' && *fmt != 'T') {
> > > > >
> > > > > I think this new 'T' comparison isn't necessary.
> > > >
> > > > This is needed for allowing comm_name() to accept NULL instead of current.
> > >
> > > Yeah, that's what I think isn't necessary.
> > >
> > > current is current_thread_info()->task.
> > >
> > > I think it's pretty lightweight in all arches and
> > > it'd be simpler/more intelligible to not use NULL.
> > >
> > > Andrew? Any opinion? Anyone else?
> >
> > Andrew was worried about all the "current" duplication, IIRC. It is in
> > the mail thread somewhere. And one condition in printk is price worth paying.
>
> Hi Pavel.
>
> I'm not nacking this, just stating my view.

And I believe Andrew clearly stated his view, on the very topic you
asked him on.

> I believe I showed how many uses of vsprintf w/ current
> there are. Passing NULL vs passing current as the %pT
> argument is I think a negligible overall size delta too.

One condition in if () is negligible, too, so passing NULL will still
be overall win. And that was the point of this patch series.

Joe Perches

unread,
Jan 7, 2014, 12:40:03 PM1/7/14
to
On Tue, 2014-01-07 at 09:37 +0100, Pavel Machek wrote:
> On Mon 2014-01-06 17:03:55, Joe Perches wrote:
> > On Tue, 2014-01-07 at 01:16 +0100, Pavel Machek wrote:
> > > > > > > diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> > > > > > []
> > > > > > > @@ -1232,7 +1248,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
> > > > > > > {
> > > > > > > int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
> > > > > > >
> > > > > > > - if (!ptr && *fmt != 'K') {
> > > > > > > + if (!ptr && *fmt != 'K' && *fmt != 'T') {
> > > > > >
> > > > > > I think this new 'T' comparison isn't necessary.
> > > > >
> > > > > This is needed for allowing comm_name() to accept NULL instead of current.
> > > >
> > > > Yeah, that's what I think isn't necessary.
> > > >
> > > > current is current_thread_info()->task.
> > > >
> > > > I think it's pretty lightweight in all arches and
> > > > it'd be simpler/more intelligible to not use NULL.
> > > >
> > > > Andrew? Any opinion? Anyone else?
> > >
> > > Andrew was worried about all the "current" duplication, IIRC. It is in
> > > the mail thread somewhere. And one condition in printk is price worth paying.
> >
> > Hi Pavel.
> >
> > I'm not nacking this, just stating my view.
>
> And I believe Andrew clearly stated his view, on the very topic you
> asked him on.

I believe Andrew's view:

On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
> > Tell me again, what's wrong with using p or current?
> > printk("%pt", current);
>
> Nothing much. It's just that all these callsites are generating the
> code to pass an argument which the callee already has access to.
> Optimizing that will reduce text size a bit.

was that the argument passing was the primary issue.

Now that that's not done, this code actually uses a
different concept, that "NULL" is special when using
%pT. If the argument isn't eliminated all together,
I think that new concept should be avoided.

The additional cost of using current vs NULL is ~zero.

Pavel Machek

unread,
Jan 7, 2014, 1:00:03 PM1/7/14
to

> > > I'm not nacking this, just stating my view.
> >
> > And I believe Andrew clearly stated his view, on the very topic you
> > asked him on.
>
> I believe Andrew's view:
>
> On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> > On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
> > > Tell me again, what's wrong with using p or current?
> > > printk("%pt", current);
> >
> > Nothing much. It's just that all these callsites are generating the
> > code to pass an argument which the callee already has access to.
> > Optimizing that will reduce text size a bit.
>
> was that the argument passing was the primary issue.

Yes. He dislikes passing argument callee has already access
to. "current". This patch does not do this, it just passes the NULL as
a marker... and to keep printf() checkers happy.

I believe this is way better than alternatives.

> Now that that's not done, this code actually uses a
> different concept, that "NULL" is special when using
> %pT. If the argument isn't eliminated all together,
> I think that new concept should be avoided.
>
> The additional cost of using current vs NULL is ~zero.

The additional cost of current vs NULL is cca 8 bytes per caller. Test
for NULL is cca 4 bytes, maybe 20 bytes total. I believe it is worth
it.

Geert Uytterhoeven

unread,
Jan 7, 2014, 1:30:01 PM1/7/14
to
On Tue, Jan 7, 2014 at 6:56 PM, Pavel Machek <pa...@ucw.cz> wrote:
>> The additional cost of using current vs NULL is ~zero.
>
> The additional cost of current vs NULL is cca 8 bytes per caller. Test
> for NULL is cca 4 bytes, maybe 20 bytes total. I believe it is worth
> it.

It depends (typical answer ;-)

On architectures that keep current in a register, the cost of using it is
usually zero, as there's typically no difference between pushing a register
or a zero on the stack, or moving a register or a zero to another register.

current_thread_info()->task is more expensive.

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

Tetsuo Handa

unread,
Jan 8, 2014, 9:30:01 AM1/8/14
to
Pavel Machek wrote:
> > > > I'm not nacking this, just stating my view.
> > >
> > > And I believe Andrew clearly stated his view, on the very topic you
> > > asked him on.
> >
> > I believe Andrew's view:
> >
> > On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> > > On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
> > > > Tell me again, what's wrong with using p or current?
> > > > printk("%pt", current);
> > >
> > > Nothing much. It's just that all these callsites are generating the
> > > code to pass an argument which the callee already has access to.
> > > Optimizing that will reduce text size a bit.
> >
> > was that the argument passing was the primary issue.
>
> Yes. He dislikes passing argument callee has already access
> to. "current". This patch does not do this, it just passes the NULL as
> a marker... and to keep printf() checkers happy.

Excuse me?
Compilers or kernel developers, which one does "printf() checkers" refer to?

The "%pT" patch is for printing tsk->comm consistently, but not always
tsk == current .

The "\x1A" patch is for printing tsk->xxx without passing tsk->xxx as
arguments, but always tsk == current .

These two patches have different purposes and do not overwrap.

Both patches are written not to confuse compilers when using
__attribute__((format(printf, a, b))) check. Therefore, I don't think that
the "\x1A" patch makes compilers unhappy. Rather, the "\x1A" patch would make
compilers more happy because it replaces cost of passing current or NULL
as arguments with cost of symbolic names (e.g. two bytes for
#define EMBED_CURRENT_COMM "\x1A\xFF"
case) within the format string.

(no patch applied)
tsk = current;
pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
tsk->comm, tsk->pid, str, regs->ip, regs->sp, error_code);

(the "%pT" patch applied)
tsk = current;
pr_info("%pT[%d] trap %s ip:%lx sp:%lx error:%lx",
NULL, tsk->pid, str, regs->ip, regs->sp, error_code);

(the "\x1A" patch applied)
pr_info(EMBED_CURRENT_COMM "[" EMBED_CURRENT_PID "] trap %s ip:%lx sp:%lx error:%lx",
str, regs->ip, regs->sp, error_code);

By reducing number of function arguments, those who audit the correctness of
kernel source code would gain more readability.

(no patch applied)
tsk = current;
pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
tsk->comm, tsk->pid, str, regs->ip, regs->sp, error_code);

(the "\x1A" patch applied)
pr_info(EMBED_CURRENT_COMM "[" EMBED_CURRENT_PID "] trap %s ip:%lx sp:%lx error:%lx",
str, regs->ip, regs->sp, error_code);

There might be kernel developers who are using \x1A within the format string as
a literal byte, but we will be able to find and rewrite \x1A in the format
string like

(before fixing problem)
const chat *fmt = "aaa\x1Axxx\n";
pr_debug(fmt);

(after fixing problem)
const char *str = "aaa\x1Axxx"
pr_debug("%s\n", str);

as with we find and rewrite '%' in the format string like

(before fixing problem)
const chat *fmt = "aaa%xxx\n";
pr_debug(fmt);

(after fixing problem)
const char *str = "aaa%xxx"
pr_debug("%s\n", str);

.

It seems to me that we can agree with the "%pT" patch.
But how does the "\x1A" patch make compilers or kernel developers unhappy?

Pavel Machek

unread,
Jan 8, 2014, 9:50:02 AM1/8/14
to
On Wed 2014-01-08 23:19:04, Tetsuo Handa wrote:
> Pavel Machek wrote:
> > > > > I'm not nacking this, just stating my view.
> > > >
> > > > And I believe Andrew clearly stated his view, on the very topic you
> > > > asked him on.
> > >
> > > I believe Andrew's view:
> > >
> > > On Sat, 2013-12-28 at 12:08 -0800, Andrew Morton wrote:
> > > > On Sat, 28 Dec 2013 11:53:25 -0800 Joe Perches <j...@perches.com> wrote:
> > > > > Tell me again, what's wrong with using p or current?
> > > > > printk("%pt", current);
> > > >
> > > > Nothing much. It's just that all these callsites are generating the
> > > > code to pass an argument which the callee already has access to.
> > > > Optimizing that will reduce text size a bit.
> > >
> > > was that the argument passing was the primary issue.
> >
> > Yes. He dislikes passing argument callee has already access
> > to. "current". This patch does not do this, it just passes the NULL as
> > a marker... and to keep printf() checkers happy.
>
> Excuse me?
> Compilers or kernel developers, which one does "printf() checkers" refer to?

Compilers.

> The "%pT" patch is for printing tsk->comm consistently, but not always
> tsk == current .

I like this one.

> The "\x1A" patch is for printing tsk->xxx without passing tsk->xxx as
> arguments, but always tsk == current .

I think this is not a good idea.

> Both patches are written not to confuse compilers when using
> __attribute__((format(printf, a, b))) check. Therefore, I don't think that
> the "\x1A" patch makes compilers unhappy. Rather, the "\x1A" patch would make
> compilers more happy because it replaces cost of passing current or NULL
> as arguments with cost of symbolic names (e.g. two bytes for
> #define EMBED_CURRENT_COMM "\x1A\xFF"
> case) within the format string.

Agreed. No patches make compilers unhappy. I thought ("%pT", NULL) is
done that way not to confuse compilers. ("%pT") would trigger checks,
right?

> There might be kernel developers who are using \x1A within the format string as
> a literal byte, but we will be able to find and rewrite \x1A in the format
> string like

Yeah, but I still think magically replacing \x1A is unexpected
behaviour.

> It seems to me that we can agree with the "%pT" patch.

Yes.

> But how does the "\x1A" patch make compilers or kernel developers unhappy?

It makes me unhappy -- one more character to escape, and unexpected
one. (With possible security implications -- think sprintf(buf, "string
without %s, from user").

But in this thread, I was arguing that %pT is a good idea, should be
applied, and no more bike shedding is neccessary.

(Feel free to add Acked-by:/Reviewed-by: Pavel Machek <pa...@ucw.cz>
if you wish.)

Thanks,

Tetsuo Handa

unread,
Jan 10, 2014, 8:20:02 AM1/10/14
to
I realized that we don't need commcmp() because we can rewrite like below.

(a)
char tmp_comm[TASK_COMM_LEN];
commcpy(tmp_comm, p->comm);
if (strcmp(tmp_comm, last_comm)) {
printk("hello\n");
strcpy(last_comm, tmp_comm);
}

(e)
char tmp_comm[TASK_COMM_LEN];
commcpy(tmp_comm, p->comm);
if (!strncmp(tmp_comm, ISCSI_RX_THREAD_NAME,
strlen(ISCSI_RX_THREAD_NAME)))
thread_called = ISCSI_RX_THREAD;
else if (!strncmp(tmp_comm, ISCSI_TX_THREAD_NAME,
strlen(ISCSI_TX_THREAD_NAME)))
thread_called = ISCSI_TX_THREAD;

Below is an updated patch (and one of users of this patch). I assume we can
agree on these patches, and I expect these patches go to linux-next.git via
linux-security.git (as I can save one merge window for fixing this issue in
the LSM auditing code.)
----------------------------------------
>From 0c183ad7aceb0db8ca4b9bd3a048182bf23e32be Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
Date: Fri, 10 Jan 2014 21:54:38 +0900
Subject: [PATCH] exec: Add wrapper function for reading task_struct->comm.

Since task_struct->comm can be modified by other threads while the current
thread is reading it, it is recommended to use get_task_comm() for reading it.

However, since get_task_comm() holds task_struct->alloc_lock spinlock,
some users cannot use get_task_comm(). Also, a lot of users are directly
reading from task_struct->comm even if they can use get_task_comm().
Such users might obtain inconsistent result.

This patch introduces a wrapper function for reading task_struct->comm .
Currently this function does not provide consistency. I'm planning to change to
use RCU in the future. By using RCU, the comm name read from task_struct->comm
will be guaranteed to be consistent. But before modifying set_task_comm() to
use RCU, we need to kill direct ->comm users who do not use get_task_comm().

Users directly reading from task_struct->comm for printing purpose can use
%pT format specifier rather than this wrapper function.

Signed-off-by: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
---
include/linux/sched.h | 18 ++++++++++++++++++
1 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e27baee..de12b27 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1601,6 +1601,24 @@ static inline cputime_t task_gtime(struct task_struct *t)
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);

+/**
+ * commcpy - Copy task_struct->comm to buffer.
+ *
+ * @buf: Buffer to copy @tsk->comm which must be at least TASK_COMM_LEN bytes.
+ * @tsk: Pointer to "struct task_struct".
+ *
+ * Returns return value of memcpy(@buf, @tsk->comm, TASK_COMM_LEN).
+ *
+ * Please use get_task_comm(@buf, @tsk) if it is safe to call task_lock(@tsk),
+ * for get_task_comm() reads @tsk->comm in a consistent way. Otherwise, please
+ * use this wrapper function which will be updated in the future to read
+ * @tsk->comm in a consistent way.
+ */
+static inline void *commcpy(void *buf, const struct task_struct *tsk)
+{
+ return memcpy(buf, tsk->comm, TASK_COMM_LEN);
+}
+
/*
* Per process flags
*/
--
1.7.1
----------------------------------------
>From 3b5137f09bfdac7b0133ddaa194d181d7c897e1f Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
Date: Fri, 10 Jan 2014 22:02:14 +0900
Subject: [PATCH] LSM: Pass comm name via commcpy()

When we pass task->comm to audit_log_untrustedstring(), we need to pass a
snapshot of it using commcpy() because task->comm can be changed to contain
control character by other threads after audit_log_untrustedstring() confirmed
that task->comm does not contain control character.

Signed-off-by: Tetsuo Handa <penguin...@I-love.SAKURA.ne.jp>
---
security/lsm_audit.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 8d8d97d..76f37f7 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -212,6 +212,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
struct common_audit_data *a)
{
struct task_struct *tsk = current;
+ char name[TASK_COMM_LEN];

/*
* To keep stack sizes in check force programers to notice if they
@@ -221,7 +222,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
BUILD_BUG_ON(sizeof(a->u) > sizeof(void *)*2);

audit_log_format(ab, " pid=%d comm=", tsk->pid);
- audit_log_untrustedstring(ab, tsk->comm);
+ audit_log_untrustedstring(ab, commcpy(name, tsk));

switch (a->type) {
case LSM_AUDIT_DATA_NONE:
@@ -280,7 +281,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
tsk = a->u.tsk;
if (tsk && tsk->pid) {
audit_log_format(ab, " pid=%d comm=", tsk->pid);
- audit_log_untrustedstring(ab, tsk->comm);
+ audit_log_untrustedstring(ab, commcpy(name, tsk));
}
break;
case LSM_AUDIT_DATA_NET:
--
1.7.1
----------------------------------------
0 new messages