[PATCH] kernel: Conditionally support non-root users, groups and capabilities

55 views
Skip to first unread message

Iulia Manda

unread,
Jan 13, 2015, 5:17:28 PM1/13/15
to jo...@joshtriplett.org, opw-k...@googlegroups.com
Add a new symbol that permits compiling out support for non-root users. As
capabilities depend on the existence of multiple users, they are also stubbed
out if we only keep the root user.

When this symbol is not defined, UID and GID are zero in any possible case.
Also, the corresponding syscalls are compiled out.

This patch saves 24447 bytes. Check the attachment for the bloat-o-meter
output.

Signed-off-by: Iulia Manda <iulia....@gmail.com>
---
include/linux/capability.h | 11 +++++++++++
include/linux/uidgid.h | 13 ++++++++++++-
init/Kconfig | 12 ++++++++++++
kernel/capability.c | 6 ++++++
kernel/sys.c | 3 ++-
kernel/sys_ni.c | 10 ++++++++++
6 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/include/linux/capability.h b/include/linux/capability.h
index aa93e5e..79f098b 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -211,8 +211,19 @@ extern bool has_ns_capability(struct task_struct *t,
extern bool has_capability_noaudit(struct task_struct *t, int cap);
extern bool has_ns_capability_noaudit(struct task_struct *t,
struct user_namespace *ns, int cap);
+#ifdef CONFIG_NON_ROOT
extern bool capable(int cap);
extern bool ns_capable(struct user_namespace *ns, int cap);
+#else
+static inline bool capable(int cap)
+{
+ return true;
+}
+static inline bool ns_capable(struct user_namespace *ns, int cap)
+{
+ return true;
+}
+#endif /* CONFIG_NON_ROOT */
extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);

diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index 2d1f9b6..70da49a 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -29,15 +29,26 @@ typedef struct {
#define KUIDT_INIT(value) (kuid_t){ value }
#define KGIDT_INIT(value) (kgid_t){ value }

+#ifdef CONFIG_NON_ROOT
static inline uid_t __kuid_val(kuid_t uid)
{
return uid.val;
}
-
static inline gid_t __kgid_val(kgid_t gid)
{
return gid.val;
}
+#else
+static inline uid_t __kuid_val(kuid_t uid)
+{
+ return 0;
+}
+static inline gid_t __kgid_val(kgid_t gid)
+{
+ return 0;
+}
+#endif
+

#define GLOBAL_ROOT_UID KUIDT_INIT(0)
#define GLOBAL_ROOT_GID KGIDT_INIT(0)
diff --git a/init/Kconfig b/init/Kconfig
index 9afb971..d7f5924 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -394,6 +394,7 @@ endchoice

config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
+ select NON_ROOT
help
If you say Y here, a user level program will be able to instruct the
kernel (via a special system call) to write process accounting
@@ -420,6 +421,7 @@ config BSD_PROCESS_ACCT_V3
config TASKSTATS
bool "Export task/process statistics through netlink"
depends on NET
+ select NON_ROOT
default n
help
Export selected statistics for tasks/processes through the
@@ -1140,6 +1142,7 @@ config CHECKPOINT_RESTORE

menuconfig NAMESPACES
bool "Namespaces support" if EXPERT
+ depends on NON_ROOT
default !EXPERT
help
Provides the way to make tasks work with different objects using
@@ -1357,6 +1360,15 @@ config UID16
help
This enables the legacy 16-bit UID syscall wrappers.

+config NON_ROOT
+ bool "Enable support for multiple users" if EXPERT
+ default y
+ help
+ This option enables support for non-root users.
+ If not set, capabilities are also disabled.
+
+ Leave this option as it is if unsure.
+
config SGETMASK_SYSCALL
bool "sgetmask/ssetmask syscalls support" if EXPERT
def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH
diff --git a/kernel/capability.c b/kernel/capability.c
index 989f5bf..bead84a 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -35,6 +35,7 @@ static int __init file_caps_disable(char *str)
}
__setup("no_file_caps", file_caps_disable);

+#ifdef CONFIG_NON_ROOT
/*
* More recent versions of libcap are available from:
*
@@ -279,6 +280,7 @@ error:
abort_creds(new);
return ret;
}
+#endif

/**
* has_ns_capability - Does a task have a capability in a specific user ns
@@ -360,6 +362,7 @@ bool has_capability_noaudit(struct task_struct *t, int cap)
return has_ns_capability_noaudit(t, &init_user_ns, cap);
}

+#ifdef CONFIG_NON_ROOT
/**
* ns_capable - Determine if the current task has a superior capability in effect
* @ns: The usernamespace we want the capability in
@@ -385,6 +388,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
return false;
}
EXPORT_SYMBOL(ns_capable);
+#endif

/**
* file_ns_capable - Determine if the file's opener had a capability in effect
@@ -411,6 +415,7 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
}
EXPORT_SYMBOL(file_ns_capable);

+#ifdef CONFIG_NON_ROOT
/**
* capable - Determine if the current task has a superior capability in effect
* @cap: The capability to be tested for
@@ -426,6 +431,7 @@ bool capable(int cap)
return ns_capable(&init_user_ns, cap);
}
EXPORT_SYMBOL(capable);
+#endif

/**
* capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
diff --git a/kernel/sys.c b/kernel/sys.c
index a8c9f5a..bb7c2a8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -319,6 +319,7 @@ out_unlock:
* SMP: There are not races, the GIDs are checked only by filesystem
* operations (as far as semantic preservation is concerned).
*/
+#ifdef CONFIG_NON_ROOT
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
struct user_namespace *ns = current_user_ns();
@@ -565,7 +566,6 @@ error:
return retval;
}

-
/*
* This function implements a generic ability to update ruid, euid,
* and suid. This allows you to implement the 4.4 compatible seteuid().
@@ -729,6 +729,7 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t _
return retval;
}

+#endif

/*
* "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5adcb0a..0f79b39 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -159,6 +159,16 @@ cond_syscall(sys_uselib);
cond_syscall(sys_fadvise64);
cond_syscall(sys_fadvise64_64);
cond_syscall(sys_madvise);
+cond_syscall(sys_setuid);
+cond_syscall(sys_setregid);
+cond_syscall(sys_setgid);
+cond_syscall(sys_setreuid);
+cond_syscall(sys_setresuid);
+cond_syscall(sys_getresuid);
+cond_syscall(sys_getresgid);
+cond_syscall(sys_setresgid);
+cond_syscall(sys_capget);
+cond_syscall(sys_capset);

/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
--
1.7.10.4

nonroot_bloat.txt

Iulia Manda

unread,
Jan 13, 2015, 5:26:10 PM1/13/15
to opw-k...@googlegroups.com
I have a few observations/questions below:
1. compiling out non-root users and capabilities may need to be done in two
different patches.
2. tried to compile out capability.c as a whole, but the resulting code was
very similar to what was trying to be avoided in commit
b3a222e52e4d4be77cc4520a57af1a4a0d8222d1 (problems encountered mostly in
security/commoncap.c).
3. tested my changes in Qemu VM, e.g:
$ adduser test
passwd: setuid: Function not implemented
$ su test
su: can't set groups: Invalid argument
How should userspace be notified about these changes? Or should this be left
in the hands of the developer?
4. Mostly due to constant propagation and constant folding, this patch
decreases the size of the final image by 24447 bytes.

jo...@joshtriplett.org

unread,
Jan 15, 2015, 1:42:24 PM1/15/15
to Iulia Manda, opw-k...@googlegroups.com
On Tue, Jan 13, 2015 at 02:26:10PM -0800, Iulia Manda wrote:
> I have a few observations/questions below:
> 1. compiling out non-root users and capabilities may need to be done in two
> different patches.

The patch you posted seems quite reasonably sized to me; I don't think
you necessarily need to split it. If you see a logical split point and
want to split it, please feel free to send a two-patch series; however,
I think you could successfully upstream it as one patch.

> 2. tried to compile out capability.c as a whole, but the resulting code was
> very similar to what was trying to be avoided in commit
> b3a222e52e4d4be77cc4520a57af1a4a0d8222d1 (problems encountered mostly in
> security/commoncap.c).

Yeah, that seems like a harder problem, and not one you necessarily want
to solve at the same time.

> 3. tested my changes in Qemu VM, e.g:
> $ adduser test
> passwd: setuid: Function not implemented
> $ su test
> su: can't set groups: Invalid argument
> How should userspace be notified about these changes? Or should this be left
> in the hands of the developer?

If the user compiles out non-root users, they shouldn't be running
commands like adduser or su. You don't have to do anything special to
notify userspace; you're already returning errors, as indicated by the
error messages above. ("Function not implemented" is ENOSYS.)

> 4. Mostly due to constant propagation and constant folding, this patch
> decreases the size of the final image by 24447 bytes.

*Very* impressive.

- Josh Triplett

jo...@joshtriplett.org

unread,
Jan 15, 2015, 2:01:29 PM1/15/15
to Iulia Manda, opw-k...@googlegroups.com
Very nice work, and extremely impressive space savings.

You're going to need a paragraph at the top of your commit message
explaining the use cases for this (for instance, embedded systems that
run most or all of their functionality in init, running as root:root).

On Wed, Jan 14, 2015 at 12:16:43AM +0200, Iulia Manda wrote:
> Add a new symbol that permits compiling out support for non-root users. As
> capabilities depend on the existence of multiple users, they are also stubbed
> out if we only keep the root user.
>
> When this symbol is not defined, UID and GID are zero in any possible case.

"and processes always have all capabilities".

> Also, the corresponding syscalls are compiled out.

You should explicitly list all the syscalls you've compiled out in the
commit message.

> This patch saves 24447 bytes. Check the attachment for the bloat-o-meter
> output.

You should note what baseline you measured the savings from (tinyconfig,
defconfig).

You should also include the summary line from bloat-o-meter, and mention
that this allows widespread GCC inlining and dead code elimination.

A couple of minor formatting-only nits below; the code changes
themselves look fine.

> --- a/include/linux/capability.h
> +++ b/include/linux/capability.h
> @@ -211,8 +211,19 @@ extern bool has_ns_capability(struct task_struct *t,
> extern bool has_capability_noaudit(struct task_struct *t, int cap);
> extern bool has_ns_capability_noaudit(struct task_struct *t,
> struct user_namespace *ns, int cap);
> +#ifdef CONFIG_NON_ROOT
> extern bool capable(int cap);
> extern bool ns_capable(struct user_namespace *ns, int cap);
> +#else
> +static inline bool capable(int cap)
> +{
> + return true;
> +}

I would suggest leaving a blank line here between the two functions.

> +static inline bool ns_capable(struct user_namespace *ns, int cap)
> +{
> + return true;
> +}
> +#endif /* CONFIG_NON_ROOT */
> extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
> extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
>
> diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
> index 2d1f9b6..70da49a 100644
> --- a/include/linux/uidgid.h
> +++ b/include/linux/uidgid.h
> @@ -29,15 +29,26 @@ typedef struct {
> #define KUIDT_INIT(value) (kuid_t){ value }
> #define KGIDT_INIT(value) (kgid_t){ value }
>
> +#ifdef CONFIG_NON_ROOT
> static inline uid_t __kuid_val(kuid_t uid)
> {
> return uid.val;
> }
> -

Don't delete this blank line; someone will probably complain upstream
about including unrelated changes.

> static inline gid_t __kgid_val(kgid_t gid)
> {
> return gid.val;
> }
> +#else
> +static inline uid_t __kuid_val(kuid_t uid)
> +{
> + return 0;
> +}

I'd leave a blank line here to match the pair above.

> @@ -1357,6 +1360,15 @@ config UID16
> help
> This enables the legacy 16-bit UID syscall wrappers.
>
> +config NON_ROOT
> + bool "Enable support for multiple users" if EXPERT
> + default y
> + help
> + This option enables support for non-root users.
> + If not set, capabilities are also disabled.

How about: "This option enables support for non-root users, groups, and
capabilities."

This help text also needs to spell out the implications more explicitly:

If you say N here, all processes will run with UID 0, GID 0, and all
possible capabilities. Saying N here also compiles out support for
system calls related to UIDs, GIDs, and capabilities, such as setuid,
setgid, and capset.

> + Leave this option as it is if unsure.

The usual wording is "If unsure, say Y."

> diff --git a/kernel/sys.c b/kernel/sys.c
> index a8c9f5a..bb7c2a8 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -319,6 +319,7 @@ out_unlock:
> * SMP: There are not races, the GIDs are checked only by filesystem
> * operations (as far as semantic preservation is concerned).
> */
> +#ifdef CONFIG_NON_ROOT
> SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
> {
> struct user_namespace *ns = current_user_ns();
> @@ -565,7 +566,6 @@ error:
> return retval;
> }
>
> -
> /*
> * This function implements a generic ability to update ruid, euid,
> * and suid. This allows you to implement the 4.4 compatible seteuid().
> @@ -729,6 +729,7 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t _
> return retval;
> }
>
> +#endif
>
> /*
> * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This

Shouldn't get/setfsuid go away as well? And for that matter,
get/setgroups?

- Josh Triplett

Iulia Manda

unread,
Jan 16, 2015, 8:00:31 AM1/16/15
to jo...@joshtriplett.org, opw-k...@googlegroups.com
There are a lot of embedded systems that run most or all of their functionality
in init, running as root:root. For these systems, supporting multiple users and
groups is not necessary.

This patch adds a new symbol that permits compiling out support for non-root
users and groups. As capabilities depend on the existence of multiple users,
they are also stubbed out if we only keep the root user.

When this symbol is not defined, UID and GID are zero in any possible case
and processes always have all capabilities.

Also, the following syscalls are compiled out: setuid, setregid, setgid,
setreuid, setresuid, getresuid, setresgid, getresgid, setgroups, getgroups,
setfsuid, setfsgid, capget, capset.

This change saves almost 25 KB on a defconfig build.

Bloat-o-meter output:
add/remove: 7/47 grow/shrink: 21/428 up/down: 1701/-26371 (-24670)

Signed-off-by: Iulia Manda <iulia....@gmail.com>
---
include/linux/capability.h | 12 ++++++++++++
include/linux/uidgid.h | 12 ++++++++++++
init/Kconfig | 17 +++++++++++++++++
kernel/capability.c | 6 ++++++
kernel/groups.c | 4 ++++
kernel/sys.c | 2 ++
kernel/sys_ni.c | 14 ++++++++++++++
7 files changed, 67 insertions(+)

diff --git a/include/linux/capability.h b/include/linux/capability.h
index aa93e5e..d8791d2 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -211,8 +211,20 @@ extern bool has_ns_capability(struct task_struct *t,
extern bool has_capability_noaudit(struct task_struct *t, int cap);
extern bool has_ns_capability_noaudit(struct task_struct *t,
struct user_namespace *ns, int cap);
+#ifdef CONFIG_NON_ROOT
extern bool capable(int cap);
extern bool ns_capable(struct user_namespace *ns, int cap);
+#else
+static inline bool capable(int cap)
+{
+ return true;
+}
+
+static inline bool ns_capable(struct user_namespace *ns, int cap)
+{
+ return true;
+}
+#endif /* CONFIG_NON_ROOT */
extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);

diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index 2d1f9b6..22bd1fa 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -29,6 +29,7 @@ typedef struct {
#define KUIDT_INIT(value) (kuid_t){ value }
#define KGIDT_INIT(value) (kgid_t){ value }

+#ifdef CONFIG_NON_ROOT
static inline uid_t __kuid_val(kuid_t uid)
{
return uid.val;
@@ -38,6 +39,17 @@ static inline gid_t __kgid_val(kgid_t gid)
{
return gid.val;
}
+#else
+static inline uid_t __kuid_val(kuid_t uid)
+{
+ return 0;
+}
+
+static inline gid_t __kgid_val(kgid_t gid)
+{
+ return 0;
+}
+#endif

#define GLOBAL_ROOT_UID KUIDT_INIT(0)
#define GLOBAL_ROOT_GID KGIDT_INIT(0)
diff --git a/init/Kconfig b/init/Kconfig
index 9afb971..0a59711 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -394,6 +394,7 @@ endchoice

config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
+ select NON_ROOT
help
If you say Y here, a user level program will be able to instruct the
kernel (via a special system call) to write process accounting
@@ -420,6 +421,7 @@ config BSD_PROCESS_ACCT_V3
config TASKSTATS
bool "Export task/process statistics through netlink"
depends on NET
+ select NON_ROOT
default n
help
Export selected statistics for tasks/processes through the
@@ -1140,6 +1142,7 @@ config CHECKPOINT_RESTORE

menuconfig NAMESPACES
bool "Namespaces support" if EXPERT
+ depends on NON_ROOT
default !EXPERT
help
Provides the way to make tasks work with different objects using
@@ -1357,6 +1360,20 @@ config UID16
help
This enables the legacy 16-bit UID syscall wrappers.

+config NON_ROOT
+ bool "Multiple users, groups and capabilities support" if EXPERT
+ default y
+ help
+ This option enables support for non-root users, groups and
+ capabilities.
+
+ If you say N here, all processes will run with UID 0, GID 0, and all
+ possible capabilities. Saying N here also compiles out support for
+ system calls related to UIDs, GIDs, and capabilities, such as setuid,
+ setgid, and capset.
+
+ If unsure, say Y here.
diff --git a/kernel/groups.c b/kernel/groups.c
index 664411f..94f2c89 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -190,6 +190,7 @@ int set_current_groups(struct group_info *group_info)

EXPORT_SYMBOL(set_current_groups);

+#ifdef CONFIG_NON_ROOT
SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
{
const struct cred *cred = current_cred();
@@ -213,6 +214,7 @@ SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
out:
return i;
}
+#endif

bool may_setgroups(void)
{
@@ -227,6 +229,7 @@ bool may_setgroups(void)
* without another task interfering.
*/

+#ifdef CONFIG_NON_ROOT
SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
{
struct group_info *group_info;
@@ -251,6 +254,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)

return retval;
}
+#endif

/*
* Check whether we're fsgid/egid or in the supplemental group..
diff --git a/kernel/sys.c b/kernel/sys.c
index a8c9f5a..bfe532b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -319,6 +319,7 @@ out_unlock:
* SMP: There are not races, the GIDs are checked only by filesystem
* operations (as far as semantic preservation is concerned).
*/
+#ifdef CONFIG_NON_ROOT
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
struct user_namespace *ns = current_user_ns();
@@ -809,6 +810,7 @@ change_okay:
commit_creds(new);
return old_fsgid;
}
+#endif /* CONFIG_NON_ROOT */

/**
* sys_getpid - return the thread group id of the current process
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5adcb0a..7995ef5 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -159,6 +159,20 @@ cond_syscall(sys_uselib);
cond_syscall(sys_fadvise64);
cond_syscall(sys_fadvise64_64);
cond_syscall(sys_madvise);
+cond_syscall(sys_setuid);
+cond_syscall(sys_setregid);
+cond_syscall(sys_setgid);
+cond_syscall(sys_setreuid);
+cond_syscall(sys_setresuid);
+cond_syscall(sys_getresuid);
+cond_syscall(sys_setresgid);
+cond_syscall(sys_getresgid);
+cond_syscall(sys_setgroups);
+cond_syscall(sys_getgroups);
+cond_syscall(sys_setfsuid);
+cond_syscall(sys_setfsgid);

jo...@joshtriplett.org

unread,
Jan 16, 2015, 2:51:30 PM1/16/15
to Iulia Manda, opw-k...@googlegroups.com
On Fri, Jan 16, 2015 at 02:59:42PM +0200, Iulia Manda wrote:
> There are a lot of embedded systems that run most or all of their functionality
> in init, running as root:root. For these systems, supporting multiple users and
> groups is not necessary.
>
> This patch adds a new symbol that permits compiling out support for non-root
> users and groups. As capabilities depend on the existence of multiple users,
> they are also stubbed out if we only keep the root user.

This isn't quite accurate: capabilities don't depend on multiple users,
since root can drop capabilities too. That's less common (it makes more
sense to drop to an unprivileged user and keep some root capabilities
than stay root and drop some capabilities), and I still think it makes
sense to combine these options, but I'd like to avoid leaving any room
for upstream to nitpick. :)

Perhaps:

"This patch adds a new symbol, CONFIG_NON_ROOT, that makes support for
non-root users, non-root groups, and capabilities optional."

> When this symbol is not defined, UID and GID are zero in any possible case
> and processes always have all capabilities.
>
> Also, the following syscalls are compiled out: setuid, setregid, setgid,
> setreuid, setresuid, getresuid, setresgid, getresgid, setgroups, getgroups,
> setfsuid, setfsgid, capget, capset.

I just realized that the 16-bit versions of these syscalls need
compiling out too, in this case. I'd suggest making UID16 "depends on
NON_ROOT".

> This change saves almost 25 KB on a defconfig build.

Very nice!

> Bloat-o-meter output:
> add/remove: 7/47 grow/shrink: 21/428 up/down: 1701/-26371 (-24670)
>
> Signed-off-by: Iulia Manda <iulia....@gmail.com>

With the commit message and UID16 changes:
Reviewed-by: Josh Triplett <jo...@joshtriplett.org>

Thanks for working on this!
Reply all
Reply to author
Forward
0 new messages