runtime: use Uname version check for 64bits time on 32bits arch codepaths
The previous fallback-on-ENOSYS logic causes issues on forks of Linux.
Android: #77621 (CL 750040 added a workaround with a TODO, this fixes that TODO)
Probing the 64-bit time syscalls causes the OS to terminate the program
when running on Android versions <=10, since the seccomp jail does not know
about the 64-bit time syscalls and is configured to terminate the program
on any unknown syscall.
Synology's Linux: #77930
Old versions of Synology's Linux added custom vendor syscalls
without leaving a gap in the syscall numbers. As a result, when we call
a newer Linux syscall that was added upstream later, Synology's Linux
interprets it as a completely different syscall.
Fixes #77930
diff --git a/src/internal/runtime/syscall/linux/defs_linux_386.go b/src/internal/runtime/syscall/linux/defs_linux_386.go
index 7fdf5d3..4e8e645 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_386.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_386.go
@@ -17,6 +17,7 @@
SYS_OPENAT = 295
SYS_PREAD64 = 180
SYS_READ = 3
+ SYS_UNAME = 122
EFD_NONBLOCK = 0x800
diff --git a/src/internal/runtime/syscall/linux/defs_linux_amd64.go b/src/internal/runtime/syscall/linux/defs_linux_amd64.go
index 2c8676e..fa764d9 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_amd64.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_amd64.go
@@ -17,6 +17,7 @@
SYS_OPENAT = 257
SYS_PREAD64 = 17
SYS_READ = 0
+ SYS_UNAME = 63
EFD_NONBLOCK = 0x800
diff --git a/src/internal/runtime/syscall/linux/defs_linux_arm.go b/src/internal/runtime/syscall/linux/defs_linux_arm.go
index a0b395d..cef556d 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_arm.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_arm.go
@@ -17,6 +17,7 @@
SYS_OPENAT = 322
SYS_PREAD64 = 180
SYS_READ = 3
+ SYS_UNAME = 122
EFD_NONBLOCK = 0x800
diff --git a/src/internal/runtime/syscall/linux/defs_linux_arm64.go b/src/internal/runtime/syscall/linux/defs_linux_arm64.go
index 223dce0..eabddba 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_arm64.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_arm64.go
@@ -17,6 +17,7 @@
SYS_OPENAT = 56
SYS_PREAD64 = 67
SYS_READ = 63
+ SYS_UNAME = 160
EFD_NONBLOCK = 0x800
diff --git a/src/internal/runtime/syscall/linux/defs_linux_loong64.go b/src/internal/runtime/syscall/linux/defs_linux_loong64.go
index 8aa61c3..08e5d49 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_loong64.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_loong64.go
@@ -17,6 +17,7 @@
SYS_OPENAT = 56
SYS_PREAD64 = 67
SYS_READ = 63
+ SYS_UNAME = 160
EFD_NONBLOCK = 0x800
diff --git a/src/internal/runtime/syscall/linux/defs_linux_mips64x.go b/src/internal/runtime/syscall/linux/defs_linux_mips64x.go
index 84b760d..b5794e5 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_mips64x.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_mips64x.go
@@ -19,6 +19,7 @@
SYS_OPENAT = 5247
SYS_PREAD64 = 5016
SYS_READ = 5000
+ SYS_UNAME = 5061
EFD_NONBLOCK = 0x80
diff --git a/src/internal/runtime/syscall/linux/defs_linux_mipsx.go b/src/internal/runtime/syscall/linux/defs_linux_mipsx.go
index a9be214..1fb4d91 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_mipsx.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_mipsx.go
@@ -19,6 +19,7 @@
SYS_OPENAT = 4288
SYS_PREAD64 = 4200
SYS_READ = 4003
+ SYS_UNAME = 4122
EFD_NONBLOCK = 0x80
diff --git a/src/internal/runtime/syscall/linux/defs_linux_ppc64x.go b/src/internal/runtime/syscall/linux/defs_linux_ppc64x.go
index 63f4e5d..ee93ad3 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_ppc64x.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_ppc64x.go
@@ -19,6 +19,7 @@
SYS_OPENAT = 286
SYS_PREAD64 = 179
SYS_READ = 3
+ SYS_UNAME = 122
EFD_NONBLOCK = 0x800
diff --git a/src/internal/runtime/syscall/linux/defs_linux_riscv64.go b/src/internal/runtime/syscall/linux/defs_linux_riscv64.go
index 8aa61c3..08e5d49 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_riscv64.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_riscv64.go
@@ -17,6 +17,7 @@
SYS_OPENAT = 56
SYS_PREAD64 = 67
SYS_READ = 63
+ SYS_UNAME = 160
EFD_NONBLOCK = 0x800
diff --git a/src/internal/runtime/syscall/linux/defs_linux_s390x.go b/src/internal/runtime/syscall/linux/defs_linux_s390x.go
index 52945db..da11c70 100644
--- a/src/internal/runtime/syscall/linux/defs_linux_s390x.go
+++ b/src/internal/runtime/syscall/linux/defs_linux_s390x.go
@@ -17,6 +17,7 @@
SYS_OPENAT = 288
SYS_PREAD64 = 180
SYS_READ = 3
+ SYS_UNAME = 122
EFD_NONBLOCK = 0x800
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 7e6af22..3db58ee 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -354,6 +354,7 @@
numCPUStartup = getCPUCount()
physHugePageSize = getHugePageSize()
vgetrandomInit()
+ configure64bitsTimeOn32BitsArchitectures()
}
var urandom_dev = []byte("/dev/urandom\x00")
@@ -935,3 +936,57 @@
r, _, err := linux.Syscall6(linux.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
return int32(r), int32(err)
}
+
+type utsname struct {
+ Sysname [65]int8
+ Nodename [65]int8
+ Release [65]int8
+ Version [65]int8
+ Machine [65]int8
+ Domainname [65]int8
+}
+
+type kernelVersion struct {
+ major int
+ minor int
+}
+
+// getKernelVersion returns major and minor kernel version numbers
+// parsed from the syscall.Uname's Release field.
+func getKernelVersion() kernelVersion {
+ var uname utsname
+ if _, _, err := linux.Syscall6(linux.SYS_UNAME, uintptr(unsafe.Pointer(&uname)), 0, 0, 0, 0, 0); err != 0 {
+ throw("uname failed")
+ }
+
+ var (
+ values [2]int
+ value, vi int
+ )
+ for _, c := range uname.Release {
+ if '0' <= c && c <= '9' {
+ value = (value * 10) + int(c-'0')
+ } else {
+ // Note that we're assuming N.N.N here.
+ // If we see anything else, we are likely to mis-parse it.
+ values[vi] = value
+ vi++
+ if vi >= len(values) {
+ break
+ }
+ value = 0
+ }
+ }
+
+ if values[0] == 0 && values[1] == 0 {
+ throw("failed to parse kernel version from uname")
+ }
+
+ return kernelVersion{major: values[0], minor: values[1]}
+}
+
+// GE checks if the running kernel version
+// is greater than or equal to the provided version.
+func (kv kernelVersion) GE(x, y int) bool {
+ return kv.major > x || (kv.major == x && kv.minor >= y)
+}
diff --git a/src/runtime/os_linux32.go b/src/runtime/os_linux32.go
index 748fc53..02cb18f 100644
--- a/src/runtime/os_linux32.go
+++ b/src/runtime/os_linux32.go
@@ -7,32 +7,25 @@
package runtime
import (
- "internal/runtime/atomic"
"unsafe"
)
+func configure64bitsTimeOn32BitsArchitectures() {
+ use64bitsTimeOn32bits = getKernelVersion().GE(5, 1)
+}
+
//go:noescape
func futex_time32(addr unsafe.Pointer, op int32, val uint32, ts *timespec32, addr2 unsafe.Pointer, val3 uint32) int32
//go:noescape
func futex_time64(addr unsafe.Pointer, op int32, val uint32, ts *timespec, addr2 unsafe.Pointer, val3 uint32) int32
-var isFutexTime32bitOnly atomic.Bool
+var use64bitsTimeOn32bits bool
//go:nosplit
func futex(addr unsafe.Pointer, op int32, val uint32, ts *timespec, addr2 unsafe.Pointer, val3 uint32) int32 {
- // In Android versions 8.0-10 (API levels 26-29), futex_time64
- // is not in the allowlist of the seccomp filter and will lead to a
- // runtime crash. See issue 77621.
- // TODO: Check Android version and do not skip futex_time64
- // on Android 11 or higher (API level 30+).
- if GOOS != "android" && !isFutexTime32bitOnly.Load() {
- ret := futex_time64(addr, op, val, ts, addr2, val3)
- // futex_time64 is only supported on Linux 5.0+
- if ret != -_ENOSYS {
- return ret
- }
- isFutexTime32bitOnly.Store(true)
+ if use64bitsTimeOn32bits {
+ return futex_time64(addr, op, val, ts, addr2, val3)
}
// Downgrade ts.
var ts32 timespec32
@@ -50,22 +43,10 @@
//go:noescape
func timer_settime64(timerid int32, flags int32, new, old *itimerspec) int32
-var isSetTime32bitOnly atomic.Bool
-
//go:nosplit
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32 {
- // In Android versions 8.0-10 (API levels 26-29), timer_settime64
- // is not in the allowlist of the seccomp filter and will lead to a
- // runtime crash. See issue 77621.
- // TODO: Check Android version and do not skip timer_settime64
- // on Android 11 or higher (API level 30+).
- if GOOS != "android" && !isSetTime32bitOnly.Load() {
- ret := timer_settime64(timerid, flags, new, old)
- // timer_settime64 is only supported on Linux 5.0+
- if ret != -_ENOSYS {
- return ret
- }
- isSetTime32bitOnly.Store(true)
+ if use64bitsTimeOn32bits {
+ return timer_settime64(timerid, flags, new, old)
}
var newts, oldts itimerspec32
@@ -83,6 +64,5 @@
old32 = &oldts
}
- // Fall back to 32-bit timer
return timer_settime32(timerid, flags, new32, old32)
}
diff --git a/src/runtime/os_linux64.go b/src/runtime/os_linux64.go
index 7b70d80..f9571dd 100644
--- a/src/runtime/os_linux64.go
+++ b/src/runtime/os_linux64.go
@@ -10,6 +10,8 @@
"unsafe"
)
+func configure64bitsTimeOn32BitsArchitectures() {}
+
//go:noescape
func futex(addr unsafe.Pointer, op int32, val uint32, ts *timespec, addr2 unsafe.Pointer, val3 uint32) int32
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Auto-Submit | +0 |
Ctrl+R accident; I didn't even run this code.
We want to test this a bit before submitting it.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
> func getKernelVersion() kernelVersion {
Doesn't this exist as unix.KernelVersion?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
func getKernelVersion() kernelVersion {Doesn't this exist as unix.KernelVersion?
I don't think I can call unix from runtime due to import cycles.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
One of the two reporters tested the patch and it looks functional:
https://github.com/golang/go/issues/77930#issuecomment-3996903970
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
> Sysname [65]int8
No need to repeat the obnoxious API from package syscall, right? i.e., just make these `[65]byte`.
> if _, _, err := linux.Syscall6(linux.SYS_UNAME, uintptr(unsafe.Pointer(&uname)), 0, 0, 0, 0, 0); err != 0 {
Since you're already using the linux package, you may as well add a wrapper to internal/runtime/syscall/linux/syscall_linux.go.
> for _, c := range uname.Release {
I don't feel too strongly, but I have a minor preference for using the parsing code from CL 246200 (parseRelease), since it has a few years of use.
> // TODO: Check Android version and do not skip futex_time64
> // on Android 11 or higher (API level 30+).
Done? I think we're safe to assume that Android 10 won't be running Linux 5.1+?
> return futex_time64(addr, op, val, ts, addr2, val3)
Isn't this syscall also behind a kernel CONFIG option? Do we still need to test for ENOSYS?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Sysname [65]int8No need to repeat the obnoxious API from package syscall, right? i.e., just make these `[65]byte`.
True, I just blindly copy-pasted.
for _, c := range uname.Release {I don't feel too strongly, but I have a minor preference for using the parsing code from CL 246200 (parseRelease), since it has a few years of use.
Sure I'll use this.
// TODO: Check Android version and do not skip futex_time64
// on Android 11 or higher (API level 30+).Done? I think we're safe to assume that Android 10 won't be running Linux 5.1+?
Yeah, so I've checked and I'm not sure.
Android, in their infinite wisdom, supports 3 kernel branches per major Android version (probably for hardware / SoCs not in upstream?).
For Android 11 we have 5.4, 4.19 and 4.14.
I am 95% confident that for Android 11 running 5.4, the 64-bit syscall works.
I do not know what happens if you try to use the 64-bit syscall on Android >= 11 running Linux <5.1.
It could give you ENOSYS (if the seccomp config is per Android version and not tuned to the underlying kernel) or it could give a seccomp violation.
However, in that case we will parse uname, see that the kernel is below 5.1, and not use the 64-bit syscall.
So I think it's fine.
return futex_time64(addr, op, val, ts, addr2, val3)Isn't this syscall also behind a kernel CONFIG option? Do we still need to test for ENOSYS?
It is the other way around.
The 32-bit syscall is gated behind the Kconfig option.
The 64-bit syscall always exists if the version is 5.1 or later.
The Kconfig option was also added in 5.1.
So if <5.1 we know the 32-bit syscall always exists.
If >=5.1 we know the 64-bit syscall always exists.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// TODO: Check Android version and do not skip futex_time64
// on Android 11 or higher (API level 30+).JorropoDone? I think we're safe to assume that Android 10 won't be running Linux 5.1+?
Yeah so I've checked and I'm not sure.
Android in their infinite wisdom support 3 kernel branches per major Android version (probably for hardware / SOCs not in upstream ¿),
For Android 11 we have 5.4, 4.19 and 4.14.I am 95% confident that for Android 11 running 5.4, the 64 bits syscall works.
I do not know what happens if you try to use the 64 bits syscall for Android >= 11 yet running Linux <5.1.It could give you ENOSYS (if the seccomp config is per Android version and not tuned to the underlying kernel) or it could give a seccomp violation.
However then we will parse uname, see that it is bellow 5.1 and not use it.So I think it's fine
It could give you ENOSYS (if the seccomp config is per Android version and not tuned to the underlying kernel) or it could give a seccomp violation.
I believe the seccomp configuration is based primarily on the Android version.
In case you haven't seen it https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/tools/genseccomp.py generates the seccomp BPF program using the various text files in https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/. I think the tl;dr is it allows `(SYSCALLS.TXT - SECCOMP_BLOCKLIST*.txt + SECCOMP_ALLOWLIST*.TXT)` (with some adjustment for app vs system, etc).
I haven't looked closely at how it has changed between versions or quite followed which deny action is selected (return EPERM vs signal vs kill).
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// TODO: Check Android version and do not skip futex_time64
// on Android 11 or higher (API level 30+).JorropoDone? I think we're safe to assume that Android 10 won't be running Linux 5.1+?
Michael PrattYeah so I've checked and I'm not sure.
Android in their infinite wisdom support 3 kernel branches per major Android version (probably for hardware / SOCs not in upstream ¿),
For Android 11 we have 5.4, 4.19 and 4.14.I am 95% confident that for Android 11 running 5.4, the 64 bits syscall works.
I do not know what happens if you try to use the 64 bits syscall for Android >= 11 yet running Linux <5.1.It could give you ENOSYS (if the seccomp config is per Android version and not tuned to the underlying kernel) or it could give a seccomp violation.
However then we will parse uname, see that it is bellow 5.1 and not use it.So I think it's fine
It could give you ENOSYS (if the seccomp config is per Android version and not tuned to the underlying kernel) or it could give a seccomp violation.
I believe the seccomp configuration is based primarily on the Android.
In case you haven't seen it https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/tools/genseccomp.py generates the seccomp BPF program using the various text files in https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/. I think the tl;dr is it allows `(SYSCALLS.TXT - SECCOMP_BLOCKLIST*.txt + SECCOMP_ALLOWLIST*.TXT)` (with some adjustment for app vs system, etc).
I haven't looked closely at how it has changed between versions or quite followed which deny action is selected (return EPERM vs signal vs kill).
To be honest, unless I learn something new, I don't care about looking into the Android code 😄
Because there isn't a problem with using the 32-bit syscalls on versions of Linux (/ Android) where `CONFIG_COMPAT_32BIT_TIME` does not exist.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Jorropo, are you still working on this?
I've been busy with work; if someone else implements a fixed Uname-based solution, I'll review and merge it.
Otherwise I'll get back to this in a couple weeks.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |