Commit 5930c7de authored by Andrei Vagin's avatar Andrei Vagin Committed by Ian Lance Taylor

syscall: add all ambient capabilities into permitted and inheritable sets

According to the prctl man page, each capability from the ambient set
must already be present in both  the  permitted  and  the  inheritable
sets  of the process.

exec_linux_test suggests configuring the capabilities in the parent
process. This doesn't look nice, because:
* Capabilities are a per-thread attribute, so we need to use
LockOSThread.
* Need to restore capabilities after creating a process.
* Doesn't work with user namespaces, because a process gets capabilities
when a namespace is created.

Fixes #23152

Change-Id: Iba23e530fc7b9f5182d602fe855f82218f354219
Reviewed-on: https://go-review.googlesource.com/c/go/+/156577
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
parent f1d5ce01
...@@ -94,6 +94,29 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr ...@@ -94,6 +94,29 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
return pid, 0 return pid, 0
} }
const _LINUX_CAPABILITY_VERSION_3 = 0x20080522
type capHeader struct {
version uint32
pid int32
}
type capData struct {
effective uint32
permitted uint32
inheritable uint32
}
type caps struct {
hdr capHeader
data [2]capData
}
// See CAP_TO_INDEX in linux/capability.h:
func capToIndex(cap uintptr) uintptr { return cap >> 5 }
// See CAP_TO_MASK in linux/capability.h:
func capToMask(cap uintptr) uint32 { return 1 << uint(cap&31) }
// forkAndExecInChild1 implements the body of forkAndExecInChild up to // forkAndExecInChild1 implements the body of forkAndExecInChild up to
// the parent's post-fork path. This is a separate function so we can // the parent's post-fork path. This is a separate function so we can
// separate the child's and parent's stack frames if we're using // separate the child's and parent's stack frames if we're using
...@@ -122,6 +145,7 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att ...@@ -122,6 +145,7 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att
err2 Errno err2 Errno
nextfd int nextfd int
i int i int
caps caps
) )
// Record parent PID so child can test if it has died. // Record parent PID so child can test if it has died.
...@@ -286,12 +310,33 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att ...@@ -286,12 +310,33 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att
} }
} }
if len(sys.AmbientCaps) != 0 {
// Ambient capabilities were added in the 4.3 kernel,
// so it is safe to always use _LINUX_CAPABILITY_VERSION_3.
caps.hdr.version = _LINUX_CAPABILITY_VERSION_3
if _, _, err1 := RawSyscall(SYS_CAPGET, uintptr(unsafe.Pointer(&caps.hdr)), uintptr(unsafe.Pointer(&caps.data[0])), 0); err1 != 0 {
goto childerror
}
for _, c := range sys.AmbientCaps {
// Add the c capability to the permitted and inheritable capability mask,
// otherwise we will not be able to add it to the ambient capability mask.
caps.data[capToIndex(c)].permitted |= capToMask(c)
caps.data[capToIndex(c)].inheritable |= capToMask(c)
}
if _, _, err1 := RawSyscall(SYS_CAPSET, uintptr(unsafe.Pointer(&caps.hdr)), uintptr(unsafe.Pointer(&caps.data[0])), 0); err1 != 0 {
goto childerror
}
for _, c := range sys.AmbientCaps { for _, c := range sys.AmbientCaps {
_, _, err1 = RawSyscall6(SYS_PRCTL, PR_CAP_AMBIENT, uintptr(PR_CAP_AMBIENT_RAISE), c, 0, 0, 0) _, _, err1 = RawSyscall6(SYS_PRCTL, PR_CAP_AMBIENT, uintptr(PR_CAP_AMBIENT_RAISE), c, 0, 0, 0)
if err1 != 0 { if err1 != 0 {
goto childerror goto childerror
} }
} }
}
// Chdir // Chdir
if dir != nil { if dir != nil {
......
...@@ -436,7 +436,7 @@ func TestUnshareMountNameSpaceChroot(t *testing.T) { ...@@ -436,7 +436,7 @@ func TestUnshareMountNameSpaceChroot(t *testing.T) {
type capHeader struct { type capHeader struct {
version uint32 version uint32
pid int pid int32
} }
type capData struct { type capData struct {
...@@ -446,6 +446,7 @@ type capData struct { ...@@ -446,6 +446,7 @@ type capData struct {
} }
const CAP_SYS_TIME = 25 const CAP_SYS_TIME = 25
const CAP_SYSLOG = 34
type caps struct { type caps struct {
hdr capHeader hdr capHeader
...@@ -506,15 +507,28 @@ func TestAmbientCapsHelper(*testing.T) { ...@@ -506,15 +507,28 @@ func TestAmbientCapsHelper(*testing.T) {
fmt.Fprintln(os.Stderr, "CAP_SYS_TIME unexpectedly not in the effective capability mask") fmt.Fprintln(os.Stderr, "CAP_SYS_TIME unexpectedly not in the effective capability mask")
os.Exit(2) os.Exit(2)
} }
if caps.data[1].effective&(1<<uint(CAP_SYSLOG&31)) == 0 {
fmt.Fprintln(os.Stderr, "CAP_SYSLOG unexpectedly not in the effective capability mask")
os.Exit(2)
}
} }
func TestAmbientCaps(t *testing.T) { func TestAmbientCaps(t *testing.T) {
skipInContainer(t)
// Make sure we are running as root so we have permissions to use unshare // Make sure we are running as root so we have permissions to use unshare
// and create a network namespace. // and create a network namespace.
if os.Getuid() != 0 { if os.Getuid() != 0 {
t.Skip("kernel prohibits unshare in unprivileged process, unless using user namespace") t.Skip("kernel prohibits unshare in unprivileged process, unless using user namespace")
} }
testAmbientCaps(t, false)
}
func TestAmbientCapsUserns(t *testing.T) {
testAmbientCaps(t, true)
}
func testAmbientCaps(t *testing.T, userns bool) {
skipInContainer(t)
mustSupportAmbientCaps(t) mustSupportAmbientCaps(t)
// When running under the Go continuous build, skip tests for // When running under the Go continuous build, skip tests for
...@@ -530,20 +544,6 @@ func TestAmbientCaps(t *testing.T) { ...@@ -530,20 +544,6 @@ func TestAmbientCaps(t *testing.T) {
t.Skip("skipping test on android; see Issue 27327") t.Skip("skipping test on android; see Issue 27327")
} }
caps, err := getCaps()
if err != nil {
t.Fatal(err)
}
// Add CAP_SYS_TIME to the permitted and inheritable capability mask,
// otherwise we will not be able to add it to the ambient capability mask.
caps.data[0].permitted |= 1 << uint(CAP_SYS_TIME)
caps.data[0].inheritable |= 1 << uint(CAP_SYS_TIME)
if _, _, errno := syscall.Syscall(syscall.SYS_CAPSET, uintptr(unsafe.Pointer(&caps.hdr)), uintptr(unsafe.Pointer(&caps.data[0])), 0); errno != 0 {
t.Fatalf("SYS_CAPSET: %v", errno)
}
u, err := user.Lookup("nobody") u, err := user.Lookup("nobody")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
...@@ -588,7 +588,29 @@ func TestAmbientCaps(t *testing.T) { ...@@ -588,7 +588,29 @@ func TestAmbientCaps(t *testing.T) {
Uid: uint32(uid), Uid: uint32(uid),
Gid: uint32(gid), Gid: uint32(gid),
}, },
AmbientCaps: []uintptr{CAP_SYS_TIME}, AmbientCaps: []uintptr{CAP_SYS_TIME, CAP_SYSLOG},
}
if userns {
cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWUSER
const nobody = 65534
uid := os.Getuid()
gid := os.Getgid()
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{
ContainerID: int(nobody),
HostID: int(uid),
Size: int(1),
}}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{{
ContainerID: int(nobody),
HostID: int(gid),
Size: int(1),
}}
// Set credentials to run as user and group nobody.
cmd.SysProcAttr.Credential = &syscall.Credential{
Uid: nobody,
Gid: nobody,
}
} }
if err := cmd.Run(); err != nil { if err := cmd.Run(); err != nil {
t.Fatal(err.Error()) t.Fatal(err.Error())
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment