package syscall
import (
)
// SysProcIDMap describes one ID-range mapping for a user namespace. Each
// value is rendered by this file as a single "ContainerID HostID Size" line
// and written to a /proc uid_map or gid_map file.
type SysProcIDMap struct {
	// ContainerID is emitted as the first field of the map line; per
	// user_namespaces(7) that is the start of the range inside the namespace.
	ContainerID int
	// HostID is emitted as the second field of the map line; per
	// user_namespaces(7) that is the start of the range outside the namespace.
	HostID int
	// Size is emitted as the third field of the map line: the length of the
	// mapped range.
	Size int
}
// SysProcAttr holds the Linux-specific options that the fork/exec code in
// this file applies in the new process before it calls execve. The field
// comments describe how each field is consumed by that code.
type SysProcAttr struct {
	// Chroot is the directory to chroot into.
	// NOTE(review): the visible code calls SYS_CHROOT with a separate *byte
	// argument; presumably the caller derives it from this field — confirm.
	Chroot string
	// Credential, when non-nil, makes the child call setgroups (subject to
	// the GID-mapping/NoSetGroups conditions), then setgid, then setuid.
	Credential *Credential
	// Ptrace makes the child issue PTRACE_TRACEME before exec.
	Ptrace bool
	// Setsid makes the child call setsid.
	Setsid bool
	// Setpgid makes the child call setpgid(0, Pgid).
	Setpgid bool
	// Setctty makes the child issue the TIOCSCTTY ioctl on descriptor Ctty.
	Setctty bool
	// Noctty makes the child issue the TIOCNOTTY ioctl on descriptor 0.
	Noctty bool
	// Ctty is the descriptor used for the TIOCSCTTY and TIOCSPGRP ioctls.
	Ctty int
	// Foreground makes the child call setpgid (like Setpgid) and then
	// TIOCSPGRP on Ctty, using Pgid or, when Pgid is 0, the child's own pid.
	Foreground bool
	// Pgid is the process group passed to setpgid and TIOCSPGRP.
	Pgid int
	// Pdeathsig, when non-zero, is installed with prctl(PR_SET_PDEATHSIG).
	Pdeathsig Signal
	// Cloneflags is OR-ed into the flags of the clone system call.
	Cloneflags uintptr
	// Unshareflags, when non-zero, is passed to unshare in the child.
	Unshareflags uintptr
	// UidMappings are the user-ID mappings written to a uid_map file.
	UidMappings []SysProcIDMap
	// GidMappings are the group-ID mappings written to a gid_map file.
	GidMappings []SysProcIDMap
	// GidMappingsEnableSetgroups selects whether "allow" (true) or "deny"
	// (false) is written to the setgroups file when GidMappings is set.
	GidMappingsEnableSetgroups bool
	// AmbientCaps lists capabilities that are added to the permitted and
	// inheritable sets and then passed to prctl one by one.
	AmbientCaps []uintptr
}
// NUL-terminated byte arrays whose first-element addresses are handed to the
// raw SYS_MOUNT call in this file; fixed arrays are used so the pointers can
// be taken without building a C string at call time.
var (
	// none is the C string "none", passed as the mount source.
	none = [...]byte{'n', 'o', 'n', 'e', 0}
	// slash is the C string "/", passed as the mount target.
	slash = [...]byte{'/', 0}
)
// NOTE(review): three body-less function declarations whose names have been
// stripped, so they do not parse as written. This file calls
// runtime_BeforeFork, runtime_AfterFork and runtime_AfterForkInChild without
// defining them, so these are presumably those declarations (implemented
// outside this file) — confirm against the upstream source.
func ()
func ()
func ()
// NOTE(review): the function name, parameter names and local identifiers
// have been stripped from this block, so it does not parse as written.
// From its shape it is presumably the parent-side wrapper around
// forkAndExecInChild1 — confirm against the upstream source.
//
// It forwards all eight arguments to forkAndExecInChild1, runs
// runtime_AfterFork when a condition reported by that helper holds, and
// returns (0, errno) if the helper reported a non-zero Errno. When UID or GID
// mappings were requested it then performs a handshake over a two-element
// descriptor array: it closes element 0, optionally writes the mappings via
// writeUidGidMappings, writes an Errno-sized status word to element 1, and
// closes element 1.
func ( *byte, , []*byte, , *byte, *ProcAttr, *SysProcAttr, int) ( int, Errno) {
, , , := forkAndExecInChild1(, , , , , , , )
// Presumably the bool result of the helper says whether runtime_BeforeFork
// was called and therefore must be balanced here — TODO confirm.
if {
runtime_AfterFork()
}
if != 0 {
return 0,
}
= int()
if .UidMappings != nil || .GidMappings != nil {
Close([0])
var Errno
// The mappings are written from this side only when CLONE_NEWUSER is not
// in Unshareflags; forkAndExecInChild1 has its own code path that writes
// them after SYS_UNSHARE when it is.
if .Unshareflags&CLONE_NEWUSER == 0 {
if := writeUidGidMappings(, ); != nil {
= .(Errno)
}
}
// Send the status word (zero unless the mapping write failed) to the other
// end; forkAndExecInChild1 reads a word of the same size and aborts on a
// non-zero value.
RawSyscall(SYS_WRITE, uintptr([1]), uintptr(unsafe.Pointer(&)), unsafe.Sizeof())
Close([1])
}
return , 0
}
// _LINUX_CAPABILITY_VERSION_3 is the value stored in the capability header's
// version field before the SYS_CAPGET/SYS_CAPSET calls in this file.
// NOTE(review): presumably mirrors the kernel constant of the same name from
// linux/capability.h — confirm.
const _LINUX_CAPABILITY_VERSION_3 = 0x20080522
// capHeader is the first argument passed (by pointer) to the SYS_CAPGET and
// SYS_CAPSET raw system calls in this file.
type capHeader struct {
	// version is set to _LINUX_CAPABILITY_VERSION_3 before the calls.
	version uint32
	// pid is left at its zero value by the code in this file; per capget(2)
	// zero designates the calling thread.
	pid int32
}
// capData is one 32-bit word of each capability set exchanged with the
// kernel through SYS_CAPGET/SYS_CAPSET. The code in this file ORs bits into
// permitted and inheritable and leaves effective as returned by SYS_CAPGET.
type capData struct {
	// effective holds one word of the effective set.
	effective uint32
	// permitted holds one word of the permitted set.
	permitted uint32
	// inheritable holds one word of the inheritable set.
	inheritable uint32
}
// caps bundles the header and data buffers used for the SYS_CAPGET and
// SYS_CAPSET calls in this file.
type caps struct {
	// hdr is passed by pointer as the first syscall argument.
	hdr capHeader
	// data is passed via the address of its first element; the word to touch
	// for a given capability is chosen with capToIndex, so two 32-bit words
	// cover capability numbers 0 through 63.
	data [2]capData
}
// NOTE(review): name and parameter identifier stripped; presumably
// capToIndex, which this file uses to index caps.data — confirm.
// Returns its argument shifted right by 5, i.e. divided by 32: the index of
// the 32-bit word that holds the bit for that capability number.
func ( uintptr) uintptr { return >> 5 }
// NOTE(review): name and parameter identifier stripped; presumably
// capToMask, which this file ORs into capData fields — confirm.
// Returns a uint32 with a single bit set, at position (argument & 31): the
// bit for that capability number within its 32-bit word.
func ( uintptr) uint32 { return 1 << uint(&31) }
// NOTE(review): the function name, parameter names, jump label and most local
// identifiers have been stripped from this block, so it does not parse as
// written. Its signature matches the forkAndExecInChild1 call elsewhere in
// this file, so it is presumably that function — confirm against the upstream
// source before relying on the comments below. Comments that depend on a
// stripped identifier are hedged.
//
// The body creates a new process with a raw clone system call. The branch
// that continues past the early return after the clone applies the
// SysProcAttr settings in a fixed order (keep-caps, ID-mapping handshake,
// session, process group, foreground, unshare and namespace setup, chroot,
// credentials, ambient capabilities, chdir, parent-death signal, descriptor
// shuffling, controlling terminal, ptrace) and finally calls execve. Every
// failure jumps to the label at the end, which writes a status value to a
// descriptor and exits with status 253.
//
// Only raw system calls are used after the clone.
// NOTE(review): presumably because the child may share memory with the parent
// (CLONE_VM) and must not allocate or take locks — confirm.
func ( *byte, , []*byte, , *byte, *ProcAttr, *SysProcAttr, int) ( uintptr, Errno, [2]int, bool) {
// NOTE(review): names stripped; the values 0x2f and 0x2 match prctl's
// PR_CAP_AMBIENT and PR_CAP_AMBIENT_RAISE — confirm.
const (
= 0x2f
= 0x2
)
var (
Errno
int
int
caps
uintptr
, , []byte
, , []byte
)
// Build every path and payload up front, before the clone, so the code after
// the clone only has to pass pointers to already-built buffers.
if .UidMappings != nil {
= []byte("/proc/self/uid_map\000")
= formatIDMappings(.UidMappings)
}
if .GidMappings != nil {
= []byte("/proc/self/setgroups\000")
= []byte("/proc/self/gid_map\000")
if .GidMappingsEnableSetgroups {
= []byte("allow\000")
} else {
= []byte("deny\000")
}
= formatIDMappings(.GidMappings)
}
// Record the current pid before the clone; presumably compared against
// getppid in the Pdeathsig section below — TODO confirm.
, := rawSyscallNoError(SYS_GETPID, 0, 0, 0)
// Copy the Files list into an int slice while raising a running value to at
// least each entry (starting from len(Files)), then increment it.
// Presumably this yields a descriptor number above every descriptor in use.
:= make([]int, len(.Files))
= len(.Files)
for , := range .Files {
if < int() {
= int()
}
[] = int()
}
++
// A pipe is only needed when ID mappings were requested; it carries the
// handshake status word read further down.
if .UidMappings != nil || .GidMappings != nil {
if := forkExecPipe([:]); != nil {
= .(Errno)
return
}
}
// A flag that is true only on the listed architectures; it gates the
// rawVforkSyscall path in the switch below.
var bool
switch runtime.GOARCH {
case "amd64", "arm64", "ppc64", "riscv64", "s390x":
= true
}
runtime_BeforeFork()
// Presumably sets the bool result so the caller knows runtime_AfterFork must
// be called — TODO confirm.
= true
switch {
// vfork-style clone (CLONE_VFORK|CLONE_VM), used only when the flag above is
// set and no new user namespace is requested via Cloneflags or Unshareflags.
case && (.Cloneflags&CLONE_NEWUSER == 0 && .Unshareflags&CLONE_NEWUSER == 0):
, = rawVforkSyscall(SYS_CLONE, uintptr(SIGCHLD|CLONE_VFORK|CLONE_VM)|.Cloneflags)
// On s390x the flags are passed as the second clone argument, not the first.
case runtime.GOARCH == "s390x":
, _, = RawSyscall6(SYS_CLONE, 0, uintptr(SIGCHLD)|.Cloneflags, 0, 0, 0, 0)
default:
, _, = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|.Cloneflags, 0, 0, 0, 0, 0)
}
// Return if either value is non-zero; presumably "clone failed" or "we are
// the parent and the result is the child pid" — TODO confirm.
if != 0 || != 0 {
return
}
runtime_AfterForkInChild()
// Ask the kernel to keep capabilities across the later setuid when ambient
// capabilities were requested.
if len(.AmbientCaps) > 0 {
_, _, = RawSyscall6(SYS_PRCTL, PR_SET_KEEPCAPS, 1, 0, 0, 0, 0)
if != 0 {
goto
}
}
// ID-mapping handshake: close pipe element 1, then block reading a status
// word from element 0. A short read is reported as EINVAL; a non-zero word is
// taken as the error to report.
if .UidMappings != nil || .GidMappings != nil {
if _, _, = RawSyscall(SYS_CLOSE, uintptr([1]), 0, 0); != 0 {
goto
}
, _, = RawSyscall(SYS_READ, uintptr([0]), uintptr(unsafe.Pointer(&)), unsafe.Sizeof())
if != 0 {
goto
}
if != unsafe.Sizeof() {
= EINVAL
goto
}
if != 0 {
=
goto
}
}
// Create a new session.
if .Setsid {
_, _, = RawSyscall(SYS_SETSID, 0, 0, 0)
if != 0 {
goto
}
}
// Set the process group; Foreground also triggers this call.
if .Setpgid || .Foreground {
_, _, = RawSyscall(SYS_SETPGID, 0, uintptr(.Pgid), 0)
if != 0 {
goto
}
}
// Make a process group the foreground group of the terminal on Ctty, using
// Pgid or, when Pgid is 0, the current pid.
if .Foreground {
:= int32(.Pgid)
if == 0 {
, _ = rawSyscallNoError(SYS_GETPID, 0, 0, 0)
= int32()
}
_, _, = RawSyscall(SYS_IOCTL, uintptr(.Ctty), uintptr(TIOCSPGRP), uintptr(unsafe.Pointer(&)))
if != 0 {
goto
}
}
// Unshare namespaces, then do the namespace setup that depends on it.
if .Unshareflags != 0 {
_, _, = RawSyscall(SYS_UNSHARE, .Unshareflags, 0, 0)
if != 0 {
goto
}
// New user namespace with GID mappings: two open/write/close sequences.
// Presumably the first targets the setgroups file and the second the gid_map
// file prepared above — the buffer identifiers are stripped, TODO confirm.
if .Unshareflags&CLONE_NEWUSER != 0 && .GidMappings != nil {
:= int(_AT_FDCWD)
if , _, = RawSyscall6(SYS_OPENAT, uintptr(), uintptr(unsafe.Pointer(&[0])), uintptr(O_WRONLY), 0, 0, 0); != 0 {
goto
}
, _, = RawSyscall(SYS_WRITE, uintptr(), uintptr(unsafe.Pointer(&[0])), uintptr(len()))
if != 0 {
goto
}
if _, _, = RawSyscall(SYS_CLOSE, uintptr(), 0, 0); != 0 {
goto
}
if , _, = RawSyscall6(SYS_OPENAT, uintptr(), uintptr(unsafe.Pointer(&[0])), uintptr(O_WRONLY), 0, 0, 0); != 0 {
goto
}
, _, = RawSyscall(SYS_WRITE, uintptr(), uintptr(unsafe.Pointer(&[0])), uintptr(len()))
if != 0 {
goto
}
if _, _, = RawSyscall(SYS_CLOSE, uintptr(), 0, 0); != 0 {
goto
}
}
// New user namespace with UID mappings: one open/write/close sequence,
// presumably on the uid_map file prepared above — TODO confirm.
if .Unshareflags&CLONE_NEWUSER != 0 && .UidMappings != nil {
:= int(_AT_FDCWD)
if , _, = RawSyscall6(SYS_OPENAT, uintptr(), uintptr(unsafe.Pointer(&[0])), uintptr(O_WRONLY), 0, 0, 0); != 0 {
goto
}
, _, = RawSyscall(SYS_WRITE, uintptr(), uintptr(unsafe.Pointer(&[0])), uintptr(len()))
if != 0 {
goto
}
if _, _, = RawSyscall(SYS_CLOSE, uintptr(), 0, 0); != 0 {
goto
}
}
// New mount namespace: remount "/" with MS_REC|MS_PRIVATE (source "none").
// NOTE(review): presumably so mounts made afterwards do not propagate back
// to the original namespace — confirm.
if .Unshareflags&CLONE_NEWNS == CLONE_NEWNS {
_, _, = RawSyscall6(SYS_MOUNT, uintptr(unsafe.Pointer(&none[0])), uintptr(unsafe.Pointer(&slash[0])), 0, MS_REC|MS_PRIVATE, 0, 0)
if != 0 {
goto
}
}
}
// chroot when a path pointer was supplied.
if != nil {
_, _, = RawSyscall(SYS_CHROOT, uintptr(unsafe.Pointer()), 0, 0)
if != 0 {
goto
}
}
// Apply credentials: supplementary groups, then gid, then uid.
if := .Credential; != nil {
:= uintptr(len(.Groups))
:= uintptr(0)
if > 0 {
= uintptr(unsafe.Pointer(&.Groups[0]))
}
// setgroups is skipped when NoSetGroups is set, or when GID mappings are in
// use with setgroups disabled and a further value (identifier stripped,
// presumably the group count) is zero.
if !(.GidMappings != nil && !.GidMappingsEnableSetgroups && == 0) && !.NoSetGroups {
_, _, = RawSyscall(_SYS_setgroups, , , 0)
if != 0 {
goto
}
}
_, _, = RawSyscall(sys_SETGID, uintptr(.Gid), 0, 0)
if != 0 {
goto
}
_, _, = RawSyscall(sys_SETUID, uintptr(.Uid), 0, 0)
if != 0 {
goto
}
}
// Ambient capabilities: read the current sets, add each requested capability
// to permitted and inheritable, write the sets back, then issue one prctl per
// capability (arguments stripped; presumably PR_CAP_AMBIENT with
// PR_CAP_AMBIENT_RAISE — TODO confirm).
if len(.AmbientCaps) != 0 {
.hdr.version = _LINUX_CAPABILITY_VERSION_3
if , , := RawSyscall(SYS_CAPGET, uintptr(unsafe.Pointer(&.hdr)), uintptr(unsafe.Pointer(&.data[0])), 0); != 0 {
goto
}
for , := range .AmbientCaps {
.data[capToIndex()].permitted |= capToMask()
.data[capToIndex()].inheritable |= capToMask()
}
if , , := RawSyscall(SYS_CAPSET, uintptr(unsafe.Pointer(&.hdr)), uintptr(unsafe.Pointer(&.data[0])), 0); != 0 {
goto
}
for , := range .AmbientCaps {
_, _, = RawSyscall6(SYS_PRCTL, , uintptr(), , 0, 0, 0)
if != 0 {
goto
}
}
}
// chdir when a directory pointer was supplied.
if != nil {
_, _, = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer()), 0, 0)
if != 0 {
goto
}
}
// Parent-death signal: install it, then compare two pids (presumably the
// current getppid result against the pid recorded before the clone). If they
// differ, send Pdeathsig to the current process.
// NOTE(review): presumably this covers a parent that exited before the prctl
// took effect — confirm.
if .Pdeathsig != 0 {
_, _, = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(.Pdeathsig), 0, 0, 0, 0)
if != 0 {
goto
}
, _ = rawSyscallNoError(SYS_GETPPID, 0, 0, 0)
if != {
, := rawSyscallNoError(SYS_GETPID, 0, 0, 0)
, , := RawSyscall(SYS_KILL, , uintptr(.Pdeathsig), 0)
if != 0 {
goto
}
}
}
// Descriptor shuffling, pass 1. First, if one descriptor is below another
// value (identifiers stripped; presumably the status pipe versus the index it
// would be clobbered at), duplicate it to a higher number with close-on-exec.
// DUP3 is tried first; on ENOSYS, when _SYS_dup is a different syscall, fall
// back to _SYS_dup followed by fcntl(F_SETFD, FD_CLOEXEC).
if < {
_, _, = RawSyscall(SYS_DUP3, uintptr(), uintptr(), O_CLOEXEC)
if _SYS_dup != SYS_DUP3 && == ENOSYS {
_, _, = RawSyscall(_SYS_dup, uintptr(), uintptr(), 0)
if != 0 {
goto
}
RawSyscall(fcntl64Syscall, uintptr(), F_SETFD, FD_CLOEXEC)
} else if != 0 {
goto
}
=
++
}
// Still pass 1: each non-negative slice entry that is smaller than a value
// derived from the loop index is duplicated to a higher descriptor with
// close-on-exec (same DUP3/fallback scheme) and the entry is updated.
// The `==` / `++` step skips one descriptor number; presumably the one in
// use by the status pipe — TODO confirm.
for = 0; < len(); ++ {
if [] >= 0 && [] < int() {
if == {
++
}
_, _, = RawSyscall(SYS_DUP3, uintptr([]), uintptr(), O_CLOEXEC)
if _SYS_dup != SYS_DUP3 && == ENOSYS {
_, _, = RawSyscall(_SYS_dup, uintptr([]), uintptr(), 0)
if != 0 {
goto
}
RawSyscall(fcntl64Syscall, uintptr(), F_SETFD, FD_CLOEXEC)
} else if != 0 {
goto
}
[] =
++
}
}
// Pass 2: walk the slice again. An entry of -1 closes a descriptor; an entry
// equal to a value derived from the loop index only has its descriptor flags
// cleared with F_SETFD 0 (which removes close-on-exec); any other entry is
// duplicated with _SYS_dup.
for = 0; < len(); ++ {
if [] == -1 {
RawSyscall(SYS_CLOSE, uintptr(), 0, 0)
continue
}
if [] == int() {
_, _, = RawSyscall(fcntl64Syscall, uintptr([]), F_SETFD, 0)
if != 0 {
goto
}
continue
}
_, _, = RawSyscall(_SYS_dup, uintptr([]), uintptr(), 0)
if != 0 {
goto
}
}
// Close descriptors from len(slice) up to 2; errors are ignored.
// NOTE(review): presumably so unspecified standard descriptors are not
// inherited — confirm.
for = len(); < 3; ++ {
RawSyscall(SYS_CLOSE, uintptr(), 0, 0)
}
// Detach descriptor 0 from its controlling terminal.
if .Noctty {
_, _, = RawSyscall(SYS_IOCTL, 0, uintptr(TIOCNOTTY), 0)
if != 0 {
goto
}
}
// Make the terminal on Ctty the controlling terminal.
if .Setctty {
_, _, = RawSyscall(SYS_IOCTL, uintptr(.Ctty), uintptr(TIOCSCTTY), 1)
if != 0 {
goto
}
}
// Request tracing by the parent.
if .Ptrace {
_, _, = RawSyscall(SYS_PTRACE, uintptr(PTRACE_TRACEME), 0, 0)
if != 0 {
goto
}
}
// Replace the process image. Control only continues past this call if
// execve failed, and then falls through into the failure path below.
_, _, = RawSyscall(SYS_EXECVE,
uintptr(unsafe.Pointer()),
uintptr(unsafe.Pointer(&[0])),
uintptr(unsafe.Pointer(&[0])))
// Failure path (label name stripped): write a value to a descriptor
// (presumably the failing Errno to the status pipe) and exit with status 253.
// The exit call sits in an infinite loop so control never returns from here.
:
RawSyscall(SYS_WRITE, uintptr(), uintptr(unsafe.Pointer(&)), unsafe.Sizeof())
for {
RawSyscall(SYS_EXIT, 253, 0, 0)
}
}
// NOTE(review): name and identifiers stripped; the signature matches the
// forkExecPipe call elsewhere in this file, so this is presumably that
// function — confirm.
//
// Creates a pipe whose two ends are close-on-exec. It first tries
// Pipe2 with O_CLOEXEC; if that reports ENOSYS it falls back to Pipe and then
// sets FD_CLOEXEC on element 0 and element 1 with fcntl. The first error
// encountered is returned.
func ( []int) ( error) {
= Pipe2(, O_CLOEXEC)
// Fallback for when Pipe2 reports ENOSYS.
if == ENOSYS {
if = Pipe(); != nil {
return
}
if _, = fcntl([0], F_SETFD, FD_CLOEXEC); != nil {
return
}
_, = fcntl([1], F_SETFD, FD_CLOEXEC)
}
return
}
// NOTE(review): name and identifiers stripped; the signature matches the
// formatIDMappings calls elsewhere in this file, so this is presumably that
// function — confirm.
//
// Renders a slice of SysProcIDMap as text: one line per entry of the form
// "ContainerID HostID Size\n", concatenated in slice order. A nil or empty
// slice yields a nil []byte.
func ( []SysProcIDMap) []byte {
var []byte
for , := range {
= append(, []byte(itoa(.ContainerID)+" "+itoa(.HostID)+" "+itoa(.Size)+"\n")...)
}
return
}
// NOTE(review): name and identifiers stripped; the signature matches the
// writeIDMappings calls elsewhere in this file, so this is presumably that
// function — confirm.
//
// Opens the file named by the string argument with O_RDWR, writes the
// formatted mappings to it in a single Write, and closes it. Returns the
// first error from Open, Write or Close; on a Write error the descriptor is
// still closed (that Close result is ignored) before returning.
func ( string, []SysProcIDMap) error {
, := Open(, O_RDWR, 0)
if != nil {
return
}
if , := Write(, formatIDMappings()); != nil {
Close()
return
}
if := Close(); != nil {
return
}
return nil
}
// NOTE(review): name and identifiers stripped; the signature matches the
// writeSetgroups call elsewhere in this file, so this is presumably that
// function — confirm.
//
// Writes "allow" (bool argument true) or "deny" (false) to
// /proc/<int argument>/setgroups. Returns the first error from Open, Write or
// Close; on a Write error the descriptor is still closed before returning.
func ( int, bool) error {
:= "/proc/" + itoa() + "/setgroups"
, := Open(, O_RDWR, 0)
if != nil {
return
}
var []byte
if {
= []byte("allow")
} else {
= []byte("deny")
}
if , := Write(, ); != nil {
Close()
return
}
return Close()
}
// NOTE(review): name and identifiers stripped; the signature matches the
// writeUidGidMappings call elsewhere in this file, so this is presumably that
// function — confirm.
//
// Writes the ID mappings from the SysProcAttr argument to the /proc entries
// of the process identified by the int argument. UidMappings (if non-nil) go
// to /proc/<pid>/uid_map. For GidMappings (if non-nil) the setgroups file is
// written first, then /proc/<pid>/gid_map. Returns the first error
// encountered, or nil.
func ( int, *SysProcAttr) error {
if .UidMappings != nil {
:= "/proc/" + itoa() + "/uid_map"
if := writeIDMappings(, .UidMappings); != nil {
return
}
}
if .GidMappings != nil {
// ENOENT from the setgroups write is tolerated.
// NOTE(review): presumably because older kernels have no
// /proc/<pid>/setgroups file — confirm.
if := writeSetgroups(, .GidMappingsEnableSetgroups); != nil && != ENOENT {
return
}
:= "/proc/" + itoa() + "/gid_map"
if := writeIDMappings(, .GidMappings); != nil {
return
}
}
return nil
}