diff --git a/Makefile b/Makefile index 1a2e23e04..6c435b196 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,8 @@ direct-test-short: go test $(TEST_TAGS) -cover -test.short -v $(GO_PACKAGES) direct-build: + chmod 755 hack/seccomp.sh + hack/seccomp.sh go build -v $(GO_PACKAGES) direct-install: diff --git a/configs/config.go b/configs/config.go index 293af0a9b..63bd1b30b 100644 --- a/configs/config.go +++ b/configs/config.go @@ -13,6 +13,10 @@ type IDMap struct { Size int `json:"size"` } +type SeccompConf struct { + SysCalls []string `json:"syscalls"` +} + // TODO Windows. Many of these fields should be factored out into those parts // which are common across platforms, and those which are platform specific. @@ -104,4 +108,7 @@ type Config struct { // SystemProperties is a map of properties and their values. It is the equivalent of using // sysctl -w my.property.name value in Linux. SystemProperties map[string]string `json:"system_properties"` + + // SysCalls specify the system calls to keep when executing the process inside the container + Seccomps SeccompConf `json:"seccomp"` } diff --git a/configs/namespaces_linux.go b/configs/namespaces_linux.go index c937b49ff..428df017a 100644 --- a/configs/namespaces_linux.go +++ b/configs/namespaces_linux.go @@ -5,12 +5,13 @@ package configs import "fmt" const ( - NEWNET NamespaceType = "NEWNET" - NEWPID NamespaceType = "NEWPID" - NEWNS NamespaceType = "NEWNS" - NEWUTS NamespaceType = "NEWUTS" - NEWIPC NamespaceType = "NEWIPC" - NEWUSER NamespaceType = "NEWUSER" + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" + NEWSECCOMP NamespaceType = "NEWSECCOMP" ) func NamespaceTypes() []NamespaceType { diff --git a/configs/namespaces_syscall.go b/configs/namespaces_syscall.go index c962999ef..d3bd38934 100644 --- a/configs/namespaces_syscall.go +++ b/configs/namespaces_syscall.go @@ 
-4,17 +4,22 @@ package configs import "syscall" +var ( + CLONE_SECCOMP = 0x10000 //diffrent from other flag, hard code +) + func (n *Namespace) Syscall() int { return namespaceInfo[n.Type] } var namespaceInfo = map[NamespaceType]int{ - NEWNET: syscall.CLONE_NEWNET, - NEWNS: syscall.CLONE_NEWNS, - NEWUSER: syscall.CLONE_NEWUSER, - NEWIPC: syscall.CLONE_NEWIPC, - NEWUTS: syscall.CLONE_NEWUTS, - NEWPID: syscall.CLONE_NEWPID, + NEWNET: syscall.CLONE_NEWNET, + NEWNS: syscall.CLONE_NEWNS, + NEWUSER: syscall.CLONE_NEWUSER, + NEWIPC: syscall.CLONE_NEWIPC, + NEWUTS: syscall.CLONE_NEWUTS, + NEWPID: syscall.CLONE_NEWPID, + NEWSECCOMP: CLONE_SECCOMP, } // CloneFlags parses the container's Namespaces options to set the correct diff --git a/container_linux.go b/container_linux.go index 215f35d38..27684e23c 100644 --- a/container_linux.go +++ b/container_linux.go @@ -169,6 +169,13 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c cmd.SysProcAttr.Credential = &syscall.Credential{} } } + if cloneFlags&uintptr(configs.CLONE_SECCOMP) != 0 { + //os don't surport for CLONE_SECCOMP, remote it + c.config.Namespaces.Remove(configs.NEWSECCOMP) + cloneFlags = c.config.Namespaces.CloneFlags() + } else { + c.config.Seccomps.SysCalls = []string{} + } cmd.Env = append(cmd.Env, t) cmd.SysProcAttr.Cloneflags = cloneFlags return &initProcess{ diff --git a/hack/seccomp.pl b/hack/seccomp.pl new file mode 100755 index 000000000..dc0f6646f --- /dev/null +++ b/hack/seccomp.pl @@ -0,0 +1,58 @@ +#!/usr/bin/perl + +# ./seccomp.pl < syscall.sample > seccompsyscall.go + +use strict; +use warnings; + +my $pid = open(my $in, "-|") // die "Couldn't fork1 ($!)\n"; + +if($pid == 0) { + $pid = open(my $out, "|-") // die "Couldn't fork2 ($!)\n"; + if($pid == 0) { + exec "cpp" or die "Couldn't exec cpp ($!)\n"; + exit 1; + } + + print $out "#include \n"; + while(<>) { + if(/^\w/) { + my $name="$_"; + chomp($name); + + print $out $name; + print $out " = "; + print $out "__NR_$_"; + 
} + } + close $out; + exit 0; +} +print "//"; +system("uname -m"); +print "package seccomp\r\n\r\n"; +print "var syscallMap = map[string] int {\n"; +while(<$in>) { + my $line=$_; + + if($line =~ /^[\da-z_]/) + { + my @personal=split(/=/); + $personal[0] =~ s/[ ]//; + $personal[1] =~ s/[\r\n]//; + print " \""; + print $personal[0]; + print "\""; + print " : "; + if (($personal[1] !~ /[0-9]/) || length($personal[1]) > 4) + { + print "-1,\r\n"; + }else{ + print $personal[1]; + print ",\r\n"; + } + } +} + +print "}\r\n"; + diff --git a/hack/seccomp.sh b/hack/seccomp.sh new file mode 100755 index 000000000..40fa02c5c --- /dev/null +++ b/hack/seccomp.sh @@ -0,0 +1,4 @@ +#/bin/bash + +chmod 755 hack/seccomp.pl +hack/seccomp.pl < hack/syscall.sample > seccomp/seccompsyscall.go diff --git a/hack/syscall.sample b/hack/syscall.sample new file mode 100644 index 000000000..b1f61d5d7 --- /dev/null +++ b/hack/syscall.sample @@ -0,0 +1,405 @@ +access +chdir +chmod +chown +chown32 +close +creat +dup +dup2 +dup3 +epoll_create +epoll_create1 +epoll_ctl +epoll_ctl_old +epoll_pwait +epoll_wait +epoll_wait_old +eventfd +eventfd2 +faccessat +fadvise64 +fadvise64_64 +fallocate +fanotify_init +fanotify_mark +ioctl +fchdir +fchmod +fchmodat +fchown +fchown32 +fchownat +fcntl +fcntl64 +fdatasync +fgetxattr +flistxattr +flock +fremovexattr +fsetxattr +fstat +fstat64 +fstatat64 +fstatfs +fstatfs64 +fsync +ftruncate +ftruncate64 +getcwd +getdents +getdents64 +getxattr +inotify_add_watch +inotify_init +inotify_init1 +inotify_rm_watch +io_cancel +io_destroy +io_getevents +io_setup +io_submit +lchown +lchown32 +lgetxattr +link +linkat +listxattr +llistxattr +llseek +_llseek +lremovexattr +lseek +lsetxattr +lstat +lstat64 +mkdir +mkdirat +mknod +mknodat +newfstatat +_newselect +oldfstat +oldlstat +oldolduname +oldstat +olduname +oldwait4 +open +openat +pipe +pipe2 +poll +ppoll +pread64 +preadv +futimesat +pselect6 +pwrite64 +pwritev +read +readahead +readdir +readlink +readlinkat +readv 
+removexattr +rename +renameat +rmdir +select +sendfile +sendfile64 +setxattr +splice +stat +stat64 +statfs +statfs64 +symlink +symlinkat +sync +sync_file_range +sync_file_range2 +syncfs +tee +truncate +truncate64 +umask +unlink +unlinkat +ustat +utime +utimensat +utimes +write +writev + +// Network related +accept +accept4 +bind +connect +getpeername +getsockname +getsockopt +listen +recv +recvfrom +recvmmsg +recvmsg +send +sendmmsg +sendmsg +sendto +setsockopt +shutdown +socket +socketcall +socketpair +sethostname + +// Signal related +pause +rt_sigaction +rt_sigpending +rt_sigprocmask +rt_sigqueueinfo +rt_sigreturn +rt_sigsuspend +rt_sigtimedwait +rt_tgsigqueueinfo +sigaction +sigaltstack +signal +signalfd +signalfd4 +sigpending +sigprocmask +sigreturn +sigsuspend + +// Other needed POSIX +alarm +brk +clock_adjtime +clock_getres +clock_gettime +clock_nanosleep +clock_settime +gettimeofday +nanosleep +nice +sysinfo +syslog +time +timer_create +timer_delete +timerfd_create +timerfd_gettime +timerfd_settime +timer_getoverrun +timer_gettime +timer_settime +times +uname + +// Memory control +madvise +mbind +mincore +mlock +mlockall +mmap +mmap2 +mprotect +mremap +msync +munlock +munlockall +munmap +remap_file_pages +set_mempolicy +vmsplice + +// Process control +capget +capset +clone +execve +exit +exit_group +fork +getcpu +getpgid +getpgrp +getpid +getppid +getpriority +getresgid +getresgid32 +getresuid +getresuid32 +getrlimit +getrusage +getsid +getuid +getuid32 +getegid +getegid32 +geteuid +geteuid32 +getgid +getgid32 +getgroups +getgroups32 +getitimer +get_mempolicy +kill +prctl +prlimit64 +sched_getaffinity +sched_getparam +sched_get_priority_max +sched_get_priority_min +sched_getscheduler +sched_rr_get_interval +sched_setaffinity +sched_setparam +sched_setscheduler +sched_yield +setfsgid +setfsgid32 +setfsuid +setfsuid32 +setgid +setgid32 +setgroups +setgroups32 +setitimer +setpgid +setpriority +setregid +setregid32 +setresgid +setresgid32 +setresuid 
+setresuid32 +setreuid +setreuid32 +setrlimit +setsid +setuid +setuid32 +ugetrlimit +vfork +wait4 +waitid +waitpid + +// IPC +ipc +mq_getsetattr +mq_notify +mq_open +mq_timedreceive +mq_timedsend +mq_unlink +msgctl +msgget +msgrcv +msgsnd +semctl +semget +semop +semtimedop +shmat +shmctl +shmdt +shmget + +// Linux specific, mostly needed for thread-related stuff +arch_prctl +get_robust_list +get_thread_area +gettid +futex +restart_syscall +set_robust_list +set_thread_area +set_tid_address +tgkill +tkill + +// Admin syscalls, these are blocked +acct +adjtimex +bdflush +chroot +create_module +delete_module +get_kernel_syms +idle +init_module +ioperm +iopl +ioprio_get +ioprio_set +kexec_load +lookup_dcookie +migrate_pages +modify_ldt +mount +move_pages +name_to_handle_at +nfsservctl +open_by_handle_at +perf_event_open +pivot_root +process_vm_readv +process_vm_writev +ptrace +query_module +quotactl +reboot +setdomainname +setns +settimeofday +sgetmask +ssetmask +stime +swapoff +swapon +_sysctl +sysfs +sys_setaltroot +umount +umount2 +unshare +uselib +vhangup +vm86 +vm86old + +// Kernel key management +add_key +keyctl +request_key + +// Unimplemented +afs_syscall +break +ftime +getpmsg +gtty +lock +madvise1 +mpx +prof +profil +putpmsg +security +stty +tuxcall +ulimit +vserver diff --git a/init_linux.go b/init_linux.go index 1771fd193..bd97364e5 100644 --- a/init_linux.go +++ b/init_linux.go @@ -13,6 +13,7 @@ import ( "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" + "github.com/docker/libcontainer/seccomp" "github.com/docker/libcontainer/system" "github.com/docker/libcontainer/user" "github.com/docker/libcontainer/utils" @@ -259,3 +260,15 @@ func killCgroupProcesses(m cgroups.Manager) error { } return nil } + +func finalizeSeccomp(config *initConfig) error { + if len(config.Config.Seccomps.SysCalls) > 0 { + scmpCtx, _ := seccomp.ScmpInit(seccomp.ScmpActAllow) + for _, key := range 
config.Config.Seccomps.SysCalls { + seccomp.ScmpAdd(scmpCtx, key, seccomp.ScmpActAllow) + } + return seccomp.ScmpLoad(scmpCtx) + } + + return nil +} diff --git a/integration/exec_test.go b/integration/exec_test.go index 20d781ee5..60e497e11 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -1,7 +1,10 @@ package integration import ( + "bufio" "bytes" + "fmt" + "io" "io/ioutil" "os" "path/filepath" @@ -714,3 +717,74 @@ func TestSystemProperties(t *testing.T) { t.Fatalf("kernel.shmmni property expected to be 8192, but is %s", shmmniOutput) } } + +func formExceptSyscall(configFile string, excall []string, Seccomps *configs.SeccompConf) error { + f, err := os.Open(configFile) + defer f.Close() + if nil == err { + buff := bufio.NewReader(f) + for { + line, err := buff.ReadString('\n') + if err != nil || io.EOF == err { + break + } + if strings.Index(line, "//") >= 0 || len(line) == 1 { + continue + } + call := strings.TrimSpace(line) + j := 0 + for _, key := range excall { + if call == key { + break + } + j++ + } + if j == len(excall) { + Seccomps.SysCalls = append(Seccomps.SysCalls, call) + } + } + } + return nil +} + +func TestSeccompNotStat(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + config := newTemplateConfig(rootfs) + exceptCall := []string{"stat"} + formExceptSyscall("../hack/syscall.sample", exceptCall, &config.Seccomps) + + out, _, err := runContainer(config, "", "/bin/sh", "-c", "ls / -l") + if err == nil { + t.Fatal("runontainer[ls without SYS_STAT] should be failed") + } else { + fmt.Println(out) + } +} + +func TestSeccompStat(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + exceptCall := []string{} + formExceptSyscall("../hack/syscall.sample", exceptCall, &config.Seccomps) + out, _, err := 
runContainer(config, "", "/bin/sh", "-c", "ls / -l") + if err != nil { + t.Fatal(err) + } + fmt.Println(out) +} diff --git a/integration/template_test.go b/integration/template_test.go index cb991b417..8710a9148 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -44,6 +44,7 @@ func newTemplateConfig(rootfs string) *configs.Config { {Type: configs.NEWIPC}, {Type: configs.NEWPID}, {Type: configs.NEWNET}, + {Type: configs.NEWSECCOMP}, }), Cgroups: &configs.Cgroup{ Name: "test", @@ -114,5 +115,8 @@ func newTemplateConfig(rootfs string) *configs.Config { Soft: uint64(1025), }, }, + Seccomps: configs.SeccompConf{ + SysCalls: make([]string, 0, 512), + }, } } diff --git a/seccomp/seccomp.go b/seccomp/seccomp.go new file mode 100755 index 000000000..66c59a741 --- /dev/null +++ b/seccomp/seccomp.go @@ -0,0 +1,560 @@ +package seccomp + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "os" + "os/signal" + "runtime" + "strings" + "syscall" + "unsafe" +) + +const ( + EQ = 0 + NE = 1 + GE = 2 + LE = 3 + MEQ = 4 +) + +const ( + ALLOW = 0 + DENY = 1 + JUMP = 2 +) + +const ( + JUMP_JT = 0xff + JUMP_JF = 0xff + LABEL_JT = 0xfe + LABEL_JF = 0xfe +) + +const ( + ScmpActAllow = 0x0 + + PF_LD = 0x0 + BPF_RET = syscall.BPF_RET + BPF_K = syscall.BPF_K + BPF_ABS = syscall.BPF_ABS + BPF_JMP = syscall.BPF_JMP + BPF_JEQ = syscall.BPF_JEQ + BPF_W = syscall.BPF_W + BPF_LD = syscall.BPF_LD + BPF_JA = syscall.BPF_JA + BPF_MEM = syscall.BPF_MEM + BPF_ST = syscall.BPF_ST + BPF_JGT = syscall.BPF_JGT + BPF_JGE = syscall.BPF_JGE + BPF_JSET = syscall.BPF_JSET + BPF_ALU = syscall.BPF_ALU + BPF_AND = syscall.BPF_AND + + SECCOMP_RET_KILL = 0x00000000 + SECCOMP_RET_TRAP = 0x00030000 + SECCOMP_RET_ALLOW = 0x7fff0000 + SECCOMP_MODE_FILTER = 0x2 + PR_SET_NO_NEW_PRIVS = 0x26 +) + +type seccompData struct { + nr int32 + arch uint32 + insPointer uint64 + args [6]uint64 +} + +type sockFilter struct { + code uint16 + jt uint8 + jf uint8 + k uint32 +} + +type sockFprog struct 
{ + len uint16 + filt []sockFilter +} + +type FilterArgs struct { + Args []Filter +} + +type Action struct { + action int + args []FilterArgs +} + +type Filter struct { + Arg uint32 //index of args which start from zero + Op int //operation, such ass EQ/NE/GE/LE/MEQ + V uint //the value of arg + M uint //the mask of arg +} + +type bpfLabel struct { + label string + location uint32 +} + +type bpfLabels struct { + count uint32 + labels []bpfLabel +} + +type ScmpCtx struct { + CallMap map[int]*Action + filter []sockFilter + label bpfLabels +} + +type argOFunc func(uint32) uint32 +type argFunc func(*ScmpCtx, uint32) +type jFunc func(*ScmpCtx, Filter, sockFilter) +type addFunc func(ctx *ScmpCtx, call int, action int, args ...FilterArgs) error + +var secData seccompData = seccompData{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}} +var hiArg argOFunc +var loArg argOFunc +var arg argFunc + +var secAdd addFunc = nil + +var op [5]jFunc + +var ( + sysCallMin = 0 + sysCallMax = 0 +) +var sigSec bool = false + +func arg32(ctx *ScmpCtx, idx uint32) { + ctx.filter = append(ctx.filter, + scmpBpfStmt(BPF_LD+BPF_W+BPF_ABS, loArg(idx))) +} + +func jEq32(ctx *ScmpCtx, v Filter, jt sockFilter) { + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, uint32(v.V), 0, 1)) + ctx.filter = append(ctx.filter, jt) +} + +func jNe32(ctx *ScmpCtx, v Filter, jt sockFilter) { + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, uint32(v.V), 1, 0)) + ctx.filter = append(ctx.filter, jt) +} + +func jGe32(ctx *ScmpCtx, v Filter, jt sockFilter) { + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JGE+BPF_K, uint32(v.V), 0, 1)) + ctx.filter = append(ctx.filter, jt) +} + +func jLe32(ctx *ScmpCtx, v Filter, jt sockFilter) { + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JGT+BPF_K, uint32(v.V), 1, 0)) + ctx.filter = append(ctx.filter, jt) +} + +func jMeq32(ctx *ScmpCtx, v Filter, jt sockFilter) { + //todo, not implement now +} + +func arg64(ctx *ScmpCtx, idx uint32) { 
+ ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_W+BPF_ABS, loArg(idx))) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_ST, 0)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_W+BPF_ABS, hiArg(idx))) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_ST, 1)) +} + +func jNe64(ctx *ScmpCtx, v Filter, jt sockFilter) { + lo := uint32(uint64(v.V) % 0x100000000) + hi := uint32(uint64(v.V) / 0x100000000) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, (hi), 5, 0)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 0)) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, (lo), 2, 0)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) + ctx.filter = append(ctx.filter, jt) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) +} + +func jGe64(ctx *ScmpCtx, v Filter, jt sockFilter) { + lo := uint32(uint64(v.V) % 0x100000000) + hi := uint32(uint64(v.V) / 0x100000000) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0)) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 0)) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JGE+BPF_K, (lo), 0, 2)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) + ctx.filter = append(ctx.filter, jt) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) +} + +func jEq64(ctx *ScmpCtx, v Filter, jt sockFilter) { + lo := uint32(uint64(v.V) % 0x100000000) + hi := uint32(uint64(v.V) / 0x100000000) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 0)) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) + ctx.filter = append(ctx.filter, jt) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) +} + +func 
jMeq64(ctx *ScmpCtx, v Filter, jt sockFilter) { + lo := uint32(uint64(v.V) % 0x100000000) + hi := uint32(uint64(v.V) / 0x100000000) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 6)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 0)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_ALU+BPF_AND, uint32(v.M))) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) + ctx.filter = append(ctx.filter, jt) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) + +} + +func jLe64(ctx *ScmpCtx, v Filter, jt sockFilter) { + lo := uint32(uint64(v.V) % 0x100000000) + hi := uint32(uint64(v.V) / 0x100000000) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JGT+BPF_K, (hi), 6, 0)) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 3)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 0)) + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0)) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) + ctx.filter = append(ctx.filter, jt) + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_LD+BPF_MEM, 1)) +} + +func allow(ctx *ScmpCtx) { + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)) +} + +func deny(ctx *ScmpCtx) { + ctx.filter = append(ctx.filter, scmpBpfStmt(BPF_RET+BPF_K, SECCOMP_RET_TRAP)) +} + +func jump(ctx *ScmpCtx, lb string) { + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JA, findLabel(&ctx.label, lb), + JUMP_JT, JUMP_JF)) +} + +func label(ctx *ScmpCtx, lb string) { + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JA, findLabel(&ctx.label, lb), + LABEL_JT, LABEL_JF)) +} + +func secCall(ctx *ScmpCtx, nr int, jt sockFilter) { + ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, uint32(nr), 0, 1)) + ctx.filter = append(ctx.filter, jt) +} + +func findLabel(labels *bpfLabels, lb 
string) uint32 { + var id uint32 + for id = 0; id < labels.count; id++ { + if true == strings.EqualFold(lb, labels.labels[id].label) { + return id + } + } + tlabel := bpfLabel{lb, 0xffffffff} + labels.labels = append(labels.labels, tlabel) + labels.count += 1 + return id +} + +func hiArgLittle(idx uint32) uint32 { + if idx < 0 || idx >= 6 { + return 0 + } + + hi := uint32(unsafe.Offsetof(secData.args)) + uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch)) + return uint32(hi) +} + +func hiArgBig(idx uint32) uint32 { + if idx >= 6 { + return 0 + } + hi := uint32(unsafe.Offsetof(secData.args)) + 8*idx + return uint32(hi) +} + +func isLittle() bool { + litEndian := true + x := 0x1234 + p := unsafe.Pointer(&x) + p2 := (*[unsafe.Sizeof(0)]byte)(p) + if p2[0] == 0 { + litEndian = false + } + return litEndian +} + +func scmpBpfStmt(code uint16, k uint32) sockFilter { + return sockFilter{code, 0, 0, k} +} + +func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter { + return sockFilter{code, jt, jf, k} +} + +func prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) { + _, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) + if e1 != 0 { + err = e1 + } + return nil +} + +func scmpfilter(prog *sockFprog) (err error) { + _, _, e1 := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP), + uintptr(SECCOMP_MODE_FILTER), uintptr(unsafe.Pointer(prog))) + if e1 != 0 { + err = e1 + } + return nil +} + +func CombineArgs(args1 []FilterArgs, args2 []FilterArgs) []FilterArgs { + ilen1 := len(args1) + if ilen1 > len(args2) { + ilen1 = len(args2) + } + for i1 := 0; i1 < ilen1; i1++ { + jlen1 := len(args1[i1].Args) + jlen2 := len(args2[i1].Args) + for j2 := 0; j2 < jlen2; j2++ { + num := 0 + for j1 := 0; j1 < jlen1; j1++ { + if args1[i1].Args[j1] == args2[i1].Args[j2] { + break + } + num = num + 1 + } + if num == jlen1 { + args1[i1].Args = append(args1[i1].Args, args2[i1].Args[j2]) + } + } + } 
+ if ilen1 < len(args2) { + args1 = append(args1, args2[ilen1:]...) + } + return args1 +} + +func Sys(call string) int { + number, exists := syscallMap[call] + if exists { + return number + } + return -1 +} + +func ScmpInit(action int) (*ScmpCtx, error) { + ctx := ScmpCtx{ + CallMap: make(map[int]*Action), + filter: make([]sockFilter, 0, 128), + label: bpfLabels{ + count: 0, + labels: make([]bpfLabel, 0, 128), + }, + } + + ctx.filter = append(ctx.filter, + sockFilter{PF_LD + BPF_W + BPF_ABS, 0, 0, uint32(unsafe.Offsetof(secData.nr))}) + return &ctx, nil +} + +func ScmpDel(ctx *ScmpCtx, call int) error { + _, exists := ctx.CallMap[call] + if exists { + delete(ctx.CallMap, call) + return nil + } + + return errors.New("syscall not exist") +} + +func ScmpAdd(ctx *ScmpCtx, scall string, action int, args ...FilterArgs) error { + call, exists := syscallMap[scall] + if !exists { + return errors.New("syscall not surport") + } + if call <= sysCallMax { + _, exists := ctx.CallMap[call] + if exists { + return errors.New("syscall exist") + } + ctx.CallMap[call] = &Action{action, args} + return nil + } else { + if nil != secAdd { + return secAdd(ctx, call, action, args...) 
+ } + } + + return errors.New("syscall not surport") +} + +func dumpHex(ch []byte, len int) { + for i := 0; i < len; i++ { + fmt.Printf("%02x ", ch[i]) + if (i+1)%16 == 0 { + fmt.Printf("\n") + } else if (i+1)%8 == 0 { + fmt.Printf(" ") + } + } + fmt.Printf("\n") +} + +func dumpFilter(filter []sockFilter) { + buf := new(bytes.Buffer) + for _, v := range filter { + err := binary.Write(buf, binary.LittleEndian, v) + if err != nil { + fmt.Println("binary.Write failed:", err) + } + } + dumpHex(buf.Bytes(), int(unsafe.Sizeof(filter[0]))*len(filter)) +} + +func ScmpLoad(ctx *ScmpCtx) error { + for call, act := range ctx.CallMap { + if len(act.args) == 0 { + secCall(ctx, call, scmpBpfStmt(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)) + } else { + if len(act.args[0].Args) > 0 { + lb := fmt.Sprintf("lb-%d-%d", call, act.args[0].Args[0].Arg) + secCall(ctx, call, + scmpBpfJump(BPF_JMP+BPF_JA, findLabel(&ctx.label, lb), + JUMP_JT, JUMP_JF)) + } + } + } + deny(ctx) + + for call, act := range ctx.CallMap { + for i := 0; i < len(act.args); i++ { + if len(act.args[i].Args) > 0 { + lb := fmt.Sprintf("lb-%d-%d", call, act.args[i].Args[0].Arg) + label(ctx, lb) + arg(ctx, act.args[i].Args[0].Arg) + } + + for j := 0; j < len(act.args[i].Args); j++ { + var jf sockFilter + if len(act.args)-1 > i && len(act.args[i+1].Args) > 0 { + lbj := fmt.Sprintf("lb-%d-%d", call, act.args[i+1].Args[0].Arg) + jf = scmpBpfJump(BPF_JMP+BPF_JA, + findLabel(&ctx.label, lbj), JUMP_JT, JUMP_JF) + } else { + jf = scmpBpfStmt(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) + } + op[act.args[i].Args[j].Op](ctx, act.args[i].Args[j], jf) + } + + deny(ctx) + } + } + + //dumpFilter(ctx.filter) + + idx := int32(len(ctx.filter) - 1) + for ; idx >= 0; idx-- { + filter := &ctx.filter[idx] + if filter.code != (BPF_JMP + BPF_JA) { + continue + } + + rel := int32(filter.jt)<<8 | int32(filter.jf) + if ((JUMP_JT << 8) | JUMP_JF) == rel { + if ctx.label.labels[filter.k].location == 0xffffffff { + return errors.New("Unresolved label") + } + filter.k 
= ctx.label.labels[filter.k].location - uint32(idx+1) + filter.jt = 0 + filter.jf = 0 + } else if ((LABEL_JT << 8) | LABEL_JF) == rel { + if ctx.label.labels[filter.k].location != 0xffffffff { + return errors.New("Duplicate label use") + } + ctx.label.labels[filter.k].location = uint32(idx) + filter.k = 0 + filter.jt = 0 + filter.jf = 0 + } + } + + //dumpFilter(ctx.filter) + prog := sockFprog{ + len: uint16(len(ctx.filter)), + filt: ctx.filter, + } + + if nil != prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) { + fmt.Println("prctl PR_SET_NO_NEW_PRIVS error") + return errors.New("prctl PR_SET_NO_NEW_PRIVS error") + } + + if nil != scmpfilter(&prog) { + fmt.Println("scmpfilter error") + return errors.New("scmpfilter error") + } + return nil +} + +func sigSeccomp() { + sigSec = true +} + +func ScmpError() bool { + ret := sigSec + sigSec = false + return ret +} + +func init() { + if runtime.GOARCH == "386" { + sysCallMax = 340 + } else if runtime.GOARCH == "amd64" { + sysCallMax = 302 + } else if runtime.GOARCH == "arm" { + sysCallMax = 377 + } else if runtime.GOARCH == "arm64" { + sysCallMax = 281 + } else if runtime.GOARCH == "ppc64" { + sysCallMax = 354 + } else if runtime.GOARCH == "ppc64le" { + sysCallMax = 354 + } + if isLittle() { + hiArg = hiArgLittle + loArg = hiArgBig + } else { + hiArg = hiArgBig + loArg = hiArgLittle + } + + var length int + if 8 == int(unsafe.Sizeof(length)) { + arg = arg64 + op[EQ] = jEq64 + op[NE] = jNe64 + op[GE] = jGe64 + op[LE] = jLe64 + op[MEQ] = jMeq64 + } else { + arg = arg32 + op[EQ] = jEq32 + op[NE] = jNe32 + op[GE] = jGe32 + op[LE] = jLe32 + op[MEQ] = jMeq32 + } + + chSignal := make(chan os.Signal) + signal.Notify(chSignal, syscall.SIGSYS) + go sigSeccomp() +} diff --git a/seccomp/seccomp.test b/seccomp/seccomp.test new file mode 100755 index 000000000..14f69c46b --- /dev/null +++ b/seccomp/seccomp.test @@ -0,0 +1,131 @@ +package main + +import ( + "flag" + "os" + "fmt" + "syscall" + + sec "seccomp" +) +const ( + STDIN_FILENO = 0 + 
STDOUT_FILENO = 1 + BUFLEN = 8 +) + +func writeOk(args []string) { + scmpCtx, _ := sec.ScmpInit(sec.ScmpActAllow) + + sec.ScmpAdd(scmpCtx, "exit", sec.ScmpActAllow) + sec.ScmpAdd(scmpCtx, "exit_group", sec.ScmpActAllow) + + //the first arg is STDOUT_FILENO, the third arg must be <= BUFLEN + sec.ScmpAdd(scmpCtx, "write", sec.ScmpActAllow, + sec.FilterArgs{[]sec.Filter{{0, sec.EQ, STDOUT_FILENO, 0}}}, + sec.FilterArgs{[]sec.Filter{{2, sec.LE, BUFLEN, 0}}}, + ) + + sec.ScmpLoad(scmpCtx) + fmt.Printf("8888888\n") //ok +} + +func writeErr(args []string) { + scmpCtx, _ := sec.ScmpInit(sec.ScmpActAllow) + + sec.ScmpAdd(scmpCtx, "exit", sec.ScmpActAllow) + sec.ScmpAdd(scmpCtx, "exit_group", sec.ScmpActAllow) + + sec.ScmpAdd(scmpCtx, "write", sec.ScmpActAllow, + sec.FilterArgs{[]sec.Filter{{0, sec.EQ, STDOUT_FILENO, 0}}}, + sec.FilterArgs{[]sec.Filter{{2, sec.LE, BUFLEN, 0}}}, + ) + + sec.ScmpLoad(scmpCtx) + + // bad system call + fmt.Printf("99999999\n") +} + +func socketOk(args []string) { + scmpCtx, _ := sec.ScmpInit(sec.ScmpActAllow) + + //for 386, the next line is same as + //sec.ScmpAdd(scmpCtx, "socketcall", sec.ScmpActAllow, + // sec.FilterArgs{[]sec.Filter{{0, sec.EQ, 1, 0}}}, + //) + //SYS_SOCKET = 1 + sec.ScmpAdd(scmpCtx, "socket", sec.ScmpActAllow) + + sec.ScmpAdd(scmpCtx, "exit", sec.ScmpActAllow) + sec.ScmpAdd(scmpCtx, "exit_group", sec.ScmpActAllow) + + sec.ScmpAdd(scmpCtx, "write", sec.ScmpActAllow, + sec.FilterArgs{[]sec.Filter{{0, sec.EQ, STDOUT_FILENO, 0}}}, + sec.FilterArgs{[]sec.Filter{{2, sec.LE, BUFLEN, 0}}}, + ) + + sec.ScmpLoad(scmpCtx) + + syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP) + fmt.Printf("Sock ok\n") +} + +func socketErr(args []string) { + scmpCtx, _ := sec.ScmpInit(sec.ScmpActAllow) + + sec.ScmpAdd(scmpCtx, "exit", sec.ScmpActAllow) + sec.ScmpAdd(scmpCtx, "exit_group", sec.ScmpActAllow) + + sec.ScmpLoad(scmpCtx) + + // bad system call + syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP) +} 
+ +func maskEqualOk(args []string) { + scmpCtx, _ := sec.ScmpInit(sec.ScmpActAllow) + + sec.ScmpAdd(scmpCtx, "clone", sec.ScmpActAllow, + sec.FilterArgs{[]sec.Filter{{2, sec.MEQ, 0x1, 0x5}}}, + ) + sec.ScmpAdd(scmpCtx, "exit_group", sec.ScmpActAllow) + sec.ScmpLoad(scmpCtx) + + //0x3 & 0x5 == 0x1, It's ok + syscall.RawSyscall6(syscall.SYS_CLONE, 0, 0, 0x3, 0, 0, 0) +} + +func maskEqualErr(args []string) { + scmpCtx, _ := sec.ScmpInit(sec.ScmpActAllow) + + sec.ScmpAdd(scmpCtx, "clone", sec.ScmpActAllow, + sec.FilterArgs{[]sec.Filter{{2, sec.MEQ, 0x1, 0x7}}}, + ) + sec.ScmpAdd(scmpCtx, "exit_group", sec.ScmpActAllow) + sec.ScmpLoad(scmpCtx) + + //0x3 & 0x7 != 0x1, bad system call + syscall.RawSyscall6(syscall.SYS_CLONE, 0, 0, 0x3, 0, 0, 0) +} + +func main() { + flag.Parse() + + idx := 0 + args := os.Args[(idx + 1):] + + if flag.Arg(idx) == "writeOk" { + writeOk(args) + } else if flag.Arg(idx) == "writeErr" { + writeErr(args) + } else if flag.Arg(idx) == "socketOk" { + socketOk(args) + } else if flag.Arg(idx) == "socketErr" { + socketErr(args) + } else if flag.Arg(idx) == "maskEqualOk" { + maskEqualOk(args) + } else if flag.Arg(idx) == "maskEqualErr" { + maskEqualErr(args) + } +} diff --git a/seccomp/seccomp386.go b/seccomp/seccomp386.go new file mode 100644 index 000000000..db696e6f1 --- /dev/null +++ b/seccomp/seccomp386.go @@ -0,0 +1,117 @@ +// +build linux +// +build 386 + +package seccomp + +import ( + "errors" +) + +var ( + syscallInterval = 100 + ipcNr = syscallInterval + 0 + socketcallNr = syscallInterval + ipcNr + callipc = 0 + callsocket = 0 +) + +func scmpAdd386(ctx *ScmpCtx, call int, action int, args ...FilterArgs) error { + var syscallNo int + pseCall := call - sysCallMax + if (pseCall >= ipcNr) && (pseCall < ipcNr+syscallInterval) { + syscallNo, _ = syscallMap["ipc"] + pseCall = (pseCall - ipcNr) % ipcNr + + } else if (pseCall >= socketcallNr) && (pseCall < socketcallNr+syscallInterval) { + syscallNo, _ = syscallMap["socketcall"] + pseCall = (pseCall - 
socketcallNr) % socketcallNr + } else { + return errors.New("scmpAdd386, syscall error") + } + act, exists := ctx.CallMap[syscallNo] + if !exists { + newArg := make([]FilterArgs, len(args)+1) + newArg[0].Args = make([]Filter, 1) + newArg[0].Args[0].Op = EQ + newArg[0].Args[0].Arg = 0 + newArg[0].Args[0].V = uint(pseCall) + for i := 0; i < len(args); i++ { + alen := len(args[i].Args) + if alen > 0 { + newArg[i+1].Args = make([]Filter, alen) + for j := 0; j < alen; i++ { + newArg[i+1].Args[j].Op = args[i].Args[j].Op + newArg[i+1].Args[j].Arg = args[i].Args[j].Arg + newArg[i+1].Args[j].V = args[i].Args[j].V + } + } + } + ctx.CallMap[syscallNo] = &Action{action, newArg} + } else { + newArg := make([]FilterArgs, len(args)) + for i := 0; i < len(args); i++ { + alen := len(args[i].Args) + if alen > 0 { + newArg[i].Args = make([]Filter, alen) + for j := 0; j < alen; i++ { + newArg[i].Args[j].Op = args[i].Args[j].Op + newArg[i].Args[j].Arg = args[i].Args[j].Arg + newArg[i].Args[j].V = args[i].Args[j].V + } + } + } + act.args = CombineArgs(act.args, newArg) + } + + return nil +} + +func resetCallipc(call string, num int) { + syscallMap[call] = num + callipc +} + +func resetCallsocket(call string, num int) { + syscallMap[call] = num + callsocket +} + +func init() { + sysCallMax = 340 + callipc = ipcNr + sysCallMax + callsocket = socketcallNr + sysCallMax + secAdd = scmpAdd386 + + resetCallipc("semop", 1) + resetCallipc("semget", 2) + resetCallipc("semctl", 3) + resetCallipc("semtimedop", 4) + resetCallipc("msgsnd", 11) + resetCallipc("msgrcv", 12) + resetCallipc("msgget", 13) + resetCallipc("msgctl", 14) + resetCallipc("shmat", 21) + resetCallipc("shmdt", 22) + resetCallipc("shmget", 23) + resetCallipc("shmctl", 24) + + resetCallsocket("socket", 1) + resetCallsocket("bind", 2) + resetCallsocket("connect", 3) + resetCallsocket("listen", 4) + resetCallsocket("accept", 5) + resetCallsocket("getsockname", 6) + resetCallsocket("getpeername", 7) + resetCallsocket("socketpair", 8) + 
// osec is where the scenario program is copied to before it is executed with
// "go run".
// NOTE(review): the path is hard-coded and presumably assumes GOPATH=/go —
// confirm for the environment the tests run in.
var osec = "/go/src/seccomp_main.go"

// expectedOutcome reports whether the named scenario is expected to exit
// successfully (succeed) and whether the scenario name is recognised at all
// (known). Unrecognised names carry no expectation.
func expectedOutcome(cmd string) (succeed, known bool) {
	switch cmd {
	case "writeOk", "socketOk", "maskEqualOk":
		return true, true
	case "writeErr", "socketErr", "maskEqualErr":
		return false, true
	}
	return false, false
}

// secMain runs the scenario program at osec via "go run", passing args
// through, and fails t when the exit status contradicts the scenario name in
// args[0]: "*Ok" scenarios must succeed and "*Err" scenarios must fail.
// An empty args slice is a no-op; an unrecognised scenario name is run and
// logged but never fails the test.
func secMain(t *testing.T, args []string) {
	if len(args) < 1 {
		return
	}

	cmd := args[0]
	argv := append([]string{"run", osec}, args...)

	// Keep stdout and stderr so that a failing scenario can be diagnosed.
	out, err := exec.Command("go", argv...).CombinedOutput()
	t.Logf("do %s, err is [%v]", cmd, err)

	succeed, known := expectedOutcome(cmd)
	if !known {
		return
	}

	switch {
	case succeed && err != nil:
		t.Fatalf("%s: expected success, got error: %v\noutput:\n%s", cmd, err, out)
	case !succeed && err == nil:
		// err is nil here, so it must not be used as the failure message.
		t.Fatalf("%s: expected the command to fail, but it succeeded\noutput:\n%s", cmd, out)
	}
}

// commandGC removes file (recursively) with "rm -rf". Removal is best-effort:
// a failure is reported on stdout but does not abort the caller.
func commandGC(file string) {
	out, err := exec.Command("rm", "-rf", file).CombinedOutput()
	if err != nil {
		fmt.Printf("rm -rf %s: %v\n%s\n", file, err, out)
	}
}

// cp copies src to dst with "cp -ra". The copy is best-effort: a failure is
// reported on stdout but does not abort the caller.
func cp(src, dst string) {
	out, err := exec.Command("cp", "-ra", src, dst).CombinedOutput()
	if err != nil {
		fmt.Printf("cp -ra %s %s: %v\n%s\n", src, dst, err, out)
	}
}

// TestSeccomp stages the package and the scenario program under /go/src and
// runs every scenario through secMain, cleaning the staged copies up
// afterwards.
func TestSeccomp(t *testing.T) {
	// Destinations are hard-coded; see the note on osec.
	cp("../seccomp", "/go/src/")
	cp("./seccomp.test", osec)
	defer commandGC("/go/src/seccomp")
	defer commandGC(osec)

	scenarios := []string{
		"writeOk",
		"writeErr",
		"socketOk",
		"socketErr",
		"maskEqualOk",
		"maskEqualErr",
	}
	for _, name := range scenarios {
		secMain(t, []string{name})
	}
}
// syscallMap maps a syscall name to its syscall number. The file this table
// comes from is tagged "x86_64".
//
// Names mapped to -1 have no usable number in this table.
// NOTE(review): presumably these are calls that do not exist natively on
// x86_64 (for example the *32/*64 variants and the socketcall/ipc
// multiplexers) — confirm how consumers treat -1.
var syscallMap = map[string]int{
	"access": 21,
	"chdir": 80,
	"chmod": 90,
	"chown": 92,
	"chown32": -1,
	"close": 3,
	"creat": 85,
	"dup": 32,
	"dup2": 33,
	"dup3": 292,
	"epoll_create": 213,
	"epoll_create1": 291,
	"epoll_ctl": 233,
	"epoll_ctl_old": 214,
	"epoll_pwait": 281,
	"epoll_wait": 232,
	"epoll_wait_old": 215,
	"eventfd": 284,
	"eventfd2": 290,
	"faccessat": 269,
	"fadvise64": 221,
	"fadvise64_64": -1,
	"fallocate": 285,
	"fanotify_init": 300,
	"fanotify_mark": 301,
	"ioctl": 16,
	"fchdir": 81,
	"fchmod": 91,
	"fchmodat": 268,
	"fchown": 93,
	"fchown32": -1,
	"fchownat": 260,
	"fcntl": 72,
	"fcntl64": -1,
	"fdatasync": 75,
	"fgetxattr": 193,
	"flistxattr": 196,
	"flock": 73,
	"fremovexattr": 199,
	"fsetxattr": 190,
	"fstat": 5,
	"fstat64": -1,
	"fstatat64": -1,
	"fstatfs": 138,
	"fstatfs64": -1,
	"fsync": 74,
	"ftruncate": 77,
	"ftruncate64": -1,
	"getcwd": 79,
	"getdents": 78,
	"getdents64": 217,
	"getxattr": 191,
	"inotify_add_watch": 254,
	"inotify_init": 253,
	"inotify_init1": 294,
	"inotify_rm_watch": 255,
	"io_cancel": 210,
	"io_destroy": 207,
	"io_getevents": 208,
	"io_setup": 206,
	"io_submit": 209,
	"lchown": 94,
	"lchown32": -1,
	"lgetxattr": 192,
	"link": 86,
	"linkat": 265,
	"listxattr": 194,
	"llistxattr": 195,
	"llseek": -1,
	"_llseek": -1,
	"lremovexattr": 198,
	"lseek": 8,
	"lsetxattr": 189,
	"lstat": 6,
	"lstat64": -1,
	"mkdir": 83,
	"mkdirat": 258,
	"mknod": 133,
	"mknodat": 259,
	"newfstatat": 262,
	"_newselect": -1,
	"oldfstat": -1,
	"oldlstat": -1,
	"oldolduname": -1,
	"oldstat": -1,
	"olduname": -1,
	"oldwait4": -1,
	"open": 2,
	"openat": 257,
	"pipe": 22,
	"pipe2": 293,
	"poll": 7,
	"ppoll": 271,
	"pread64": 17,
	"preadv": 295,
	"futimesat": 261,
	"pselect6": 270,
	"pwrite64": 18,
	"pwritev": 296,
	"read": 0,
	"readahead": 187,
	"readdir": -1,
	"readlink": 89,
	"readlinkat": 267,
	"readv": 19,
	"removexattr": 197,
	"rename": 82,
	"renameat": 264,
	"rmdir": 84,
	"select": 23,
	"sendfile": 40,
	"sendfile64": -1,
	"setxattr": 188,
	"splice": 275,
	"stat": 4,
	"stat64": -1,
	"statfs": 137,
	"statfs64": -1,
	"symlink": 88,
	"symlinkat": 266,
	"sync": 162,
	"sync_file_range": 277,
	"sync_file_range2": -1,
	"syncfs": 306,
	"tee": 276,
	"truncate": 76,
	"truncate64": -1,
	"umask": 95,
	"unlink": 87,
	"unlinkat": 263,
	"ustat": 136,
	"utime": 132,
	"utimensat": 280,
	"utimes": 235,
	"write": 1,
	"writev": 20,
	"accept": 43,
	"accept4": 288,
	"bind": 49,
	"connect": 42,
	"getpeername": 52,
	"getsockname": 51,
	"getsockopt": 55,
	"listen": 50,
	"recv": -1,
	"recvfrom": 45,
	"recvmmsg": 299,
	"recvmsg": 47,
	"send": -1,
	"sendmmsg": 307,
	"sendmsg": 46,
	"sendto": 44,
	"setsockopt": 54,
	"shutdown": 48,
	"socket": 41,
	"socketcall": -1,
	"socketpair": 53,
	"sethostname": 170,
	"pause": 34,
	"rt_sigaction": 13,
	"rt_sigpending": 127,
	"rt_sigprocmask": 14,
	"rt_sigqueueinfo": 129,
	"rt_sigreturn": 15,
	"rt_sigsuspend": 130,
	"rt_sigtimedwait": 128,
	"rt_tgsigqueueinfo": 297,
	"sigaction": -1,
	"sigaltstack": 131,
	"signal": -1,
	"signalfd": 282,
	"signalfd4": 289,
	"sigpending": -1,
	"sigprocmask": -1,
	"sigreturn": -1,
	"sigsuspend": -1,
	"alarm": 37,
	"brk": 12,
	"clock_adjtime": 305,
	"clock_getres": 229,
	"clock_gettime": 228,
	"clock_nanosleep": 230,
	"clock_settime": 227,
	"gettimeofday": 96,
	"nanosleep": 35,
	"nice": -1,
	"sysinfo": 99,
	"syslog": 103,
	"time": 201,
	"timer_create": 222,
	"timer_delete": 226,
	"timerfd_create": 283,
	"timerfd_gettime": 287,
	"timerfd_settime": 286,
	"timer_getoverrun": 225,
	"timer_gettime": 224,
	"timer_settime": 223,
	"times": 100,
	"uname": 63,
	"madvise": 28,
	"mbind": 237,
	"mincore": 27,
	"mlock": 149,
	"mlockall": 151,
	"mmap": 9,
	"mmap2": -1,
	"mprotect": 10,
	"mremap": 25,
	"msync": 26,
	"munlock": 150,
	"munlockall": 152,
	"munmap": 11,
	"remap_file_pages": 216,
	"set_mempolicy": 238,
	"vmsplice": 278,
	"capget": 125,
	"capset": 126,
	"clone": 56,
	"execve": 59,
	"exit": 60,
	"exit_group": 231,
	"fork": 57,
	"getcpu": 309,
	"getpgid": 121,
	"getpgrp": 111,
	"getpid": 39,
	"getppid": 110,
	"getpriority": 140,
	"getresgid": 120,
	"getresgid32": -1,
	"getresuid": 118,
	"getresuid32": -1,
	"getrlimit": 97,
	"getrusage": 98,
	"getsid": 124,
	"getuid": 102,
	"getuid32": -1,
	"getegid": 108,
	"getegid32": -1,
	"geteuid": 107,
	"geteuid32": -1,
	"getgid": 104,
	"getgid32": -1,
	"getgroups": 115,
	"getgroups32": -1,
	"getitimer": 36,
	"get_mempolicy": 239,
	"kill": 62,
	"prctl": 157,
	"prlimit64": 302,
	"sched_getaffinity": 204,
	"sched_getparam": 143,
	"sched_get_priority_max": 146,
	"sched_get_priority_min": 147,
	"sched_getscheduler": 145,
	"sched_rr_get_interval": 148,
	"sched_setaffinity": 203,
	"sched_setparam": 142,
	"sched_setscheduler": 144,
	"sched_yield": 24,
	"setfsgid": 123,
	"setfsgid32": -1,
	"setfsuid": 122,
	"setfsuid32": -1,
	"setgid": 106,
	"setgid32": -1,
	"setgroups": 116,
	"setgroups32": -1,
	"setitimer": 38,
	"setpgid": 109,
	"setpriority": 141,
	"setregid": 114,
	"setregid32": -1,
	"setresgid": 119,
	"setresgid32": -1,
	"setresuid": 117,
	"setresuid32": -1,
	"setreuid": 113,
	"setreuid32": -1,
	"setrlimit": 160,
	"setsid": 112,
	"setuid": 105,
	"setuid32": -1,
	"ugetrlimit": -1,
	"vfork": 58,
	"wait4": 61,
	"waitid": 247,
	"waitpid": -1,
	"ipc": -1,
	"mq_getsetattr": 245,
	"mq_notify": 244,
	"mq_open": 240,
	"mq_timedreceive": 243,
	"mq_timedsend": 242,
	"mq_unlink": 241,
	"msgctl": 71,
	"msgget": 68,
	"msgrcv": 70,
	"msgsnd": 69,
	"semctl": 66,
	"semget": 64,
	"semop": 65,
	"semtimedop": 220,
	"shmat": 30,
	"shmctl": 31,
	"shmdt": 67,
	"shmget": 29,
	"arch_prctl": 158,
	"get_robust_list": 274,
	"get_thread_area": 211,
	"gettid": 186,
	"futex": 202,
	"restart_syscall": 219,
	"set_robust_list": 273,
	"set_thread_area": 205,
	"set_tid_address": 218,
	"tgkill": 234,
	"tkill": 200,
	"acct": 163,
	"adjtimex": 159,
	"bdflush": -1,
	"chroot": 161,
	"create_module": 174,
	"delete_module": 176,
	"get_kernel_syms": 177,
	"idle": -1,
	"init_module": 175,
	"ioperm": 173,
	"iopl": 172,
	"ioprio_get": 252,
	"ioprio_set": 251,
	"kexec_load": 246,
	"lookup_dcookie": 212,
	"migrate_pages": 256,
	"modify_ldt": 154,
	"mount": 165,
	"move_pages": 279,
	"name_to_handle_at": 303,
	"nfsservctl": 180,
	"open_by_handle_at": 304,
	"perf_event_open": 298,
	"pivot_root": 155,
	"process_vm_readv": 310,
	"process_vm_writev": 311,
	"ptrace": 101,
	"query_module": 178,
	"quotactl": 179,
	"reboot": 169,
	"setdomainname": 171,
	"setns": 308,
	"settimeofday": 164,
	"sgetmask": -1,
	"ssetmask": -1,
	"stime": -1,
	"swapoff": 168,
	"swapon": 167,
	"_sysctl": 156,
	"sysfs": 139,
	"sys_setaltroot": -1,
	"umount": -1,
	"umount2": 166,
	"unshare": 272,
	"uselib": 134,
	"vhangup": 153,
	"vm86": -1,
	"vm86old": -1,
	"add_key": 248,
	"keyctl": 250,
	"request_key": 249,
	"afs_syscall": 183,
	"break": -1,
	"ftime": -1,
	"getpmsg": 181,
	"gtty": -1,
	"lock": -1,
	"madvise1": -1,
	"mpx": -1,
	"prof": -1,
	"profil": -1,
	"putpmsg": 182,
	"security": 185,
	"stty": -1,
	"tuxcall": 184,
	"ulimit": -1,
	"vserver": 236,
}
*linuxStandardInit) Init() error { if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } + if err := finalizeSeccomp(l.config); err != nil { + return err + } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) }