nvidia-container-toolkit/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/seccomp_default.go

591 lines
11 KiB
Go
Raw Normal View History

package seccomp
import (
"runtime"
"github.com/opencontainers/runtime-spec/specs-go"
rspec "github.com/opencontainers/runtime-spec/specs-go"
)
func arches() []rspec.Arch {
native := runtime.GOARCH
switch native {
case "amd64":
return []rspec.Arch{rspec.ArchX86_64, rspec.ArchX86, rspec.ArchX32}
case "arm64":
return []rspec.Arch{rspec.ArchARM, rspec.ArchAARCH64}
case "mips64":
return []rspec.Arch{rspec.ArchMIPS, rspec.ArchMIPS64, rspec.ArchMIPS64N32}
case "mips64n32":
return []rspec.Arch{rspec.ArchMIPS, rspec.ArchMIPS64, rspec.ArchMIPS64N32}
case "mipsel64":
return []rspec.Arch{rspec.ArchMIPSEL, rspec.ArchMIPSEL64, rspec.ArchMIPSEL64N32}
case "mipsel64n32":
return []rspec.Arch{rspec.ArchMIPSEL, rspec.ArchMIPSEL64, rspec.ArchMIPSEL64N32}
case "s390x":
return []rspec.Arch{rspec.ArchS390, rspec.ArchS390X}
default:
return []rspec.Arch{}
}
}
// DefaultProfile defines the whitelist for the default seccomp profile.
func DefaultProfile(rs *specs.Spec) *rspec.LinuxSeccomp {
syscalls := []rspec.LinuxSyscall{
{
Names: []string{
"accept",
"accept4",
"access",
"alarm",
"bind",
"brk",
"capget",
"capset",
"chdir",
"chmod",
"chown",
"chown32",
"clock_getres",
"clock_gettime",
"clock_nanosleep",
"close",
"connect",
"copy_file_range",
"creat",
"dup",
"dup2",
"dup3",
"epoll_create",
"epoll_create1",
"epoll_ctl",
"epoll_ctl_old",
"epoll_pwait",
"epoll_wait",
"epoll_wait_old",
"eventfd",
"eventfd2",
"execve",
"execveat",
"exit",
"exit_group",
"faccessat",
"fadvise64",
"fadvise64_64",
"fallocate",
"fanotify_mark",
"fchdir",
"fchmod",
"fchmodat",
"fchown",
"fchown32",
"fchownat",
"fcntl",
"fcntl64",
"fdatasync",
"fgetxattr",
"flistxattr",
"flock",
"fork",
"fremovexattr",
"fsetxattr",
"fstat",
"fstat64",
"fstatat64",
"fstatfs",
"fstatfs64",
"fsync",
"ftruncate",
"ftruncate64",
"futex",
"futimesat",
"getcpu",
"getcwd",
"getdents",
"getdents64",
"getegid",
"getegid32",
"geteuid",
"geteuid32",
"getgid",
"getgid32",
"getgroups",
"getgroups32",
"getitimer",
"getpeername",
"getpgid",
"getpgrp",
"getpid",
"getppid",
"getpriority",
"getrandom",
"getresgid",
"getresgid32",
"getresuid",
"getresuid32",
"getrlimit",
"get_robust_list",
"getrusage",
"getsid",
"getsockname",
"getsockopt",
"get_thread_area",
"gettid",
"gettimeofday",
"getuid",
"getuid32",
"getxattr",
"inotify_add_watch",
"inotify_init",
"inotify_init1",
"inotify_rm_watch",
"io_cancel",
"ioctl",
"io_destroy",
"io_getevents",
"ioprio_get",
"ioprio_set",
"io_setup",
"io_submit",
"ipc",
"kill",
"lchown",
"lchown32",
"lgetxattr",
"link",
"linkat",
"listen",
"listxattr",
"llistxattr",
"_llseek",
"lremovexattr",
"lseek",
"lsetxattr",
"lstat",
"lstat64",
"madvise",
"memfd_create",
"mincore",
"mkdir",
"mkdirat",
"mknod",
"mknodat",
"mlock",
"mlock2",
"mlockall",
"mmap",
"mmap2",
"mprotect",
"mq_getsetattr",
"mq_notify",
"mq_open",
"mq_timedreceive",
"mq_timedsend",
"mq_unlink",
"mremap",
"msgctl",
"msgget",
"msgrcv",
"msgsnd",
"msync",
"munlock",
"munlockall",
"munmap",
"nanosleep",
"newfstatat",
"_newselect",
"open",
"openat",
"pause",
"pipe",
"pipe2",
"poll",
"ppoll",
"prctl",
"pread64",
"preadv",
"prlimit64",
"pselect6",
"pwrite64",
"pwritev",
"read",
"readahead",
"readlink",
"readlinkat",
"readv",
"recv",
"recvfrom",
"recvmmsg",
"recvmsg",
"remap_file_pages",
"removexattr",
"rename",
"renameat",
"renameat2",
"restart_syscall",
"rmdir",
"rt_sigaction",
"rt_sigpending",
"rt_sigprocmask",
"rt_sigqueueinfo",
"rt_sigreturn",
"rt_sigsuspend",
"rt_sigtimedwait",
"rt_tgsigqueueinfo",
"sched_getaffinity",
"sched_getattr",
"sched_getparam",
"sched_get_priority_max",
"sched_get_priority_min",
"sched_getscheduler",
"sched_rr_get_interval",
"sched_setaffinity",
"sched_setattr",
"sched_setparam",
"sched_setscheduler",
"sched_yield",
"seccomp",
"select",
"semctl",
"semget",
"semop",
"semtimedop",
"send",
"sendfile",
"sendfile64",
"sendmmsg",
"sendmsg",
"sendto",
"setfsgid",
"setfsgid32",
"setfsuid",
"setfsuid32",
"setgid",
"setgid32",
"setgroups",
"setgroups32",
"setitimer",
"setpgid",
"setpriority",
"setregid",
"setregid32",
"setresgid",
"setresgid32",
"setresuid",
"setresuid32",
"setreuid",
"setreuid32",
"setrlimit",
"set_robust_list",
"setsid",
"setsockopt",
"set_thread_area",
"set_tid_address",
"setuid",
"setuid32",
"setxattr",
"shmat",
"shmctl",
"shmdt",
"shmget",
"shutdown",
"sigaltstack",
"signalfd",
"signalfd4",
"sigreturn",
"socket",
"socketcall",
"socketpair",
"splice",
"stat",
"stat64",
"statfs",
"statfs64",
"symlink",
"symlinkat",
"sync",
"sync_file_range",
"syncfs",
"sysinfo",
"syslog",
"tee",
"tgkill",
"time",
"timer_create",
"timer_delete",
"timerfd_create",
"timerfd_gettime",
"timerfd_settime",
"timer_getoverrun",
"timer_gettime",
"timer_settime",
"times",
"tkill",
"truncate",
"truncate64",
"ugetrlimit",
"umask",
"uname",
"unlink",
"unlinkat",
"utime",
"utimensat",
"utimes",
"vfork",
"vmsplice",
"wait4",
"waitid",
"waitpid",
"write",
"writev",
},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
{
Names: []string{"personality"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{
{
Index: 0,
Value: 0x0,
Op: rspec.OpEqualTo,
},
{
Index: 0,
Value: 0x0008,
Op: rspec.OpEqualTo,
},
{
Index: 0,
Value: 0xffffffff,
Op: rspec.OpEqualTo,
},
},
},
}
var sysCloneFlagsIndex uint
capSysAdmin := false
caps := make(map[string]bool)
for _, cap := range rs.Process.Capabilities.Bounding {
caps[cap] = true
}
for _, cap := range rs.Process.Capabilities.Effective {
caps[cap] = true
}
for _, cap := range rs.Process.Capabilities.Inheritable {
caps[cap] = true
}
for _, cap := range rs.Process.Capabilities.Permitted {
caps[cap] = true
}
for _, cap := range rs.Process.Capabilities.Ambient {
caps[cap] = true
}
for cap := range caps {
switch cap {
case "CAP_DAC_READ_SEARCH":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"open_by_handle_at"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_ADMIN":
capSysAdmin = true
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{
"bpf",
"clone",
"fanotify_init",
"lookup_dcookie",
"mount",
"name_to_handle_at",
"perf_event_open",
"setdomainname",
"sethostname",
"setns",
"umount",
"umount2",
"unshare",
},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_BOOT":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"reboot"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_CHROOT":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"chroot"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_MODULE":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{
"delete_module",
"init_module",
"finit_module",
"query_module",
},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_PACCT":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"acct"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_PTRACE":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{
"kcmp",
"process_vm_readv",
"process_vm_writev",
"ptrace",
},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_RAWIO":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{
"iopl",
"ioperm",
},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_TIME":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{
"settimeofday",
"stime",
"adjtimex",
},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "CAP_SYS_TTY_CONFIG":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"vhangup"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
}
}
if !capSysAdmin {
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"clone"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{
{
Index: sysCloneFlagsIndex,
Value: CloneNewNS | CloneNewUTS | CloneNewIPC | CloneNewUser | CloneNewPID | CloneNewNet,
ValueTwo: 0,
Op: rspec.OpMaskedEqual,
},
},
},
}...)
}
arch := runtime.GOARCH
switch arch {
case "arm", "arm64":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{
"breakpoint",
"cacheflush",
"set_tls",
},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "amd64", "x32":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"arch_prctl"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
fallthrough
case "x86":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"modify_ldt"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
case "s390", "s390x":
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{
"s390_pci_mmio_read",
"s390_pci_mmio_write",
"s390_runtime_instr",
},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{},
},
}...)
/* Flags parameter of the clone syscall is the 2nd on s390 */
syscalls = append(syscalls, []rspec.LinuxSyscall{
{
Names: []string{"clone"},
Action: rspec.ActAllow,
Args: []rspec.LinuxSeccompArg{
{
Index: 1,
Value: 2080505856,
ValueTwo: 0,
Op: rspec.OpMaskedEqual,
},
},
},
}...)
}
return &rspec.LinuxSeccomp{
DefaultAction: rspec.ActErrno,
Architectures: arches(),
Syscalls: syscalls,
}
}