compat_linux.go
1 // Copyright 2026 Alibaba Group Holding Ltd. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build linux 16 17 package clone3compat 18 19 import ( 20 "errors" 21 "fmt" 22 "os" 23 "strings" 24 "syscall" 25 26 seccomp "github.com/elastic/go-seccomp-bpf" 27 "golang.org/x/sys/unix" 28 ) 29 30 const ( 31 envCompat = "EXECD_CLONE3_COMPAT" 32 envApplied = "_EXECD_CLONE3_COMPAT_APPLIED" 33 ) 34 35 // MaybeApply optionally installs a seccomp rule so clone3 returns ENOSYS, matching the 36 // behavior of https://github.com/AkihiroSuda/clone3-workaround . 37 // It returns true if this process is running with that compatibility active (including 38 // a post-reexec process that inherited the seccomp filter). 39 func MaybeApply() bool { 40 mode := strings.ToLower(strings.TrimSpace(os.Getenv(envCompat))) 41 switch mode { 42 case "", "0", "false", "off", "no": 43 return false 44 case "1", "true", "yes", "on": 45 if err := loadClone3EnosysFilter(); err != nil { 46 _, _ = fmt.Fprintf(os.Stderr, "execd: %v\n", err) 47 os.Exit(1) 48 } 49 return true 50 case "reexec": 51 if os.Getenv(envApplied) == "1" { 52 return true 53 } 54 if err := loadClone3EnosysFilter(); err != nil { 55 _, _ = fmt.Fprintf(os.Stderr, "execd: %v\n", err) 56 os.Exit(1) 57 } 58 if err := os.Setenv(envApplied, "1"); err != nil { 59 _, _ = fmt.Fprintf(os.Stderr, "execd: clone3 compat: set %s: %v\n", envApplied, err) 60 os.Exit(1) 61 } 62 exe, err := os.Readlink("/proc/self/exe") 63 if err != nil { 64 _, _ = fmt.Fprintf(os.Stderr, "execd: clone3 compat: readlink /proc/self/exe: %v\n", err) 65 os.Exit(1) 66 } 67 exe = strings.TrimSuffix(exe, " (deleted)") 68 if err := unix.Exec(exe, os.Args, os.Environ()); err != nil { 69 _, _ = fmt.Fprintf(os.Stderr, "execd: clone3 compat: exec: %v\n", err) 70 os.Exit(1) 71 } 72 panic("unreachable") // Exec replaces this process. 73 default: 74 _, _ = fmt.Fprintf(os.Stderr, "execd: invalid %s=%q (use 1, true, or reexec)\n", envCompat, os.Getenv(envCompat)) 75 os.Exit(1) 76 } 77 78 return false 79 } 80 81 func loadClone3EnosysFilter() error { 82 if !seccomp.Supported() { 83 return errors.New("clone3 compat: seccomp is not available on this kernel") 84 } 85 f := seccomp.Filter{ 86 NoNewPrivs: true, 87 Flag: seccomp.FilterFlagTSync, 88 Policy: seccomp.Policy{ 89 DefaultAction: seccomp.ActionAllow, 90 Syscalls: []seccomp.SyscallGroup{ 91 { 92 Names: []string{"clone3"}, 93 // Not plain ActionErrno: assembler defaults errno to EPERM. 94 Action: seccomp.ActionErrno | seccomp.Action(syscall.ENOSYS), 95 }, 96 }, 97 }, 98 } 99 if err := seccomp.LoadFilter(f); err != nil { 100 return fmt.Errorf("clone3 compat: %w", err) 101 } 102 return nil 103 }