/ components / execd / pkg / clone3compat / compat_linux.go
compat_linux.go
  1  // Copyright 2026 Alibaba Group Holding Ltd.
  2  //
  3  // Licensed under the Apache License, Version 2.0 (the "License");
  4  // you may not use this file except in compliance with the License.
  5  // You may obtain a copy of the License at
  6  //
  7  //     http://www.apache.org/licenses/LICENSE-2.0
  8  //
  9  // Unless required by applicable law or agreed to in writing, software
 10  // distributed under the License is distributed on an "AS IS" BASIS,
 11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12  // See the License for the specific language governing permissions and
 13  // limitations under the License.
 14  
 15  //go:build linux
 16  
 17  package clone3compat
 18  
 19  import (
 20  	"errors"
 21  	"fmt"
 22  	"os"
 23  	"strings"
 24  	"syscall"
 25  
 26  	seccomp "github.com/elastic/go-seccomp-bpf"
 27  	"golang.org/x/sys/unix"
 28  )
 29  
     // Environment variable names used by MaybeApply.
 30  const (
     	// envCompat is the variable MaybeApply reads to choose the compatibility mode.
 31  	envCompat  = "EXECD_CLONE3_COMPAT"
     	// envApplied is set to "1" by MaybeApply before it re-executes the binary in
     	// "reexec" mode; finding it set to "1" in that mode makes MaybeApply return true
     	// without loading the filter again.
 32  	envApplied = "_EXECD_CLONE3_COMPAT_APPLIED"
 33  )
 34  
 35  // MaybeApply optionally installs a seccomp rule so clone3 returns ENOSYS, matching the
 36  // behavior of https://github.com/AkihiroSuda/clone3-workaround .
 37  // It returns true if this process is running with that compatibility active (including
 38  // a post-reexec process that inherited the seccomp filter).
 39  func MaybeApply() bool {
 40  	mode := strings.ToLower(strings.TrimSpace(os.Getenv(envCompat)))
 41  	switch mode {
 42  	case "", "0", "false", "off", "no":
 43  		return false
 44  	case "1", "true", "yes", "on":
 45  		if err := loadClone3EnosysFilter(); err != nil {
 46  			_, _ = fmt.Fprintf(os.Stderr, "execd: %v\n", err)
 47  			os.Exit(1)
 48  		}
 49  		return true
 50  	case "reexec":
 51  		if os.Getenv(envApplied) == "1" {
 52  			return true
 53  		}
 54  		if err := loadClone3EnosysFilter(); err != nil {
 55  			_, _ = fmt.Fprintf(os.Stderr, "execd: %v\n", err)
 56  			os.Exit(1)
 57  		}
 58  		if err := os.Setenv(envApplied, "1"); err != nil {
 59  			_, _ = fmt.Fprintf(os.Stderr, "execd: clone3 compat: set %s: %v\n", envApplied, err)
 60  			os.Exit(1)
 61  		}
 62  		exe, err := os.Readlink("/proc/self/exe")
 63  		if err != nil {
 64  			_, _ = fmt.Fprintf(os.Stderr, "execd: clone3 compat: readlink /proc/self/exe: %v\n", err)
 65  			os.Exit(1)
 66  		}
 67  		exe = strings.TrimSuffix(exe, " (deleted)")
 68  		if err := unix.Exec(exe, os.Args, os.Environ()); err != nil {
 69  			_, _ = fmt.Fprintf(os.Stderr, "execd: clone3 compat: exec: %v\n", err)
 70  			os.Exit(1)
 71  		}
 72  		panic("unreachable") // Exec replaces this process.
 73  	default:
 74  		_, _ = fmt.Fprintf(os.Stderr, "execd: invalid %s=%q (use 1, true, or reexec)\n", envCompat, os.Getenv(envCompat))
 75  		os.Exit(1)
 76  	}
 77  
 78  	return false
 79  }
 80  
 81  func loadClone3EnosysFilter() error {
 82  	if !seccomp.Supported() {
 83  		return errors.New("clone3 compat: seccomp is not available on this kernel")
 84  	}
 85  	f := seccomp.Filter{
 86  		NoNewPrivs: true,
 87  		Flag:       seccomp.FilterFlagTSync,
 88  		Policy: seccomp.Policy{
 89  			DefaultAction: seccomp.ActionAllow,
 90  			Syscalls: []seccomp.SyscallGroup{
 91  				{
 92  					Names: []string{"clone3"},
 93  					// Not plain ActionErrno: assembler defaults errno to EPERM.
 94  					Action: seccomp.ActionErrno | seccomp.Action(syscall.ENOSYS),
 95  				},
 96  			},
 97  		},
 98  	}
 99  	if err := seccomp.LoadFilter(f); err != nil {
100  		return fmt.Errorf("clone3 compat: %w", err)
101  	}
102  	return nil
103  }