diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
index 00f0cf0a58..a1373f88c3 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -82,6 +82,73 @@ func copyFile(dst, src string) error {
 	return err
 }
 
+// setupMinimalProcfs creates a minimal procfs-like tree at `${chroot}/proc`.
+//
+// The tree is a tmpfs holding copies of the few kernel parameter files that
+// are needed, plus a "self" symlink that points into a procfs mounted in a
+// subdirectory of it. It returns an error if any step fails.
+func setupMinimalProcfs(chroot string) error {
+	// We can't always directly mount procfs because it may be obstructed
+	// by submounts within it. See https://gvisor.dev/issue/10944.
+	// All we really need from procfs is /proc/self and a few kernel
+	// parameter files, which are typically not obstructed.
+	// So we create a tmpfs at /proc and manually copy the kernel parameter
+	// files into it. Then, to get /proc/self, we mount either a new
+	// instance of procfs (if possible), or a recursive bind mount of the
+	// procfs we do have access to (which still contains the obstructed
+	// submounts but /proc/self is not obstructed), and we symlink
+	// our /proc/self to the one in that mount.
+	procRoot := filepath.Join(chroot, "/proc")
+	if err := os.Mkdir(procRoot, 0755); err != nil {
+		return fmt.Errorf("error creating /proc in chroot: %w", err)
+	}
+	if err := specutils.SafeMount("runsc-proc", procRoot, "tmpfs",
+		unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil {
+		return fmt.Errorf("error mounting tmpfs in /proc: %w", err)
+	}
+	// Parent directories for the kernel parameter files copied below.
+	for _, d := range []string{
+		"/proc/sys",
+		"/proc/sys/kernel",
+		"/proc/sys/vm",
+	} {
+		if err := os.Mkdir(filepath.Join(chroot, d), 0755); err != nil {
+			return fmt.Errorf("error creating directory %q: %w", filepath.Join(chroot, d), err)
+		}
+	}
+	// Copy the kernel parameter files from the current /proc into the tmpfs.
+	for _, f := range []string{
+		"/proc/sys/vm/mmap_min_addr",
+		"/proc/sys/kernel/cap_last_cap",
+	} {
+		if err := copyFile(filepath.Join(chroot, f), f); err != nil {
+			return fmt.Errorf("failed to copy %q -> %q: %w", f, filepath.Join(chroot, f), err)
+		}
+	}
+	flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
+	// Prefer a new procfs instance; fall back to a recursive bind mount of
+	// the existing /proc if that fails.
+	procSubmountDir := "sandbox-proc"
+	if newProcfsErr := mountInChroot(chroot, "proc", "/proc/"+procSubmountDir, "proc", flags); newProcfsErr != nil {
+		log.Debugf("Unable to mount a new instance of the procfs file system at %q (%v); trying a recursive bind mount instead.", filepath.Join(procRoot, procSubmountDir), newProcfsErr)
+		procSubmountDir = "host-proc"
+		if bindErr := mountInChroot(chroot, "/proc", "/proc/"+procSubmountDir, "bind",
+			unix.MS_BIND|unix.MS_REC|flags); bindErr != nil {
+			return fmt.Errorf("error recursively bind-mounting proc at %q (%w) after also failing to mount a new procfs instance there (%v)", filepath.Join(procRoot, procSubmountDir), bindErr, newProcfsErr)
+		}
+		log.Debugf("Successfully mounted a recursive bind mount of procfs at %q; continuing.", filepath.Join(procRoot, procSubmountDir))
+	}
+	// The symlink target is relative, so it resolves inside the new /proc.
+	if err := os.Symlink(procSubmountDir+"/self", filepath.Join(procRoot, "self")); err != nil {
+		return fmt.Errorf("error creating symlink %q -> %q: %w", filepath.Join(procRoot, "self"), procSubmountDir+"/self", err)
+	}
+	// Leave only the execute (search) permission bits set on procRoot.
+	if err := os.Chmod(procRoot, 0o111); err != nil {
+		return fmt.Errorf("error chmodding %q: %w", procRoot, err)
+	}
+	return nil
+}
+
 // setUpChroot creates an empty directory with runsc mounted at /runsc and proc
 // mounted at /proc.
 func setUpChroot(spec *specs.Spec, conf *config.Config) error {
@@ -109,9 +176,8 @@ func setUpChroot(spec *specs.Spec, conf *config.Config) error {
 		log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err)
 	}
 
-	flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
-	if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil {
-		return fmt.Errorf("error mounting proc in chroot: %v", err)
+	if err := setupMinimalProcfs(chroot); err != nil {
+		return fmt.Errorf("error setting up minimal procfs in chroot %q: %v", chroot, err)
 	}
 
 	if err := tpuProxyUpdateChroot("/", chroot, spec, conf); err != nil {
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index abbfa6350b..bd3754b08a 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -214,7 +214,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) {
 
 // HasCapabilities returns true if the user has all capabilities in 'cs'.
 func HasCapabilities(cs ...capability.Cap) bool {
-	caps, err := capability.NewPid2(os.Getpid())
+	caps, err := capability.NewPid2(0)
 	if err != nil {
 		return false
 	}
diff --git a/test/e2e/runtime_in_docker_test.go b/test/e2e/runtime_in_docker_test.go
index 97c7e5aa96..9924c2c1d2 100644
--- a/test/e2e/runtime_in_docker_test.go
+++ b/test/e2e/runtime_in_docker_test.go
@@ -74,18 +74,6 @@ func (test testVariant) run(ctx context.Context, logger testutil.Logger, runscPa
 			ReadOnly: false,
 		})
 	}
-	// Mount an unobstructed view of procfs at /proc2 so that the runtime
-	// can mount a fresh procfs.
-	// TODO(gvisor.dev/issue/10944): Remove this once issue is fixed.
-	opts.Mounts = append(opts.Mounts, mount.Mount{
-		Type:     mount.TypeBind,
-		Source:   "/proc",
-		Target:   "/proc2",
-		ReadOnly: false,
-		BindOptions: &mount.BindOptions{
-			NonRecursive: true,
-		},
-	})
 	const wantMessage = "It became a jumble of words, a litany, almost a kind of glossolalia."
 	args := []string{
 		"/runtime",