Skip to content

Commit

Permalink
runsc: When mounting a new procfs fails, fall back to recursive bind-…
Browse files Browse the repository at this point in the history
…mount.

As part of sandbox startup, `runsc` needs to set up a chroot environment
with a minimal working `procfs` filesystem mounted within. However, doing
so from within a container (as applications like Dangerzone do) may fail,
because in the container runtime's default configuration, some paths of the
procfs filesystem visible from within the container may be obstructed. This
prevents mounting new unobstructed instances of `procfs`.

This change detects this case and falls back to the previous behavior of
using a recursive bind-mount of `/proc`. The obstructed subdirectories of
procfs are preserved by the bind-mount, which is fine because
we only need a very minimal subset of `procfs` to actually work.

Additionally, `runsc` actually only needs a few kernel parameter files
and `/proc/self` in order to work. So this change sets up a `tmpfs` mount
that contains just those files, with the kernel parameter files being
plainly copied and `/proc/self` being a symlink to the one present in the
mounted view of `procfs` (regardless of which mounting method was used).

The `runtime_in_docker` test will continuously verify that this fallback
mechanism works to avoid similar breakage in the future.

Credits to @avagin for figuring out this solution.

Fixes #10944.

PiperOrigin-RevId: 691672104
  • Loading branch information
EtiennePerot authored and gvisor-bot committed Nov 1, 2024
1 parent b2d8bb4 commit fd0c06b
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 16 deletions.
62 changes: 59 additions & 3 deletions runsc/cmd/chroot.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,63 @@ func copyFile(dst, src string) error {
return err
}

// setupMinimalProcfs creates a minimal procfs-like tree at `${chroot}/proc`.
//
// The resulting tree is a tmpfs that contains:
//   - copies of the kernel parameter files runsc reads
//     (/proc/sys/vm/mmap_min_addr and /proc/sys/kernel/cap_last_cap),
//   - a read-only submount giving a view of procfs (either a fresh procfs
//     instance at "sandbox-proc", or a recursive bind mount of the current
//     /proc at "host-proc"),
//   - a "self" symlink pointing at "self" inside that submount.
//
// It returns a non-nil error as soon as any step fails; the underlying error
// is wrapped with %w so callers can inspect it with errors.Is / errors.As.
// No cleanup of already-created directories or mounts is attempted on failure.
func setupMinimalProcfs(chroot string) error {
	// We can't always directly mount procfs because it may be obstructed
	// by submounts within it. See https://gvisor.dev/issue/10944.
	// All we really need from procfs is /proc/self and a few kernel
	// parameter files, which are typically not obstructed.
	// So we create a tmpfs at /proc and manually copy the kernel parameter
	// files into it. Then, to get /proc/self, we mount either a new
	// instance of procfs (if possible), or a recursive bind mount of the
	// procfs we do have access to (which still contains the obstructed
	// submounts but /proc/self is not obstructed), and we symlink
	// our /proc/self to the one in that mount.
	procRoot := filepath.Join(chroot, "/proc")
	if err := os.Mkdir(procRoot, 0755); err != nil {
		return fmt.Errorf("error creating /proc in chroot: %w", err)
	}
	if err := specutils.SafeMount("runsc-proc", procRoot, "tmpfs",
		unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil {
		return fmt.Errorf("error mounting tmpfs in /proc: %w", err)
	}

	// Parents are listed before children because os.Mkdir does not create
	// intermediate directories.
	for _, d := range []string{
		"/proc/sys",
		"/proc/sys/kernel",
		"/proc/sys/vm",
	} {
		dir := filepath.Join(chroot, d)
		if err := os.Mkdir(dir, 0755); err != nil {
			return fmt.Errorf("error creating directory %q: %w", dir, err)
		}
	}

	// Each file is copied from its path in the current mount namespace to
	// the same path underneath the chroot.
	for _, f := range []string{
		"/proc/sys/vm/mmap_min_addr",
		"/proc/sys/kernel/cap_last_cap",
	} {
		dst := filepath.Join(chroot, f)
		if err := copyFile(dst, f); err != nil {
			return fmt.Errorf("failed to copy %q -> %q: %w", f, dst, err)
		}
	}

	flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
	// Preferred option: a brand new procfs instance. If that fails, fall
	// back to a recursive bind mount of the /proc we already have.
	procSubmountDir := "sandbox-proc"
	if newProcfsErr := mountInChroot(chroot, "proc", "/proc/"+procSubmountDir, "proc", flags); newProcfsErr != nil {
		log.Debugf("Unable to mount a new instance of the procfs file system at %q (%v); trying a recursive bind mount instead.", filepath.Join(procRoot, procSubmountDir), newProcfsErr)
		procSubmountDir = "host-proc"
		if bindErr := mountInChroot(chroot, "/proc", "/proc/"+procSubmountDir, "bind",
			unix.MS_BIND|unix.MS_REC|flags); bindErr != nil {
			// The bind error is the one wrapped; the earlier procfs error is
			// included in the message for diagnosis only.
			return fmt.Errorf("error recursively bind-mounting proc at %q (%w) after also failing to mount a new procfs instance there (%v)", filepath.Join(procRoot, procSubmountDir), bindErr, newProcfsErr)
		}
		log.Debugf("Successfully mounted a recursive bind mount of procfs at %q; continuing.", filepath.Join(procRoot, procSubmountDir))
	}

	// The symlink target is relative, so it resolves to whichever submount
	// was set up above regardless of where the chroot lives.
	selfLink := filepath.Join(procRoot, "self")
	selfTarget := procSubmountDir + "/self"
	if err := os.Symlink(selfTarget, selfLink); err != nil {
		return fmt.Errorf("error creating symlink %q -> %q: %w", selfLink, selfTarget, err)
	}

	// Everything is in place; reduce procRoot to execute/search-only
	// permissions (--x--x--x).
	if err := os.Chmod(procRoot, 0o111); err != nil {
		return fmt.Errorf("error chmodding %q: %w", procRoot, err)
	}
	return nil
}

// setUpChroot creates an empty directory with runsc mounted at /runsc and proc
// mounted at /proc.
func setUpChroot(spec *specs.Spec, conf *config.Config) error {
Expand Down Expand Up @@ -109,9 +166,8 @@ func setUpChroot(spec *specs.Spec, conf *config.Config) error {
log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err)
}

flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil {
return fmt.Errorf("error mounting proc in chroot: %v", err)
if err := setupMinimalProcfs(chroot); err != nil {
return fmt.Errorf("error setting up minimal procfs in chroot %q: %v", chroot, err)
}

if err := tpuProxyUpdateChroot("/", chroot, spec, conf); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion runsc/specutils/namespace.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) {

// HasCapabilities returns true if the user has all capabilities in 'cs'.
func HasCapabilities(cs ...capability.Cap) bool {
caps, err := capability.NewPid2(os.Getpid())
caps, err := capability.NewPid2(0)
if err != nil {
return false
}
Expand Down
12 changes: 0 additions & 12 deletions test/e2e/runtime_in_docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,6 @@ func (test testVariant) run(ctx context.Context, logger testutil.Logger, runscPa
ReadOnly: false,
})
}
// Mount an unobstructed view of procfs at /proc2 so that the runtime
// can mount a fresh procfs.
// TODO(gvisor.dev/issue/10944): Remove this once issue is fixed.
opts.Mounts = append(opts.Mounts, mount.Mount{
Type: mount.TypeBind,
Source: "/proc",
Target: "/proc2",
ReadOnly: false,
BindOptions: &mount.BindOptions{
NonRecursive: true,
},
})
const wantMessage = "It became a jumble of words, a litany, almost a kind of glossolalia."
args := []string{
"/runtime",
Expand Down

0 comments on commit fd0c06b

Please sign in to comment.