From 6bae0a7b387bdd671df527c80ba8757eb32b130d Mon Sep 17 00:00:00 2001
From: Nayana Bidari <nybidari@google.com>
Date: Mon, 28 Oct 2024 12:29:55 -0700
Subject: [PATCH] Add validation for spec fields.

Adds validation for OCI spec fields across checkpoint restore. Tests are added
to verify the behavior.

PiperOrigin-RevId: 690710163
---
 runsc/boot/restore.go             | 223 +++++++++++++++++++++++++++++-
 runsc/container/container_test.go | 216 +++++++++++++++++++++++++++++
 2 files changed, 436 insertions(+), 3 deletions(-)

diff --git a/runsc/boot/restore.go b/runsc/boot/restore.go
index eec2cff994..1c0e28f791 100644
--- a/runsc/boot/restore.go
+++ b/runsc/boot/restore.go
@@ -18,6 +18,9 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"reflect"
+	"slices"
+	"sort"
 	"strconv"
 	time2 "time"
 
@@ -141,13 +144,227 @@ func createNetworkStackForRestore(l *Loader) (*stack.Stack, inet.Stack) {
 	return nil, hostinet.NewStack()
 }
 
+func validateErrorWithMsg(field, cName string, oldV, newV any, msg string) error {
+	return fmt.Errorf("%v does not match across checkpoint restore for container: %v, checkpoint %v restore %v, got error %v", field, cName, oldV, newV, msg)
+}
+
+func validateError(field, cName string, oldV, newV any) error {
+	return fmt.Errorf("%v does not match across checkpoint restore for container: %v, checkpoint %v restore %v", field, cName, oldV, newV)
+}
+
+func cloneMount(mnt specs.Mount) specs.Mount {
+	cloneMnt := specs.Mount{
+		Source:      mnt.Source,
+		Destination: mnt.Destination,
+		Type:        mnt.Type,
+	}
+	cloneMnt.Options = make([]string, len(mnt.Options))
+	copy(cloneMnt.Options, mnt.Options)
+	sort.Strings(cloneMnt.Options)
+	cloneMnt.UIDMappings = make([]specs.LinuxIDMapping, len(mnt.UIDMappings))
+	copy(cloneMnt.UIDMappings, mnt.UIDMappings)
+	cloneMnt.GIDMappings = make([]specs.LinuxIDMapping, len(mnt.GIDMappings))
+	copy(cloneMnt.GIDMappings, mnt.GIDMappings)
+	return cloneMnt
+}
+
+// validateMounts validates the mounts in the checkpoint and restore spec.
+// Duplicate mounts are allowed iff all the fields in the mount are same.
+func validateMounts(field, cName string, o, n []specs.Mount) error {
+	// Create a new mount map without source as source path can vary
+	// across checkpoint restore.
+	oldMnts := make(map[string]specs.Mount)
+	for _, m := range o {
+		oldMnts[m.Destination] = cloneMount(m)
+	}
+	newMnts := make(map[string]specs.Mount)
+	for _, m := range n {
+		mnt := cloneMount(m)
+		oldMnt, ok := oldMnts[mnt.Destination]
+		if !ok {
+			return validateError(field, cName, o, n)
+		}
+
+		// Duplicate mounts are allowed iff all fields in specs.Mount are same.
+		if val, ok := newMnts[mnt.Destination]; ok {
+			if !reflect.DeepEqual(val, mnt) {
+				return validateErrorWithMsg(field, cName, o, n, "invalid mount in the restore spec")
+			}
+			continue
+		}
+		newMnts[mnt.Destination] = mnt
+
+		if err := validateArray(field, cName, oldMnt.UIDMappings, mnt.UIDMappings); err != nil {
+			return validateError(field, cName, o, n)
+		}
+		oldMnt.UIDMappings, mnt.UIDMappings = []specs.LinuxIDMapping{}, []specs.LinuxIDMapping{}
+		if err := validateArray(field, cName, oldMnt.GIDMappings, mnt.GIDMappings); err != nil {
+			return validateError(field, cName, o, n)
+		}
+		oldMnt.GIDMappings, mnt.GIDMappings = []specs.LinuxIDMapping{}, []specs.LinuxIDMapping{}
+
+		oldMnt.Source, mnt.Source = "", ""
+		if !reflect.DeepEqual(oldMnt, mnt) {
+			return validateError(field, cName, o, n)
+		}
+	}
+	if len(oldMnts) != len(newMnts) {
+		return validateError(field, cName, o, n)
+	}
+	return nil
+}
+
+func validateDevices(field, cName string, o, n []specs.LinuxDevice) error {
+	if len(o) != len(n) {
+		return validateErrorWithMsg(field, cName, o, n, "length mismatch")
+	}
+	if len(o) == 0 {
+		return nil
+	}
+
+	// Create with only Path and Type fields as other fields can vary during restore.
+	devs := make(map[specs.LinuxDevice]struct{})
+	for _, d := range o {
+		dev := specs.LinuxDevice{
+			Path: d.Path,
+			Type: d.Type,
+		}
+		if _, ok := devs[dev]; ok {
+			return fmt.Errorf("duplicate device found in the spec %v before checkpoint for container %v", o, cName)
+		}
+		devs[dev] = struct{}{}
+	}
+	for _, d := range n {
+		dev := specs.LinuxDevice{
+			Path: d.Path,
+			Type: d.Type,
+		}
+		if _, ok := devs[dev]; !ok {
+			return validateError(field, cName, o, n)
+		}
+		delete(devs, dev)
+	}
+	if len(devs) != 0 {
+		return validateError(field, cName, o, n)
+	}
+	return nil
+}
+
+// validateArray performs a deep comparison of two arrays, checking for equality
+// at every level of nesting. Note that this method:
+// * does not allow duplicates in the arrays.
+// * does not depend on the order of the elements in the arrays.
+func validateArray[T any](field, cName string, oldArr, newArr []T) error {
+	if len(oldArr) != len(newArr) {
+		return validateErrorWithMsg(field, cName, oldArr, newArr, "length mismatch")
+	}
+	if len(oldArr) == 0 {
+		return nil
+	}
+	oldMap := make(map[any]struct{})
+	newMap := make(map[any]struct{})
+	for i := 0; i < len(oldArr); i++ {
+		key := oldArr[i]
+		if _, ok := oldMap[key]; ok {
+			return validateErrorWithMsg(field, cName, oldArr, newArr, "duplicate value")
+		}
+		oldMap[key] = struct{}{}
+
+		key = newArr[i]
+		if _, ok := newMap[key]; ok {
+			return validateErrorWithMsg(field, cName, oldArr, newArr, "duplicate value")
+		}
+		newMap[key] = struct{}{}
+	}
+	if !reflect.DeepEqual(oldMap, newMap) {
+		return validateError(field, cName, oldArr, newArr)
+	}
+
+	return nil
+}
+
+func validateStruct(field, cName string, oldS, newS any) error {
+	if !reflect.DeepEqual(oldS, newS) {
+		return validateError(field, cName, oldS, newS)
+	}
+	return nil
+}
+
+func ifNil[T any](v *T) *T {
+	if v != nil {
+		return v
+	}
+	var t T
+	return &t
+}
+
+func validateSpecForContainer(oldSpec, newSpec *specs.Spec, cName string) error {
+	oldLinux, newLinux := ifNil(oldSpec.Linux), ifNil(newSpec.Linux)
+	oldProcess, newProcess := ifNil(oldSpec.Process), ifNil(newSpec.Process)
+	oldRoot, newRoot := ifNil(oldSpec.Root), ifNil(newSpec.Root)
+
+	if oldSpec.Version != newSpec.Version {
+		return validateError("OCI Version", cName, oldSpec.Version, newSpec.Version)
+	}
+	validateStructMap := make(map[string][2]any)
+	validateStructMap["Root"] = [2]any{oldRoot, newRoot}
+	if err := validateMounts("Mounts", cName, oldSpec.Mounts, newSpec.Mounts); err != nil {
+		return err
+	}
+
+	// Validate specs.Process.
+	if oldProcess.Terminal != newProcess.Terminal {
+		return validateError("Terminal", cName, oldProcess.Terminal, newProcess.Terminal)
+	}
+	if oldProcess.Cwd != newProcess.Cwd {
+		return validateError("Cwd", cName, oldProcess.Cwd, newProcess.Cwd)
+	}
+	validateStructMap["User"] = [2]any{oldProcess.User, newProcess.User}
+	validateStructMap["Rlimits"] = [2]any{oldProcess.Rlimits, newProcess.Rlimits}
+	if ok := slices.Equal(oldProcess.Args, newProcess.Args); !ok {
+		return validateError("Args", cName, oldProcess.Args, newProcess.Args)
+	}
+
+	// Validate specs.Linux.
+	if oldLinux.CgroupsPath != newLinux.CgroupsPath {
+		return validateError("CgroupsPath", cName, oldLinux.CgroupsPath, newLinux.CgroupsPath)
+	}
+	validateStructMap["Sysctl"] = [2]any{oldLinux.Sysctl, newLinux.Sysctl}
+	validateStructMap["Seccomp"] = [2]any{oldLinux.Seccomp, newLinux.Seccomp}
+	if err := validateDevices("Devices", cName, oldLinux.Devices, newLinux.Devices); err != nil {
+		return err
+	}
+	if err := validateArray("UIDMappings", cName, oldLinux.UIDMappings, newLinux.UIDMappings); err != nil {
+		return err
+	}
+	if err := validateArray("GIDMappings", cName, oldLinux.GIDMappings, newLinux.GIDMappings); err != nil {
+		return err
+	}
+	if err := validateArray("Namespace", cName, oldLinux.Namespaces, newLinux.Namespaces); err != nil {
+		return err
+	}
+
+	for key, val := range validateStructMap {
+		if err := validateStruct(key, cName, val[0], val[1]); err != nil {
+			return err
+		}
+	}
+
+	// TODO(b/359591006): Validate runsc version, Linux.Resources, Process.Capabilities and Annotations.
+	// TODO(b/359591006): Check other remaining fields for equality.
+	return nil
+}
+
 // Validate OCI specs before restoring the containers.
 func validateSpecs(oldSpecs, newSpecs map[string]*specs.Spec) error {
-	for name := range newSpecs {
-		if _, ok := oldSpecs[name]; !ok {
-			return fmt.Errorf("checkpoint image does not contain spec for container: %q", name)
+	for cName, newSpec := range newSpecs {
+		oldSpec, ok := oldSpecs[cName]
+		if !ok {
+			return fmt.Errorf("checkpoint image does not contain spec for container: %q", cName)
 		}
+		return validateSpecForContainer(oldSpec, newSpec, cName)
 	}
+
 	return nil
 }
 
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 19f7e8b486..38eb241d53 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -3653,3 +3653,219 @@ func TestLookupEROFS(t *testing.T) {
 		}
 	}
 }
+
+func TestSpecValidation(t *testing.T) {
+	// TODO(b/359591006): Add more tests.
+	tests := []struct {
+		name    string
+		mutate  func(spec, restoreSpec *specs.Spec, mountPath, restoreMntPath string)
+		wantErr string
+	}{
+		{
+			name: "Terminal",
+			mutate: func(_, restoreSpec *specs.Spec, _, _ string) {
+				restoreSpec.Process.Terminal = true
+			},
+			wantErr: "Terminal does not match across checkpoint restore",
+		},
+		{
+			name: "Args",
+			mutate: func(_, restoreSpec *specs.Spec, _, _ string) {
+				restoreSpec.Process.Args = append(restoreSpec.Process.Args, "new arg")
+			},
+			wantErr: "Args does not match across checkpoint restore",
+		},
+		{
+			name: "Device",
+			mutate: func(spec, restoreSpec *specs.Spec, _, _ string) {
+				spec.Linux = &specs.Linux{}
+				restoreSpec.Linux = &specs.Linux{}
+				mode := os.FileMode(0666)
+				dev := specs.LinuxDevice{
+					Path:     "/dev/nvidiactl",
+					Type:     "c",
+					Major:    195, // nvgpu.NV_MAJOR_DEVICE_NUMBER,
+					Minor:    255, // nvgpu.NV_CONTROL_DEVICE_MINOR,
+					FileMode: &mode,
+				}
+				restoreSpec.Linux.Devices = append(restoreSpec.Linux.Devices, dev)
+			},
+			wantErr: "Devices does not match across checkpoint restore",
+		},
+		{
+			name: "Namespace",
+			mutate: func(spec, restoreSpec *specs.Spec, _, _ string) {
+				spec.Linux = &specs.Linux{}
+				restoreSpec.Linux = &specs.Linux{}
+				restoreSpec.Linux.Namespaces = append(restoreSpec.Linux.Namespaces, specs.LinuxNamespace{
+					Type: "network",
+					Path: fmt.Sprintf("/proc/%d/ns/net", os.Getpid()),
+				})
+			},
+			wantErr: "Namespace does not match across checkpoint restore",
+		},
+		{
+			name: "Seccomp",
+			mutate: func(spec, restoreSpec *specs.Spec, _, _ string) {
+				spec.Linux = &specs.Linux{}
+				restoreSpec.Linux = &specs.Linux{}
+				restoreSpec.Linux.Seccomp = &specs.LinuxSeccomp{
+					DefaultAction: specs.ActAllow,
+				}
+			},
+			wantErr: "Seccomp does not match across checkpoint restore",
+		},
+		{
+			name: "RestoreDupMountsSuccess",
+			mutate: func(spec, restoreSpec *specs.Spec, mountPath, restoreMntPath string) {
+				mnt := specs.Mount{
+					Source:      mountPath,
+					Destination: mountPath,
+					Type:        "tmpfs",
+				}
+				spec.Mounts = append(spec.Mounts, mnt)
+				restoreMnt := specs.Mount{
+					Source:      restoreMntPath,
+					Destination: mountPath,
+					Type:        "tmpfs",
+				}
+				restoreSpec.Mounts = append(restoreSpec.Mounts, restoreMnt)
+				restoreSpec.Mounts = append(restoreSpec.Mounts, restoreMnt)
+			},
+			wantErr: "",
+		},
+		{
+			name: "RestoreDupMountsFail",
+			mutate: func(spec, restoreSpec *specs.Spec, mountPath, restoreMntPath string) {
+				mnt := specs.Mount{
+					Source:      mountPath,
+					Destination: mountPath,
+					Type:        "tmpfs",
+				}
+				spec.Mounts = append(spec.Mounts, mnt)
+				restoreMnt := specs.Mount{
+					Source:      restoreMntPath,
+					Destination: mountPath,
+					Type:        "tmpfs",
+				}
+				restoreSpec.Mounts = append(restoreSpec.Mounts, restoreMnt)
+				restoreMnt.Source = restoreMntPath + "2"
+				restoreSpec.Mounts = append(restoreSpec.Mounts, restoreMnt)
+
+			},
+			wantErr: "invalid mount",
+		},
+		{
+			name: "RestoreMountsFail",
+			mutate: func(spec, restoreSpec *specs.Spec, mountPath, restoreMntPath string) {
+				mnt := specs.Mount{
+					Source:      mountPath,
+					Destination: mountPath,
+					Type:        "tmpfs",
+				}
+				spec.Mounts = append(spec.Mounts, mnt)
+				restoreMnt := specs.Mount{
+					Source:      restoreMntPath,
+					Destination: restoreMntPath,
+					Type:        "tmpfs",
+				}
+				restoreSpec.Mounts = append(restoreSpec.Mounts, restoreMnt)
+			},
+			wantErr: "Mounts does not match across checkpoint restore",
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			spec, _ := sleepSpecConf(t)
+			mountDir, err := os.MkdirTemp(testutil.TmpDir(), "mount-test")
+			if err != nil {
+				t.Fatalf("os.MkdirTemp() failed: %v", err)
+			}
+			if err := os.Chmod(mountDir, 0777); err != nil {
+				t.Fatalf("error chmoding file: %q, %v", mountDir, err)
+			}
+			defer os.RemoveAll(mountDir)
+			mountPath := filepath.Join(mountDir, "/foo-dir")
+
+			restoreSpec, _ := sleepSpecConf(t)
+			restoreDir, err := os.MkdirTemp(testutil.TmpDir(), "restore-test")
+			if err != nil {
+				t.Fatalf("os.MkdirTemp() failed: %v", err)
+			}
+			if err := os.Chmod(restoreDir, 0777); err != nil {
+				t.Fatalf("error chmoding file: %q, %v", restoreDir, err)
+			}
+			defer os.RemoveAll(restoreDir)
+			restoreMntPath := filepath.Join(restoreDir, "/restore-dir")
+
+			test.mutate(spec, restoreSpec, mountPath, restoreMntPath)
+
+			conf := testutil.TestConfig(t)
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
+
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
+
+			// Set the image path, which is where the checkpoint image will be saved.
+			dir, err := os.MkdirTemp(testutil.TmpDir(), "checkpoint")
+			if err != nil {
+				t.Fatalf("os.MkdirTemp failed: %v", err)
+			}
+			defer os.RemoveAll(dir)
+			if err := os.Chmod(dir, 0777); err != nil {
+				t.Fatalf("error chmoding file: %q, %v", dir, err)
+			}
+			// Checkpoint running container; save state into new file.
+			if err := cont.Checkpoint(dir, false /* direct */, statefile.Options{Compression: statefile.CompressionLevelFlateBestSpeed}, pgalloc.SaveOpts{}); err != nil {
+				t.Fatalf("error checkpointing container to empty file: %v", err)
+			}
+
+			_, bundleDir2, cleanup2, err := testutil.SetupContainer(restoreSpec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup2()
+
+			// Restore into a new container with different ID (e.g. clone). Keep the
+			// initial container running to ensure no conflict with it.
+			args2 := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      restoreSpec,
+				BundleDir: bundleDir2,
+			}
+			cont2, err := New(conf, args2)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont2.Destroy()
+
+			err = cont2.Restore(conf, dir, false /* direct */, false /* background */)
+			if err == nil {
+				if test.wantErr == "" {
+					return
+				}
+				t.Fatalf("spec validation failed for test %v, got: nil, want: %v", test, test.wantErr)
+			}
+
+			got := err.Error()
+			if !strings.Contains(got, test.wantErr) {
+				t.Fatalf("wrong error message, got: %v, want: %v", got, test.wantErr)
+			}
+		})
+	}
+}