From 504e98ccadafcccfaaab61e67b9eff925f5b7f6e Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Wed, 11 Sep 2024 15:01:54 -0700 Subject: [PATCH] Store the container specs in the statefile for spec validation. - Append all the container specs to the statefile during checkpoint. - These specs are retrieved during restore and compared/validated against the new specs passed during the restore. PiperOrigin-RevId: 673550450 --- runsc/boot/autosave.go | 2 ++ runsc/boot/loader.go | 45 ++++++++++++++++++++++++++++++++++++++++-- runsc/boot/restore.go | 19 ++++++++++++++++++ 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/runsc/boot/autosave.go b/runsc/boot/autosave.go index 7027313cae..d2c5e89073 100644 --- a/runsc/boot/autosave.go +++ b/runsc/boot/autosave.go @@ -56,6 +56,7 @@ func getSaveOpts(l *Loader, k *kernel.Kernel, isResume bool) state.SaveOpts { func getTargetForSaveResume(l *Loader) func(k *kernel.Kernel) { return func(k *kernel.Kernel) { + l.addContainerSpecsToCheckpoint() saveOpts := getSaveOpts(l, k, true /* isResume */) // Store the state file contents in a buffer for save-resume. // There is no need to verify the state file, we just need the @@ -74,6 +75,7 @@ func getTargetForSaveRestore(l *Loader, files []*fd.FD) func(k *kernel.Kernel) { var once sync.Once return func(k *kernel.Kernel) { once.Do(func() { + l.addContainerSpecsToCheckpoint() saveOpts := getSaveOpts(l, k, false /* isResume */) saveOpts.Destination = files[0] if len(files) == 3 { diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index acec716b43..76ee3c5b0a 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -16,6 +16,7 @@ package boot import ( + "encoding/json" "errors" "fmt" mrand "math/rand" @@ -363,8 +364,16 @@ type Args struct { SaveFDs []*fd.FD } -// make sure stdioFDs are always the same on initial start and on restore -const startingStdioFD = 256 +const ( + // startingStdioFD is the starting stdioFD number used during sandbox + // start and restore.
This makes sure the stdioFDs are always the same + // on initial start and on restore. + startingStdioFD = 256 + + // containerSpecsKey is the key used to add the container specs to, and pop + // them from, the kernel during save/restore. + containerSpecsKey = "container_specs" +) func getRootCredentials(spec *specs.Spec, conf *config.Config, userNs *auth.UserNamespace) *auth.Credentials { // Create capabilities. @@ -1941,3 +1950,35 @@ func (l *Loader) containerRuntimeState(cid string) ContainerRuntimeState { // Init process has stopped, but no one has called wait on it yet. return RuntimeStateStopped } + +// addContainerSpecsToCheckpoint adds the container specs to the kernel. +func (l *Loader) addContainerSpecsToCheckpoint() { + l.mu.Lock() + s := l.containerSpecs + l.mu.Unlock() + + specsMap := make(map[string][]byte) + for k, v := range s { + data, err := json.Marshal(v) + if err != nil { + log.Warningf("json marshal error for specs %v", err) + return + } + specsMap[k] = data + } + l.k.AddStateToCheckpoint(containerSpecsKey, specsMap) +} + +// popContainerSpecsFromCheckpoint pops all the container specs from the kernel. +func popContainerSpecsFromCheckpoint(k *kernel.Kernel) (map[string]*specs.Spec, error) { + specsMap := (k.PopCheckpointState(containerSpecsKey)).(map[string][]byte) + oldSpecs := make(map[string]*specs.Spec) + for k, v := range specsMap { + var s specs.Spec + if err := json.Unmarshal(v, &s); err != nil { + return nil, fmt.Errorf("json unmarshal error for specs %v", err) + } + oldSpecs[k] = &s + } + return oldSpecs, nil +} diff --git a/runsc/boot/restore.go b/runsc/boot/restore.go index ccf311e8d1..0740c27b96 100644 --- a/runsc/boot/restore.go +++ b/runsc/boot/restore.go @@ -137,6 +137,14 @@ func createNetworkStackForRestore(l *Loader) (*stack.Stack, inet.Stack) { return nil, hostinet.NewStack() } +// Validate OCI specs before restoring the containers.
+func validateSpecs(oldSpecs, newSpecs map[string]*specs.Spec) error { + if len(oldSpecs) != len(newSpecs) { + return fmt.Errorf("incorrect number of specs during checkpoint and restore") + } + return nil +} + func (r *restorer) restore(l *Loader) error { log.Infof("Starting to restore %d containers", len(r.containers)) @@ -226,6 +234,14 @@ func (r *restorer) restore(l *Loader) error { return err } + oldSpecs, err := popContainerSpecsFromCheckpoint(l.k) + if err != nil { + return err + } + if err := validateSpecs(oldSpecs, l.containerSpecs); err != nil { + return err + } + // Since we have a new kernel we also must make a new watchdog. dogOpts := watchdog.DefaultOpts dogOpts.TaskTimeoutAction = l.root.conf.WatchdogAction @@ -328,6 +344,9 @@ func (l *Loader) save(o *control.SaveOpts) (err error) { } o.Metadata["container_count"] = strconv.Itoa(l.containerCount()) + // Save container specs. + l.addContainerSpecsToCheckpoint() + if err := preSaveImpl(l, o); err != nil { return err }