From ef7b46a13ffc051b3934e4966217ac29aebe82ad Mon Sep 17 00:00:00 2001 From: Saied Kazemi Date: Thu, 5 Feb 2015 20:32:27 -0800 Subject: [PATCH 01/13] Checkpoint/Restore Support: add exec driver methods Methods for checkpointing and restoring containers were added to the native driver. The LXC driver returns an error message that these methods are not implemented yet. Signed-off-by: Saied Kazemi Conflicts: daemon/execdriver/native/create.go daemon/execdriver/native/driver.go daemon/execdriver/native/init.go --- daemon/execdriver/driver.go | 5 + daemon/execdriver/lxc/driver.go | 9 +- daemon/execdriver/native/create.go | 19 ++++ daemon/execdriver/native/driver.go | 150 +++++++++++++++++++++++++++++ 4 files changed, 182 insertions(+), 1 deletion(-) diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 6a9049ba98da0..3b0f0f36fd2e5 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -28,6 +28,7 @@ var ( // It's used by 'Run' and 'Exec', does some work in parent process // after child process is started. type StartCallback func(*ProcessConfig, int) +type RestoreCallback func(*ProcessConfig, int) // Info is driver specific information based on // processes registered with the driver @@ -71,6 +72,10 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error + Checkpoint(c *Command) error + + Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error) + // Name returns the name of the driver. Name() string diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index 27da7c8c24db8..f8b0511ae0e73 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -560,7 +560,14 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return err } -// Terminate implements the exec driver Driver interface. 
+func (d *driver) Checkpoint(c *execdriver.Command) error { + return fmt.Errorf("Checkpointing lxc containers not supported yet\n") +} + +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { + return 0, fmt.Errorf("Restoring lxc containers not supported yet\n") +} + func (d *Driver) Terminate(c *execdriver.Command) error { return killLxc(c.ID, 9) } diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 85f72f8c2cc83..8759135b9c1da 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -4,6 +4,7 @@ package native import ( "errors" + "encoding/json" "fmt" "net" "strings" @@ -113,6 +114,24 @@ func generateIfaceName() (string, error) { return "", errors.New("Failed to find name for new interface") } +// Re-create the container type from the image that was saved during checkpoint. +func (d *driver) createRestoreContainer(c *execdriver.Command, imageDir string) (*libcontainer.Config, error) { + // Read the container.json. 
+ f1, err := os.Open(filepath.Join(imageDir, "container.json")) + if err != nil { + return nil, err + } + defer f1.Close() + + var container *libcontainer.Config + err = json.NewDecoder(f1).Decode(&container) + if err != nil { + return nil, err + } + + return container, nil +} + func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command) error { if c.Network == nil { return nil diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index b241bdbc504c8..3bd3d5d610bd5 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -20,6 +20,7 @@ import ( "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" + "github.com/docker/docker/utils" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" @@ -298,6 +299,155 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return active.Resume() } +// XXX Where is the right place for the following +// const and getCheckpointImageDir() function? +const ( + containersDir = "/var/lib/docker/containers" + criuImgDir = "criu_img" +) + +func getCheckpointImageDir(containerId string) string { + return filepath.Join(containersDir, containerId, criuImgDir) +} + +func (d *driver) Checkpoint(c *execdriver.Command) error { + active := d.activeContainers[c.ID] + if active == nil { + return fmt.Errorf("active container for %s does not exist", c.ID) + } + container := active.container + + // Create an image directory for this container (which + // may already exist from a previous checkpoint). + imageDir := getCheckpointImageDir(c.ID) + err := os.MkdirAll(imageDir, 0700) + if err != nil && !os.IsExist(err) { + return err + } + + // Copy container.json and state.json files to the CRIU + // image directory for later use during restore. 
Do this + // before checkpointing because after checkpoint the container + // will exit and these files will be removed. + log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir) + srcFiles := []string{"container.json", "state.json"} + for _, f := range srcFiles { + srcFile := filepath.Join(d.root, c.ID, f) + dstFile := filepath.Join(imageDir, f) + if _, err := utils.CopyFile(srcFile, dstFile); err != nil { + return err + } + } + + d.Lock() + defer d.Unlock() + err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid) + if err != nil { + return err + } + + return nil +} + +type restoreOutput struct { + exitCode int + err error +} + +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { + imageDir := getCheckpointImageDir(c.ID) + container, err := d.createRestoreContainer(c, imageDir) + if err != nil { + return 1, err + } + + var term execdriver.Terminal + + if c.ProcessConfig.Tty { + term, err = NewTtyConsole(&c.ProcessConfig, pipes) + } else { + term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) + } + if err != nil { + return -1, err + } + c.ProcessConfig.Terminal = term + + d.Lock() + d.activeContainers[c.ID] = &activeContainer{ + container: container, + cmd: &c.ProcessConfig.Cmd, + } + d.Unlock() + defer d.cleanContainer(c.ID) + + // Since the CRIU binary exits after restoring the container, we + // need to reap its child by setting PR_SET_CHILD_SUBREAPER (36) + // so that it'll be owned by this process (Docker daemon) after restore. + // + // XXX This really belongs to where the Docker daemon starts. 
+ if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 { + return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr) + } + + restoreOutputChan := make(chan restoreOutput, 1) + waitForRestore := make(chan struct{}) + + go func() { + exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir, + func(child *os.File, args []string) *exec.Cmd { + cmd := new(exec.Cmd) + cmd.Path = d.initPath + cmd.Args = append([]string{ + DriverName, + "-restore", + "-pipe", "3", + "--", + }, args...) + cmd.ExtraFiles = []*os.File{child} + return cmd + }, + func(restorePid int) error { + log.CRDbg("restorePid=%d", restorePid) + if restorePid == 0 { + restoreCallback(&c.ProcessConfig, 0) + return nil + } + + // The container.json file should be written *after* the container + // has started because its StdFds cannot be initialized before. + // + // XXX How do we handle error here? + d.writeContainerFile(container, c.ID) + close(waitForRestore) + if restoreCallback != nil { + c.ProcessConfig.Process, err = os.FindProcess(restorePid) + if err != nil { + log.Debugf("cannot find restored process %d", restorePid) + return err + } + c.ContainerPid = c.ProcessConfig.Process.Pid + restoreCallback(&c.ProcessConfig, c.ContainerPid) + } + return nil + }) + restoreOutputChan <- restoreOutput{exitCode, err} + }() + + select { + case restoreOutput := <-restoreOutputChan: + // there was an error + return restoreOutput.exitCode, restoreOutput.err + case <-waitForRestore: + // container restored + break + } + + // Wait for the container to exit. + restoreOutput := <-restoreOutputChan + return restoreOutput.exitCode, restoreOutput.err +} + // Terminate implements the exec driver Driver interface. 
func (d *Driver) Terminate(c *execdriver.Command) error { defer d.cleanContainer(c.ID) From 9378acf1d65f14f4ef849f29373225d69f112717 Mon Sep 17 00:00:00 2001 From: boucher Date: Mon, 25 May 2015 08:32:58 -0700 Subject: [PATCH 02/13] Update checkpoint/restore support to match docker/master Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- daemon/execdriver/driver.go | 4 +- daemon/execdriver/lxc/driver.go | 6 +- daemon/execdriver/native/create.go | 1 - daemon/execdriver/native/driver.go | 182 ++++++++++------------------- 4 files changed, 69 insertions(+), 124 deletions(-) diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 3b0f0f36fd2e5..755f27d67f19b 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -72,9 +72,9 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error - Checkpoint(c *Command) error + Checkpoint(c *Command, opts *libcontainer.CriuOpts) error - Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error) + Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (ExitStatus, error) // Name returns the name of the driver. 
Name() string diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index f8b0511ae0e73..26866b7efd15f 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -560,12 +560,12 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return err } -func (d *driver) Checkpoint(c *execdriver.Command) error { +func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error { return fmt.Errorf("Checkpointing lxc containers not supported yet\n") } -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { - return 0, fmt.Errorf("Restoring lxc containers not supported yet\n") +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) { + return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet\n") } func (d *Driver) Terminate(c *execdriver.Command) error { diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 8759135b9c1da..95c2fd03eb5fc 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -4,7 +4,6 @@ package native import ( "errors" - "encoding/json" "fmt" "net" "strings" diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index 3bd3d5d610bd5..90924230538ab 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -299,49 +299,15 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return active.Resume() } -// XXX Where is the right place for the following -// const and getCheckpointImageDir() function? 
-const ( - containersDir = "/var/lib/docker/containers" - criuImgDir = "criu_img" -) - -func getCheckpointImageDir(containerId string) string { - return filepath.Join(containersDir, containerId, criuImgDir) -} - -func (d *driver) Checkpoint(c *execdriver.Command) error { +func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error { active := d.activeContainers[c.ID] if active == nil { return fmt.Errorf("active container for %s does not exist", c.ID) } - container := active.container - - // Create an image directory for this container (which - // may already exist from a previous checkpoint). - imageDir := getCheckpointImageDir(c.ID) - err := os.MkdirAll(imageDir, 0700) - if err != nil && !os.IsExist(err) { - return err - } - - // Copy container.json and state.json files to the CRIU - // image directory for later use during restore. Do this - // before checkpointing because after checkpoint the container - // will exit and these files will be removed. - log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir) - srcFiles := []string{"container.json", "state.json"} - for _, f := range srcFiles { - srcFile := filepath.Join(d.root, c.ID, f) - dstFile := filepath.Join(imageDir, f) - if _, err := utils.CopyFile(srcFile, dstFile); err != nil { - return err - } - } d.Lock() defer d.Unlock() - err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid) + err := active.Checkpoint(opts) if err != nil { return err } @@ -349,103 +315,83 @@ func (d *driver) Checkpoint(c *execdriver.Command) error { return nil } -type restoreOutput struct { - exitCode int - err error -} +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) { + var ( + cont libcontainer.Container + err error + ) -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback 
execdriver.RestoreCallback) (int, error) { - imageDir := getCheckpointImageDir(c.ID) - container, err := d.createRestoreContainer(c, imageDir) + cont, err = d.factory.Load(c.ID) if err != nil { - return 1, err + if forceRestore { + var config *configs.Config + config, err = d.createContainer(c) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + cont, err = d.factory.Create(c.ID, config) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + } else { + return execdriver.ExitStatus{ExitCode: -1}, err + } } - var term execdriver.Terminal - - if c.ProcessConfig.Tty { - term, err = NewTtyConsole(&c.ProcessConfig, pipes) - } else { - term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) + p := &libcontainer.Process{ + Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), + Env: c.ProcessConfig.Env, + Cwd: c.WorkingDir, + User: c.ProcessConfig.User, } - if err != nil { - return -1, err + + config := cont.Config() + if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err } - c.ProcessConfig.Terminal = term d.Lock() - d.activeContainers[c.ID] = &activeContainer{ - container: container, - cmd: &c.ProcessConfig.Cmd, - } + d.activeContainers[c.ID] = cont d.Unlock() - defer d.cleanContainer(c.ID) + defer func() { + cont.Destroy() + d.cleanContainer(c.ID) + }() - // Since the CRIU binary exits after restoring the container, we - // need to reap its child by setting PR_SET_CHILD_SUBREAPER (36) - // so that it'll be owned by this process (Docker daemon) after restore. - // - // XXX This really belongs to where the Docker daemon starts. 
- if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 { - return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr) + if err := cont.Restore(p, opts); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err } - restoreOutputChan := make(chan restoreOutput, 1) - waitForRestore := make(chan struct{}) - - go func() { - exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir, - func(child *os.File, args []string) *exec.Cmd { - cmd := new(exec.Cmd) - cmd.Path = d.initPath - cmd.Args = append([]string{ - DriverName, - "-restore", - "-pipe", "3", - "--", - }, args...) - cmd.ExtraFiles = []*os.File{child} - return cmd - }, - func(restorePid int) error { - log.CRDbg("restorePid=%d", restorePid) - if restorePid == 0 { - restoreCallback(&c.ProcessConfig, 0) - return nil - } - - // The container.json file should be written *after* the container - // has started because its StdFds cannot be initialized before. - // - // XXX How do we handle error here? - d.writeContainerFile(container, c.ID) - close(waitForRestore) - if restoreCallback != nil { - c.ProcessConfig.Process, err = os.FindProcess(restorePid) - if err != nil { - log.Debugf("cannot find restored process %d", restorePid) - return err - } - c.ContainerPid = c.ProcessConfig.Process.Pid - restoreCallback(&c.ProcessConfig, c.ContainerPid) - } - return nil - }) - restoreOutputChan <- restoreOutput{exitCode, err} - }() + // FIXME: no idea if any of this is needed... 
+ if restoreCallback != nil { + pid, err := p.Pid() + if err != nil { + p.Signal(os.Kill) + p.Wait() + return execdriver.ExitStatus{ExitCode: -1}, err + } + restoreCallback(&c.ProcessConfig, pid) + } - select { - case restoreOutput := <-restoreOutputChan: - // there was an error - return restoreOutput.exitCode, restoreOutput.err - case <-waitForRestore: - // container restored - break + oom := notifyOnOOM(cont) + waitF := p.Wait + if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) { + // we need such hack for tracking processes with inherited fds, + // because cmd.Wait() waiting for all streams to be copied + waitF = waitInPIDHost(p, cont) + } + ps, err := waitF() + if err != nil { + execErr, ok := err.(*exec.ExitError) + if !ok { + return execdriver.ExitStatus{ExitCode: -1}, err + } + ps = execErr.ProcessState } - // Wait for the container to exit. - restoreOutput := <-restoreOutputChan - return restoreOutput.exitCode, restoreOutput.err + cont.Destroy() + _, oomKill := <-oom + return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil } // Terminate implements the exec driver Driver interface. From 6baca3887afa59966c70273efa9e8f24229cd2f1 Mon Sep 17 00:00:00 2001 From: Saied Kazemi Date: Thu, 5 Feb 2015 20:37:07 -0800 Subject: [PATCH 03/13] Checkpoint/Restore Support: add functionality to daemon Support was added to the daemon to use the Checkpoint and Restore methods of the native exec driver for checkpointing and restoring containers. 
Signed-off-by: Saied Kazemi Conflicts: api/server/server.go daemon/container.go daemon/daemon.go daemon/networkdriver/bridge/driver.go daemon/state.go vendor/src/github.com/docker/libnetwork/ipallocator/allocator.go --- api/server/server.go | 77 ++++++++++++++++++++++++++++------------ daemon/checkpoint.go | 55 ++++++++++++++++++++++++++++ daemon/container.go | 66 ++++++++++++++++++++++++++++++++-- daemon/container_unix.go | 47 ++++++++++++++++++++++++ daemon/daemon.go | 31 ++++++++++++++++ daemon/monitor.go | 70 ++++++++++++++++++++++++++++++++++++ daemon/state.go | 23 ++++++++++++ 7 files changed, 345 insertions(+), 24 deletions(-) create mode 100644 daemon/checkpoint.go diff --git a/api/server/server.go b/api/server/server.go index 66ba4e0842770..6b3b87176e781 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -215,10 +215,41 @@ func httpError(w http.ResponseWriter, err error) { // json encoding. func writeJSON(w http.ResponseWriter, code int, v interface{}) error { w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) return json.NewEncoder(w).Encode(v) } +func postContainersCheckpoint(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + job := eng.Job("checkpoint", vars["name"]) + if err := job.Run(); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) + return nil +} + +func postContainersRestore(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + job := eng.Job("restore", vars["name"]) + if err := job.Run(); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) + return nil +} + func (s *Server) optionsHandler(version 
version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { w.WriteHeader(http.StatusOK) return nil @@ -330,28 +361,30 @@ func createRouter(s *Server) *mux.Router { "/containers/{name:.*}/archive": s.getContainersArchive, }, "POST": { - "/auth": s.postAuth, - "/commit": s.postCommit, - "/build": s.postBuild, - "/images/create": s.postImagesCreate, - "/images/load": s.postImagesLoad, - "/images/{name:.*}/push": s.postImagesPush, - "/images/{name:.*}/tag": s.postImagesTag, - "/containers/create": s.postContainersCreate, - "/containers/{name:.*}/kill": s.postContainersKill, - "/containers/{name:.*}/pause": s.postContainersPause, - "/containers/{name:.*}/unpause": s.postContainersUnpause, - "/containers/{name:.*}/restart": s.postContainersRestart, - "/containers/{name:.*}/start": s.postContainersStart, - "/containers/{name:.*}/stop": s.postContainersStop, - "/containers/{name:.*}/wait": s.postContainersWait, - "/containers/{name:.*}/resize": s.postContainersResize, - "/containers/{name:.*}/attach": s.postContainersAttach, - "/containers/{name:.*}/copy": s.postContainersCopy, - "/containers/{name:.*}/exec": s.postContainerExecCreate, - "/exec/{name:.*}/start": s.postContainerExecStart, - "/exec/{name:.*}/resize": s.postContainerExecResize, - "/containers/{name:.*}/rename": s.postContainerRename, + "/auth": s.postAuth, + "/commit": s.postCommit, + "/build": s.postBuild, + "/images/create": s.postImagesCreate, + "/images/load": s.postImagesLoad, + "/images/{name:.*}/push": s.postImagesPush, + "/images/{name:.*}/tag": s.postImagesTag, + "/containers/create": s.postContainersCreate, + "/containers/{name:.*}/kill": s.postContainersKill, + "/containers/{name:.*}/pause": s.postContainersPause, + "/containers/{name:.*}/unpause": s.postContainersUnpause, + "/containers/{name:.*}/restart": s.postContainersRestart, + "/containers/{name:.*}/start": s.postContainersStart, + "/containers/{name:.*}/stop": s.postContainersStop, + 
"/containers/{name:.*}/wait": s.postContainersWait, + "/containers/{name:.*}/resize": s.postContainersResize, + "/containers/{name:.*}/attach": s.postContainersAttach, + "/containers/{name:.*}/copy": s.postContainersCopy, + "/containers/{name:.*}/exec": s.postContainerExecCreate, + "/exec/{name:.*}/start": s.postContainerExecStart, + "/exec/{name:.*}/resize": s.postContainerExecResize, + "/containers/{name:.*}/rename": s.postContainerRename, + "/containers/{name:.*}/checkpoint": s.postContainersCheckpoint, + "/containers/{name:.*}/restore": s.postContainersRestore, }, "PUT": { "/containers/{name:.*}/archive": s.putContainersArchive, diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go new file mode 100644 index 0000000000000..f6057c6a028f9 --- /dev/null +++ b/daemon/checkpoint.go @@ -0,0 +1,55 @@ +package daemon + +import ( + "github.com/docker/docker/engine" +) + +// Checkpoint a running container. +func (daemon *Daemon) ContainerCheckpoint(job *engine.Job) engine.Status { + if len(job.Args) != 1 { + return job.Errorf("Usage: %s CONTAINER\n", job.Name) + } + + name := job.Args[0] + container, err := daemon.Get(name) + if err != nil { + return job.Error(err) + } + if !container.IsRunning() { + return job.Errorf("Container %s not running", name) + } + + if err := container.Checkpoint(); err != nil { + return job.Errorf("Cannot checkpoint container %s: %s", name, err) + } + + container.LogEvent("checkpoint") + return engine.StatusOK +} + +// Restore a checkpointed container. 
+func (daemon *Daemon) ContainerRestore(job *engine.Job) engine.Status { + if len(job.Args) != 1 { + return job.Errorf("Usage: %s CONTAINER\n", job.Name) + } + + name := job.Args[0] + container, err := daemon.Get(name) + if err != nil { + return job.Error(err) + } + if container.IsRunning() { + return job.Errorf("Container %s already running", name) + } + if !container.State.IsCheckpointed() { + return job.Errorf("Container %s is not checkpointed", name) + } + + if err := container.Restore(); err != nil { + container.LogEvent("die") + return job.Errorf("Cannot restore container %s: %s", name, err) + } + + container.LogEvent("restore") + return engine.StatusOK +} diff --git a/daemon/container.go b/daemon/container.go index f56b8cbe99607..4bdc991a4fc64 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -340,10 +340,15 @@ func (container *Container) isNetworkAllocated() bool { return container.NetworkSettings.IPAddress != "" } + // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. 
func (container *Container) cleanup() { - container.ReleaseNetwork() + if container.IsCheckpointed() { + log.CRDbg("not calling ReleaseNetwork() for checkpointed container %s", container.ID) + } else { + container.ReleaseNetwork() + } if err := container.CleanupStorage(); err != nil { logrus.Errorf("%v: Failed to cleanup storage: %v", container.ID, err) @@ -675,6 +680,41 @@ func (container *Container) Copy(resource string) (rc io.ReadCloser, err error) return reader, nil } +func (container *Container) Checkpoint() error { + return container.daemon.Checkpoint(container) +} + +func (container *Container) Restore() error { + var err error + + container.Lock() + defer container.Unlock() + + defer func() { + if err != nil { + container.cleanup() + } + }() + + if err = container.initializeNetworking(); err != nil { + return err + } + + linkedEnv, err := container.setupLinkedContainers() + if err != nil { + return err + } + if err = container.setupWorkingDirectory(); err != nil { + return err + } + env := container.createDaemonEnvironment(linkedEnv) + if err = populateCommandRestore(container, env); err != nil { + return err + } + + return container.waitForRestore() +} + // Returns true if the container exposes a certain port func (container *Container) Exposes(p nat.Port) bool { _, exists := container.Config.ExposedPorts[p] @@ -764,6 +804,29 @@ func (container *Container) waitForStart() error { return nil } +// Like waitForStart() but for restoring a container. +// +// XXX Does RestartPolicy apply here? +func (container *Container) waitForRestore() error { + container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) + + // After calling promise.Go() we'll have two goroutines: + // - The current goroutine that will block in the select + // below until restore is done. + // - A new goroutine that will restore the container and + // wait for it to exit. 
+ select { + case <-container.monitor.restoreSignal: + if container.ExitCode != 0 { + return fmt.Errorf("restore process failed") + } + case err := <-promise.Go(container.monitor.Restore): + return err + } + + return nil +} + func (container *Container) GetProcessLabel() string { // even if we have a process label return "" if we are running // in privileged mode @@ -970,7 +1033,6 @@ func attach(streamConfig *StreamConfig, openStdin, stdinOnce, tty bool, stdin io _, err = copyEscapable(cStdin, stdin) } else { _, err = io.Copy(cStdin, stdin) - } if err == io.ErrClosedPipe { err = nil diff --git a/daemon/container_unix.go b/daemon/container_unix.go index b52f14c5de23d..43617fe886a28 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -322,6 +322,53 @@ func mergeDevices(defaultDevices, userDevices []*configs.Device) []*configs.Devi return append(devs, userDevices...) } +// Like populateCommand() but for restoring a container. +// +// XXX populateCommand() does a lot more. Not sure if we have +// to do everything it does. 
+func populateCommandRestore(c *Container, env []string) error { + resources := &execdriver.Resources{ + Memory: c.Config.Memory, + MemorySwap: c.Config.MemorySwap, + CpuShares: c.Config.CpuShares, + Cpuset: c.Config.Cpuset, + } + + processConfig := execdriver.ProcessConfig{ + Privileged: c.hostConfig.Privileged, + Entrypoint: c.Path, + Arguments: c.Args, + Tty: c.Config.Tty, + User: c.Config.User, + } + + processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true} + processConfig.Env = env + + c.command = &execdriver.Command{ + ID: c.ID, + Rootfs: c.RootfsPath(), + ReadonlyRootfs: c.hostConfig.ReadonlyRootfs, + InitPath: "/.dockerinit", + WorkingDir: c.Config.WorkingDir, + // Network: en, + // Ipc: ipc, + // Pid: pid, + Resources: resources, + // AllowedDevices: allowedDevices, + // AutoCreatedDevices: autoCreatedDevices, + CapAdd: c.hostConfig.CapAdd, + CapDrop: c.hostConfig.CapDrop, + ProcessConfig: processConfig, + ProcessLabel: c.GetProcessLabel(), + MountLabel: c.GetMountLabel(), + // LxcConfig: lxcConfig, + AppArmorProfile: c.AppArmorProfile, + } + + return nil +} + // GetSize, return real size, virtual size func (container *Container) GetSize() (int64, int64) { var ( diff --git a/daemon/daemon.go b/daemon/daemon.go index 4d6d43dc31915..34a93aefd44e0 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -274,6 +274,18 @@ func (daemon *Daemon) restore() error { logrus.Debugf("Loaded container %v", container.ID) containers[container.ID] = &cr{container: container} + + // If the container was checkpointed, we need to reserve + // the IP address that it was using. + // + // XXX We should also reserve host ports (if any). 
+ if container.IsCheckpointed() { + /*err = bridge.ReserveIP(container.ID, container.NetworkSettings.IPAddress) + if err != nil { + log.Errorf("Failed to reserve IP %s for container %s", + container.ID, container.NetworkSettings.IPAddress) + }*/ + } } else { logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } @@ -814,6 +826,25 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e return daemon.execDriver.Run(c.command, pipes, startCallback) } +func (daemon *Daemon) Checkpoint(c *Container) error { + if err := daemon.execDriver.Checkpoint(c.command); err != nil { + return err + } + c.SetCheckpointed() + return nil +} + +func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { + // Mount the container's filesystem (daemon/graphdriver/aufs/aufs.go). + _, err := daemon.driver.Get(c.ID, c.GetMountLabel()) + if err != nil { + return 0, err + } + + exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback) + return exitCode, err +} + func (daemon *Daemon) Kill(c *Container, sig int) error { return daemon.execDriver.Kill(c.command, sig) } diff --git a/daemon/monitor.go b/daemon/monitor.go index 1f020574b0523..a2c90f779961f 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -47,6 +47,9 @@ type containerMonitor struct { // left waiting for nothing to happen during this time stopChan chan struct{} + // like startSignal but for restoring a container + restoreSignal chan struct{} + // timeIncrement is the amount of time to wait between restarts // this is in milliseconds timeIncrement int @@ -64,6 +67,7 @@ func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) * timeIncrement: defaultTimeIncrement, stopChan: make(chan struct{}), startSignal: make(chan struct{}), + restoreSignal: make(chan struct{}), } } @@ -184,6 +188,49 @@ func (m *containerMonitor) Start() error { } } +// 
Like Start() but for restoring a container. +func (m *containerMonitor) Restore() error { + var ( + err error + // XXX The following line should be changed to + // exitStatus execdriver.ExitStatus to match Start() + exitCode int + afterRestore bool + ) + + defer func() { + if afterRestore { + m.container.Lock() + m.container.setStopped(&execdriver.ExitStatus{exitCode, false}) + defer m.container.Unlock() + } + m.Close() + }() + + if err := m.container.startLoggingToDisk(); err != nil { + m.resetContainer(false) + return err + } + + pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) + + m.container.LogEvent("restore") + m.lastStartTime = time.Now() + if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback); err != nil { + log.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode) + m.container.ExitCode = -1 + m.resetContainer(false) + return err + } + afterRestore = true + + m.container.ExitCode = exitCode + m.resetMonitor(err == nil && exitCode == 0) + m.container.LogEvent("die") + m.resetContainer(true) + return err +} + // resetMonitor resets the stateful fields on the containerMonitor based on the // previous runs success or failure. Regardless of success, if the container had // an execution time of more than 10s then reset the timer back to the default @@ -270,6 +317,29 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid } } +// Like callback() but for restoring a container. +func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int) { + // If restorePid is 0, it means that restore failed. + if restorePid != 0 { + m.container.setRunning(restorePid) + } + + // Unblock the goroutine waiting in waitForRestore(). 
+ select { + case <-m.restoreSignal: + default: + close(m.restoreSignal) + } + + if restorePid != 0 { + // Write config.json and hostconfig.json files + // to /var/lib/docker/containers/. + if err := m.container.ToDisk(); err != nil { + log.Debugf("%s", err) + } + } +} + // resetContainer resets the container's IO and ensures that the command is able to be executed again // by copying the data into a new struct // if lock is true, then container locked during reset diff --git a/daemon/state.go b/daemon/state.go index 861671d7adf42..f1292b4d1d183 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -14,6 +14,7 @@ type State struct { Running bool Paused bool Restarting bool + Checkpointed bool OOMKilled bool removalInProgress bool // Not need for this to be persistent on disk. Dead bool @@ -22,7 +23,9 @@ type State struct { Error string // contains last known error when starting the container StartedAt time.Time FinishedAt time.Time + CheckpointedAt time.Time waitChan chan struct{} + } func NewState() *State { @@ -42,6 +45,8 @@ func (s *State) String() string { } return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) + } else if s.Checkpointed { + return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt))) } if s.removalInProgress { @@ -178,6 +183,7 @@ func (s *State) setRunning(pid int) { s.Error = "" s.Running = true s.Paused = false + s.Checkpointed = false s.Restarting = false s.ExitCode = 0 s.Pid = pid @@ -274,3 +280,20 @@ func (s *State) SetDead() { s.Dead = true s.Unlock() } + +func (s *State) SetCheckpointed() { + s.Lock() + s.CheckpointedAt = time.Now().UTC() + s.Checkpointed = true + s.Running = false + s.Paused = false + s.Restarting = false + // XXX Not sure if we need to close and recreate waitChan. 
+ // close(s.waitChan) + // s.waitChan = make(chan struct{}) + s.Unlock() +} + +func (s *State) IsCheckpointed() bool { + return s.Checkpointed +} From 6e0a6978a208dd903f088ede03aba31d8c1fba52 Mon Sep 17 00:00:00 2001 From: Hui Kang Date: Tue, 19 May 2015 21:08:04 +0000 Subject: [PATCH 04/13] Release the network resource during checkpoint Restore failed if network resource not released during checkpoint, e.g., a container with port open with -p Signed-off-by: Hui Kang Conflicts: daemon/container.go --- daemon/container.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/daemon/container.go b/daemon/container.go index 4bdc991a4fc64..a9f239907b488 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -609,6 +609,18 @@ func validateID(id string) error { return nil } +func (container *Container) Checkpoint(opts *libcontainer.CriuOpts) error { + if err := container.daemon.Checkpoint(container, opts); err != nil { + return err + } + + if opts.LeaveRunning == false { + container.ReleaseNetwork() + } + return nil +} + + func (container *Container) Copy(resource string) (rc io.ReadCloser, err error) { container.Lock() From 5ec9d95f0d16cfba1358b0e70009a96d11a51662 Mon Sep 17 00:00:00 2001 From: boucher Date: Mon, 1 Jun 2015 15:15:02 -0700 Subject: [PATCH 05/13] Update daemon and cli support for checkpoint and restore. 
Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- api/client/checkpoint.go | 52 +++++++++++++++ api/client/restore.go | 54 ++++++++++++++++ api/server/server.go | 24 +++++-- daemon/checkpoint.go | 57 ++++++++--------- daemon/container.go | 94 +++++++++++++++------------- daemon/container_unix.go | 71 +++++---------------- daemon/container_windows.go | 2 +- daemon/daemon.go | 12 ++-- daemon/execdriver/driver.go | 7 ++- daemon/execdriver/lxc/driver.go | 5 +- daemon/execdriver/native/driver.go | 21 +++++-- daemon/execdriver/windows/windows.go | 9 +++ daemon/monitor.go | 26 ++++---- daemon/state.go | 21 +++++-- docker/flags.go | 2 + runconfig/restore.go | 15 +++++ 16 files changed, 305 insertions(+), 167 deletions(-) create mode 100644 api/client/checkpoint.go create mode 100644 api/client/restore.go create mode 100644 runconfig/restore.go diff --git a/api/client/checkpoint.go b/api/client/checkpoint.go new file mode 100644 index 0000000000000..8c681bcf9716f --- /dev/null +++ b/api/client/checkpoint.go @@ -0,0 +1,52 @@ +package client + +import ( + "fmt" + + flag "github.com/docker/docker/pkg/mflag" + "github.com/docker/docker/runconfig" +) + +func (cli *DockerCli) CmdCheckpoint(args ...string) error { + cmd := cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory for storing checkpoint image files") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for storing log file") + flLeaveRunning = cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpoint") + flCheckTcp = cmd.Bool([]string{"-allow-tcp"}, false, "allow checkpointing tcp connections") + flExtUnix = cmd.Bool([]string{"-allow-ext-unix"}, false, "allow checkpointing external unix connections") + flShell = cmd.Bool([]string{"-allow-shell"}, false, "allow checkpointing shell jobs") + ) + + if err := 
cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + criuOpts := &runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + LeaveRunning: *flLeaveRunning, + TcpEstablished: *flCheckTcp, + ExternalUnixConnections: *flExtUnix, + ShellJob: *flShell, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/checkpoint", criuOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to checkpoint one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/client/restore.go b/api/client/restore.go new file mode 100644 index 0000000000000..0c4085fbbbd84 --- /dev/null +++ b/api/client/restore.go @@ -0,0 +1,54 @@ +package client + +import ( + "fmt" + + flag "github.com/docker/docker/pkg/mflag" + "github.com/docker/docker/runconfig" +) + +func (cli *DockerCli) CmdRestore(args ...string) error { + cmd := cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory to restore image files from") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for restore log") + flCheckTcp = cmd.Bool([]string{"-allow-tcp"}, false, "allow restoring tcp connections") + flExtUnix = cmd.Bool([]string{"-allow-ext-unix"}, false, "allow restoring external unix connections") + flShell = cmd.Bool([]string{"-allow-shell"}, false, "allow restoring shell jobs") + flForce = cmd.Bool([]string{"-force"}, false, "bypass checks for current container state") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + restoreOpts := &runconfig.RestoreConfig{ + CriuOpts: runconfig.CriuConfig{ + 
ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + TcpEstablished: *flCheckTcp, + ExternalUnixConnections: *flExtUnix, + ShellJob: *flShell, + }, + ForceRestore: *flForce, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/restore", restoreOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to restore one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/server/server.go b/api/server/server.go index 6b3b87176e781..3cfa958f66fec 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -220,32 +220,44 @@ func writeJSON(w http.ResponseWriter, code int, v interface{}) error { return json.NewEncoder(w).Encode(v) } -func postContainersCheckpoint(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { +func (s *Server) postContainersCheckpoint(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { if vars == nil { return fmt.Errorf("Missing parameter") } if err := parseForm(r); err != nil { return err } - job := eng.Job("checkpoint", vars["name"]) - if err := job.Run(); err != nil { + + criuOpts := &runconfig.CriuConfig{} + if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil { return err } + + if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) return nil } -func postContainersRestore(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { +func (s *Server) postContainersRestore(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { if vars == nil { return fmt.Errorf("Missing parameter") } if err := parseForm(r); err != nil { return err } - job := 
eng.Job("restore", vars["name"]) - if err := job.Run(); err != nil { + + restoreOpts := runconfig.RestoreConfig{} + if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil { return err } + + if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) return nil } diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go index f6057c6a028f9..a39662cc0f325 100644 --- a/daemon/checkpoint.go +++ b/daemon/checkpoint.go @@ -1,55 +1,56 @@ package daemon import ( - "github.com/docker/docker/engine" + "fmt" + + "github.com/docker/docker/runconfig" ) // Checkpoint a running container. -func (daemon *Daemon) ContainerCheckpoint(job *engine.Job) engine.Status { - if len(job.Args) != 1 { - return job.Errorf("Usage: %s CONTAINER\n", job.Name) - } - - name := job.Args[0] +func (daemon *Daemon) ContainerCheckpoint(name string, opts *runconfig.CriuConfig) error { container, err := daemon.Get(name) if err != nil { - return job.Error(err) + return err } if !container.IsRunning() { - return job.Errorf("Container %s not running", name) + return fmt.Errorf("Container %s not running", name) } - - if err := container.Checkpoint(); err != nil { - return job.Errorf("Cannot checkpoint container %s: %s", name, err) + if err := container.Checkpoint(opts); err != nil { + return fmt.Errorf("Cannot checkpoint container %s: %s", name, err) } container.LogEvent("checkpoint") - return engine.StatusOK + return nil } // Restore a checkpointed container. 
-func (daemon *Daemon) ContainerRestore(job *engine.Job) engine.Status { - if len(job.Args) != 1 { - return job.Errorf("Usage: %s CONTAINER\n", job.Name) - } - - name := job.Args[0] +func (daemon *Daemon) ContainerRestore(name string, opts *runconfig.CriuConfig, forceRestore bool) error { container, err := daemon.Get(name) if err != nil { - return job.Error(err) - } - if container.IsRunning() { - return job.Errorf("Container %s already running", name) + return err } - if !container.State.IsCheckpointed() { - return job.Errorf("Container %s is not checkpointed", name) + + if !forceRestore { + // TODO: It's possible we only want to bypass the checkpointed check, + // I'm not sure how this will work if the container is already running + if container.IsRunning() { + return fmt.Errorf("Container %s already running", name) + } + + if !container.IsCheckpointed() { + return fmt.Errorf("Container %s is not checkpointed", name) + } + } else { + if !container.HasBeenCheckpointed() && opts.ImagesDirectory == "" { + return fmt.Errorf("You must specify an image directory to restore from %s", name) + } } - if err := container.Restore(); err != nil { + if err = container.Restore(opts, forceRestore); err != nil { container.LogEvent("die") - return job.Errorf("Cannot restore container %s: %s", name, err) + return fmt.Errorf("Cannot restore container %s: %s", name, err) } container.LogEvent("restore") - return engine.StatusOK + return nil } diff --git a/daemon/container.go b/daemon/container.go index a9f239907b488..eacace7bfd0b4 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -268,7 +268,7 @@ func (container *Container) Start() (err error) { // backwards API compatibility. 
container.hostConfig = runconfig.SetDefaultNetModeIfBlank(container.hostConfig) - if err := container.initializeNetworking(); err != nil { + if err := container.initializeNetworking(false); err != nil { return err } linkedEnv, err := container.setupLinkedContainers() @@ -340,12 +340,11 @@ func (container *Container) isNetworkAllocated() bool { return container.NetworkSettings.IPAddress != "" } - // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (container *Container) cleanup() { if container.IsCheckpointed() { - log.CRDbg("not calling ReleaseNetwork() for checkpointed container %s", container.ID) + logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID) } else { container.ReleaseNetwork() } @@ -609,7 +608,7 @@ func validateID(id string) error { return nil } -func (container *Container) Checkpoint(opts *libcontainer.CriuOpts) error { +func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error { if err := container.daemon.Checkpoint(container, opts); err != nil { return err } @@ -620,6 +619,50 @@ func (container *Container) Checkpoint(opts *libcontainer.CriuOpts) error { return nil } +func (container *Container) Restore(opts *runconfig.CriuConfig, forceRestore bool) error { + var err error + container.Lock() + defer container.Unlock() + + defer func() { + if err != nil { + container.setError(err) + // if no one else has set it, make sure we don't leave it at zero + if container.ExitCode == 0 { + container.ExitCode = 128 + } + container.toDisk() + container.cleanup() + } + }() + + if err := container.Mount(); err != nil { + return err + } + if err = container.initializeNetworking(true); err != nil { + return err + } + linkedEnv, err := container.setupLinkedContainers() + if err != nil { + return err + } + if err = container.setupWorkingDirectory(); err != nil { + return err + } + + 
env := container.createDaemonEnvironment(linkedEnv) + if err = populateCommand(container, env); err != nil { + return err + } + + mounts, err := container.setupMounts() + if err != nil { + return err + } + + container.command.Mounts = mounts + return container.waitForRestore(opts, forceRestore) +} func (container *Container) Copy(resource string) (rc io.ReadCloser, err error) { container.Lock() @@ -692,41 +735,6 @@ func (container *Container) Copy(resource string) (rc io.ReadCloser, err error) return reader, nil } -func (container *Container) Checkpoint() error { - return container.daemon.Checkpoint(container) -} - -func (container *Container) Restore() error { - var err error - - container.Lock() - defer container.Unlock() - - defer func() { - if err != nil { - container.cleanup() - } - }() - - if err = container.initializeNetworking(); err != nil { - return err - } - - linkedEnv, err := container.setupLinkedContainers() - if err != nil { - return err - } - if err = container.setupWorkingDirectory(); err != nil { - return err - } - env := container.createDaemonEnvironment(linkedEnv) - if err = populateCommandRestore(container, env); err != nil { - return err - } - - return container.waitForRestore() -} - // Returns true if the container exposes a certain port func (container *Container) Exposes(p nat.Port) bool { _, exists := container.Config.ExposedPorts[p] @@ -816,10 +824,7 @@ func (container *Container) waitForStart() error { return nil } -// Like waitForStart() but for restoring a container. -// -// XXX Does RestartPolicy apply here? 
-func (container *Container) waitForRestore() error { +func (container *Container) waitForRestore(opts *runconfig.CriuConfig, forceRestore bool) error { container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) // After calling promise.Go() we'll have two goroutines: @@ -832,7 +837,7 @@ func (container *Container) waitForRestore() error { if container.ExitCode != 0 { return fmt.Errorf("restore process failed") } - case err := <-promise.Go(container.monitor.Restore): + case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }): return err } @@ -1045,6 +1050,7 @@ func attach(streamConfig *StreamConfig, openStdin, stdinOnce, tty bool, stdin io _, err = copyEscapable(cStdin, stdin) } else { _, err = io.Copy(cStdin, stdin) + } if err == io.ErrClosedPipe { err = nil diff --git a/daemon/container_unix.go b/daemon/container_unix.go index 43617fe886a28..68b3d126f5565 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -322,53 +322,6 @@ func mergeDevices(defaultDevices, userDevices []*configs.Device) []*configs.Devi return append(devs, userDevices...) } -// Like populateCommand() but for restoring a container. -// -// XXX populateCommand() does a lot more. Not sure if we have -// to do everything it does. 
-func populateCommandRestore(c *Container, env []string) error { - resources := &execdriver.Resources{ - Memory: c.Config.Memory, - MemorySwap: c.Config.MemorySwap, - CpuShares: c.Config.CpuShares, - Cpuset: c.Config.Cpuset, - } - - processConfig := execdriver.ProcessConfig{ - Privileged: c.hostConfig.Privileged, - Entrypoint: c.Path, - Arguments: c.Args, - Tty: c.Config.Tty, - User: c.Config.User, - } - - processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true} - processConfig.Env = env - - c.command = &execdriver.Command{ - ID: c.ID, - Rootfs: c.RootfsPath(), - ReadonlyRootfs: c.hostConfig.ReadonlyRootfs, - InitPath: "/.dockerinit", - WorkingDir: c.Config.WorkingDir, - // Network: en, - // Ipc: ipc, - // Pid: pid, - Resources: resources, - // AllowedDevices: allowedDevices, - // AutoCreatedDevices: autoCreatedDevices, - CapAdd: c.hostConfig.CapAdd, - CapDrop: c.hostConfig.CapDrop, - ProcessConfig: processConfig, - ProcessLabel: c.GetProcessLabel(), - MountLabel: c.GetMountLabel(), - // LxcConfig: lxcConfig, - AppArmorProfile: c.AppArmorProfile, - } - - return nil -} - // GetSize, return real size, virtual size func (container *Container) GetSize() (int64, int64) { var ( @@ -731,7 +684,7 @@ func (container *Container) UpdateNetwork() error { return nil } -func (container *Container) buildCreateEndpointOptions() ([]libnetwork.EndpointOption, error) { +func (container *Container) buildCreateEndpointOptions(restoring bool) ([]libnetwork.EndpointOption, error) { var ( portSpecs = make(nat.PortSet) bindings = make(nat.PortMap) @@ -806,6 +759,14 @@ func (container *Container) buildCreateEndpointOptions() ([]libnetwork.EndpointO createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) } + /*if restoring && container.NetworkSettings.IPAddress != "" { + genericOption := options.Generic{ + netlabel.IPAddress: net.ParseIP(container.NetworkSettings.IPAddress), + } + + createOptions = append(createOptions, 
libnetwork.EndpointOptionGeneric(genericOption)) + }*/ + return createOptions, nil } @@ -859,7 +820,7 @@ func (container *Container) secondaryNetworkRequired(primaryNetworkType string) return false } -func (container *Container) AllocateNetwork() error { +func (container *Container) AllocateNetwork(isRestoring bool) error { mode := container.hostConfig.NetworkMode controller := container.daemon.netController if container.Config.NetworkDisabled || mode.IsContainer() { @@ -895,19 +856,19 @@ func (container *Container) AllocateNetwork() error { if container.secondaryNetworkRequired(networkDriver) { // Configure Bridge as secondary network for port binding purposes - if err := container.configureNetwork("bridge", service, "bridge", false); err != nil { + if err := container.configureNetwork("bridge", service, "bridge", false, isRestoring); err != nil { return err } } - if err := container.configureNetwork(networkName, service, networkDriver, mode.IsDefault()); err != nil { + if err := container.configureNetwork(networkName, service, networkDriver, mode.IsDefault(), isRestoring); err != nil { return err } return container.WriteHostConfig() } -func (container *Container) configureNetwork(networkName, service, networkDriver string, canCreateNetwork bool) error { +func (container *Container) configureNetwork(networkName, service, networkDriver string, canCreateNetwork bool, isRestoring bool) error { controller := container.daemon.netController n, err := controller.NetworkByName(networkName) if err != nil { @@ -926,7 +887,7 @@ func (container *Container) configureNetwork(networkName, service, networkDriver return err } - createOptions, err := container.buildCreateEndpointOptions() + createOptions, err := container.buildCreateEndpointOptions(isRestoring) if err != nil { return err } @@ -957,7 +918,7 @@ func (container *Container) configureNetwork(networkName, service, networkDriver return nil } -func (container *Container) initializeNetworking() error { +func (container 
*Container) initializeNetworking(restoring bool) error { var err error if container.hostConfig.NetworkMode.IsContainer() { @@ -988,7 +949,7 @@ func (container *Container) initializeNetworking() error { } - if err := container.AllocateNetwork(); err != nil { + if err := container.AllocateNetwork(restoring); err != nil { return err } diff --git a/daemon/container_windows.go b/daemon/container_windows.go index 425e1abe54b03..5d85853341ede 100644 --- a/daemon/container_windows.go +++ b/daemon/container_windows.go @@ -45,7 +45,7 @@ func (container *Container) createDaemonEnvironment(linkedEnv []string) []string return container.Config.Env } -func (container *Container) initializeNetworking() error { +func (container *Container) initializeNetworking(restoring bool) error { return nil } diff --git a/daemon/daemon.go b/daemon/daemon.go index 34a93aefd44e0..21fe2bf864129 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -826,22 +826,22 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e return daemon.execDriver.Run(c.command, pipes, startCallback) } -func (daemon *Daemon) Checkpoint(c *Container) error { - if err := daemon.execDriver.Checkpoint(c.command); err != nil { +func (daemon *Daemon) Checkpoint(c *Container, opts *runconfig.CriuConfig) error { + if err := daemon.execDriver.Checkpoint(c.command, opts); err != nil { return err } - c.SetCheckpointed() + c.SetCheckpointed(opts.LeaveRunning) return nil } -func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { +func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { // Mount the container's filesystem (daemon/graphdriver/aufs/aufs.go). 
_, err := daemon.driver.Get(c.ID, c.GetMountLabel()) if err != nil { - return 0, err + return execdriver.ExitStatus{ExitCode: 0}, err } - exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback) + exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback, opts, forceRestore) return exitCode, err } diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 755f27d67f19b..442d28a890646 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -8,6 +8,7 @@ import ( // TODO Windows: Factor out ulimit "github.com/docker/docker/pkg/ulimit" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" ) @@ -72,9 +73,11 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error - Checkpoint(c *Command, opts *libcontainer.CriuOpts) error + // Checkpoints a container (with criu). + Checkpoint(c *Command, opts *runconfig.CriuConfig) error - Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (ExitStatus, error) + // Restores a checkpoint image into a container (with criu). + Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (ExitStatus, error) // Name returns the name of the driver. 
Name() string diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index 26866b7efd15f..e8196316098ad 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -25,6 +25,7 @@ import ( sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" "github.com/docker/docker/pkg/version" + "github.com/docker/docker/runconfig" "github.com/kr/pty" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -560,11 +561,11 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return err } -func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error { +func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { return fmt.Errorf("Checkpointing lxc containers not supported yet\n") } -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) { +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet\n") } diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index 90924230538ab..e9bd2d981600f 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -20,7 +20,7 @@ import ( "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" - "github.com/docker/docker/utils" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" @@ -299,7 +299,18 @@ func (d *Driver) Unpause(c 
*execdriver.Command) error { return active.Resume() } -func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error { +func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.CriuOpts { + return &libcontainer.CriuOpts{ + ImagesDirectory: runconfigOpts.ImagesDirectory, + WorkDirectory: runconfigOpts.WorkDirectory, + LeaveRunning: runconfigOpts.LeaveRunning, + TcpEstablished: runconfigOpts.TcpEstablished, + ExternalUnixConnections: runconfigOpts.ExternalUnixConnections, + ShellJob: runconfigOpts.ShellJob, + } +} + +func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { active := d.activeContainers[c.ID] if active == nil { return fmt.Errorf("active container for %s does not exist", c.ID) @@ -307,7 +318,7 @@ func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) d.Lock() defer d.Unlock() - err := active.Checkpoint(opts) + err := active.Checkpoint(libcontainerCriuOpts(opts)) if err != nil { return err } @@ -315,7 +326,7 @@ func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) return nil } -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) { +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { var ( cont libcontainer.Container err error @@ -358,7 +369,7 @@ func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restore d.cleanContainer(c.ID) }() - if err := cont.Restore(p, opts); err != nil { + if err := cont.Restore(p, libcontainerCriuOpts(opts)); err != nil { return execdriver.ExitStatus{ExitCode: -1}, err } diff --git a/daemon/execdriver/windows/windows.go b/daemon/execdriver/windows/windows.go index 198ddc8dd7184..c87c50e230524 100644 --- 
a/daemon/execdriver/windows/windows.go +++ b/daemon/execdriver/windows/windows.go @@ -11,6 +11,7 @@ import ( "github.com/docker/docker/autogen/dockerversion" "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/pkg/parsers" + "github.com/docker/docker/runconfig" ) // This is a daemon development variable only and should not be @@ -93,3 +94,11 @@ func setupEnvironmentVariables(a []string) map[string]string { } return r } + +func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { + return fmt.Errorf("Windows: Containers cannot be checkpointed") +} + +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Windows: Containers cannot be restored") +} diff --git a/daemon/monitor.go b/daemon/monitor.go index a2c90f779961f..de1c77ee36205 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -189,43 +189,45 @@ func (m *containerMonitor) Start() error { } // Like Start() but for restoring a container. 
-func (m *containerMonitor) Restore() error { +func (m *containerMonitor) Restore(opts *runconfig.CriuConfig, forceRestore bool) error { var ( err error // XXX The following line should be changed to // exitStatus execdriver.ExitStatus to match Start() - exitCode int + exitCode execdriver.ExitStatus afterRestore bool ) - defer func() { if afterRestore { m.container.Lock() - m.container.setStopped(&execdriver.ExitStatus{exitCode, false}) + m.container.setStopped(&execdriver.ExitStatus{exitCode.ExitCode, false}) defer m.container.Unlock() } m.Close() }() - if err := m.container.startLoggingToDisk(); err != nil { - m.resetContainer(false) - return err + // FIXME: right now if we startLogging again we get double logs after a restore + if m.container.logCopier == nil { + if err := m.container.startLogging(); err != nil { + m.resetContainer(false) + return err + } } pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) m.container.LogEvent("restore") m.lastStartTime = time.Now() - if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback); err != nil { - log.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode) + if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback, opts, forceRestore); err != nil { + logrus.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode) m.container.ExitCode = -1 m.resetContainer(false) return err } afterRestore = true - m.container.ExitCode = exitCode - m.resetMonitor(err == nil && exitCode == 0) + m.container.ExitCode = exitCode.ExitCode + m.resetMonitor(err == nil && exitCode.ExitCode == 0) m.container.LogEvent("die") m.resetContainer(true) return err @@ -335,7 +337,7 @@ func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConf // Write config.json and hostconfig.json files // to /var/lib/docker/containers/. 
if err := m.container.ToDisk(); err != nil { - log.Debugf("%s", err) + logrus.Debugf("%s", err) } } } diff --git a/daemon/state.go b/daemon/state.go index f1292b4d1d183..f52aa395d174b 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -25,7 +25,6 @@ type State struct { FinishedAt time.Time CheckpointedAt time.Time waitChan chan struct{} - } func NewState() *State { @@ -45,14 +44,16 @@ func (s *State) String() string { } return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) - } else if s.Checkpointed { - return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt))) } if s.removalInProgress { return "Removal In Progress" } + if s.Checkpointed { + return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt))) + } + if s.Dead { return "Dead" } @@ -80,6 +81,10 @@ func (s *State) StateString() string { return "running" } + if s.Checkpointed { + return "checkpointed'" + } + if s.Dead { return "dead" } @@ -281,11 +286,11 @@ func (s *State) SetDead() { s.Unlock() } -func (s *State) SetCheckpointed() { +func (s *State) SetCheckpointed(leaveRunning bool) { s.Lock() s.CheckpointedAt = time.Now().UTC() - s.Checkpointed = true - s.Running = false + s.Checkpointed = !leaveRunning + s.Running = leaveRunning s.Paused = false s.Restarting = false // XXX Not sure if we need to close and recreate waitChan. 
@@ -294,6 +299,10 @@ func (s *State) SetCheckpointed() { s.Unlock() } +func (s *State) HasBeenCheckpointed() bool { + return s.CheckpointedAt != time.Time{} +} + func (s *State) IsCheckpointed() bool { return s.Checkpointed } diff --git a/docker/flags.go b/docker/flags.go index afff0bd9bc6e6..6b4fe4ade9041 100644 --- a/docker/flags.go +++ b/docker/flags.go @@ -23,6 +23,7 @@ func (a byName) Less(i, j int) bool { return a[i].name < a[j].name } var dockerCommands = []command{ {"attach", "Attach to a running container"}, {"build", "Build an image from a Dockerfile"}, + {"checkpoint", "Checkpoint one or more running containers"}, {"commit", "Create a new image from a container's changes"}, {"cp", "Copy files/folders from a container to a HOSTDIR or to STDOUT"}, {"create", "Create a new container"}, @@ -47,6 +48,7 @@ var dockerCommands = []command{ {"push", "Push an image or a repository to a registry"}, {"rename", "Rename a container"}, {"restart", "Restart a running container"}, + {"restore", "Restore one or more checkpointed containers"}, {"rm", "Remove one or more containers"}, {"rmi", "Remove one or more images"}, {"run", "Run a command in a new container"}, diff --git a/runconfig/restore.go b/runconfig/restore.go new file mode 100644 index 0000000000000..22f8b0ab0a096 --- /dev/null +++ b/runconfig/restore.go @@ -0,0 +1,15 @@ +package runconfig + +type CriuConfig struct { + ImagesDirectory string + WorkDirectory string + LeaveRunning bool + TcpEstablished bool + ExternalUnixConnections bool + ShellJob bool +} + +type RestoreConfig struct { + CriuOpts CriuConfig + ForceRestore bool +} From bc20d7eb40ca07734348c3756b2363cdb3988f62 Mon Sep 17 00:00:00 2001 From: boucher Date: Tue, 2 Jun 2015 14:04:14 -0700 Subject: [PATCH 06/13] Add compilation steps for Criu to the Dockerfile Add a basic test for checkpoint/restore to the integration tests Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- Dockerfile | 18 +++++++++ api/types/types.go | 15 ++++++++ 
daemon/inspect.go | 22 ++++++----- integration-cli/docker_cli_checkpoint_test.go | 37 +++++++++++++++++++ 4 files changed, 82 insertions(+), 10 deletions(-) create mode 100644 integration-cli/docker_cli_checkpoint_test.go diff --git a/Dockerfile b/Dockerfile index 0e58df106531b..22294606cca1f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,9 +32,11 @@ RUN echo deb http://ppa.launchpad.net/zfs-native/stable/ubuntu trusty main > /et # Packaged dependencies RUN apt-get update && apt-get install -y \ apparmor \ + asciidoc \ aufs-tools \ automake \ bash-completion \ + bsdmainutils \ btrfs-tools \ build-essential \ createrepo \ @@ -43,19 +45,28 @@ RUN apt-get update && apt-get install -y \ gcc-mingw-w64 \ git \ iptables \ + libaio-dev \ libapparmor-dev \ libcap-dev \ + libprotobuf-c0-dev \ + libprotobuf-dev \ libsqlite3-dev \ mercurial \ parallel \ + pkg-config \ + protobuf-compiler \ + protobuf-c-compiler \ + python-minimal \ python-mock \ python-pip \ + python-protobuf \ python-websocket \ reprepro \ ruby1.9.1 \ ruby1.9.1-dev \ s3cmd=1.1.0* \ ubuntu-zfs \ + xmlto \ libzfs-dev \ --no-install-recommends @@ -80,6 +91,13 @@ RUN cd /usr/src/lxc \ && make install \ && ldconfig +# Install Criu +RUN mkdir -p /usr/src/criu \ + && curl -sSL https://github.com/xemul/criu/archive/v1.6.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 +RUN cd /usr/src/criu \ + && make \ + && make install + # Install Go ENV GO_VERSION 1.4.2 RUN curl -sSL https://golang.org/dl/go${GO_VERSION}.src.tar.gz | tar -v -C /usr/local -xz \ diff --git a/api/types/types.go b/api/types/types.go index 5cbb9bdba8c0a..6d6ac2fdad3e4 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -227,6 +227,7 @@ type ExecStartCheck struct { // ContainerState stores container's running state // it's part of ContainerJSONBase and will return by "inspect" command type ContainerState struct { +<<<<<<< HEAD Running bool Paused bool Restarting bool @@ -237,6 +238,20 @@ type ContainerState struct { Error string 
StartedAt string FinishedAt string +======= + Running bool + Paused bool + Checkpointed bool + Restarting bool + OOMKilled bool + Dead bool + Pid int + ExitCode int + Error string + StartedAt time.Time + FinishedAt time.Time + CheckpointedAt time.Time +>>>>>>> Add compilation steps for Criu to the Dockerfile } // ContainerJSONBase contains response of Remote API: diff --git a/daemon/inspect.go b/daemon/inspect.go index 0bff1cba541fb..16cc2f9a3a106 100644 --- a/daemon/inspect.go +++ b/daemon/inspect.go @@ -42,16 +42,18 @@ func (daemon *Daemon) getInspectData(container *Container) (*types.ContainerJSON } containerState := &types.ContainerState{ - Running: container.State.Running, - Paused: container.State.Paused, - Restarting: container.State.Restarting, - OOMKilled: container.State.OOMKilled, - Dead: container.State.Dead, - Pid: container.State.Pid, - ExitCode: container.State.ExitCode, - Error: container.State.Error, - StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), - FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + Running: container.State.Running, + Paused: container.State.Paused, + Checkpointed: container.State.Checkpointed, + Restarting: container.State.Restarting, + OOMKilled: container.State.OOMKilled, + Dead: container.State.Dead, + Pid: container.State.Pid, + ExitCode: container.State.ExitCode, + Error: container.State.Error, + StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), + FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + CheckpointedAt: container.State.CheckpointedAt.Format(time.RFC3339Nano), } contJSONBase := &types.ContainerJSONBase{ diff --git a/integration-cli/docker_cli_checkpoint_test.go b/integration-cli/docker_cli_checkpoint_test.go new file mode 100644 index 0000000000000..e19ef524efd43 --- /dev/null +++ b/integration-cli/docker_cli_checkpoint_test.go @@ -0,0 +1,37 @@ +package main + +import ( + "os/exec" + "strings" + + "github.com/go-check/check" +) + +func (s *DockerSuite) 
TestCheckpointAndRestore(c *check.C) { + defer unpauseAllContainers() + + runCmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top") + out, _, err := runCommandWithOutput(runCmd) + if err != nil { + c.Fatalf("failed to run container: %v, output: %q", err, out) + } + + containerID := strings.TrimSpace(out) + checkpointCmd := exec.Command(dockerBinary, "checkpoint", containerID) + out, _, err = runCommandWithOutput(checkpointCmd) + if err != nil { + c.Fatalf("failed to checkpoint container: %v, output: %q", err, out) + } + + out, err = inspectField(containerID, "State.Checkpointed") + c.Assert(out, check.Equals, "true") + + restoreCmd := exec.Command(dockerBinary, "restore", containerID) + out, _, _, err = runCommandWithStdoutStderr(restoreCmd) + if err != nil { + c.Fatalf("failed to restore container: %v, output: %q", err, out) + } + + out, err = inspectField(containerID, "State.Checkpointed") + c.Assert(out, check.Equals, "false") +} From 580ff58f31bb55edd3bc96ffcdd842a0f2e18878 Mon Sep 17 00:00:00 2001 From: boucher Date: Tue, 16 Jun 2015 14:41:05 -0700 Subject: [PATCH 07/13] Add optional dependency info to the PACKAGERS file. Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- project/PACKAGERS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/project/PACKAGERS.md b/project/PACKAGERS.md index 22f24b4789b77..5ba406cedf009 100644 --- a/project/PACKAGERS.md +++ b/project/PACKAGERS.md @@ -303,6 +303,9 @@ by having support for them in the kernel or userspace. 
A few examples include: least the "auplink" utility from aufs-tools) * BTRFS graph driver (requires BTRFS support enabled in the kernel) * ZFS graph driver (requires userspace zfs-utils and a corresponding kernel module) +* Checkpoint/Restore containers: + - requires criu version 1.5.2 or later (criu.org) + - requires kernel version 3.19 or later if using overlay-fs ## Daemon Init Script From 4284cf7a34e9e3cd471d499ef83cc4cbdaffc945 Mon Sep 17 00:00:00 2001 From: boucher Date: Thu, 18 Jun 2015 15:18:09 -0700 Subject: [PATCH 08/13] Don't destroy/delete the container if it has been checkpointed. Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- daemon/execdriver/native/driver.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index e9bd2d981600f..f4f34095ee50a 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -158,8 +158,11 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba d.activeContainers[c.ID] = cont d.Unlock() defer func() { - cont.Destroy() - d.cleanContainer(c.ID) + status, err := cont.Status() + if err != nil || status != libcontainer.Checkpointed { + cont.Destroy() + d.cleanContainer(c.ID) + } }() if err := cont.Start(p); err != nil { From 81f43c740f225637583c66e7763a3ff5ee0dbe3d Mon Sep 17 00:00:00 2001 From: boucher Date: Thu, 9 Jul 2015 09:40:43 -0700 Subject: [PATCH 09/13] Move checkpoint methods into a separate container_checkpoint file. 
Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- api/types/types.go | 33 ++++--------- daemon/container.go | 76 ------------------------------ daemon/container_checkpoint.go | 84 ++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 99 deletions(-) create mode 100644 daemon/container_checkpoint.go diff --git a/api/types/types.go b/api/types/types.go index 6d6ac2fdad3e4..407644a4205c0 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -227,31 +227,18 @@ type ExecStartCheck struct { // ContainerState stores container's running state // it's part of ContainerJSONBase and will return by "inspect" command type ContainerState struct { -<<<<<<< HEAD - Running bool - Paused bool - Restarting bool - OOMKilled bool - Dead bool - Pid int - ExitCode int - Error string - StartedAt string - FinishedAt string -======= Running bool - Paused bool + Paused bool Checkpointed bool - Restarting bool - OOMKilled bool - Dead bool - Pid int - ExitCode int - Error string - StartedAt time.Time - FinishedAt time.Time - CheckpointedAt time.Time ->>>>>>> Add compilation steps for Criu to the Dockerfile + Restarting bool + OOMKilled bool + Dead bool + Pid int + ExitCode int + Error string + StartedAt string + FinishedAt string + CheckpointedAt string } // ContainerJSONBase contains response of Remote API: diff --git a/daemon/container.go b/daemon/container.go index eacace7bfd0b4..73677d3aeeb8b 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -608,62 +608,6 @@ func validateID(id string) error { return nil } -func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error { - if err := container.daemon.Checkpoint(container, opts); err != nil { - return err - } - - if opts.LeaveRunning == false { - container.ReleaseNetwork() - } - return nil -} - -func (container *Container) Restore(opts *runconfig.CriuConfig, forceRestore bool) error { - var err error - container.Lock() - defer container.Unlock() - - defer func() { - if err != nil { - 
container.setError(err) - // if no one else has set it, make sure we don't leave it at zero - if container.ExitCode == 0 { - container.ExitCode = 128 - } - container.toDisk() - container.cleanup() - } - }() - - if err := container.Mount(); err != nil { - return err - } - if err = container.initializeNetworking(true); err != nil { - return err - } - linkedEnv, err := container.setupLinkedContainers() - if err != nil { - return err - } - if err = container.setupWorkingDirectory(); err != nil { - return err - } - - env := container.createDaemonEnvironment(linkedEnv) - if err = populateCommand(container, env); err != nil { - return err - } - - mounts, err := container.setupMounts() - if err != nil { - return err - } - - container.command.Mounts = mounts - return container.waitForRestore(opts, forceRestore) -} - func (container *Container) Copy(resource string) (rc io.ReadCloser, err error) { container.Lock() @@ -824,26 +768,6 @@ func (container *Container) waitForStart() error { return nil } -func (container *Container) waitForRestore(opts *runconfig.CriuConfig, forceRestore bool) error { - container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) - - // After calling promise.Go() we'll have two goroutines: - // - The current goroutine that will block in the select - // below until restore is done. - // - A new goroutine that will restore the container and - // wait for it to exit. 
- select { - case <-container.monitor.restoreSignal: - if container.ExitCode != 0 { - return fmt.Errorf("restore process failed") - } - case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }): - return err - } - - return nil -} - func (container *Container) GetProcessLabel() string { // even if we have a process label return "" if we are running // in privileged mode diff --git a/daemon/container_checkpoint.go b/daemon/container_checkpoint.go new file mode 100644 index 0000000000000..468816e448dc1 --- /dev/null +++ b/daemon/container_checkpoint.go @@ -0,0 +1,84 @@ +package daemon + +import ( + "fmt" + + "github.com/docker/docker/pkg/promise" + "github.com/docker/docker/runconfig" +) + +func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error { + if err := container.daemon.Checkpoint(container, opts); err != nil { + return err + } + + if opts.LeaveRunning == false { + container.ReleaseNetwork() + } + return nil +} + +func (container *Container) Restore(opts *runconfig.CriuConfig, forceRestore bool) error { + var err error + container.Lock() + defer container.Unlock() + + defer func() { + if err != nil { + container.setError(err) + // if no one else has set it, make sure we don't leave it at zero + if container.ExitCode == 0 { + container.ExitCode = 128 + } + container.toDisk() + container.cleanup() + } + }() + + if err := container.Mount(); err != nil { + return err + } + if err = container.initializeNetworking(true); err != nil { + return err + } + linkedEnv, err := container.setupLinkedContainers() + if err != nil { + return err + } + if err = container.setupWorkingDirectory(); err != nil { + return err + } + + env := container.createDaemonEnvironment(linkedEnv) + if err = populateCommand(container, env); err != nil { + return err + } + + mounts, err := container.setupMounts() + if err != nil { + return err + } + + container.command.Mounts = mounts + return container.waitForRestore(opts, forceRestore) +} + 
+func (container *Container) waitForRestore(opts *runconfig.CriuConfig, forceRestore bool) error { + container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) + + // After calling promise.Go() we'll have two goroutines: + // - The current goroutine that will block in the select + // below until restore is done. + // - A new goroutine that will restore the container and + // wait for it to exit. + select { + case <-container.monitor.restoreSignal: + if container.ExitCode != 0 { + return fmt.Errorf("restore process failed") + } + case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }): + return err + } + + return nil +} From c4a70bd668d383039072eae9065dd737f605aaee Mon Sep 17 00:00:00 2001 From: boucher Date: Fri, 17 Jul 2015 12:31:05 -0700 Subject: [PATCH 10/13] Move checkpoint/restore interface into docker experimental build. Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- api/client/checkpoint.go | 2 + api/client/restore.go | 2 + api/server/server.go | 90 +++++-------------- api/server/server_experimental_unix.go | 47 ++++++++++ api/server/server_stub.go | 4 + api/types/types.go | 2 +- daemon/container.go | 5 +- daemon/daemon.go | 8 +- docker/docker.go | 6 +- docker/flags.go | 2 - docker/flags_experimental.go | 10 +++ docker/flags_stub.go | 7 ++ integration-cli/docker_cli_checkpoint_test.go | 2 + .../docker_cli_help_experimental_test.go | 5 ++ .../docker_cli_help_standard_test.go | 5 ++ integration-cli/docker_cli_help_test.go | 4 +- 16 files changed, 123 insertions(+), 78 deletions(-) create mode 100644 docker/flags_experimental.go create mode 100644 docker/flags_stub.go create mode 100644 integration-cli/docker_cli_help_experimental_test.go create mode 100644 integration-cli/docker_cli_help_standard_test.go diff --git a/api/client/checkpoint.go b/api/client/checkpoint.go index 8c681bcf9716f..02990d9499adb 100644 --- a/api/client/checkpoint.go +++ b/api/client/checkpoint.go @@ 
-1,3 +1,5 @@ +// +build experimental + package client import ( diff --git a/api/client/restore.go b/api/client/restore.go index 0c4085fbbbd84..013acb4cf04b0 100644 --- a/api/client/restore.go +++ b/api/client/restore.go @@ -1,3 +1,5 @@ +// +build experimental + package client import ( diff --git a/api/server/server.go b/api/server/server.go index 3cfa958f66fec..a0b820a456d43 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -220,48 +220,6 @@ func writeJSON(w http.ResponseWriter, code int, v interface{}) error { return json.NewEncoder(w).Encode(v) } -func (s *Server) postContainersCheckpoint(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { - if vars == nil { - return fmt.Errorf("Missing parameter") - } - if err := parseForm(r); err != nil { - return err - } - - criuOpts := &runconfig.CriuConfig{} - if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil { - return err - } - - if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil { - return err - } - - w.WriteHeader(http.StatusNoContent) - return nil -} - -func (s *Server) postContainersRestore(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { - if vars == nil { - return fmt.Errorf("Missing parameter") - } - if err := parseForm(r); err != nil { - return err - } - - restoreOpts := runconfig.RestoreConfig{} - if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil { - return err - } - - if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil { - return err - } - - w.WriteHeader(http.StatusNoContent) - return nil -} - func (s *Server) optionsHandler(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { w.WriteHeader(http.StatusOK) return nil @@ -373,30 +331,28 @@ func createRouter(s *Server) *mux.Router { "/containers/{name:.*}/archive": s.getContainersArchive, }, "POST": { - 
"/auth": s.postAuth, - "/commit": s.postCommit, - "/build": s.postBuild, - "/images/create": s.postImagesCreate, - "/images/load": s.postImagesLoad, - "/images/{name:.*}/push": s.postImagesPush, - "/images/{name:.*}/tag": s.postImagesTag, - "/containers/create": s.postContainersCreate, - "/containers/{name:.*}/kill": s.postContainersKill, - "/containers/{name:.*}/pause": s.postContainersPause, - "/containers/{name:.*}/unpause": s.postContainersUnpause, - "/containers/{name:.*}/restart": s.postContainersRestart, - "/containers/{name:.*}/start": s.postContainersStart, - "/containers/{name:.*}/stop": s.postContainersStop, - "/containers/{name:.*}/wait": s.postContainersWait, - "/containers/{name:.*}/resize": s.postContainersResize, - "/containers/{name:.*}/attach": s.postContainersAttach, - "/containers/{name:.*}/copy": s.postContainersCopy, - "/containers/{name:.*}/exec": s.postContainerExecCreate, - "/exec/{name:.*}/start": s.postContainerExecStart, - "/exec/{name:.*}/resize": s.postContainerExecResize, - "/containers/{name:.*}/rename": s.postContainerRename, - "/containers/{name:.*}/checkpoint": s.postContainersCheckpoint, - "/containers/{name:.*}/restore": s.postContainersRestore, + "/auth": s.postAuth, + "/commit": s.postCommit, + "/build": s.postBuild, + "/images/create": s.postImagesCreate, + "/images/load": s.postImagesLoad, + "/images/{name:.*}/push": s.postImagesPush, + "/images/{name:.*}/tag": s.postImagesTag, + "/containers/create": s.postContainersCreate, + "/containers/{name:.*}/kill": s.postContainersKill, + "/containers/{name:.*}/pause": s.postContainersPause, + "/containers/{name:.*}/unpause": s.postContainersUnpause, + "/containers/{name:.*}/restart": s.postContainersRestart, + "/containers/{name:.*}/start": s.postContainersStart, + "/containers/{name:.*}/stop": s.postContainersStop, + "/containers/{name:.*}/wait": s.postContainersWait, + "/containers/{name:.*}/resize": s.postContainersResize, + "/containers/{name:.*}/attach": s.postContainersAttach, 
+ "/containers/{name:.*}/copy": s.postContainersCopy, + "/containers/{name:.*}/exec": s.postContainerExecCreate, + "/exec/{name:.*}/start": s.postContainerExecStart, + "/exec/{name:.*}/resize": s.postContainerExecResize, + "/containers/{name:.*}/rename": s.postContainerRename, }, "PUT": { "/containers/{name:.*}/archive": s.putContainersArchive, @@ -410,6 +366,8 @@ func createRouter(s *Server) *mux.Router { }, } + addExperimentalRoutes(s, &m) + // If "api-cors-header" is not given, but "api-enable-cors" is true, we set cors to "*" // otherwise, all head values will be passed to HTTP handler corsHeaders := s.cfg.CorsHeaders diff --git a/api/server/server_experimental_unix.go b/api/server/server_experimental_unix.go index 03829a11ecb76..da48a42524376 100644 --- a/api/server/server_experimental_unix.go +++ b/api/server/server_experimental_unix.go @@ -2,6 +2,11 @@ package server +func addExperimentalRoutes(s *Server, m *map[string]map[string]HttpApiFunc) { + m["GET"]["/containers/{name:.*}/checkpoint"] = s.postContainersCheckpoint + m["GET"]["/containers/{name:.*}/restore"] = s.postContainersRestore +} + func (s *Server) registerSubRouter() { httpHandler := s.daemon.NetworkApiRouter() @@ -15,3 +20,45 @@ func (s *Server) registerSubRouter() { subrouter = s.router.PathPrefix("/services").Subrouter() subrouter.Methods("GET", "POST", "PUT", "DELETE").HandlerFunc(httpHandler) } + +func (s *Server) postContainersCheckpoint(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + + criuOpts := &runconfig.CriuConfig{} + if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil { + return err + } + + if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} + +func (s *Server) postContainersRestore(version version.Version, w 
http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + + restoreOpts := runconfig.RestoreConfig{} + if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil { + return err + } + + if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} diff --git a/api/server/server_stub.go b/api/server/server_stub.go index cae28493836f4..aa5c82b952502 100644 --- a/api/server/server_stub.go +++ b/api/server/server_stub.go @@ -2,5 +2,9 @@ package server +func addExperimentalRoutes(s *Server, m *map[string]map[string]HttpApiFunc) { + +} + func (s *Server) registerSubRouter() { } diff --git a/api/types/types.go b/api/types/types.go index 407644a4205c0..16314ee6ecd51 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -238,7 +238,7 @@ type ContainerState struct { Error string StartedAt string FinishedAt string - CheckpointedAt string + CheckpointedAt string `json:"-"` } // ContainerJSONBase contains response of Remote API: diff --git a/daemon/container.go b/daemon/container.go index 73677d3aeeb8b..d3473485e264d 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -343,11 +343,12 @@ func (container *Container) isNetworkAllocated() bool { // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. 
func (container *Container) cleanup() { - if container.IsCheckpointed() { + /*if container.IsCheckpointed() { logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID) } else { container.ReleaseNetwork() - } + }*/ + container.ReleaseNetwork() if err := container.CleanupStorage(); err != nil { logrus.Errorf("%v: Failed to cleanup storage: %v", container.ID, err) diff --git a/daemon/daemon.go b/daemon/daemon.go index 21fe2bf864129..170c97e2cfd38 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -279,13 +279,13 @@ func (daemon *Daemon) restore() error { // the IP address that it was using. // // XXX We should also reserve host ports (if any). - if container.IsCheckpointed() { - /*err = bridge.ReserveIP(container.ID, container.NetworkSettings.IPAddress) + /*if container.IsCheckpointed() { + err = bridge.ReserveIP(container.ID, container.NetworkSettings.IPAddress) if err != nil { log.Errorf("Failed to reserve IP %s for container %s", container.ID, container.NetworkSettings.IPAddress) - }*/ - } + } + }*/ } else { logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } diff --git a/docker/docker.go b/docker/docker.go index 8ad0d13c05c1a..4d3477110c03f 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -35,7 +35,11 @@ func main() { help := "\nCommands:\n" - for _, cmd := range dockerCommands { + // TODO(tiborvass): no need to sort if we ensure dockerCommands is sorted + allCommands := append(dockerCommands, experimentalCommands...) 
+ sort.Sort(byName(allCommands)) + + for _, cmd := range allCommands { help += fmt.Sprintf(" %-10.10s%s\n", cmd.name, cmd.description) } diff --git a/docker/flags.go b/docker/flags.go index 6b4fe4ade9041..afff0bd9bc6e6 100644 --- a/docker/flags.go +++ b/docker/flags.go @@ -23,7 +23,6 @@ func (a byName) Less(i, j int) bool { return a[i].name < a[j].name } var dockerCommands = []command{ {"attach", "Attach to a running container"}, {"build", "Build an image from a Dockerfile"}, - {"checkpoint", "Checkpoint one or more running containers"}, {"commit", "Create a new image from a container's changes"}, {"cp", "Copy files/folders from a container to a HOSTDIR or to STDOUT"}, {"create", "Create a new container"}, @@ -48,7 +47,6 @@ var dockerCommands = []command{ {"push", "Push an image or a repository to a registry"}, {"rename", "Rename a container"}, {"restart", "Restart a running container"}, - {"restore", "Restore one or more checkpointed containers"}, {"rm", "Remove one or more containers"}, {"rmi", "Remove one or more images"}, {"run", "Run a command in a new container"}, diff --git a/docker/flags_experimental.go b/docker/flags_experimental.go new file mode 100644 index 0000000000000..08893a7cf0b49 --- /dev/null +++ b/docker/flags_experimental.go @@ -0,0 +1,10 @@ +// +build experimental + +package main + +var ( + experimentalCommands = []command{ + {"checkpoint", "Checkpoint one or more running containers"}, + {"restore", "Restore one or more checkpointed containers"}, + } +) diff --git a/docker/flags_stub.go b/docker/flags_stub.go new file mode 100644 index 0000000000000..d627b86a3a8f7 --- /dev/null +++ b/docker/flags_stub.go @@ -0,0 +1,7 @@ +// +build !experimental + +package main + +var ( + experimentalCommands = []command{} +) diff --git a/integration-cli/docker_cli_checkpoint_test.go b/integration-cli/docker_cli_checkpoint_test.go index e19ef524efd43..09ec47a9a0d54 100644 --- a/integration-cli/docker_cli_checkpoint_test.go +++ 
b/integration-cli/docker_cli_checkpoint_test.go @@ -1,3 +1,5 @@ +// +build experimental + package main import ( diff --git a/integration-cli/docker_cli_help_experimental_test.go b/integration-cli/docker_cli_help_experimental_test.go new file mode 100644 index 0000000000000..55080154100a3 --- /dev/null +++ b/integration-cli/docker_cli_help_experimental_test.go @@ -0,0 +1,5 @@ +// +build experimental + +package main + +var totalDockerCLICommands = 41 diff --git a/integration-cli/docker_cli_help_standard_test.go b/integration-cli/docker_cli_help_standard_test.go new file mode 100644 index 0000000000000..f889ba4b27731 --- /dev/null +++ b/integration-cli/docker_cli_help_standard_test.go @@ -0,0 +1,5 @@ +// +build !experimental + +package main + +var totalDockerCLICommands = 39 diff --git a/integration-cli/docker_cli_help_test.go b/integration-cli/docker_cli_help_test.go index 311308359ac9d..dc317ddb348d6 100644 --- a/integration-cli/docker_cli_help_test.go +++ b/integration-cli/docker_cli_help_test.go @@ -238,13 +238,13 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) { } - expected := 39 + expected := totalDockerCLICommands if isLocalDaemon { expected++ // for the daemon command } if len(cmds) != expected { c.Fatalf("Wrong # of cmds(%d), it should be: %d\nThe list:\n%q", - len(cmds), expected, cmds) + len(cmds), totalDockerCLICommands, cmds) } } From 6befec5b49ce67f868e0fb3af4004b860fe14bff Mon Sep 17 00:00:00 2001 From: fl0yd Date: Fri, 17 Jul 2015 16:23:01 -0500 Subject: [PATCH 11/13] imports to fix compilation issues and drop * from map Docker-DCO-1.1-Signed-off-by: Mark Oates fl0yd@me.com (github: fl0yd) --- api/server/server.go | 2 +- api/server/server_experimental_unix.go | 14 +++++++++++--- api/server/server_stub.go | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/api/server/server.go b/api/server/server.go index a0b820a456d43..bf944f92e0e52 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -366,7 +366,7 @@ func 
createRouter(s *Server) *mux.Router { }, } - addExperimentalRoutes(s, &m) + addExperimentalRoutes(s, m) // If "api-cors-header" is not given, but "api-enable-cors" is true, we set cors to "*" // otherwise, all head values will be passed to HTTP handler diff --git a/api/server/server_experimental_unix.go b/api/server/server_experimental_unix.go index da48a42524376..6d4b32ee79167 100644 --- a/api/server/server_experimental_unix.go +++ b/api/server/server_experimental_unix.go @@ -2,9 +2,17 @@ package server -func addExperimentalRoutes(s *Server, m *map[string]map[string]HttpApiFunc) { - m["GET"]["/containers/{name:.*}/checkpoint"] = s.postContainersCheckpoint - m["GET"]["/containers/{name:.*}/restore"] = s.postContainersRestore +import ( + "encoding/json" + "fmt" + "net/http" + "github.com/docker/docker/pkg/version" + "github.com/docker/docker/runconfig" + ) + +func addExperimentalRoutes(s *Server, m map[string]map[string]HttpApiFunc) { + m["POST"]["/containers/{name:.*}/checkpoint"] = s.postContainersCheckpoint + m["POST"]["/containers/{name:.*}/restore"] = s.postContainersRestore } func (s *Server) registerSubRouter() { diff --git a/api/server/server_stub.go b/api/server/server_stub.go index aa5c82b952502..ce6669da845be 100644 --- a/api/server/server_stub.go +++ b/api/server/server_stub.go @@ -2,7 +2,7 @@ package server -func addExperimentalRoutes(s *Server, m *map[string]map[string]HttpApiFunc) { +func addExperimentalRoutes(s *Server, m map[string]map[string]HttpApiFunc) { } From 2791165ccaa27d3f6f03bdb9e1efe03961d30133 Mon Sep 17 00:00:00 2001 From: boucher Date: Wed, 5 Aug 2015 09:32:41 -0700 Subject: [PATCH 12/13] Update checkpoint/restore to match changes in latest docker. 
Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- api/client/checkpoint.go | 3 ++- api/client/restore.go | 3 ++- api/server/server_experimental_unix.go | 6 +++--- api/server/server_stub.go | 2 +- api/types/types.go | 18 +++++++++--------- daemon/execdriver/lxc/driver.go | 4 ++-- daemon/execdriver/native/create.go | 18 ------------------ daemon/execdriver/native/driver.go | 4 ++-- daemon/inspect.go | 16 ++++++++-------- docker/docker.go | 2 +- 10 files changed, 30 insertions(+), 46 deletions(-) diff --git a/api/client/checkpoint.go b/api/client/checkpoint.go index 02990d9499adb..24fed5f68b634 100644 --- a/api/client/checkpoint.go +++ b/api/client/checkpoint.go @@ -5,12 +5,13 @@ package client import ( "fmt" + Cli "github.com/docker/docker/cli" flag "github.com/docker/docker/pkg/mflag" "github.com/docker/docker/runconfig" ) func (cli *DockerCli) CmdCheckpoint(args ...string) error { - cmd := cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true) + cmd := Cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true) cmd.Require(flag.Min, 1) var ( diff --git a/api/client/restore.go b/api/client/restore.go index 013acb4cf04b0..bef78a262b54a 100644 --- a/api/client/restore.go +++ b/api/client/restore.go @@ -5,12 +5,13 @@ package client import ( "fmt" + Cli "github.com/docker/docker/cli" flag "github.com/docker/docker/pkg/mflag" "github.com/docker/docker/runconfig" ) func (cli *DockerCli) CmdRestore(args ...string) error { - cmd := cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true) + cmd := Cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true) cmd.Require(flag.Min, 1) var ( diff --git a/api/server/server_experimental_unix.go b/api/server/server_experimental_unix.go index 6d4b32ee79167..ca17cdd1036b8 100644 --- 
a/api/server/server_experimental_unix.go +++ b/api/server/server_experimental_unix.go @@ -5,12 +5,12 @@ package server import ( "encoding/json" "fmt" - "net/http" "github.com/docker/docker/pkg/version" "github.com/docker/docker/runconfig" - ) + "net/http" +) -func addExperimentalRoutes(s *Server, m map[string]map[string]HttpApiFunc) { +func addExperimentalRoutes(s *Server, m map[string]map[string]HTTPAPIFunc) { m["POST"]["/containers/{name:.*}/checkpoint"] = s.postContainersCheckpoint m["POST"]["/containers/{name:.*}/restore"] = s.postContainersRestore } diff --git a/api/server/server_stub.go b/api/server/server_stub.go index ce6669da845be..6d93d21626dd1 100644 --- a/api/server/server_stub.go +++ b/api/server/server_stub.go @@ -2,7 +2,7 @@ package server -func addExperimentalRoutes(s *Server, m map[string]map[string]HttpApiFunc) { +func addExperimentalRoutes(s *Server, m map[string]map[string]HTTPAPIFunc) { } diff --git a/api/types/types.go b/api/types/types.go index 16314ee6ecd51..ee994f3bbe2a3 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -228,16 +228,16 @@ type ExecStartCheck struct { // it's part of ContainerJSONBase and will return by "inspect" command type ContainerState struct { Running bool - Paused bool + Paused bool Checkpointed bool - Restarting bool - OOMKilled bool - Dead bool - Pid int - ExitCode int - Error string - StartedAt string - FinishedAt string + Restarting bool + OOMKilled bool + Dead bool + Pid int + ExitCode int + Error string + StartedAt string + FinishedAt string CheckpointedAt string `json:"-"` } diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index e8196316098ad..dce464b7937db 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -561,11 +561,11 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return err } -func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { +func (d *Driver) Checkpoint(c *execdriver.Command, opts 
*runconfig.CriuConfig) error { return fmt.Errorf("Checkpointing lxc containers not supported yet\n") } -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { +func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet\n") } diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 95c2fd03eb5fc..85f72f8c2cc83 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -113,24 +113,6 @@ func generateIfaceName() (string, error) { return "", errors.New("Failed to find name for new interface") } -// Re-create the container type from the image that was saved during checkpoint. -func (d *driver) createRestoreContainer(c *execdriver.Command, imageDir string) (*libcontainer.Config, error) { - // Read the container.json. 
- f1, err := os.Open(filepath.Join(imageDir, "container.json")) - if err != nil { - return nil, err - } - defer f1.Close() - - var container *libcontainer.Config - err = json.NewDecoder(f1).Decode(&container) - if err != nil { - return nil, err - } - - return container, nil -} - func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command) error { if c.Network == nil { return nil diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index f4f34095ee50a..2b54aa3ff68ce 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -313,7 +313,7 @@ func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.Cri } } -func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { active := d.activeContainers[c.ID] if active == nil { return fmt.Errorf("active container for %s does not exist", c.ID) @@ -329,7 +329,7 @@ func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) e return nil } -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { +func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { var ( cont libcontainer.Container err error diff --git a/daemon/inspect.go b/daemon/inspect.go index 16cc2f9a3a106..bcc84bf936eca 100644 --- a/daemon/inspect.go +++ b/daemon/inspect.go @@ -45,14 +45,14 @@ func (daemon *Daemon) getInspectData(container *Container) (*types.ContainerJSON Running: container.State.Running, Paused: container.State.Paused, Checkpointed: container.State.Checkpointed, - Restarting: container.State.Restarting, - OOMKilled: container.State.OOMKilled, 
- Dead: container.State.Dead, - Pid: container.State.Pid, - ExitCode: container.State.ExitCode, - Error: container.State.Error, - StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), - FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + Restarting: container.State.Restarting, + OOMKilled: container.State.OOMKilled, + Dead: container.State.Dead, + Pid: container.State.Pid, + ExitCode: container.State.ExitCode, + Error: container.State.Error, + StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), + FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), CheckpointedAt: container.State.CheckpointedAt.Format(time.RFC3339Nano), } diff --git a/docker/docker.go b/docker/docker.go index 4d3477110c03f..fb0a5240e6936 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -3,6 +3,7 @@ package main import ( "fmt" "os" + "sort" "github.com/Sirupsen/logrus" "github.com/docker/docker/api/client" @@ -35,7 +36,6 @@ func main() { help := "\nCommands:\n" - // TODO(tiborvass): no need to sort if we ensure dockerCommands is sorted allCommands := append(dockerCommands, experimentalCommands...) sort.Sort(byName(allCommands)) From cf937feece4166541dcd45e5fd11a9dfa46dc7c0 Mon Sep 17 00:00:00 2001 From: Hui Kang Date: Wed, 19 Aug 2015 19:50:15 +0000 Subject: [PATCH 13/13] Allow restore network to have network connectivity Reuse the endpoint of the checkpointed container when restoring. Pass the veth pair names to criu when restoring a checkpointed container. 
Signed-off-by: Hui Kang --- daemon/container.go | 7 +-- daemon/container_checkpoint.go | 28 ++++++++- daemon/container_unix.go | 61 +++++++++++++++---- daemon/execdriver/native/driver.go | 12 +++- runconfig/restore.go | 6 ++ .../github.com/docker/libnetwork/endpoint.go | 14 +++++ .../runc/libcontainer/container_linux.go | 10 +++ .../runc/libcontainer/criu_opts.go | 6 ++ 8 files changed, 125 insertions(+), 19 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index d3473485e264d..872f9ebb0fc8b 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -343,12 +343,11 @@ func (container *Container) isNetworkAllocated() bool { // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (container *Container) cleanup() { - /*if container.IsCheckpointed() { + if container.IsCheckpointed() { logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID) } else { - container.ReleaseNetwork() - }*/ - container.ReleaseNetwork() + container.ReleaseNetwork(false) + } if err := container.CleanupStorage(); err != nil { logrus.Errorf("%v: Failed to cleanup storage: %v", container.ID, err) diff --git a/daemon/container_checkpoint.go b/daemon/container_checkpoint.go index 468816e448dc1..b6aad41aabcf4 100644 --- a/daemon/container_checkpoint.go +++ b/daemon/container_checkpoint.go @@ -5,6 +5,8 @@ import ( "github.com/docker/docker/pkg/promise" "github.com/docker/docker/runconfig" + + "github.com/docker/libnetwork/netutils" ) func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error { @@ -13,7 +15,7 @@ func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error { } if opts.LeaveRunning == false { - container.ReleaseNetwork() + container.ReleaseNetwork(true) } return nil } @@ -41,6 +43,30 @@ func (container *Container) Restore(opts *runconfig.CriuConfig, forceRestore boo if err = 
container.initializeNetworking(true); err != nil { return err } + + nctl := container.daemon.netController + network, err := nctl.NetworkByID(container.NetworkSettings.NetworkID) + if err != nil { + return err + } + + ep_t, err := network.EndpointByID(container.NetworkSettings.EndpointID) + if err != nil { + return err + } + + for _, i := range ep_t.SandboxInterfaces() { + outname, err := netutils.GenerateIfaceName("veth", 7) + if err != nil { + return err + } + vethpair := runconfig.VethPairName{ + InName: i.DstName(), + OutName: outname, + } + opts.VethPairs = append(opts.VethPairs, vethpair) + } + linkedEnv, err := container.setupLinkedContainers() if err != nil { return err diff --git a/daemon/container_unix.go b/daemon/container_unix.go index 68b3d126f5565..f948a3b071cd1 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -759,13 +759,13 @@ func (container *Container) buildCreateEndpointOptions(restoring bool) ([]libnet createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) } - /*if restoring && container.NetworkSettings.IPAddress != "" { + /* if restoring && container.NetworkSettings.IPAddress != "" { genericOption := options.Generic{ netlabel.IPAddress: net.ParseIP(container.NetworkSettings.IPAddress), } createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) - }*/ + } */ return createOptions, nil } @@ -881,20 +881,51 @@ func (container *Container) configureNetwork(networkName, service, networkDriver } } - ep, err := n.EndpointByName(service) - if err != nil { - if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok { - return err - } + var ep libnetwork.Endpoint - createOptions, err := container.buildCreateEndpointOptions(isRestoring) - if err != nil { - return err + if isRestoring == true { + // Use existing Endpoint for a checkpointed container + for _, endpoint := range n.Endpoints() { + if endpoint.ID() == container.NetworkSettings.EndpointID { + ep = endpoint + } } + if ep 
== nil { + //return fmt.Errorf("Fail to find the Endpoint for the checkpointed container") + fmt.Println("Fail to find the Endpoint for the checkpointed container") + ep, err = n.EndpointByName(service) + if err != nil { + if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok { + return err + } - ep, err = n.CreateEndpoint(service, createOptions...) + createOptions, err := container.buildCreateEndpointOptions(isRestoring) + if err != nil { + return err + } + + ep, err = n.CreateEndpoint(service, createOptions...) + if err != nil { + return err + } + } + } + } else { + ep, err = n.EndpointByName(service) if err != nil { - return err + if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok { + return err + } + + createOptions, err := container.buildCreateEndpointOptions(isRestoring) + if err != nil { + return err + } + + ep, err = n.CreateEndpoint(service, createOptions...) + if err != nil { + return err + } } } @@ -1032,7 +1063,7 @@ func (container *Container) getNetworkedContainer() (*Container, error) { } } -func (container *Container) ReleaseNetwork() { +func (container *Container) ReleaseNetwork(is_checkpoint bool) { if container.hostConfig.NetworkMode.IsContainer() || container.Config.NetworkDisabled { return } @@ -1071,6 +1102,10 @@ func (container *Container) ReleaseNetwork() { } } + if is_checkpoint == true { + return + } + // In addition to leaving all endpoints, delete implicitly created endpoint if container.Config.PublishService == "" { if err := ep.Delete(); err != nil { diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index 2b54aa3ff68ce..5f2e8f4ea0a83 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -266,6 +266,7 @@ func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*o // Kill implements the exec driver Driver interface. 
func (d *Driver) Kill(c *execdriver.Command, sig int) error { d.Lock() + _, err := d.factory.Load(c.ID) active := d.activeContainers[c.ID] d.Unlock() if active == nil { @@ -303,7 +304,7 @@ func (d *Driver) Unpause(c *execdriver.Command) error { } func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.CriuOpts { - return &libcontainer.CriuOpts{ + criuopts := &libcontainer.CriuOpts{ ImagesDirectory: runconfigOpts.ImagesDirectory, WorkDirectory: runconfigOpts.WorkDirectory, LeaveRunning: runconfigOpts.LeaveRunning, @@ -311,6 +312,15 @@ func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.Cri ExternalUnixConnections: runconfigOpts.ExternalUnixConnections, ShellJob: runconfigOpts.ShellJob, } + + for _, i := range runconfigOpts.VethPairs { + criuopts.VethPairs = append(criuopts.VethPairs, + libcontainer.VethPairName{ + InName: i.InName, + OutName: i.OutName, + }) + } + return criuopts } func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { diff --git a/runconfig/restore.go b/runconfig/restore.go index 22f8b0ab0a096..70deac8e8683a 100644 --- a/runconfig/restore.go +++ b/runconfig/restore.go @@ -1,5 +1,10 @@ package runconfig +type VethPairName struct { + InName string + OutName string +} + type CriuConfig struct { ImagesDirectory string WorkDirectory string @@ -7,6 +12,7 @@ type CriuConfig struct { TcpEstablished bool ExternalUnixConnections bool ShellJob bool + VethPairs []VethPairName } type RestoreConfig struct { diff --git a/vendor/src/github.com/docker/libnetwork/endpoint.go b/vendor/src/github.com/docker/libnetwork/endpoint.go index 3475e9eafda04..b0354a03bc6e1 100644 --- a/vendor/src/github.com/docker/libnetwork/endpoint.go +++ b/vendor/src/github.com/docker/libnetwork/endpoint.go @@ -54,6 +54,9 @@ type Endpoint interface { // Retrieve the interfaces' statistics from the sandbox Statistics() (map[string]*sandbox.InterfaceStatistics, error) + + // Retrieve the interfaces from sandbox, for 
restoring checkpointed container + SandboxInterfaces() []sandbox.Interface } // EndpointOption is a option setter function type used to pass varios options to Network @@ -583,6 +586,17 @@ func (ep *endpoint) Statistics() (map[string]*sandbox.InterfaceStatistics, error return m, nil } +func (ep *endpoint) SandboxInterfaces() []sandbox.Interface { + n := ep.network + + n.Lock() + c := n.ctrlr + n.Unlock() + + sbox := c.sandboxGet(ep.container.data.SandboxKey) + return sbox.Info().Interfaces() +} + func (ep *endpoint) deleteEndpoint() error { ep.Lock() n := ep.network diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go index 9a27eb432faad..57d565ab69bc7 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go @@ -520,6 +520,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { break } } + + /* XXX, we can not get interface from config */ for _, iface := range c.config.Networks { switch iface.Type { case "veth": @@ -532,6 +534,14 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { break } } + /* Use criuop to pass veth pairs */ + for _, i := range criuOpts.VethPairs { + outname := i.OutName + "@docker0" + veth := new(criurpc.CriuVethPair) + veth.IfOut = proto.String(outname) + veth.IfIn = proto.String(i.InName) + req.Opts.Veths = append(req.Opts.Veths, veth) + } var ( fds []string diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go b/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go index bca81672eac8b..1e3c79e6ce9db 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go @@ -5,6 +5,11 @@ type CriuPageServerInfo struct { Port int32 // port number of 
CRIU page server } +type VethPairName struct { + InName string + OutName string +} + type CriuOpts struct { ImagesDirectory string // directory for storing image files WorkDirectory string // directory to cd and write logs/pidfiles/stats to @@ -14,4 +19,5 @@ type CriuOpts struct { ShellJob bool // allow to dump and restore shell jobs FileLocks bool // handle file locks, for safety PageServer CriuPageServerInfo // allow to dump to criu page server + VethPairs []VethPairName // pass the veth to criu when restore }