From 87b60ad64d4634f7065067f871b66926f74523bf Mon Sep 17 00:00:00 2001 From: Saied Kazemi Date: Thu, 5 Feb 2015 20:32:27 -0800 Subject: [PATCH 1/4] Checkpoint/Restore Support: add exec driver methods Methods for checkpointing and restoring containers were added to the native driver. The LXC driver returns an error message that these methods are not implemented yet. Signed-off-by: Saied Kazemi Conflicts: daemon/execdriver/native/create.go daemon/execdriver/native/driver.go daemon/execdriver/native/init.go Conflicts: daemon/execdriver/driver.go daemon/execdriver/native/create.go --- daemon/execdriver/driver.go | 6 ++ daemon/execdriver/lxc/driver.go | 9 +- daemon/execdriver/native/driver.go | 150 +++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 1 deletion(-) diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 75d266fcf112d..76c2870a3ae76 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -41,6 +41,8 @@ type Hooks struct { PostStop []DriverCallback } +type RestoreCallback func(*ProcessConfig, int) + // Info is driver specific information based on // processes registered with the driver type Info interface { @@ -83,6 +85,10 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error + Checkpoint(c *Command) error + + Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error) + // Name returns the name of the driver. Name() string diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index 12793bd01f0de..5940babf731d5 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -575,7 +575,14 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return err } -// Terminate implements the exec driver Driver interface. +func (d *driver) Checkpoint(c *execdriver.Command) error { + return fmt.Errorf("Checkpointing lxc containers not supported yet\n") +} + +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { + return 0, fmt.Errorf("Restoring lxc containers not supported yet\n") +} + func (d *Driver) Terminate(c *execdriver.Command) error { return killLxc(c.ID, 9) } diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index 09171c56dde3f..f87a40a0083ec 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -20,6 +20,7 @@ import ( "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" + "github.com/docker/docker/utils" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" @@ -302,6 +303,155 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return active.Resume() } +// XXX Where is the right place for the following +// const and getCheckpointImageDir() function? +const ( + containersDir = "/var/lib/docker/containers" + criuImgDir = "criu_img" +) + +func getCheckpointImageDir(containerId string) string { + return filepath.Join(containersDir, containerId, criuImgDir) +} + +func (d *driver) Checkpoint(c *execdriver.Command) error { + active := d.activeContainers[c.ID] + if active == nil { + return fmt.Errorf("active container for %s does not exist", c.ID) + } + container := active.container + + // Create an image directory for this container (which + // may already exist from a previous checkpoint). 
+ imageDir := getCheckpointImageDir(c.ID) + err := os.MkdirAll(imageDir, 0700) + if err != nil && !os.IsExist(err) { + return err + } + + // Copy container.json and state.json files to the CRIU + // image directory for later use during restore. Do this + // before checkpointing because after checkpoint the container + // will exit and these files will be removed. + log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir) + srcFiles := []string{"container.json", "state.json"} + for _, f := range srcFiles { + srcFile := filepath.Join(d.root, c.ID, f) + dstFile := filepath.Join(imageDir, f) + if _, err := utils.CopyFile(srcFile, dstFile); err != nil { + return err + } + } + + d.Lock() + defer d.Unlock() + err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid) + if err != nil { + return err + } + + return nil +} + +type restoreOutput struct { + exitCode int + err error +} + +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { + imageDir := getCheckpointImageDir(c.ID) + container, err := d.createRestoreContainer(c, imageDir) + if err != nil { + return 1, err + } + + var term execdriver.Terminal + + if c.ProcessConfig.Tty { + term, err = NewTtyConsole(&c.ProcessConfig, pipes) + } else { + term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) + } + if err != nil { + return -1, err + } + c.ProcessConfig.Terminal = term + + d.Lock() + d.activeContainers[c.ID] = &activeContainer{ + container: container, + cmd: &c.ProcessConfig.Cmd, + } + d.Unlock() + defer d.cleanContainer(c.ID) + + // Since the CRIU binary exits after restoring the container, we + // need to reap its child by setting PR_SET_CHILD_SUBREAPER (36) + // so that it'll be owned by this process (Docker daemon) after restore. + // + // XXX This really belongs to where the Docker daemon starts. + if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 { + return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr) + } + + restoreOutputChan := make(chan restoreOutput, 1) + waitForRestore := make(chan struct{}) + + go func() { + exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir, + func(child *os.File, args []string) *exec.Cmd { + cmd := new(exec.Cmd) + cmd.Path = d.initPath + cmd.Args = append([]string{ + DriverName, + "-restore", + "-pipe", "3", + "--", + }, args...) + cmd.ExtraFiles = []*os.File{child} + return cmd + }, + func(restorePid int) error { + log.CRDbg("restorePid=%d", restorePid) + if restorePid == 0 { + restoreCallback(&c.ProcessConfig, 0) + return nil + } + + // The container.json file should be written *after* the container + // has started because its StdFds cannot be initialized before. + // + // XXX How do we handle error here? 
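An aside on the raw prctl(2) call above: option 36 is PR_SET_CHILD_SUBREAPER (Linux 3.4 and later). Once set, orphaned descendants, such as the process tree CRIU leaves behind when the criu binary exits, are re-parented to the daemon instead of init, so the daemon can wait(2) on the restored container. A minimal standalone sketch of the same call:

package main

import (
	"fmt"
	"syscall"
)

// prSetChildSubreaper is PR_SET_CHILD_SUBREAPER from <linux/prctl.h>;
// the patch uses the literal 36 directly.
const prSetChildSubreaper = 36

func main() {
	// Mark this process as a subreaper; descendants whose parents die
	// are then re-parented here rather than to PID 1.
	if _, _, errno := syscall.RawSyscall(syscall.SYS_PRCTL, prSetChildSubreaper, 1, 0); errno != 0 {
		fmt.Printf("prctl(PR_SET_CHILD_SUBREAPER): %v\n", errno)
		return
	}
	fmt.Println("orphaned descendants will now be re-parented to this process")
}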
+ d.writeContainerFile(container, c.ID) + close(waitForRestore) + if restoreCallback != nil { + c.ProcessConfig.Process, err = os.FindProcess(restorePid) + if err != nil { + log.Debugf("cannot find restored process %d", restorePid) + return err + } + c.ContainerPid = c.ProcessConfig.Process.Pid + restoreCallback(&c.ProcessConfig, c.ContainerPid) + } + return nil + }) + restoreOutputChan <- restoreOutput{exitCode, err} + }() + + select { + case restoreOutput := <-restoreOutputChan: + // there was an error + return restoreOutput.exitCode, restoreOutput.err + case <-waitForRestore: + // container restored + break + } + + // Wait for the container to exit. + restoreOutput := <-restoreOutputChan + return restoreOutput.exitCode, restoreOutput.err +} + // Terminate implements the exec driver Driver interface. func (d *Driver) Terminate(c *execdriver.Command) error { defer d.cleanContainer(c.ID) From 5f626a64fd6ad9fff88f5e3f48abe97f57a09a75 Mon Sep 17 00:00:00 2001 From: Saied Kazemi Date: Thu, 5 Feb 2015 20:37:07 -0800 Subject: [PATCH 2/4] Checkpoint/Restore Support: add functionality to daemon Support was added to the daemon to use the Checkpoint and Restore methods of the native exec driver for checkpointing and restoring containers. Signed-off-by: Saied Kazemi Conflicts: api/server/server.go daemon/container.go daemon/daemon.go daemon/networkdriver/bridge/driver.go daemon/state.go vendor/src/github.com/docker/libnetwork/ipallocator/allocator.go Conflicts: api/server/server.go --- api/server/server.go | 30 +++++++++++++++++ daemon/checkpoint.go | 55 +++++++++++++++++++++++++++++++ daemon/container.go | 65 +++++++++++++++++++++++++++++++++++-- daemon/container_unix.go | 49 +++++++++++++++++++++++++++- daemon/daemon.go | 31 ++++++++++++++++++ daemon/monitor.go | 70 ++++++++++++++++++++++++++++++++++++++++ daemon/state.go | 23 +++++++++++++ 7 files changed, 320 insertions(+), 3 deletions(-) create mode 100644 daemon/checkpoint.go diff --git a/api/server/server.go b/api/server/server.go index 7c9262c5bef9e..9f2ee186ee6cc 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -118,6 +118,36 @@ func (s *HTTPServer) Close() error { return s.l.Close() } +func postContainersCheckpoint(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + job := eng.Job("checkpoint", vars["name"]) + if err := job.Run(); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) + return nil +} + +func postContainersRestore(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + job := eng.Job("restore", vars["name"]) + if err := job.Run(); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) + return nil +} + func writeCorsHeaders(w http.ResponseWriter, r *http.Request, corsHeaders string) { logrus.Debugf("CORS header is enabled and set to: %s", corsHeaders) w.Header().Add("Access-Control-Allow-Origin", corsHeaders) diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go new file mode 100644 index 0000000000000..f6057c6a028f9 --- /dev/null +++ b/daemon/checkpoint.go @@ -0,0 +1,55 @@ +package daemon + +import ( + "github.com/docker/docker/engine" +) + +// Checkpoint a running container. 
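For readers tracing the control flow: the select on restoreOutputChan and waitForRestore in the native driver's Restore() above is a two-channel handshake. Here is a self-contained sketch of the same pattern; the names run, out, and ready are illustrative and not from the patch:

package main

import (
	"errors"
	"fmt"
	"time"
)

type result struct {
	exitCode int
	err      error
}

// run mimics Restore(): the worker goroutine reports its final result on
// out and separately closes ready once the container is up. The caller
// returns early only if the worker fails before signalling readiness;
// otherwise it keeps waiting for the exit status.
func run(fail bool) (int, error) {
	out := make(chan result, 1)
	ready := make(chan struct{})

	go func() {
		if fail {
			out <- result{-1, errors.New("restore failed")}
			return
		}
		close(ready) // container restored
		time.Sleep(10 * time.Millisecond)
		out <- result{0, nil} // restored process exited
	}()

	select {
	case r := <-out: // failed before the ready signal
		return r.exitCode, r.err
	case <-ready: // restored; keep waiting for the exit status
	}
	r := <-out
	return r.exitCode, r.err
}

func main() {
	fmt.Println(run(true))
	fmt.Println(run(false))
}

The buffer of one on out matters: it lets the worker deliver its final status and exit without blocking, whichever branch the caller takes first.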
+func (daemon *Daemon) ContainerCheckpoint(job *engine.Job) engine.Status { + if len(job.Args) != 1 { + return job.Errorf("Usage: %s CONTAINER\n", job.Name) + } + + name := job.Args[0] + container, err := daemon.Get(name) + if err != nil { + return job.Error(err) + } + if !container.IsRunning() { + return job.Errorf("Container %s not running", name) + } + + if err := container.Checkpoint(); err != nil { + return job.Errorf("Cannot checkpoint container %s: %s", name, err) + } + + container.LogEvent("checkpoint") + return engine.StatusOK +} + +// Restore a checkpointed container. +func (daemon *Daemon) ContainerRestore(job *engine.Job) engine.Status { + if len(job.Args) != 1 { + return job.Errorf("Usage: %s CONTAINER\n", job.Name) + } + + name := job.Args[0] + container, err := daemon.Get(name) + if err != nil { + return job.Error(err) + } + if container.IsRunning() { + return job.Errorf("Container %s already running", name) + } + if !container.State.IsCheckpointed() { + return job.Errorf("Container %s is not checkpointed", name) + } + + if err := container.Restore(); err != nil { + container.LogEvent("die") + return job.Errorf("Cannot restore container %s: %s", name, err) + } + + container.LogEvent("restore") + return engine.StatusOK +} diff --git a/daemon/container.go b/daemon/container.go index 480bd8eb96277..fa2982d63eb5b 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -335,7 +335,11 @@ func (streamConfig *streamConfig) StderrPipe() io.ReadCloser { // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (container *Container) cleanup() { - container.releaseNetwork() + if container.IsCheckpointed() { + log.CRDbg("not calling ReleaseNetwork() for checkpointed container %s", container.ID) + } else { + container.ReleaseNetwork() + } if err := container.unmountIpcMounts(); err != nil { logrus.Errorf("%s: Failed to umount ipc filesystems: %v", container.ID, err) @@ -682,6 +686,41 @@ func (container *Container) copy(resource string) (rc io.ReadCloser, err error) return reader, nil } +func (container *Container) Checkpoint() error { + return container.daemon.Checkpoint(container) +} + +func (container *Container) Restore() error { + var err error + + container.Lock() + defer container.Unlock() + + defer func() { + if err != nil { + container.cleanup() + } + }() + + if err = container.initializeNetworking(); err != nil { + return err + } + + linkedEnv, err := container.setupLinkedContainers() + if err != nil { + return err + } + if err = container.setupWorkingDirectory(); err != nil { + return err + } + env := container.createDaemonEnvironment(linkedEnv) + if err = populateCommandRestore(container, env); err != nil { + return err + } + + return container.waitForRestore() +} + // Returns true if the container exposes a certain port func (container *Container) exposes(p nat.Port) bool { _, exists := container.Config.ExposedPorts[p] @@ -773,6 +812,29 @@ func (container *Container) waitForStart() error { return nil } +// Like waitForStart() but for restoring a container. +// +// XXX Does RestartPolicy apply here? +func (container *Container) waitForRestore() error { + container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) + + // After calling promise.Go() we'll have two goroutines: + // - The current goroutine that will block in the select + // below until restore is done. 
+ // - A new goroutine that will restore the container and + // wait for it to exit. + select { + case <-container.monitor.restoreSignal: + if container.ExitCode != 0 { + return fmt.Errorf("restore process failed") + } + case err := <-promise.Go(container.monitor.Restore): + return err + } + + return nil +} + func (container *Container) getProcessLabel() string { // even if we have a process label return "" if we are running // in privileged mode @@ -973,7 +1035,6 @@ func attach(streamConfig *streamConfig, openStdin, stdinOnce, tty bool, stdin io _, err = copyEscapable(cStdin, stdin) } else { _, err = io.Copy(cStdin, stdin) - } if err == io.ErrClosedPipe { err = nil diff --git a/daemon/container_unix.go b/daemon/container_unix.go index fb6695cc3ed53..9af815c5d2064 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -378,7 +378,54 @@ func mergeDevices(defaultDevices, userDevices []*configs.Device) []*configs.Devi return append(devs, userDevices...) } -// GetSize returns the real size & virtual size of the container. +// Like populateCommand() but for restoring a container. +// +// XXX populateCommand() does a lot more. Not sure if we have +// to do everything it does. +func populateCommandRestore(c *Container, env []string) error { + resources := &execdriver.Resources{ + Memory: c.Config.Memory, + MemorySwap: c.Config.MemorySwap, + CpuShares: c.Config.CpuShares, + Cpuset: c.Config.Cpuset, + } + + processConfig := execdriver.ProcessConfig{ + Privileged: c.hostConfig.Privileged, + Entrypoint: c.Path, + Arguments: c.Args, + Tty: c.Config.Tty, + User: c.Config.User, + } + + processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true} + processConfig.Env = env + + c.command = &execdriver.Command{ + ID: c.ID, + Rootfs: c.RootfsPath(), + ReadonlyRootfs: c.hostConfig.ReadonlyRootfs, + InitPath: "/.dockerinit", + WorkingDir: c.Config.WorkingDir, + // Network: en, + // Ipc: ipc, + // Pid: pid, + Resources: resources, + // AllowedDevices: allowedDevices, + // AutoCreatedDevices: autoCreatedDevices, + CapAdd: c.hostConfig.CapAdd, + CapDrop: c.hostConfig.CapDrop, + ProcessConfig: processConfig, + ProcessLabel: c.GetProcessLabel(), + MountLabel: c.GetMountLabel(), + // LxcConfig: lxcConfig, + AppArmorProfile: c.AppArmorProfile, + } + + return nil +} + +// GetSize, return real size, virtual size func (container *Container) getSize() (int64, int64) { var ( sizeRw, sizeRootfs int64 diff --git a/daemon/daemon.go b/daemon/daemon.go index ec9a759f776c2..0cad264e6d461 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -296,6 +296,18 @@ func (daemon *Daemon) restore() error { logrus.Debugf("Loaded container %v", container.ID) containers[container.ID] = &cr{container: container} + + // If the container was checkpointed, we need to reserve + // the IP address that it was using. + // + // XXX We should also reserve host ports (if any). 
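waitForRestore() above races monitor.restoreSignal against promise.Go(container.monitor.Restore). For context, a sketch of that helper's contract, mirroring github.com/docker/docker/pkg/promise (the mirrored signature is an assumption stated here, not part of the diff):

package main

import (
	"errors"
	"fmt"
)

// Go mirrors pkg/promise.Go: run f in its own goroutine and deliver its
// error on a buffered channel so the send never blocks.
func Go(f func() error) chan error {
	ch := make(chan error, 1)
	go func() { ch <- f() }()
	return ch
}

func main() {
	restoreSignal := make(chan struct{}) // closed by restoreCallback on success
	select {
	case <-restoreSignal:
		fmt.Println("container restored")
	case err := <-Go(func() error {
		return errors.New("monitor.Restore failed before signalling")
	}):
		fmt.Println(err)
	}
}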
+ if container.IsCheckpointed() { + /*err = bridge.ReserveIP(container.ID, container.NetworkSettings.IPAddress) + if err != nil { + log.Errorf("Failed to reserve IP %s for container %s", + container.ID, container.NetworkSettings.IPAddress) + }*/ + } } else { logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } @@ -946,6 +958,25 @@ func (daemon *Daemon) run(c *Container, pipes *execdriver.Pipes, startCallback e return daemon.execDriver.Run(c.command, pipes, hooks) } +func (daemon *Daemon) Checkpoint(c *Container) error { + if err := daemon.execDriver.Checkpoint(c.command); err != nil { + return err + } + c.SetCheckpointed() + return nil +} + +func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { + // Mount the container's filesystem (daemon/graphdriver/aufs/aufs.go). + _, err := daemon.driver.Get(c.ID, c.GetMountLabel()) + if err != nil { + return 0, err + } + + exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback) + return exitCode, err +} + func (daemon *Daemon) kill(c *Container, sig int) error { return daemon.execDriver.Kill(c.command, sig) } diff --git a/daemon/monitor.go b/daemon/monitor.go index 4af0d2a2fd0a8..5c9d687556734 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -47,6 +47,9 @@ type containerMonitor struct { // left waiting for nothing to happen during this time stopChan chan struct{} + // like startSignal but for restoring a container + restoreSignal chan struct{} + // timeIncrement is the amount of time to wait between restarts // this is in milliseconds timeIncrement int @@ -64,6 +67,7 @@ func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) * timeIncrement: defaultTimeIncrement, stopChan: make(chan struct{}), startSignal: make(chan struct{}), + restoreSignal: make(chan struct{}), } } @@ -183,6 +187,49 @@ func (m *containerMonitor) Start() error { } } +// Like Start() but for restoring a container. +func (m *containerMonitor) Restore() error { + var ( + err error + // XXX The following line should be changed to + // exitStatus execdriver.ExitStatus to match Start() + exitCode int + afterRestore bool + ) + + defer func() { + if afterRestore { + m.container.Lock() + m.container.setStopped(&execdriver.ExitStatus{exitCode, false}) + defer m.container.Unlock() + } + m.Close() + }() + + if err := m.container.startLoggingToDisk(); err != nil { + m.resetContainer(false) + return err + } + + pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) + + m.container.LogEvent("restore") + m.lastStartTime = time.Now() + if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback); err != nil { + log.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode) + m.container.ExitCode = -1 + m.resetContainer(false) + return err + } + afterRestore = true + + m.container.ExitCode = exitCode + m.resetMonitor(err == nil && exitCode == 0) + m.container.LogEvent("die") + m.resetContainer(true) + return err +} + // resetMonitor resets the stateful fields on the containerMonitor based on the // previous runs success or failure. Regardless of success, if the container had // an execution time of more than 10s then reset the timer back to the default @@ -278,6 +325,29 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid return nil } +// Like callback() but for restoring a container. 
+func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int) { + // If restorePid is 0, it means that restore failed. + if restorePid != 0 { + m.container.setRunning(restorePid) + } + + // Unblock the goroutine waiting in waitForRestore(). + select { + case <-m.restoreSignal: + default: + close(m.restoreSignal) + } + + if restorePid != 0 { + // Write config.json and hostconfig.json files + // to /var/lib/docker/containers/. + if err := m.container.ToDisk(); err != nil { + log.Debugf("%s", err) + } + } +} + // resetContainer resets the container's IO and ensures that the command is able to be executed again // by copying the data into a new struct // if lock is true, then container locked during reset diff --git a/daemon/state.go b/daemon/state.go index 5adeb77e23bcb..dce806dd00c97 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -20,6 +20,7 @@ type State struct { Running bool Paused bool Restarting bool + Checkpointed bool OOMKilled bool removalInProgress bool // Not need for this to be persistent on disk. Dead bool @@ -28,7 +29,9 @@ type State struct { Error string // contains last known error when starting the container StartedAt time.Time FinishedAt time.Time + CheckpointedAt time.Time waitChan chan struct{} + } // NewState creates a default state object with a fresh channel for state changes. @@ -49,6 +52,8 @@ func (s *State) String() string { } return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) + } else if s.Checkpointed { + return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt))) } if s.removalInProgress { @@ -182,6 +187,7 @@ func (s *State) setRunning(pid int) { s.Error = "" s.Running = true s.Paused = false + s.Checkpointed = false s.Restarting = false s.ExitCode = 0 s.Pid = pid @@ -263,3 +269,20 @@ func (s *State) setDead() { s.Dead = true s.Unlock() } + +func (s *State) SetCheckpointed() { + s.Lock() + s.CheckpointedAt = time.Now().UTC() + s.Checkpointed = true + s.Running = false + s.Paused = false + s.Restarting = false + // XXX Not sure if we need to close and recreate waitChan. + // close(s.waitChan) + // s.waitChan = make(chan struct{}) + s.Unlock() +} + +func (s *State) IsCheckpointed() bool { + return s.Checkpointed +} From 022530af53e02571777baa436a814505cb77d5e0 Mon Sep 17 00:00:00 2001 From: boucher Date: Thu, 29 Oct 2015 15:59:22 -0400 Subject: [PATCH 3/4] Update checkpoint and restore to latest docker/master. - C/R is now an EXPERIMENTAL level feature. 
- Requires CRIU 1.6 (and builds it from source in the Dockerfile) - Introduces checkpoint and restore as top level cli methods (will likely change) Signed-off-by: Ross Boucher --- Dockerfile | 17 ++ api/client/checkpoint.go | 55 +++++ api/client/restore.go | 57 +++++ api/server/router/local/local.go | 2 + api/server/router/local/local_experimental.go | 65 ++++++ api/server/router/local/local_stable.go | 6 + api/server/server.go | 30 --- api/types/types.go | 24 +- daemon/checkpoint.go | 67 +++--- daemon/container.go | 69 +----- daemon/container_checkpoint.go | 104 +++++++++ daemon/container_unix.go | 70 ++---- daemon/container_windows.go | 4 +- daemon/daemon.go | 20 +- daemon/execdriver/driver.go | 11 +- daemon/execdriver/lxc/driver.go | 10 +- daemon/execdriver/native/driver.go | 209 ++++++++---------- daemon/execdriver/windows/windows.go | 11 + daemon/inspect.go | 24 +- daemon/monitor.go | 52 +++-- daemon/state.go | 20 +- docker/docker.go | 2 +- docker/flags_experimental.go | 21 ++ experimental/README.md | 9 +- experimental/checkpoint_restore.md | 154 +++++++++++++ integration-cli/docker_cli_checkpoint_test.go | 39 ++++ integration-cli/docker_cli_help_test.go | 2 +- project/PACKAGERS.md | 3 + runconfig/restore.go | 18 ++ 29 files changed, 810 insertions(+), 365 deletions(-) create mode 100644 api/client/checkpoint.go create mode 100644 api/client/restore.go create mode 100644 api/server/router/local/local_experimental.go create mode 100644 api/server/router/local/local_stable.go create mode 100644 daemon/container_checkpoint.go create mode 100644 docker/flags_experimental.go create mode 100644 experimental/checkpoint_restore.md create mode 100644 integration-cli/docker_cli_checkpoint_test.go create mode 100644 runconfig/restore.go diff --git a/Dockerfile b/Dockerfile index 4c6825c966a29..140e0e79ae9e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,9 +32,11 @@ RUN echo deb http://ppa.launchpad.net/zfs-native/stable/ubuntu trusty main > /et # Packaged dependencies RUN apt-get update && apt-get install -y \ apparmor \ + asciidoc \ aufs-tools \ automake \ bash-completion \ + bsdmainutils \ btrfs-tools \ build-essential \ createrepo \ @@ -43,21 +45,29 @@ RUN apt-get update && apt-get install -y \ gcc-mingw-w64 \ git \ iptables \ + libaio-dev \ libapparmor-dev \ libcap-dev \ + libprotobuf-c0-dev \ + libprotobuf-dev \ libsqlite3-dev \ libsystemd-journal-dev \ mercurial \ parallel \ pkg-config \ + protobuf-compiler \ + protobuf-c-compiler \ + python-minimal \ python-mock \ python-pip \ + python-protobuf \ python-websocket \ reprepro \ ruby1.9.1 \ ruby1.9.1-dev \ s3cmd=1.1.0* \ ubuntu-zfs \ + xmlto \ libzfs-dev \ --no-install-recommends @@ -82,6 +92,13 @@ RUN cd /usr/src/lxc \ && make install \ && ldconfig +# Install Criu +RUN mkdir -p /usr/src/criu \ + && curl -sSL https://github.com/xemul/criu/archive/v1.6.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 +RUN cd /usr/src/criu \ + && make \ + && make install + # Install Go ENV GO_VERSION 1.5.1 RUN curl -sSL "https://storage.googleapis.com/golang/go${GO_VERSION}.linux-amd64.tar.gz" | tar -v -C /usr/local -xz diff --git a/api/client/checkpoint.go b/api/client/checkpoint.go new file mode 100644 index 0000000000000..9655e68de221f --- /dev/null +++ b/api/client/checkpoint.go @@ -0,0 +1,55 @@ +// +build experimental + +package client + +import ( + "fmt" + + Cli "github.com/docker/docker/cli" + flag "github.com/docker/docker/pkg/mflag" + "github.com/docker/docker/runconfig" +) + +// CmdCheckpoint checkpoints the process running in a container 
+// +// Usage: docker checkpoint CONTAINER +func (cli *DockerCli) CmdCheckpoint(args ...string) error { + cmd := Cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory for storing checkpoint image files") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for storing log file") + flLeaveRunning = cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpoint") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + criuOpts := &runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + LeaveRunning: *flLeaveRunning, + TCPEstablished: true, + ExternalUnixConnections: true, + FileLocks: true, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/checkpoint", criuOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to checkpoint one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/client/restore.go b/api/client/restore.go new file mode 100644 index 0000000000000..e73b62b509303 --- /dev/null +++ b/api/client/restore.go @@ -0,0 +1,57 @@ +// +build experimental + +package client + +import ( + "fmt" + + Cli "github.com/docker/docker/cli" + flag "github.com/docker/docker/pkg/mflag" + "github.com/docker/docker/runconfig" +) + +// CmdRestore restores the process in a checkpointed container +// +// Usage: docker restore CONTAINER +func (cli *DockerCli) CmdRestore(args ...string) error { + cmd := Cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory to restore image files from") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for restore log") + flForce = cmd.Bool([]string{"-force"}, false, "bypass checks for current container state") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + restoreOpts := &runconfig.RestoreConfig{ + CriuOpts: runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + TCPEstablished: true, + ExternalUnixConnections: true, + FileLocks: true, + }, + ForceRestore: *flForce, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/restore", restoreOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to restore one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/server/router/local/local.go b/api/server/router/local/local.go index c73e852a22639..fff5bd1fd53e2 100644 --- a/api/server/router/local/local.go +++ b/api/server/router/local/local.go @@ -149,6 +149,8 @@ func (r *router) initRoutes() { NewDeleteRoute("/images/{name:.*}", r.deleteImages), NewDeleteRoute("/volumes/{name:.*}", r.deleteVolumes), } + + addExperimentalRoutes(r) } func optionsHandler(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { diff --git 
a/api/server/router/local/local_experimental.go b/api/server/router/local/local_experimental.go new file mode 100644 index 0000000000000..56da2f2a97924 --- /dev/null +++ b/api/server/router/local/local_experimental.go @@ -0,0 +1,65 @@ +// +build experimental + +package local + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/docker/docker/api/server/httputils" + dkrouter "github.com/docker/docker/api/server/router" + "github.com/docker/docker/runconfig" + "golang.org/x/net/context" +) + +func addExperimentalRoutes(r *router) { + newRoutes := []dkrouter.Route{ + NewPostRoute("/containers/{name:.*}/checkpoint", r.postContainersCheckpoint), + NewPostRoute("/containers/{name:.*}/restore", r.postContainersRestore), + } + + r.routes = append(r.routes, newRoutes...) +} + +func (s *router) postContainersCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := httputils.CheckForJSON(r); err != nil { + return err + } + + criuOpts := &runconfig.CriuConfig{} + if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil { + return err + } + + if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} + +func (s *router) postContainersRestore(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := httputils.CheckForJSON(r); err != nil { + return err + } + + restoreOpts := runconfig.RestoreConfig{} + if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil { + return err + } + + if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} diff --git a/api/server/router/local/local_stable.go b/api/server/router/local/local_stable.go new file mode 100644 index 0000000000000..7c6c012be06ae --- /dev/null +++ b/api/server/router/local/local_stable.go @@ -0,0 +1,6 @@ +// +build !experimental + +package local + +func addExperimentalRoutes(r *router) { +} diff --git a/api/server/server.go b/api/server/server.go index 9f2ee186ee6cc..7c9262c5bef9e 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -118,36 +118,6 @@ func (s *HTTPServer) Close() error { return s.l.Close() } -func postContainersCheckpoint(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { - if vars == nil { - return fmt.Errorf("Missing parameter") - } - if err := parseForm(r); err != nil { - return err - } - job := eng.Job("checkpoint", vars["name"]) - if err := job.Run(); err != nil { - return err - } - w.WriteHeader(http.StatusNoContent) - return nil -} - -func postContainersRestore(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { - if vars == nil { - return fmt.Errorf("Missing parameter") - } - if err := parseForm(r); err != nil { - return err - } - job := eng.Job("restore", vars["name"]) - if err := job.Run(); err != nil { - return err - } - w.WriteHeader(http.StatusNoContent) - return nil -} - func writeCorsHeaders(w http.ResponseWriter, r *http.Request, corsHeaders string) { logrus.Debugf("CORS header is enabled and set to: %s", corsHeaders) w.Header().Add("Access-Control-Allow-Origin", corsHeaders) diff --git 
a/api/types/types.go b/api/types/types.go index 3160ef94ed918..03532a0b3ac8a 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -235,17 +235,19 @@ type ExecStartCheck struct { // ContainerState stores container's running state // it's part of ContainerJSONBase and will return by "inspect" command type ContainerState struct { - Status string - Running bool - Paused bool - Restarting bool - OOMKilled bool - Dead bool - Pid int - ExitCode int - Error string - StartedAt string - FinishedAt string + Status string + Running bool + Paused bool + Checkpointed bool + Restarting bool + OOMKilled bool + Dead bool + Pid int + ExitCode int + Error string + StartedAt string + FinishedAt string + CheckpointedAt string `json:"-"` } // ContainerJSONBase contains response of Remote API: diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go index f6057c6a028f9..57bfa9b71be5a 100644 --- a/daemon/checkpoint.go +++ b/daemon/checkpoint.go @@ -1,55 +1,56 @@ package daemon import ( - "github.com/docker/docker/engine" -) + "fmt" -// Checkpoint a running container. -func (daemon *Daemon) ContainerCheckpoint(job *engine.Job) engine.Status { - if len(job.Args) != 1 { - return job.Errorf("Usage: %s CONTAINER\n", job.Name) - } + "github.com/docker/docker/runconfig" +) - name := job.Args[0] +// ContainerCheckpoint checkpoints the process running in a container with CRIU +func (daemon *Daemon) ContainerCheckpoint(name string, opts *runconfig.CriuConfig) error { container, err := daemon.Get(name) if err != nil { - return job.Error(err) + return err } if !container.IsRunning() { - return job.Errorf("Container %s not running", name) + return fmt.Errorf("Container %s not running", name) } - - if err := container.Checkpoint(); err != nil { - return job.Errorf("Cannot checkpoint container %s: %s", name, err) + if err := container.Checkpoint(opts); err != nil { + return fmt.Errorf("Cannot checkpoint container %s: %s", name, err) } - container.LogEvent("checkpoint") - return engine.StatusOK + container.logEvent("checkpoint") + return nil } -// Restore a checkpointed container. 
-func (daemon *Daemon) ContainerRestore(job *engine.Job) engine.Status { - if len(job.Args) != 1 { - return job.Errorf("Usage: %s CONTAINER\n", job.Name) - } - - name := job.Args[0] +// ContainerRestore restores the process in a container with CRIU +func (daemon *Daemon) ContainerRestore(name string, opts *runconfig.CriuConfig, forceRestore bool) error { container, err := daemon.Get(name) if err != nil { - return job.Error(err) - } - if container.IsRunning() { - return job.Errorf("Container %s already running", name) + return err } - if !container.State.IsCheckpointed() { - return job.Errorf("Container %s is not checkpointed", name) + + if !forceRestore { + // TODO: It's possible we only want to bypass the checkpointed check, + // I'm not sure how this will work if the container is already running + if container.IsRunning() { + return fmt.Errorf("Container %s already running", name) + } + + if !container.IsCheckpointed() { + return fmt.Errorf("Container %s is not checkpointed", name) + } + } else { + if !container.HasBeenCheckpointed() && opts.ImagesDirectory == "" { + return fmt.Errorf("You must specify an image directory to restore from %s", name) + } } - if err := container.Restore(); err != nil { - container.LogEvent("die") - return job.Errorf("Cannot restore container %s: %s", name, err) + if err = container.Restore(opts, forceRestore); err != nil { + container.logEvent("die") + return fmt.Errorf("Cannot restore container %s: %s", name, err) } - container.LogEvent("restore") - return engine.StatusOK + container.logEvent("restore") + return nil } diff --git a/daemon/container.go b/daemon/container.go index fa2982d63eb5b..8f836bd922026 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -277,7 +277,7 @@ func (container *Container) Start() (err error) { // backwards API compatibility. container.hostConfig = runconfig.SetDefaultNetModeIfBlank(container.hostConfig) - if err := container.initializeNetworking(); err != nil { + if err := container.initializeNetworking(false); err != nil { return err } linkedEnv, err := container.setupLinkedContainers() @@ -335,11 +335,12 @@ func (streamConfig *streamConfig) StderrPipe() io.ReadCloser { // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. 
func (container *Container) cleanup() { - if container.IsCheckpointed() { - log.CRDbg("not calling ReleaseNetwork() for checkpointed container %s", container.ID) + /*if container.IsCheckpointed() { + log.CRDbg("not calling releaseNetwork() for checkpointed container %s", container.ID) } else { - container.ReleaseNetwork() - } + container.releaseNetwork() + }*/ + container.releaseNetwork() if err := container.unmountIpcMounts(); err != nil { logrus.Errorf("%s: Failed to umount ipc filesystems: %v", container.ID, err) @@ -686,41 +687,6 @@ func (container *Container) copy(resource string) (rc io.ReadCloser, err error) return reader, nil } -func (container *Container) Checkpoint() error { - return container.daemon.Checkpoint(container) -} - -func (container *Container) Restore() error { - var err error - - container.Lock() - defer container.Unlock() - - defer func() { - if err != nil { - container.cleanup() - } - }() - - if err = container.initializeNetworking(); err != nil { - return err - } - - linkedEnv, err := container.setupLinkedContainers() - if err != nil { - return err - } - if err = container.setupWorkingDirectory(); err != nil { - return err - } - env := container.createDaemonEnvironment(linkedEnv) - if err = populateCommandRestore(container, env); err != nil { - return err - } - - return container.waitForRestore() -} - // Returns true if the container exposes a certain port func (container *Container) exposes(p nat.Port) bool { _, exists := container.Config.ExposedPorts[p] @@ -812,29 +778,6 @@ func (container *Container) waitForStart() error { return nil } -// Like waitForStart() but for restoring a container. -// -// XXX Does RestartPolicy apply here? -func (container *Container) waitForRestore() error { - container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) - - // After calling promise.Go() we'll have two goroutines: - // - The current goroutine that will block in the select - // below until restore is done. - // - A new goroutine that will restore the container and - // wait for it to exit. 
- select { - case <-container.monitor.restoreSignal: - if container.ExitCode != 0 { - return fmt.Errorf("restore process failed") - } - case err := <-promise.Go(container.monitor.Restore): - return err - } - - return nil -} - func (container *Container) getProcessLabel() string { // even if we have a process label return "" if we are running // in privileged mode diff --git a/daemon/container_checkpoint.go b/daemon/container_checkpoint.go new file mode 100644 index 0000000000000..8330f9ec9fab4 --- /dev/null +++ b/daemon/container_checkpoint.go @@ -0,0 +1,104 @@ +package daemon + +import ( + "fmt" + + "github.com/docker/docker/pkg/promise" + "github.com/docker/docker/runconfig" +) + +// Checkpoint checkpoints the running container, saving its state afterwards +func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error { + if err := container.daemon.Checkpoint(container, opts); err != nil { + return err + } + + if opts.LeaveRunning == false { + container.cleanup() + } + + if err := container.toDisk(); err != nil { + return fmt.Errorf("Cannot update config for container: %s", err) + } + + return nil +} + +// Restore restores the container's process from images on disk +func (container *Container) Restore(opts *runconfig.CriuConfig, forceRestore bool) error { + var err error + container.Lock() + defer container.Unlock() + + defer func() { + if err != nil { + container.setError(err) + // if no one else has set it, make sure we don't leave it at zero + if container.ExitCode == 0 { + container.ExitCode = 128 + } + container.toDisk() + container.cleanup() + } + }() + + if err := container.Mount(); err != nil { + return err + } + + // Make sure NetworkMode has an acceptable value. We do this to ensure + // backwards API compatibility. + container.hostConfig = runconfig.SetDefaultNetModeIfBlank(container.hostConfig) + if err = container.initializeNetworking(true); err != nil { + return err + } + + linkedEnv, err := container.setupLinkedContainers() + if err != nil { + return err + } + + if err = container.setupWorkingDirectory(); err != nil { + return err + } + + env := container.createDaemonEnvironment(linkedEnv) + if err = populateCommand(container, env); err != nil { + return err + } + + if !container.hostConfig.IpcMode.IsContainer() && !container.hostConfig.IpcMode.IsHost() { + if err := container.setupIpcDirs(); err != nil { + return err + } + } + + mounts, err := container.setupMounts() + if err != nil { + return err + } + mounts = append(mounts, container.ipcMounts()...) + + container.command.Mounts = mounts + return container.waitForRestore(opts, forceRestore) +} + +func (container *Container) waitForRestore(opts *runconfig.CriuConfig, forceRestore bool) error { + container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) + + // After calling promise.Go() we'll have two goroutines: + // - The current goroutine that will block in the select + // below until restore is done. + // - A new goroutine that will restore the container and + // wait for it to exit. 
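A note on the error handling in the new Restore() in container_checkpoint.go above: the method declares var err error, installs a deferred cleanup that fires whenever err is non-nil, and then assigns with err = step() rather than err := step(). The distinction is load-bearing, because the := form shadows the outer variable and silently bypasses the deferred cleanup; note that the Mount() check above still uses :=, so its error would skip that path. A minimal sketch of the idiom, with step() as a hypothetical stand-in for any setup call:

package main

import (
	"errors"
	"fmt"
)

func restore(fail bool) (err error) {
	// Runs on any failed step, because it reads the variable being
	// returned; an inner "err :=" would hide failures from it.
	defer func() {
		if err != nil {
			fmt.Println("cleanup: unmount rootfs, release network, write state")
		}
	}()

	if err = step(fail); err != nil { // assignment, not ":="
		return err
	}
	fmt.Println("restored")
	return nil
}

func step(fail bool) error {
	if fail {
		return errors.New("mount failed")
	}
	return nil
}

func main() {
	fmt.Println(restore(true))
	fmt.Println(restore(false))
}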
+ select { + case <-container.monitor.restoreSignal: + if container.ExitCode != 0 { + return fmt.Errorf("restore process failed") + } + case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }): + return err + } + + return nil +} diff --git a/daemon/container_unix.go b/daemon/container_unix.go index 9af815c5d2064..3caf3545129c3 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -378,53 +378,6 @@ func mergeDevices(defaultDevices, userDevices []*configs.Device) []*configs.Devi return append(devs, userDevices...) } -// Like populateCommand() but for restoring a container. -// -// XXX populateCommand() does a lot more. Not sure if we have -// to do everything it does. -func populateCommandRestore(c *Container, env []string) error { - resources := &execdriver.Resources{ - Memory: c.Config.Memory, - MemorySwap: c.Config.MemorySwap, - CpuShares: c.Config.CpuShares, - Cpuset: c.Config.Cpuset, - } - - processConfig := execdriver.ProcessConfig{ - Privileged: c.hostConfig.Privileged, - Entrypoint: c.Path, - Arguments: c.Args, - Tty: c.Config.Tty, - User: c.Config.User, - } - - processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true} - processConfig.Env = env - - c.command = &execdriver.Command{ - ID: c.ID, - Rootfs: c.RootfsPath(), - ReadonlyRootfs: c.hostConfig.ReadonlyRootfs, - InitPath: "/.dockerinit", - WorkingDir: c.Config.WorkingDir, - // Network: en, - // Ipc: ipc, - // Pid: pid, - Resources: resources, - // AllowedDevices: allowedDevices, - // AutoCreatedDevices: autoCreatedDevices, - CapAdd: c.hostConfig.CapAdd, - CapDrop: c.hostConfig.CapDrop, - ProcessConfig: processConfig, - ProcessLabel: c.GetProcessLabel(), - MountLabel: c.GetMountLabel(), - // LxcConfig: lxcConfig, - AppArmorProfile: c.AppArmorProfile, - } - - return nil -} - // GetSize, return real size, virtual size func (container *Container) getSize() (int64, int64) { var ( @@ -865,7 +818,7 @@ func (container *Container) updateNetwork() error { return nil } -func (container *Container) buildCreateEndpointOptions(n libnetwork.Network) ([]libnetwork.EndpointOption, error) { +func (container *Container) buildCreateEndpointOptions(n libnetwork.Network, isRestoring bool) ([]libnetwork.EndpointOption, error) { var ( portSpecs = make(nat.PortSet) bindings = make(nat.PortMap) @@ -945,6 +898,13 @@ func (container *Container) buildCreateEndpointOptions(n libnetwork.Network) ([] if n.Name() == "bridge" || container.NetworkSettings.IsAnonymousEndpoint { createOptions = append(createOptions, libnetwork.CreateOptionAnonymous()) + /*if isRestoring && container.NetworkSettings.IPAddress != "" { + genericOption := options.Generic{ + netlabel.IPAddress: net.ParseIP(container.NetworkSettings.IPAddress), + } + + createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) + }*/ } return createOptions, nil @@ -966,7 +926,7 @@ func createNetwork(controller libnetwork.NetworkController, dnet string, driver return controller.NewNetwork(driver, dnet, createOptions...) 
} -func (container *Container) allocateNetwork() error { +func (container *Container) allocateNetwork(isRestoring bool) error { updateSettings := false if len(container.NetworkSettings.Networks) == 0 { mode := container.hostConfig.NetworkMode @@ -985,7 +945,7 @@ func (container *Container) allocateNetwork() error { } for n := range container.NetworkSettings.Networks { - if err := container.connectToNetwork(n, updateSettings); err != nil { + if err := container.connectToNetwork(n, updateSettings, isRestoring); err != nil { return err } } @@ -998,10 +958,10 @@ func (container *Container) ConnectToNetwork(idOrName string) error { if !container.Running { return derr.ErrorCodeNotRunning.WithArgs(container.ID) } - return container.connectToNetwork(idOrName, true) + return container.connectToNetwork(idOrName, true, false) } -func (container *Container) connectToNetwork(idOrName string, updateSettings bool) error { +func (container *Container) connectToNetwork(idOrName string, updateSettings bool, isRestoring bool) error { var err error if container.hostConfig.NetworkMode.IsContainer() { @@ -1036,7 +996,7 @@ func (container *Container) connectToNetwork(idOrName string, updateSettings boo return err } - createOptions, err := container.buildCreateEndpointOptions(n) + createOptions, err := container.buildCreateEndpointOptions(n, isRestoring) if err != nil { return err } @@ -1090,7 +1050,7 @@ func (container *Container) connectToNetwork(idOrName string, updateSettings boo return nil } -func (container *Container) initializeNetworking() error { +func (container *Container) initializeNetworking(isRestoring bool) error { var err error if container.hostConfig.NetworkMode.IsContainer() { @@ -1121,7 +1081,7 @@ func (container *Container) initializeNetworking() error { } - if err := container.allocateNetwork(); err != nil { + if err := container.allocateNetwork(isRestoring); err != nil { return err } diff --git a/daemon/container_windows.go b/daemon/container_windows.go index 5d62a96d68995..94e61b2f9fe30 100644 --- a/daemon/container_windows.go +++ b/daemon/container_windows.go @@ -36,7 +36,7 @@ func (container *Container) createDaemonEnvironment(linkedEnv []string) []string return container.Config.Env } -func (container *Container) initializeNetworking() error { +func (container *Container) initializeNetworking(isRestoring bool) error { return nil } @@ -161,7 +161,7 @@ func (container *Container) setNetworkNamespaceKey(pid int) error { } // allocateNetwork is a no-op on Windows. 
-func (container *Container) allocateNetwork() error { +func (container *Container) allocateNetwork(isRestoring bool) error { return nil } diff --git a/daemon/daemon.go b/daemon/daemon.go index 0cad264e6d461..067f3232cdf66 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -958,22 +958,28 @@ func (daemon *Daemon) run(c *Container, pipes *execdriver.Pipes, startCallback e return daemon.execDriver.Run(c.command, pipes, hooks) } -func (daemon *Daemon) Checkpoint(c *Container) error { - if err := daemon.execDriver.Checkpoint(c.command); err != nil { +// Checkpoint the container +func (daemon *Daemon) Checkpoint(c *Container, opts *runconfig.CriuConfig) error { + if err := daemon.execDriver.Checkpoint(c.command, opts); err != nil { return err } - c.SetCheckpointed() + c.SetCheckpointed(opts.LeaveRunning) return nil } -func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { +// Restore the container +func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.DriverCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { // Mount the container's filesystem (daemon/graphdriver/aufs/aufs.go). - _, err := daemon.driver.Get(c.ID, c.GetMountLabel()) + _, err := daemon.driver.Get(c.ID, c.getMountLabel()) if err != nil { - return 0, err + return execdriver.ExitStatus{ExitCode: 0}, err } - exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback) + hooks := execdriver.Hooks{ + Restore: restoreCallback, + } + + exitCode, err := daemon.execDriver.Restore(c.command, pipes, hooks, opts, forceRestore) return exitCode, err } diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 76c2870a3ae76..e06a6750b0434 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -7,6 +7,7 @@ import ( "time" "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" ) @@ -39,10 +40,10 @@ type Hooks struct { Start DriverCallback // PostStop is called after the container process exits PostStop []DriverCallback + // Restore is called after the container is restored + Restore DriverCallback } -type RestoreCallback func(*ProcessConfig, int) - // Info is driver specific information based on // processes registered with the driver type Info interface { @@ -85,9 +86,11 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error - Checkpoint(c *Command) error + // Checkpoints a container (with criu). + Checkpoint(c *Command, opts *runconfig.CriuConfig) error - Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error) + // Restores a checkpoint image into a container (with criu). + Restore(c *Command, pipes *Pipes, hooks Hooks, opts *runconfig.CriuConfig, forceRestore bool) (ExitStatus, error) // Name returns the name of the driver. 
Name() string diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index 5940babf731d5..67ec76c58f29d 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -25,6 +25,7 @@ import ( sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" "github.com/docker/docker/pkg/version" + "github.com/docker/docker/runconfig" "github.com/kr/pty" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -575,14 +576,17 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return err } -func (d *driver) Checkpoint(c *execdriver.Command) error { +// Checkpoint does not currently implement checkpoint, but complies to the Driver interface +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { return fmt.Errorf("Checkpointing lxc containers not supported yet\n") } -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { - return 0, fmt.Errorf("Restoring lxc containers not supported yet\n") +// Restore does not currently implement restore, but complies to the Driver interface +func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet\n") } +// Terminate implements the exec driver Driver interface. func (d *Driver) Terminate(c *execdriver.Command) error { return killLxc(c.ID, 9) } diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index f87a40a0083ec..d67783b2ecb44 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -20,7 +20,7 @@ import ( "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" - "github.com/docker/docker/utils" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" @@ -159,10 +159,13 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd d.activeContainers[c.ID] = cont d.Unlock() defer func() { - if !destroyed { - cont.Destroy() + status, _ := cont.Status() + if status != libcontainer.Checkpointed { + if !destroyed { + cont.Destroy() + } + d.cleanContainer(c.ID) } - d.cleanContainer(c.ID) }() if err := cont.Start(p); err != nil { @@ -303,49 +306,28 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return active.Resume() } -// XXX Where is the right place for the following -// const and getCheckpointImageDir() function? 
-const ( - containersDir = "/var/lib/docker/containers" - criuImgDir = "criu_img" -) - -func getCheckpointImageDir(containerId string) string { - return filepath.Join(containersDir, containerId, criuImgDir) +func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.CriuOpts { + return &libcontainer.CriuOpts{ + ImagesDirectory: runconfigOpts.ImagesDirectory, + WorkDirectory: runconfigOpts.WorkDirectory, + LeaveRunning: runconfigOpts.LeaveRunning, + TcpEstablished: runconfigOpts.TCPEstablished, + ExternalUnixConnections: runconfigOpts.ExternalUnixConnections, + ShellJob: runconfigOpts.ShellJob, + FileLocks: runconfigOpts.FileLocks, + } } -func (d *driver) Checkpoint(c *execdriver.Command) error { +// Checkpoint implements the exec driver Driver interface. +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { active := d.activeContainers[c.ID] if active == nil { return fmt.Errorf("active container for %s does not exist", c.ID) } - container := active.container - - // Create an image directory for this container (which - // may already exist from a previous checkpoint). - imageDir := getCheckpointImageDir(c.ID) - err := os.MkdirAll(imageDir, 0700) - if err != nil && !os.IsExist(err) { - return err - } - - // Copy container.json and state.json files to the CRIU - // image directory for later use during restore. Do this - // before checkpointing because after checkpoint the container - // will exit and these files will be removed. - log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir) - srcFiles := []string{"container.json", "state.json"} - for _, f := range srcFiles { - srcFile := filepath.Join(d.root, c.ID, f) - dstFile := filepath.Join(imageDir, f) - if _, err := utils.CopyFile(srcFile, dstFile); err != nil { - return err - } - } d.Lock() defer d.Unlock() - err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid) + err := active.Checkpoint(libcontainerCriuOpts(opts)) if err != nil { return err } @@ -353,103 +335,90 @@ func (d *driver) Checkpoint(c *execdriver.Command) error { return nil } -type restoreOutput struct { - exitCode int - err error -} +// Restore implements the exec driver Driver interface. 
+func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + var ( + cont libcontainer.Container + err error + ) -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { - imageDir := getCheckpointImageDir(c.ID) - container, err := d.createRestoreContainer(c, imageDir) + destroyed := false + cont, err = d.factory.Load(c.ID) if err != nil { - return 1, err + if forceRestore { + var config *configs.Config + config, err = d.createContainer(c, hooks) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + cont, err = d.factory.Create(c.ID, config) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + } else { + return execdriver.ExitStatus{ExitCode: -1}, err + } } - var term execdriver.Terminal - - if c.ProcessConfig.Tty { - term, err = NewTtyConsole(&c.ProcessConfig, pipes) - } else { - term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) + p := &libcontainer.Process{ + Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), + Env: c.ProcessConfig.Env, + Cwd: c.WorkingDir, + User: c.ProcessConfig.User, } - if err != nil { - return -1, err + + config := cont.Config() + if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err } - c.ProcessConfig.Terminal = term d.Lock() - d.activeContainers[c.ID] = &activeContainer{ - container: container, - cmd: &c.ProcessConfig.Cmd, - } + d.activeContainers[c.ID] = cont d.Unlock() - defer d.cleanContainer(c.ID) + defer func() { + status, _ := cont.Status() + if status != libcontainer.Checkpointed { + if !destroyed { + cont.Destroy() + } + d.cleanContainer(c.ID) + } + }() - // Since the CRIU binary exits after restoring the container, we - // need to reap its child by setting PR_SET_CHILD_SUBREAPER (36) - // so that it'll be owned by this process (Docker daemon) after restore. - // - // XXX This really belongs to where the Docker daemon starts. - if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 { - return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr) + if err := cont.Restore(p, libcontainerCriuOpts(opts)); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err } - restoreOutputChan := make(chan restoreOutput, 1) - waitForRestore := make(chan struct{}) - - go func() { - exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir, - func(child *os.File, args []string) *exec.Cmd { - cmd := new(exec.Cmd) - cmd.Path = d.initPath - cmd.Args = append([]string{ - DriverName, - "-restore", - "-pipe", "3", - "--", - }, args...) - cmd.ExtraFiles = []*os.File{child} - return cmd - }, - func(restorePid int) error { - log.CRDbg("restorePid=%d", restorePid) - if restorePid == 0 { - restoreCallback(&c.ProcessConfig, 0) - return nil - } - - // The container.json file should be written *after* the container - // has started because its StdFds cannot be initialized before. - // - // XXX How do we handle error here? 
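On the tail of the rewritten Restore() above: after waitF() returns, the driver recovers the numeric exit status from the *exec.ExitError and syscall.WaitStatus pair (via utils.ExitStatus in the patch). A standalone sketch of that extraction on Linux, with the helper inlined rather than imported:

package main

import (
	"fmt"
	"os/exec"
	"syscall"
)

func main() {
	cmd := exec.Command("sh", "-c", "exit 3")
	err := cmd.Run()

	ps := cmd.ProcessState
	if execErr, ok := err.(*exec.ExitError); ok {
		// A non-zero exit surfaces as *exec.ExitError, whose
		// ProcessState still carries the raw wait status.
		ps = execErr.ProcessState
	}
	ws := ps.Sys().(syscall.WaitStatus)
	fmt.Println("exit code:", ws.ExitStatus()) // prints 3
}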
-				d.writeContainerFile(container, c.ID)
-				close(waitForRestore)
-				if restoreCallback != nil {
-					c.ProcessConfig.Process, err = os.FindProcess(restorePid)
-					if err != nil {
-						log.Debugf("cannot find restored process %d", restorePid)
-						return err
-					}
-					c.ContainerPid = c.ProcessConfig.Process.Pid
-					restoreCallback(&c.ProcessConfig, c.ContainerPid)
-				}
-				return nil
-			})
-		restoreOutputChan <- restoreOutput{exitCode, err}
-	}()
+	oom := notifyOnOOM(cont)
+	if hooks.Restore != nil {
+		pid, err := p.Pid()
+		if err != nil {
+			p.Signal(os.Kill)
+			p.Wait()
+			return execdriver.ExitStatus{ExitCode: -1}, err
+		}
+		hooks.Restore(&c.ProcessConfig, pid, oom)
+	}
 
-	select {
-	case restoreOutput := <-restoreOutputChan:
-		// there was an error
-		return restoreOutput.exitCode, restoreOutput.err
-	case <-waitForRestore:
-		// container restored
-		break
+	waitF := p.Wait
+	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
+		// we need this hack to track processes with inherited fds,
+		// because cmd.Wait() waits for all streams to be copied
+		waitF = waitInPIDHost(p, cont)
+	}
+	ps, err := waitF()
+	if err != nil {
+		execErr, ok := err.(*exec.ExitError)
+		if !ok {
+			return execdriver.ExitStatus{ExitCode: -1}, err
+		}
+		ps = execErr.ProcessState
 	}
 
-	// Wait for the container to exit.
-	restoreOutput := <-restoreOutputChan
-	return restoreOutput.exitCode, restoreOutput.err
+	cont.Destroy()
+	destroyed = true
+	_, oomKill := <-oom
+	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
 }
 
 // Terminate implements the exec driver Driver interface.
diff --git a/daemon/execdriver/windows/windows.go b/daemon/execdriver/windows/windows.go
index a1f4f48ae3122..324a383fc8b35 100644
--- a/daemon/execdriver/windows/windows.go
+++ b/daemon/execdriver/windows/windows.go
@@ -11,6 +11,7 @@ import (
 	"github.com/docker/docker/autogen/dockerversion"
 	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/parsers"
+	"github.com/docker/docker/runconfig"
 )
 
 // This is a daemon development variable only and should not be
@@ -94,3 +95,13 @@ func setupEnvironmentVariables(a []string) map[string]string {
 	}
 	return r
 }
+
+// Checkpoint does not currently implement checkpoint, but complies with the Driver interface
+func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error {
+	return fmt.Errorf("Windows: Containers cannot be checkpointed")
+}
+
+// Restore does not currently implement restore, but complies with the Driver interface
+func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) {
+	return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Windows: Containers cannot be restored")
+}
diff --git a/daemon/inspect.go b/daemon/inspect.go
index bd1f07fdca98e..832aac6cf065a 100644
--- a/daemon/inspect.go
+++ b/daemon/inspect.go
@@ -93,17 +93,19 @@ func (daemon *Daemon) getInspectData(container *Container, size bool) (*types.Co
 	}
 
 	containerState := &types.ContainerState{
-		Status:     container.State.StateString(),
-		Running:    container.State.Running,
-		Paused:     container.State.Paused,
-		Restarting: container.State.Restarting,
-		OOMKilled:  container.State.OOMKilled,
-		Dead:       container.State.Dead,
-		Pid:        container.State.Pid,
-		ExitCode:   container.State.ExitCode,
-		Error:      container.State.Error,
-		StartedAt:  container.State.StartedAt.Format(time.RFC3339Nano),
-		FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano),
+		Status:         container.State.StateString(),
+		Running:        container.State.Running,
+		Paused:         container.State.Paused,
+		Checkpointed:   container.State.Checkpointed,
+		Restarting:     container.State.Restarting,
+		OOMKilled:      container.State.OOMKilled,
+		Dead:           container.State.Dead,
+		Pid:            container.State.Pid,
+		ExitCode:       container.State.ExitCode,
+		Error:          container.State.Error,
+		StartedAt:      container.State.StartedAt.Format(time.RFC3339Nano),
+		FinishedAt:     container.State.FinishedAt.Format(time.RFC3339Nano),
+		CheckpointedAt: container.State.CheckpointedAt.Format(time.RFC3339Nano),
 	}
 
 	contJSONBase := &types.ContainerJSONBase{
diff --git a/daemon/monitor.go b/daemon/monitor.go
index 5c9d687556734..d644465a3bf67 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -188,44 +188,46 @@ func (m *containerMonitor) Start() error {
 }
 
 // Like Start() but for restoring a container.
-func (m *containerMonitor) Restore() error {
+func (m *containerMonitor) Restore(opts *runconfig.CriuConfig, forceRestore bool) error {
 	var (
 		err error
 		// XXX The following line should be changed to
 		// exitStatus execdriver.ExitStatus to match Start()
-		exitCode int
+		exitCode     execdriver.ExitStatus
 		afterRestore bool
 	)
-
 	defer func() {
 		if afterRestore {
 			m.container.Lock()
-			m.container.setStopped(&execdriver.ExitStatus{exitCode, false})
+			m.container.setStopped(&execdriver.ExitStatus{exitCode.ExitCode, false})
 			defer m.container.Unlock()
 		}
 		m.Close()
 	}()
 
-	if err := m.container.startLoggingToDisk(); err != nil {
-		m.resetContainer(false)
-		return err
+	// FIXME: right now if we startLogging again we get double logs after a restore
+	if m.container.logCopier == nil {
+		if err := m.container.startLogging(); err != nil {
+			m.resetContainer(false)
+			return err
+		}
 	}
 
 	pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)
 
-	m.container.LogEvent("restore")
+	m.container.logEvent("restore")
 	m.lastStartTime = time.Now()
-	if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback); err != nil {
-		log.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode)
+	if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback, opts, forceRestore); err != nil {
+		logrus.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode)
 		m.container.ExitCode = -1
 		m.resetContainer(false)
 		return err
 	}
 	afterRestore = true
 
-	m.container.ExitCode = exitCode
-	m.resetMonitor(err == nil && exitCode == 0)
-	m.container.LogEvent("die")
+	m.container.ExitCode = exitCode.ExitCode
+	m.resetMonitor(err == nil && exitCode.ExitCode == 0)
+	m.container.logEvent("die")
 	m.resetContainer(true)
 	return err
 }
 
@@ -326,7 +328,23 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid
 }
 
 // Like callback() but for restoring a container.
-func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int) {
+func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int, chOOM <-chan struct{}) error {
+	go func() {
+		_, ok := <-chOOM
+		if ok {
+			m.container.logEvent("oom")
+		}
+	}()
+
+	if processConfig.Tty {
+		// The callback is called after the process Start()
+		// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
+		// which we close here.
+		if c, ok := processConfig.Stdout.(io.Closer); ok {
+			c.Close()
+		}
+	}
+
+	// If restorePid is 0, it means that restore failed.
 	if restorePid != 0 {
 		m.container.setRunning(restorePid)
@@ -342,10 +360,12 @@ func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConf
 	if restorePid != 0 {
 		// Write config.json and hostconfig.json files
 		// to /var/lib/docker/containers/.
-		if err := m.container.ToDisk(); err != nil {
-			log.Debugf("%s", err)
+		if err := m.container.toDiskLocking(); err != nil {
+			logrus.Errorf("Error saving container to disk: %s", err)
 		}
 	}
+
+	return nil
 }
 
 // resetContainer resets the container's IO and ensures that the command is able to be executed again
diff --git a/daemon/state.go b/daemon/state.go
index dce806dd00c97..06ed1c48b30cd 100644
--- a/daemon/state.go
+++ b/daemon/state.go
@@ -31,7 +31,6 @@ type State struct {
 	FinishedAt     time.Time
 	CheckpointedAt time.Time
 	waitChan       chan struct{}
-
 }
 
 // NewState creates a default state object with a fresh channel for state changes.
@@ -52,7 +51,9 @@ func (s *State) String() string {
 		}
 
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
-	} else if s.Checkpointed {
+	}
+
+	if s.Checkpointed {
 		return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt)))
 	}
 
@@ -87,6 +88,10 @@ func (s *State) StateString() string {
 		return "running"
 	}
 
+	if s.Checkpointed {
+		return "checkpointed"
+	}
+
 	if s.Dead {
 		return "dead"
 	}
 
@@ -270,10 +275,11 @@ func (s *State) setDead() {
 	s.Unlock()
 }
 
-func (s *State) SetCheckpointed() {
+// SetCheckpointed sets the container's status to indicate it has been checkpointed
+func (s *State) SetCheckpointed(leaveRunning bool) {
 	s.Lock()
 	s.CheckpointedAt = time.Now().UTC()
-	s.Checkpointed = true
+	s.Checkpointed = !leaveRunning
 	s.Running = false
 	s.Paused = false
 	s.Restarting = false
@@ -283,6 +289,12 @@ func (s *State) SetCheckpointed() {
 	s.Unlock()
 }
 
+// HasBeenCheckpointed indicates whether the container has ever been checkpointed
+func (s *State) HasBeenCheckpointed() bool {
+	return s.CheckpointedAt != time.Time{}
+}
+
+// IsCheckpointed indicates whether the container is currently checkpointed
 func (s *State) IsCheckpointed() bool {
 	return s.Checkpointed
 }
diff --git a/docker/docker.go b/docker/docker.go
index 4795b8046df41..9267b7836e44a 100644
--- a/docker/docker.go
+++ b/docker/docker.go
@@ -36,7 +36,7 @@ func main() {
 	help := "\nCommands:\n"
 
 	for _, cmd := range dockerCommands {
-		help += fmt.Sprintf("  %-10.10s%s\n", cmd.Name, cmd.Description)
+		help += fmt.Sprintf("  %-11.11s%s\n", cmd.Name, cmd.Description)
 	}
 
 	help += "\nRun 'docker COMMAND --help' for more information on a command."
diff --git a/docker/flags_experimental.go b/docker/flags_experimental.go
new file mode 100644
index 0000000000000..608865d4e37b9
--- /dev/null
+++ b/docker/flags_experimental.go
@@ -0,0 +1,21 @@
+// +build experimental
+
+package main
+
+import (
+	"sort"
+
+	"github.com/docker/docker/cli"
+)
+
+func init() {
+	experimentalCommands := []cli.Command{
+		{"checkpoint", "Checkpoint one or more running containers"},
+		{"restore", "Restore one or more checkpointed containers"},
+	}
+
+	dockerCommands = append(dockerCommands, experimentalCommands...)
+
+	// Sorting logic is required here to pass the command sort test.
+	sort.Sort(byName(dockerCommands))
+}
diff --git a/experimental/README.md b/experimental/README.md
index ca4f1022610b3..42758298c7403 100644
--- a/experimental/README.md
+++ b/experimental/README.md
@@ -1,8 +1,8 @@
-# Docker Experimental Features 
+# Docker Experimental Features
 
 This page contains a list of features in the Docker engine which are
 experimental. Experimental features are **not** ready for production. They are
-provided for test and evaluation in your sandbox environments. 
+provided for test and evaluation in your sandbox environments.
 
 The information below describes each feature and the GitHub pull requests and
 issues associated with it. If necessary, links are provided to additional
@@ -73,9 +73,10 @@ to build a Docker binary with the experimental features enabled:
 
  * [External graphdriver plugins](plugins_graphdriver.md)
  * [User namespaces](userns.md)
+ * [Checkpoint & Restore](checkpoint_restore.md)
 
 ## How to comment on an experimental feature
 
-Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR. 
+Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.
 
-Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or in on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user). 
+Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user).
diff --git a/experimental/checkpoint_restore.md b/experimental/checkpoint_restore.md
new file mode 100644
index 0000000000000..f3ed0b5898e85
--- /dev/null
+++ b/experimental/checkpoint_restore.md
@@ -0,0 +1,154 @@
+# Docker Checkpoint & Restore
+
+Checkpoint & Restore is a new feature that allows you to freeze a running
+container by checkpointing it, which turns its state into a collection of files
+on disk. Later, the container can be restored from the point it was frozen.
+
+This is accomplished using a tool called [CRIU](http://criu.org), which is an
+external dependency of this feature. A good overview of the history of
+checkpoint and restore in Docker is available in this
+[Kubernetes blog post](http://blog.kubernetes.io/2015/07/how-did-quake-demo-from-dockercon-work.html).
+
+## Installing CRIU
+
+If you use a Debian-based system, you can add the CRIU PPA and install it with
+apt-get: https://launchpad.net/~criu/+archive/ubuntu/ppa.
+
+Alternatively, you can [build CRIU from source](http://criu.org/Installation).
+
+## Use cases for checkpoint & restore
+
+This feature is currently focused on single-host use cases for checkpoint and
+restore. Here are a few:
+
+- Restarting / upgrading the docker daemon without stopping containers
+- Restarting the host machine without stopping/starting containers
+- Speeding up the start time of slow-start applications
+- "Rewinding" processes to an earlier point in time
+- "Forensic debugging" of running processes
+
+Another primary use case of checkpoint & restore outside of Docker is the live
+migration of a server from one machine to another. This is possible with the
+current implementation, but not currently a priority (and so the workflow is
+not optimized for the task).
+
+## Using Checkpoint & Restore
+
+Two new top-level commands are introduced in the CLI: `checkpoint` & `restore`.
+The options for checkpoint:
+
+    Usage: docker checkpoint [OPTIONS] CONTAINER [CONTAINER...]
+
+    Checkpoint one or more running containers
+
+      --allow-shell=false    allow checkpointing shell jobs
+      --image-dir=           directory for storing checkpoint image files
+      --leave-running=false  leave the container running after checkpoint
+      --work-dir=            directory for storing log file
+
+And for restore:
+
+    Usage: docker restore [OPTIONS] CONTAINER [CONTAINER...]
+
+    Restore one or more checkpointed containers
+
+      --allow-shell=false  allow restoring shell jobs
+      --force=false        bypass checks for current container state
+      --image-dir=         directory to restore image files from
+      --work-dir=          directory for restore log
+
+A simple example of using checkpoint & restore on a container:
+
+    $ docker run --name cr -d busybox /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done'
+    > abc0123
+
+    $ docker checkpoint cr
+    > abc0123
+
+    $ docker restore cr
+    > abc0123
+
+This process just logs an incrementing counter to stdout. If you run
+`docker logs` in between running/checkpointing/restoring, you should see that
+the counter increases while the process is running, stops while it's
+checkpointed, and resumes from the point it left off once you restore.
+
+### Same container checkpoint/restore
+
+The above example falls into the category of "same container" use cases for c/r.
+Restarting the daemon is an example of this kind of use case. There is only one
+container here at any point in time. That container's status, once it is
+checkpointed, will be "Checkpointed", and `docker inspect` will show that status
+as well as the time of the last checkpoint. The IP address and other container
+state do not change (see known issues at the bottom of this document).
+
+### New container checkpoint/restore
+
+Here's an example of a "new container" use case for c/r:
+
+    $ docker run some_image
+    > abc789
+
+    ## the container runs for a while
+
+    $ docker checkpoint --image-dir=/some/path abc789
+    > abc789
+
+At this point, we've created a checkpoint image at `/some/path` that encodes a
+process at the exact state we want it to be. Now, at some later point in time,
+we can put a copy of that exact state into a new container (perhaps many times):
+
+    $ docker create some_image
+    > def123
+
+    $ docker restore --force=true --image-dir=/some/path def123
+    > def123
+
+We created a new container (but didn't start it), and then we restored our
+checkpointed process into that container.
+
+This is obviously more involved than the simple use case shown earlier. It
+requires starting subsequent containers with the same configuration (e.g.
+the same mounted volumes, the same base image, etc.).
+
+### Options
+
+Checkpoint & Restore:
+
+    --image-dir=  directory for storing checkpoint image files
+
+Allows you to specify the path for writing a checkpoint image, or the path of
+the image you want to restore.
+
+    --work-dir=  directory for storing log file
+
+Allows you to specify the path for writing the CRIU log.
+
+    --leave-running=false  leave the container running after checkpoint
+
+Normally, when checkpointing a process, the process is stopped afterwards.
+When this flag is enabled, the process keeps running after a checkpoint. This is
+useful if you want to capture a process at multiple points in time, for later
+use in debugging or rewinding a process for some reason. It's also used for
+minimizing downtime when checkpointing processes with a large memory footprint.
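+
+For example, with `--leave-running` you can take several snapshots of the same
+container over time (the container is the `cr` example from above; the image
+directories below are illustrative):
+
+    $ docker checkpoint --leave-running=true --image-dir=/tmp/cr1 cr
+    > abc0123
+
+    ## the container keeps running; snapshot it again later
+    $ docker checkpoint --leave-running=true --image-dir=/tmp/cr2 cr
+    > abc0123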
+
+Restore Only:
+
+    --force=false  force restoring into a container
+
+As shown in the "new container" example, this flag allows you to restore a
+checkpoint image into a container that was not previously checkpointed.
+Normally, docker would return an error when restoring into a container that
+has not been previously checkpointed.
+
+## Known Issues
+
+- Currently, networking is broken in this PR. Although it's implemented at the
+libcontainer level, the method used no longer works since the introduction of
+libnetwork. See:
+  - https://github.com/docker/libnetwork/pull/465
+  - https://github.com/boucher/docker/pull/15
+- There are likely several networking related issues to work out, like:
+  - ensuring IPs are reserved across daemon restarts
+  - ensuring port maps are reserved
+  - deciding how to deal with network resources in the "new container" model
diff --git a/integration-cli/docker_cli_checkpoint_test.go b/integration-cli/docker_cli_checkpoint_test.go
new file mode 100644
index 0000000000000..09ec47a9a0d54
--- /dev/null
+++ b/integration-cli/docker_cli_checkpoint_test.go
@@ -0,0 +1,39 @@
+// +build experimental
+
+package main
+
+import (
+	"os/exec"
+	"strings"
+
+	"github.com/go-check/check"
+)
+
+func (s *DockerSuite) TestCheckpointAndRestore(c *check.C) {
+	defer unpauseAllContainers()
+
+	runCmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
+	out, _, err := runCommandWithOutput(runCmd)
+	if err != nil {
+		c.Fatalf("failed to run container: %v, output: %q", err, out)
+	}
+
+	containerID := strings.TrimSpace(out)
+
+	checkpointCmd := exec.Command(dockerBinary, "checkpoint", containerID)
+	out, _, err = runCommandWithOutput(checkpointCmd)
+	if err != nil {
+		c.Fatalf("failed to checkpoint container: %v, output: %q", err, out)
+	}
+
+	out, err = inspectField(containerID, "State.Checkpointed")
+	c.Assert(out, check.Equals, "true")
+
+	restoreCmd := exec.Command(dockerBinary, "restore", containerID)
+	out, _, _, err = runCommandWithStdoutStderr(restoreCmd)
+	if err != nil {
+		c.Fatalf("failed to restore container: %v, output: %q", err, out)
+	}
+
+	out, err = inspectField(containerID, "State.Checkpointed")
+	c.Assert(out, check.Equals, "false")
+}
diff --git a/integration-cli/docker_cli_help_test.go b/integration-cli/docker_cli_help_test.go
index 325b28013b509..7be8b111a9bcf 100644
--- a/integration-cli/docker_cli_help_test.go
+++ b/integration-cli/docker_cli_help_test.go
@@ -260,7 +260,7 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) {
 
 		// Number of commands for standard release and experimental release
 		standard := 40
-		experimental := 1
+		experimental := 3
 		expected := standard + experimental
 		if isLocalDaemon {
 			expected++ // for the daemon command
diff --git a/project/PACKAGERS.md b/project/PACKAGERS.md
index 22f24b4789b77..4e595204b3e58 100644
--- a/project/PACKAGERS.md
+++ b/project/PACKAGERS.md
@@ -303,6 +303,9 @@ by having support for them in the kernel or userspace. A few examples include:
   least the "auplink" utility from aufs-tools)
 * BTRFS graph driver (requires BTRFS support enabled in the kernel)
 * ZFS graph driver (requires userspace zfs-utils and a corresponding kernel module)
+* Checkpoint/Restore containers:
+  - requires criu version 1.6 or later (criu.org)
+  - requires kernel version 3.19 or later if using overlay-fs
 
 ## Daemon Init Script
diff --git a/runconfig/restore.go b/runconfig/restore.go
new file mode 100644
index 0000000000000..8993294411a96
--- /dev/null
+++ b/runconfig/restore.go
@@ -0,0 +1,18 @@
+package runconfig
+
+// CriuConfig holds configuration options passed down to libcontainer and CRIU
+type CriuConfig struct {
+	ImagesDirectory         string
+	WorkDirectory           string
+	LeaveRunning            bool
+	TCPEstablished          bool
+	ExternalUnixConnections bool
+	ShellJob                bool
+	FileLocks               bool
+}
+
+// RestoreConfig holds the restore command options, which is a superset of the CRIU options
+type RestoreConfig struct {
+	CriuOpts     CriuConfig
+	ForceRestore bool
+}

From c08a743b70a1e8209a273fe8fc429cab9e2f3de5 Mon Sep 17 00:00:00 2001
From: Hui Kang
Date: Sun, 1 Nov 2015 15:07:54 -0500
Subject: [PATCH 4/4] Commit the filesystem layer during checkpoint

- The aufs layer is committed during checkpoint
- criu image path and image ID are persisted to the container config file

Signed-off-by: Hui Kang
---
 daemon/container_checkpoint.go | 25 +++++++++++++++++++++++++
 daemon/container_unix.go       |  2 ++
 daemon/daemon_unix.go          |  1 +
 3 files changed, 28 insertions(+)

diff --git a/daemon/container_checkpoint.go b/daemon/container_checkpoint.go
index 8330f9ec9fab4..cbcb2bc666a86 100644
--- a/daemon/container_checkpoint.go
+++ b/daemon/container_checkpoint.go
@@ -2,6 +2,7 @@ package daemon
 
 import (
 	"fmt"
+	"path/filepath"
 
 	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/runconfig"
@@ -17,6 +18,26 @@ func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error {
 		container.cleanup()
 	}
 
+	// commit the filesystem as well
+	commitCfg := &ContainerCommitConfig{
+		Pause:  true,
+		Config: container.Config,
+	}
+	img, err := container.daemon.Commit(container, commitCfg)
+	if err != nil {
+		return err
+	}
+
+	// Update the criu image path and image ID of the container
+	criuImagePath := opts.ImagesDirectory
+	if criuImagePath == "" {
+		criuImagePath = filepath.Join(container.daemon.configStore.ExecRoot, "execdriver", container.daemon.configStore.ExecDriver, container.ID, "criu.image")
+	}
+	container.CriuimagePaths[criuImagePath] = img.ID
+
+	// Update image layer of the committed container
+	container.ImageID = img.ID
+
 	if err := container.toDisk(); err != nil {
 		return fmt.Errorf("Cannot update config for container: %s", err)
 	}
@@ -42,6 +63,10 @@ func (container *Container) Restore(opts *runconfig.CriuConfig, forceRestore boo
 		}
 	}()
 
+	if err := container.daemon.createRootfs(container); err != nil {
+		return err
+	}
+
 	if err := container.Mount(); err != nil {
 		return err
 	}
diff --git a/daemon/container_unix.go b/daemon/container_unix.go
index 3caf3545129c3..b5b470ad1326c 100644
--- a/daemon/container_unix.go
+++ b/daemon/container_unix.go
@@ -60,6 +60,8 @@ type Container struct {
 
 	Volumes   map[string]string // Deprecated since 1.7, kept for backwards compatibility
 	VolumesRW map[string]bool   // Deprecated since 1.7, kept for backwards compatibility
+
+	CriuimagePaths map[string]string // Format: criu image path -> committed image ID
 }
 
 func killProcessDirectly(container *Container) error {
diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go
index c94ea49b5278e..dca99e536c4f4 100644
--- a/daemon/daemon_unix.go
+++ b/daemon/daemon_unix.go
@@ -610,6 +610,7 @@ func (daemon *Daemon) newBaseContainer(id string) *Container {
 		},
 		Volumes:        make(map[string]string),
 		VolumesRW:      make(map[string]bool),
+		CriuimagePaths: make(map[string]string),
 	}
 }
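
For readers wiring this up themselves, here is a minimal, self-contained sketch
of how the experimental `restore` CLI flags could be translated into the
`CriuConfig`/`RestoreConfig` structs added in runconfig/restore.go above. The
structs are mirrored locally and the flag wiring is hypothetical; only the field
names and the flag names come from this patch series.

    package main

    import (
    	"flag"
    	"fmt"
    )

    // CriuConfig mirrors runconfig.CriuConfig from this patch series.
    type CriuConfig struct {
    	ImagesDirectory         string
    	WorkDirectory           string
    	LeaveRunning            bool
    	TCPEstablished          bool
    	ExternalUnixConnections bool
    	ShellJob                bool
    	FileLocks               bool
    }

    // RestoreConfig mirrors runconfig.RestoreConfig: the CRIU options plus
    // the restore-only --force flag.
    type RestoreConfig struct {
    	CriuOpts     CriuConfig
    	ForceRestore bool
    }

    func main() {
    	// Hypothetical flag wiring; the real CLI plumbing is not shown in
    	// the hunks above.
    	imageDir := flag.String("image-dir", "", "directory to restore image files from")
    	workDir := flag.String("work-dir", "", "directory for restore log")
    	allowShell := flag.Bool("allow-shell", false, "allow restoring shell jobs")
    	force := flag.Bool("force", false, "bypass checks for current container state")
    	flag.Parse()

    	cfg := RestoreConfig{
    		CriuOpts: CriuConfig{
    			ImagesDirectory: *imageDir,
    			WorkDirectory:   *workDir,
    			ShellJob:        *allowShell,
    		},
    		ForceRestore: *force,
    	}

    	// In the daemon, cfg.CriuOpts would then be mapped onto
    	// libcontainer.CriuOpts by libcontainerCriuOpts() before calling
    	// cont.Restore(p, ...).
    	fmt.Printf("%+v\n", cfg)
    }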