From 86181e90050bb444e888e8b35db8eafca8f22caf Mon Sep 17 00:00:00 2001
From: Jamie Liu
Date: Thu, 6 Nov 2025 14:11:56 -0800
Subject: [PATCH] Remove "AddressSpace activation".

Instead of requiring tasks to activate and deactivate their MemoryManager's
AddressSpace around blocking, stopping, execve, and exit, allocate a
platform.AddressSpace when a MemoryManager is created (in NewMemoryManager,
Fork, and after restore) and release it when its last user calls DecUsers.
Accordingly:

- Blocker.UninterruptibleSleepStart and UninterruptibleSleepFinish lose
  their deactivate/activate parameters.
- platform.Context gains PrepareUninterruptibleSleep, PrepareStop,
  PrepareExecve, and PrepareExit hooks.
- usermem.IOOpts.AddressSpaceActive is removed; AddressSpace IO falls back
  to internal mappings when the platform returns AddressSpaceIOUnavailable.

PiperOrigin-RevId: 829101729
---
 pkg/context/context.go                      |  17 +-
 pkg/devutil/devutil.go                      |   4 +-
 pkg/hostarch/addr.go                        |  12 ++
 pkg/lisafs/client.go                        |   8 +-
 pkg/lisafs/client_file.go                   | 120 +++++++-------
 pkg/metric/metric.go                        |  15 ++
 pkg/sentry/fsimpl/gofer/gofer.go            |   4 +-
 pkg/sentry/fsimpl/gofer/handle.go           |  12 +-
 pkg/sentry/fsimpl/iouringfs/iouringfs.go    |   6 +-
 pkg/sentry/fsimpl/testutil/kernel.go        |   5 +-
 pkg/sentry/kernel/kernel.go                 |   2 +
 pkg/sentry/kernel/pipe/pipe_util.go         |   3 -
 pkg/sentry/kernel/ptrace.go                 |  14 +-
 pkg/sentry/kernel/ptrace_amd64.go           |  12 --
 pkg/sentry/kernel/rseq.go                   |   4 +-
 pkg/sentry/kernel/task_block.go             |  17 +-
 pkg/sentry/kernel/task_clone.go             |   2 +-
 pkg/sentry/kernel/task_exec.go              |   6 +-
 pkg/sentry/kernel/task_exit.go              |   5 +-
 pkg/sentry/kernel/task_futex.go             |  12 +-
 pkg/sentry/kernel/task_image.go             |   5 +-
 pkg/sentry/kernel/task_run.go               |  12 +-
 pkg/sentry/kernel/task_stop.go              |   4 +-
 pkg/sentry/kernel/task_usermem.go           |  44 ++----
 pkg/sentry/mm/address_space.go              | 147 +-----------------
 pkg/sentry/mm/io.go                         |  94 +++++++----
 pkg/sentry/mm/lifecycle.go                  |  54 +++----
 pkg/sentry/mm/mm.go                         |  30 +---
 pkg/sentry/mm/mm_test.go                    |  27 ++--
 pkg/sentry/mm/save_restore.go               |   7 +
 pkg/sentry/platform/cpuid_amd64.go          |   3 +-
 pkg/sentry/platform/kvm/context.go          |  15 +-
 pkg/sentry/platform/kvm/kvm.go              |   9 +-
 pkg/sentry/platform/kvm/kvm_test.go         |   2 +-
 pkg/sentry/platform/platform.go             |  51 +++---
 pkg/sentry/platform/ptrace/ptrace.go        |  23 ++-
 pkg/sentry/platform/systrap/systrap.go      |  27 ++--
 .../systrap/usertrap/usertrap_amd64.go      |   6 +-
 pkg/sentry/socket/hostinet/socket_unsafe.go |  13 +-
 pkg/sentry/socket/plugin/stack/socket.go    |  20 +--
 pkg/sentry/syscalls/linux/points.go         |   2 +-
 pkg/sentry/syscalls/linux/sys_aio.go        |  15 +-
 pkg/sentry/syscalls/linux/sys_getdents.go   |   4 +-
 pkg/sentry/syscalls/linux/sys_mempolicy.go  |   4 +-
 pkg/sentry/syscalls/linux/sys_process_vm.go |   5 +-
 pkg/sentry/syscalls/linux/sys_random.go     |   4 +-
 pkg/sentry/syscalls/linux/sys_read_write.go |  40 ++---
 pkg/sentry/syscalls/linux/sys_socket.go     |  16 +-
 pkg/sentry/syscalls/linux/sys_thread.go     |   4 +-
 pkg/sentry/watchdog/watchdog.go             |  12 +-
 pkg/usermem/usermem.go                      |   5 -
 51 files changed, 378 insertions(+), 606 deletions(-)

diff --git a/pkg/context/context.go b/pkg/context/context.go
index d7fe3fc2d8..1fde70b643 100644
--- a/pkg/context/context.go
+++ b/pkg/context/context.go
@@ -66,17 +66,12 @@ type Blocker interface {
 	BlockWithTimeoutOn(waiter.Waitable, waiter.EventMask, time.Duration) (time.Duration, bool)
 
 	// UninterruptibleSleepStart indicates the beginning of an uninterruptible
-	// sleep state (equivalent to Linux's TASK_UNINTERRUPTIBLE). If deactivate
-	// is true and the Context represents a Task, the Task's AddressSpace is
-	// deactivated.
-	UninterruptibleSleepStart(deactivate bool)
+	// sleep state (equivalent to Linux's TASK_UNINTERRUPTIBLE).
+	UninterruptibleSleepStart()
 
 	// UninterruptibleSleepFinish indicates the end of an uninterruptible sleep
-	// state that was begun by a previous call to UninterruptibleSleepStart. If
-	// activate is true and the Context represents a Task, the Task's
-	// AddressSpace is activated. Normally activate is the same value as the
-	// deactivate parameter passed to UninterruptibleSleepStart.
-	UninterruptibleSleepFinish(activate bool)
+	// state that was begun by a previous call to UninterruptibleSleepStart.
+	UninterruptibleSleepFinish()
 }
 
 // NoTask is an implementation of Blocker that does not block.
@@ -155,10 +150,10 @@ func (nt *NoTask) BlockWithTimeoutOn(w waiter.Waitable, mask waiter.EventMask, d
 }
 
 // UninterruptibleSleepStart implements Blocker.UninterruptibleSleepStart.
-func (*NoTask) UninterruptibleSleepStart(bool) {}
+func (*NoTask) UninterruptibleSleepStart() {}
 
 // UninterruptibleSleepFinish implements Blocker.UninterruptibleSleepFinish.
-func (*NoTask) UninterruptibleSleepFinish(bool) {}
+func (*NoTask) UninterruptibleSleepFinish() {}
 
 // Context represents a thread of execution (hereafter "goroutine" to reflect
 // Go idiosyncrasy). It carries state associated with the goroutine across API
diff --git a/pkg/devutil/devutil.go b/pkg/devutil/devutil.go
index 6429c6ddae..4a2d286619 100644
--- a/pkg/devutil/devutil.go
+++ b/pkg/devutil/devutil.go
@@ -36,8 +36,8 @@ type GoferClient struct {
 // NewGoferClient establishes the LISAFS connection to the dev gofer server.
 // It takes ownership of fd. contName is the owning container name.
 func NewGoferClient(ctx context.Context, contName string, fd int) (*GoferClient, error) {
-	ctx.UninterruptibleSleepStart(false)
-	defer ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepStart()
+	defer ctx.UninterruptibleSleepFinish()
 
 	sock, err := unet.NewSocket(fd)
 	if err != nil {
diff --git a/pkg/hostarch/addr.go b/pkg/hostarch/addr.go
index 2f7dcf1d5f..11c41ea9f2 100644
--- a/pkg/hostarch/addr.go
+++ b/pkg/hostarch/addr.go
@@ -101,6 +101,18 @@ func (v Addr) ToRange(length uint64) (AddrRange, bool) {
 	return AddrRange{v, end}, ok
 }
 
+// MustToRange is equivalent to ToRange, but panics if the end of the range
+// wraps around.
+//
+//go:nosplit
+func (v Addr) MustToRange(length uint64) AddrRange {
+	ar, ok := v.ToRange(length)
+	if !ok {
+		panic("hostarch.Addr.ToRange() wraps")
+	}
+	return ar
+}
+
 // IsPageAligned returns true if ar.Start.IsPageAligned() and
 // ar.End.IsPageAligned().
 func (ar AddrRange) IsPageAligned() bool {
diff --git a/pkg/lisafs/client.go b/pkg/lisafs/client.go
index 8305a0edc6..5f95d7114b 100644
--- a/pkg/lisafs/client.go
+++ b/pkg/lisafs/client.go
@@ -290,9 +290,9 @@ func (c *Client) CloseFD(ctx context.Context, fd FDID, flush bool) {
 	req := CloseReq{FDs: toClose}
 	var resp CloseResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := c.SndRcvMessage(Close, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	if err != nil {
 		log.Warningf("lisafs: batch closing FDs returned error: %v", err)
 	}
@@ -305,9 +305,9 @@ func (c *Client) SyncFDs(ctx context.Context, fds []FDID) error {
 	}
 	req := FsyncReq{FDs: fds}
 	var resp FsyncResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := c.SndRcvMessage(FSync, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
diff --git a/pkg/lisafs/client_file.go b/pkg/lisafs/client_file.go
index 875b0052a0..9981efa1c6 100644
--- a/pkg/lisafs/client_file.go
+++ b/pkg/lisafs/client_file.go
@@ -72,9 +72,9 @@ func (f *ClientFD) OpenAt(ctx context.Context, flags uint32) (FDID, int, error)
 	}
 	var respFD [1]int
 	var resp OpenAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(OpenAt, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, respFD[:], req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.OpenFD, respFD[0], err
 }
 
@@ -90,18 +90,18 @@ func (f *ClientFD) OpenCreateAt(ctx context.Context, name string, flags uint32,
 
 	var respFD [1]int
 	var resp OpenCreateAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(OpenCreateAt, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, respFD[:], req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.Child, resp.NewFD, respFD[0], err
 }
 
 // StatTo makes the Fstat RPC and populates stat with the result.
 func (f *ClientFD) StatTo(ctx context.Context, stat *linux.Statx) error {
 	req := StatReq{FD: f.fd}
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(FStat, uint32(req.SizeBytes()), req.MarshalUnsafe, stat.CheckedUnmarshal, nil, req.String, stat.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -109,9 +109,9 @@ func (f *ClientFD) StatTo(ctx context.Context, stat *linux.Statx) error {
 func (f *ClientFD) Sync(ctx context.Context) error {
 	req := FsyncReq{FDs: []FDID{f.fd}}
 	var resp FsyncResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(FSync, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -172,9 +172,9 @@ func (f *ClientFD) Read(ctx context.Context, dst []byte, offset uint64) (uint64,
 	// allocate a temporary buffer during unmarshalling.
 	// PReadResp.CheckedUnmarshal expects this to be set.
 	resp.Buf = buf
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(PRead, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	if err != nil {
 		return 0, err
 	}
@@ -208,9 +208,9 @@ func (f *ClientFD) Write(ctx context.Context, src []byte, offset uint64) (uint64
 		}
 
 		var resp PWriteResp
-		ctx.UninterruptibleSleepStart(false)
+		ctx.UninterruptibleSleepStart()
 		err := f.client.SndRcvMessage(PWrite, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-		ctx.UninterruptibleSleepFinish(false)
+		ctx.UninterruptibleSleepFinish()
 		return resp.Count, err
 	})
 }
@@ -225,9 +225,9 @@ func (f *ClientFD) MkdirAt(ctx context.Context, name string, mode linux.FileMode
 	req.GID = gid
 
 	var resp MkdirAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(MkdirAt, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.ChildDir, err
 }
 
@@ -242,9 +242,9 @@ func (f *ClientFD) SymlinkAt(ctx context.Context, name, target string, uid UID,
 	}
 
 	var resp SymlinkAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(SymlinkAt, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.Symlink, err
 }
 
@@ -257,9 +257,9 @@ func (f *ClientFD) LinkAt(ctx context.Context, targetFD FDID, name string) (Inod
 	}
 
 	var resp LinkAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(LinkAt, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.Link, err
 }
 
@@ -275,9 +275,9 @@ func (f *ClientFD) MknodAt(ctx context.Context, name string, mode linux.FileMode
 	req.Major = primitive.Uint32(major)
 
 	var resp MknodAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(MknodAt, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.Child, err
 }
 
@@ -301,9 +301,9 @@ func (f *ClientFD) SetStat(ctx context.Context, stat *linux.Statx) (uint32, erro
 	}
 
 	var resp SetStatResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(SetStat, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	if err != nil {
 		return 0, nil, err
 	}
@@ -321,9 +321,9 @@ func (f *ClientFD) WalkMultiple(ctx context.Context, names []string) (WalkStatus
 	}
 
 	var resp WalkResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(Walk, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.Status, resp.Inodes, err
 }
 
@@ -336,9 +336,9 @@ func (f *ClientFD) Walk(ctx context.Context, name string) (Inode, error) {
 	var inode [1]Inode
 	resp := WalkResp{Inodes: inode[:]}
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(Walk, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	if err != nil {
 		return Inode{}, err
 	}
@@ -372,18 +372,18 @@ func (f *ClientFD) WalkStat(ctx context.Context, names []string) ([]linux.Statx,
 	}
 
 	var resp WalkStatResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(WalkStat, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.Stats, err
 }
 
 // StatFSTo makes the FStatFS RPC and populates statFS with the result.
 func (f *ClientFD) StatFSTo(ctx context.Context, statFS *StatFS) error {
 	req := FStatFSReq{FD: f.fd}
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(FStatFS, uint32(req.SizeBytes()), req.MarshalUnsafe, statFS.CheckedUnmarshal, nil, req.String, statFS.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -396,9 +396,9 @@ func (f *ClientFD) Allocate(ctx context.Context, mode, offset, length uint64) er
 		Length: length,
 	}
 	var resp FAllocateResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(FAllocate, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -406,9 +406,9 @@ func (f *ClientFD) Allocate(ctx context.Context, mode, offset, length uint64) er
 func (f *ClientFD) ReadLinkAt(ctx context.Context) (string, error) {
 	req := ReadLinkAtReq{FD: f.fd}
 	var resp ReadLinkAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(ReadLinkAt, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return string(resp.Target), err
 }
 
@@ -420,9 +420,9 @@ func (f *ClientFD) Flush(ctx context.Context) error {
 	}
 	req := FlushReq{FD: f.fd}
 	var resp FlushResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(Flush, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -439,9 +439,9 @@ func (f *ClientFD) BindAt(ctx context.Context, sockType linux.SockType, name str
 	req.Mode = mode
 	req.UID = uid
 	req.GID = gid
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(BindAt, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, hostSocketFD[:], req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	if err == nil && hostSocketFD[0] < 0 {
 		// No host socket fd? We can't proceed.
 		// Clean up any resources the gofer sent to us.
@@ -484,13 +484,13 @@ func (f *ClientFD) Connect(ctx context.Context, sockType linux.SockType, euid UI
 			UID: euid,
 			GID: egid,
 		}
-		ctx.UninterruptibleSleepStart(false)
+		ctx.UninterruptibleSleepStart()
 		err = f.client.SndRcvMessage(ConnectWithCreds, uint32(reqWithCreds.SizeBytes()), reqWithCreds.MarshalUnsafe, resp.CheckedUnmarshal, sockFD[:], reqWithCreds.String, resp.String)
-		ctx.UninterruptibleSleepFinish(false)
+		ctx.UninterruptibleSleepFinish()
 	} else {
-		ctx.UninterruptibleSleepStart(false)
+		ctx.UninterruptibleSleepStart()
 		err = f.client.SndRcvMessage(Connect, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, sockFD[:], req.String, resp.String)
-		ctx.UninterruptibleSleepFinish(false)
+		ctx.UninterruptibleSleepFinish()
 	}
 
 	if err == nil && sockFD[0] < 0 {
@@ -507,9 +507,9 @@ func (f *ClientFD) UnlinkAt(ctx context.Context, name string, flags uint32) erro
 		Flags: primitive.Uint32(flags),
 	}
 	var resp UnlinkAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(UnlinkAt, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -523,9 +523,9 @@ func (f *ClientFD) RenameAt(ctx context.Context, oldName string, newDirFD FDID,
 		NewName: SizedString(newName),
 	}
 	var resp RenameAtResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(RenameAt, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -537,9 +537,9 @@ func (f *ClientFD) Getdents64(ctx context.Context, count int32) ([]Dirent64, err
 	}
 
 	var resp Getdents64Resp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(Getdents64, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.Dirents, err
 }
 
@@ -551,9 +551,9 @@ func (f *ClientFD) ListXattr(ctx context.Context, size uint64) ([]string, error)
 	}
 
 	var resp FListXattrResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(FListXattr, uint32(req.SizeBytes()), req.MarshalUnsafe, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return resp.Xattrs, err
 }
 
@@ -566,9 +566,9 @@ func (f *ClientFD) GetXattr(ctx context.Context, name string, size uint64) (stri
 	}
 
 	var resp FGetXattrResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(FGetXattr, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return string(resp.Value), err
 }
 
@@ -581,9 +581,9 @@ func (f *ClientFD) SetXattr(ctx context.Context, name string, value string, flag
 		Flags: primitive.Uint32(flags),
 	}
 	var resp FSetXattrResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(FSetXattr, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -594,9 +594,9 @@ func (f *ClientFD) RemoveXattr(ctx context.Context, name string) error {
 		Name: SizedString(name),
 	}
 	var resp FRemoveXattrResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(FRemoveXattr, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -635,9 +635,9 @@ func (f *ClientBoundSocketFD) Listen(ctx context.Context, backlog int32) error {
 		Backlog: backlog,
 	}
 	var resp ListenResp
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(Listen, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	return err
 }
 
@@ -648,9 +648,9 @@ func (f *ClientBoundSocketFD) Accept(ctx context.Context) (int, error) {
 	}
 	var resp AcceptResp
 	var hostSocketFD [1]int
-	ctx.UninterruptibleSleepStart(false)
+	ctx.UninterruptibleSleepStart()
 	err := f.client.SndRcvMessage(Accept, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, hostSocketFD[:], req.String, resp.String)
-	ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepFinish()
 	if err == nil && hostSocketFD[0] < 0 {
 		err = unix.EBADF
 	}
diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go
index 1bba73dda5..3e13abcd2a 100644
--- a/pkg/metric/metric.go
+++ b/pkg/metric/metric.go
@@ -1508,6 +1508,21 @@ func EmitMetricUpdate() {
 	}
 }
 
+// EmitMetricUpdateWithTimeout is equivalent to EmitMetricUpdate, but only
+// waits for timeout before returning. Metrics may be emitted after
+// EmitMetricUpdateWithTimeout returns.
+func EmitMetricUpdateWithTimeout(timeout time.Duration) {
+	ch := make(chan struct{})
+	go func() {
+		EmitMetricUpdate()
+		close(ch)
+	}()
+	select {
+	case <-ch:
+	case <-time.After(timeout):
+	}
+}
+
 // SnapshotOptions controls how snapshots are exported in GetSnapshot.
 type SnapshotOptions struct {
 	// Filter, if set, should return true for metrics that should be written to
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index d0cc2dd305..31f778501c 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -636,8 +636,8 @@ func (fs *filesystem) initClientAndGetRoot(ctx context.Context) (lisafs.Inode, i
 		return lisafs.Inode{}, -1, err
 	}
 
-	ctx.UninterruptibleSleepStart(false)
-	defer ctx.UninterruptibleSleepFinish(false)
+	ctx.UninterruptibleSleepStart()
+	defer ctx.UninterruptibleSleepFinish()
 
 	var (
 		rootInode lisafs.Inode
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
index 7634ec8787..51cf292ae8 100644
--- a/pkg/sentry/fsimpl/gofer/handle.go
+++ b/pkg/sentry/fsimpl/gofer/handle.go
@@ -52,9 +52,9 @@ func (h *handle) readToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offs
 		return 0, nil
 	}
 	if h.fd >= 0 {
-		ctx.UninterruptibleSleepStart(false)
+		ctx.UninterruptibleSleepStart()
 		n, err := hostfd.Preadv2(h.fd, dsts, int64(offset), 0 /* flags */)
-		ctx.UninterruptibleSleepFinish(false)
+		ctx.UninterruptibleSleepFinish()
 		return n, err
 	}
 	rw := getHandleReadWriter(ctx, h, int64(offset))
@@ -67,9 +67,9 @@ func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, o
 		return 0, nil
 	}
 	if h.fd >= 0 {
-		ctx.UninterruptibleSleepStart(false)
+		ctx.UninterruptibleSleepStart()
 		n, err := hostfd.Pwritev2(h.fd, srcs, int64(offset), 0 /* flags */)
-		ctx.UninterruptibleSleepFinish(false)
+		ctx.UninterruptibleSleepFinish()
 		return n, err
 	}
 	rw := getHandleReadWriter(ctx, h, int64(offset))
@@ -91,9 +91,9 @@ func (h *handle) sync(ctx context.Context) error {
 	// If we have a host FD, fsyncing it is likely to be faster than an fsync
 	// RPC.
 	if h.fd >= 0 {
-		ctx.UninterruptibleSleepStart(false)
+		ctx.UninterruptibleSleepStart()
 		err := unix.Fsync(int(h.fd))
-		ctx.UninterruptibleSleepFinish(false)
+		ctx.UninterruptibleSleepFinish()
 		return err
 	}
 	if h.fdLisa.Ok() {
diff --git a/pkg/sentry/fsimpl/iouringfs/iouringfs.go b/pkg/sentry/fsimpl/iouringfs/iouringfs.go
index 1a576b19d4..2dbd190f14 100644
--- a/pkg/sentry/fsimpl/iouringfs/iouringfs.go
+++ b/pkg/sentry/fsimpl/iouringfs/iouringfs.go
@@ -497,11 +497,7 @@ func (fd *FileDescription) handleReadv(t *kernel.Task, sqe *linux.IOUringSqe, fl
 		return 0, linuxerr.EINVAL
 	}
 
-	// AddressSpaceActive is set to true as we are doing this from the task goroutine.And this is a
-	// case as we currently don't support neither IOPOLL nor SQPOLL modes.
-	dst, err := t.IovecsIOSequence(hostarch.Addr(sqe.AddrOrSpliceOff), int(sqe.Len), usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	dst, err := t.IovecsIOSequence(hostarch.Addr(sqe.AddrOrSpliceOff), int(sqe.Len), usermem.IOOpts{})
 	if err != nil {
 		return 0, err
 	}
diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go
index 8767180672..501bf590c3 100644
--- a/pkg/sentry/fsimpl/testutil/kernel.go
+++ b/pkg/sentry/fsimpl/testutil/kernel.go
@@ -138,7 +138,10 @@ func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup, mntns
 	if err != nil {
 		return nil, err
 	}
-	m := mm.NewMemoryManager(k, k.MemoryFile(), k.SleepForAddressSpaceActivation)
+	m, err := mm.NewMemoryManager(k, k.MemoryFile())
+	if err != nil {
+		return nil, err
+	}
 	m.SetExecutable(ctx, exe)
 
 	creds := auth.CredentialsFromContext(ctx)
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 513aec2d38..5792ed1125 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -1495,6 +1495,8 @@ func (k *Kernel) IsPaused() bool {
 }
 
 // ReceiveTaskStates receives full states for all tasks.
+//
+// Precondition: The kernel must be paused.
 func (k *Kernel) ReceiveTaskStates() {
 	k.extMu.Lock()
 	k.tasks.PullFullState()
diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go
index 807e9c469b..616dc256b6 100644
--- a/pkg/sentry/kernel/pipe/pipe_util.go
+++ b/pkg/sentry/kernel/pipe/pipe_util.go
@@ -147,9 +147,6 @@ func (p *Pipe) Ioctl(ctx context.Context, io usermem.IO, sysno uintptr, args arc
 	iocc := usermem.IOCopyContext{
 		IO:  io,
 		Ctx: ctx,
-		Opts: usermem.IOOpts{
-			AddressSpaceActive: true,
-		},
 	}
 	_, err := primitive.CopyInt32Out(&iocc, args[2].Pointer(), int32(v))
 	return 0, err
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 42c9349834..43fb24e7a2 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -1092,13 +1092,9 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 	// may not yet have reached Task.doStop; wait for it to do so. This is safe
 	// because there's no way for target to initiate a ptrace-stop and then
 	// block (by calling Task.block) before entering it.
-	//
-	// Caveat: If tasks were just restored, the tracee's first call to
-	// Task.Activate (in Task.run) occurs before its first call to Task.doStop,
-	// which may block if the tracer's address space is active.
-	t.UninterruptibleSleepStart(true)
+	t.UninterruptibleSleepStart()
 	target.waitGoroutineStoppedOrExited()
-	t.UninterruptibleSleepFinish(true)
+	t.UninterruptibleSleepFinish()
 
 	// Resuming commands end the ptrace stop, but only if successful.
 	// PTRACE_LISTEN ends the ptrace stop if trapNotifyPending is already set on the
@@ -1206,9 +1202,6 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 			Ctx:  t,
 			IO:   t.MemoryManager(),
 			Addr: ar.Start,
-			Opts: usermem.IOOpts{
-				AddressSpaceActive: true,
-			},
 		}, int(ar.Length()), target.Kernel().FeatureSet())
 		if err != nil {
 			return err
@@ -1233,9 +1226,6 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 			Ctx:  t,
 			IO:   t.MemoryManager(),
 			Addr: ar.Start,
-			Opts: usermem.IOOpts{
-				AddressSpaceActive: true,
-			},
 		}, int(ar.Length()), target.Kernel().FeatureSet())
 		if err != nil {
 			return err
diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go
index 38e97a167e..a5c15bd9c4 100644
--- a/pkg/sentry/kernel/ptrace_amd64.go
+++ b/pkg/sentry/kernel/ptrace_amd64.go
@@ -46,9 +46,6 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) err
 			Ctx:  t,
 			IO:   t.MemoryManager(),
 			Addr: data,
-			Opts: usermem.IOOpts{
-				AddressSpaceActive: true,
-			},
 		})
 		return err
 
@@ -58,9 +55,6 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) err
 			Ctx:  t,
 			IO:   t.MemoryManager(),
 			Addr: data,
-			Opts: usermem.IOOpts{
-				AddressSpaceActive: true,
-			},
 		}, len(*s))
 		return err
 
@@ -69,9 +63,6 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) err
 			Ctx:  t,
 			IO:   t.MemoryManager(),
 			Addr: data,
-			Opts: usermem.IOOpts{
-				AddressSpaceActive: true,
-			},
 		})
 		if err == nil {
 			target.p.FullStateChanged()
@@ -84,9 +75,6 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) err
 			Ctx:  t,
 			IO:   t.MemoryManager(),
 			Addr: data,
-			Opts: usermem.IOOpts{
-				AddressSpaceActive: true,
-			},
 		}, len(*s))
 		if err == nil {
 			target.p.FullStateChanged()
diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go
index 033cdb0607..91b5bb4242 100644
--- a/pkg/sentry/kernel/rseq.go
+++ b/pkg/sentry/kernel/rseq.go
@@ -368,9 +368,7 @@ func (t *Task) rseqAddrInterrupt() {
 	// NOTE(b/143949567): We don't support any rseq flags, so we always
 	// restart if we are in the critical section, and thus *always* clear
 	// critAddrAddr.
-	if _, err := t.MemoryManager().ZeroOut(t, critAddrAddr, int64(t.Arch().Width()), usermem.IOOpts{
-		AddressSpaceActive: true,
-	}); err != nil {
+	if _, err := t.MemoryManager().ZeroOut(t, critAddrAddr, int64(t.Arch().Width()), usermem.IOOpts{}); err != nil {
 		t.Debugf("Failed to clear critical section address from %#x for rseq: %v", critAddrAddr, err)
 		t.forceSignal(linux.SIGSEGV, false /* unconditional */)
 		t.SendSignal(SignalInfoPriv(linux.SIGSEGV))
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index f5c537b32d..08d15002a9 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -213,18 +213,16 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error {
 	}
 }
 
-// prepareSleep prepares to sleep.
+// prepareSleep is called immediately before the task goroutine blocks.
 func (t *Task) prepareSleep() {
 	t.assertTaskGoroutine()
 	t.p.PrepareSleep()
-	t.Deactivate()
 	t.accountTaskGoroutineEnter(TaskGoroutineBlockedInterruptible)
 }
 
-// completeSleep reactivates the address space.
+// completeSleep is called immediately after the task goroutine unblocks.
 func (t *Task) completeSleep() {
 	t.accountTaskGoroutineLeave(TaskGoroutineBlockedInterruptible)
-	t.Activate()
 }
 
 // Interrupted implements context.Context.Interrupted.
@@ -239,20 +237,15 @@ func (t *Task) Interrupted() bool {
 }
 
 // UninterruptibleSleepStart implements context.Context.UninterruptibleSleepStart.
-func (t *Task) UninterruptibleSleepStart(deactivate bool) {
+func (t *Task) UninterruptibleSleepStart() {
 	t.assertTaskGoroutine()
-	if deactivate {
-		t.Deactivate()
-	}
+	t.p.PrepareUninterruptibleSleep()
 	t.accountTaskGoroutineEnter(TaskGoroutineBlockedUninterruptible)
 }
 
 // UninterruptibleSleepFinish implements context.Context.UninterruptibleSleepFinish.
-func (t *Task) UninterruptibleSleepFinish(activate bool) {
+func (t *Task) UninterruptibleSleepFinish() {
 	t.accountTaskGoroutineLeave(TaskGoroutineBlockedUninterruptible)
-	if activate {
-		t.Activate()
-	}
 }
 
 // interrupted returns true if interrupt or interruptSelf has been called at
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index eee16c0266..3963a8f079 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -344,7 +344,7 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
 	}
 	if args.Flags&linux.CLONE_CHILD_SETTID != 0 {
 		ctid := nt.ThreadID()
-		ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), hostarch.Addr(args.ChildTID))
+		ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{}), hostarch.Addr(args.ChildTID))
 	}
 	ntid := t.tg.pidns.IDOfTask(nt)
 	if args.Flags&linux.CLONE_PARENT_SETTID != 0 {
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 1931edcd00..5e8a38bec3 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -380,7 +380,7 @@ func (r *runExecveAfterSiblingExitStop) execute(t *Task) taskRunState {
 	}
 
 	// Switch to the new process.
-	t.MemoryManager().Deactivate()
+	t.p.PrepareExecve()
 	// Update credentials to reflect the execve. This should precede switching
 	// MMs to ensure that dumpability has been reset first, if needed.
 	t.creds.Store(r.newCreds)
@@ -396,10 +396,6 @@ func (r *runExecveAfterSiblingExitStop) execute(t *Task) taskRunState {
 	t.unstopVforkParent()
 	t.p.FullStateChanged()
 
-	// NOTE(b/30316266): All locks must be dropped prior to calling Activate.
-	if err := t.MemoryManager().Activate(t); err != nil {
-		panic("unable to activate mm: " + err.Error())
-	}
 	t.ptraceExec(oldTID)
 	return (*runSyscallExit)(nil)
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 9edf187b14..a4f790b3e0 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -278,9 +278,7 @@ func (*runExitMain) execute(t *Task) taskRunState {
 	// Handle the robust futex list.
 	t.exitRobustList()
 
-	// Deactivate the address space and update max RSS before releasing the
-	// task's MM.
-	t.Deactivate()
+	// Update max RSS before releasing the task's MM.
 	t.tg.pidns.owner.mu.Lock()
 	t.updateRSSLocked()
 	t.tg.pidns.owner.mu.Unlock()
@@ -288,6 +286,7 @@ func (*runExitMain) execute(t *Task) taskRunState {
 	// Release the task image resources. Accessing these fields must be
 	// done with t.mu held, but the mm.DecUsers() call must be done outside
 	// of that lock.
+	t.p.PrepareExit()
 	t.mu.Lock()
 	mm := t.image.MemoryManager
 	t.image.MemoryManager = nil
diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go
index 4dc41b82b2..f13c39f2dd 100644
--- a/pkg/sentry/kernel/task_futex.go
+++ b/pkg/sentry/kernel/task_futex.go
@@ -32,23 +32,17 @@ func (t *Task) Futex() *futex.Manager {
 }
 
 // SwapUint32 implements futex.Target.SwapUint32.
 func (t *Task) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) {
-	return t.MemoryManager().SwapUint32(t, addr, new, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	return t.MemoryManager().SwapUint32(t, addr, new, usermem.IOOpts{})
 }
 
 // CompareAndSwapUint32 implements futex.Target.CompareAndSwapUint32.
 func (t *Task) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) {
-	return t.MemoryManager().CompareAndSwapUint32(t, addr, old, new, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	return t.MemoryManager().CompareAndSwapUint32(t, addr, old, new, usermem.IOOpts{})
 }
 
 // LoadUint32 implements futex.Target.LoadUint32.
 func (t *Task) LoadUint32(addr hostarch.Addr) (uint32, error) {
-	return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{})
 }
 
 // GetSharedKey implements futex.Target.GetSharedKey.
diff --git a/pkg/sentry/kernel/task_image.go b/pkg/sentry/kernel/task_image.go
index 6f29337807..fceed2ade5 100644
--- a/pkg/sentry/kernel/task_image.go
+++ b/pkg/sentry/kernel/task_image.go
@@ -141,7 +141,10 @@ func (t *Task) Stack() *arch.Stack {
 // args.MemoryManager does not need to be set by the caller.
 func (k *Kernel) LoadTaskImage(ctx context.Context, args loader.LoadArgs) (*TaskImage, *auth.Credentials, bool, *syserr.Error) {
 	// Prepare a new user address space to load into.
-	m := mm.NewMemoryManager(k, k.mf, k.SleepForAddressSpaceActivation)
+	m, merr := mm.NewMemoryManager(k, k.mf)
+	if merr != nil {
+		return nil, nil, false, syserr.FromError(merr)
+	}
 	defer m.DecUsers(ctx)
 	args.MemoryManager = m
 
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index 79e374b3f0..7e8a9c0f5a 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -69,13 +69,6 @@ func (t *Task) run(threadID uintptr) {
 	t.blockingTimer = ktime.NewSampledTimer(t.k.MonotonicClock(), t.blockingTimerListener)
 	defer t.blockingTimer.Destroy()
 
-	// Activate our address space.
-	t.Activate()
-	// The corresponding t.Deactivate occurs in the exit path
-	// (runExitMain.execute) so that when
-	// Platform.CooperativelySharesAddressSpace() == true, we give up the
-	// AddressSpace before the task goroutine finishes executing.
-
 	// If this is a newly-started task, it should check for participation in
 	// group stops. If this is a task resuming after restore, it was
 	// interrupted by saving. In either case, the task is initially
@@ -125,10 +118,7 @@ func (t *Task) doStop() {
 	if t.stopCount.Load() == 0 {
 		return
 	}
-	t.Deactivate()
-	// NOTE(b/30316266): t.Activate() must be called without any locks held, so
-	// this defer must precede the defer for unlocking the signal mutex.
-	defer t.Activate()
+	t.p.PrepareStop()
 	t.accountTaskGoroutineEnter(TaskGoroutineStopped)
 	defer t.accountTaskGoroutineLeave(TaskGoroutineStopped)
 	t.tg.signalHandlers.mu.Lock()
diff --git a/pkg/sentry/kernel/task_stop.go b/pkg/sentry/kernel/task_stop.go
index 345648c933..829ddd9534 100644
--- a/pkg/sentry/kernel/task_stop.go
+++ b/pkg/sentry/kernel/task_stop.go
@@ -201,6 +201,8 @@ func (ts *TaskSet) BeginExternalStop() {
 }
 
 // PullFullState receives full states for all tasks.
+//
+// Precondition: The kernel must be paused.
 func (ts *TaskSet) PullFullState() {
 	ts.mu.Lock()
 	defer ts.mu.Unlock()
@@ -208,11 +210,9 @@ func (ts *TaskSet) PullFullState() {
 		return
 	}
 	for t := range ts.Root.tids {
-		t.Activate()
 		if mm := t.MemoryManager(); mm != nil {
 			t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())
 		}
-		t.Deactivate()
 	}
 }
diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go
index cda826f3cc..8c5b09241b 100644
--- a/pkg/sentry/kernel/task_usermem.go
+++ b/pkg/sentry/kernel/task_usermem.go
@@ -28,51 +28,27 @@ import (
 
 const iovecLength = 16
 
-// Activate ensures that the task has an active address space.
-func (t *Task) Activate() {
-	if mm := t.MemoryManager(); mm != nil {
-		if err := mm.Activate(t); err != nil {
-			panic("unable to activate mm: " + err.Error())
-		}
-	}
-}
-
-// Deactivate relinquishes the task's active address space.
-func (t *Task) Deactivate() {
-	if mm := t.MemoryManager(); mm != nil {
-		mm.Deactivate()
-	}
-}
-
-// CopyInBytes is a fast version of CopyIn if the caller can serialize the
-// data without reflection and pass in a byte slice.
+// CopyInBytes is a legacy wrapper for t.MemoryManager().CopyIn.
 //
-// This Task's AddressSpace must be active.
+// Preconditions: The caller must be running on the task goroutine.
 func (t *Task) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) {
-	return t.MemoryManager().CopyIn(t, addr, dst, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	return t.MemoryManager().CopyIn(t, addr, dst, usermem.IOOpts{})
 }
 
-// CopyOutBytes is a fast version of CopyOut if the caller can serialize the
-// data without reflection and pass in a byte slice.
+// CopyOutBytes is a legacy wrapper for t.MemoryManager().CopyOut.
 //
-// This Task's AddressSpace must be active.
+// Preconditions: The caller must be running on the task goroutine.
 func (t *Task) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) {
-	return t.MemoryManager().CopyOut(t, addr, src, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	return t.MemoryManager().CopyOut(t, addr, src, usermem.IOOpts{})
 }
 
 // CopyInString copies a NUL-terminated string of length at most maxlen in from
 // the task's memory. The copy will fail with syscall.EFAULT if it traverses
 // user memory that is unmapped or not readable by the user.
 //
-// This Task's AddressSpace must be active.
+// Preconditions: The caller must be running on the task goroutine.
 func (t *Task) CopyInString(addr hostarch.Addr, maxlen int) (string, error) {
-	return usermem.CopyStringIn(t, t.MemoryManager(), addr, maxlen, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	return usermem.CopyStringIn(t, t.MemoryManager(), addr, maxlen, usermem.IOOpts{})
 }
 
 // CopyInVector copies a NULL-terminated vector of strings from the task's
@@ -88,7 +64,7 @@ func (t *Task) CopyInString(addr hostarch.Addr, maxlen int) (string, error) {
 //	{ "a", "b", "c" } => 6 (3 for lengths, 3 for elements)
 //	{ "abc" } => 4 (3 for length, 1 for elements)
 //
-// This Task's AddressSpace must be active.
+// Preconditions: The caller must be running on the task goroutine.
 func (t *Task) CopyInVector(addr hostarch.Addr, maxElemSize, maxTotalSize int) ([]string, error) {
 	var v []string
 	for {
@@ -124,7 +100,6 @@ func (t *Task) CopyInVector(addr hostarch.Addr, maxElemSize, maxTotalSize int) (
 //
 // Preconditions: Same as usermem.IO.CopyOut, plus:
 //   - The caller must be running on the task goroutine.
-//   - t's AddressSpace must be active.
 func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) error {
 	switch t.Arch().Width() {
 	case 8:
@@ -154,7 +129,6 @@ func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) erro
 //
 // Preconditions: Same as usermem.IO.CopyIn, plus:
 // * The caller must be running on the task goroutine.
-// * t's AddressSpace must be active.
 func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRangeSeq, error) {
 	// Special case to avoid allocating a single hostarch.AddrRange.
 	if numIovecs == 1 {
diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go
index e543797806..8018c4b8b1 100644
--- a/pkg/sentry/mm/address_space.go
+++ b/pkg/sentry/mm/address_space.go
@@ -27,149 +27,15 @@ import (
 
 // AddressSpace returns the platform.AddressSpace bound to mm.
 //
-// Preconditions: The caller must have called mm.Activate().
+// Preconditions: mm.users != 0.
 func (mm *MemoryManager) AddressSpace() platform.AddressSpace {
-	if mm.active.Load() == 0 {
-		panic("trying to use inactive address space?")
-	}
 	return mm.as
 }
 
-// Activate ensures this MemoryManager has a platform.AddressSpace.
-//
-// The caller must not hold any locks when calling Activate.
-//
-// When this MemoryManager is no longer needed by a task, it should call
-// Deactivate to release the reference.
-func (mm *MemoryManager) Activate(ctx context.Context) error {
-	// Fast path: the MemoryManager already has an active
-	// platform.AddressSpace, and we just need to indicate that we need it too.
-	for {
-		active := mm.active.Load()
-		if active == 0 {
-			// Fall back to the slow path.
-			break
-		}
-		if mm.active.CompareAndSwap(active, active+1) {
-			return nil
-		}
-	}
-
-	for {
-		// Slow path: may need to synchronize with other goroutines changing
-		// mm.active to or from zero.
-		mm.activeMu.Lock()
-		// Inline Unlock instead of using a defer for performance since this
-		// method is commonly in the hot-path.
-
-		// Check if we raced with another goroutine performing activation.
-		if mm.active.Load() > 0 {
-			// This can't race; Deactivate can't decrease mm.active from 1 to 0
-			// without holding activeMu.
-			mm.active.Add(1)
-			mm.activeMu.Unlock()
-			return nil
-		}
-
-		// Do we have a context? If so, then we never unmapped it. This can
-		// only be the case if !mm.p.CooperativelySchedulesAddressSpace().
-		if mm.as != nil {
-			mm.active.Store(1)
-			mm.activeMu.Unlock()
-			return nil
-		}
-
-		// Get a new address space. We must force unmapping by passing nil to
-		// NewAddressSpace if requested. (As in the nil interface object, not a
-		// typed nil.)
-		mappingsID := (any)(mm)
-		if mm.unmapAllOnActivate {
-			mappingsID = nil
-		}
-		as, c, err := mm.p.NewAddressSpace(mappingsID)
-		if err != nil {
-			mm.activeMu.Unlock()
-			return err
-		}
-		if as == nil {
-			// AddressSpace is unavailable, we must wait.
-			//
-			// activeMu must not be held while waiting, as the user of the address
-			// space we are waiting on may attempt to take activeMu.
-			mm.activeMu.Unlock()
-
-			sleep := mm.p.CooperativelySchedulesAddressSpace() && mm.sleepForActivation
-			if sleep {
-				// Mark this task sleeping while waiting for the address space to
-				// prevent the watchdog from reporting it as a stuck task.
-				ctx.UninterruptibleSleepStart(false)
-			}
-			<-c
-			if sleep {
-				ctx.UninterruptibleSleepFinish(false)
-			}
-			continue
-		}
-
-		// Okay, we could restore all mappings at this point.
-		// But forget that. Let's just let them fault in.
-		mm.as = as
-
-		// Unmapping is done, if necessary.
-		mm.unmapAllOnActivate = false
-
-		// Now that m.as has been assigned, we can set m.active to a non-zero value
-		// to enable the fast path.
-		mm.active.Store(1)
-
-		mm.activeMu.Unlock()
-		return nil
-	}
-}
-
-// Deactivate releases a reference to the MemoryManager.
-func (mm *MemoryManager) Deactivate() {
-	// Fast path: this is not the last goroutine to deactivate the
-	// MemoryManager.
-	for {
-		active := mm.active.Load()
-		if active == 1 {
-			// Fall back to the slow path.
-			break
-		}
-		if mm.active.CompareAndSwap(active, active-1) {
-			return
-		}
-	}
-
-	mm.activeMu.Lock()
-	// Same as Activate.
-
-	// Still active?
-	if mm.active.Add(-1) > 0 {
-		mm.activeMu.Unlock()
-		return
-	}
-
-	// Can we hold on to the address space?
-	if !mm.p.CooperativelySchedulesAddressSpace() {
-		mm.activeMu.Unlock()
-		return
-	}
-
-	// Release the address space.
-	mm.as.Release()
-
-	// Lost it.
-	mm.as = nil
-	mm.activeMu.Unlock()
-}
-
 // mapASLocked maps addresses in ar into mm.as.
 //
 // Preconditions:
 // - mm.activeMu must be locked.
-// - mm.as != nil.
 // - ar.Length() != 0.
 // - ar must be page-aligned.
 // - pseg == mm.pmas.LowerBoundSegment(ar.Start).
@@ -261,21 +127,12 @@ func (mm *MemoryManager) unmapASLocked(ar hostarch.AddrRange) {
 		return
 	}
 	if mm.as == nil {
-		// No AddressSpace? Force all mappings to be unmapped on the next
-		// Activate.
-		mm.unmapAllOnActivate = true
+		// Being called from mm.DecUsers() after AddressSpace release.
 		return
 	}
-
 	// unmapASLocked doesn't require vmas or pmas to exist for ar, so it can be
 	// passed ranges that include addresses that can't be mapped by the
 	// application.
 	ar = ar.Intersect(mm.applicationAddrRange())
-
-	// Note that this AddressSpace may or may not be active. If the
-	// platform does not require cooperative sharing of AddressSpaces, they
-	// are retained between Deactivate/Activate calls. Despite not being
-	// active, it is still valid to perform operations on these address
-	// spaces.
 	mm.as.Unmap(ar.Start, uint64(ar.Length()))
 }
diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go
index ffa8f7d8b8..d6ab1dbabb 100644
--- a/pkg/sentry/mm/io.go
+++ b/pkg/sentry/mm/io.go
@@ -90,7 +90,7 @@ func (mm *MemoryManager) checkIOVec(ars hostarch.AddrRangeSeq) bool {
 }
 
 func (mm *MemoryManager) asioEnabled(opts usermem.IOOpts) bool {
-	return mm.haveASIO && !opts.IgnorePermissions && opts.AddressSpaceActive
+	return mm.haveASIO && !opts.IgnorePermissions
 }
 
 // translateIOError converts errors to EFAULT, as is usually reported for all
@@ -107,7 +107,7 @@ func translateIOError(ctx context.Context, err error) error {
 
 // CopyOut implements usermem.IO.CopyOut.
 func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
-	ar, ok := mm.CheckIORange(addr, int64(len(src)))
+	_, ok := mm.CheckIORange(addr, int64(len(src)))
 	if !ok {
 		return 0, linuxerr.EFAULT
 	}
@@ -118,7 +118,7 @@ func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []
 
 	// Do AddressSpace IO if applicable.
 	if mm.asioEnabled(opts) && len(src) < copyMapMinBytes {
-		return mm.asCopyOut(ctx, addr, src)
+		return mm.asCopyOut(ctx, addr, src, opts)
 	}
 
 	// Go through internal mappings.
@@ -127,14 +127,10 @@ func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []
 	// traverse an unnecessary layer of buffering. This can be fixed by
 	// inlining mm.withInternalMappings() and passing src subslices directly to
 	// memmap.File.BufferWriteAt().
-	n64, err := mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
-		n, err := safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
-		return n, translateIOError(ctx, err)
-	})
-	return int(n64), err
+	return mm.imCopyOut(ctx, addr, src, opts)
 }
 
-func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src []byte) (int, error) {
+func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
 	var done int
 	for {
 		n, err := mm.as.CopyOut(addr+hostarch.Addr(done), src[done:])
@@ -149,13 +145,25 @@ func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src
 			}
 			continue
 		}
+		if _, ok := err.(platform.AddressSpaceIOUnavailable); ok {
+			// Fall back to using internal mappings.
+			return mm.imCopyOut(ctx, addr+hostarch.Addr(done), src[done:], opts)
+		}
 		return done, translateIOError(ctx, err)
 	}
 }
 
+func (mm *MemoryManager) imCopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
+	n64, err := mm.withInternalMappings(ctx, addr.MustToRange(uint64(len(src))), hostarch.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+		n, err := safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
+		return n, translateIOError(ctx, err)
+	})
+	return int(n64), err
+}
+
 // CopyIn implements usermem.IO.CopyIn.
 func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
-	ar, ok := mm.CheckIORange(addr, int64(len(dst)))
+	_, ok := mm.CheckIORange(addr, int64(len(dst)))
 	if !ok {
 		return 0, linuxerr.EFAULT
 	}
@@ -166,7 +174,7 @@ func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []b
 
 	// Do AddressSpace IO if applicable.
 	if mm.asioEnabled(opts) && len(dst) < copyMapMinBytes {
-		return mm.asCopyIn(ctx, addr, dst)
+		return mm.asCopyIn(ctx, addr, dst, opts)
 	}
 
 	// Go through internal mappings.
@@ -175,14 +183,10 @@ func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []b
 	// traverse an unnecessary layer of buffering. This can be fixed by
 	// inlining mm.withInternalMappings() and passing dst subslices directly to
 	// memmap.File.BufferReadAt().
-	n64, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
-		n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), ims)
-		return n, translateIOError(ctx, err)
-	})
-	return int(n64), err
+	return mm.imCopyIn(ctx, addr, dst, opts)
 }
 
-func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst []byte) (int, error) {
+func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
 	var done int
 	for {
 		n, err := mm.as.CopyIn(addr+hostarch.Addr(done), dst[done:])
@@ -197,13 +201,25 @@ func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst [
 			}
 			continue
 		}
+		if _, ok := err.(platform.AddressSpaceIOUnavailable); ok {
+			// Fall back to using internal mappings.
+			return mm.imCopyIn(ctx, addr+hostarch.Addr(done), dst[done:], opts)
+		}
 		return done, translateIOError(ctx, err)
 	}
 }
 
+func (mm *MemoryManager) imCopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
+	n64, err := mm.withInternalMappings(ctx, addr.MustToRange(uint64(len(dst))), hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+		n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), ims)
+		return n, translateIOError(ctx, err)
+	})
+	return int(n64), err
+}
+
 // ZeroOut implements usermem.IO.ZeroOut.
 func (mm *MemoryManager) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
-	ar, ok := mm.CheckIORange(addr, toZero)
+	_, ok := mm.CheckIORange(addr, toZero)
 	if !ok {
 		return 0, linuxerr.EFAULT
 	}
@@ -214,17 +230,14 @@ func (mm *MemoryManager) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero
 
 	// Do AddressSpace IO if applicable.
 	if mm.asioEnabled(opts) && toZero < copyMapMinBytes {
-		return mm.asZeroOut(ctx, addr, toZero)
+		return mm.asZeroOut(ctx, addr, toZero, opts)
 	}
 
 	// Go through internal mappings.
-	return mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) {
-		n, err := safemem.ZeroSeq(dsts)
-		return n, translateIOError(ctx, err)
-	})
+	return mm.imZeroOut(ctx, addr, toZero, opts)
 }
 
-func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64) (int64, error) {
+func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
 	var done int64
 	for {
 		n, err := mm.as.ZeroOut(addr+hostarch.Addr(done), uintptr(toZero-done))
@@ -239,10 +252,21 @@ func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZe
 			}
 			continue
 		}
+		if _, ok := err.(platform.AddressSpaceIOUnavailable); ok {
+			// Fall back to using internal mappings.
+			return mm.imZeroOut(ctx, addr+hostarch.Addr(done), toZero-done, opts)
+		}
 		return done, translateIOError(ctx, err)
 	}
 }
 
+func (mm *MemoryManager) imZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
+	return mm.withInternalMappings(ctx, addr.MustToRange(uint64(toZero)), hostarch.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) {
+		n, err := safemem.ZeroSeq(dsts)
+		return n, translateIOError(ctx, err)
+	})
+}
+
 // CopyOutFrom implements usermem.IO.CopyOutFrom.
 func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
 	if !mm.checkIOVec(ars) {
@@ -273,7 +297,7 @@ func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRange
 			if cplen > int64(bufN)-done {
 				cplen = int64(bufN) - done
 			}
-			n, err := mm.asCopyOut(ctx, ar.Start, buf[int(done):int(done+cplen)])
+			n, err := mm.asCopyOut(ctx, ar.Start, buf[int(done):int(done+cplen)], opts)
 			done += int64(n)
 			if err != nil {
 				return done, err
@@ -306,7 +330,7 @@ func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq
 	for !ars.IsEmpty() {
 		ar := ars.Head()
 		var n int
-		n, bufErr = mm.asCopyIn(ctx, ar.Start, buf[done:done+int(ar.Length())])
+		n, bufErr = mm.asCopyIn(ctx, ar.Start, buf[done:done+int(ar.Length())], opts)
 		done += n
 		if bufErr != nil {
 			break
@@ -348,7 +372,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new
 	}
 
 	// Do AddressSpace IO if applicable.
-	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
+	if mm.asioEnabled(opts) {
 		for {
 			old, err := mm.as.SwapUint32(addr, new)
 			if err == nil {
@@ -360,6 +384,10 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new
 				}
 				continue
 			}
+			if _, ok := err.(platform.AddressSpaceIOUnavailable); ok {
+				// Fall back to using internal mappings.
+				break
+			}
 			return 0, translateIOError(ctx, err)
 		}
 	}
@@ -391,7 +419,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch
 	}
 
 	// Do AddressSpace IO if applicable.
-	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
+	if mm.asioEnabled(opts) {
 		for {
 			prev, err := mm.as.CompareAndSwapUint32(addr, old, new)
 			if err == nil {
@@ -403,6 +431,10 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch
 				}
 				continue
 			}
+			if _, ok := err.(platform.AddressSpaceIOUnavailable); ok {
+				// Fall back to using internal mappings.
+				break
+			}
 			return 0, translateIOError(ctx, err)
 		}
 	}
@@ -434,7 +466,7 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opt
 	}
 
 	// Do AddressSpace IO if applicable.
-	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
+	if mm.asioEnabled(opts) {
 		for {
 			val, err := mm.as.LoadUint32(addr)
 			if err == nil {
@@ -446,6 +478,10 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opt
 				}
 				continue
 			}
+			if _, ok := err.(platform.AddressSpaceIOUnavailable); ok {
+				// Fall back to using internal mappings.
+				break
+			}
 			return 0, translateIOError(ctx, err)
 		}
 	}
diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go
index 0b5a836fef..1071d1b807 100644
--- a/pkg/sentry/mm/lifecycle.go
+++ b/pkg/sentry/mm/lifecycle.go
@@ -28,17 +28,21 @@ import (
 )
 
 // NewMemoryManager returns a new MemoryManager with no mappings and 1 user.
-func NewMemoryManager(p platform.Platform, mf *pgalloc.MemoryFile, sleepForActivation bool) *MemoryManager {
-	return &MemoryManager{
-		p:                  p,
-		mf:                 mf,
-		haveASIO:           p.SupportsAddressSpaceIO(),
-		users:              atomicbitops.FromInt32(1),
-		auxv:               arch.Auxv{},
-		dumpability:        atomicbitops.FromInt32(int32(UserDumpable)),
-		aioManager:         aioManager{contexts: make(map[uint64]*AIOContext)},
-		sleepForActivation: sleepForActivation,
+func NewMemoryManager(p platform.Platform, mf *pgalloc.MemoryFile) (*MemoryManager, error) {
+	as, err := p.NewAddressSpace()
+	if err != nil {
+		return nil, err
 	}
+	return &MemoryManager{
+		p:           p,
+		mf:          mf,
+		haveASIO:    p.SupportsAddressSpaceIO(),
+		users:       atomicbitops.FromInt32(1),
+		as:          as,
+		auxv:        arch.Auxv{},
+		dumpability: atomicbitops.FromInt32(int32(UserDumpable)),
+		aioManager:  aioManager{contexts: make(map[uint64]*AIOContext)},
+	}, nil
 }
 
 // SetMmapLayout initializes mm's layout from the given arch.Context64.
@@ -56,6 +60,11 @@ func (mm *MemoryManager) SetMmapLayout(ac *arch.Context64, r *limits.LimitSet) (
 
 // Fork creates a copy of mm with 1 user, as for Linux syscalls fork() or
 // clone() (without CLONE_VM).
 func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
+	as, err := mm.p.NewAddressSpace()
+	if err != nil {
+		return nil, err
+	}
+
 	mm.AddressSpace().PreFork()
 	defer mm.AddressSpace().PostFork()
 	mm.metadataMu.Lock()
@@ -77,6 +86,7 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
 		haveASIO: mm.haveASIO,
 		layout:   mm.layout,
 		users:    atomicbitops.FromInt32(1),
+		as:       as,
 		brk:      mm.brk,
 		usageAS:  mm.usageAS,
 		dataAS:   mm.dataAS,
@@ -89,11 +99,10 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
 		envv: mm.envv,
 		auxv: append(arch.Auxv(nil), mm.auxv...),
 		// IncRef'd below, once we know that there isn't an error.
-		executable:         mm.executable,
-		dumpability:        atomicbitops.FromInt32(mm.dumpability.Load()),
-		aioManager:         aioManager{contexts: make(map[uint64]*AIOContext)},
-		sleepForActivation: mm.sleepForActivation,
-		vdsoSigReturnAddr:  mm.vdsoSigReturnAddr,
+		executable:        mm.executable,
+		dumpability:       atomicbitops.FromInt32(mm.dumpability.Load()),
+		aioManager:        aioManager{contexts: make(map[uint64]*AIOContext)},
+		vdsoSigReturnAddr: mm.vdsoSigReturnAddr,
 	}
 
 	// Copy vmas.
@@ -117,6 +126,7 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
 		if vma.mappable != nil {
 			if err := vma.mappable.AddMapping(ctx, mm2, vmaAR, vma.off, vma.canWriteMappableLocked()); err != nil {
 				_, droppedIDs = mm2.removeVMAsLocked(ctx, mm2.applicationAddrRange(), droppedIDs)
+				as.Release()
 				return nil, err
 			}
 		}
@@ -271,17 +281,9 @@ func (mm *MemoryManager) DecUsers(ctx context.Context) {
 		exe.DecRef(ctx)
 	}
 
-	mm.activeMu.Lock()
-	// Sanity check.
-	if mm.active.Load() != 0 {
-		panic("active address space lost?")
-	}
-	// Make sure the AddressSpace is returned.
-	if mm.as != nil {
-		mm.as.Release()
-		mm.as = nil
-	}
-	mm.activeMu.Unlock()
+	// User count 0 => no concurrent access to mm.as.
+	mm.as.Release()
+	mm.as = nil
 
 	var droppedIDs []memmap.MappingIdentity
 	mm.mappingMu.Lock()
diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go
index d10a30ce81..14ab06e08d 100644
--- a/pkg/sentry/mm/mm.go
+++ b/pkg/sentry/mm/mm.go
@@ -158,28 +158,9 @@ type MemoryManager struct {
 	// maxRSS is protected by activeMu.
 	maxRSS uint64
 
-	// as is the platform.AddressSpace that pmas are mapped into. active is the
-	// number of contexts that require as to be non-nil; if active == 0, as may
-	// be nil.
-	//
-	// as is protected by activeMu. active is manipulated with atomic memory
-	// operations; transitions to and from zero are additionally protected by
-	// activeMu. (This is because such transitions may need to be atomic with
-	// changes to as.)
-	as     platform.AddressSpace `state:"nosave"`
-	active atomicbitops.Int32    `state:"zerovalue"`
-
-	// unmapAllOnActivate indicates that the next Activate call should activate
-	// an empty AddressSpace.
-	//
-	// This is used to ensure that an AddressSpace cached in
-	// NewAddressSpace is not used after some change in the MemoryManager
-	// or VMAs has made that AddressSpace stale.
-	//
-	// unmapAllOnActivate is protected by activeMu. It must only be set when
-	// there is no active or cached AddressSpace. If as != nil, then
-	// invalidations should be propagated immediately.
-	unmapAllOnActivate bool `state:"nosave"`
+	// as is the platform.AddressSpace that pmas are mapped into. as is immutable
+	// until users becomes 0, at which point as becomes nil.
+ as platform.AddressSpace `state:"nosave"` // If captureInvalidations is true, calls to MM.Invalidate() are recorded // in capturedInvalidations rather than being applied immediately to pmas. @@ -228,11 +209,6 @@ type MemoryManager struct { // must be cloned when CLONE_VM is used. aioManager aioManager - // sleepForActivation indicates whether the task should report to be sleeping - // before trying to activate the address space. When set to true, delays in - // activation are not reported as stuck tasks by the watchdog. - sleepForActivation bool - // vdsoSigReturnAddr is the address of 'vdso_sigreturn'. vdsoSigReturnAddr uint64 diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go index 8d47cef380..645e24d9bd 100644 --- a/pkg/sentry/mm/mm_test.go +++ b/pkg/sentry/mm/mm_test.go @@ -29,9 +29,12 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -func testMemoryManagerWithMmapDirection(ctx context.Context, mmapDirection arch.MmapDirection) *MemoryManager { +func testMemoryManagerWithMmapDirection(ctx context.Context, t *testing.T, mmapDirection arch.MmapDirection) *MemoryManager { p := platform.FromContext(ctx) - mm := NewMemoryManager(p, pgalloc.MemoryFileFromContext(ctx), false) + mm, err := NewMemoryManager(p, pgalloc.MemoryFileFromContext(ctx)) + if err != nil { + t.Fatalf("failed to create MemoryManager: %s", err) + } mm.layout = arch.MmapLayout{ MinAddr: p.MinUserAddress(), MaxAddr: p.MaxUserAddress(), @@ -42,8 +45,8 @@ func testMemoryManagerWithMmapDirection(ctx context.Context, mmapDirection arch. return mm } -func testMemoryManager(ctx context.Context) *MemoryManager { - return testMemoryManagerWithMmapDirection(ctx, arch.MmapBottomUp) +func testMemoryManager(ctx context.Context, t *testing.T) *MemoryManager { + return testMemoryManagerWithMmapDirection(ctx, t, arch.MmapBottomUp) } func (mm *MemoryManager) realUsageAS() uint64 { @@ -52,7 +55,7 @@ func (mm *MemoryManager) realUsageAS() uint64 { func TestUsageASUpdates(t *testing.T) { ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) + mm := testMemoryManager(ctx, t) defer mm.DecUsers(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ @@ -87,7 +90,7 @@ func (mm *MemoryManager) realDataAS() uint64 { func TestDataASUpdates(t *testing.T) { ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) + mm := testMemoryManager(ctx, t) defer mm.DecUsers(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ @@ -133,7 +136,7 @@ func TestBrkDataLimitUpdates(t *testing.T) { limitSet.Set(limits.Data, limits.Limit{}, true /* privileged */) // zero RLIMIT_DATA ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet) - mm := testMemoryManager(ctx) + mm := testMemoryManager(ctx, t) defer mm.DecUsers(ctx) // Try to extend the brk by one page and expect doing so to fail. @@ -146,7 +149,7 @@ func TestBrkDataLimitUpdates(t *testing.T) { // TestIOAfterUnmap ensures that IO fails after unmap. func TestIOAfterUnmap(t *testing.T) { ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) + mm := testMemoryManager(ctx, t) defer mm.DecUsers(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ @@ -186,7 +189,7 @@ func TestIOAfterUnmap(t *testing.T) { // TestIOAfterMProtect tests IO interaction with mprotect permissions. func TestIOAfterMProtect(t *testing.T) { ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) + mm := testMemoryManager(ctx, t) defer mm.DecUsers(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ @@ -239,7 +242,7 @@ func TestIOAfterMProtect(t *testing.T) { // prepared after destruction. 
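
Since NewMemoryManager is now fallible, the mm_test.go hunks above thread *testing.T into the fixture helper and abort via t.Fatalf on construction failure. The same pattern in miniature (package helper and newThing are hypothetical names, not from this patch):

    package helper

    import "testing"

    // newThing is a stand-in for a fallible constructor such as the patched
    // NewMemoryManager.
    func newThing() (*struct{}, error) { return &struct{}{}, nil }

    // testThing mirrors testMemoryManager: helpers that construct fallible
    // fixtures take *testing.T so setup failures abort the test immediately.
    func testThing(t *testing.T) *struct{} {
        t.Helper() // attribute failures to the caller
        v, err := newThing()
        if err != nil {
            t.Fatalf("failed to create thing: %s", err)
        }
        return v
    }

    func TestThing(t *testing.T) {
        _ = testThing(t)
    }
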
func TestAIOPrepareAfterDestroy(t *testing.T) { ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) + mm := testMemoryManager(ctx, t) defer mm.DecUsers(ctx) id, err := mm.NewAIOContext(ctx, 1) @@ -264,7 +267,7 @@ func TestAIOPrepareAfterDestroy(t *testing.T) { // looked up after memory manager is destroyed. func TestAIOLookupAfterDestroy(t *testing.T) { ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) + mm := testMemoryManager(ctx, t) id, err := mm.NewAIOContext(ctx, 1) if err != nil { @@ -332,7 +335,7 @@ func TestGetAllocationDirection(t *testing.T) { for _, test := range testCases { t.Run(test.name, func(t *testing.T) { ctx := contexttest.Context(t) - mm := testMemoryManagerWithMmapDirection(ctx, test.mmapDirection) + mm := testMemoryManagerWithMmapDirection(ctx, t, test.mmapDirection) actual := mm.getAllocationDirection(test.ar, test.vma) if actual != test.expected { t.Errorf("Unexpected allocation direction. Expected: %s, Actual: %s", test.expected, actual) diff --git a/pkg/sentry/mm/save_restore.go b/pkg/sentry/mm/save_restore.go index 6d1c38ec9d..cc4f6ab6f8 100644 --- a/pkg/sentry/mm/save_restore.go +++ b/pkg/sentry/mm/save_restore.go @@ -41,6 +41,13 @@ func (mm *MemoryManager) InvalidateUnsavable(ctx context.Context) error { func (mm *MemoryManager) afterLoad(ctx goContext.Context) { mm.mf = pgalloc.MemoryFileFromContext(ctx) mm.haveASIO = mm.p.SupportsAddressSpaceIO() + if mm.users.Load() != 0 { + as, err := mm.p.NewAddressSpace() + if err != nil { + panic(fmt.Sprintf("failed to create AddressSpace after restore: %v", err)) + } + mm.as = as + } } // afterLoad is invoked by stateify. diff --git a/pkg/sentry/platform/cpuid_amd64.go b/pkg/sentry/platform/cpuid_amd64.go index cead005399..c215b45615 100644 --- a/pkg/sentry/platform/cpuid_amd64.go +++ b/pkg/sentry/platform/cpuid_amd64.go @@ -50,8 +50,7 @@ func TryCPUIDEmulate(ctx context.Context, mm MemoryManager, ac *arch.Context64) taskWrapper: taskWrapper{ctx}, } if _, err := mm.CopyIn(&tasklessCtx, hostarch.Addr(s.Regs.Rip), inst, usermem.IOOpts{ - IgnorePermissions: true, - AddressSpaceActive: true, + IgnorePermissions: true, }); err != nil { return false } diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go index 8725c87934..53822ce843 100644 --- a/pkg/sentry/platform/kvm/context.go +++ b/pkg/sentry/platform/kvm/context.go @@ -134,5 +134,18 @@ func (c *platformContext) PullFullState(as platform.AddressSpace, ac *arch.Conte return nil } -// PrepareSleep implements platform.Context.platform.Context. +// PrepareSleep implements platform.Context.PrepareSleep. func (*platformContext) PrepareSleep() {} + +// PrepareUninterruptibleSleep implements +// platform.Context.PrepareUninterruptibleSleep. +func (*platformContext) PrepareUninterruptibleSleep() {} + +// PrepareStop implements platform.Context.PrepareStop. +func (*platformContext) PrepareStop() {} + +// PrepareExecve implements platform.Context.PrepareExecve. +func (*platformContext) PrepareExecve() {} + +// PrepareExit implements platform.Context.PrepareExit. +func (*platformContext) PrepareExit() {} diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index fb3fbd7480..6a84e7b662 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -137,11 +137,6 @@ func (*KVM) SupportsAddressSpaceIO() bool { return false } -// CooperativelySchedulesAddressSpace implements platform.Platform.CooperativelySchedulesAddressSpace. 
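
The save_restore.go hunk above rebuilds the non-savable AddressSpace after restore, and only for a MemoryManager that still has users; failure is fatal because restored tasks cannot run without one. A sketch of that afterLoad discipline with stand-in types (memoryManager and newAS are illustrative, not the real types):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    type addressSpace struct{}

    type memoryManager struct {
        users atomic.Int32
        as    *addressSpace // not saved; must be rebuilt after restore
    }

    // afterLoad mirrors the patched hook: non-savable state is reconstructed,
    // but only for managers that are still referenced.
    func (mm *memoryManager) afterLoad(newAS func() (*addressSpace, error)) {
        if mm.users.Load() == 0 {
            return // dead mm: as stays nil, matching DecUsers
        }
        as, err := newAS()
        if err != nil {
            panic(fmt.Sprintf("failed to create AddressSpace after restore: %v", err))
        }
        mm.as = as
    }

    func main() {
        mm := &memoryManager{}
        mm.users.Store(1)
        mm.afterLoad(func() (*addressSpace, error) { return &addressSpace{}, nil })
        fmt.Println(mm.as != nil) // true
    }
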
-func (*KVM) CooperativelySchedulesAddressSpace() bool { - return false -} - // MapUnit implements platform.Platform.MapUnit. func (*KVM) MapUnit() uint64 { // We greedily creates PTEs in MapFile, so extremely large mappings can @@ -162,7 +157,7 @@ func (*KVM) MaxUserAddress() hostarch.Addr { } // NewAddressSpace returns a new pagetable root. -func (k *KVM) NewAddressSpace(any) (platform.AddressSpace, <-chan struct{}, error) { +func (k *KVM) NewAddressSpace() (platform.AddressSpace, error) { // Allocate page tables and install system mappings. pageTables := pagetables.NewWithUpper(newAllocator(), k.machine.upperSharedPageTables, ring0.KernelStartAddress) @@ -171,7 +166,7 @@ func (k *KVM) NewAddressSpace(any) (platform.AddressSpace, <-chan struct{}, erro machine: k.machine, pageTables: pageTables, dirtySet: k.machine.newDirtySet(), - }, nil, nil + }, nil } // ConcurrencyCount implements platform.Platform.ConcurrencyCount. diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index d64db6a5fd..193fd65b39 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -133,7 +133,7 @@ func applicationTest(t testHarness, useHostMappings bool, targetFn uintptr, fn f kvmTest(t, func(k *KVM) { // Create new page tables. - as, _, err := k.NewAddressSpace(nil /* invalidator */) + as, err := k.NewAddressSpace() if err != nil { t.Fatalf("can't create new address space: %v", err) } diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go index 272b4df7a4..65fa89f823 100644 --- a/pkg/sentry/platform/platform.go +++ b/pkg/sentry/platform/platform.go @@ -43,15 +43,6 @@ type Platform interface { // unchanged over the lifetime of the Platform. SupportsAddressSpaceIO() bool - // CooperativelySchedulesAddressSpace returns true if the Platform has a - // limited number of AddressSpaces, such that mm.MemoryManager.Deactivate - // should call AddressSpace.Release when there are no goroutines that - // require the mm.MemoryManager to have an active AddressSpace. - // - // The value returned by CooperativelySchedulesAddressSpace is guaranteed - // to remain unchanged over the lifetime of the Platform. - CooperativelySchedulesAddressSpace() bool - // DetectsCPUPreemption returns true if Contexts returned by the Platform // can reliably return ErrContextCPUPreempted. DetectsCPUPreemption() bool @@ -77,21 +68,7 @@ type Platform interface { MaxUserAddress() hostarch.Addr // NewAddressSpace returns a new memory context for this platform. - // - // If mappingsID is not nil, the platform may assume that (1) all calls - // to NewAddressSpace with the same mappingsID represent the same - // (mutable) set of mappings, and (2) the set of mappings has not - // changed since the last time AddressSpace.Release was called on an - // AddressSpace returned by a call to NewAddressSpace with the same - // mappingsID. - // - // If a new AddressSpace cannot be created immediately, a nil - // AddressSpace is returned, along with channel that is closed when - // the caller should retry a call to NewAddressSpace. - // - // In general, this blocking behavior only occurs when - // CooperativelySchedulesAddressSpace (above) returns false. - NewAddressSpace(mappingsID any) (AddressSpace, <-chan struct{}, error) + NewAddressSpace() (AddressSpace, error) // NewContext returns a new execution context. 
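
With the mappingsID parameter and the retry channel gone, NewAddressSpace is a plain fallible constructor and call sites lose a return value. A self-contained sketch of the new shape (the Platform interface is trimmed to the one method; platformImpl and noopAS are hypothetical):

    package main

    import "fmt"

    type AddressSpace interface{ Release() }

    // Platform reflects the patched interface: no mappingsID parameter and no
    // retry channel, just (AddressSpace, error).
    type Platform interface {
        NewAddressSpace() (AddressSpace, error)
    }

    type noopAS struct{}

    func (noopAS) Release() {}

    type platformImpl struct{}

    func (platformImpl) NewAddressSpace() (AddressSpace, error) { return noopAS{}, nil }

    func main() {
        var p Platform = platformImpl{}
        // Before: as, _, err := p.NewAddressSpace(nil /* invalidator */)
        as, err := p.NewAddressSpace()
        if err != nil {
            panic(err)
        }
        defer as.Release()
        fmt.Println("got address space")
    }
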
NewContext(context.Context) Context @@ -264,6 +241,21 @@ type Context interface { // PrepareSleep() is called when the thread switches to the // interruptible sleep state. PrepareSleep() + + // PrepareUninterruptibleSleep is called when the thread switches to the + // uninterruptible sleep state. + PrepareUninterruptibleSleep() + + // PrepareStop is called when the thread enters a kernel.TaskStop. + PrepareStop() + + // PrepareExecve is called when the thread invokes execve(), immediately + // before switching MMs. + PrepareExecve() + + // PrepareExit is called when the thread exits, immediately before + // releasing its MM. + PrepareExit() } // ContextError is one of the possible errors returned by Context.Switch(). @@ -441,6 +433,17 @@ func (f SegmentationFault) Error() string { return fmt.Sprintf("segmentation fault at %#x", f.Addr) } +// AddressSpaceIOUnavailable is an error returned by AddressSpaceIO methods +// when AddressSpaceIO is unavailable for this operation due to implementation +// limitations, such that the caller should fall back to I/O through other +// means (rather than returning an error). +type AddressSpaceIOUnavailable struct{} + +// Error implements error.Error. +func (AddressSpaceIOUnavailable) Error() string { + return "platform.AddressSpaceIO currently unavailable" +} + // Requirements is used to specify platform specific requirements. type Requirements struct { // RequiresCapSysPtrace indicates that the sandbox has to be started with diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go index 619f4c5960..57f89b185a 100644 --- a/pkg/sentry/platform/ptrace/ptrace.go +++ b/pkg/sentry/platform/ptrace/ptrace.go @@ -209,6 +209,19 @@ func (c *context) PullFullState(as platform.AddressSpace, ac *arch.Context64) er // PrepareSleep implements platform.Context.platform.PrepareSleep. func (*context) PrepareSleep() {} +// PrepareUninterruptibleSleep implements +// platform.Context.PrepareUninterruptibleSleep. +func (*context) PrepareUninterruptibleSleep() {} + +// PrepareStop implements platform.Context.PrepareStop. +func (*context) PrepareStop() {} + +// PrepareExecve implements platform.Context.PrepareExecve. +func (*context) PrepareExecve() {} + +// PrepareExit implements platform.Context.PrepareExit. +func (*context) PrepareExit() {} + // PTrace represents a collection of ptrace subprocesses. type PTrace struct { platform.MMapMinAddr @@ -242,11 +255,6 @@ func (*PTrace) SupportsAddressSpaceIO() bool { return false } -// CooperativelySchedulesAddressSpace implements platform.Platform.CooperativelySchedulesAddressSpace. -func (*PTrace) CooperativelySchedulesAddressSpace() bool { - return false -} - // MapUnit implements platform.Platform.MapUnit. func (*PTrace) MapUnit() uint64 { // The host kernel manages page tables and arbitrary-sized mappings @@ -261,9 +269,8 @@ func (*PTrace) MaxUserAddress() hostarch.Addr { } // NewAddressSpace returns a new subprocess. -func (p *PTrace) NewAddressSpace(any) (platform.AddressSpace, <-chan struct{}, error) { - as, err := newSubprocess(globalPool.master.createStub) - return as, nil, err +func (p *PTrace) NewAddressSpace() (platform.AddressSpace, error) { + return newSubprocess(globalPool.master.createStub) } // ConcurrencyCount implements platform.Platform.ConcurrencyCount.
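
The widened Context interface gives platforms a hook at each scheduling transition; platforms that need nothing implement them as no-ops, exactly as ptrace does above. A sketch of the caller side under stand-in names (prepareHooks, noopContext, and blockUninterruptibly are illustrative, not gVisor identifiers):

    package main

    import "fmt"

    // prepareHooks mirrors the new platform.Context methods.
    type prepareHooks interface {
        PrepareSleep()
        PrepareUninterruptibleSleep()
        PrepareStop()
        PrepareExecve()
        PrepareExit()
    }

    // noopContext is the ptrace-style implementation: every hook is a no-op.
    type noopContext struct{}

    func (noopContext) PrepareSleep()                {}
    func (noopContext) PrepareUninterruptibleSleep() {}
    func (noopContext) PrepareStop()                 {}
    func (noopContext) PrepareExecve()               {}
    func (noopContext) PrepareExit()                 {}

    // blockUninterruptibly shows the caller side: notify the platform before
    // entering an uninterruptible wait, then block.
    func blockUninterruptibly(c prepareHooks, ch <-chan struct{}) {
        c.PrepareUninterruptibleSleep()
        <-ch
    }

    func main() {
        ch := make(chan struct{}, 1)
        ch <- struct{}{}
        blockUninterruptibly(noopContext{}, ch)
        fmt.Println("woke up")
    }
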
diff --git a/pkg/sentry/platform/systrap/systrap.go b/pkg/sentry/platform/systrap/systrap.go index 3758f46444..e55ad5231f 100644 --- a/pkg/sentry/platform/systrap/systrap.go +++ b/pkg/sentry/platform/systrap/systrap.go @@ -223,7 +223,7 @@ func (c *platformContext) Release() { } } -// PrepareSleep implements platform.Context.platform.PrepareSleep. +// PrepareSleep implements platform.Context.PrepareSleep. func (c *platformContext) PrepareSleep() { ctx := c.sharedContext if ctx == nil { @@ -235,6 +235,21 @@ func (c *platformContext) PrepareSleep() { } } +// PrepareUninterruptibleSleep implements +// platform.Context.PrepareUninterruptibleSleep. +func (*platformContext) PrepareUninterruptibleSleep() {} + +// PrepareStop implements platform.Context.PrepareStop. +func (c *platformContext) PrepareStop() { + c.PrepareSleep() +} + +// PrepareExecve implements platform.Context.PrepareExecve. +func (*platformContext) PrepareExecve() {} + +// PrepareExit implements platform.Context.PrepareExit. +func (*platformContext) PrepareExit() {} + // Systrap represents a collection of seccomp subprocesses. type Systrap struct { platform.NoCPUPreemptionDetection @@ -312,11 +327,6 @@ func (*Systrap) SupportsAddressSpaceIO() bool { return false } -// CooperativelySchedulesAddressSpace implements platform.Platform.CooperativelySchedulesAddressSpace. -func (*Systrap) CooperativelySchedulesAddressSpace() bool { - return false -} - // MapUnit implements platform.Platform.MapUnit. func (*Systrap) MapUnit() uint64 { // The host kernel manages page tables and arbitrary-sized mappings @@ -331,9 +341,8 @@ func (*Systrap) MaxUserAddress() hostarch.Addr { } // NewAddressSpace returns a new subprocess. -func (p *Systrap) NewAddressSpace(any) (platform.AddressSpace, <-chan struct{}, error) { - as, err := newSubprocess(globalPool.source.createStub, p.memoryFile, true) - return as, nil, err +func (p *Systrap) NewAddressSpace() (platform.AddressSpace, error) { + return newSubprocess(globalPool.source.createStub, p.memoryFile, true) } // NewContext returns an interruptible platformContext. diff --git a/pkg/sentry/platform/systrap/usertrap/usertrap_amd64.go b/pkg/sentry/platform/systrap/usertrap/usertrap_amd64.go index 730f526533..5b5a83fa37 100644 --- a/pkg/sentry/platform/systrap/usertrap/usertrap_amd64.go +++ b/pkg/sentry/platform/systrap/usertrap/usertrap_amd64.go @@ -111,7 +111,7 @@ func (s *State) newTrapLocked(ctx context.Context, mm memoryManager) (hostarch.A // next unused trap. s.nextTrap = 1 s.tableAddr = addr - } else if _, err := hdr.CopyIn(task.OwnCopyContext(usermem.IOOpts{AddressSpaceActive: false}), addr); err != nil { + } else if _, err := hdr.CopyIn(task.OwnCopyContext(usermem.IOOpts{}), addr); err != nil { return 0, err } else { // Read an index of a next unused trap. 
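
In the usertrap hunk above, OwnCopyContext survives only with empty options, and the next two hunks drop the wrapper entirely, passing the task itself to primitive.CopyUint8SliceIn. The underlying idea, that a task already satisfies the copy-context contract when no special options are needed, sketched with hypothetical types (copyContext, task, and readCode are stand-ins):

    package main

    import "fmt"

    // copyContext is the capability CopyUint8SliceIn-style helpers need.
    type copyContext interface {
        CopyInBytes(addr uintptr, dst []byte) (int, error)
    }

    // task implements copyContext directly, so wrapping it in an
    // options-carrying context is only useful when the options differ
    // from the defaults.
    type task struct{ mem []byte }

    func (t *task) CopyInBytes(addr uintptr, dst []byte) (int, error) {
        return copy(dst, t.mem[addr:]), nil
    }

    func readCode(cc copyContext, ip uintptr, n int) ([]byte, error) {
        code := make([]byte, n)
        if _, err := cc.CopyInBytes(ip, code); err != nil {
            return nil, err
        }
        return code, nil
    }

    func main() {
        t := &task{mem: []byte{0x90, 0x90, 0xcc, 0x90}}
        code, _ := readCode(t, 2, 2) // pass the task itself; no wrapper needed
        fmt.Printf("% x\n", code)    // cc 90
    }
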
@@ -200,7 +200,7 @@ func (s *State) PatchSyscall(ctx context.Context, ac *arch.Context64, mm memoryM patchAddr := ac.IP() - uintptr(len(jmpInst)) prevCode := make([]uint8, len(jmpInst)) - if _, err := primitive.CopyUint8SliceIn(task.OwnCopyContext(usermem.IOOpts{AddressSpaceActive: false}), hostarch.Addr(patchAddr), prevCode); err != nil { + if _, err := primitive.CopyUint8SliceIn(task, hostarch.Addr(patchAddr), prevCode); err != nil { return err } @@ -297,7 +297,7 @@ func (s *State) HandleFault(ctx context.Context, ac *arch.Context64, mm memoryMa code := make([]uint8, len(jmpInst)) ip := ac.IP() - faultInstOffset - if _, err := primitive.CopyUint8SliceIn(task.OwnCopyContext(usermem.IOOpts{AddressSpaceActive: false}), hostarch.Addr(ip), code); err != nil { + if _, err := primitive.CopyUint8SliceIn(task, hostarch.Addr(ip), code); err != nil { return err } diff --git a/pkg/sentry/socket/hostinet/socket_unsafe.go b/pkg/sentry/socket/hostinet/socket_unsafe.go index 98d1933951..0152f935c5 100644 --- a/pkg/sentry/socket/hostinet/socket_unsafe.go +++ b/pkg/sentry/socket/hostinet/socket_unsafe.go @@ -64,9 +64,7 @@ func ioctl(ctx context.Context, fd int, io usermem.IO, sysno uintptr, args arch. } var buf [4]byte hostarch.ByteOrder.PutUint32(buf[:], uint32(val)) - _, err := io.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := io.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{}) return 0, err case linux.SIOCGIFFLAGS, linux.SIOCGIFHWADDR, @@ -78,9 +76,6 @@ func ioctl(ctx context.Context, fd int, io usermem.IO, sysno uintptr, args arch. cc := &usermem.IOCopyContext{ Ctx: ctx, IO: io, - Opts: usermem.IOOpts{ - AddressSpaceActive: true, - }, } var ifr linux.IFReq if _, err := ifr.CopyIn(cc, args[2].Pointer()); err != nil { @@ -95,9 +90,6 @@ func ioctl(ctx context.Context, fd int, io usermem.IO, sysno uintptr, args arch. cc := &usermem.IOCopyContext{ Ctx: ctx, IO: io, - Opts: usermem.IOOpts{ - AddressSpaceActive: true, - }, } var ifc linux.IFConf if _, err := ifc.CopyIn(cc, args[2].Pointer()); err != nil { @@ -127,9 +119,6 @@ func ioctl(ctx context.Context, fd int, io usermem.IO, sysno uintptr, args arch. 
cc := &usermem.IOCopyContext{ Ctx: ctx, IO: io, - Opts: usermem.IOOpts{ - AddressSpaceActive: true, - }, } var ifr linux.IFReq if _, err := ifr.CopyIn(cc, args[2].Pointer()); err != nil { diff --git a/pkg/sentry/socket/plugin/stack/socket.go b/pkg/sentry/socket/plugin/stack/socket.go index 058bf6b9cf..876bb8d866 100644 --- a/pkg/sentry/socket/plugin/stack/socket.go +++ b/pkg/sentry/socket/plugin/stack/socket.go @@ -471,9 +471,7 @@ func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, sysno uintp syscall.SIOCGIFTXQLEN: var ifr linux.IFReq ifrBuf := ctx.(*kernel.Task).CopyScratchBuffer(linux.SizeOfIFReq) - if _, err := io.CopyIn(ctx, args[2].Pointer(), ifrBuf, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := io.CopyIn(ctx, args[2].Pointer(), ifrBuf, usermem.IOOpts{}); err != nil { return 0, err } // Decode ifr from ifrBuf @@ -485,9 +483,7 @@ func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, sysno uintp } copy(ifrBuf[0:linux.IFNAMSIZ], ifr.IFName[0:linux.IFNAMSIZ]) copy(ifrBuf[linux.IFNAMSIZ:linux.SizeOfIFReq], ifr.Data[0:linux.SizeOfIFReq-linux.IFNAMSIZ]) - _, err := io.CopyOut(ctx, arg, ifrBuf, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := io.CopyOut(ctx, arg, ifrBuf, usermem.IOOpts{}) return 0, err case syscall.SIOCGIFCONF: @@ -495,9 +491,7 @@ func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, sysno uintp // will need to populate the array of ifreqs. var ifc linux.IFConf ifcBuf := ctx.(*kernel.Task).CopyScratchBuffer(linux.SizeOfIFConf) - if _, err := io.CopyIn(ctx, arg, ifcBuf, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := io.CopyIn(ctx, arg, ifcBuf, usermem.IOOpts{}); err != nil { return 0, err } // Decode ifc from ifcBuf @@ -510,9 +504,7 @@ func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, sysno uintp } hostarch.ByteOrder.PutUint32(ifcBuf[0:4], uint32(ifc.Len)) hostarch.ByteOrder.PutUint64(ifcBuf[8:], ifc.Ptr) - _, err := io.CopyOut(ctx, arg, ifcBuf, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := io.CopyOut(ctx, arg, ifcBuf, usermem.IOOpts{}) return 0, err case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG: @@ -532,9 +524,7 @@ func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, sysno uintp return 0, err } - _, err := io.CopyOut(ctx, arg, buf, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := io.CopyOut(ctx, arg, buf, usermem.IOOpts{}) return 0, err } diff --git a/pkg/sentry/syscalls/linux/points.go b/pkg/sentry/syscalls/linux/points.go index 06c2283d66..de58455691 100644 --- a/pkg/sentry/syscalls/linux/points.go +++ b/pkg/sentry/syscalls/linux/points.go @@ -61,7 +61,7 @@ func getFilePath(t *kernel.Task, fd int32) string { } func getIovecSize(t *kernel.Task, addr hostarch.Addr, iovcnt int) uint64 { - dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{AddressSpaceActive: true}) + dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{}) if err != nil { return 0 } diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index caa655d31a..a2ffc336ef 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -81,9 +81,9 @@ func IoDestroy(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintp } // The task cannot be interrupted during the wait. Equivalent to // TASK_UNINTERRUPTIBLE in Linux. 
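
With AddressSpaceActive removed, the ioctl paths above construct usermem.IOCopyContext from just Ctx and IO, leaving Opts at its zero value for default behavior. A sketch of that zero-value-as-default design with simplified stand-ins (ioOpts and ioCopyContext are not the real types):

    package main

    import "fmt"

    // ioOpts mirrors usermem.IOOpts after the patch: the zero value requests
    // default behavior, so call sites simply omit the field.
    type ioOpts struct {
        IgnorePermissions bool
    }

    // ioCopyContext is a stand-in for usermem.IOCopyContext: it bundles what
    // a CopyIn/CopyOut needs. Opts may now be left as its zero value.
    type ioCopyContext struct {
        io   func(opts ioOpts) error // stand-in for the real usermem.IO
        opts ioOpts
    }

    func (cc *ioCopyContext) copyIn() error { return cc.io(cc.opts) }

    func main() {
        cc := &ioCopyContext{
            io: func(opts ioOpts) error {
                fmt.Printf("copying with opts %+v\n", opts)
                return nil
            },
            // No opts field set: the zero-value ioOpts is the default.
        }
        if err := cc.copyIn(); err != nil {
            panic(err)
        }
    }
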
- t.UninterruptibleSleepStart(true /* deactivate */) + t.UninterruptibleSleepStart() <-ch - t.UninterruptibleSleepFinish(true /* activate */) + t.UninterruptibleSleepFinish() } } @@ -187,19 +187,12 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) return usermem.IOSequence{}, linuxerr.EINVAL } - // Since this I/O will be asynchronous with respect to t's task goroutine, - // we have no guarantee that t's AddressSpace will be active during the - // I/O. switch cb.OpCode { case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PWRITE: - return t.SingleIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{ - AddressSpaceActive: false, - }) + return t.SingleIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{}) case linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITEV: - return t.IovecsIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{ - AddressSpaceActive: false, - }) + return t.IovecsIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{}) case linux.IOCB_CMD_FSYNC, linux.IOCB_CMD_FDSYNC, linux.IOCB_CMD_NOOP: return usermem.IOSequence{}, nil diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index 99e131dde7..491405b251 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -58,9 +58,7 @@ func getdents(t *kernel.Task, args arch.SyscallArguments, isGetdents64 bool) (ui // We want to be sure of the allowed buffer size before calling IterDirents, // because this function depends on IterDirents saving state of which dirent // was the last one that was successfully operated on. - allowedSize, err := t.MemoryManager().EnsurePMAsExist(t, addr, int64(size), usermem.IOOpts{ - AddressSpaceActive: true, - }) + allowedSize, err := t.MemoryManager().EnsurePMAsExist(t, addr, int64(size), usermem.IOOpts{}) if allowedSize == 0 { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go index 4d68af5203..735b32cdb7 100644 --- a/pkg/sentry/syscalls/linux/sys_mempolicy.go +++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go @@ -92,9 +92,7 @@ func copyOutNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32, val uin return linuxerr.EFAULT } remUint64 := (bits - 1) / 64 - if _, err := t.MemoryManager().ZeroOut(t, remAddr, int64(remUint64)*8, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := t.MemoryManager().ZeroOut(t, remAddr, int64(remUint64)*8, usermem.IOOpts{}); err != nil { return err } } diff --git a/pkg/sentry/syscalls/linux/sys_process_vm.go b/pkg/sentry/syscalls/linux/sys_process_vm.go index ced188d6f4..96ad0ef9d9 100644 --- a/pkg/sentry/syscalls/linux/sys_process_vm.go +++ b/pkg/sentry/syscalls/linux/sys_process_vm.go @@ -86,7 +86,6 @@ func processVMOp(t *kernel.Task, args arch.SyscallArguments, op processVMOpType) } localOps := processVMOps{ mm: t.MemoryManager(), - ioOpts: usermem.IOOpts{AddressSpaceActive: true}, iovecs: localIovecs, } remoteIovecs, err := t.CopyInIovecsAsSlice(rvec, riovcnt) @@ -97,10 +96,8 @@ func processVMOp(t *kernel.Task, args arch.SyscallArguments, op processVMOpType) iovecs: remoteIovecs, } if remoteTask == t { - // No need to take remoteTask.mu to fetch the memory manager, - // and we can assume address space is active. + // No need to take remoteTask.mu to fetch the memory manager. 
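
The IoDestroy hunk above keeps the same uninterruptible bracketing, minus the activation flags: mark the task uninterruptible, block on the channel, then unmark. The shape as a runnable sketch (blocker and logBlocker are illustrative; the method names match the Blocker API used in this patch):

    package main

    import "fmt"

    // blocker captures the two Blocker methods used by IoDestroy.
    type blocker interface {
        UninterruptibleSleepStart()
        UninterruptibleSleepFinish()
    }

    type logBlocker struct{}

    func (logBlocker) UninterruptibleSleepStart()  { fmt.Println("uninterruptible sleep: start") }
    func (logBlocker) UninterruptibleSleepFinish() { fmt.Println("uninterruptible sleep: finish") }

    // waitUninterruptible mirrors the patched IoDestroy wait: the task cannot
    // be interrupted while draining, equivalent to TASK_UNINTERRUPTIBLE.
    func waitUninterruptible(t blocker, ch <-chan struct{}) {
        t.UninterruptibleSleepStart()
        <-ch
        t.UninterruptibleSleepFinish()
    }

    func main() {
        ch := make(chan struct{})
        close(ch)
        waitUninterruptible(logBlocker{}, ch)
    }
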
remoteOps.mm = t.MemoryManager() - remoteOps.ioOpts = usermem.IOOpts{AddressSpaceActive: true} } else { // Grab the remoteTask memory manager, and pin it by adding // ourselves as a user. diff --git a/pkg/sentry/syscalls/linux/sys_random.go b/pkg/sentry/syscalls/linux/sys_random.go index 1e6698ecf2..fed7930d27 100644 --- a/pkg/sentry/syscalls/linux/sys_random.go +++ b/pkg/sentry/syscalls/linux/sys_random.go @@ -56,9 +56,7 @@ func GetRandom(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintp return 0, nil, linuxerr.EFAULT } - n, err := t.MemoryManager().CopyOutFrom(t, hostarch.AddrRangeSeqOf(ar), safemem.FromIOReader{rand.Reader}, usermem.IOOpts{ - AddressSpaceActive: true, - }) + n, err := t.MemoryManager().CopyOutFrom(t, hostarch.AddrRangeSeqOf(ar), safemem.FromIOReader{rand.Reader}, usermem.IOOpts{}) if n > 0 { return uintptr(n), nil, nil } diff --git a/pkg/sentry/syscalls/linux/sys_read_write.go b/pkg/sentry/syscalls/linux/sys_read_write.go index 1d0e37fb76..71a2465364 100644 --- a/pkg/sentry/syscalls/linux/sys_read_write.go +++ b/pkg/sentry/syscalls/linux/sys_read_write.go @@ -52,9 +52,7 @@ func Read(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, * } // Get the destination of the read. - dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{ - AddressSpaceActive: true, - }) + dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -77,9 +75,7 @@ func Readv(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, defer file.DecRef(t) // Get the destination of the read. - dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ - AddressSpaceActive: true, - }) + dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -157,9 +153,7 @@ func Pread64(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr } // Get the destination of the read. - dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{ - AddressSpaceActive: true, - }) + dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -188,9 +182,7 @@ func Preadv(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, } // Get the destination of the read. - dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ - AddressSpaceActive: true, - }) + dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -226,9 +218,7 @@ func Preadv2(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr } // Get the destination of the read. - dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ - AddressSpaceActive: true, - }) + dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -306,9 +296,7 @@ func Write(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, } // Get the source of the write. - src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{ - AddressSpaceActive: true, - }) + src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -331,9 +319,7 @@ func Writev(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, defer file.DecRef(t) // Get the source of the write. 
- src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ - AddressSpaceActive: true, - }) + src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -410,9 +396,7 @@ func Pwrite64(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintpt } // Get the source of the write. - src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{ - AddressSpaceActive: true, - }) + src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -441,9 +425,7 @@ func Pwritev(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr } // Get the source of the write. - src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ - AddressSpaceActive: true, - }) + src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{}) if err != nil { return 0, nil, err } @@ -479,9 +461,7 @@ func Pwritev2(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintpt } // Get the source of the write. - src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ - AddressSpaceActive: true, - }) + src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{}) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 275a9b75ad..30c3b29277 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -800,9 +800,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags if msg.IovLen > linux.UIO_MAXIOV { return 0, linuxerr.EMSGSIZE } - dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ - AddressSpaceActive: true, - }) + dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{}) if err != nil { return 0, err } @@ -904,9 +902,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla flags |= linux.MSG_DONTWAIT } - dst, err := t.SingleIOSequence(bufPtr, int(bufLen), usermem.IOOpts{ - AddressSpaceActive: true, - }) + dst, err := t.SingleIOSequence(bufPtr, int(bufLen), usermem.IOOpts{}) if err != nil { return 0, err } @@ -1086,9 +1082,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *vfs.FileDescription, m if msg.IovLen > linux.UIO_MAXIOV { return 0, linuxerr.EMSGSIZE } - src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ - AddressSpaceActive: true, - }) + src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{}) if err != nil { return 0, err } @@ -1153,9 +1147,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags } } - src, err := t.SingleIOSequence(bufPtr, bl, usermem.IOOpts{ - AddressSpaceActive: true, - }) + src, err := t.SingleIOSequence(bufPtr, bl, usermem.IOOpts{}) if err != nil { return 0, err } diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index d0ff084324..fb542aa00b 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -535,9 +535,7 @@ func Getcpu(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, } // We always return node 0. 
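
All of the read, write, and socket paths above now pass an empty usermem.IOOpts, since task I/O no longer needs to assert an active address space. The surviving call shape, sketched with hypothetical stand-ins for kernel.Task and usermem.IOSequence:

    package main

    import "fmt"

    type ioOpts struct{ IgnorePermissions bool }

    type ioSequence struct{ addr, length uint64 }

    // task stands in for kernel.Task; singleIOSequence mirrors the shape of
    // t.SingleIOSequence(addr, n, usermem.IOOpts{}).
    type task struct{}

    func (task) singleIOSequence(addr uint64, n int, opts ioOpts) (ioSequence, error) {
        if n < 0 {
            return ioSequence{}, fmt.Errorf("negative length")
        }
        return ioSequence{addr: addr, length: uint64(n)}, nil
    }

    func main() {
        var t task
        // The zero-value opts literal is all that remains at call sites.
        dst, err := t.singleIOSequence(0x1000, 128, ioOpts{})
        fmt.Println(dst, err)
    }
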
if node != 0 { - if _, err := t.MemoryManager().ZeroOut(t, node, 4, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := t.MemoryManager().ZeroOut(t, node, 4, usermem.IOOpts{}); err != nil { return 0, nil, err } } diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go index e8bafc7166..ba1926f6fe 100644 --- a/pkg/sentry/watchdog/watchdog.go +++ b/pkg/sentry/watchdog/watchdog.go @@ -358,16 +358,8 @@ func (w *Watchdog) doAction(action Action, forceStack bool, msg *bytes.Buffer) { log.TracebackAll(msg.String()) // Attempt to flush metrics, timeout and move on in case metrics are stuck as well. - metricsEmitted := make(chan struct{}, 1) - go func() { // S/R-SAFE: watchdog is stopped during save and restarted after restore. - // Flush metrics before killing process. - metric.EmitMetricUpdate() - metricsEmitted <- struct{}{} - }() - select { - case <-metricsEmitted: - case <-time.After(1 * time.Second): - } + metric.EmitMetricUpdateWithTimeout(time.Second) + panic(fmt.Sprintf("%s\nStack for running G's are skipped while panicking.", msg.String())) default: diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go index 52ec5914ce..41493ac4a1 100644 --- a/pkg/usermem/usermem.go +++ b/pkg/usermem/usermem.go @@ -130,11 +130,6 @@ type IOOpts struct { // by mmap(2) or mprotect(2) will be ignored. (Memory protections required // by the target of the mapping are never ignored.) IgnorePermissions bool - - // If AddressSpaceActive is true, the IO implementation may assume that it - // has an active AddressSpace and can therefore use AddressSpace copying - // without performing activation. See mm/io.go for details. - AddressSpaceActive bool } // IOReadWriter is an io.ReadWriter that reads from / writes to addresses
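
The watchdog hunk above replaces a hand-rolled goroutine-plus-select with metric.EmitMetricUpdateWithTimeout. The pattern it encapsulates looks roughly like this sketch (runWithTimeout is a hypothetical helper, not the real pkg/metric implementation):

    package main

    import (
        "fmt"
        "time"
    )

    // runWithTimeout runs fn in its own goroutine and returns once fn finishes
    // or the timeout elapses, whichever comes first. This is the shape the
    // watchdog previously open-coded around metric.EmitMetricUpdate.
    func runWithTimeout(fn func(), timeout time.Duration) bool {
        done := make(chan struct{}, 1) // buffered: fn's goroutine never blocks on send
        go func() {
            fn()
            done <- struct{}{}
        }()
        select {
        case <-done:
            return true
        case <-time.After(timeout):
            return false // fn may still be running; we just stop waiting
        }
    }

    func main() {
        ok := runWithTimeout(func() { time.Sleep(10 * time.Millisecond) }, time.Second)
        fmt.Println("flushed in time:", ok)
    }
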