Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pkg/sentry/kernel/kernel.go
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,11 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.cpuClockTickerWakeCh = make(chan struct{}, 1)
k.cpuClockTickerStopCond.L = &k.runningTasksMu
k.applicationCores = args.ApplicationCores
if args.UseHostCores && k.HasCPUNumbers() {
args.UseHostCores = false
log.Infof("UseHostCores enabled but the platform implements HasCPUNumbers(): setting UseHostCores to false")
}

if args.UseHostCores {
k.useHostCores = true
maxCPU, err := hostcpu.MaxPossibleCPU()
Expand All @@ -502,6 +507,15 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.applicationCores = minAppCores
}
}

if k.HasCPUNumbers() {
if k.applicationCores < uint(k.NumCPUs()) {
log.Infof("ApplicationCores is less than NumCPUs: %d < %d", k.applicationCores, k.NumCPUs())
log.Infof("Setting applicationCores to NumCPUs: %d", k.NumCPUs())
k.applicationCores = uint(k.NumCPUs())
}
}

k.extraAuxv = args.ExtraAuxv
k.vdso = args.Vdso
k.vdsoParams = args.VdsoParams
Expand Down
5 changes: 2 additions & 3 deletions pkg/sentry/kernel/rseq.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
"gvisor.dev/gvisor/pkg/usermem"
)

Expand Down Expand Up @@ -50,7 +49,7 @@ type OldRSeqCriticalRegion struct {

// RSeqAvailable returns true if t supports (old and new) restartable sequences.
//
// Support requires that CPU numbers come either from the host cores or from
// the platform itself, and that the platform detects CPU preemption.
func (t *Task) RSeqAvailable() bool {
	// A single return: a leftover host-cores-only return ahead of this check
	// would make the HasCPUNumbers() alternative unreachable.
	hasCPUNumbers := t.k.useHostCores || t.k.Platform.HasCPUNumbers()
	return hasCPUNumbers && t.k.Platform.DetectsCPUPreemption()
}

// SetRSeq registers addr as this thread's rseq structure.
Expand Down Expand Up @@ -201,7 +200,7 @@ func (t *Task) rseqUpdateCPU() error {
return nil
}

t.rseqCPU = int32(hostcpu.GetCPU())
t.rseqCPU = t.CPU()

// Update both CPUs, even if one fails.
rerr := t.rseqCopyOutCPU()
Expand Down
3 changes: 1 addition & 2 deletions pkg/sentry/kernel/task_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/goid"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
"gvisor.dev/gvisor/pkg/sentry/ktime"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/platform"
Expand Down Expand Up @@ -207,7 +206,7 @@ func (app *runApp) execute(t *Task) taskRunState {
if t.rseqPreempted {
t.rseqPreempted = false
if t.rseqAddr != 0 || t.oldRSeqCPUAddr != 0 {
t.rseqCPU = int32(hostcpu.GetCPU())
t.rseqCPU = t.CPU()
if err := t.rseqCopyOutCPU(); err != nil {
t.Debugf("Failed to copy CPU to %#x for rseq: %v", t.rseqAddr, err)
t.forceSignal(linux.SIGSEGV, false)
Expand Down
6 changes: 5 additions & 1 deletion pkg/sentry/kernel/task_sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {
return linuxerr.EINVAL
}

if t.k.useHostCores {
if t.k.useHostCores || t.k.Platform.HasCPUNumbers() {
// No-op; pretend the mask was immediately changed back.
return nil
}
Expand All @@ -383,6 +383,10 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {

// CPU returns the cpu id for a given task.
func (t *Task) CPU() int32 {
if t.k.Platform.HasCPUNumbers() {
return t.p.LastCPUNumber()
}

if t.k.useHostCores {
return int32(hostcpu.GetCPU())
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/sentry/platform/kvm/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ type Config struct {
// kernel.InitKernelArgs. It is necessary to forward it to KVM in order
// to initialize the correct amount of vCPUs.
ApplicationCores int

// UseCPUNums indicates whether KVM CPU numbers are used as CPU numbers in
// the sentry. This is necessary to support features like rseq and KVM-based
// preemption.
UseCPUNums bool
}

// applyConfig receives the platform configuration. This implementation has
// nothing to apply and always returns nil.
func (*machine) applyConfig(config *Config) error {
	return nil
}
25 changes: 24 additions & 1 deletion pkg/sentry/platform/kvm/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package kvm

import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/atomicbitops"
pkgcontext "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
Expand All @@ -36,6 +37,9 @@ type platformContext struct {

// interrupt is the interrupt platformContext.
interrupt interrupt.Forwarder

// lastUsedCPU is the last CPU ID used by this platformContext.
lastUsedCPU atomicbitops.Int32
}

// tryCPUIDError indicates that CPUID emulation should occur.
Expand All @@ -45,7 +49,7 @@ type tryCPUIDError struct{}
func (tryCPUIDError) Error() string { return "cpuid emulation failed" }

// Switch runs the provided platformContext in the given address space.
func (c *platformContext) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, _ int32) (*linux.SignalInfo, hostarch.AccessType, error) {
func (c *platformContext) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, rseqCPU int32) (*linux.SignalInfo, hostarch.AccessType, error) {
as := mm.AddressSpace()
localAS := as.(*addressSpace)

Expand All @@ -58,6 +62,20 @@ restart:
c.machine.Put(cpu) // Already preempted.
return nil, hostarch.NoAccess, platform.ErrContextInterrupt
}
// If this CPU was last used to run a different context
// or if this context last ran on a different CPU, then we've
// been preempted.
last := cpu.lastCtx.Swap(c)
c.lastUsedCPU.Store(int32(cpu.id))
preempted := rseqCPU >= 0 && (last != c || rseqCPU != int32(cpu.id))
if preempted {
// Release resources.
c.machine.Put(cpu)

// All done.
c.interrupt.Disable()
return nil, hostarch.NoAccess, platform.ErrContextCPUPreempted
}

// Set the active address space.
//
Expand Down Expand Up @@ -136,3 +154,8 @@ func (c *platformContext) PullFullState(as platform.AddressSpace, ac *arch.Conte

// PrepareSleep implements platform.Context.PrepareSleep.
//
// It is a no-op for this context.
func (*platformContext) PrepareSleep() {}

// LastCPUNumber implements platform.Context.LastCPUNumber.
//
// It reports the value most recently stored in lastUsedCPU.
func (c *platformContext) LastCPUNumber() int32 {
	last := c.lastUsedCPU.Load()
	return last
}
38 changes: 36 additions & 2 deletions pkg/sentry/platform/kvm/kvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ type runData struct {

// KVM represents a lightweight VM context.
type KVM struct {
platform.NoCPUPreemptionDetection

// KVM never changes mm_structs.
platform.UseHostProcessMemoryBarrier

Expand Down Expand Up @@ -180,6 +178,41 @@ func (k *KVM) ConcurrencyCount() int {
return k.machine.maxVCPUs
}

// HasCPUNumbers implements platform.Platform.HasCPUNumbers.
//
// The result mirrors the machine's useCPUNums setting.
func (k *KVM) HasCPUNumbers() bool {
	enabled := k.machine.useCPUNums
	return enabled
}

// NumCPUs implements platform.Platform.NumCPUs.
//
// When the platform is configured to use CPU numbers it reports the machine's
// maxVCPUs; otherwise it panics.
func (k *KVM) NumCPUs() int32 {
	if k.HasCPUNumbers() {
		return int32(k.machine.maxVCPUs)
	}
	panic("platform is not configured to use CPU numbers")
}

// DetectsCPUPreemption implements platform.Platform.DetectsCPUPreemption.
//
// It unconditionally reports true.
func (*KVM) DetectsCPUPreemption() bool {
	return true
}

// PreemptAllCPUs implements platform.Platform.PreemptAllCPUs.
//
// For every vCPU it clears lastCtx, so the context recorded for that vCPU no
// longer matches whichever context runs there next, and then bounces the vCPU
// to the host. It always returns nil.
func (k *KVM) PreemptAllCPUs() error {
	for _, c := range k.machine.vCPUsByID {
		if c == nil {
			// NOTE(review): presumably vCPUsByID can hold nil entries for
			// vCPUs that have not been created yet; confirm against the
			// machine code. A missing vCPU has nothing to preempt.
			continue
		}
		c.lastCtx.Store(nil)
		c.BounceToHost()
	}
	return nil
}

// PreemptCPU implements platform.Platform.PreemptCPU.
//
// It clears lastCtx on the vCPU identified by cpu and bounces that vCPU to
// the host. It always returns nil.
//
// Precondition: cpu must be a valid index into the machine's vCPU table; an
// out-of-range value still panics on the lookup below.
func (k *KVM) PreemptCPU(cpu int32) error {
	c := k.machine.vCPUsByID[cpu]
	if c == nil {
		// NOTE(review): presumably an entry can be nil when the vCPU has not
		// been created yet; confirm against the machine code. There is then
		// nothing running on it to preempt.
		return nil
	}
	c.lastCtx.Store(nil)
	c.BounceToHost()
	return nil
}

// NewContext returns an interruptible context.
func (k *KVM) NewContext(pkgcontext.Context) platform.Context {
return &platformContext{
Expand All @@ -192,6 +225,7 @@ type constructor struct{}
// New builds a platform from opts, forwarding the device file together with
// the ApplicationCores and UseCPUNums settings.
func (*constructor) New(opts platform.Options) (platform.Platform, error) {
	cfg := Config{
		ApplicationCores: opts.ApplicationCores,
		UseCPUNums:       opts.UseCPUNums,
	}
	return New(opts.DeviceFile, cfg)
}

Expand Down
7 changes: 7 additions & 0 deletions pkg/sentry/platform/kvm/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ type machine struct {

// usedSlots is the set of used physical addresses (not sorted).
usedSlots []uintptr

// useCPUNums indicates whether to enable the use of vCPU numbers as CPU numbers.
useCPUNums bool
}

const (
Expand Down Expand Up @@ -216,6 +219,9 @@ type vCPU struct {

// dieState holds state related to vCPU death.
dieState dieState

// lastCtx is the last context that was scheduled on this vCPU.
lastCtx atomic.Pointer[platformContext]
}

type dieState struct {
Expand Down Expand Up @@ -275,6 +281,7 @@ func newMachine(vm int, config *Config) (*machine, error) {
m := &machine{
fd: vm,
applicationCores: config.ApplicationCores,
useCPUNums: config.UseCPUNums,
}
m.available.L = &m.mu

Expand Down
51 changes: 51 additions & 0 deletions pkg/sentry/platform/platform.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,17 @@ type Platform interface {
// NewContext returns a new execution context.
NewContext(context.Context) Context

// PreemptCPU causes all concurrent calls to Context.Switch() on the given
// CPU, as well as the first following call to Context.Switch() for each
// Context, to return ErrContextCPUPreempted.
//
// Precondition(s): cpu must be in the range [0, NumCPUs()).
//
// PreemptCPU is only supported if DetectsCPUPreemption() && HasCPUNumbers()
// == true. Platforms for which this does not hold may panic if PreemptCPU is
// called.
PreemptCPU(cpu int32) error

// PreemptAllCPUs causes all concurrent calls to Context.Switch(), as well
// as the first following call to Context.Switch() for each Context, to
// return ErrContextCPUPreempted.
Expand All @@ -121,6 +132,12 @@ type Platform interface {
// in parallel. Concurrent calls to Context.Switch() beyond
// ConcurrencyCount() may block until previous calls have returned.
ConcurrencyCount() int

// HasCPUNumbers returns true if the platform assigns CPU numbers to contexts.
HasCPUNumbers() bool

// NumCPUs returns the number of CPUs on the platform.
NumCPUs() int32
}

// NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and
Expand All @@ -137,6 +154,25 @@ func (NoCPUPreemptionDetection) PreemptAllCPUs() error {
panic("This platform does not support CPU preemption detection")
}

// NoCPUNumbers supplies the CPU-number methods of Platform for platforms
// that do not support CPU numbers: HasCPUNumbers reports false and the
// remaining methods panic.
type NoCPUNumbers struct{}

// HasCPUNumbers implements Platform.HasCPUNumbers. It always reports false.
func (NoCPUNumbers) HasCPUNumbers() bool {
	const supported = false
	return supported
}

// NumCPUs implements Platform.NumCPUs. It always panics.
func (NoCPUNumbers) NumCPUs() int32 {
	const reason = "platform does not support CPU numbers"
	panic(reason)
}

// PreemptCPU implements Platform.PreemptCPU. It always panics, whatever the
// value of cpu.
func (NoCPUNumbers) PreemptCPU(cpu int32) error {
	const reason = "platform does not support preempting a specific CPU"
	panic(reason)
}

// UseHostGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier and
// Platform.GlobalMemoryBarrier by invoking equivalent functionality on the
// host.
Expand Down Expand Up @@ -264,6 +300,16 @@ type Context interface {
// PrepareSleep() is called when the thread switches to the
// interruptible sleep state.
PrepareSleep()

// LastCPUNumber returns the last CPU number that this context was running on.
// If the context never ran on a CPU, it may return any valid CPU number, as long as the first
// call to Switch will detect that the CPU number is incorrect and return ErrContextCPUPreempted.
LastCPUNumber() int32
}

// LastCPUNumber implements Context.LastCPUNumber. It always panics.
func (NoCPUNumbers) LastCPUNumber() int32 {
	const reason = "context does not support last CPU number"
	panic(reason)
}

// ContextError is one of the possible errors returned by Context.Switch().
Expand Down Expand Up @@ -538,6 +584,11 @@ type Options struct {
// ApplicationCores is used by KVM to determine the correct amount of
// vCPUs to create.
ApplicationCores int

// UseCPUNums is used by KVM to determine whether to use KVM CPU numbers
// as CPU numbers in the sentry. This is necessary to support features like
// rseq.
UseCPUNums bool
}

// Constructor represents a platform type.
Expand Down
2 changes: 2 additions & 0 deletions pkg/sentry/platform/ptrace/ptrace.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ var (

type context struct {
archContext
platform.NoCPUNumbers

// signalInfo is the signal info, if and when a signal is received.
signalInfo linux.SignalInfo
Expand Down Expand Up @@ -214,6 +215,7 @@ type PTrace struct {
platform.MMapMinAddr
platform.NoCPUPreemptionDetection
platform.UseHostGlobalMemoryBarrier
platform.NoCPUNumbers
}

// New returns a new ptrace-based implementation of the platform interface.
Expand Down
3 changes: 3 additions & 0 deletions pkg/sentry/platform/systrap/systrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ var (

// platformContext is an implementation of the platform context.
type platformContext struct {
platform.NoCPUNumbers

// signalInfo is the signal info, if and when a signal is received.
signalInfo linux.SignalInfo

Expand Down Expand Up @@ -239,6 +241,7 @@ func (c *platformContext) PrepareSleep() {
type Systrap struct {
platform.NoCPUPreemptionDetection
platform.UseHostGlobalMemoryBarrier
platform.NoCPUNumbers

// memoryFile is used to create a stub sysmsg stack which is shared with
// the Sentry. Since memoryFile is platform-private, it is never restored,
Expand Down
14 changes: 12 additions & 2 deletions pkg/sentry/syscalls/linux/sys_membarrier.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
func Membarrier(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
cmd := args[0].Int()
flags := args[1].Uint()
cpu := args[2].Int()

switch cmd {
case linux.MEMBARRIER_CMD_QUERY:
Expand Down Expand Up @@ -83,8 +84,17 @@ func Membarrier(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uint
if !t.MemoryManager().IsMembarrierRSeqEnabled() {
return 0, nil, linuxerr.EPERM
}
// MEMBARRIER_CMD_FLAG_CPU and cpu_id are ignored since we don't have
// the ability to preempt specific CPUs.

if flags&linux.MEMBARRIER_CMD_FLAG_CPU != 0 && cpu >= 0 && t.Kernel().Platform.HasCPUNumbers() {
// Per membarrier(2), an out of range cpu# that is >= 0 is a no-op.
if cpu >= t.Kernel().Platform.NumCPUs() {
return 0, nil, nil
}
return 0, nil, t.Kernel().Platform.PreemptCPU(cpu)
}

// Preempt all CPUs if the platform does not support CPU numbers or cpu # is less than 0 -
// this is the same behavior as Linux.
return 0, nil, t.Kernel().Platform.PreemptAllCPUs()
case linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
if flags != 0 {
Expand Down
Loading