Skip to content

Commit a33f8e5

Browse files
tranji-cloudgvisor-bot
authored and committed
kvm: RSEQ: Add RSEQ support
This change adds RSEQ support for platform/kvm.

To support RSEQ, the KVM platform must provide two key capabilities:
(1) A stable, unique CPU ID.
(2) A way to detect when a thread has been preempted.

This implementation provides the necessary support as follows:

CPU ID:
- platform/kvm now advertises the KVM vCPU ID as the cpu_id.

Preemption Detection:
- Compares the last context the CPU ran against the current context being scheduled.
- Compares the context's rseqCPU and the CPU ID retrieved by the platform.

To facilitate this, several new methods are introduced to the platform interface and implemented by platform/kvm:
- HasCPUNumbers()
- NumCPUs()
- DetectsCPUPreemption()
- PreemptCPU()
- PreemptAllCPUs()

A new user-facing, KVM-specific option, use_cpu_numbers, is introduced to effectively feature-guard RSEQ. When use_cpu_numbers is true, vCPU IDs are exposed to the guest, enabling RSEQ support. Setting it to false reverts to the previous behavior and effectively disables RSEQ for applications that rely on it.

PiperOrigin-RevId: 823228013
1 parent c976ee8 commit a33f8e5

File tree

19 files changed

+210
-13
lines changed

19 files changed

+210
-13
lines changed

pkg/sentry/kernel/kernel.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,11 @@ func (k *Kernel) Init(args InitKernelArgs) error {
490490
k.cpuClockTickerWakeCh = make(chan struct{}, 1)
491491
k.cpuClockTickerStopCond.L = &k.runningTasksMu
492492
k.applicationCores = args.ApplicationCores
493+
if args.UseHostCores && k.HasCPUNumbers() {
494+
args.UseHostCores = false
495+
log.Infof("UseHostCores enabled but the platform implements HasCPUNumbers(): setting UseHostCores to false")
496+
}
497+
493498
if args.UseHostCores {
494499
k.useHostCores = true
495500
maxCPU, err := hostcpu.MaxPossibleCPU()
@@ -502,6 +507,15 @@ func (k *Kernel) Init(args InitKernelArgs) error {
502507
k.applicationCores = minAppCores
503508
}
504509
}
510+
511+
if k.HasCPUNumbers() {
512+
if k.applicationCores < uint(k.NumCPUs()) {
513+
log.Infof("ApplicationCores is less than NumCPUs: %d < %d", k.applicationCores, k.NumCPUs())
514+
log.Infof("Setting applicationCores to NumCPUs: %d", k.NumCPUs())
515+
k.applicationCores = uint(k.NumCPUs())
516+
}
517+
}
518+
505519
k.extraAuxv = args.ExtraAuxv
506520
k.vdso = args.Vdso
507521
k.vdsoParams = args.VdsoParams

pkg/sentry/kernel/rseq.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"gvisor.dev/gvisor/pkg/abi/linux"
2121
"gvisor.dev/gvisor/pkg/errors/linuxerr"
2222
"gvisor.dev/gvisor/pkg/hostarch"
23-
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
2423
"gvisor.dev/gvisor/pkg/usermem"
2524
)
2625

@@ -50,7 +49,7 @@ type OldRSeqCriticalRegion struct {
5049

5150
// RSeqAvailable returns true if t supports (old and new) restartable sequences.
5251
func (t *Task) RSeqAvailable() bool {
53-
return t.k.useHostCores && t.k.Platform.DetectsCPUPreemption()
52+
return (t.k.useHostCores || t.k.Platform.HasCPUNumbers()) && t.k.Platform.DetectsCPUPreemption()
5453
}
5554

5655
// SetRSeq registers addr as this thread's rseq structure.
@@ -201,7 +200,7 @@ func (t *Task) rseqUpdateCPU() error {
201200
return nil
202201
}
203202

204-
t.rseqCPU = int32(hostcpu.GetCPU())
203+
t.rseqCPU = t.CPU()
205204

206205
// Update both CPUs, even if one fails.
207206
rerr := t.rseqCopyOutCPU()

pkg/sentry/kernel/task_run.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"gvisor.dev/gvisor/pkg/goid"
2525
"gvisor.dev/gvisor/pkg/hostarch"
2626
"gvisor.dev/gvisor/pkg/refs"
27-
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
2827
"gvisor.dev/gvisor/pkg/sentry/ktime"
2928
"gvisor.dev/gvisor/pkg/sentry/memmap"
3029
"gvisor.dev/gvisor/pkg/sentry/platform"
@@ -207,7 +206,7 @@ func (app *runApp) execute(t *Task) taskRunState {
207206
if t.rseqPreempted {
208207
t.rseqPreempted = false
209208
if t.rseqAddr != 0 || t.oldRSeqCPUAddr != 0 {
210-
t.rseqCPU = int32(hostcpu.GetCPU())
209+
t.rseqCPU = t.CPU()
211210
if err := t.rseqCopyOutCPU(); err != nil {
212211
t.Debugf("Failed to copy CPU to %#x for rseq: %v", t.rseqAddr, err)
213212
t.forceSignal(linux.SIGSEGV, false)

pkg/sentry/kernel/task_sched.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {
365365
return linuxerr.EINVAL
366366
}
367367

368-
if t.k.useHostCores {
368+
if t.k.useHostCores || t.k.Platform.HasCPUNumbers() {
369369
// No-op; pretend the mask was immediately changed back.
370370
return nil
371371
}
@@ -383,6 +383,10 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {
383383

384384
// CPU returns the cpu id for a given task.
385385
func (t *Task) CPU() int32 {
386+
if t.k.Platform.HasCPUNumbers() {
387+
return t.p.LastCPUNumber()
388+
}
389+
386390
if t.k.useHostCores {
387391
return int32(hostcpu.GetCPU())
388392
}

pkg/sentry/platform/kvm/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ type Config struct {
2323
// kernel.InitKernelArgs. It is necessary to forward it to KVM in order
2424
// to initialize the correct amount of vCPUs.
2525
ApplicationCores int
26+
27+
// UseCPUNums indicates whether to use KVM vCPU numbers as CPU numbers in the sentry.
28+
// This is necessary to support features like rseq and KVM based
29+
// preemption.
30+
UseCPUNums bool
2631
}
2732

2833
func (*machine) applyConfig(config *Config) error { return nil }

pkg/sentry/platform/kvm/context.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package kvm
1616

1717
import (
1818
"gvisor.dev/gvisor/pkg/abi/linux"
19+
"gvisor.dev/gvisor/pkg/atomicbitops"
1920
pkgcontext "gvisor.dev/gvisor/pkg/context"
2021
"gvisor.dev/gvisor/pkg/hostarch"
2122
"gvisor.dev/gvisor/pkg/ring0"
@@ -36,6 +37,9 @@ type platformContext struct {
3637

3738
// interrupt is the interrupt platformContext.
3839
interrupt interrupt.Forwarder
40+
41+
// lastUsedCPU is the last CPU ID used by this platformContext.
42+
lastUsedCPU atomicbitops.Int32
3943
}
4044

4145
// tryCPUIDError indicates that CPUID emulation should occur.
@@ -45,7 +49,7 @@ type tryCPUIDError struct{}
4549
func (tryCPUIDError) Error() string { return "cpuid emulation failed" }
4650

4751
// Switch runs the provided platformContext in the given address space.
48-
func (c *platformContext) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, _ int32) (*linux.SignalInfo, hostarch.AccessType, error) {
52+
func (c *platformContext) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, rseqCPU int32) (*linux.SignalInfo, hostarch.AccessType, error) {
4953
as := mm.AddressSpace()
5054
localAS := as.(*addressSpace)
5155

@@ -58,6 +62,20 @@ restart:
5862
c.machine.Put(cpu) // Already preempted.
5963
return nil, hostarch.NoAccess, platform.ErrContextInterrupt
6064
}
65+
// If this CPU was last used to run a different context
66+
// or if this context last ran on a different CPU, then we've
67+
// been preempted.
68+
last := cpu.lastCtx.Swap(c)
69+
c.lastUsedCPU.Store(int32(cpu.id))
70+
preempted := rseqCPU >= 0 && (last != c || rseqCPU != int32(cpu.id))
71+
if preempted {
72+
// Release resources.
73+
c.machine.Put(cpu)
74+
75+
// All done.
76+
c.interrupt.Disable()
77+
return nil, hostarch.NoAccess, platform.ErrContextCPUPreempted
78+
}
6179

6280
// Set the active address space.
6381
//
@@ -136,3 +154,8 @@ func (c *platformContext) PullFullState(as platform.AddressSpace, ac *arch.Conte
136154

137155
// PrepareSleep implements platform.Context.PrepareSleep.
138156
func (*platformContext) PrepareSleep() {}
157+
158+
// LastCPUNumber implements platform.Context.LastCPUNumber.
159+
func (c *platformContext) LastCPUNumber() int32 {
160+
return c.lastUsedCPU.Load()
161+
}

pkg/sentry/platform/kvm/kvm.go

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ type runData struct {
6262

6363
// KVM represents a lightweight VM context.
6464
type KVM struct {
65-
platform.NoCPUPreemptionDetection
66-
6765
// KVM never changes mm_structs.
6866
platform.UseHostProcessMemoryBarrier
6967

@@ -180,6 +178,41 @@ func (k *KVM) ConcurrencyCount() int {
180178
return k.machine.maxVCPUs
181179
}
182180

181+
// HasCPUNumbers implements platform.Platform.HasCPUNumbers.
182+
func (k *KVM) HasCPUNumbers() bool {
183+
return k.machine.useCPUNums
184+
}
185+
186+
// NumCPUs implements platform.Platform.NumCPUs.
187+
func (k *KVM) NumCPUs() int32 {
188+
if !k.HasCPUNumbers() {
189+
panic("platform is not configured to use CPU numbers")
190+
}
191+
return int32(k.machine.maxVCPUs)
192+
}
193+
194+
// DetectsCPUPreemption implements platform.Platform.DetectsCPUPreemption.
195+
func (k *KVM) DetectsCPUPreemption() bool {
196+
return true
197+
}
198+
199+
// PreemptAllCPUs implements platform.Platform.PreemptAllCPUs.
200+
func (k *KVM) PreemptAllCPUs() error {
201+
for _, c := range k.machine.vCPUsByID {
202+
c.lastCtx.Store(nil)
203+
c.BounceToHost()
204+
}
205+
return nil
206+
}
207+
208+
// PreemptCPU implements platform.Platform.PreemptCPU.
209+
func (k *KVM) PreemptCPU(cpu int32) error {
210+
c := k.machine.vCPUsByID[cpu]
211+
c.lastCtx.Store(nil)
212+
c.BounceToHost()
213+
return nil
214+
}
215+
183216
// NewContext returns an interruptible context.
184217
func (k *KVM) NewContext(pkgcontext.Context) platform.Context {
185218
return &platformContext{
@@ -192,6 +225,7 @@ type constructor struct{}
192225
func (*constructor) New(opts platform.Options) (platform.Platform, error) {
193226
return New(opts.DeviceFile, Config{
194227
ApplicationCores: opts.ApplicationCores,
228+
UseCPUNums: opts.UseCPUNums,
195229
})
196230
}
197231

pkg/sentry/platform/kvm/machine.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ type machine struct {
8989

9090
// usedSlots is the set of used physical addresses (not sorted).
9191
usedSlots []uintptr
92+
93+
// useCPUNums indicates whether to enable the use of vCPU numbers as CPU numbers.
94+
useCPUNums bool
9295
}
9396

9497
const (
@@ -216,6 +219,9 @@ type vCPU struct {
216219

217220
// dieState holds state related to vCPU death.
218221
dieState dieState
222+
223+
// lastCtx is the last context that was scheduled on this vCPU.
224+
lastCtx atomic.Pointer[platformContext]
219225
}
220226

221227
type dieState struct {
@@ -275,6 +281,7 @@ func newMachine(vm int, config *Config) (*machine, error) {
275281
m := &machine{
276282
fd: vm,
277283
applicationCores: config.ApplicationCores,
284+
useCPUNums: config.UseCPUNums,
278285
}
279286
m.available.L = &m.mu
280287

pkg/sentry/platform/platform.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,17 @@ type Platform interface {
9696
// NewContext returns a new execution context.
9797
NewContext(context.Context) Context
9898

99+
// PreemptCPU causes all concurrent calls to Context.Switch() on the given CPU, as well
100+
// as the first following call to Context.Switch() for each Context, to
101+
// return ErrContextCPUPreempted.
102+
//
103+
// Precondition(s): cpu must be in the range [0, NumCPUs()).
104+
//
105+
// PreemptCPU is only supported if DetectsCPUPreemption() && HasCPUNumbers() == true.
106+
// Platforms for which this does not hold may panic if PreemptCPU is
107+
// called.
108+
PreemptCPU(cpu int32) error
109+
99110
// PreemptAllCPUs causes all concurrent calls to Context.Switch(), as well
100111
// as the first following call to Context.Switch() for each Context, to
101112
// return ErrContextCPUPreempted.
@@ -121,6 +132,12 @@ type Platform interface {
121132
// in parallel. Concurrent calls to Context.Switch() beyond
122133
// ConcurrencyCount() may block until previous calls have returned.
123134
ConcurrencyCount() int
135+
136+
// HasCPUNumbers returns true if the platform assigns CPU numbers to contexts.
137+
HasCPUNumbers() bool
138+
139+
// NumCPUs returns the number of CPUs on the platform.
140+
NumCPUs() int32
124141
}
125142

126143
// NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and
@@ -137,6 +154,25 @@ func (NoCPUPreemptionDetection) PreemptAllCPUs() error {
137154
panic("This platform does not support CPU preemption detection")
138155
}
139156

157+
// NoCPUNumbers implements Platform.HasCPUNumbers for platforms that do
158+
// not support it.
159+
type NoCPUNumbers struct{}
160+
161+
// HasCPUNumbers implements Platform.HasCPUNumbers.
162+
func (NoCPUNumbers) HasCPUNumbers() bool {
163+
return false
164+
}
165+
166+
// NumCPUs implements Platform.NumCPUs.
167+
func (NoCPUNumbers) NumCPUs() int32 {
168+
panic("platform does not support CPU numbers")
169+
}
170+
171+
// PreemptCPU implements Platform.PreemptCPU.
172+
func (NoCPUNumbers) PreemptCPU(cpu int32) error {
173+
panic("platform does not support preempting a specific CPU")
174+
}
175+
140176
// UseHostGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier and
141177
// Platform.GlobalMemoryBarrier by invoking equivalent functionality on the
142178
// host.
@@ -264,6 +300,16 @@ type Context interface {
264300
// PrepareSleep() is called when the thread switches to the
265301
// interruptible sleep state.
266302
PrepareSleep()
303+
304+
// LastCPUNumber returns the last CPU number that this context was running on.
305+
// If the context never ran on a CPU, it may return any valid CPU number, as long as the first
306+
// call to Switch will detect that the CPU number is incorrect and return ErrContextCPUPreempted.
307+
LastCPUNumber() int32
308+
}
309+
310+
// LastCPUNumber implements Context.LastCPUNumber.
311+
func (NoCPUNumbers) LastCPUNumber() int32 {
312+
panic("context does not support last CPU number")
267313
}
268314

269315
// ContextError is one of the possible errors returned by Context.Switch().
@@ -538,6 +584,11 @@ type Options struct {
538584
// ApplicationCores is used by KVM to determine the correct amount of
539585
// vCPUs to create.
540586
ApplicationCores int
587+
588+
// UseCPUNums is used by KVM to determine whether to use KVM CPU numbers
589+
// as CPU numbers in the sentry. This is necessary to support features like
590+
// rseq.
591+
UseCPUNums bool
541592
}
542593

543594
// Constructor represents a platform type.

pkg/sentry/platform/ptrace/ptrace.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ var (
7474

7575
type context struct {
7676
archContext
77+
platform.NoCPUNumbers
7778

7879
// signalInfo is the signal info, if and when a signal is received.
7980
signalInfo linux.SignalInfo
@@ -214,6 +215,7 @@ type PTrace struct {
214215
platform.MMapMinAddr
215216
platform.NoCPUPreemptionDetection
216217
platform.UseHostGlobalMemoryBarrier
218+
platform.NoCPUNumbers
217219
}
218220

219221
// New returns a new ptrace-based implementation of the platform interface.

0 commit comments

Comments
 (0)