Skip to content

Commit e04c9e2

Browse files
committed
Merge release-20250820.0-30-g49a6cc35f (automated)
2 parents 0a608df + 49a6cc3 commit e04c9e2

File tree

13 files changed

+572
-112
lines changed

13 files changed

+572
-112
lines changed

pkg/ring0/kernel_amd64.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ func (c *CPU) CR4() uint64 {
205205
if hasUMIP {
206206
cr4 |= _CR4_UMIP
207207
}
208+
if hasLA57 {
209+
cr4 |= _CR4_LA57
210+
}
208211
return cr4
209212
}
210213

pkg/ring0/lib_amd64.go

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ var (
8888
hasXSAVEOPT bool
8989
hasXSAVE bool
9090
hasFSGSBASE bool
91+
hasLA57 bool
9192
validXCR0Mask uintptr
9293
localXCR0 uintptr
9394
)
@@ -100,19 +101,12 @@ var (
100101
func Init(fs cpuid.FeatureSet) {
101102
// Initialize all sizes.
102103
VirtualAddressBits = uintptr(fs.VirtualAddressBits())
103-
// TODO(gvisor.dev/issue/7349): introduce support for 5-level paging.
104-
// Four-level page tables allows to address up to 48-bit virtual
105-
// addresses.
106-
if VirtualAddressBits > 48 {
107-
VirtualAddressBits = 48
108-
}
109104
if PhysicalAddressBits == 0 {
110105
PhysicalAddressBits = uintptr(fs.PhysicalAddressBits())
111106
}
112107
UserspaceSize = uintptr(1) << (VirtualAddressBits - 1)
113108
MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(hostarch.PageSize-1)
114109
KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
115-
116110
// Initialize all functions.
117111
hasSMEP = fs.HasFeature(cpuid.X86FeatureSMEP)
118112
hasSMAP = fs.HasFeature(cpuid.X86FeatureSMAP)
@@ -121,6 +115,7 @@ func Init(fs cpuid.FeatureSet) {
121115
hasXSAVEOPT = fs.UseXsaveopt()
122116
hasXSAVE = fs.UseXsave()
123117
hasFSGSBASE = fs.HasFeature(cpuid.X86FeatureFSGSBase)
118+
hasLA57 = fs.HasFeature(cpuid.X86FeatureLA57)
124119
validXCR0Mask = uintptr(fs.ValidXCR0Mask())
125120
if hasXSAVE {
126121
XCR0DisabledMask := uintptr((1 << 9) | (1 << 17) | (1 << 18))

pkg/ring0/pagetables/pagetables.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ type PageTables struct {
5656
// readOnlyShared indicates the Pagetables are read-only and
5757
// own the ranges that are shared with other Pagetables.
5858
readOnlyShared bool
59+
60+
// largeAddressesEnabled indicates the Pagetables support addresses
61+
// larger than 48 bits.
62+
largeAddressesEnabled bool
5963
}
6064

6165
// Init initializes a set of PageTables.

pkg/ring0/pagetables/pagetables_amd64.go

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,34 @@
1414

1515
package pagetables
1616

17+
import (
18+
"gvisor.dev/gvisor/pkg/cpuid"
19+
)
20+
1721
// Address constraints.
18-
//
19-
// The lowerTop and upperBottom currently apply to four-level pagetables;
20-
// additional refactoring would be necessary to support five-level pagetables.
21-
const (
22-
lowerTop = 0x00007fffffffffff
23-
upperBottom = 0xffff800000000000
22+
var (
23+
lowerTop uintptr = 0x00007fffffffffff
24+
upperBottom uintptr = 0xffff800000000000
25+
pgdShift = 39
26+
pgdMask uintptr = 0x1ff << pgdShift
27+
pgdSize uintptr = 1 << pgdShift
28+
)
2429

30+
const (
2531
pteShift = 12
2632
pmdShift = 21
2733
pudShift = 30
28-
pgdShift = 39
34+
p4dShift = 39
2935

3036
pteMask = 0x1ff << pteShift
3137
pmdMask = 0x1ff << pmdShift
3238
pudMask = 0x1ff << pudShift
33-
pgdMask = 0x1ff << pgdShift
39+
p4dMask = 0x1ff << p4dShift
3440

3541
pteSize = 1 << pteShift
3642
pmdSize = 1 << pmdShift
3743
pudSize = 1 << pudShift
38-
pgdSize = 1 << pgdShift
44+
p4dSize = 1 << p4dShift
3945

4046
executeDisable = 1 << 63
4147
entriesPerPage = 512
@@ -47,6 +53,16 @@ const (
4753
//
4854
//go:nosplit
4955
func (p *PageTables) InitArch(allocator Allocator) {
56+
featureSet := cpuid.HostFeatureSet()
57+
if featureSet.HasFeature(cpuid.X86FeatureLA57) {
58+
p.largeAddressesEnabled = true
59+
lowerTop = 0x00FFFFFFFFFFFFFF
60+
upperBottom = 0xFF00000000000000
61+
pgdShift = 48
62+
pgdMask = 0x1ff << pgdShift
63+
pgdSize = 1 << pgdShift
64+
}
65+
5066
if p.upperSharedPageTables != nil {
5167
p.cloneUpperShared()
5268
}
@@ -58,10 +74,10 @@ func pgdIndex(upperStart uintptr) uintptr {
5874
panic("upperStart should be pgd size aligned")
5975
}
6076
if upperStart >= upperBottom {
61-
return entriesPerPage/2 + (upperStart-upperBottom)/pgdSize
77+
return entriesPerPage/2 + (upperStart-upperBottom)>>pgdShift
6278
}
6379
if upperStart < lowerTop {
64-
return upperStart / pgdSize
80+
return upperStart >> pgdShift
6581
}
6682
panic("upperStart should be in canonical range")
6783
}

pkg/ring0/pagetables/walker_amd64.go

Lines changed: 74 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -262,30 +262,85 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) bool {
262262
nextBoundary := addrEnd(start, end, pgdSize)
263263
pgdIndex := uint16((start & pgdMask) >> pgdShift)
264264
pgdEntry := &w.pageTables.root[pgdIndex]
265-
if !pgdEntry.Valid() {
266-
if !w.visitor.requiresAlloc() {
267-
// Skip over this entry.
268-
start = nextBoundary
269-
continue
265+
if !w.pageTables.largeAddressesEnabled {
266+
if !pgdEntry.Valid() {
267+
if !w.visitor.requiresAlloc() {
268+
// Skip over this entry.
269+
start = nextBoundary
270+
continue
271+
}
272+
273+
// Allocate a new pgd.
274+
pudEntries = w.pageTables.Allocator.NewPTEs() // escapes: depends on allocator.
275+
pgdEntry.setPageTable(w.pageTables, pudEntries)
276+
} else {
277+
pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) // escapes: see above.
278+
}
279+
// Map the next level.
280+
ok, clearPUDEntries := w.walkPUDs(pudEntries, start, nextBoundary)
281+
if !ok {
282+
return false
270283
}
271284

272-
// Allocate a new pgd.
273-
pudEntries = w.pageTables.Allocator.NewPTEs() // escapes: depends on allocator.
274-
pgdEntry.setPageTable(w.pageTables, pudEntries)
285+
// Check if we no longer need this page table.
286+
if clearPUDEntries == entriesPerPage {
287+
pgdEntry.Clear()
288+
w.pageTables.Allocator.FreePTEs(pudEntries) // escapes: see above.
289+
}
275290
} else {
276-
pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) // escapes: see above.
277-
}
291+
var p4dEntries *PTEs
292+
if !pgdEntry.Valid() {
293+
if !w.visitor.requiresAlloc() {
294+
// Skip over this entry.
295+
start = nextBoundary
296+
continue
297+
}
278298

279-
// Map the next level.
280-
ok, clearPUDEntries := w.walkPUDs(pudEntries, start, nextBoundary)
281-
if !ok {
282-
return false
283-
}
299+
// Allocate a new p4d table for this pgd entry.
300+
p4dEntries = w.pageTables.Allocator.NewPTEs() // escapes: depends on allocator.
301+
pgdEntry.setPageTable(w.pageTables, p4dEntries)
302+
} else {
303+
p4dEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) // escapes: see above.
304+
}
305+
var clearP4DEntries uint16 = 0
306+
p4dStart := start
307+
p4dEnd := nextBoundary
308+
for p4dStart < p4dEnd {
309+
nextP4DBoundary := addrEnd(p4dStart, p4dEnd, p4dSize)
310+
p4dIndex := uint16((p4dStart & p4dMask) >> p4dShift)
311+
p4dEntry := &p4dEntries[p4dIndex]
312+
if !p4dEntry.Valid() {
313+
if !w.visitor.requiresAlloc() {
314+
// Skip over this entry.
315+
clearP4DEntries++
316+
p4dStart = nextP4DBoundary
317+
continue
318+
}
319+
// Allocate a new pud.
320+
pudEntries = w.pageTables.Allocator.NewPTEs() // escapes: depends on allocator.
321+
p4dEntry.setPageTable(w.pageTables, pudEntries)
322+
} else {
323+
pudEntries = w.pageTables.Allocator.LookupPTEs(p4dEntry.Address()) // escapes: see above.
324+
}
325+
326+
ok, clearPUDEntries := w.walkPUDs(pudEntries, p4dStart, nextP4DBoundary)
327+
if !ok {
328+
return false
329+
}
330+
if clearPUDEntries == entriesPerPage {
331+
p4dEntry.Clear()
332+
w.pageTables.Allocator.FreePTEs(pudEntries) // escapes: see above.
333+
clearP4DEntries++
334+
}
284335

285-
// Check if we no longer need this page table.
286-
if clearPUDEntries == entriesPerPage {
287-
pgdEntry.Clear()
288-
w.pageTables.Allocator.FreePTEs(pudEntries) // escapes: see above.
336+
p4dStart = nextP4DBoundary
337+
}
338+
339+
// Check if we no longer need this page table.
340+
if clearP4DEntries == entriesPerPage {
341+
pgdEntry.Clear()
342+
w.pageTables.Allocator.FreePTEs(p4dEntries) // escapes: see above.
343+
}
289344
}
290345

291346
// Advance to the next PGD entry's range for the next loop.

pkg/ring0/pagetables/walker_empty_amd64.go

Lines changed: 69 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -229,27 +229,81 @@ func (w *emptyWalker) iterateRangeCanonical(start, end uintptr) bool {
229229
nextBoundary := emptyaddrEnd(start, end, pgdSize)
230230
pgdIndex := uint16((start & pgdMask) >> pgdShift)
231231
pgdEntry := &w.pageTables.root[pgdIndex]
232-
if !pgdEntry.Valid() {
233-
if !w.visitor.requiresAlloc() {
232+
if !w.pageTables.largeAddressesEnabled {
233+
if !pgdEntry.Valid() {
234+
if !w.visitor.requiresAlloc() {
234235

235-
start = nextBoundary
236-
continue
236+
start = nextBoundary
237+
continue
238+
}
239+
240+
pudEntries = w.pageTables.Allocator.NewPTEs()
241+
pgdEntry.setPageTable(w.pageTables, pudEntries)
242+
} else {
243+
pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
244+
}
245+
246+
ok, clearPUDEntries := w.walkPUDs(pudEntries, start, nextBoundary)
247+
if !ok {
248+
return false
237249
}
238250

239-
pudEntries = w.pageTables.Allocator.NewPTEs()
240-
pgdEntry.setPageTable(w.pageTables, pudEntries)
251+
if clearPUDEntries == entriesPerPage {
252+
pgdEntry.Clear()
253+
w.pageTables.Allocator.FreePTEs(pudEntries)
254+
}
241255
} else {
242-
pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
243-
}
256+
var p4dEntries *PTEs
257+
if !pgdEntry.Valid() {
258+
if !w.visitor.requiresAlloc() {
244259

245-
ok, clearPUDEntries := w.walkPUDs(pudEntries, start, nextBoundary)
246-
if !ok {
247-
return false
248-
}
260+
start = nextBoundary
261+
continue
262+
}
263+
264+
p4dEntries = w.pageTables.Allocator.NewPTEs()
265+
pgdEntry.setPageTable(w.pageTables, p4dEntries)
266+
} else {
267+
p4dEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
268+
}
269+
var clearP4DEntries uint16 = 0
270+
p4dStart := start
271+
p4dEnd := nextBoundary
272+
for p4dStart < p4dEnd {
273+
nextP4DBoundary := emptyaddrEnd(p4dStart, p4dEnd, p4dSize)
274+
p4dIndex := uint16((p4dStart & p4dMask) >> p4dShift)
275+
p4dEntry := &p4dEntries[p4dIndex]
276+
if !p4dEntry.Valid() {
277+
if !w.visitor.requiresAlloc() {
278+
279+
clearP4DEntries++
280+
p4dStart = nextP4DBoundary
281+
continue
282+
}
283+
284+
pudEntries = w.pageTables.Allocator.NewPTEs()
285+
p4dEntry.setPageTable(w.pageTables, pudEntries)
286+
} else {
287+
pudEntries = w.pageTables.Allocator.LookupPTEs(p4dEntry.Address())
288+
}
249289

250-
if clearPUDEntries == entriesPerPage {
251-
pgdEntry.Clear()
252-
w.pageTables.Allocator.FreePTEs(pudEntries)
290+
ok, clearPUDEntries := w.walkPUDs(pudEntries, p4dStart, nextP4DBoundary)
291+
if !ok {
292+
return false
293+
}
294+
if clearPUDEntries == entriesPerPage {
295+
p4dEntry.Clear()
296+
w.pageTables.Allocator.FreePTEs(pudEntries)
297+
clearP4DEntries++
298+
}
299+
300+
p4dStart = nextP4DBoundary
301+
}
302+
303+
if clearP4DEntries == entriesPerPage {
304+
pgdEntry.Clear()
305+
w.pageTables.Allocator.FreePTEs(p4dEntries)
306+
}
253307
}
254308

255309
start = nextBoundary

0 commit comments

Comments
 (0)