Skip to content

Commit 6a55e27

Browse files
authored
Updates from master (#801)
* Decode option data
2 parents 42b4b08 + 66aec6d commit 6a55e27

File tree

8 files changed

+24953
-2
lines changed

8 files changed

+24953
-2
lines changed

kmir/src/kmir/decoding.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,19 @@
1313
BoolT,
1414
EnumT,
1515
Initialized,
16+
IntegerLength,
1617
IntT,
1718
IntTy,
1819
Multiple,
20+
Pointer,
1921
PrimitiveInt,
2022
PtrT,
2123
RefT,
2224
Single,
2325
StrT,
2426
StructT,
2527
UintT,
28+
WrappingRange,
2629
)
2730
from .value import (
2831
NO_SIZE,
@@ -43,7 +46,7 @@
4346

4447
from pyk.kast import KInner
4548

46-
from .ty import FieldsShape, IntegerLength, LayoutShape, MachineSize, Scalar, TagEncoding, Ty, TypeMetadata, UintTy
49+
from .ty import FieldsShape, LayoutShape, MachineSize, Scalar, TagEncoding, Ty, TypeMetadata, UintTy
4750
from .value import MetadataSize
4851

4952

@@ -419,5 +422,10 @@ def _extract_tag(*, data: bytes, tag_offset: MachineSize, tag: Scalar) -> tuple[
419422
tag_data = data[tag_offset.in_bytes : tag_offset.in_bytes + length.value]
420423
tag_value = int.from_bytes(tag_data, byteorder='little', signed=False)
421424
return tag_value, length
425+
# special case: niche-encoded optional pointer, None == 0x00000000
426+
case Initialized(value=Pointer(), valid_range=WrappingRange(start=1, end=0)) if (
427+
data == b'\x00\x00\x00\x00\x00\x00\x00\x00'
428+
):
429+
return 0, IntegerLength.I64
422430
case _:
423431
raise ValueError(f'Unsupported tag: {tag}')

kmir/src/kmir/kdist/mir-semantics/rt/decoding.md

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,10 @@ Known element sizes for common types:
206206

207207
### Enum decoding
208208

209-
Enum decoding is for now restricted to enums wihout any fields.
209+
Enum decoding is for now restricted to a few special cases.
210+
211+
#### Enums without any fields
212+
If there are no fields, the enum can be decoded by using their data as the discriminant.
210213

211214
```k
212215
rule #decodeValue(
@@ -289,6 +292,112 @@ Enum decoding is for now restricted to enums wihout any fields.
289292
requires TAG =/=Int DISCRIMINANT
290293
```
291294

295+
#### Enums with two variants
296+
297+
Having two variants with possibly zero or one field each is a very common case,
298+
it includes a number of standard library types such as `Option` and `Result`.
299+
300+
The following rules are specialised to the case of encoding an `Option`.
301+
An important distinction here is whether or not the tag is niche-encoded.
302+
If the option holds data that has all-zero as a possible value, a separate tag is used, usually as the first field.
303+
In both cases we expect the tag to be in the single shared field, and the discriminant to be just 0 and 1.
304+
305+
```k
306+
rule #decodeValue(
307+
BYTES
308+
, typeInfoEnumType(...
309+
name: _
310+
, adtDef: _
311+
, discriminants: discriminant(0) discriminant(1) .Discriminants
312+
, fields: (.Tys : (FIELD_TYPE .Tys) : .Tyss)
313+
, layout:
314+
someLayoutShape(layoutShape(...
315+
fields: fieldsShapeArbitrary(mk(... offsets: machineSize(0) .MachineSizes))
316+
, variants:
317+
variantsShapeMultiple(
318+
mk(...
319+
tag: scalarInitialized(
320+
mk(...
321+
value: primitiveInt(mk(... length: TAG_WIDTH, signed: _))
322+
, validRange: _RANGE
323+
)
324+
)
325+
, tagEncoding: tagEncodingDirect
326+
, tagField: 0
327+
, variants: _VARIANTS
328+
)
329+
)
330+
, abi: _ABI
331+
, abiAlign: _ABI_ALIGN
332+
, size: _SIZE
333+
))
334+
) #as ENUM_TYPE
335+
)
336+
=> #decodeOptionTag01(BYTES, TAG_WIDTH, FIELD_TYPE, ENUM_TYPE)
337+
338+
syntax Evaluation ::= #decodeOptionTag01 ( Bytes , IntegerLength , Ty , TypeInfo ) [function, total]
339+
// --------------------------------------------------------------------------------------
340+
rule #decodeOptionTag01(BYTES, _LEN, _TY, _ENUM_TYPE)
341+
=> Aggregate(variantIdx(0), .List)
342+
requires 0 ==Int BYTES[0] // expect only 0 or 1 as tags, so higher bytes do not matter
343+
[preserves-definedness]
344+
rule #decodeOptionTag01(BYTES, LEN, TY, _ENUM_TYPE)
345+
=> Aggregate(variantIdx(1), ListItem(#decodeValue(substrBytes(BYTES, #byteLength(LEN), lengthBytes(BYTES)), lookupTy(TY))))
346+
requires 1 ==Int BYTES[0] // expect only 0 or 1 as tags, so higher bytes do not matter
347+
[preserves-definedness]
348+
rule #decodeOptionTag01(BYTES, _LEN, _TY, ENUM_TYPE)
349+
=> UnableToDecode(BYTES, ENUM_TYPE)
350+
[owise]
351+
352+
syntax Int ::= #byteLength ( IntegerLength ) [function, total]
353+
// -----------------------------------------------------------
354+
rule #byteLength(integerLengthI8 ) => 1
355+
rule #byteLength(integerLengthI16 ) => 2
356+
rule #byteLength(integerLengthI32 ) => 4
357+
rule #byteLength(integerLengthI64 ) => 8
358+
rule #byteLength(integerLengthI128) => 16
359+
360+
```
361+
362+
If the option holds a reference or pointer, no extra tag field is required.
363+
The tag is niche-encoded with an all-zero value representing `None`.
364+
However, in this case only a `None` can actually be decoded.
365+
Any pointer or reference would have a very different encoding in KMIR, not a non-zero address.
366+
367+
```k
368+
rule #decodeValue(
369+
BYTES
370+
, typeInfoEnumType(...
371+
name: _
372+
, adtDef: _
373+
, discriminants: discriminant(0) discriminant(1) .Discriminants
374+
, fields: (.Tys : (_FIELD_TYPE .Tys) : .Tyss)
375+
, layout:
376+
someLayoutShape(layoutShape(...
377+
fields: fieldsShapeArbitrary(mk(... offsets: machineSize(0) .MachineSizes))
378+
, variants:
379+
variantsShapeMultiple(
380+
mk(...
381+
tag: scalarInitialized(
382+
mk(...
383+
value: primitivePointer(_ADDR_SPACE)
384+
, validRange: wrappingRange(1, 0)
385+
)
386+
)
387+
, tagEncoding: tagEncodingNiche() // FIXME incomplete data model in the AST
388+
, tagField: 0
389+
, variants: _VARIANTS
390+
)
391+
)
392+
, abi: _ABI
393+
, abiAlign: _ABI_ALIGN
394+
, size: _SIZE
395+
))
396+
)
397+
)
398+
=> Aggregate(variantIdx(0), .List)
399+
requires 0 ==Int Bytes2Int(BYTES, LE, Unsigned)
400+
```
292401

293402
## Array Allocations
294403

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Aggregate ( variantIdx ( 0 ) , .List )
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
{
2+
"bytes": [
3+
0,
4+
0,
5+
0,
6+
0,
7+
0,
8+
0,
9+
0,
10+
0
11+
],
12+
"types": [
13+
[
14+
0,
15+
{
16+
"PrimitiveType": {
17+
"Uint": "U8"
18+
}
19+
}
20+
],
21+
[
22+
11,
23+
{
24+
"RefType": {
25+
"pointee_type": 0,
26+
"layout": {
27+
"fields": "Primitive",
28+
"variants": {
29+
"Single": {
30+
"index": 0
31+
}
32+
},
33+
"abi": {
34+
"Scalar": {
35+
"Initialized": {
36+
"value": {
37+
"Pointer": 0
38+
},
39+
"valid_range": {
40+
"start": 1,
41+
"end": 18446744073709551615
42+
}
43+
}
44+
}
45+
},
46+
"abi_align": 8,
47+
"size": {
48+
"num_bits": 64
49+
}
50+
}
51+
}
52+
}
53+
]
54+
],
55+
"typeInfo": {
56+
"EnumType": {
57+
"name": "std::option::Option<&u8>",
58+
"adt_def": 24,
59+
"discriminants": [
60+
0,
61+
1
62+
],
63+
"fields": [
64+
[],
65+
[
66+
4
67+
]
68+
],
69+
"layout": {
70+
"fields": {
71+
"Arbitrary": {
72+
"offsets": [
73+
{
74+
"num_bits": 0
75+
}
76+
]
77+
}
78+
},
79+
"variants": {
80+
"Multiple": {
81+
"tag": {
82+
"Initialized": {
83+
"value": {
84+
"Pointer": 0
85+
},
86+
"valid_range": {
87+
"start": 1,
88+
"end": 0
89+
}
90+
}
91+
},
92+
"tag_encoding": {
93+
"Niche": {
94+
"untagged_variant": 1,
95+
"niche_variants": {
96+
"start": 0,
97+
"end": 0
98+
},
99+
"niche_start": 0
100+
}
101+
},
102+
"tag_field": 0,
103+
"variants": [
104+
{
105+
"fields": {
106+
"Arbitrary": {
107+
"offsets": []
108+
}
109+
},
110+
"variants": {
111+
"Single": {
112+
"index": 0
113+
}
114+
},
115+
"abi": {
116+
"Aggregate": {
117+
"sized": true
118+
}
119+
},
120+
"abi_align": 1,
121+
"size": {
122+
"num_bits": 0
123+
}
124+
},
125+
{
126+
"fields": {
127+
"Arbitrary": {
128+
"offsets": [
129+
{
130+
"num_bits": 0
131+
}
132+
]
133+
}
134+
},
135+
"variants": {
136+
"Single": {
137+
"index": 1
138+
}
139+
},
140+
"abi": {
141+
"Scalar": {
142+
"Initialized": {
143+
"value": {
144+
"Pointer": 0
145+
},
146+
"valid_range": {
147+
"start": 1,
148+
"end": 18446744073709551615
149+
}
150+
}
151+
}
152+
},
153+
"abi_align": 8,
154+
"size": {
155+
"num_bits": 64
156+
}
157+
}
158+
]
159+
}
160+
},
161+
"abi": {
162+
"Scalar": {
163+
"Initialized": {
164+
"value": {
165+
"Pointer": 0
166+
},
167+
"valid_range": {
168+
"start": 1,
169+
"end": 0
170+
}
171+
}
172+
}
173+
},
174+
"abi_align": 8,
175+
"size": {
176+
"num_bits": 64
177+
}
178+
}
179+
}
180+
}
181+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
const OPT_NO_U32: Option<u32> = None;
2+
const OPT_A_U64: Option<u64> = Some(42);
3+
const OPT_NO_REF: Option<&[u32;3]> = None;
4+
5+
fn main() {
6+
let a = 1;
7+
let result = sub_(0,a);
8+
assert_eq!(result, OPT_NO_U32);
9+
assert_eq!(OPT_A_U64, Some(41 + a as u64));
10+
assert!(OPT_NO_REF.is_none());
11+
let arr0 = [0u32; 3];
12+
assert_eq!(opt_ref(&arr0), None);
13+
let arr = [a;3];
14+
assert_eq!(opt_ref(&arr), Some(&arr));
15+
}
16+
17+
// basically checked_sub
18+
fn sub_(a:u32, b:u32) -> Option<u32> {
19+
if a < b {
20+
None // this becomes an Operand::Constant
21+
} else {
22+
Some(a - b)
23+
}
24+
}
25+
26+
fn opt_ref(arr: &[u32;3]) -> Option<&[u32;3]> {
27+
if arr[0] == 0 {
28+
None
29+
} else {
30+
Some(arr)
31+
}
32+
}

0 commit comments

Comments
 (0)