Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
992889a
Add head_shape type
Jun 17, 2021
e987258
Adapt datarepr to generate head_shape thunks in unboxed constructors
Jun 17, 2021
462656a
Add dump option for head shapes
Jun 17, 2021
5013782
Add head_shape computing algorithm
Jun 29, 2021
86da520
sw_num{non}consts fix
Jun 29, 2021
7841d30
Add tests
Jun 22, 2021
6f61925
WIP: restrict matching test to ocamlc.byte-only
Jun 30, 2021
c79987f
Variant data refactoring
nchataing Jul 9, 2021
d9cb31d
Move variant_data binding (and add comment)
nchataing Jul 13, 2021
e2f46e4
replace invalid arg by an assert false
nchataing Jul 13, 2021
29437d6
document typing/types.mli + renamings
gasche Jul 13, 2021
658e627
document typedecl_unboxed.ml{,i}
gasche Jul 13, 2021
5615005
document changes in lambda/matching.ml
gasche Jul 13, 2021
3c2c631
document our disappointing bytecode generation
gasche Jul 13, 2021
8d1058a
a TODO item with a concrete proposal for more compact bytecode
gasche Jul 13, 2021
aa6192a
Compile switch with a Pisint if the any_const case action is Some.
nchataing Jul 13, 2021
75330c0
refactoring commit
gasche Jul 14, 2021
5674bfe
Revert "refactoring commit"
gasche Jul 14, 2021
e4c7ccc
RAEDME.head_shape.md: prepare a description for PR submission
gasche Aug 25, 2021
4c66957
PR description: describe the implementation
gasche Aug 26, 2021
13548e6
spec: why we are not turning imm/tag into constructed types
gasche Aug 29, 2021
678ac93
[minor] typedecl_unboxed: refactor into Head_shape.of_primitive_type
gasche Aug 29, 2021
a7abc2a
[minor] matching.ml: sum/product refactoring for Cases
gasche Aug 29, 2021
d71b6f5
update README.head_shape
gasche Aug 29, 2021
b9feb70
split the head-shape spec in two files, specification (-> RFC) and im…
gasche Aug 30, 2021
f3a8838
rephrase the separability proposal
gasche Sep 3, 2021
a6691d1
Update HEAD_SHAPE.spec.md
nchataing Sep 4, 2021
cede37e
[minor] more code markers
gasche Sep 7, 2021
3143ee5
slightly more in .impl.md
gasche Sep 7, 2021
6d238a5
mention separability in the spec
gasche Sep 7, 2021
a3702ed
HEAD_SHAPE.impl.md: add genprintval handling in the TODO
gasche Sep 9, 2021
cacade1
[minor] genprintval.ml: helper functions to print variant/record values
gasche Sep 9, 2021
e0aa910
[minor] genprintval.ml: clarify exception flow
gasche Sep 10, 2021
66bf695
[WIP] Typedecl_unboxed: new operations on head shapes
gasche Sep 14, 2021
76c0062
head_shape: expose printer for debugging
gasche Sep 14, 2021
e705659
toplevel/genprintval.ml: support unboxed constructors
gasche Sep 14, 2021
22c5041
[refactoring] Outcometree: introduce a record type for constructors
gasche Sep 14, 2021
58260e3
outcometree: print [@unboxed] attributes
gasche Sep 14, 2021
717e88b
oops: restore the fatal error when declaring conflicting types
gasche Sep 16, 2021
7ad168f
more precise head shape for function types
gasche Sep 16, 2021
5c5336d
separability
nchataing Sep 21, 2021
45f2c11
fix bug with global [@@unboxed] annotation
nchataing Sep 21, 2021
6a8c98e
update the testsuite
gasche Sep 21, 2021
44dc585
head_shape: change separability computation (still unfinished)
gasche Sep 21, 2021
d983c43
head-shape spec: genprintval is done
gasche Mar 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .depend
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,7 @@ typing/datarepr.cmo : \
typing/path.cmi \
parsing/location.cmi \
typing/ident.cmi \
parsing/builtin_attributes.cmi \
typing/btype.cmi \
parsing/asttypes.cmi \
typing/datarepr.cmi
Expand All @@ -566,6 +567,7 @@ typing/datarepr.cmx : \
typing/path.cmx \
parsing/location.cmx \
typing/ident.cmx \
parsing/builtin_attributes.cmx \
typing/btype.cmx \
parsing/asttypes.cmi \
typing/datarepr.cmi
Expand Down Expand Up @@ -1543,18 +1545,31 @@ typing/typedecl_separability.cmi : \
typing/env.cmi
typing/typedecl_unboxed.cmo : \
typing/types.cmi \
typing/printtyp.cmi \
typing/predef.cmi \
typing/path.cmi \
parsing/location.cmi \
typing/env.cmi \
typing/ctype.cmi \
utils/config.cmi \
utils/clflags.cmi \
typing/btype.cmi \
typing/typedecl_unboxed.cmi
typing/typedecl_unboxed.cmx : \
typing/types.cmx \
typing/printtyp.cmx \
typing/predef.cmx \
typing/path.cmx \
parsing/location.cmx \
typing/env.cmx \
typing/ctype.cmx \
utils/config.cmx \
utils/clflags.cmx \
typing/btype.cmx \
typing/typedecl_unboxed.cmi
typing/typedecl_unboxed.cmi : \
typing/types.cmi \
typing/path.cmi \
typing/env.cmi
typing/typedecl_variance.cmo : \
typing/types.cmi \
Expand Down Expand Up @@ -3425,6 +3440,8 @@ lambda/matching.cmo : \
typing/types.cmi \
typing/typeopt.cmi \
typing/typedtree.cmi \
typing/typedecl_unboxed.cmi \
typing/typedecl.cmi \
lambda/switch.cmi \
typing/printpat.cmi \
lambda/printlambda.cmi \
Expand All @@ -3447,6 +3464,8 @@ lambda/matching.cmx : \
typing/types.cmx \
typing/typeopt.cmx \
typing/typedtree.cmx \
typing/typedecl_unboxed.cmx \
typing/typedecl.cmx \
lambda/switch.cmx \
typing/printpat.cmx \
lambda/printlambda.cmx \
Expand Down
324 changes: 324 additions & 0 deletions HEAD_SHAPE.impl.md

Large diffs are not rendered by default.

159 changes: 159 additions & 0 deletions HEAD_SHAPE.spec.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Constructor unboxing specification

We (@nchataing as intern, @gasche as advisor) implemented a variant of @yallop's constructor-unboxing specification as an experimental branch that we would now like to discuss and consider for upstreaming.

Our intent was to implement the simplest possible form of unboxing in presence of several constructors, and leave more advanced aspects -- anything that could be left off -- to further work.

We support a per-constructor `[@unboxed]` attribute, that can be used in a variant type as long as the set of values corresponding to each constructor (boxed or unboxed) remain disjoint.

For example:

```ocaml
type bignum =
| Short of int [@unboxed] (* represented directly by an integer *)
| Long of Gmp.t (* Block of tag 0 (first non-unboxed constructor) *)
```

## Precise specification

We define the *head* of an OCaml value as follows:
- the head of an immediate value `v` is the pair `(Imm, v)`
- the head of a heap block with tag `t` is the pair `(Block, t)`.

(In other words, the head tracks whether a value is immediate or a block, and for blocks only keeps the tag.)

The "head shape" of a type is a (slight over-approximation of) the set of heads of all possible values of this type.

Now consider a variant type declaration containing one or several constructors annotated with `[@unboxed]`:

```ocaml
type ('a, 'b, ...) t =
| Const0 (* some constant constructors *)
| Const1
| ...
| Const{m}
| NonConst0 of t00 * t01 * ...
| Nonconst1 of t10 * t11 * ...
| ...
| NonConst{n} of t{n}0 * t{n}1 * ...
| Unboxed1 of u0 [@unboxed]
| Unboxed2 of u1 [@unboxed]
| ...
| Unboxed{l} of u{l} [@unboxed]
```

(For simplicity we wrote above all constant constructors first, then all non-constant constructors then all unboxed constructors. But in fact we support arbitrary interleaving of these categories, and the representation is exactly the same as long as the ordering within constant constructors and within non-constant constructors is preserved.)

The compiled representation of this type is as follows:
- as before, constant constructors `Const{k}` are represented by the immediate number `k`
- as before, non-constant constructors `Nonconst{k} of ...` are represented by a heap block with tag `k`
- unboxed constructors `Unboxed{k} of u{k}` are represented directly by the value of type `u{k}`, without
any boxing

This definition is *rejected* statically if the unboxed constructors overlap with the other values of the type, in the following precise sense:

1. We compute the "boxed head shape" `BS` of this type *without* the unboxed constructors; by definition of the head shape, this is the set `{(Imm, 0), (Imm, 1), ..., (Imm, m)} ∪ {(Block, 0), (Block, 1), ,.., (Block, n)}`.

2. Then we compute the "unboxed shapes" `US{k}` of each unboxed constructor, that is the head shape of `u{k}`.

3. The type is accepted if and only if the shapes `BS, US0, US1, ..., US{l}` are disjoint from each other. The head shape of the whole shape is then the disjoint union `BS ⊎ US0 ⊎ US1 ⊎ ... ⊎ US{l}`.

Unknown/abstract types are assumed to have a "top" shape with containing potentially all heads. (This should be refined when the abstract type is used to represent an FFI type with a precise shape implemented in C; supporting head shape assertions on abstract types is future work.)


## Examples

```ocaml
(* rejected *)
type t =
| Int of int [@unboxed] (* shape: (Imm, _) *)
| Unit (* shape: (Imm, 0), conflicts with Int above *)

(* accepted *)
type t =
| Int of int [@unboxed] (* shape: (Imm, _) *)
| Box of t (* shape: (Block, 0), as the first non-constant non-unboxed constructor *)
(* shape(t): (Imm, _) ∪ {(Block, 0)} *)

(* accepted *)
type prod = t * t
and t =
| Int of int [@unboxed] (* shape: (Imm, _): any immediate *)
| String of string [@unboxed] (* shape: (Block, Obj.string_tag) (Obj.string_tag is 252) *)
| Prod of prod [@unboxed] (* shape: (Block, 0) *)
(* shape(t): (Imm, _) ∪ {(Block, 0), (Block, Obj.string_tag)} *)


(** With abstract types *)

type abstract

(* accepted *)
type t =
| Int of int [@unboxed] (* shape: (Imm, _) *)
| Abs of abstract (* shape: (Block, 0) *)
(* shape(t): (Imm, _) ∪ {(Block, 0)} *)

(* rejected *)
type t =
| Int of int (* shape: (Block, 0) *)
| Abs of abstract [@unboxed] (* any shape, conflicts with Int *)


(** Nested unboxing *)

type t1 =
| Int of int [@unboxed]
| Block of unit
(* shape(t1): (Imm, _) ∪ {(Block, 0)} *)

(* rejected *)
type t2 =
| T1 of t1 [@unboxed] (* shape: (Imm, _) ∪ {(Block, 0)} *)
| S of string (* shape: (Block, 0), conflicts with T1 *)

(* accepted *)
type t3 =
| T1 of t1 [@unboxed] (* shape: {(Imm, _), (Block, 0)} *)
| S of string [@unboxed] (* shape: (Block, Obj.stringₜag) *)
(* shape(t3): (Imm, _) ∪ {(Block, 0)} ∪ {(Block, Obj.string_tag)} *)
```


### Comparison with Yallop's proposal [RFC#14](https://github.com/ocaml/RFCs/pull/14)

Jeremy Yallop's proposal uses a global annotation `[@@unboxed]` on all constructors at once, we use a per-constructor annotation `[@unboxed]`. (The RFC mentions this as a possible extension in "Extension: partial unboxing".) It would be easy to interpret `[@@unboxed]` as just "[@unboxed] on all constructors", but we have not implemented this yet.

A major difference is that the RFC#14 specification suggests renumbering constructors in some cases, where the representation of `C of foo [@unboxed]` is taken to be different from the representation of `foo`, in order to avoid conflicts with other constructors at this type. We do not support any such renumbering:
- the representation of `Unboxed of foo [@unboxed]` is always the representation of `foo`
- the representation of `Boxed of foo` always uses the block tag consecutive/next/succedent to the previous boxed-constructor tag in the declaration (filtering out unboxed constructors).

(Note: @stedolan calls this aspect of RFC#14 "[conflating inlining and disjointness](https://github.com/ocaml/RFCs/pull/14#issuecomment-603867960)". We only deal with disjointness.)


## separability

When the compiler is in `flat-float-array` mode, soundness relies on the property that all OCaml types are "separated": they contain either (1) only `float` values, or (2) no `float` value. New forms of unboxing must preserve this property.

We can track separatedness as part of the head-shape computation for unboxed type declaration, by adding to head-shape data a "separated" bit (see the details in [HEAD_SHAPE.impl.md](HEAD_SHAPE.impl.md)). We reject type declarations whose head-shape is not separated (when in `flat-float-array` mode).

It may be that this tracking is precise enough to entirely replace the pre-existing "separability analysis" of the type-checker. We have not implemented it yet, and have not evaluated this possibility.


## Leftover question: how close to the compiler-distribution runtime should the specification be?

We define static accept/reject decisions for partially-unboxed types using "head shapes", which are defined in terms of the value-representation strategy of the main OCaml implementation. Should we have a more abstract definition, that leaves more room to other representations in alternative implementations?

We have not studied this question yet and we believe it is a pressing question. In particular, any choice that would end up being merged in the language probably MUST support the js_of_ocaml value representation. (Do you know of a reference document that describes the js_of_ocaml value representation? Pointers are welcome are we are not jsoo experts ourselves. cc @hhugo.)

Our intuition is that we could fine a "weakening" of our current specification that distinguishes less different sort of shapes -- thus rejects more definitions -- and gives good flexibility for alternative implementations. Here are some things we could do:

- We could stop making assumptions about the shape of function closures (currently: {Closure_tag, Infix_tag}), preventing the unboxing of closure-holding constructors.
- We could also weaken our assumptions about built-in containers (string/byte, arrays, double-arrays, etc.)
- We could stop distinguishing "float" from immediates (ouch!) if jsoo does this. What about Int32, Int64, should they be known as custom values?

In other words: what amount of runtime type information should we require from OCaml implementations?

At the limit, one extreme choice would be to only reason on the tag of variant constructors (constant or not), which are distinguishable from each other in any OCaml implementation, and not make any other assumption about head shapes (map all types except variants to the "top" shape). This would reject most unboxing definitions, leave maximal freedom for language implementations. Unfortunately this would also prevent the actually-interesting uses of the feature we know about, which mostly resolve around unboxing an `int`-carrying constructor.

This is an aspect of our design on which we need external feedback from people with a good taste for these matters. (cc @xavierleroy, @damiendoligez, @yallop, @stedolan, @lpw25, @let-def, etc.).
11 changes: 11 additions & 0 deletions driver/main_args.ml
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,10 @@ let mk_dinterval f =
"-dinterval", Arg.Unit f, " (undocumented)"
;;

let mk_dheadshape f =
"-dheadshape", Arg.Unit f, " (undocumented)"
;;

let mk_dstartup f =
"-dstartup", Arg.Unit f, " (undocumented)"
;;
Expand Down Expand Up @@ -950,6 +954,7 @@ module type Core_options = sig
val _dtypedtree : unit -> unit
val _drawlambda : unit -> unit
val _dlambda : unit -> unit
val _dheadshape : unit -> unit

end

Expand Down Expand Up @@ -1244,6 +1249,7 @@ struct
mk_dparsetree F._dparsetree;
mk_dtypedtree F._dtypedtree;
mk_drawlambda F._drawlambda;
mk_dheadshape F._dheadshape;
mk_dlambda F._dlambda;
mk_dinstr F._dinstr;
mk_dcamlprimc F._dcamlprimc;
Expand Down Expand Up @@ -1310,6 +1316,7 @@ struct
mk_dsource F._dsource;
mk_dparsetree F._dparsetree;
mk_dtypedtree F._dtypedtree;
mk_dheadshape F._dheadshape;
mk_drawlambda F._drawlambda;
mk_dlambda F._dlambda;
mk_dinstr F._dinstr;
Expand Down Expand Up @@ -1441,6 +1448,7 @@ struct
mk_dsource F._dsource;
mk_dparsetree F._dparsetree;
mk_dtypedtree F._dtypedtree;
mk_dheadshape F._dheadshape;
mk_drawlambda F._drawlambda;
mk_dlambda F._dlambda;
mk_drawclambda F._drawclambda;
Expand Down Expand Up @@ -1551,6 +1559,7 @@ module Make_opttop_options (F : Opttop_options) = struct
mk_dsource F._dsource;
mk_dparsetree F._dparsetree;
mk_dtypedtree F._dtypedtree;
mk_dheadshape F._dheadshape;
mk_drawlambda F._drawlambda;
mk_drawclambda F._drawclambda;
mk_dclambda F._dclambda;
Expand Down Expand Up @@ -1715,6 +1724,7 @@ module Default = struct
let _drawlambda = set dump_rawlambda
let _dsource = set dump_source
let _dtypedtree = set dump_typedtree
let _dheadshape = set dump_headshape
let _dunique_ids = set unique_ids
let _dno_unique_ids = clear unique_ids
let _dlocations = set locations
Expand Down Expand Up @@ -1747,6 +1757,7 @@ module Default = struct
let _dflambda_verbose () =
set dump_flambda (); set dump_flambda_verbose ()
let _dinterval = set dump_interval
let _dheadshape = set dump_headshape
let _dinterf = set dump_interf
let _dlinear = set dump_linear
let _dlive () = dump_live := true
Expand Down
1 change: 1 addition & 0 deletions driver/main_args.mli
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ module type Core_options = sig
val _dtypedtree : unit -> unit
val _drawlambda : unit -> unit
val _dlambda : unit -> unit
val _dheadshape : unit -> unit

end

Expand Down
Loading