From 2b777fca012c399d1bcb003bf97ba68617a75c8e Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Mon, 13 Jun 2022 13:09:24 +0200 Subject: [PATCH 01/10] parsing/builtin_attributes: support code for list literals --- parsing/builtin_attributes.ml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/parsing/builtin_attributes.ml b/parsing/builtin_attributes.ml index c90542567ada..940bb1488da7 100644 --- a/parsing/builtin_attributes.ml +++ b/parsing/builtin_attributes.ml @@ -30,6 +30,22 @@ let string_of_opt_payload p = | Some s -> s | None -> "" +let list_of_exp exp = + let rec loop acc = function + | {pexp_desc = Pexp_construct ({txt = Longident.Lident "[]"; _}, None)} -> + Ok (List.rev acc) + | {pexp_desc = Pexp_construct ({txt = Longident.Lident "::"; _}, + Some {pexp_desc = Pexp_tuple [e1; e2]})} -> + loop (e1 :: acc) e2 + | {pexp_loc = loc} -> + Error loc + in loop [] exp + +let list_of_payload loc = function + | PStr[{pstr_desc = Pstr_eval (li, _)}] -> + list_of_exp li + | _ -> Error loc + let error_of_extension ext = let submessage_from main_loc main_txt = function | {pstr_desc=Pstr_extension From 98e405bb1abcc8b2f0c08f673d07bc008c68d7c1 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Mon, 13 Jun 2022 13:10:22 +0200 Subject: [PATCH 02/10] parsing/builtin_attributes support for [@shape [int; double; lazy; object]] --- parsing/builtin_attributes.ml | 46 ++++++++++++++++++++++++++++++++++ parsing/builtin_attributes.mli | 2 ++ utils/misc.ml | 15 +++++++++++ utils/misc.mli | 15 +++++++++++ 4 files changed, 78 insertions(+) diff --git a/parsing/builtin_attributes.ml b/parsing/builtin_attributes.ml index 940bb1488da7..edc81760849c 100644 --- a/parsing/builtin_attributes.ml +++ b/parsing/builtin_attributes.ml @@ -303,3 +303,49 @@ let has_unboxed attr = let has_boxed attr = List.exists (check ["ocaml.boxed"; "boxed"]) attr + + +let find_shapes attrs : Misc.named_shape list option = + let err loc msg = + warn_payload loc "shape" msg; + None + in + let shape_of_exp exp = + let shapes = [ + "int", `Int; + "lazy", `Lazy; + "closure", `Closure; + "infix", `Infix; + "forward", `Forward; + "abstract", `Abstract; + "string", `String; + "double", `Double; + "double_array", `Double_array; + "custom", `Custom; + ] in + let loc = exp.pexp_loc in + match exp.pexp_desc with + | Pexp_ident {loc; txt = Longident.Lident shape_name} -> + begin match List.assoc_opt shape_name shapes with + | Some tag -> Some tag + | None -> + Printf.ksprintf (err loc) "Unknown shape name %s." shape_name + end + | _ -> + err loc "Unsupported shape format" + in + let shape_of_payload loc payload = + match list_of_payload loc payload with + | Error loc -> err loc "A shape list such as [int; lazy; custom] was expected." 
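+    (* [shape_of_exp] warns and returns [None] on an unknown or
+       ill-formed entry, so the [filter_map] below drops it rather
+       than aborting the whole attribute. *)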
+ | Ok args -> Some (List.filter_map shape_of_exp args) + in + let find_shape_payload a = + match a.attr_name.txt with + | "shape" | "ocaml.shape" -> + shape_of_payload a.attr_loc a.attr_payload + | _ -> None + in + match List.filter_map find_shape_payload attrs with + | [] -> None + | (_ :: _) as shape_specs -> + Some (List.fold_left List.rev_append [] shape_specs) diff --git a/parsing/builtin_attributes.mli b/parsing/builtin_attributes.mli index 6200fd74ec54..15d3e32924c8 100644 --- a/parsing/builtin_attributes.mli +++ b/parsing/builtin_attributes.mli @@ -82,3 +82,5 @@ val immediate64: Parsetree.attributes -> bool val has_unboxed: Parsetree.attributes -> bool val has_boxed: Parsetree.attributes -> bool + +val find_shapes: Parsetree.attributes -> Misc.named_shape list option diff --git a/utils/misc.ml b/utils/misc.ml index 942061720d85..9ed1dc921b62 100644 --- a/utils/misc.ml +++ b/utils/misc.ml @@ -1124,3 +1124,18 @@ module Magic_number = struct | Error err -> Error (Unexpected_error err) | Ok () -> Ok info end + + +(* shapes that the user can name in [@shape ...] payloads *) +type named_shape = [ + | `Int (* all immediate shapes *) + | `Lazy (* Obj.lazy_tag, etc. *) + | `Closure + | `Infix + | `Forward + | `Abstract + | `String + | `Double + | `Double_array + | `Custom +] diff --git a/utils/misc.mli b/utils/misc.mli index 6aea772091a9..fea7f2a108ca 100644 --- a/utils/misc.mli +++ b/utils/misc.mli @@ -671,3 +671,18 @@ module Magic_number : sig val all_kinds : kind list end + + +(* shapes that the user can name in [@shape ...] payloads *) +type named_shape = [ + | `Int (* all immediate shapes *) + | `Lazy (* Obj.lazy_tag, etc. *) + | `Closure + | `Infix + | `Forward + | `Abstract + | `String + | `Double + | `Double_array + | `Custom +] From dd3313467630c37fb025858f987eb5d7397b9e54 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Mon, 13 Jun 2022 15:39:03 +0200 Subject: [PATCH 03/10] typedecl_unboxed: support for [@shape ...] 
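With the attribute support from the two previous patches, an abstract
type can now declare the possible head shapes of its values, and the
disjointness check below consumes that declaration instead of
conservatively assuming any shape. A hedged sketch of the intended
surface syntax (the type [t] is hypothetical, and attaching the
attribute with [@@...] on the declaration is my reading of
Builtin_attributes.find_shapes consuming type_attributes; the shape
names are the ones it accepts):

    (* values of [t] are either immediates or custom blocks,
       as for a numeric type implemented in C *)
    type t [@@shape [int; custom]]

Overlapping names, within one list or across several [@@shape]
attributes, denote intersecting shape sets and are rejected with the
new "conflicting shapes" error.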
--- typing/typedecl_unboxed.ml | 59 +++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/typing/typedecl_unboxed.ml b/typing/typedecl_unboxed.ml index 059a206f5b9a..add789cb0eb2 100644 --- a/typing/typedecl_unboxed.ml +++ b/typing/typedecl_unboxed.ml @@ -170,6 +170,13 @@ module Head_shape = struct head_separated = true; } + let any_imm_shape = + { + head_imm = Shape_any; + head_blocks = Shape_set []; + head_separated = true; + } + let block_shape tags = let imm = Shape_set [] in let blocks = Shape_set tags in @@ -189,16 +196,32 @@ module Head_shape = struct true); } - let disjoint_union hd1 hd2 = + let of_named : Misc.named_shape -> t = function + | `Int -> any_imm_shape + | `Lazy -> block_shape [Obj.lazy_tag] + | `Closure -> block_shape [Obj.closure_tag] + | `Infix -> block_shape [Obj.infix_tag] + | `Forward -> block_shape [Obj.forward_tag] + | `Abstract -> block_shape [Obj.abstract_tag] + | `String -> block_shape [Obj.string_tag] + | `Double -> block_shape [Obj.double_tag] + | `Double_array -> block_shape [Obj.double_array_tag] + | `Custom -> block_shape [Obj.custom_tag] + + let union ~disjoint hd1 hd2 = + let conflict_or k = + if disjoint then raise Conflict; + k () + in let union s1 s2 = match s1, s2 with | Shape_any, Shape_set [] | Shape_set [], Shape_any -> Shape_any - | Shape_any, _ | _, Shape_any -> raise Conflict + | Shape_any, _ | _, Shape_any -> conflict_or (fun () -> Shape_any) | Shape_set l1, Shape_set l2 -> (* invariant : l1 and l2 are sorted *) let rec merge l1 l2 = match l1, l2 with | [], l | l, [] -> l | x :: xx, y :: yy -> - if x = y then raise Conflict + if x = y then conflict_or (fun () -> x :: merge xx yy) else if x < y then x :: (merge xx l2) else y :: (merge l1 yy) in Shape_set (merge l1 l2) @@ -213,6 +236,9 @@ module Head_shape = struct in { head_imm; head_blocks; head_separated } + let disjoint_union hd1 hd2 = + union ~disjoint:true hd1 hd2 + module Callstack = struct type t = Path.t list @@ -269,26 +295,21 @@ module Head_shape = struct | _ -> ty let of_primitive_type = function - | Int -> - { - head_imm = Shape_any; - head_blocks = Shape_set []; - head_separated = true; - } - | Float -> block_shape [Obj.double_tag] + | Int -> of_named `Int + | Float -> of_named `Double | String - | Bytes -> block_shape [Obj.string_tag] + | Bytes -> of_named `String | Array -> block_shape (if Config.flat_float_array then [0] else [0; Obj.double_array_tag]) - | Floatarray -> block_shape [Obj.double_array_tag] + | Floatarray -> of_named `Double_array | Lazy -> any_shape (* Lazy values can 'shortcut' the lazy block, and thus have many different tags. When Config.flat_float_array, they cannot be floats, so we might want to refine that if there are strong use-cases. *) - | Custom -> block_shape [Obj.custom_tag] + | Custom -> of_named `Custom let rec of_type_expr env ty callstack_map = (* TODO : try the Ctype.expand_head_opt version here *) @@ -351,8 +372,18 @@ module Head_shape = struct match ty_descr with | Type_abstract -> begin match ty_decl.type_manifest with - | None -> any_shape | Some ty -> of_type_expr_with_params ty + | None -> + begin match Builtin_attributes.find_shapes ty_decl.type_attributes with + | None -> any_shape + | Some named_shapes -> + let shapes = List.map of_named named_shapes in + try List.fold_left disjoint_union empty_shape shapes + with Conflict -> + (* TODO use a proper error here as well *) + Location.raise_errorf ~loc:ty_decl.type_loc ~sub:[] + "The [@shape ...] attribute(s) contains conflicting shapes." 
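+              (* reached e.g. when two [@@shape ...] attributes both
+                 name [custom], or when one list repeats a name: the
+                 merge in [disjoint_union] then sees the same tag
+                 twice and raises [Conflict]. *)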
+ end end | Type_record (_, Record_regular) -> block_shape [0] | Type_record (_, Record_float) -> block_shape [Obj.double_array_tag] From 5e26a4503e5b7d144062c7f08b5c443d17e7ddc3 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Mon, 13 Jun 2022 15:39:37 +0200 Subject: [PATCH 04/10] fixup! document typedecl_unboxed.ml{,i} --- typing/typedecl_unboxed.mli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typing/typedecl_unboxed.mli b/typing/typedecl_unboxed.mli index f86db2669903..b544960b93a8 100644 --- a/typing/typedecl_unboxed.mli +++ b/typing/typedecl_unboxed.mli @@ -41,7 +41,7 @@ module Head_shape : sig val pp : Format.formatter -> t -> unit - (** Check a new type decalaration, that may be a variant type + (** Check a new type declaration, that may be a variant type containing unboxed constructors, to verify that the unboxing requests respect the "disjointness" requirement of constructor unboxing -- the values of two constructors must not conflict. From e37aed4dadbb7975c3ec5d5aae61085a1f5f7917 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Wed, 30 Jun 2021 22:05:03 +0200 Subject: [PATCH 05/10] import Zarith C code --- Zarith/caml_z.c | 3440 +++++++++++++++++++++++++++++++++++++++++++++++ Zarith/zarith.h | 46 + 2 files changed, 3486 insertions(+) create mode 100644 Zarith/caml_z.c create mode 100644 Zarith/zarith.h diff --git a/Zarith/caml_z.c b/Zarith/caml_z.c new file mode 100644 index 000000000000..63283e647b09 --- /dev/null +++ b/Zarith/caml_z.c @@ -0,0 +1,3440 @@ +/* This file is cloned from + https://github.com/ocaml/Zarith/blob/39df015/caml_z.c +*/ + +/** + Implementation of Z module. + + + This file is part of the Zarith library + http://forge.ocamlcore.org/projects/zarith . + It is distributed under LGPL 2 licensing, with static linking exception. + See the LICENSE file included in the distribution. + + Copyright (c) 2010-2011 Antoine Miné, Abstraction project. + Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS), + a joint laboratory by: + CNRS (Centre national de la recherche scientifique, France), + ENS (École normale supérieure, Paris, France), + INRIA Rocquencourt (Institut national de recherche en informatique, France). + +*/ + + +/*--------------------------------------------------- + INCLUDES + ---------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include + +#ifdef HAS_GMP +#include +#endif +#ifdef HAS_MPIR +#include +#endif + +#include "zarith.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define inline __inline + +#ifdef _MSC_VER +#include +#include +#endif + +/* The "__has_builtin" special macro from Clang */ +#ifdef __has_builtin +#define HAS_BUILTIN(x) __has_builtin(x) +#else +#define HAS_BUILTIN(x) 0 +#endif + +/*--------------------------------------------------- + CONFIGURATION + ---------------------------------------------------*/ + +/* Whether to enable native (i.e. non-mpn_) operations and output + ocaml integers when possible. + Highly recommended. + */ +#define Z_FAST_PATH 1 +#define Z_USE_NATINT 1 + +/* Whether the fast path (arguments and result are small integers) + has already be handled in OCaml, so that there is no need to + re-test for it in C functions. + Applies to: neg, abs, add, sub, mul, div, rem, succ, pred, + logand, logor, logxor, lognot, shifts, divexact. +*/ +#define Z_FAST_PATH_IN_OCAML 1 + +/* Sanity checks. 
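+   When enabled, ml_z_alloc switches to an out-of-line variant that
+   plants a canary word after the last limb, and the Z_CHECK macro
+   expands to ml_z_check, which validates headers, canaries and
+   normalization at its call sites.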
*/ +#define Z_PERFORM_CHECK 0 + +/* Enable performance counters. + Prints some info on stdout at exit. +*/ +/* + #define Z_PERF_COUNTER 0 + now set by configure +*/ + +/* whether to use custom blocks (supporting serialization, comparison & + hashing) instead of abstract tags +*/ +#define Z_CUSTOM_BLOCK 1 + +/*--------------------------------------------------- + DATA STRUCTURES + ---------------------------------------------------*/ + +/* + we assume that: + - intnat is a signed integer type + - mp_limb_t is an unsigned integer type + - sizeof(intnat) == sizeof(mp_limb_t) == either 4 or 8 +*/ + +#ifdef _WIN64 +#define PRINTF_LIMB "I64" +#else +#define PRINTF_LIMB "l" +#endif + +/* + A z object x can be: + - either an ocaml int + - or a block with abstract or custom tag and containing: + . a 1 value header containing the sign Z_SIGN(x) and the size Z_SIZE(x) + . Z_SIZE(x) mp_limb_t + + Invariant: + - if the number fits in an int, it is stored in an int, not a block + - if the number is stored in a block, then Z_SIZE(x) >= 1 and + the most significant limb Z_LIMB(x)[Z_SIZE(x)] is not 0 + */ + + +/* a sign is always denoted as 0 (+) or Z_SIGN_MASK (-) */ +#ifdef ARCH_SIXTYFOUR +#define Z_SIGN_MASK 0x8000000000000000 +#define Z_SIZE_MASK 0x7fffffffffffffff +#else +#define Z_SIGN_MASK 0x80000000 +#define Z_SIZE_MASK 0x7fffffff +#endif + +#if Z_CUSTOM_BLOCK +#define Z_HEAD(x) (*((value*)Data_custom_val((x)))) +#define Z_LIMB(x) ((mp_limb_t*)Data_custom_val((x)) + 1) +#else +#define Z_HEAD(x) (Field((x),0)) +#define Z_LIMB(x) ((mp_limb_t*)&(Field((x),1))) +#endif +#define Z_SIGN(x) (Z_HEAD((x)) & Z_SIGN_MASK) +#define Z_SIZE(x) (Z_HEAD((x)) & Z_SIZE_MASK) + +/* bounds of an Ocaml int */ +#ifdef ARCH_SIXTYFOUR +#define Z_MAX_INT 0x3fffffffffffffff +#define Z_MIN_INT (-0x4000000000000000) +#else +#define Z_MAX_INT 0x3fffffff +#define Z_MIN_INT (-0x40000000) +#endif +#define Z_FITS_INT(v) ((v) >= Z_MIN_INT && (v) <= Z_MAX_INT) + +/* Z_MAX_INT may not be representable exactly as a double => we use a + lower approximation to be safe + */ +#ifdef ARCH_SIXTYFOUR +#define Z_MAX_INT_FL 0x3ffffffffffff000 +#define Z_MIN_INT_FL (-Z_MAX_INT_FL) +#else +#define Z_MAX_INT_FL Z_MAX_INT +#define Z_MIN_INT_FL Z_MIN_INT +#endif + +/* safe bounds to avoid overflow in multiplication */ +#ifdef ARCH_SIXTYFOUR +#define Z_MAX_HINT 0x3fffffff +#else +#define Z_MAX_HINT 0x3fff +#endif +#define Z_MIN_HINT (-Z_MAX_HINT) +#define Z_FITS_HINT(v) ((v) >= Z_MIN_HINT && (v) <= Z_MAX_HINT) + +/* hi bit of OCaml int32, int64 & nativeint */ +#define Z_HI_INT32 0x80000000 +#define Z_HI_INT64 0x8000000000000000LL +#ifdef ARCH_SIXTYFOUR +#define Z_HI_INTNAT Z_HI_INT64 +#define Z_HI_INT 0x4000000000000000 +#else +#define Z_HI_INTNAT Z_HI_INT32 +#define Z_HI_INT 0x40000000 +#endif + +/* safe bounds for the length of a base n string fitting in a native + int. Defined as the result of (n - 2) log_base(2) with n = 64 or + 32. 
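+   For example, on 64 bits in base 10: 62 * log10(2) = 18.6..., and
+   indeed any 18-digit decimal number is below 2^62, so it fits in an
+   OCaml int; hence Z_BASE10_LENGTH_OP = 18.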
+*/ +#ifdef ARCH_SIXTYFOUR +#define Z_BASE16_LENGTH_OP 15 +#define Z_BASE10_LENGTH_OP 18 +#define Z_BASE8_LENGTH_OP 20 +#define Z_BASE2_LENGTH_OP 62 +#else +#define Z_BASE16_LENGTH_OP 7 +#define Z_BASE10_LENGTH_OP 9 +#define Z_BASE8_LENGTH_OP 10 +#define Z_BASE2_LENGTH_OP 30 +#endif + +#define Z_LIMB_BITS (8 * sizeof(mp_limb_t)) + + +/* performance counters */ +unsigned long ml_z_ops = 0; +unsigned long ml_z_slow = 0; +unsigned long ml_z_ops_as = 0; + +#if Z_PERF_COUNTER +#define Z_MARK_OP ml_z_ops++ +#define Z_MARK_SLOW ml_z_slow++ +#else +#define Z_MARK_OP +#define Z_MARK_SLOW +#endif + +/*--------------------------------------------------- + UTILITIES + ---------------------------------------------------*/ + +extern struct custom_operations ml_z_custom_ops; + +static double ml_z_2p32; /* 2 ^ 32 in double */ + +#if Z_PERFORM_CHECK +/* for debugging: dump a mp_limb_t array */ +static void ml_z_dump(const char* msg, mp_limb_t* p, mp_size_t sz) +{ + mp_size_t i; + printf("%s %i: ",msg,(int)sz); + for (i = 0; i < sz; i++) +#ifdef ARCH_SIXTYFOUR + printf("%08" PRINTF_LIMB "x ",p[i]); +#else + printf("%04" PRINTF_LIMB "x ",p[i]); +#endif + printf("\n"); + fflush(stdout); +} +#endif + +#if Z_PERFORM_CHECK +/* for debugging: check invariant */ +void ml_z_check(const char* fn, int line, const char* arg, value v) +{ + mp_size_t sz; + + if (Is_long(v)) { +#if Z_USE_NATINT + return; +#else + printf("ml_z_check: unexpected tagged integer for %s at %s:%i.\n", arg, fn, line); + exit(1); +#endif + } +#if Z_CUSTOM_BLOCK + if (Custom_ops_val(v) != &ml_z_custom_ops) { + printf("ml_z_check: wrong custom block for %s at %s:%i.\n", + arg, fn, line); + exit(1); + } + sz = Wosize_val(v) - 1; +#else + sz = Wosize_val(v); +#endif + if (Z_SIZE(v) + 2 > sz) { + printf("ml_z_check: invalid block size (%i / %i) for %s at %s:%i.\n", + (int)Z_SIZE(v), (int)sz, + arg, fn, line); + exit(1); + } + if ((mp_size_t) Z_LIMB(v)[sz - 2] != (mp_size_t)(0xDEADBEEF ^ (sz - 2))) { + printf("ml_z_check: corrupted block for %s at %s:%i.\n", + arg, fn, line); + exit(1); + } + if (Z_SIZE(v) && !Z_LIMB(v)[Z_SIZE(v)-1]) { + printf("ml_z_check: unreduced argument for %s at %s:%i.\n", arg, fn, line); + ml_z_dump("offending argument: ", Z_LIMB(v), Z_SIZE(v)); + exit(1); + } +#if Z_USE_NATINT + if (Z_SIZE(v) == 0 + || (Z_SIZE(v) <= 1 + && (Z_LIMB(v)[0] <= Z_MAX_INT + || (Z_LIMB(v)[0] == -Z_MIN_INT && Z_SIGN(v))))) { + printf("ml_z_check: expected a tagged integer for %s at %s:%i.\n", arg, fn, line); + ml_z_dump("offending argument: ", Z_LIMB(v), Z_SIZE(v)); + exit(1); + } +#else + if (!Z_SIZE(v) && Z_SIGN(v)) { + printf("ml_z_check: invalid sign of 0 for %s at %s:%i.\n", + arg, fn, line); + exit(1); + } +#endif +} +#endif + +/* for debugging */ +#if Z_PERFORM_CHECK +#define Z_CHECK(v) ml_z_check(__FUNCTION__, __LINE__, #v, v) +#else +#define Z_CHECK(v) +#endif + +/* allocates z object block with space for sz mp_limb_t; + does not set the header + */ + +#if !Z_PERFORM_CHECK +/* inlined allocation */ +#if Z_CUSTOM_BLOCK +#define ml_z_alloc(sz) \ + caml_alloc_custom(&ml_z_custom_ops, (1 + (sz)) * sizeof(value), 0, 1) +#else +#define ml_z_alloc(sz) \ + caml_alloc(1 + (sz), Abstract_tag); +#endif + +#else +/* out-of-line allocation, inserting a canary after the last limb */ +static value ml_z_alloc(mp_size_t sz) +{ + value v; +#if Z_CUSTOM_BLOCK + v = caml_alloc_custom(&ml_z_custom_ops, (1 + sz + 1) * sizeof(value), 0, 1); +#else + v = caml_alloc(1 + sz + 1, Abstract_tag); +#endif + Z_LIMB(v)[sz] = 0xDEADBEEF ^ sz; + return v; +} +#endif + +/* 
duplicates the caml block src */ +static inline void ml_z_cpy_limb(mp_limb_t* dst, mp_limb_t* src, mp_size_t sz) +{ + memcpy(dst, src, sz * sizeof(mp_limb_t)); +} + +/* duplicates the mp_limb_t array src */ +static inline mp_limb_t* ml_z_dup_limb(mp_limb_t* src, mp_size_t sz) +{ + mp_limb_t* r = (mp_limb_t*) malloc(sz * sizeof(mp_limb_t)); + memcpy(r, src, sz * sizeof(mp_limb_t)); + return r; +} + + +#ifdef _MSC_VER +#define MAYBE_UNUSED +#else +#define MAYBE_UNUSED (void) +#endif + +/* given a z object, define: + - ptr_arg: a pointer to the first mp_limb_t + - size_arg: the number of mp-limb_t + - sign_arg: the sign of the number + if arg is an int, it is converted to a 1-limb number +*/ +#define Z_DECL(arg) \ + mp_limb_t loc_##arg, *ptr_##arg; \ + mp_size_t size_##arg; \ + intnat sign_##arg; \ + MAYBE_UNUSED loc_##arg; \ + MAYBE_UNUSED ptr_##arg; \ + MAYBE_UNUSED size_##arg; \ + MAYBE_UNUSED sign_##arg; + +#define Z_ARG(arg) \ + if (Is_long(arg)) { \ + intnat n = Long_val(arg); \ + loc_##arg = n < 0 ? -n : n; \ + sign_##arg = n & Z_SIGN_MASK; \ + size_##arg = n != 0; \ + ptr_##arg = &loc_##arg; \ + } \ + else { \ + size_##arg = Z_SIZE(arg); \ + sign_##arg = Z_SIGN(arg); \ + ptr_##arg = Z_LIMB(arg); \ + } + +/* After an allocation, a heap-allocated Z argument may have moved and + its ptr_arg pointer can be invalid. Reset the ptr_arg pointer to + its correct value. */ + +#define Z_REFRESH(arg) \ + if (! Is_long(arg)) ptr_##arg = Z_LIMB(arg); + +/* computes the actual size of the z object r and updates its header, + either returns r or, if the number is small enough, an int + */ +static value ml_z_reduce(value r, mp_size_t sz, intnat sign) +{ + while (sz > 0 && !Z_LIMB(r)[sz-1]) sz--; +#if Z_USE_NATINT + if (!sz) return Val_long(0); + if (sz <= 1) { + if (Z_LIMB(r)[0] <= Z_MAX_INT) { + if (sign) return Val_long(-Z_LIMB(r)[0]); + else return Val_long(Z_LIMB(r)[0]); + } + if (Z_LIMB(r)[0] == -Z_MIN_INT && sign) { + return Val_long(Z_MIN_INT); + } + } +#else + if (!sz) sign = 0; +#endif + Z_HEAD(r) = sz | sign; + return r; +} + +static void ml_z_raise_overflow() +{ + caml_raise_constant(*caml_named_value("ml_z_overflow")); +} + +#define ml_z_raise_divide_by_zero() \ + caml_raise_zero_divide() + + + +/*--------------------------------------------------- + CONVERSION FUNCTIONS + ---------------------------------------------------*/ + +CAMLprim value ml_z_of_int(value v) +{ +#if Z_USE_NATINT + Z_MARK_OP; + return v; +#else + intnat x; + value r; + Z_MARK_OP; + Z_MARK_SLOW; + x = Long_val(v); + r = ml_z_alloc(1); + if (x > 0) { Z_HEAD(r) = 1; Z_LIMB(r)[0] = x; } + else if (x < 0) { Z_HEAD(r) = 1 | Z_SIGN_MASK; Z_LIMB(r)[0] = -x; } + else Z_HEAD(r) = 0; + Z_CHECK(r); + return r; +#endif +} + +CAMLprim value ml_z_of_nativeint(value v) +{ + intnat x; + value r; + Z_MARK_OP; + x = Nativeint_val(v); +#if Z_USE_NATINT + if (Z_FITS_INT(x)) return Val_long(x); +#endif + Z_MARK_SLOW; + r = ml_z_alloc(1); + if (x > 0) { Z_HEAD(r) = 1; Z_LIMB(r)[0] = x; } + else if (x < 0) { Z_HEAD(r) = 1 | Z_SIGN_MASK; Z_LIMB(r)[0] = -x; } + else Z_HEAD(r) = 0; + Z_CHECK(r); + return r; +} + +CAMLprim value ml_z_of_int32(value v) +{ + int32_t x; + Z_MARK_OP; + x = Int32_val(v); +#if Z_USE_NATINT && defined(ARCH_SIXTYFOUR) + return Val_long(x); +#else +#if Z_USE_NATINT + if (Z_FITS_INT(x)) return Val_long(x); +#endif + { + value r; + Z_MARK_SLOW; + r = ml_z_alloc(1); + if (x > 0) { Z_HEAD(r) = 1; Z_LIMB(r)[0] = x; } + else if (x < 0) { Z_HEAD(r) = 1 | Z_SIGN_MASK; Z_LIMB(r)[0] = -(mp_limb_t)x; } + else Z_HEAD(r) = 0; + 
Z_CHECK(r); + return r; + } +#endif +} + +CAMLprim value ml_z_of_int64(value v) +{ + int64_t x; + value r; + Z_MARK_OP; + x = Int64_val(v); +#if Z_USE_NATINT + if (Z_FITS_INT(x)) return Val_long(x); +#endif + Z_MARK_SLOW; +#ifdef ARCH_SIXTYFOUR + r = ml_z_alloc(1); + if (x > 0) { Z_HEAD(r) = 1; Z_LIMB(r)[0] = x; } + else if (x < 0) { Z_HEAD(r) = 1 | Z_SIGN_MASK; Z_LIMB(r)[0] = -x; } + else Z_HEAD(r) = 0; +#else + { + mp_limb_t sign; + r = ml_z_alloc(2); + if (x >= 0) { sign = 0; } + else { sign = Z_SIGN_MASK; x = -x; } + Z_LIMB(r)[0] = x; + Z_LIMB(r)[1] = x >> 32; + r = ml_z_reduce(r, 2, sign); + } +#endif + Z_CHECK(r); + return r; +} + +CAMLprim value ml_z_of_float(value v) +{ + double x; + int exp; + int64_t y, m; + value r; + Z_MARK_OP; + x = Double_val(v); +#if Z_USE_NATINT + if (x >= Z_MIN_INT_FL && x <= Z_MAX_INT_FL) return Val_long((intnat) x); +#endif + Z_MARK_SLOW; +#ifdef ARCH_ALIGN_INT64 + memcpy(&y, (void *) v, 8); +#else + y = *((int64_t*)v); +#endif + exp = ((y >> 52) & 0x7ff) - 1023; /* exponent */ + if (exp < 0) return(Val_long(0)); + if (exp == 1024) ml_z_raise_overflow(); /* NaN or infinity */ + m = (y & 0x000fffffffffffffLL) | 0x0010000000000000LL; /* mantissa */ + if (exp <= 52) { + m >>= 52-exp; +#ifdef ARCH_SIXTYFOUR + r = Val_long((x >= 0.) ? m : -m); +#else + r = ml_z_alloc(2); + Z_LIMB(r)[0] = m; + Z_LIMB(r)[1] = m >> 32; + r = ml_z_reduce(r, 2, (x >= 0.) ? 0 : Z_SIGN_MASK); +#endif + } + else { + int c1 = (exp-52) / Z_LIMB_BITS; + int c2 = (exp-52) % Z_LIMB_BITS; + mp_size_t i; +#ifdef ARCH_SIXTYFOUR + r = ml_z_alloc(c1 + 2); + for (i = 0; i < c1; i++) Z_LIMB(r)[i] = 0; + Z_LIMB(r)[c1] = m << c2; + Z_LIMB(r)[c1+1] = c2 ? (m >> (64-c2)) : 0; + r = ml_z_reduce(r, c1 + 2, (x >= 0.) ? 0 : Z_SIGN_MASK); +#else + r = ml_z_alloc(c1 + 3); + for (i = 0; i < c1; i++) Z_LIMB(r)[i] = 0; + Z_LIMB(r)[c1] = m << c2; + Z_LIMB(r)[c1+1] = m >> (32-c2); + Z_LIMB(r)[c1+2] = c2 ? (m >> (64-c2)) : 0; + r = ml_z_reduce(r, c1 + 3, (x >= 0.) ? 0 : Z_SIGN_MASK); +#endif + } + Z_CHECK(r); + return r; +} + +CAMLprim value ml_z_of_substring_base(value b, value v, value offset, value length) +{ + CAMLparam1(v); + CAMLlocal1(r); + intnat ofs = Long_val(offset); + intnat len = Long_val(length); + /* make sure the ofs/length make sense */ + if (ofs < 0 + || len < 0 + || (intnat)caml_string_length(v) < ofs + len) + caml_invalid_argument("Z.of_substring_base: invalid offset or length"); + /* process the string */ + const char *d = String_val(v) + ofs; + const char *end = d + len; + mp_size_t i, j, sz, sz2, num_digits = 0; + mp_limb_t sign = 0; + intnat base = Long_val(b); + /* We allow [d] to advance beyond [end] while parsing the prefix: + sign, base, and/or leading zeros. + This simplifies the code, and reading these locations is safe since + we don't progress beyond a terminating null character. + At the end of the prefix, if we ran past the end, we return 0. + */ + /* get optional sign */ + if (*d == '-') { sign ^= Z_SIGN_MASK; d++; } + if (*d == '+') d++; + /* get optional base */ + if (!base) { + base = 10; + if (*d == '0') { + d++; + if (*d == 'o' || *d == 'O') { base = 8; d++; } + else if (*d == 'x' || *d == 'X') { base = 16; d++; } + else if (*d == 'b' || *d == 'B') { base = 2; d++; } + else { + /* The leading zero is not part of a base prefix. 
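+         (For instance, "0555" is parsed as the decimal number 555:
+         the '0' is consumed as an ordinary leading zero.)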
This is an + important distinction for the check below looking at + leading underscore + */ + d--; } + } + } + if (base < 2 || base > 16) + caml_invalid_argument("Z.of_substring_base: base must be between 2 and 16"); + /* we do not allow leading underscore */ + if (*d == '_') + caml_invalid_argument("Z.of_substring_base: invalid digit"); + while (*d == '0' || *d == '_') d++; + /* sz is the length of the substring that has not been consumed above. */ + sz = end - d; + for(i = 0; i < sz; i++){ + /* underscores are going to be ignored below. Assuming the string + is well formatted, this will give us the exact number of digits */ + if(d[i] != '_') num_digits++; + } +#if Z_USE_NATINT + if (sz <= 0) { + /* "+", "-", "0x" are parsed as 0. */ + r = Val_long(0); + } + /* Process common case (fits into a native integer) */ + else if ((base == 10 && num_digits <= Z_BASE10_LENGTH_OP) + || (base == 16 && num_digits <= Z_BASE16_LENGTH_OP) + || (base == 8 && num_digits <= Z_BASE8_LENGTH_OP) + || (base == 2 && num_digits <= Z_BASE2_LENGTH_OP)) { + Z_MARK_OP; + intnat ret = 0; + for (i = 0; i < sz; i++) { + int digit = 0; + if (d[i] == '_') continue; + if (d[i] >= '0' && d[i] <= '9') digit = d[i] - '0'; + else if (d[i] >= 'a' && d[i] <= 'f') digit = d[i] - 'a' + 10; + else if (d[i] >= 'A' && d[i] <= 'F') digit = d[i] - 'A' + 10; + else caml_invalid_argument("Z.of_substring_base: invalid digit"); + if (digit >= base) + caml_invalid_argument("Z.of_substring_base: invalid digit"); + ret = ret * base + digit; + } + r = Val_long(ret * (sign ? -1 : 1)); + } else +#endif + { + /* converts to sequence of digits */ + char* digits = (char*)malloc(num_digits+1); + for (i = 0, j = 0; i < sz; i++) { + if (d[i] == '_') continue; + if (d[i] >= '0' && d[i] <= '9') digits[j] = d[i] - '0'; + else if (d[i] >= 'a' && d[i] <= 'f') digits[j] = d[i] - 'a' + 10; + else if (d[i] >= 'A' && d[i] <= 'F') digits[j] = d[i] - 'A' + 10; + else { + free(digits); + caml_invalid_argument("Z.of_substring_base: invalid digit"); + } + if (digits[j] >= base) { + free(digits); + caml_invalid_argument("Z.of_substring_base: invalid digit"); + } + j++; + } + /* make sure that digits is nul terminated */ + digits[j] = 0; + r = ml_z_alloc(1 + j / (2 * sizeof(mp_limb_t))); + sz2 = mpn_set_str(Z_LIMB(r), (unsigned char*)digits, j, base); + r = ml_z_reduce(r, sz2, sign); + free(digits); + } + Z_CHECK(r); + CAMLreturn(r); +} + +CAMLprim value ml_z_to_int(value v) +{ + intnat x; + Z_DECL(v); + Z_MARK_OP; + Z_CHECK(v); + if (Is_long(v)) return v; + Z_MARK_SLOW; + Z_ARG(v); + if (size_v > 1) ml_z_raise_overflow(); + if (!size_v) return Val_long(0); + x = *ptr_v; + if (sign_v) { + if ((uintnat)x > Z_HI_INT) ml_z_raise_overflow(); + x = -x; + } + else { + if ((uintnat)x >= Z_HI_INT) ml_z_raise_overflow(); + } + return Val_long(x); +} + +CAMLprim value ml_z_to_nativeint(value v) +{ + intnat x; + Z_DECL(v); + Z_MARK_OP; + Z_CHECK(v); + if (Is_long(v)) return caml_copy_nativeint(Long_val(v)); + Z_MARK_SLOW; + Z_ARG(v); + if (size_v > 1) ml_z_raise_overflow(); + if (!size_v) x = 0; + else { + x = *ptr_v; + if (sign_v) { + if ((uintnat)x > Z_HI_INTNAT) ml_z_raise_overflow(); + x = -x; + } + else { + if ((uintnat)x >= Z_HI_INTNAT) ml_z_raise_overflow(); + } + } + return caml_copy_nativeint(x); +} + +CAMLprim value ml_z_to_int32(value v) +{ + intnat x; + Z_DECL(v); + Z_MARK_OP; + Z_CHECK(v); + if (Is_long(v)) { + x = Long_val(v); +#ifdef ARCH_SIXTYFOUR + if (x >= (intnat)Z_HI_INT32 || x < -(intnat)Z_HI_INT32) + ml_z_raise_overflow(); +#endif + return 
caml_copy_int32(x); + } + else { + Z_ARG(v); + Z_MARK_SLOW; + if (size_v > 1) ml_z_raise_overflow(); + if (!size_v) x = 0; + else { + x = *ptr_v; + if (sign_v) { + if ((uintnat)x > Z_HI_INT32) ml_z_raise_overflow(); + x = -x; + } + else { + if ((uintnat)x >= Z_HI_INT32) ml_z_raise_overflow(); + } + } + return caml_copy_int32(x); + } +} + +CAMLprim value ml_z_to_int64(value v) +{ + int64_t x = 0; + Z_DECL(v); + Z_MARK_OP; + Z_CHECK(v); + if (Is_long(v)) return caml_copy_int64(Long_val(v)); + Z_MARK_SLOW; + Z_ARG(v); + switch (size_v) { + case 0: x = 0; break; + case 1: x = ptr_v[0]; break; +#ifndef ARCH_SIXTYFOUR + case 2: x = ptr_v[0] | ((uint64_t)ptr_v[1] << 32); break; +#endif + default: ml_z_raise_overflow(); break; + } + if (sign_v) { + if ((uint64_t)x > Z_HI_INT64) ml_z_raise_overflow(); + x = -x; + } + else { + if ((uint64_t)x >= Z_HI_INT64) ml_z_raise_overflow(); + } + return caml_copy_int64(x); +} + +/* XXX: characters that do not belong to the format are ignored, this departs + from the classic printf behavior (it copies them in the output) + */ +CAMLprim value ml_z_format(value f, value v) +{ + CAMLparam2(f,v); + Z_DECL(v); + const char tab[2][16] = + { { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }, + { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' } }; + char* buf, *dst; + mp_size_t i, size_dst, max_size; + value r; + const char* fmt = String_val(f); + int base = 10; /* base */ + int cas = 0; /* uppercase X / lowercase x */ + int width = 0; + int alt = 0; /* alternate # */ + int dir = 0; /* right / left adjusted */ + char sign = 0; /* sign char */ + char pad = ' '; /* padding char */ + char *prefix = ""; + Z_MARK_OP; + Z_CHECK(v); + Z_ARG(v); + Z_MARK_SLOW; + + /* parse format */ + while (*fmt == '%') fmt++; + for (; ; fmt++) { + if (*fmt == '#') alt = 1; + else if (*fmt == '0') pad = '0'; + else if (*fmt == '-') dir = 1; + else if (*fmt == ' ' || *fmt == '+') sign = *fmt; + else break; + } + if (sign_v) sign = '-'; + for (;*fmt>='0' && *fmt<='9';fmt++) + width = 10*width + *fmt-'0'; + switch (*fmt) { + case 'i': case 'd': case 'u': break; + case 'b': base = 2; if (alt) prefix = "0b"; break; + case 'o': base = 8; if (alt) prefix = "0o"; break; + case 'x': base = 16; if (alt) prefix = "0x"; cas = 1; break; + case 'X': base = 16; if (alt) prefix = "0X"; break; + default: caml_invalid_argument("Z.format: invalid format"); + } + if (dir) pad = ' '; + /* get digits */ + /* we need space for sign + prefix + digits + 1 + padding + terminal 0 */ + max_size = 1 + 2 + Z_LIMB_BITS * size_v + 1 + 2 * width + 1; + buf = (char*) malloc(max_size); + dst = buf + 1 + 2 + width; + if (!size_v) { + size_dst = 1; + *dst = '0'; + } + else { + mp_limb_t* copy_v = ml_z_dup_limb(ptr_v, size_v); + size_dst = mpn_get_str((unsigned char*)dst, base, copy_v, size_v); + if (dst + size_dst >= buf + max_size) + caml_failwith("Z.format: internal error"); + free(copy_v); + while (size_dst && !*dst) { dst++; size_dst--; } + for (i = 0; i < size_dst; i++) + dst[i] = tab[cas][ (int) dst[i] ]; + } + /* add prefix, sign & padding */ + if (pad == ' ') { + if (dir) { + /* left alignment */ + for (i = strlen(prefix); i > 0; i--, size_dst++) + *(--dst) = prefix[i-1]; + if (sign) { *(--dst) = sign; size_dst++; } + for (; size_dst < width; size_dst++) + dst[size_dst] = pad; + } + else { + /* right alignment, space padding */ + for (i = strlen(prefix); i > 0; i--, size_dst++) + *(--dst) = prefix[i-1]; + if (sign) { *(--dst) = sign; size_dst++; } + for (; 
size_dst < width; size_dst++) *(--dst) = pad; + } + } + else { + /* right alignment, non-space padding */ + width -= strlen(prefix) + (sign ? 1 : 0); + for (; size_dst < width; size_dst++) *(--dst) = pad; + for (i = strlen(prefix); i > 0; i--, size_dst++) + *(--dst) = prefix[i-1]; + if (sign) { *(--dst) = sign; size_dst++; } + } + dst[size_dst] = 0; + if (dst < buf || dst + size_dst >= buf + max_size) + caml_failwith("Z.format: internal error"); + r = caml_copy_string(dst); + free(buf); + CAMLreturn(r); +} + +#ifdef ARCH_SIXTYFOUR +#define BITS_PER_WORD 64 +#else +#define BITS_PER_WORD 32 +#endif + +CAMLprim value ml_z_extract(value arg, value off, value len) +{ + intnat o, l, x; + mp_size_t sz, c1, c2, csz, i; + mp_limb_t cr; + value r; + Z_DECL(arg); + Z_MARK_OP; + MAYBE_UNUSED x; + o = Long_val(off); + l = Long_val(len); + if (o < 0) caml_invalid_argument("Z.extract: negative bit offset"); + if (l <= 0) caml_invalid_argument("Z.extract: nonpositive bit length"); +#if Z_USE_NATINT + /* Fast path */ + if (Is_long(arg)) { + x = Long_val(arg); + /* Shift away low "o" bits. If "o" too big, just replicate sign bit. */ + if (o >= BITS_PER_WORD) o = BITS_PER_WORD - 1; + x = x >> o; + /* Extract "l" low bits, if "l" is small enough */ + if (l < BITS_PER_WORD - 1) { + x = x & (((intnat)1 << l) - 1); + return Val_long(x); + } else { + /* If x >= 0, the extraction of "l" low bits keeps x unchanged. */ + if (x >= 0) return Val_long(x); + /* If x < 0, fall through slow path */ + } + } +#endif + Z_MARK_SLOW; + { + CAMLparam1(arg); + Z_ARG(arg); + sz = (l + Z_LIMB_BITS - 1) / Z_LIMB_BITS; + r = ml_z_alloc(sz + 1); + Z_REFRESH(arg); + c1 = o / Z_LIMB_BITS; + c2 = o % Z_LIMB_BITS; + /* shift or copy */ + csz = size_arg - c1; + if (csz > sz + 1) csz = sz + 1; + cr = 0; + if (csz > 0) { + if (c2) cr = mpn_rshift(Z_LIMB(r), ptr_arg + c1, csz, c2); + else ml_z_cpy_limb(Z_LIMB(r), ptr_arg + c1, csz); + } + else csz = 0; + /* 0-pad */ + for (i = csz; i < sz; i++) + Z_LIMB(r)[i] = 0; + /* 2's complement */ + if (sign_arg) { + for (i = 0; i < sz; i++) + Z_LIMB(r)[i] = ~Z_LIMB(r)[i]; + /* carry (cr=0 if all shifted-out bits are 0) */ + for (i = 0; !cr && i < c1 && i < size_arg; i++) + cr = ptr_arg[i]; + if (!cr) mpn_add_1(Z_LIMB(r), Z_LIMB(r), sz, 1); + } + /* mask out high bits */ + l %= Z_LIMB_BITS; + if (l) Z_LIMB(r)[sz-1] &= ((uintnat)(intnat)-1) >> (Z_LIMB_BITS - l); + r = ml_z_reduce(r, sz, 0); + CAMLreturn(r); + } +} + +/* NOTE: the sign is not stored */ +CAMLprim value ml_z_to_bits(value arg) +{ + CAMLparam1(arg); + CAMLlocal1(r); + Z_DECL(arg); + mp_size_t i; + unsigned char* p; + Z_MARK_OP; + Z_MARK_SLOW; + Z_ARG(arg); + r = caml_alloc_string(size_arg * sizeof(mp_limb_t)); + Z_REFRESH(arg); + p = (unsigned char*) String_val(r); + memset(p, 0, size_arg * sizeof(mp_limb_t)); + for (i = 0; i < size_arg; i++) { + mp_limb_t x = ptr_arg[i]; + *(p++) = x; + *(p++) = x >> 8; + *(p++) = x >> 16; + *(p++) = x >> 24; +#ifdef ARCH_SIXTYFOUR + *(p++) = x >> 32; + *(p++) = x >> 40; + *(p++) = x >> 48; + *(p++) = x >> 56; +#endif + } + CAMLreturn(r); +} + +CAMLprim value ml_z_of_bits(value arg) +{ + CAMLparam1(arg); + CAMLlocal1(r); + mp_size_t sz, szw; + mp_size_t i = 0; + mp_limb_t x; + const unsigned char* p; + Z_MARK_OP; + Z_MARK_SLOW; + sz = caml_string_length(arg); + szw = (sz + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t); + r = ml_z_alloc(szw); + p = (const unsigned char*) String_val(arg); + /* all limbs but last */ + if (szw > 1) { + for (; i < szw - 1; i++) { + x = *(p++); + x |= ((mp_limb_t) *(p++)) << 8; + x 
|= ((mp_limb_t) *(p++)) << 16; + x |= ((mp_limb_t) *(p++)) << 24; +#ifdef ARCH_SIXTYFOUR + x |= ((mp_limb_t) *(p++)) << 32; + x |= ((mp_limb_t) *(p++)) << 40; + x |= ((mp_limb_t) *(p++)) << 48; + x |= ((mp_limb_t) *(p++)) << 56; +#endif + Z_LIMB(r)[i] = x; + } + sz -= i * sizeof(mp_limb_t); + } + /* last limb */ + if (sz > 0) { + x = *(p++); + if (sz > 1) x |= ((mp_limb_t) *(p++)) << 8; + if (sz > 2) x |= ((mp_limb_t) *(p++)) << 16; + if (sz > 3) x |= ((mp_limb_t) *(p++)) << 24; +#ifdef ARCH_SIXTYFOUR + if (sz > 4) x |= ((mp_limb_t) *(p++)) << 32; + if (sz > 5) x |= ((mp_limb_t) *(p++)) << 40; + if (sz > 6) x |= ((mp_limb_t) *(p++)) << 48; + if (sz > 7) x |= ((mp_limb_t) *(p++)) << 56; +#endif + Z_LIMB(r)[i] = x; + } + r = ml_z_reduce(r, szw, 0); + Z_CHECK(r); + CAMLreturn(r); +} + +/*--------------------------------------------------- + TESTS AND COMPARISONS + ---------------------------------------------------*/ + +CAMLprim value ml_z_compare(value arg1, value arg2) +{ + int r; + Z_DECL(arg1); Z_DECL(arg2); + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH + /* Value-equal small integers are equal. + Pointer-equal big integers are equal as well. */ + if (arg1 == arg2) return Val_long(0); + if (Is_long(arg2)) { + if (Is_long(arg1)) { + return arg1 > arg2 ? Val_long(1) : Val_long(-1); + } else { + /* Either arg1 is positive and arg1 > Z_MAX_INT >= arg2 -> result +1 + or arg1 is negative and arg1 < Z_MIN_INT <= arg2 -> result -1 */ + return Z_SIGN(arg1) ? Val_long(-1) : Val_long(1); + } + } + else if (Is_long(arg1)) { + /* Either arg2 is positive and arg2 > Z_MAX_INT >= arg1 -> result -1 + or arg2 is negative and arg2 < Z_MIN_INT <= arg1 -> result +1 */ + return Z_SIGN(arg2) ? Val_long(1) : Val_long(-1); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + Z_ARG(arg1); + Z_ARG(arg2); + r = 0; + if (sign_arg1 != sign_arg2) r = 1; + else if (size_arg1 > size_arg2) r = 1; + else if (size_arg1 < size_arg2) r = -1; + else { + mp_size_t i; + for (i = size_arg1 - 1; i >= 0; i--) { + if (ptr_arg1[i] > ptr_arg2[i]) { r = 1; break; } + if (ptr_arg1[i] < ptr_arg2[i]) { r = -1; break; } + } + } + if (sign_arg1) r = -r; + return Val_long(r); +} + +CAMLprim value ml_z_equal(value arg1, value arg2) +{ + mp_size_t i; + Z_DECL(arg1); Z_DECL(arg2); + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH + /* Value-equal small integers are equal. + Pointer-equal big integers are equal as well. */ + if (arg1 == arg2) return Val_true; + /* If both arg1 and arg2 are small integers but failed the equality + test above, they are different. + If one of arg1/arg2 is a small integer and the other is a big integer, + they are different: one is in the range [Z_MIN_INT,Z_MAX_INT] + and the other is outside this range. 
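+     This relies on the normalization invariant stated at the top of
+     this file: a number that fits in [Z_MIN_INT,Z_MAX_INT] is always
+     stored as a tagged integer, never as a block.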
*/ + if (Is_long(arg2) || Is_long(arg1)) return Val_false; +#endif + /* mpn_ version */ + Z_MARK_SLOW; + Z_ARG(arg1); + Z_ARG(arg2); + if (sign_arg1 != sign_arg2 || size_arg1 != size_arg2) return Val_false; + for (i = 0; i < size_arg1; i++) + if (ptr_arg1[i] != ptr_arg2[i]) return Val_false; + return Val_true; +} + +int ml_z_sgn(value arg) +{ + if (Is_long(arg)) { + if (arg > Val_long(0)) return 1; + else if (arg < Val_long(0)) return -1; + else return 0; + } + else { + Z_MARK_SLOW; +#if !Z_USE_NATINT + /* In "use natint" mode, zero is a small integer, treated above */ + if (!Z_SIZE(arg)) return 0; +#endif + if (Z_SIGN(arg)) return -1; else return 1; + } +} +CAMLprim value ml_z_sign(value arg) +{ + Z_MARK_OP; + Z_CHECK(arg); + return Val_long(ml_z_sgn(arg)); +} + +CAMLprim value ml_z_fits_int(value v) +{ + intnat x; + Z_DECL(v); + Z_MARK_OP; + Z_CHECK(v); + if (Is_long(v)) return Val_true; + Z_MARK_SLOW; + Z_ARG(v); + if (size_v > 1) return Val_false; + if (!size_v) return Val_true; + x = *ptr_v; + if (sign_v) { + if ((uintnat)x > Z_HI_INT) return Val_false; + } + else { + if ((uintnat)x >= Z_HI_INT) return Val_false; + } + return Val_true; +} + +CAMLprim value ml_z_fits_nativeint(value v) +{ + intnat x; + Z_DECL(v); + Z_MARK_OP; + Z_CHECK(v); + if (Is_long(v)) return Val_true; + Z_MARK_SLOW; + Z_ARG(v); + if (size_v > 1) return Val_false; + if (!size_v) return Val_true; + x = *ptr_v; + if (sign_v) { + if ((uintnat)x > Z_HI_INTNAT) return Val_false; + } + else { + if ((uintnat)x >= Z_HI_INTNAT) return Val_false; + } + return Val_true; +} + +CAMLprim value ml_z_fits_int32(value v) +{ + intnat x; + Z_MARK_OP; + Z_CHECK(v); + if (Is_long(v)) { +#ifdef ARCH_SIXTYFOUR + x = Long_val(v); + if (x >= (intnat)Z_HI_INT32 || x < -(intnat)Z_HI_INT32) + return Val_false; +#endif + return Val_true; + } + else { + Z_DECL(v); + Z_MARK_SLOW; + Z_ARG(v); + if (size_v > 1) return Val_false; + if (!size_v) return Val_true; + x = *ptr_v; + if (sign_v) { + if ((uintnat)x > Z_HI_INT32) return Val_false; + } + else { + if ((uintnat)x >= Z_HI_INT32) return Val_false; + } + return Val_true; + } +} + +CAMLprim value ml_z_fits_int64(value v) +{ + int64_t x; + Z_DECL(v); + Z_MARK_OP; + Z_CHECK(v); + if (Is_long(v)) return Val_true; + Z_MARK_SLOW; + Z_ARG(v); + switch (size_v) { + case 0: return Val_true; + case 1: x = ptr_v[0]; break; +#ifndef ARCH_SIXTYFOUR + case 2: x = ptr_v[0] | ((uint64_t)ptr_v[1] << 32); break; +#endif + default: return Val_false; + } + if (sign_v) { + if ((uint64_t)x > Z_HI_INT64) return Val_false; + } + else { + if ((uint64_t)x >= Z_HI_INT64) return Val_false; + } + return Val_true; +} + +CAMLprim value ml_z_size(value v) +{ + Z_MARK_OP; + if (Is_long(v)) return Val_long(1); + else return Val_long(Z_SIZE(v)); +} + + +/*--------------------------------------------------- + ARITHMETIC OPERATORS + ---------------------------------------------------*/ + +CAMLprim value ml_z_neg(value arg) +{ + Z_MARK_OP; + Z_CHECK(arg); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg)) { + /* fast path */ + if (arg > Val_long(Z_MIN_INT)) return 2 - arg; + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam1(arg); + value r; + Z_DECL(arg); + Z_ARG(arg); + r = ml_z_alloc(size_arg); + Z_REFRESH(arg); + ml_z_cpy_limb(Z_LIMB(r), ptr_arg, size_arg); + r = ml_z_reduce(r, size_arg, sign_arg ^ Z_SIGN_MASK); + Z_CHECK(r); + CAMLreturn(r); + } +} + +CAMLprim value ml_z_abs(value arg) +{ + Z_MARK_OP; + Z_CHECK(arg); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg)) { + /* fast path */ + if 
(arg >= Val_long(0)) return arg; + if (arg > Val_long(Z_MIN_INT)) return 2 - arg; + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam1(arg); + Z_DECL(arg); + value r; + Z_ARG(arg); + if (sign_arg) { + r = ml_z_alloc(size_arg); + Z_REFRESH(arg); + ml_z_cpy_limb(Z_LIMB(r), ptr_arg, size_arg); + r = ml_z_reduce(r, size_arg, 0); + Z_CHECK(r); + } + else r = arg; + CAMLreturn(r); + } +} + +/* helper function for add/sub */ +static value ml_z_addsub(value arg1, value arg2, intnat sign) +{ + CAMLparam2(arg1,arg2); + Z_DECL(arg1); Z_DECL(arg2); + value r; + mp_limb_t c; + Z_ARG(arg1); + Z_ARG(arg2); + sign_arg2 ^= sign; + if (!size_arg2) r = arg1; + else if (!size_arg1) { + if (sign) { + /* negation */ + r = ml_z_alloc(size_arg2); + Z_REFRESH(arg2); + ml_z_cpy_limb(Z_LIMB(r), ptr_arg2, size_arg2); + r = ml_z_reduce(r, size_arg2, sign_arg2); + } + else r = arg2; + } + else if (sign_arg1 == sign_arg2) { + /* addition */ + if (size_arg1 >= size_arg2) { + r = ml_z_alloc(size_arg1 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + c = mpn_add(Z_LIMB(r), ptr_arg1, size_arg1, ptr_arg2, size_arg2); + Z_LIMB(r)[size_arg1] = c; + r = ml_z_reduce(r, size_arg1+1, sign_arg1); + } + else { + r = ml_z_alloc(size_arg2 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + c = mpn_add(Z_LIMB(r), ptr_arg2, size_arg2, ptr_arg1, size_arg1); + Z_LIMB(r)[size_arg2] = c; + r = ml_z_reduce(r, size_arg2+1, sign_arg1); + } + } + else { + /* subtraction */ + if (size_arg1 > size_arg2) { + r = ml_z_alloc(size_arg1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub(Z_LIMB(r), ptr_arg1, size_arg1, ptr_arg2, size_arg2); + r = ml_z_reduce(r, size_arg1, sign_arg1); + } + else if (size_arg1 < size_arg2) { + r = ml_z_alloc(size_arg2); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub(Z_LIMB(r), ptr_arg2, size_arg2, ptr_arg1, size_arg1); + r = ml_z_reduce(r, size_arg2, sign_arg2); + } + else { + int cmp = mpn_cmp(ptr_arg1, ptr_arg2, size_arg1); + if (cmp > 0) { + r = ml_z_alloc(size_arg1+1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_n(Z_LIMB(r), ptr_arg1, ptr_arg2, size_arg1); + r = ml_z_reduce(r, size_arg1, sign_arg1); + } + else if (cmp < 0) { + r = ml_z_alloc(size_arg1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_n(Z_LIMB(r), ptr_arg2, ptr_arg1, size_arg1); + r = ml_z_reduce(r, size_arg1, sign_arg2); + } + else r = Val_long(0); + } + } + Z_CHECK(r); + CAMLreturn(r); +} + +CAMLprim value ml_z_add(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + intnat v = a1 + a2; + if (Z_FITS_INT(v)) return Val_long(v); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return ml_z_addsub(arg1, arg2, 0); +} + +CAMLprim value ml_z_sub(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + intnat v = a1 - a2; + if (Z_FITS_INT(v)) return Val_long(v); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return ml_z_addsub(arg1, arg2, Z_SIGN_MASK); +} + +CAMLprim value ml_z_mul_overflows(value vx, value vy) +{ +#if HAS_BUILTIN(__builtin_mul_overflow) || __GNUC__ >= 5 + intnat z; + return Val_bool(__builtin_mul_overflow(vx - 1, vy >> 1, &z)); +#elif defined(__GNUC__) && defined(__x86_64__) + intnat z; + unsigned char o; + asm("imulq %1, %3; seto %0" + : "=q" (o), "=r" (z) + : "1" (vx - 1), "r" 
(vy >> 1) + : "cc"); + return Val_int(o); +#elif defined(_MSC_VER) && defined(_M_X64) + intnat hi, lo; + lo = _mul128(vx - 1, vy >> 1, &hi); + return Val_bool(hi != lo >> 63); +#else + /* Portable C code */ + intnat x = Long_val(vx); + intnat y = Long_val(vy); + /* Quick approximate check for small values of x and y. + Also catches the cases x = 0, x = 1, y = 0, y = 1. */ + if (Z_FITS_HINT(x)) { + if (Z_FITS_HINT(y)) return Val_false; + if ((uintnat) x <= 1) return Val_false; + } + if ((uintnat) y <= 1) return Val_false; +#if 1 + /* Give up at this point; we'll go through the general case in ml_z_mul */ + return Val_true; +#else + /* The product x*y is representable as an unboxed integer if + it is in [Z_MIN_INT, Z_MAX_INT]. + x >= 0 y >= 0: x*y >= 0 and x*y <= Z_MAX_INT <-> y <= Z_MAX_INT / x + x < 0 y >= 0: x*y <= 0 and x*y >= Z_MIN_INT <-> x >= Z_MIN_INT / y + x >= 0 y < 0 : x*y <= 0 and x*y >= Z_MIN_INT <-> y >= Z_MIN_INT / x + x < 0 y < 0 : x*y >= 0 and x*y <= Z_MAX_INT <-> x >= Z_MAX_INT / y */ + if (x >= 0) + if (y >= 0) + return Val_bool(y > Z_MAX_INT / x); + else + return Val_bool(y < Z_MIN_INT / x); + else + if (y >= 0) + return Val_bool(x < Z_MIN_INT / y); + else + return Val_bool(x < Z_MAX_INT / y); +#endif +#endif +} + +CAMLprim value ml_z_mul(value arg1, value arg2) +{ + Z_DECL(arg1); Z_DECL(arg2); + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2) && + ml_z_mul_overflows(arg1, arg2) == Val_false) { + return Val_long(Long_val(a1) * Long_val(a2)); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + Z_ARG(arg1); + Z_ARG(arg2); + if (!size_arg1 || !size_arg2) return Val_long(0); + { + CAMLparam2(arg1,arg2); + value r = ml_z_alloc(size_arg1 + size_arg2); + mp_limb_t c; + Z_REFRESH(arg1); + Z_REFRESH(arg2); + if (size_arg2 == 1) { + c = mpn_mul_1(Z_LIMB(r), ptr_arg1, size_arg1, *ptr_arg2); + Z_LIMB(r)[size_arg1] = c; + } + else if (size_arg1 == 1) { + c = mpn_mul_1(Z_LIMB(r), ptr_arg2, size_arg2, *ptr_arg1); + Z_LIMB(r)[size_arg2] = c; + } +#if HAVE_NATIVE_mpn_mul_2 /* untested */ + else if (size_arg2 == 2) { + c = mpn_mul_2(Z_LIMB(r), ptr_arg1, size_arg1, ptr_arg2); + Z_LIMB(r)[size_arg1 + 1] = c; + } + else if (size_arg1 == 2) { + c = mpn_mul_2(Z_LIMB(r), ptr_arg2, size_arg2, ptr_arg1); + Z_LIMB(r)[size_arg2 + 1] = c; + } +#endif + else if (size_arg1 > size_arg2) + mpn_mul(Z_LIMB(r), ptr_arg1, size_arg1, ptr_arg2, size_arg2); + else if (size_arg1 < size_arg2) + mpn_mul(Z_LIMB(r), ptr_arg2, size_arg2, ptr_arg1, size_arg1); +/* older GMP don't have mpn_sqr, so we make the optimisation optional */ +#ifdef mpn_sqr + else if (ptr_arg1 == ptr_arg2) + mpn_sqr(Z_LIMB(r), ptr_arg1, size_arg1); +#endif + else + mpn_mul_n(Z_LIMB(r), ptr_arg1, ptr_arg2, size_arg1); + r = ml_z_reduce(r, size_arg1 + size_arg2, sign_arg1^sign_arg2); + Z_CHECK(r); + CAMLreturn(r); + } +} + +/* helper function for division: returns truncated quotient and remainder */ +static value ml_z_tdiv_qr(value arg1, value arg2) +{ + CAMLparam2(arg1, arg2); + CAMLlocal3(q, r, p); + Z_DECL(arg1); Z_DECL(arg2); + Z_ARG(arg1); Z_ARG(arg2); + if (!size_arg2) ml_z_raise_divide_by_zero(); + if (size_arg1 >= size_arg2) { + q = ml_z_alloc(size_arg1 - size_arg2 + 1); + r = ml_z_alloc(size_arg2); + Z_REFRESH(arg1); Z_REFRESH(arg2); + mpn_tdiv_qr(Z_LIMB(q), Z_LIMB(r), 0, + ptr_arg1, size_arg1, ptr_arg2, size_arg2); + q = ml_z_reduce(q, size_arg1 - size_arg2 + 1, sign_arg1 ^ sign_arg2); + r = ml_z_reduce(r, size_arg2, sign_arg1); + } + else { + q = Val_long(0); + r 
= arg1; + } + Z_CHECK(q); + Z_CHECK(r); + p = caml_alloc_small(2, 0); + Field(p,0) = q; + Field(p,1) = r; + CAMLreturn(p); +} + +CAMLprim value ml_z_div_rem(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + intnat q, r; + if (!a2) ml_z_raise_divide_by_zero(); + q = a1 / a2; + r = a1 % a2; + if (Z_FITS_INT(q) && Z_FITS_INT(r)) { + value p = caml_alloc_small(2, 0); + Field(p,0) = Val_long(q); + Field(p,1) = Val_long(r); + return p; + } + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return ml_z_tdiv_qr(arg1, arg2); +} + +CAMLprim value ml_z_div(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + intnat q; + if (!a2) ml_z_raise_divide_by_zero(); + q = a1 / a2; + if (Z_FITS_INT(q)) return Val_long(q); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return Field(ml_z_tdiv_qr(arg1, arg2), 0); +} + +CAMLprim value ml_z_rem(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + intnat r; + if (!a2) ml_z_raise_divide_by_zero(); + r = a1 % a2; + if (Z_FITS_INT(r)) return Val_long(r); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return Field(ml_z_tdiv_qr(arg1, arg2), 1); +} + +/* helper function for division with rounding towards +oo / -oo */ +static value ml_z_rdiv(value arg1, value arg2, intnat dir) +{ + CAMLparam2(arg1, arg2); + CAMLlocal2(q, r); + Z_DECL(arg1); Z_DECL(arg2); + Z_ARG(arg1); Z_ARG(arg2); + if (!size_arg2) ml_z_raise_divide_by_zero(); + if (size_arg1 >= size_arg2) { + mp_limb_t c = 0; + q = ml_z_alloc(size_arg1 - size_arg2 + 2); + r = ml_z_alloc(size_arg2); + Z_REFRESH(arg1); Z_REFRESH(arg2); + mpn_tdiv_qr(Z_LIMB(q), Z_LIMB(r), 0, + ptr_arg1, size_arg1, ptr_arg2, size_arg2); + if ((sign_arg1 ^ sign_arg2) == dir) { + /* outward rounding */ + mp_size_t sz; + for (sz = size_arg2; sz > 0 && !Z_LIMB(r)[sz-1]; sz--); + if (sz) { + /* r != 0: needs adjustment */ + c = mpn_add_1(Z_LIMB(q), Z_LIMB(q), size_arg1 - size_arg2 + 1, 1); + } + } + Z_LIMB(q)[size_arg1 - size_arg2 + 1] = c; + q = ml_z_reduce(q, size_arg1 - size_arg2 + 2, sign_arg1 ^ sign_arg2); + } + else { + if (size_arg1 && (sign_arg1 ^ sign_arg2) == dir) { + if (dir) q = Val_long(-1); + else q = Val_long(1); + } + else q = Val_long(0); + } + Z_CHECK(q); + CAMLreturn(q); +} + +CAMLprim value ml_z_cdiv(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + intnat q; + if (!a2) ml_z_raise_divide_by_zero(); + /* adjust to round towards +oo */ + if (a1 > 0 && a2 > 0) a1 += a2-1; + else if (a1 < 0 && a2 < 0) a1 += a2+1; + q = a1 / a2; + if (Z_FITS_INT(q)) return Val_long(q); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return ml_z_rdiv(arg1, arg2, 0); +} + +CAMLprim value ml_z_fdiv(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + intnat q; + if (!a2) ml_z_raise_divide_by_zero(); + /* adjust to round 
towards -oo */ + if (a1 < 0 && a2 > 0) a1 -= a2-1; + else if (a1 > 0 && a2 < 0) a1 -= a2+1; + q = a1 / a2; + if (Z_FITS_INT(q)) return Val_long(q); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return ml_z_rdiv(arg1, arg2, Z_SIGN_MASK); +} + +/* helper function for succ / pred */ +static value ml_z_succpred(value arg, intnat sign) +{ + CAMLparam1(arg); + Z_DECL(arg); + value r; + Z_ARG(arg); + r = ml_z_alloc(size_arg + 1); + Z_REFRESH(arg); + if (!size_arg) { + Z_LIMB(r)[0] = 1; + r = ml_z_reduce(r, 1, sign); + } + else if (sign_arg == sign) { + /* add 1 */ + mp_limb_t c = mpn_add_1(Z_LIMB(r), ptr_arg, size_arg, 1); + Z_LIMB(r)[size_arg] = c; + r = ml_z_reduce(r, size_arg + 1, sign_arg); + } + else { + /* subtract 1 */ + mpn_sub_1(Z_LIMB(r), ptr_arg, size_arg, 1); + r = ml_z_reduce(r, size_arg, sign_arg); + } + Z_CHECK(r); + CAMLreturn(r); +} + +CAMLprim value ml_z_succ(value arg) +{ + Z_MARK_OP; + Z_CHECK(arg); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg)) { + /* fast path */ + if (arg < Val_long(Z_MAX_INT)) return arg + 2; + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return ml_z_succpred(arg, 0); +} + +CAMLprim value ml_z_pred(value arg) +{ + Z_MARK_OP; + Z_CHECK(arg); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg)) { + /* fast path */ + if (arg > Val_long(Z_MIN_INT)) return arg - 2; + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + return ml_z_succpred(arg, Z_SIGN_MASK); +} + +CAMLprim value ml_z_sqrt(value arg) +{ + /* XXX TODO: fast path */ + CAMLparam1(arg); + Z_DECL(arg); + value r; + Z_MARK_OP; + Z_MARK_SLOW; + Z_CHECK(arg); + Z_ARG(arg); + if (sign_arg) + caml_invalid_argument("Z.sqrt: square root of a negative number"); + if (size_arg) { + mp_size_t sz = (size_arg + 1) / 2; + r = ml_z_alloc(sz); + Z_REFRESH(arg); + mpn_sqrtrem(Z_LIMB(r), NULL, ptr_arg, size_arg); + r = ml_z_reduce(r, sz, 0); + } + else r = Val_long(0); + Z_CHECK(r); + CAMLreturn(r); +} + +CAMLprim value ml_z_sqrt_rem(value arg) +{ + CAMLparam1(arg); + CAMLlocal3(r, s, p); + Z_DECL(arg); + /* XXX TODO: fast path */ + Z_MARK_OP; + Z_MARK_SLOW; + Z_CHECK(arg); + Z_ARG(arg); + if (sign_arg) + caml_invalid_argument("Z.sqrt_rem: square root of a negative number"); + if (size_arg) { + mp_size_t sz = (size_arg + 1) / 2, sz2; + r = ml_z_alloc(sz); + s = ml_z_alloc(size_arg); + Z_REFRESH(arg); + sz2 = mpn_sqrtrem(Z_LIMB(r), Z_LIMB(s), ptr_arg, size_arg); + r = ml_z_reduce(r, sz, 0); + s = ml_z_reduce(s, sz2, 0); + } + else r = s = Val_long(0); + Z_CHECK(r); + Z_CHECK(s); + p = caml_alloc_small(2, 0); + Field(p,0) = r; + Field(p,1) = s; + CAMLreturn(p); +} + +CAMLprim value ml_z_gcd(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + if (a1 < 0) a1 = -a1; + if (a2 < 0) a2 = -a2; + if (a1 < a2) { intnat t = a1; a1 = a2; a2 = t; } + while (a2) { + intnat r = a1 % a2; + a1 = a2; a2 = r; + } + /* If arg1 = arg2 = min_int, the result a1 is -min_int, not representable + as a tagged integer; fall through the slow case, then. */ + if (a1 <= Z_MAX_INT) return Val_long(a1); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam2(arg1, arg2); + CAMLlocal3(r, tmp1, tmp2); + mp_size_t sz, pos1, pos2, limb1, limb2, bit1, bit2, pos, limb, bit, i; + Z_DECL(arg1); Z_DECL(arg2); + Z_ARG(arg1); Z_ARG(arg2); + if (!size_arg1) r = sign_arg2 ? ml_z_neg(arg2) : arg2; + else if (!size_arg2) r = sign_arg1 ? 
ml_z_neg(arg1) : arg1; + else { + /* copy args to tmp storage & remove lower 0 bits */ + pos1 = mpn_scan1(ptr_arg1, 0); + pos2 = mpn_scan1(ptr_arg2, 0); + limb1 = pos1 / Z_LIMB_BITS; + limb2 = pos2 / Z_LIMB_BITS; + bit1 = pos1 % Z_LIMB_BITS; + bit2 = pos2 % Z_LIMB_BITS; + size_arg1 -= limb1; + size_arg2 -= limb2; + tmp1 = ml_z_alloc(size_arg1 + 1); + tmp2 = ml_z_alloc(size_arg2 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + if (bit1) { + mpn_rshift(Z_LIMB(tmp1), ptr_arg1 + limb1, size_arg1, bit1); + if (!Z_LIMB(tmp1)[size_arg1-1]) size_arg1--; + } + else ml_z_cpy_limb(Z_LIMB(tmp1), ptr_arg1 + limb1, size_arg1); + if (bit2) { + mpn_rshift(Z_LIMB(tmp2), ptr_arg2 + limb2, size_arg2, bit2); + if (!Z_LIMB(tmp2)[size_arg2-1]) size_arg2--; + } + else ml_z_cpy_limb(Z_LIMB(tmp2), ptr_arg2 + limb2, size_arg2); + /* compute gcd of 2^pos1 & 2^pos2 */ + pos = (pos1 <= pos2) ? pos1 : pos2; + limb = pos / Z_LIMB_BITS; + bit = pos % Z_LIMB_BITS; + /* compute gcd of arg1 & arg2 without lower 0 bits */ + /* second argument must have less bits than first */ + if ((size_arg1 > size_arg2) || + ((size_arg1 == size_arg2) && + (Z_LIMB(tmp1)[size_arg1 - 1] >= Z_LIMB(tmp2)[size_arg1 - 1]))) { + r = ml_z_alloc(size_arg2 + limb + 1); + sz = mpn_gcd(Z_LIMB(r) + limb, Z_LIMB(tmp1), size_arg1, Z_LIMB(tmp2), size_arg2); + } + else { + r = ml_z_alloc(size_arg1 + limb + 1); + sz = mpn_gcd(Z_LIMB(r) + limb, Z_LIMB(tmp2), size_arg2, Z_LIMB(tmp1), size_arg1); + } + /* glue the two results */ + for (i = 0; i < limb; i++) + Z_LIMB(r)[i] = 0; + Z_LIMB(r)[sz + limb] = 0; + if (bit) mpn_lshift(Z_LIMB(r) + limb, Z_LIMB(r) + limb, sz + 1, bit); + r = ml_z_reduce(r, limb + sz + 1, 0); + } + Z_CHECK(r); + CAMLreturn(r); + } +} + +/* only computes one cofactor */ +CAMLprim value ml_z_gcdext_intern(value arg1, value arg2) +{ + /* XXX TODO: fast path */ + CAMLparam2(arg1, arg2); + CAMLlocal5(r, res_arg1, res_arg2, s, p); + Z_DECL(arg1); Z_DECL(arg2); + mp_size_t sz, sn; + Z_MARK_OP; + Z_MARK_SLOW; + Z_CHECK(arg1); Z_CHECK(arg2); + Z_ARG(arg1); Z_ARG(arg2); + if (!size_arg1 || !size_arg2) ml_z_raise_divide_by_zero(); + /* copy args to tmp storage */ + res_arg1 = ml_z_alloc(size_arg1 + 1); + res_arg2 = ml_z_alloc(size_arg2 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + ml_z_cpy_limb(Z_LIMB(res_arg1), ptr_arg1, size_arg1); + ml_z_cpy_limb(Z_LIMB(res_arg2), ptr_arg2, size_arg2); + /* must have arg1 >= arg2 */ + if ((size_arg1 > size_arg2) || + ((size_arg1 == size_arg2) && + (mpn_cmp(Z_LIMB(res_arg1), Z_LIMB(res_arg2), size_arg1) >= 0))) { + r = ml_z_alloc(size_arg1 + 1); + s = ml_z_alloc(size_arg1 + 1); + sz = mpn_gcdext(Z_LIMB(r), Z_LIMB(s), &sn, + Z_LIMB(res_arg1), size_arg1, Z_LIMB(res_arg2), size_arg2); + p = caml_alloc_small(3, 0); + Field(p,2) = Val_true; + } + else { + r = ml_z_alloc(size_arg2 + 1); + s = ml_z_alloc(size_arg2 + 1); + sz = mpn_gcdext(Z_LIMB(r), Z_LIMB(s), &sn, + Z_LIMB(res_arg2), size_arg2, Z_LIMB(res_arg1), size_arg1); + p = caml_alloc_small(3, 0); + Field(p,2) = Val_false; + sign_arg1 = sign_arg2; + } + /* pack result */ + r = ml_z_reduce(r, sz, 0); + if ((int)sn >= 0) s = ml_z_reduce(s, sn, sign_arg1); + else s = ml_z_reduce(s, -sn, sign_arg1 ^ Z_SIGN_MASK); + Z_CHECK(r); + Z_CHECK(s); + Field(p,0) = r; + Field(p,1) = s; + CAMLreturn(p); +} + + +/*--------------------------------------------------- + BITWISE OPERATORS + ---------------------------------------------------*/ + +CAMLprim value ml_z_logand(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + 
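+  /* For tagged integers v = 2x + 1, (2a + 1) & (2b + 1) = 2(a & b) + 1,
+     so the bitwise AND of the two tagged words is directly the tagged
+     representation of a & b. */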
if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + return arg1 & arg2; + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam2(arg1,arg2); + value r; + mp_size_t i; + mp_limb_t c; + Z_DECL(arg1); Z_DECL(arg2); + Z_ARG(arg1); Z_ARG(arg2); + /* ensure size_arg1 >= size_arg2 */ + if (size_arg1 < size_arg2) { + mp_size_t sz; + mp_limb_t *p, s; + value a; + sz = size_arg1; size_arg1 = size_arg2; size_arg2 = sz; + p = ptr_arg1; ptr_arg1 = ptr_arg2; ptr_arg2 = p; + s = sign_arg1; sign_arg1 = sign_arg2; sign_arg2 = s; + a = arg1; arg1 = arg2; arg2 = a; + } + if (!size_arg2) r = arg2; + else if (sign_arg1 && sign_arg2) { + /* arg1 < 0, arg2 < 0 => r < 0 */ + r = ml_z_alloc(size_arg1 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg1, size_arg1, 1); + c = 1; /* carry when decrementing arg2 */ + for (i = 0; i < size_arg2; i++) { + mp_limb_t v = ptr_arg2[i]; + Z_LIMB(r)[i] = Z_LIMB(r)[i] | (v - c); + c = c && !v; + } + c = mpn_add_1(Z_LIMB(r), Z_LIMB(r), size_arg1, 1); + Z_LIMB(r)[size_arg1] = c; + r = ml_z_reduce(r, size_arg1 + 1, Z_SIGN_MASK); + } + else if (sign_arg1) { + /* arg1 < 0, arg2 > 0 => r >= 0 */ + r = ml_z_alloc(size_arg2); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg1, size_arg2, 1); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = (~Z_LIMB(r)[i]) & ptr_arg2[i]; + r = ml_z_reduce(r, size_arg2, 0); + } + else if (sign_arg2) { + /* arg1 > 0, arg2 < 0 => r >= 0 */ + r = ml_z_alloc(size_arg1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg2, size_arg2, 1); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = ptr_arg1[i] & (~Z_LIMB(r)[i]); + for (; i < size_arg1; i++) + Z_LIMB(r)[i] = ptr_arg1[i]; + r = ml_z_reduce(r, size_arg1, 0); + } + else { + /* arg1, arg2 > 0 => r >= 0 */ + r = ml_z_alloc(size_arg2); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = ptr_arg1[i] & ptr_arg2[i]; + r = ml_z_reduce(r, size_arg2, 0); + } + Z_CHECK(r); + CAMLreturn(r); + } +} + +CAMLprim value ml_z_logor(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + return arg1 | arg2; + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam2(arg1,arg2); + Z_DECL(arg1); Z_DECL(arg2); + mp_size_t i; + mp_limb_t c; + value r; + Z_ARG(arg1); Z_ARG(arg2); + /* ensure size_arg1 >= size_arg2 */ + if (size_arg1 < size_arg2) { + mp_size_t sz; + mp_limb_t *p, s; + value a; + sz = size_arg1; size_arg1 = size_arg2; size_arg2 = sz; + p = ptr_arg1; ptr_arg1 = ptr_arg2; ptr_arg2 = p; + s = sign_arg1; sign_arg1 = sign_arg2; sign_arg2 = s; + a = arg1; arg1 = arg2; arg2 = a; + } + if (!size_arg2) r = arg1; + else if (sign_arg1 && sign_arg2) { + /* arg1 < 0, arg2 < 0 => r < 0 */ + r = ml_z_alloc(size_arg2 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg1, size_arg2, 1); + c = 1; /* carry when decrementing arg2 */ + for (i = 0; i < size_arg2; i++) { + mp_limb_t v = ptr_arg2[i]; + Z_LIMB(r)[i] = Z_LIMB(r)[i] & (v - c); + c = c && !v; + } + c = mpn_add_1(Z_LIMB(r), Z_LIMB(r), size_arg2, 1); + Z_LIMB(r)[size_arg2] = c; + r = ml_z_reduce(r, size_arg2 + 1, Z_SIGN_MASK); + } + else if (sign_arg1) { + /* arg1 < 0, arg2 > 0 => r < 0 */ + r = ml_z_alloc(size_arg1 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg1, size_arg1, 1); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = Z_LIMB(r)[i] & (~ptr_arg2[i]); + c = mpn_add_1(Z_LIMB(r), Z_LIMB(r), 
size_arg1, 1); + Z_LIMB(r)[size_arg1] = c; + r = ml_z_reduce(r, size_arg1 + 1, Z_SIGN_MASK); + } + else if (sign_arg2) { + /* arg1 > 0, arg2 < 0 => r < 0*/ + r = ml_z_alloc(size_arg2 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg2, size_arg2, 1); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = (~ptr_arg1[i]) & Z_LIMB(r)[i]; + c = mpn_add_1(Z_LIMB(r), Z_LIMB(r), size_arg2, 1); + Z_LIMB(r)[size_arg2] = c; + r = ml_z_reduce(r, size_arg2 + 1, Z_SIGN_MASK); + } + else { + /* arg1, arg2 > 0 => r > 0 */ + r = ml_z_alloc(size_arg1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = ptr_arg1[i] | ptr_arg2[i]; + for (; i < size_arg1; i++) + Z_LIMB(r)[i] = ptr_arg1[i]; + r = ml_z_reduce(r, size_arg1, 0); + } + Z_CHECK(r); + CAMLreturn(r); + } +} + +CAMLprim value ml_z_logxor(value arg1, value arg2) +{ + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + return (arg1 ^ arg2) | 1; + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam2(arg1,arg2); + Z_DECL(arg1); Z_DECL(arg2); + value r; + mp_size_t i; + mp_limb_t c; + Z_ARG(arg1); Z_ARG(arg2); + /* ensure size_arg1 >= size_arg2 */ + if (size_arg1 < size_arg2) { + mp_size_t sz; + mp_limb_t *p, s; + value a; + sz = size_arg1; size_arg1 = size_arg2; size_arg2 = sz; + p = ptr_arg1; ptr_arg1 = ptr_arg2; ptr_arg2 = p; + s = sign_arg1; sign_arg1 = sign_arg2; sign_arg2 = s; + a = arg1; arg1 = arg2; arg2 = a; + } + if (!size_arg2) r = arg1; + else if (sign_arg1 && sign_arg2) { + /* arg1 < 0, arg2 < 0 => r >=0 */ + r = ml_z_alloc(size_arg1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg1, size_arg1, 1); + c = 1; /* carry when decrementing arg2 */ + for (i = 0; i < size_arg2; i++) { + mp_limb_t v = ptr_arg2[i]; + Z_LIMB(r)[i] = Z_LIMB(r)[i] ^ (v - c); + c = c && !v; + } + r = ml_z_reduce(r, size_arg1, 0); + } + else if (sign_arg1) { + /* arg1 < 0, arg2 > 0 => r < 0 */ + r = ml_z_alloc(size_arg1 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg1, size_arg1, 1); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = Z_LIMB(r)[i] ^ ptr_arg2[i]; + c = mpn_add_1(Z_LIMB(r), Z_LIMB(r), size_arg1, 1); + Z_LIMB(r)[size_arg1] = c; + r = ml_z_reduce(r, size_arg1 + 1, Z_SIGN_MASK); + } + else if (sign_arg2) { + /* arg1 > 0, arg2 < 0 => r < 0 */ + r = ml_z_alloc(size_arg1 + 1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + mpn_sub_1(Z_LIMB(r), ptr_arg2, size_arg2, 1); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = ptr_arg1[i] ^ Z_LIMB(r)[i]; + for (; i < size_arg1; i++) + Z_LIMB(r)[i] = ptr_arg1[i]; + c = mpn_add_1(Z_LIMB(r), Z_LIMB(r), size_arg1, 1); + Z_LIMB(r)[size_arg1] = c; + r = ml_z_reduce(r, size_arg1 + 1, Z_SIGN_MASK); + } + else { + /* arg1, arg2 > 0 => r >= 0 */ + r = ml_z_alloc(size_arg1); + Z_REFRESH(arg1); + Z_REFRESH(arg2); + for (i = 0; i < size_arg2; i++) + Z_LIMB(r)[i] = ptr_arg1[i] ^ ptr_arg2[i]; + for (; i < size_arg1; i++) + Z_LIMB(r)[i] = ptr_arg1[i]; + r = ml_z_reduce(r, size_arg1, 0); + } + Z_CHECK(r); + CAMLreturn(r); + } +} + +CAMLprim value ml_z_lognot(value arg) +{ + Z_MARK_OP; + Z_CHECK(arg); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg)) { + /* fast path */ + return (~arg) | 1; + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam1(arg); + Z_DECL(arg); + value r; + Z_ARG(arg); + r = ml_z_alloc(size_arg + 1); + Z_REFRESH(arg); + /* compute r = -arg - 1 */ + if (!size_arg) { + /* arg = 0 => r = -1 */ + 
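+    /* Editorial worked example (added comment): lognot 0 = -1 is this
+       branch; lognot 5 = -(5+1) = -6 takes the `arg > 0` branch below;
+       lognot (-6) = 6 - 1 = 5 takes the `arg < 0` branch. */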
Z_LIMB(r)[0] = 1; + r = ml_z_reduce(r, 1, Z_SIGN_MASK); + } + else if (sign_arg) { + /* arg < 0, r > 0, |r| = |arg| - 1 */ + mpn_sub_1(Z_LIMB(r), ptr_arg, size_arg, 1); + r = ml_z_reduce(r, size_arg, 0); + } + else { + /* arg > 0, r < 0, |r| = |arg| + 1 */ + mp_limb_t c = mpn_add_1(Z_LIMB(r), ptr_arg, size_arg, 1); + Z_LIMB(r)[size_arg] = c; + r = ml_z_reduce(r, size_arg + 1, Z_SIGN_MASK); + } + Z_CHECK(r); + CAMLreturn(r); + } +} + +CAMLprim value ml_z_shift_left(value arg, value count) +{ + Z_DECL(arg); + intnat c = Long_val(count); + intnat c1, c2; + Z_MARK_OP; + Z_CHECK(arg); + if (c < 0) + caml_invalid_argument("Z.shift_left: count argument must be positive"); + if (!c) return arg; + c1 = c / Z_LIMB_BITS; + c2 = c % Z_LIMB_BITS; +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg) && !c1) { + /* fast path */ + value a = arg - 1; + value r = arg << c2; + if (a == (r >> c2)) return r | 1; + } +#endif + Z_ARG(arg); + if (!size_arg) return Val_long(0); + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam1(arg); + value r; + mp_size_t i; + r = ml_z_alloc(size_arg + c1 + 1); + Z_REFRESH(arg); + /* 0-filled limbs */ + for (i = 0; i < c1; i++) Z_LIMB(r)[i] = 0; + if (c2) { + /* shifted bits */ + mp_limb_t x = mpn_lshift(Z_LIMB(r) + c1, ptr_arg, size_arg, c2); + Z_LIMB(r)[size_arg + c1] = x; + } + else { + /* unshifted copy */ + ml_z_cpy_limb(Z_LIMB(r) + c1, ptr_arg, size_arg); + Z_LIMB(r)[size_arg + c1] = 0; + } + r = ml_z_reduce(r, size_arg + c1 + 1, sign_arg); + Z_CHECK(r); + CAMLreturn(r); + } +} + +CAMLprim value ml_z_shift_right(value arg, value count) +{ + Z_DECL(arg); + intnat c = Long_val(count); + intnat c1, c2; + value r; + Z_MARK_OP; + Z_CHECK(arg); + if (c < 0) + caml_invalid_argument("Z.shift_right: count argument must be positive"); + if (!c) return arg; + c1 = c / Z_LIMB_BITS; + c2 = c % Z_LIMB_BITS; +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg)) { + /* fast path */ + if (c1) { + if (arg < 0) return Val_long(-1); + else return Val_long(0); + } + return (arg >> c2) | 1; + } +#endif + Z_ARG(arg); + if (c1 >= size_arg) { + if (sign_arg) return Val_long(-1); + else return Val_long(0); + } + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam1(arg); + mp_limb_t cr; + r = ml_z_alloc(size_arg - c1 + 1); + Z_REFRESH(arg); + if (c2) + /* shifted bits */ + cr = mpn_rshift(Z_LIMB(r), ptr_arg + c1, size_arg - c1, c2); + else { + /* unshifted copy */ + ml_z_cpy_limb(Z_LIMB(r), ptr_arg + c1, size_arg - c1); + cr = 0; + } + if (sign_arg) { + /* round |arg| to +oo */ + mp_size_t i; + if (!cr) { + for (i = 0; i < c1; i++) + if (ptr_arg[i]) { cr = 1; break; } + } + if (cr) + cr = mpn_add_1(Z_LIMB(r), Z_LIMB(r), size_arg - c1, 1); + } + else cr = 0; + Z_LIMB(r)[size_arg - c1] = cr; + r = ml_z_reduce(r, size_arg - c1 + 1, sign_arg); + Z_CHECK(r); + CAMLreturn(r); + } +} + +CAMLprim value ml_z_shift_right_trunc(value arg, value count) +{ + Z_DECL(arg); + intnat c = Long_val(count); + intnat c1, c2; + value r; + Z_MARK_OP; + Z_CHECK(arg); + if (c < 0) + caml_invalid_argument("Z.shift_right_trunc: count argument must be positive"); + if (!c) return arg; + c1 = c / Z_LIMB_BITS; + c2 = c % Z_LIMB_BITS; +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg)) { + /* fast path */ + if (c1) return Val_long(0); + if (arg >= 1) return (arg >> c2) | 1; + else return Val_long(- ((- Long_val(arg)) >> c2)); + } +#endif + Z_ARG(arg); + if (c1 >= size_arg) return Val_long(0); + /* mpn_ version */ + Z_MARK_SLOW; + { + CAMLparam1(arg); + r = ml_z_alloc(size_arg - c1); + Z_REFRESH(arg); + if 
(c2) + /* shifted bits */ + mpn_rshift(Z_LIMB(r), ptr_arg + c1, size_arg - c1, c2); + else + /* unshifted copy */ + ml_z_cpy_limb(Z_LIMB(r), ptr_arg + c1, size_arg - c1); + r = ml_z_reduce(r, size_arg - c1, sign_arg); + Z_CHECK(r); + CAMLreturn(r); + } +} + +/* Helper function for numbits: number of leading 0 bits in x */ + +#ifdef _LONG_LONG_LIMB +#define BUILTIN_CLZ __builtin_clzll +#else +#define BUILTIN_CLZ __builtin_clzl +#endif + +/* Use GCC or Clang built-in if available. The argument must be != 0. */ +#if defined(__clang__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +#define ml_z_clz BUILTIN_CLZ +#else +/* Portable C implementation - Hacker's Delight fig 5.12 */ +int ml_z_clz(mp_limb_t x) +{ + int n; + mp_limb_t y; +#ifdef ARCH_SIXTYFOUR + n = 64; + y = x >> 32; if (y != 0) { n = n - 32; x = y; } +#else + n = 32; +#endif + y = x >> 16; if (y != 0) { n = n - 16; x = y; } + y = x >> 8; if (y != 0) { n = n - 8; x = y; } + y = x >> 4; if (y != 0) { n = n - 4; x = y; } + y = x >> 2; if (y != 0) { n = n - 2; x = y; } + y = x >> 1; if (y != 0) return n - 2; + return n - x; +} +#endif + +CAMLprim value ml_z_numbits(value arg) +{ + Z_DECL(arg); + intnat r; + int n; + Z_MARK_OP; + Z_CHECK(arg); +#if Z_FAST_PATH + if (Is_long(arg)) { + /* fast path */ + r = Long_val(arg); + if (r == 0) { + return Val_int(0); + } else { + n = ml_z_clz(r > 0 ? r : -r); + return Val_long(sizeof(intnat) * 8 - n); + } + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + Z_ARG(arg); + if (size_arg == 0) return Val_int(0); + n = ml_z_clz(ptr_arg[size_arg - 1]); + return Val_long(size_arg * Z_LIMB_BITS - n); +} + +/* Helper function for trailing_zeros: number of trailing 0 bits in x */ + +#ifdef _LONG_LONG_LIMB +#define BUILTIN_CTZ __builtin_ctzll +#else +#define BUILTIN_CTZ __builtin_ctzl +#endif + +/* Use GCC or Clang built-in if available. The argument must be != 0. */ +#if defined(__clang__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +#define ml_z_ctz BUILTIN_CTZ +#else +/* Portable C implementation - Hacker's Delight fig 5.21 */ +int ml_z_ctz(mp_limb_t x) +{ + int n; + mp_limb_t y; + CAMLassert (x != 0); +#ifdef ARCH_SIXTYFOUR + n = 63; + y = x << 32; if (y != 0) { n = n - 32; x = y; } +#else + n = 31; +#endif + y = x << 16; if (y != 0) { n = n - 16; x = y; } + y = x << 8; if (y != 0) { n = n - 8; x = y; } + y = x << 4; if (y != 0) { n = n - 4; x = y; } + y = x << 2; if (y != 0) { n = n - 2; x = y; } + y = x << 1; if (y != 0) { n = n - 1; } + return n; +} +#endif + +CAMLprim value ml_z_trailing_zeros(value arg) +{ + Z_DECL(arg); + intnat r; + mp_size_t i; + Z_MARK_OP; + Z_CHECK(arg); +#if Z_FAST_PATH + if (Is_long(arg)) { + /* fast path */ + r = Long_val(arg); + if (r == 0) { + return Val_long (Max_long); + } else { + /* No need to take absolute value of r, as ctz(-x) = ctz(x) */ + return Val_long (ml_z_ctz(r)); + } + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + Z_ARG(arg); + if (size_arg == 0) return Val_long (Max_long); + for (i = 0; ptr_arg[i] == 0; i++) /* skip */; + return Val_long(i * Z_LIMB_BITS + ml_z_ctz(ptr_arg[i])); +} + +/* helper function for popcount & hamdist: number of bits at 1 in x */ +/* maybe we should use the mpn_ function even for small arguments, in case + the CPU has a fast popcount opcode? 
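+   One possible answer (editorial sketch, mirroring the BUILTIN_CLZ /
+   BUILTIN_CTZ pattern above; not part of the original source):
+
+     #ifdef _LONG_LONG_LIMB
+     #define BUILTIN_POPCOUNT __builtin_popcountll
+     #else
+     #define BUILTIN_POPCOUNT __builtin_popcountl
+     #endif
+
+   guarded by the same GCC/Clang version check, with ml_z_count kept as the
+   portable fallback.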
+ */ +uintnat ml_z_count(uintnat x) +{ +#ifdef ARCH_SIXTYFOUR + x = (x & 0x5555555555555555UL) + ((x >> 1) & 0x5555555555555555UL); + x = (x & 0x3333333333333333UL) + ((x >> 2) & 0x3333333333333333UL); + x = (x & 0x0f0f0f0f0f0f0f0fUL) + ((x >> 4) & 0x0f0f0f0f0f0f0f0fUL); + x = (x & 0x00ff00ff00ff00ffUL) + ((x >> 8) & 0x00ff00ff00ff00ffUL); + x = (x & 0x0000ffff0000ffffUL) + ((x >> 16) & 0x0000ffff0000ffffUL); + x = (x & 0x00000000ffffffffUL) + ((x >> 32) & 0x00000000ffffffffUL); +#else + x = (x & 0x55555555UL) + ((x >> 1) & 0x55555555UL); + x = (x & 0x33333333UL) + ((x >> 2) & 0x33333333UL); + x = (x & 0x0f0f0f0fUL) + ((x >> 4) & 0x0f0f0f0fUL); + x = (x & 0x00ff00ffUL) + ((x >> 8) & 0x00ff00ffUL); + x = (x & 0x0000ffffUL) + ((x >> 16) & 0x0000ffffUL); +#endif + return x; +} + +CAMLprim value ml_z_popcount(value arg) +{ + Z_DECL(arg); + intnat r; + Z_MARK_OP; + Z_CHECK(arg); +#if Z_FAST_PATH + if (Is_long(arg)) { + /* fast path */ + r = Long_val(arg); + if (r < 0) ml_z_raise_overflow(); + return Val_long(ml_z_count(r)); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + Z_ARG(arg); + if (sign_arg) ml_z_raise_overflow(); + if (!size_arg) return Val_long(0); + r = mpn_popcount(ptr_arg, size_arg); + if (r < 0 || !Z_FITS_INT(r)) ml_z_raise_overflow(); + return Val_long(r); +} + +CAMLprim value ml_z_hamdist(value arg1, value arg2) +{ + Z_DECL(arg1); Z_DECL(arg2); + intnat r; + mp_size_t sz; + Z_MARK_OP; + Z_CHECK(arg1); + Z_CHECK(arg2); +#if Z_FAST_PATH + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + r = Long_val(arg1) ^ Long_val(arg2); + if (r < 0) ml_z_raise_overflow(); + return Val_long(ml_z_count(r)); + } +#endif + /* mpn_ version */ + Z_MARK_SLOW; + Z_ARG(arg1); + Z_ARG(arg2); + if (sign_arg1 != sign_arg2) ml_z_raise_overflow(); + /* XXX TODO: case where arg1 & arg2 are both negative */ + if (sign_arg1 || sign_arg2) + caml_invalid_argument("Z.hamdist: negative arguments"); + /* distance on common size */ + sz = (size_arg1 <= size_arg2) ? size_arg1 : size_arg2; + if (sz) { + r = mpn_hamdist(ptr_arg1, ptr_arg2, sz); + if (r < 0 || !Z_FITS_INT(r)) ml_z_raise_overflow(); + } + else r = 0; + /* add stray bits */ + if (size_arg1 > size_arg2) { + r += mpn_popcount(ptr_arg1 + size_arg2, size_arg1 - size_arg2); + if (r < 0 || !Z_FITS_INT(r)) ml_z_raise_overflow(); + } + else if (size_arg2 > size_arg1) { + r += mpn_popcount(ptr_arg2 + size_arg1, size_arg2 - size_arg1); + if (r < 0 || !Z_FITS_INT(r)) ml_z_raise_overflow(); + } + return Val_long(r); +} + +CAMLprim value ml_z_testbit(value arg, value index) +{ + Z_DECL(arg); + uintnat b_idx; + mp_size_t l_idx, i; + mp_limb_t limb; + Z_MARK_OP; + Z_CHECK(arg); + b_idx = Long_val(index); /* Caml code checked index >= 0 */ +#if Z_FAST_PATH + if (Is_long(arg)) { + if (b_idx >= Z_LIMB_BITS) b_idx = Z_LIMB_BITS - 1; + return Val_int((Long_val(arg) >> b_idx) & 1); + } +#endif + Z_MARK_SLOW; + Z_ARG(arg); + l_idx = b_idx / Z_LIMB_BITS; + if (l_idx >= size_arg) return Val_bool(sign_arg); + limb = ptr_arg[l_idx]; + if (sign_arg != 0) { + /* If arg is negative, its 2-complement representation is + bitnot(abs(arg) - 1). + If any of the limbs of abs(arg) below l_idx is nonzero, + the carry from the decrement dies before reaching l_idx, + and we just test bitnot(limb). + If all the limbs below l_idx are zero, the carry from the + decrement propagates to l_idx, + and we test bitnot(limb - 1) = - limb. 
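+     Editorial worked example: for arg = -6 (a single limb holding 6) there
+     are no lower non-zero limbs, so we test -limb = ...11111010, the two's
+     complement of 6; bit 0 is 0 and bit 1 is 1, as expected.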
*/ + for (i = 0; i < l_idx; i++) { + if (ptr_arg[i] != 0) { limb = ~limb; goto extract; } + } + limb = -limb; + } + extract: + return Val_int((limb >> (b_idx % Z_LIMB_BITS)) & 1); +} + +/*--------------------------------------------------- + FUNCTIONS BASED ON mpz_t + ---------------------------------------------------*/ + +/* sets rop to the value in op (limbs are copied) */ +void ml_z_mpz_set_z(mpz_t rop, value op) +{ + Z_DECL(op); + Z_CHECK(op); + Z_ARG(op); + if (size_op * Z_LIMB_BITS > INT_MAX) + caml_invalid_argument("Z: risk of overflow in mpz type"); + mpz_realloc2(rop, size_op * Z_LIMB_BITS); + rop->_mp_size = (sign_op >= 0) ? size_op : -size_op; + ml_z_cpy_limb(rop->_mp_d, ptr_op, size_op); +} + +/* inits and sets rop to the value in op (limbs are copied) */ +void ml_z_mpz_init_set_z(mpz_t rop, value op) +{ + mpz_init(rop); + ml_z_mpz_set_z(rop,op); +} + +/* returns a new z objects equal to op (limbs are copied) */ +value ml_z_from_mpz(mpz_t op) +{ + value r; + size_t sz = mpz_size(op); + r = ml_z_alloc(sz); + ml_z_cpy_limb(Z_LIMB(r), op->_mp_d, sz); + return ml_z_reduce(r, sz, (mpz_sgn(op) >= 0) ? 0 : Z_SIGN_MASK); +} + +#if __GNU_MP_VERSION >= 5 +/* not exported by gmp.h */ +extern void __gmpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); +#endif + +CAMLprim value ml_z_divexact(value arg1, value arg2) +{ + Z_DECL(arg1); Z_DECL(arg2); + Z_MARK_OP; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2)) { + /* fast path */ + intnat a1 = Long_val(arg1); + intnat a2 = Long_val(arg2); + intnat q; + if (!a2) ml_z_raise_divide_by_zero(); + q = a1 / a2; + if (Z_FITS_INT(q)) return Val_long(q); + } +#endif + Z_MARK_SLOW; +#if __GNU_MP_VERSION >= 5 + { + /* mpn_ version */ + Z_ARG(arg1); + Z_ARG(arg2); + if (!size_arg2) + ml_z_raise_divide_by_zero(); + if (size_arg1 < size_arg2) + return Val_long(0); + { + CAMLparam2(arg1,arg2); + CAMLlocal1(q); + q = ml_z_alloc(size_arg1 - size_arg2 + 1); + Z_REFRESH(arg1); Z_REFRESH(arg2); + __gmpn_divexact(Z_LIMB(q), + ptr_arg1, size_arg1, ptr_arg2, size_arg2); + q = ml_z_reduce(q, size_arg1 - size_arg2 + 1, sign_arg1 ^ sign_arg2); + Z_CHECK(q); + CAMLreturn(q); + } + } +#else + { + /* mpz_ version */ + CAMLparam2(arg1,arg2); + CAMLlocal1(r); + mpz_t a,b; + if (!ml_z_sgn(arg2)) + ml_z_raise_divide_by_zero(); + ml_z_mpz_init_set_z(a, arg1); + ml_z_mpz_init_set_z(b, arg2); + mpz_divexact(a, a, b); + r = ml_z_from_mpz(a); + mpz_clear(a); + mpz_clear(b); + CAMLreturn(r); + } +#endif +} + +CAMLprim value ml_z_powm(value base, value exp, value mod) +{ + CAMLparam3(base,exp,mod); + CAMLlocal1(r); + Z_DECL(mod); + mpz_t mbase, mexp, mmod; + Z_ARG(mod); + if (!size_mod) + ml_z_raise_divide_by_zero(); + ml_z_mpz_init_set_z(mbase, base); + ml_z_mpz_init_set_z(mexp, exp); + ml_z_mpz_init_set_z(mmod, mod); + if (mpz_sgn(mexp) < 0) { + /* we need to check whether base is invertible to avoid a division by zero + in mpz_powm, so we can as well use the computed inverse + */ + if (!mpz_invert(mbase, mbase, mmod)) { + mpz_clear(mbase); + mpz_clear(mexp); + mpz_clear(mmod); + ml_z_raise_divide_by_zero(); + } + mpz_neg(mexp, mexp); + } + mpz_powm(mbase, mbase, mexp, mmod); + r = ml_z_from_mpz(mbase); + mpz_clear(mbase); + mpz_clear(mexp); + mpz_clear(mmod); + CAMLreturn(r); +} + +CAMLprim value ml_z_powm_sec(value base, value exp, value mod) +{ +#ifndef HAS_MPIR +#if __GNU_MP_VERSION >= 5 + CAMLparam3(base,exp,mod); + CAMLlocal1(r); + mpz_t mbase, mexp, mmod; + ml_z_mpz_init_set_z(mbase, base); + 
ml_z_mpz_init_set_z(mexp, exp); + ml_z_mpz_init_set_z(mmod, mod); + if (mpz_sgn(mexp) <= 0) { + mpz_clear(mbase); + mpz_clear(mexp); + mpz_clear(mmod); + caml_invalid_argument("Z.powm_sec: exponent must be positive"); + } + if (! mpz_odd_p(mmod)) { + mpz_clear(mbase); + mpz_clear(mexp); + mpz_clear(mmod); + caml_invalid_argument("Z.powm_sec: modulus must be odd"); + } + mpz_powm_sec(mbase, mbase, mexp, mmod); + r = ml_z_from_mpz(mbase); + mpz_clear(mbase); + mpz_clear(mexp); + mpz_clear(mmod); + CAMLreturn(r); +#else + MAYBE_UNUSED(base); + MAYBE_UNUSED(exp); + MAYBE_UNUSED(mod); + caml_invalid_argument("Z.powm_sec: not available, needs GMP version >= 5"); +#endif +#else + MAYBE_UNUSED(base); + MAYBE_UNUSED(exp); + MAYBE_UNUSED(mod); + caml_invalid_argument("Z.powm_sec: not available in MPIR, needs GMP version >= 5"); +#endif +} + +CAMLprim value ml_z_pow(value base, value exp) +{ + CAMLparam2(base,exp); + CAMLlocal1(r); + mpz_t mbase; + intnat e = Long_val(exp); + mp_size_t sz, ralloc; + int cnt; + if (e < 0) + caml_invalid_argument("Z.pow: exponent must be nonnegative"); + ml_z_mpz_init_set_z(mbase, base); + + /* Safe overapproximation of the size of the result. + In case this overflows an int, GMP may abort with a message + "gmp: overflow in mpz type". To avoid this, we test the size before + calling mpz_pow_ui and raise an OCaml exception. + Note: we lifted the computation from mpz_n_pow_ui. + */ + sz = mbase->_mp_size; + if (sz < 0) sz = -sz; + cnt = sz > 0 ? ml_z_clz(mbase->_mp_d[sz - 1]) : 0; + ralloc = (sz * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5; + if (ralloc > INT_MAX) { + mpz_clear(mbase); + caml_invalid_argument("Z.pow: risk of overflow in mpz type"); + } + mpz_pow_ui(mbase, mbase, e); + r = ml_z_from_mpz(mbase); + mpz_clear(mbase); + CAMLreturn(r); +} + +CAMLprim value ml_z_root(value a, value b) +{ + CAMLparam2(a,b); + CAMLlocal1(r); + Z_DECL(a); + mpz_t ma; + intnat mb = Long_val(b); + if (mb <= 0) + caml_invalid_argument("Z.root: exponent must be positive"); + Z_ARG(a); + if (!(mb & 1) && sign_a) + caml_invalid_argument("Z.root: even root of a negative number"); + ml_z_mpz_init_set_z(ma, a); + mpz_root(ma, ma, mb); + r = ml_z_from_mpz(ma); + mpz_clear(ma); + CAMLreturn(r); +} + +CAMLprim value ml_z_rootrem(value a, value b) +{ + CAMLparam2(a,b); + CAMLlocal3(r1,r2,r3); + Z_DECL(a); + mpz_t ma, mr1, mr2; + intnat mb = Long_val(b); + if (mb <= 0) + caml_invalid_argument("Z.rootrem: exponent must be positive"); + Z_ARG(a); + if (!(mb & 1) && sign_a) + caml_invalid_argument("Z.rootrem: even root of a negative number"); + ml_z_mpz_init_set_z(ma, a); + mpz_init(mr1); + mpz_init(mr2); + mpz_rootrem(mr1, mr2, ma, mb); + r1 = ml_z_from_mpz(mr1); + r2 = ml_z_from_mpz(mr2); + r3 = caml_alloc_small(2, 0); + Field(r3,0) = r1; + Field(r3,1) = r2; + mpz_clear(ma); + mpz_clear(mr1); + mpz_clear(mr2); + CAMLreturn(r3); +} + +CAMLprim value ml_z_perfect_power(value a) +{ + CAMLparam1(a); + int r; + mpz_t ma; + ml_z_mpz_init_set_z(ma, a); + r = mpz_perfect_power_p(ma); + mpz_clear(ma); + CAMLreturn(r ? Val_true : Val_false); +} + +CAMLprim value ml_z_perfect_square(value a) +{ + CAMLparam1(a); + int r; + mpz_t ma; + ml_z_mpz_init_set_z(ma, a); + r = mpz_perfect_square_p(ma); + mpz_clear(ma); + CAMLreturn(r ? 
Val_true : Val_false); +} + +CAMLprim value ml_z_probab_prime(value a, int b) +{ + CAMLparam1(a); + int r; + mpz_t ma; + ml_z_mpz_init_set_z(ma, a); + r = mpz_probab_prime_p(ma, Int_val(b)); + mpz_clear(ma); + CAMLreturn(Val_int(r)); +} + +CAMLprim value ml_z_nextprime(value a) +{ + CAMLparam1(a); + CAMLlocal1(r); + mpz_t ma; + ml_z_mpz_init_set_z(ma, a); + mpz_nextprime(ma, ma); + r = ml_z_from_mpz(ma); + mpz_clear(ma); + CAMLreturn(r); +} + +CAMLprim value ml_z_invert(value base, value mod) +{ + CAMLparam2(base,mod); + CAMLlocal1(r); + mpz_t mbase, mmod; + ml_z_mpz_init_set_z(mbase, base); + ml_z_mpz_init_set_z(mmod, mod); + if (!mpz_invert(mbase, mbase, mmod)) { + mpz_clear(mbase); + mpz_clear(mmod); + ml_z_raise_divide_by_zero(); + } + r = ml_z_from_mpz(mbase); + mpz_clear(mbase); + mpz_clear(mmod); + CAMLreturn(r); +} + +CAMLprim value ml_z_divisible(value a, value b) +{ + CAMLparam2(a,b); + mpz_t ma, mb; + int r; + ml_z_mpz_init_set_z(ma, a); + ml_z_mpz_init_set_z(mb, b); + r = mpz_divisible_p(ma, mb); + mpz_clear(ma); + mpz_clear(mb); + CAMLreturn(Val_bool(r)); +} + +CAMLprim value ml_z_congruent(value a, value b, value c) +{ + CAMLparam3(a,b,c); + mpz_t ma, mb, mc; + int r; + ml_z_mpz_init_set_z(ma, a); + ml_z_mpz_init_set_z(mb, b); + ml_z_mpz_init_set_z(mc, c); + r = mpz_congruent_p(ma, mb, mc); + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mc); + CAMLreturn(Val_bool(r)); +} + +CAMLprim value ml_z_jacobi(value a, value b) +{ + CAMLparam2(a,b); + mpz_t ma, mb; + int r; + ml_z_mpz_init_set_z(ma, a); + ml_z_mpz_init_set_z(mb, b); + r = mpz_jacobi(ma, mb); + mpz_clear(ma); + mpz_clear(mb); + CAMLreturn(Val_int(r)); +} + +CAMLprim value ml_z_legendre(value a, value b) +{ + CAMLparam2(a,b); + mpz_t ma, mb; + int r; + ml_z_mpz_init_set_z(ma, a); + ml_z_mpz_init_set_z(mb, b); + r = mpz_legendre(ma, mb); + mpz_clear(ma); + mpz_clear(mb); + CAMLreturn(Val_int(r)); +} + +CAMLprim value ml_z_kronecker(value a, value b) +{ + CAMLparam2(a,b); + mpz_t ma, mb; + int r; + ml_z_mpz_init_set_z(ma, a); + ml_z_mpz_init_set_z(mb, b); + r = mpz_kronecker(ma, mb); + mpz_clear(ma); + mpz_clear(mb); + CAMLreturn(Val_int(r)); +} + +CAMLprim value ml_z_remove(value a, value b) +{ + CAMLparam2(a,b); + CAMLlocal1(r); + mpz_t ma, mb, mr; + int i; + ml_z_mpz_init_set_z(ma, a); + ml_z_mpz_init_set_z(mb, b); + mpz_init(mr); + i = mpz_remove(mr, ma, mb); + r = caml_alloc_small(2, 0); + Field(r,0) = ml_z_from_mpz(mr); + Field(r,1) = Val_int(i); + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); + CAMLreturn(r); +} + +CAMLprim value ml_z_fac(value a) +{ + CAMLparam1(a); + CAMLlocal1(r); + mpz_t mr; + intnat ma = Long_val(a); + if (ma < 0) + caml_invalid_argument("Z.fac: non-positive argument"); + mpz_init(mr); + mpz_fac_ui(mr, ma); + r = ml_z_from_mpz(mr); + mpz_clear(mr); + CAMLreturn(r); +} + +CAMLprim value ml_z_fac2(value a) +{ + CAMLparam1(a); + CAMLlocal1(r); + mpz_t mr; + intnat ma = Long_val(a); + if (ma < 0) + caml_invalid_argument("Z.fac2: non-positive argument"); + mpz_init(mr); + mpz_2fac_ui(mr, ma); + r = ml_z_from_mpz(mr); + mpz_clear(mr); + CAMLreturn(r); +} + +CAMLprim value ml_z_facM(value a, value b) +{ + CAMLparam2(a,b); + CAMLlocal1(r); + mpz_t mr; + intnat ma = Long_val(a), mb = Long_val(b); + if (ma < 0 || mb < 0) + caml_invalid_argument("Z.facM: non-positive argument"); + mpz_init(mr); + mpz_mfac_uiui(mr, ma, mb); + r = ml_z_from_mpz(mr); + mpz_clear(mr); + CAMLreturn(r); +} + +CAMLprim value ml_z_primorial(value a) +{ + CAMLparam1(a); + CAMLlocal1(r); + mpz_t mr; + intnat ma = Long_val(a); + 
if (ma < 0) + caml_invalid_argument("Z.primorial: non-positive argument"); + mpz_init(mr); + mpz_primorial_ui(mr, ma); + r = ml_z_from_mpz(mr); + mpz_clear(mr); + CAMLreturn(r); +} + +CAMLprim value ml_z_bin(value a, value b) +{ + CAMLparam2(a,b); + CAMLlocal1(r); + mpz_t ma; + intnat mb = Long_val(b); + if (mb < 0) + caml_invalid_argument("Z.bin: non-positive argument"); + ml_z_mpz_init_set_z(ma, a); + mpz_bin_ui(ma, ma, mb); + r = ml_z_from_mpz(ma); + mpz_clear(ma); + CAMLreturn(r); +} + +CAMLprim value ml_z_fib(value a) +{ + CAMLparam1(a); + CAMLlocal1(r); + mpz_t mr; + intnat ma = Long_val(a); + if (ma < 0) + caml_invalid_argument("Z.fib: non-positive argument"); + mpz_init(mr); + mpz_fib_ui(mr, ma); + r = ml_z_from_mpz(mr); + mpz_clear(mr); + CAMLreturn(r); +} + +CAMLprim value ml_z_lucnum(value a) +{ + CAMLparam1(a); + CAMLlocal1(r); + mpz_t mr; + intnat ma = Long_val(a); + if (ma < 0) + caml_invalid_argument("Z.lucnum: non-positive argument"); + mpz_init(mr); + mpz_lucnum_ui(mr, ma); + r = ml_z_from_mpz(mr); + mpz_clear(mr); + CAMLreturn(r); +} + + + +/* XXX should we support the following? + mpz_scan0, mpz_scan1 + mpz_setbit, mpz_clrbit, mpz_combit, mpz_tstbit + mpz_odd_p, mpz_even_p + random numbers +*/ + + + +/*--------------------------------------------------- + CUSTOMS BLOCKS + ---------------------------------------------------*/ + +/* With OCaml < 3.12.1, comparing a block an int with OCaml's + polymorphic compare will give erroneous results (int always + strictly smaller than block). OCaml 3.12.1 and above + give the correct result. +*/ +int ml_z_custom_compare(value arg1, value arg2) +{ + Z_DECL(arg1); Z_DECL(arg2); + int r; + Z_CHECK(arg1); Z_CHECK(arg2); +#if Z_FAST_PATH + /* Value-equal small integers are equal. + Pointer-equal big integers are equal as well. */ + if (arg1 == arg2) return 0; + if (Is_long(arg2)) { + if (Is_long(arg1)) { + return arg1 > arg2 ? 1 : -1; + } else { + /* Either arg1 is positive and arg1 > Z_MAX_INT >= arg2 -> result +1 + or arg1 is negative and arg1 < Z_MIN_INT <= arg2 -> result -1 */ + return Z_SIGN(arg1) ? -1 : 1; + } + } + else if (Is_long(arg1)) { + /* Either arg2 is positive and arg2 > Z_MAX_INT >= arg1 -> result -1 + or arg2 is negative and arg2 < Z_MIN_INT <= arg1 -> result +1 */ + return Z_SIGN(arg2) ? 
1 : -1; + } +#endif + r = 0; + Z_ARG(arg1); + Z_ARG(arg2); + if (sign_arg1 != sign_arg2) r = 1; + else if (size_arg1 > size_arg2) r = 1; + else if (size_arg1 < size_arg2) r = -1; + else { + mp_size_t i; + for (i = size_arg1 - 1; i >= 0; i--) { + if (ptr_arg1[i] > ptr_arg2[i]) { r = 1; break; } + if (ptr_arg1[i] < ptr_arg2[i]) { r = -1; break; } + } + } + if (sign_arg1) r = -r; + return r; +} + +static intnat ml_z_custom_hash(value v) +{ + Z_DECL(v); + mp_size_t i; + uint32_t acc = 0; + Z_CHECK(v); + Z_ARG(v); + for (i = 0; i < size_v; i++) { + acc = caml_hash_mix_uint32(acc, (uint32_t)(ptr_v[i])); +#ifdef ARCH_SIXTYFOUR + acc = caml_hash_mix_uint32(acc, ptr_v[i] >> 32); +#endif + } +#ifndef ARCH_SIXTYFOUR + /* To obtain the same hash value on 32- and 64-bit platforms */ + if (size_v % 2 != 0) + acc = caml_hash_mix_uint32(acc, 0); +#endif + if (sign_v) acc++; + return acc; +} + +CAMLprim value ml_z_hash(value v) +{ + return Val_long(ml_z_custom_hash(v)); +} + +/* serialized format: + - 1-byte sign (1 for negative, 0 for positive) + - 4-byte size in bytes + - size-byte unsigned integer, in little endian order + */ +static void ml_z_custom_serialize(value v, + uintnat * wsize_32, + uintnat * wsize_64) +{ + mp_size_t i,nb; + Z_DECL(v); + Z_CHECK(v); + Z_ARG(v); + if ((mp_size_t)(uint32_t) size_v != size_v) + caml_failwith("Z.serialize: number is too large"); + nb = size_v * sizeof(mp_limb_t); + caml_serialize_int_1(sign_v ? 1 : 0); + caml_serialize_int_4(nb); + for (i = 0; i < size_v; i++) { + mp_limb_t x = ptr_v[i]; + caml_serialize_int_1(x); + caml_serialize_int_1(x >> 8); + caml_serialize_int_1(x >> 16); + caml_serialize_int_1(x >> 24); +#ifdef ARCH_SIXTYFOUR + caml_serialize_int_1(x >> 32); + caml_serialize_int_1(x >> 40); + caml_serialize_int_1(x >> 48); + caml_serialize_int_1(x >> 56); +#endif + } + *wsize_32 = 4 * (1 + (nb + 3) / 4); + *wsize_64 = 8 * (1 + (nb + 7) / 8); +#if Z_PERFORM_CHECK + /* Add space for canary */ + *wsize_32 += 4; + *wsize_64 += 8; +#endif +} + +/* XXX: serializing a large (i.e., > 2^31) int on a 64-bit machine and + deserializing on a 32-bit machine will fail (instead of returning a + block). 
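+     For reference (editorial example of the format): on a 64-bit machine
+     the value 2^64 has limbs [0; 1], so it serializes as the sign byte 00,
+     the 4-byte length 16, then sixteen little-endian data bytes: eight 00s
+     for the low limb, then 01 followed by seven 00s for the high limb.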
+ */ +static uintnat ml_z_custom_deserialize(void * dst) +{ + mp_limb_t* d = ((mp_limb_t*)dst) + 1; + int sign = caml_deserialize_uint_1(); + uint32_t sz = caml_deserialize_uint_4(); + uint32_t szw = (sz + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t); + uint32_t i = 0; + mp_limb_t x; + /* all limbs but last */ + if (szw > 1) { + for (; i < szw - 1; i++) { + x = caml_deserialize_uint_1(); + x |= ((mp_limb_t) caml_deserialize_uint_1()) << 8; + x |= ((mp_limb_t) caml_deserialize_uint_1()) << 16; + x |= ((mp_limb_t) caml_deserialize_uint_1()) << 24; +#ifdef ARCH_SIXTYFOUR + x |= ((mp_limb_t) caml_deserialize_uint_1()) << 32; + x |= ((mp_limb_t) caml_deserialize_uint_1()) << 40; + x |= ((mp_limb_t) caml_deserialize_uint_1()) << 48; + x |= ((mp_limb_t) caml_deserialize_uint_1()) << 56; +#endif + d[i] = x; + } + sz -= i * sizeof(mp_limb_t); + } + /* last limb */ + if (sz > 0) { + x = caml_deserialize_uint_1(); + if (sz > 1) x |= ((mp_limb_t) caml_deserialize_uint_1()) << 8; + if (sz > 2) x |= ((mp_limb_t) caml_deserialize_uint_1()) << 16; + if (sz > 3) x |= ((mp_limb_t) caml_deserialize_uint_1()) << 24; +#ifdef ARCH_SIXTYFOUR + if (sz > 4) x |= ((mp_limb_t) caml_deserialize_uint_1()) << 32; + if (sz > 5) x |= ((mp_limb_t) caml_deserialize_uint_1()) << 40; + if (sz > 6) x |= ((mp_limb_t) caml_deserialize_uint_1()) << 48; + if (sz > 7) x |= ((mp_limb_t) caml_deserialize_uint_1()) << 56; +#endif + d[i] = x; + i++; + } + while (i > 0 && !d[i-1]) i--; + d[-1] = i | (sign ? Z_SIGN_MASK : 0); +#if Z_PERFORM_CHECK + d[szw] = 0xDEADBEEF ^ szw; + szw++; +#endif + return (szw+1) * sizeof(mp_limb_t); +} + +struct custom_operations ml_z_custom_ops = { + /* Identifiers starting with _ are normally reserved for the OCaml runtime + system, but we got authorization form Gallium to use "_z". + It is very compact and stays in the spirit of identifiers used for + int32 & co ("_i" & co.). + */ + "_z", + custom_finalize_default, + ml_z_custom_compare, + ml_z_custom_hash, + ml_z_custom_serialize, + ml_z_custom_deserialize, + ml_z_custom_compare, +#ifndef Z_OCAML_LEGACY_CUSTOM_OPERATIONS + custom_fixed_length_default +#endif +}; + + +/*--------------------------------------------------- + CONVERSION WITH MLGMPIDL + ---------------------------------------------------*/ + +CAMLprim value ml_z_mlgmpidl_of_mpz(value a) +{ + CAMLparam1(a); + mpz_ptr mpz = (mpz_ptr)(Data_custom_val(a)); + CAMLreturn(ml_z_from_mpz(mpz)); +} + +/* stores the Z.t object into an existing Mpz.t one; + as we never allocate Mpz.t objects, we don't need any pointer to + mlgmpidl's custom block ops, and so, can link the function even if + mlgmpidl is not installed + */ +CAMLprim value ml_z_mlgmpidl_set_mpz(value r, value a) +{ + CAMLparam2(r,a); + mpz_ptr mpz = (mpz_ptr)(Data_custom_val(r)); + ml_z_mpz_set_z(mpz,a); + CAMLreturn(Val_unit); +} + + + +/*--------------------------------------------------- + INIT / EXIT + ---------------------------------------------------*/ + +/* called at program exit to display performance information */ +#if Z_PERF_COUNTER +static void ml_z_dump_count() +{ + printf("Z: %lu asm operations, %lu C operations, %lu slow (%lu%%)\n", + ml_z_ops_as, ml_z_ops, ml_z_slow, + ml_z_ops ? 
(ml_z_slow*100/(ml_z_ops+ml_z_ops_as)) : 0);
+}
+#endif
+
+CAMLprim value ml_z_init()
+{
+  ml_z_2p32 = ldexp(1., 32);
+  /* run-time checks */
+#ifdef ARCH_SIXTYFOUR
+  if (sizeof(intnat) != 8 || sizeof(mp_limb_t) != 8)
+    caml_failwith("Z.init: invalid size of types, 8 expected");
+#else
+  if (sizeof(intnat) != 4 || sizeof(mp_limb_t) != 4)
+    caml_failwith("Z.init: invalid size of types, 4 expected");
+#endif
+  /* install functions */
+#if Z_PERF_COUNTER
+  atexit(ml_z_dump_count);
+#endif
+#if Z_CUSTOM_BLOCK
+  caml_register_custom_operations(&ml_z_custom_ops);
+#endif
+  return Val_unit;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Zarith/zarith.h b/Zarith/zarith.h
new file mode 100644
index 000000000000..b6ce28798882
--- /dev/null
+++ b/Zarith/zarith.h
@@ -0,0 +1,46 @@
+/* This file is cloned from
+   https://github.com/ocaml/Zarith/blob/39df015/zarith.h
+*/
+
+/**
+   Public C interface for Zarith.
+
+   This is intended for C libraries that wish to convert between mpz_t and
+   Z.t objects.
+
+
+   This file is part of the Zarith library
+   http://forge.ocamlcore.org/projects/zarith .
+   It is distributed under LGPL 2 licensing, with static linking exception.
+   See the LICENSE file included in the distribution.
+
+   Copyright (c) 2010-2011 Antoine Miné, Abstraction project.
+   Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS),
+   a joint laboratory by:
+   CNRS (Centre national de la recherche scientifique, France),
+   ENS (École normale supérieure, Paris, France),
+   INRIA Rocquencourt (Institut national de recherche en informatique, France).
+
+*/
+
+
+/* gmp.h or mpir.h must be included manually before zarith.h */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <caml/mlvalues.h>
+
+/* sets rop to the value in op (limbs are copied) */
+void ml_z_mpz_set_z(mpz_t rop, value op);
+
+/* inits and sets rop to the value in op (limbs are copied) */
+void ml_z_mpz_init_set_z(mpz_t rop, value op);
+
+/* returns a new z object equal to op (limbs are copied) */
+value ml_z_from_mpz(mpz_t op);
+
+#ifdef __cplusplus
+}
+#endif

From 22e2529b3f7da13f3136c2ac645ecf38f27bc2af Mon Sep 17 00:00:00 2001
From: Gabriel Scherer
Date: Wed, 30 Jun 2021 22:16:13 +0200
Subject: [PATCH 06/10] import the Zarith-inspired benchmark code

---
 Zarith/Makefile |  31 ++++++++
 Zarith/test.ml  | 195 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 226 insertions(+)
 create mode 100644 Zarith/Makefile
 create mode 100644 Zarith/test.ml

diff --git a/Zarith/Makefile b/Zarith/Makefile
new file mode 100644
index 000000000000..f9d39817f5f5
--- /dev/null
+++ b/Zarith/Makefile
@@ -0,0 +1,31 @@
+ROOTDIR=..
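+# Editorial note: typical invocations (assuming an in-tree compiler build):
+#   make run                 # time every implementation at BENCH_SIZE=20
+#   make run BENCH_SIZE=22   # sizes where additions overflow into GMP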
+OCAMLRUN=$(ROOTDIR)/runtime/ocamlrun
+STDFLAGS=-nostdlib -I $(ROOTDIR)/stdlib
+OCAMLC=$(OCAMLRUN) $(ROOTDIR)/ocamlc $(STDFLAGS)
+OCAMLOPT=$(OCAMLRUN) $(ROOTDIR)/ocamlopt $(STDFLAGS)
+
+BENCHS=wrong-int zarith unboxed boxed
+BENCH_SIZE ?= 20
+run: test.byte test.native
+	for bench in $(BENCHS); do\
+	  echo -e "\n\ntest.byte $$bench:";\
+	  time ./test.byte $$bench $(BENCH_SIZE) 1_000_000;\
+	done;\
+	for bench in $(BENCHS); do\
+	  echo -e "\n\ntest.native $$bench:";\
+	  time ./test.native $$bench $(BENCH_SIZE) 10_000_000;\
+	done
+
+ZARITH_FLAGS=-ccopt "-I$(ROOTDIR)/runtime -DHAS_GMP -O3 -Wall -Wextra" -cclib -lgmp
+
+test.cmm: test.ml
+	$(OCAMLOPT) -c test.ml -dcmm 2>test.cmm
+
+test.byte: test.ml
+	$(OCAMLC) -o test.byte -custom $(ZARITH_FLAGS) -linkall caml_z.c test.ml
+
+test.native: test.ml
+	$(OCAMLOPT) -o test.native $(ZARITH_FLAGS) caml_z.c test.ml
+
+clean:
+	rm -f test.cm* test.o caml_z.o test.byte test.native
diff --git a/Zarith/test.ml b/Zarith/test.ml
new file mode 100644
index 000000000000..9d20197a3efd
--- /dev/null
+++ b/Zarith/test.ml
@@ -0,0 +1,195 @@
+(* See 'Makefile' for building and running instructions *)
+type test =
+  | Wrong_int
+  | Zarith
+  | Boxed
+  | Unboxed
+
+
+let usage () =
+  Printf.eprintf
+    "Usage: %s [wrong-int|zarith|boxed|unboxed] \n%!"
+    Sys.argv.(0)
+
+let impl_choice = match Sys.argv.(1) with
+  | "wrong-int" -> Wrong_int
+  | "zarith" -> Zarith
+  | "boxed" -> Boxed
+  | "unboxed" -> Unboxed
+  | _ | exception _ -> usage (); exit 2
+
+let factorial_input =
+  try int_of_string Sys.argv.(2)
+  with _ -> usage (); exit 3
+
+let n_iters =
+  try int_of_string Sys.argv.(3)
+  with _ -> usage (); exit 4
+
+(* Our benchmark computes (factorial n) in a silly way,
+   using the repeated-addition definition of multiplication:
+
+   !3 is computed as
+     let !0 = 1 in
+     let !1 = !0 in
+     let !2 = !1 + !1 in
+     let !3 = !2 + !2 + !2 in
+     !3
+
+   The number of additions is quadratic in the input n.
+
+   The result of (factorial n) exceeds OCaml's 63-bit integers for n=21.
+*)
+let factorial of_int ( +! ) n =
+  let rec ( *! ) a b =
+    if b = 0 then of_int 0
+    else a *! (b - 1) +! a
+  in
+  let rec fac n =
+    if n = 0 then of_int 1
+    else fac (n - 1) *! n
+  in
+  fac n
+
+module Wrong_int = struct
+  (* This is a *wrong* implementation of bignums using integers that will overflow and wrap around.
+     It is informative to compare the benchmark performance of this implementation
+     with the correct implementations. *)
+  let of_int n = n
+  let add x y = x + y
+  let fac = factorial of_int add
+  let to_string = string_of_int
+end
+
+module Zarith = struct
+  (* This is taken verbatim from Zarith's z.ml:
+     https://github.com/ocaml/Zarith/blob/39df015/z.ml#L41-L49
+   *)
+  type t
+
+  external is_small_int: t -> bool = "%obj_is_int"
+  external unsafe_to_int: t -> int = "%identity"
+  external of_int: int -> t = "%identity"
+
+  external c_add: t -> t -> t = "ml_z_add"
+  let add_zarith x y =
+    if is_small_int x && is_small_int y then begin
+      let z = unsafe_to_int x + unsafe_to_int y in
+      (* Overflow check -- Hacker's Delight, section 2.12 *)
+      if (z lxor unsafe_to_int x) land (z lxor unsafe_to_int y) >= 0
+      then of_int z
+      else c_add x y
+    end else
+      c_add x y
+
+  (* new code *)
+  let fac n = factorial of_int add_zarith n
+
+  external c_format: string -> t -> string = "ml_z_format"
+  let to_string n = c_format "%d" n
+end
+
+module Boxed = struct
+  (* This is a naive, safe implementation of Zarith's fast-path logic,
+     using an algebraic datatype to distinguish short from long integers.
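+     (Editorial note: unlike Zarith, where a small integer is an immediate
+     tagged int, `Short n` here is itself a heap-allocated one-field block,
+     so even the fast path allocates; the Unboxed variant below removes
+     that allocation with [@unboxed].)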
+  *)
+  type t
+
+  external long_int: int -> t = "%identity"
+  external c_add: t -> t -> t = "ml_z_add"
+
+  type boxed =
+    | Short of int
+    | Long of t
+
+  let of_int n = Short n
+
+  let to_long = function
+    | Short x -> long_int x
+    | Long x -> x
+
+  let add_boxed a b =
+    match a, b with
+    | Short x, Short y ->
+      let z = x + y in
+      (* Overflow check -- Hacker's Delight, section 2.12 *)
+      if (z lxor x) land (z lxor y) >= 0
+      then Short z
+      else
+        (* Note: in the general case of additions, in some cases the
+           result of c_add may be a "short" integer again, and
+           should be turned back into a Short if we wanted to mimic
+           Zarith's representation invariants. The current approach
+           is correct, simpler, and strictly equivalent for this
+           benchmark as we only manipulate non-negative numbers. *)
+        Long (c_add (long_int x) (long_int y))
+    | _, _ -> Long (c_add (to_long a) (to_long b))
+
+  let fac n = factorial of_int add_boxed n
+
+  external c_format: string -> t -> string = "ml_z_format"
+  let to_string = function
+    | Short n -> string_of_int n
+    | Long n -> c_format "%d" n
+end
+
+module Unboxed = struct
+  (* This is a safe implementation of Zarith's fast-path logic,
+     using an algebraic datatype to distinguish short from long integers,
+     unboxing the "short" constructor to get something close to Zarith's internal representation.
+
+     The only difference compared to the Boxed implementation above is the [@unboxed] annotation.
+
+     Note: in theory we could also design the implementation to unbox the Long constructor
+     (provided it only stores custom blocks, never immediate integers), but we have not
+     implemented support for refining abstract type head-shapes, which would be required.
+  *)
+  type t
+
+  external long_int: int -> t = "%identity"
+  external c_add: t -> t -> t = "ml_z_add"
+
+  type boxed =
+    | Short of int [@unboxed]
+    | Long of t
+
+  let of_int n = Short n
+
+  let to_long = function
+    | Short x -> long_int x
+    | Long x -> x
+
+  let add_unboxed a b =
+    match a, b with
+    | Short x, Short y ->
+      let z = x + y in
+      (* Overflow check -- Hacker's Delight, section 2.12 *)
+      if (z lxor x) land (z lxor y) >= 0
+      then Short z
+      else Long (c_add (long_int x) (long_int y))
+    | _, _ -> Long (c_add (to_long a) (to_long b))
+
+  let fac n = factorial of_int add_unboxed n
+
+  external c_format: string -> t -> string = "ml_z_format"
+  let to_string = function
+    | Short n -> string_of_int n
+    | Long n -> c_format "%d" n
+end
+
+
+let test to_string fac =
+  for _i = 1 to n_iters - 1 do
+    ignore (fac factorial_input);
+  done;
+  print_endline (to_string (fac factorial_input))
+
+let () =
+  match impl_choice with
+  | Wrong_int ->
+    test Wrong_int.to_string Wrong_int.fac
+  | Zarith ->
+    test Zarith.to_string Zarith.fac
+  | Boxed ->
+    test Boxed.to_string Boxed.fac
+  | Unboxed ->
+    test Unboxed.to_string Unboxed.fac

From 92a7825c2b424800ba5a3ac7bed7fe8d75aa6093 Mon Sep 17 00:00:00 2001
From: Gabriel Scherer
Date: Sun, 10 Oct 2021 15:39:08 +0200
Subject: [PATCH 07/10] adapt the code to follow the paper presentation

---
 Zarith/Makefile |  4 ++++
 Zarith/caml_z.c | 22 ++++++++++++++++++++++
 Zarith/test.ml  | 33 ++++++++++++++-------------------
 3 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/Zarith/Makefile b/Zarith/Makefile
index f9d39817f5f5..3627514e4f7a 100644
--- a/Zarith/Makefile
+++ b/Zarith/Makefile
@@ -6,6 +6,7 @@ OCAMLOPT=$(OCAMLRUN) $(ROOTDIR)/ocamlopt $(STDFLAGS)
 
 BENCHS=wrong-int zarith unboxed boxed
 BENCH_SIZE ?= 20
+
 run: test.byte test.native
 	for bench in $(BENCHS); do\
 	  echo -e "\n\ntest.byte
$$bench:";\ @@ -16,6 +17,9 @@ run: test.byte test.native time ./test.native $$bench $(BENCH_SIZE) 10_000_000;\ done +run-with-overflow: + $(MAKE) run BENCH_SIZE=22 + ZARITH_FLAGS=-ccopt "-I$(ROOTDIR)/runtime -DHAS_GMP -O3 -Wall -Wextra" -cclib -lgmp test.cmm: test.ml diff --git a/Zarith/caml_z.c b/Zarith/caml_z.c index 63283e647b09..c7c44d3d9e27 100644 --- a/Zarith/caml_z.c +++ b/Zarith/caml_z.c @@ -1423,6 +1423,28 @@ CAMLprim value ml_z_add(value arg1, value arg2) return ml_z_addsub(arg1, arg2, 0); } +static inline value unbox(value arg) { + if (Is_long(arg)) return arg; + else return Field(arg, 0); +} + +CAMLprim value box(value arg) { + if (Is_long(arg)) + return arg; + else { + CAMLparam1(arg); + value ret; + ret = caml_alloc_small(1, 0); + Field(ret, 0) = arg; + CAMLreturn(ret); + } +} + +CAMLprim value ml_z_add_boxcustom(value arg1, value arg2) +{ + return box(ml_z_addsub(unbox(arg1), unbox(arg2), 0)); +} + CAMLprim value ml_z_sub(value arg1, value arg2) { Z_MARK_OP; diff --git a/Zarith/test.ml b/Zarith/test.ml index 9d20197a3efd..dfac75a58306 100644 --- a/Zarith/test.ml +++ b/Zarith/test.ml @@ -92,14 +92,14 @@ end module Boxed = struct (* This is a naive, safe implementation of Zarith's fast-path logic, using an algebraic datatype to distinguish short from long integers. *) - type t + type gmp_t - external long_int: int -> t = "%identity" - external c_add: t -> t -> t = "ml_z_add" + external c_add: gmp_t -> gmp_t -> gmp_t = "ml_z_add" + external long_int : int -> gmp_t = "ml_z_of_int" - type boxed = + type t = | Short of int - | Long of t + | Long of gmp_t let of_int n = Short n @@ -126,7 +126,7 @@ module Boxed = struct let fac n = factorial of_int add_boxed n - external c_format: string -> t -> string = "ml_z_format" + external c_format: string -> gmp_t -> string = "ml_z_format" let to_string = function | Short n -> string_of_int n | Long n -> c_format "%d" n @@ -143,20 +143,15 @@ module Unboxed = struct (provided it only stores custom blocks, never immediate integers), but we have not implemented support for refining abstract type head-shapes, which would be required. 
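     (Editorial note: with [@unboxed], `Short n` is represented at run time
     as the immediate integer n itself, with no allocation, while `Long t`
     remains a one-field block; the ml_z_add_boxcustom stub added in this
     patch unwraps that block with Field(arg, 0) before calling ml_z_addsub.)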
*) - type t + type gmp_t - external long_int: int -> t = "%identity" - external c_add: t -> t -> t = "ml_z_add" - - type boxed = + type t = | Short of int [@unboxed] - | Long of t + | Long of gmp_t - let of_int n = Short n + external c_add: t -> t -> t = "ml_z_add_boxcustom" - let to_long = function - | Short x -> long_int x - | Long x -> x + let of_int n = Short n let add_unboxed a b = match a, b with @@ -165,12 +160,12 @@ module Unboxed = struct (* Overflow check -- Hacker's Delight, section 2.12 *) if (z lxor x) land (z lxor y) >= 0 then Short z - else Long (c_add (long_int x) (long_int y)) - | _, _ -> Long (c_add (to_long a) (to_long b)) + else c_add a b + | _, _ -> c_add a b let fac n = factorial of_int add_unboxed n - external c_format: string -> t -> string = "ml_z_format" + external c_format: string -> gmp_t -> string = "ml_z_format" let to_string = function | Short n -> string_of_int n | Long n -> c_format "%d" n From 63d7e10f9a8e98554b9dda5297eb8c0a1ed9f935 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sun, 10 Oct 2021 15:39:36 +0200 Subject: [PATCH 08/10] overflow counting logic --- Zarith/test.ml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Zarith/test.ml b/Zarith/test.ml index dfac75a58306..89a82f0ec7d8 100644 --- a/Zarith/test.ml +++ b/Zarith/test.ml @@ -153,15 +153,24 @@ module Unboxed = struct let of_int n = Short n + let with_overflow = ref 0 + let total_ops = ref 0 + + let () = + at_exit @@ fun () -> + Printf.printf "Overflow ratio: %f%%.\n%!" + (100. *. float !with_overflow /. float !total_ops) + let add_unboxed a b = + incr total_ops; match a, b with | Short x, Short y -> let z = x + y in (* Overflow check -- Hacker's Delight, section 2.12 *) if (z lxor x) land (z lxor y) >= 0 then Short z - else c_add a b - | _, _ -> c_add a b + else (incr with_overflow; c_add a b) + | _, _ -> (incr with_overflow; c_add a b) let fac n = factorial of_int add_unboxed n From a5944741674a6f786dcaac97cc1af7fa5cc18bf2 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sun, 10 Oct 2021 15:39:38 +0200 Subject: [PATCH 09/10] Revert "overflow counting logic" This reverts commit 3a09ced4e0391f745755972d947faa6080af6370. The overflow-counting logic may change the performance profile, so it is simpler to not have it at all in normal benchmarking mode. The figure we computed is that with BENCH_SIZE=22, 16% of operations overflow. --- Zarith/test.ml | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/Zarith/test.ml b/Zarith/test.ml index 89a82f0ec7d8..dfac75a58306 100644 --- a/Zarith/test.ml +++ b/Zarith/test.ml @@ -153,24 +153,15 @@ module Unboxed = struct let of_int n = Short n - let with_overflow = ref 0 - let total_ops = ref 0 - - let () = - at_exit @@ fun () -> - Printf.printf "Overflow ratio: %f%%.\n%!" - (100. *. float !with_overflow /. 
float !total_ops)
-
-  let add_unboxed a b =
-    incr total_ops;
+  let add_unboxed a b =
     match a, b with
     | Short x, Short y ->
       let z = x + y in
       (* Overflow check -- Hacker's Delight, section 2.12 *)
       if (z lxor x) land (z lxor y) >= 0
       then Short z
-      else (incr with_overflow; c_add a b)
-    | _, _ -> (incr with_overflow; c_add a b)
+      else c_add a b
+    | _, _ -> c_add a b
 
   let fac n = factorial of_int add_unboxed n

From bda7606f05d42d970e1efcdcf5ce8ca192d728f0 Mon Sep 17 00:00:00 2001
From: Gabriel Scherer
Date: Mon, 13 Jun 2022 22:08:33 +0200
Subject: [PATCH 10/10] Zarith benchmark: include an unboxed_both version that
 also unboxes Custom

---
 Zarith/Makefile |  6 ++---
 Zarith/caml_z.c |  5 +++++
 Zarith/test.ml  | 59 ++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/Zarith/Makefile b/Zarith/Makefile
index 3627514e4f7a..0c841c5b4dce 100644
--- a/Zarith/Makefile
+++ b/Zarith/Makefile
@@ -4,7 +4,7 @@ STDFLAGS=-nostdlib -I $(ROOTDIR)/stdlib
 OCAMLC=$(OCAMLRUN) $(ROOTDIR)/ocamlc $(STDFLAGS)
 OCAMLOPT=$(OCAMLRUN) $(ROOTDIR)/ocamlopt $(STDFLAGS)
 
-BENCHS=wrong-int zarith unboxed boxed
+BENCHS=wrong-int zarith unboxed unboxed_both boxed
 BENCH_SIZE ?= 20
 
 run: test.byte test.native
@@ -25,10 +25,10 @@ ZARITH_FLAGS=-ccopt "-I$(ROOTDIR)/runtime -DHAS_GMP -O3 -Wall -Wextra" -cclib -l
 test.cmm: test.ml
 	$(OCAMLOPT) -c test.ml -dcmm 2>test.cmm
 
-test.byte: test.ml
+test.byte: test.ml caml_z.c
 	$(OCAMLC) -o test.byte -custom $(ZARITH_FLAGS) -linkall caml_z.c test.ml
 
-test.native: test.ml
+test.native: test.ml caml_z.c
 	$(OCAMLOPT) -o test.native $(ZARITH_FLAGS) caml_z.c test.ml
 
 clean:
diff --git a/Zarith/caml_z.c b/Zarith/caml_z.c
index c7c44d3d9e27..797e4599c533 100644
--- a/Zarith/caml_z.c
+++ b/Zarith/caml_z.c
@@ -1445,6 +1445,11 @@ CAMLprim value ml_z_add_boxcustom(value arg1, value arg2)
   return box(ml_z_addsub(unbox(arg1), unbox(arg2), 0));
 }
 
+CAMLprim value ml_z_add_unboxcustom(value arg1, value arg2)
+{
+  return ml_z_addsub(arg1, arg2, 0);
+}
+
 CAMLprim value ml_z_sub(value arg1, value arg2)
 {
   Z_MARK_OP;
diff --git a/Zarith/test.ml b/Zarith/test.ml
index dfac75a58306..b6b1364912fa 100644
--- a/Zarith/test.ml
+++ b/Zarith/test.ml
@@ -4,19 +4,25 @@ type test =
   | Wrong_int
   | Zarith
   | Boxed
   | Unboxed
+  | Unboxed_both
 
+let impls = [
+  "wrong-int", Wrong_int;
+  "zarith", Zarith;
+  "boxed", Boxed;
+  "unboxed", Unboxed;
+  "unboxed_both", Unboxed_both;
+]
 
 let usage () =
   Printf.eprintf
-    "Usage: %s [wrong-int|zarith|boxed|unboxed] \n%!"
+    "Usage: %s [%s] \n%!"
     Sys.argv.(0)
+    (String.concat "|" (List.map fst impls))
 
-let impl_choice = match Sys.argv.(1) with
-  | "wrong-int" -> Wrong_int
-  | "zarith" -> Zarith
-  | "boxed" -> Boxed
-  | "unboxed" -> Unboxed
-  | _ | exception _ -> usage (); exit 2
+let impl_choice = match List.assoc Sys.argv.(1) impls with
+  | v -> v
+  | exception _ -> usage (); exit 2
 
 let factorial_input =
   try int_of_string Sys.argv.(2)
@@ -138,10 +144,6 @@ module Unboxed = struct
      unboxing the "short" constructor to get something close to Zarith's internal representation.
 
     The only difference compared to the Boxed implementation above is the [@unboxed] annotation.
-
-     Note: in theory we could also design the implementation to unbox the Long constructor
-     (provided it only stores custom blocks, never immediate integers), but we have not
-     implemented support for refining abstract type head-shapes, which would be required.
  *)
   type gmp_t
 
   type t =
     | Short of int [@unboxed]
     | Long of gmp_t
 
   external c_add: t -> t -> t = "ml_z_add_boxcustom"
 
   let of_int n = Short n
@@ -171,6 +173,39 @@ module Unboxed = struct
     | Long n -> c_format "%d" n
 end
 
+module Unboxed_both = struct
+  (* This is a safe implementation of Zarith's fast-path logic,
+     using an algebraic datatype to distinguish short from long integers,
+     unboxing both constructors to recover Zarith's internal representation.
+     Declaring `type gmp_t [@@shape [custom]]` below tells the compiler that
+     gmp_t values are always custom blocks, never immediate integers, which
+     is what makes it safe to unbox the Long constructor as well.
+  *)
+  type gmp_t [@@shape [custom]]
+
+  type t =
+    | Short of int [@unboxed]
+    | Long of gmp_t [@unboxed]
+
+  external c_add: t -> t -> t = "ml_z_add_unboxcustom"
+
+  let of_int n = Short n
+
+  let add_unboxed a b =
+    match a, b with
+    | Short x, Short y ->
+      let z = x + y in
+      (* Overflow check -- Hacker's Delight, section 2.12 *)
+      if (z lxor x) land (z lxor y) >= 0
+      then Short z
+      else c_add a b
+    | _, _ -> c_add a b
+
+  let fac n = factorial of_int add_unboxed n
+
+  external c_format: string -> gmp_t -> string = "ml_z_format"
+  let to_string = function
+    | Short n -> string_of_int n
+    | Long n -> c_format "%d" n
+end
+
 
 let test to_string fac =
   for _i = 1 to n_iters - 1 do
@@ -188,3 +223,5 @@ let () =
     test Boxed.to_string Boxed.fac
   | Unboxed ->
     test Unboxed.to_string Unboxed.fac
+  | Unboxed_both ->
+    test Unboxed_both.to_string Unboxed_both.fac