From 82ea87557bc053752b4fa05af6bb41bc7481ffec Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Sun, 3 Jun 2018 17:44:56 -0400
Subject: [PATCH 01/12] sys/div: calculate magic numbers at compile time

---
 sys/include/div.h | 238 ++++++++++++++++++++++++++++------------------
 1 file changed, 146 insertions(+), 92 deletions(-)

diff --git a/sys/include/div.h b/sys/include/div.h
index 54bfa1524257..f2f46de88fd9 100644
--- a/sys/include/div.h
+++ b/sys/include/div.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2015 Kaspar Schleiser <kaspar@schleiser.de>
- * Copyright (C) 2016 Eistec AB
+ *               2016 Eistec AB
+ *               2018 Acutam Automation, LLC
  *
  * This file is subject to the terms and conditions of the GNU Lesser
  * General Public License v2.1. See the file LICENSE in the top level
@@ -16,169 +17,222 @@
  *
  * @file
  * @ingroup   sys
+ *
  * @author    Kaspar Schleiser <kaspar@schleiser.de>
  * @author    Joakim Nohlgård <joakim.nohlgard@eistec.se>
+ * @author    Matthew Blue <matthew.blue.neuro@gmail.com>
+ *
  * @{
  */
 
 #ifndef DIV_H
 #define DIV_H
 
-#include <assert.h>
 #include <stdint.h>
 
+#include "assert.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /**
- * @brief Approximation of (2**l)/d for d=15625, l=12, 32 bits
+ * @brief   Calculate 8-bit multiplicative inverse
+ *
+ * Result is calculated during compilation (requires const num). Bitshifted by
+ * 9 to give the most representation to the smallest interesting number (3)
+ *
+ * @param[in] num  reciprocal
+ *
+ * @return         result, bitshifted by 9
  */
-#define DIV_H_INV_15625_32    0x431bde83ul
+__attribute__((always_inline)) static inline uint8_t div_inv_8(
+                                             const uint8_t num)
+{
+    /* cannot be represented due to bit shift */
+    assert(num > 2);
 
-/**
- * @brief Approximation of (2**l)/d for d=15625, l=12, 64 bits
- */
-#define DIV_H_INV_15625_64    0x431bde82d7b634dbull
+    const uint16_t numerator = (1 << 9);
 
-/**
- * @brief Required shifts for division by 15625, l above
- */
-#define DIV_H_INV_15625_SHIFT 12
+    if ((numerator % num) * 2 >= num) {
+        /* fraction is >= 0.5, so round up */
+        return (uint8_t)(numerator / num) + 1;
+    }
+    else {
+        return (uint8_t)(numerator / num);
+    }
+}
 
 /**
- * @internal
- * @brief Multiply two 64 bit integers into a 128 bit integer and return the upper half.
+ * @brief   Calculate 16-bit multiplicative inverse
  *
- * The implementation only uses 64 bit integers internally, no __int128 support
- * is necessary.
+ * Result is calculated during compilation (requires const num). Bitshifted by
+ * 17 to give the most representation to the smallest interesting number (3)
  *
- * @see http://stackoverflow.com/questions/28868367/getting-the-high-part-of-64-bit-integer-multiplication
-
- * @param[in]   a     operand a
- * @param[in]   b     operand b
- * @return      (((uint128_t)a * b) >> 64)
- */
-uint64_t _div_mulhi64(const uint64_t a, const uint64_t b);
-
-/**
- * @brief Integer divide val by 15625, 64 bit version
+ * @param[in] num  reciprocal
  *
- * @param[in]   val     dividend
- * @return      (val / 15625)
+ * @return         result, bitshifted by 17
  */
-static inline uint64_t div_u64_by_15625(uint64_t val)
+__attribute__((always_inline)) static inline uint16_t div_inv_16(
+                                             const uint16_t num)
 {
-    if (val > 16383999997ull) {
-        return (_div_mulhi64(DIV_H_INV_15625_64, val) >> DIV_H_INV_15625_SHIFT);
+    /* cannot be represented due to bit shift */
+    assert(num > 2);
+
+    const uint32_t numerator = (1UL << 17);
+
+    if ((numerator % num) * 2 >= num) {
+        /* fraction is >= 0.5, so round up */
+        return (uint16_t)(numerator / num) + 1;
+    }
+    else {
+        return (uint16_t)(numerator / num);
     }
-    return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32);
 }
 
 /**
- * @brief Integer divide val by 125
+ * @brief   Calculate 32-bit multiplicative inverse
  *
- * This function can be used to convert uint64_t microsecond times (or
- * intervals) to miliseconds and store them in uint32_t variables, with up to
- * ~50 days worth of miliseconds ((2**32*1000) -1).
- * Use e.g.,  ms = div_u64_by_125(microseconds >> 3)
+ * Result is calculated during compilation (requires const num). Bitshifted by
+ * 33 to give the most representation to the smallest interesting number (3)
  *
- * @pre val <= 536870911999 ((2**32 * 125) -1)
+ * @param[in] num  reciprocal
  *
- * @param[in]   val     dividend
- * @return      (val / 125)
+ * @return         result, bitshifted by 33
  */
-static inline uint32_t div_u64_by_125(uint64_t val)
+__attribute__((always_inline)) static inline uint32_t div_inv_32(
+                                             const uint32_t num)
 {
-  /* a higher value would overflow the result type */
-  assert(val <= 536870911999LLU);
-
-  uint32_t hi = val >> 32;
-  uint32_t lo = val;
-  uint32_t r = (lo >> 16) + (hi << 16);
-  uint32_t res = r / 125;
-  r = ((r % 125) << 16) + (lo & 0xFFFF);
-  res = (res << 16) + r / 125;
-  return res;
+    /* cannot be represented due to bit shift */
+    assert(num > 2);
+
+    const uint64_t numerator = (1ULL << 33);
+
+    if ((numerator % num) * 2 >= num) {
+        /* fraction is >= 0.5, so round up */
+        return (uint32_t)(numerator / num) + 1;
+    }
+    else {
+        return (uint32_t)(numerator / num);
+    }
 }
 
 /**
- * @brief Integer divide val by 1000000
+ * @brief   Calculate 64-bit multiplicative inverse
+ *
+ * Result is calculated during compilation (requires const num). Bitshifted by
+ * 65 to give the most representation to the smallest interesting number (3)
+ *
+ * @param[in] num  reciprocal
  *
- * @param[in]   val     dividend
- * @return      (val / 1000000)
+ * @return         result, bitshifted by 65
  */
-static inline uint64_t div_u64_by_1000000(uint64_t val)
+__attribute__((always_inline)) static inline uint64_t div_inv_64(
+                                             const uint64_t num)
 {
-    return div_u64_by_15625(val) >> 6;
+    /* cannot be represented due to bit shift */
+    assert(num > 2);
+
+    /* find the last bits without using 128-bit ints */
+    const uint64_t numerator = (1ULL << 63);
+    const uint64_t most_sig = (numerator / num) << 2;
+    const uint64_t least_sig = ((numerator % num) << 2) / num;
+
+    if ((((numerator % num) << 2) % num) * 2 >= num) {
+        /* fraction is >= 0.5, so round up */
+        return (most_sig + least_sig + 1);
+    }
+    else {
+        return (most_sig + least_sig);
+    }
 }
 
 /**
- * @brief Divide val by (15625/512)
+ * @internal
+ * @brief Multiply two 64 bit integers into a 128 bit integer and return the upper half.
  *
- * This is used to quantize a 1MHz value to the closest 32768Hz value,
- * e.g., for timers.
+ * The implementation only uses 64 bit integers internally, no __int128 support
+ * is necessary.
  *
- * The algorithm uses the modular multiplicative inverse of 15625 to use only
- * multiplication and bit shifts to perform the division.
+ * @see http://stackoverflow.com/questions/28868367/getting-the-high-part-of-64-bit-integer-multiplication
+
+ * @param[in]   a     operand a
+ * @param[in]   b     operand b
+ * @return      (((uint128_t)a * b) >> 64)
+ */
+uint64_t _div_mulhi64(const uint64_t a, const uint64_t b);
+
+/**
+ * @brief   Divide 8-bit number using an inverse
  *
- * The result will be equal to the mathematical expression: floor((val * 512) / 15625)
+ * @param[in] val  numerator
+ * @param[in] inv  multiplicative inverse of denominator (bitshifted by 9)
  *
- * @param[in]   val     dividend
- * @return      (val / (15625/512))
+ * @return         result
  */
-static inline uint32_t div_u32_by_15625div512(uint32_t val)
+static inline uint8_t div_u8_by_inv(const uint8_t val, const uint8_t inv)
 {
-    return ((uint64_t)(val) * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9);
+    uint16_t tmp;
+
+    tmp = (uint16_t)val * (uint16_t)inv;
+    tmp >>= 9;
+
+    return (uint8_t)tmp;
 }
 
 /**
- * @brief Divide val by (15625/512)
+ * @brief   Divide 16-bit number using an inverse
  *
- * This is used to quantize a 1MHz value to the closest 32768Hz value,
- * e.g., for timers.
+ * @param[in] val  numerator
+ * @param[in] inv  multiplicative inverse of denominator (bitshifted by 17)
  *
- * @param[in]   val     dividend
- * @return      (val / (15625/512))
+ * @return         result
  */
-static inline uint64_t div_u64_by_15625div512(uint64_t val)
+static inline uint16_t div_u16_by_inv(const uint16_t val, const uint16_t inv)
 {
-    /*
-     * This saves around 1400 bytes of ROM on Cortex-M platforms (both ARMv6 and
-     * ARMv7) from avoiding linking against __aeabi_uldivmod and related helpers
-     */
-    if (val > 16383999997ull) {
-        /* this would overflow 2^64 in the multiplication that follows, need to
-         * use the long version */
-        return (_div_mulhi64(DIV_H_INV_15625_64, val) >> (DIV_H_INV_15625_SHIFT - 9));
-    }
-    return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9);
+    uint32_t tmp;
+
+    tmp = (uint32_t)val * (uint32_t)inv;
+    tmp >>= 17;
+
+    return (uint16_t)tmp;
 }
 
 /**
- * @brief Integer divide val by 44488
+ * @brief   Divide 32-bit number using an inverse
  *
- * @param[in]   val     dividend
- * @return      (val / 44488)
+ * @param[in] val  numerator
+ * @param[in] inv  multiplicative inverse of denominator (bitshifted by 33)
+ *
+ * @return         result
  */
-static inline uint32_t div_u32_by_44488(uint32_t val)
+static inline uint32_t div_u32_by_inv(const uint32_t val, const uint32_t inv)
 {
-    return ((uint64_t)val * 0xBC8F1391UL) >> (15 + 32);
+    uint64_t tmp;
+
+    tmp = (uint64_t)val * (uint64_t)inv;
+    tmp >>= 33;
+
+    return (uint32_t)tmp;
 }
 
 /**
- * @brief Modulo 44488
+ * @brief   Divide 64-bit number using an inverse
+ *
+ * @param[in] val  numerator
+ * @param[in] inv  multiplicative inverse of denominator (bitshifted by 65)
  *
- * @param[in]   val     dividend
- * @return      (val % 44488)
+ * @return         result
  */
-static inline uint32_t div_u32_mod_44488(uint32_t val)
+static inline uint64_t div_u64_by_inv(const uint64_t val, const uint64_t inv)
 {
-    return val - (div_u32_by_44488(val)*44488);
+    return (_div_mulhi64(val, inv) >> 1);
 }
 
 #ifdef __cplusplus
 }
 #endif
+
 /** @} */
+
 #endif /* DIV_H */

From 5ad5cc8e80265b8014745b675e04ccfc35e6e2dc Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Sun, 3 Jun 2018 17:46:12 -0400
Subject: [PATCH 02/12] cpu/mips32r2_common: sys/div API change

---
 cpu/mips32r2_common/periph/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpu/mips32r2_common/periph/timer.c b/cpu/mips32r2_common/periph/timer.c
index 16e373f2f730..2629dd8a2508 100644
--- a/cpu/mips32r2_common/periph/timer.c
+++ b/cpu/mips32r2_common/periph/timer.c
@@ -83,7 +83,7 @@ int gettimeofday(struct timeval *__restrict __p, void *__restrict __tz)
     (void)__tz;
 
     uint64_t now = counter * US_PER_MS;
-    __p->tv_sec = div_u64_by_1000000(now);
+    __p->tv_sec = div_u64_by_inv(now, div_inv_64(1000000));
     __p->tv_usec = now - (__p->tv_sec * US_PER_SEC);
 
     return 0;

From dec2f8e5b07c79be441219cebe8342b0924e43ab Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Sun, 3 Jun 2018 17:47:02 -0400
Subject: [PATCH 03/12] tests/unittests: sys/div API change

---
 tests/unittests/tests-div/tests-div.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c
index 85010e05662f..4f92a467e0a8 100644
--- a/tests/unittests/tests-div/tests-div.c
+++ b/tests/unittests/tests-div/tests-div.c
@@ -74,14 +74,14 @@ static void test_div_u64_by_15625(void)
         DEBUG("Dividing %12"PRIu32" by 15625...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] / 15625,
-            div_u64_by_15625(u32_test_values[i]));
+            div_u64_by_inv(u32_test_values[i], div_inv_64(15625)));
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
         DEBUG("Dividing %12"PRIu64" by 15625...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u64_test_values[i] / 15625,
-            div_u64_by_15625(u64_test_values[i]));
+            div_u64_by_inv(u64_test_values[i], div_inv_64(15625)));
     }
 }
 
@@ -91,7 +91,7 @@ static void test_div_u32_by_15625div512(void)
         DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] * 512lu / 15625,
-            div_u32_by_15625div512(u32_test_values[i]));
+            div_u32_by_inv(u32_test_values[i], div_inv_32(15625)) >> 9);
     }
 }
 
@@ -101,14 +101,14 @@ static void test_div_u64_by_1000000(void)
         DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] / 1000000lu,
-            div_u64_by_1000000(u32_test_values[i]));
+            div_u64_by_inv(u32_test_values[i], div_inv_64(1000000)));
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
         DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             u64_test_values[i] / 1000000lu,
-            div_u64_by_1000000(u64_test_values[i]));
+            div_u64_by_inv(u64_test_values[i], div_inv_64(1000000)));
     }
 }
 
@@ -118,14 +118,14 @@ static void test_div_u64_by_15625div512(void)
         DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] * 512lu / 15625,
-            div_u64_by_15625div512(u32_test_values[i]));
+            div_u64_by_inv(u32_test_values[i], div_inv_64(15625)) >> 9);
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
         DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             u64_15625_512_expected_values[i],
-            div_u64_by_15625div512(u64_test_values[i]));
+            div_u64_by_inv(u64_test_values[i], div_inv_64(15625)) >> 9);
     }
 }
 

From 141abee3ea3ad4f6ac7686e7e63d087c34cbb766 Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Sun, 3 Jun 2018 17:47:28 -0400
Subject: [PATCH 04/12] sys/*: sys/div API change

---
 sys/evtimer/evtimer.c                  | 8 +++++++-
 sys/include/xtimer/tick_conversion.h   | 4 ++--
 sys/newlib_syscalls_default/syscalls.c | 2 +-
 sys/random/minstd.c                    | 4 ++--
 sys/xtimer/xtimer.c                    | 2 +-
 5 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/sys/evtimer/evtimer.c b/sys/evtimer/evtimer.c
index aa0b11df9e35..981c37f46a54 100644
--- a/sys/evtimer/evtimer.c
+++ b/sys/evtimer/evtimer.c
@@ -124,8 +124,14 @@ static uint32_t _get_offset(xtimer_t *timer)
     }
     else {
         target_us -= now_us;
+
         /* add half of 125 so integer division rounds to nearest */
-        return div_u64_by_125((target_us >> 3) + 62);
+        target_us = (target_us >> 3) + 62;
+
+        /* a higher value would overflow the result type */
+        assert(target_us <= 536870911999LLU);
+
+        return (uint32_t)div_u64_by_inv(target_us, div_inv_64(125));
     }
 }
 
diff --git a/sys/include/xtimer/tick_conversion.h b/sys/include/xtimer/tick_conversion.h
index a4306cf58f58..da606aeb490d 100644
--- a/sys/include/xtimer/tick_conversion.h
+++ b/sys/include/xtimer/tick_conversion.h
@@ -102,11 +102,11 @@ static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) {
  * multiplying by the fraction (32768 / 1000000), we will instead use
  * (512 / 15625), which reduces the truncation caused by the integer widths */
 static inline uint32_t _xtimer_ticks_from_usec(uint32_t usec) {
-    return div_u32_by_15625div512(usec);
+    return div_u32_by_inv(usec, div_inv_32(15625)) >> 9;
 }
 
 static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) {
-    return div_u64_by_15625div512(usec);
+    return div_u64_by_inv(usec, div_inv_64(15625)) >> 9;
 }
 
 static inline uint32_t _xtimer_usec_from_ticks(uint32_t ticks) {
diff --git a/sys/newlib_syscalls_default/syscalls.c b/sys/newlib_syscalls_default/syscalls.c
index 91543fcd186b..4912324fb613 100644
--- a/sys/newlib_syscalls_default/syscalls.c
+++ b/sys/newlib_syscalls_default/syscalls.c
@@ -503,7 +503,7 @@ int _gettimeofday_r(struct _reent *r, struct timeval *restrict tp, void *restric
     (void) r;
     (void) tzp;
     uint64_t now = xtimer_now_usec64();
-    tp->tv_sec = div_u64_by_1000000(now);
+    tp->tv_sec = div_u64_by_inv(now, div_inv_64(1000000));
     tp->tv_usec = now - (tp->tv_sec * US_PER_SEC);
     return 0;
 }
diff --git a/sys/random/minstd.c b/sys/random/minstd.c
index dc87738162d6..8530756117bb 100644
--- a/sys/random/minstd.c
+++ b/sys/random/minstd.c
@@ -41,8 +41,8 @@ static uint32_t _seed = 1;
 
 int rand_minstd(void)
 {
-    uint32_t hi = div_u32_by_44488(_seed);
-    uint32_t lo = div_u32_mod_44488(_seed);
+    uint32_t hi = div_u32_by_inv(_seed, div_inv_32(44488));
+    uint32_t lo = _seed - hi*44488;
     uint32_t test = (a * lo) - (r * hi);
 
     if(test > 0) {
diff --git a/sys/xtimer/xtimer.c b/sys/xtimer/xtimer.c
index 612bbc9622ff..f3e8c82231da 100644
--- a/sys/xtimer/xtimer.c
+++ b/sys/xtimer/xtimer.c
@@ -186,7 +186,7 @@ void xtimer_now_timex(timex_t *out)
 {
     uint64_t now = xtimer_usec_from_ticks64(xtimer_now64());
 
-    out->seconds = div_u64_by_1000000(now);
+    out->seconds = div_u64_by_inv(now, div_inv_64(1000000));
     out->microseconds = now - (out->seconds * US_PER_SEC);
 }
 

From 2899deb6e3dd5320606a1b6c451bcfe7caf0daf9 Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Sun, 3 Jun 2018 22:49:26 -0400
Subject: [PATCH 05/12] fixup! sys/div: calculate magic numbers at compile time

---
 sys/include/div.h | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/sys/include/div.h b/sys/include/div.h
index f2f46de88fd9..32365070e49e 100644
--- a/sys/include/div.h
+++ b/sys/include/div.h
@@ -172,12 +172,9 @@ uint64_t _div_mulhi64(const uint64_t a, const uint64_t b);
  */
 static inline uint8_t div_u8_by_inv(const uint8_t val, const uint8_t inv)
 {
-    uint16_t tmp;
+    const uint16_t tmp = (uint16_t)val * (uint16_t)inv;
 
-    tmp = (uint16_t)val * (uint16_t)inv;
-    tmp >>= 9;
-
-    return (uint8_t)tmp;
+    return (uint8_t)(tmp >> 9);
 }
 
 /**
@@ -190,12 +187,9 @@ static inline uint8_t div_u8_by_inv(const uint8_t val, const uint8_t inv)
  */
 static inline uint16_t div_u16_by_inv(const uint16_t val, const uint16_t inv)
 {
-    uint32_t tmp;
-
-    tmp = (uint32_t)val * (uint32_t)inv;
-    tmp >>= 17;
+    const uint32_t tmp = (uint32_t)val * (uint32_t)inv;
 
-    return (uint16_t)tmp;
+    return (uint16_t)(tmp >> 17);
 }
 
 /**
@@ -208,12 +202,9 @@ static inline uint16_t div_u16_by_inv(const uint16_t val, const uint16_t inv)
  */
 static inline uint32_t div_u32_by_inv(const uint32_t val, const uint32_t inv)
 {
-    uint64_t tmp;
-
-    tmp = (uint64_t)val * (uint64_t)inv;
-    tmp >>= 33;
+    const uint64_t tmp = (uint64_t)val * (uint64_t)inv;
 
-    return (uint32_t)tmp;
+    return (uint32_t)(tmp >> 33);
 }
 
 /**

From 11299166c898baad23c0a0980981b0e62698c6d3 Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Sun, 3 Jun 2018 22:50:01 -0400
Subject: [PATCH 06/12] fixup! sys/*: sys/div API change

---
 sys/include/xtimer/tick_conversion.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/include/xtimer/tick_conversion.h b/sys/include/xtimer/tick_conversion.h
index da606aeb490d..ac9318911347 100644
--- a/sys/include/xtimer/tick_conversion.h
+++ b/sys/include/xtimer/tick_conversion.h
@@ -102,11 +102,11 @@ static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) {
  * multiplying by the fraction (32768 / 1000000), we will instead use
  * (512 / 15625), which reduces the truncation caused by the integer widths */
 static inline uint32_t _xtimer_ticks_from_usec(uint32_t usec) {
-    return div_u32_by_inv(usec, div_inv_32(15625)) >> 9;
+    return div_u32_by_inv(usec, div_inv_32(15625)) << 9;
 }
 
 static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) {
-    return div_u64_by_inv(usec, div_inv_64(15625)) >> 9;
+    return div_u64_by_inv(usec, div_inv_64(15625)) << 9;
 }
 
 static inline uint32_t _xtimer_usec_from_ticks(uint32_t ticks) {

From 620997b6461235984c8c1c3b7deb21149d589a03 Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Sun, 3 Jun 2018 22:50:28 -0400
Subject: [PATCH 07/12] fixup! tests/unittests: sys/div API change

---
 tests/unittests/tests-div/tests-div.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c
index 4f92a467e0a8..289c4bd5f99e 100644
--- a/tests/unittests/tests-div/tests-div.c
+++ b/tests/unittests/tests-div/tests-div.c
@@ -91,7 +91,7 @@ static void test_div_u32_by_15625div512(void)
         DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] * 512lu / 15625,
-            div_u32_by_inv(u32_test_values[i], div_inv_32(15625)) >> 9);
+            div_u32_by_inv(u32_test_values[i], div_inv_32(15625)) << 9);
     }
 }
 
@@ -118,14 +118,14 @@ static void test_div_u64_by_15625div512(void)
         DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] * 512lu / 15625,
-            div_u64_by_inv(u32_test_values[i], div_inv_64(15625)) >> 9);
+            div_u64_by_inv(u32_test_values[i], div_inv_64(15625)) << 9);
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
         DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             u64_15625_512_expected_values[i],
-            div_u64_by_inv(u64_test_values[i], div_inv_64(15625)) >> 9);
+            div_u64_by_inv(u64_test_values[i], div_inv_64(15625)) << 9);
     }
 }
 

From e8e31623832e349a5575ec0b594812f9c2985ace Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Tue, 5 Jun 2018 19:12:56 -0400
Subject: [PATCH 08/12] fixup! sys/div: calculate magic numbers at compile time

---
 sys/include/div.h | 308 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 218 insertions(+), 90 deletions(-)

diff --git a/sys/include/div.h b/sys/include/div.h
index 32365070e49e..236b29b848f0 100644
--- a/sys/include/div.h
+++ b/sys/include/div.h
@@ -28,6 +28,7 @@
 #ifndef DIV_H
 #define DIV_H
 
+#include <inttypes.h>
 #include <stdint.h>
 
 #include "assert.h"
@@ -37,116 +38,168 @@ extern "C" {
 #endif
 
 /**
- * @brief   Calculate 8-bit multiplicative inverse
+ * @brief   Internal 65-bit shift and comparison: 2*x > y
  *
- * Result is calculated during compilation (requires const num). Bitshifted by
- * 9 to give the most representation to the smallest interesting number (3)
+ * @param[in] x    operand that is doubled; 2*x may need 65 bits
+ * @param[in] y    operand that 2*x is compared against
  *
- * @param[in] num  reciprocal
+ * @return         if 2*x > y then 1, else 0
+ */
+__attribute__((always_inline)) static inline uint8_t _div_shcom(
+                                             const uint64_t x, const uint64_t y)
+{
+    if (x & (1ULL << 63)) {
+        return 1;
+    }
+
+    if ((x << 1) > y) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/**
+ * @brief   Internal 65-bit shift and subtraction: 2*x - y
  *
- * @return         result, bitshifted by 9
+ * @param[in] x    minuend / 2
+ * @param[in] y    subtrahend
+ *
+ * @return         2*x - y (modulo 2^64)
  */
-__attribute__((always_inline)) static inline uint8_t div_inv_8(
-                                             const uint8_t num)
+__attribute__((always_inline)) static inline uint64_t _div_shsub(
+                                             const uint64_t x, const uint64_t y)
 {
-    /* cannot be represented due to bit shift */
-    assert(num > 2);
+    if (x & (1ULL << 63)) {
+        uint64_t tmp;
 
-    const uint16_t numerator = (1 << 9);
+        /* = 2^64 - 1 - y */
+        tmp = (uint64_t)(-1) - y;
 
-    if ((numerator % num) * 2 >= num) {
-        /* fraction is >= 0.5, so round up */
-        return (uint8_t)(numerator / num) + 1;
-    }
-    else {
-        return (uint8_t)(numerator / num);
+        /* = (2^64 - 1 - y) + (2*x - 2^64) */
+        /* = 2*x - y - 1 */
+        tmp += (x << 1);
+
+        /* = 2*x - y */
+        return tmp + 1;
     }
+
+    return (x << 1) - y;
 }
 
 /**
- * @brief   Calculate 16-bit multiplicative inverse
+ * @brief   Internal 64-bit fraction calculation
  *
- * Result is calculated during compilation (requires const num). Bitshifted by
- * 17 to give the most representation to the smallest interesting number (3)
+ * @param[in] num  numerator
+ * @param[in] den  denominator
  *
- * @param[in] num  reciprocal
- *
- * @return         result, bitshifted by 17
+ * @return         result, bitshifted by 64
  */
-__attribute__((always_inline)) static inline uint16_t div_inv_16(
-                                             const uint16_t num)
+__attribute__((always_inline, optimize("unroll-all-loops"))) static inline
+               uint64_t _div_frac(const uint64_t num, const uint64_t den)
 {
-    /* cannot be represented due to bit shift */
-    assert(num > 2);
+    uint64_t ans = 0, rem = num;
 
-    const uint32_t numerator = (1UL << 17);
+    /* binary long division */
+    for (uint8_t i = 0; i < 64; i++) {
+        if (_div_shcom(rem, den)) {
+            /* 2*rem exceeds den: this quotient bit is 1 */
+            ans |= (1ULL << (63 - i));
 
-    if ((numerator % num) * 2 >= num) {
-        /* fraction is >= 0.5, so round up */
-        return (uint16_t)(numerator / num) + 1;
+            /* subtract and move to next bit */
+            rem = _div_shsub(rem, den);
+        }
+        else {
+            /* move to next bit */
+            rem <<= 1;
+        }
     }
-    else {
-        return (uint16_t)(numerator / num);
+
+    /* round up when the leftover remainder is more than half of den */
+    if (_div_shcom(rem, den)) {
+        ans++;
     }
+
+    return ans;
 }
 
 /**
- * @brief   Calculate 32-bit multiplicative inverse
+ * @brief   Compile-time calculate 16-bit fraction
  *
- * Result is calculated during compilation (requires const num). Bitshifted by
- * 33 to give the most representation to the smallest interesting number (3)
+ * For fractions that are less than one. Result is calculated during
+ * compilation (requires const input).
  *
- * @param[in] num  reciprocal
+ * @param[in] num  numerator
+ * @param[in] den  denominator
  *
- * @return         result, bitshifted by 33
+ * @return         result, bitshifted by 16
  */
-__attribute__((always_inline)) static inline uint32_t div_inv_32(
-                                             const uint32_t num)
+__attribute__((always_inline)) static inline uint16_t div_frac_16(
+                               const uint16_t num, const uint16_t den)
 {
-    /* cannot be represented due to bit shift */
-    assert(num > 2);
+    assert((den > 0) && (num < den));
 
-    const uint64_t numerator = (1ULL << 33);
+    /* done during compile time, so trade efficiency for simplicity */
+    const uint64_t ans = _div_frac((uint64_t)num << 48, (uint64_t)den << 48);
 
-    if ((numerator % num) * 2 >= num) {
-        /* fraction is >= 0.5, so round up */
-        return (uint32_t)(numerator / num) + 1;
+    if ((ans << 16) > (1ULL << 63)) {
+        /* discarded low 48 bits are above one half: round up */
+        return (uint16_t)(ans >> 48) + 1;
     }
     else {
-        return (uint32_t)(numerator / num);
+        return (uint16_t)(ans >> 48);
     }
 }
 
 /**
- * @brief   Calculate 64-bit multiplicative inverse
+ * @brief   Compile-time calculate 32-bit fraction
  *
- * Result is calculated during compilation (requires const num). Bitshifted by
- * 65 to give the most representation to the smallest interesting number (3)
+ * For fractions that are less than one. Result is calculated during
+ * compilation (requires const input).
  *
- * @param[in] num  reciprocal
+ * @param[in] num  numerator
+ * @param[in] den  denominator
  *
- * @return         result, bitshifted by 65
+ * @return         result, bitshifted by 32
  */
-__attribute__((always_inline)) static inline uint64_t div_inv_64(
-                                             const uint64_t num)
+__attribute__((always_inline)) static inline uint32_t div_frac_32(
+                               const uint32_t num, const uint32_t den)
 {
-    /* cannot be represented due to bit shift */
-    assert(num > 2);
+    assert((den > 0) && (num < den));
 
-    /* find the last bits without using 128-bit ints */
-    const uint64_t numerator = (1ULL << 63);
-    const uint64_t most_sig = (numerator / num) << 2;
-    const uint64_t least_sig = ((numerator % num) << 2) / num;
+    /* done during compile time, so trade efficiency for simplicity */
+    const uint64_t ans = _div_frac((uint64_t)num << 32, (uint64_t)den << 32);
 
-    if ((((numerator % num) << 2) % num) * 2 >= num) {
-        /* fraction is >= 0.5, so round up */
-        return (most_sig + least_sig + 1);
+    if ((ans << 32) > (1ULL << 63)) {
+        /* discarded low 32 bits are above one half: round up */
+        return (uint32_t)(ans >> 32) + 1;
     }
     else {
-        return (most_sig + least_sig);
+        return (uint32_t)(ans >> 32);
     }
 }
 
+/**
+ * @brief   Compile-time calculate 64-bit fraction
+ *
+ * For fractions that are less than one. Result is calculated during
+ * compilation (requires const input).
+ *
+ * @param[in] num  numerator
+ * @param[in] den  denominator
+ *
+ * @return         result, bitshifted by 64
+ */
+__attribute__((always_inline)) static inline uint64_t div_frac_64(
+                               const uint64_t num, const uint64_t den)
+{
+    assert((den > 0) && (num < den));
+
+    const uint64_t ans = _div_frac(num, den);
+
+    return ans;
+}
+
 /**
  * @internal
  * @brief Multiply two 64 bit integers into a 128 bit integer and return the upper half.
@@ -163,61 +216,136 @@ __attribute__((always_inline)) static inline uint64_t div_inv_64(
 uint64_t _div_mulhi64(const uint64_t a, const uint64_t b);
 
 /**
- * @brief   Divide 8-bit number using an inverse
+ * @brief   Multiply 16-bit number with a fraction
  *
- * @param[in] val  numerator
- * @param[in] inv  multiplicative inverse of denominator (bitshifted by 9)
+ * @param[in] val   integer
+ * @param[in] frac  fraction (bitshifted by 16)
  *
- * @return         result
+ * @return          result
  */
-static inline uint8_t div_u8_by_inv(const uint8_t val, const uint8_t inv)
+static inline uint16_t div_mul_w_frac_16(const uint16_t val, const uint16_t frac)
 {
-    const uint16_t tmp = (uint16_t)val * (uint16_t)inv;
+    const uint32_t tmp = (uint32_t)val * (uint32_t)frac;
 
-    return (uint8_t)(tmp >> 9);
+    return (uint16_t)(tmp >> 16);
 }
 
 /**
- * @brief   Divide 16-bit number using an inverse
+ * @brief   Multiply 32-bit number with a fraction
  *
- * @param[in] val  numerator
- * @param[in] inv  multiplicative inverse of denominator (bitshifted by 17)
+ * @param[in] val   integer
+ * @param[in] frac  fraction (bitshifted by 32)
  *
- * @return         result
+ * @return          result
  */
-static inline uint16_t div_u16_by_inv(const uint16_t val, const uint16_t inv)
+static inline uint32_t div_mul_w_frac_32(const uint32_t val, const uint32_t frac)
 {
-    const uint32_t tmp = (uint32_t)val * (uint32_t)inv;
+    const uint64_t tmp = (uint64_t)val * (uint64_t)frac;
 
-    return (uint16_t)(tmp >> 17);
+    return (uint32_t)(tmp >> 32);
 }
 
 /**
- * @brief   Divide 32-bit number using an inverse
+ * @brief   Multiply 64-bit number with a fraction
  *
- * @param[in] val  numerator
- * @param[in] inv  multiplicative inverse of denominator (bitshifted by 33)
+ * @param[in] val   integer
+ * @param[in] frac  fraction (bitshifted by 64)
  *
- * @return         result
+ * @return          result
  */
-static inline uint32_t div_u32_by_inv(const uint32_t val, const uint32_t inv)
+static inline uint64_t div_mul_w_frac_64(const uint64_t val, const uint64_t frac)
 {
-    const uint64_t tmp = (uint64_t)val * (uint64_t)inv;
+    return _div_mulhi64(val, frac);
+}
+
+/**
+ * @brief   Division of a 16-bit number with high accuracy
+ *
+ * This is partially calculated during compilation for speed, which requires
+ * that the denominator is a constant greater than 1.
+ *
+ * @param[in] num   numerator
+ * @param[in] den   denominator
+ *
+ * @return          result
+ */
+__attribute__((always_inline, optimize("unroll-all-loops"))) static inline
+               uint16_t div_16(const uint16_t num, const uint16_t den)
+{
+    uint8_t exp;
+
+    /* find highest power of two less than den */
+    for (exp = 15; exp > 0; exp--) {
+        if ((1U << exp) < den) {
+            break;
+        }
+    }
+
+    /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */
+    const uint16_t inv = div_frac_16((1U << exp), den);
+
+    /* the last step is the only thing calculated at runtime */
+    return (div_mul_w_frac_16(num, inv) >> exp);
+}
+
+/**
+ * @brief   Division of a 32-bit number with high accuracy
+ *
+ * This is partially calculated during compilation for speed, which requires
+ * that the denominator is a constant greater than 1.
+ *
+ * @param[in] num   numerator
+ * @param[in] den   denominator
+ *
+ * @return          result
+ */
+__attribute__((always_inline, optimize("unroll-all-loops"))) static inline
+               uint32_t div_32(const uint32_t num, const uint32_t den)
+{
+    uint8_t exp;
+
+    /* find highest power of two less than den */
+    for (exp = 31; exp > 0; exp--) {
+        if ((1UL << exp) < den) {
+            break;
+        }
+    }
+
+    /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */
+    const uint32_t inv = div_frac_32((1UL << exp), den);
 
-    return (uint32_t)(tmp >> 33);
+    /* the last step is the only thing calculated at runtime */
+    return (div_mul_w_frac_32(num, inv) >> exp);
 }
 
 /**
- * @brief   Divide 64-bit number using an inverse
+ * @brief   Division of a 64-bit number with high accuracy
  *
- * @param[in] val  numerator
- * @param[in] inv  multiplicative inverse of denominator (bitshifted by 65)
+ * This is partially calculated during compilation for speed, which requires
+ * that the denominator is a constant greater than 1.
  *
- * @return         result
+ * @param[in] num   numerator
+ * @param[in] den   denominator
+ *
+ * @return          result
  */
-static inline uint64_t div_u64_by_inv(const uint64_t val, const uint64_t inv)
+__attribute__((always_inline, optimize("unroll-all-loops"))) static inline
+               uint64_t div_64(const uint64_t num, const uint64_t den)
 {
-    return (_div_mulhi64(val, inv) >> 1);
+    uint8_t exp;
+
+    /* find highest power of two less than den */
+    for (exp = 63; exp > 0; exp--) {
+        if ((1ULL << exp) < den) {
+            break;
+        }
+    }
+
+    /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */
+    const uint64_t inv = div_frac_64((1ULL << exp), den);
+
+    /* the last step is the only thing calculated at runtime */
+    return (div_mul_w_frac_64(num, inv) >> exp);
 }
 
 #ifdef __cplusplus

From d69716b433106c4c159118aa356acf70615b3739 Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Tue, 5 Jun 2018 19:13:22 -0400
Subject: [PATCH 09/12] fixup! cpu/mips32r2_common: sys/div API change

---
 cpu/mips32r2_common/periph/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpu/mips32r2_common/periph/timer.c b/cpu/mips32r2_common/periph/timer.c
index 2629dd8a2508..4cb107cc6a95 100644
--- a/cpu/mips32r2_common/periph/timer.c
+++ b/cpu/mips32r2_common/periph/timer.c
@@ -83,7 +83,7 @@ int gettimeofday(struct timeval *__restrict __p, void *__restrict __tz)
     (void)__tz;
 
     uint64_t now = counter * US_PER_MS;
-    __p->tv_sec = div_u64_by_inv(now, div_inv_64(1000000));
+    __p->tv_sec = div_64(now, 1000000UL);
     __p->tv_usec = now - (__p->tv_sec * US_PER_SEC);
 
     return 0;

From ea729a364d6b8ca0b45f046c5cdd6203e55923af Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Tue, 5 Jun 2018 19:13:55 -0400
Subject: [PATCH 10/12] fixup! tests/unittests: sys/div API change

---
 tests/unittests/tests-div/tests-div.c | 37 +++++++++++++++------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c
index 289c4bd5f99e..8c79c60ff976 100644
--- a/tests/unittests/tests-div/tests-div.c
+++ b/tests/unittests/tests-div/tests-div.c
@@ -74,14 +74,14 @@ static void test_div_u64_by_15625(void)
         DEBUG("Dividing %12"PRIu32" by 15625...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] / 15625,
-            div_u64_by_inv(u32_test_values[i], div_inv_64(15625)));
+            div_64(u32_test_values[i], 15625));
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
         DEBUG("Dividing %12"PRIu64" by 15625...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u64_test_values[i] / 15625,
-            div_u64_by_inv(u64_test_values[i], div_inv_64(15625)));
+            div_64(u64_test_values[i], 15625));
     }
 }
 
@@ -91,41 +91,44 @@ static void test_div_u32_by_15625div512(void)
         DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] * 512lu / 15625,
-            div_u32_by_inv(u32_test_values[i], div_inv_32(15625)) << 9);
+            div_mul_w_frac_32(u32_test_values[i],
+                              div_frac_32((512 << 4), 15625)) >> 4);
     }
 }
 
-static void test_div_u64_by_1000000(void)
+static void test_div_u64_by_15625div512(void)
 {
     for (unsigned i = 0; i < N_U32_VALS; i++) {
-        DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]);
+        DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            (uint64_t)u32_test_values[i] / 1000000lu,
-            div_u64_by_inv(u32_test_values[i], div_inv_64(1000000)));
+            (uint64_t)u32_test_values[i] * 512lu / 15625,
+            div_mul_w_frac_64(u32_test_values[i],
+                              div_frac_64((512 << 4), 15625)) >> 4);
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
-        DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]);
+        DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            u64_test_values[i] / 1000000lu,
-            div_u64_by_inv(u64_test_values[i], div_inv_64(1000000)));
+            u64_15625_512_expected_values[i],
+            div_mul_w_frac_64(u64_test_values[i],
+                              div_frac_64((512 << 4), 15625)) >> 4);
     }
 }
 
-static void test_div_u64_by_15625div512(void)
+static void test_div_u64_by_1000000(void)
 {
     for (unsigned i = 0; i < N_U32_VALS; i++) {
-        DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
+        DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            (uint64_t)u32_test_values[i] * 512lu / 15625,
-            div_u64_by_inv(u32_test_values[i], div_inv_64(15625)) << 9);
+            (uint64_t)u32_test_values[i] / 1000000lu,
+            div_64(u32_test_values[i], 1000000UL));
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
-        DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]);
+        DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            u64_15625_512_expected_values[i],
-            div_u64_by_inv(u64_test_values[i], div_inv_64(15625)) << 9);
+            u64_test_values[i] / 1000000lu,
+            div_64(u64_test_values[i], 1000000UL));
     }
 }
 

From b1429f0c5c101b1db9562efcc9b837be24dc3a34 Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Tue, 5 Jun 2018 19:14:26 -0400
Subject: [PATCH 11/12] fixup! sys/*: sys/div API change

---
 sys/evtimer/evtimer.c                  | 2 +-
 sys/include/xtimer/tick_conversion.h   | 6 ++++--
 sys/newlib_syscalls_default/syscalls.c | 2 +-
 sys/random/minstd.c                    | 2 +-
 sys/xtimer/xtimer.c                    | 2 +-
 5 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/sys/evtimer/evtimer.c b/sys/evtimer/evtimer.c
index 981c37f46a54..815c08e2eee8 100644
--- a/sys/evtimer/evtimer.c
+++ b/sys/evtimer/evtimer.c
@@ -131,7 +131,7 @@ static uint32_t _get_offset(xtimer_t *timer)
         /* a higher value would overflow the result type */
         assert(target_us <= 536870911999LLU);
 
-        return (uint32_t)div_u64_by_inv(target_us, div_inv_64(125));
+        return (uint32_t)div_64(target_us, 125);
     }
 }
 
diff --git a/sys/include/xtimer/tick_conversion.h b/sys/include/xtimer/tick_conversion.h
index ac9318911347..9add96aa588c 100644
--- a/sys/include/xtimer/tick_conversion.h
+++ b/sys/include/xtimer/tick_conversion.h
@@ -102,11 +102,13 @@ static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) {
  * multiplying by the fraction (32768 / 1000000), we will instead use
  * (512 / 15625), which reduces the truncation caused by the integer widths */
 static inline uint32_t _xtimer_ticks_from_usec(uint32_t usec) {
-    return div_u32_by_inv(usec, div_inv_32(15625)) << 9;
+    /* scale 512/15625 up by 2^4 to keep more fraction bits, then shift back */
+    return div_mul_w_frac_32(usec, div_frac_32((512 << 4), 15625)) >> 4;
 }
 
 static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) {
-    return div_u64_by_inv(usec, div_inv_64(15625)) << 9;
+    /* scale 512/15625 up by 2^4 to keep more fraction bits, then shift back */
+    return div_mul_w_frac_64(usec, div_frac_64((512 << 4), 15625)) >> 4;
 }
 
 static inline uint32_t _xtimer_usec_from_ticks(uint32_t ticks) {
diff --git a/sys/newlib_syscalls_default/syscalls.c b/sys/newlib_syscalls_default/syscalls.c
index 4912324fb613..a71e2ade9377 100644
--- a/sys/newlib_syscalls_default/syscalls.c
+++ b/sys/newlib_syscalls_default/syscalls.c
@@ -503,7 +503,7 @@ int _gettimeofday_r(struct _reent *r, struct timeval *restrict tp, void *restric
     (void) r;
     (void) tzp;
     uint64_t now = xtimer_now_usec64();
-    tp->tv_sec = div_u64_by_inv(now, div_inv_64(1000000));
+    tp->tv_sec = div_64(now, 1000000UL);
     tp->tv_usec = now - (tp->tv_sec * US_PER_SEC);
     return 0;
 }
diff --git a/sys/random/minstd.c b/sys/random/minstd.c
index 8530756117bb..12e786bd3f23 100644
--- a/sys/random/minstd.c
+++ b/sys/random/minstd.c
@@ -41,7 +41,7 @@ static uint32_t _seed = 1;
 
 int rand_minstd(void)
 {
-    uint32_t hi = div_u32_by_inv(_seed, div_inv_32(44488));
+    uint32_t hi = div_32(_seed, 44488);
     uint32_t lo = _seed - hi*44488;
     uint32_t test = (a * lo) - (r * hi);
 
diff --git a/sys/xtimer/xtimer.c b/sys/xtimer/xtimer.c
index f3e8c82231da..f723c9c819d8 100644
--- a/sys/xtimer/xtimer.c
+++ b/sys/xtimer/xtimer.c
@@ -186,7 +186,7 @@ void xtimer_now_timex(timex_t *out)
 {
     uint64_t now = xtimer_usec_from_ticks64(xtimer_now64());
 
-    out->seconds = div_u64_by_inv(now, div_inv_64(1000000));
+    out->seconds = div_64(now, 1000000UL);
     out->microseconds = now - (out->seconds * US_PER_SEC);
 }
 

From 5a8f68d93e8923bddad4e704634d14d01c6e3f8e Mon Sep 17 00:00:00 2001
From: Matthew Blue <matthew.blue.neuro@gmail.com>
Date: Thu, 7 Jun 2018 03:59:45 -0400
Subject: [PATCH 12/12] fixup! sys/div: calculate magic numbers at compile time

---
 sys/include/div.h | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/sys/include/div.h b/sys/include/div.h
index 236b29b848f0..9333ca929d98 100644
--- a/sys/include/div.h
+++ b/sys/include/div.h
@@ -45,8 +45,8 @@ extern "C" {
  *
  * @return         if 2*x > y then 1, else 0
  */
-__attribute__((always_inline)) static inline uint8_t _div_shcom(
-                                             const uint64_t x, const uint64_t y)
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint8_t _div_shcom(const uint64_t x, const uint64_t y)
 {
     if (x & (1ULL << 63)) {
         return 1;
@@ -67,8 +67,8 @@ __attribute__((always_inline)) static inline uint8_t _div_shcom(
  *
  * @return         difference
  */
-__attribute__((always_inline)) static inline uint64_t _div_shsub(
-                                             const uint64_t x, const uint64_t y)
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint64_t _div_shsub(const uint64_t x, const uint64_t y)
 {
     if (x & (1ULL << 63)) {
         uint64_t tmp;
@@ -95,8 +95,9 @@ __attribute__((always_inline)) static inline uint64_t _div_shsub(
  *
  * @return         result, bitshifted by 64
  */
-__attribute__((always_inline, optimize("unroll-all-loops"))) static inline
-               uint64_t _div_frac(const uint64_t num, const uint64_t den)
+__attribute__((always_inline))
+__attribute__((optimize("unroll-all-loops", "merge-all-constants")))
+    static inline uint64_t _div_frac(const uint64_t num, const uint64_t den)
 {
     uint64_t ans = 0, rem = num;
 
@@ -134,8 +135,8 @@ __attribute__((always_inline, optimize("unroll-all-loops"))) static inline
  *
  * @return         result, bitshifted by 16
  */
-__attribute__((always_inline)) static inline uint16_t div_frac_16(
-                               const uint16_t num, const uint16_t den)
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint16_t div_frac_16(const uint16_t num, const uint16_t den)
 {
     assert((den > 0) && (num < den));
 
@@ -162,8 +163,8 @@ __attribute__((always_inline)) static inline uint16_t div_frac_16(
  *
  * @return         result, bitshifted by 32
  */
-__attribute__((always_inline)) static inline uint32_t div_frac_32(
-                               const uint32_t num, const uint32_t den)
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint32_t div_frac_32(const uint32_t num, const uint32_t den)
 {
     assert((den > 0) && (num < den));
 
@@ -190,8 +191,8 @@ __attribute__((always_inline)) static inline uint32_t div_frac_32(
  *
  * @return         result, bitshifted by 64
  */
-__attribute__((always_inline)) static inline uint64_t div_frac_64(
-                               const uint64_t num, const uint64_t den)
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint64_t div_frac_64(const uint64_t num, const uint64_t den)
 {
     assert((den > 0) && (num < den));
 
@@ -269,8 +270,9 @@ static inline uint64_t div_mul_w_frac_64(const uint64_t val, const uint64_t frac
  *
  * @return          result
  */
-__attribute__((always_inline, optimize("unroll-all-loops"))) static inline
-               uint16_t div_16(const uint16_t num, const uint16_t den)
+__attribute__((always_inline))
+__attribute__((optimize("unroll-all-loops", "merge-all-constants")))
+    static inline uint16_t div_16(const uint16_t num, const uint16_t den)
 {
     uint8_t exp;
 
@@ -299,8 +301,9 @@ __attribute__((always_inline, optimize("unroll-all-loops"))) static inline
  *
  * @return          result
  */
-__attribute__((always_inline, optimize("unroll-all-loops"))) static inline
-               uint32_t div_32(const uint32_t num, const uint32_t den)
+__attribute__((always_inline))
+__attribute__((optimize("unroll-all-loops", "merge-all-constants")))
+    static inline uint32_t div_32(const uint32_t num, const uint32_t den)
 {
     uint8_t exp;
 
@@ -329,8 +332,9 @@ __attribute__((always_inline, optimize("unroll-all-loops"))) static inline
  *
  * @return          result
  */
-__attribute__((always_inline, optimize("unroll-all-loops"))) static inline
-               uint64_t div_64(const uint64_t num, const uint64_t den)
+__attribute__((always_inline))
+__attribute__((optimize("unroll-all-loops", "merge-all-constants")))
+    static inline uint64_t div_64(const uint64_t num, const uint64_t den)
 {
     uint8_t exp;