diff --git a/cpu/mips32r2_common/periph/timer.c b/cpu/mips32r2_common/periph/timer.c
index 16e373f2f730..4cb107cc6a95 100644
--- a/cpu/mips32r2_common/periph/timer.c
+++ b/cpu/mips32r2_common/periph/timer.c
@@ -83,7 +83,7 @@ int gettimeofday(struct timeval *__restrict __p, void *__restrict __tz)
     (void)__tz;
 
     uint64_t now = counter * US_PER_MS;
-    __p->tv_sec = div_u64_by_1000000(now);
+    __p->tv_sec = div_64(now, 1000000UL);
     __p->tv_usec = now - (__p->tv_sec * US_PER_SEC);
 
     return 0;
diff --git a/sys/evtimer/evtimer.c b/sys/evtimer/evtimer.c
index aa0b11df9e35..815c08e2eee8 100644
--- a/sys/evtimer/evtimer.c
+++ b/sys/evtimer/evtimer.c
@@ -124,8 +124,14 @@ static uint32_t _get_offset(xtimer_t *timer)
     }
     else {
         target_us -= now_us;
+
         /* add half of 125 so integer division rounds to nearest */
-        return div_u64_by_125((target_us >> 3) + 62);
+        target_us = (target_us >> 3) + 62;
+
+        /* a higher value would overflow the result type */
+        assert(target_us <= 536870911999LLU);
+
+        return (uint32_t)div_64(target_us, 125);
     }
 }
 
diff --git a/sys/include/div.h b/sys/include/div.h
index 54bfa1524257..9333ca929d98 100644
--- a/sys/include/div.h
+++ b/sys/include/div.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2015 Kaspar Schleiser <kaspar@schleiser.de>
- * Copyright (C) 2016 Eistec AB
+ *               2016 Eistec AB
+ *               2018 Acutam Automation, LLC
  *
  * This file is subject to the terms and conditions of the GNU Lesser
  * General Public License v2.1. See the file LICENSE in the top level
@@ -16,35 +17,189 @@
  *
  * @file
  * @ingroup   sys
+ *
  * @author    Kaspar Schleiser <kaspar@schleiser.de>
  * @author    Joakim Nohlgård <joakim.nohlgard@eistec.se>
+ * @author    Matthew Blue <matthew.blue.neuro@gmail.com>
+ *
  * @{
  */
 
 #ifndef DIV_H
 #define DIV_H
 
-#include <assert.h>
+#include <inttypes.h>
 #include <stdint.h>
 
+#include "assert.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /**
- * @brief Approximation of (2**l)/d for d=15625, l=12, 32 bits
+ * @brief   Internal 65-bit shift and comparison: 2*x > y
+ *
+ * @param[in] x    greater / 2
+ * @param[in] y    lesser
+ *
+ * @return         if 2*x > y then 1, else 0
  */
-#define DIV_H_INV_15625_32    0x431bde83ul
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint8_t _div_shcom(const uint64_t x, const uint64_t y)
+{
+    if (x & (1ULL << 63)) {
+        return 1;
+    }
+
+    if ((x << 1) > y) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/**
+ * @brief   Internal 65-bit shift and subtraction: 2*x - y
+ *
+ * @param[in] x    minuend / 2
+ * @param[in] y    subtrahend
+ *
+ * @return         difference
+ */
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint64_t _div_shsub(const uint64_t x, const uint64_t y)
+{
+    if (x & (1ULL << 63)) {
+        uint64_t tmp;
+
+        /* = 2^64 - 1 - y */
+        tmp = (uint64_t)(-1) - y;
+
+        /* = (2^64 - 1 - y) + (2*x - 2^64) */
+        /* = 2*x - y - 1 */
+        tmp += (x << 1);
+
+        /* = 2*x - y */
+        return tmp + 1;
+    }
+
+    return (x << 1) - y;
+}
 
 /**
- * @brief Approximation of (2**l)/d for d=15625, l=12, 64 bits
+ * @brief   Internal 64-bit fraction calculation
+ *
+ * @param[in] num  numerator
+ * @param[in] den  denominator
+ *
+ * @return         result, bitshifted by 64
  */
-#define DIV_H_INV_15625_64    0x431bde82d7b634dbull
+__attribute__((always_inline))
+__attribute__((optimize("unroll-all-loops", "merge-all-constants")))
+    static inline uint64_t _div_frac(const uint64_t num, const uint64_t den)
+{
+    uint64_t ans = 0, rem = num;
+
+    /* binary long division */
+    for (uint8_t i = 0; i < 64; i++) {
+        if (_div_shcom(rem, den)) {
+            /* den goes into num for this bit */
+            ans |= (1ULL << (63 - i));
+
+            /* subtract and move to next bit */
+            rem = _div_shsub(rem, den);
+        }
+        else {
+            /* move to next bit */
+            rem <<= 1;
+        }
+    }
+
+    /* rounding */
+    if (_div_shcom(rem, den)) {
+        ans++;
+    }
+
+    return ans;
+}
 
 /**
- * @brief Required shifts for division by 15625, l above
+ * @brief   Compile-time calculate 16-bit fraction
+ *
+ * For fractions that are less than one. Result is calculated during
+ * compilation (requires const input).
+ *
+ * @param[in] num  numerator
+ * @param[in] den  denominator
+ *
+ * @return         result, bitshifted by 16
  */
-#define DIV_H_INV_15625_SHIFT 12
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint16_t div_frac_16(const uint16_t num, const uint16_t den)
+{
+    assert((den > 0) && (num < den));
+
+    /* done during compile time, so trade efficiency for simplicity */
+    const uint64_t ans = _div_frac((uint64_t)num << 48, (uint64_t)den << 48);
+
+    if ((ans << 16) > (1ULL << 63)) {
+        /* round the result */
+        return (uint16_t)(ans >> 48) + 1;
+    }
+    else {
+        return (uint16_t)(ans >> 48);
+    }
+}
+
+/**
+ * @brief   Compile-time calculate 32-bit fraction
+ *
+ * For fractions that are less than one. Result is calculated during
+ * compilation (requires const input).
+ *
+ * @param[in] num  numerator
+ * @param[in] den  denominator
+ *
+ * @return         result, bitshifted by 32
+ */
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint32_t div_frac_32(const uint32_t num, const uint32_t den)
+{
+    assert((den > 0) && (num < den));
+
+    /* done during compile time, so trade efficiency for simplicity */
+    const uint64_t ans = _div_frac((uint64_t)num << 32, (uint64_t)den << 32);
+
+    if ((ans << 32) > (1ULL << 63)) {
+        /* round the result */
+        return (uint32_t)(ans >> 32) + 1;
+    }
+    else {
+        return (uint32_t)(ans >> 32);
+    }
+}
+
+/**
+ * @brief   Compile-time calculate 64-bit fraction
+ *
+ * For fractions that are less than one. Result is calculated during
+ * compilation (requires const input).
+ *
+ * @param[in] num  numerator
+ * @param[in] den  denominator
+ *
+ * @return         result, bitshifted by 64
+ */
+__attribute__((always_inline)) __attribute__((optimize("merge-all-constants")))
+    static inline uint64_t div_frac_64(const uint64_t num, const uint64_t den)
+{
+    assert((den > 0) && (num < den));
+
+    const uint64_t ans = _div_frac(num, den);
+
+    return ans;
+}
 
 /**
  * @internal
@@ -62,123 +217,145 @@ extern "C" {
 uint64_t _div_mulhi64(const uint64_t a, const uint64_t b);
 
 /**
- * @brief Integer divide val by 15625, 64 bit version
+ * @brief   Multiply 16-bit number with a fraction
  *
- * @param[in]   val     dividend
- * @return      (val / 15625)
+ * @param[in] val   integer
+ * @param[in] frac  fraction (bitshifted by 16)
+ *
+ * @return          result
  */
-static inline uint64_t div_u64_by_15625(uint64_t val)
+static inline uint16_t div_mul_w_frac_16(const uint16_t val, const uint16_t frac)
 {
-    if (val > 16383999997ull) {
-        return (_div_mulhi64(DIV_H_INV_15625_64, val) >> DIV_H_INV_15625_SHIFT);
-    }
-    return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32);
+    const uint32_t tmp = (uint32_t)val * (uint32_t)frac;
+
+    return (uint16_t)(tmp >> 16);
 }
 
 /**
- * @brief Integer divide val by 125
+ * @brief   Multiply 32-bit number with a fraction
  *
- * This function can be used to convert uint64_t microsecond times (or
- * intervals) to miliseconds and store them in uint32_t variables, with up to
- * ~50 days worth of miliseconds ((2**32*1000) -1).
- * Use e.g.,  ms = div_u64_by_125(microseconds >> 3)
+ * @param[in] val   integer
+ * @param[in] frac  fraction (bitshifted by 32)
  *
- * @pre val <= 536870911999 ((2**32 * 125) -1)
- *
- * @param[in]   val     dividend
- * @return      (val / 125)
+ * @return          result
  */
-static inline uint32_t div_u64_by_125(uint64_t val)
+static inline uint32_t div_mul_w_frac_32(const uint32_t val, const uint32_t frac)
 {
-  /* a higher value would overflow the result type */
-  assert(val <= 536870911999LLU);
-
-  uint32_t hi = val >> 32;
-  uint32_t lo = val;
-  uint32_t r = (lo >> 16) + (hi << 16);
-  uint32_t res = r / 125;
-  r = ((r % 125) << 16) + (lo & 0xFFFF);
-  res = (res << 16) + r / 125;
-  return res;
+    const uint64_t tmp = (uint64_t)val * (uint64_t)frac;
+
+    return (uint32_t)(tmp >> 32);
 }
 
 /**
- * @brief Integer divide val by 1000000
+ * @brief   Multiply 64-bit number with a fraction
  *
- * @param[in]   val     dividend
- * @return      (val / 1000000)
+ * @param[in] val   integer
+ * @param[in] frac  fraction (bitshifted by 64)
+ *
+ * @return          result
  */
-static inline uint64_t div_u64_by_1000000(uint64_t val)
+static inline uint64_t div_mul_w_frac_64(const uint64_t val, const uint64_t frac)
 {
-    return div_u64_by_15625(val) >> 6;
+    return _div_mulhi64(val, frac);
 }
 
 /**
- * @brief Divide val by (15625/512)
- *
- * This is used to quantize a 1MHz value to the closest 32768Hz value,
- * e.g., for timers.
+ * @brief   Division of a 16-bit number with high-accuracy
  *
- * The algorithm uses the modular multiplicative inverse of 15625 to use only
- * multiplication and bit shifts to perform the division.
+ * This is partially calculated during compilation for speed, which requires
+ * that the denominator is a constant.
  *
- * The result will be equal to the mathematical expression: floor((val * 512) / 15625)
+ * @param[in] num   numerator
+ * @param[in] den   denominator
  *
- * @param[in]   val     dividend
- * @return      (val / (15625/512))
+ * @return          result
  */
-static inline uint32_t div_u32_by_15625div512(uint32_t val)
+__attribute__((always_inline))
+__attribute__((optimize("unroll-all-loops", "merge-all-constants")))
+    static inline uint16_t div_16(const uint16_t num, const uint16_t den)
 {
-    return ((uint64_t)(val) * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9);
+    uint8_t exp;
+
+    /* find highest power of two less than den */
+    for (exp = 15; exp > 0; exp--) {
+        if ((1U << exp) < den) {
+            break;
+        }
+    }
+
+    /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */
+    const uint16_t inv = div_frac_16((1U << exp), den);
+
+    /* last step is only thing calculated at runtime */
+    return (div_mul_w_frac_16(num, inv) >> exp);
 }
 
 /**
- * @brief Divide val by (15625/512)
+ * @brief   Division of a 32-bit number with high-accuracy
  *
- * This is used to quantize a 1MHz value to the closest 32768Hz value,
- * e.g., for timers.
+ * This is partially calculated during compilation for speed, which requires
+ * that the denominator is a constant.
  *
- * @param[in]   val     dividend
- * @return      (val / (15625/512))
+ * @param[in] num   numerator
+ * @param[in] den   denominator
+ *
+ * @return          result
  */
-static inline uint64_t div_u64_by_15625div512(uint64_t val)
+__attribute__((always_inline))
+__attribute__((optimize("unroll-all-loops", "merge-all-constants")))
+    static inline uint32_t div_32(const uint32_t num, const uint32_t den)
 {
-    /*
-     * This saves around 1400 bytes of ROM on Cortex-M platforms (both ARMv6 and
-     * ARMv7) from avoiding linking against __aeabi_uldivmod and related helpers
-     */
-    if (val > 16383999997ull) {
-        /* this would overflow 2^64 in the multiplication that follows, need to
-         * use the long version */
-        return (_div_mulhi64(DIV_H_INV_15625_64, val) >> (DIV_H_INV_15625_SHIFT - 9));
+    uint8_t exp;
+
+    /* find highest power of two less than den */
+    for (exp = 31; exp > 0; exp--) {
+        if ((1UL << exp) < den) {
+            break;
+        }
     }
-    return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9);
-}
 
-/**
- * @brief Integer divide val by 44488
- *
- * @param[in]   val     dividend
- * @return      (val / 44488)
- */
-static inline uint32_t div_u32_by_44488(uint32_t val)
-{
-    return ((uint64_t)val * 0xBC8F1391UL) >> (15 + 32);
+    /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */
+    const uint32_t inv = div_frac_32((1UL << exp), den);
+
+    /* last step is only thing calculated at runtime */
+    return (div_mul_w_frac_32(num, inv) >> exp);
 }
 
 /**
- * @brief Modulo 44488
+ * @brief   Division of a 64-bit number with high-accuracy
  *
- * @param[in]   val     dividend
- * @return      (val % 44488)
+ * This is partially calculated during compilation for speed, which requires
+ * that the denominator is a constant.
+ *
+ * @param[in] num   numerator
+ * @param[in] den   denominator
+ *
+ * @return          result
  */
-static inline uint32_t div_u32_mod_44488(uint32_t val)
+__attribute__((always_inline))
+__attribute__((optimize("unroll-all-loops", "merge-all-constants")))
+    static inline uint64_t div_64(const uint64_t num, const uint64_t den)
 {
-    return val - (div_u32_by_44488(val)*44488);
+    uint8_t exp;
+
+    /* find highest power of two less than den */
+    for (exp = 63; exp > 0; exp--) {
+        if ((1ULL << exp) < den) {
+            break;
+        }
+    }
+
+    /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */
+    const uint64_t inv = div_frac_64((1ULL << exp), den);
+
+    /* last step is only thing calculated at runtime */
+    return (div_mul_w_frac_64(num, inv) >> exp);
 }
 
 #ifdef __cplusplus
 }
 #endif
+
 /** @} */
+
 #endif /* DIV_H */
diff --git a/sys/include/xtimer/tick_conversion.h b/sys/include/xtimer/tick_conversion.h
index a4306cf58f58..9add96aa588c 100644
--- a/sys/include/xtimer/tick_conversion.h
+++ b/sys/include/xtimer/tick_conversion.h
@@ -102,11 +102,13 @@ static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) {
  * multiplying by the fraction (32768 / 1000000), we will instead use
  * (512 / 15625), which reduces the truncation caused by the integer widths */
 static inline uint32_t _xtimer_ticks_from_usec(uint32_t usec) {
-    return div_u32_by_15625div512(usec);
+    /* bitshifts increase the accuracy */
+    return div_mul_w_frac_32(usec, div_frac_32((512 << 4), 15625)) >> 4;
 }
 
 static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) {
-    return div_u64_by_15625div512(usec);
+    /* bitshifts increase the accuracy */
+    return div_mul_w_frac_64(usec, div_frac_64((512 << 4), 15625)) >> 4;
 }
 
 static inline uint32_t _xtimer_usec_from_ticks(uint32_t ticks) {
diff --git a/sys/newlib_syscalls_default/syscalls.c b/sys/newlib_syscalls_default/syscalls.c
index 91543fcd186b..a71e2ade9377 100644
--- a/sys/newlib_syscalls_default/syscalls.c
+++ b/sys/newlib_syscalls_default/syscalls.c
@@ -503,7 +503,7 @@ int _gettimeofday_r(struct _reent *r, struct timeval *restrict tp, void *restric
     (void) r;
     (void) tzp;
     uint64_t now = xtimer_now_usec64();
-    tp->tv_sec = div_u64_by_1000000(now);
+    tp->tv_sec = div_64(now, 1000000UL);
     tp->tv_usec = now - (tp->tv_sec * US_PER_SEC);
     return 0;
 }
diff --git a/sys/random/minstd.c b/sys/random/minstd.c
index dc87738162d6..12e786bd3f23 100644
--- a/sys/random/minstd.c
+++ b/sys/random/minstd.c
@@ -41,8 +41,8 @@ static uint32_t _seed = 1;
 
 int rand_minstd(void)
 {
-    uint32_t hi = div_u32_by_44488(_seed);
-    uint32_t lo = div_u32_mod_44488(_seed);
+    uint32_t hi = div_32(_seed, 44488);
+    uint32_t lo = _seed - hi*44488;
     uint32_t test = (a * lo) - (r * hi);
 
     if(test > 0) {
diff --git a/sys/xtimer/xtimer.c b/sys/xtimer/xtimer.c
index 612bbc9622ff..f723c9c819d8 100644
--- a/sys/xtimer/xtimer.c
+++ b/sys/xtimer/xtimer.c
@@ -186,7 +186,7 @@ void xtimer_now_timex(timex_t *out)
 {
     uint64_t now = xtimer_usec_from_ticks64(xtimer_now64());
 
-    out->seconds = div_u64_by_1000000(now);
+    out->seconds = div_64(now, 1000000UL);
     out->microseconds = now - (out->seconds * US_PER_SEC);
 }
 
diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c
index 85010e05662f..8c79c60ff976 100644
--- a/tests/unittests/tests-div/tests-div.c
+++ b/tests/unittests/tests-div/tests-div.c
@@ -74,14 +74,14 @@ static void test_div_u64_by_15625(void)
         DEBUG("Dividing %12"PRIu32" by 15625...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] / 15625,
-            div_u64_by_15625(u32_test_values[i]));
+            div_64(u32_test_values[i], 15625));
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
         DEBUG("Dividing %12"PRIu64" by 15625...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u64_test_values[i] / 15625,
-            div_u64_by_15625(u64_test_values[i]));
+            div_64(u64_test_values[i], 15625));
     }
 }
 
@@ -91,41 +91,44 @@ static void test_div_u32_by_15625div512(void)
         DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u32_test_values[i] * 512lu / 15625,
-            div_u32_by_15625div512(u32_test_values[i]));
+            div_mul_w_frac_32(u32_test_values[i],
+                              div_frac_32((512 << 4), 15625)) >> 4);
     }
 }
 
-static void test_div_u64_by_1000000(void)
+static void test_div_u64_by_15625div512(void)
 {
     for (unsigned i = 0; i < N_U32_VALS; i++) {
-        DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]);
+        DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            (uint64_t)u32_test_values[i] / 1000000lu,
-            div_u64_by_1000000(u32_test_values[i]));
+            (uint64_t)u32_test_values[i] * 512lu / 15625,
+            div_mul_w_frac_64(u32_test_values[i],
+                              div_frac_64((512 << 4), 15625)) >> 4);
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
-        DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]);
+        DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            u64_test_values[i] / 1000000lu,
-            div_u64_by_1000000(u64_test_values[i]));
+            u64_15625_512_expected_values[i],
+            div_mul_w_frac_64(u64_test_values[i],
+                              div_frac_64((512 << 4), 15625)) >> 4);
     }
 }
 
-static void test_div_u64_by_15625div512(void)
+static void test_div_u64_by_1000000(void)
 {
     for (unsigned i = 0; i < N_U32_VALS; i++) {
-        DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]);
+        DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            (uint64_t)u32_test_values[i] * 512lu / 15625,
-            div_u64_by_15625div512(u32_test_values[i]));
+            (uint64_t)u32_test_values[i] / 1000000lu,
+            div_64(u32_test_values[i], 1000000UL));
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
-        DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]);
+        DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            u64_15625_512_expected_values[i],
-            div_u64_by_15625div512(u64_test_values[i]));
+            u64_test_values[i] / 1000000lu,
+            div_64(u64_test_values[i], 1000000UL));
     }
 }