diff --git a/cpu/mips32r2_common/periph/timer.c b/cpu/mips32r2_common/periph/timer.c index 16e373f2f730..4cb107cc6a95 100644 --- a/cpu/mips32r2_common/periph/timer.c +++ b/cpu/mips32r2_common/periph/timer.c @@ -83,7 +83,7 @@ int gettimeofday(struct timeval *__restrict __p, void *__restrict __tz) (void)__tz; uint64_t now = counter * US_PER_MS; - __p->tv_sec = div_u64_by_1000000(now); + __p->tv_sec = div_64(now, 1000000UL); __p->tv_usec = now - (__p->tv_sec * US_PER_SEC); return 0; diff --git a/sys/evtimer/evtimer.c b/sys/evtimer/evtimer.c index aa0b11df9e35..815c08e2eee8 100644 --- a/sys/evtimer/evtimer.c +++ b/sys/evtimer/evtimer.c @@ -124,8 +124,14 @@ static uint32_t _get_offset(xtimer_t *timer) } else { target_us -= now_us; + /* add half of 125 so integer division rounds to nearest */ - return div_u64_by_125((target_us >> 3) + 62); + target_us = (target_us >> 3) + 62; + + /* a higher value would overflow the result type */ + assert(target_us <= 536870911999LLU); + + return (uint32_t)div_64(target_us, 125); } } diff --git a/sys/include/div.h b/sys/include/div.h index 54bfa1524257..9333ca929d98 100644 --- a/sys/include/div.h +++ b/sys/include/div.h @@ -1,6 +1,7 @@ /* * Copyright (C) 2015 Kaspar Schleiser - * Copyright (C) 2016 Eistec AB + * 2016 Eistec AB + * 2018 Acutam Automation, LLC * * This file is subject to the terms and conditions of the GNU Lesser * General Public License v2.1. See the file LICENSE in the top level @@ -16,35 +17,189 @@ * * @file * @ingroup sys + * * @author Kaspar Schleiser * @author Joakim NohlgÄrd + * @author Matthew Blue + * * @{ */ #ifndef DIV_H #define DIV_H -#include +#include #include +#include "assert.h" + #ifdef __cplusplus extern "C" { #endif /** - * @brief Approximation of (2**l)/d for d=15625, l=12, 32 bits + * @brief Internal 65-bit shift and comparison: 2*x > y + * + * @param[in] x greater / 2 + * @param[in] y lesser + * + * @return if 2*x > y then 1, else 0 */ -#define DIV_H_INV_15625_32 0x431bde83ul +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint8_t _div_shcom(const uint64_t x, const uint64_t y) +{ + if (x & (1ULL << 63)) { + return 1; + } + + if ((x << 1) > y) { + return 1; + } + + return 0; +} + +/** + * @brief Internal 65-bit shift and subtraction: 2*x - y + * + * @param[in] x minuend / 2 + * @param[in] y subtrahend + * + * @return difference + */ +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint64_t _div_shsub(const uint64_t x, const uint64_t y) +{ + if (x & (1ULL << 63)) { + uint64_t tmp; + + /* = 2^64 - 1 - y */ + tmp = (uint64_t)(-1) - y; + + /* = (2^64 - 1 - y) + (2*x - 2^64) */ + /* = 2*x - y - 1 */ + tmp += (x << 1); + + /* = 2*x - y */ + return tmp + 1; + } + + return (x << 1) - y; +} /** - * @brief Approximation of (2**l)/d for d=15625, l=12, 64 bits + * @brief Internal 64-bit fraction calculation + * + * @param[in] num numerator + * @param[in] den denominator + * + * @return result, bitshifted by 64 */ -#define DIV_H_INV_15625_64 0x431bde82d7b634dbull +__attribute__((always_inline)) +__attribute__((optimize("unroll-all-loops", "merge-all-constants"))) + static inline uint64_t _div_frac(const uint64_t num, const uint64_t den) +{ + uint64_t ans = 0, rem = num; + + /* binary long division */ + for (uint8_t i = 0; i < 64; i++) { + if (_div_shcom(rem, den)) { + /* den goes into num for this bit */ + ans |= (1ULL << (63 - i)); + + /* subtract and move to next bit */ + rem = _div_shsub(rem, den); + } + else { + /* move to next bit */ + rem <<= 1; + } + } + + /* rounding */ + if (_div_shcom(rem, den)) { + ans++; + } + + return ans; +} /** - * @brief Required shifts for division by 15625, l above + * @brief Compile-time calculate 16-bit fraction + * + * For fractions that are less than one. Result is calculated during + * compilation (requires const input). + * + * @param[in] num numerator + * @param[in] den denominator + * + * @return result, bitshifted by 16 */ -#define DIV_H_INV_15625_SHIFT 12 +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint16_t div_frac_16(const uint16_t num, const uint16_t den) +{ + assert((den > 0) && (num < den)); + + /* done during compile time, so trade efficiency for simplicity */ + const uint64_t ans = _div_frac((uint64_t)num << 48, (uint64_t)den << 48); + + if ((ans << 16) > (1ULL << 63)) { + /* round the result */ + return (uint16_t)(ans >> 48) + 1; + } + else { + return (uint16_t)(ans >> 48); + } +} + +/** + * @brief Compile-time calculate 32-bit fraction + * + * For fractions that are less than one. Result is calculated during + * compilation (requires const input). + * + * @param[in] num numerator + * @param[in] den denominator + * + * @return result, bitshifted by 32 + */ +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint32_t div_frac_32(const uint32_t num, const uint32_t den) +{ + assert((den > 0) && (num < den)); + + /* done during compile time, so trade efficiency for simplicity */ + const uint64_t ans = _div_frac((uint64_t)num << 32, (uint64_t)den << 32); + + if ((ans << 32) > (1ULL << 63)) { + /* round the result */ + return (uint32_t)(ans >> 32) + 1; + } + else { + return (uint32_t)(ans >> 32); + } +} + +/** + * @brief Compile-time calculate 64-bit fraction + * + * For fractions that are less than one. Result is calculated during + * compilation (requires const input). + * + * @param[in] num numerator + * @param[in] den denominator + * + * @return result, bitshifted by 64 + */ +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint64_t div_frac_64(const uint64_t num, const uint64_t den) +{ + assert((den > 0) && (num < den)); + + const uint64_t ans = _div_frac(num, den); + + return ans; +} /** * @internal @@ -62,123 +217,145 @@ extern "C" { uint64_t _div_mulhi64(const uint64_t a, const uint64_t b); /** - * @brief Integer divide val by 15625, 64 bit version + * @brief Multiply 16-bit number with a fraction * - * @param[in] val dividend - * @return (val / 15625) + * @param[in] val integer + * @param[in] frac fraction (bitshifted by 16) + * + * @return result */ -static inline uint64_t div_u64_by_15625(uint64_t val) +static inline uint16_t div_mul_w_frac_16(const uint16_t val, const uint16_t frac) { - if (val > 16383999997ull) { - return (_div_mulhi64(DIV_H_INV_15625_64, val) >> DIV_H_INV_15625_SHIFT); - } - return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32); + const uint32_t tmp = (uint32_t)val * (uint32_t)frac; + + return (uint16_t)(tmp >> 16); } /** - * @brief Integer divide val by 125 + * @brief Multiply 32-bit number with a fraction * - * This function can be used to convert uint64_t microsecond times (or - * intervals) to miliseconds and store them in uint32_t variables, with up to - * ~50 days worth of miliseconds ((2**32*1000) -1). - * Use e.g., ms = div_u64_by_125(microseconds >> 3) + * @param[in] val integer + * @param[in] frac fraction (bitshifted by 32) * - * @pre val <= 536870911999 ((2**32 * 125) -1) - * - * @param[in] val dividend - * @return (val / 125) + * @return result */ -static inline uint32_t div_u64_by_125(uint64_t val) +static inline uint32_t div_mul_w_frac_32(const uint32_t val, const uint32_t frac) { - /* a higher value would overflow the result type */ - assert(val <= 536870911999LLU); - - uint32_t hi = val >> 32; - uint32_t lo = val; - uint32_t r = (lo >> 16) + (hi << 16); - uint32_t res = r / 125; - r = ((r % 125) << 16) + (lo & 0xFFFF); - res = (res << 16) + r / 125; - return res; + const uint64_t tmp = (uint64_t)val * (uint64_t)frac; + + return (uint32_t)(tmp >> 32); } /** - * @brief Integer divide val by 1000000 + * @brief Multiply 64-bit number with a fraction * - * @param[in] val dividend - * @return (val / 1000000) + * @param[in] val integer + * @param[in] frac fraction (bitshifted by 64) + * + * @return result */ -static inline uint64_t div_u64_by_1000000(uint64_t val) +static inline uint64_t div_mul_w_frac_64(const uint64_t val, const uint64_t frac) { - return div_u64_by_15625(val) >> 6; + return _div_mulhi64(val, frac); } /** - * @brief Divide val by (15625/512) - * - * This is used to quantize a 1MHz value to the closest 32768Hz value, - * e.g., for timers. + * @brief Division of a 16-bit number with high-accuracy * - * The algorithm uses the modular multiplicative inverse of 15625 to use only - * multiplication and bit shifts to perform the division. + * This is partially calculated during compilation for speed, which requires + * that the denominator is a constant. * - * The result will be equal to the mathematical expression: floor((val * 512) / 15625) + * @param[in] num numerator + * @param[in] den denominator * - * @param[in] val dividend - * @return (val / (15625/512)) + * @return result */ -static inline uint32_t div_u32_by_15625div512(uint32_t val) +__attribute__((always_inline)) +__attribute__((optimize("unroll-all-loops", "merge-all-constants"))) + static inline uint16_t div_16(const uint16_t num, const uint16_t den) { - return ((uint64_t)(val) * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9); + uint8_t exp; + + /* find highest power of two less than den */ + for (exp = 15; exp > 0; exp--) { + if ((1U << exp) < den) { + break; + } + } + + /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */ + const uint16_t inv = div_frac_16((1U << exp), den); + + /* last step is only thing calculated at runtime */ + return (div_mul_w_frac_16(num, inv) >> exp); } /** - * @brief Divide val by (15625/512) + * @brief Division of a 32-bit number with high-accuracy * - * This is used to quantize a 1MHz value to the closest 32768Hz value, - * e.g., for timers. + * This is partially calculated during compilation for speed, which requires + * that the denominator is a constant. * - * @param[in] val dividend - * @return (val / (15625/512)) + * @param[in] num numerator + * @param[in] den denominator + * + * @return result */ -static inline uint64_t div_u64_by_15625div512(uint64_t val) +__attribute__((always_inline)) +__attribute__((optimize("unroll-all-loops", "merge-all-constants"))) + static inline uint32_t div_32(const uint32_t num, const uint32_t den) { - /* - * This saves around 1400 bytes of ROM on Cortex-M platforms (both ARMv6 and - * ARMv7) from avoiding linking against __aeabi_uldivmod and related helpers - */ - if (val > 16383999997ull) { - /* this would overflow 2^64 in the multiplication that follows, need to - * use the long version */ - return (_div_mulhi64(DIV_H_INV_15625_64, val) >> (DIV_H_INV_15625_SHIFT - 9)); + uint8_t exp; + + /* find highest power of two less than den */ + for (exp = 31; exp > 0; exp--) { + if ((1UL << exp) < den) { + break; + } } - return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9); -} -/** - * @brief Integer divide val by 44488 - * - * @param[in] val dividend - * @return (val / 44488) - */ -static inline uint32_t div_u32_by_44488(uint32_t val) -{ - return ((uint64_t)val * 0xBC8F1391UL) >> (15 + 32); + /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */ + const uint32_t inv = div_frac_32((1UL << exp), den); + + /* last step is only thing calculated at runtime */ + return (div_mul_w_frac_32(num, inv) >> exp); } /** - * @brief Modulo 44488 + * @brief Division of a 64-bit number with high-accuracy * - * @param[in] val dividend - * @return (val % 44488) + * This is partially calculated during compilation for speed, which requires + * that the denominator is a constant. + * + * @param[in] num numerator + * @param[in] den denominator + * + * @return result */ -static inline uint32_t div_u32_mod_44488(uint32_t val) +__attribute__((always_inline)) +__attribute__((optimize("unroll-all-loops", "merge-all-constants"))) + static inline uint64_t div_64(const uint64_t num, const uint64_t den) { - return val - (div_u32_by_44488(val)*44488); + uint8_t exp; + + /* find highest power of two less than den */ + for (exp = 63; exp > 0; exp--) { + if ((1ULL << exp) < den) { + break; + } + } + + /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */ + const uint64_t inv = div_frac_64((1ULL << exp), den); + + /* last step is only thing calculated at runtime */ + return (div_mul_w_frac_64(num, inv) >> exp); } #ifdef __cplusplus } #endif + /** @} */ + #endif /* DIV_H */ diff --git a/sys/include/xtimer/tick_conversion.h b/sys/include/xtimer/tick_conversion.h index a4306cf58f58..9add96aa588c 100644 --- a/sys/include/xtimer/tick_conversion.h +++ b/sys/include/xtimer/tick_conversion.h @@ -102,11 +102,13 @@ static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) { * multiplying by the fraction (32768 / 1000000), we will instead use * (512 / 15625), which reduces the truncation caused by the integer widths */ static inline uint32_t _xtimer_ticks_from_usec(uint32_t usec) { - return div_u32_by_15625div512(usec); + /* bitshifts increase the accuracy */ + return div_mul_w_frac_32(usec, div_frac_32((512 << 4), 15625)) >> 4; } static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) { - return div_u64_by_15625div512(usec); + /* bitshifts increase the accuracy */ + return div_mul_w_frac_64(usec, div_frac_64((512 << 4), 15625)) >> 4; } static inline uint32_t _xtimer_usec_from_ticks(uint32_t ticks) { diff --git a/sys/newlib_syscalls_default/syscalls.c b/sys/newlib_syscalls_default/syscalls.c index 91543fcd186b..a71e2ade9377 100644 --- a/sys/newlib_syscalls_default/syscalls.c +++ b/sys/newlib_syscalls_default/syscalls.c @@ -503,7 +503,7 @@ int _gettimeofday_r(struct _reent *r, struct timeval *restrict tp, void *restric (void) r; (void) tzp; uint64_t now = xtimer_now_usec64(); - tp->tv_sec = div_u64_by_1000000(now); + tp->tv_sec = div_64(now, 1000000UL); tp->tv_usec = now - (tp->tv_sec * US_PER_SEC); return 0; } diff --git a/sys/random/minstd.c b/sys/random/minstd.c index dc87738162d6..12e786bd3f23 100644 --- a/sys/random/minstd.c +++ b/sys/random/minstd.c @@ -41,8 +41,8 @@ static uint32_t _seed = 1; int rand_minstd(void) { - uint32_t hi = div_u32_by_44488(_seed); - uint32_t lo = div_u32_mod_44488(_seed); + uint32_t hi = div_32(_seed, 44488); + uint32_t lo = _seed - hi*44488; uint32_t test = (a * lo) - (r * hi); if(test > 0) { diff --git a/sys/xtimer/xtimer.c b/sys/xtimer/xtimer.c index 612bbc9622ff..f723c9c819d8 100644 --- a/sys/xtimer/xtimer.c +++ b/sys/xtimer/xtimer.c @@ -186,7 +186,7 @@ void xtimer_now_timex(timex_t *out) { uint64_t now = xtimer_usec_from_ticks64(xtimer_now64()); - out->seconds = div_u64_by_1000000(now); + out->seconds = div_64(now, 1000000UL); out->microseconds = now - (out->seconds * US_PER_SEC); } diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c index 85010e05662f..8c79c60ff976 100644 --- a/tests/unittests/tests-div/tests-div.c +++ b/tests/unittests/tests-div/tests-div.c @@ -74,14 +74,14 @@ static void test_div_u64_by_15625(void) DEBUG("Dividing %12"PRIu32" by 15625...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] / 15625, - div_u64_by_15625(u32_test_values[i])); + div_64(u32_test_values[i], 15625)); } for (unsigned i = 0; i < N_U64_VALS; i++) { DEBUG("Dividing %12"PRIu64" by 15625...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u64_test_values[i] / 15625, - div_u64_by_15625(u64_test_values[i])); + div_64(u64_test_values[i], 15625)); } } @@ -91,41 +91,44 @@ static void test_div_u32_by_15625div512(void) DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] * 512lu / 15625, - div_u32_by_15625div512(u32_test_values[i])); + div_mul_w_frac_32(u32_test_values[i], + div_frac_32((512 << 4), 15625)) >> 4); } } -static void test_div_u64_by_1000000(void) +static void test_div_u64_by_15625div512(void) { for (unsigned i = 0; i < N_U32_VALS; i++) { - DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]); + DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( - (uint64_t)u32_test_values[i] / 1000000lu, - div_u64_by_1000000(u32_test_values[i])); + (uint64_t)u32_test_values[i] * 512lu / 15625, + div_mul_w_frac_64(u32_test_values[i], + div_frac_64((512 << 4), 15625)) >> 4); } for (unsigned i = 0; i < N_U64_VALS; i++) { - DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]); + DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( - u64_test_values[i] / 1000000lu, - div_u64_by_1000000(u64_test_values[i])); + u64_15625_512_expected_values[i], + div_mul_w_frac_64(u64_test_values[i], + div_frac_64((512 << 4), 15625)) >> 4); } } -static void test_div_u64_by_15625div512(void) +static void test_div_u64_by_1000000(void) { for (unsigned i = 0; i < N_U32_VALS; i++) { - DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); + DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( - (uint64_t)u32_test_values[i] * 512lu / 15625, - div_u64_by_15625div512(u32_test_values[i])); + (uint64_t)u32_test_values[i] / 1000000lu, + div_64(u32_test_values[i], 1000000UL)); } for (unsigned i = 0; i < N_U64_VALS; i++) { - DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]); + DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( - u64_15625_512_expected_values[i], - div_u64_by_15625div512(u64_test_values[i])); + u64_test_values[i] / 1000000lu, + div_64(u64_test_values[i], 1000000UL)); } }