From 82ea87557bc053752b4fa05af6bb41bc7481ffec Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Sun, 3 Jun 2018 17:44:56 -0400 Subject: [PATCH 01/12] sys/div: calculate magic numbers at compile time --- sys/include/div.h | 238 ++++++++++++++++++++++++++++------------------ 1 file changed, 146 insertions(+), 92 deletions(-) diff --git a/sys/include/div.h b/sys/include/div.h index 54bfa1524257..f2f46de88fd9 100644 --- a/sys/include/div.h +++ b/sys/include/div.h @@ -1,6 +1,7 @@ /* * Copyright (C) 2015 Kaspar Schleiser - * Copyright (C) 2016 Eistec AB + * 2016 Eistec AB + * 2018 Acutam Automation, LLC * * This file is subject to the terms and conditions of the GNU Lesser * General Public License v2.1. See the file LICENSE in the top level @@ -16,169 +17,222 @@ * * @file * @ingroup sys + * * @author Kaspar Schleiser * @author Joakim Nohlgård + * @author Matthew Blue + * * @{ */ #ifndef DIV_H #define DIV_H -#include #include +#include "assert.h" + #ifdef __cplusplus extern "C" { #endif /** - * @brief Approximation of (2**l)/d for d=15625, l=12, 32 bits + * @brief Calculate 8-bit multiplicative inverse + * + * Result is calculated during compilation (requires const num). 
Bitshifted by + * 9 to give the most representation to the smallest interesting number (3) + * + * @param[in] num reciprocal + * + * @return result, bitshifted by 9 */ -#define DIV_H_INV_15625_32 0x431bde83ul +__attribute__((always_inline)) static inline uint8_t div_inv_8( + const uint8_t num) +{ + /* cannot be represented due to bit shift */ + assert(num > 2); -/** - * @brief Approximation of (2**l)/d for d=15625, l=12, 64 bits - */ -#define DIV_H_INV_15625_64 0x431bde82d7b634dbull + const uint16_t numerator = (1 << 9); -/** - * @brief Required shifts for division by 15625, l above - */ -#define DIV_H_INV_15625_SHIFT 12 + if ((numerator % num) * 2 >= num) { + /* fraction is >= 0.5, so round up */ + return (uint8_t)(numerator / num) + 1; + } + else { + return (uint8_t)(numerator / num); + } +} /** - * @internal - * @brief Multiply two 64 bit integers into a 128 bit integer and return the upper half. + * @brief Calculate 16-bit multiplicative inverse * - * The implementation only uses 64 bit integers internally, no __int128 support - * is necessary. + * Result is calculated during compilation (requires const num). 
Bitshifted by + * 17 to give the most representation to the smallest interesting number (3) * - * @see http://stackoverflow.com/questions/28868367/getting-the-high-part-of-64-bit-integer-multiplication - - * @param[in] a operand a - * @param[in] b operand b - * @return (((uint128_t)a * b) >> 64) - */ -uint64_t _div_mulhi64(const uint64_t a, const uint64_t b); - -/** - * @brief Integer divide val by 15625, 64 bit version + * @param[in] num reciprocal * - * @param[in] val dividend - * @return (val / 15625) + * @return result, bitshifted by 17 */ -static inline uint64_t div_u64_by_15625(uint64_t val) +__attribute__((always_inline)) static inline uint16_t div_inv_16( + const uint16_t num) { - if (val > 16383999997ull) { - return (_div_mulhi64(DIV_H_INV_15625_64, val) >> DIV_H_INV_15625_SHIFT); + /* cannot be represented due to bit shift */ + assert(num > 2); + + const uint32_t numerator = (1UL << 17); + + if ((numerator % num) * 2 >= num) { + /* fraction is >= 0.5, so round up */ + return (uint16_t)(numerator / num) + 1; + } + else { + return (uint16_t)(numerator / num); } - return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32); } /** - * @brief Integer divide val by 125 + * @brief Calculate 32-bit multiplicative inverse * - * This function can be used to convert uint64_t microsecond times (or - * intervals) to miliseconds and store them in uint32_t variables, with up to - * ~50 days worth of miliseconds ((2**32*1000) -1). - * Use e.g., ms = div_u64_by_125(microseconds >> 3) + * Result is calculated during compilation (requires const num). 
Bitshifted by + * 33 to give the most representation to the smallest interesting number (3) * - * @pre val <= 536870911999 ((2**32 * 125) -1) + * @param[in] num reciprocal * - * @param[in] val dividend - * @return (val / 125) + * @return result, bitshifted by 33 */ -static inline uint32_t div_u64_by_125(uint64_t val) +__attribute__((always_inline)) static inline uint32_t div_inv_32( + const uint32_t num) { - /* a higher value would overflow the result type */ - assert(val <= 536870911999LLU); - - uint32_t hi = val >> 32; - uint32_t lo = val; - uint32_t r = (lo >> 16) + (hi << 16); - uint32_t res = r / 125; - r = ((r % 125) << 16) + (lo & 0xFFFF); - res = (res << 16) + r / 125; - return res; + /* cannot be represented due to bit shift */ + assert(num > 2); + + const uint64_t numerator = (1ULL << 33); + + if ((numerator % num) * 2 >= num) { + /* fraction is >= 0.5, so round up */ + return (uint32_t)(numerator / num) + 1; + } + else { + return (uint32_t)(numerator / num); + } } /** - * @brief Integer divide val by 1000000 + * @brief Calculate 64-bit multiplicative inverse + * + * Result is calculated during compilation (requires const num). 
Bitshifted by + * 65 to give the most representation to the smallest interesting number (3) + * + * @param[in] num reciprocal * - * @param[in] val dividend - * @return (val / 1000000) + * @return result, bitshifted by 65 */ -static inline uint64_t div_u64_by_1000000(uint64_t val) +__attribute__((always_inline)) static inline uint64_t div_inv_64( + const uint64_t num) { - return div_u64_by_15625(val) >> 6; + /* cannot be represented due to bit shift */ + assert(num > 2); + + /* find the last bits without using 128-bit ints */ + const uint64_t numerator = (1ULL << 63); + const uint64_t most_sig = (numerator / num) << 2; + const uint64_t least_sig = ((numerator % num) << 2) / num; + + if ((((numerator % num) << 2) % num) * 2 >= num) { + /* fraction is >= 0.5, so round up */ + return (most_sig + least_sig + 1); + } + else { + return (most_sig + least_sig); + } } /** - * @brief Divide val by (15625/512) + * @internal + * @brief Multiply two 64 bit integers into a 128 bit integer and return the upper half. * - * This is used to quantize a 1MHz value to the closest 32768Hz value, - * e.g., for timers. + * The implementation only uses 64 bit integers internally, no __int128 support + * is necessary. * - * The algorithm uses the modular multiplicative inverse of 15625 to use only - * multiplication and bit shifts to perform the division. 
+ * @see http://stackoverflow.com/questions/28868367/getting-the-high-part-of-64-bit-integer-multiplication + + * @param[in] a operand a + * @param[in] b operand b + * @return (((uint128_t)a * b) >> 64) + */ +uint64_t _div_mulhi64(const uint64_t a, const uint64_t b); + +/** + * @brief Divide 8-bit number using an inverse * - * The result will be equal to the mathematical expression: floor((val * 512) / 15625) + * @param[in] val numerator + * @param[in] inv multiplicative inverse of denominator (bitshifted by 9) * - * @param[in] val dividend - * @return (val / (15625/512)) + * @return result */ -static inline uint32_t div_u32_by_15625div512(uint32_t val) +static inline uint8_t div_u8_by_inv(const uint8_t val, const uint8_t inv) { - return ((uint64_t)(val) * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9); + uint16_t tmp; + + tmp = (uint16_t)val * (uint16_t)inv; + tmp >>= 9; + + return (uint8_t)tmp; } /** - * @brief Divide val by (15625/512) + * @brief Divide 16-bit number using an inverse * - * This is used to quantize a 1MHz value to the closest 32768Hz value, - * e.g., for timers. 
+ * @param[in] val numerator + * @param[in] inv multiplicative inverse of denominator (bitshifted by 17) * - * @param[in] val dividend - * @return (val / (15625/512)) + * @return result */ -static inline uint64_t div_u64_by_15625div512(uint64_t val) +static inline uint16_t div_u16_by_inv(const uint16_t val, const uint16_t inv) { - /* - * This saves around 1400 bytes of ROM on Cortex-M platforms (both ARMv6 and - * ARMv7) from avoiding linking against __aeabi_uldivmod and related helpers - */ - if (val > 16383999997ull) { - /* this would overflow 2^64 in the multiplication that follows, need to - * use the long version */ - return (_div_mulhi64(DIV_H_INV_15625_64, val) >> (DIV_H_INV_15625_SHIFT - 9)); - } - return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9); + uint32_t tmp; + + tmp = (uint32_t)val * (uint32_t)inv; + tmp >>= 17; + + return (uint16_t)tmp; } /** - * @brief Integer divide val by 44488 + * @brief Divide 32-bit number using an inverse * - * @param[in] val dividend - * @return (val / 44488) + * @param[in] val numerator + * @param[in] inv multiplicative inverse of denominator (bitshifted by 33) + * + * @return result */ -static inline uint32_t div_u32_by_44488(uint32_t val) +static inline uint32_t div_u32_by_inv(const uint32_t val, const uint32_t inv) { - return ((uint64_t)val * 0xBC8F1391UL) >> (15 + 32); + uint64_t tmp; + + tmp = (uint64_t)val * (uint64_t)inv; + tmp >>= 33; + + return (uint32_t)tmp; } /** - * @brief Modulo 44488 + * @brief Divide 64-bit number using an inverse + * + * @param[in] val numerator + * @param[in] inv multiplicative inverse of denominator (bitshifted by 65) * - * @param[in] val dividend - * @return (val % 44488) + * @return result */ -static inline uint32_t div_u32_mod_44488(uint32_t val) +static inline uint64_t div_u64_by_inv(const uint64_t val, const uint64_t inv) { - return val - (div_u32_by_44488(val)*44488); + return (_div_mulhi64(val, inv) >> 1); } #ifdef __cplusplus } #endif + /** @} */ + #endif /* 
DIV_H */ From 5ad5cc8e80265b8014745b675e04ccfc35e6e2dc Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Sun, 3 Jun 2018 17:46:12 -0400 Subject: [PATCH 02/12] cpu/mips32r2_common: sys/div API change --- cpu/mips32r2_common/periph/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpu/mips32r2_common/periph/timer.c b/cpu/mips32r2_common/periph/timer.c index 16e373f2f730..2629dd8a2508 100644 --- a/cpu/mips32r2_common/periph/timer.c +++ b/cpu/mips32r2_common/periph/timer.c @@ -83,7 +83,7 @@ int gettimeofday(struct timeval *__restrict __p, void *__restrict __tz) (void)__tz; uint64_t now = counter * US_PER_MS; - __p->tv_sec = div_u64_by_1000000(now); + __p->tv_sec = div_u64_by_inv(now, div_inv_64(1000000)); __p->tv_usec = now - (__p->tv_sec * US_PER_SEC); return 0; From dec2f8e5b07c79be441219cebe8342b0924e43ab Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Sun, 3 Jun 2018 17:47:02 -0400 Subject: [PATCH 03/12] tests/unittests: sys/div API change --- tests/unittests/tests-div/tests-div.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c index 85010e05662f..4f92a467e0a8 100644 --- a/tests/unittests/tests-div/tests-div.c +++ b/tests/unittests/tests-div/tests-div.c @@ -74,14 +74,14 @@ static void test_div_u64_by_15625(void) DEBUG("Dividing %12"PRIu32" by 15625...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] / 15625, - div_u64_by_15625(u32_test_values[i])); + div_u64_by_inv(u32_test_values[i], div_inv_64(15625))); } for (unsigned i = 0; i < N_U64_VALS; i++) { DEBUG("Dividing %12"PRIu64" by 15625...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u64_test_values[i] / 15625, - div_u64_by_15625(u64_test_values[i])); + div_u64_by_inv(u64_test_values[i], div_inv_64(15625))); } } @@ -91,7 +91,7 @@ static void test_div_u32_by_15625div512(void) DEBUG("Dividing %"PRIu32" by (15625/512)...\n", 
u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] * 512lu / 15625, - div_u32_by_15625div512(u32_test_values[i])); + div_u32_by_inv(u32_test_values[i], div_inv_32(15625)) >> 9); } } @@ -101,14 +101,14 @@ static void test_div_u64_by_1000000(void) DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] / 1000000lu, - div_u64_by_1000000(u32_test_values[i])); + div_u64_by_inv(u32_test_values[i], div_inv_64(1000000))); } for (unsigned i = 0; i < N_U64_VALS; i++) { DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( u64_test_values[i] / 1000000lu, - div_u64_by_1000000(u64_test_values[i])); + div_u64_by_inv(u64_test_values[i], div_inv_64(1000000))); } } @@ -118,14 +118,14 @@ static void test_div_u64_by_15625div512(void) DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] * 512lu / 15625, - div_u64_by_15625div512(u32_test_values[i])); + div_u64_by_inv(u32_test_values[i], div_inv_64(15625)) >> 9); } for (unsigned i = 0; i < N_U64_VALS; i++) { DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( u64_15625_512_expected_values[i], - div_u64_by_15625div512(u64_test_values[i])); + div_u64_by_inv(u64_test_values[i], div_inv_64(15625)) >> 9); } } From 141abee3ea3ad4f6ac7686e7e63d087c34cbb766 Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Sun, 3 Jun 2018 17:47:28 -0400 Subject: [PATCH 04/12] sys/*: sys/div API change --- sys/evtimer/evtimer.c | 8 +++++++- sys/include/xtimer/tick_conversion.h | 4 ++-- sys/newlib_syscalls_default/syscalls.c | 2 +- sys/random/minstd.c | 4 ++-- sys/xtimer/xtimer.c | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/sys/evtimer/evtimer.c b/sys/evtimer/evtimer.c index aa0b11df9e35..981c37f46a54 100644 --- a/sys/evtimer/evtimer.c +++ b/sys/evtimer/evtimer.c @@ -124,8 +124,14 @@ static uint32_t 
_get_offset(xtimer_t *timer) } else { target_us -= now_us; + /* add half of 125 so integer division rounds to nearest */ - return div_u64_by_125((target_us >> 3) + 62); + target_us = (target_us >> 3) + 62; + + /* a higher value would overflow the result type */ + assert(target_us <= 536870911999LLU); + + return (uint32_t)div_u64_by_inv(target_us, div_inv_64(125)); } } diff --git a/sys/include/xtimer/tick_conversion.h b/sys/include/xtimer/tick_conversion.h index a4306cf58f58..da606aeb490d 100644 --- a/sys/include/xtimer/tick_conversion.h +++ b/sys/include/xtimer/tick_conversion.h @@ -102,11 +102,11 @@ static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) { * multiplying by the fraction (32768 / 1000000), we will instead use * (512 / 15625), which reduces the truncation caused by the integer widths */ static inline uint32_t _xtimer_ticks_from_usec(uint32_t usec) { - return div_u32_by_15625div512(usec); + return div_u32_by_inv(usec, div_inv_32(15625)) >> 9; } static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) { - return div_u64_by_15625div512(usec); + return div_u64_by_inv(usec, div_inv_64(15625)) >> 9; } static inline uint32_t _xtimer_usec_from_ticks(uint32_t ticks) { diff --git a/sys/newlib_syscalls_default/syscalls.c b/sys/newlib_syscalls_default/syscalls.c index 91543fcd186b..4912324fb613 100644 --- a/sys/newlib_syscalls_default/syscalls.c +++ b/sys/newlib_syscalls_default/syscalls.c @@ -503,7 +503,7 @@ int _gettimeofday_r(struct _reent *r, struct timeval *restrict tp, void *restric (void) r; (void) tzp; uint64_t now = xtimer_now_usec64(); - tp->tv_sec = div_u64_by_1000000(now); + tp->tv_sec = div_u64_by_inv(now, div_inv_64(1000000)); tp->tv_usec = now - (tp->tv_sec * US_PER_SEC); return 0; } diff --git a/sys/random/minstd.c b/sys/random/minstd.c index dc87738162d6..8530756117bb 100644 --- a/sys/random/minstd.c +++ b/sys/random/minstd.c @@ -41,8 +41,8 @@ static uint32_t _seed = 1; int rand_minstd(void) { - uint32_t hi = 
div_u32_by_44488(_seed); - uint32_t lo = div_u32_mod_44488(_seed); + uint32_t hi = div_u32_by_inv(_seed, div_inv_32(44488)); + uint32_t lo = _seed - hi*44488; uint32_t test = (a * lo) - (r * hi); if(test > 0) { diff --git a/sys/xtimer/xtimer.c b/sys/xtimer/xtimer.c index 612bbc9622ff..f3e8c82231da 100644 --- a/sys/xtimer/xtimer.c +++ b/sys/xtimer/xtimer.c @@ -186,7 +186,7 @@ void xtimer_now_timex(timex_t *out) { uint64_t now = xtimer_usec_from_ticks64(xtimer_now64()); - out->seconds = div_u64_by_1000000(now); + out->seconds = div_u64_by_inv(now, div_inv_64(1000000)); out->microseconds = now - (out->seconds * US_PER_SEC); } From 2899deb6e3dd5320606a1b6c451bcfe7caf0daf9 Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Sun, 3 Jun 2018 22:49:26 -0400 Subject: [PATCH 05/12] fixup! sys/div: calculate magic numbers at compile time --- sys/include/div.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/sys/include/div.h b/sys/include/div.h index f2f46de88fd9..32365070e49e 100644 --- a/sys/include/div.h +++ b/sys/include/div.h @@ -172,12 +172,9 @@ uint64_t _div_mulhi64(const uint64_t a, const uint64_t b); */ static inline uint8_t div_u8_by_inv(const uint8_t val, const uint8_t inv) { - uint16_t tmp; + const uint16_t tmp = (uint16_t)val * (uint16_t)inv; - tmp = (uint16_t)val * (uint16_t)inv; - tmp >>= 9; - - return (uint8_t)tmp; + return (uint8_t)(tmp >> 9); } /** @@ -190,12 +187,9 @@ static inline uint8_t div_u8_by_inv(const uint8_t val, const uint8_t inv) */ static inline uint16_t div_u16_by_inv(const uint16_t val, const uint16_t inv) { - uint32_t tmp; - - tmp = (uint32_t)val * (uint32_t)inv; - tmp >>= 17; + const uint32_t tmp = (uint32_t)val * (uint32_t)inv; - return (uint16_t)tmp; + return (uint16_t)(tmp >> 17); } /** @@ -208,12 +202,9 @@ static inline uint16_t div_u16_by_inv(const uint16_t val, const uint16_t inv) */ static inline uint32_t div_u32_by_inv(const uint32_t val, const uint32_t inv) { - uint64_t tmp; - - tmp = 
(uint64_t)val * (uint64_t)inv; - tmp >>= 33; + const uint64_t tmp = (uint64_t)val * (uint64_t)inv; - return (uint32_t)tmp; + return (uint32_t)(tmp >> 33); } /** From 11299166c898baad23c0a0980981b0e62698c6d3 Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Sun, 3 Jun 2018 22:50:01 -0400 Subject: [PATCH 06/12] fixup! sys/*: sys/div API change --- sys/include/xtimer/tick_conversion.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/include/xtimer/tick_conversion.h b/sys/include/xtimer/tick_conversion.h index da606aeb490d..ac9318911347 100644 --- a/sys/include/xtimer/tick_conversion.h +++ b/sys/include/xtimer/tick_conversion.h @@ -102,11 +102,11 @@ static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) { * multiplying by the fraction (32768 / 1000000), we will instead use * (512 / 15625), which reduces the truncation caused by the integer widths */ static inline uint32_t _xtimer_ticks_from_usec(uint32_t usec) { - return div_u32_by_inv(usec, div_inv_32(15625)) >> 9; + return div_u32_by_inv(usec, div_inv_32(15625)) << 9; } static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) { - return div_u64_by_inv(usec, div_inv_64(15625)) >> 9; + return div_u64_by_inv(usec, div_inv_64(15625)) << 9; } static inline uint32_t _xtimer_usec_from_ticks(uint32_t ticks) { From 620997b6461235984c8c1c3b7deb21149d589a03 Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Sun, 3 Jun 2018 22:50:28 -0400 Subject: [PATCH 07/12] fixup! 
tests/unittests: sys/div API change --- tests/unittests/tests-div/tests-div.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c index 4f92a467e0a8..289c4bd5f99e 100644 --- a/tests/unittests/tests-div/tests-div.c +++ b/tests/unittests/tests-div/tests-div.c @@ -91,7 +91,7 @@ static void test_div_u32_by_15625div512(void) DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] * 512lu / 15625, - div_u32_by_inv(u32_test_values[i], div_inv_32(15625)) >> 9); + div_u32_by_inv(u32_test_values[i], div_inv_32(15625)) << 9); } } @@ -118,14 +118,14 @@ static void test_div_u64_by_15625div512(void) DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] * 512lu / 15625, - div_u64_by_inv(u32_test_values[i], div_inv_64(15625)) >> 9); + div_u64_by_inv(u32_test_values[i], div_inv_64(15625)) << 9); } for (unsigned i = 0; i < N_U64_VALS; i++) { DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( u64_15625_512_expected_values[i], - div_u64_by_inv(u64_test_values[i], div_inv_64(15625)) >> 9); + div_u64_by_inv(u64_test_values[i], div_inv_64(15625)) << 9); } } From e8e31623832e349a5575ec0b594812f9c2985ace Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Tue, 5 Jun 2018 19:12:56 -0400 Subject: [PATCH 08/12] fixup! 
sys/div: calculate magic numbers at compile time --- sys/include/div.h | 308 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 218 insertions(+), 90 deletions(-) diff --git a/sys/include/div.h b/sys/include/div.h index 32365070e49e..236b29b848f0 100644 --- a/sys/include/div.h +++ b/sys/include/div.h @@ -28,6 +28,7 @@ #ifndef DIV_H #define DIV_H +#include #include #include "assert.h" @@ -37,116 +38,168 @@ extern "C" { #endif /** - * @brief Calculate 8-bit multiplicative inverse + * @brief Internal 65-bit shift and comparison: 2*x > y * - * Result is calculated during compilation (requires const num). Bitshifted by - * 9 to give the most representation to the smallest interesting number (3) + * @param[in] x greater / 2 + * @param[in] y lesser * - * @param[in] num reciprocal + * @return if 2*x > y then 1, else 0 + */ +__attribute__((always_inline)) static inline uint8_t _div_shcom( + const uint64_t x, const uint64_t y) +{ + if (x & (1ULL << 63)) { + return 1; + } + + if ((x << 1) > y) { + return 1; + } + + return 0; +} + +/** + * @brief Internal 65-bit shift and subtraction: 2*x - y * - * @return result, bitshifted by 9 + * @param[in] x minuend / 2 + * @param[in] y subtrahend + * + * @return difference */ -__attribute__((always_inline)) static inline uint8_t div_inv_8( - const uint8_t num) +__attribute__((always_inline)) static inline uint64_t _div_shsub( + const uint64_t x, const uint64_t y) { - /* cannot be represented due to bit shift */ - assert(num > 2); + if (x & (1ULL << 63)) { + uint64_t tmp; - const uint16_t numerator = (1 << 9); + /* = 2^64 - 1 - y */ + tmp = (uint64_t)(-1) - y; - if ((numerator % num) * 2 >= num) { - /* fraction is >= 0.5, so round up */ - return (uint8_t)(numerator / num) + 1; - } - else { - return (uint8_t)(numerator / num); + /* = (2^64 - 1 - y) + (2*x - 2^64) */ + /* = 2*x - y - 1 */ + tmp += (x << 1); + + /* = 2*x - y */ + return tmp + 1; } + + return (x << 1) - y; } /** - * @brief Calculate 16-bit multiplicative inverse + 
* @brief Internal 64-bit fraction calculation * - * Result is calculated during compilation (requires const num). Bitshifted by - * 17 to give the most representation to the smallest interesting number (3) + * @param[in] num numerator + * @param[in] den denominator * - * @param[in] num reciprocal - * - * @return result, bitshifted by 17 + * @return result, bitshifted by 64 */ -__attribute__((always_inline)) static inline uint16_t div_inv_16( - const uint16_t num) +__attribute__((always_inline, optimize("unroll-all-loops"))) static inline + uint64_t _div_frac(const uint64_t num, const uint64_t den) { - /* cannot be represented due to bit shift */ - assert(num > 2); + uint64_t ans = 0, rem = num; - const uint32_t numerator = (1UL << 17); + /* binary long division */ + for (uint8_t i = 0; i < 64; i++) { + if (_div_shcom(rem, den)) { + /* den goes into num for this bit */ + ans |= (1ULL << (63 - i)); - if ((numerator % num) * 2 >= num) { - /* fraction is >= 0.5, so round up */ - return (uint16_t)(numerator / num) + 1; + /* subtract and move to next bit */ + rem = _div_shsub(rem, den); + } + else { + /* move to next bit */ + rem <<= 1; + } } - else { - return (uint16_t)(numerator / num); + + /* rounding */ + if (_div_shcom(rem, den)) { + ans++; } + + return ans; } /** - * @brief Calculate 32-bit multiplicative inverse + * @brief Compile-time calculate 16-bit fraction * - * Result is calculated during compilation (requires const num). Bitshifted by - * 33 to give the most representation to the smallest interesting number (3) + * For fractions that are less than one. Result is calculated during + * compilation (requires const input). 
* - * @param[in] num reciprocal + * @param[in] num numerator + * @param[in] den denominator * - * @return result, bitshifted by 33 + * @return result, bitshifted by 16 */ -__attribute__((always_inline)) static inline uint32_t div_inv_32( - const uint32_t num) +__attribute__((always_inline)) static inline uint16_t div_frac_16( + const uint16_t num, const uint16_t den) { - /* cannot be represented due to bit shift */ - assert(num > 2); + assert((den > 0) && (num < den)); - const uint64_t numerator = (1ULL << 33); + /* done during compile time, so trade efficiency for simplicity */ + const uint64_t ans = _div_frac((uint64_t)num << 48, (uint64_t)den << 48); - if ((numerator % num) * 2 >= num) { - /* fraction is >= 0.5, so round up */ - return (uint32_t)(numerator / num) + 1; + if ((ans << 16) > (1ULL << 63)) { + /* round the result */ + return (uint16_t)(ans >> 48) + 1; } else { - return (uint32_t)(numerator / num); + return (uint16_t)(ans >> 48); } } /** - * @brief Calculate 64-bit multiplicative inverse + * @brief Compile-time calculate 32-bit fraction * - * Result is calculated during compilation (requires const num). Bitshifted by - * 65 to give the most representation to the smallest interesting number (3) + * For fractions that are less than one. Result is calculated during + * compilation (requires const input). 
* - * @param[in] num reciprocal + * @param[in] num numerator + * @param[in] den denominator * - * @return result, bitshifted by 65 + * @return result, bitshifted by 32 */ -__attribute__((always_inline)) static inline uint64_t div_inv_64( - const uint64_t num) +__attribute__((always_inline)) static inline uint32_t div_frac_32( + const uint32_t num, const uint32_t den) { - /* cannot be represented due to bit shift */ - assert(num > 2); + assert((den > 0) && (num < den)); - /* find the last bits without using 128-bit ints */ - const uint64_t numerator = (1ULL << 63); - const uint64_t most_sig = (numerator / num) << 2; - const uint64_t least_sig = ((numerator % num) << 2) / num; + /* done during compile time, so trade efficiency for simplicity */ + const uint64_t ans = _div_frac((uint64_t)num << 32, (uint64_t)den << 32); - if ((((numerator % num) << 2) % num) * 2 >= num) { - /* fraction is >= 0.5, so round up */ - return (most_sig + least_sig + 1); + if ((ans << 32) > (1ULL << 63)) { + /* round the result */ + return (uint32_t)(ans >> 32) + 1; } else { - return (most_sig + least_sig); + return (uint32_t)(ans >> 32); } } +/** + * @brief Compile-time calculate 64-bit fraction + * + * For fractions that are less than one. Result is calculated during + * compilation (requires const input). + * + * @param[in] num numerator + * @param[in] den denominator + * + * @return result, bitshifted by 64 + */ +__attribute__((always_inline)) static inline uint64_t div_frac_64( + const uint64_t num, const uint64_t den) +{ + assert((den > 0) && (num < den)); + + const uint64_t ans = _div_frac(num, den); + + return ans; +} + /** * @internal * @brief Multiply two 64 bit integers into a 128 bit integer and return the upper half. 
@@ -163,61 +216,136 @@ __attribute__((always_inline)) static inline uint64_t div_inv_64( uint64_t _div_mulhi64(const uint64_t a, const uint64_t b); /** - * @brief Divide 8-bit number using an inverse + * @brief Multiply 16-bit number with a fraction * - * @param[in] val numerator - * @param[in] inv multiplicative inverse of denominator (bitshifted by 9) + * @param[in] val integer + * @param[in] frac fraction (bitshifted by 16) * - * @return result + * @return result */ -static inline uint8_t div_u8_by_inv(const uint8_t val, const uint8_t inv) +static inline uint16_t div_mul_w_frac_16(const uint16_t val, const uint16_t frac) { - const uint16_t tmp = (uint16_t)val * (uint16_t)inv; + const uint32_t tmp = (uint32_t)val * (uint32_t)frac; - return (uint8_t)(tmp >> 9); + return (uint16_t)(tmp >> 16); } /** - * @brief Divide 16-bit number using an inverse + * @brief Multiply 32-bit number with a fraction * - * @param[in] val numerator - * @param[in] inv multiplicative inverse of denominator (bitshifted by 17) + * @param[in] val integer + * @param[in] frac fraction (bitshifted by 32) * - * @return result + * @return result */ -static inline uint16_t div_u16_by_inv(const uint16_t val, const uint16_t inv) +static inline uint32_t div_mul_w_frac_32(const uint32_t val, const uint32_t frac) { - const uint32_t tmp = (uint32_t)val * (uint32_t)inv; + const uint64_t tmp = (uint64_t)val * (uint64_t)frac; - return (uint16_t)(tmp >> 17); + return (uint32_t)(tmp >> 32); } /** - * @brief Divide 32-bit number using an inverse + * @brief Multiply 64-bit number with a fraction * - * @param[in] val numerator - * @param[in] inv multiplicative inverse of denominator (bitshifted by 33) + * @param[in] val integer + * @param[in] frac fraction (bitshifted by 64) * - * @return result + * @return result */ -static inline uint32_t div_u32_by_inv(const uint32_t val, const uint32_t inv) +static inline uint64_t div_mul_w_frac_64(const uint64_t val, const uint64_t frac) { - const uint64_t tmp = 
(uint64_t)val * (uint64_t)inv; + return _div_mulhi64(val, frac); +} + +/** + * @brief Division of a 16-bit number with high-accuracy + * + * This is partially calculated during compilation for speed, which requires + * that the denominator is a constant. + * + * @param[in] num numerator + * @param[in] den denominator + * + * @return result + */ +__attribute__((always_inline, optimize("unroll-all-loops"))) static inline + uint16_t div_16(const uint16_t num, const uint16_t den) +{ + uint8_t exp; + + /* find highest power of two less than den */ + for (exp = 15; exp > 0; exp--) { + if ((1U << exp) < den) { + break; + } + } + + /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */ + const uint16_t inv = div_frac_16((1U << exp), den); + + /* last step is only thing calculated at runtime */ + return (div_mul_w_frac_16(num, inv) >> exp); +} + +/** + * @brief Division of a 32-bit number with high-accuracy + * + * This is partially calculated during compilation for speed, which requires + * that the denominator is a constant. 
+ * + * @param[in] num numerator + * @param[in] den denominator + * + * @return result + */ +__attribute__((always_inline, optimize("unroll-all-loops"))) static inline + uint32_t div_32(const uint32_t num, const uint32_t den) +{ + uint8_t exp; + + /* find highest power of two less than den */ + for (exp = 31; exp > 0; exp--) { + if ((1UL << exp) < den) { + break; + } + } + + /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */ + const uint32_t inv = div_frac_32((1UL << exp), den); - return (uint32_t)(tmp >> 33); + /* last step is only thing calculated at runtime */ + return (div_mul_w_frac_32(num, inv) >> exp); } /** - * @brief Divide 64-bit number using an inverse + * @brief Division of a 64-bit number with high-accuracy * - * @param[in] val numerator - * @param[in] inv multiplicative inverse of denominator (bitshifted by 65) + * This is partially calculated during compilation for speed, which requires + * that the denominator is a constant. * - * @return result + * @param[in] num numerator + * @param[in] den denominator + * + * @return result */ -static inline uint64_t div_u64_by_inv(const uint64_t val, const uint64_t inv) +__attribute__((always_inline, optimize("unroll-all-loops"))) static inline + uint64_t div_64(const uint64_t num, const uint64_t den) { - return (_div_mulhi64(val, inv) >> 1); + uint8_t exp; + + /* find highest power of two less than den */ + for (exp = 63; exp > 0; exp--) { + if ((1ULL << exp) < den) { + break; + } + } + + /* Make inverse: (1 > inv >= 0.5) for greatest accuracy */ + const uint64_t inv = div_frac_64((1ULL << exp), den); + + /* last step is only thing calculated at runtime */ + return (div_mul_w_frac_64(num, inv) >> exp); } #ifdef __cplusplus From d69716b433106c4c159118aa356acf70615b3739 Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Tue, 5 Jun 2018 19:13:22 -0400 Subject: [PATCH 09/12] fixup! 
cpu/mips32r2_common: sys/div API change --- cpu/mips32r2_common/periph/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpu/mips32r2_common/periph/timer.c b/cpu/mips32r2_common/periph/timer.c index 2629dd8a2508..4cb107cc6a95 100644 --- a/cpu/mips32r2_common/periph/timer.c +++ b/cpu/mips32r2_common/periph/timer.c @@ -83,7 +83,7 @@ int gettimeofday(struct timeval *__restrict __p, void *__restrict __tz) (void)__tz; uint64_t now = counter * US_PER_MS; - __p->tv_sec = div_u64_by_inv(now, div_inv_64(1000000)); + __p->tv_sec = div_64(now, 1000000UL); __p->tv_usec = now - (__p->tv_sec * US_PER_SEC); return 0; From ea729a364d6b8ca0b45f046c5cdd6203e55923af Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Tue, 5 Jun 2018 19:13:55 -0400 Subject: [PATCH 10/12] fixup! tests/unittests: sys/div API change --- tests/unittests/tests-div/tests-div.c | 37 +++++++++++++++------------ 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c index 289c4bd5f99e..8c79c60ff976 100644 --- a/tests/unittests/tests-div/tests-div.c +++ b/tests/unittests/tests-div/tests-div.c @@ -74,14 +74,14 @@ static void test_div_u64_by_15625(void) DEBUG("Dividing %12"PRIu32" by 15625...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] / 15625, - div_u64_by_inv(u32_test_values[i], div_inv_64(15625))); + div_64(u32_test_values[i], 15625)); } for (unsigned i = 0; i < N_U64_VALS; i++) { DEBUG("Dividing %12"PRIu64" by 15625...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u64_test_values[i] / 15625, - div_u64_by_inv(u64_test_values[i], div_inv_64(15625))); + div_64(u64_test_values[i], 15625)); } } @@ -91,41 +91,44 @@ static void test_div_u32_by_15625div512(void) DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( (uint64_t)u32_test_values[i] * 512lu / 15625, - div_u32_by_inv(u32_test_values[i], div_inv_32(15625)) << 9); + 
div_mul_w_frac_32(u32_test_values[i], + div_frac_32((512 << 4), 15625)) >> 4); } } -static void test_div_u64_by_1000000(void) +static void test_div_u64_by_15625div512(void) { for (unsigned i = 0; i < N_U32_VALS; i++) { - DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]); + DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( - (uint64_t)u32_test_values[i] / 1000000lu, - div_u64_by_inv(u32_test_values[i], div_inv_64(1000000))); + (uint64_t)u32_test_values[i] * 512lu / 15625, + div_mul_w_frac_64(u32_test_values[i], + div_frac_64((512 << 4), 15625)) >> 4); } for (unsigned i = 0; i < N_U64_VALS; i++) { - DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]); + DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( - u64_test_values[i] / 1000000lu, - div_u64_by_inv(u64_test_values[i], div_inv_64(1000000))); + u64_15625_512_expected_values[i], + div_mul_w_frac_64(u64_test_values[i], + div_frac_64((512 << 4), 15625)) >> 4); } } -static void test_div_u64_by_15625div512(void) +static void test_div_u64_by_1000000(void) { for (unsigned i = 0; i < N_U32_VALS; i++) { - DEBUG("Dividing %"PRIu32" by (15625/512)...\n", u32_test_values[i]); + DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]); TEST_ASSERT_EQUAL_INT( - (uint64_t)u32_test_values[i] * 512lu / 15625, - div_u64_by_inv(u32_test_values[i], div_inv_64(15625)) << 9); + (uint64_t)u32_test_values[i] / 1000000lu, + div_64(u32_test_values[i], 1000000UL)); } for (unsigned i = 0; i < N_U64_VALS; i++) { - DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]); + DEBUG("Dividing %"PRIu64" by 1000000...\n", u64_test_values[i]); TEST_ASSERT_EQUAL_INT( - u64_15625_512_expected_values[i], - div_u64_by_inv(u64_test_values[i], div_inv_64(15625)) << 9); + u64_test_values[i] / 1000000lu, + div_64(u64_test_values[i], 1000000UL)); } } From b1429f0c5c101b1db9562efcc9b837be24dc3a34 Mon Sep 17 00:00:00 2001 From: Matthew 
Blue Date: Tue, 5 Jun 2018 19:14:26 -0400 Subject: [PATCH 11/12] fixup! sys/*: sys/div API change --- sys/evtimer/evtimer.c | 2 +- sys/include/xtimer/tick_conversion.h | 6 ++++-- sys/newlib_syscalls_default/syscalls.c | 2 +- sys/random/minstd.c | 2 +- sys/xtimer/xtimer.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sys/evtimer/evtimer.c b/sys/evtimer/evtimer.c index 981c37f46a54..815c08e2eee8 100644 --- a/sys/evtimer/evtimer.c +++ b/sys/evtimer/evtimer.c @@ -131,7 +131,7 @@ static uint32_t _get_offset(xtimer_t *timer) /* a higher value would overflow the result type */ assert(target_us <= 536870911999LLU); - return (uint32_t)div_u64_by_inv(target_us, div_inv_64(125)); + return (uint32_t)div_64(target_us, 125); } } diff --git a/sys/include/xtimer/tick_conversion.h b/sys/include/xtimer/tick_conversion.h index ac9318911347..9add96aa588c 100644 --- a/sys/include/xtimer/tick_conversion.h +++ b/sys/include/xtimer/tick_conversion.h @@ -102,11 +102,13 @@ static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) { * multiplying by the fraction (32768 / 1000000), we will instead use * (512 / 15625), which reduces the truncation caused by the integer widths */ static inline uint32_t _xtimer_ticks_from_usec(uint32_t usec) { - return div_u32_by_inv(usec, div_inv_32(15625)) << 9; + /* bitshifts increase the accuracy */ + return div_mul_w_frac_32(usec, div_frac_32((512 << 4), 15625)) >> 4; } static inline uint64_t _xtimer_ticks_from_usec64(uint64_t usec) { - return div_u64_by_inv(usec, div_inv_64(15625)) << 9; + /* bitshifts increase the accuracy */ + return div_mul_w_frac_64(usec, div_frac_64((512 << 4), 15625)) >> 4; } static inline uint32_t _xtimer_usec_from_ticks(uint32_t ticks) { diff --git a/sys/newlib_syscalls_default/syscalls.c b/sys/newlib_syscalls_default/syscalls.c index 4912324fb613..a71e2ade9377 100644 --- a/sys/newlib_syscalls_default/syscalls.c +++ b/sys/newlib_syscalls_default/syscalls.c @@ -503,7 +503,7 @@ int _gettimeofday_r(struct 
_reent *r, struct timeval *restrict tp, void *restric (void) r; (void) tzp; uint64_t now = xtimer_now_usec64(); - tp->tv_sec = div_u64_by_inv(now, div_inv_64(1000000)); + tp->tv_sec = div_64(now, 1000000UL); tp->tv_usec = now - (tp->tv_sec * US_PER_SEC); return 0; } diff --git a/sys/random/minstd.c b/sys/random/minstd.c index 8530756117bb..12e786bd3f23 100644 --- a/sys/random/minstd.c +++ b/sys/random/minstd.c @@ -41,7 +41,7 @@ static uint32_t _seed = 1; int rand_minstd(void) { - uint32_t hi = div_u32_by_inv(_seed, div_inv_32(44488)); + uint32_t hi = div_32(_seed, 44488); uint32_t lo = _seed - hi*44488; uint32_t test = (a * lo) - (r * hi); diff --git a/sys/xtimer/xtimer.c b/sys/xtimer/xtimer.c index f3e8c82231da..f723c9c819d8 100644 --- a/sys/xtimer/xtimer.c +++ b/sys/xtimer/xtimer.c @@ -186,7 +186,7 @@ void xtimer_now_timex(timex_t *out) { uint64_t now = xtimer_usec_from_ticks64(xtimer_now64()); - out->seconds = div_u64_by_inv(now, div_inv_64(1000000)); + out->seconds = div_64(now, 1000000UL); out->microseconds = now - (out->seconds * US_PER_SEC); } From 5a8f68d93e8923bddad4e704634d14d01c6e3f8e Mon Sep 17 00:00:00 2001 From: Matthew Blue Date: Thu, 7 Jun 2018 03:59:45 -0400 Subject: [PATCH 12/12] fixup! 
sys/div: calculate magic numbers at compile time --- sys/include/div.h | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/sys/include/div.h b/sys/include/div.h index 236b29b848f0..9333ca929d98 100644 --- a/sys/include/div.h +++ b/sys/include/div.h @@ -45,8 +45,8 @@ extern "C" { * * @return if 2*x > y then 1, else 0 */ -__attribute__((always_inline)) static inline uint8_t _div_shcom( - const uint64_t x, const uint64_t y) +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint8_t _div_shcom(const uint64_t x, const uint64_t y) { if (x & (1ULL << 63)) { return 1; @@ -67,8 +67,8 @@ __attribute__((always_inline)) static inline uint8_t _div_shcom( * * @return difference */ -__attribute__((always_inline)) static inline uint64_t _div_shsub( - const uint64_t x, const uint64_t y) +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint64_t _div_shsub(const uint64_t x, const uint64_t y) { if (x & (1ULL << 63)) { uint64_t tmp; @@ -95,8 +95,9 @@ __attribute__((always_inline)) static inline uint64_t _div_shsub( * * @return result, bitshifted by 64 */ -__attribute__((always_inline, optimize("unroll-all-loops"))) static inline - uint64_t _div_frac(const uint64_t num, const uint64_t den) +__attribute__((always_inline)) +__attribute__((optimize("unroll-all-loops", "merge-all-constants"))) + static inline uint64_t _div_frac(const uint64_t num, const uint64_t den) { uint64_t ans = 0, rem = num; @@ -134,8 +135,8 @@ __attribute__((always_inline, optimize("unroll-all-loops"))) static inline * * @return result, bitshifted by 16 */ -__attribute__((always_inline)) static inline uint16_t div_frac_16( - const uint16_t num, const uint16_t den) +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint16_t div_frac_16(const uint16_t num, const uint16_t den) { assert((den > 0) && (num < den)); @@ -162,8 
+163,8 @@ __attribute__((always_inline)) static inline uint16_t div_frac_16( * * @return result, bitshifted by 32 */ -__attribute__((always_inline)) static inline uint32_t div_frac_32( - const uint32_t num, const uint32_t den) +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint32_t div_frac_32(const uint32_t num, const uint32_t den) { assert((den > 0) && (num < den)); @@ -190,8 +191,8 @@ __attribute__((always_inline)) static inline uint32_t div_frac_32( * * @return result, bitshifted by 64 */ -__attribute__((always_inline)) static inline uint64_t div_frac_64( - const uint64_t num, const uint64_t den) +__attribute__((always_inline)) __attribute__((optimize("merge-all-constants"))) + static inline uint64_t div_frac_64(const uint64_t num, const uint64_t den) { assert((den > 0) && (num < den)); @@ -269,8 +270,9 @@ static inline uint64_t div_mul_w_frac_64(const uint64_t val, const uint64_t frac * * @return result */ -__attribute__((always_inline, optimize("unroll-all-loops"))) static inline - uint16_t div_16(const uint16_t num, const uint16_t den) +__attribute__((always_inline)) +__attribute__((optimize("unroll-all-loops", "merge-all-constants"))) + static inline uint16_t div_16(const uint16_t num, const uint16_t den) { uint8_t exp; @@ -299,8 +301,9 @@ __attribute__((always_inline, optimize("unroll-all-loops"))) static inline * * @return result */ -__attribute__((always_inline, optimize("unroll-all-loops"))) static inline - uint32_t div_32(const uint32_t num, const uint32_t den) +__attribute__((always_inline)) +__attribute__((optimize("unroll-all-loops", "merge-all-constants"))) + static inline uint32_t div_32(const uint32_t num, const uint32_t den) { uint8_t exp; @@ -329,8 +332,9 @@ __attribute__((always_inline, optimize("unroll-all-loops"))) static inline * * @return result */ -__attribute__((always_inline, optimize("unroll-all-loops"))) static inline - uint64_t div_64(const uint64_t num, const uint64_t den) 
+__attribute__((always_inline)) +__attribute__((optimize("unroll-all-loops", "merge-all-constants"))) + static inline uint64_t div_64(const uint64_t num, const uint64_t den) { uint8_t exp;