From 740918a316299dd4a0d899a7ecf5e9f0517cb9f1 Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Thu, 22 Dec 2016 21:07:21 +0300 Subject: [PATCH 1/9] 1) const u32 tok = delta_ms * (car->cir / (BITS_PER_BYTE * MSEC_PER_SEC)); MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Раccчет tok не зависит от переменных, изменяемых внутри критической секции (cs). Он зависит от delte_ms, которая рассчитывается вне cs и от cir - фактически константа. Если в рассчет tok подставить формул delta_ms, пыполнить сокращения, то останется (now - car->last) * (car->cir / (HZ * BITS_PER_BYTE)) Можно выссчитывать вот эту часть заранее, избавившись от лишнего деления (он ведь дорогие, вроде дороже умножения). car->cir_hz = (car->cir / (HZ * BITS_PER_BYTE)) В итоге получим const u32 tok = (now - car->last) * car->cir_hz; Конечно, добавится умножение в выводе статистике, она же должна много реже выводиться. 2) критическую секцию можно еще упростить, вынести статистику во вне и переведя ее на atomic операции. Правда в случае включенного RATE_ESTIMATOR укоратить cs вроде не получается, но вот я бы у себя его выключил. Это ведь тоже только статистика, правильно? 3) переменная struct ratelimit_stat.first нигде не используется. просто удалил. 
--- xt_ratelimit.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index 1f184bb..bb0fe74 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -85,15 +85,14 @@ struct ratelimit_car { u32 cbs; /* committed burst size (bytes) */ u32 ebs; /* extended burst size (bytes) */ - u32 cir; /* committed information rate (bits/s) */ + u32 cir_hz; /* committed information rate (bits/s) / (HZ * 8) */ }; struct ratelimit_stat { - u64 green_bytes; - u64 red_bytes; - u32 green_pkt; - u32 red_pkt; - unsigned long first; /* first time seen */ + atomic64_t green_bytes; + atomic64_t red_bytes; + atomic_t green_pkt; + atomic_t red_pkt; #ifdef RATE_ESTIMATOR #define RATEST_SECONDS 4 /* length of rate estimator time slot */ @@ -198,7 +197,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, &ent->matches[i].addr); } seq_printf(s, " cir %u cbs %u ebs %u;", - ent->car.cir, ent->car.cbs, ent->car.ebs); + ent->car.cir_hz * (HZ * BITS_PER_BYTE), ent->car.cbs, ent->car.ebs); seq_printf(s, " tc %u te %u last", ent->car.tc, ent->car.te); if (ent->car.last) @@ -207,14 +206,16 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, seq_printf(s, " never;"); seq_printf(s, " conf %u/%llu", - ent->stat.green_pkt, ent->stat.green_bytes); + (u32)atomic_read(&ent->stat.green_pkt), + (u64)atomic64_read(&ent->stat.green_bytes)); #ifdef RATE_ESTIMATOR seq_printf(s, " %lu bps", calc_rate_est(&ent->stat)); #endif seq_printf(s, ", rej %u/%llu", - ent->stat.red_pkt, ent->stat.red_bytes); + (u32)atomic_read(&ent->stat.red_pkt), + (u64)atomic64_read(&ent->stat.red_bytes)); seq_printf(s, "\n"); @@ -442,7 +443,7 @@ static int parse_rule(struct xt_ratelimit_htable *ht, char *str, size_t size) val = simple_strtoul(v, NULL, 10); switch (i) { case 0: - ent->car.cir = val; + ent->car.cir_hz = val / (HZ * BITS_PER_BYTE); /* autoconfigure optimal parameters */ val = val / 8 + (val / 8 / 2); /* 
FALLTHROUGH */ @@ -841,37 +842,40 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ent) { struct ratelimit_car *car = &ent->car; const unsigned int len = skb->len; /* L3 */ - const unsigned long delta_ms = (now - car->last) * (MSEC_PER_SEC / HZ); + const u32 tok = (now - car->last) * car->cir_hz; spin_lock(&ent->lock_bh); car->tc += len; - if (delta_ms) { - const u32 tok = delta_ms * (car->cir / (BITS_PER_BYTE * MSEC_PER_SEC)); - + if (tok) { car->tc -= min(tok, car->tc); - if (!ent->stat.first) - ent->stat.first = now; car->last = now; } if (car->tc > car->cbs) { /* extended burst */ car->te += car->tc - car->cbs; if (car->te > car->ebs) { car->te = 0; + car->tc -= len; match = true; /* match is drop */ } } +#ifndef RATE_ESTIMATOR + spin_unlock(&ent->lock_bh); +#endif + if (match) { - ent->stat.red_bytes += len; - ent->stat.red_pkt++; - car->tc -= len; + atomic64_add(len, &ent->stat.red_bytes); + atomic_inc(&ent->stat.red_pkt); } else { - ent->stat.green_bytes += len; - ent->stat.green_pkt++; + atomic64_add(len, &ent->stat.green_bytes); + atomic_inc(&ent->stat.green_pkt); #ifdef RATE_ESTIMATOR rate_estimator(&ent->stat, now / RATEST_JIFFIES, len); #endif } + +#ifdef RATE_ESTIMATOR spin_unlock(&ent->lock_bh); +#endif } else { if (ht->other == OT_MATCH) match = true; /* match is drop */ From a7b2638770c8f1fa77472768294c6640ae69ca28 Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Fri, 23 Dec 2016 10:47:48 +0300 Subject: [PATCH 2/9] unnecessary ifdef RATE_ESTIMATOR is removed cir_hz is renamed back to cir --- xt_ratelimit.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index bb0fe74..681467b 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -85,7 +85,7 @@ struct ratelimit_car { u32 cbs; /* committed burst size (bytes) */ u32 ebs; /* extended burst size (bytes) */ - u32 cir_hz; /* committed information rate (bits/s) / (HZ * 8) */ + u32 cir; /* committed 
information rate (bits/s) / (HZ * 8) */ }; struct ratelimit_stat { @@ -197,7 +197,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, &ent->matches[i].addr); } seq_printf(s, " cir %u cbs %u ebs %u;", - ent->car.cir_hz * (HZ * BITS_PER_BYTE), ent->car.cbs, ent->car.ebs); + ent->car.cir * (HZ * BITS_PER_BYTE), ent->car.cbs, ent->car.ebs); seq_printf(s, " tc %u te %u last", ent->car.tc, ent->car.te); if (ent->car.last) @@ -443,7 +443,7 @@ static int parse_rule(struct xt_ratelimit_htable *ht, char *str, size_t size) val = simple_strtoul(v, NULL, 10); switch (i) { case 0: - ent->car.cir_hz = val / (HZ * BITS_PER_BYTE); + ent->car.cir = val / (HZ * BITS_PER_BYTE); /* autoconfigure optimal parameters */ val = val / 8 + (val / 8 / 2); /* FALLTHROUGH */ @@ -842,7 +842,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ent) { struct ratelimit_car *car = &ent->car; const unsigned int len = skb->len; /* L3 */ - const u32 tok = (now - car->last) * car->cir_hz; + const u32 tok = (now - car->last) * car->cir; spin_lock(&ent->lock_bh); car->tc += len; @@ -858,9 +858,13 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) match = true; /* match is drop */ } } -#ifndef RATE_ESTIMATOR - spin_unlock(&ent->lock_bh); + +#ifdef RATE_ESTIMATOR + if (!match) { + rate_estimator(&ent->stat, now / RATEST_JIFFIES, len); + } #endif + spin_unlock(&ent->lock_bh); if (match) { atomic64_add(len, &ent->stat.red_bytes); @@ -868,14 +872,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } else { atomic64_add(len, &ent->stat.green_bytes); atomic_inc(&ent->stat.green_pkt); -#ifdef RATE_ESTIMATOR - rate_estimator(&ent->stat, now / RATEST_JIFFIES, len); -#endif } - -#ifdef RATE_ESTIMATOR - spin_unlock(&ent->lock_bh); -#endif } else { if (ht->other == OT_MATCH) match = true; /* match is drop */ From e2f5d7f9bcd790bfb41262a962c67932f8d15c41 Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Fri, 23 Dec 2016 14:20:56 
+0300 Subject: [PATCH 3/9] a debug lockless version: most of the time atomic is used. critical section is entered only when bucket is full. --- xt_ratelimit.c | 50 ++++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index 681467b..8819386 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -80,7 +80,7 @@ struct ratelimit_net { /* CAR accounting */ struct ratelimit_car { unsigned long last; /* last refill (jiffies) */ - u32 tc; /* committed token bucket counter */ + atomic_t tc; /* committed token bucket counter */ u32 te; /* exceeded token bucket counter */ u32 cbs; /* committed burst size (bytes) */ @@ -199,7 +199,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, seq_printf(s, " cir %u cbs %u ebs %u;", ent->car.cir * (HZ * BITS_PER_BYTE), ent->car.cbs, ent->car.ebs); - seq_printf(s, " tc %u te %u last", ent->car.tc, ent->car.te); + seq_printf(s, " tc %u te %u last", (u32)atomic_read(&ent->car.tc), ent->car.te); if (ent->car.last) seq_printf(s, " %ld;", jiffies - ent->car.last); else @@ -839,34 +839,40 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) rcu_read_lock(); ent = ratelimit_match_find(ht, addr); - if (ent) { + if (likely(ent)) { struct ratelimit_car *car = &ent->car; const unsigned int len = skb->len; /* L3 */ - const u32 tok = (now - car->last) * car->cir; - spin_lock(&ent->lock_bh); - car->tc += len; - if (tok) { - car->tc -= min(tok, car->tc); - car->last = now; - } - if (car->tc > car->cbs) { /* extended burst */ - car->te += car->tc - car->cbs; - if (car->te > car->ebs) { - car->te = 0; - car->tc -= len; - match = true; /* match is drop */ + atomic_add(len, &car->tc); + + if (unlikely((u32)atomic_read(&car->tc) > car->cbs)) { + u32 tc; + const u32 tok = (now - car->last) * car->cir; + spin_lock(&ent->lock_bh); + + if (likely(tok)) { + atomic_sub(min(tok, (u32)atomic_read(&car->tc)), &car->tc); + car->last = now; } - } + 
tc = (u32)atomic_read(&car->tc); + if (tc > car->cbs) { /* extended burst */ + car->te += tc - car->cbs; + if (car->te > car->ebs) { + car->te = 0; + atomic_sub(len, &car->tc); + match = true; /* match is drop */ + } + } #ifdef RATE_ESTIMATOR - if (!match) { - rate_estimator(&ent->stat, now / RATEST_JIFFIES, len); - } + if (!match) { + rate_estimator(&ent->stat, now / RATEST_JIFFIES, len); + } #endif - spin_unlock(&ent->lock_bh); + spin_unlock(&ent->lock_bh); + } - if (match) { + if (likely(match)) { atomic64_add(len, &ent->stat.red_bytes); atomic_inc(&ent->stat.red_pkt); } else { From 4d149176126e9412c912fc1caf46b6a55968c962 Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Fri, 23 Dec 2016 19:45:58 +0300 Subject: [PATCH 4/9] fast/slow path -- debug version #2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Я предполагаю, что не меняя конечного результата, т.е. для каждого пакета итоговое решение не должно поменяться по сравнению с оригинальным алогоритмом, можно изменить сам алгоритм обработки пакетов, так чтобы большинство из них обрабатывались быстрее, проходя простой и короткий путь без взаимных блокировок. Допустим, пришло пять пакетов. Не важно один и несколько потоков. Оригинальный алогоритм для каждого пакета пытается выполнить два шага (пока не берем в рассчет переполнение корзины): 1) добавить рамер пакета в корзину; 2) если накопились токены (прошло время) вычесть из корзины накопленные токены; Допустим в корзине достаточно токенов для обработки первых 4 пакетов. Тогда для каждого пакета будет выполняться только вычетание. Пакеты чаще идут быстро друг за другом, поэтому пополнений корзины не будет. Переполнений пока тоже нет. Для для таких случаев можно написать алгоритм только с вычитанием, назвав его условно fast path. 
Отличия от предыдущего патча в том, что теперь я отслеживаю конфликты гонок с помощью СAS (compare and swap), гарантированно соблюдая условие достаточности токенов для каждого отдельного пакета. На этом этапе можно не думать о пополнениях корзины, даже если и пора. Это можно сделать позже, накопления не пропадут, т.к. время last пока не меняется. Пробуя пройти fast path для 5ого последний пакета, замечаем, что в корзине переполнение и идем по медленному пути с критической секцией. Здесь все внутри секции, никаких сюрпризов. И последний шаг. Fast path могут пересекаться с другими fast path, они thread safe за счет CAS. Но, fast path одного потока не должен пересекаться с CS slow path другого. Для этого добавляем exlusive/shared (или read/write) блокировку вместо spinlock'а. Shared блокировки дешевые, не должны влиять на производительно быстрых секций. Правда exlusive дороже, чем обычная. Предполагаем, что в итоге суммарная польза от одновременной работы быстрых, путей превысет намного потерю от более дорогих, но редких exclusive (write) блокировок. P.S. Снова пока не думал про estimator. 
--- xt_ratelimit.c | 66 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index 8819386..d75c0e9 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -42,6 +42,7 @@ #include #include "compat.h" #include "xt_ratelimit.h" +#include #define XT_RATELIMIT_VERSION "0.2" #include "version.h" @@ -116,7 +117,7 @@ struct ratelimit_ent { int mtcnt; /* size of matches[mtcnt] */ struct ratelimit_stat stat; struct ratelimit_car car; - spinlock_t lock_bh; + rwlock_t lock_bh; /* variable sized array for actual hash entries, it's * to optimize memory allocation and data locality @@ -190,7 +191,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, return 0; /* lock for consistent reads from the counters */ - spin_lock_bh(&ent->lock_bh); + write_lock_bh(&ent->lock_bh); for (i = 0; i < ent->mtcnt; i++) { seq_printf(s, "%s%pI4", i == 0? "" : ",", @@ -219,7 +220,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, seq_printf(s, "\n"); - spin_unlock_bh(&ent->lock_bh); + write_unlock_bh(&ent->lock_bh); return seq_has_overflowed(s); } @@ -395,7 +396,7 @@ static int parse_rule(struct xt_ratelimit_htable *ht, char *str, size_t size) if (!ent) return -ENOMEM; - spin_lock_init(&ent->lock_bh); + rwlock_init(&ent->lock_bh); for (i = 0, p = str; p < endp && *p && in4_pton(p, size - (p - str), (u8 *)&addr, -1, &p); ++p, ++i) { @@ -504,9 +505,9 @@ static int parse_rule(struct xt_ratelimit_htable *ht, char *str, size_t size) if (add) { if (ent_chk) { /* update */ - spin_lock_bh(&ent_chk->lock_bh); + write_lock_bh(&ent_chk->lock_bh); ent_chk->car = ent->car; - spin_unlock_bh(&ent_chk->lock_bh); + write_unlock_bh(&ent_chk->lock_bh); } else { ratelimit_ent_add(ht, ent); ent = NULL; @@ -831,6 +832,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) const unsigned long now = jiffies; __be32 addr; int match = false; /* no match, no drop */ + int slow_path = false; 
if (mtinfo->mode & XT_RATELIMIT_DST) addr = ip_hdr(skb)->daddr; @@ -842,40 +844,70 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (likely(ent)) { struct ratelimit_car *car = &ent->car; const unsigned int len = skb->len; /* L3 */ + u32 tc, new_tc, tok; + + /* try a fast path first */ + read_lock(&ent->lock_bh); + while (1) { /* cas loop */ + tc = (u32) atomic_read(&car->tc); + new_tc = tc + len; + + /* is there are enough tokens in the bucket to pass a packet? */ + if (likely(new_tc <= car->cbs)) { + if (atomic_cmpxchg(&car->tc, tc, new_tc)) { + /* success, no concurrent updates */ + break; + } + /* CAS failed, the bucket was updated by another thread, + * try one more time + */ + } + else { + /* not enough tokens, we have to take a slow path */ + slow_path = 1; + break; + } + } + read_unlock(&ent->lock_bh); - atomic_add(len, &car->tc); + if (unlikely(slow_path)) { + write_lock(&ent->lock_bh); - if (unlikely((u32)atomic_read(&car->tc) > car->cbs)) { - u32 tc; - const u32 tok = (now - car->last) * car->cir; - spin_lock(&ent->lock_bh); + /* read tc */ + tc = (u32) atomic_read(&car->tc); + tc += len; - if (likely(tok)) { - atomic_sub(min(tok, (u32)atomic_read(&car->tc)), &car->tc); + /* tok to add */ + tok = (now - car->last) * car->cir; + if (tok) { + tc -= min(tok, tc); car->last = now; } - tc = (u32)atomic_read(&car->tc); if (tc > car->cbs) { /* extended burst */ car->te += tc - car->cbs; if (car->te > car->ebs) { car->te = 0; - atomic_sub(len, &car->tc); + tc -= len; match = true; /* match is drop */ } } + /* store tc */ + atomic_set(&car->tc, tc); + #ifdef RATE_ESTIMATOR if (!match) { rate_estimator(&ent->stat, now / RATEST_JIFFIES, len); } #endif - spin_unlock(&ent->lock_bh); + write_unlock(&ent->lock_bh); } if (likely(match)) { atomic64_add(len, &ent->stat.red_bytes); atomic_inc(&ent->stat.red_pkt); - } else { + } + else { atomic64_add(len, &ent->stat.green_bytes); atomic_inc(&ent->stat.green_pkt); } From 
c20a11dd0bde05b551a84d04e7d3aec78f0580ba Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Tue, 27 Dec 2016 03:20:31 +0300 Subject: [PATCH 5/9] a lock-less debug version --- xt_ratelimit.c | 114 +++++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index d75c0e9..fd32af5 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -42,7 +42,6 @@ #include #include "compat.h" #include "xt_ratelimit.h" -#include #define XT_RATELIMIT_VERSION "0.2" #include "version.h" @@ -78,11 +77,22 @@ struct ratelimit_net { struct proc_dir_entry *ipt_ratelimit; }; +typedef union { + struct { + u32 tc; + u32 te; + } flds; + long long_v; +} +tce_t; + /* CAR accounting */ struct ratelimit_car { unsigned long last; /* last refill (jiffies) */ - atomic_t tc; /* committed token bucket counter */ - u32 te; /* exceeded token bucket counter */ + + /* committed token bucket counter */ + /* exceeded token bucket counter */ + tce_t tce; u32 cbs; /* committed burst size (bytes) */ u32 ebs; /* extended burst size (bytes) */ @@ -117,7 +127,7 @@ struct ratelimit_ent { int mtcnt; /* size of matches[mtcnt] */ struct ratelimit_stat stat; struct ratelimit_car car; - rwlock_t lock_bh; + spinlock_t lock_bh; /* variable sized array for actual hash entries, it's * to optimize memory allocation and data locality @@ -191,7 +201,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, return 0; /* lock for consistent reads from the counters */ - write_lock_bh(&ent->lock_bh); + spin_lock_bh(&ent->lock_bh); for (i = 0; i < ent->mtcnt; i++) { seq_printf(s, "%s%pI4", i == 0? 
"" : ",", @@ -200,7 +210,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, seq_printf(s, " cir %u cbs %u ebs %u;", ent->car.cir * (HZ * BITS_PER_BYTE), ent->car.cbs, ent->car.ebs); - seq_printf(s, " tc %u te %u last", (u32)atomic_read(&ent->car.tc), ent->car.te); + seq_printf(s, " tc %u te %u last", ent->car.tce.flds.tc, ent->car.tce.flds.te); if (ent->car.last) seq_printf(s, " %ld;", jiffies - ent->car.last); else @@ -220,7 +230,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, seq_printf(s, "\n"); - write_unlock_bh(&ent->lock_bh); + spin_unlock_bh(&ent->lock_bh); return seq_has_overflowed(s); } @@ -396,7 +406,7 @@ static int parse_rule(struct xt_ratelimit_htable *ht, char *str, size_t size) if (!ent) return -ENOMEM; - rwlock_init(&ent->lock_bh); + spin_lock_init(&ent->lock_bh); for (i = 0, p = str; p < endp && *p && in4_pton(p, size - (p - str), (u8 *)&addr, -1, &p); ++p, ++i) { @@ -505,9 +515,9 @@ static int parse_rule(struct xt_ratelimit_htable *ht, char *str, size_t size) if (add) { if (ent_chk) { /* update */ - write_lock_bh(&ent_chk->lock_bh); + spin_lock_bh(&ent_chk->lock_bh); ent_chk->car = ent->car; - write_unlock_bh(&ent_chk->lock_bh); + spin_unlock_bh(&ent_chk->lock_bh); } else { ratelimit_ent_add(ht, ent); ent = NULL; @@ -832,7 +842,6 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) const unsigned long now = jiffies; __be32 addr; int match = false; /* no match, no drop */ - int slow_path = false; if (mtinfo->mode & XT_RATELIMIT_DST) addr = ip_hdr(skb)->daddr; @@ -841,73 +850,66 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) rcu_read_lock(); ent = ratelimit_match_find(ht, addr); - if (likely(ent)) { + if (ent) { struct ratelimit_car *car = &ent->car; const unsigned int len = skb->len; /* L3 */ - u32 tc, new_tc, tok; - - /* try a fast path first */ - read_lock(&ent->lock_bh); - while (1) { /* cas loop */ - tc = (u32) atomic_read(&car->tc); - new_tc = tc + len; - - /* is 
there are enough tokens in the bucket to pass a packet? */ - if (likely(new_tc <= car->cbs)) { - if (atomic_cmpxchg(&car->tc, tc, new_tc)) { - /* success, no concurrent updates */ - break; - } - /* CAS failed, the bucket was updated by another thread, - * try one more time - */ - } - else { - /* not enough tokens, we have to take a slow path */ - slow_path = 1; - break; + + unsigned long time_passed, last, old; + u32 tc, te, tok; + tce_t tce, new_tce; + long old_tce; + + /* + * calculate how much time has passed since the last update + * and then update last update time + */ + while (true) { /* cas loop */ + last = car->last; + time_passed = now - last; + old = atomic64_cmpxchg((atomic64_t*) &car->last, last, now); + if (old == last) { + break; /* success */ } } - read_unlock(&ent->lock_bh); - if (unlikely(slow_path)) { - write_lock(&ent->lock_bh); + /* + * process packet's bytes + */ + while (true) { /* cas loop */ + tce = car->tce; + tc = tce.flds.tc; + te = tce.flds.te; - /* read tc */ - tc = (u32) atomic_read(&car->tc); tc += len; - /* tok to add */ - tok = (now - car->last) * car->cir; - if (tok) { + if (time_passed) { + tok = time_passed * car->cir; tc -= min(tok, tc); - car->last = now; } if (tc > car->cbs) { /* extended burst */ - car->te += tc - car->cbs; - if (car->te > car->ebs) { - car->te = 0; + te += tc - car->cbs; + if (te > car->ebs) { + te = 0; tc -= len; - match = true; /* match is drop */ + match = true; /* drop */ } } - /* store tc */ - atomic_set(&car->tc, tc); -#ifdef RATE_ESTIMATOR - if (!match) { - rate_estimator(&ent->stat, now / RATEST_JIFFIES, len); + /* new tce */ + new_tce.flds.tc = tc; + new_tce.flds.te = te; + old_tce = atomic64_cmpxchg((atomic64_t*) &car->tce.long_v, tce.long_v, + new_tce.long_v); + if (old_tce == tce.long_v) { + break; /* success */ } -#endif - write_unlock(&ent->lock_bh); } if (likely(match)) { atomic64_add(len, &ent->stat.red_bytes); atomic_inc(&ent->stat.red_pkt); - } - else { + } else { atomic64_add(len, 
&ent->stat.green_bytes); atomic_inc(&ent->stat.green_pkt); } From 45a7a80eb85619f59d8cb8cf60f6282f433f3cd0 Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Tue, 27 Dec 2016 03:57:05 +0300 Subject: [PATCH 6/9] ratelimit_car->last type is changed to atomic64_t --- xt_ratelimit.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index fd32af5..c109e48 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -88,7 +88,7 @@ tce_t; /* CAR accounting */ struct ratelimit_car { - unsigned long last; /* last refill (jiffies) */ + atomic64_t last; /* last refill (jiffies) */ /* committed token bucket counter */ /* exceeded token bucket counter */ @@ -194,6 +194,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, { struct ratelimit_ent *ent = mt->ent; int i; + unsigned long last; /* to print entities only once, we print only entities * where match is first element */ @@ -211,8 +212,9 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, ent->car.cir * (HZ * BITS_PER_BYTE), ent->car.cbs, ent->car.ebs); seq_printf(s, " tc %u te %u last", ent->car.tce.flds.tc, ent->car.tce.flds.te); - if (ent->car.last) - seq_printf(s, " %ld;", jiffies - ent->car.last); + last = atomic64_read(&ent->car.last); + if (last) + seq_printf(s, " %ld;", jiffies - last); else seq_printf(s, " never;"); @@ -854,7 +856,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ratelimit_car *car = &ent->car; const unsigned int len = skb->len; /* L3 */ - unsigned long time_passed, last, old; + unsigned long time_passed, old, last; u32 tc, te, tok; tce_t tce, new_tce; long old_tce; @@ -864,9 +866,9 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) * and then update last update time */ while (true) { /* cas loop */ - last = car->last; + last = atomic64_read(&car->last); time_passed = now - last; - old = atomic64_cmpxchg((atomic64_t*) &car->last, last, now); + old = 
atomic64_cmpxchg(&car->last, last, now); if (old == last) { break; /* success */ } } From e57264fac37732704f32ba3306c31e80797fc0f6 Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Tue, 27 Dec 2016 04:12:19 +0300 Subject: [PATCH 7/9] a small bug has been fixed: match value should be calculated in each cas loop iteration --- xt_ratelimit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index c109e48..a26abfe 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -843,7 +843,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ratelimit_ent *ent; const unsigned long now = jiffies; __be32 addr; - int match = false; /* no match, no drop */ + int match; /* no match, no drop */ if (mtinfo->mode & XT_RATELIMIT_DST) addr = ip_hdr(skb)->daddr; @@ -881,6 +881,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) tce = car->tce; tc = tce.flds.tc; te = tce.flds.te; + match = false; tc += len; From 91e53ee815500ff6936ca4b8c358841e8763c505 Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Tue, 27 Dec 2016 04:16:24 +0300 Subject: [PATCH 8/9] a small bug has been fixed: match value should be calculated in each cas loop iteration --- xt_ratelimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index a26abfe..152a727 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -843,7 +843,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ratelimit_ent *ent; const unsigned long now = jiffies; __be32 addr; - int match; /* no match, no drop */ + int match = false; /* no match, no drop */ if (mtinfo->mode & XT_RATELIMIT_DST) addr = ip_hdr(skb)->daddr; From c5c70d3216a13f9d250b3bf41578e375b4275857 Mon Sep 17 00:00:00 2001 From: Alexander Kiselev Date: Tue, 27 Dec 2016 20:18:34 +0300 Subject: [PATCH 9/9] car->tce type is changed to atomic64_t --- xt_ratelimit.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5
deletions(-) diff --git a/xt_ratelimit.c b/xt_ratelimit.c index 152a727..b413775 100644 --- a/xt_ratelimit.c +++ b/xt_ratelimit.c @@ -92,7 +92,7 @@ struct ratelimit_car { /* committed token bucket counter */ /* exceeded token bucket counter */ - tce_t tce; + atomic64_t tce; u32 cbs; /* committed burst size (bytes) */ u32 ebs; /* extended burst size (bytes) */ @@ -195,6 +195,7 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, struct ratelimit_ent *ent = mt->ent; int i; unsigned long last; + tce_t tce; /* to print entities only once, we print only entities * where match is first element */ @@ -211,7 +212,9 @@ static int ratelimit_seq_ent_show(struct ratelimit_match *mt, seq_printf(s, " cir %u cbs %u ebs %u;", ent->car.cir * (HZ * BITS_PER_BYTE), ent->car.cbs, ent->car.ebs); - seq_printf(s, " tc %u te %u last", ent->car.tce.flds.tc, ent->car.tce.flds.te); + tce.long_v = atomic64_read(&ent->car.tce); + seq_printf(s, " tc %u te %u last", tce.flds.tc, tce.flds.te); + last = atomic64_read(&ent->car.last); if (last) seq_printf(s, " %ld;", jiffies - last); @@ -878,7 +881,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) * process packet's bytes */ while (true) { /* cas loop */ - tce = car->tce; + tce.long_v = atomic64_read(&car->tce); tc = tce.flds.tc; te = tce.flds.te; match = false; @@ -902,8 +905,7 @@ ratelimit_mt(const struct sk_buff *skb, struct xt_action_param *par) /* new tce */ new_tce.flds.tc = tc; new_tce.flds.te = te; - old_tce = atomic64_cmpxchg((atomic64_t*) &car->tce.long_v, tce.long_v, - new_tce.long_v); + old_tce = atomic64_cmpxchg(&car->tce, tce.long_v, new_tce.long_v); if (old_tce == tce.long_v) { break; /* success */ }