Skip to content

Commit ce14147

Browse files
committed
Implement clearing the statistics data daily
Gitignore the webring.log Implement clearing the statistics data daily
1 parent 4764f76 commit ce14147

File tree

5 files changed

+104
-71
lines changed

5 files changed

+104
-71
lines changed

Cargo.lock

Lines changed: 6 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ missing-docs-in-private-items = "warn"
2020

2121
[dependencies]
2222
axum = { version = "0.8.4", default-features = false, features = [ "http1", "http2", "tokio", "tracing", "query" ] }
23-
chrono = { version = "0.4.41", default-features = false, features = [ "serde", "now" ] }
23+
chrono = { version = "0.4.42", default-features = false, features = [ "serde", "now" ] }
2424
clap = { version = "4.5.41", features = ["derive"] }
2525
eyre = "0.6.12"
2626
futures = { version = "0.3.31", default-features = false, features = [ "alloc" ] }
@@ -34,6 +34,7 @@ rand = { version = "0.9.2", default-features = false, features = [ "thread_rng"
3434
reqwest = { version = "0.12.22", features = ["stream", "json"] }
3535
same-file = "1.0.6"
3636
sarlacc = "0.1.4"
37+
seize = "0.5.1"
3738
# sarlacc = { path = "../sarlacc" }
3839
serde = { version = "1.0.219", features = ["derive"] }
3940
serde_json = "1.0.141"

src/routes.rs

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ mod tests {
489489
http::{Request, Uri, header},
490490
response::IntoResponse,
491491
};
492-
use chrono::Utc;
492+
493493
use eyre::eyre;
494494
use http_body_util::BodyExt;
495495
use indoc::indoc;
@@ -501,7 +501,7 @@ mod tests {
501501
use tower_http::catch_panic::ResponseForPanic;
502502

503503
use crate::{
504-
stats::{TIMEZONE, UNKNOWN_ORIGIN},
504+
stats::UNKNOWN_ORIGIN,
505505
webring::Webring,
506506
};
507507

@@ -559,8 +559,6 @@ mod tests {
559559
async fn index() {
560560
let (router, webring, tmpfiles) = app().await;
561561

562-
let today = Utc::now().with_timezone(&TIMEZONE).date_naive();
563-
564562
// Request `/`
565563
let res = router
566564
.oneshot(
@@ -579,7 +577,7 @@ mod tests {
579577
assert_eq!("Hello homepage!", text);
580578
assert_eq!(status, StatusCode::OK);
581579
webring.assert_stat_entry(
582-
(today, "kasad.com", "ring.purduehackers.com", "kasad.com"),
580+
("kasad.com", "ring.purduehackers.com", "kasad.com"),
583581
1,
584582
);
585583

@@ -590,8 +588,6 @@ mod tests {
590588
async fn index_unknown_referer() {
591589
let (router, webring, tmpfiles) = app().await;
592590

593-
let today = Utc::now().with_timezone(&TIMEZONE).date_naive();
594-
595591
let res = router
596592
.oneshot(
597593
Request::builder()
@@ -610,7 +606,6 @@ mod tests {
610606
assert_eq!(status, StatusCode::OK);
611607
webring.assert_stat_entry(
612608
(
613-
today,
614609
UNKNOWN_ORIGIN.as_str(),
615610
"ring.purduehackers.com",
616611
UNKNOWN_ORIGIN.as_str(),
@@ -625,8 +620,6 @@ mod tests {
625620
async fn visit() {
626621
let (router, webring, tmpfiles) = app().await;
627622

628-
let today = Utc::now().with_timezone(&TIMEZONE).date_naive();
629-
630623
let res = router
631624
.oneshot(
632625
Request::builder()
@@ -644,7 +637,6 @@ mod tests {
644637
assert_eq!(res.status(), StatusCode::SEE_OTHER);
645638
webring.assert_stat_entry(
646639
(
647-
today,
648640
"ring.purduehackers.com",
649641
"clementine.viridian.page",
650642
"ring.purduehackers.com",

src/stats/mod.rs

Lines changed: 87 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@ use std::{
88
net::IpAddr,
99
sync::{
1010
LazyLock,
11-
atomic::{AtomicU64, Ordering},
11+
atomic::{AtomicI32, AtomicPtr, AtomicU64, Ordering},
1212
},
1313
};
1414

1515
use axum::http::uri::Authority;
16-
use chrono::{DateTime, Duration, FixedOffset, NaiveDate, Utc};
17-
use papaya::HashMap;
16+
use chrono::{DateTime, Duration, FixedOffset, Utc};
17+
use papaya::{Guard, HashMap};
1818
use sarlacc::Intern;
19+
use seize::Collector;
1920
use tracing::{info, instrument};
2021

2122
/// The TTL for IP tracking entries, after which they are considered stale and removed.
@@ -36,22 +37,67 @@ struct IpInfo {
3637
started_from: Intern<Authority>,
3738
}
3839

39-
#[derive(Debug, Default)]
40+
type Counts = HashMap<(Intern<Authority>, Intern<Authority>, Intern<Authority>), AtomicU64>;
41+
42+
/// The counts for all of the possible webring redirects
43+
#[derive(Debug)]
4044
struct AggregatedStats {
41-
/// (Date (with timezone `TIMEZONE`), From, To, Started From) → Count
42-
#[expect(clippy::type_complexity)]
43-
counters: HashMap<
44-
(
45-
NaiveDate,
46-
Intern<Authority>,
47-
Intern<Authority>,
48-
Intern<Authority>,
49-
),
50-
AtomicU64,
51-
>,
45+
/// The collector for the atomic data that we're handling
46+
collector: Collector,
47+
/// The last date that a redirect was tracked (hopefully today)
48+
today: AtomicI32,
49+
/// (From, To, Started From) → Count
50+
/// Invariant: This MUST ALWAYS be a valid pointer
51+
counter: AtomicPtr<Counts>,
52+
}
53+
54+
impl AggregatedStats {
55+
/// Create a new `AggregatedStats`
56+
fn new(now: DateTime<Utc>) -> Self {
57+
let counter: Counts = HashMap::default();
58+
59+
AggregatedStats {
60+
today: AtomicI32::new(Self::mk_num(now)),
61+
counter: AtomicPtr::new(Box::into_raw(Box::new(counter))),
62+
collector: Collector::new(),
63+
}
64+
}
65+
66+
/// Convert the current time into the days since the epoch
67+
fn mk_num(time: DateTime<Utc>) -> i32 {
68+
time.date_naive().to_epoch_days()
69+
}
70+
71+
/// Retrieve the current counter from a guard
72+
fn counter<'a>(&'a self, guard: &'a impl Guard) -> &'a Counts {
73+
// SAFETY: The counter is guaranteed to be a valid pointer and we are using Acquire ordering to synchronize-with its initialization
74+
unsafe { &*guard.protect(&self.counter, Ordering::Acquire) }
75+
}
76+
77+
/// Retrieve the current counter from a guard while updating it if the current time is a new calendar date
78+
fn maybe_update_counter<'a>(&'a self, now: DateTime<Utc>, guard: &'a impl Guard) -> &'a Counts {
79+
let now = AggregatedStats::mk_num(now);
80+
81+
let prev_day = self.today.swap(now, Ordering::Relaxed);
82+
83+
if prev_day != now {
84+
let new_counter: *mut Counts = Box::into_raw(Box::new(HashMap::new()));
85+
86+
// Release to synchronize-with `counter`. We don't need Acquire because we won't read the previous pointer.
87+
let prev = guard.swap(&self.counter, new_counter, Ordering::Release);
88+
// SAFETY: The pointer can no longer be accessed now that it has been swapped into `prev`, and `Box::from_raw` is the correct way to drop the pointer.
89+
unsafe {
90+
self.collector
91+
.retire(prev, |ptr, _| drop(Box::from_raw(ptr)));
92+
}
93+
}
94+
95+
self.counter(guard)
96+
}
5297
}
5398

54-
#[derive(Debug, Default)]
99+
/// Statistics tracking for the webring
100+
#[derive(Debug)]
55101
pub struct Stats {
56102
/// Aggregated statistics
57103
aggregated: AggregatedStats,
@@ -61,9 +107,9 @@ pub struct Stats {
61107

62108
impl Stats {
63109
/// Creates a new instance of `Stats`.
64-
pub fn new() -> Stats {
110+
pub fn new(now: DateTime<Utc>) -> Stats {
65111
Stats {
66-
aggregated: AggregatedStats::default(),
112+
aggregated: AggregatedStats::new(now),
67113
ip_tracking: HashMap::new(),
68114
}
69115
}
@@ -96,11 +142,10 @@ impl Stats {
96142
},
97143
);
98144

99-
let date = now.with_timezone(&TIMEZONE).date_naive();
100-
101-
let pinned_map = self.aggregated.counters.pin();
145+
let guard = self.aggregated.collector.enter();
146+
let pinned_map = self.aggregated.maybe_update_counter(now, &guard).pin();
102147
let counter =
103-
pinned_map.get_or_insert((date, from, to, ip_info.started_from), AtomicU64::new(0));
148+
pinned_map.get_or_insert((from, to, ip_info.started_from), AtomicU64::new(0));
104149

105150
counter.fetch_add(1, Ordering::Relaxed);
106151
}
@@ -125,17 +170,19 @@ impl Stats {
125170
}
126171

127172
#[cfg(test)]
128-
pub fn assert_stat_entry(&self, entry: (NaiveDate, &str, &str, &str), count: u64) {
173+
pub fn assert_stat_entry(&self, entry: (&str, &str, &str), count: u64) {
174+
let guard = self.aggregated.collector.enter();
129175
assert_eq!(
130176
self.aggregated
131-
.counters
177+
.counter(&guard)
132178
.pin()
133-
.get(&(
134-
entry.0,
135-
Intern::new(entry.1.parse::<Authority>().unwrap()),
136-
Intern::new(entry.2.parse::<Authority>().unwrap()),
137-
Intern::new(entry.3.parse::<Authority>().unwrap()),
138-
))
179+
.get(
180+
&(
181+
Intern::new(entry.0.parse::<Authority>().unwrap()),
182+
Intern::new(entry.1.parse::<Authority>().unwrap()),
183+
Intern::new(entry.2.parse::<Authority>().unwrap()),
184+
),
185+
)
139186
.map_or(0, |v| v.load(Ordering::Relaxed)),
140187
count,
141188
"{self:#?}\n{entry:?}"
@@ -173,7 +220,7 @@ mod tests {
173220

174221
#[tokio::test]
175222
async fn test_stat_tracking() {
176-
let stats = Stats::new();
223+
let stats = Stats::new(t(0));
177224

178225
stats.redirected_impl(a("0.0.0.0"), i("a.com"), i("b.com"), t(0));
179226
stats.redirected_impl(a("0.0.0.0"), i("b.com"), i("c.com"), t(1));
@@ -182,11 +229,12 @@ mod tests {
182229
stats.redirected_impl(a("1.0.0.0"), i("b.com"), i("homepage.com"), t(2));
183230
stats.redirected_impl(a("1.0.0.0"), i("homepage.com"), i("c.com"), t(3));
184231

185-
assert_eq!(stats.aggregated.counters.len(), 4);
186-
stats.assert_stat_entry((d(0), "a.com", "b.com", "a.com"), 2);
187-
stats.assert_stat_entry((d(0), "b.com", "c.com", "a.com"), 1);
188-
stats.assert_stat_entry((d(0), "b.com", "homepage.com", "a.com"), 1);
189-
stats.assert_stat_entry((d(0), "homepage.com", "c.com", "a.com"), 1);
232+
let guard = stats.aggregated.collector.enter();
233+
assert_eq!(stats.aggregated.counter(&guard).len(), 4);
234+
stats.assert_stat_entry(("a.com", "b.com", "a.com"), 2);
235+
stats.assert_stat_entry(("b.com", "c.com", "a.com"), 1);
236+
stats.assert_stat_entry(("b.com", "homepage.com", "a.com"), 1);
237+
stats.assert_stat_entry(("homepage.com", "c.com", "a.com"), 1);
190238

191239
let tracking = stats.ip_tracking.pin();
192240
assert_eq!(tracking.len(), 2);
@@ -224,13 +272,8 @@ mod tests {
224272
stats.redirected_impl(a("0.0.0.0"), i("b.com"), i("c.com"), t(1) + day);
225273
stats.redirected_impl(a("0.0.0.0"), i("c.com"), i("a.com"), t(2) + day);
226274

227-
assert_eq!(stats.aggregated.counters.len(), 6);
228-
stats.assert_stat_entry((d(0), "a.com", "b.com", "a.com"), 2);
229-
stats.assert_stat_entry((d(0), "b.com", "c.com", "a.com"), 1);
230-
stats.assert_stat_entry((d(0), "b.com", "homepage.com", "a.com"), 1);
231-
stats.assert_stat_entry((d(0), "homepage.com", "c.com", "a.com"), 1);
232-
stats.assert_stat_entry((d(day.num_seconds()), "b.com", "c.com", "b.com"), 1);
233-
stats.assert_stat_entry((d(day.num_seconds()), "b.com", "c.com", "b.com"), 1);
234-
stats.assert_stat_entry((d(day.num_seconds()), "c.com", "a.com", "b.com"), 1);
275+
assert_eq!(stats.aggregated.counter(&guard).len(), 2);
276+
stats.assert_stat_entry(("b.com", "c.com", "b.com"), 1);
277+
stats.assert_stat_entry(("c.com", "a.com", "b.com"), 1);
235278
}
236279
}

0 commit comments

Comments
 (0)