From fb249df0baf5c430e07fd1212428c26989e48008 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 11:45:22 +0200 Subject: [PATCH 01/19] Add the notion of a time vector I have had this idea in my head for months, let's actually try it this time. Music is very seasonal for me, I have tracks that are better suited for a hot summer night, tracks that are better suited for a grey autumn day. Also, time of the week matters, I listen to chill music on a Saturday or Sunday morning, I listen to bangers more in the evenings. So I thought, what if we create a moving average for a track (and album, and artist) to track the time of the year, week, and day? Then we can make better suggestions for what to listen to based on the current time. This adds the types for that, and also a mapping of weekdays to the circle, based on me toying around with Geogebra. --- src/playcount.rs | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/src/playcount.rs b/src/playcount.rs index efdd889..af070e2 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -98,6 +98,59 @@ impl Instant { seconds: self.seconds_since_jan_2000 - t0.seconds_since_jan_2000, } } + + /// Embed the instant into the time vector space, see also [`TimeVector`]. + pub fn embed(&self) -> TimeVector { + const SECONDS_PER_YEAR: u32 = 365 * 24 * 3600 + 6 * 3600; + const SECONDS_PER_WEEK: u32 = 7 * 24 * 3600; + const SECONDS_PER_DAY: u32 = 24 * 3600; + + // We convert to radians to map to the circle; precompute as much of + // the multiplication as we can. + const NORM_YEAR: f32 = std::f32::consts::TAU / (SECONDS_PER_YEAR as f32); + const NORM_DAY: f32 = std::f32::consts::TAU / (SECONDS_PER_DAY as f32); + + // We center the day transitions around noon UTC, this matters for how + // we map the weekdays below.
+ let t = self.seconds_since_jan_2000 - 12 * 3600; + let t_day = t % SECONDS_PER_DAY; + let t_week = t % SECONDS_PER_WEEK; + let t_year = t % SECONDS_PER_YEAR; + + let r_day = (t_day as f32) * NORM_DAY; + let r_year = (t_year as f32) * NORM_YEAR; + + // We map weekdays non-linearly, as follows (where the angle goes from + // 0 to 1 for a full rotation): + const MON: f32 = 7.5 / 9.0; + const TUE: f32 = 8.5 / 9.0; + const WED: f32 = 0.5 / 9.0; + const THU: f32 = 1.5 / 9.0; + const FRI: f32 = 1.1 / 4.0; + const SAT: f32 = 4.0 / 9.0; + const SUN: f32 = 6.0 / 9.0; + let r_week = match t_week / SECONDS_PER_DAY { + // Jan 1st 2000 was a Saturday, + 0 => SAT + (SUN - SAT) * r_day, + 1 => SUN + (MON - SUN) * r_day, + 2 => MON + (TUE - MON) * r_day, + // Wrap around 0 happens between Tue and Wed. + 3 => TUE + (1.0 + WED - TUE) * r_day, + 4 => WED + (THU - WED) * r_day, + 5 => THU + (FRI - THU) * r_day, + 6 => FRI + (SAT - FRI) * r_day, + _ => unreachable!("There are only 7 days in a week."), + }; + + TimeVector([ + r_year.cos(), + r_year.sin(), + r_week.cos(), + r_week.sin(), + r_day.cos(), + r_day.sin(), + ]) + } } impl Epoch { @@ -122,6 +175,35 @@ pub struct RateLimit { pub fill_rate_per_second: f32, } +/// A vector representation of the time of day, week, and year. +/// +/// The rationale behind this is that we can compare how "similar" moments are +/// using the cosine difference, which we can use to classify tracks as morning +/// vs. evening, or weekend vs. weekday, or summer vs. winter. Based on this we +/// hope to suggest better tracks to listen to based on the current moment. E.g. +/// in the early morning we may suggest some chill jazz but not heavy dancefloor +/// banger. +/// +/// Because years, weeks, and days are all cyclic, we treat them as circles, and +/// we embed the moment as x, y coordinate on the circle. This ensures that +/// taking the cosine distance is meaningful. 
+/// +/// We populate the space as follows: +/// - Dimension 0, 1: Time of year +/// - Dimension 2, 3: Time of week[^1] +/// - Dimension 4, 5: Time of day (24h) +/// +/// [^1]: For the time of the week, we don't map the time uniformly to the +/// circle. We care more about "weekday" vs. "weekend", so the weekdays are +/// relatively squashed. +/// +/// We map instants to time vectors without regard for time zone. Local times +/// are irrelevant as long as all listens are mostly in the same time zone, so +/// that the morning/afternoon distinction makes sense. If you move time zones, +/// we should adjust for that, but that's not a problem I have so I'm not +/// solving it right now. +pub struct TimeVector([f32; 6]); + /// Exponential moving averages at different timescales plus leaky bucket rate limiter. pub struct ExpCounter { /// Time at which the counts were last updated. From 6f406ae1f6f22da2cb92581f85c2b6ea04201bdf Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 11:49:16 +0200 Subject: [PATCH 02/19] Autoformat playcount module I'm using Helix now and it formats on save, the diff is not too big, let's accept it. --- src/playcount.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index af070e2..9931d56 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -11,12 +11,12 @@ use std::collections::BinaryHeap; use std::collections::HashMap; use std::path::Path; +use crate::album_table::AlbumTable; use crate::database::{self, Transaction}; use crate::database_utils::connect_readonly; use crate::prim::{AlbumId, ArtistId, TrackId}; -use crate::{MemoryMetaIndex, MetaIndex}; -use crate::album_table::AlbumTable; use crate::user_data::AlbumState; +use crate::{MemoryMetaIndex, MetaIndex}; /// A point in time with second granularity. 
/// @@ -282,9 +282,9 @@ impl ExpCounter { 19260.0, // 3650 days / 10 years // 9630.615234, // 1826 days / 5 years 2407.653809, // 457 days / 1.25 years - 601.913452, // 114 days / ~3.75 months / 16 weeks - 150.478363, // 29 days / 1 month - 37.619591, // 7 days + 601.913452, // 114 days / ~3.75 months / 16 weeks + 150.478363, // 29 days / 1 month + 37.619591, // 7 days ]; /// Return how much to decay the counters by after the elapsed time. @@ -669,10 +669,7 @@ fn print_ranking( /// playcount on a short timescale, while still mixing in a bit of a longer /// time horizon. fn score_trending(counter: &ExpCounter) -> f32 { - 0.0 - + (2.0 * counter.n[4]) - + (0.5 * counter.n[3]) - + (0.1 * counter.n[2]) + (2.0 * counter.n[4]) + (0.5 * counter.n[3]) + (0.1 * counter.n[2]) } /// Score for sorting entries by _falling_. @@ -718,7 +715,10 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { counts.get_top_by(150, |counter: &ExpCounter| RevNotNan(counter.n[timescale])); print_ranking( "TOP", - format!("timescale {}, {:.0} days / {:.0} months", timescale, n_days, n_months), + format!( + "timescale {}, {:.0} days / {:.0} months", + timescale, n_days, n_months + ), index, &top_artists, &top_albums, @@ -737,7 +737,8 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { &trending_tracks, ); - let (falling_artists, falling_albums, falling_tracks) = counts.get_top_by(350, |c| RevNotNan(score_falling(c))); + let (falling_artists, falling_albums, falling_tracks) = + counts.get_top_by(350, |c| RevNotNan(score_falling(c))); print_ranking( "FALLING", "see code for formula".to_string(), From 72fdac12228df44ca834a768038048a63fa25282 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 15:46:47 +0200 Subject: [PATCH 03/19] Print time embedding with playcounts They don't look right to me, the months are skewed too much towards spring, and days too much towards Thursday, though maybe that is due to me meddling and not 
providing the proper inverse mapping. I need to write some tests to sort this out. --- src/playcount.rs | 108 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 3 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index 9931d56..81b734c 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -204,6 +204,87 @@ pub struct RateLimit { /// solving it right now. pub struct TimeVector([f32; 6]); +impl TimeVector { + const fn zero() -> TimeVector { + TimeVector([0.0; 6]) + } + + fn mul_add(&self, factor: f32, term: TimeVector) -> TimeVector { + TimeVector([ + self.0[0].mul_add(factor, term.0[0]), + self.0[1].mul_add(factor, term.0[1]), + self.0[2].mul_add(factor, term.0[2]), + self.0[3].mul_add(factor, term.0[3]), + self.0[4].mul_add(factor, term.0[4]), + self.0[5].mul_add(factor, term.0[5]), + ]) + } + + /// For debugging, format as human-readable direction that the vector points in. + #[rustfmt::skip] + fn fmt_dir(&self) -> String { + use std::f32::consts::TAU; + + let mut r_year = self.0[1].atan2(self.0[0]); + let mut r_week = self.0[3].atan2(self.0[2]); + let mut r_day = self.0[5].atan2(self.0[4]); + + r_year += if r_year < 0.0 { TAU } else { 0.0 }; + r_week += if r_week < 0.0 { TAU } else { 0.0 }; + r_day += if r_day < 0.0 { TAU } else { 0.0 }; + + let month = (r_year * (11.999 / TAU)) as usize; + let day = (r_week * (6.999 / TAU)) as usize; + let hour = (r_day * (23.999 / TAU)) as usize; + + const MONTHS: [&'static str; 12] = [ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", + ]; + // Days start on Saturday, see [`Instant::embed`]. + const DAYS: [&'static str; 7] = [ + "Wed", "Thu", "Fri", "Sat", "Sun", "Mon", "Tue" + ]; + // Hours start at noon UTC, see [`Instant::embed`]. 
+ const HOURS: [&'static str; 24] = [ + "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", + "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", + ]; + + format!("{} {} {}:00Z", MONTHS[month], DAYS[day], HOURS[hour]) + } +} + +impl std::ops::Add<&TimeVector> for TimeVector { + type Output = TimeVector; + + fn add(self, rhs: &TimeVector) -> TimeVector { + TimeVector([ + self.0[0] + rhs.0[0], + self.0[1] + rhs.0[1], + self.0[2] + rhs.0[2], + self.0[3] + rhs.0[3], + self.0[4] + rhs.0[4], + self.0[5] + rhs.0[5], + ]) + } +} + +impl std::ops::Mul for TimeVector { + type Output = TimeVector; + + fn mul(self, rhs: f32) -> TimeVector { + TimeVector([ + self.0[0] * rhs, + self.0[1] * rhs, + self.0[2] * rhs, + self.0[3] * rhs, + self.0[4] * rhs, + self.0[5] * rhs, + ]) + } +} + /// Exponential moving averages at different timescales plus leaky bucket rate limiter. pub struct ExpCounter { /// Time at which the counts were last updated. @@ -214,6 +295,9 @@ pub struct ExpCounter { /// Exponentially decaying counts for different half-lives. pub n: [f32; 5], + + /// Exponential moving average of the time vector of each play. + pub time_embedding: TimeVector, } impl ExpCounter { @@ -304,6 +388,7 @@ impl ExpCounter { // have long replenished. bucket: 0.0, n: [0.0; 5], + time_embedding: TimeVector::zero(), } } @@ -358,6 +443,13 @@ impl ExpCounter { } self.t = t1; + + // In addition to updating the counters, we update the time vector for + // this item. We used a fixed decay factor of 0.9 (so every new play + // weighs 0.1), but we do take into account the rate limit. 
+ let v = t1.embed(); + let f = count * 0.1; + self.time_embedding = self.time_embedding.mul_add(1.0 - f, v * f); } } @@ -612,6 +704,7 @@ fn print_ranking( title: &'static str, description: String, index: &MemoryMetaIndex, + counts: &PlayCounts, top_artists: &[(RevNotNan, ArtistId)], top_albums: &[(RevNotNan, AlbumId)], top_tracks: &[(RevNotNan, TrackId)], @@ -620,11 +713,13 @@ fn print_ranking( for (i, (count, artist_id)) in top_artists.iter().enumerate() { let artist = index.get_artist(*artist_id).unwrap(); let artist_name = index.get_string(artist.name); + let counter = counts.counter.artists.get(artist_id).unwrap(); println!( - " {:2} {:7.3} {} {}", + " {:2} {:7.3} {} {} {}", i + 1, count.0, + counter.time_embedding.fmt_dir(), artist_id, artist_name ); @@ -635,11 +730,13 @@ fn print_ranking( let album = index.get_album(*album_id).unwrap(); let album_title = index.get_string(album.title); let album_artist = index.get_string(album.artist); + let counter = counts.counter.albums.get(album_id).unwrap(); println!( - " {:2} {:7.3} {} {:25} {}", + " {:2} {:7.3} {} {} {:25} {}", i + 1, count.0, + counter.time_embedding.fmt_dir(), album_id, album_title, album_artist @@ -651,11 +748,13 @@ fn print_ranking( let track = index.get_track(*track_id).unwrap(); let track_title = index.get_string(track.title); let track_artist = index.get_string(track.artist); + let counter = counts.counter.tracks.get(track_id).unwrap(); println!( - " {:2} {:7.3} {} {:25} {}", + " {:2} {:7.3} {} {} {:25} {}", i + 1, count.0, + counter.time_embedding.fmt_dir(), track_id, track_title, track_artist @@ -720,6 +819,7 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { timescale, n_days, n_months ), index, + &counts, &top_artists, &top_albums, &top_tracks, @@ -732,6 +832,7 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { "TRENDING", "see code for formula".to_string(), index, + &counts, &trending_artists, &trending_albums, &trending_tracks, @@ 
-743,6 +844,7 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { "FALLING", "see code for formula".to_string(), index, + &counts, &falling_artists, &falling_albums, &falling_tracks, From 7939a6d695715b96b2bd5e7b057015a691cfb3f0 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 16:20:14 +0200 Subject: [PATCH 04/19] Test time vector embedding Okay the weekdays were wrong, but the rest of it was right. Weird. Let's see again if it makes sense now. --- src/playcount.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/playcount.rs b/src/playcount.rs index 81b734c..0a76272 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -142,6 +142,9 @@ impl Instant { _ => unreachable!("There are only 7 days in a week."), }; + // TODO: Restore the above mapping. + let r_week = t_week as f32 * (std::f32::consts::TAU / SECONDS_PER_WEEK as f32); + TimeVector([ r_year.cos(), r_year.sin(), @@ -221,6 +224,10 @@ impl TimeVector { } /// For debugging, format as human-readable direction that the vector points in. + /// + /// Note, this is only approximate. We assume for example that every month + /// is exactly 1/12 of a year, where a year is 365.25 days. It's about the + /// rough direction anyway so this is fine. #[rustfmt::skip] fn fmt_dir(&self) -> String { use std::f32::consts::TAU; @@ -229,6 +236,10 @@ impl TimeVector { let mut r_week = self.0[3].atan2(self.0[2]); let mut r_day = self.0[5].atan2(self.0[4]); + // During embedding, we consider midnight the day boundary and we + // subtract half a day from the timestamp, so here we add it back. + r_week += std::f32::consts::TAU / 14.0; + r_year += if r_year < 0.0 { TAU } else { 0.0 }; r_week += if r_week < 0.0 { TAU } else { 0.0 }; r_day += if r_day < 0.0 { TAU } else { 0.0 }; @@ -243,7 +254,7 @@ impl TimeVector { ]; // Days start on Saturday, see [`Instant::embed`]. 
const DAYS: [&'static str; 7] = [ - "Wed", "Thu", "Fri", "Sat", "Sun", "Mon", "Tue" + "Sat", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", ]; // Hours start at noon UTC, see [`Instant::embed`]. const HOURS: [&'static str; 24] = [ @@ -852,3 +863,44 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { Ok(()) } + +#[cfg(test)] +pub mod test { + use super::Instant; + use chrono::{DateTime, Utc}; + + fn fmt_dir(dt: DateTime) -> String { + Instant::from_posix_timestamp(dt.timestamp()) + .embed() + .fmt_dir() + } + + #[test] + #[rustfmt::skip] + fn time_vector_embed_format_works_as_expected() { + use chrono::{TimeZone, Utc}; + + // Month, day of week, hour of day. + // 2025-04-14 is a Monday. + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 14).and_hms( 9, 5, 0)), "Apr Mon 09:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 15).and_hms(11, 5, 0)), "Apr Tue 11:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 16).and_hms(13, 5, 0)), "Apr Wed 13:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 17).and_hms(15, 5, 0)), "Apr Thu 15:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 18).and_hms(17, 5, 0)), "Apr Fri 17:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 19).and_hms(19, 5, 0)), "Apr Sat 19:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 20).and_hms(21, 5, 0)), "Apr Sun 21:00Z"); + + assert_eq!(fmt_dir(Utc.ymd(2025, 1, 15).and_hms( 7, 5, 0)), "Jan Wed 07:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 2, 15).and_hms( 9, 5, 0)), "Feb Sat 09:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 3, 15).and_hms(11, 5, 0)), "Mar Sat 11:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 15).and_hms(13, 5, 0)), "Apr Tue 13:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 5, 15).and_hms(15, 5, 0)), "May Thu 15:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 6, 15).and_hms(17, 5, 0)), "Jun Sun 17:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 7, 15).and_hms(19, 5, 0)), "Jul Tue 19:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 8, 15).and_hms(21, 5, 0)), "Aug Fri 21:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 9, 15).and_hms(23, 5, 0)), 
"Sep Mon 23:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 10, 15).and_hms( 1, 5, 0)), "Oct Wed 01:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 11, 15).and_hms( 2, 5, 0)), "Nov Sat 02:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 12, 15).and_hms( 6, 5, 0)), "Dec Mon 06:00Z"); + } +} From 2188b4d9b536e8fbcd5c81ace9c1972c8c5a570a Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 16:38:11 +0200 Subject: [PATCH 05/19] Remove decay factor from time embeddings After eyeballing the output, it seems to work better without this. --- src/playcount.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index 0a76272..045e1da 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -212,7 +212,7 @@ impl TimeVector { TimeVector([0.0; 6]) } - fn mul_add(&self, factor: f32, term: TimeVector) -> TimeVector { + fn mul_add(&self, factor: f32, term: &TimeVector) -> TimeVector { TimeVector([ self.0[0].mul_add(factor, term.0[0]), self.0[1].mul_add(factor, term.0[1]), @@ -266,10 +266,10 @@ impl TimeVector { } } -impl std::ops::Add<&TimeVector> for TimeVector { +impl std::ops::Add for TimeVector { type Output = TimeVector; - fn add(self, rhs: &TimeVector) -> TimeVector { + fn add(self, rhs: TimeVector) -> TimeVector { TimeVector([ self.0[0] + rhs.0[0], self.0[1] + rhs.0[1], @@ -456,11 +456,12 @@ impl ExpCounter { self.t = t1; // In addition to updating the counters, we update the time vector for - // this item. We used a fixed decay factor of 0.9 (so every new play - // weighs 0.1), but we do take into account the rate limit. - let v = t1.embed(); - let f = count * 0.1; - self.time_embedding = self.time_embedding.mul_add(1.0 - f, v * f); + // this item. I experimented with a decay factor of 1 - 0.1 * count, + // so when the rate limiter doesn't limit, a factor of 0.9, but that was + // decaying way too aggressively. 
Just adding without decay seems to + // work far better, even though it skews the item to the initial + // discovery phase. + self.time_embedding = t1.embed().mul_add(count, &self.time_embedding); } } From 2343f00031c0eb6fc4f2092b22902f939f37f1e2 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 17:28:59 +0200 Subject: [PATCH 06/19] Tweak the weekday embedding In the end this piecewise-linear mapping to the circle is a lot simpler than the manual tweaking I did before, and at least on paper it has very nice properties for distinguishing between weekends and weekdays, party- nights and quiet nights. --- src/playcount.rs | 89 +++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index 045e1da..ebe33ea 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -101,50 +101,55 @@ impl Instant { /// Embed the instant into the time vector space, see also [`TimeVector`]. pub fn embed(&self) -> TimeVector { + use std::f32::consts::TAU; + const SECONDS_PER_YEAR: u32 = 365 * 24 * 3600 + 6 * 3600; const SECONDS_PER_WEEK: u32 = 7 * 24 * 3600; const SECONDS_PER_DAY: u32 = 24 * 3600; // We convert to radians to map to the circle; precompute as much of // the multiplication as we can. - const NORM_YEAR: f32 = std::f32::consts::TAU / (SECONDS_PER_YEAR as f32); - const NORM_DAY: f32 = std::f32::consts::TAU / (SECONDS_PER_DAY as f32); + const NORM_YEAR: f32 = TAU / (SECONDS_PER_YEAR as f32); + const NORM_DAY: f32 = TAU / (SECONDS_PER_DAY as f32); - // We center the day transitions around noon UTC, this matters for how - // we map the weekdays below. 
- let t = self.seconds_since_jan_2000 - 12 * 3600; + let t = self.seconds_since_jan_2000; let t_day = t % SECONDS_PER_DAY; - let t_week = t % SECONDS_PER_WEEK; let t_year = t % SECONDS_PER_YEAR; + // The epoch we use, 2000-01-01, is a Saturday, but we want the week to + // start on Monday midnight to simplify the circle mapping below. + let t_week = (t + SECONDS_PER_DAY * 5) % SECONDS_PER_WEEK; let r_day = (t_day as f32) * NORM_DAY; let r_year = (t_year as f32) * NORM_YEAR; - // We map weekdays non-linearly, as follows (where the angle goes from - // 0 to 1 for a full rotation): - const MON: f32 = 7.5 / 9.0; - const TUE: f32 = 8.5 / 9.0; - const WED: f32 = 0.5 / 9.0; - const THU: f32 = 1.5 / 9.0; - const FRI: f32 = 1.1 / 4.0; - const SAT: f32 = 4.0 / 9.0; - const SUN: f32 = 6.0 / 9.0; - let r_week = match t_week / SECONDS_PER_DAY { - // Jan 1st 2000 was a Saturday, - 0 => SAT + (SUN - SAT) * r_day, - 1 => SUN + (MON - SUN) * r_day, - 2 => MON + (TUE - MON) * r_day, - // Wrap around 0 happens between Tue and Wed. - 3 => TUE + (1.0 + WED - TUE) * r_day, - 4 => WED + (THU - WED) * r_day, - 5 => THU + (FRI - THU) * r_day, - 6 => FRI + (SAT - FRI) * r_day, - _ => unreachable!("There are only 7 days in a week."), + // We map weekdays non-linearly around the circle. The first quadrant + // contains Mon-Thu, then the next three quadrants contain Fri, Sat, Sun + // respectively. This mapping has the following properties: + // + // - All weekdays lie above the x-axis, the weekend lies below, so the + // time-weighed average vector of weekend vs. weekday have a dot + // product close to -1, definitely below 0. + // - Saturday is diametrically opposite the "weekdays" excluding Friday. + // The time-weighed average vector of Saturday vs. Mon-Thu have a dot + // product of exactly -1. + // - "Party nights" (Friday and Saturday) lie left of the y-axis, + // weekday + Sunday night all lie right of the y-axis. 
The dot product + // of the time-weighed average vector of days with party nights vs. + // days without is close to -1, definitely below 0. + // + // Hopefully this does a good job of mapping the time of the week into + // R^2 in a meaningful way. + let r_week = if t_week <= SECONDS_PER_DAY * 4 { + // One factor 0.25 for the quarter circle, one because we fit 4 days + // into this quadrant. + (t_week as f32) * (TAU * 0.25 * 0.25 / SECONDS_PER_DAY as f32) + } else { + // We subtract 3 full days, so `t_weekend` is 0.0 at the start of + // Thursday. Then we allocate a quarter of the circle to each day. + let t_weekend = t_week - SECONDS_PER_DAY * 3; + (t_weekend as f32) * (TAU * 0.25 / SECONDS_PER_DAY as f32) }; - // TODO: Restore the above mapping. - let r_week = t_week as f32 * (std::f32::consts::TAU / SECONDS_PER_WEEK as f32); - TimeVector([ r_year.cos(), r_year.sin(), @@ -236,33 +241,31 @@ impl TimeVector { let mut r_week = self.0[3].atan2(self.0[2]); let mut r_day = self.0[5].atan2(self.0[4]); - // During embedding, we consider midnight the day boundary and we - // subtract half a day from the timestamp, so here we add it back. - r_week += std::f32::consts::TAU / 14.0; - r_year += if r_year < 0.0 { TAU } else { 0.0 }; r_week += if r_week < 0.0 { TAU } else { 0.0 }; r_day += if r_day < 0.0 { TAU } else { 0.0 }; let month = (r_year * (11.999 / TAU)) as usize; - let day = (r_week * (6.999 / TAU)) as usize; let hour = (r_day * (23.999 / TAU)) as usize; + // For the day, we don't bother to undo the non-linear mapping that + // [`Instant::embed`] applies, instead we factor this into the lookup + // table below. + let day = (r_week * (15.999 / TAU)) as usize; + const MONTHS: [&'static str; 12] = [ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", ]; - // Days start on Saturday, see [`Instant::embed`]. 
- const DAYS: [&'static str; 7] = [ - "Sat", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", - ]; - // Hours start at noon UTC, see [`Instant::embed`]. - const HOURS: [&'static str; 24] = [ - "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", - "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", + // The inverse mapping of [`Instant::embed`]. + const DAYS: [&'static str; 16] = [ + "Mon", "Tue", "Wed", "Thu", + "Fri", "Fri", "Fri", "Fri", + "Sat", "Sat", "Sat", "Sat", + "Sun", "Sun", "Sun", "Sun", ]; - format!("{} {} {}:00Z", MONTHS[month], DAYS[day], HOURS[hour]) + format!("{} {} {:02}:00Z", MONTHS[month], DAYS[day], hour) } } From 37bf1e4830f1b789485b026212fdebfe86401fbf Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 18:00:17 +0200 Subject: [PATCH 07/19] Print normalized weight of the 3 time components I was curious about whether this would work, and from looking at the numbers, it works very well. See also the doc comment. --- src/playcount.rs | 97 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 26 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index ebe33ea..0aa1c8a 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -185,6 +185,8 @@ pub struct RateLimit { /// A vector representation of the time of day, week, and year. /// +/// ## Summary +/// /// The rationale behind this is that we can compare how "similar" moments are /// using the cosine difference, which we can use to classify tracks as morning /// vs. evening, or weekend vs. weekday, or summer vs. winter. Based on this we @@ -205,11 +207,32 @@ pub struct RateLimit { /// circle. We care more about "weekday" vs. "weekend", so the weekdays are /// relatively squashed. /// -/// We map instants to time vectors without regard for time zone. Local times -/// are irrelevant as long as all listens are mostly in the same time zone, so -/// that the morning/afternoon distinction makes sense. 
If you move time zones, -/// we should adjust for that, but that's not a problem I have so I'm not -/// solving it right now. +/// ## Local time +/// +/// We map instants to time vectors based on UTC time, without regard for time +/// zone. Ideally, we would do it based on local time, but that information is +/// not available from historical Last.fm scrobbles, and even in Musium I made +/// the mistake of saving listens always as UTC, not including time zone offset. +/// For me this is not a big problem, the vast majority of my listens are in +/// UTC + {0, 1, 2}, so the impact on the day shift is small. If I ever move to +/// a very different time zone and I want to preserve the time of the day, I +/// suppose we could try to infer the time zone from the median listen time or +/// something like that. +/// +/// ## Normalization +/// +/// When we embed an instant, the length of the vector is 3. Each of the +/// 3 components (year/week/day) has a length of 1 by construction, so the +/// relative length of the components is equal. After adding time vectors +/// together, this is no longer true. For example, if we listen a track on every +/// weekday, but only in March, the day-of-week components will cancel each +/// other out, while the time-of-year components will reinforce each other. If +/// we normalize the result, the time-of-year component will be much larger. So +/// naturally, when we add time vectors, they pick out which component an item +/// is most seasonal in. When we take the cosine distance with the embedding +/// of the current time to find tracks suitable for the current moment, because +/// it's not sensitive to absolute length, that will naturally emphasize the +/// right component. 
pub struct TimeVector([f32; 6]); impl TimeVector { @@ -265,7 +288,29 @@ impl TimeVector { "Sun", "Sun", "Sun", "Sun", ]; - format!("{} {} {:02}:00Z", MONTHS[month], DAYS[day], hour) + // The length of the embedding vector of an instant is by construction + // 3.0, and restricted to the year/week/day part, each of those parts + // has length 1.0. But when we add those embeddings together, the ones + // that point in the same direction reinforce while ones that point in + // different directions cancel out. So we play a track on every day of + // the week in one month, the year part becomes longer relative to the + // week part. We print those weights to classify an item in which of + // these three cycles it is most seasonal. + let w2_year = self.0[0] * self.0[0] + self.0[1] * self.0[1]; + let w2_week = self.0[2] * self.0[2] + self.0[3] * self.0[3]; + let w2_day = self.0[4] * self.0[4] + self.0[5] * self.0[5]; + let inv_norm = (w2_year + w2_week + w2_day).sqrt().recip(); + let w_year = w2_year.sqrt() * inv_norm; + let w_week = w2_week.sqrt() * inv_norm; + let w_day = w2_day.sqrt() * inv_norm; + + format!( + "{} {} {:02}hZ Y{:1.0}-D{:1.0}-H{:1.0}", + MONTHS[month], DAYS[day], hour, + // We print these to 1 digit precision, and it would be wasteful to + // add the "0." in front, so we print as integer from 0 to 9. + w_year * 9.49, w_week * 9.49, w_day * 9.49, + ) } } @@ -886,25 +931,25 @@ pub mod test { // Month, day of week, hour of day. // 2025-04-14 is a Monday. 
- assert_eq!(fmt_dir(Utc.ymd(2025, 4, 14).and_hms( 9, 5, 0)), "Apr Mon 09:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 4, 15).and_hms(11, 5, 0)), "Apr Tue 11:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 4, 16).and_hms(13, 5, 0)), "Apr Wed 13:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 4, 17).and_hms(15, 5, 0)), "Apr Thu 15:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 4, 18).and_hms(17, 5, 0)), "Apr Fri 17:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 4, 19).and_hms(19, 5, 0)), "Apr Sat 19:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 4, 20).and_hms(21, 5, 0)), "Apr Sun 21:00Z"); - - assert_eq!(fmt_dir(Utc.ymd(2025, 1, 15).and_hms( 7, 5, 0)), "Jan Wed 07:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 2, 15).and_hms( 9, 5, 0)), "Feb Sat 09:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 3, 15).and_hms(11, 5, 0)), "Mar Sat 11:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 4, 15).and_hms(13, 5, 0)), "Apr Tue 13:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 5, 15).and_hms(15, 5, 0)), "May Thu 15:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 6, 15).and_hms(17, 5, 0)), "Jun Sun 17:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 7, 15).and_hms(19, 5, 0)), "Jul Tue 19:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 8, 15).and_hms(21, 5, 0)), "Aug Fri 21:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 9, 15).and_hms(23, 5, 0)), "Sep Mon 23:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 10, 15).and_hms( 1, 5, 0)), "Oct Wed 01:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 11, 15).and_hms( 2, 5, 0)), "Nov Sat 02:00Z"); - assert_eq!(fmt_dir(Utc.ymd(2025, 12, 15).and_hms( 6, 5, 0)), "Dec Mon 06:00Z"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 14).and_hms( 9, 5, 0)), "Apr Mon 09hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 15).and_hms(11, 5, 0)), "Apr Tue 11hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 16).and_hms(13, 5, 0)), "Apr Wed 13hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 17).and_hms(15, 5, 0)), "Apr Thu 15hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 18).and_hms(17, 5, 0)), "Apr Fri 17hZ Y5-D5-H5"); + 
assert_eq!(fmt_dir(Utc.ymd(2025, 4, 19).and_hms(19, 5, 0)), "Apr Sat 19hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 20).and_hms(21, 5, 0)), "Apr Sun 21hZ Y5-D5-H5"); + + assert_eq!(fmt_dir(Utc.ymd(2025, 1, 15).and_hms( 7, 5, 0)), "Jan Wed 07hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 2, 15).and_hms( 9, 5, 0)), "Feb Sat 09hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 3, 15).and_hms(11, 5, 0)), "Mar Sat 11hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 15).and_hms(13, 5, 0)), "Apr Tue 13hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 5, 15).and_hms(15, 5, 0)), "May Thu 15hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 6, 15).and_hms(17, 5, 0)), "Jun Sun 17hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 7, 15).and_hms(19, 5, 0)), "Jul Tue 19hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 8, 15).and_hms(21, 5, 0)), "Aug Fri 21hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 9, 15).and_hms(23, 5, 0)), "Sep Mon 23hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 10, 15).and_hms( 1, 5, 0)), "Oct Wed 01hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 11, 15).and_hms( 2, 5, 0)), "Nov Sat 02hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 12, 15).and_hms( 6, 5, 0)), "Dec Mon 06hZ Y5-D5-H5"); } } From d377554011f2d75f9ddd5699662ca67ab49b0e52 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 18:44:11 +0200 Subject: [PATCH 08/19] Compute a "for now" score to rank items It does a decent job from what I can tell, but on its own this score is no good, because it doesn't take popularity into account, so it will suggest obscure tracks just because I played them once almost exactly a year ago. But as a multiplier for the discovery ranking, I think it would do very well. Let me check! 
--- src/playcount.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/playcount.rs b/src/playcount.rs index 0aa1c8a..3cbe119 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -251,6 +251,21 @@ impl TimeVector { ]) } + /// Return the L2-norm (Euclidean norm) of this vector. + fn norm(&self) -> f32 { + let w2_year = self.0[0] * self.0[0] + self.0[1] * self.0[1]; + let w2_week = self.0[2] * self.0[2] + self.0[3] * self.0[3]; + let w2_day = self.0[4] * self.0[4] + self.0[5] * self.0[5]; + (w2_year + w2_week + w2_day).sqrt() + } + + /// Return the dot product between the two vectors. + fn dot(&self, other: &TimeVector) -> f32 { + 0.0 + ((self.0[0] * other.0[0]) + (self.0[1] * other.0[1])) + + ((self.0[2] * other.0[2]) + (self.0[3] * other.0[3])) + + ((self.0[4] * other.0[4]) + (self.0[5] * other.0[5])) + } + /// For debugging, format as human-readable direction that the vector points in. /// /// Note, this is only approximate. We assume for example that every month @@ -852,6 +867,22 @@ fn score_falling(counter: &ExpCounter) -> f32 { f0 + f1 * 0.2 + f2 * 0.6 } +/// Score for sorting entries as the most suitable for this time. +/// +/// Based on time of the year, day of the week, and time of the day, when we +/// played the item in the past. +/// +/// Takes a normalized embedding of the current time as `now_embed`. +fn score_for_now(now_embed: &TimeVector, counter: &ExpCounter) -> f32 { + // We take the cosine distance. We assume `now_embed` is already normalized, + // so we only need to normalize the counter's vector. + let cos_dist = counter.time_embedding.dot(now_embed) / counter.time_embedding.norm(); + + // Put the score in the range [0.0, 1.0], so we can easily use it as a + // multiplier for other scores. + cos_dist.mul_add(0.5, 0.5) +} + /// Print playcount statistics about the library. 
/// /// This is mostly for debugging and development purposes, playcounts should be @@ -910,6 +941,21 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { &falling_tracks, ); + let now = Instant::from_posix_timestamp(chrono::Utc::now().timestamp()); + let now_embed = now.embed() * (1.0 / now.embed().norm()); + + let (falling_artists, falling_albums, falling_tracks) = + counts.get_top_by(150, |c| RevNotNan(score_for_now(&now_embed, c))); + print_ranking( + "FOR NOW", + "time vector cosine distance".to_string(), + index, + &counts, + &falling_artists, + &falling_albums, + &falling_tracks, + ); + Ok(()) } From 5cb19a5769d2b96c074981c93cca937461754767 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 19:21:06 +0200 Subject: [PATCH 09/19] Put the time embedding in the user data This is needed to expose it through the API, though right now we do not take it into account, that will be in a follow-up commit. For now I'm not worrying about memory usage. --- src/playcount.rs | 19 +++++++++++++++---- src/user_data.rs | 18 ++++++++++++------ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index 3cbe119..a8b35df 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -233,14 +233,18 @@ pub struct RateLimit { /// of the current time to find tracks suitable for the current moment, because /// it's not sensitive to absolute length, that will naturally emphasize the /// right component. +// TODO: Instead of deriving copy, make a quantized version that holds i8's. +// It saves memory in the user data, and I hope it's faster to compute the inner +// products as well, it could even be vectorized. 
+#[derive(Copy, Clone)] pub struct TimeVector([f32; 6]); impl TimeVector { - const fn zero() -> TimeVector { + pub const fn zero() -> TimeVector { TimeVector([0.0; 6]) } - fn mul_add(&self, factor: f32, term: &TimeVector) -> TimeVector { + pub fn mul_add(&self, factor: f32, term: &TimeVector) -> TimeVector { TimeVector([ self.0[0].mul_add(factor, term.0[0]), self.0[1].mul_add(factor, term.0[1]), @@ -252,7 +256,7 @@ impl TimeVector { } /// Return the L2-norm (Euclidean norm) of this vector. - fn norm(&self) -> f32 { + pub fn norm(&self) -> f32 { let w2_year = self.0[0] * self.0[0] + self.0[1] * self.0[1]; let w2_week = self.0[2] * self.0[2] + self.0[3] * self.0[3]; let w2_day = self.0[4] * self.0[4] + self.0[5] * self.0[5]; @@ -260,7 +264,7 @@ impl TimeVector { } /// Return the dot product between the two vectors. - fn dot(&self, other: &TimeVector) -> f32 { + pub fn dot(&self, other: &TimeVector) -> f32 { 0.0 + ((self.0[0] * other.0[0]) + (self.0[1] * other.0[1])) + ((self.0[2] * other.0[2]) + (self.0[3] * other.0[3])) + ((self.0[4] * other.0[4]) + (self.0[5] * other.0[5])) @@ -329,6 +333,12 @@ impl TimeVector { } } +impl Default for TimeVector { + fn default() -> Self { + TimeVector::zero() + } +} + impl std::ops::Add for TimeVector { type Output = TimeVector; @@ -768,6 +778,7 @@ impl PlayCounts { let state = AlbumState { discover_score: score_falling(counter), trending_score: score_trending(counter), + time_embedding: counter.time_embedding, }; albums.insert(*album_id, state); } diff --git a/src/user_data.rs b/src/user_data.rs index 861437f..4683f88 100644 --- a/src/user_data.rs +++ b/src/user_data.rs @@ -25,11 +25,11 @@ use std::collections::HashMap; use std::convert::TryFrom; -use crate::MemoryMetaIndex; use crate::album_table::AlbumTable; -use crate::playcount::{PlayCounter, PlayCounts}; +use crate::database as db; +use crate::playcount::{PlayCounter, PlayCounts, TimeVector}; use crate::prim::{AlbumId, ArtistId, TrackId}; -use crate::{database as db}; +use 
crate::MemoryMetaIndex; /// Track rating. /// @@ -83,6 +83,9 @@ pub struct AlbumState { // Playcount on the shortest timescale. pub trending_score: f32, + + // Vector embedding of the play times, used to weigh the discover score. + pub time_embedding: TimeVector, } #[derive(Default)] @@ -108,7 +111,6 @@ impl Default for UserData { artists: HashMap::with_hasher(s), } } - } impl UserData { @@ -126,7 +128,8 @@ impl UserData { for opt_rating in db::iter_ratings(tx)? { let rating = opt_rating?; let tid = TrackId(rating.track_id as u64); - let rating = Rating::try_from(rating.rating).expect("Invalid rating value in the database."); + let rating = + Rating::try_from(rating.rating).expect("Invalid rating value in the database."); stats.set_track_rating(tid, rating); } @@ -143,7 +146,10 @@ impl UserData { } pub fn get_track_rating(&self, track_id: TrackId) -> Rating { - self.tracks.get(&track_id).map(|t| t.rating).unwrap_or_default() + self.tracks + .get(&track_id) + .map(|t| t.rating) + .unwrap_or_default() } pub fn get_album_scores(&self, album_id: AlbumId) -> AlbumState { From 1208af0fbd265b132d3d8e8232f4595e8a21ca74 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 19:59:27 +0200 Subject: [PATCH 10/19] Record top score in user data So we can use it as a weight in a "for now" ranking. --- src/playcount.rs | 8 ++++++++ src/user_data.rs | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/src/playcount.rs b/src/playcount.rs index a8b35df..addd838 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -778,6 +778,7 @@ impl PlayCounts { let state = AlbumState { discover_score: score_falling(counter), trending_score: score_trending(counter), + top_score: score_top(counter), time_embedding: counter.time_embedding, }; albums.insert(*album_id, state); @@ -857,6 +858,13 @@ fn score_trending(counter: &ExpCounter) -> f32 { (2.0 * counter.n[4]) + (0.5 * counter.n[3]) + (0.1 * counter.n[2]) } +/// Score for sorting by top. 
+/// +/// This is a mix of the longest two time scales. +fn score_top(counter: &ExpCounter) -> f32 { + counter.n[0].ln() + counter.n[1].ln() +} + /// Score for sorting entries by _falling_. /// /// Falling entries (tracks, albums, artists) are entries that have a high diff --git a/src/user_data.rs b/src/user_data.rs index 4683f88..734c544 100644 --- a/src/user_data.rs +++ b/src/user_data.rs @@ -84,6 +84,15 @@ pub struct AlbumState { // Playcount on the shortest timescale. pub trending_score: f32, + // Log playcount on the longer timescales. + // + // Could be used directly to sort by top albums, but in the UI this is not + // _that_ useful. Instead, we can mix it with the time embedding to provide + // a list of "for now" albums for this time of the day, where we don't + // suggest albums with a low playcount just because the one time we played + // them was at this time of the day. + pub top_score: f32, + // Vector embedding of the play times, used to weigh the discover score. pub time_embedding: TimeVector, } From a7dc7de16fe99852e1400c234c8a6a541fa47797 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 20:22:41 +0200 Subject: [PATCH 11/19] Serve time-adjusted discover score from API This integrates the new "for now"-weighed discover scores, and it also exposes a more raw "for now" score that needs to be integrated into the UI still. This includes some rustfmt reformattings, but not everything it would reformat in the same files; some changes are very invasive and I want to avoid doing that here. 
--- src/playcount.rs | 21 ++++++---- src/serialization.rs | 96 +++++++++++++++++++++++++++++++------------- src/server.rs | 16 ++++---- src/user_data.rs | 39 ++++++++++++++++-- 4 files changed, 126 insertions(+), 46 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index addd838..aa8d8c5 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -244,6 +244,15 @@ impl TimeVector { TimeVector([0.0; 6]) } + /// Return the normalized embedding of the current moment. + pub fn now() -> TimeVector { + use chrono::Utc; + let t = Instant::from_posix_timestamp(Utc::now().timestamp()); + let v = t.embed(); + let n = v.norm(); + v * n.recip() + } + pub fn mul_add(&self, factor: f32, term: &TimeVector) -> TimeVector { TimeVector([ self.0[0].mul_add(factor, term.0[0]), @@ -776,9 +785,9 @@ impl PlayCounts { let mut albums = AlbumTable::new(self.counter.albums.len(), AlbumState::default()); for (album_id, counter) in self.counter.albums.iter() { let state = AlbumState { - discover_score: score_falling(counter), - trending_score: score_trending(counter), - top_score: score_top(counter), + score_discover: score_falling(counter), + score_trending: score_trending(counter), + score_longterm: score_longterm(counter), time_embedding: counter.time_embedding, }; albums.insert(*album_id, state); @@ -858,10 +867,8 @@ fn score_trending(counter: &ExpCounter) -> f32 { (2.0 * counter.n[4]) + (0.5 * counter.n[3]) + (0.1 * counter.n[2]) } -/// Score for sorting by top. -/// -/// This is a mix of the longest two time scales. -fn score_top(counter: &ExpCounter) -> f32 { +/// Score for sorting by top on the longest two time scales. 
+fn score_longterm(counter: &ExpCounter) -> f32 { counter.n[0].ln() + counter.n[1].ln() } diff --git a/src/serialization.rs b/src/serialization.rs index a8f304f..62afd74 100644 --- a/src/serialization.rs +++ b/src/serialization.rs @@ -12,6 +12,7 @@ use serde_json; use std::io; use std::io::Write; +use crate::playcount::TimeVector; use crate::player::{Params, TrackSnapshot}; use crate::scan; use crate::user_data::UserData; @@ -23,6 +24,7 @@ use crate::{Album, AlbumId, Artist, ArtistId, MetaIndex, TrackId}; pub fn write_brief_album_json( index: &dyn MetaIndex, user_data: &UserData, + now_embed: &TimeVector, mut w: W, album_id: AlbumId, album: &Album, @@ -32,24 +34,27 @@ pub fn write_brief_album_json( write!(w, r#","artist_ids":["#)?; let mut first = true; for artist_id in index.get_album_artists(album.artist_ids) { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!(w, r#""{}""#, artist_id)?; first = false; } write!(w, r#"],"artist":"#)?; serde_json::to_writer(&mut w, index.get_string(album.artist))?; - let scores = user_data.get_album_scores(album_id); + let scores = user_data.get_album_scores(album_id).score(now_embed); write!( w, // The discover score can have large-ish magnitude and ranges from negative // to positive, it does not need a lot of precision. The trending score // is always between 0 and 1 though, it needs more digits for precision // near the end of the ranking. 
- r#","release_date":"{}","first_seen":"{}","discover_score":{:.2},"trending_score":{:.4}}}"#, + r#","release_date":"{}","first_seen":"{}","discover_score":{:.2},"trending_score":{:.4},"for_now_score":{:.3}}}"#, album.original_release_date, album.first_seen.format_iso8601(), - scores.discover_score, - scores.trending_score, + scores.discover, + scores.trending, + scores.for_now, )?; Ok(()) } @@ -58,13 +63,16 @@ pub fn write_brief_album_json( pub fn write_albums_json( index: &dyn MetaIndex, user_data: &UserData, + now_embed: &TimeVector, mut w: W, ) -> io::Result<()> { write!(w, "[")?; let mut first = true; for kv in index.get_albums() { - if !first { write!(w, ",")?; } - write_brief_album_json(index, user_data, &mut w, kv.album_id, &kv.album)?; + if !first { + write!(w, ",")?; + } + write_brief_album_json(index, user_data, now_embed, &mut w, kv.album_id, &kv.album)?; first = false; } write!(w, "]") @@ -86,17 +94,25 @@ pub fn write_album_json( write!(w, r#","artist_ids":["#)?; let mut first = true; for artist_id in index.get_album_artists(album.artist_ids) { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!(w, r#""{}""#, artist_id)?; first = false; } write!(w, r#"],"artist":"#)?; serde_json::to_writer(&mut w, index.get_string(album.artist))?; - write!(w, r#","release_date":"{}","tracks":["#, album.original_release_date)?; + write!( + w, + r#","release_date":"{}","tracks":["#, + album.original_release_date + )?; let mut first = true; for kv in index.get_album_tracks(id) { let track_id = kv.track_id; - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!( w, r#"{{"id":"{}","disc_number":{},"track_number":{},"title":"#, @@ -122,6 +138,7 @@ pub fn write_album_json( pub fn write_artist_json( index: &dyn MetaIndex, user_data: &UserData, + now_embed: &TimeVector, mut w: W, artist: &Artist, albums: &[(ArtistId, AlbumId)], @@ -137,8 +154,10 @@ pub fn write_artist_json( // well-formed, it will never fail. 
The id is provided by the index // itself, not user input, so the album should be present. let album = index.get_album(album_id).unwrap(); - if !first { write!(w, ",")?; } - write_brief_album_json(index, user_data, &mut w, album_id, album)?; + if !first { + write!(w, ",")?; + } + write_brief_album_json(index, user_data, now_embed, &mut w, album_id, album)?; first = false; } write!(w, "]}}") @@ -154,28 +173,38 @@ pub fn write_search_results_json( write!(w, r#"{{"artists":["#)?; let mut first = true; for &aid in artists { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write_search_artist_json(index, &mut w, aid)?; first = false; } write!(w, r#"],"albums":["#)?; let mut first = true; for &aid in albums { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write_search_album_json(index, &mut w, aid)?; first = false; } write!(w, r#"],"tracks":["#)?; let mut first = true; for &tid in tracks { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write_search_track_json(index, &mut w, tid)?; first = false; } write!(w, r#"]}}"#) } -pub fn write_search_artist_json(index: &dyn MetaIndex, mut w: W, id: ArtistId) -> io::Result<()> { +pub fn write_search_artist_json( + index: &dyn MetaIndex, + mut w: W, + id: ArtistId, +) -> io::Result<()> { let artist = index.get_artist(id).unwrap(); let albums = index.get_albums_by_artist(id); write!(w, r#"{{"id":"{}","name":"#, id)?; @@ -183,14 +212,20 @@ pub fn write_search_artist_json(index: &dyn MetaIndex, mut w: W, id: A write!(w, r#","albums":["#)?; let mut first = true; for &(_artist_id, album_id) in albums { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!(w, r#""{}""#, album_id)?; first = false; } write!(w, r#"]}}"#) } -pub fn write_search_album_json(index: &dyn MetaIndex, mut w: W, id: AlbumId) -> io::Result<()> { +pub fn write_search_album_json( + index: &dyn MetaIndex, + mut w: W, + id: AlbumId, +) -> io::Result<()> { let album = 
index.get_album(id).unwrap(); write!(w, r#"{{"id":"{}","title":"#, id)?; serde_json::to_writer(&mut w, index.get_string(album.title))?; @@ -199,7 +234,11 @@ pub fn write_search_album_json(index: &dyn MetaIndex, mut w: W, id: Al write!(w, r#","release_date":"{}"}}"#, album.original_release_date) } -pub fn write_search_track_json(index: &dyn MetaIndex, mut w: W, id: TrackId) -> io::Result<()> { +pub fn write_search_track_json( + index: &dyn MetaIndex, + mut w: W, + id: TrackId, +) -> io::Result<()> { let track = index.get_track(id).unwrap(); let album_id = id.album_id(); let album = index.get_album(album_id).unwrap(); @@ -230,8 +269,7 @@ fn write_queued_track_json( write!( w, r#"{{"queue_id":"{}","track_id":"{}","title":"#, - queued_track.queue_id, - queued_track.track_id, + queued_track.queue_id, queued_track.track_id, )?; serde_json::to_writer(&mut w, index.get_string(track.title))?; write!( @@ -243,7 +281,9 @@ fn write_queued_track_json( )?; let mut first = true; for artist_id in index.get_album_artists(album.artist_ids) { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!(w, r#""{}""#, artist_id)?; first = false; } @@ -266,7 +306,6 @@ fn write_queued_track_json( write!(w, r#","is_buffering":{}}}"#, queued_track.is_buffering) } - pub fn write_queue_json( index: &dyn MetaIndex, user_data: &UserData, @@ -318,7 +357,8 @@ pub fn write_scan_status_json( ScanStage::Done => "done", }; - write!(w, + write!( + w, "{{\ \"stage\":\"{}\",\ \"files_discovered\":{},\ @@ -345,11 +385,9 @@ pub fn write_scan_status_json( } /// Write library statistics as json. 
-pub fn write_stats_json( - index: &dyn MetaIndex, - mut w: W, -) -> io::Result<()> { - write!(w, +pub fn write_stats_json(index: &dyn MetaIndex, mut w: W) -> io::Result<()> { + write!( + w, "{{\ \"tracks\":{},\ \"albums\":{},\ diff --git a/src/server.rs b/src/server.rs index 19222e4..e6c4587 100644 --- a/src/server.rs +++ b/src/server.rs @@ -20,6 +20,7 @@ use crate::database as db; use crate::database::Connection; use crate::database_utils; use crate::mvar::Var; +use crate::playcount::TimeVector; use crate::player::{Millibel, Player, QueueId}; use crate::prim::{AlbumId, ArtistId, Hertz, TrackId}; use crate::scan::BackgroundScanner; @@ -265,16 +266,19 @@ impl MetaServer { }; let albums = index.get_albums_by_artist(artist_id); + let now = TimeVector::now(); let buffer = Vec::new(); let mut w = io::Cursor::new(buffer); serialization::write_artist_json( index, &self.user_data.lock().unwrap(), + &now, &mut w, artist, albums, - ).unwrap(); + ) + .unwrap(); Response::from_data(w.into_inner()) .with_header(header_content_type("application/json")) @@ -283,13 +287,11 @@ impl MetaServer { fn handle_albums(&self) -> ResponseBox { let index = &*self.index_var.get(); + let now = TimeVector::now(); let buffer = Vec::new(); let mut w = io::Cursor::new(buffer); - serialization::write_albums_json( - index, - &self.user_data.lock().unwrap(), - &mut w, - ).unwrap(); + serialization::write_albums_json(index, &self.user_data.lock().unwrap(), &now, &mut w) + .unwrap(); Response::from_data(w.into_inner()) .with_header(header_content_type("application/json")) @@ -600,7 +602,7 @@ impl MetaServer { }; match request.respond(response) { - Ok(()) => {}, + Ok(()) => {} Err(err) => println!("Error while responding to request: {:?}", err), } } diff --git a/src/user_data.rs b/src/user_data.rs index 734c544..add0a8a 100644 --- a/src/user_data.rs +++ b/src/user_data.rs @@ -79,10 +79,10 @@ pub struct AlbumState { /// /// The discovery sorting methods identifies albums that were popular in the /// 
past, but not recently. See the [`playcount`] module for more details. - pub discover_score: f32, + pub score_discover: f32, // Playcount on the shortest timescale. - pub trending_score: f32, + pub score_trending: f32, // Log playcount on the longer timescales. // @@ -91,12 +91,45 @@ pub struct AlbumState { // a list of "for now" albums for this time of the day, where we don't // suggest albums with a low playcount just because the one time we played // them was at this time of the day. - pub top_score: f32, + pub score_longterm: f32, // Vector embedding of the play times, used to weigh the discover score. pub time_embedding: TimeVector, } +/// Scores (for ranking) evaluated at a given point in time. +#[derive(Copy, Clone, Default)] +pub struct ScoreSnapshot { + /// Trending score, see [`AlbumState::score_trending`]. + pub trending: f32, + + /// Discovery score, adjusted for the current moment. + pub discover: f32, + + /// "For now" score, based on the time of day, week, and year. + pub for_now: f32, +} + +impl AlbumState { + /// Evaluate scores for the current moment. + /// + /// The `at` time vector should be the embedding of the desired time to + /// evaluate at, and then normalized. + pub fn score(&self, at: &TimeVector) -> ScoreSnapshot { + // The cosine distance between our time vector and the query time vector. + // We put it in the range [0, 1] so that when we multiply with a negative + // discover score, it doesn't flip the sign. + let time_cos = self.time_embedding.dot(at) / self.time_embedding.norm(); + let time_weight = time_cos.mul_add(0.5, 0.5); + + ScoreSnapshot { + trending: self.score_trending, + discover: self.score_discover * time_weight, + for_now: self.score_longterm * time_weight * time_weight, + } + } +} + #[derive(Default)] pub struct ArtistState { // TODO: Add playcount. 
From 62692c95f3e10b330444e8a960b7ee68d4669df9 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 20:27:14 +0200 Subject: [PATCH 12/19] Autoformat server.rs --- src/server.rs | 77 ++++++++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/src/server.rs b/src/server.rs index e6c4587..cbd77d9 100644 --- a/src/server.rs +++ b/src/server.rs @@ -38,7 +38,9 @@ fn header_content_type(content_type: &str) -> Header { fn header_expires_seconds(age_seconds: i64) -> Header { let now = chrono::Utc::now(); - let at = now.checked_add_signed(chrono::Duration::seconds(age_seconds)).unwrap(); + let at = now + .checked_add_signed(chrono::Duration::seconds(age_seconds)) + .unwrap(); // The format from https://tools.ietf.org/html/rfc7234#section-5.3. let value = at.format("%a, %e %b %Y %H:%M:%S GMT").to_string(); Header::from_bytes(&b"Expires"[..], value) @@ -68,10 +70,7 @@ impl MetaServer { thumb_cache_var: thumb_cache_var.clone(), user_data: user_data, player: player, - scanner: BackgroundScanner::new( - index_var, - thumb_cache_var, - ), + scanner: BackgroundScanner::new(index_var, thumb_cache_var), } } @@ -111,7 +110,10 @@ impl MetaServer { let index = &*self.index_var.get(); let tracks = index.get_album_tracks(album_id); - let track = &tracks.first().expect("Albums have at least one track.").track; + let track = &tracks + .first() + .expect("Albums have at least one track.") + .track; let fname = index.get_filename(track.filename); let opts = claxon::FlacReaderOptions { @@ -166,13 +168,11 @@ impl MetaServer { None => return self.handle_bad_request("Invalid track id."), }; - let waveform = db - .begin() - .and_then(|mut tx| { - let result = db::select_track_waveform(&mut tx, track_id.0 as i64)?; - tx.commit()?; - Ok(result) - }); + let waveform = db.begin().and_then(|mut tx| { + let result = db::select_track_waveform(&mut tx, track_id.0 as i64)?; + tx.commit()?; + Ok(result) + }); let waveform = match 
waveform { Ok(Some(data)) => Waveform::from_bytes(data), @@ -184,7 +184,9 @@ impl MetaServer { }; let mut svg = Vec::new(); - waveform.write_svg(&mut svg).expect("Write to memory does not fail."); + waveform + .write_svg(&mut svg) + .expect("Write to memory does not fail."); Response::from_data(svg) .with_header(header_content_type("image/svg+xml")) @@ -195,7 +197,7 @@ impl MetaServer { fn handle_track(&self, path: &str) -> ResponseBox { // Track urls are of the form `/track/f7c153f2b16dc101.flac`. if !path.ends_with(".flac") { - return self.handle_bad_request("Expected a path ending in .flac.") + return self.handle_bad_request("Expected a path ending in .flac."); } let id_part = &path[..path.len() - ".flac".len()]; @@ -246,7 +248,8 @@ impl MetaServer { &mut w, album_id, album, - ).unwrap(); + ) + .unwrap(); Response::from_data(w.into_inner()) .with_header(header_content_type("application/json")) @@ -338,7 +341,8 @@ impl MetaServer { &self.user_data.lock().unwrap(), &mut w, &queue.tracks[..], - ).unwrap(); + ) + .unwrap(); Response::from_data(w.into_inner()) .with_header(header_content_type("application/json")) .boxed() @@ -423,7 +427,7 @@ impl MetaServer { if k == "q" { opt_query = Some(v); } - }; + } let query = match opt_query { Some(q) => q, None => return self.handle_bad_request("Missing search query."), @@ -458,7 +462,8 @@ impl MetaServer { &artists[..n_artists], &albums[..n_albums], &tracks[..n_tracks], - ).unwrap(); + ) + .unwrap(); Response::from_data(w.into_inner()) .with_status_code(200) @@ -502,6 +507,7 @@ impl MetaServer { } /// Router function for all /api/«endpoint» calls. + #[rustfmt::skip] fn handle_api_request( &self, db: &mut Connection, @@ -580,6 +586,7 @@ impl MetaServer { let query = url_iter.next().unwrap_or(""); // A very basic router. See also docs/api.md for an overview. + #[rustfmt::skip] let response = match (request.method(), p0, p1) { // API endpoints go through the API router, to keep this match arm // a bit more concise. 
@@ -630,21 +637,23 @@ pub fn serve(bind: &str, service: Arc) -> ! { let service_i = service.clone(); let name = format!("http_server_{}", i); let builder = thread::Builder::new().name(name); - let join_handle = builder.spawn(move || { - let connection = database_utils::connect_readonly(&service_i.config.db_path) - .expect("Failed to connect to database."); - let mut db = Connection::new(&connection); - loop { - let request = match server_i.recv() { - Ok(rq) => rq, - Err(e) => { - println!("Error: {:?}", e); - break; - } - }; - service_i.handle_request(&mut db, request); - } - }).unwrap(); + let join_handle = builder + .spawn(move || { + let connection = database_utils::connect_readonly(&service_i.config.db_path) + .expect("Failed to connect to database."); + let mut db = Connection::new(&connection); + loop { + let request = match server_i.recv() { + Ok(rq) => rq, + Err(e) => { + println!("Error: {:?}", e); + break; + } + }; + service_i.handle_request(&mut db, request); + } + }) + .unwrap(); threads.push(join_handle); } From cf370eb5a99008ee14b80141828021fd2cd1ea98 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 20:40:29 +0200 Subject: [PATCH 13/19] Avoid division by zero when computing scores Albums without plays have a zero norm on their time embedding, but we should just return zero scores for them, not compute anything. It makes the call site prettier too. 
--- src/serialization.rs | 11 +++++------ src/user_data.rs | 11 +++++++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/serialization.rs b/src/serialization.rs index 62afd74..beb8d43 100644 --- a/src/serialization.rs +++ b/src/serialization.rs @@ -42,7 +42,7 @@ pub fn write_brief_album_json( } write!(w, r#"],"artist":"#)?; serde_json::to_writer(&mut w, index.get_string(album.artist))?; - let scores = user_data.get_album_scores(album_id).score(now_embed); + let scores = user_data.get_album_scores(album_id, now_embed); write!( w, // The discover score can have large-ish magnitude and ranges from negative @@ -315,17 +315,16 @@ pub fn write_queue_json( write!(w, "[")?; let mut first = true; for queued_track in tracks.iter() { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write_queued_track_json(index, user_data, &mut w, queued_track)?; first = false; } write!(w, "]") } -pub fn write_player_params_json( - mut w: W, - params: &Params, -) -> io::Result<()> { +pub fn write_player_params_json(mut w: W, params: &Params) -> io::Result<()> { write!( w, r#"{{"volume_db":{:.02},"high_pass_cutoff_hz":{}}}"#, diff --git a/src/user_data.rs b/src/user_data.rs index add0a8a..6ae8662 100644 --- a/src/user_data.rs +++ b/src/user_data.rs @@ -119,6 +119,7 @@ impl AlbumState { // The cosine distance between our time vector and the query time vector. // We put it in the range [0, 1] so that when we multiply with a negative // discover score, it doesn't flip the sign. + debug_assert!(self.time_embedding.norm().is_finite()); let time_cos = self.time_embedding.dot(at) / self.time_embedding.norm(); let time_weight = time_cos.mul_add(0.5, 0.5); @@ -194,10 +195,16 @@ impl UserData { .unwrap_or_default() } - pub fn get_album_scores(&self, album_id: AlbumId) -> AlbumState { + /// Take a snapshot of the scores for the given album, evaluated at the given query time. + /// + /// See also [`AlbumState::score`]. 
+ pub fn get_album_scores(&self, album_id: AlbumId, at: &TimeVector) -> ScoreSnapshot { // If an album is not present, we don't have playcounts, so it is // ranked as low as possible for all scores. - self.albums.get(album_id).unwrap_or_default() + self.albums + .get(album_id) + .map(|state| state.score(at)) + .unwrap_or_default() } /// Replace the album scores with new scores. From 528f63ae7a0067f38aa095430af04d7c915689cc Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 20:44:35 +0200 Subject: [PATCH 14/19] Load "For Now" score into the app --- app/src/Model.purs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/src/Model.purs b/app/src/Model.purs index 7be80ba..3446ae7 100644 --- a/app/src/Model.purs +++ b/app/src/Model.purs @@ -140,6 +140,7 @@ newtype Album = Album , firstSeen :: String , discoverScore :: Number , trendingScore :: Number + , forNowScore :: Number } instance decodeJsonAlbum :: DecodeJson Album where @@ -156,6 +157,7 @@ instance decodeJsonAlbum :: DecodeJson Album where firstSeen <- Json.getField obj "first_seen" discoverScore <- Json.getField obj "discover_score" trendingScore <- Json.getField obj "trending_score" + forNowScore <- Json.getField obj "for_now_score" pure $ Album { id , title @@ -165,6 +167,7 @@ instance decodeJsonAlbum :: DecodeJson Album where , firstSeen , discoverScore , trendingScore + , forNowScore } getAlbums :: Aff (Array Album) From 8cc85f0535ed37d60e529de10f9f442696779673 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 20:49:16 +0200 Subject: [PATCH 15/19] Add "For Now" sorting to the webinterface --- app/src/AlbumListView.purs | 10 ++++++++-- app/src/Event.purs | 1 + app/src/State.purs | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/app/src/AlbumListView.purs b/app/src/AlbumListView.purs index 736a0a2..3870deb 100644 --- a/app/src/AlbumListView.purs +++ b/app/src/AlbumListView.purs @@ -198,7 +198,7 @@ renderSortOptions postEvent = Html.div $ do 
let onClickPost field = Html.onClick $ void $ launchAff $ postEvent $ Event.SetSortField field optReleaseDate <- Html.div $ do Html.addClass "config-option" - Html.text "Release Date" + Html.text "Date" onClickPost SortReleaseDate ask optFirstSeen <- Html.div $ do @@ -216,17 +216,23 @@ renderSortOptions postEvent = Html.div $ do Html.text "Trending" onClickPost SortTrending ask + optForNow <- Html.div $ do + Html.addClass "config-option" + Html.text "For Now" + onClickPost SortForNow + ask pure $ case _ of SortReleaseDate -> optReleaseDate SortFirstSeen -> optFirstSeen SortDiscover -> optDiscover SortTrending -> optTrending + SortForNow -> optForNow setSortMode :: SortMode -> AlbumListView -> Effect Unit setSortMode { field, direction } state = let - allFields = [SortReleaseDate, SortFirstSeen, SortDiscover, SortTrending] + allFields = [SortReleaseDate, SortFirstSeen, SortDiscover, SortTrending, SortForNow] unsort = do Html.removeClass "increasing" Html.removeClass "decreasing" diff --git a/app/src/Event.purs b/app/src/Event.purs index d49bae2..b7217cc 100644 --- a/app/src/Event.purs +++ b/app/src/Event.purs @@ -26,6 +26,7 @@ data SortField | SortFirstSeen | SortDiscover | SortTrending + | SortForNow derive instance sortFieldEq :: Eq SortField diff --git a/app/src/State.purs b/app/src/State.purs index b026ded..f45a684 100644 --- a/app/src/State.purs +++ b/app/src/State.purs @@ -317,6 +317,7 @@ sortAlbums {field, direction} albums = SortFirstSeen -> Array.sortWith (\(Album album) -> album.firstSeen) albums SortDiscover -> Array.sortWith (\(Album album) -> album.discoverScore) albums SortTrending -> Array.sortWith (\(Album album) -> album.trendingScore) albums + SortForNow -> Array.sortWith (\(Album album) -> album.forNowScore) albums toggleSortDirection :: SortDirection -> SortDirection toggleSortDirection = case _ of From c1ce014f7b633a54adab3a8dfb17f1c82d20413c Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 20:56:02 +0200 Subject: [PATCH 
16/19] Write changelog for recent features --- docs/changelog.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index f6c400a..7697157 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -22,10 +22,18 @@ Musium versions are named `MAJOR.MINOR.PATCH`. ## Next - * The ranking behind the _discover_ sort mode now better balances past and - recent popularity to show you albums worth listening to again. + * A new sort option is available in the album list: _For Now_. This ranking + shows you albums that you played at similar times of the day, week, and year + in the past. For example, if you tend to listen to more quiet music in the + morning, and more intense music on Friday nights, this will surface those + albums at the right times. + * The ranking behind the _Discover_ sort mode now better balances past and + recent popularity to show you albums worth listening to again. Like the + _For Now_ ranking, it takes into account the time of the day, week, and year, + to show the most relevant suggestions. * The queue tab in the webinterface is now implemented, including buttons to shuffle and clear the queue. + * Add support for Czech diacritics in text normalization. ## 0.16.0 From 379cc70a898c2a6b958391c3a56e45d179fe3388 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 21:53:43 +0200 Subject: [PATCH 17/19] Correct typo in comment --- src/playcount.rs | 16 ++++++++-------- src/user_data.rs | 4 +++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index aa8d8c5..8c2f2db 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -192,7 +192,7 @@ pub struct RateLimit { /// vs. evening, or weekend vs. weekday, or summer vs. winter. Based on this we /// hope to suggest better tracks to listen to based on the current moment. E.g. /// in the early morning we may suggest some chill jazz but not heavy dancefloor -/// banger. 
+/// bangers. /// /// Because years, weeks, and days are all cyclic, we treat them as circles, and /// we embed the moment as x, y coordinate on the circle. This ensures that @@ -221,7 +221,7 @@ pub struct RateLimit { /// /// ## Normalization /// -/// When we embed an instant, the length of the vector is 3. Each of the +/// When we embed an instant, the length of the vector is sqrt(3). Each of the /// 3 components (year/week/day) has a length of 1 by construction, so the /// relative length of the components is equal. After adding time vectors /// together, this is no longer true. For example, if we listen a track on every @@ -317,12 +317,12 @@ impl TimeVector { ]; // The length of the embedding vector of an instant is by construction - // 3.0, and restricted to the year/week/day part, each of those parts - // has length 1.0. But when we add those embeddings together, the ones - // that point in the same direction reinforce while ones that point in - // different directions cancel out. So we play a track on every day of - // the week in one month, the year part becomes longer relative to the - // week part. We print those weights to classify an item in which of + // sqrt(3.0), and restricted to the year/week/day part, each of those + // parts has length 1.0. But when we add those embeddings together, the + // ones that point in the same direction reinforce while ones that point + // in different directions cancel out. So we play a track on every day + // of the week in one month, the year part becomes longer relative to + // the week part. We print those weights to classify an item in which of // these three cycles it is most seasonal. let w2_year = self.0[0] * self.0[0] + self.0[1] * self.0[1]; let w2_week = self.0[2] * self.0[2] + self.0[3] * self.0[3]; diff --git a/src/user_data.rs b/src/user_data.rs index 6ae8662..e342dfd 100644 --- a/src/user_data.rs +++ b/src/user_data.rs @@ -93,7 +93,9 @@ pub struct AlbumState { // them was at this time of the day. 
pub score_longterm: f32, - // Vector embedding of the play times, used to weigh the discover score. + // Vector embedding of the play times. + // + // Used to weigh the discover score, and compute the "for now" score. pub time_embedding: TimeVector, } From 0938fc1a4bdf9c7c7867ccd636080743bc4c1030 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 21:59:43 +0200 Subject: [PATCH 18/19] Ensure sort buttons fit on one line on mobile At least, on my viewport, on narrower screens it may still not fit. Not a great solution there, I think it would need an (implicit) scrollbar. But for now (haha, see what I did there) it works. --- app/style.css | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/style.css b/app/style.css index 2d37816..f6e6a31 100644 --- a/app/style.css +++ b/app/style.css @@ -99,11 +99,16 @@ body { display: none; } +.list-config { + white-space: nowrap; + overflow: hidden; +} + .list-config .config-option { display: inline-block; height: 1.5rem; line-height: 1.5rem; - padding: 1rem; + padding: 0.9rem; padding-bottom: 0; padding-top: 0; margin-top: 1rem; From ea275ae681505dd83b6fe8e7cac1a27c67262153 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 29 Apr 2025 22:09:04 +0200 Subject: [PATCH 19/19] Remove unused Add impl for TimeVector I end up doing only mul_adds. --- src/playcount.rs | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/playcount.rs b/src/playcount.rs index 8c2f2db..1ae4deb 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -348,21 +348,6 @@ impl Default for TimeVector { } } -impl std::ops::Add for TimeVector { - type Output = TimeVector; - - fn add(self, rhs: TimeVector) -> TimeVector { - TimeVector([ - self.0[0] + rhs.0[0], - self.0[1] + rhs.0[1], - self.0[2] + rhs.0[2], - self.0[3] + rhs.0[3], - self.0[4] + rhs.0[4], - self.0[5] + rhs.0[5], - ]) - } -} - impl std::ops::Mul for TimeVector { type Output = TimeVector;