diff --git a/app/src/AlbumListView.purs b/app/src/AlbumListView.purs index 736a0a2..3870deb 100644 --- a/app/src/AlbumListView.purs +++ b/app/src/AlbumListView.purs @@ -198,7 +198,7 @@ renderSortOptions postEvent = Html.div $ do let onClickPost field = Html.onClick $ void $ launchAff $ postEvent $ Event.SetSortField field optReleaseDate <- Html.div $ do Html.addClass "config-option" - Html.text "Release Date" + Html.text "Date" onClickPost SortReleaseDate ask optFirstSeen <- Html.div $ do @@ -216,17 +216,23 @@ renderSortOptions postEvent = Html.div $ do Html.text "Trending" onClickPost SortTrending ask + optForNow <- Html.div $ do + Html.addClass "config-option" + Html.text "For Now" + onClickPost SortForNow + ask pure $ case _ of SortReleaseDate -> optReleaseDate SortFirstSeen -> optFirstSeen SortDiscover -> optDiscover SortTrending -> optTrending + SortForNow -> optForNow setSortMode :: SortMode -> AlbumListView -> Effect Unit setSortMode { field, direction } state = let - allFields = [SortReleaseDate, SortFirstSeen, SortDiscover, SortTrending] + allFields = [SortReleaseDate, SortFirstSeen, SortDiscover, SortTrending, SortForNow] unsort = do Html.removeClass "increasing" Html.removeClass "decreasing" diff --git a/app/src/Event.purs b/app/src/Event.purs index d49bae2..b7217cc 100644 --- a/app/src/Event.purs +++ b/app/src/Event.purs @@ -26,6 +26,7 @@ data SortField | SortFirstSeen | SortDiscover | SortTrending + | SortForNow derive instance sortFieldEq :: Eq SortField diff --git a/app/src/Model.purs b/app/src/Model.purs index 7be80ba..3446ae7 100644 --- a/app/src/Model.purs +++ b/app/src/Model.purs @@ -140,6 +140,7 @@ newtype Album = Album , firstSeen :: String , discoverScore :: Number , trendingScore :: Number + , forNowScore :: Number } instance decodeJsonAlbum :: DecodeJson Album where @@ -156,6 +157,7 @@ instance decodeJsonAlbum :: DecodeJson Album where firstSeen <- Json.getField obj "first_seen" discoverScore <- Json.getField obj "discover_score" 
trendingScore <- Json.getField obj "trending_score" + forNowScore <- Json.getField obj "for_now_score" pure $ Album { id , title @@ -165,6 +167,7 @@ instance decodeJsonAlbum :: DecodeJson Album where , firstSeen , discoverScore , trendingScore + , forNowScore } getAlbums :: Aff (Array Album) diff --git a/app/src/State.purs b/app/src/State.purs index b026ded..f45a684 100644 --- a/app/src/State.purs +++ b/app/src/State.purs @@ -317,6 +317,7 @@ sortAlbums {field, direction} albums = SortFirstSeen -> Array.sortWith (\(Album album) -> album.firstSeen) albums SortDiscover -> Array.sortWith (\(Album album) -> album.discoverScore) albums SortTrending -> Array.sortWith (\(Album album) -> album.trendingScore) albums + SortForNow -> Array.sortWith (\(Album album) -> album.forNowScore) albums toggleSortDirection :: SortDirection -> SortDirection toggleSortDirection = case _ of diff --git a/app/style.css b/app/style.css index 2d37816..f6e6a31 100644 --- a/app/style.css +++ b/app/style.css @@ -99,11 +99,16 @@ body { display: none; } +.list-config { + white-space: nowrap; + overflow: hidden; +} + .list-config .config-option { display: inline-block; height: 1.5rem; line-height: 1.5rem; - padding: 1rem; + padding: 0.9rem; padding-bottom: 0; padding-top: 0; margin-top: 1rem; diff --git a/docs/changelog.md b/docs/changelog.md index f6c400a..7697157 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -22,10 +22,18 @@ Musium versions are named `MAJOR.MINOR.PATCH`. ## Next - * The ranking behind the _discover_ sort mode now better balances past and - recent popularity to show you albums worth listening to again. + * A new sort option is available in the album list: _For Now_. This ranking + shows you albums that you played at similar times of the day, week, and year + in the past. For example, if you tend to listen to more quiet music in the + morning, and more intense music on Friday nights, this will surface those + albums at the right times. 
+ * The ranking behind the _Discover_ sort mode now better balances past and + recent popularity to show you albums worth listening to again. Like the + _For Now_ ranking, it takes into account the time of the day, week, and year, + to show the most relevant suggestions. * The queue tab in the web interface is now implemented, including buttons to shuffle and clear the queue. + * Add support for Czech diacritics in text normalization. ## 0.16.0 diff --git a/src/playcount.rs b/src/playcount.rs index efdd889..1ae4deb 100644 --- a/src/playcount.rs +++ b/src/playcount.rs @@ -11,12 +11,12 @@ use std::collections::BinaryHeap; use std::collections::HashMap; use std::path::Path; +use crate::album_table::AlbumTable; use crate::database::{self, Transaction}; use crate::database_utils::connect_readonly; use crate::prim::{AlbumId, ArtistId, TrackId}; -use crate::{MemoryMetaIndex, MetaIndex}; -use crate::album_table::AlbumTable; use crate::user_data::AlbumState; +use crate::{MemoryMetaIndex, MetaIndex}; /// A point in time with second granularity. /// @@ -98,6 +98,67 @@ impl Instant { seconds: self.seconds_since_jan_2000 - t0.seconds_since_jan_2000, } } + + /// Embed the instant into the time vector space, see also [`TimeVector`]. + pub fn embed(&self) -> TimeVector { + use std::f32::consts::TAU; + + const SECONDS_PER_YEAR: u32 = 365 * 24 * 3600 + 6 * 3600; + const SECONDS_PER_WEEK: u32 = 7 * 24 * 3600; + const SECONDS_PER_DAY: u32 = 24 * 3600; + + // We convert to radians to map to the circle; precompute as much of + // the multiplication as we can. + const NORM_YEAR: f32 = TAU / (SECONDS_PER_YEAR as f32); + const NORM_DAY: f32 = TAU / (SECONDS_PER_DAY as f32); + + let t = self.seconds_since_jan_2000; + let t_day = t % SECONDS_PER_DAY; + let t_year = t % SECONDS_PER_YEAR; + // The epoch we use, 2000-01-01, is a Saturday, but we want the week to + // start on Monday midnight to simplify the circle mapping below.
+ let t_week = (t + SECONDS_PER_DAY * 5) % SECONDS_PER_WEEK; + + let r_day = (t_day as f32) * NORM_DAY; + let r_year = (t_year as f32) * NORM_YEAR; + + // We map weekdays non-linearly around the circle. The first quadrant + // contains Mon-Thu, then the next three quadrants contain Fri, Sat, Sun + // respectively. This mapping has the following properties: + // + // - All weekdays lie above the x-axis, the weekend lies below, so the + // time-weighed average vector of weekend vs. weekday have a dot + // product close to -1, definitely below 0. + // - Saturday is diametrically opposite the "weekdays" excluding Friday. + // The time-weighed average vector of Saturday vs. Mon-Thu have a dot + // product of exactly -1. + // - "Party nights" (Friday and Saturday) lie left of the y-axis, + // weekday + Sunday night all lie right of the y-axis. The dot product + // of the time-weighed average vector of days with party nights vs. + // days without is close to -1, definitely below 0. + // + // Hopefully this does a good job of mapping the time of the week into + // R^2 in a meaningful way. + let r_week = if t_week <= SECONDS_PER_DAY * 4 { + // One factor 0.25 for the quarter circle, one because we fit 4 days + // into this quadrant. + (t_week as f32) * (TAU * 0.25 * 0.25 / SECONDS_PER_DAY as f32) + } else { + // We subtract 3 full days, so `t_weekend` is 0.0 at the start of + // Thursday. Then we allocate a quarter of the circle to each day. + let t_weekend = t_week - SECONDS_PER_DAY * 3; + (t_weekend as f32) * (TAU * 0.25 / SECONDS_PER_DAY as f32) + }; + + TimeVector([ + r_year.cos(), + r_year.sin(), + r_week.cos(), + r_week.sin(), + r_day.cos(), + r_day.sin(), + ]) + } } impl Epoch { @@ -122,6 +183,186 @@ pub struct RateLimit { pub fill_rate_per_second: f32, } +/// A vector representation of the time of day, week, and year. 
+/// +/// ## Summary +/// +/// The rationale behind this is that we can compare how "similar" moments are +/// using the cosine difference, which we can use to classify tracks as morning +/// vs. evening, or weekend vs. weekday, or summer vs. winter. Based on this we +/// hope to suggest better tracks to listen to based on the current moment. E.g. +/// in the early morning we may suggest some chill jazz but not heavy dancefloor +/// bangers. +/// +/// Because years, weeks, and days are all cyclic, we treat them as circles, and +/// we embed the moment as x, y coordinate on the circle. This ensures that +/// taking the cosine distance is meaningful. +/// +/// We populate the space as follows: +/// - Dimension 0, 1: Time of year +/// - Dimension 2, 3: Time of week[^1] +/// - Dimension 4, 5: Time of day (24h) +/// +/// [^1]: For the time of the week, we don't map the time uniformly to the +/// circle. We care more about "weekday" vs. "weekend", so the weekdays are +/// relatively squashed. +/// +/// ## Local time +/// +/// We map instants to time vectors based on UTC time, without regard for time +/// zone. Ideally, we would do it based on local time, but that information is +/// not available from historical Last.fm scrobbles, and even in Musium I made +/// the mistake of saving listens always as UTC, not including time zone offset. +/// For me this is not a big problem, the vast majority of my listens are in +/// UTC + {0, 1, 2}, so the impact on the day shift is small. If I ever move to +/// a very different time zone and I want to preserve the time of the day, I +/// suppose we could try to infer the time zone from the median listen time or +/// something like that. +/// +/// ## Normalization +/// +/// When we embed an instant, the length of the vector is sqrt(3). Each of the +/// 3 components (year/week/day) has a length of 1 by construction, so the +/// relative length of the components is equal. After adding time vectors +/// together, this is no longer true. 
For example, if we listen to a track on every +/// weekday, but only in March, the day-of-week components will cancel each +/// other out, while the time-of-year components will reinforce each other. If +/// we normalize the result, the time-of-year component will be much larger. So +/// naturally, when we add time vectors, they pick out which component an item +/// is most seasonal in. When we take the cosine distance with the embedding +/// of the current time to find tracks suitable for the current moment, because +/// it's not sensitive to absolute length, that will naturally emphasize the +/// right component. +// TODO: Instead of deriving copy, make a quantized version that holds i8's. +// It saves memory in the user data, and I hope it's faster to compute the inner +// products as well, it could even be vectorized. +#[derive(Copy, Clone)] +pub struct TimeVector([f32; 6]); + +impl TimeVector { + pub const fn zero() -> TimeVector { + TimeVector([0.0; 6]) + } + + /// Return the normalized embedding of the current moment. + pub fn now() -> TimeVector { + use chrono::Utc; + let t = Instant::from_posix_timestamp(Utc::now().timestamp()); + let v = t.embed(); + let n = v.norm(); + v * n.recip() + } + + pub fn mul_add(&self, factor: f32, term: &TimeVector) -> TimeVector { + TimeVector([ + self.0[0].mul_add(factor, term.0[0]), + self.0[1].mul_add(factor, term.0[1]), + self.0[2].mul_add(factor, term.0[2]), + self.0[3].mul_add(factor, term.0[3]), + self.0[4].mul_add(factor, term.0[4]), + self.0[5].mul_add(factor, term.0[5]), + ]) + } + + /// Return the L2-norm (Euclidean norm) of this vector. + pub fn norm(&self) -> f32 { + let w2_year = self.0[0] * self.0[0] + self.0[1] * self.0[1]; + let w2_week = self.0[2] * self.0[2] + self.0[3] * self.0[3]; + let w2_day = self.0[4] * self.0[4] + self.0[5] * self.0[5]; + (w2_year + w2_week + w2_day).sqrt() + } + + /// Return the dot product between the two vectors.
+ pub fn dot(&self, other: &TimeVector) -> f32 { + 0.0 + ((self.0[0] * other.0[0]) + (self.0[1] * other.0[1])) + + ((self.0[2] * other.0[2]) + (self.0[3] * other.0[3])) + + ((self.0[4] * other.0[4]) + (self.0[5] * other.0[5])) + } + + /// For debugging, format as human-readable direction that the vector points in. + /// + /// Note, this is only approximate. We assume for example that every month + /// is exactly 1/12 of a year, where a year is 365.25 days. It's about the + /// rough direction anyway so this is fine. + #[rustfmt::skip] + fn fmt_dir(&self) -> String { + use std::f32::consts::TAU; + + let mut r_year = self.0[1].atan2(self.0[0]); + let mut r_week = self.0[3].atan2(self.0[2]); + let mut r_day = self.0[5].atan2(self.0[4]); + + r_year += if r_year < 0.0 { TAU } else { 0.0 }; + r_week += if r_week < 0.0 { TAU } else { 0.0 }; + r_day += if r_day < 0.0 { TAU } else { 0.0 }; + + let month = (r_year * (11.999 / TAU)) as usize; + let hour = (r_day * (23.999 / TAU)) as usize; + + // For the day, we don't bother to undo the non-linear mapping that + // [`Instant::embed`] applies, instead we factor this into the lookup + // table below. + let day = (r_week * (15.999 / TAU)) as usize; + + const MONTHS: [&'static str; 12] = [ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", + ]; + // The inverse mapping of [`Instant::embed`]. + const DAYS: [&'static str; 16] = [ + "Mon", "Tue", "Wed", "Thu", + "Fri", "Fri", "Fri", "Fri", + "Sat", "Sat", "Sat", "Sat", + "Sun", "Sun", "Sun", "Sun", + ]; + + // The length of the embedding vector of an instant is by construction + // sqrt(3.0), and restricted to the year/week/day part, each of those + // parts has length 1.0. But when we add those embeddings together, the + // ones that point in the same direction reinforce while ones that point + // in different directions cancel out. 
So if we play a track on every day + // of the week in one month, the year part becomes longer relative to + // the week part. We print those weights to classify an item in which of + // these three cycles it is most seasonal. + let w2_year = self.0[0] * self.0[0] + self.0[1] * self.0[1]; + let w2_week = self.0[2] * self.0[2] + self.0[3] * self.0[3]; + let w2_day = self.0[4] * self.0[4] + self.0[5] * self.0[5]; + let inv_norm = (w2_year + w2_week + w2_day).sqrt().recip(); + let w_year = w2_year.sqrt() * inv_norm; + let w_week = w2_week.sqrt() * inv_norm; + let w_day = w2_day.sqrt() * inv_norm; + + format!( + "{} {} {:02}hZ Y{:1.0}-D{:1.0}-H{:1.0}", + MONTHS[month], DAYS[day], hour, + // We print these to 1 digit precision, and it would be wasteful to + // add the "0." in front, so we print as integer from 0 to 9. + w_year * 9.49, w_week * 9.49, w_day * 9.49, + ) + } +} + +impl Default for TimeVector { + fn default() -> Self { + TimeVector::zero() + } +} + +impl std::ops::Mul<f32> for TimeVector { + type Output = TimeVector; + + fn mul(self, rhs: f32) -> TimeVector { + TimeVector([ + self.0[0] * rhs, + self.0[1] * rhs, + self.0[2] * rhs, + self.0[3] * rhs, + self.0[4] * rhs, + self.0[5] * rhs, + ]) + } +} + /// Exponential moving averages at different timescales plus leaky bucket rate limiter. pub struct ExpCounter { /// Time at which the counts were last updated. @@ -132,6 +373,9 @@ pub struct ExpCounter { /// Exponentially decaying counts for different half-lives. pub n: [f32; 5], + + /// Exponential moving average of the time vector of each play.
+ pub time_embedding: TimeVector, } impl ExpCounter { @@ -200,9 +444,9 @@ impl ExpCounter { 19260.0, // 3650 days / 10 years // 9630.615234, // 1826 days / 5 years 2407.653809, // 457 days / 1.25 years - 601.913452, // 114 days / ~3.75 months / 16 weeks - 150.478363, // 29 days / 1 month - 37.619591, // 7 days + 601.913452, // 114 days / ~3.75 months / 16 weeks + 150.478363, // 29 days / 1 month + 37.619591, // 7 days ]; /// Return how much to decay the counters by after the elapsed time. @@ -222,6 +466,7 @@ impl ExpCounter { // have long replenished. bucket: 0.0, n: [0.0; 5], + time_embedding: TimeVector::zero(), } } @@ -276,6 +521,14 @@ impl ExpCounter { } self.t = t1; + + // In addition to updating the counters, we update the time vector for + // this item. I experimented with a decay factor of 1 - 0.1 * count, + // so when the rate limiter doesn't limit, a factor of 0.9, but that was + // decaying way too aggressively. Just adding without decay seems to + // work far better, even though it skews the item to the initial + // discovery phase. 
+ self.time_embedding = t1.embed().mul_add(count, &self.time_embedding); } } @@ -517,8 +770,10 @@ impl PlayCounts { let mut albums = AlbumTable::new(self.counter.albums.len(), AlbumState::default()); for (album_id, counter) in self.counter.albums.iter() { let state = AlbumState { - discover_score: score_falling(counter), - trending_score: score_trending(counter), + score_discover: score_falling(counter), + score_trending: score_trending(counter), + score_longterm: score_longterm(counter), + time_embedding: counter.time_embedding, }; albums.insert(*album_id, state); } @@ -530,6 +785,7 @@ fn print_ranking( title: &'static str, description: String, index: &MemoryMetaIndex, + counts: &PlayCounts, top_artists: &[(RevNotNan, ArtistId)], top_albums: &[(RevNotNan, AlbumId)], top_tracks: &[(RevNotNan, TrackId)], @@ -538,11 +794,13 @@ fn print_ranking( for (i, (count, artist_id)) in top_artists.iter().enumerate() { let artist = index.get_artist(*artist_id).unwrap(); let artist_name = index.get_string(artist.name); + let counter = counts.counter.artists.get(artist_id).unwrap(); println!( - " {:2} {:7.3} {} {}", + " {:2} {:7.3} {} {} {}", i + 1, count.0, + counter.time_embedding.fmt_dir(), artist_id, artist_name ); @@ -553,11 +811,13 @@ fn print_ranking( let album = index.get_album(*album_id).unwrap(); let album_title = index.get_string(album.title); let album_artist = index.get_string(album.artist); + let counter = counts.counter.albums.get(album_id).unwrap(); println!( - " {:2} {:7.3} {} {:25} {}", + " {:2} {:7.3} {} {} {:25} {}", i + 1, count.0, + counter.time_embedding.fmt_dir(), album_id, album_title, album_artist @@ -569,11 +829,13 @@ fn print_ranking( let track = index.get_track(*track_id).unwrap(); let track_title = index.get_string(track.title); let track_artist = index.get_string(track.artist); + let counter = counts.counter.tracks.get(track_id).unwrap(); println!( - " {:2} {:7.3} {} {:25} {}", + " {:2} {:7.3} {} {} {:25} {}", i + 1, count.0, + 
counter.time_embedding.fmt_dir(), track_id, track_title, track_artist @@ -587,10 +849,12 @@ fn print_ranking( /// playcount on a short timescale, while still mixing in a bit of a longer /// time horizon. fn score_trending(counter: &ExpCounter) -> f32 { - 0.0 - + (2.0 * counter.n[4]) - + (0.5 * counter.n[3]) - + (0.1 * counter.n[2]) + (2.0 * counter.n[4]) + (0.5 * counter.n[3]) + (0.1 * counter.n[2]) +} + +/// Score for sorting by top on the longest two time scales. +fn score_longterm(counter: &ExpCounter) -> f32 { + counter.n[0].ln() + counter.n[1].ln() } /// Score for sorting entries by _falling_. @@ -614,6 +878,22 @@ fn score_falling(counter: &ExpCounter) -> f32 { f0 + f1 * 0.2 + f2 * 0.6 } +/// Score for sorting entries as the most suitable for this time. +/// +/// Based on time of the year, day of the week, and time of the day, when we +/// played the item in the past. +/// +/// Takes a normalized embedding of the current time as `now_embed`. +fn score_for_now(now_embed: &TimeVector, counter: &ExpCounter) -> f32 { + // We take the cosine distance. We assume `now_embed` is already normalized, + // so we only need to normalize the counter's vector. + let cos_dist = counter.time_embedding.dot(now_embed) / counter.time_embedding.norm(); + + // Put the score in the range [0.0, 1.0], so we can easily use it as a + // multiplier for other scores. + cos_dist.mul_add(0.5, 0.5) +} + /// Print playcount statistics about the library. 
/// /// This is mostly for debugging and development purposes, playcounts should be @@ -636,8 +916,12 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { counts.get_top_by(150, |counter: &ExpCounter| RevNotNan(counter.n[timescale])); print_ranking( "TOP", - format!("timescale {}, {:.0} days / {:.0} months", timescale, n_days, n_months), + format!( + "timescale {}, {:.0} days / {:.0} months", + timescale, n_days, n_months + ), index, + &counts, &top_artists, &top_albums, &top_tracks, @@ -650,16 +934,34 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { "TRENDING", "see code for formula".to_string(), index, + &counts, &trending_artists, &trending_albums, &trending_tracks, ); - let (falling_artists, falling_albums, falling_tracks) = counts.get_top_by(350, |c| RevNotNan(score_falling(c))); + let (falling_artists, falling_albums, falling_tracks) = + counts.get_top_by(350, |c| RevNotNan(score_falling(c))); print_ranking( "FALLING", "see code for formula".to_string(), index, + &counts, + &falling_artists, + &falling_albums, + &falling_tracks, + ); + + let now = Instant::from_posix_timestamp(chrono::Utc::now().timestamp()); + let now_embed = now.embed() * (1.0 / now.embed().norm()); + + let (falling_artists, falling_albums, falling_tracks) = + counts.get_top_by(150, |c| RevNotNan(score_for_now(&now_embed, c))); + print_ranking( + "FOR NOW", + "time vector cosine distance".to_string(), + index, + &counts, &falling_artists, &falling_albums, &falling_tracks, @@ -667,3 +969,44 @@ pub fn main(index: &MemoryMetaIndex, db_path: &Path) -> crate::Result<()> { Ok(()) } + +#[cfg(test)] +pub mod test { + use super::Instant; + use chrono::{DateTime, Utc}; + + fn fmt_dir(dt: DateTime) -> String { + Instant::from_posix_timestamp(dt.timestamp()) + .embed() + .fmt_dir() + } + + #[test] + #[rustfmt::skip] + fn time_vector_embed_format_works_as_expected() { + use chrono::{TimeZone, Utc}; + + // Month, day of week, hour of day. 
+ // 2025-04-14 is a Monday. + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 14).and_hms( 9, 5, 0)), "Apr Mon 09hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 15).and_hms(11, 5, 0)), "Apr Tue 11hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 16).and_hms(13, 5, 0)), "Apr Wed 13hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 17).and_hms(15, 5, 0)), "Apr Thu 15hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 18).and_hms(17, 5, 0)), "Apr Fri 17hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 19).and_hms(19, 5, 0)), "Apr Sat 19hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 20).and_hms(21, 5, 0)), "Apr Sun 21hZ Y5-D5-H5"); + + assert_eq!(fmt_dir(Utc.ymd(2025, 1, 15).and_hms( 7, 5, 0)), "Jan Wed 07hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 2, 15).and_hms( 9, 5, 0)), "Feb Sat 09hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 3, 15).and_hms(11, 5, 0)), "Mar Sat 11hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 4, 15).and_hms(13, 5, 0)), "Apr Tue 13hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 5, 15).and_hms(15, 5, 0)), "May Thu 15hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 6, 15).and_hms(17, 5, 0)), "Jun Sun 17hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 7, 15).and_hms(19, 5, 0)), "Jul Tue 19hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 8, 15).and_hms(21, 5, 0)), "Aug Fri 21hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 9, 15).and_hms(23, 5, 0)), "Sep Mon 23hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 10, 15).and_hms( 1, 5, 0)), "Oct Wed 01hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 11, 15).and_hms( 2, 5, 0)), "Nov Sat 02hZ Y5-D5-H5"); + assert_eq!(fmt_dir(Utc.ymd(2025, 12, 15).and_hms( 6, 5, 0)), "Dec Mon 06hZ Y5-D5-H5"); + } +} diff --git a/src/serialization.rs b/src/serialization.rs index a8f304f..beb8d43 100644 --- a/src/serialization.rs +++ b/src/serialization.rs @@ -12,6 +12,7 @@ use serde_json; use std::io; use std::io::Write; +use crate::playcount::TimeVector; use crate::player::{Params, TrackSnapshot}; 
use crate::scan; use crate::user_data::UserData; @@ -23,6 +24,7 @@ use crate::{Album, AlbumId, Artist, ArtistId, MetaIndex, TrackId}; pub fn write_brief_album_json( index: &dyn MetaIndex, user_data: &UserData, + now_embed: &TimeVector, mut w: W, album_id: AlbumId, album: &Album, @@ -32,24 +34,27 @@ pub fn write_brief_album_json( write!(w, r#","artist_ids":["#)?; let mut first = true; for artist_id in index.get_album_artists(album.artist_ids) { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!(w, r#""{}""#, artist_id)?; first = false; } write!(w, r#"],"artist":"#)?; serde_json::to_writer(&mut w, index.get_string(album.artist))?; - let scores = user_data.get_album_scores(album_id); + let scores = user_data.get_album_scores(album_id, now_embed); write!( w, // The discover score can have large-ish magnitude and ranges from negative // to positive, it does not need a lot of precision. The trending score // is always between 0 and 1 though, it needs more digits for precision // near the end of the ranking. 
- r#","release_date":"{}","first_seen":"{}","discover_score":{:.2},"trending_score":{:.4}}}"#, + r#","release_date":"{}","first_seen":"{}","discover_score":{:.2},"trending_score":{:.4},"for_now_score":{:.3}}}"#, album.original_release_date, album.first_seen.format_iso8601(), - scores.discover_score, - scores.trending_score, + scores.discover, + scores.trending, + scores.for_now, )?; Ok(()) } @@ -58,13 +63,16 @@ pub fn write_brief_album_json( pub fn write_albums_json( index: &dyn MetaIndex, user_data: &UserData, + now_embed: &TimeVector, mut w: W, ) -> io::Result<()> { write!(w, "[")?; let mut first = true; for kv in index.get_albums() { - if !first { write!(w, ",")?; } - write_brief_album_json(index, user_data, &mut w, kv.album_id, &kv.album)?; + if !first { + write!(w, ",")?; + } + write_brief_album_json(index, user_data, now_embed, &mut w, kv.album_id, &kv.album)?; first = false; } write!(w, "]") @@ -86,17 +94,25 @@ pub fn write_album_json( write!(w, r#","artist_ids":["#)?; let mut first = true; for artist_id in index.get_album_artists(album.artist_ids) { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!(w, r#""{}""#, artist_id)?; first = false; } write!(w, r#"],"artist":"#)?; serde_json::to_writer(&mut w, index.get_string(album.artist))?; - write!(w, r#","release_date":"{}","tracks":["#, album.original_release_date)?; + write!( + w, + r#","release_date":"{}","tracks":["#, + album.original_release_date + )?; let mut first = true; for kv in index.get_album_tracks(id) { let track_id = kv.track_id; - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!( w, r#"{{"id":"{}","disc_number":{},"track_number":{},"title":"#, @@ -122,6 +138,7 @@ pub fn write_album_json( pub fn write_artist_json( index: &dyn MetaIndex, user_data: &UserData, + now_embed: &TimeVector, mut w: W, artist: &Artist, albums: &[(ArtistId, AlbumId)], @@ -137,8 +154,10 @@ pub fn write_artist_json( // well-formed, it will never fail. 
The id is provided by the index // itself, not user input, so the album should be present. let album = index.get_album(album_id).unwrap(); - if !first { write!(w, ",")?; } - write_brief_album_json(index, user_data, &mut w, album_id, album)?; + if !first { + write!(w, ",")?; + } + write_brief_album_json(index, user_data, now_embed, &mut w, album_id, album)?; first = false; } write!(w, "]}}") @@ -154,28 +173,38 @@ pub fn write_search_results_json( write!(w, r#"{{"artists":["#)?; let mut first = true; for &aid in artists { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write_search_artist_json(index, &mut w, aid)?; first = false; } write!(w, r#"],"albums":["#)?; let mut first = true; for &aid in albums { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write_search_album_json(index, &mut w, aid)?; first = false; } write!(w, r#"],"tracks":["#)?; let mut first = true; for &tid in tracks { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write_search_track_json(index, &mut w, tid)?; first = false; } write!(w, r#"]}}"#) } -pub fn write_search_artist_json(index: &dyn MetaIndex, mut w: W, id: ArtistId) -> io::Result<()> { +pub fn write_search_artist_json( + index: &dyn MetaIndex, + mut w: W, + id: ArtistId, +) -> io::Result<()> { let artist = index.get_artist(id).unwrap(); let albums = index.get_albums_by_artist(id); write!(w, r#"{{"id":"{}","name":"#, id)?; @@ -183,14 +212,20 @@ pub fn write_search_artist_json(index: &dyn MetaIndex, mut w: W, id: A write!(w, r#","albums":["#)?; let mut first = true; for &(_artist_id, album_id) in albums { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!(w, r#""{}""#, album_id)?; first = false; } write!(w, r#"]}}"#) } -pub fn write_search_album_json(index: &dyn MetaIndex, mut w: W, id: AlbumId) -> io::Result<()> { +pub fn write_search_album_json( + index: &dyn MetaIndex, + mut w: W, + id: AlbumId, +) -> io::Result<()> { let album = 
index.get_album(id).unwrap(); write!(w, r#"{{"id":"{}","title":"#, id)?; serde_json::to_writer(&mut w, index.get_string(album.title))?; @@ -199,7 +234,11 @@ pub fn write_search_album_json(index: &dyn MetaIndex, mut w: W, id: Al write!(w, r#","release_date":"{}"}}"#, album.original_release_date) } -pub fn write_search_track_json(index: &dyn MetaIndex, mut w: W, id: TrackId) -> io::Result<()> { +pub fn write_search_track_json( + index: &dyn MetaIndex, + mut w: W, + id: TrackId, +) -> io::Result<()> { let track = index.get_track(id).unwrap(); let album_id = id.album_id(); let album = index.get_album(album_id).unwrap(); @@ -230,8 +269,7 @@ fn write_queued_track_json( write!( w, r#"{{"queue_id":"{}","track_id":"{}","title":"#, - queued_track.queue_id, - queued_track.track_id, + queued_track.queue_id, queued_track.track_id, )?; serde_json::to_writer(&mut w, index.get_string(track.title))?; write!( @@ -243,7 +281,9 @@ fn write_queued_track_json( )?; let mut first = true; for artist_id in index.get_album_artists(album.artist_ids) { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write!(w, r#""{}""#, artist_id)?; first = false; } @@ -266,7 +306,6 @@ fn write_queued_track_json( write!(w, r#","is_buffering":{}}}"#, queued_track.is_buffering) } - pub fn write_queue_json( index: &dyn MetaIndex, user_data: &UserData, @@ -276,17 +315,16 @@ pub fn write_queue_json( write!(w, "[")?; let mut first = true; for queued_track in tracks.iter() { - if !first { write!(w, ",")?; } + if !first { + write!(w, ",")?; + } write_queued_track_json(index, user_data, &mut w, queued_track)?; first = false; } write!(w, "]") } -pub fn write_player_params_json( - mut w: W, - params: &Params, -) -> io::Result<()> { +pub fn write_player_params_json(mut w: W, params: &Params) -> io::Result<()> { write!( w, r#"{{"volume_db":{:.02},"high_pass_cutoff_hz":{}}}"#, @@ -318,7 +356,8 @@ pub fn write_scan_status_json( ScanStage::Done => "done", }; - write!(w, + write!( + w, "{{\ 
\"stage\":\"{}\",\ \"files_discovered\":{},\ @@ -345,11 +384,9 @@ pub fn write_scan_status_json( } /// Write library statistics as json. -pub fn write_stats_json( - index: &dyn MetaIndex, - mut w: W, -) -> io::Result<()> { - write!(w, +pub fn write_stats_json(index: &dyn MetaIndex, mut w: W) -> io::Result<()> { + write!( + w, "{{\ \"tracks\":{},\ \"albums\":{},\ diff --git a/src/server.rs b/src/server.rs index 19222e4..cbd77d9 100644 --- a/src/server.rs +++ b/src/server.rs @@ -20,6 +20,7 @@ use crate::database as db; use crate::database::Connection; use crate::database_utils; use crate::mvar::Var; +use crate::playcount::TimeVector; use crate::player::{Millibel, Player, QueueId}; use crate::prim::{AlbumId, ArtistId, Hertz, TrackId}; use crate::scan::BackgroundScanner; @@ -37,7 +38,9 @@ fn header_content_type(content_type: &str) -> Header { fn header_expires_seconds(age_seconds: i64) -> Header { let now = chrono::Utc::now(); - let at = now.checked_add_signed(chrono::Duration::seconds(age_seconds)).unwrap(); + let at = now + .checked_add_signed(chrono::Duration::seconds(age_seconds)) + .unwrap(); // The format from https://tools.ietf.org/html/rfc7234#section-5.3. 
let value = at.format("%a, %e %b %Y %H:%M:%S GMT").to_string(); Header::from_bytes(&b"Expires"[..], value) @@ -67,10 +70,7 @@ impl MetaServer { thumb_cache_var: thumb_cache_var.clone(), user_data: user_data, player: player, - scanner: BackgroundScanner::new( - index_var, - thumb_cache_var, - ), + scanner: BackgroundScanner::new(index_var, thumb_cache_var), } } @@ -110,7 +110,10 @@ impl MetaServer { let index = &*self.index_var.get(); let tracks = index.get_album_tracks(album_id); - let track = &tracks.first().expect("Albums have at least one track.").track; + let track = &tracks + .first() + .expect("Albums have at least one track.") + .track; let fname = index.get_filename(track.filename); let opts = claxon::FlacReaderOptions { @@ -165,13 +168,11 @@ impl MetaServer { None => return self.handle_bad_request("Invalid track id."), }; - let waveform = db - .begin() - .and_then(|mut tx| { - let result = db::select_track_waveform(&mut tx, track_id.0 as i64)?; - tx.commit()?; - Ok(result) - }); + let waveform = db.begin().and_then(|mut tx| { + let result = db::select_track_waveform(&mut tx, track_id.0 as i64)?; + tx.commit()?; + Ok(result) + }); let waveform = match waveform { Ok(Some(data)) => Waveform::from_bytes(data), @@ -183,7 +184,9 @@ impl MetaServer { }; let mut svg = Vec::new(); - waveform.write_svg(&mut svg).expect("Write to memory does not fail."); + waveform + .write_svg(&mut svg) + .expect("Write to memory does not fail."); Response::from_data(svg) .with_header(header_content_type("image/svg+xml")) @@ -194,7 +197,7 @@ impl MetaServer { fn handle_track(&self, path: &str) -> ResponseBox { // Track urls are of the form `/track/f7c153f2b16dc101.flac`. 
if !path.ends_with(".flac") { - return self.handle_bad_request("Expected a path ending in .flac.") + return self.handle_bad_request("Expected a path ending in .flac."); } let id_part = &path[..path.len() - ".flac".len()]; @@ -245,7 +248,8 @@ impl MetaServer { &mut w, album_id, album, - ).unwrap(); + ) + .unwrap(); Response::from_data(w.into_inner()) .with_header(header_content_type("application/json")) @@ -265,16 +269,19 @@ impl MetaServer { }; let albums = index.get_albums_by_artist(artist_id); + let now = TimeVector::now(); let buffer = Vec::new(); let mut w = io::Cursor::new(buffer); serialization::write_artist_json( index, &self.user_data.lock().unwrap(), + &now, &mut w, artist, albums, - ).unwrap(); + ) + .unwrap(); Response::from_data(w.into_inner()) .with_header(header_content_type("application/json")) @@ -283,13 +290,11 @@ impl MetaServer { fn handle_albums(&self) -> ResponseBox { let index = &*self.index_var.get(); + let now = TimeVector::now(); let buffer = Vec::new(); let mut w = io::Cursor::new(buffer); - serialization::write_albums_json( - index, - &self.user_data.lock().unwrap(), - &mut w, - ).unwrap(); + serialization::write_albums_json(index, &self.user_data.lock().unwrap(), &now, &mut w) + .unwrap(); Response::from_data(w.into_inner()) .with_header(header_content_type("application/json")) @@ -336,7 +341,8 @@ impl MetaServer { &self.user_data.lock().unwrap(), &mut w, &queue.tracks[..], - ).unwrap(); + ) + .unwrap(); Response::from_data(w.into_inner()) .with_header(header_content_type("application/json")) .boxed() @@ -421,7 +427,7 @@ impl MetaServer { if k == "q" { opt_query = Some(v); } - }; + } let query = match opt_query { Some(q) => q, None => return self.handle_bad_request("Missing search query."), @@ -456,7 +462,8 @@ impl MetaServer { &artists[..n_artists], &albums[..n_albums], &tracks[..n_tracks], - ).unwrap(); + ) + .unwrap(); Response::from_data(w.into_inner()) .with_status_code(200) @@ -500,6 +507,7 @@ impl MetaServer { } /// Router 
function for all /api/«endpoint» calls. + #[rustfmt::skip] fn handle_api_request( &self, db: &mut Connection, @@ -578,6 +586,7 @@ impl MetaServer { let query = url_iter.next().unwrap_or(""); // A very basic router. See also docs/api.md for an overview. + #[rustfmt::skip] let response = match (request.method(), p0, p1) { // API endpoints go through the API router, to keep this match arm // a bit more concise. @@ -600,7 +609,7 @@ impl MetaServer { }; match request.respond(response) { - Ok(()) => {}, + Ok(()) => {} Err(err) => println!("Error while responding to request: {:?}", err), } } @@ -628,21 +637,23 @@ pub fn serve(bind: &str, service: Arc) -> ! { let service_i = service.clone(); let name = format!("http_server_{}", i); let builder = thread::Builder::new().name(name); - let join_handle = builder.spawn(move || { - let connection = database_utils::connect_readonly(&service_i.config.db_path) - .expect("Failed to connect to database."); - let mut db = Connection::new(&connection); - loop { - let request = match server_i.recv() { - Ok(rq) => rq, - Err(e) => { - println!("Error: {:?}", e); - break; - } - }; - service_i.handle_request(&mut db, request); - } - }).unwrap(); + let join_handle = builder + .spawn(move || { + let connection = database_utils::connect_readonly(&service_i.config.db_path) + .expect("Failed to connect to database."); + let mut db = Connection::new(&connection); + loop { + let request = match server_i.recv() { + Ok(rq) => rq, + Err(e) => { + println!("Error: {:?}", e); + break; + } + }; + service_i.handle_request(&mut db, request); + } + }) + .unwrap(); threads.push(join_handle); } diff --git a/src/user_data.rs b/src/user_data.rs index 861437f..e342dfd 100644 --- a/src/user_data.rs +++ b/src/user_data.rs @@ -25,11 +25,11 @@ use std::collections::HashMap; use std::convert::TryFrom; -use crate::MemoryMetaIndex; use crate::album_table::AlbumTable; -use crate::playcount::{PlayCounter, PlayCounts}; +use crate::database as db; +use 
crate::playcount::{PlayCounter, PlayCounts, TimeVector}; use crate::prim::{AlbumId, ArtistId, TrackId}; -use crate::{database as db}; +use crate::MemoryMetaIndex; /// Track rating. /// @@ -79,10 +79,58 @@ pub struct AlbumState { /// /// The discovery sorting methods identifies albums that were popular in the /// past, but not recently. See the [`playcount`] module for more details. - pub discover_score: f32, + pub score_discover: f32, // Playcount on the shortest timescale. - pub trending_score: f32, + pub score_trending: f32, + + // Log playcount on the longer timescales. + // + // Could be used directly to sort by top albums, but in the UI this is not + // _that_ useful. Instead, we can mix it with the time embedding to provide + // a list of "for now" albums for this time of the day, where we don't + // suggest albums with a low playcount just because the one time we played + // them was at this time of the day. + pub score_longterm: f32, + + // Vector embedding of the play times. + // + // Used to weigh the discover score, and compute the "for now" score. + pub time_embedding: TimeVector, +} + +/// Scores (for ranking) evaluated at a given point in time. +#[derive(Copy, Clone, Default)] +pub struct ScoreSnapshot { + /// Trending score, see [`AlbumState::score_trending`]. + pub trending: f32, + + /// Discovery score, adjusted for the current moment. + pub discover: f32, + + /// "For now" score, based on the time of day, week, and year. + pub for_now: f32, +} + +impl AlbumState { + /// Evaluate scores for the current moment. + /// + /// The `at` time vector should be the embedding of the desired time to + /// evaluate at, and then normalized. + pub fn score(&self, at: &TimeVector) -> ScoreSnapshot { + // The cosine distance between our time vector and the query time vector. + // We put it in the range [0, 1] so that when we multiply with a negative + // discover score, it doesn't flip the sign. 
+ debug_assert!(self.time_embedding.norm().is_finite()); + let time_cos = self.time_embedding.dot(at) / self.time_embedding.norm(); + let time_weight = time_cos.mul_add(0.5, 0.5); + + ScoreSnapshot { + trending: self.score_trending, + discover: self.score_discover * time_weight, + for_now: self.score_longterm * time_weight * time_weight, + } + } } #[derive(Default)] @@ -108,7 +156,6 @@ impl Default for UserData { artists: HashMap::with_hasher(s), } } - } impl UserData { @@ -126,7 +173,8 @@ impl UserData { for opt_rating in db::iter_ratings(tx)? { let rating = opt_rating?; let tid = TrackId(rating.track_id as u64); - let rating = Rating::try_from(rating.rating).expect("Invalid rating value in the database."); + let rating = + Rating::try_from(rating.rating).expect("Invalid rating value in the database."); stats.set_track_rating(tid, rating); } @@ -143,13 +191,22 @@ impl UserData { } pub fn get_track_rating(&self, track_id: TrackId) -> Rating { - self.tracks.get(&track_id).map(|t| t.rating).unwrap_or_default() + self.tracks + .get(&track_id) + .map(|t| t.rating) + .unwrap_or_default() } - pub fn get_album_scores(&self, album_id: AlbumId) -> AlbumState { + /// Take a snapshot of the scores for the given album, evaluated at the given query time. + /// + /// See also [`AlbumState::score`]. + pub fn get_album_scores(&self, album_id: AlbumId, at: &TimeVector) -> ScoreSnapshot { // If an album is not present, we don't have playcounts, so it is // ranked as low as possible for all scores. - self.albums.get(album_id).unwrap_or_default() + self.albums + .get(album_id) + .map(|state| state.score(at)) + .unwrap_or_default() } /// Replace the album scores with new scores.