diff --git a/.gitignore b/.gitignore index bcd6aef..bb43466 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ __pycache__ .DS_Store .venv *.json +*.bak raw_list *.egg-info/ deps/ diff --git a/qubed/Cargo.toml b/qubed/Cargo.toml index 5186e2a..41f2d80 100644 --- a/qubed/Cargo.toml +++ b/qubed/Cargo.toml @@ -13,6 +13,7 @@ slotmap = "1.0.7" smallbitvec = "2.6.0" tiny-str = "0.10.0" tiny-vec = "0.10.0" +chrono = "0.4" rayon = "1.7" [lib] diff --git a/qubed/src/coordinates/datetime.rs b/qubed/src/coordinates/datetime.rs new file mode 100644 index 0000000..7ed02a7 --- /dev/null +++ b/qubed/src/coordinates/datetime.rs @@ -0,0 +1,282 @@ +use std::hash::Hash; + +use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; +use tiny_vec::TinyVec; + +use crate::coordinates::{Coordinates, IntersectionResult}; + +#[derive(Debug, Clone, PartialEq)] +pub enum DateTimeCoordinates { + List(TinyVec), +} + +impl DateTimeCoordinates { + pub(crate) fn extend(&mut self, new_coords: &DateTimeCoordinates) { + match (self, new_coords) { + (DateTimeCoordinates::List(list), DateTimeCoordinates::List(new_list)) => { + for v in new_list.iter() { + list.push(*v); + } + } + } + } + + pub(crate) fn append(&mut self, new_coord: NaiveDateTime) { + match self { + DateTimeCoordinates::List(list) => list.push(new_coord), + } + } + + pub(crate) fn len(&self) -> usize { + match self { + DateTimeCoordinates::List(list) => list.len(), + } + } + + pub(crate) fn contains(&self, value: NaiveDateTime) -> bool { + match self { + DateTimeCoordinates::List(list) => list.iter().any(|&v| v == value), + } + } + + pub(crate) fn to_string(&self) -> String { + match self { + DateTimeCoordinates::List(list) => list + .iter() + .map(|dt| dt.format("%Y-%m-%dT%H:%M:%S").to_string()) + .collect::>() + .join("/"), + } + } + + pub(crate) fn hash(&self, hasher: &mut std::collections::hash_map::DefaultHasher) { + "datetime".hash(hasher); + match self { + DateTimeCoordinates::List(list) => { + for dt in list.iter() { + // use seconds and nanoseconds for stable hashing + dt.and_utc().timestamp().hash(hasher); + dt.and_utc().timestamp_subsec_nanos().hash(hasher); + } + } + } + } + + pub(crate) fn intersect( + &self, + other: &DateTimeCoordinates, + ) -> IntersectionResult { + match (self, other) { + (DateTimeCoordinates::List(list_a), DateTimeCoordinates::List(list_b)) => { + use std::collections::HashSet; + + let mut set_b: HashSet = HashSet::new(); + for v in list_b.iter() { + set_b.insert(*v); + } + + let mut set_a: HashSet = HashSet::new(); + for v in list_a.iter() { + set_a.insert(*v); + } + + let mut intersection = TinyVec::new(); + let mut only_a = TinyVec::new(); + + let mut added: HashSet = HashSet::new(); + for v in list_a.iter() { + if set_b.contains(v) { + if !added.contains(v) { + intersection.push(*v); + added.insert(*v); + } + } else { + only_a.push(*v); + } + } + + let mut only_b = TinyVec::new(); + for v in list_b.iter() { + if !set_a.contains(v) { + only_b.push(*v); + } + } + + IntersectionResult { + intersection: DateTimeCoordinates::List(intersection), + only_a: DateTimeCoordinates::List(only_a), + only_b: DateTimeCoordinates::List(only_b), + } + } + } + } + + /// Try to parse a string into `NaiveDateTime` using common formats. + pub(crate) fn parse_from_str(s: &str) -> Option { + // Try RFC3339 / ISO 8601 + if let Ok(dt) = DateTime::parse_from_rfc3339(s) { + return Some(dt.with_timezone(&Utc).naive_utc()); + } + + // Try YYYY-MM-DD HH:MM:SS + if let Ok(ndt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { + return Some(ndt); + } + + // Try YYYY-MM-DD + if let Ok(d) = NaiveDate::parse_from_str(s, "%Y-%m-%d") { + return Some(NaiveDateTime::new(d, NaiveTime::from_hms_opt(0, 0, 0).unwrap())); + } + + // Try YYYYMMDD + if s.len() == 8 { + if let Ok(d) = NaiveDate::parse_from_str(s, "%Y%m%d") { + return Some(NaiveDateTime::new(d, NaiveTime::from_hms_opt(0, 0, 0).unwrap())); + } + } + + // Try compact datetime YYYYMMDDTHHMM + if s.len() == 13 { + if let Ok(ndt) = NaiveDateTime::parse_from_str(s, "%Y%m%dT%H%M") { + return Some(ndt); + } + } + + None + } +} + +impl Default for DateTimeCoordinates { + fn default() -> Self { + DateTimeCoordinates::List(TinyVec::new()) + } +} + +impl From for Coordinates { + fn from(value: NaiveDateTime) -> Self { + let mut vec = TinyVec::new(); + vec.push(value); + Coordinates::DateTimes(DateTimeCoordinates::List(vec)) + } +} + +impl From for Coordinates { + fn from(value: DateTimeCoordinates) -> Self { + Coordinates::DateTimes(value) + } +} + +impl From<&str> for DateTimeCoordinates { + fn from(value: &str) -> Self { + if let Some(ndt) = DateTimeCoordinates::parse_from_str(value) { + let mut vec = TinyVec::new(); + vec.push(ndt); + DateTimeCoordinates::List(vec) + } else { + DateTimeCoordinates::default() + } + } +} + +impl From<&[NaiveDateTime]> for Coordinates { + fn from(value: &[NaiveDateTime]) -> Self { + let mut vec = TinyVec::new(); + for &v in value { + vec.push(v); + } + Coordinates::DateTimes(DateTimeCoordinates::List(vec)) + } +} + +impl From<&[NaiveDateTime; N]> for Coordinates { + fn from(value: &[NaiveDateTime; N]) -> Self { + let mut vec = TinyVec::new(); + for &v in value.iter() { + vec.push(v); + } + Coordinates::DateTimes(DateTimeCoordinates::List(vec)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_datetime_append_and_len() { + let mut coords = DateTimeCoordinates::default(); + let d1 = NaiveDate::from_ymd(2020, 1, 1).and_hms(0, 0, 0); + let d2 = NaiveDate::from_ymd(2020, 1, 2).and_hms(12, 30, 0); + coords.append(d1); + coords.append(d2); + + match coords { + DateTimeCoordinates::List(list) => { + assert_eq!(list.len(), 2); + assert_eq!(list[0], d1); + assert_eq!(list[1], d2); + } + } + } + + #[test] + fn test_datetime_extend() { + let mut a = DateTimeCoordinates::default(); + let d1 = NaiveDate::from_ymd(2020, 1, 1).and_hms(0, 0, 0); + let d2 = NaiveDate::from_ymd(2020, 1, 2).and_hms(0, 0, 0); + a.append(d1); + + let mut b = DateTimeCoordinates::default(); + b.append(d2); + + a.extend(&b); + + match a { + DateTimeCoordinates::List(list) => { + assert_eq!(list.len(), 2); + assert_eq!(list[0], d1); + assert_eq!(list[1], d2); + } + } + } + + #[test] + fn test_datetime_to_string_and_parse() { + let d = NaiveDate::from_ymd(2021, 5, 4).and_hms(6, 7, 8); + let mut c = DateTimeCoordinates::default(); + c.append(d); + let s = c.to_string(); + assert!(s.contains("2021-05-04T06:07:08")); + + // parse from iso string + let parsed = DateTimeCoordinates::parse_from_str("2021-05-04T06:07:08Z"); + assert!(parsed.is_some()); + assert_eq!(parsed.unwrap(), d); + } + + #[test] + fn test_datetime_intersect() { + let mut a = DateTimeCoordinates::default(); + let d1 = NaiveDate::from_ymd(2020, 1, 1).and_hms(0, 0, 0); + let d2 = NaiveDate::from_ymd(2020, 1, 2).and_hms(0, 0, 0); + let d3 = NaiveDate::from_ymd(2020, 1, 3).and_hms(0, 0, 0); + a.append(d1); + a.append(d2); + a.append(d3); + + let mut b = DateTimeCoordinates::default(); + b.append(d2); + b.append(d3); + b.append(NaiveDate::from_ymd(2020, 1, 4).and_hms(0, 0, 0)); + + let res = a.intersect(&b); + + match res.intersection { + DateTimeCoordinates::List(list) => { + assert_eq!(list.len(), 2); + assert_eq!(list[0], d2); + assert_eq!(list[1], d3); + } + } + } +} diff --git a/qubed/src/coordinates/floats.rs b/qubed/src/coordinates/floats.rs index 32785cf..d0ce070 100644 --- a/qubed/src/coordinates/floats.rs +++ b/qubed/src/coordinates/floats.rs @@ -1,6 +1,6 @@ use std::hash::Hash; -use crate::coordinates::Coordinates; +use crate::coordinates::{Coordinates, IntersectionResult}; use tiny_vec::TinyVec; #[derive(Debug, Clone, PartialEq)] @@ -9,11 +9,20 @@ pub enum FloatCoordinates { } impl FloatCoordinates { - pub(crate) fn extend(&mut self, _new_coords: &FloatCoordinates) { - todo!() + pub(crate) fn extend(&mut self, new_coords: &FloatCoordinates) { + match (self, new_coords) { + (FloatCoordinates::List(list), FloatCoordinates::List(new_list)) => { + for &v in new_list.iter() { + list.push(v); + } + } + } } - pub(crate) fn append(&mut self, _new_coord: f64) { - todo!() + + pub(crate) fn append(&mut self, new_coord: f64) { + match self { + FloatCoordinates::List(list) => list.push(new_coord), + } } pub(crate) fn len(&self) -> usize { @@ -22,6 +31,12 @@ impl FloatCoordinates { } } + pub(crate) fn contains(&self, value: f64) -> bool { + match self { + FloatCoordinates::List(list) => list.iter().any(|&v| v.to_bits() == value.to_bits()), + } + } + pub(crate) fn to_string(&self) -> String { match self { FloatCoordinates::List(list) => { @@ -40,6 +55,59 @@ impl FloatCoordinates { } } } + + pub(crate) fn intersect( + &self, + other: &FloatCoordinates, + ) -> IntersectionResult { + match (self, other) { + (FloatCoordinates::List(list_a), FloatCoordinates::List(list_b)) => { + use std::collections::HashSet; + + let mut set_a: HashSet = HashSet::new(); + for v in list_a.iter() { + set_a.insert(v.to_bits()); + } + + let mut set_b: HashSet = HashSet::new(); + for v in list_b.iter() { + set_b.insert(v.to_bits()); + } + + let mut intersection = TinyVec::new(); + let mut only_a = TinyVec::new(); + let mut only_b = TinyVec::new(); + + // preserve order from list_a for intersection and only_a + let mut added: HashSet = HashSet::new(); + for v in list_a.iter() { + let bits = v.to_bits(); + if set_b.contains(&bits) { + if !added.contains(&bits) { + intersection.push(*v); + added.insert(bits); + } + } else { + only_a.push(*v); + } + } + + // for only_b, preserve order from list_b skipping those present in set_a + for v in list_b.iter() { + let bits = v.to_bits(); + if !set_a.contains(&bits) { + only_b.push(*v); + } + } + + IntersectionResult { + intersection: FloatCoordinates::List(intersection), + only_a: FloatCoordinates::List(only_a), + only_b: FloatCoordinates::List(only_b), + } + } + } + } } impl Default for FloatCoordinates { @@ -55,3 +123,202 @@ impl From for Coordinates { Coordinates::Floats(FloatCoordinates::List(vec)) } } + +impl From for Coordinates { + fn from(value: FloatCoordinates) -> Self { + Coordinates::Floats(value) + } +} + +impl From<&[f64]> for Coordinates { + fn from(value: &[f64]) -> Self { + let mut vec = TinyVec::new(); + for &v in value { + vec.push(v); + } + Coordinates::Floats(FloatCoordinates::List(vec)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tiny_vec::TinyVec; + + #[test] + fn test_float_coordinates_append_and_len() { + let mut coords = FloatCoordinates::default(); + coords.append(1.0); + coords.append(2.5); + + match coords { + FloatCoordinates::List(list) => { + assert_eq!(list.len(), 2); + assert_eq!(list[0], 1.0); + assert_eq!(list[1], 2.5); + } + } + } + + #[test] + fn test_float_coordinates_extend() { + let mut a = FloatCoordinates::default(); + a.append(1.0); + a.append(2.0); + + let mut b = FloatCoordinates::default(); + b.append(3.0); + b.append(4.0); + + a.extend(&b); + + match a { + FloatCoordinates::List(list) => { + assert_eq!(list.len(), 4); + assert_eq!(list[0], 1.0); + assert_eq!(list[1], 2.0); + assert_eq!(list[2], 3.0); + assert_eq!(list[3], 4.0); + } + } + } + + #[test] + fn test_float_coordinates_to_string() { + let mut c = FloatCoordinates::default(); + c.append(1.25); + c.append(2.5); + let s = c.to_string(); + assert!(s.contains("1.25")); + assert!(s.contains("2.5")); + } + + #[test] + fn test_float_coordinates_intersect() { + let mut a = FloatCoordinates::default(); + a.append(1.0); + a.append(2.0); + a.append(3.0); + + let mut b = FloatCoordinates::default(); + b.append(2.0); + b.append(3.0); + b.append(4.0); + + let result = a.intersect(&b); + + match result.intersection { + FloatCoordinates::List(list) => { + assert_eq!(list.len(), 2); + assert_eq!(list[0], 2.0); + assert_eq!(list[1], 3.0); + } + } + + match result.only_a { + FloatCoordinates::List(list) => { + assert_eq!(list.len(), 1); + assert_eq!(list[0], 1.0); + } + } + + match result.only_b { + FloatCoordinates::List(list) => { + assert_eq!(list.len(), 1); + assert_eq!(list[0], 4.0); + } + } + } + + #[test] + fn test_from_conversions() { + // From + let c = Coordinates::from(3.14f64); + match c { + Coordinates::Floats(fc) => match fc { + FloatCoordinates::List(list) => { + assert_eq!(list.len(), 1); + assert!((list[0] - 3.14).abs() < 1e-12); + } + }, + _ => panic!("Expected Coordinates::Floats variant"), + } + + // From<&[f64]> + let slice = &[1.0f64, 2.0f64, 3.0f64][..]; + let c2 = Coordinates::from(slice); + match c2 { + Coordinates::Floats(fc) => match fc { + FloatCoordinates::List(list) => { + assert_eq!(list.len(), 3); + assert_eq!(list[0], 1.0); + assert_eq!(list[2], 3.0); + } + }, + _ => panic!("Expected Coordinates::Floats variant"), + } + + // From<&[f64; N]> + let arr: [f64; 2] = [9.0, 10.0]; + let c3 = Coordinates::from(&arr); + match c3 { + Coordinates::Floats(fc) => match fc { + FloatCoordinates::List(list) => { + assert_eq!(list.len(), 2); + assert_eq!(list[0], 9.0); + assert_eq!(list[1], 10.0); + } + }, + _ => panic!("Expected Coordinates::Floats variant"), + } + + // From + let mut fc = FloatCoordinates::default(); + fc.append(7.5); + let c4 = Coordinates::from(fc.clone()); + match c4 { + Coordinates::Floats(inner) => { + assert_eq!(inner, fc); + } + _ => panic!("Expected Coordinates::Floats variant"), + } + } +} + +impl From<&[f64; N]> for Coordinates { + fn from(value: &[f64; N]) -> Self { + let mut vec = TinyVec::new(); + for &v in value.iter() { + vec.push(v); + } + Coordinates::Floats(FloatCoordinates::List(vec)) + } +} + +impl From for Coordinates { + fn from(value: f32) -> Self { + let mut vec = TinyVec::new(); + vec.push(value as f64); + Coordinates::Floats(FloatCoordinates::List(vec)) + } +} + +impl From<&[f32]> for Coordinates { + fn from(value: &[f32]) -> Self { + let mut vec = TinyVec::new(); + for &v in value { + vec.push(v as f64); + } + Coordinates::Floats(FloatCoordinates::List(vec)) + } +} + +impl From<&[f32; N]> for Coordinates { + fn from(value: &[f32; N]) -> Self { + let mut vec = TinyVec::new(); + for &v in value.iter() { + vec.push(v as f64); + } + Coordinates::Floats(FloatCoordinates::List(vec)) + } +} diff --git a/qubed/src/coordinates/mod.rs b/qubed/src/coordinates/mod.rs index 6c9bfbe..a7f7d8b 100644 --- a/qubed/src/coordinates/mod.rs +++ b/qubed/src/coordinates/mod.rs @@ -1,9 +1,12 @@ +pub mod datetime; pub mod floats; pub mod integers; pub mod ops; pub mod strings; use std::hash::Hash; +use chrono::NaiveDateTime; +use datetime::DateTimeCoordinates; use floats::FloatCoordinates; use integers::IntegerCoordinates; use strings::StringCoordinates; @@ -20,6 +23,7 @@ pub enum Coordinates { Integers(IntegerCoordinates), Floats(FloatCoordinates), Strings(StringCoordinates), + DateTimes(DateTimeCoordinates), Mixed(Box), } @@ -27,6 +31,7 @@ pub enum CoordinateTypes { Integer(i32), Float(f64), String(String), + DateTime(NaiveDateTime), } #[derive(Debug, Clone, PartialEq, Default)] @@ -34,6 +39,7 @@ pub struct MixedCoordinates { integers: integers::IntegerCoordinates, floats: FloatCoordinates, strings: StringCoordinates, + datetimes: DateTimeCoordinates, } impl Coordinates { @@ -73,6 +79,7 @@ impl Coordinates { Coordinates::Empty => "".to_string(), Coordinates::Integers(ints) => ints.to_string(), Coordinates::Floats(floats) => floats.to_string(), + Coordinates::DateTimes(datetimes) => datetimes.to_string(), Coordinates::Strings(strings) => strings.to_string(), Coordinates::Mixed(_) => { todo!() @@ -86,8 +93,12 @@ impl Coordinates { Coordinates::Integers(ints) => ints.len(), Coordinates::Floats(floats) => floats.len(), Coordinates::Strings(strings) => strings.len(), + Coordinates::DateTimes(datetimes) => datetimes.len(), Coordinates::Mixed(mixed) => { - mixed.integers.len() + mixed.floats.len() + mixed.strings.len() + mixed.integers.len() + + mixed.floats.len() + + mixed.strings.len() + + mixed.datetimes.len() } } } @@ -104,10 +115,22 @@ impl Coordinates { match (self, coord_type) { (Coordinates::Empty, _) => false, (Coordinates::Integers(ints), CoordinateTypes::Integer(val)) => ints.contains(val), - (Coordinates::Floats(_), _) => unimplemented!(), - (Coordinates::Strings(_), _) => unimplemented!(), - (Coordinates::Mixed(_), _) => unimplemented!(), - _ => unimplemented!(), + (Coordinates::DateTimes(datetimes), CoordinateTypes::DateTime(val)) => { + datetimes.contains(val) + } + (Coordinates::Floats(floats), CoordinateTypes::Float(val)) => floats.contains(val), + (Coordinates::Strings(strings), CoordinateTypes::String(val)) => strings.contains(val), + (Coordinates::Mixed(mixed), CoordinateTypes::Integer(val)) => { + mixed.integers.contains(val) + } + (Coordinates::Mixed(mixed), CoordinateTypes::Float(val)) => mixed.floats.contains(val), + (Coordinates::Mixed(mixed), CoordinateTypes::DateTime(val)) => { + mixed.datetimes.contains(val) + } + (Coordinates::Mixed(mixed), CoordinateTypes::String(val)) => { + mixed.strings.contains(val) + } + _ => false, } } @@ -122,6 +145,9 @@ impl Coordinates { Coordinates::Strings(strings) => { Box::new(MixedCoordinates { strings: strings.to_owned(), ..Default::default() }) } + Coordinates::DateTimes(datetimes) => { + Box::new(MixedCoordinates { datetimes: datetimes.to_owned(), ..Default::default() }) + } Coordinates::Empty => Box::new(MixedCoordinates::default()), Coordinates::Mixed(_) => { return self; @@ -175,6 +201,10 @@ impl Coordinates { mixed.integers.hash(hasher); mixed.floats.hash(hasher); mixed.strings.hash(hasher); + mixed.datetimes.hash(hasher); + } + Coordinates::DateTimes(datetimes) => { + datetimes.hash(hasher); } } } @@ -331,8 +361,34 @@ impl Coordinates { } } + match &boxed.datetimes { + datetime::DateTimeCoordinates::List(list) => { + if list.len() > 0 { + let vals: Vec = list + .iter() + .map(|dt: &NaiveDateTime| { + // Serialize NaiveDateTime as an ISO-like string without timezone. + Value::String(dt.format("%Y%m%dT%H%M").to_string()) + }) + .collect(); + map.insert("datetimes".to_string(), Value::Array(vals)); + } + } + } + Value::Object(map) } + Coordinates::DateTimes(coords) => match coords { + datetime::DateTimeCoordinates::List(list) => { + let vals: Vec = list + .iter() + .map(|dt: &NaiveDateTime| { + Value::String(dt.format("%Y%m%dT%H%M").to_string()) + }) + .collect(); + Value::Array(vals) + } + }, } } @@ -395,6 +451,23 @@ impl Coordinates { } if all_string { + // Try to parse all strings as datetimes first (round-trip support for DateTimes). + let mut dt_coords = datetime::DateTimeCoordinates::default(); + let mut all_datetime = true; + for v in arr.iter() { + if let Value::String(s) = v { + if let Some(ndt) = datetime::DateTimeCoordinates::parse_from_str(s) { + dt_coords.append(ndt); + } else { + all_datetime = false; + break; + } + } + } + if all_datetime { + return Ok(Coordinates::DateTimes(dt_coords)); + } + let mut sc = strings::StringCoordinates::default(); for v in arr.iter() { if let Value::String(s) = v { @@ -445,6 +518,19 @@ impl Coordinates { } } + if let Some(v) = map.get("datetimes") { + if let Value::Array(arr) = v { + for val in arr.iter() { + if let Value::String(s) = val { + if let Some(ndt) = datetime::DateTimeCoordinates::parse_from_str(s) + { + mixed.datetimes.append(ndt); + } + } + } + } + } + Ok(Coordinates::Mixed(Box::new(mixed))) } Value::Null => Ok(Coordinates::Empty), diff --git a/qubed/src/coordinates/ops.rs b/qubed/src/coordinates/ops.rs index a426e53..bab19c3 100644 --- a/qubed/src/coordinates/ops.rs +++ b/qubed/src/coordinates/ops.rs @@ -1,5 +1,22 @@ use crate::Coordinates; use crate::coordinates::CoordinateTypes; +use chrono::NaiveDateTime; + +impl From for CoordinateTypes { + fn from(value: NaiveDateTime) -> Self { + CoordinateTypes::DateTime(value) + } +} + +impl FromIterator for Coordinates { + fn from_iter>(iter: T) -> Self { + let mut coords = Coordinates::Empty; + for dt in iter { + coords.append_datetime(dt); + } + coords + } +} impl Coordinates { pub fn extend(&mut self, new_coords: &Coordinates) { @@ -47,6 +64,20 @@ impl Coordinates { } }, Coordinates::Empty => {} + Coordinates::DateTimes(new_datetimes) => match self { + Coordinates::DateTimes(datetimes) => { + datetimes.extend(new_datetimes); + } + Coordinates::Mixed(mixed) => { + mixed.datetimes.extend(new_datetimes); + } + Coordinates::Empty => { + let _ = std::mem::replace(self, new_coords.clone()); + } + _ => { + self.convert_to_mixed().extend(new_coords); + } + }, Coordinates::Mixed(mixed) => match self { Coordinates::Mixed(self_mixed) => { self_mixed.integers.extend(&mixed.integers); @@ -84,6 +115,9 @@ impl Coordinates { CoordinateTypes::String(val) => { self.append_string(val); } + CoordinateTypes::DateTime(val) => { + self.append_datetime(val); + } } } @@ -140,6 +174,24 @@ impl Coordinates { } } } + + fn append_datetime(&mut self, value: NaiveDateTime) { + match self { + Coordinates::DateTimes(datetimes) => { + datetimes.append(value); + } + Coordinates::Mixed(mixed) => { + mixed.datetimes.append(value); + } + Coordinates::Empty => { + *self = Coordinates::from(value); + } + _ => { + self.convert_to_mixed(); + self.append_datetime(value); + } + } + } } impl FromIterator for Coordinates { diff --git a/qubed/src/coordinates/strings.rs b/qubed/src/coordinates/strings.rs index 4b50959..887dbac 100644 --- a/qubed/src/coordinates/strings.rs +++ b/qubed/src/coordinates/strings.rs @@ -56,6 +56,15 @@ impl StringCoordinates { } } } + pub(crate) fn contains(&self, value: impl AsRef) -> bool { + match self { + StringCoordinates::Set(set) => { + let tiny_value = TinyString::from(value.as_ref()); + set.contains(&tiny_value) + } + } + } + pub(crate) fn hash(&self, hasher: &mut impl std::hash::Hasher) { "strings".hash(hasher); match self { diff --git a/qubed/src/serde/json.rs b/qubed/src/serde/json.rs index d0210fe..3aaf787 100644 --- a/qubed/src/serde/json.rs +++ b/qubed/src/serde/json.rs @@ -78,8 +78,73 @@ impl Qube { for id in order.iter() { let nref = self.node(*id).expect("valid node"); let dim = nref.dimension().unwrap_or("root").to_string(); - // Preserve native types for coordinates using Coordinates -> JSON helpers - let coords_value = nref.coordinates().to_json_value(); + // Build coords object with explicit type tags so consumers know the + // coordinate type without guessing. Examples: + // { "ints": [1,2,3] }, { "strings": ["od"] }, { "floats": [...] }, or mixed object. + let coords_value = { + use serde_json::{Map, Value}; + let mut map = Map::new(); + + // Use the public Coordinates -> JSON helper which returns a + // native serde_json::Value (array/string/object/null). + let native = nref.coordinates().to_json_value(); + + match nref.coordinates() { + // Represent empty coordinates as JSON null so they round-trip as `Empty`, + // not as `Mixed(empty)` (which is how an empty object `{}` would be read). + crate::Coordinates::Empty => Value::Null, + crate::Coordinates::Integers(_) => match native { + Value::Array(arr) => { + map.insert("ints".to_string(), Value::Array(arr)); + Value::Object(map) + } + Value::String(s) => { + // RangeSet or other textual form – preserve as string under "ints_text" + map.insert("ints_text".to_string(), Value::String(s)); + Value::Object(map) + } + other => { + map.insert("ints".to_string(), other); + Value::Object(map) + } + }, + crate::Coordinates::Floats(_) => match native { + Value::Array(arr) => { + map.insert("floats".to_string(), Value::Array(arr)); + Value::Object(map) + } + other => { + map.insert("floats".to_string(), other); + Value::Object(map) + } + }, + crate::Coordinates::Strings(_) => match native { + Value::Array(arr) => { + map.insert("strings".to_string(), Value::Array(arr)); + Value::Object(map) + } + other => { + map.insert("strings".to_string(), other); + Value::Object(map) + } + }, + crate::Coordinates::DateTimes(_) => { + // Fallback to whatever the generic serializer produces (not implemented elsewhere yet) + let v = nref.coordinates().to_json_value(); + match v { + Value::Array(arr) => { + map.insert("datetimes".to_string(), Value::Array(arr)); + Value::Object(map) + } + other => Value::Object(map), + } + } + crate::Coordinates::Mixed(_) => { + // Mixed already produces an object with keys like ints/floats/strings + nref.coordinates().to_json_value() + } + } + }; let parent_idx = nref.parent().map(|p| idx_map.get(&p).copied().unwrap()); @@ -103,16 +168,41 @@ impl Qube { nodes_json.push(Value::Object(map)); } - Value::Array(nodes_json) + // Wrap the arena array with a versioned envelope so format changes + // can be detected by consumers. + let mut root_map = Map::new(); + root_map.insert("version".to_string(), Value::String("1".to_string())); + root_map.insert("qube".to_string(), Value::Array(nodes_json)); + Value::Object(root_map) } /// Reconstruct a Qube from an arena JSON layout created by `to_arena_json`. pub fn from_arena_json(value: Value) -> Result { use std::collections::HashMap; + // Expect a versioned envelope with structure { "version": "1", "qube": [ ... ] } let arr = match value { - Value::Array(a) => a, - _ => return Err("Expected JSON array for arena layout".to_string()), + Value::Object(map) => { + // check version + let version_val = map + .get("version") + .ok_or_else(|| "Arena JSON missing 'version' field".to_string())?; + let ok = match version_val { + Value::String(s) => s == "1", + Value::Number(n) => n.as_u64().map(|v| v == 1).unwrap_or(false), + _ => false, + }; + if !ok { + return Err(format!("Unsupported arena JSON version: {:?}", version_val)); + } + + // extract qube array + match map.get("qube") { + Some(Value::Array(a)) => a.clone(), + _ => return Err("Arena JSON missing 'qube' array".to_string()), + } + } + _ => return Err("Expected JSON object envelope for arena layout".to_string()), }; // We will create nodes in the same order. Start with a fresh Qube which @@ -144,9 +234,56 @@ impl Qube { }; // create child under parent - // Parse coords using Coordinates::from_json_value so native JSON types - // (numbers/strings/mixed) are preserved. - let coords_parsed = Coordinates::from_json_value(coords_value)?; + // Interpret typed coords object if present so we deserialize into + // the most specific `Coordinates` variant (Integers, Strings, + // Floats) rather than always producing a Mixed variant. If the + // coords object contains a single typed key (e.g. `ints`, + // `strings`, `floats`) we'll pass the underlying array/string to + // `from_json_value`. If it contains multiple keys we pass the + // whole object to obtain a `Mixed` coordinates value. + let coords_parsed = { + use serde_json::Value; + + // Build a Value suitable for Coordinates::from_json_value + let coords_for_parse: Value = match coords_value { + Value::Object(map) => { + // Detect typed keys + let has_ints = map.get("ints").is_some(); + let has_ints_text = map.get("ints_text").is_some(); + let has_strings = map.get("strings").is_some(); + let has_floats = map.get("floats").is_some(); + let has_datetimes = map.get("datetimes").is_some(); + + let typed_key_count = + [has_ints, has_ints_text, has_strings, has_floats, has_datetimes] + .iter() + .filter(|&&b| b) + .count(); + + if has_ints_text && typed_key_count == 1 { + // textual integer representation -> parse as string + map.get("ints_text").cloned().unwrap_or(Value::Null) + } else if has_ints && typed_key_count == 1 { + // ints as native array -> pass array so `from_json_value` + // returns `Coordinates::Integers` where possible + map.get("ints").cloned().unwrap_or(Value::Null) + } else if has_strings && typed_key_count == 1 { + map.get("strings").cloned().unwrap_or(Value::Null) + } else if has_floats && typed_key_count == 1 { + map.get("floats").cloned().unwrap_or(Value::Null) + } else if has_datetimes && typed_key_count == 1 { + map.get("datetimes").cloned().unwrap_or(Value::Null) + } else { + // Mixed or unknown: pass the whole object so + // `from_json_value` can create a MixedCoordinates + Value::Object(map.clone()) + } + } + other => other.clone(), + }; + + Coordinates::from_json_value(&coords_for_parse)? + }; let created = if i == 0 { // first entry corresponds to root; update root coords if provided // skip creating a new node; optionally set coords on root diff --git a/qubed_meteo/Cargo.toml b/qubed_meteo/Cargo.toml index f328ffc..38a8544 100644 --- a/qubed_meteo/Cargo.toml +++ b/qubed_meteo/Cargo.toml @@ -7,7 +7,7 @@ edition = "2024" qubed = { path = "../qubed" } serde = "1.0.228" serde_json = "1.0.145" -rsfdb = { path = "../../rsfdb" } +rsfdb = { git = "https://github.com/ecmwf/rsfdb" } [lib] path = "src/lib.rs" diff --git a/qubed_meteo/bin/fdb_db_reader.rs b/qubed_meteo/bin/fdb_db_reader.rs new file mode 100644 index 0000000..014c676 --- /dev/null +++ b/qubed_meteo/bin/fdb_db_reader.rs @@ -0,0 +1,50 @@ +use qubed::Qube; +use qubed_meteo::adapters::fdb::FromFDBList; +use rsfdb::{FDB, request::Request}; +use serde_json::json; +use std::env; +use std::time::Instant; +use std::fs::File; + +fn main() -> Result<(), Box> { + // Ensure FDB config is set so the internal listing can open the DB + use std::path::PathBuf; + + let config_path = PathBuf::from("xxx"); // Adjust this path to point to your local FDB config.yaml + unsafe { + std::env::set_var("FDB5_CONFIG_FILE", config_path.to_str().expect("Invalid config path")); + } + + let lib_path = PathBuf::from("xxx"); // Adjust this path to point to the directory containing FDB shared libraries + + unsafe { + std::env::set_var("DYLD_LIBRARY_PATH", lib_path.to_str().expect("Invalid path to shared libraries")); + } + + let request_map = json!({ + "class" : "d1", + "dataset": "extremes-dt", + "expver" : "0001", + "stream" : "oper", + "date": "20260303", + "time" : "0000", + "domain" : "g", + "levtype" : "sfc", + }); + let start_time = Instant::now(); + + // Build the Qube directly from the request; the adapter will open FDB and list. + let qube = Qube::from_fdb_list(&request_map).expect("Failed to build Qube from FDB list"); + + // Stop the timer + let duration = start_time.elapsed(); + + // Print the time taken + println!("Time taken to construct Qube: {:?}", duration); + + let file = File::create("extremes_eg.json")?; + serde_json::to_writer(file, &qube.to_arena_json())?; + + Ok(()) + +} \ No newline at end of file diff --git a/qubed_meteo/examples/read_from_fdb_list.rs b/qubed_meteo/examples/read_from_fdb_list.rs index c780cce..a370516 100644 --- a/qubed_meteo/examples/read_from_fdb_list.rs +++ b/qubed_meteo/examples/read_from_fdb_list.rs @@ -26,7 +26,9 @@ fn main() { // Build the Qube directly from the request; the adapter will open FDB and list. let qube = Qube::from_fdb_list(&request_map).expect("Failed to build Qube from FDB list"); - println!("Qube structure:\n{}", qube.to_ascii()); + // println!("Qube structure:\n{}", qube.to_ascii()); + + println!("Qube in arena json format:\n{}", qube.to_arena_json()); // Stop the timer let duration = start_time.elapsed(); diff --git a/qubed_meteo/qube_examples/large_climate_eg.json b/qubed_meteo/qube_examples/large_climate_eg.json new file mode 100644 index 0000000..f7b62c2 --- /dev/null +++ b/qubed_meteo/qube_examples/large_climate_eg.json @@ -0,0 +1,4 @@ +{ + "version": "1", + "qube": [] +} diff --git a/qubed_meteo/qube_examples/large_extremes_eg.json b/qubed_meteo/qube_examples/large_extremes_eg.json new file mode 100644 index 0000000..60ca535 --- /dev/null +++ b/qubed_meteo/qube_examples/large_extremes_eg.json @@ -0,0 +1,4 @@ +{ + "version": "1", + "qube": [{"children":[1],"coords":{},"dim":"root","parent":null},{"children":[2],"coords":{"strings":["d1"]},"dim":"class","parent":0},{"children":[3],"coords":{"strings":["extremes-dt"]},"dim":"dataset","parent":1},{"children":[4],"coords":{"ints":[20260303]},"dim":"date","parent":2},{"children":[5],"coords":{"strings":["0001"]},"dim":"expver","parent":3},{"children":[6],"coords":{"strings":["oper"]},"dim":"stream","parent":4},{"children":[7],"coords":{"strings":["0000"]},"dim":"time","parent":5},{"children":[8],"coords":{"strings":["sfc"]},"dim":"levtype","parent":6},{"children":[9,10,11],"coords":{"strings":["fc"]},"dim":"type","parent":7},{"children":[12],"coords":{"ints":[142,144,169,175,176,177,178,179,180,181,205,228,228216]},"dim":"param","parent":8},{"children":[13],"coords":{"ints":[228058]},"dim":"param","parent":8},{"children":[14],"coords":{"ints":[31,34,78,134,136,137,151,165,166,167,168,235,3020,228029,228050,228218,228219,228221,228235,260015]},"dim":"param","parent":8},{"children":[],"coords":{"strings":["0-1","1-2","10-11","11-12","12-13","13-14","14-15","15-16","16-17","17-18","18-19","19-20","2-3","20-21","21-22","22-23","23-24","24-25","25-26","26-27","27-28","28-29","29-30","3-4","30-31","31-32","32-33","33-34","34-35","35-36","36-37","37-38","38-39","39-40","4-5","40-41","41-42","42-43","43-44","44-45","45-46","46-47","47-48","48-49","49-50","5-6","50-51","51-52","52-53","53-54","54-55","55-56","56-57","57-58","58-59","59-60","6-7","60-61","61-62","62-63","63-64","64-65","65-66","66-67","67-68","68-69","69-70","7-8","70-71","71-72","72-73","73-74","74-75","75-76","76-77","77-78","78-79","79-80","8-9","80-81","81-82","82-83","83-84","84-85","85-86","86-87","87-88","88-89","89-90","9-10","90-91","91-92","92-93","93-94","94-95","95-96"]},"dim":"step","parent":9},{"children":[],"coords":{"strings":["0-6","12-18","18-24","24-30","30-36","36-42","42-48","48-54","54-60","6-12","60-66","66-72","72-78","78-84","84-90","90-96"]},"dim":"step","parent":10},{"children":[],"coords":{"ints":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96]},"dim":"step","parent":11}] +} diff --git a/qubed_meteo/qube_examples/medium_climate_eg.json b/qubed_meteo/qube_examples/medium_climate_eg.json new file mode 100644 index 0000000..f7b62c2 --- /dev/null +++ b/qubed_meteo/qube_examples/medium_climate_eg.json @@ -0,0 +1,4 @@ +{ + "version": "1", + "qube": [] +} diff --git a/qubed_meteo/qube_examples/medium_extremes_eg.json b/qubed_meteo/qube_examples/medium_extremes_eg.json new file mode 100644 index 0000000..4bb8657 --- /dev/null +++ b/qubed_meteo/qube_examples/medium_extremes_eg.json @@ -0,0 +1,4 @@ +{ + "version": "1", + "qube": [{"children":[1],"coords":{},"dim":"root","parent":null},{"children":[2],"coords":{"strings":["d1"]},"dim":"class","parent":0},{"children":[3],"coords":{"strings":["extremes-dt"]},"dim":"dataset","parent":1},{"children":[4],"coords":{"ints":[20260303]},"dim":"date","parent":2},{"children":[5],"coords":{"strings":["0001"]},"dim":"expver","parent":3},{"children":[6],"coords":{"strings":["oper"]},"dim":"stream","parent":4},{"children":[7],"coords":{"strings":["0000"]},"dim":"time","parent":5},{"children":[8],"coords":{"strings":["sfc"]},"dim":"levtype","parent":6},{"children":[9],"coords":{"strings":["fc"]},"dim":"type","parent":7},{"children":[10],"coords":{"ints":[31,34,78,134,136,137,151,165,166,167,168,235,3020,228029,228050,228218,228219,228221,228235,260015]},"dim":"param","parent":8},{"children":[],"coords":{"ints":[0]},"dim":"step","parent":9}] +} diff --git a/qubed_meteo/qube_examples/oper_fdb.json b/qubed_meteo/qube_examples/oper_fdb.json new file mode 100644 index 0000000..807a6fd --- /dev/null +++ b/qubed_meteo/qube_examples/oper_fdb.json @@ -0,0 +1,4 @@ +{ + "version": "1", + "qube": [{"children":[1],"coords":{},"dim":"root","parent":null},{"children":[2,3,4,5],"coords":{"strings":["od"]},"dim":"class","parent":0},{"children":[6],"coords":{"ints":[20231102]},"dim":"date","parent":1},{"children":[7],"coords":{"ints":[20240103]},"dim":"date","parent":1},{"children":[8],"coords":{"ints":[20240118]},"dim":"date","parent":1},{"children":[9],"coords":{"ints":[20240129]},"dim":"date","parent":1},{"children":[10],"coords":{"strings":["g"]},"dim":"domain","parent":2},{"children":[11],"coords":{"strings":["g"]},"dim":"domain","parent":3},{"children":[12],"coords":{"strings":["g"]},"dim":"domain","parent":4},{"children":[13],"coords":{"strings":["g"]},"dim":"domain","parent":5},{"children":[14],"coords":{"strings":["0001"]},"dim":"expver","parent":6},{"children":[15],"coords":{"strings":["0001"]},"dim":"expver","parent":7},{"children":[16],"coords":{"strings":["0001"]},"dim":"expver","parent":8},{"children":[17],"coords":{"strings":["0001"]},"dim":"expver","parent":9},{"children":[18],"coords":{"strings":["oper"]},"dim":"stream","parent":10},{"children":[19],"coords":{"strings":["oper"]},"dim":"stream","parent":11},{"children":[20],"coords":{"strings":["oper"]},"dim":"stream","parent":12},{"children":[21],"coords":{"strings":["oper"]},"dim":"stream","parent":13},{"children":[22],"coords":{"strings":["0000"]},"dim":"time","parent":14},{"children":[23],"coords":{"strings":["0000"]},"dim":"time","parent":15},{"children":[24],"coords":{"strings":["0000"]},"dim":"time","parent":16},{"children":[25],"coords":{"strings":["0000"]},"dim":"time","parent":17},{"children":[26],"coords":{"strings":["sfc"]},"dim":"levtype","parent":18},{"children":[27],"coords":{"strings":["sfc"]},"dim":"levtype","parent":19},{"children":[28],"coords":{"strings":["sfc"]},"dim":"levtype","parent":20},{"children":[29],"coords":{"strings":["sfc"]},"dim":"levtype","parent":21},{"children":[30],"coords":{"strings":["fc"]},"dim":"type","parent":22},{"children":[31],"coords":{"strings":["fc"]},"dim":"type","parent":23},{"children":[32],"coords":{"strings":["fc"]},"dim":"type","parent":24},{"children":[33],"coords":{"strings":["fc"]},"dim":"type","parent":25},{"children":[34],"coords":{"ints":[167]},"dim":"param","parent":26},{"children":[35],"coords":{"ints":[167]},"dim":"param","parent":27},{"children":[36],"coords":{"ints":[49,167]},"dim":"param","parent":28},{"children":[37],"coords":{"ints":[167]},"dim":"param","parent":29},{"children":[],"coords":{"ints":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,93,96,99]},"dim":"step","parent":30},{"children":[],"coords":{"ints":[0,1,2]},"dim":"step","parent":31},{"children":[],"coords":{"ints":[0]},"dim":"step","parent":32},{"children":[],"coords":{"ints":[0]},"dim":"step","parent":33}] +} diff --git a/qubed_meteo/qube_examples/small_climate_eg.json b/qubed_meteo/qube_examples/small_climate_eg.json new file mode 100644 index 0000000..1a17dd0 --- /dev/null +++ b/qubed_meteo/qube_examples/small_climate_eg.json @@ -0,0 +1,4 @@ +{ + "version": "1", + "qube": [{"children":[1],"coords":{},"dim":"root","parent":null},{"children":[2],"coords":{"strings":["highresmip"]},"dim":"activity","parent":0},{"children":[3],"coords":{"strings":["d1"]},"dim":"class","parent":1},{"children":[4],"coords":{"strings":["climate-dt"]},"dim":"dataset","parent":2},{"children":[5],"coords":{"strings":["cont"]},"dim":"experiment","parent":3},{"children":[6],"coords":{"strings":["0001"]},"dim":"expver","parent":4},{"children":[7],"coords":{"ints":[1]},"dim":"generation","parent":5},{"children":[8],"coords":{"strings":["ifs-nemo"]},"dim":"model","parent":6},{"children":[9],"coords":{"ints":[1]},"dim":"realization","parent":7},{"children":[10],"coords":{"strings":["clte"]},"dim":"stream","parent":8},{"children":[11],"coords":{"ints":[1990]},"dim":"year","parent":9},{"children":[12],"coords":{"strings":["sfc"]},"dim":"levtype","parent":10},{"children":[13],"coords":{"ints":[1]},"dim":"month","parent":11},{"children":[14],"coords":{"strings":["high"]},"dim":"resolution","parent":12},{"children":[15],"coords":{"strings":["fc"]},"dim":"type","parent":13},{"children":[16],"coords":{"ints":[19900101]},"dim":"date","parent":14},{"children":[17],"coords":{"ints":[167]},"dim":"param","parent":15},{"children":[],"coords":{"strings":["0000"]},"dim":"time","parent":16}] +} diff --git a/qubed_meteo/qube_examples/small_extremes_eg.json b/qubed_meteo/qube_examples/small_extremes_eg.json new file mode 100644 index 0000000..e54b1a5 --- /dev/null +++ b/qubed_meteo/qube_examples/small_extremes_eg.json @@ -0,0 +1,4 @@ +{ + "version": "1", + "qube": [{"children":[1],"coords":{},"dim":"root","parent":null},{"children":[2],"coords":{"strings":["d1"]},"dim":"class","parent":0},{"children":[3],"coords":{"strings":["extremes-dt"]},"dim":"dataset","parent":1},{"children":[4],"coords":{"ints":[20260303]},"dim":"date","parent":2},{"children":[5],"coords":{"strings":["0001"]},"dim":"expver","parent":3},{"children":[6],"coords":{"strings":["oper"]},"dim":"stream","parent":4},{"children":[7],"coords":{"strings":["0000"]},"dim":"time","parent":5},{"children":[8],"coords":{"strings":["sfc"]},"dim":"levtype","parent":6},{"children":[9],"coords":{"strings":["fc"]},"dim":"type","parent":7},{"children":[10],"coords":{"ints":[34]},"dim":"param","parent":8},{"children":[],"coords":{"ints":[0]},"dim":"step","parent":9}] +} diff --git a/qubed_meteo/src/adapters/fdb.rs b/qubed_meteo/src/adapters/fdb.rs index 4ad7b1a..63fdb18 100644 --- a/qubed_meteo/src/adapters/fdb.rs +++ b/qubed_meteo/src/adapters/fdb.rs @@ -20,7 +20,7 @@ impl FromFDBList for Qube { let fdb = FDB::new(None).map_err(|e| format!("Failed to open FDB: {:?}", e))?; let list_iter = - fdb.list(&request, true, true).map_err(|e| format!("FDB list failed: {:?}", e))?; + fdb.list(&request, true, false).map_err(|e| format!("FDB list failed: {:?}", e))?; let mut qube = Qube::new(); let root = qube.root(); @@ -72,6 +72,11 @@ impl FromFDBList for Qube { if let Some((key, val)) = part.split_once('=') { let vals: Vec<&str> = val.split('/').map(|s| s.trim()).filter(|s| !s.is_empty()).collect(); + + // If there are no value parts (e.g. "key=") skip creating an empty child + if vals.is_empty() { + continue; + } let coords = make_coords(&vals); let child = qube .get_or_create_child(key.trim(), parent, coords)