From 6ffb02497217d862fe426bbe04d1589d88b49cbd Mon Sep 17 00:00:00 2001 From: wellorbetter <1419919418@qq.com> Date: Mon, 6 Apr 2026 22:41:28 +0800 Subject: [PATCH] feat: add number formatting options for display (#197) --- src/app.rs | 21 +++ src/format.rs | 479 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/runner.rs | 76 ++++++++ src/ui.rs | 42 ++++- 5 files changed, 610 insertions(+), 9 deletions(-) create mode 100644 src/format.rs diff --git a/src/app.rs b/src/app.rs index e02d4a9..4f14cc8 100644 --- a/src/app.rs +++ b/src/app.rs @@ -183,6 +183,7 @@ pub struct App { sorter: Option>, sort_order: SortOrder, wrap_mode: WrapMode, + column_format_config: Option>, #[cfg(feature = "clipboard")] clipboard: Result, _seekable_file: SeekableFile, @@ -206,6 +207,7 @@ impl App { wrap_mode: Option, auto_reload: bool, no_streaming_stdin: bool, + column_format_config: Option, ) -> CsvlensResult { // TODO: pass a base_config to wait for header properly? let seekable_file = SeekableFile::new(&original_filename, no_streaming_stdin)?; @@ -270,6 +272,15 @@ impl App { Err(e) => Err(anyhow::anyhow!(e)), }; + // Warn if --column-format is used with --no-headers: named column formats will never match. 
use std::collections::HashMap;

/// Display formats that can be applied to numeric cell values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberFormat {
    /// Digits grouped with commas, e.g. `1,234,567.89`.
    Thousands,
    /// Scientific notation, e.g. `1.23e6`.
    Scientific,
    /// Magnitude suffix (`k`/`M`/`B`/`T`), e.g. `1.23M`.
    Si,
    /// Fixed number of decimal places, e.g. `23.51`.
    Fixed,
}

impl NumberFormat {
    /// Parses a format name, ignoring case.
    ///
    /// Accepts `thousands`, `scientific` (alias `sci`), `si` and `fixed`;
    /// returns `None` for anything else, including the empty string.
    // Kept as an inherent method returning `Option` because existing callers rely on it.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Option<Self> {
        match s.to_lowercase().as_str() {
            "thousands" => Some(NumberFormat::Thousands),
            "scientific" | "sci" => Some(NumberFormat::Scientific),
            "si" => Some(NumberFormat::Si),
            "fixed" => Some(NumberFormat::Fixed),
            _ => None,
        }
    }

    /// Formats `value` according to this number format.
    ///
    /// `value` is trimmed and parsed as an `f64`. If it does not parse to a
    /// finite number (text, empty/blank input, `inf`, `NaN`), the original
    /// string is returned unchanged, including its surrounding whitespace.
    ///
    /// `precision` is the number of decimal places. For `Scientific`, `Si`
    /// and `Fixed` it defaults to 2 when `None`. For `Thousands`, `None`
    /// keeps the digits of a plain decimal input exactly as written (so
    /// trailing zeros and integers too large for `f64` survive); other
    /// spellings such as `1e6` are rendered from the parsed value.
    pub fn apply(&self, value: &str, precision: Option<usize>) -> String {
        let trimmed = value.trim();
        let number = match trimmed.parse::<f64>() {
            Ok(parsed) if parsed.is_finite() => parsed,
            _ => return value.to_string(),
        };
        match self {
            NumberFormat::Thousands => match precision {
                Some(places) => format_thousands(number, Some(places)),
                // Prefer the textual digits: going through f64 would drop
                // trailing zeros and corrupt integers above 2^53.
                None => group_plain_decimal(trimmed)
                    .unwrap_or_else(|| format_thousands(number, None)),
            },
            NumberFormat::Scientific => format_scientific(number, precision.unwrap_or(2)),
            NumberFormat::Si => format_human(number, precision.unwrap_or(2)),
            NumberFormat::Fixed => format_fixed(number, precision.unwrap_or(2)),
        }
    }
}

/// Inserts a comma between every group of three digits, counting from the right.
///
/// `digits` is expected to contain ASCII digits only.
fn group_digits(digits: &str) -> String {
    let len = digits.len();
    let mut grouped = String::with_capacity(len + len / 3);
    for (i, ch) in digits.chars().enumerate() {
        if i > 0 && (len - i) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(ch);
    }
    grouped
}

/// Groups the integer digits of a plain decimal literal without re-rendering it.
///
/// Returns `None` unless `text` is an optional sign, an integer part with no
/// redundant leading zeros, and optionally `.` followed by at least one digit.
/// A leading `+` is dropped; everything else is kept verbatim.
fn group_plain_decimal(text: &str) -> Option<String> {
    fn is_digits(part: &str) -> bool {
        !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit())
    }

    let (negative, unsigned) = match text.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, text.strip_prefix('+').unwrap_or(text)),
    };
    let (int_part, frac_part) = match unsigned.split_once('.') {
        Some((int_part, frac_part)) => (int_part, Some(frac_part)),
        None => (unsigned, None),
    };
    let canonical_int = int_part == "0" || !int_part.starts_with('0');
    if !(is_digits(int_part) && canonical_int && frac_part.map_or(true, is_digits)) {
        return None;
    }

    let mut out = String::with_capacity(text.len() + int_part.len() / 3);
    if negative {
        out.push('-');
    }
    out.push_str(&group_digits(int_part));
    if let Some(frac) = frac_part {
        out.push('.');
        out.push_str(frac);
    }
    Some(out)
}

/// Renders `f` with comma-grouped integer digits.
///
/// With `Some(p)` the value is rounded to `p` decimal places first; with
/// `None` the default `f64` rendering is used.
fn format_thousands(f: f64, precision: Option<usize>) -> String {
    let magnitude = f.abs();
    let rendered = match precision {
        Some(places) => format!("{:.prec$}", magnitude, prec = places),
        None => magnitude.to_string(),
    };
    let (int_part, frac_part) = match rendered.split_once('.') {
        Some((int_part, frac_part)) => (int_part, Some(frac_part)),
        None => (rendered.as_str(), None),
    };

    let mut out = String::with_capacity(rendered.len() + int_part.len() / 3 + 1);
    if f < 0.0 {
        out.push('-');
    }
    out.push_str(&group_digits(int_part));
    if let Some(frac) = frac_part {
        out.push('.');
        out.push_str(frac);
    }
    out
}

/// Renders `f` in scientific notation with `precision` mantissa decimals.
///
/// Uses the standard library's exponent formatting so that a mantissa which
/// rounds up to 10 is renormalised (`999.999` -> `1.00e3`, not `10.00e2`).
fn format_scientific(f: f64, precision: usize) -> String {
    // Drop the sign of zero so "-0" renders the same as "0".
    let f = if f == 0.0 { 0.0 } else { f };
    format!("{:.prec$e}", f, prec = precision)
}

/// Renders `f` with exactly `precision` decimal places.
fn format_fixed(f: f64, precision: usize) -> String {
    format!("{:.prec$}", f, prec = precision)
}

/// Scale/suffix pairs used by [`format_human`], in ascending order.
const SI_UNITS: [(f64, &str); 5] = [(1.0, ""), (1e3, "k"), (1e6, "M"), (1e9, "B"), (1e12, "T")];

/// Renders `f` scaled to the largest fitting unit with a `k`/`M`/`B`/`T` suffix.
///
/// Values below 1000 get no suffix but still honour `precision`. If rounding
/// pushes the scaled value to 1000 or more, the next unit is used instead
/// (`999999` -> `1.00M`, not `1000.00k`).
fn format_human(f: f64, precision: usize) -> String {
    let magnitude = f.abs();
    let mut unit = SI_UNITS
        .iter()
        .rposition(|&(scale, _)| magnitude >= scale)
        .unwrap_or(0);
    let mut digits = format!("{:.prec$}", magnitude / SI_UNITS[unit].0, prec = precision);

    // Rounding can carry the displayed value across a unit boundary.
    let carried = digits.parse::<f64>().map_or(false, |shown| shown >= 1000.0);
    if carried && unit + 1 < SI_UNITS.len() {
        unit += 1;
        digits = format!("{:.prec$}", magnitude / SI_UNITS[unit].0, prec = precision);
    }

    let sign = if f < 0.0 { "-" } else { "" };
    format!("{}{}{}", sign, digits, SI_UNITS[unit].1)
}

/// Number-format settings: per-column formats, an optional global fallback,
/// and an optional number of decimal places.
#[derive(Debug, Clone, Default)]
pub struct ColumnFormatConfig {
    /// Formats keyed by exact column name.
    named: HashMap<String, NumberFormat>,
    /// Format used for columns without a named entry.
    global: Option<NumberFormat>,
    /// Decimal places passed to the formatter; `None` leaves the choice to the format.
    precision: Option<usize>,
}

impl ColumnFormatConfig {
    /// Creates a configuration with no formats and no precision.
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets the format for `column`, replacing any previous entry for that name.
    pub fn insert_named(&mut self, column: String, fmt: NumberFormat) {
        self.named.insert(column, fmt);
    }

    /// Sets the fallback format for columns without a named entry.
    pub fn set_global(&mut self, fmt: NumberFormat) {
        self.global = Some(fmt);
    }

    /// Sets the number of decimal places.
    pub fn set_precision(&mut self, p: usize) {
        self.precision = Some(p);
    }

    /// Returns the configured number of decimal places, if any.
    pub fn precision(&self) -> Option<usize> {
        self.precision
    }

    /// Returns the format for `column_name`: the named entry if present,
    /// otherwise the global format, otherwise `None`.
    pub fn get(&self, column_name: &str) -> Option<&NumberFormat> {
        self.named.get(column_name).or(self.global.as_ref())
    }

    /// Returns true when no named format, global format or precision is set.
    pub fn is_empty(&self) -> bool {
        self.named.is_empty() && self.global.is_none() && self.precision.is_none()
    }

    /// Returns true if there are any named (per-column) format configurations.
    pub fn has_named(&self) -> bool {
        !self.named.is_empty()
    }
}
assert_eq!(NumberFormat::Thousands.apply("1234567.89123", Some(2)), "1,234,567.89"); + assert_eq!(NumberFormat::Thousands.apply("1234567.89123", Some(0)), "1,234,568"); + } + + // --- Scientific --- + + #[test] + fn scientific_positive() { + let result = NumberFormat::Scientific.apply("20263.89", None); + // mantissa = 2.026389, exp = 4 → "2.03e4" + assert_eq!(result, "2.03e4"); + } + + #[test] + fn scientific_with_precision() { + assert_eq!(NumberFormat::Scientific.apply("20263.89", Some(4)), "2.0264e4"); + assert_eq!(NumberFormat::Scientific.apply("20263.89", Some(0)), "2e4"); + } + + #[test] + fn scientific_negative() { + let result = NumberFormat::Scientific.apply("-20263.89", None); + assert_eq!(result, "-2.03e4"); + } + + #[test] + fn scientific_small_decimal() { + let result = NumberFormat::Scientific.apply("0.00123", None); + // exp = floor(log10(0.00123)) = floor(-2.91) = -3 + // mantissa = 0.00123 / 1e-3 = 1.23 + assert_eq!(result, "1.23e-3"); + } + + #[test] + fn scientific_zero() { + assert_eq!(NumberFormat::Scientific.apply("0", None), "0.00e0"); + } + + // --- Si --- + + #[test] + fn si_kilo() { + assert_eq!(NumberFormat::Si.apply("1500", None), "1.50k"); + } + + #[test] + fn si_mega() { + assert_eq!(NumberFormat::Si.apply("1234567", None), "1.23M"); + } + + #[test] + fn si_giga() { + assert_eq!(NumberFormat::Si.apply("2500000000", None), "2.50B"); + } + + #[test] + fn si_tera() { + assert_eq!(NumberFormat::Si.apply("1500000000000", None), "1.50T"); + } + + #[test] + fn si_negative() { + assert_eq!(NumberFormat::Si.apply("-1500", None), "-1.50k"); + } + + #[test] + fn si_small() { + // Below 1000: no suffix, precision still applies. 
+ assert_eq!(NumberFormat::Si.apply("42", None), "42.00"); + assert_eq!(NumberFormat::Si.apply("42", Some(0)), "42"); + assert_eq!(NumberFormat::Si.apply("42.123", Some(1)), "42.1"); + } + + #[test] + fn si_with_precision() { + assert_eq!(NumberFormat::Si.apply("1500", Some(1)), "1.5k"); + assert_eq!(NumberFormat::Si.apply("1500", Some(0)), "2k"); + } + + // --- Fixed --- + + #[test] + fn fixed_default_precision() { + assert_eq!(NumberFormat::Fixed.apply("23.505744680851063", None), "23.51"); + } + + #[test] + fn fixed_custom_precision() { + assert_eq!(NumberFormat::Fixed.apply("23.505744680851063", Some(4)), "23.5057"); + assert_eq!(NumberFormat::Fixed.apply("23.505744680851063", Some(0)), "24"); + } + + #[test] + fn fixed_negative() { + assert_eq!(NumberFormat::Fixed.apply("-23.505744680851063", Some(2)), "-23.51"); + } + + #[test] + fn fixed_non_numeric() { + assert_eq!(NumberFormat::Fixed.apply("hello", None), "hello"); + } + + // --- Non-numeric / edge cases --- + + #[test] + fn non_numeric_passthrough() { + assert_eq!(NumberFormat::Thousands.apply("hello", None), "hello"); + assert_eq!(NumberFormat::Scientific.apply("N/A", None), "N/A"); + assert_eq!(NumberFormat::Si.apply("abc", None), "abc"); + } + + #[test] + fn empty_string_passthrough() { + assert_eq!(NumberFormat::Thousands.apply("", None), ""); + assert_eq!(NumberFormat::Scientific.apply("", None), ""); + assert_eq!(NumberFormat::Si.apply("", None), ""); + } + + // --- ColumnFormatConfig --- + + #[test] + fn config_named_priority_over_global() { + let mut cfg = ColumnFormatConfig::new(); + cfg.set_global(NumberFormat::Thousands); + cfg.insert_named("price".to_string(), NumberFormat::Si); + + assert_eq!(cfg.get("price"), Some(&NumberFormat::Si)); + assert_eq!(cfg.get("other"), Some(&NumberFormat::Thousands)); + } + + #[test] + fn config_is_empty() { + let mut cfg = ColumnFormatConfig::new(); + assert!(cfg.is_empty()); + cfg.set_global(NumberFormat::Scientific); + assert!(!cfg.is_empty()); + } + + 
#[test] + fn config_none_when_no_match() { + let cfg = ColumnFormatConfig::new(); + assert!(cfg.get("any_column").is_none()); + } + + // --- Real-world data scenarios --- + + // NSW electricity price data: values like 23.505744680851063 + #[test] + fn fixed_nsw_electricity_prices() { + let values = [ + ("23.505744680851063", "23.51"), + ("19.44625", "19.45"), + ("281.0552083333333", "281.06"), + ("17.108541666666667", "17.11"), + ("-5.25", "-5.25"), + ("0.0", "0.00"), + ]; + for (input, expected) in values { + assert_eq!( + NumberFormat::Fixed.apply(input, Some(2)), + expected, + "input: {input}" + ); + } + } + + // fixed with precision 0 (integer rounding) + // Note: Rust uses round-half-to-even (banker's rounding), so 0.5 → "0", 1.5 → "2" + #[test] + fn fixed_precision_zero_rounding() { + assert_eq!(NumberFormat::Fixed.apply("23.505", Some(0)), "24"); + assert_eq!(NumberFormat::Fixed.apply("23.499", Some(0)), "23"); + assert_eq!(NumberFormat::Fixed.apply("-23.505", Some(0)), "-24"); + assert_eq!(NumberFormat::Fixed.apply("0.4", Some(0)), "0"); + assert_eq!(NumberFormat::Fixed.apply("0.5", Some(0)), "0"); // banker's rounding: 0.5 → 0 + assert_eq!(NumberFormat::Fixed.apply("1.5", Some(0)), "2"); // banker's rounding: 1.5 → 2 + } + + // thousands + precision: financial data + #[test] + fn thousands_with_precision_financial() { + assert_eq!(NumberFormat::Thousands.apply("1234567.8912", Some(2)), "1,234,567.89"); + assert_eq!(NumberFormat::Thousands.apply("1000000.0", Some(0)), "1,000,000"); + assert_eq!(NumberFormat::Thousands.apply("999.999", Some(1)), "1,000.0"); + assert_eq!(NumberFormat::Thousands.apply("-9876543.21", Some(2)), "-9,876,543.21"); + } + + // si + precision: large counts + #[test] + fn si_with_precision_large_counts() { + assert_eq!(NumberFormat::Si.apply("1_500_000", None), "1_500_000"); // underscores not valid f64 + assert_eq!(NumberFormat::Si.apply("1500000", Some(1)), "1.5M"); + assert_eq!(NumberFormat::Si.apply("1500000", Some(3)), "1.500M"); 
+ assert_eq!(NumberFormat::Si.apply("999", Some(2)), "999.00"); // < 1000, precision applies + assert_eq!(NumberFormat::Si.apply("1000", Some(2)), "1.00k"); + assert_eq!(NumberFormat::Si.apply("1000000000000", Some(1)), "1.0T"); + } + + // scientific + precision + #[test] + fn scientific_with_precision_variety() { + assert_eq!(NumberFormat::Scientific.apply("0.00123", Some(3)), "1.230e-3"); + assert_eq!(NumberFormat::Scientific.apply("1234567.0", Some(1)), "1.2e6"); + assert_eq!(NumberFormat::Scientific.apply("1.0", Some(0)), "1e0"); + assert_eq!(NumberFormat::Scientific.apply("0", Some(4)), "0.0000e0"); + } + + // non-numeric values must pass through unchanged regardless of format + #[test] + fn passthrough_all_formats() { + let non_numeric = ["N/A", "null", "", " ", "2009-01-01", "hello world", "1e", "1.2.3"]; + for v in non_numeric { + assert_eq!(NumberFormat::Fixed.apply(v, Some(2)), v, "Fixed passthrough: {v:?}"); + assert_eq!(NumberFormat::Thousands.apply(v, None), v, "Thousands passthrough: {v:?}"); + assert_eq!(NumberFormat::Scientific.apply(v, Some(2)), v, "Scientific passthrough: {v:?}"); + assert_eq!(NumberFormat::Si.apply(v, Some(2)), v, "Si passthrough: {v:?}"); + } + } + + // precision propagation via ColumnFormatConfig + #[test] + fn config_precision_propagates() { + let mut cfg = ColumnFormatConfig::new(); + cfg.set_global(NumberFormat::Fixed); + cfg.set_precision(3); + + let fmt = cfg.get("any_col").unwrap(); + assert_eq!(fmt.apply("23.505744680851063", cfg.precision()), "23.506"); + } +} diff --git a/src/lib.rs b/src/lib.rs index a2f30ec..52fb55f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,6 +43,7 @@ mod common; mod csv; mod delimiter; pub mod errors; +pub mod format; mod find; mod help; mod history; diff --git a/src/runner.rs b/src/runner.rs index f142c4b..17ef0d1 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -117,6 +117,30 @@ struct Args { /// Disable streaming stdin (load entire input before displaying) #[clap(long)] pub 
/// Builds the number-format configuration from the raw CLI arguments.
///
/// * `column_format` - `COL=FMT` entries from `--column-format`; entries
///   without `=` or with an unknown format are reported on stderr and skipped.
/// * `number_format` - the `--number-format` value; an unknown name is
///   reported on stderr and ignored.
/// * `precision` - the `--precision` value, stored on the configuration.
///
/// Returns `None` when nothing at all was configured.
#[cfg(feature = "cli")]
fn parse_column_format_config(
    column_format: &[String],
    number_format: Option<&str>,
    precision: Option<usize>,
) -> Option<crate::format::ColumnFormatConfig> {
    use crate::format::{ColumnFormatConfig, NumberFormat};

    let mut config = ColumnFormatConfig::new();

    // Global format applied to every column without a named entry.
    if let Some(fmt_str) = number_format {
        match NumberFormat::from_str(fmt_str) {
            Some(fmt) => config.set_global(fmt),
            None => eprintln!("Warning: unknown number format '{}', ignoring", fmt_str),
        }
    }

    // Per-column formats given as "col=fmt".
    for spec in column_format {
        match spec.split_once('=') {
            Some((col, fmt_str)) => match NumberFormat::from_str(fmt_str) {
                Some(fmt) => config.insert_named(col.to_string(), fmt),
                None => eprintln!(
                    "Warning: unknown column format '{}' for column '{}', ignoring",
                    fmt_str, col
                ),
            },
            None => eprintln!(
                "Warning: invalid --column-format '{}', expected COL=FMT format, ignoring",
                spec
            ),
        }
    }

    // Checked before set_precision so is_empty() reflects formats only. This
    // also catches the case where every supplied format was rejected above.
    if precision.is_some() && config.is_empty() {
        eprintln!("Warning: --precision has no effect without --number-format or --column-format");
    }

    if let Some(p) = precision {
        config.set_precision(p);
    }

    if config.is_empty() {
        None
    } else {
        Some(config)
    }
}