diff --git a/crates/bashkit/src/builtins/mod.rs b/crates/bashkit/src/builtins/mod.rs index b8a74e25..fb7aaa93 100644 --- a/crates/bashkit/src/builtins/mod.rs +++ b/crates/bashkit/src/builtins/mod.rs @@ -72,6 +72,7 @@ mod mapfile; mod mkfifo; mod navigation; mod nl; +mod numfmt; mod parallel; mod paste; mod patch; @@ -162,6 +163,7 @@ pub use mapfile::Mapfile; pub use mkfifo::Mkfifo; pub use navigation::{Cd, Pwd}; pub use nl::Nl; +pub use numfmt::Numfmt; pub use parallel::Parallel; pub use paste::Paste; pub use patch::Patch; diff --git a/crates/bashkit/src/builtins/numfmt.rs b/crates/bashkit/src/builtins/numfmt.rs new file mode 100644 index 00000000..3175f01b --- /dev/null +++ b/crates/bashkit/src/builtins/numfmt.rs @@ -0,0 +1,586 @@ +//! numfmt builtin - convert numbers to/from human-readable format +//! +//! Supports --to=si/iec/iec-i, --from=si/iec/auto, --suffix, --padding, +//! --round, --format, --field, --delimiter. + +use async_trait::async_trait; + +use super::{Builtin, Context}; +use crate::error::Result; +use crate::interpreter::ExecResult; + +/// Maximum output size to prevent memory exhaustion. 
/// THREAT[TM-DOS-059]: Bound numfmt output
const MAX_OUTPUT_BYTES: usize = 1024 * 1024;

/// The `numfmt` builtin. A unit struct: it carries no state of its own.
pub struct Numfmt;

/// Unit system selectable through `--from` / `--to`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Scale {
    None,
    Si,
    Iec,
    IecI,
    Auto,
}

/// Rounding strategy selectable through `--round`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum RoundMode {
    FromZero,
    TowardsZero,
    Up,
    Down,
    Nearest,
}

/// Settings collected from the command line for one invocation.
struct Options {
    /// Scale used to interpret input numbers (`--from`).
    from: Scale,
    /// Scale used to render output numbers (`--to`).
    to: Scale,
    /// Text appended to every converted number (`--suffix`).
    suffix: String,
    /// Minimum output width; the sign selects the alignment (`--padding`).
    padding: i32,
    /// Rounding strategy (`--round`).
    round: RoundMode,
    /// printf-style template, if one was given (`--format`).
    format: Option<String>,
    /// 1-based index of the field to convert (`--field`).
    field: usize,
    /// Field separator, if one was given (`--delimiter` / `-d`).
    delimiter: Option<String>,
}

impl Default for Options {
    /// No scaling, no suffix, no padding, round away from zero, first field.
    fn default() -> Self {
        Options {
            suffix: String::new(),
            format: None,
            delimiter: None,
            from: Scale::None,
            to: Scale::None,
            round: RoundMode::FromZero,
            padding: 0,
            field: 1,
        }
    }
}

/// Maps a `--from` / `--to` argument onto its [`Scale`].
///
/// Returns the ready-to-print error line for an unknown name.
fn parse_scale(s: &str) -> std::result::Result<Scale, String> {
    const NAMES: [(&str, Scale); 5] = [
        ("none", Scale::None),
        ("si", Scale::Si),
        ("iec", Scale::Iec),
        ("iec-i", Scale::IecI),
        ("auto", Scale::Auto),
    ];

    NAMES
        .iter()
        .find(|(name, _)| *name == s)
        .map(|&(_, scale)| scale)
        .ok_or_else(|| format!("numfmt: invalid unit size: '{}'\n", s))
}

/// Maps a `--round` argument onto its [`RoundMode`].
///
/// Returns the ready-to-print error line for an unknown name.
fn parse_round(s: &str) -> std::result::Result<RoundMode, String> {
    const NAMES: [(&str, RoundMode); 5] = [
        ("up", RoundMode::Up),
        ("down", RoundMode::Down),
        ("from-zero", RoundMode::FromZero),
        ("towards-zero", RoundMode::TowardsZero),
        ("nearest", RoundMode::Nearest),
    ];

    NAMES
        .iter()
        .find(|(name, _)| *name == s)
        .map(|&(_, mode)| mode)
        .ok_or_else(|| format!("numfmt: invalid rounding mode: '{}'\n", s))
}

/// SI suffixes: K=1000, M=1e6, G=1e9, T=1e12, P=1e15, E=1e18
const SI_SUFFIXES: &[(char, f64)] = &[
    ('K', 1_000.0),
    ('M', 1_000_000.0),
    ('G', 1_000_000_000.0),
    ('T', 1_000_000_000_000.0),
    ('P', 1_000_000_000_000_000.0),
    ('E', 1_000_000_000_000_000_000.0),
];

// IEC suffixes: K=1024, M=1024^2, G=1024^3, ...
+const IEC_SUFFIXES: &[(char, f64)] = &[ + ('K', 1024.0), + ('M', 1_048_576.0), + ('G', 1_073_741_824.0), + ('T', 1_099_511_627_776.0), + ('P', 1_125_899_906_842_624.0), + ('E', 1_152_921_504_606_846_976.0), +]; + +fn round_value(val: f64, mode: RoundMode) -> f64 { + match mode { + RoundMode::Up => val.ceil(), + RoundMode::Down => val.floor(), + RoundMode::FromZero => { + if val >= 0.0 { + val.ceil() + } else { + val.floor() + } + } + RoundMode::TowardsZero => { + if val >= 0.0 { + val.floor() + } else { + val.ceil() + } + } + RoundMode::Nearest => val.round(), + } +} + +/// Parse an input number, possibly with a suffix (from --from mode). +fn parse_number(input: &str, from: Scale) -> std::result::Result { + let s = input.trim(); + if s.is_empty() { + return Err(format!("numfmt: invalid number: '{}'\n", input.trim_end())); + } + + match from { + Scale::None => s + .parse::() + .map_err(|_| format!("numfmt: invalid number: '{}'\n", s)), + Scale::Si | Scale::Iec | Scale::IecI | Scale::Auto => { + // Try to split trailing suffix + let (num_part, suffix) = split_suffix(s); + let base: f64 = num_part + .parse() + .map_err(|_| format!("numfmt: invalid number: '{}'\n", s))?; + + if suffix.is_empty() { + return Ok(base); + } + + let suffix_upper = suffix.to_ascii_uppercase(); + let Some(first_char) = suffix_upper.chars().next() else { + return Err(format!("numfmt: invalid suffix in input: '{}'\n", s)); + }; + + // Auto: if suffix ends with 'i' (like Ki, Mi), use IEC; otherwise SI + let use_iec = match from { + Scale::Iec | Scale::IecI => true, + Scale::Auto => suffix_upper.ends_with('I') && suffix_upper.len() >= 2, + _ => false, + }; + + let table = if use_iec { IEC_SUFFIXES } else { SI_SUFFIXES }; + + for &(c, factor) in table { + if first_char == c { + return Ok(base * factor); + } + } + + Err(format!("numfmt: invalid suffix in input: '{}'\n", s)) + } + } +} + +fn split_suffix(s: &str) -> (&str, &str) { + // Find where the numeric part ends + let end = s + .rfind(|c: 
char| c.is_ascii_digit() || c == '.') + .map(|i| i + 1) + .unwrap_or(0); + (&s[..end], &s[end..]) +} + +/// Format a number for output with --to mode. +fn format_number(val: f64, to: Scale, round: RoundMode, suffix: &str, padding: i32) -> String { + let formatted = match to { + Scale::None => { + let rounded = round_value(val, round); + if rounded.fract() == 0.0 && rounded.abs() < i64::MAX as f64 { + format!("{}{}", rounded as i64, suffix) + } else { + format!("{}{}", rounded, suffix) + } + } + Scale::Si => format_with_scale(val, SI_SUFFIXES, false, round, suffix), + Scale::Iec => format_with_scale(val, IEC_SUFFIXES, false, round, suffix), + Scale::IecI => format_with_scale(val, IEC_SUFFIXES, true, round, suffix), + Scale::Auto => { + // --to=auto not valid, treat as none + let rounded = round_value(val, round); + format!("{}{}", rounded, suffix) + } + }; + + apply_padding(&formatted, padding) +} + +fn format_with_scale( + val: f64, + table: &[(char, f64)], + iec_i_suffix: bool, + round: RoundMode, + suffix: &str, +) -> String { + let abs_val = val.abs(); + + // Find the largest unit that gives a value >= 1 + let mut chosen: Option<(char, f64)> = None; + for &(c, factor) in table { + if abs_val >= factor { + chosen = Some((c, factor)); + } + } + + match chosen { + Some((c, factor)) => { + let scaled = val / factor; + let display = format_scaled_value(scaled, round); + if iec_i_suffix { + format!("{}{}i{}", display, c, suffix) + } else { + format!("{}{}{}", display, c, suffix) + } + } + None => { + // Value too small for any suffix + let rounded = round_value(val, round); + if rounded.fract() == 0.0 && rounded.abs() < i64::MAX as f64 { + format!("{}{}", rounded as i64, suffix) + } else { + format!("{}{}", rounded, suffix) + } + } + } +} + +/// Format a scaled value like "1.0", "1.5", etc. +/// GNU numfmt shows one decimal place when the value is < 10. 
+fn format_scaled_value(val: f64, round: RoundMode) -> String { + let abs = val.abs(); + if abs < 10.0 { + // One decimal place, with rounding applied to the tenths + let shifted = val * 10.0; + let rounded = round_value(shifted, round) / 10.0; + format!("{:.1}", rounded) + } else { + let rounded = round_value(val, round); + format!("{}", rounded as i64) + } +} + +fn apply_padding(s: &str, padding: i32) -> String { + let width = padding.unsigned_abs() as usize; + if width <= s.len() { + return s.to_string(); + } + if padding > 0 { + // Right-align (pad with spaces on left) + format!("{:>width$}", s, width = width) + } else { + // Left-align (pad with spaces on right) + format!("{: std::result::Result<(Options, Vec), String> { + let mut opts = Options::default(); + let mut operands = Vec::new(); + let mut i = 0; + + while i < args.len() { + let arg = &args[i]; + if arg == "--" { + operands.extend(args[i + 1..].iter().cloned()); + break; + } else if let Some(val) = arg.strip_prefix("--to=") { + opts.to = parse_scale(val)?; + } else if arg == "--to" { + i += 1; + if i >= args.len() { + return Err("numfmt: missing argument for --to\n".to_string()); + } + opts.to = parse_scale(&args[i])?; + } else if let Some(val) = arg.strip_prefix("--from=") { + opts.from = parse_scale(val)?; + } else if arg == "--from" { + i += 1; + if i >= args.len() { + return Err("numfmt: missing argument for --from\n".to_string()); + } + opts.from = parse_scale(&args[i])?; + } else if let Some(val) = arg.strip_prefix("--suffix=") { + opts.suffix = val.to_string(); + } else if arg == "--suffix" { + i += 1; + if i >= args.len() { + return Err("numfmt: missing argument for --suffix\n".to_string()); + } + opts.suffix = args[i].clone(); + } else if let Some(val) = arg.strip_prefix("--padding=") { + opts.padding = val + .parse() + .map_err(|_| format!("numfmt: invalid padding value: '{}'\n", val))?; + } else if arg == "--padding" { + i += 1; + if i >= args.len() { + return Err("numfmt: missing argument 
for --padding\n".to_string()); + } + opts.padding = args[i] + .parse() + .map_err(|_| format!("numfmt: invalid padding value: '{}'\n", &args[i]))?; + } else if let Some(val) = arg.strip_prefix("--round=") { + opts.round = parse_round(val)?; + } else if arg == "--round" { + i += 1; + if i >= args.len() { + return Err("numfmt: missing argument for --round\n".to_string()); + } + opts.round = parse_round(&args[i])?; + } else if let Some(val) = arg.strip_prefix("--format=") { + opts.format = Some(val.to_string()); + } else if arg == "--format" { + i += 1; + if i >= args.len() { + return Err("numfmt: missing argument for --format\n".to_string()); + } + opts.format = Some(args[i].clone()); + } else if let Some(val) = arg.strip_prefix("--field=") { + opts.field = val + .parse() + .map_err(|_| format!("numfmt: invalid field value: '{}'\n", val))?; + if opts.field == 0 { + return Err("numfmt: invalid field value: '0'\n".to_string()); + } + } else if arg == "--field" { + i += 1; + if i >= args.len() { + return Err("numfmt: missing argument for --field\n".to_string()); + } + opts.field = args[i] + .parse() + .map_err(|_| format!("numfmt: invalid field value: '{}'\n", &args[i]))?; + if opts.field == 0 { + return Err("numfmt: invalid field value: '0'\n".to_string()); + } + } else if let Some(val) = arg.strip_prefix("--delimiter=") { + opts.delimiter = Some(val.to_string()); + } else if arg == "--delimiter" || arg == "-d" { + i += 1; + if i >= args.len() { + return Err("numfmt: missing argument for --delimiter\n".to_string()); + } + opts.delimiter = Some(args[i].clone()); + } else if arg.starts_with('-') && arg.len() > 1 && !arg.starts_with("--") { + // Unknown short option + return Err(format!("numfmt: invalid option -- '{}'\n", &arg[1..])); + } else if arg.starts_with("--") { + return Err(format!("numfmt: unrecognized option '{}'\n", arg)); + } else { + operands.push(arg.clone()); + } + i += 1; + } + + Ok((opts, operands)) +} + +fn convert_line(line: &str, opts: &Options) -> 
std::result::Result { + if let Some(ref delim) = opts.delimiter { + // Split by delimiter, convert the specified field + let parts: Vec<&str> = line.split(delim.as_str()).collect(); + let field_idx = opts.field - 1; + if field_idx >= parts.len() { + return Ok(line.to_string()); + } + let val = parse_number(parts[field_idx], opts.from)?; + let converted = format_number(val, opts.to, opts.round, &opts.suffix, opts.padding); + let mut result_parts: Vec = parts.iter().map(|s| s.to_string()).collect(); + result_parts[field_idx] = converted; + Ok(result_parts.join(delim)) + } else if opts.field > 1 { + // Split by whitespace, convert the specified field + let parts: Vec<&str> = line.split_whitespace().collect(); + let field_idx = opts.field - 1; + if field_idx >= parts.len() { + return Ok(line.to_string()); + } + let val = parse_number(parts[field_idx], opts.from)?; + let converted = format_number(val, opts.to, opts.round, &opts.suffix, opts.padding); + let mut result_parts: Vec = parts.iter().map(|s| s.to_string()).collect(); + result_parts[field_idx] = converted; + Ok(result_parts.join(" ")) + } else { + // Convert the whole line (trimmed) + let trimmed = line.trim(); + // Strip user suffix before parsing if present + let to_parse = if !opts.suffix.is_empty() { + trimmed.strip_suffix(&opts.suffix).unwrap_or(trimmed) + } else { + trimmed + }; + let val = parse_number(to_parse, opts.from)?; + format_with_printf(val, opts) + } +} + +fn format_with_printf(val: f64, opts: &Options) -> std::result::Result { + if let Some(ref fmt) = opts.format { + // Basic printf-style: support %f, %g, %e with optional width/precision + apply_printf_format(val, fmt, &opts.suffix, opts.padding) + } else { + Ok(format_number( + val, + opts.to, + opts.round, + &opts.suffix, + opts.padding, + )) + } +} + +fn apply_printf_format( + val: f64, + fmt: &str, + suffix: &str, + padding: i32, +) -> std::result::Result { + // Find the % format specifier + let Some(pct_pos) = fmt.find('%') else { + return 
Ok(format!("{}{}", fmt, suffix)); + }; + + let before = &fmt[..pct_pos]; + let rest = &fmt[pct_pos + 1..]; + + // Find the conversion character (f, g, e, d) + let conv_pos = rest + .find(['f', 'g', 'e', 'd', 'i']) + .ok_or_else(|| format!("numfmt: invalid format '{}'\n", fmt))?; + + let spec = &rest[..conv_pos]; + let conv = rest.as_bytes()[conv_pos] as char; + let after = &rest[conv_pos + 1..]; + + let formatted = match conv { + 'f' => { + if let Some(dot_pos) = spec.find('.') { + let precision: usize = spec[dot_pos + 1..] + .parse() + .map_err(|_| format!("numfmt: invalid format '{}'\n", fmt))?; + format!("{:.prec$}", val, prec = precision) + } else { + format!("{:.6}", val) + } + } + 'g' => format!("{}", val), + 'e' => format!("{:e}", val), + 'd' | 'i' => format!("{}", val as i64), + _ => unreachable!(), + }; + + let result = format!("{}{}{}{}", before, formatted, suffix, after); + Ok(apply_padding(&result, padding)) +} + +#[async_trait] +impl Builtin for Numfmt { + async fn execute(&self, ctx: Context<'_>) -> Result { + let (opts, operands) = match parse_options(ctx.args) { + Ok(v) => v, + Err(e) => return Ok(ExecResult::err(e, 1)), + }; + + let mut output = String::new(); + + if operands.is_empty() { + // Read from stdin + if let Some(stdin) = ctx.stdin { + for line in stdin.lines() { + if output.len() > MAX_OUTPUT_BYTES { + break; + } + match convert_line(line, &opts) { + Ok(converted) => { + output.push_str(&converted); + output.push('\n'); + } + Err(e) => return Ok(ExecResult::err(e, 2)), + } + } + } + } else { + // Process each operand + for operand in &operands { + if output.len() > MAX_OUTPUT_BYTES { + break; + } + match convert_line(operand, &opts) { + Ok(converted) => { + output.push_str(&converted); + output.push('\n'); + } + Err(e) => return Ok(ExecResult::err(e, 2)), + } + } + } + + Ok(ExecResult::ok(output)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_number_plain() { + assert_eq!(parse_number("1024", 
Scale::None).unwrap(), 1024.0); + assert_eq!(parse_number("1048576", Scale::None).unwrap(), 1048576.0); + } + + #[test] + fn test_parse_number_iec() { + assert_eq!(parse_number("1K", Scale::Iec).unwrap(), 1024.0); + assert_eq!(parse_number("1M", Scale::Iec).unwrap(), 1_048_576.0); + } + + #[test] + fn test_parse_number_si() { + assert_eq!(parse_number("1K", Scale::Si).unwrap(), 1000.0); + assert_eq!(parse_number("1M", Scale::Si).unwrap(), 1_000_000.0); + } + + #[test] + fn test_format_to_iec() { + let s = format_number(1_048_576.0, Scale::Iec, RoundMode::FromZero, "", 0); + assert_eq!(s, "1.0M"); + } + + #[test] + fn test_format_to_si() { + let s = format_number(1_048_576.0, Scale::Si, RoundMode::FromZero, "", 0); + assert_eq!(s, "1.1M"); + } + + #[test] + fn test_format_to_iec_i() { + let s = format_number(1_048_576.0, Scale::IecI, RoundMode::FromZero, "", 0); + assert_eq!(s, "1.0Mi"); + } + + #[test] + fn test_format_with_suffix() { + let s = format_number(1_048_576.0, Scale::Iec, RoundMode::FromZero, "B", 0); + assert_eq!(s, "1.0MB"); + } + + #[test] + fn test_round_modes() { + assert_eq!(round_value(1.1, RoundMode::Up), 2.0); + assert_eq!(round_value(1.9, RoundMode::Down), 1.0); + assert_eq!(round_value(1.5, RoundMode::Nearest), 2.0); + assert_eq!(round_value(-1.5, RoundMode::FromZero), -2.0); + assert_eq!(round_value(-1.5, RoundMode::TowardsZero), -1.0); + } + + #[test] + fn test_padding() { + let s = format_number(1024.0, Scale::Iec, RoundMode::FromZero, "", 10); + assert_eq!(s, " 1.0K"); + } + + #[test] + fn test_invalid_number() { + assert!(parse_number("abc", Scale::None).is_err()); + assert!(parse_number("", Scale::None).is_err()); + } +} diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index 3d73e7d1..06d42a11 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -610,6 +610,7 @@ impl Interpreter { "seq" => Seq, "expr" => Expr, "bc" => Bc, + "numfmt" => Numfmt, // Misc 
utilities "yes" => Yes, "sleep" => Sleep, diff --git a/crates/bashkit/tests/spec_cases/bash/numfmt.test.sh b/crates/bashkit/tests/spec_cases/bash/numfmt.test.sh new file mode 100644 index 00000000..fb38afab --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/numfmt.test.sh @@ -0,0 +1,80 @@ +### numfmt_to_iec +numfmt --to=iec 1048576 +### expect +1.0M +### end + +### numfmt_to_si +numfmt --to=si 1048576 +### expect +1.1M +### end + +### numfmt_from_iec +numfmt --from=iec 1K +### expect +1024 +### end + +### numfmt_from_si +numfmt --from=si 1K +### expect +1000 +### end + +### numfmt_to_iec_suffix +numfmt --to=iec --suffix=B 1048576 +### expect +1.0MB +### end + +### numfmt_to_iec_i +numfmt --to=iec-i 1048576 +### expect +1.0Mi +### end + +### numfmt_roundtrip_iec +numfmt --from=iec --to=iec 1M +### expect +1.0M +### end + +### numfmt_stdin +echo 1048576 | numfmt --to=iec +### expect +1.0M +### end + +### numfmt_multiple_args +numfmt --to=iec 1024 2048 1048576 +### expect +1.0K +2.0K +1.0M +### end + +### numfmt_padding +numfmt --to=iec --padding=10 1048576 +### expect + 1.0M +### end + +### numfmt_small_number +numfmt --to=iec 500 +### expect +500 +### end + +### numfmt_large_number +numfmt --to=si 1000000000 +### expect +1.0G +### end + +### numfmt_invalid_number +numfmt --to=iec abc +echo "exit: $?" +### expect +exit: 2 +### end