From 71ed413f0e949bb5cb845bd40997a9ec75b351e7 Mon Sep 17 00:00:00 2001 From: "Z.-L. Deng" Date: Tue, 7 Oct 2025 01:10:04 +0800 Subject: [PATCH 1/3] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7bde0bf..47c7290 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ - [Additional tips](#additional-tips) ## Overview -`tsvkit` combines versatile column selection with an expression engine for statistics, filtering, and data transformation. This makes it straightforward to generate matrices from `samtools idxstats` or `featureCounts`, compute multi-column summaries, or pipe TSV/Excel data through complex workflows without leaving the shell. Multi-sheet Excel workbooks are supported alongside `.tsv`, `.tsv.gz`, and `.tsv.xz` files. +`tsvkit` combines versatile column selection with an expression engine for statistics, filtering, and data transformation. This makes it straightforward to join multiple files and select columns from each file to generate a data matrix (e.g. a gene count table), filter rows based on selected columns, compute multi-column summaries, or pipe TSV/Excel data through complex workflows without leaving the shell. Multi-sheet Excel workbooks are supported alongside `.tsv`, `.tsv.gz`, and `.tsv.xz` files. ### Key features - Stream-friendly processing; every command reads from files or standard input and writes to standard output. From 08f85672a2246f72b5bb143a270b82670f899e12 Mon Sep 17 00:00:00 2001 From: "Z.-L. Deng" Date: Tue, 7 Oct 2025 01:11:50 +0800 Subject: [PATCH 2/3] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 47c7290..e7c23b8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # tsvkit -`tsvkit` is a fast, ergonomic toolkit for working with TSV tables. 
Written in Rust, it brings familiar data-wrangling verbs (join, cut, filter, mutate, summarize, reshape, slice, pretty-print) to the command line with consistent column selection, rich expressions, and streaming-friendly performance. The CLI is inspired by projects such as `csvtk`, `csvkit`, `datamash`, `awk`, `xsv`, and `mlr`, and many options are intentionally compatible with `csvtk` so existing users can transition quickly. +`tsvkit` is a fast, ergonomic toolkit for working with TSV tables. Written in Rust, it brings familiar data-wrangling verbs (join, cut, filter, mutate, summarize, reshape, slice, pretty-print) to the command line with consistent column selection, rich expressions, and streaming-friendly performance. The CLI is inspired by projects such as `csvtk`, `csvkit`, `datamash`, `awk`, `xsv`, and `mlr`, and many options are intentionally compatible with `csvtk` so existing users can adapt quickly. ## Table of Contents - [Overview](#overview) From 26590c4bde2da37861e68ce329bbedd0d9fc2191 Mon Sep 17 00:00:00 2001 From: "Z.-L. 
Deng" Date: Mon, 6 Oct 2025 19:27:37 +0200 Subject: [PATCH 3/3] Defer filter header output until rows match --- src/filter.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/filter.rs b/src/filter.rs index 8e5cdd3..4e1a814 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -98,16 +98,20 @@ pub fn run(args: FilterArgs) -> Result<()> { .collect::>(); let bound = bind_expression(expr_ast, &headers, false)?; let expected_width = headers.len(); - - if !headers.is_empty() { - writeln!(writer, "{}", headers.join("\t"))?; - } + let header_line = (!headers.is_empty()).then(|| headers.join("\t")); + let mut header_written = false; for record in reader.records() { let record = record.with_context(|| format!("failed reading from {:?}", args.file))?; if should_skip_record(&record, &input_opts, Some(expected_width)) { continue; } if evaluate(&bound, &record) { + if !header_written { + if let Some(line) = header_line.as_ref() { + writeln!(writer, "{}", line)?; + } + header_written = true; + } emit_record(&record, &mut writer)?; } }