From ea2a49035b4d46a138025e2baf4a58e3a660158d Mon Sep 17 00:00:00 2001 From: Davide Beatrici Date: Fri, 13 Mar 2026 09:00:38 +0100 Subject: [PATCH] feat(cmd): Make "extract" more user-friendly, matching common archive tools Usage: car extract --file file.car [--output output] [mydir/mysubdir] [mydir/mysubdir2/file] [mydir2] [myfile]... This also makes "extract" much more consistent with "create" and with common archive tools: - Not specifying any paths results in a complete extraction. - To read from stdin a single dash ('-') is specified as the input file. --- cmd/car/car.go | 20 ++++++-------- cmd/car/extract.go | 31 +++++++++++++++------ cmd/car/testdata/script/create-extract.txt | 2 +- cmd/car/testdata/script/extract.txt | 32 ++++++++++++++++------ 4 files changed, 55 insertions(+), 30 deletions(-) diff --git a/cmd/car/car.go b/cmd/car/car.go index 29dd9c10..1e7ee8d2 100644 --- a/cmd/car/car.go +++ b/cmd/car/car.go @@ -100,23 +100,21 @@ func main1() int { }}, }, { - Name: "extract", - Aliases: []string{"x"}, - Usage: "Extract the contents of a car when the car encodes UnixFS data", - Action: ExtractCar, - ArgsUsage: "[output directory|-]", + Name: "extract", + Aliases: []string{"x"}, + Usage: "Extract the contents of a car when the car encodes UnixFS data", + Action: ExtractCar, Flags: []cli.Flag{ &cli.StringFlag{ Name: "file", - Aliases: []string{"f"}, - Usage: "The car file to extract from, or stdin if omitted", - Required: false, + Aliases: []string{"f", "input", "i"}, + Usage: "The car file to extract from. 
Use a single dash ('-') to read from stdin", TakesFile: true, }, &cli.StringFlag{ - Name: "path", - Aliases: []string{"p"}, - Usage: "The unixfs path to extract", + Name: "output", + Aliases: []string{"o"}, + Usage: "The path to write into", Required: false, }, &cli.BoolFlag{ diff --git a/cmd/car/extract.go b/cmd/car/extract.go index 91c0c033..ec574e84 100644 --- a/cmd/car/extract.go +++ b/cmd/car/extract.go @@ -22,18 +22,22 @@ var ErrNotDir = fmt.Errorf("not a directory") // ExtractCar pulls files and directories out of a car func ExtractCar(c *cli.Context) error { + if !c.IsSet("file") { + return fmt.Errorf("a file source must be specified") + } + outputDir, err := os.Getwd() if err != nil { return err } - if c.Args().Present() { - outputDir = c.Args().First() + if c.IsSet("output") { + outputDir = c.String("output") } var store storage.ReadableStorage var roots []cid.Cid - if c.String("file") == "" { + if c.String("file") == "-" { if f, ok := c.App.Reader.(*os.File); ok { stat, err := f.Stat() if err != nil { @@ -71,19 +75,28 @@ func ExtractCar(c *cli.Context) error { ls.TrustedStorage = true ls.SetReadStorage(store) - path, err := pathSegments(c.String("path")) - if err != nil { - return err + paths := c.Args().Slice() + if len(paths) == 0 { + paths = append(paths, "") } var extractedFiles int - for _, root := range roots { - count, err := lib.ExtractToDir(c.Context, &ls, root, outputDir, path, c.IsSet("verbose"), c.App.ErrWriter) + + for _, p := range paths { + path, err := pathSegments(p) if err != nil { return err } - extractedFiles += count + + for _, root := range roots { + count, err := lib.ExtractToDir(c.Context, &ls, root, outputDir, path, c.IsSet("verbose"), c.App.ErrWriter) + if err != nil { + return err + } + extractedFiles += count + } } + if extractedFiles == 0 { return cli.Exit("no files extracted", 1) } else { diff --git a/cmd/car/testdata/script/create-extract.txt b/cmd/car/testdata/script/create-extract.txt index e4dac77c..f3433959 100644 --- 
a/cmd/car/testdata/script/create-extract.txt +++ b/cmd/car/testdata/script/create-extract.txt @@ -1,6 +1,6 @@ car create --file=out.car foo.txt bar.txt mkdir out -car extract -v -f out.car out +car extract -v -f out.car -o out stderr -count=2 'txt$' stderr -count=1 '^extracted 2 file\(s\)$' car create --file=out2.car out/foo.txt out/bar.txt diff --git a/cmd/car/testdata/script/extract.txt b/cmd/car/testdata/script/extract.txt index aa2713be..a604bead 100644 --- a/cmd/car/testdata/script/extract.txt +++ b/cmd/car/testdata/script/extract.txt @@ -1,6 +1,6 @@ # full DAG export, everything in the CAR mkdir actual-full -car extract -f ${INPUTS}/simple-unixfs.car actual-full +car extract -f ${INPUTS}/simple-unixfs.car -o actual-full stderr '^extracted 9 file\(s\)$' cmp actual-full/a/1/A.txt expected/a/1/A.txt cmp actual-full/a/2/B.txt expected/a/2/B.txt @@ -15,7 +15,7 @@ cmp actual-full/c/8/H.txt expected/c/8/H.txt # full DAG export, everything in the CAR, accepted from stdin mkdir actual-stdin stdin ${INPUTS}/simple-unixfs.car -car extract actual-stdin +car extract -f - -o actual-stdin stderr '^extracted 9 file\(s\)$' cmp actual-stdin/a/1/A.txt expected/a/1/A.txt cmp actual-stdin/a/2/B.txt expected/a/2/B.txt @@ -29,7 +29,7 @@ cmp actual-stdin/c/8/H.txt expected/c/8/H.txt # full DAG export, everything in the CAR, but the CAR is missing blocks (incomplete DAG) mkdir actual-missing -car extract -f ${INPUTS}/simple-unixfs-missing-blocks.car actual-missing +car extract -f ${INPUTS}/simple-unixfs-missing-blocks.car -o actual-missing stderr -count=1 'data for entry not found: /b/4 \(skipping\.\.\.\)' stderr -count=1 'data for entry not found: /b/5/E.txt \(skipping\.\.\.\)' stderr -count=1 'data for entry not found: /b/6 \(skipping\.\.\.\)' @@ -46,7 +46,7 @@ cmp actual-missing/c/8/H.txt expected/c/8/H.txt # path-based partial export, everything under the path specified (also without leading / in path) mkdir actual-partial -car extract -f ${INPUTS}/simple-unixfs.car -p b 
actual-partial +car extract -f ${INPUTS}/simple-unixfs.car -o actual-partial b stderr '^extracted 3 file\(s\)$' ! exists actual-partial/a/1/A.txt ! exists actual-partial/a/2/B.txt @@ -60,7 +60,7 @@ cmp actual-partial/b/4/D.txt expected/b/4/D.txt # path-based single-file export (also with leading /) mkdir actual-single -car extract -f ${INPUTS}/simple-unixfs.car -p /a/2/B.txt actual-single +car extract -f ${INPUTS}/simple-unixfs.car -o actual-single /a/2/B.txt stderr '^extracted 1 file\(s\)$' ! exists actual-single/a/1/A.txt cmp actual-single/a/2/B.txt expected/a/2/B.txt @@ -72,18 +72,32 @@ cmp actual-single/a/2/B.txt expected/a/2/B.txt ! exists actual-single/c/7/G.txt ! exists actual-single/c/8/H.txt +# path-based multiple export +mkdir actual-multiple +car extract -f ${INPUTS}/simple-unixfs.car -o actual-multiple /a b/6 /c/7/G.txt +stderr '^extracted 5 file\(s\)$' +cmp actual-multiple/a/1/A.txt expected/a/1/A.txt +cmp actual-multiple/a/2/B.txt expected/a/2/B.txt +cmp actual-multiple/a/3/C.txt expected/a/3/C.txt +! exists actual-multiple/b/5/E.txt +cmp actual-multiple/b/6/F.txt expected/b/6/F.txt +! exists actual-multiple/b/4/D.txt +! exists actual-multiple/c/9/I.txt +cmp actual-multiple/c/7/G.txt expected/c/7/G.txt +! exists actual-multiple/c/8/H.txt + # extract that doesn't yield any files should error -! car extract -f ${INPUTS}/simple-unixfs-missing-blocks.car -p b +! 
car extract -f ${INPUTS}/simple-unixfs-missing-blocks.car b stderr '^no files extracted$' # car with only one file, nested inside sharded directory, output to stdout -car extract -f ${INPUTS}/wikipedia-cryptographic-hash-function.car -p wiki/Cryptographic_hash_function - +car extract -f ${INPUTS}/wikipedia-cryptographic-hash-function.car -o - wiki/Cryptographic_hash_function stderr '^extracted 1 file\(s\)$' stdout -count=1 '^ Cryptographic hash function$' # car with only one file, full extract, lots of errors mkdir actual-wiki -car extract -f ${INPUTS}/wikipedia-cryptographic-hash-function.car actual-wiki +car extract -f ${INPUTS}/wikipedia-cryptographic-hash-function.car -o actual-wiki stderr '^extracted 1 file\(s\)$' stderr -count=1 '^data for entry not found for 570 unknown sharded entries \(skipped\.\.\.\)$' # random sampling of expected skip errors @@ -110,4 +124,4 @@ c9I -- expected/c/7/G.txt -- c7G -- expected/c/8/H.txt -- -c8H \ No newline at end of file +c8H