From 71c1df1af906e0e94cb34880ad03c63b3386124c Mon Sep 17 00:00:00 2001 From: simlecode <69969590+simlecode@users.noreply.github.com> Date: Tue, 15 Jul 2025 11:23:12 +0800 Subject: [PATCH] feat: add random rename source file flag --- chunk.go | 19 ++++++++++++++++--- cmd/graphsplit/main.go | 14 ++++++++++---- extra_file.go | 11 +++++++---- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/chunk.go b/chunk.go index be01765..09f7e41 100644 --- a/chunk.go +++ b/chunk.go @@ -163,6 +163,7 @@ func Chunk(ctx context.Context, parallel int, cb GraphBuildCallback, ef *ExtraFile, + randomRenameSourceFile bool, ) error { var cumuSize int64 = 0 graphSliceCount := 0 @@ -186,7 +187,10 @@ func Chunk(ctx context.Context, } files := GetFileListAsync(args) for item := range files { - item := tryRenameFileName([]Finfo{item})[0] + item := item + if randomRenameSourceFile { + item = tryRenameFileName([]Finfo{item})[0] + } // log.Infof("name: %s", item.Name) fileSize := item.Info.Size() switch { @@ -221,7 +225,11 @@ func Chunk(ctx context.Context, SeekStart: seekStart, SeekEnd: seekEnd, } - graphFiles = append(graphFiles, tryRenameFileName([]Finfo{fi})...) + if randomRenameSourceFile { + graphFiles = append(graphFiles, tryRenameFileName([]Finfo{fi})...) + } else { + graphFiles = append(graphFiles, fi) + } fileSliceCount++ // todo build ipld from graphFiles BuildIpldGraph(ctx, append(ef.getFiles(), graphFiles...), GenGraphName(graphName, graphSliceCount, sliceTotal), parentPath, carDir, parallel, cb, expectSliceSize, ef) @@ -247,7 +255,12 @@ func Chunk(ctx context.Context, SeekStart: seekStart, SeekEnd: seekEnd, } - graphFiles = append(graphFiles, tryRenameFileName([]Finfo{fi})...) + if randomRenameSourceFile { + graphFiles = append(graphFiles, tryRenameFileName([]Finfo{fi})...) + } else { + graphFiles = append(graphFiles, fi) + } + fileSliceCount++ if seekEnd-seekStart == partSliceSize-1 { // todo build ipld from graphFiles diff --git a/cmd/graphsplit/main.go b/cmd/graphsplit/main.go index d1d2477..c228365 100644 --- a/cmd/graphsplit/main.go +++ b/cmd/graphsplit/main.go @@ -76,6 +76,11 @@ var chunkCmd = &cli.Command{ Value: false, Usage: "rename carfile to piece", }, + &cli.BoolFlag{ + Name: "random-rename-source-file", + Value: false, + Usage: "random rename source file name", + }, &cli.BoolFlag{ Name: "add-padding", Value: false, @@ -98,6 +103,7 @@ var chunkCmd = &cli.Command{ parentPath := c.String("parent-path") carDir := c.String("car-dir") graphName := c.String("graph-name") + randomRenameSourceFile := c.Bool("random-rename-source-file") if !graphsplit.ExistDir(carDir) { return fmt.Errorf("the path of car-dir does not exist") } @@ -138,8 +144,8 @@ var chunkCmd = &cli.Command{ if sliceSize+int(extraFileSliceSize) > 32*graphsplit.Gib { return fmt.Errorf("slice size %d + extra file slice size %d exceeds 32 GiB", sliceSize, extraFileSliceSize) } - log.Infof("extra file slice size: %d", extraFileSliceSize) - rf, err := graphsplit.NewRealFile(strings.TrimSuffix(cfg.ExtraFilePath, "/"), int64(extraFileSliceSize), int64(sliceSize)) + log.Infof("extra file slice size: %d, random rename source file: %v", extraFileSliceSize, randomRenameSourceFile) + rf, err := graphsplit.NewRealFile(strings.TrimSuffix(cfg.ExtraFilePath, "/"), int64(extraFileSliceSize), int64(sliceSize), randomRenameSourceFile) if err != nil { return err } @@ -158,11 +164,11 @@ var chunkCmd = &cli.Command{ fmt.Println("loop: ", loop) if !loop { fmt.Println("chunking once...") - return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf) + return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile) } fmt.Println("loop chunking...") for { - err = graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf) + err = graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile) if err != nil { return fmt.Errorf("failed to chunk: %v", err) } diff --git a/extra_file.go b/extra_file.go index 8471f63..8cd2a5b 100644 --- a/extra_file.go +++ b/extra_file.go @@ -15,7 +15,7 @@ type ExtraFile struct { pieceRawSize int64 } -func NewRealFile(path string, sliceSize int64, pieceRawSize int64) (*ExtraFile, error) { +func NewRealFile(path string, sliceSize int64, pieceRawSize int64, randomRenameSourceFile bool) (*ExtraFile, error) { rf := &ExtraFile{path: path, sliceSize: sliceSize, pieceRawSize: pieceRawSize} if path != "" { finfo, err := os.Stat(path) @@ -25,18 +25,21 @@ func NewRealFile(path string, sliceSize int64, pieceRawSize int64) (*ExtraFile, if !finfo.IsDir() { return nil, fmt.Errorf("the path %s is not a directory", path) } - rf.walk() + rf.walk(randomRenameSourceFile) } return rf, nil } -func (rf *ExtraFile) walk() { +func (rf *ExtraFile) walk(randomRenameSourceFile bool) { files := GetFileListAsync([]string{rf.path}) for item := range files { rf.files = append(rf.files, item) } - rf.files = tryRenameFileName(rf.files) + if randomRenameSourceFile { + rf.files = tryRenameFileName(rf.files) + } + } func (rf *ExtraFile) getFiles() []Finfo {