diff --git a/chunk.go b/chunk.go
index 09f7e41..509aa26 100644
--- a/chunk.go
+++ b/chunk.go
@@ -164,6 +164,7 @@ func Chunk(ctx context.Context,
 	cb GraphBuildCallback,
 	ef *ExtraFile,
 	randomRenameSourceFile bool,
+	randomSelectFile bool, // when true, files are chunked in random order
 ) error {
 	var cumuSize int64 = 0
 	graphSliceCount := 0
@@ -185,8 +186,20 @@ func Chunk(ctx context.Context,
 		log.Warn("Empty folder or file!")
 		return nil
 	}
+	// Drain the listing into memory first so its order can be randomized.
+	var allFiles []Finfo
 	files := GetFileListAsync(args)
 	for item := range files {
+		allFiles = append(allFiles, item)
+	}
+	log.Infof("total files: %d", len(allFiles))
+
+	// Honor --random-select-file: keep the listing order when it is disabled.
+	if randomSelectFile {
+		Shuffle(allFiles)
+	}
+
+	for _, item := range allFiles {
 		item := item
 		if randomRenameSourceFile {
 			item = tryRenameFileName([]Finfo{item})[0]
diff --git a/cmd/graphsplit/main.go b/cmd/graphsplit/main.go
index c228365..8352a73 100644
--- a/cmd/graphsplit/main.go
+++ b/cmd/graphsplit/main.go
@@ -95,6 +95,11 @@ var chunkCmd = &cli.Command{
 			Name:  "loop",
 			Usage: "loop chunking",
 		},
+		&cli.BoolFlag{
+			Name:  "random-select-file",
+			Usage: "random select file to chunk",
+			Value: true,
+		},
 	},
 	ArgsUsage: "",
 	Action: func(c *cli.Context) error {
@@ -104,6 +109,7 @@ var chunkCmd = &cli.Command{
 		carDir := c.String("car-dir")
 		graphName := c.String("graph-name")
 		randomRenameSourceFile := c.Bool("random-rename-source-file")
+		randomSelectFile := c.Bool("random-select-file")
 		if !graphsplit.ExistDir(carDir) {
 			return fmt.Errorf("the path of car-dir does not exist")
 		}
@@ -144,7 +150,7 @@ var chunkCmd = &cli.Command{
 		if sliceSize+int(extraFileSliceSize) > 32*graphsplit.Gib {
 			return fmt.Errorf("slice size %d + extra file slice size %d exceeds 32 GiB", sliceSize, extraFileSliceSize)
 		}
-		log.Infof("extra file slice size: %d, random rename source file: %v", extraFileSliceSize, randomRenameSourceFile)
+		log.Infof("extra file slice size: %d, random rename source file: %v, random select file: %v", extraFileSliceSize, randomRenameSourceFile, randomSelectFile)
 		rf, err := graphsplit.NewRealFile(strings.TrimSuffix(cfg.ExtraFilePath, "/"), int64(extraFileSliceSize), int64(sliceSize), randomRenameSourceFile)
 		if err != nil {
 			return err
@@ -164,11 +170,11 @@ var chunkCmd = &cli.Command{
 		fmt.Println("loop: ", loop)
 		if !loop {
 			fmt.Println("chunking once...")
-			return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile)
+			return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile, randomSelectFile)
 		}
 		fmt.Println("loop chunking...")
 		for {
-			err = graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile)
+			err = graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile, randomSelectFile)
 			if err != nil {
 				return fmt.Errorf("failed to chunk: %v", err)
 			}
diff --git a/extra_file.go b/extra_file.go
index 8cd2a5b..b0db8b3 100644
--- a/extra_file.go
+++ b/extra_file.go
@@ -39,7 +39,7 @@ func (rf *ExtraFile) walk(randomRenameSourceFile bool) {
 	if randomRenameSourceFile {
 		rf.files = tryRenameFileName(rf.files)
 	}
-
+	Shuffle(rf.files)
 }
 
 func (rf *ExtraFile) getFiles() []Finfo {
diff --git a/utils.go b/utils.go
index 687073e..d9f608e 100644
--- a/utils.go
+++ b/utils.go
@@ -646,3 +646,16 @@ func RandomLetters() string {
 
 	return string(selected)
 }
+
+// Shuffle randomly permutes arr in place. It accepts a slice of any element
+// type and draws from a private generator seeded with the current wall-clock
+// time in nanoseconds, so the resulting order differs from run to run.
+// Nil, empty, and single-element slices are left untouched.
+func Shuffle[T any](arr []T) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+	// Rand.Shuffle drives the permutation; it only needs a swap callback.
+	r.Shuffle(len(arr), func(i, j int) {
+		arr[i], arr[j] = arr[j], arr[i]
+	})
+}