Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ func Chunk(ctx context.Context,
cb GraphBuildCallback,
ef *ExtraFile,
randomRenameSourceFile bool,
randomSelectFile bool,
) error {
var cumuSize int64 = 0
graphSliceCount := 0
Expand All @@ -185,8 +186,16 @@ func Chunk(ctx context.Context,
log.Warn("Empty folder or file!")
return nil
}
var allFiles []Finfo
files := GetFileListAsync(args)
for item := range files {
allFiles = append(allFiles, item)
}
log.Infof("total files: %d", len(allFiles))

Shuffle(allFiles)

for _, item := range allFiles {
item := item
if randomRenameSourceFile {
item = tryRenameFileName([]Finfo{item})[0]
Expand Down
12 changes: 9 additions & 3 deletions cmd/graphsplit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ var chunkCmd = &cli.Command{
Name: "loop",
Usage: "loop chunking",
},
&cli.BoolFlag{
Name: "random-select-file",
Usage: "random select file to chunk",
Value: true,
},
},
ArgsUsage: "<input path>",
Action: func(c *cli.Context) error {
Expand All @@ -104,6 +109,7 @@ var chunkCmd = &cli.Command{
carDir := c.String("car-dir")
graphName := c.String("graph-name")
randomRenameSourceFile := c.Bool("random-rename-source-file")
randomSelectFile := c.Bool("random-select-file")
if !graphsplit.ExistDir(carDir) {
return fmt.Errorf("the path of car-dir does not exist")
}
Expand Down Expand Up @@ -144,7 +150,7 @@ var chunkCmd = &cli.Command{
if sliceSize+int(extraFileSliceSize) > 32*graphsplit.Gib {
return fmt.Errorf("slice size %d + extra file slice size %d exceeds 32 GiB", sliceSize, extraFileSliceSize)
}
log.Infof("extra file slice size: %d, random rename source file: %v", extraFileSliceSize, randomRenameSourceFile)
log.Infof("extra file slice size: %d, random rename source file: %v, random select file: %v", extraFileSliceSize, randomRenameSourceFile, randomSelectFile)
rf, err := graphsplit.NewRealFile(strings.TrimSuffix(cfg.ExtraFilePath, "/"), int64(extraFileSliceSize), int64(sliceSize), randomRenameSourceFile)
if err != nil {
return err
Expand All @@ -164,11 +170,11 @@ var chunkCmd = &cli.Command{
fmt.Println("loop: ", loop)
if !loop {
fmt.Println("chunking once...")
return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile)
return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile, randomSelectFile)
}
fmt.Println("loop chunking...")
for {
err = graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile)
err = graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile, randomSelectFile)
if err != nil {
return fmt.Errorf("failed to chunk: %v", err)
}
Expand Down
2 changes: 1 addition & 1 deletion extra_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (rf *ExtraFile) walk(randomRenameSourceFile bool) {
if randomRenameSourceFile {
rf.files = tryRenameFileName(rf.files)
}

Shuffle(rf.files)
}

func (rf *ExtraFile) getFiles() []Finfo {
Expand Down
12 changes: 12 additions & 0 deletions utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -646,3 +646,15 @@ func RandomLetters() string {

return string(selected)
}

// Shuffle randomly permutes arr in place. It accepts a slice of any element
// type and draws its randomness from a private generator seeded with the
// current time in nanoseconds, so it does not touch the package-level
// math/rand source. Nil, empty, and single-element slices are left unchanged.
func Shuffle[T any](arr []T) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	// Delegate the permutation to the standard library instead of hand-rolling
	// the Fisher-Yates loop; only the element swap is supplied here.
	r.Shuffle(len(arr), func(i, j int) {
		arr[i], arr[j] = arr[j], arr[i]
	})
}