Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ func Chunk(ctx context.Context,
parallel int,
cb GraphBuildCallback,
ef *ExtraFile,
randomRenameSourceFile bool,
) error {
var cumuSize int64 = 0
graphSliceCount := 0
Expand All @@ -186,7 +187,10 @@ func Chunk(ctx context.Context,
}
files := GetFileListAsync(args)
for item := range files {
item := tryRenameFileName([]Finfo{item})[0]
item := item
if randomRenameSourceFile {
item = tryRenameFileName([]Finfo{item})[0]
}
// log.Infof("name: %s", item.Name)
fileSize := item.Info.Size()
switch {
Expand Down Expand Up @@ -221,7 +225,11 @@ func Chunk(ctx context.Context,
SeekStart: seekStart,
SeekEnd: seekEnd,
}
graphFiles = append(graphFiles, tryRenameFileName([]Finfo{fi})...)
if randomRenameSourceFile {
graphFiles = append(graphFiles, tryRenameFileName([]Finfo{fi})...)
} else {
graphFiles = append(graphFiles, fi)
}
fileSliceCount++
// todo build ipld from graphFiles
BuildIpldGraph(ctx, append(ef.getFiles(), graphFiles...), GenGraphName(graphName, graphSliceCount, sliceTotal), parentPath, carDir, parallel, cb, expectSliceSize, ef)
Expand All @@ -247,7 +255,12 @@ func Chunk(ctx context.Context,
SeekStart: seekStart,
SeekEnd: seekEnd,
}
graphFiles = append(graphFiles, tryRenameFileName([]Finfo{fi})...)
if randomRenameSourceFile {
graphFiles = append(graphFiles, tryRenameFileName([]Finfo{fi})...)
} else {
graphFiles = append(graphFiles, fi)
}

fileSliceCount++
if seekEnd-seekStart == partSliceSize-1 {
// todo build ipld from graphFiles
Expand Down
14 changes: 10 additions & 4 deletions cmd/graphsplit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ var chunkCmd = &cli.Command{
Value: false,
Usage: "rename carfile to piece",
},
&cli.BoolFlag{
Name: "random-rename-source-file",
Value: false,
Usage: "random rename source file name",
},
&cli.BoolFlag{
Name: "add-padding",
Value: false,
Expand All @@ -98,6 +103,7 @@ var chunkCmd = &cli.Command{
parentPath := c.String("parent-path")
carDir := c.String("car-dir")
graphName := c.String("graph-name")
randomRenameSourceFile := c.Bool("random-rename-source-file")
if !graphsplit.ExistDir(carDir) {
return fmt.Errorf("the path of car-dir does not exist")
}
Expand Down Expand Up @@ -138,8 +144,8 @@ var chunkCmd = &cli.Command{
if sliceSize+int(extraFileSliceSize) > 32*graphsplit.Gib {
return fmt.Errorf("slice size %d + extra file slice size %d exceeds 32 GiB", sliceSize, extraFileSliceSize)
}
log.Infof("extra file slice size: %d", extraFileSliceSize)
rf, err := graphsplit.NewRealFile(strings.TrimSuffix(cfg.ExtraFilePath, "/"), int64(extraFileSliceSize), int64(sliceSize))
log.Infof("extra file slice size: %d, random rename source file: %v", extraFileSliceSize, randomRenameSourceFile)
rf, err := graphsplit.NewRealFile(strings.TrimSuffix(cfg.ExtraFilePath, "/"), int64(extraFileSliceSize), int64(sliceSize), randomRenameSourceFile)
if err != nil {
return err
}
Expand All @@ -158,11 +164,11 @@ var chunkCmd = &cli.Command{
fmt.Println("loop: ", loop)
if !loop {
fmt.Println("chunking once...")
return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf)
return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile)
}
fmt.Println("loop chunking...")
for {
err = graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf)
err = graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb, rf, randomRenameSourceFile)
if err != nil {
return fmt.Errorf("failed to chunk: %v", err)
}
Expand Down
11 changes: 7 additions & 4 deletions extra_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type ExtraFile struct {
pieceRawSize int64
}

func NewRealFile(path string, sliceSize int64, pieceRawSize int64) (*ExtraFile, error) {
func NewRealFile(path string, sliceSize int64, pieceRawSize int64, randomRenameSourceFile bool) (*ExtraFile, error) {
rf := &ExtraFile{path: path, sliceSize: sliceSize, pieceRawSize: pieceRawSize}
if path != "" {
finfo, err := os.Stat(path)
Expand All @@ -25,18 +25,21 @@ func NewRealFile(path string, sliceSize int64, pieceRawSize int64) (*ExtraFile,
if !finfo.IsDir() {
return nil, fmt.Errorf("the path %s is not a directory", path)
}
rf.walk()
rf.walk(randomRenameSourceFile)
}

return rf, nil
}

func (rf *ExtraFile) walk() {
func (rf *ExtraFile) walk(randomRenameSourceFile bool) {
files := GetFileListAsync([]string{rf.path})
for item := range files {
rf.files = append(rf.files, item)
}
rf.files = tryRenameFileName(rf.files)
if randomRenameSourceFile {
rf.files = tryRenameFileName(rf.files)
}

}

func (rf *ExtraFile) getFiles() []Finfo {
Expand Down