diff --git a/cmd/entire/cli/agent/claudecode/claude.go b/cmd/entire/cli/agent/claudecode/claude.go index d65854673..0bf7da63c 100644 --- a/cmd/entire/cli/agent/claudecode/claude.go +++ b/cmd/entire/cli/agent/claudecode/claude.go @@ -2,7 +2,7 @@ package claudecode import ( - "bufio" + "bytes" "context" "encoding/json" "errors" @@ -257,7 +257,7 @@ func SanitizePathForClaude(path string) string { // GetTranscriptPosition returns the current line count of a Claude Code transcript. // Claude Code uses JSONL format, so position is the number of lines. // This is a lightweight operation that only counts lines without parsing JSON. -// Uses bufio.Reader to handle arbitrarily long lines (no size limit). +// Uses a fixed-size buffer to handle arbitrarily long lines safely. // Returns 0 if the file doesn't exist or is empty. func (c *ClaudeCodeAgent) GetTranscriptPosition(path string) (int, error) { if path == "" { @@ -273,21 +273,25 @@ func (c *ClaudeCodeAgent) GetTranscriptPosition(path string) (int, error) { } defer file.Close() - reader := bufio.NewReader(file) + buf := make([]byte, 32*1024) lineCount := 0 + var lastByte byte = '\n' for { - line, err := reader.ReadBytes('\n') + n, err := file.Read(buf) + if n > 0 { + lineCount += bytes.Count(buf[:n], []byte{'\n'}) + lastByte = buf[n-1] + } if err != nil { if err == io.EOF { - if len(line) > 0 { - lineCount++ // Count final line without trailing newline + if lastByte != '\n' { + lineCount++ } break } return 0, fmt.Errorf("failed to read transcript: %w", err) } - lineCount++ } return lineCount, nil @@ -295,7 +299,7 @@ func (c *ClaudeCodeAgent) GetTranscriptPosition(path string) (int, error) { // ExtractModifiedFilesFromOffset extracts files modified since a given line number. // For Claude Code (JSONL format), offset is the starting line number. -// Uses bufio.Reader to handle arbitrarily long lines (no size limit). +// Uses a fixed-size buffer to handle arbitrarily long lines safely. // Returns: // - files: list of file paths modified by Claude (from Write/Edit tools) // - currentPosition: total number of lines in the file @@ -307,33 +311,64 @@ func (c *ClaudeCodeAgent) ExtractModifiedFilesFromOffset(path string, startOffse file, openErr := os.Open(path) //nolint:gosec // Path comes from Claude Code transcript location if openErr != nil { + if os.IsNotExist(openErr) { + return nil, 0, nil + } return nil, 0, fmt.Errorf("failed to open transcript file: %w", openErr) } defer file.Close() - reader := bufio.NewReader(file) var lines []TranscriptLine lineNum := 0 + var lastByte byte = '\n' + buf := make([]byte, 32*1024) + var lineBuf []byte + const maxLineSize = 10 * 1024 * 1024 // 10MB limit for { - lineData, readErr := reader.ReadBytes('\n') - if readErr != nil && readErr != io.EOF { - return nil, 0, fmt.Errorf("failed to read transcript: %w", readErr) - } + n, readErr := file.Read(buf) + if n > 0 { + chunk := buf[:n] + for len(chunk) > 0 { + idx := bytes.IndexByte(chunk, '\n') + if idx == -1 { + if lineNum >= startOffset && len(lineBuf) < maxLineSize { + lineBuf = append(lineBuf, chunk...) + } + lastByte = chunk[len(chunk)-1] + break + } - if len(lineData) > 0 { - lineNum++ - if lineNum > startOffset { - var line TranscriptLine - if parseErr := json.Unmarshal(lineData, &line); parseErr == nil { - lines = append(lines, line) + if lineNum >= startOffset { + if len(lineBuf) < maxLineSize { + lineBuf = append(lineBuf, chunk[:idx]...) + var line TranscriptLine + if parseErr := json.Unmarshal(lineBuf, &line); parseErr == nil { + lines = append(lines, line) + } + } + lineBuf = lineBuf[:0] } - // Skip malformed lines silently + lineNum++ + lastByte = '\n' + chunk = chunk[idx+1:] } } - if readErr == io.EOF { - break + if readErr != nil { + if readErr == io.EOF { + if lastByte != '\n' { + if lineNum >= startOffset && len(lineBuf) > 0 && len(lineBuf) < maxLineSize { + var line TranscriptLine + if parseErr := json.Unmarshal(lineBuf, &line); parseErr == nil { + lines = append(lines, line) + } + } + lineNum++ + } + break + } + return nil, 0, fmt.Errorf("failed to read transcript: %w", readErr) } } diff --git a/cmd/entire/cli/agent/geminicli/gemini.go b/cmd/entire/cli/agent/geminicli/gemini.go index 0cbc92ba4..6567911ce 100644 --- a/cmd/entire/cli/agent/geminicli/gemini.go +++ b/cmd/entire/cli/agent/geminicli/gemini.go @@ -317,7 +317,8 @@ func (g *GeminiCLIAgent) ChunkTranscript(ctx context.Context, content []byte, ma var chunks [][]byte var currentMessages []GeminiMessage - currentSize := len(`{"messages":[]}`) // Base JSON structure size + baseSize := len(`{"messages":[]}`) + currentSize := baseSize // Base JSON structure size for i, msg := range transcript.Messages { // Marshal message to get its size @@ -330,6 +331,10 @@ func (g *GeminiCLIAgent) ChunkTranscript(ctx context.Context, content []byte, ma continue } msgSize := len(msgBytes) + 1 // +1 for comma separator + + if msgSize+baseSize > maxSize { + return nil, fmt.Errorf("single message size (%d) exceeds chunk maxSize (%d)", msgSize+baseSize, maxSize) + } if currentSize+msgSize > maxSize && len(currentMessages) > 0 { // Save current chunk diff --git a/cmd/entire/cli/agent/opencode/opencode.go b/cmd/entire/cli/agent/opencode/opencode.go index b26047fff..b299c875b 100644 --- a/cmd/entire/cli/agent/opencode/opencode.go +++ b/cmd/entire/cli/agent/opencode/opencode.go @@ -98,6 +98,10 @@ func (a *OpenCodeAgent) ChunkTranscript(_ context.Context, content []byte, maxSi } msgSize := len(msgBytes) + 1 // +1 for comma separator + if msgSize+baseSize > maxSize { + return nil, fmt.Errorf("single message size (%d) exceeds chunk maxSize (%d)", msgSize+baseSize, maxSize) + } + if currentSize+msgSize > maxSize && len(currentMessages) > 0 { // Save current chunk chunkData, err := json.Marshal(ExportSession{Info: session.Info, Messages: currentMessages}) @@ -167,7 +171,7 @@ func (a *OpenCodeAgent) GetSessionID(input *agent.HookInput) string { // GetSessionDir returns the directory where Entire stores OpenCode session transcripts. // Transcripts are ephemeral handoff files between the TS plugin and the Go hook handler. // Once checkpointed, the data lives on git refs and the file is disposable. -// Stored in os.TempDir()/entire-opencode// to avoid squatting on +// Stored in repoPath/.git/entire-opencode// to avoid squatting on // OpenCode's own directories (~/.opencode/ is project-level, not home-level). func (a *OpenCodeAgent) GetSessionDir(repoPath string) (string, error) { // Check for test environment override @@ -176,7 +180,11 @@ func (a *OpenCodeAgent) GetSessionDir(repoPath string) (string, error) { } projectDir := SanitizePathForOpenCode(repoPath) - return filepath.Join(os.TempDir(), "entire-opencode", projectDir), nil + dir := filepath.Join(repoPath, ".git", "entire-opencode", projectDir) + if err := os.MkdirAll(dir, 0o700); err != nil { + return "", fmt.Errorf("failed to create secure opencode session dir: %w", err) + } + return dir, nil } func (a *OpenCodeAgent) ResolveSessionFile(sessionDir, agentSessionID string) string { diff --git a/cmd/entire/cli/git_operations.go b/cmd/entire/cli/git_operations.go index cc555172e..b593bbae0 100644 --- a/cmd/entire/cli/git_operations.go +++ b/cmd/entire/cli/git_operations.go @@ -467,23 +467,35 @@ func FetchBlobsByHash(ctx context.Context, hashes []plumbing.Hash) error { ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() - // Build fetch args: "git fetch origin ..." - // This uses the normal transport + credential helpers, unlike fetch-pack. - args := []string{"fetch", "--no-write-fetch-head", "origin"} - for _, h := range hashes { - args = append(args, h.String()) - } - - fetchCmd := exec.CommandContext(ctx, "git", args...) - if _, fetchErr := fetchCmd.CombinedOutput(); fetchErr != nil { - logging.Debug(ctx, "fetch-by-hash failed, falling back to full metadata fetch", - slog.Int("blob_count", len(hashes)), - slog.String("error", fetchErr.Error()), - ) - // Fallback: full metadata branch fetch (pack negotiation skips already-local objects) - if fallbackErr := FetchMetadataBranch(ctx); fallbackErr != nil { - return fmt.Errorf("fetch-by-hash failed: %w; fallback fetch also failed: %w", - fetchErr, fallbackErr) + const batchSize = 500 + for i := 0; i < len(hashes); i += batchSize { + end := i + batchSize + if end > len(hashes) { + end = len(hashes) + } + + batchHashes := hashes[i:end] + // Build fetch args: "git fetch origin ..." + // This uses the normal transport + credential helpers, unlike fetch-pack. + args := []string{"fetch", "--no-write-fetch-head", "origin"} + for _, h := range batchHashes { + args = append(args, h.String()) + } + + fetchCmd := exec.CommandContext(ctx, "git", args...) + if _, fetchErr := fetchCmd.CombinedOutput(); fetchErr != nil { + logging.Debug(ctx, "fetch-by-hash failed for batch, falling back to full metadata fetch", + slog.Int("total_blobs", len(hashes)), + slog.Int("batch_start", i), + slog.Int("batch_size", len(batchHashes)), + slog.String("error", fetchErr.Error()), + ) + // Fallback: full metadata branch fetch (pack negotiation skips already-local objects) + if fallbackErr := FetchMetadataBranch(ctx); fallbackErr != nil { + return fmt.Errorf("fetch-by-hash failed: %w; fallback fetch also failed: %w", + fetchErr, fallbackErr) + } + return nil // Fallback fetched everything, no need to process remaining batches } } diff --git a/cmd/entire/cli/session/state.go b/cmd/entire/cli/session/state.go index d895a9588..e53ebc676 100644 --- a/cmd/entire/cli/session/state.go +++ b/cmd/entire/cli/session/state.go @@ -3,6 +3,7 @@ package session import ( "context" "encoding/json" + "errors" "fmt" "log/slog" "os" @@ -364,6 +365,15 @@ func (s *StateStore) Save(ctx context.Context, state *State) error { if err := os.Rename(tmpFile, stateFile); err != nil { return fmt.Errorf("failed to rename session state file: %w", err) } + + info, err := os.Lstat(stateFile) + if err != nil { + return fmt.Errorf("failed to stat session state file after rename: %w", err) + } + if info.Mode()&os.ModeSymlink != 0 || !info.Mode().IsRegular() { + _ = os.Remove(stateFile) + return errors.New("security error: state file is not a regular file (symlink / path traversal detected)") + } return nil } diff --git a/cmd/entire/cli/utils.go b/cmd/entire/cli/utils.go index fbde35738..f98f36574 100644 --- a/cmd/entire/cli/utils.go +++ b/cmd/entire/cli/utils.go @@ -10,7 +10,6 @@ import ( "github.com/charmbracelet/huh" - "github.com/entireio/cli/cmd/entire/cli/osroot" "github.com/entireio/cli/cmd/entire/cli/paths" ) @@ -87,10 +86,11 @@ func copyFile(src, dst string) error { return fmt.Errorf("copyFile: dst must be absolute, got %q", dst) } - input, err := os.ReadFile(src) + srcFile, err := os.Open(src) if err != nil { return err //nolint:wrapcheck // already present in codebase } + defer srcFile.Close() root, relPath, err := openAllowedRoot(dst) if err != nil { @@ -98,7 +98,13 @@ func copyFile(src, dst string) error { } defer root.Close() - if err := osroot.WriteFile(root, relPath, input, 0o600); err != nil { + dstFile, err := root.OpenFile(relPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) + if err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + defer dstFile.Close() + + if _, err := io.Copy(dstFile, srcFile); err != nil { return fmt.Errorf("failed to write file: %w", err) } return nil diff --git a/redact/pii.go b/redact/pii.go index e48b3db45..2b3532f4a 100644 --- a/redact/pii.go +++ b/redact/pii.go @@ -71,7 +71,31 @@ func ConfigurePII(cfg PIIConfig) { func getPIIConfig() *PIIConfig { piiConfigMu.RLock() defer piiConfigMu.RUnlock() - return piiConfig + + if piiConfig == nil { + return nil + } + + cfgCopy := &PIIConfig{ + Enabled: piiConfig.Enabled, + patterns: piiConfig.patterns, + } + + if piiConfig.Categories != nil { + cfgCopy.Categories = make(map[PIICategory]bool, len(piiConfig.Categories)) + for k, v := range piiConfig.Categories { + cfgCopy.Categories[k] = v + } + } + + if piiConfig.CustomPatterns != nil { + cfgCopy.CustomPatterns = make(map[string]string, len(piiConfig.CustomPatterns)) + for k, v := range piiConfig.CustomPatterns { + cfgCopy.CustomPatterns[k] = v + } + } + + return cfgCopy } // Pre-compiled builtin PII regexes.