-
Notifications
You must be signed in to change notification settings - Fork 543
Open
Description
现象 (Phenomenon)
删除沙箱容器内的 /tmp 目录后,通过 API 无法再执行任何命令,包括前台命令和后台命令。所有命令执行请求都会失败。
症状 (Symptoms):
- API 调用 /command/run 或 /command/run_background 返回错误
- 错误信息:failed to get stdlog descriptor 或 failed to get combined output descriptor
- 即使使用简单的 echo 命令也无法执行
- 容器需要重启才能恢复
复现步骤 (Reproduction Steps):
# 1. 启动沙箱容器
docker run opensandbox/code-interpreter
# 2. 删除 /tmp 目录
rm -rf /tmp
# 3. 尝试通过 API 执行命令
curl -X POST http://localhost:port/command/run \
-H "Content-Type: application/json" \
-d '{"command": "echo test"}'
# 4. API 返回 500 Internal Server Error
根本原因 (Root Cause)
系统对命令输出日志的处理完全依赖于 /tmp 目录:
- 命令执行初始化阶段 (components/execd/pkg/runtime/command.go:79-93)
func (c *Controller) runCommand(ctx context.Context, request *ExecuteCodeRequest) error {
    // ...
    stdout, stderr, err := c.stdLogDescriptor(session) // ← 必需 /tmp
    if err != nil {
        return fmt.Errorf("failed to get stdlog descriptor: %w", err)
    }
    // ...
}
- 临时文件创建失败 (components/execd/pkg/runtime/command_common.go:63-78)
func (c *Controller) stdLogDescriptor(session string) (io.WriteCloser, io.WriteCloser, error) {
    stdout, err := os.OpenFile(c.stdoutFileName(session), os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm)
    if err != nil {
        return nil, nil, err // ← /tmp 不存在时失败
    }
    // ...
}
- 硬编码的日志路径 (components/execd/pkg/runtime/command_common.go:81-92)
func (c *Controller) stdoutFileName(session string) string {
    return filepath.Join(os.TempDir(), session+".stdout") // ← 无法改变
}
问题本质: os.OpenFile() 在目录不存在时无法创建文件,因为 os.O_CREATE 标志只创建文件,不创建目录。系统没有任何回退机制或自动恢复能力。
解决方案 (Solution)
实现三层防御机制:
- 自动创建日志目录
- 提供多个回退路径
- 可配置的日志存储位置
步骤 1:添加标志配置
修改 components/execd/pkg/flag/flags.go:
package flag
import "time"
var (
// Existing flags...
JupyterServerHost string
JupyterServerToken string
ServerPort int
ServerLogLevel int
ServerAccessToken string
ApiGracefulShutdownTimeout time.Duration
// New: configurable directory for command log files.
// NOTE(review): the getLogDir sketch in this write-up reads the EXECD_LOG_DIR
// environment variable rather than this variable — confirm how the two are wired together.
CommandLogDir string
// New: whether the log directory should be created automatically.
// NOTE(review): no code shown in this write-up reads this flag — confirm the intended use.
AutoCreateLogDir bool
)步骤 2:优化命令日志处理
修改 components/execd/pkg/runtime/command_common.go:
package runtime
import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"sync"
"time"
)
// tailStdPipe polls file every 100ms and forwards newly appended lines to
// onExecute until done is signalled. When done fires it performs one final
// read that also flushes a trailing line that has no terminator, then returns.
func (c *Controller) tailStdPipe(file string, onExecute func(text string), done <-chan struct{}) {
	const pollInterval = 100 * time.Millisecond

	ticker := time.NewTicker(pollInterval)
	defer ticker.Stop()

	var (
		mu     sync.Mutex
		offset int64
	)
	for {
		select {
		case <-done:
			c.readFromPos(&mu, file, offset, onExecute, true)
			return
		case <-ticker.C:
			// readFromPos reports a negative position when it skipped the read
			// because the lock was busy; that sentinel is not a file offset, so
			// the previous offset is kept and the read is retried on the next tick.
			if next := c.readFromPos(&mu, file, offset, onExecute, false); next >= 0 {
				offset = next
			}
		}
	}
}
// getCommandKernel looks up the command execution context registered under
// sessionID. The lookup is performed while holding the controller's read lock.
func (c *Controller) getCommandKernel(sessionID string) *commandKernel {
	c.mu.RLock()
	defer c.mu.RUnlock()

	kernel := c.commandClientMap[sessionID]
	return kernel
}
// storeCommandKernel records kernel as the command execution context for
// sessionID, replacing any entry already stored under that key. The write is
// performed while holding the controller's write lock.
func (c *Controller) storeCommandKernel(sessionID string, kernel *commandKernel) {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.commandClientMap[sessionID] = kernel
}
// stdLogDescriptor opens (creating or truncating) the stdout and stderr log
// files for session and returns them as write handles, stdout first.
//
// The parent directory of each file is created when missing, so opening does
// not fail merely because the log directory was removed. On any error both
// returned writers are nil and no handle opened by this call is left open.
func (c *Controller) stdLogDescriptor(session string) (io.WriteCloser, io.WriteCloser, error) {
	stdoutPath := c.stdoutFileName(session)
	stderrPath := c.stderrFileName(session)

	// Each file-name helper resolves its directory on its own, so ensure both
	// parents exist instead of assuming they are the same directory.
	for _, dir := range []string{filepath.Dir(stdoutPath), filepath.Dir(stderrPath)} {
		if err := os.MkdirAll(dir, 0755); err != nil {
			return nil, nil, fmt.Errorf("failed to create log directory %s: %w", dir, err)
		}
	}

	stdout, err := os.OpenFile(stdoutPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create stdout log file %s: %w", stdoutPath, err)
	}

	stderr, err := os.OpenFile(stderrPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm)
	if err != nil {
		// Release the stdout handle; the open failure is the error worth reporting.
		_ = stdout.Close()
		return nil, nil, fmt.Errorf("failed to create stderr log file %s: %w", stderrPath, err)
	}
	return stdout, stderr, nil
}
// combinedOutputDescriptor opens (creating or truncating) the combined output
// log file for session and returns it as a write handle.
//
// The file's parent directory is created when missing. On failure the
// returned writer is a true nil interface value and the error names the path
// that could not be prepared.
func (c *Controller) combinedOutputDescriptor(session string) (io.WriteCloser, error) {
	filePath := c.combinedOutputFileName(session)

	logDir := filepath.Dir(filePath)
	if err := os.MkdirAll(logDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create log directory %s: %w", logDir, err)
	}

	file, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm)
	if err != nil {
		// Return a literal nil: handing back the nil *os.File directly would
		// produce a non-nil io.WriteCloser that wraps a nil pointer.
		return nil, fmt.Errorf("failed to create combined output log file %s: %w", filePath, err)
	}
	return file, nil
}
// getLogDir picks the directory used for command log files. Candidates are
// tried in this order and the first usable one is returned:
//
//  1. the EXECD_LOG_DIR environment variable, returned as-is when non-empty;
//  2. the system temp directory, if writable;
//  3. ".tmp" under the current working directory, created when missing;
//  4. /var/tmp, if writable;
//  5. /dev/shm, if writable;
//  6. ".opensandbox_logs" under the user's home directory, created when missing;
//  7. the relative path ".opensandbox_logs", returned without any check.
//
// Directories created by steps 3 and 6 are only returned when they are
// actually writable afterwards.
func (c *Controller) getLogDir() string {
	if dir := os.Getenv("EXECD_LOG_DIR"); dir != "" {
		return dir
	}

	if tmpDir := os.TempDir(); isDirWritable(tmpDir) {
		return tmpDir
	}

	if wd, err := os.Getwd(); err == nil {
		if fallback := filepath.Join(wd, ".tmp"); ensureWritableDir(fallback) {
			return fallback
		}
	}

	for _, dir := range []string{"/var/tmp", "/dev/shm"} {
		if isDirWritable(dir) {
			return dir
		}
	}

	if home, err := os.UserHomeDir(); err == nil {
		if logsDir := filepath.Join(home, ".opensandbox_logs"); ensureWritableDir(logsDir) {
			return logsDir
		}
	}

	// Last resort: a relative path that the caller is expected to create.
	return ".opensandbox_logs"
}

// ensureWritableDir creates dir (including parents) when needed and reports
// whether files can be created in it. os.MkdirAll succeeds for a directory
// that already exists, so its result alone does not prove writability.
func ensureWritableDir(dir string) bool {
	if err := os.MkdirAll(dir, 0755); err != nil {
		return false
	}
	return isDirWritable(dir)
}
// isDirWritable reports whether dir exists, is a directory, and allows a new
// file to be created in it.
//
// Writability is checked by creating and immediately removing a uniquely
// named probe file, so concurrent callers never collide and an existing file
// in dir is never truncated or deleted.
func isDirWritable(dir string) bool {
	info, err := os.Stat(dir)
	if err != nil || !info.IsDir() {
		return false
	}

	probe, err := os.CreateTemp(dir, ".write_test_*")
	if err != nil {
		return false
	}
	name := probe.Name()
	// Best-effort cleanup: the probe has already proven the directory is writable.
	_ = probe.Close()
	_ = os.Remove(name)
	return true
}
// stdoutFileName returns the path of the stdout log for session: the file
// "<session>.stdout" inside the directory reported by getLogDir.
func (c *Controller) stdoutFileName(session string) string {
	dir := c.getLogDir()
	name := session + ".stdout"
	return filepath.Join(dir, name)
}
// stderrFileName returns the path of the stderr log for session: the file
// "<session>.stderr" inside the directory reported by getLogDir.
func (c *Controller) stderrFileName(session string) string {
	dir := c.getLogDir()
	name := session + ".stderr"
	return filepath.Join(dir, name)
}
// combinedOutputFileName returns the path of the combined output log for
// session: the file "<session>.output" inside the directory reported by
// getLogDir.
func (c *Controller) combinedOutputFileName(session string) string {
	dir := c.getLogDir()
	name := session + ".output"
	return filepath.Join(dir, name)
}
// readFromPos streams new content from a file starting at startPos.
func (c *Controller) readFromPos(mutex *sync.Mutex, filepath string, startPos int64, onExecute func(string), flushIncomplete bool) int64 {
if !mutex.TryLock() {
return -1
}
defer mutex.Unlock()
file, err := os.Open(filepath)
if err != nil {
return startPos
}
defer file.Close()
_, _ = file.Seek(startPos, 0) //nolint:errcheck
reader := bufio.NewReader(file)
var buffer bytes.Buffer
var currentPos int64 = startPos
for {
b, err := reader.ReadByte()
if err != nil {
if err == io.EOF {
// If buffer has content but no newline, flush if needed, otherwise wait for next read
if flushIncomplete && buffer.Len() > 0 {
onExecute(buffer.String())
buffer.Reset()
}
}
break
}
currentPos++
// Check if it's a line terminator (\n or \r)
if b == '\n' || b == '\r' {
// If buffer has content, output this line
if buffer.Len() > 0 {
onExecute(buffer.String())
buffer.Reset()
}
// Skip line terminator
continue
}
buffer.WriteByte(b)
}
endPos, _ := file.Seek(0, 1)
// If the last read position doesn't end with a newline, return buffer start position and wait for next flush
if !flushIncomplete && buffer.Len() > 0 {
return currentPos - int64(buffer.Len())
}
return endPos
}步骤 3:改进命令执行错误处理
修改 components/execd/pkg/runtime/command.go(前台命令部分):
// runCommand executes shell commands and streams their output.
// Improvement: clearer error handling and logging.
//
// This excerpt shows only the setup phase; the rest of the body is elided.
// It allocates a session ID, registers a signal channel, and opens the
// stdout/stderr log files. If the log files cannot be opened it logs the
// failure, reports it through the request hooks, and returns the error.
func (c *Controller) runCommand(ctx context.Context, request *ExecuteCodeRequest) error {
session := c.newContextID()
signals := make(chan os.Signal, 1)
// Deferred calls run in reverse order, so signal.Reset runs before the channel is closed.
defer close(signals)
// No signal list is passed, so per os/signal all incoming signals are relayed to the channel.
signal.Notify(signals)
defer signal.Reset()
stdout, stderr, err := c.stdLogDescriptor(session)
if err != nil {
// Log setup failed: log it, emit the init and error hooks, then return the error.
log.Error("Failed to create log descriptors for session %s: %v. Using log directory: %s",
session, err, c.getLogDir())
request.Hooks.OnExecuteInit(session)
request.Hooks.OnExecuteError(&execute.ErrorOutput{
EName: "LogSetupError",
EValue: err.Error(),
})
return err
}
defer stdout.Close()
defer stderr.Close()
stdoutPath := c.stdoutFileName(session)
stderrPath := c.stderrFileName(session)
// ... remaining existing code is unchanged ...
}修改 components/execd/pkg/runtime/command.go(后台命令部分):
// runBackgroundCommand executes shell commands in detached mode.
// Improvement: clearer error handling and logging.
//
// This excerpt shows only the setup phase; the rest of the body is elided.
// It allocates a session ID, emits the init hook, and opens the combined
// output log file. If that fails it cancels the context, logs the failure,
// stores a non-running kernel carrying the error message, marks the command
// finished with exit code 255, and returns a wrapped error.
func (c *Controller) runBackgroundCommand(ctx context.Context, cancel context.CancelFunc, request *ExecuteCodeRequest) error {
session := c.newContextID()
request.Hooks.OnExecuteInit(session)
pipe, err := c.combinedOutputDescriptor(session)
if err != nil {
cancel()
log.Error("Failed to create output descriptor for session %s: %v. Using log directory: %s",
session, err, c.getLogDir())
// Register a placeholder kernel (pid -1, not running) that carries the failure message.
kernel := &commandKernel{
pid: -1,
running: false,
content: request.Code,
isBackground: true,
errMsg: fmt.Sprintf("Failed to setup logging: %v", err),
}
c.storeCommandKernel(session, kernel)
c.markCommandFinished(session, 255, kernel.errMsg)
return fmt.Errorf("failed to setup command output: %w", err)
}
// Both paths point at the same combined output file.
stdoutPath := c.combinedOutputFileName(session)
stderrPath := c.combinedOutputFileName(session)
// ... remaining existing code is unchanged ...
}步骤 4:更新 bootstrap.sh
修改 components/execd/bootstrap.sh:
#!/bin/bash
# ... existing code ...
# Default EXECD to /opt/opensandbox/execd when it is unset or empty.
EXECD="${EXECD:=/opt/opensandbox/execd}"
# Default the log directory to /tmp/opensandbox-execd; the := form also
# assigns that default to EXECD_LOG_DIR itself when it is unset or empty.
LOG_DIR="${EXECD_LOG_DIR:=/tmp/opensandbox-execd}"
# New: make sure the log directory exists.
if ! mkdir -p "$LOG_DIR" 2>/dev/null; then
echo "warning: failed to create primary log dir: $LOG_DIR" >&2
# Fall back to a directory under the user's home directory.
LOG_DIR="$HOME/.opensandbox_logs"
# Abort startup when the fallback directory cannot be created either.
mkdir -p "$LOG_DIR" || {
echo "ERROR: Failed to create any log directory" >&2
exit 1
}
fi
# Export the directory that was actually created.
export EXECD_LOG_DIR="$LOG_DIR"
echo "Log directory initialized: $EXECD_LOG_DIR"
# ... 其他现有代码保持不变 ...
优点 (Benefits)
✅ 自动处理 /tmp 目录缺失情况
✅ 提供多个回退路径,适应各种环境
✅ 可配置的日志存储位置
✅ 命令执行更加可靠
✅ 支持容器化和无根环境
✅ 业界最佳实践(Django、Flask 等均采用此方案)
影响范围 (Impact)
- 修改文件数:4 个
- 修改行数:~150 行
- 向后兼容性:完全兼容,无破坏性变更
- 测试覆盖:可添加单元测试
- 部署风险:低,完全是防守性增强
相关代码位置 (Related Code)
- components/execd/pkg/runtime/command.go - 命令执行入口
- components/execd/pkg/runtime/command_common.go - 日志文件处理
- components/execd/pkg/flag/flags.go - 配置选项
- components/execd/bootstrap.sh - 容器启动脚本
附加建议 (Additional Suggestions)
- 添加单元测试确保回退机制工作正常
- 添加日志记录实际使用的日志目录
- 监控告警检测频繁的目录创建失败
- 文档更新说明 EXECD_LOG_DIR 环境变量用途
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels