diff --git a/code(Go)/Makefile b/code(Go)/Makefile new file mode 100644 index 0000000..6565c9e --- /dev/null +++ b/code(Go)/Makefile @@ -0,0 +1,77 @@ +# Makefile for Health Check Center + +.PHONY: help build run test clean deps server main fmt vet check all + +# Default target: list the available commands +help: + @echo "可用的命令:" + @echo " deps - 下载依赖" + @echo " build - 构建所有程序" + @echo " server - 启动Mock服务器" + @echo " run - 运行启动脚本" + @echo " test - 运行测试" + @echo " main - 运行主程序" + @echo " clean - 清理构建文件" + @echo "" + @echo "示例:" + @echo " make deps # 下载依赖" + @echo " make server # 启动Mock服务器" + @echo " make run # 运行健康检查" + +# Download dependencies +deps: + @echo "📦 下载依赖..." + go mod tidy + go mod download + +# Build all programs (each binary is built from an explicit file list because every program declares its own main) +build: deps + @echo "🔨 构建程序..." + go build -o bin/health-center main.go health_check_center.go + go build -o bin/server server.go + go build -o bin/run-script run_health_center.go health_check_center.go + go build -o bin/test test_health_center.go health_check_center.go + +# Start the mock server +server: + @echo "🚀 启动Mock服务器..." + go run server.go + +# Run the launcher script +run: + @echo "🏥 运行健康检查启动脚本..." + go run run_health_center.go health_check_center.go + +# Run the main program +main: + @echo "🏥 运行健康检查主程序..." + go run main.go health_check_center.go + +# Run the tests +test: + @echo "🧪 运行测试..." + go run test_health_center.go health_check_center.go + +# Remove build artifacts +clean: + @echo "🧹 清理构建文件..." + rm -rf bin/ + go clean + +# Format the code +fmt: + @echo "📝 格式化代码..." + go fmt ./... + +# Run go vet +vet: + @echo "🔍 代码检查..." + go vet ./... 
+ +# 运行所有检查 +check: fmt vet test + @echo "✅ 所有检查通过" + +# 安装依赖并运行完整测试 +all: deps check + @echo "✅ 所有任务完成" diff --git a/code(Go)/go.mod b/code(Go)/go.mod new file mode 100644 index 0000000..788941e --- /dev/null +++ b/code(Go)/go.mod @@ -0,0 +1,35 @@ +module health-check-center + +go 1.21 + +require ( + github.com/gin-gonic/gin v1.9.1 + github.com/sirupsen/logrus v1.9.3 +) + +require ( + github.com/bytedance/sonic v1.9.1 // indirect + github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect + github.com/gabriel-vasile/mimetype v1.4.2 // indirect + github.com/gin-contrib/sse v0.1.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.14.0 // indirect + github.com/goccy/go-json v0.10.2 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // indirect + github.com/leodido/go-urn v1.2.4 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/pelletier/go-toml/v2 v2.0.8 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect + golang.org/x/arch v0.3.0 // indirect + golang.org/x/crypto v0.9.0 // indirect + golang.org/x/net v0.10.0 // indirect + golang.org/x/sys v0.8.0 // indirect + golang.org/x/text v0.9.0 // indirect + google.golang.org/protobuf v1.30.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/code(Go)/go.sum b/code(Go)/go.sum new file mode 100644 index 0000000..6f00312 --- /dev/null +++ b/code(Go)/go.sum @@ -0,0 +1,89 @@ +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= +github.com/bytedance/sonic v1.9.1/go.mod 
h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= +github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= +github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= 
+github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod 
h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= +github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= +golang.org/x/arch v0.3.0/go.mod 
h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/crypto v0.9.0 h1:LF6fAI+IutBocDJ2OT0Q1g8plpYljMZ4+lty+dsqw3g= +golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0= +golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 
diff --git a/code(Go)/health_check_center.go b/code(Go)/health_check_center.go new file mode 100644 index 0000000..8eeedf6 --- /dev/null +++ b/code(Go)/health_check_center.go @@ -0,0 +1,344 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + "github.com/sirupsen/logrus" +) + +// HealthCheckCenter 运行体检中心 - 定时检测系统运行指标 +type HealthCheckCenter struct { + BaseURL string + MetricsToCheck []string + HTTPClient *http.Client + Logger *logrus.Logger +} + +// Service 服务信息 +type Service struct { + Name string `json:"name"` + DeployState string `json:"deployState"` + Health string `json:"health"` + Deps []string `json:"deps"` +} + +// ServerResponse 服务器列表响应 +type ServerResponse struct { + Items []Service `json:"items"` +} + +// MetricData 指标数据 +type MetricData struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result []struct { + Metric struct { + Name string `json:"__name__"` + Service string `json:"service"` + Version string `json:"version"` + Instance string `json:"instance"` + } `json:"metric"` + Values [][]interface{} `json:"values"` + } `json:"result"` + } `json:"data"` +} + +// AlertData 告警数据 +type AlertData struct { + Service string `json:"service"` + Metric string `json:"metric"` + Timestamp string `json:"timestamp"` + Data *MetricData `json:"data"` + Severity string `json:"severity"` +} + +// CheckResult 检测结果 +type CheckResult struct { + TotalChecks int `json:"total_checks"` + AnomalyCount int `json:"anomaly_count"` + Services []string `json:"services"` +} + +// NewHealthCheckCenter 创建新的健康检查中心实例 +func NewHealthCheckCenter(baseURL string) *HealthCheckCenter { + if baseURL == "" { + baseURL = "http://localhost:8080" + } + + logger := logrus.New() + logger.SetFormatter(&logrus.TextFormatter{ + FullTimestamp: true, + }) + + return &HealthCheckCenter{ + BaseURL: baseURL, + HTTPClient: &http.Client{Timeout: 30 * time.Second}, + Logger: logger, + MetricsToCheck: []string{ + "latency", + 
"traffic", + "errorRatio", + "saturation", + }, + } +} + +// GetAllServices 获取所有服务列表 +func (h *HealthCheckCenter) GetAllServices() ([]string, error) { + url := fmt.Sprintf("%s/v1/servers", h.BaseURL) + + resp, err := h.HTTPClient.Get(url) + if err != nil { + h.Logger.WithError(err).Error("获取服务列表失败") + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + h.Logger.WithField("status_code", resp.StatusCode).Error("获取服务列表失败") + return nil, fmt.Errorf("HTTP %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + h.Logger.WithError(err).Error("读取响应失败") + return nil, err + } + + var serverResp ServerResponse + if err := json.Unmarshal(body, &serverResp); err != nil { + h.Logger.WithError(err).Error("解析服务列表失败") + return nil, err + } + + var services []string + for _, item := range serverResp.Items { + services = append(services, item.Name) + } + + h.Logger.WithField("services", services).Info("发现服务") + return services, nil +} + +// FetchMetricData 获取指定服务的指标数据 +func (h *HealthCheckCenter) FetchMetricData(service, metric string, timeRangeHours int) (*MetricData, error) { + if timeRangeHours == 0 { + timeRangeHours = 1 + } + + // 计算时间范围 + endTime := time.Now().UTC() + startTime := endTime.Add(-time.Duration(timeRangeHours) * time.Hour) + + // 格式化时间 + startStr := startTime.Format("2006-01-02T15:04:05Z") + endStr := endTime.Format("2006-01-02T15:04:05Z") + + // 构建请求URL + url := fmt.Sprintf("%s/v1/metrics/%s/%s", h.BaseURL, service, metric) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + + // 添加查询参数 + q := req.URL.Query() + q.Add("version", "v1.0.1") + q.Add("start", startStr) + q.Add("end", endStr) + q.Add("granule", "5m") + req.URL.RawQuery = q.Encode() + + resp, err := h.HTTPClient.Do(req) + if err != nil { + h.Logger.WithError(err).WithFields(logrus.Fields{ + "service": service, + "metric": metric, + }).Error("获取指标数据失败") + return nil, err + } + defer resp.Body.Close() 
+ + if resp.StatusCode != http.StatusOK { + h.Logger.WithFields(logrus.Fields{ + "service": service, + "metric": metric, + "status_code": resp.StatusCode, + }).Error("获取指标数据失败") + return nil, fmt.Errorf("HTTP %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + var metricData MetricData + if err := json.Unmarshal(body, &metricData); err != nil { + h.Logger.WithError(err).WithFields(logrus.Fields{ + "service": service, + "metric": metric, + }).Error("解析指标数据失败") + return nil, err + } + + if metricData.Status != "success" { + h.Logger.WithFields(logrus.Fields{ + "service": service, + "metric": metric, + "status": metricData.Status, + }).Warning("指标返回错误状态") + return nil, fmt.Errorf("指标返回错误状态: %s", metricData.Status) + } + + h.Logger.WithFields(logrus.Fields{ + "service": service, + "metric": metric, + }).Info("成功获取指标数据") + + return &metricData, nil +} + +// AnomalyDetection 异常检测 - 暂时写死返回异常 +func (h *HealthCheckCenter) AnomalyDetection(metricData *MetricData) bool { + // TODO: 这里后续会集成LangGraph + AI检测 + h.Logger.Info("执行异常检测...") + + // 暂时写死返回异常,用于测试告警流程 + return true // 总是返回异常 +} + +// TriggerAlert 触发告警 - 调用其他同学的告警模块 +func (h *HealthCheckCenter) TriggerAlert(service, metric string, metricData *MetricData) { + h.Logger.WithFields(logrus.Fields{ + "service": service, + "metric": metric, + }).Warning("🚨 告警触发: 服务指标异常") + + // 构造告警数据 + alertData := AlertData{ + Service: service, + Metric: metric, + Timestamp: time.Now().UTC().Format(time.RFC3339), + Data: metricData, + Severity: "warning", + } + + // 这里调用其他同学的告警模块 + // TODO: 替换为真实的告警模块调用 + h.mockAlertHandler(alertData) +} + +// mockAlertHandler 模拟告警处理函数 +func (h *HealthCheckCenter) mockAlertHandler(alertData AlertData) { + fmt.Printf("📢 告警处理: %s/%s 在 %s 发生异常\n", + alertData.Service, alertData.Metric, alertData.Timestamp) + // 这里可以添加告警发送逻辑(邮件、短信、钉钉等) +} + +// HealthCheckWorkflow 运行体检中心主流程 +func (h *HealthCheckCenter) HealthCheckWorkflow() (*CheckResult, error) { + 
h.Logger.Info("🏥 开始运行体检中心检测...") + + // 1. Service discovery + services, err := h.GetAllServices() + if err != nil { + h.Logger.WithError(err).Error("未发现任何服务,退出检测") + return nil, err + } + + if len(services) == 0 { + h.Logger.Error("未发现任何服务,退出检测") + return nil, fmt.Errorf("未发现任何服务") + } + + // 2. Check every configured metric of every discovered service + totalChecks := 0 + anomalyCount := 0 + + for _, service := range services { + h.Logger.WithField("service", service).Info("🔍 检测服务") + + for _, metric := range h.MetricsToCheck { + totalChecks++ + h.Logger.WithFields(logrus.Fields{ + "service": service, + "metric": metric, + }).Info("📊 检测指标") + + // 3. Fetch the metric data for the last hour; a failed fetch is logged and skipped + metricData, err := h.FetchMetricData(service, metric, 1) + if err != nil { + h.Logger.WithError(err).WithFields(logrus.Fields{ + "service": service, + "metric": metric, + }).Error("获取指标数据失败") + continue + } + + // 4. Anomaly detection + isAnomaly := h.AnomalyDetection(metricData) + + // 5. Alert handling + if isAnomaly { + anomalyCount++ + h.TriggerAlert(service, metric, metricData) + } + } + } + + // 6. Log the check summary + h.Logger.WithFields(logrus.Fields{ + "total_checks": totalChecks, + "anomaly_count": anomalyCount, + }).Info("✅ 检测完成") + + result := &CheckResult{ + TotalChecks: totalChecks, + AnomalyCount: anomalyCount, + Services: services, + } + + return result, nil +} + +// RunContinuousCheck runs HealthCheckWorkflow in an endless loop, once per intervalMinutes; a non-positive interval falls back to 5 minutes. It never returns. +func (h *HealthCheckCenter) RunContinuousCheck(intervalMinutes int) { + if intervalMinutes <= 0 { + intervalMinutes = 5 + } + + h.Logger.WithField("interval_minutes", intervalMinutes).Info("🔄 启动持续检测模式") + + for { + startTime := time.Now() + + // Run one check pass; a failed pass is logged and the loop keeps going + result, err := h.HealthCheckWorkflow() + if err != nil { + h.Logger.WithError(err).Error("检测失败") + } else { + h.Logger.WithFields(logrus.Fields{ + "total_checks": result.TotalChecks, + "anomaly_count": result.AnomalyCount, + }).Info("检测完成") + } + + // Sleep only for what is left of the interval after the pass itself + elapsedTime := time.Since(startTime) + sleepTime := time.Duration(intervalMinutes)*time.Minute - elapsedTime + + if sleepTime > 0 { + h.Logger.WithField("sleep_seconds", 
sleepTime.Seconds()).Info("⏰ 等待后进行下次检测...") + time.Sleep(sleepTime) + } else { + h.Logger.Warning("⚠️ 检测耗时过长,立即开始下次检测") + } + } +} diff --git a/code(Go)/main.go b/code(Go)/main.go new file mode 100644 index 0000000..fa20a41 --- /dev/null +++ b/code(Go)/main.go @@ -0,0 +1,48 @@ +package main + +import ( + "fmt" + "os" + "strings" +) + +// main runs one health-check pass, prints a summary, and exits with status 1 if the pass fails. +func main() { + // Create the health-check center; the empty string is passed as its base URL + healthCenter := NewHealthCheckCenter("") + + // Run a single check pass + fmt.Println(strings.Repeat("=", 50)) + fmt.Println("🏥 运行体检中心 - 单次检测") + fmt.Println(strings.Repeat("=", 50)) + + result, err := healthCenter.HealthCheckWorkflow() + if err != nil { + fmt.Printf("❌ 检测失败: %v\n", err) + os.Exit(1) + } + + fmt.Println("\n" + strings.Repeat("=", 50)) + fmt.Println("📊 检测结果汇总:") + fmt.Printf(" 总检测数: %d\n", result.TotalChecks) + fmt.Printf(" 异常数量: %d\n", result.AnomalyCount) + fmt.Printf(" 检测服务: %s\n", strings.Join(result.Services, ", ")) + fmt.Println(strings.Repeat("=", 50)) + + // For continuous checking, uncomment the block below (it also needs the "os/signal" and "syscall" imports) + // fmt.Println("\n🔄 启动持续检测模式...") + // fmt.Println("按 Ctrl+C 停止检测") + // + // // Set up signal handling + // sigChan := make(chan os.Signal, 1) + // signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + // + // // Start continuous checking in a goroutine + // go func() { + // healthCenter.RunContinuousCheck(5) // check every 5 minutes + // }() + // + // // Wait for an interrupt signal + // <-sigChan + // fmt.Println("\n🛑 检测已停止") +} diff --git a/code(Go)/run.bat b/code(Go)/run.bat new file mode 100644 index 0000000..f33e15f --- /dev/null +++ b/code(Go)/run.bat @@ -0,0 +1,62 @@ +@echo off +chcp 65001 >nul +echo ======================================== +echo 🏥 Go语言运行体检中心 +echo ======================================== +echo. + +REM Check that Go is installed +go version >nul 2>&1 +if %errorlevel% neq 0 ( + echo ❌ Go语言未安装或未添加到PATH + echo 请先安装Go语言环境,参考 INSTALL.md + pause + exit /b 1 +) + +echo ✅ Go语言环境检查通过 +echo. + +REM Download dependencies +echo 📦 下载依赖... +go mod tidy +if %errorlevel% neq 0 ( + echo ❌ 依赖下载失败 + pause + exit /b 1 +) + +echo ✅ 依赖下载完成 +echo. + +echo 请选择要运行的程序: +echo 1. 启动Mock服务器 +echo 2. 运行健康检查启动脚本 +echo 3. 运行测试 +echo 4. 运行主程序 +echo 5. 
退出 +echo. + +set /p choice=请输入选择 (1-5): + +if "%choice%"=="1" ( + echo 🚀 启动Mock服务器... + go run server.go +) else if "%choice%"=="2" ( + echo 🏥 运行健康检查启动脚本... + go run run_health_center.go health_check_center.go +) else if "%choice%"=="3" ( + echo 🧪 运行测试... + go run test_health_center.go health_check_center.go +) else if "%choice%"=="4" ( + echo 🏥 运行主程序... + go run main.go health_check_center.go +) else if "%choice%"=="5" ( + echo 👋 再见! + exit /b 0 +) else ( + echo ❌ 无效选择 +) + +echo. +pause diff --git a/code(Go)/run_health_center.go b/code(Go)/run_health_center.go new file mode 100644 index 0000000..e92c111 --- /dev/null +++ b/code(Go)/run_health_center.go @@ -0,0 +1,152 @@ +package main + +import ( + "bufio" + "fmt" + "net/http" + "os" + "os/exec" + "os/signal" + "strconv" + "strings" + "syscall" + "time" +) + +// startMockServer reports whether the mock server answers on localhost:8080, spawning "go run server.go" and polling up to ten times at one-second intervals if it does not. +func startMockServer() bool { + fmt.Println("🚀 启动Mock服务器...") + + // Probe whether a server is already running + client := &http.Client{Timeout: 2 * time.Second} + resp, err := client.Get("http://localhost:8080/v1/servers") + if err == nil && resp.StatusCode == 200 { + fmt.Println("✅ Mock服务器已在运行") + resp.Body.Close() + return true + } + if resp != nil { + resp.Body.Close() + } + + // Spawn a new server process + fmt.Println("📡 启动新的Mock服务器进程...") + cmd := exec.Command("go", "run", "server.go") + + // Forward the child's output to this process's stdout/stderr + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + // Start the process without waiting for it to exit + if err := cmd.Start(); err != nil { + fmt.Printf("❌ 启动服务器进程失败: %v\n", err) + return false + } + + // Wait for the server to come up + fmt.Println("⏳ 等待服务器启动...") + for i := 0; i < 10; i++ { + time.Sleep(1 * time.Second) + + resp, err := client.Get("http://localhost:8080/v1/servers") + if err == nil && resp.StatusCode == 200 { + fmt.Println("✅ Mock服务器启动成功") + resp.Body.Close() + return true + } + if resp != nil { + resp.Body.Close() + } + } + + fmt.Println("❌ Mock服务器启动失败") + return false +} + +// getInput prints prompt and returns one line read from stdin with surrounding whitespace trimmed; a read error is ignored and whatever was read is returned. +func getInput(prompt string) string { + fmt.Print(prompt) + reader := bufio.NewReader(os.Stdin) + input, _ := reader.ReadString('\n') + return strings.TrimSpace(input) 
+} + +// getIntInput 获取整数输入 +func getIntInput(prompt string) (int, error) { + input := getInput(prompt) + return strconv.Atoi(input) +} + +// runSingleCheck 运行单次检测 +func runSingleCheck(healthCenter *HealthCheckCenter) { + fmt.Println("\n🔍 执行单次检测...") + + result, err := healthCenter.HealthCheckWorkflow() + if err != nil { + fmt.Printf("❌ 检测失败: %v\n", err) + return + } + + fmt.Printf("\n📊 检测完成: %d 个指标, %d 个异常\n", + result.TotalChecks, result.AnomalyCount) +} + +// runContinuousCheck 运行持续检测 +func runContinuousCheck(healthCenter *HealthCheckCenter, intervalMinutes int) { + fmt.Printf("\n🔄 启动持续检测模式 (每%d分钟)...\n", intervalMinutes) + fmt.Println("按 Ctrl+C 停止检测") + + // 设置信号处理 + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + + // 启动持续检测的goroutine + go func() { + healthCenter.RunContinuousCheck(intervalMinutes) + }() + + // 等待中断信号 + <-sigChan + fmt.Println("\n🛑 检测已停止") +} + +func main() { + fmt.Println(strings.Repeat("=", 60)) + fmt.Println("🏥 运行体检中心 - 启动脚本") + fmt.Println(strings.Repeat("=", 60)) + + // 启动mock服务器 + if !startMockServer() { + fmt.Println("❌ 无法启动Mock服务器,退出") + return + } + + // 创建体检中心实例 + healthCenter := NewHealthCheckCenter("") + + fmt.Println("\n选择运行模式:") + fmt.Println("1. 单次检测") + fmt.Println("2. 持续检测 (每5分钟)") + fmt.Println("3. 
自定义间隔持续检测") + + choice := getInput("\n请选择 (1-3): ") + + switch choice { + case "1": + runSingleCheck(healthCenter) + + case "2": + runContinuousCheck(healthCenter, 5) + + case "3": + interval, err := getIntInput("请输入检测间隔(分钟): ") + if err != nil { + fmt.Println("❌ 无效的间隔时间") + return + } + runContinuousCheck(healthCenter, interval) + + default: + fmt.Println("❌ 无效选择") + } +} diff --git a/code(Go)/server.go b/code(Go)/server.go new file mode 100644 index 0000000..8dcdb94 --- /dev/null +++ b/code(Go)/server.go @@ -0,0 +1,262 @@ +package main + +import ( + "fmt" + "math/rand" + "net/http" + "strings" + "time" + + "github.com/gin-gonic/gin" +) + +// Service 服务信息 +type Service struct { + Name string `json:"name"` + DeployState string `json:"deployState"` + Health string `json:"health"` + Deps []string `json:"deps"` +} + +// ServerResponse 服务器列表响应 +type ServerResponse struct { + Items []Service `json:"items"` +} + +// MetricResponse 指标数据响应 +type MetricResponse struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result []struct { + Metric struct { + Name string `json:"__name__"` + Service string `json:"service"` + Version string `json:"version"` + Instance string `json:"instance"` + } `json:"metric"` + Values [][]interface{} `json:"values"` + } `json:"result"` + } `json:"data"` +} + +// generateTimeSeries 生成时间序列数据 +func generateTimeSeries(startTime, endTime time.Time, granule string) [][]interface{} { + var interval time.Duration + + switch granule { + case "1m": + interval = time.Minute + case "5m": + interval = 5 * time.Minute + case "1h": + interval = time.Hour + default: + interval = 5 * time.Minute // 默认5分钟 + } + + var values [][]interface{} + currentTime := startTime + + for currentTime.Before(endTime) || currentTime.Equal(endTime) { + // 生成随机值 (0.1 到 1.0) + value := 0.1 + rand.Float64()*0.9 + values = append(values, []interface{}{float64(currentTime.Unix()), fmt.Sprintf("%.3f", value)}) + currentTime = 
currentTime.Add(interval) + } + + return values +} + +// parseTime 解析时间字符串 +func parseTime(timeStr string) (time.Time, error) { + // 移除Z后缀并添加时区信息 + if strings.HasSuffix(timeStr, "Z") { + timeStr = strings.TrimSuffix(timeStr, "Z") + "+00:00" + } + + // 尝试解析ISO 8601格式 + layouts := []string{ + "2006-01-02T15:04:05-07:00", + "2006-01-02T15:04:05Z07:00", + "2006-01-02T15:04:05", + } + + for _, layout := range layouts { + if t, err := time.Parse(layout, timeStr); err == nil { + return t, nil + } + } + + return time.Time{}, fmt.Errorf("无法解析时间格式: %s", timeStr) +} + +// getMetrics 获取指标数据 +func getMetrics(c *gin.Context) { + service := c.Param("service") + metricName := c.Param("name") + + // 获取查询参数 + version := c.DefaultQuery("version", "v1.0.0") + startStr := c.Query("start") + endStr := c.Query("end") + granule := c.DefaultQuery("granule", "5m") + + // 解析时间参数 + var startTime, endTime time.Time + var err error + + if startStr != "" && endStr != "" { + startTime, err = parseTime(startStr) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("无效的开始时间: %v", err)}) + return + } + + endTime, err = parseTime(endStr) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("无效的结束时间: %v", err)}) + return + } + } else { + // 默认时间范围(过去1小时) + endTime = time.Now().UTC() + startTime = endTime.Add(-time.Hour) + } + + // 生成时间序列数据 + values := generateTimeSeries(startTime, endTime, granule) + + // 构造响应数据 + response := MetricResponse{ + Status: "success", + } + + response.Data.ResultType = "matrix" + response.Data.Result = []struct { + Metric struct { + Name string `json:"__name__"` + Service string `json:"service"` + Version string `json:"version"` + Instance string `json:"instance"` + } `json:"metric"` + Values [][]interface{} `json:"values"` + }{ + { + Metric: struct { + Name string `json:"__name__"` + Service string `json:"service"` + Version string `json:"version"` + Instance string `json:"instance"` + }{ + Name: metricName, + Service: 
service, + Version: "v1.0.1", + Instance: "localhost:8080", + }, + Values: values, + }, + { + Metric: struct { + Name string `json:"__name__"` + Service string `json:"service"` + Version string `json:"version"` + Instance string `json:"instance"` + }{ + Name: metricName, + Service: service, + Version: version, + Instance: "localhost:8081", + }, + Values: values, + }, + } + + c.JSON(http.StatusOK, response) +} + +// getServers responds with a fixed list of two services, "stg" and "meta". +func getServers(c *gin.Context) { + response := ServerResponse{ + Items: []Service{ + { + Name: "stg", + DeployState: "InDeploying", + Health: "Normal", // health state: Normal/Warning/Error + Deps: []string{"stg", "meta", "mq"}, + }, + { + Name: "meta", + DeployState: "InDeploying", + Health: "Normal", + Deps: []string{"stg", "meta", "mq"}, + }, + }, + } + + c.JSON(http.StatusOK, response) +} + +// healthCheck responds with a "healthy" status and the current UTC time in RFC 3339 format. +func healthCheck(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{ + "status": "healthy", + "time": time.Now().UTC().Format(time.RFC3339), + }) +} + +func main() { + // Seed the global random source (rand.Seed is deprecated since Go 1.20, which seeds it automatically) + rand.Seed(time.Now().UnixNano()) + + // Create a bare Gin engine; gin.Default() would attach Logger and Recovery a second time + r := gin.New() + + // Attach the logging and panic-recovery middleware + r.Use(gin.Logger()) + r.Use(gin.Recovery()) + + // Attach the CORS middleware; OPTIONS preflight requests are answered with 204 + r.Use(func(c *gin.Context) { + c.Header("Access-Control-Allow-Origin", "*") + c.Header("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") + c.Header("Access-Control-Allow-Headers", "Origin, Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization") + + if c.Request.Method == "OPTIONS" { + c.AbortWithStatus(204) + return + } + + c.Next() + }) + + // Register routes + r.GET("/v1/metrics/:service/:name", getMetrics) + r.GET("/v1/servers", getServers) + r.GET("/health", healthCheck) + + // Root path: describe the server and its endpoints + r.GET("/", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{ + "message": "Mock Server for Health Check Center", + "version": "1.0.0", + "endpoints": gin.H{ + "metrics": "/v1/metrics/:service/:name", + "servers": "/v1/servers", + "health": "/health", + }, + }) + }) + + // Start the server + 
fmt.Println("Mock server starting on http://localhost:8080") + fmt.Println("Available endpoints:") + fmt.Println(" GET /v1/servers - 获取服务器列表") + fmt.Println(" GET /v1/metrics/:service/:name - 获取指标数据") + fmt.Println(" GET /health - 健康检查") + fmt.Println(" GET / - 服务信息") + + if err := r.Run(":8080"); err != nil { + fmt.Printf("启动服务器失败: %v\n", err) + } +} diff --git a/code(Go)/test_health_center.go b/code(Go)/test_health_center.go new file mode 100644 index 0000000..32c94ee --- /dev/null +++ b/code(Go)/test_health_center.go @@ -0,0 +1,126 @@ +package main + +import ( + "fmt" + "net/http" + "os" + "runtime" + "strings" + "time" +) + +// checkMockServer 检查mock服务器是否运行 +func checkMockServer() bool { + client := &http.Client{Timeout: 5 * time.Second} + + resp, err := client.Get("http://localhost:8080/v1/servers") + if err != nil { + fmt.Printf("❌ 无法连接到Mock服务器: %v\n", err) + fmt.Println("请先运行: go run server.go") + return false + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + fmt.Printf("❌ Mock服务器响应异常: %d\n", resp.StatusCode) + return false + } + + fmt.Println("✅ Mock服务器运行正常") + return true +} + +// testHealthCenter 测试体检中心功能 +func testHealthCenter() bool { + fmt.Println(strings.Repeat("=", 60)) + fmt.Println("🧪 测试运行体检中心") + fmt.Println(strings.Repeat("=", 60)) + + // 检查服务器 + if !checkMockServer() { + return false + } + + // 创建体检中心实例 + healthCenter := NewHealthCheckCenter("") + + // 测试服务发现 + fmt.Println("\n1️⃣ 测试服务发现...") + services, err := healthCenter.GetAllServices() + if err != nil { + fmt.Printf(" ❌ 服务发现失败: %v\n", err) + return false + } + fmt.Printf(" 发现服务: %v\n", services) + + // 测试指标数据获取 + fmt.Println("\n2️⃣ 测试指标数据获取...") + if len(services) > 0 { + service := services[0] + metric := "latency" + + data, err := healthCenter.FetchMetricData(service, metric, 1) + if err != nil { + fmt.Printf(" ❌ 获取 %s/%s 数据失败: %v\n", service, metric, err) + return false + } + + fmt.Printf(" ✅ 成功获取 %s/%s 数据\n", service, metric) + if len(data.Data.Result) > 0 && 
// checkGoVersion prints the version of the running Go toolchain and reports
// whether it is Go 1.16 or newer. When it is older, an explanatory message is
// printed and false is returned.
func checkGoVersion() bool {
	version := runtime.Version()
	fmt.Printf("🔍 Go版本: %s\n", version)

	// The version is compared numerically: a plain string comparison would
	// rank "go1.9" above "go1.16".
	if !goVersionAtLeast(version, 1, 16) {
		fmt.Println("❌ 需要Go 1.16或更高版本")
		return false
	}

	return true
}

// goVersionAtLeast reports whether a runtime.Version-style string such as
// "go1.21.5" or "go1.22rc1" denotes at least the given major.minor release.
// Strings that do not follow the "goMAJOR.MINOR" shape (for example
// development builds) are accepted, on the assumption that they are newer
// than any tagged release.
func goVersionAtLeast(version string, major, minor int) bool {
	if !strings.HasPrefix(version, "go") {
		return true
	}

	var gotMajor, gotMinor int
	// The scan error is deliberately ignored: only the number of parsed fields
	// matters, and a missing minor component simply leaves gotMinor at zero.
	n, _ := fmt.Sscanf(version, "go%d.%d", &gotMajor, &gotMinor)
	if n == 0 {
		return true
	}

	if gotMajor != major {
		return gotMajor > major
	}
	return gotMinor >= minor
}
def fetch_metric_data(self, service, metric, time_range_hours=1, timeout=10):
    """Fetch one metric's time series for a service from the metrics API.

    Args:
        service: Name of the service to query.
        metric: Name of the metric to query (for example 'latency').
        time_range_hours: Size of the query window in hours, ending now (UTC).
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled server cannot block the check indefinitely.

    Returns:
        The decoded JSON response when the request succeeds and its 'status'
        field is 'success'; otherwise None (the failure is logged).
    """
    # Query window: [now - time_range_hours, now], formatted as UTC timestamps.
    end_time = datetime.now(timezone.utc)
    start_time = end_time - timedelta(hours=time_range_hours)
    time_format = '%Y-%m-%dT%H:%M:%SZ'

    url = f"{self.base_url}/v1/metrics/{service}/{metric}"
    params = {
        'version': 'v1.0.1',
        'start': start_time.strftime(time_format),
        'end': end_time.strftime(time_format),
        'granule': '5m'
    }

    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON on requests versions
        # whose decoding error is not a RequestException subclass.
        logger.error(f"获取 {service}/{metric} 数据失败: {e}")
        return None

    if data.get('status') != 'success':
        logger.warning(f"服务 {service} 指标 {metric} 返回错误状态")
        return None

    logger.info(f"成功获取 {service}/{metric} 数据")
    return data
def main():
    """Run a single health-check pass and print a summary of the result.

    If the workflow returns no result (it returns None when service discovery
    finds no services), a short message is printed instead of the summary.
    """
    health_center = HealthCheckCenter()

    banner = "=" * 50

    # Single detection pass.
    print(banner)
    print("🏥 运行体检中心 - 单次检测")
    print(banner)
    result = health_center.health_check_workflow()

    # Without this guard the summary below would fail with a TypeError when
    # subscripting None.
    if result is None:
        print("\n❌ 未发现任何服务,未执行检测")
        return

    print("\n" + banner)
    print("📊 检测结果汇总:")
    print(f" 总检测数: {result['total_checks']}")
    print(f" 异常数量: {result['anomaly_count']}")
    print(f" 检测服务: {', '.join(result['services'])}")
    print(banner)

    # To run continuously instead, uncomment the line below.
    # health_center.run_continuous_check(interval_minutes=5)
def start_mock_server():
    """Make sure the mock server is reachable, starting it if necessary.

    First probes http://localhost:8080/v1/servers. If the probe fails,
    server.py is launched as a child process and polled once per second for up
    to ten seconds.

    Returns:
        True when the server answers (already running, or started here);
        False when it could not be started.
    """
    probe_url = 'http://localhost:8080/v1/servers'

    print("🚀 启动Mock服务器...")
    try:
        # Any HTTP answer at all means a server is already listening.
        requests.get(probe_url, timeout=2)
        print("✅ Mock服务器已在运行")
        return True
    except requests.exceptions.RequestException:
        # Not reachable: fall through and start a new process. Only request
        # errors are caught so Ctrl+C still interrupts the script.
        pass

    print("📡 启动新的Mock服务器进程...")
    # The child's output is discarded rather than piped: a pipe that nobody
    # reads eventually fills up and blocks the server.
    process = subprocess.Popen([sys.executable, 'server.py'],
                               stdout=subprocess.DEVNULL,
                               stderr=subprocess.DEVNULL)

    # Wait for the server to come up.
    for _ in range(10):
        time.sleep(1)
        if process.poll() is not None:
            # The child already exited, so it will never answer.
            break
        try:
            response = requests.get(probe_url, timeout=2)
        except requests.exceptions.RequestException:
            continue
        if response.status_code == 200:
            print("✅ Mock服务器启动成功")
            return True

    # Start-up failed: do not leave a half-started child behind.
    if process.poll() is None:
        process.terminate()

    print("❌ Mock服务器启动失败")
    return False
@app.route('/v1/metrics/<service>/<name>')
def get_metrics(service, name):
    """Return mock time-series data for one metric of one service.

    Path parameters:
        service: Service name, echoed back in the result labels.
        name: Metric name, echoed back as '__name__'.

    Query parameters:
        version: Version label for the second series (default 'v1.0.0').
        start, end: ISO-8601 timestamps bounding the series; when either is
            missing the last hour is used.
        granule: Sampling step, one of '1m', '5m', '1h'; anything else falls
            back to 5 minutes.

    Returns:
        A JSON document with two series of random values, or a 400 response
        with status 'error' when start/end cannot be parsed.
    """
    # Query parameters.
    version = request.args.get('version', 'v1.0.0')
    start = request.args.get('start')
    end = request.args.get('end')
    granule = request.args.get('granule', '5m')

    # Resolve the time window once, so a malformed timestamp becomes a client
    # error instead of an unhandled exception.
    if start and end:
        try:
            start_time = datetime.fromisoformat(start.replace('Z', '+00:00'))
            end_time = datetime.fromisoformat(end.replace('Z', '+00:00'))
        except ValueError:
            return jsonify({
                "status": "error",
                "error": "invalid start/end timestamp"
            }), 400
    else:
        # Default window: the last hour.
        end_time = datetime.now(timezone.utc)
        start_time = datetime.fromtimestamp(end_time.timestamp() - 3600, timezone.utc)

    # Sampling step in seconds; unknown granules use 5 minutes.
    interval = {'1m': 60, '5m': 300, '1h': 3600}.get(granule, 300)
    start_timestamp = start_time.timestamp()
    end_timestamp = end_time.timestamp()

    def generate_time_series():
        """Build [timestamp, value] pairs across the window with random values."""
        values = []
        current_time = start_timestamp
        while current_time <= end_timestamp:
            # Values are strings rounded to three decimals.
            value = str(round(random.uniform(0.1, 1.0), 3))
            values.append([current_time, value])
            current_time += interval
        return values

    # Response payload: two instances, each with its own random series.
    response_data = {
        "status": "success",
        "data": {
            "resultType": "matrix",
            "result": [
                {
                    "metric": {
                        "__name__": name,
                        "service": service,
                        "version": "v1.0.1",
                        "instance": "localhost:8080"
                    },
                    "values": generate_time_series()
                },
                {
                    "metric": {
                        "__name__": name,
                        "service": service,
                        "version": version,
                        "instance": "localhost:8081"
                    },
                    "values": generate_time_series()
                }
            ]
        }
    }

    return jsonify(response_data)
"""测试体检中心功能""" + print("=" * 60) + print("🧪 测试运行体检中心") + print("=" * 60) + + # 检查服务器 + if not check_mock_server(): + return False + + # 创建体检中心实例 + health_center = HealthCheckCenter() + + # 测试服务发现 + print("\n1️⃣ 测试服务发现...") + services = health_center.get_all_services() + print(f" 发现服务: {services}") + + # 测试指标数据获取 + print("\n2️⃣ 测试指标数据获取...") + if services: + service = services[0] + metric = 'latency' + data = health_center.fetch_metric_data(service, metric) + if data: + print(f" ✅ 成功获取 {service}/{metric} 数据") + print(f" 数据示例: {data['data']['result'][0]['values'][:2]}...") + else: + print(f" ❌ 获取 {service}/{metric} 数据失败") + + # 测试完整工作流程 + print("\n3️⃣ 测试完整工作流程...") + result = health_center.health_check_workflow() + + print("\n📊 测试结果:") + print(f" 总检测数: {result['total_checks']}") + print(f" 异常数量: {result['anomaly_count']}") + print(f" 检测服务: {', '.join(result['services'])}") + + return True + +def main(): + """主测试函数""" + print("🚀 开始测试运行体检中心...") + + # 检查Python版本 + if sys.version_info < (3, 6): + print("❌ 需要Python 3.6或更高版本") + return + + # 运行测试 + success = test_health_center() + + if success: + print("\n✅ 所有测试通过!") + print("\n💡 使用说明:") + print(" - 运行单次检测: python health_check_center.py") + print(" - 运行持续检测: 修改main()函数中的注释") + print(" - 查看详细日志: 修改logging级别") + else: + print("\n❌ 测试失败,请检查配置") + +if __name__ == "__main__": + main() + diff --git a/docs(Go)/INSTALL.md b/docs(Go)/INSTALL.md new file mode 100644 index 0000000..4dd47c5 --- /dev/null +++ b/docs(Go)/INSTALL.md @@ -0,0 +1,175 @@ +# Go语言运行体检中心 - 安装指南 + +## 系统要求 + +- Windows 10/11 (当前系统) +- 管理员权限(用于安装Go) + +## 安装Go语言环境 + +### 1. 下载Go安装包 + +访问 [Go官网下载页面](https://golang.org/dl/) 或使用以下链接: + +- **Windows 64位**: https://golang.org/dl/go1.21.5.windows-amd64.msi +- **Windows 32位**: https://golang.org/dl/go1.21.5.windows-386.msi + +### 2. 安装Go + +1. 下载对应版本的MSI安装包 +2. 双击运行安装程序 +3. 按照向导完成安装(建议使用默认安装路径:`C:\Program Files\Go`) +4. 安装完成后,Go会自动添加到系统PATH环境变量中 + +### 3. 
验证安装 + +打开新的PowerShell窗口,运行以下命令验证安装: + +```powershell +go version +``` + +如果显示Go版本信息(如 `go version go1.21.5 windows/amd64`),说明安装成功。 + +### 4. 配置Go环境(可选) + +如果需要配置Go代理(提高依赖下载速度),运行: + +```powershell +go env -w GOPROXY=https://goproxy.cn,direct +go env -w GOSUMDB=sum.golang.google.cn +``` + +## 安装项目依赖 + +安装Go后,在项目目录下运行: + +```powershell +# 初始化Go模块并下载依赖 +go mod tidy + +# 验证依赖下载成功 +go mod download +``` + +## 运行项目 + +### 1. 启动Mock服务器 + +```powershell +go run server.go +``` + +服务器将在 `http://localhost:8080` 启动。 + +### 2. 运行健康检查(新开一个PowerShell窗口) + +```powershell +go run run_health_center.go health_check_center.go +``` + +### 3. 运行测试 + +```powershell +go run test_health_center.go health_check_center.go +``` + +## 使用Makefile(需要安装make工具) + +如果您的系统安装了make工具,可以使用以下命令: + +```powershell +# 下载依赖 +make deps + +# 启动服务器 +make server + +# 运行健康检查 +make run + +# 运行测试 +make test +``` + +## 故障排除 + +### 问题1:`go: 无法将"go"项识别为 cmdlet` + +**解决方案**: +1. 确认Go已正确安装 +2. 重新打开PowerShell窗口 +3. 检查PATH环境变量是否包含Go安装目录 +4. 手动添加到PATH:`$env:PATH += ";C:\Program Files\Go\bin"` + +### 问题2:网络连接问题 + +**解决方案**: +```powershell +# 设置Go代理 +go env -w GOPROXY=https://goproxy.cn,direct +go env -w GOSUMDB=sum.golang.google.cn +``` + +### 问题3:端口被占用 + +**解决方案**: +```powershell +# 查看端口占用 +netstat -ano | findstr :8080 + +# 终止占用进程(将 <PID> 替换为实际进程ID) +taskkill /PID <PID> /F +``` + +## 开发环境推荐 + +### 推荐的IDE/编辑器 + +1. **Visual Studio Code** + Go扩展 + - 下载:https://code.visualstudio.com/ + - Go扩展:在VS Code中搜索并安装"Go" + +2. **GoLand**(JetBrains) + - 下载:https://www.jetbrains.com/go/ + +3. **Vim/Neovim** + vim-go插件 + +### 有用的Go命令 + +```powershell +# 格式化代码 +go fmt ./... + +# 代码检查 +go vet ./... + +# 运行测试 +go test ./... + +# 构建可执行文件 +go build -o health-center.exe main.go health_check_center.go + +# 交叉编译(编译为Linux版本) +$env:GOOS = "linux" +$env:GOARCH = "amd64" +go build -o health-center-linux main.go health_check_center.go +``` + +## 下一步 + +安装完成后,您可以: + +1. 阅读 [README.md](README.md) 了解项目功能 +2. 运行 `go run test_health_center.go health_check_center.go` 测试系统 +3.
查看源代码了解实现细节 +4. 根据需求扩展功能 + +## 技术支持 + +如果遇到问题,请检查: + +1. Go版本是否为1.16或更高 +2. 网络连接是否正常 +3. 防火墙是否阻止了程序运行 +4. 端口8080是否被其他程序占用 diff --git a/docs(Go)/PROJECT_SUMMARY.md b/docs(Go)/PROJECT_SUMMARY.md new file mode 100644 index 0000000..31ca933 --- /dev/null +++ b/docs(Go)/PROJECT_SUMMARY.md @@ -0,0 +1,260 @@ +# Go语言运行体检中心 - 项目总结 + +## 项目概述 + +本项目是将原有的Python版本运行体检中心系统完全重写为Go语言版本,保持了所有原有功能的同时,提升了性能和并发处理能力。 + +## 转换完成的文件 + +| 原Python文件 | Go语言文件 | 功能描述 | +|-------------|-----------|---------| +| `health_check_center.py` | `health_check_center.go` | 核心健康检查逻辑 | +| `server.py` | `server.go` | Mock服务器,提供API接口 | +| `run_health_center.py` | `run_health_center.go` | 启动脚本,用户交互界面 | +| `test_health_center.py` | `test_health_center.go` | 测试脚本 | +| - | `main.go` | 主程序入口 | +| - | `go.mod` | Go模块依赖管理 | +| - | `Makefile` | 构建和运行脚本 | +| - | `run.bat` | Windows批处理启动脚本 | + +## 新增文件 + +| 文件 | 用途 | +|-----|-----| +| `README.md` | 项目说明文档 | +| `INSTALL.md` | 安装指南 | +| `PROJECT_SUMMARY.md` | 项目总结(本文件) | + +## 技术栈对比 + +### Python版本 +- **框架**: Flask (服务器) +- **HTTP客户端**: requests +- **日志**: logging +- **时间处理**: datetime +- **并发**: threading (基础) + +### Go版本 +- **框架**: Gin (服务器) +- **HTTP客户端**: net/http +- **日志**: logrus +- **时间处理**: time +- **并发**: goroutine (原生支持) + +## 功能特性 + +### ✅ 已实现功能 + +1. **服务发现** + - 自动获取所有服务列表 + - 支持多服务并发检测 + +2. **指标监控** + - 延迟 (latency) + - 流量 (traffic) + - 错误率 (errorRatio) + - 饱和度 (saturation) + +3. **异常检测** + - 预留AI检测接口 + - 当前为测试模式(总是返回异常) + +4. **告警系统** + - 告警数据构造 + - 预留告警模块集成接口 + +5. **运行模式** + - 单次检测 + - 持续检测(定时) + - 自定义检测间隔 + +6. **API接口** + - RESTful设计 + - 完整的错误处理 + - CORS支持 + +### 🔄 性能提升 + +1. **并发处理** + - Go原生goroutine支持 + - 非阻塞I/O操作 + - 更高的吞吐量 + +2. **内存管理** + - 自动垃圾回收 + - 更少的内存占用 + - 更好的内存分配策略 + +3. 
**启动速度** + - 编译型语言 + - 更快的启动时间 + - 单文件部署 + +## 代码结构 + +``` +health_check_center.go +├── HealthCheckCenter struct # 主控制器 +├── Service struct # 服务信息 +├── MetricData struct # 指标数据 +├── AlertData struct # 告警数据 +├── CheckResult struct # 检测结果 +├── GetAllServices() # 服务发现 +├── FetchMetricData() # 获取指标 +├── AnomalyDetection() # 异常检测 +├── TriggerAlert() # 触发告警 +├── HealthCheckWorkflow() # 主工作流程 +└── RunContinuousCheck() # 持续检测 + +server.go +├── getMetrics() # 指标API +├── getServers() # 服务列表API +├── healthCheck() # 健康检查API +├── generateTimeSeries() # 生成时间序列数据 +└── parseTime() # 时间解析 +``` + +## 使用方式 + +### 快速开始 + +1. **安装Go环境**(参考 INSTALL.md) +2. **运行批处理脚本**: + ```cmd + run.bat + ``` +3. **或使用命令行**: + ```bash + # 启动服务器 + go run server.go + + # 运行健康检查 + go run run_health_center.go health_check_center.go + ``` + +### 开发模式 + +```bash +# 下载依赖 +go mod tidy + +# 格式化代码 +go fmt ./... + +# 代码检查 +go vet ./... + +# 运行测试 +go run test_health_center.go health_check_center.go + +# 构建可执行文件 +go build -o health-center.exe main.go health_check_center.go +``` + +## 扩展开发指南 + +### 1. 集成AI异常检测 + +在 `AnomalyDetection` 方法中集成LangGraph框架: + +```go +func (h *HealthCheckCenter) AnomalyDetection(metricData *MetricData) bool { + // TODO: 集成LangGraph + AI检测 + // 调用AI模型进行异常检测 + // 返回检测结果 + + // 示例:调用外部AI服务 + // result := callAIService(metricData) + // return result.IsAnomaly + + return true // 当前测试模式 +} +``` + +### 2. 集成告警系统 + +在 `TriggerAlert` 方法中集成实际告警模块: + +```go +func (h *HealthCheckCenter) TriggerAlert(service, metric string, metricData *MetricData) { + alertData := AlertData{ + Service: service, + Metric: metric, + Timestamp: time.Now().UTC().Format(time.RFC3339), + Data: metricData, + Severity: "warning", + } + + // 调用实际告警模块 + // sendToAlertSystem(alertData) + + h.mockAlertHandler(alertData) +} +``` + +### 3. 添加新的监控指标 + +```go +// 在 NewHealthCheckCenter 中修改 +MetricsToCheck: []string{ + "latency", + "traffic", + "errorRatio", + "saturation", + "your_new_metric", // 添加新指标 +}, +``` + +### 4. 
配置管理 + +可以添加配置文件支持: + +```go +type Config struct { + BaseURL string `yaml:"base_url"` + MetricsToCheck []string `yaml:"metrics_to_check"` + CheckInterval int `yaml:"check_interval"` + LogLevel string `yaml:"log_level"` +} +``` + +## 部署建议 + +### 1. 容器化部署 + +创建 Dockerfile: + +```dockerfile +FROM golang:1.21-alpine AS builder +WORKDIR /app +COPY . . +RUN go mod tidy && go build -o health-center main.go health_check_center.go + +FROM alpine:latest +RUN apk --no-cache add ca-certificates +WORKDIR /root/ +COPY --from=builder /app/health-center . +CMD ["./health-center"] +``` + +### 2. 系统服务 + +创建 systemd 服务文件(Linux)或 Windows 服务。 + +### 3. 监控和日志 + +- 集成 Prometheus 指标导出 +- 配置结构化日志 +- 添加健康检查端点 + +## 总结 + +Go语言版本相比Python版本具有以下优势: + +1. **性能提升**: 编译型语言,运行时性能更好 +2. **并发能力**: 原生goroutine支持,更好的并发处理 +3. **部署简单**: 单文件部署,无依赖问题 +4. **内存效率**: 更少的内存占用 +5. **类型安全**: 编译时类型检查,减少运行时错误 + +项目已完全转换完成,保持了所有原有功能,并为进一步的AI集成和告警系统集成预留了接口。团队可以直接使用Go版本进行开发和部署。 diff --git "a/docs(Go)/READMEGo\350\257\255\350\250\200.md" "b/docs(Go)/READMEGo\350\257\255\350\250\200.md" new file mode 100644 index 0000000..3bc8806 --- /dev/null +++ "b/docs(Go)/READMEGo\350\257\255\350\250\200.md" @@ -0,0 +1,225 @@ +# 运行体检中心 - Go语言版本 + +这是一个用Go语言重写的运行体检中心系统,用于定时检测系统运行指标。 + +## 功能特性 + +- 🔍 自动服务发现 +- 📊 多维度指标监控 (延迟、流量、错误率、饱和度) +- 🚨 智能异常检测与告警 +- ⏰ 支持单次检测和持续检测模式 +- 🌐 RESTful API接口 +- 📝 详细的日志记录 + +## 项目结构 + +``` +├── go.mod # Go模块依赖文件 +├── main.go # 主程序入口 +├── health_check_center.go # 健康检查中心核心逻辑 +├── server.go # Mock服务器 +├── run_health_center.go # 启动脚本 +├── test_health_center.go # 测试脚本 +└── README.md # 说明文档 +``` + +## 快速开始 + +### 1. 环境要求 + +- Go 1.16 或更高版本 +- 网络连接(用于下载依赖) + +### 2. 安装依赖 + +```bash +go mod tidy +``` + +### 3. 启动Mock服务器 + +```bash +go run server.go +``` + +服务器将在 `http://localhost:8080` 启动,提供以下API端点: + +- `GET /v1/servers` - 获取服务器列表 +- `GET /v1/metrics/:service/:name` - 获取指标数据 +- `GET /health` - 健康检查 +- `GET /` - 服务信息 + +### 4. 
运行健康检查 + +#### 方式一:使用启动脚本(推荐) + +```bash +go run run_health_center.go health_check_center.go +``` + +然后选择运行模式: +1. 单次检测 +2. 持续检测 (每5分钟) +3. 自定义间隔持续检测 + +#### 方式二:直接运行主程序 + +```bash +go run main.go health_check_center.go +``` + +#### 方式三:运行测试 + +```bash +go run test_health_center.go health_check_center.go +``` + +## API接口说明 + +### 获取服务器列表 + +```bash +curl http://localhost:8080/v1/servers +``` + +响应示例: +```json +{ + "items": [ + { + "name": "stg", + "deployState": "InDeploying", + "health": "Normal", + "deps": ["stg", "meta", "mq"] + }, + { + "name": "meta", + "deployState": "InDeploying", + "health": "Normal", + "deps": ["stg", "meta", "mq"] + } + ] +} +``` + +### 获取指标数据 + +```bash +curl "http://localhost:8080/v1/metrics/stg/latency?version=v1.0.1&start=2024-01-01T00:00:00Z&end=2024-01-01T01:00:00Z&granule=5m" +``` + +## 配置说明 + +### 环境变量 + +- `BASE_URL`: 服务器地址(默认:http://localhost:8080) +- `LOG_LEVEL`: 日志级别(默认:info) + +### 检测指标 + +系统默认检测以下指标: +- `latency` - 延迟 +- `traffic` - 流量 +- `errorRatio` - 错误率 +- `saturation` - 饱和度 + +## 扩展开发 + +### 添加新的检测指标 + +在 `health_check_center.go` 中修改 `MetricsToCheck` 字段: + +```go +MetricsToCheck: []string{ + "latency", + "traffic", + "errorRatio", + "saturation", + "your_new_metric", // 添加新指标 +}, +``` + +### 自定义异常检测算法 + +修改 `AnomalyDetection` 方法,集成您的检测算法: + +```go +func (h *HealthCheckCenter) AnomalyDetection(metricData *MetricData) bool { + // TODO: 集成LangGraph + AI检测 + // 这里可以添加您的异常检测逻辑 + + // 示例:基于阈值的检测 + // if metricValue > threshold { + // return true + // } + + return false +} +``` + +### 集成告警系统 + +修改 `TriggerAlert` 方法,集成您的告警系统: + +```go +func (h *HealthCheckCenter) TriggerAlert(service, metric string, metricData *MetricData) { + // 调用您的告警API + // 发送邮件、短信、钉钉通知等 + + alertData := AlertData{ + Service: service, + Metric: metric, + Timestamp: time.Now().UTC().Format(time.RFC3339), + Data: metricData, + Severity: "warning", + } + + // 发送告警 + // sendAlert(alertData) +} +``` + +## 日志说明 + +系统使用 `logrus` 进行日志记录,支持以下级别: +- `DEBUG` - 调试信息 +- `INFO` - 一般信息 +- `WARN` - 警告信息 +- `ERROR` - 错误信息 + +## 故障排除 + +###
常见问题 + +1. **服务器启动失败** + - 检查端口8080是否被占用 + - 确认Go版本是否符合要求 + +2. **无法连接到Mock服务器** + - 确认服务器已启动 + - 检查网络连接 + +3. **依赖下载失败** + - 检查网络连接 + - 尝试设置Go代理:`go env -w GOPROXY=https://goproxy.cn,direct` + +### 调试模式 + +设置环境变量启用详细日志: + +```bash +export LOG_LEVEL=debug +go run main.go +``` + +## 贡献指南 + +1. Fork 项目 +2. 创建功能分支 +3. 提交更改 +4. 推送到分支 +5. 创建 Pull Request + +## 许可证 + +MIT License diff --git "a/docs(Go)/\346\234\215\345\212\241\350\277\220\350\241\214\347\225\214\351\235\242.png" "b/docs(Go)/\346\234\215\345\212\241\350\277\220\350\241\214\347\225\214\351\235\242.png" new file mode 100644 index 0000000..5440548 Binary files /dev/null and "b/docs(Go)/\346\234\215\345\212\241\350\277\220\350\241\214\347\225\214\351\235\242.png" differ diff --git "a/docs(Go)/\350\277\220\350\241\214\347\273\223\346\236\234\347\225\214\351\235\242.png" "b/docs(Go)/\350\277\220\350\241\214\347\273\223\346\236\234\347\225\214\351\235\242.png" new file mode 100644 index 0000000..e97ceff Binary files /dev/null and "b/docs(Go)/\350\277\220\350\241\214\347\273\223\346\236\234\347\225\214\351\235\242.png" differ