Skip to content

Commit 4555e06

Browse files
committed
fix(prometheus_adapter): 改进服务进程管理和告警表达式生成
- 在deploy.sh和build.sh中添加PID文件管理,优化服务启动和停止流程
- 修复alert_service.go中告警表达式生成的标签处理逻辑
- 使用%g代替%f格式化浮点数,去除多余的尾随零(例如 80.000000 显示为 80)
1 parent 4c7dab9 commit 4555e06

File tree

3 files changed

+137
-33
lines changed

3 files changed

+137
-33
lines changed

internal/prometheus_adapter/service/alert_service.go

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ func (s *AlertService) buildPrometheusRules(rules []model.AlertRule, ruleMetas [
402402
// 构建注释
403403
annotations := map[string]string{
404404
"description": rule.Description,
405-
"summary": fmt.Sprintf("%s %s %f", rule.Expr, rule.Op, meta.Threshold),
405+
"summary": fmt.Sprintf("%s %s %g", rule.Expr, rule.Op, meta.Threshold),
406406
}
407407

408408
// 计算for字段
@@ -475,29 +475,54 @@ func (s *AlertService) buildExpression(rule *model.AlertRule, meta *model.AlertR
475475
if len(labelMatchers) > 0 {
476476
// 如果表达式包含{,说明已经有标签选择器
477477
if strings.Contains(expr, "{") {
478-
expr = strings.Replace(expr, "}", ","+strings.Join(labelMatchers, ",")+"}", 1)
479-
} else {
480-
// 在指标名后添加标签选择器
481-
// 查找第一个非字母数字下划线的字符
482-
metricEnd := 0
483-
for i, ch := range expr {
484-
if !((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
485-
(ch >= '0' && ch <= '9') || ch == '_') {
486-
metricEnd = i
487-
break
478+
// 查找第一个 { 后的内容
479+
start := strings.Index(expr, "{")
480+
end := strings.Index(expr[start:], "}")
481+
if end != -1 {
482+
end += start
483+
existingLabels := strings.TrimSpace(expr[start+1 : end])
484+
if existingLabels == "" {
485+
// 空的标签选择器,直接替换
486+
expr = expr[:start+1] + strings.Join(labelMatchers, ",") + expr[end:]
487+
} else {
488+
// 已有标签,需要检查是否重复
489+
existingLabelMap := make(map[string]bool)
490+
// 解析现有标签
491+
labelPairs := strings.Split(existingLabels, ",")
492+
for _, pair := range labelPairs {
493+
if strings.Contains(pair, "=") {
494+
key := strings.TrimSpace(strings.Split(pair, "=")[0])
495+
if key != "" {
496+
existingLabelMap[key] = true
497+
}
498+
}
499+
}
500+
// 只添加不重复的标签
501+
newLabels := []string{}
502+
for k, v := range labels {
503+
if !existingLabelMap[k] && k != "" && v != "" {
504+
newLabels = append(newLabels, fmt.Sprintf(`%s="%s"`, k, v))
505+
}
506+
}
507+
if len(newLabels) > 0 {
508+
expr = expr[:end] + "," + strings.Join(newLabels, ",") + expr[end:]
509+
}
488510
}
489511
}
490-
if metricEnd == 0 {
491-
metricEnd = len(expr)
512+
} else {
513+
// 对于没有标签的简单指标,只处理单个单词的情况
514+
// 如果表达式包含空格、括号等,不进行标签注入
515+
if !strings.ContainsAny(expr, " ()[]{}") {
516+
// 只有单个指标名,可以安全添加标签
517+
expr = expr + "{" + strings.Join(labelMatchers, ",") + "}"
492518
}
493-
expr = expr[:metricEnd] + "{" + strings.Join(labelMatchers, ",") + "}" + expr[metricEnd:]
494519
}
495520
}
496521
}
497522

498523
// 添加比较操作符和阈值
499524
if meta.Threshold != 0 {
500-
expr = fmt.Sprintf("%s %s %f", expr, rule.Op, meta.Threshold)
525+
expr = fmt.Sprintf("%s %s %g", expr, rule.Op, meta.Threshold)
501526
}
502527

503528
return expr

scripts/prometheus_adapter/build.sh

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,26 @@ cat > "$BUILD_DIR/start.sh" << 'EOF'
118118
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
119119
BIN_PATH="$SCRIPT_DIR/bin/prometheus_adapter"
120120
CONFIG_FILE="$SCRIPT_DIR/config/prometheus_adapter.yml"
121+
PID_FILE="$SCRIPT_DIR/prometheus_adapter.pid"
122+
LOG_FILE="$SCRIPT_DIR/prometheus_adapter.log"
121123
122124
# 检查二进制文件
123125
if [ ! -f "$BIN_PATH" ]; then
124126
echo "错误: 找不到可执行文件 $BIN_PATH"
125127
exit 1
126128
fi
127129
130+
# 检查是否已在运行
131+
if [ -f "$PID_FILE" ]; then
132+
PID=$(cat "$PID_FILE")
133+
if kill -0 "$PID" 2>/dev/null; then
134+
echo "Prometheus Adapter已在运行 (PID: $PID)"
135+
exit 1
136+
else
137+
rm -f "$PID_FILE"
138+
fi
139+
fi
140+
128141
# 检查配置文件
129142
if [ -f "$CONFIG_FILE" ]; then
130143
echo "使用配置文件: $CONFIG_FILE"
@@ -140,11 +153,23 @@ fi
140153
141154
echo "启动 Prometheus Adapter..."
142155
143-
# 切换到 bin 目录,以便程序能正确找到相对路径的配置文件
156+
# 切换到脚本目录
144157
cd "$SCRIPT_DIR"
145158
146-
# 启动服务
147-
exec "$BIN_PATH"
159+
# 后台启动服务
160+
nohup "$BIN_PATH" > "$LOG_FILE" 2>&1 &
161+
PID=$!
162+
163+
# 保存PID
164+
echo $PID > "$PID_FILE"
165+
166+
echo "Prometheus Adapter已启动"
167+
echo "PID: $PID"
168+
echo "日志文件: $LOG_FILE"
169+
echo "PID文件: $PID_FILE"
170+
echo ""
171+
echo "查看日志: tail -f $LOG_FILE"
172+
echo "停止服务: ./stop.sh"
148173
EOF
149174
chmod +x "$BUILD_DIR/start.sh"
150175

@@ -155,26 +180,54 @@ cat > "$BUILD_DIR/stop.sh" << 'EOF'
155180
156181
# Prometheus Adapter 停止脚本
157182
183+
# 获取脚本所在目录
184+
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
185+
PID_FILE="$SCRIPT_DIR/prometheus_adapter.pid"
158186
APP_NAME="prometheus_adapter"
159187
160-
# 查找进程
161-
PID=$(ps aux | grep -v grep | grep "$APP_NAME" | awk '{print $2}')
188+
# 优先从PID文件读取
189+
if [ -f "$PID_FILE" ]; then
190+
PID=$(cat "$PID_FILE" 2>/dev/null)
191+
if [ -n "$PID" ] && kill -0 "$PID" 2>/dev/null; then
192+
echo "从PID文件获取进程ID: $PID"
193+
else
194+
echo "PID文件中的进程已不存在,清理PID文件"
195+
rm -f "$PID_FILE"
196+
PID=""
197+
fi
198+
else
199+
PID=""
200+
fi
201+
202+
# 如果PID文件不存在或进程已死,通过进程名查找
203+
if [ -z "$PID" ]; then
204+
PID=$(ps aux | grep -v grep | grep "$APP_NAME" | awk '{print $2}')
205+
fi
162206
163207
if [ -z "$PID" ]; then
164208
echo "没有找到运行中的 $APP_NAME 进程"
165209
exit 0
166210
fi
167211
168212
echo "停止 $APP_NAME (PID: $PID)..."
169-
kill -TERM $PID
213+
kill -TERM $PID 2>/dev/null || true
170214
171215
# 等待进程退出
172-
sleep 2
216+
count=0
217+
while [ $count -lt 10 ] && ps -p "$PID" > /dev/null 2>&1; do
218+
sleep 1
219+
count=$((count + 1))
220+
done
221+
222+
# 检查是否已退出
223+
if ps -p "$PID" > /dev/null 2>&1; then
224+
echo "强制停止 $APP_NAME..."
225+
kill -KILL "$PID" 2>/dev/null || true
226+
fi
173227
174-
# 检查是否还在运行
175-
if ps -p $PID > /dev/null 2>&1; then
176-
echo "强制停止进程..."
177-
kill -KILL $PID
228+
# 清理PID文件
229+
if [ -f "$PID_FILE" ]; then
230+
rm -f "$PID_FILE"
178231
fi
179232
180233
echo "$APP_NAME 已停止"

scripts/prometheus_adapter/deploy.sh

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,16 @@ fi
132132

133133
# 检查是否有运行中的服务
134134
check_running_service() {
135+
# 优先从PID文件读取
136+
if [ -f "$DEPLOY_DIR/prometheus_adapter.pid" ]; then
137+
local pid=$(cat "$DEPLOY_DIR/prometheus_adapter.pid" 2>/dev/null)
138+
if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
139+
echo "$pid"
140+
return
141+
fi
142+
fi
143+
144+
# 如果PID文件不存在或进程已死,通过进程名查找
135145
local pid=$(ps aux | grep -v grep | grep "prometheus_adapter" | grep -v "$0" | awk '{print $2}')
136146
if [ -n "$pid" ]; then
137147
echo "$pid"
@@ -158,6 +168,11 @@ stop_service() {
158168
kill -KILL "$pid" 2>/dev/null || true
159169
fi
160170

171+
# 清理PID文件
172+
if [ -f "$DEPLOY_DIR/prometheus_adapter.pid" ]; then
173+
rm -f "$DEPLOY_DIR/prometheus_adapter.pid"
174+
fi
175+
161176
log_info "服务已停止"
162177
fi
163178
}
@@ -340,17 +355,27 @@ if [ "$START_SERVICE" = true ] || [ "$RESTART_SERVICE" = true ]; then
340355

341356
# 启动服务
342357
cd "$DEPLOY_DIR"
343-
nohup ./start.sh > prometheus_adapter.log 2>&1 &
358+
359+
# 直接启动二进制文件而不是通过start.sh脚本
360+
nohup ./bin/prometheus_adapter > prometheus_adapter.log 2>&1 &
361+
PID=$!
362+
363+
# 保存PID到文件
364+
echo $PID > prometheus_adapter.pid
365+
366+
log_info "服务已启动 (PID: $PID)"
367+
echo "PID文件: $DEPLOY_DIR/prometheus_adapter.pid"
368+
echo "日志文件: $DEPLOY_DIR/prometheus_adapter.log"
344369

345370
# 等待服务启动
346371
sleep 2
347372

348373
# 检查是否启动成功
349-
NEW_PID=$(check_running_service)
350-
if [ -n "$NEW_PID" ]; then
351-
log_info "服务已启动 (PID: $NEW_PID)"
374+
if kill -0 "$PID" 2>/dev/null; then
375+
log_info "服务启动成功,正在运行"
352376
echo ""
353377
echo "查看日志: tail -f $DEPLOY_DIR/prometheus_adapter.log"
378+
echo "停止服务: kill \$(cat $DEPLOY_DIR/prometheus_adapter.pid)"
354379
else
355380
log_error "服务启动失败,请检查日志"
356381
exit 1
@@ -359,10 +384,11 @@ else
359384
echo ""
360385
echo "手动启动服务:"
361386
echo " cd $DEPLOY_DIR"
362-
echo " ./start.sh"
387+
echo " nohup ./bin/prometheus_adapter > prometheus_adapter.log 2>&1 &"
388+
echo " echo \$! > prometheus_adapter.pid"
363389
echo ""
364-
echo "或使用后台模式:"
365-
echo " nohup ./start.sh > prometheus_adapter.log 2>&1 &"
390+
echo "停止服务:"
391+
echo " kill \$(cat prometheus_adapter.pid)"
366392
fi
367393

368394
log_info "部署完成!"

0 commit comments

Comments
 (0)