Compare commits

...

2 Commits

Author SHA1 Message Date
yyhuni
bb5ce66a31 fix:agent容器版本号匹配 2025-12-19 19:20:15 +08:00
github-actions[bot]
ac958571a5 chore: bump version to v1.0.19 2025-12-19 11:12:14 +00:00
7 changed files with 111 additions and 15 deletions

View File

@@ -104,6 +104,8 @@ jobs:
tags: |
${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}
${{ steps.version.outputs.IS_RELEASE == 'true' && format('{0}/{1}:latest', env.IMAGE_PREFIX, matrix.image) || '' }}
build-args: |
IMAGE_TAG=${{ steps.version.outputs.VERSION }}
cache-from: type=gha
cache-to: type=gha,mode=max
provenance: false

View File

@@ -1 +1 @@
v1.0.18
v1.0.19

View File

@@ -118,8 +118,25 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
@action(detail=True, methods=['post'])
def heartbeat(self, request, pk=None):
"""接收心跳上报(写 Redis,首次心跳更新部署状态)"""
"""
接收心跳上报(写 Redis,首次心跳更新部署状态,检查版本)
请求体:
{
"cpu_percent": 50.0,
"memory_percent": 60.0,
"version": "v1.0.9"
}
返回:
{
"status": "ok",
"need_update": true/false,
"server_version": "v1.0.19"
}
"""
from apps.engine.services.worker_load_service import worker_load_service
from django.conf import settings
worker = self.get_object()
info = request.data if request.data else {}
@@ -134,7 +151,24 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
worker.status = 'online'
worker.save(update_fields=['status'])
return Response({'status': 'ok'})
# 3. 版本检查:比较 agent 版本与 server 版本
agent_version = info.get('version', '')
server_version = settings.IMAGE_TAG # Server 当前版本
need_update = False
if agent_version and agent_version != 'unknown':
# 版本不匹配时通知 agent 更新
need_update = agent_version != server_version
if need_update:
logger.info(
f"Worker {worker.name} 版本不匹配: agent={agent_version}, server={server_version}"
)
return Response({
'status': 'ok',
'need_update': need_update,
'server_version': server_version
})
@action(detail=False, methods=['post'])
def register(self, request):

View File

@@ -1,7 +1,7 @@
#!/bin/bash
# ============================================
# XingRin Agent
# 用途:心跳上报 + 负载监控
# 用途:心跳上报 + 负载监控 + 版本检查
# 适用:远程 VPS 或 Docker 容器内
# ============================================
@@ -17,6 +17,9 @@ SRC_DIR="${MARKER_DIR}/src"
ENV_FILE="${SRC_DIR}/backend/.env"
INTERVAL=${AGENT_INTERVAL:-3}
# Agent 版本(从环境变量获取,由 Docker 镜像构建时注入)
AGENT_VERSION="${IMAGE_TAG:-unknown}"
# 颜色定义
GREEN='\033[0;32m'
RED='\033[0;31m'
@@ -172,22 +175,72 @@ while true; do
fi
# 构建 JSON 数据(使用数值而非字符串,便于比较和排序)
# 包含版本号,供 Server 端检查版本一致性
JSON_DATA=$(cat <<EOF
{
"cpu_percent": $CPU_PERCENT,
"memory_percent": $MEM_PERCENT
"memory_percent": $MEM_PERCENT,
"version": "$AGENT_VERSION"
}
EOF
)
# 发送心跳
RESPONSE=$(curl -k -s -o /dev/null -w "%{http_code}" -X POST \
# 发送心跳,获取响应内容
RESPONSE_FILE=$(mktemp)
HTTP_CODE=$(curl -k -s -o "$RESPONSE_FILE" -w "%{http_code}" -X POST \
-H "Content-Type: application/json" \
-d "$JSON_DATA" \
"${API_URL}/api/workers/${WORKER_ID}/heartbeat/" 2>/dev/null || echo "000")
RESPONSE_BODY=$(cat "$RESPONSE_FILE" 2>/dev/null)
rm -f "$RESPONSE_FILE"
if [ "$RESPONSE" != "200" ] && [ "$RESPONSE" != "201" ]; then
log "${YELLOW}心跳发送失败 (HTTP $RESPONSE)${NC}"
if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "201" ]; then
log "${YELLOW}心跳发送失败 (HTTP $HTTP_CODE)${NC}"
else
# 检查是否需要更新
NEED_UPDATE=$(echo "$RESPONSE_BODY" | grep -oE '"need_update":\s*(true|false)' | grep -oE '(true|false)')
if [ "$NEED_UPDATE" = "true" ]; then
SERVER_VERSION=$(echo "$RESPONSE_BODY" | grep -oE '"server_version":\s*"[^"]+"' | sed 's/.*"\([^"]*\)"$/\1/')
log "${YELLOW}检测到版本不匹配: Agent=$AGENT_VERSION, Server=$SERVER_VERSION${NC}"
log "${GREEN}正在自动更新...${NC}"
# 执行自动更新
if [ "$RUN_MODE" = "container" ]; then
# 容器模式:通知外部重启(退出后由 docker-compose restart policy 重启)
log "容器模式:退出以触发重启更新"
exit 0
else
# 远程模式:拉取新镜像并重启 agent 容器
log "远程模式:更新 agent 镜像..."
DOCKER_USER="${DOCKER_USER:-yyhuni}"
NEW_IMAGE="${DOCKER_USER}/xingrin-agent:${SERVER_VERSION}"
# 拉取新镜像
if $DOCKER_CMD pull "$NEW_IMAGE" 2>/dev/null; then
log "${GREEN}镜像拉取成功: $NEW_IMAGE${NC}"
# 停止当前容器并用新镜像重启
CONTAINER_NAME="xingrin-agent"
$DOCKER_CMD stop "$CONTAINER_NAME" 2>/dev/null || true
$DOCKER_CMD rm "$CONTAINER_NAME" 2>/dev/null || true
# 重新启动(使用相同的环境变量)
$DOCKER_CMD run -d \
--name "$CONTAINER_NAME" \
--restart unless-stopped \
-e HEARTBEAT_API_URL="$API_URL" \
-e WORKER_ID="$WORKER_ID" \
-e IMAGE_TAG="$SERVER_VERSION" \
-v /proc:/host/proc:ro \
"$NEW_IMAGE"
log "${GREEN}Agent 已更新到 $SERVER_VERSION${NC}"
exit 0
else
log "${RED}镜像拉取失败: $NEW_IMAGE${NC}"
fi
fi
fi
fi
# 休眠

View File

@@ -1,12 +1,15 @@
# ============================================
# XingRin Agent - 轻量心跳上报镜像
# 用途:心跳上报 + 负载监控
# 用途:心跳上报 + 负载监控 + 版本检查
# 基础镜像:Alpine Linux (~5MB)
# 最终大小:~10MB
# ============================================
FROM alpine:3.19
# 构建参数:版本号
ARG IMAGE_TAG=unknown
# 安装必要工具
RUN apk add --no-cache \
bash \
@@ -17,6 +20,9 @@ RUN apk add --no-cache \
COPY backend/scripts/worker-deploy/agent.sh /app/agent.sh
RUN chmod +x /app/agent.sh
# 将版本号写入环境变量(运行时可用)
ENV IMAGE_TAG=${IMAGE_TAG}
# 工作目录
WORKDIR /app

View File

@@ -54,19 +54,19 @@ services:
retries: 3
start_period: 60s
# Agent:心跳上报 + 负载监控
# Agent:心跳上报 + 负载监控 + 版本检查
agent:
build:
context: ..
dockerfile: docker/worker/Dockerfile
dockerfile: docker/agent/Dockerfile
args:
IMAGE_TAG: ${IMAGE_TAG:-dev}
restart: always
env_file:
- .env
environment:
- SERVER_URL=http://server:8888
- WORKER_NAME=本地节点
- IS_LOCAL=true
command: bash /app/backend/scripts/worker-deploy/agent.sh
- IMAGE_TAG=${IMAGE_TAG:-dev}
depends_on:
server:
condition: service_healthy

View File

@@ -72,6 +72,7 @@ services:
- SERVER_URL=http://server:8888
- WORKER_NAME=本地节点
- IS_LOCAL=true
- IMAGE_TAG=${IMAGE_TAG}
depends_on:
server:
condition: service_healthy