diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 522e56f9..0f46a77a 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -104,6 +104,8 @@ jobs: tags: | ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }} ${{ steps.version.outputs.IS_RELEASE == 'true' && format('{0}/{1}:latest', env.IMAGE_PREFIX, matrix.image) || '' }} + build-args: | + IMAGE_TAG=${{ steps.version.outputs.VERSION }} cache-from: type=gha cache-to: type=gha,mode=max provenance: false diff --git a/backend/apps/engine/views/worker_views.py b/backend/apps/engine/views/worker_views.py index 7505d3d4..2669bd78 100644 --- a/backend/apps/engine/views/worker_views.py +++ b/backend/apps/engine/views/worker_views.py @@ -118,8 +118,25 @@ class WorkerNodeViewSet(viewsets.ModelViewSet): @action(detail=True, methods=['post']) def heartbeat(self, request, pk=None): - """接收心跳上报(写 Redis,首次心跳更新部署状态)""" + """ + 接收心跳上报(写 Redis,首次心跳更新部署状态,检查版本) + + 请求体: + { + "cpu_percent": 50.0, + "memory_percent": 60.0, + "version": "v1.0.9" + } + + 返回: + { + "status": "ok", + "need_update": true/false, + "server_version": "v1.0.19" + } + """ from apps.engine.services.worker_load_service import worker_load_service + from django.conf import settings worker = self.get_object() info = request.data if request.data else {} @@ -134,7 +151,24 @@ class WorkerNodeViewSet(viewsets.ModelViewSet): worker.status = 'online' worker.save(update_fields=['status']) - return Response({'status': 'ok'}) + # 3. 版本检查:比较 agent 版本与 server 版本 + agent_version = info.get('version', '') + server_version = settings.IMAGE_TAG # Server 当前版本 + need_update = False + + if agent_version and agent_version != 'unknown': + # 版本不匹配时通知 agent 更新 + need_update = agent_version != server_version + if need_update: + logger.info( + f"Worker {worker.name} 版本不匹配: agent={agent_version}, server={server_version}" + ) + + return Response({ + 'status': 'ok', + 'need_update': need_update, + 'server_version': server_version + }) @action(detail=False, methods=['post']) def register(self, request): diff --git a/backend/scripts/worker-deploy/agent.sh b/backend/scripts/worker-deploy/agent.sh index 4ca410a9..dac2ab39 100755 --- a/backend/scripts/worker-deploy/agent.sh +++ b/backend/scripts/worker-deploy/agent.sh @@ -1,7 +1,7 @@ #!/bin/bash # ============================================ # XingRin Agent -# 用途:心跳上报 + 负载监控 +# 用途:心跳上报 + 负载监控 + 版本检查 # 适用:远程 VPS 或 Docker 容器内 # ============================================ @@ -17,6 +17,9 @@ SRC_DIR="${MARKER_DIR}/src" ENV_FILE="${SRC_DIR}/backend/.env" INTERVAL=${AGENT_INTERVAL:-3} +# Agent 版本(从环境变量获取,由 Docker 镜像构建时注入) +AGENT_VERSION="${IMAGE_TAG:-unknown}" + # 颜色定义 GREEN='\033[0;32m' RED='\033[0;31m' @@ -172,22 +175,72 @@ while true; do fi # 构建 JSON 数据(使用数值而非字符串,便于比较和排序) + # 包含版本号,供 Server 端检查版本一致性 JSON_DATA=$(cat </dev/null || echo "000") + RESPONSE_BODY=$(cat "$RESPONSE_FILE" 2>/dev/null) + rm -f "$RESPONSE_FILE" - if [ "$RESPONSE" != "200" ] && [ "$RESPONSE" != "201" ]; then - log "${YELLOW}心跳发送失败 (HTTP $RESPONSE)${NC}" + if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "201" ]; then + log "${YELLOW}心跳发送失败 (HTTP $HTTP_CODE)${NC}" + else + # 检查是否需要更新 + NEED_UPDATE=$(echo "$RESPONSE_BODY" | grep -oE '"need_update":\s*(true|false)' | grep -oE '(true|false)') + if [ "$NEED_UPDATE" = "true" ]; then + SERVER_VERSION=$(echo "$RESPONSE_BODY" | grep -oE '"server_version":\s*"[^"]+"' | sed 's/.*"\([^"]*\)"$/\1/') + log "${YELLOW}检测到版本不匹配: Agent=$AGENT_VERSION, Server=$SERVER_VERSION${NC}" + log "${GREEN}正在自动更新...${NC}" + + # 执行自动更新 + if [ "$RUN_MODE" = "container" ]; then + # 容器模式:通知外部重启(退出后由 docker-compose restart policy 重启) + log "容器模式:退出以触发重启更新" + exit 0 + else + # 远程模式:拉取新镜像并重启 agent 容器 + log "远程模式:更新 agent 镜像..." + DOCKER_USER="${DOCKER_USER:-yyhuni}" + NEW_IMAGE="${DOCKER_USER}/xingrin-agent:${SERVER_VERSION}" + + # 拉取新镜像 + if $DOCKER_CMD pull "$NEW_IMAGE" 2>/dev/null; then + log "${GREEN}镜像拉取成功: $NEW_IMAGE${NC}" + + # 停止当前容器并用新镜像重启 + CONTAINER_NAME="xingrin-agent" + $DOCKER_CMD stop "$CONTAINER_NAME" 2>/dev/null || true + $DOCKER_CMD rm "$CONTAINER_NAME" 2>/dev/null || true + + # 重新启动(使用相同的环境变量) + $DOCKER_CMD run -d \ + --name "$CONTAINER_NAME" \ + --restart unless-stopped \ + -e HEARTBEAT_API_URL="$API_URL" \ + -e WORKER_ID="$WORKER_ID" \ + -e IMAGE_TAG="$SERVER_VERSION" \ + -v /proc:/host/proc:ro \ + "$NEW_IMAGE" + + log "${GREEN}Agent 已更新到 $SERVER_VERSION${NC}" + exit 0 + else + log "${RED}镜像拉取失败: $NEW_IMAGE${NC}" + fi + fi + fi fi # 休眠 diff --git a/docker/agent/Dockerfile b/docker/agent/Dockerfile index 724e4f53..2084774f 100644 --- a/docker/agent/Dockerfile +++ b/docker/agent/Dockerfile @@ -1,12 +1,15 @@ # ============================================ # XingRin Agent - 轻量心跳上报镜像 -# 用途:心跳上报 + 负载监控 +# 用途:心跳上报 + 负载监控 + 版本检查 # 基础镜像:Alpine Linux (~5MB) # 最终大小:~10MB # ============================================ FROM alpine:3.19 +# 构建参数:版本号 +ARG IMAGE_TAG=unknown + # 安装必要工具 RUN apk add --no-cache \ bash \ @@ -17,6 +20,9 @@ RUN apk add --no-cache \ COPY backend/scripts/worker-deploy/agent.sh /app/agent.sh RUN chmod +x /app/agent.sh +# 将版本号写入环境变量(运行时可用) +ENV IMAGE_TAG=${IMAGE_TAG} + # 工作目录 WORKDIR /app diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 420b9716..de9597e5 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -54,19 +54,19 @@ services: retries: 3 start_period: 60s - # Agent:心跳上报 + 负载监控 + # Agent:心跳上报 + 负载监控 + 版本检查 agent: build: context: .. - dockerfile: docker/worker/Dockerfile + dockerfile: docker/agent/Dockerfile + args: + IMAGE_TAG: ${IMAGE_TAG:-dev} restart: always - env_file: - - .env environment: - SERVER_URL=http://server:8888 - WORKER_NAME=本地节点 - IS_LOCAL=true - command: bash /app/backend/scripts/worker-deploy/agent.sh + - IMAGE_TAG=${IMAGE_TAG:-dev} depends_on: server: condition: service_healthy diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index cd02e364..0a520890 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -72,6 +72,7 @@ services: - SERVER_URL=http://server:8888 - WORKER_NAME=本地节点 - IS_LOCAL=true + - IMAGE_TAG=${IMAGE_TAG} depends_on: server: condition: service_healthy