Files
xingrin/backend/scripts/worker-deploy/agent.sh
2025-12-19 19:41:12 +08:00

249 lines
8.1 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# ============================================
# XingRin Agent
# 用途:心跳上报 + 负载监控 + 版本检查
# 适用:远程 VPS 或 Docker 容器内
# ============================================
# Bail out right away when the agent has been switched off via AGENT_DISABLED.
case "${AGENT_DISABLED:-false}" in
  true)
    echo "[AGENT] 已禁用,跳过启动"
    exit 0
    ;;
esac

# --- Configuration -----------------------------------------------------
# Filesystem layout used to locate the backend .env file.
MARKER_DIR="/opt/xingrin"
SRC_DIR="${MARKER_DIR}/src"
ENV_FILE="${SRC_DIR}/backend/.env"

# Pause between heartbeat iterations; AGENT_INTERVAL overrides the default 3.
INTERVAL="${AGENT_INTERVAL:-3}"

# Agent version, taken from IMAGE_TAG (per the original note, injected when
# the Docker image is built); "unknown" when the variable is absent.
AGENT_VERSION="${IMAGE_TAG:-unknown}"

# ANSI colour escapes; stored as literal backslash sequences and expanded
# later by `echo -e`.
NC='\033[0m'
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[0;33m'
#######################################
# Print one timestamped, "[AGENT]"-tagged line to stdout.
# Arguments:
#   $1 - message text; backslash escapes in it (e.g. colour codes) are
#        interpreted because the line is emitted with `echo -e`.
# Outputs:
#   "[YYYY-mm-dd HH:MM:SS] [AGENT] <message>" on stdout.
#######################################
log() {
  local stamp
  stamp="$(date +'%Y-%m-%d %H:%M:%S')"
  echo -e "[${stamp}] [AGENT] $1"
}
# Work out where the agent is running. The presence of /.dockerenv is taken
# to mean "inside a Docker container"; anything else is treated as a remote
# VPS host.
if [[ ! -f "/.dockerenv" ]]; then
  RUN_MODE="remote"
  log "运行模式: 远程 VPS"
  # Remote mode needs a docker command: use plain `docker` when `docker info`
  # succeeds, otherwise fall back to going through sudo.
  DOCKER_CMD="sudo docker"
  if docker info >/dev/null 2>&1; then
    DOCKER_CMD="docker"
  fi
else
  RUN_MODE="container"
  log "运行模式: Docker 容器内"
fi

# Remote mode reads its settings from the .env file (when it exists);
# `set -a` exports every variable the file assigns. Container mode relies on
# the variables already present in the environment.
if [[ "$RUN_MODE" == "remote" && -f "$ENV_FILE" ]]; then
  set -a
  . "$ENV_FILE"
  set +a
fi
# Resolve the runtime settings.
# API_URL is the backend API address: HEARTBEAT_API_URL wins, SERVER_URL is
# the fallback (the original note gives http://server:8888 for containers and
# https://{PUBLIC_HOST} for remote hosts as examples).
API_URL="${HEARTBEAT_API_URL:-}"
if [[ -z "$API_URL" ]]; then
  API_URL="${SERVER_URL:-}"
fi
WORKER_NAME="${WORKER_NAME:-}"

# A containerised agent is always reported as a local node; otherwise honour
# IS_LOCAL and default it to "false".
if [[ "$RUN_MODE" == "container" ]]; then
  IS_LOCAL="true"
else
  IS_LOCAL="${IS_LOCAL:-false}"
fi

log "${GREEN}Agent 启动...${NC}"
log "心跳间隔: ${INTERVAL}s"

# Nothing can be reported without an API address, so stop here.
[[ -n "$API_URL" ]] || {
  log "${RED}错误: 未配置 API 地址 (HEARTBEAT_API_URL 或 SERVER_URL)${NC}"
  exit 1
}
log "API 地址: ${API_URL}"
# ============================================
# 自注册功能(如果 WORKER_ID 未设置)
# ============================================
#######################################
# Register this node with the server and remember the ID it hands back.
# Globals:
#   API_URL     (read)         base URL of the server API
#   IS_LOCAL    (read)         true/True/TRUE/1 is sent as JSON true,
#                              anything else as JSON false
#   WORKER_NAME (read/written) defaults to "Worker-<hostname>" when empty
#   WORKER_ID   (written)      numeric ID parsed from the response
#   GREEN, RED, NC (read)      colour codes used in log lines
# Outputs:
#   Progress and error lines via log.
# Returns:
#   0 when a worker ID was obtained, 1 otherwise.
#######################################
register_worker() {
  local escaped_name is_local_json payload response
  # The response uses camelCase keys. POSIX classes replace the GNU-only \s,
  # and whitespace is tolerated on both sides of the colon.
  local id_re='"workerId"[[:space:]]*:[[:space:]]*([0-9]+)'

  if [[ -z "$WORKER_NAME" ]]; then
    WORKER_NAME="Worker-$(hostname)"
  fi
  log "注册 Worker: ${WORKER_NAME}..."

  # Escape backslashes first, then double quotes, so that any worker name
  # still produces a syntactically valid JSON string.
  escaped_name=${WORKER_NAME//\\/\\\\}
  escaped_name=${escaped_name//\"/\\\"}

  # Only the bare literals true/false are valid JSON booleans; an empty or
  # oddly-cased value would otherwise corrupt the payload.
  case "$IS_LOCAL" in
    true | True | TRUE | 1) is_local_json=true ;;
    *) is_local_json=false ;;
  esac

  printf -v payload '{"name": "%s", "is_local": %s}' \
    "$escaped_name" "$is_local_json"

  # NOTE(review): -k disables TLS certificate verification; kept from the
  # original — confirm this is intended for production servers.
  # The timeouts stop an unresponsive server from hanging the agent forever.
  if response=$(curl -k -s --connect-timeout 10 --max-time 30 -X POST \
    -H "Content-Type: application/json" \
    -d "$payload" \
    "${API_URL}/api/workers/register/" 2>/dev/null); then
    WORKER_ID=""
    if [[ "$response" =~ $id_re ]]; then
      WORKER_ID="${BASH_REMATCH[1]}"
      log "${GREEN}注册成功: ${WORKER_NAME} (ID: ${WORKER_ID})${NC}"
      return 0
    fi
  fi

  log "${RED}注册失败: ${response}${NC}"
  return 1
}
# Self-register only when no WORKER_ID has been provided.
if [[ -z "$WORKER_ID" ]]; then
  # Probe the API root up to 30 times, 5 seconds apart, and stop probing as
  # soon as curl reports success. Registration is attempted afterwards either
  # way.
  log "等待 Server 就绪..."
  for ((i = 1; i <= 30; i++)); do
    if ! curl -k -s "${API_URL}/api/" >/dev/null 2>&1; then
      log "Server 未就绪,等待... ($i/30)"
      sleep 5
      continue
    fi
    log "${GREEN}Server 已就绪${NC}"
    break
  done

  # Keep retrying every 5 seconds until registration succeeds.
  until register_worker; do
    log "${YELLOW}注册失败5 秒后重试...${NC}"
    sleep 5
  done
fi
log "Worker ID: ${WORKER_ID}"
# ============================================
# Heartbeat loop
# The agent runs independently and keeps sending heartbeats forever.
# Per the original note, the main server uses the heartbeat data to pick the
# least-loaded node when dispatching tasks.
# ============================================

# Patterns for the fields read from the heartbeat response. POSIX character
# classes replace the GNU-only \s, and whitespace is tolerated around ':'.
# NOTE(review): registration parses a camelCase key ("workerId") while these
# keys are snake_case — confirm the heartbeat endpoint really answers with
# "need_update" / "server_version".
NEED_UPDATE_RE='"need_update"[[:space:]]*:[[:space:]]*(true|false)'
SERVER_VERSION_RE='"server_version"[[:space:]]*:[[:space:]]*"([^"]+)"'

while true; do
  # Prefer the procfs mounted at /host/proc (the update path below mounts the
  # host's /proc there) so the numbers describe the host, not the container.
  if [[ -d "/host/proc" ]]; then
    PROC_DIR="/host/proc"
  else
    PROC_DIR="/proc"
  fi

  # CPU usage in percent. The counters in <proc>/stat are cumulative, so two
  # samples 0.5 s apart are taken and the busy share of the delta is reported.
  CPU_STAT1=$(awk '/^cpu /{print $2,$3,$4,$5,$6,$7,$8}' "${PROC_DIR}/stat")
  sleep 0.5
  CPU_STAT2=$(awk '/^cpu /{print $2,$3,$4,$5,$6,$7,$8}' "${PROC_DIR}/stat")
  # LC_ALL=C keeps "." as the decimal separator so the value is valid JSON.
  CPU_PERCENT=$(printf '%s %s\n' "$CPU_STAT1" "$CPU_STAT2" | LC_ALL=C awk '{
    idle1 = $4;  total1 = $1 + $2 + $3 + $4 + $5 + $6 + $7
    idle2 = $11; total2 = $8 + $9 + $10 + $11 + $12 + $13 + $14
    total_diff = total2 - total1
    if (total_diff > 0) printf "%.1f", (1 - (idle2 - idle1) / total_diff) * 100
    else printf "0.0"
  }')
  CPU_PERCENT=${CPU_PERCENT:-0.0}

  # Memory usage in percent.
  if [[ -d "/host/proc" ]]; then
    # Host view: 100 - MemAvailable/MemTotal, computed inside awk so a missing
    # field or a zero total cannot produce an awk syntax error or a division
    # by zero.
    MEM_PERCENT=$(LC_ALL=C awk '
      /^MemTotal:/     { total = $2 }
      /^MemAvailable:/ { avail = $2; have_avail = 1 }
      END {
        if (total > 0 && have_avail) printf "%.1f", 100 - (avail / total * 100)
      }' "${PROC_DIR}/meminfo")
  else
    # `free` translates its row labels, so force the C locale before matching
    # the "Mem:" row; used/total is the same ratio the original computed.
    MEM_PERCENT=$(LC_ALL=C free |
      LC_ALL=C awk '/^Mem:/ && $2 > 0 {printf "%.1f", $3 / $2 * 100}')
  fi
  # Never emit an empty number: that would make the JSON payload invalid and
  # every heartbeat would be rejected.
  MEM_PERCENT=${MEM_PERCENT:-0.0}

  # Build the JSON payload. Load figures are numbers (not strings) so they can
  # be compared and sorted; the version is included so the server can check it.
  printf -v JSON_DATA \
    '{"cpu_percent": %s, "memory_percent": %s, "version": "%s"}' \
    "$CPU_PERCENT" "$MEM_PERCENT" "$AGENT_VERSION"

  # Send the heartbeat. curl appends "\n<status>" after the body, so body and
  # status arrive in one capture without a temp file. On a failed transfer
  # curl itself reports 000; the original's extra `|| echo "000"` doubled
  # that to "000000".
  # NOTE(review): -k disables TLS certificate verification — kept as-is.
  HEARTBEAT_RAW=$(curl -k -s -w '\n%{http_code}' -X POST \
    --connect-timeout 10 --max-time 30 \
    -H "Content-Type: application/json" \
    -d "$JSON_DATA" \
    "${API_URL}/api/workers/${WORKER_ID}/heartbeat/" 2>/dev/null)
  HTTP_CODE=${HEARTBEAT_RAW##*$'\n'}
  HTTP_CODE=${HTTP_CODE:-000}
  RESPONSE_BODY=${HEARTBEAT_RAW%$'\n'*}

  if [[ "$HTTP_CODE" != "200" && "$HTTP_CODE" != "201" ]]; then
    log "${YELLOW}心跳发送失败 (HTTP $HTTP_CODE)${NC}"
  else
    # Does the server ask for an update?
    NEED_UPDATE=""
    if [[ "$RESPONSE_BODY" =~ $NEED_UPDATE_RE ]]; then
      NEED_UPDATE="${BASH_REMATCH[1]}"
    fi

    if [[ "$NEED_UPDATE" == "true" ]]; then
      SERVER_VERSION=""
      if [[ "$RESPONSE_BODY" =~ $SERVER_VERSION_RE ]]; then
        SERVER_VERSION="${BASH_REMATCH[1]}"
      fi
      log "${YELLOW}检测到版本不匹配: Agent=$AGENT_VERSION, Server=$SERVER_VERSION${NC}"
      log "${GREEN}正在自动更新...${NC}"

      if [[ "$RUN_MODE" == "container" ]]; then
        # Container mode: exit and let the container restart policy bring the
        # agent back up (per the original note, docker-compose handles it).
        log "容器模式:退出以触发重启更新"
        exit 0
      else
        # Remote mode: pull the image matching the server version and replace
        # the agent container with it.
        log "远程模式:更新 agent 镜像..."
        DOCKER_USER="${DOCKER_USER:-yyhuni}"
        NEW_IMAGE="${DOCKER_USER}/xingrin-agent:${SERVER_VERSION}"

        # DOCKER_CMD may be "sudo docker", so it is left unquoted on purpose
        # to be split into separate words.
        # shellcheck disable=SC2086
        if $DOCKER_CMD pull "$NEW_IMAGE" 2>/dev/null; then
          log "${GREEN}镜像拉取成功: $NEW_IMAGE${NC}"

          # Remove the current container; failures are ignored because it may
          # simply not exist.
          CONTAINER_NAME="xingrin-agent"
          # shellcheck disable=SC2086
          $DOCKER_CMD stop "$CONTAINER_NAME" 2>/dev/null || true
          # shellcheck disable=SC2086
          $DOCKER_CMD rm "$CONTAINER_NAME" 2>/dev/null || true

          # Start the replacement with the settings this agent is using.
          # Report success and exit only when the container actually started;
          # otherwise stay in the loop so the update is retried next time.
          # shellcheck disable=SC2086
          if $DOCKER_CMD run -d \
            --name "$CONTAINER_NAME" \
            --restart unless-stopped \
            -e HEARTBEAT_API_URL="$API_URL" \
            -e WORKER_ID="$WORKER_ID" \
            -e IMAGE_TAG="$SERVER_VERSION" \
            -v /proc:/host/proc:ro \
            "$NEW_IMAGE"; then
            log "${GREEN}Agent 已更新到 $SERVER_VERSION${NC}"
            exit 0
          else
            log "${RED}容器启动失败: $NEW_IMAGE${NC}"
          fi
        else
          log "${RED}镜像拉取失败: $NEW_IMAGE${NC}"
        fi
      fi
    fi
  fi

  # Wait before the next heartbeat.
  sleep "$INTERVAL"
done