Compare commits

..

6 Commits

Author SHA1 Message Date
yyhuni
9703add22d feat(nuclei): support configurable Nuclei templates repository with Gitee mirror
- Add NUCLEI_TEMPLATES_REPO_URL setting to allow runtime configuration of template repository URL
- Refactor install.sh mirror parameter handling to use boolean flag instead of URL string
- Replace hardcoded GitHub repository URL with Gitee mirror option for faster downloads in mainland China
- Update environment variable configuration to persist Nuclei repository URL in .env file
- Improve shell script variable quoting and conditional syntax for better reliability
- Simplify mirror detection logic by using USE_MIRROR boolean flag throughout installation process
- Add support for automatic Gitee mirror selection when --mirror flag is enabled
2026-01-11 17:19:09 +08:00
github-actions[bot]
f5a489e2d6 chore: bump version to v1.5.11-dev 2026-01-11 08:54:04 +00:00
yyhuni
d75a3f6882 fix(task_distributor): adjust high load wait parameters and improve timeout handling
- Increase high load wait interval from 60 to 120 seconds (2 minutes)
- Increase max retries from 10 to 60 to support up to 2 hours total wait time
- Improve timeout message to show actual wait duration in minutes
- Remove duplicate return statement in worker selection logic
- Update notification message to reflect new wait parameters (2 minutes check interval, 2 hours max wait)
- Clean up trailing whitespace in task_distributor.py
- Remove redundant error message from install.sh about missing/incorrect image versions
- Better handling of high load scenarios with clearer logging and user communication
2026-01-11 16:41:05 +08:00
github-actions[bot]
59e48e5b15 chore: bump version to v1.5.10-dev 2026-01-11 08:19:39 +00:00
yyhuni
2d2ec93626 perf(screenshot): optimize memory usage and add URL collection fallback logic
- Add iterator(chunk_size=50) to ScreenshotSnapshot query to prevent BinaryField data caching and reduce memory consumption
- Implement fallback logic in URL collection: WebSite → HostPortMapping → Default URL with priority handling
- Update _collect_urls_from_provider to return tuple with data source information for better logging and debugging
- Add detailed logging to track which data source was used during URL collection
- Improve code documentation with clear return type hints and fallback priority explanation
- Prevents memory spikes when processing large screenshot datasets with binary image data
2026-01-11 16:14:56 +08:00
github-actions[bot]
ced9f811f4 chore: bump version to v1.5.8-dev 2026-01-11 08:09:37 +00:00
7 changed files with 82 additions and 56 deletions

View File

@@ -1 +1 @@
v1.5.7
v1.5.11-dev

View File

@@ -146,7 +146,9 @@ class ScreenshotService:
"""
from apps.asset.models import Screenshot, ScreenshotSnapshot
snapshots = ScreenshotSnapshot.objects.filter(scan_id=scan_id)
# 使用 iterator() 避免 QuerySet 缓存大量 BinaryField 数据导致内存飙升
# chunk_size=50: 每次只加载 50 条记录,处理完后释放内存
snapshots = ScreenshotSnapshot.objects.filter(scan_id=scan_id).iterator(chunk_size=50)
count = 0
for snapshot in snapshots:

View File

@@ -21,11 +21,11 @@ from apps.engine.services import NucleiTemplateRepoService
logger = logging.getLogger(__name__)
# 默认仓库配置
# 默认仓库配置(从 settings 读取,支持 Gitee 镜像)
DEFAULT_REPOS = [
{
"name": "nuclei-templates",
"repo_url": "https://github.com/projectdiscovery/nuclei-templates.git",
"repo_url": getattr(settings, 'NUCLEI_TEMPLATES_REPO_URL', 'https://github.com/projectdiscovery/nuclei-templates.git'),
"description": "Nuclei 官方模板仓库,包含数千个漏洞检测模板",
},
]

View File

@@ -156,10 +156,10 @@ class TaskDistributor:
# 降级策略:如果没有正常负载的,循环等待后重新检测
if not scored_workers:
if high_load_workers:
# 高负载等待参数(默认每 60 秒检测一次,最多 10 次)
high_load_wait = getattr(settings, 'HIGH_LOAD_WAIT_SECONDS', 60)
high_load_max_retries = getattr(settings, 'HIGH_LOAD_MAX_RETRIES', 10)
# 高负载等待参数(每 2 分钟检测一次,最多等待 2 小时)
high_load_wait = getattr(settings, 'HIGH_LOAD_WAIT_SECONDS', 120)
high_load_max_retries = getattr(settings, 'HIGH_LOAD_MAX_RETRIES', 60)
# 开始等待前发送高负载通知
high_load_workers.sort(key=lambda x: x[1])
_, _, first_cpu, first_mem = high_load_workers[0]
@@ -170,51 +170,51 @@ class TaskDistributor:
cpu=first_cpu,
mem=first_mem
)
for retry in range(high_load_max_retries):
logger.warning(
"所有 Worker 高负载,等待 %d 秒后重试... (%d/%d)",
high_load_wait, retry + 1, high_load_max_retries
)
time.sleep(high_load_wait)
# 重新获取负载数据
loads = worker_load_service.get_all_loads(worker_ids)
# 重新评估
scored_workers = []
high_load_workers = []
for worker in workers:
load = loads.get(worker.id)
if not load:
continue
cpu = load.get('cpu', 0)
mem = load.get('mem', 0)
score = cpu * 0.7 + mem * 0.3
if cpu > 85 or mem > 85:
high_load_workers.append((worker, score, cpu, mem))
else:
scored_workers.append((worker, score, cpu, mem))
# 如果有正常负载的 Worker,跳出循环
if scored_workers:
logger.info("检测到正常负载 Worker,结束等待")
break
# 超时或仍然高负载,选择负载最低的
# 超时后强制派发到负载最低的 Worker
if not scored_workers and high_load_workers:
high_load_workers.sort(key=lambda x: x[1])
best_worker, _, cpu, mem = high_load_workers[0]
logger.warning(
"等待超时,强制分发到高负载 Worker: %s (CPU: %.1f%%, MEM: %.1f%%)",
"等待 %d 分钟后仍高负载,强制分发到 Worker: %s (CPU: %.1f%%, MEM: %.1f%%)",
(high_load_wait * high_load_max_retries) // 60,
best_worker.name, cpu, mem
)
return best_worker
return best_worker
else:
logger.warning("没有可用的 Worker")
return None

View File

@@ -31,10 +31,35 @@ def _parse_screenshot_config(enabled_tools: dict) -> dict:
}
def _collect_urls_from_provider(provider: TargetProvider) -> list[str]:
"""从 Provider 收集网站 URL"""
def _collect_urls_from_provider(provider: TargetProvider) -> tuple[list[str], str]:
"""
从 Provider 收集网站 URL(带回退逻辑)
优先级:WebSite → HostPortMapping → Default URL
Returns:
tuple: (urls, source)
- urls: URL 列表
- source: 数据来源 ('website' | 'host_port' | 'default')
"""
logger.info("从 Provider 获取网站 URL - Provider: %s", type(provider).__name__)
return list(provider.iter_websites())
# 优先从 WebSite 获取
urls = list(provider.iter_websites())
if urls:
logger.info("使用 WebSite 数据源 - 数量: %d", len(urls))
return urls, "website"
# 回退到 HostPortMapping
urls = list(provider.iter_host_port_urls())
if urls:
logger.info("WebSite 为空,回退到 HostPortMapping - 数量: %d", len(urls))
return urls, "host_port"
# 最终回退到默认 URL
urls = list(provider.iter_default_urls())
logger.info("HostPortMapping 为空,回退到默认 URL - 数量: %d", len(urls))
return urls, "default"
def _build_empty_result(scan_id: int, target_name: str) -> dict:
@@ -96,9 +121,9 @@ def screenshot_flow(
concurrency = config['concurrency']
logger.info("截图配置 - 并发: %d", concurrency)
# Step 2: 从 Provider 收集 URL 列表
urls = _collect_urls_from_provider(provider)
logger.info("URL 收集完成 - 数量: %d", len(urls))
# Step 2: 从 Provider 收集 URL 列表(带回退逻辑)
urls, source = _collect_urls_from_provider(provider)
logger.info("URL 收集完成 - 来源: %s, 数量: %d", source, len(urls))
if not urls:
logger.warning("没有可截图的 URL,跳过截图任务")

View File

@@ -87,7 +87,7 @@ def on_all_workers_high_load(sender, worker_name, cpu, mem, **kwargs):
"""所有 Worker 高负载时的通知处理"""
create_notification(
title="系统负载较高",
message=f"所有节点负载较高(最低负载节点 CPU: {cpu:.1f}%, 内存: {mem:.1f}%),系统将等待最多 10 分钟后分发任务,扫描速度可能受影响",
message=f"所有节点负载较高(最低负载节点 CPU: {cpu:.1f}%, 内存: {mem:.1f}%),系统将每 2 分钟检测一次,最多等待 2 小时后分发任务",
level=NotificationLevel.MEDIUM,
category=NotificationCategory.SYSTEM
)

View File

@@ -12,21 +12,20 @@ set -e
# 解析参数
START_ARGS=""
DEV_MODE=false
GIT_MIRROR=""
USE_MIRROR=false
for arg in "$@"; do
case $arg in
case ${arg} in
--dev)
DEV_MODE=true
START_ARGS="$START_ARGS --dev"
START_ARGS="${START_ARGS} --dev"
;;
--no-frontend)
START_ARGS="$START_ARGS --no-frontend"
START_ARGS="${START_ARGS} --no-frontend"
;;
--mirror)
GIT_MIRROR="https://gh-proxy.org"
USE_MIRROR=true
;;
--mirror=*)
GIT_MIRROR="${arg#*=}"
*)
;;
esac
done
@@ -134,9 +133,9 @@ fi
show_banner
info "当前用户: ${BOLD}$REAL_USER${RESET}"
info "项目路径: ${BOLD}$ROOT_DIR${RESET}"
info "安装版本: ${BOLD}$APP_VERSION${RESET}"
if [ -n "$GIT_MIRROR" ]; then
info "Git 加速: ${BOLD}${GREEN}已启用${RESET} - $GIT_MIRROR"
info "安装版本: ${BOLD}${APP_VERSION}${RESET}"
if [[ "${USE_MIRROR}" == true ]]; then
info "国内加速: ${BOLD}${GREEN}已启用${RESET}"
fi
# ==============================================================================
@@ -424,7 +423,7 @@ else
info "正在安装 Docker..."
# 根据是否启用加速选择下载方式
if [ -n "$GIT_MIRROR" ]; then
if [[ "${USE_MIRROR}" == true ]]; then
# 使用阿里云 Docker 安装脚本(国内加速)
info "使用国内镜像安装 Docker..."
if curl -fsSL https://get.docker.com | sh -s -- --mirror Aliyun; then
@@ -452,13 +451,13 @@ else
usermod -aG docker "$REAL_USER"
# 配置 Docker 镜像加速(仅当启用 --mirror 时)
if [ -n "$GIT_MIRROR" ]; then
if [[ "${USE_MIRROR}" == true ]]; then
configure_docker_mirror
fi
fi
# 如果 Docker 已安装但启用了 --mirror,也配置镜像加速
if [ -n "$GIT_MIRROR" ] && command -v docker &>/dev/null; then
if [[ "${USE_MIRROR}" == true ]] && command -v docker &>/dev/null; then
# 检查是否已配置镜像加速
if [ ! -f "/etc/docker/daemon.json" ] || ! grep -q "registry-mirrors" /etc/docker/daemon.json 2>/dev/null; then
configure_docker_mirror
@@ -537,10 +536,10 @@ if [ -f "$DOCKER_DIR/.env.example" ]; then
update_env_var "$DOCKER_DIR/.env" "IMAGE_TAG" "$APP_VERSION"
success "已锁定版本: IMAGE_TAG=$APP_VERSION"
# Git 加速仅用于安装过程,不写入运行时配置
# 运行时用户如需加速,可通过代理或其他方式自行配置
if [ -n "$GIT_MIRROR" ]; then
info "Git 加速已启用(仅用于安装阶段)"
# Git 加速:写入 Gitee 镜像地址到 .env,后续 git pull 也走 Gitee
if [[ "${USE_MIRROR}" == true ]]; then
update_env_var "${DOCKER_DIR}/.env" "NUCLEI_TEMPLATES_REPO_URL" "https://gitee.com/yianyuk/nuclei-templates.git"
info "Nuclei 模板将使用 Gitee 镜像"
fi
# 开发模式:开启调试日志
@@ -684,9 +683,9 @@ else
info "正在拉取: $WORKER_IMAGE"
# 镜像加速通过 daemon.json 的 registry-mirrors 实现
PULL_IMAGE="$WORKER_IMAGE"
PULL_IMAGE="${WORKER_IMAGE}"
if [ -n "$GIT_MIRROR" ]; then
if [[ "${USE_MIRROR}" == true ]]; then
info "已配置 Docker 镜像加速,拉取将自动走加速通道"
fi
@@ -694,16 +693,15 @@ else
success "Worker 镜像拉取完成"
else
error "Worker 镜像拉取失败,无法继续安装"
error "镜像地址: $WORKER_IMAGE"
error "镜像地址: ${WORKER_IMAGE}"
echo
if [ -z "$GIT_MIRROR" ]; then
if [[ "${USE_MIRROR}" != true ]]; then
warn "如果您在中国大陆,建议使用 --mirror 参数启用加速:"
echo -e " ${BOLD}sudo ./install.sh --mirror${RESET}"
else
warn "镜像加速已配置,但拉取仍然失败,可能原因:"
echo -e " 1. 镜像源暂时不可用,请稍后重试"
echo -e " 2. 网络连接问题"
echo -e " 3. 镜像不存在或版本错误"
fi
echo
exit 1
@@ -715,20 +713,21 @@ fi
# ==============================================================================
step "预下载 Nuclei 模板仓库..."
NUCLEI_TEMPLATES_DIR="/opt/xingrin/nuclei-repos/nuclei-templates"
NUCLEI_TEMPLATES_REPO="https://github.com/projectdiscovery/nuclei-templates.git"
NUCLEI_TEMPLATES_REPO_GITHUB="https://github.com/projectdiscovery/nuclei-templates.git"
NUCLEI_TEMPLATES_REPO_GITEE="https://gitee.com/yianyuk/nuclei-templates.git"
# 确保目录存在
mkdir -p /opt/xingrin/nuclei-repos
if [ -d "$NUCLEI_TEMPLATES_DIR/.git" ]; then
if [[ -d "$NUCLEI_TEMPLATES_DIR/.git" ]]; then
info "Nuclei 模板仓库已存在,跳过下载"
else
# 构建 clone URL(如果启用了 Git 加速)
if [ -n "$GIT_MIRROR" ]; then
CLONE_URL="${GIT_MIRROR}/${NUCLEI_TEMPLATES_REPO}"
info "使用 Git 加速下载: $CLONE_URL"
# 选择 clone URL(启用加速时使用 Gitee 镜像)
if [[ "${USE_MIRROR}" == true ]]; then
CLONE_URL="${NUCLEI_TEMPLATES_REPO_GITEE}"
info "使用 Gitee 镜像下载: ${CLONE_URL}"
else
CLONE_URL="$NUCLEI_TEMPLATES_REPO"
CLONE_URL="$NUCLEI_TEMPLATES_REPO_GITHUB"
fi
# 执行 git clone