Compare commits


3 Commits

Author SHA1 Message Date
yyhuni
7cd4354d8f feat(scan,asset): add scan logging system and improve search view architecture
- Add user_logger utility for structured scan operation logging
- Create scan log views and API endpoints for retrieving scan execution logs
- Add scan-log-list component and use-scan-logs hook for frontend log display
- Refactor asset search views to remove ArrayField support from pg_ivm IMMV
- Update search_service.py to JOIN original tables for array field retrieval
- Add system architecture requirements (AMD64/ARM64) to README
- Update scan flow handlers to integrate logging system
- Enhance scan progress dialog with log viewer integration
- Add ANSI log viewer component for formatted log display
- Update scan service API to support log retrieval endpoints
- Migrate database schema to support new logging infrastructure
- Add internationalization strings for scan logs (en/zh)
This change improves observability of scan operations and resolves pg_ivm limitations with ArrayField types by fetching array data from original tables via JOIN operations.
2026-01-04 18:19:45 +08:00
yyhuni
6bf35a760f chore(docker): configure Prefect home directory in worker image
- Add PREFECT_HOME environment variable pointing to /app/.prefect
- Create Prefect configuration directory to prevent home directory warnings
- Update step numbering in Dockerfile comments for clarity
- Ensures Prefect can properly initialize its configuration without relying on the user's home directory
2026-01-04 10:39:11 +08:00
github-actions[bot]
be9ecadffb chore: bump version to v1.3.12-dev 2026-01-04 01:05:00 +00:00
32 changed files with 835 additions and 197 deletions
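The first commit's pg_ivm workaround reduces to a single query shape: scalar columns stay in the incrementally maintained view, while array columns are JOINed back in from the original table on id. A minimal sketch of that pattern (view, table, and column names are taken from the migration and search service below; the standalone helper itself is illustrative, not code from this change):

from django.db import connection

def fetch_websites(where_clause: str, params: list, limit: int = 100):
    # Scalar columns come from the IMMV (alias v); the tech ArrayField comes
    # from the original website table (alias t), since pg_ivm cannot maintain it.
    # where_clause is emitted by SearchQueryParser with %s placeholders; it is
    # not raw user input.
    sql = f"""
        SELECT v.id, v.url, v.host, v.title, t.tech
        FROM asset_search_view v
        JOIN website t ON v.id = t.id
        WHERE {where_clause}
        ORDER BY v.created_at DESC
        LIMIT %s
    """
    with connection.cursor() as cursor:
        cursor.execute(sql, [*params, limit])
        return cursor.fetchall()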

View File

@@ -198,6 +198,7 @@ url="/api/v1" && status!="404"
### Environment Requirements
- **OS**: Ubuntu 20.04+ / Debian 11+
- **Architecture**: AMD64 (x86_64) / ARM64 (aarch64)
- **Hardware**: 2 cores / 4 GB RAM minimum, 20 GB+ disk space
### One-Click Install

View File

@@ -1 +1 @@
v1.3.11-dev
v1.3.12-dev

View File

@@ -6,6 +6,18 @@
Includes:
1. asset_search_view - Website search view
2. endpoint_search_view - Endpoint search view
Important limitation:
⚠️ pg_ivm does not support array fields (ArrayField): it compares values via the anyarray pseudo-type,
and PostgreSQL cannot determine the element type of an empty array, raising:
"cannot determine element type of \"anyarray\" argument"
Therefore all ArrayField columns (tech, matched_gf_patterns, etc.) have been removed from the IMMVs
and are fetched at search time by JOINing the original tables.
To add a new array field:
1. Do not include it in the IMMV
2. Fetch it in the search service by JOINing the original table
"""
from django.db import migrations
@@ -33,6 +45,8 @@ class Migration(migrations.Migration):
# ==================== Website IMMV ====================
# 2. Create the asset_search_view IMMV
# ⚠️ Note: excludes the w.tech array field (pg_ivm does not support ArrayField)
# Array fields are fetched in search_service.py by JOINing the website table
migrations.RunSQL(
sql="""
SELECT pgivm.create_immv('asset_search_view', $$
@@ -41,7 +55,6 @@ class Migration(migrations.Migration):
w.url,
w.host,
w.title,
w.tech,
w.status_code,
w.response_headers,
w.response_body,
@@ -85,10 +98,6 @@ class Migration(migrations.Migration):
CREATE INDEX IF NOT EXISTS asset_search_view_body_trgm_idx
ON asset_search_view USING gin (response_body gin_trgm_ops);
-- tech array index
CREATE INDEX IF NOT EXISTS asset_search_view_tech_idx
ON asset_search_view USING gin (tech);
-- status_code index
CREATE INDEX IF NOT EXISTS asset_search_view_status_idx
ON asset_search_view (status_code);
@@ -104,7 +113,6 @@ class Migration(migrations.Migration):
DROP INDEX IF EXISTS asset_search_view_url_trgm_idx;
DROP INDEX IF EXISTS asset_search_view_headers_trgm_idx;
DROP INDEX IF EXISTS asset_search_view_body_trgm_idx;
DROP INDEX IF EXISTS asset_search_view_tech_idx;
DROP INDEX IF EXISTS asset_search_view_status_idx;
DROP INDEX IF EXISTS asset_search_view_created_idx;
"""
@@ -113,6 +121,8 @@ class Migration(migrations.Migration):
# ==================== Endpoint IMMV ====================
# 4. Create the endpoint_search_view IMMV
# ⚠️ Note: excludes the e.tech and e.matched_gf_patterns array fields (pg_ivm does not support ArrayField)
# Array fields are fetched in search_service.py by JOINing the endpoint table
migrations.RunSQL(
sql="""
SELECT pgivm.create_immv('endpoint_search_view', $$
@@ -121,7 +131,6 @@ class Migration(migrations.Migration):
e.url,
e.host,
e.title,
e.tech,
e.status_code,
e.response_headers,
e.response_body,
@@ -130,7 +139,6 @@ class Migration(migrations.Migration):
e.webserver,
e.location,
e.vhost,
e.matched_gf_patterns,
e.created_at,
e.target_id
FROM endpoint e
@@ -166,10 +174,6 @@ class Migration(migrations.Migration):
CREATE INDEX IF NOT EXISTS endpoint_search_view_body_trgm_idx
ON endpoint_search_view USING gin (response_body gin_trgm_ops);
-- tech array index
CREATE INDEX IF NOT EXISTS endpoint_search_view_tech_idx
ON endpoint_search_view USING gin (tech);
-- status_code index
CREATE INDEX IF NOT EXISTS endpoint_search_view_status_idx
ON endpoint_search_view (status_code);
@@ -185,7 +189,6 @@ class Migration(migrations.Migration):
DROP INDEX IF EXISTS endpoint_search_view_url_trgm_idx;
DROP INDEX IF EXISTS endpoint_search_view_headers_trgm_idx;
DROP INDEX IF EXISTS endpoint_search_view_body_trgm_idx;
DROP INDEX IF EXISTS endpoint_search_view_tech_idx;
DROP INDEX IF EXISTS endpoint_search_view_status_idx;
DROP INDEX IF EXISTS endpoint_search_view_created_idx;
"""

View File

@@ -37,46 +37,55 @@ VIEW_MAPPING = {
'endpoint': 'endpoint_search_view',
}
# Map asset types to original table names (used to JOIN for array fields)
# ⚠️ Important: pg_ivm does not support ArrayField; all array fields must be fetched from the original table via JOIN
TABLE_MAPPING = {
'website': 'website',
'endpoint': 'endpoint',
}
# Valid asset types
VALID_ASSET_TYPES = {'website', 'endpoint'}
# Website select fields
# Website select fields (v = view, t = original table)
# ⚠️ Note: t.tech comes from the original table because pg_ivm does not support ArrayField
WEBSITE_SELECT_FIELDS = """
id,
url,
host,
title,
tech,
status_code,
response_headers,
response_body,
content_type,
content_length,
webserver,
location,
vhost,
created_at,
target_id
v.id,
v.url,
v.host,
v.title,
t.tech, -- ArrayField, JOINed from the website table
v.status_code,
v.response_headers,
v.response_body,
v.content_type,
v.content_length,
v.webserver,
v.location,
v.vhost,
v.created_at,
v.target_id
"""
# Endpoint select fields (including matched_gf_patterns)
# Endpoint select fields
# ⚠️ Note: t.tech and t.matched_gf_patterns come from the original table because pg_ivm does not support ArrayField
ENDPOINT_SELECT_FIELDS = """
id,
url,
host,
title,
tech,
status_code,
response_headers,
response_body,
content_type,
content_length,
webserver,
location,
vhost,
matched_gf_patterns,
created_at,
target_id
v.id,
v.url,
v.host,
v.title,
t.tech, -- ArrayField, JOINed from the endpoint table
v.status_code,
v.response_headers,
v.response_body,
v.content_type,
v.content_length,
v.webserver,
v.location,
v.vhost,
t.matched_gf_patterns, -- ArrayField, JOINed from the endpoint table
v.created_at,
v.target_id
"""
@@ -119,8 +128,8 @@ class SearchQueryParser:
# Check for operator syntax; without it, treat the query as a fuzzy host search
if not cls.CONDITION_PATTERN.search(query):
# Bare text defaults to a fuzzy host search
return "host ILIKE %s", [f"%{query}%"]
# Bare text defaults to a fuzzy host search (v is the view alias)
return "v.host ILIKE %s", [f"%{query}%"]
# Split into OR groups on ||
or_groups = cls._split_by_or(query)
@@ -273,45 +282,45 @@ class SearchQueryParser:
def _build_like_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
"""构建模糊匹配条件"""
if is_array:
# Array field: check whether any element contains the value
return f"EXISTS (SELECT 1 FROM unnest({field}) AS t WHERE t ILIKE %s)", [f"%{value}%"]
# Array field: check whether any element contains the value (fetched from original table t)
return f"EXISTS (SELECT 1 FROM unnest(t.{field}) AS elem WHERE elem ILIKE %s)", [f"%{value}%"]
elif field == 'status_code':
# status_code is an integer; convert fuzzy match to exact match
try:
return f"{field} = %s", [int(value)]
return f"v.{field} = %s", [int(value)]
except ValueError:
return f"{field}::text ILIKE %s", [f"%{value}%"]
return f"v.{field}::text ILIKE %s", [f"%{value}%"]
else:
return f"{field} ILIKE %s", [f"%{value}%"]
return f"v.{field} ILIKE %s", [f"%{value}%"]
@classmethod
def _build_exact_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
"""构建精确匹配条件"""
if is_array:
# Array field: check whether the array contains the exact value
return f"%s = ANY({field})", [value]
# Array field: check whether the array contains the exact value (fetched from original table t)
return f"%s = ANY(t.{field})", [value]
elif field == 'status_code':
# status_code is an integer
try:
return f"{field} = %s", [int(value)]
return f"v.{field} = %s", [int(value)]
except ValueError:
return f"{field}::text = %s", [value]
return f"v.{field}::text = %s", [value]
else:
return f"{field} = %s", [value]
return f"v.{field} = %s", [value]
@classmethod
def _build_not_equal_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
"""构建不等于条件"""
if is_array:
# Array field: check that the array does not contain the value
return f"NOT (%s = ANY({field}))", [value]
# Array field: check that the array does not contain the value (fetched from original table t)
return f"NOT (%s = ANY(t.{field}))", [value]
elif field == 'status_code':
try:
return f"({field} IS NULL OR {field} != %s)", [int(value)]
return f"(v.{field} IS NULL OR v.{field} != %s)", [int(value)]
except ValueError:
return f"({field} IS NULL OR {field}::text != %s)", [value]
return f"(v.{field} IS NULL OR v.{field}::text != %s)", [value]
else:
return f"({field} IS NULL OR {field} != %s)", [value]
return f"(v.{field} IS NULL OR v.{field} != %s)", [value]
AssetType = Literal['website', 'endpoint']
@@ -339,15 +348,18 @@ class AssetSearchService:
"""
where_clause, params = SearchQueryParser.parse(query)
# Select the view and fields based on asset type
# Select the view, original table, and fields based on asset type
view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
table_name = TABLE_MAPPING.get(asset_type, 'website')
select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS
# JOIN the original table to fetch array fields (tech, matched_gf_patterns)
sql = f"""
SELECT {select_fields}
FROM {view_name}
FROM {view_name} v
JOIN {table_name} t ON v.id = t.id
WHERE {where_clause}
ORDER BY created_at DESC
ORDER BY v.created_at DESC
"""
# Append LIMIT
@@ -383,10 +395,12 @@ class AssetSearchService:
"""
where_clause, params = SearchQueryParser.parse(query)
# Select the view based on asset type
# Select the view and original table based on asset type
view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
table_name = TABLE_MAPPING.get(asset_type, 'website')
sql = f"SELECT COUNT(*) FROM {view_name} WHERE {where_clause}"
# JOIN the original table to support array-field queries
sql = f"SELECT COUNT(*) FROM {view_name} v JOIN {table_name} t ON v.id = t.id WHERE {where_clause}"
try:
with connection.cursor() as cursor:
@@ -419,8 +433,9 @@ class AssetSearchService:
"""
where_clause, params = SearchQueryParser.parse(query)
# Select the view and fields based on asset type
# Select the view, original table, and fields based on asset type
view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
table_name = TABLE_MAPPING.get(asset_type, 'website')
select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS
# Query in batches with OFFSET/LIMIT (Django does not support named cursors)
@@ -428,11 +443,13 @@ class AssetSearchService:
try:
while True:
# JOIN the original table to fetch array fields
sql = f"""
SELECT {select_fields}
FROM {view_name}
FROM {view_name} v
JOIN {table_name} t ON v.id = t.id
WHERE {where_clause}
ORDER BY created_at DESC
ORDER BY v.created_at DESC
LIMIT {batch_size} OFFSET {offset}
"""

View File

@@ -33,7 +33,7 @@ from apps.scan.handlers.scan_flow_handlers import (
on_scan_flow_completed,
on_scan_flow_failed,
)
from apps.scan.utils import config_parser, build_scan_command, ensure_wordlist_local
from apps.scan.utils import config_parser, build_scan_command, ensure_wordlist_local, user_log
logger = logging.getLogger(__name__)
@@ -413,6 +413,7 @@ def _run_scans_concurrently(
logger.info("="*60)
logger.info("使用工具: %s (并发模式, max_workers=%d)", tool_name, max_workers)
logger.info("="*60)
user_log(scan_id, "directory_scan", f"Running {tool_name}")
# If wordlist_name is configured, first ensure the wordlist file exists locally (with hash verification)
wordlist_name = tool_config.get('wordlist_name')
@@ -467,6 +468,11 @@ def _run_scans_concurrently(
total_tasks = len(scan_params_list)
logger.info("开始分批执行 %d 个扫描任务(每批 %d 个)...", total_tasks, max_workers)
# 进度里程碑跟踪
last_progress_percent = 0
tool_directories = 0
tool_processed = 0
batch_num = 0
for batch_start in range(0, total_tasks, max_workers):
batch_end = min(batch_start + max_workers, total_tasks)
@@ -498,7 +504,9 @@ def _run_scans_concurrently(
result = future.result() # Block until this task completes
directories_found = result.get('created_directories', 0)
total_directories += directories_found
tool_directories += directories_found
processed_sites_count += 1
tool_processed += 1
logger.info(
"✓ [%d/%d] 站点扫描完成: %s - 发现 %d 个目录",
@@ -517,6 +525,19 @@ def _run_scans_concurrently(
"✗ [%d/%d] 站点扫描失败: %s - 错误: %s",
idx, len(sites), site_url, exc
)
# Progress milestone: log every 20%
current_progress = int((batch_end / total_tasks) * 100)
if current_progress >= last_progress_percent + 20:
user_log(scan_id, "directory_scan", f"Progress: {batch_end}/{total_tasks} sites scanned")
last_progress_percent = (current_progress // 20) * 20
# Tool-completion logging (developer log + user log)
logger.info(
"✓ 工具 %s 执行完成 - 已处理站点: %d/%d, 发现目录: %d",
tool_name, tool_processed, total_tasks, tool_directories
)
user_log(scan_id, "directory_scan", f"{tool_name} completed: found {tool_directories} directories")
# Emit summary
if failed_sites:
@@ -605,6 +626,8 @@ def directory_scan_flow(
"="*60
)
user_log(scan_id, "directory_scan", "Starting directory scan")
# Validate parameters
if scan_id is None:
raise ValueError("scan_id 不能为空")
@@ -625,7 +648,8 @@ def directory_scan_flow(
sites_file, site_count = _export_site_urls(target_id, target_name, directory_scan_dir)
if site_count == 0:
logger.warning("目标下没有站点,跳过目录扫描")
logger.warning("跳过目录扫描:没有站点可扫描 - Scan ID: %s", scan_id)
user_log(scan_id, "directory_scan", "Skipped: no sites to scan", "warning")
return {
'success': True,
'scan_id': scan_id,
@@ -664,7 +688,9 @@ def directory_scan_flow(
logger.warning("所有站点扫描均失败 - 总站点数: %d, 失败数: %d", site_count, len(failed_sites))
# 不抛出异常,让扫描继续
logger.info("="*60 + "\n✓ 目录扫描完成\n" + "="*60)
# Record flow completion
logger.info("✓ Directory scan finished - directories found: %d", total_directories)
user_log(scan_id, "directory_scan", f"directory_scan completed: found {total_directories} directories")
return {
'success': True,

View File

@@ -29,7 +29,7 @@ from apps.scan.tasks.fingerprint_detect import (
export_urls_for_fingerprint_task,
run_xingfinger_and_stream_update_tech_task,
)
from apps.scan.utils import build_scan_command
from apps.scan.utils import build_scan_command, user_log
from apps.scan.utils.fingerprint_helpers import get_fingerprint_paths
logger = logging.getLogger(__name__)
@@ -168,6 +168,7 @@ def _run_fingerprint_detect(
"开始执行 %s 指纹识别 - URL数: %d, 超时: %ds, 指纹库: %s",
tool_name, url_count, timeout, list(fingerprint_paths.keys())
)
user_log(scan_id, "fingerprint_detect", f"Running {tool_name}: {command}")
# 6. Execute the scan task
try:
@@ -190,17 +191,21 @@ def _run_fingerprint_detect(
'fingerprint_libs': list(fingerprint_paths.keys())
}
tool_updated = result.get('updated_count', 0)
logger.info(
"✓ 工具 %s 执行完成 - 处理记录: %d, 更新: %d, 未找到: %d",
tool_name,
result.get('processed_records', 0),
result.get('updated_count', 0),
tool_updated,
result.get('not_found_count', 0)
)
user_log(scan_id, "fingerprint_detect", f"{tool_name} completed: identified {tool_updated} fingerprints")
except Exception as exc:
failed_tools.append({'tool': tool_name, 'reason': str(exc)})
reason = str(exc)
failed_tools.append({'tool': tool_name, 'reason': reason})
logger.error("工具 %s 执行失败: %s", tool_name, exc, exc_info=True)
user_log(scan_id, "fingerprint_detect", f"{tool_name} failed: {reason}", "error")
if failed_tools:
logger.warning(
@@ -272,6 +277,8 @@ def fingerprint_detect_flow(
"="*60
)
user_log(scan_id, "fingerprint_detect", "Starting fingerprint detection")
# Validate parameters
if scan_id is None:
raise ValueError("scan_id 不能为空")
@@ -293,7 +300,8 @@ def fingerprint_detect_flow(
urls_file, url_count = _export_urls(target_id, fingerprint_dir, source)
if url_count == 0:
logger.warning("目标下没有可用的 URL跳过指纹识别")
logger.warning("跳过指纹识别:没有 URL 可扫描 - Scan ID: %s", scan_id)
user_log(scan_id, "fingerprint_detect", "Skipped: no URLs to scan", "warning")
return {
'success': True,
'scan_id': scan_id,
@@ -332,8 +340,6 @@ def fingerprint_detect_flow(
source=source
)
logger.info("="*60 + "\n✓ 指纹识别完成\n" + "="*60)
# 动态生成已执行的任务列表
executed_tasks = ['export_urls_for_fingerprint']
executed_tasks.extend([f'run_xingfinger ({tool})' for tool in tool_stats.keys()])
@@ -344,6 +350,10 @@ def fingerprint_detect_flow(
total_created = sum(stats['result'].get('created_count', 0) for stats in tool_stats.values())
total_snapshots = sum(stats['result'].get('snapshot_count', 0) for stats in tool_stats.values())
# Record flow completion
logger.info("✓ Fingerprint detection finished - fingerprints identified: %d", total_updated)
user_log(scan_id, "fingerprint_detect", f"fingerprint_detect completed: identified {total_updated} fingerprints")
successful_tools = [name for name in enabled_tools.keys()
if name not in [f['tool'] for f in failed_tools]]

View File

@@ -28,7 +28,7 @@ from apps.scan.handlers.scan_flow_handlers import (
on_scan_flow_completed,
on_scan_flow_failed,
)
from apps.scan.utils import config_parser, build_scan_command
from apps.scan.utils import config_parser, build_scan_command, user_log
logger = logging.getLogger(__name__)
@@ -265,6 +265,7 @@ def _run_scans_sequentially(
# 3. Execute the scan task
logger.info("Starting %s scan (timeout: %ds)...", tool_name, config_timeout)
user_log(scan_id, "port_scan", f"Running {tool_name}: {command}")
try:
# Call the task directly (serial execution)
@@ -286,26 +287,31 @@ def _run_scans_sequentially(
'result': result,
'timeout': config_timeout
}
processed_records += result.get('processed_records', 0)
tool_records = result.get('processed_records', 0)
processed_records += tool_records
logger.info(
"✓ 工具 %s 流式处理完成 - 记录数: %d",
tool_name, result.get('processed_records', 0)
tool_name, tool_records
)
user_log(scan_id, "port_scan", f"{tool_name} completed: found {tool_records} ports")
except subprocess.TimeoutExpired as exc:
# Handle timeouts separately
# Note: when a streaming task times out, data parsed before the timeout has already been saved to the database
reason = f"timed out (configured: {config_timeout}s)"
reason = f"timeout after {config_timeout}s"
failed_tools.append({'tool': tool_name, 'reason': reason})
logger.warning(
"⚠️ 工具 %s 执行超时 - 超时配置: %d\n"
"注意:超时前已解析的端口数据已保存到数据库,但扫描未完全完成。",
tool_name, config_timeout
)
user_log(scan_id, "port_scan", f"{tool_name} failed: {reason}", "error")
except Exception as exc:
# Other exceptions
failed_tools.append({'tool': tool_name, 'reason': str(exc)})
reason = str(exc)
failed_tools.append({'tool': tool_name, 'reason': reason})
logger.error("工具 %s 执行失败: %s", tool_name, exc, exc_info=True)
user_log(scan_id, "port_scan", f"{tool_name} failed: {reason}", "error")
if failed_tools:
logger.warning(
@@ -420,6 +426,8 @@ def port_scan_flow(
"="*60
)
user_log(scan_id, "port_scan", "Starting port scan")
# Step 0: Create the working directory
from apps.scan.utils import setup_scan_directory
port_scan_dir = setup_scan_directory(scan_workspace_dir, 'port_scan')
@@ -428,7 +436,8 @@ def port_scan_flow(
targets_file, target_count, target_type = _export_scan_targets(target_id, port_scan_dir)
if target_count == 0:
logger.warning("目标下没有可扫描的地址,跳过端口扫描")
logger.warning("跳过端口扫描:没有目标可扫描 - Scan ID: %s", scan_id)
user_log(scan_id, "port_scan", "Skipped: no targets to scan", "warning")
return {
'success': True,
'scan_id': scan_id,
@@ -467,7 +476,9 @@ def port_scan_flow(
target_name=target_name
)
logger.info("="*60 + "\n✓ 端口扫描完成\n" + "="*60)
# 记录 Flow 完成
logger.info("✓ 端口扫描完成 - 发现端口: %d", processed_records)
user_log(scan_id, "port_scan", f"port_scan completed: found {processed_records} ports")
# Dynamically build the list of executed tasks
executed_tasks = ['export_scan_targets', 'parse_config']

View File

@@ -17,6 +17,7 @@ from apps.common.prefect_django_setup import setup_django_for_prefect
import logging
import os
import subprocess
import time
from pathlib import Path
from typing import Callable
from prefect import flow
@@ -26,7 +27,7 @@ from apps.scan.handlers.scan_flow_handlers import (
on_scan_flow_completed,
on_scan_flow_failed,
)
from apps.scan.utils import config_parser, build_scan_command
from apps.scan.utils import config_parser, build_scan_command, user_log
logger = logging.getLogger(__name__)
@@ -198,6 +199,7 @@ def _run_scans_sequentially(
"开始执行 %s 站点扫描 - URL数: %d, 最终超时: %ds",
tool_name, total_urls, timeout
)
user_log(scan_id, "site_scan", f"Running {tool_name}: {command}")
# 3. Execute the scan task
try:
@@ -218,29 +220,35 @@ def _run_scans_sequentially(
'result': result,
'timeout': timeout
}
processed_records += result.get('processed_records', 0)
tool_records = result.get('processed_records', 0)
tool_created = result.get('created_websites', 0)
processed_records += tool_records
logger.info(
"✓ 工具 %s 流式处理完成 - 处理记录: %d, 创建站点: %d, 跳过: %d",
tool_name,
result.get('processed_records', 0),
result.get('created_websites', 0),
tool_records,
tool_created,
result.get('skipped_no_subdomain', 0) + result.get('skipped_failed', 0)
)
user_log(scan_id, "site_scan", f"{tool_name} completed: found {tool_created} websites")
except subprocess.TimeoutExpired as exc:
# Handle timeouts separately
reason = f"timed out (configured: {timeout}s)"
reason = f"timeout after {timeout}s"
failed_tools.append({'tool': tool_name, 'reason': reason})
logger.warning(
"⚠️ 工具 %s 执行超时 - 超时配置: %d\n"
"注意:超时前已解析的站点数据已保存到数据库,但扫描未完全完成。",
tool_name, timeout
)
user_log(scan_id, "site_scan", f"{tool_name} failed: {reason}", "error")
except Exception as exc:
# Other exceptions
failed_tools.append({'tool': tool_name, 'reason': str(exc)})
reason = str(exc)
failed_tools.append({'tool': tool_name, 'reason': reason})
logger.error("工具 %s 执行失败: %s", tool_name, exc, exc_info=True)
user_log(scan_id, "site_scan", f"{tool_name} failed: {reason}", "error")
if failed_tools:
logger.warning(
@@ -379,6 +387,8 @@ def site_scan_flow(
if not scan_workspace_dir:
raise ValueError("scan_workspace_dir 不能为空")
user_log(scan_id, "site_scan", "Starting site scan")
# Step 0: Create the working directory
from apps.scan.utils import setup_scan_directory
site_scan_dir = setup_scan_directory(scan_workspace_dir, 'site_scan')
@@ -389,7 +399,8 @@ def site_scan_flow(
)
if total_urls == 0:
logger.warning("目标下没有可用的站点URL,跳过站点扫描")
logger.warning("跳过站点扫描:没有站点 URL 可扫描 - Scan ID: %s", scan_id)
user_log(scan_id, "site_scan", "Skipped: no site URLs to scan", "warning")
return {
'success': True,
'scan_id': scan_id,
@@ -432,8 +443,6 @@ def site_scan_flow(
target_name=target_name
)
logger.info("="*60 + "\n✓ 站点扫描完成\n" + "="*60)
# 动态生成已执行的任务列表
executed_tasks = ['export_site_urls', 'parse_config']
executed_tasks.extend([f'run_and_stream_save_websites ({tool})' for tool in tool_stats.keys()])
@@ -443,6 +452,10 @@ def site_scan_flow(
total_skipped_no_subdomain = sum(stats['result'].get('skipped_no_subdomain', 0) for stats in tool_stats.values())
total_skipped_failed = sum(stats['result'].get('skipped_failed', 0) for stats in tool_stats.values())
# Record flow completion
logger.info("✓ Site scan finished - websites created: %d", total_created)
user_log(scan_id, "site_scan", f"site_scan completed: found {total_created} websites")
return {
'success': True,
'scan_id': scan_id,

View File

@@ -30,7 +30,7 @@ from apps.scan.handlers.scan_flow_handlers import (
on_scan_flow_completed,
on_scan_flow_failed,
)
from apps.scan.utils import build_scan_command, ensure_wordlist_local
from apps.scan.utils import build_scan_command, ensure_wordlist_local, user_log
from apps.engine.services.wordlist_service import WordlistService
from apps.common.normalizer import normalize_domain
from apps.common.validators import validate_domain
@@ -77,7 +77,8 @@ def _validate_and_normalize_target(target_name: str) -> str:
def _run_scans_parallel(
enabled_tools: dict,
domain_name: str,
result_dir: Path
result_dir: Path,
scan_id: int
) -> tuple[list, list, list]:
"""
Run all enabled subdomain scan tools in parallel
@@ -86,6 +87,7 @@ def _run_scans_parallel(
enabled_tools: dict of enabled tool configs {'tool_name': {'timeout': 600, ...}}
domain_name: target domain
result_dir: output directory for results
scan_id: scan task ID (used for logging)
Returns:
tuple: (result_files, failed_tools, successful_tool_names)
@@ -137,6 +139,9 @@ def _run_scans_parallel(
f"提交任务 - 工具: {tool_name}, 超时: {timeout}s, 输出: {output_file}"
)
# Log tool start
user_log(scan_id, "subdomain_discovery", f"Running {tool_name}: {command}")
future = run_subdomain_discovery_task.submit(
tool=tool_name,
command=command,
@@ -164,16 +169,19 @@ def _run_scans_parallel(
if result:
result_files.append(result)
logger.info("✓ 扫描工具 %s 执行成功: %s", tool_name, result)
user_log(scan_id, "subdomain_discovery", f"{tool_name} completed")
else:
failure_msg = f"{tool_name}: 未生成结果文件"
failures.append(failure_msg)
failed_tools.append({'tool': tool_name, 'reason': 'no output file produced'})
logger.warning("⚠️ Scan tool %s produced no output file", tool_name)
user_log(scan_id, "subdomain_discovery", f"{tool_name} failed: no output file", "error")
except Exception as e:
failure_msg = f"{tool_name}: {str(e)}"
failures.append(failure_msg)
failed_tools.append({'tool': tool_name, 'reason': str(e)})
logger.warning("⚠️ 扫描工具 %s 执行失败: %s", tool_name, str(e))
user_log(scan_id, "subdomain_discovery", f"{tool_name} failed: {str(e)}", "error")
# 4. Check whether any tool succeeded
if not result_files:
@@ -203,7 +211,8 @@ def _run_single_tool(
tool_config: dict,
command_params: dict,
result_dir: Path,
scan_type: str = 'subdomain_discovery'
scan_type: str = 'subdomain_discovery',
scan_id: int = None
) -> str:
"""
Run a single scan tool
@@ -214,6 +223,7 @@ def _run_single_tool(
command_params: command parameters
result_dir: results directory
scan_type: scan type
scan_id: scan ID (used for user-visible logging)
Returns:
str: 输出文件路径,失败返回空字符串
@@ -242,7 +252,9 @@ def _run_single_tool(
if timeout == 'auto':
timeout = 3600
logger.info(f"执行 {tool_name}: timeout={timeout}s")
logger.info(f"执行 {tool_name}: {command}")
if scan_id:
user_log(scan_id, scan_type, f"Running {tool_name}: {command}")
try:
result = run_subdomain_discovery_task(
@@ -401,7 +413,6 @@ def subdomain_discovery_flow(
logger.warning("目标域名无效,跳过子域名发现扫描: %s", e)
return _empty_result(scan_id, target_name, scan_workspace_dir)
# Log after successful validation
logger.info(
"="*60 + "\n" +
"开始子域名发现扫描\n" +
@@ -410,6 +421,7 @@ def subdomain_discovery_flow(
f" Workspace: {scan_workspace_dir}\n" +
"="*60
)
user_log(scan_id, "subdomain_discovery", f"Starting subdomain discovery for {domain_name}")
# Parse config
passive_tools = scan_config.get('passive_tools', {})
@@ -429,23 +441,22 @@ def subdomain_discovery_flow(
successful_tool_names = []
# ==================== Stage 1: Passive collection (parallel) ====================
logger.info("=" * 40)
logger.info("Stage 1: Passive collection (parallel)")
logger.info("=" * 40)
if enabled_passive_tools:
logger.info("=" * 40)
logger.info("Stage 1: 被动收集(并行)")
logger.info("=" * 40)
logger.info("启用工具: %s", ', '.join(enabled_passive_tools.keys()))
user_log(scan_id, "subdomain_discovery", f"Stage 1: passive collection ({', '.join(enabled_passive_tools.keys())})")
result_files, stage1_failed, stage1_success = _run_scans_parallel(
enabled_tools=enabled_passive_tools,
domain_name=domain_name,
result_dir=result_dir
result_dir=result_dir,
scan_id=scan_id
)
all_result_files.extend(result_files)
failed_tools.extend(stage1_failed)
successful_tool_names.extend(stage1_success)
executed_tasks.extend([f'passive ({tool})' for tool in stage1_success])
else:
logger.warning("未启用任何被动收集工具")
# 合并 Stage 1 结果
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
@@ -456,7 +467,6 @@ def subdomain_discovery_flow(
else:
# Create an empty file
Path(current_result).touch()
logger.warning("Stage 1 produced no results; creating an empty file")
# ==================== Stage 2: Wordlist bruteforce (optional) ====================
bruteforce_enabled = bruteforce_config.get('enabled', False)
@@ -464,6 +474,7 @@ def subdomain_discovery_flow(
logger.info("=" * 40)
logger.info("Stage 2: 字典爆破")
logger.info("=" * 40)
user_log(scan_id, "subdomain_discovery", "Stage 2: bruteforce")
bruteforce_tool_config = bruteforce_config.get('subdomain_bruteforce', {})
wordlist_name = bruteforce_tool_config.get('wordlist_name', 'dns_wordlist.txt')
@@ -496,22 +507,16 @@ def subdomain_discovery_flow(
**bruteforce_tool_config,
'timeout': timeout_value,
}
logger.info(
"subdomain_bruteforce 使用自动 timeout: %s 秒 (字典行数=%s, 3秒/行)",
timeout_value,
line_count_int,
)
brute_output = str(result_dir / f"subs_brute_{timestamp}.txt")
brute_result = _run_single_tool(
tool_name='subdomain_bruteforce',
tool_config=bruteforce_tool_config,
command_params={
'domain': domain_name,
'wordlist': local_wordlist_path,
'output_file': brute_output
},
result_dir=result_dir
result_dir=result_dir,
scan_id=scan_id
)
if brute_result:
@@ -522,11 +527,16 @@ def subdomain_discovery_flow(
)
successful_tool_names.append('subdomain_bruteforce')
executed_tasks.append('bruteforce')
logger.info("✓ subdomain_bruteforce 执行完成")
user_log(scan_id, "subdomain_discovery", "subdomain_bruteforce completed")
else:
failed_tools.append({'tool': 'subdomain_bruteforce', 'reason': 'execution failed'})
logger.warning("⚠️ subdomain_bruteforce failed")
user_log(scan_id, "subdomain_discovery", "subdomain_bruteforce failed: execution failed", "error")
except Exception as exc:
logger.warning("字典准备失败,跳过字典爆破: %s", exc)
failed_tools.append({'tool': 'subdomain_bruteforce', 'reason': str(exc)})
logger.warning("字典准备失败,跳过字典爆破: %s", exc)
user_log(scan_id, "subdomain_discovery", f"subdomain_bruteforce failed: {str(exc)}", "error")
# ==================== Stage 3: Permutation + verification (optional) ====================
permutation_enabled = permutation_config.get('enabled', False)
@@ -534,6 +544,7 @@ def subdomain_discovery_flow(
logger.info("=" * 40)
logger.info("Stage 3: 变异生成 + 存活验证(流式管道)")
logger.info("=" * 40)
user_log(scan_id, "subdomain_discovery", "Stage 3: permutation + resolve")
permutation_tool_config = permutation_config.get('subdomain_permutation_resolve', {})
@@ -587,20 +598,19 @@ def subdomain_discovery_flow(
'tool': 'subdomain_permutation_resolve',
'reason': f"采样检测到泛解析 (膨胀率 {ratio:.1f}x)"
})
user_log(scan_id, "subdomain_discovery", f"subdomain_permutation_resolve skipped: wildcard detected (ratio {ratio:.1f}x)", "warning")
else:
# === Step 3.2: Sampling passed; run the full permutation ===
logger.info("Sampling check passed; running full permutation...")
permuted_output = str(result_dir / f"subs_permuted_{timestamp}.txt")
permuted_result = _run_single_tool(
tool_name='subdomain_permutation_resolve',
tool_config=permutation_tool_config,
command_params={
'input_file': current_result,
'output_file': permuted_output,
},
result_dir=result_dir
result_dir=result_dir,
scan_id=scan_id
)
if permuted_result:
@@ -611,15 +621,21 @@ def subdomain_discovery_flow(
)
successful_tool_names.append('subdomain_permutation_resolve')
executed_tasks.append('permutation')
logger.info("✓ subdomain_permutation_resolve 执行完成")
user_log(scan_id, "subdomain_discovery", "subdomain_permutation_resolve completed")
else:
failed_tools.append({'tool': 'subdomain_permutation_resolve', 'reason': 'execution failed'})
logger.warning("⚠️ subdomain_permutation_resolve failed")
user_log(scan_id, "subdomain_discovery", "subdomain_permutation_resolve failed: execution failed", "error")
except subprocess.TimeoutExpired:
logger.warning(f"采样检测超时 ({SAMPLE_TIMEOUT}秒),跳过变异")
failed_tools.append({'tool': 'subdomain_permutation_resolve', 'reason': '采样检测超时'})
logger.warning(f"采样检测超时 ({SAMPLE_TIMEOUT}秒),跳过变异")
user_log(scan_id, "subdomain_discovery", "subdomain_permutation_resolve failed: sample detection timeout", "error")
except Exception as e:
logger.warning(f"采样检测失败: {e},跳过变异")
failed_tools.append({'tool': 'subdomain_permutation_resolve', 'reason': f'采样检测失败: {e}'})
logger.warning(f"采样检测失败: {e},跳过变异")
user_log(scan_id, "subdomain_discovery", f"subdomain_permutation_resolve failed: {str(e)}", "error")
# ==================== Stage 4: DNS liveness verification (optional) ====================
# Runs whenever resolve.enabled is true, regardless of Stage 3, applying unified DNS verification to all current candidate subdomains
@@ -628,6 +644,7 @@ def subdomain_discovery_flow(
logger.info("=" * 40)
logger.info("Stage 4: DNS 存活验证")
logger.info("=" * 40)
user_log(scan_id, "subdomain_discovery", "Stage 4: DNS resolve")
resolve_tool_config = resolve_config.get('subdomain_resolve', {})
@@ -651,30 +668,27 @@ def subdomain_discovery_flow(
**resolve_tool_config,
'timeout': timeout_value,
}
logger.info(
"subdomain_resolve 使用自动 timeout: %s 秒 (候选子域数=%s, 3秒/域名)",
timeout_value,
line_count_int,
)
alive_output = str(result_dir / f"subs_alive_{timestamp}.txt")
alive_result = _run_single_tool(
tool_name='subdomain_resolve',
tool_config=resolve_tool_config,
command_params={
'input_file': current_result,
'output_file': alive_output,
},
result_dir=result_dir
result_dir=result_dir,
scan_id=scan_id
)
if alive_result:
current_result = alive_result
successful_tool_names.append('subdomain_resolve')
executed_tasks.append('resolve')
logger.info("✓ subdomain_resolve 执行完成")
user_log(scan_id, "subdomain_discovery", "subdomain_resolve completed")
else:
failed_tools.append({'tool': 'subdomain_resolve', 'reason': 'execution failed'})
logger.warning("⚠️ subdomain_resolve failed")
user_log(scan_id, "subdomain_discovery", "subdomain_resolve failed: execution failed", "error")
# ==================== Final: Save to database ====================
logger.info("=" * 40)
@@ -695,7 +709,9 @@ def subdomain_discovery_flow(
processed_domains = save_result.get('processed_records', 0)
executed_tasks.append('save_domains')
# Record flow completion
logger.info("="*60 + "\n✓ Subdomain discovery scan finished\n" + "="*60)
user_log(scan_id, "subdomain_discovery", f"subdomain_discovery completed: found {processed_domains} subdomains")
return {
'success': True,

View File

@@ -59,6 +59,8 @@ def domain_name_url_fetch_flow(
- IP and CIDR targets are skipped automatically (unsupported by waymore and similar tools)
- The tools automatically collect all historical URLs for *.target_name, so there is no need to iterate subdomains
"""
from apps.scan.utils import user_log
try:
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
@@ -145,6 +147,9 @@ def domain_name_url_fetch_flow(
timeout,
)
# Log tool start
user_log(scan_id, "url_fetch", f"Running {tool_name}: {command}")
future = run_url_fetcher_task.submit(
tool_name=tool_name,
command=command,
@@ -163,22 +168,28 @@ def domain_name_url_fetch_flow(
if result and result.get("success"):
result_files.append(result["output_file"])
successful_tools.append(tool_name)
url_count = result.get("url_count", 0)
logger.info(
"✓ 工具 %s 执行成功 - 发现 URL: %d",
tool_name,
result.get("url_count", 0),
url_count,
)
user_log(scan_id, "url_fetch", f"{tool_name} completed: found {url_count} urls")
else:
reason = "未生成结果或无有效 URL"
failed_tools.append(
{
"tool": tool_name,
"reason": "未生成结果或无有效 URL",
"reason": reason,
}
)
logger.warning("⚠️ 工具 %s 未生成有效结果", tool_name)
user_log(scan_id, "url_fetch", f"{tool_name} failed: {reason}", "error")
except Exception as e:
failed_tools.append({"tool": tool_name, "reason": str(e)})
reason = str(e)
failed_tools.append({"tool": tool_name, "reason": reason})
logger.warning("⚠️ 工具 %s 执行失败: %s", tool_name, e)
user_log(scan_id, "url_fetch", f"{tool_name} failed: {reason}", "error")
logger.info(
"基于 domain_name 的 URL 获取完成 - 成功工具: %s, 失败工具: %s",

View File

@@ -25,6 +25,7 @@ from apps.scan.handlers.scan_flow_handlers import (
on_scan_flow_completed,
on_scan_flow_failed,
)
from apps.scan.utils import user_log
from .domain_name_url_fetch_flow import domain_name_url_fetch_flow
from .sites_url_fetch_flow import sites_url_fetch_flow
@@ -291,6 +292,8 @@ def url_fetch_flow(
"="*60
)
user_log(scan_id, "url_fetch", "Starting URL fetch")
# Step 1: Prepare the working directory
logger.info("Step 1: Preparing the working directory")
from apps.scan.utils import setup_scan_directory
@@ -403,7 +406,9 @@ def url_fetch_flow(
target_id=target_id
)
logger.info("="*60 + "\n✓ URL 获取扫描完成\n" + "="*60)
# 记录 Flow 完成
logger.info("✓ URL 获取完成 - 保存 endpoints: %d", saved_count)
user_log(scan_id, "url_fetch", f"url_fetch completed: found {saved_count} endpoints")
# Build the list of executed tasks
executed_tasks = ['setup_directory', 'classify_tools']

View File

@@ -116,7 +116,8 @@ def sites_url_fetch_flow(
tools=enabled_tools,
input_file=sites_file,
input_type="sites_file",
output_dir=output_path
output_dir=output_path,
scan_id=scan_id
)
logger.info(

View File

@@ -152,7 +152,8 @@ def run_tools_parallel(
tools: dict,
input_file: str,
input_type: str,
output_dir: Path
output_dir: Path,
scan_id: int
) -> tuple[list, list, list]:
"""
Run the tool list in parallel
@@ -162,11 +163,13 @@ def run_tools_parallel(
input_file: input file path
input_type: input type
output_dir: output directory
scan_id: scan task ID (used for logging)
Returns:
tuple: (result_files, failed_tools, successful_tool_names)
"""
from apps.scan.tasks.url_fetch import run_url_fetcher_task
from apps.scan.utils import user_log
futures: dict[str, object] = {}
failed_tools: list[dict] = []
@@ -192,6 +195,9 @@ def run_tools_parallel(
exec_params["timeout"],
)
# Log tool start
user_log(scan_id, "url_fetch", f"Running {tool_name}: {exec_params['command']}")
# Submit the parallel task
future = run_url_fetcher_task.submit(
tool_name=tool_name,
@@ -208,22 +214,28 @@ def run_tools_parallel(
result = future.result()
if result and result['success']:
result_files.append(result['output_file'])
url_count = result['url_count']
logger.info(
"✓ 工具 %s 执行成功 - 发现 URL: %d",
tool_name, result['url_count']
tool_name, url_count
)
user_log(scan_id, "url_fetch", f"{tool_name} completed: found {url_count} urls")
else:
reason = 'no output produced or no valid URLs'
failed_tools.append({
'tool': tool_name,
'reason': 'no output produced or no valid URLs'
'reason': reason
})
logger.warning("⚠️ 工具 %s 未生成有效结果", tool_name)
user_log(scan_id, "url_fetch", f"{tool_name} failed: {reason}", "error")
except Exception as e:
reason = str(e)
failed_tools.append({
'tool': tool_name,
'reason': str(e)
'reason': reason
})
logger.warning("⚠️ 工具 %s 执行失败: %s", tool_name, e)
user_log(scan_id, "url_fetch", f"{tool_name} failed: {reason}", "error")
# Compute the list of successful tools
failed_tool_names = [f['tool'] for f in failed_tools]

View File

@@ -12,7 +12,7 @@ from apps.scan.handlers.scan_flow_handlers import (
on_scan_flow_completed,
on_scan_flow_failed,
)
from apps.scan.utils import build_scan_command, ensure_nuclei_templates_local
from apps.scan.utils import build_scan_command, ensure_nuclei_templates_local, user_log
from apps.scan.tasks.vuln_scan import (
export_endpoints_task,
run_vuln_tool_task,
@@ -141,6 +141,7 @@ def endpoints_vuln_scan_flow(
# Dalfox XSS uses a streaming task, saving vulnerability results while parsing
if tool_name == "dalfox_xss":
logger.info("开始执行漏洞扫描工具 %s(流式保存漏洞结果,已提交任务)", tool_name)
user_log(scan_id, "vuln_scan", f"Running {tool_name}: {command}")
future = run_and_stream_save_dalfox_vulns_task.submit(
cmd=command,
tool_name=tool_name,
@@ -163,6 +164,7 @@ def endpoints_vuln_scan_flow(
elif tool_name == "nuclei":
# Nuclei uses a streaming task
logger.info("Starting vulnerability scan tool %s (streaming vuln results, task submitted)", tool_name)
user_log(scan_id, "vuln_scan", f"Running {tool_name}: {command}")
future = run_and_stream_save_nuclei_vulns_task.submit(
cmd=command,
tool_name=tool_name,
@@ -185,6 +187,7 @@ def endpoints_vuln_scan_flow(
else:
# Other tools still use the non-streaming execution path
logger.info("Starting vulnerability scan tool %s (task submitted)", tool_name)
user_log(scan_id, "vuln_scan", f"Running {tool_name}: {command}")
future = run_vuln_tool_task.submit(
tool_name=tool_name,
command=command,
@@ -203,24 +206,34 @@ def endpoints_vuln_scan_flow(
# Collect execution results from all tools
for tool_name, meta in tool_futures.items():
future = meta["future"]
result = future.result()
try:
result = future.result()
if meta["mode"] == "streaming":
tool_results[tool_name] = {
"command": meta["command"],
"timeout": meta["timeout"],
"processed_records": result.get("processed_records"),
"created_vulns": result.get("created_vulns"),
"command_log_file": meta["log_file"],
}
else:
tool_results[tool_name] = {
"command": meta["command"],
"timeout": meta["timeout"],
"duration": result.get("duration"),
"returncode": result.get("returncode"),
"command_log_file": result.get("command_log_file"),
}
if meta["mode"] == "streaming":
created_vulns = result.get("created_vulns", 0)
tool_results[tool_name] = {
"command": meta["command"],
"timeout": meta["timeout"],
"processed_records": result.get("processed_records"),
"created_vulns": created_vulns,
"command_log_file": meta["log_file"],
}
logger.info("✓ 工具 %s 执行完成 - 漏洞: %d", tool_name, created_vulns)
user_log(scan_id, "vuln_scan", f"{tool_name} completed: found {created_vulns} vulnerabilities")
else:
tool_results[tool_name] = {
"command": meta["command"],
"timeout": meta["timeout"],
"duration": result.get("duration"),
"returncode": result.get("returncode"),
"command_log_file": result.get("command_log_file"),
}
logger.info("✓ 工具 %s 执行完成 - returncode=%s", tool_name, result.get("returncode"))
user_log(scan_id, "vuln_scan", f"{tool_name} completed")
except Exception as e:
reason = str(e)
logger.error("工具 %s 执行失败: %s", tool_name, e, exc_info=True)
user_log(scan_id, "vuln_scan", f"{tool_name} failed: {reason}", "error")
return {
"success": True,

View File

@@ -11,6 +11,7 @@ from apps.scan.handlers.scan_flow_handlers import (
on_scan_flow_failed,
)
from apps.scan.configs.command_templates import get_command_template
from apps.scan.utils import user_log
from .endpoints_vuln_scan_flow import endpoints_vuln_scan_flow
@@ -72,6 +73,9 @@ def vuln_scan_flow(
if not enabled_tools:
raise ValueError("enabled_tools 不能为空")
logger.info("开始漏洞扫描 - Scan ID: %s, Target: %s", scan_id, target_name)
user_log(scan_id, "vuln_scan", "Starting vulnerability scan")
# Step 1: Classify tools
endpoints_tools, other_tools = _classify_vuln_tools(enabled_tools)
@@ -99,6 +103,14 @@ def vuln_scan_flow(
enabled_tools=endpoints_tools,
)
# Record flow completion
total_vulns = sum(
r.get("created_vulns", 0)
for r in endpoint_result.get("tool_results", {}).values()
)
logger.info("✓ 漏洞扫描完成 - 新增漏洞: %d", total_vulns)
user_log(scan_id, "vuln_scan", f"vuln_scan completed: found {total_vulns} vulnerabilities")
# Currently only one sub-flow; return its result directly
return endpoint_result

View File

@@ -14,6 +14,7 @@ from prefect import Flow
from prefect.client.schemas import FlowRun, State
from apps.scan.utils.performance import FlowPerformanceTracker
from apps.scan.utils import user_log
logger = logging.getLogger(__name__)
@@ -136,6 +137,7 @@ def on_scan_flow_failed(flow: Flow, flow_run: FlowRun, state: State) -> None:
- Update the stage progress to failed
- Send a scan-failure notification
- Record performance metrics (including the error message)
- Write to ScanLog for display in the frontend
Args:
flow: Prefect Flow 对象
@@ -152,6 +154,11 @@ def on_scan_flow_failed(flow: Flow, flow_run: FlowRun, state: State) -> None:
# Extract the error message
error_message = str(state.message) if state.message else "unknown error"
# Write to ScanLog for display in the frontend
stage = _get_stage_from_flow_name(flow.name)
if scan_id and stage:
user_log(scan_id, stage, f"Failed: {error_message}", "error")
# Record performance metrics (failure case)
tracker = _flow_trackers.pop(str(flow_run.id), None)
if tracker:

View File

@@ -116,4 +116,21 @@ class Migration(migrations.Migration):
'indexes': [models.Index(fields=['-created_at'], name='scheduled_s_created_9b9c2e_idx'), models.Index(fields=['is_enabled', '-created_at'], name='scheduled_s_is_enab_23d660_idx'), models.Index(fields=['name'], name='scheduled_s_name_bf332d_idx')],
},
),
migrations.CreateModel(
name='ScanLog',
fields=[
('id', models.BigAutoField(primary_key=True, serialize=False)),
('level', models.CharField(choices=[('info', 'Info'), ('warning', 'Warning'), ('error', 'Error')], default='info', help_text='Log level', max_length=10)),
('content', models.TextField(help_text='Log content')),
('created_at', models.DateTimeField(auto_now_add=True, db_index=True, help_text='Creation time')),
('scan', models.ForeignKey(db_index=True, help_text='Associated scan task', on_delete=django.db.models.deletion.CASCADE, related_name='logs', to='scan.scan')),
],
options={
'verbose_name': 'Scan log',
'verbose_name_plural': 'Scan logs',
'db_table': 'scan_log',
'ordering': ['created_at'],
'indexes': [models.Index(fields=['scan', 'created_at'], name='scan_log_scan_id_e8c8f5_idx')],
},
),
]

View File

@@ -106,6 +106,55 @@ class Scan(models.Model):
return f"Scan #{self.id} - {self.target.name}"
class ScanLog(models.Model):
"""扫描日志模型
存储扫描过程中的关键处理日志,用于前端实时查看扫描进度。
日志类型:
- 阶段开始/完成/失败
- 处理进度(如 "Progress: 50/120"
- 发现结果统计(如 "Found 120 subdomains"
- 错误信息
日志格式:[stage_name] message
"""
class Level(models.TextChoices):
INFO = 'info', 'Info'
WARNING = 'warning', 'Warning'
ERROR = 'error', 'Error'
id = models.BigAutoField(primary_key=True)
scan = models.ForeignKey(
'Scan',
on_delete=models.CASCADE,
related_name='logs',
db_index=True,
help_text='Associated scan task'
)
level = models.CharField(
max_length=10,
choices=Level.choices,
default=Level.INFO,
help_text='Log level'
)
content = models.TextField(help_text='Log content')
created_at = models.DateTimeField(auto_now_add=True, db_index=True, help_text='Creation time')
class Meta:
db_table = 'scan_log'
verbose_name = 'Scan log'
verbose_name_plural = 'Scan logs'
ordering = ['created_at']
indexes = [
models.Index(fields=['scan', 'created_at']),
]
def __str__(self):
return f"[{self.level}] {self.content[:50]}"
class ScheduledScan(models.Model):
"""
Scheduled scan task model

View File

@@ -2,7 +2,17 @@ from rest_framework import serializers
from django.db.models import Count
import yaml
from .models import Scan, ScheduledScan
from .models import Scan, ScheduledScan, ScanLog
# ==================== Scan log serializer ====================
class ScanLogSerializer(serializers.ModelSerializer):
"""扫描日志序列化器"""
class Meta:
model = ScanLog
fields = ['id', 'level', 'content', 'created_at']
# ==================== Common validation mixin ====================

View File

@@ -1,6 +1,6 @@
from django.urls import path, include
from rest_framework.routers import DefaultRouter
from .views import ScanViewSet, ScheduledScanViewSet
from .views import ScanViewSet, ScheduledScanViewSet, ScanLogListView
from .notifications.views import notification_callback
from apps.asset.views import (
SubdomainSnapshotViewSet, WebsiteSnapshotViewSet, DirectorySnapshotViewSet,
@@ -31,6 +31,8 @@ urlpatterns = [
path('', include(router.urls)),
# Worker 回调 API
path('callbacks/notification/', notification_callback, name='notification-callback'),
# 扫描日志 API
path('scans/<int:scan_id>/logs/', ScanLogListView.as_view(), name='scan-logs-list'),
# 嵌套路由:/api/scans/{scan_pk}/xxx/
path('scans/<int:scan_pk>/subdomains/', scan_subdomains_list, name='scan-subdomains-list'),
path('scans/<int:scan_pk>/subdomains/export/', scan_subdomains_export, name='scan-subdomains-export'),

View File

@@ -11,6 +11,7 @@ from .wordlist_helpers import ensure_wordlist_local
from .nuclei_helpers import ensure_nuclei_templates_local
from .performance import FlowPerformanceTracker, CommandPerformanceTracker
from .workspace_utils import setup_scan_workspace, setup_scan_directory
from .user_logger import user_log
from . import config_parser
__all__ = [
@@ -31,6 +32,8 @@ __all__ = [
# Performance monitoring
'FlowPerformanceTracker', # Flow performance tracker (with system resource sampling)
'CommandPerformanceTracker', # Command performance tracker
# Scan logging
'user_log', # User-visible scan log recording
# Config parsing
'config_parser',
]

View File

@@ -0,0 +1,56 @@
"""
扫描日志记录器
提供统一的日志记录接口,用于在 Flow 中记录用户可见的扫描进度日志。
特性:
- 简单的函数式 API
- 只写入数据库ScanLog 表),不写 Python logging
- 错误容忍(数据库失败不影响扫描执行)
职责分离:
- user_log: 用户可见日志(写数据库,前端展示)
- logger: 开发者日志(写日志文件/控制台,调试用)
使用示例:
from apps.scan.utils import user_log
# 用户日志(写数据库)
user_log(scan_id, "port_scan", "Starting port scan")
user_log(scan_id, "port_scan", "naabu completed: found 120 ports")
# 开发者日志(写日志文件)
logger.info("✓ 工具 %s 执行完成 - 记录数: %d", tool_name, count)
"""
import logging
from django.db import DatabaseError
logger = logging.getLogger(__name__)
def user_log(scan_id: int, stage: str, message: str, level: str = "info"):
"""
Record a user-visible scan log entry (database write only)
Args:
scan_id: scan task ID
stage: stage name, e.g. "port_scan", "site_scan"
message: log message
level: log level, "info" by default; "warning" and "error" are also accepted
Database content format: "[{stage}] {message}"
"""
formatted = f"[{stage}] {message}"
try:
from apps.scan.models import ScanLog
ScanLog.objects.create(
scan_id=scan_id,
level=level,
content=formatted
)
except DatabaseError as e:
logger.error("ScanLog write failed - scan_id=%s, error=%s", scan_id, e)
except Exception as e:
logger.error("ScanLog write unexpected error - scan_id=%s, error=%s", scan_id, e)

View File

@@ -2,8 +2,10 @@
from .scan_views import ScanViewSet
from .scheduled_scan_views import ScheduledScanViewSet
from .scan_log_views import ScanLogListView
__all__ = [
'ScanViewSet',
'ScheduledScanViewSet',
'ScanLogListView',
]

View File

@@ -0,0 +1,56 @@
"""
扫描日志 API
提供扫描日志查询接口,支持游标分页用于增量轮询。
"""
from rest_framework.views import APIView
from rest_framework.response import Response
from apps.scan.models import ScanLog
from apps.scan.serializers import ScanLogSerializer
class ScanLogListView(APIView):
"""
GET /scans/{scan_id}/logs/
游标分页 API用于增量查询日志
查询参数:
- afterId: 只返回此 ID 之后的日志(用于增量轮询,避免时间戳重复导致的重复日志)
- limit: 返回数量限制(默认 200最大 1000
返回:
- results: 日志列表
- hasMore: 是否还有更多日志
"""
def get(self, request, scan_id: int):
# Parse parameters
after_id = request.query_params.get('afterId')
try:
limit = min(int(request.query_params.get('limit', 200)), 1000)
except (ValueError, TypeError):
limit = 200
# Query logs ordered by ID (IDs auto-increment, guaranteeing a consistent order)
queryset = ScanLog.objects.filter(scan_id=scan_id).order_by('id')
# Cursor filter (uses the ID rather than the timestamp, so multiple logs sharing a timestamp do not repeat)
if after_id:
try:
queryset = queryset.filter(id__gt=int(after_id))
except (ValueError, TypeError):
pass
# Cap the result count (fetch one extra row to determine hasMore)
logs = list(queryset[:limit + 1])
has_more = len(logs) > limit
if has_more:
logs = logs[:limit]
return Response({
'results': ScanLogSerializer(logs, many=True).data,
'hasMore': has_more,
})
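Because the cursor is the log ID, any client can tail a scan with the same contract the React hook below uses. A minimal polling sketch (the base URL and the absent auth headers are assumptions, not part of this change):

import time

import requests

def tail_scan_logs(scan_id: int, base_url: str = "http://localhost:8000/api/v1"):
    # Follow a scan's logs until interrupted, mirroring the frontend's 3s polling.
    after_id = None
    while True:
        params = {"limit": 200}
        if after_id is not None:
            params["afterId"] = after_id  # only logs with id > after_id
        data = requests.get(f"{base_url}/scans/{scan_id}/logs/", params=params).json()
        for log in data["results"]:
            print(f"[{log['level']}] {log['content']}")
            after_id = log["id"]  # advance the cursor
        if not data["hasMore"]:
            time.sleep(3)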

View File

@@ -102,7 +102,11 @@ RUN pip install uv --break-system-packages && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# 6. Copy backend code
# 6. Set the Prefect config directory (avoids warnings about a missing home directory)
ENV PREFECT_HOME=/app/.prefect
RUN mkdir -p /app/.prefect
# 7. Copy backend code
COPY backend /app/backend
ENV PYTHONPATH=/app/backend

View File

@@ -0,0 +1,111 @@
"use client"
import { useEffect, useRef, useMemo } from "react"
import type { ScanLog } from "@/services/scan.service"
interface ScanLogListProps {
logs: ScanLog[]
loading?: boolean
}
/**
 * Format an ISO timestamp as HH:mm:ss
*/
function formatTime(isoString: string): string {
try {
const date = new Date(isoString)
return date.toLocaleTimeString('zh-CN', {
hour: '2-digit',
minute: '2-digit',
second: '2-digit',
hour12: false,
})
} catch {
return isoString
}
}
/**
 * Escape HTML to prevent XSS
*/
function escapeHtml(text: string): string {
return text
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#039;')
}
/**
 * Scan log list component
 *
 * Features:
 * - Pre-renders an HTML string to cut DOM nodes and improve performance
 * - Color-codes by level: info = default, warning = yellow, error = red
 * - Auto-scrolls to the bottom
*/
export function ScanLogList({ logs, loading }: ScanLogListProps) {
const containerRef = useRef<HTMLDivElement>(null)
const isAtBottomRef = useRef(true) // Track whether the user is at the bottom
// Pre-render the HTML string
const htmlContent = useMemo(() => {
if (logs.length === 0) return ''
return logs.map(log => {
const time = formatTime(log.createdAt)
const content = escapeHtml(log.content)
const levelStyle = log.level === 'error'
? 'color:#ef4444'
: log.level === 'warning'
? 'color:#eab308'
: ''
return `<div style="line-height:1.625;word-break:break-all;${levelStyle}"><span style="color:#6b7280">${time}</span> ${content}</div>`
}).join('')
}, [logs])
// Listen to scroll events to detect whether the user is at the bottom
useEffect(() => {
const container = containerRef.current
if (!container) return
const handleScroll = () => {
const { scrollTop, scrollHeight, clientHeight } = container
// Treat "within 30px of the bottom" as at the bottom
isAtBottomRef.current = scrollHeight - scrollTop - clientHeight < 30
}
container.addEventListener('scroll', handleScroll)
return () => container.removeEventListener('scroll', handleScroll)
}, [])
// Auto-scroll only while the user is at the bottom
useEffect(() => {
if (containerRef.current && isAtBottomRef.current) {
containerRef.current.scrollTop = containerRef.current.scrollHeight
}
}, [htmlContent])
return (
<div
ref={containerRef}
className="h-[400px] overflow-y-auto font-mono text-[11px] p-3 bg-muted/30 rounded-lg"
>
{logs.length === 0 && !loading && (
<div className="text-muted-foreground text-center py-8">
</div>
)}
{htmlContent && (
<div dangerouslySetInnerHTML={{ __html: htmlContent }} />
)}
{loading && logs.length === 0 && (
<div className="text-muted-foreground text-center py-8">
...
</div>
)}
</div>
)
}

View File

@@ -1,6 +1,7 @@
"use client"
import * as React from "react"
import { useState } from "react"
import {
Dialog,
DialogContent,
@@ -9,6 +10,7 @@ import {
} from "@/components/ui/dialog"
import { Badge } from "@/components/ui/badge"
import { Separator } from "@/components/ui/separator"
import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"
import {
IconCircleCheck,
IconLoader,
@@ -19,6 +21,8 @@ import {
import { cn } from "@/lib/utils"
import { useTranslations, useLocale } from "next-intl"
import type { ScanStage, ScanRecord, StageProgress, StageStatus } from "@/types/scan.types"
import { useScanLogs } from "@/hooks/use-scan-logs"
import { ScanLogList } from "./scan-log-list"
/**
* Scan stage details
@@ -190,12 +194,26 @@ export function ScanProgressDialog({
}: ScanProgressDialogProps) {
const t = useTranslations("scan.progress")
const locale = useLocale()
const [activeTab, setActiveTab] = useState<'stages' | 'logs'>('stages')
// Whether the scan is running (controls polling)
const isRunning = data?.status === 'running' || data?.status === 'initiated'
// Log polling hook
const { logs, loading: logsLoading } = useScanLogs({
scanId: data?.id ?? 0,
enabled: open && activeTab === 'logs' && !!data?.id,
pollingInterval: isRunning ? 3000 : 0, // Poll every 3s while running; otherwise no polling
})
if (!data) return null
// Fixed width so the dialog does not resize when switching tabs
const dialogWidth = 'sm:max-w-[600px] sm:min-w-[550px]'
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-[500px] sm:min-w-[450px]">
<DialogContent className={cn(dialogWidth, "transition-all duration-200")}>
<DialogHeader>
<DialogTitle className="flex items-center gap-2">
<ScanStatusIcon status={data.status} />
@@ -244,37 +262,26 @@ export function ScanProgressDialog({
<Separator />
{/* Total progress */}
<div className="space-y-2">
<div className="flex items-center justify-between text-sm">
<span className="font-medium">{t("totalProgress")}</span>
<span className="font-mono text-muted-foreground">{data.progress}%</span>
{/* Tab switcher */}
<Tabs value={activeTab} onValueChange={(v) => setActiveTab(v as 'stages' | 'logs')}>
<TabsList className="grid w-full grid-cols-2">
<TabsTrigger value="stages">{t("tab_stages")}</TabsTrigger>
<TabsTrigger value="logs">{t("tab_logs")}</TabsTrigger>
</TabsList>
</Tabs>
{/* Tab content */}
{activeTab === 'stages' ? (
/* Stage list */
<div className="space-y-2 max-h-[300px] overflow-y-auto">
{data.stages.map((stage) => (
<StageRow key={stage.stage} stage={stage} t={t} />
))}
</div>
<div className="h-2 bg-primary/10 rounded-full overflow-hidden border border-border">
<div
className={`h-full transition-all ${
data.status === "completed" ? "bg-[#238636]/80" :
data.status === "failed" ? "bg-[#da3633]/80" :
data.status === "running" ? "bg-[#d29922]/80 progress-striped" :
data.status === "cancelled" ? "bg-[#848d97]/80" :
data.status === "cancelling" ? "bg-[#d29922]/80 progress-striped" :
data.status === "initiated" ? "bg-[#d29922]/80 progress-striped" :
"bg-muted-foreground/80"
}`}
style={{ width: `${data.status === "completed" ? 100 : data.progress}%` }}
/>
</div>
</div>
<Separator />
{/* Stage list */}
<div className="space-y-2 max-h-[300px] overflow-y-auto">
{data.stages.map((stage) => (
<StageRow key={stage.stage} stage={stage} t={t} />
))}
</div>
) : (
/* Log list */
<ScanLogList logs={logs} loading={logsLoading} />
)}
</DialogContent>
</Dialog>
)

View File

@@ -36,6 +36,7 @@ const converter = new AnsiToHtml({
export function AnsiLogViewer({ content, className }: AnsiLogViewerProps) {
const containerRef = useRef<HTMLPreElement>(null)
const isAtBottomRef = useRef(true) // Track whether the user is at the bottom
// Convert ANSI to HTML
const htmlContent = useMemo(() => {
@@ -43,9 +44,24 @@ export function AnsiLogViewer({ content, className }: AnsiLogViewerProps) {
return converter.toHtml(content)
}, [content])
// Auto-scroll to the bottom
// Listen to scroll events to detect whether the user is at the bottom
useEffect(() => {
if (containerRef.current) {
const container = containerRef.current
if (!container) return
const handleScroll = () => {
const { scrollTop, scrollHeight, clientHeight } = container
// Treat "within 30px of the bottom" as at the bottom
isAtBottomRef.current = scrollHeight - scrollTop - clientHeight < 30
}
container.addEventListener('scroll', handleScroll)
return () => container.removeEventListener('scroll', handleScroll)
}, [])
// Auto-scroll only while the user is at the bottom
useEffect(() => {
if (containerRef.current && isAtBottomRef.current) {
containerRef.current.scrollTop = containerRef.current.scrollHeight
}
}, [htmlContent])

View File

@@ -0,0 +1,106 @@
/**
 * Scan log polling hook
 *
 * Features:
 * - Initial load fetches all logs
 * - Incremental polling fetches new logs (3s interval)
 * - Polling stops once the scan has finished
*/
import { useState, useEffect, useCallback, useRef } from 'react'
import { getScanLogs, type ScanLog } from '@/services/scan.service'
interface UseScanLogsOptions {
scanId: number
enabled?: boolean
pollingInterval?: number // defaults to 3000ms
}
interface UseScanLogsReturn {
logs: ScanLog[]
loading: boolean
refetch: () => void
}
export function useScanLogs({
scanId,
enabled = true,
pollingInterval = 3000,
}: UseScanLogsOptions): UseScanLogsReturn {
const [logs, setLogs] = useState<ScanLog[]>([])
const [loading, setLoading] = useState(false)
const lastLogId = useRef<number | null>(null)
const isMounted = useRef(true)
const fetchLogs = useCallback(async (incremental = false) => {
if (!enabled || !isMounted.current) return
setLoading(true)
try {
const params: { limit: number; afterId?: number } = { limit: 200 }
if (incremental && lastLogId.current !== null) {
params.afterId = lastLogId.current
}
const response = await getScanLogs(scanId, params)
const newLogs = response.results
if (!isMounted.current) return
if (newLogs.length > 0) {
// Use the ID as the cursor (IDs are unique and auto-incrementing, avoiding duplicate logs from repeated timestamps)
lastLogId.current = newLogs[newLogs.length - 1].id
if (incremental) {
// Deduplicate by ID to guard against duplicates from React Strict Mode or race conditions
setLogs(prev => {
const existingIds = new Set(prev.map(l => l.id))
const uniqueNewLogs = newLogs.filter(l => !existingIds.has(l.id))
return uniqueNewLogs.length > 0 ? [...prev, ...uniqueNewLogs] : prev
})
} else {
setLogs(newLogs)
}
}
} catch (error) {
console.error('Failed to fetch scan logs:', error)
} finally {
if (isMounted.current) {
setLoading(false)
}
}
}, [scanId, enabled])
// Initial load
useEffect(() => {
isMounted.current = true
if (enabled) {
// Reset state
setLogs([])
lastLogId.current = null
fetchLogs(false)
}
return () => {
isMounted.current = false
}
}, [scanId, enabled])
// Polling
useEffect(() => {
if (!enabled || pollingInterval <= 0) return // a non-positive interval disables polling (setInterval with 0 would poll continuously)
const interval = setInterval(() => {
fetchLogs(true) // incremental fetch
}, pollingInterval)
return () => clearInterval(interval)
}, [enabled, pollingInterval, fetchLogs])
const refetch = useCallback(() => {
setLogs([])
lastLogId.current = null
fetchLogs(false)
}, [fetchLogs])
return { logs, loading, refetch }
}

View File

@@ -737,6 +737,8 @@
"status": "Status",
"errorReason": "Error Reason",
"totalProgress": "Total Progress",
"tab_stages": "Stages",
"tab_logs": "Logs",
"status_running": "Scanning",
"status_cancelled": "Cancelled",
"status_completed": "Completed",

View File

@@ -737,6 +737,8 @@
"status": "状态",
"errorReason": "错误原因",
"totalProgress": "总进度",
"tab_stages": "阶段",
"tab_logs": "日志",
"status_running": "扫描中",
"status_cancelled": "已取消",
"status_completed": "已完成",

View File

@@ -113,3 +113,40 @@ export async function getScanStatistics(): Promise<ScanStatistics> {
const res = await api.get<ScanStatistics>('/scans/statistics/')
return res.data
}
/**
* Scan log entry type
*/
export interface ScanLog {
id: number
level: 'info' | 'warning' | 'error'
content: string
createdAt: string
}
/**
* Get scan logs response type
*/
export interface GetScanLogsResponse {
results: ScanLog[]
hasMore: boolean
}
/**
* Get scan logs params type
*/
export interface GetScanLogsParams {
afterId?: number
limit?: number
}
/**
* Get scan logs
* @param scanId - Scan ID
* @param params - Query parameters (afterId for cursor, limit for max results)
* @returns Scan logs with hasMore indicator
*/
export async function getScanLogs(scanId: number, params?: GetScanLogsParams): Promise<GetScanLogsResponse> {
const res = await api.get<GetScanLogsResponse>(`/scans/${scanId}/logs/`, { params })
return res.data
}