mirror of
https://github.com/yyhuni/xingrin.git
synced 2026-01-31 11:46:16 +08:00
337 lines
11 KiB
Python
337 lines
11 KiB
Python
"""
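Fingerprint detection Flow

Orchestrates the complete fingerprint detection workflow.

Architecture:
- The Flow orchestrates multiple atomic Tasks
- Runs serially after site_scan
- Uses the xingfinger tool to identify the technology stack
- Streams the tool output and updates the database in batches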
"""
|
|
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from prefect import flow
|
|
|
|
from apps.scan.handlers.scan_flow_handlers import (
|
|
on_scan_flow_completed,
|
|
on_scan_flow_failed,
|
|
on_scan_flow_running,
|
|
)
|
|
from apps.scan.tasks.fingerprint_detect import (
|
|
export_site_urls_for_fingerprint_task,
|
|
run_xingfinger_and_stream_update_tech_task,
|
|
)
|
|
from apps.scan.utils import build_scan_command, setup_scan_directory, user_log, wait_for_system_load
|
|
from apps.scan.utils.fingerprint_helpers import get_fingerprint_paths
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class FingerprintContext:
    """Mutable state shared by the fingerprint-detection helper functions.

    The flow builds one instance per run and fills in ``urls_file`` and
    ``url_count`` after the URL export step has finished.
    """

    # --- required: identify the scan and where it works ---
    scan_id: int
    target_id: int
    target_name: str
    scan_workspace_dir: str

    # --- optional: populated while the flow progresses ---
    # Working directory of this step; tool log files are created inside it.
    fingerprint_dir: Optional[Path] = None
    # Path of the exported URL list handed to the tools.
    urls_file: str = ""
    # Number of exported URLs; drives the timeout calculation.
    url_count: int = 0
    # Forwarded unchanged as the ``source`` argument of the scan task.
    source: str = "website"
|
|
|
|
|
|
def calculate_fingerprint_detect_timeout(
    url_count: int,
    base_per_url: float = 10.0,
    min_timeout: int = 300
) -> int:
    """Derive the tool timeout (in seconds) from the number of URLs.

    Args:
        url_count: Number of URLs that will be scanned.
        base_per_url: Seconds budgeted per URL.
        min_timeout: Lower bound of the result (300 seconds by default).

    Returns:
        ``url_count * base_per_url`` truncated to an int, but never less
        than ``min_timeout``.
    """
    scaled = int(url_count * base_per_url)
    # Only exceed the floor when the scaled budget is strictly larger.
    return scaled if scaled > min_timeout else min_timeout
|
|
|
|
|
|
|
|
def _export_urls(fingerprint_dir: Path, provider) -> tuple[str, int]:
    """Export the site URLs to ``urls.txt`` inside ``fingerprint_dir``.

    Args:
        fingerprint_dir: Directory that receives the ``urls.txt`` file.
        provider: Passed through unchanged to the export task.

    Returns:
        ``(output_file, total_count)`` exactly as reported by the export task.
    """
    logger.info("Step 1: 导出 URL 列表")

    requested_path = fingerprint_dir / 'urls.txt'
    outcome = export_site_urls_for_fingerprint_task(
        output_file=str(requested_path),
        provider=provider,
    )

    exported_count = outcome['total_count']
    # Use the path reported by the task rather than the one we requested.
    exported_file = outcome['output_file']
    logger.info("✓ URL 导出完成 - 文件: %s, 数量: %d", exported_file, exported_count)

    return exported_file, exported_count
|
|
|
|
|
|
def _run_single_tool(
    tool_name: str,
    tool_config: dict,
    ctx: FingerprintContext
) -> tuple[Optional[dict], Optional[dict]]:
    """Run one fingerprint tool and report either its stats or its failure.

    Args:
        tool_name: Name of the tool to run.
        tool_config: Tool configuration; ``fingerprint_libs`` selects the
            fingerprint libraries and defaults to ``['ehole']``.
        ctx: Shared context supplying the scan/target ids, URL file, URL
            count, working directory and source.

    Returns:
        ``(stats, None)`` on success or ``(None, failed_info)`` on failure,
        where ``failed_info`` is ``{'tool': ..., 'reason': ...}``.
        Errors from building the command or running the task are converted
        into ``failed_info``; anything raised elsewhere propagates.
    """

    def _failed(why: str) -> tuple[None, dict]:
        # Single place that shapes the failure half of the return value.
        return None, {'tool': tool_name, 'reason': why}

    # Resolve the fingerprint libraries requested for this tool.
    requested_libs = tool_config.get('fingerprint_libs', ['ehole'])
    lib_paths = get_fingerprint_paths(requested_libs)
    if not lib_paths:
        message = f"没有可用的指纹库: {requested_libs}"
        logger.warning(message)
        return _failed(message)

    # Resolved library paths are merged over the tool's own configuration.
    merged_config = {**tool_config, **lib_paths}
    try:
        cmd_line = build_scan_command(
            tool_name=tool_name,
            scan_type='fingerprint_detect',
            command_params={'urls_file': ctx.urls_file},
            tool_config=merged_config,
        )
    except Exception as build_err:
        message = f"命令构建失败: {build_err}"
        logger.error("构建 %s 命令失败: %s", tool_name, build_err)
        return _failed(message)

    # Timeout scales with the URL count; the log file name is timestamped.
    run_timeout = calculate_fingerprint_detect_timeout(ctx.url_count)
    log_path = ctx.fingerprint_dir / f"{tool_name}_{datetime.now():%Y%m%d_%H%M%S}.log"

    logger.info(
        "开始执行 %s 指纹识别 - URL数: %d, 超时: %ds, 指纹库: %s",
        tool_name, ctx.url_count, run_timeout, [*lib_paths.keys()]
    )
    user_log(ctx.scan_id, "fingerprint_detect", f"Running {tool_name}: {cmd_line}")

    # Bookkeeping stays inside the try so any error there is also reported
    # as a tool failure instead of escaping to the caller.
    try:
        task_result = run_xingfinger_and_stream_update_tech_task(
            cmd=cmd_line,
            tool_name=tool_name,
            scan_id=ctx.scan_id,
            target_id=ctx.target_id,
            source=ctx.source,
            cwd=str(ctx.fingerprint_dir),
            timeout=run_timeout,
            log_file=str(log_path),
            batch_size=100,
        )

        run_stats = {
            'command': cmd_line,
            'result': task_result,
            'timeout': run_timeout,
            'fingerprint_libs': [*lib_paths.keys()],
        }

        updated = task_result.get('updated_count', 0)
        logger.info(
            "✓ 工具 %s 执行完成 - 处理记录: %d, 更新: %d, 未找到: %d",
            tool_name,
            task_result.get('processed_records', 0),
            updated,
            task_result.get('not_found_count', 0),
        )
        user_log(
            ctx.scan_id,
            "fingerprint_detect",
            f"{tool_name} completed: identified {updated} fingerprints",
        )
        return run_stats, None

    except Exception as run_err:
        message = str(run_err)
        logger.error("工具 %s 执行失败: %s", tool_name, run_err, exc_info=True)
        user_log(ctx.scan_id, "fingerprint_detect", f"{tool_name} failed: {message}", "error")
        return _failed(message)
|
|
|
|
|
|
def _run_fingerprint_detect(enabled_tools: dict, ctx: FingerprintContext) -> tuple[dict, list]:
    """Run every enabled tool in turn and collect the outcomes.

    Args:
        enabled_tools: Mapping of tool name to that tool's configuration.
        ctx: Shared fingerprint-detection context.

    Returns:
        ``(tool_stats, failed_tools)``: stats keyed by tool name for the
        tools that succeeded, and a list of ``{'tool', 'reason'}`` dicts
        for those that did not.
    """
    collected_stats: dict = {}
    failures: list = []

    for name, config in enabled_tools.items():
        run_stats, failure = _run_single_tool(name, config, ctx)
        if run_stats:
            collected_stats[name] = run_stats
        if failure:
            failures.append(failure)

    if not failures:
        return collected_stats, failures

    # One summary line naming every tool that failed.
    logger.warning(
        "以下指纹识别工具执行失败: %s",
        ', '.join(entry['tool'] for entry in failures)
    )
    return collected_stats, failures
|
|
|
|
|
|
def _aggregate_results(tool_stats: dict) -> dict:
|
|
"""汇总所有工具的结果"""
|
|
return {
|
|
'processed_records': sum(
|
|
s['result'].get('processed_records', 0) for s in tool_stats.values()
|
|
),
|
|
'updated_count': sum(
|
|
s['result'].get('updated_count', 0) for s in tool_stats.values()
|
|
),
|
|
'created_count': sum(
|
|
s['result'].get('created_count', 0) for s in tool_stats.values()
|
|
),
|
|
'snapshot_count': sum(
|
|
s['result'].get('snapshot_count', 0) for s in tool_stats.values()
|
|
),
|
|
}
|
|
|
|
|
|
@flow(
    name="fingerprint_detect",
    log_prints=True,
    on_running=[on_scan_flow_running],
    on_completion=[on_scan_flow_completed],
    on_failure=[on_scan_flow_failed],
)
def fingerprint_detect_flow(
    scan_id: int,
    target_id: int,
    scan_workspace_dir: str,
    enabled_tools: dict,
    provider,
) -> dict:
    """Fingerprint detection flow.

    Main steps (translated from the original description):
      1. Export all WebSite URLs of the target to a file.
      2. Identify the technology stack with xingfinger.
      3. Parse the results and update the WebSite.tech field (merged and
         de-duplicated).

    Args:
        scan_id: Scan identifier; must not be None.
        target_id: Target identifier; must not be None.
        scan_workspace_dir: Scan workspace directory; must not be empty.
        enabled_tools: Mapping of tool name to tool configuration.
        provider: Object supplying ``get_target_name()``; it is also passed
            on to the URL export step.

    Returns:
        A result dict with ``success``, ``scan_id``, ``target``,
        ``scan_workspace_dir``, ``urls_file``, ``url_count``, the aggregated
        counters, ``executed_tasks`` and a ``tool_stats`` summary. When no
        URLs were exported, the empty result from ``_build_empty_result`` is
        returned instead.

    Raises:
        ValueError: If a required argument is missing or the provider
            returns no target name.
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        # Validate the plain arguments before doing anything else, so an
        # invalid invocation fails immediately instead of first waiting on
        # the system-load check and querying the provider.
        if scan_id is None:
            raise ValueError("scan_id 不能为空")
        if target_id is None:
            raise ValueError("target_id 不能为空")
        if not scan_workspace_dir:
            raise ValueError("scan_workspace_dir 不能为空")

        # NOTE(review): presumably blocks until system load is acceptable;
        # confirm against apps.scan.utils.
        wait_for_system_load(context="fingerprint_detect_flow")

        # The target name comes from the provider.
        target_name = provider.get_target_name()
        if not target_name:
            raise ValueError("无法获取 Target 名称")

        logger.info(
            "开始指纹识别 - Scan ID: %s, Target: %s, Workspace: %s",
            scan_id, target_name, scan_workspace_dir
        )
        user_log(scan_id, "fingerprint_detect", "Starting fingerprint detection")

        # Shared context handed to the helper functions.
        ctx = FingerprintContext(
            scan_id=scan_id,
            target_id=target_id,
            target_name=target_name,
            scan_workspace_dir=scan_workspace_dir,
            fingerprint_dir=setup_scan_directory(scan_workspace_dir, 'fingerprint_detect')
        )

        # Step 1: export the URLs.
        ctx.urls_file, ctx.url_count = _export_urls(ctx.fingerprint_dir, provider)

        if ctx.url_count == 0:
            # Nothing to scan: report a successful, empty run.
            logger.warning("跳过指纹识别:没有 URL 可扫描 - Scan ID: %s", scan_id)
            user_log(scan_id, "fingerprint_detect", "Skipped: no URLs to scan", "warning")
            return _build_empty_result(scan_id, target_name, scan_workspace_dir, ctx.urls_file)

        # Step 2: tool configuration info.
        logger.info("Step 2: 工具配置信息")
        logger.info("✓ 启用工具: %s", ', '.join(enabled_tools.keys()))

        # Step 3: run the fingerprint detection tools.
        logger.info("Step 3: 执行指纹识别")
        tool_stats, failed_tools = _run_fingerprint_detect(enabled_tools, ctx)

        # Aggregate the per-tool counters and split tools by outcome.
        totals = _aggregate_results(tool_stats)
        failed_tool_names = {f['tool'] for f in failed_tools}
        successful_tools = [name for name in enabled_tools if name not in failed_tool_names]

        logger.info("✓ 指纹识别完成 - 识别指纹: %d", totals['updated_count'])
        user_log(
            scan_id, "fingerprint_detect",
            f"fingerprint_detect completed: identified {totals['updated_count']} fingerprints"
        )

        # Only tools that produced stats are listed as executed tasks.
        executed_tasks = ['export_site_urls_for_fingerprint']
        executed_tasks.extend([f'run_xingfinger ({tool})' for tool in tool_stats])

        return {
            'success': True,
            'scan_id': scan_id,
            'target': target_name,
            'scan_workspace_dir': scan_workspace_dir,
            'urls_file': ctx.urls_file,
            'url_count': ctx.url_count,
            **totals,
            'executed_tasks': executed_tasks,
            'tool_stats': {
                'total': len(enabled_tools),
                'successful': len(successful_tools),
                'failed': len(failed_tools),
                'successful_tools': successful_tools,
                'failed_tools': failed_tools,
                'details': tool_stats
            }
        }

    except ValueError as e:
        logger.error("配置错误: %s", e)
        raise
    except RuntimeError as e:
        logger.error("运行时错误: %s", e)
        raise
    except Exception as e:
        # Unexpected failure: include the traceback, then let it propagate.
        logger.exception("指纹识别失败: %s", e)
        raise
|
|
|
|
|
|
def _build_empty_result(
|
|
scan_id: int,
|
|
target_name: str,
|
|
scan_workspace_dir: str,
|
|
urls_file: str
|
|
) -> dict:
|
|
"""构建空结果(无 URL 可扫描时)"""
|
|
return {
|
|
'success': True,
|
|
'scan_id': scan_id,
|
|
'target': target_name,
|
|
'scan_workspace_dir': scan_workspace_dir,
|
|
'urls_file': urls_file,
|
|
'url_count': 0,
|
|
'processed_records': 0,
|
|
'updated_count': 0,
|
|
'created_count': 0,
|
|
'snapshot_count': 0,
|
|
'executed_tasks': ['export_site_urls_for_fingerprint'],
|
|
'tool_stats': {
|
|
'total': 0,
|
|
'successful': 0,
|
|
'failed': 0,
|
|
'successful_tools': [],
|
|
'failed_tools': [],
|
|
'details': {}
|
|
}
|
|
}
|