mirror of
https://github.com/yyhuni/xingrin.git
synced 2026-01-31 19:53:11 +08:00
- Add user_logger utility for structured scan operation logging - Create scan log views and API endpoints for retrieving scan execution logs - Add scan-log-list component and use-scan-logs hook for frontend log display - Refactor asset search views to remove ArrayField support from pg_ivm IMMV - Update search_service.py to JOIN original tables for array field retrieval - Add system architecture requirements (AMD64/ARM64) to README - Update scan flow handlers to integrate logging system - Enhance scan progress dialog with log viewer integration - Add ANSI log viewer component for formatted log display - Update scan service API to support log retrieval endpoints - Migrate database schema to support new logging infrastructure - Add internationalization strings for scan logs (en/zh) This change improves observability of scan operations and resolves pg_ivm limitations with ArrayField types by fetching array data from original tables via JOIN operations.
248 lines
7.6 KiB
Python
248 lines
7.6 KiB
Python
"""
|
||
URL Fetch 共享工具函数
|
||
"""
|
||
|
||
import logging
|
||
import subprocess
|
||
import uuid
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
from apps.scan.utils import build_scan_command
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def calculate_timeout_by_line_count(
|
||
tool_config: dict,
|
||
file_path: str,
|
||
base_per_time: int = 1,
|
||
min_timeout: int = 60,
|
||
) -> int:
|
||
"""
|
||
根据文件行数自动计算超时时间
|
||
|
||
Args:
|
||
tool_config: 工具配置(保留参数,未来可能用于更复杂的计算)
|
||
file_path: 输入文件路径
|
||
base_per_time: 每行的基础时间(秒)
|
||
min_timeout: 最小超时时间(秒),默认60秒
|
||
|
||
Returns:
|
||
int: 计算出的超时时间(秒),不低于 min_timeout
|
||
"""
|
||
try:
|
||
result = subprocess.run(
|
||
['wc', '-l', file_path],
|
||
capture_output=True,
|
||
text=True,
|
||
check=True,
|
||
)
|
||
line_count = int(result.stdout.strip().split()[0])
|
||
timeout = max(line_count * base_per_time, min_timeout)
|
||
logger.info(
|
||
"timeout 自动计算: 文件=%s, 行数=%d, 每行时间=%d秒, 最小值=%d秒, timeout=%d秒",
|
||
file_path,
|
||
line_count,
|
||
base_per_time,
|
||
min_timeout,
|
||
timeout,
|
||
)
|
||
return timeout
|
||
except Exception as e:
|
||
logger.warning("wc -l 计算行数失败: %s,将使用默认 timeout: %d秒", e, min_timeout)
|
||
return min_timeout
|
||
|
||
|
||
def prepare_tool_execution(
|
||
tool_name: str,
|
||
tool_config: dict,
|
||
input_file: str,
|
||
input_type: str,
|
||
output_dir: Path,
|
||
scan_type: str = "url_fetch"
|
||
) -> dict:
|
||
"""
|
||
准备单个工具的执行参数
|
||
|
||
Args:
|
||
tool_name: 工具名称
|
||
tool_config: 工具配置
|
||
input_file: 输入文件路径
|
||
input_type: 输入类型(domains_file 或 sites_file)
|
||
output_dir: 输出目录
|
||
scan_type: 扫描类型
|
||
|
||
Returns:
|
||
dict: 执行参数,包含 command, input_file, output_file, timeout
|
||
或包含 error 键表示失败
|
||
"""
|
||
# 1. 统计输入文件行数
|
||
try:
|
||
with open(input_file, 'r') as f:
|
||
input_count = sum(1 for _ in f)
|
||
logger.info("工具 %s - 输入类型: %s, 数量: %d", tool_name, input_type, input_count)
|
||
except Exception as e:
|
||
return {"error": f"读取输入文件失败: {e}"}
|
||
|
||
# 2. 生成输出文件路径(带时间戳和短 UUID 后缀)
|
||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
short_uuid = uuid.uuid4().hex[:4]
|
||
output_file = str(output_dir / f"{tool_name}_{timestamp}_{short_uuid}.txt")
|
||
|
||
# 3. 构建命令
|
||
command_params = {
|
||
input_type: input_file,
|
||
"output_file": output_file,
|
||
}
|
||
|
||
try:
|
||
command = build_scan_command(
|
||
tool_name=tool_name,
|
||
scan_type=scan_type,
|
||
command_params=command_params,
|
||
tool_config=tool_config,
|
||
)
|
||
except Exception as e:
|
||
logger.error("构建 %s 命令失败: %s", tool_name, e)
|
||
return {"error": f"命令构建失败: {e}"}
|
||
|
||
# 4. 计算超时时间(支持 auto 和显式整数)
|
||
raw_timeout = tool_config.get("timeout", 3600)
|
||
timeout = 3600
|
||
|
||
if isinstance(raw_timeout, str) and raw_timeout == "auto":
|
||
try:
|
||
# katana / waymore 每个站点需要更长时间
|
||
base_per_time = 360 if tool_name in ("katana", "waymore") else 1
|
||
timeout = calculate_timeout_by_line_count(
|
||
tool_config=tool_config,
|
||
file_path=input_file,
|
||
base_per_time=base_per_time,
|
||
)
|
||
except Exception as e:
|
||
logger.warning(
|
||
"工具 %s 自动计算 timeout 失败,将使用默认 3600 秒: %s",
|
||
tool_name,
|
||
e,
|
||
)
|
||
timeout = 3600
|
||
else:
|
||
try:
|
||
timeout = int(raw_timeout)
|
||
except (TypeError, ValueError):
|
||
logger.warning(
|
||
"工具 %s 的 timeout 配置无效(%s),将使用默认 3600 秒",
|
||
tool_name,
|
||
raw_timeout,
|
||
)
|
||
timeout = 3600
|
||
|
||
# 5. 返回执行参数
|
||
return {
|
||
"command": command,
|
||
"input_file": input_file,
|
||
"input_type": input_type,
|
||
"output_file": output_file,
|
||
"timeout": timeout,
|
||
}
|
||
|
||
|
||
def run_tools_parallel(
|
||
tools: dict,
|
||
input_file: str,
|
||
input_type: str,
|
||
output_dir: Path,
|
||
scan_id: int
|
||
) -> tuple[list, list, list]:
|
||
"""
|
||
并行执行工具列表
|
||
|
||
Args:
|
||
tools: 工具配置字典 {tool_name: tool_config}
|
||
input_file: 输入文件路径
|
||
input_type: 输入类型
|
||
output_dir: 输出目录
|
||
scan_id: 扫描任务 ID(用于记录日志)
|
||
|
||
Returns:
|
||
tuple: (result_files, failed_tools, successful_tool_names)
|
||
"""
|
||
from apps.scan.tasks.url_fetch import run_url_fetcher_task
|
||
from apps.scan.utils import user_log
|
||
|
||
futures: dict[str, object] = {}
|
||
failed_tools: list[dict] = []
|
||
|
||
# 提交所有工具的并行任务
|
||
for tool_name, tool_config in tools.items():
|
||
exec_params = prepare_tool_execution(
|
||
tool_name=tool_name,
|
||
tool_config=tool_config,
|
||
input_file=input_file,
|
||
input_type=input_type,
|
||
output_dir=output_dir,
|
||
)
|
||
|
||
if "error" in exec_params:
|
||
failed_tools.append({"tool": tool_name, "reason": exec_params["error"]})
|
||
continue
|
||
|
||
logger.info(
|
||
"提交任务 - 工具: %s, 输入: %s, 超时: %d秒",
|
||
tool_name,
|
||
input_type,
|
||
exec_params["timeout"],
|
||
)
|
||
|
||
# 记录工具开始执行日志
|
||
user_log(scan_id, "url_fetch", f"Running {tool_name}: {exec_params['command']}")
|
||
|
||
# 提交并行任务
|
||
future = run_url_fetcher_task.submit(
|
||
tool_name=tool_name,
|
||
command=exec_params["command"],
|
||
timeout=exec_params["timeout"],
|
||
output_file=exec_params["output_file"],
|
||
)
|
||
futures[tool_name] = future
|
||
|
||
# 收集执行结果
|
||
result_files = []
|
||
for tool_name, future in futures.items():
|
||
try:
|
||
result = future.result()
|
||
if result and result['success']:
|
||
result_files.append(result['output_file'])
|
||
url_count = result['url_count']
|
||
logger.info(
|
||
"✓ 工具 %s 执行成功 - 发现 URL: %d",
|
||
tool_name, url_count
|
||
)
|
||
user_log(scan_id, "url_fetch", f"{tool_name} completed: found {url_count} urls")
|
||
else:
|
||
reason = '未生成结果或无有效URL'
|
||
failed_tools.append({
|
||
'tool': tool_name,
|
||
'reason': reason
|
||
})
|
||
logger.warning("⚠️ 工具 %s 未生成有效结果", tool_name)
|
||
user_log(scan_id, "url_fetch", f"{tool_name} failed: {reason}", "error")
|
||
except Exception as e:
|
||
reason = str(e)
|
||
failed_tools.append({
|
||
'tool': tool_name,
|
||
'reason': reason
|
||
})
|
||
logger.warning("⚠️ 工具 %s 执行失败: %s", tool_name, e)
|
||
user_log(scan_id, "url_fetch", f"{tool_name} failed: {reason}", "error")
|
||
|
||
# 计算成功的工具列表
|
||
failed_tool_names = [f['tool'] for f in failed_tools]
|
||
successful_tool_names = [
|
||
name for name in tools.keys()
|
||
if name not in failed_tool_names
|
||
]
|
||
|
||
return result_files, failed_tools, successful_tool_names
|