mirror of
https://github.com/yyhuni/xingrin.git
synced 2026-02-03 05:03:11 +08:00
233 lines
6.8 KiB
Python
233 lines
6.8 KiB
Python
|
|
"""
|
|||
|
|
URL Fetch 共享工具函数
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import logging
|
|||
|
|
import subprocess
|
|||
|
|
import uuid
|
|||
|
|
from datetime import datetime
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
from apps.scan.utils import build_scan_command
|
|||
|
|
|
|||
|
|
logger = logging.getLogger(__name__)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def calculate_timeout_by_line_count(
    tool_config: dict,
    file_path: str,
    base_per_time: int = 1,
) -> int:
    """
    Derive a timeout from the number of lines in an input file.

    Lines are counted in-process instead of shelling out to ``wc -l``.
    ``wc -l`` counts newline characters, so a last line without a trailing
    newline was not counted; iterating the file counts it. Counting in
    Python also removes the dependency on an external binary and the risk
    of a path starting with ``-`` being parsed as a command-line option.

    Args:
        tool_config: Tool configuration (reserved; not used by the current
            calculation).
        file_path: Path of the input file whose lines are counted.
        base_per_time: Seconds budgeted per line.

    Returns:
        int: ``line_count * base_per_time`` seconds, or 600 when the line
        count cannot be determined.
    """
    try:
        # Binary mode: only line boundaries matter, so skip text decoding
        # (and the decode errors an unexpected encoding could cause).
        with open(file_path, "rb") as f:
            line_count = sum(1 for _ in f)
        timeout = line_count * base_per_time
        logger.info(
            "timeout 自动计算: 文件=%s, 行数=%d, 每行时间=%d秒, timeout=%d秒",
            file_path,
            line_count,
            base_per_time,
            timeout,
        )
        return timeout
    except Exception as e:
        # Deliberate best-effort: callers always get a usable timeout.
        logger.warning("计算行数失败: %s,将使用默认 timeout: 600秒", e)
        return 600
|
|||
|
|
|
|||
|
|
|
|||
|
|
def prepare_tool_execution(
    tool_name: str,
    tool_config: dict,
    input_file: str,
    input_type: str,
    output_dir: Path,
    scan_type: str = "url_fetch"
) -> dict:
    """
    Build the execution parameters for a single tool.

    Args:
        tool_name: Name of the tool.
        tool_config: Configuration of the tool; its ``timeout`` entry may be
            the string ``"auto"`` or a value convertible with ``int()``.
        input_file: Path of the input file.
        input_type: Input type (``domains_file`` or ``sites_file``); also
            used as the command parameter key that carries ``input_file``.
        output_dir: Directory in which the output file is placed.
        scan_type: Scan type forwarded to ``build_scan_command``.

    Returns:
        dict: On success, the keys ``command``, ``input_file``,
        ``input_type``, ``output_file`` and ``timeout``. On failure, a
        dict holding a single ``error`` key with the reason.
    """
    # Step 1: count the entries of the input file (doubles as a readability check).
    try:
        input_count = 0
        with open(input_file, 'r') as handle:
            for _ in handle:
                input_count += 1
        logger.info("工具 %s - 输入类型: %s, 数量: %d", tool_name, input_type, input_count)
    except Exception as e:
        return {"error": f"读取输入文件失败: {e}"}

    # Step 2: output path, made unique by a timestamp plus a short UUID suffix.
    short_uuid = uuid.uuid4().hex[:4]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = output_dir / f"{tool_name}_{timestamp}_{short_uuid}.txt"
    output_file = str(output_path)

    # Step 3: build the command line.
    try:
        command = build_scan_command(
            tool_name=tool_name,
            scan_type=scan_type,
            command_params={
                input_type: input_file,
                "output_file": output_file,
            },
            tool_config=tool_config,
        )
    except Exception as e:
        logger.error("构建 %s 命令失败: %s", tool_name, e)
        return {"error": f"命令构建失败: {e}"}

    # Step 4: resolve the timeout ("auto" or an explicit integer).
    configured = tool_config.get("timeout", 3600)
    wants_auto = isinstance(configured, str) and configured == "auto"

    if not wants_auto:
        try:
            timeout = int(configured)
        except (TypeError, ValueError):
            logger.warning(
                "工具 %s 的 timeout 配置无效(%s),将使用默认 3600 秒",
                tool_name,
                configured,
            )
            timeout = 3600
    else:
        try:
            # katana / waymore need much more time per site.
            if tool_name in ("katana", "waymore"):
                per_line_seconds = 360
            else:
                per_line_seconds = 1
            timeout = calculate_timeout_by_line_count(
                tool_config=tool_config,
                file_path=input_file,
                base_per_time=per_line_seconds,
            )
        except Exception as e:
            logger.warning(
                "工具 %s 自动计算 timeout 失败,将使用默认 3600 秒: %s",
                tool_name,
                e,
            )
            timeout = 3600

    # Step 5: hand back everything the caller needs to run the tool.
    execution = {}
    execution["command"] = command
    execution["input_file"] = input_file
    execution["input_type"] = input_type
    execution["output_file"] = output_file
    execution["timeout"] = timeout
    return execution
|
|||
|
|
|
|||
|
|
|
|||
|
|
def run_tools_parallel(
    tools: dict,
    input_file: str,
    input_type: str,
    output_dir: Path
) -> tuple[list, list, list]:
    """
    Submit every configured tool as a task, then gather the outcomes.

    Args:
        tools: Mapping of tool configuration, ``{tool_name: tool_config}``.
        input_file: Path of the input file shared by all tools.
        input_type: Input type passed on to ``prepare_tool_execution``.
        output_dir: Directory for the tools' output files.

    Returns:
        tuple: ``(result_files, failed_tools, successful_tool_names)``.
        ``result_files`` lists the output files of tools whose result
        reports success, ``failed_tools`` holds ``{"tool", "reason"}``
        dicts, and ``successful_tool_names`` are the names from ``tools``
        that do not appear in ``failed_tools``.
    """
    from apps.scan.tasks.url_fetch import run_url_fetcher_task

    failed_tools: list[dict] = []
    pending: dict[str, object] = {}

    # Phase 1: submit a task for each tool whose parameters could be prepared.
    for tool_name, tool_config in tools.items():
        exec_params = prepare_tool_execution(
            tool_name=tool_name,
            tool_config=tool_config,
            input_file=input_file,
            input_type=input_type,
            output_dir=output_dir,
        )

        if "error" in exec_params:
            # Preparation failed; record the reason and do not submit.
            failed_tools.append({"tool": tool_name, "reason": exec_params["error"]})
            continue

        logger.info(
            "提交任务 - 工具: %s, 输入: %s, 超时: %d秒",
            tool_name,
            input_type,
            exec_params["timeout"],
        )

        pending[tool_name] = run_url_fetcher_task.submit(
            tool_name=tool_name,
            command=exec_params["command"],
            timeout=exec_params["timeout"],
            output_file=exec_params["output_file"],
        )

    # Phase 2: wait for each submitted task and sort it into success / failure.
    result_files = []
    for tool_name, future in pending.items():
        try:
            outcome = future.result()
            if not (outcome and outcome['success']):
                failed_tools.append({
                    'tool': tool_name,
                    'reason': '未生成结果或无有效URL'
                })
                logger.warning("⚠️ 工具 %s 未生成有效结果", tool_name)
                continue

            result_files.append(outcome['output_file'])
            logger.info(
                "✓ 工具 %s 执行成功 - 发现 URL: %d",
                tool_name, outcome['url_count']
            )
        except Exception as e:
            failed_tools.append({
                'tool': tool_name,
                'reason': str(e)
            })
            logger.warning("⚠️ 工具 %s 执行失败: %s", tool_name, e)

    # Phase 3: every tool not recorded as failed counts as successful.
    failed_names = {entry['tool'] for entry in failed_tools}
    successful_tool_names = []
    for name in tools:
        if name not in failed_names:
            successful_tool_names.append(name)

    return result_files, failed_tools, successful_tool_names
|