Files
xingrin/backend/apps/scan/utils/performance.py
2025-12-13 18:40:36 +08:00

486 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
性能监控工具模块
提供 Flow 层的性能监控能力
功能:
1. Flow 性能监控 - 记录整体流程耗时、系统资源CPU/内存)
2. 定时采样 - 每 N 秒记录一次系统资源状态
使用方式:
# Flow 层(在 handlers 中使用)
from apps.scan.utils.performance import FlowPerformanceTracker
tracker = FlowPerformanceTracker(flow_name, scan_id)
tracker.start()
# ... 执行流程 ...
tracker.finish(success=True, result=result)
"""
import logging
import threading
import time
from dataclasses import dataclass
from typing import Optional
try:
import psutil
except ImportError:
psutil = None
# 性能日志使用专门的 logger
perf_logger = logging.getLogger('performance')
# 采样间隔(秒)
SAMPLE_INTERVAL = 30
def _get_system_stats() -> dict:
"""
获取当前系统资源状态
Returns:
dict: {'cpu_percent': float, 'memory_gb': float, 'memory_percent': float}
"""
if not psutil:
return {'cpu_percent': 0.0, 'memory_gb': 0.0, 'memory_percent': 0.0}
try:
cpu_percent = psutil.cpu_percent(interval=0.1)
memory = psutil.virtual_memory()
memory_gb = memory.used / (1024 ** 3)
memory_percent = memory.percent # psutil 直接提供内存使用百分比
return {
'cpu_percent': cpu_percent,
'memory_gb': memory_gb,
'memory_percent': memory_percent
}
except Exception:
return {'cpu_percent': 0.0, 'memory_gb': 0.0, 'memory_percent': 0.0}
@dataclass
class FlowPerformanceMetrics:
"""Flow 性能指标"""
flow_name: str
scan_id: int
target_id: Optional[int] = None
target_name: Optional[str] = None
# 时间指标
start_time: float = 0.0
end_time: float = 0.0
duration_seconds: float = 0.0
# 系统资源指标
cpu_start: float = 0.0
cpu_end: float = 0.0
cpu_peak: float = 0.0
memory_gb_start: float = 0.0
memory_gb_end: float = 0.0
memory_gb_peak: float = 0.0
memory_percent_start: float = 0.0
memory_percent_end: float = 0.0
memory_percent_peak: float = 0.0
# 执行结果
success: bool = False
error_message: Optional[str] = None
class FlowPerformanceTracker:
"""
Flow 性能追踪器
用于追踪 Prefect Flow 的执行性能,包括:
- 执行耗时
- 系统 CPU 和内存使用
- 定时采样(每 30 秒)
使用方式:
tracker = FlowPerformanceTracker("directory_scan", scan_id=1)
tracker.start(target_id=1, target_name="example.com")
# ... flow 执行 ...
tracker.finish(success=True, result={'created_count': 100})
"""
def __init__(self, flow_name: str, scan_id: int):
self.metrics = FlowPerformanceMetrics(
flow_name=flow_name,
scan_id=scan_id
)
self._sampler_thread: Optional[threading.Thread] = None
self._stop_event = threading.Event()
self._samples: list[dict] = []
def start(
self,
target_id: Optional[int] = None,
target_name: Optional[str] = None
) -> None:
"""开始追踪"""
self.metrics.start_time = time.time()
self.metrics.target_id = target_id
self.metrics.target_name = target_name
# 记录初始系统状态
stats = _get_system_stats()
self.metrics.cpu_start = stats['cpu_percent']
self.metrics.memory_gb_start = stats['memory_gb']
self.metrics.memory_percent_start = stats['memory_percent']
self.metrics.cpu_peak = stats['cpu_percent']
self.metrics.memory_gb_peak = stats['memory_gb']
self.metrics.memory_percent_peak = stats['memory_percent']
# 记录开始日志
perf_logger.info(
"📊 Flow 开始 - %s, scan_id=%d, 系统: CPU %.1f%%, 内存 %.1fGB(%.1f%%)",
self.metrics.flow_name,
self.metrics.scan_id,
stats['cpu_percent'],
stats['memory_gb'],
stats['memory_percent']
)
# 启动采样线程
self._stop_event.clear()
self._sampler_thread = threading.Thread(
target=self._sample_loop,
daemon=True,
name=f"perf-sampler-{self.metrics.flow_name}-{self.metrics.scan_id}"
)
self._sampler_thread.start()
def _sample_loop(self) -> None:
"""定时采样循环"""
elapsed = 0
while not self._stop_event.wait(timeout=SAMPLE_INTERVAL):
elapsed += SAMPLE_INTERVAL
stats = _get_system_stats()
# 更新峰值
if stats['cpu_percent'] > self.metrics.cpu_peak:
self.metrics.cpu_peak = stats['cpu_percent']
if stats['memory_gb'] > self.metrics.memory_gb_peak:
self.metrics.memory_gb_peak = stats['memory_gb']
if stats['memory_percent'] > self.metrics.memory_percent_peak:
self.metrics.memory_percent_peak = stats['memory_percent']
# 记录采样
self._samples.append({
'elapsed': elapsed,
'cpu': stats['cpu_percent'],
'memory_gb': stats['memory_gb'],
'memory_percent': stats['memory_percent']
})
# 输出采样日志
perf_logger.info(
"📊 Flow 执行中 - %s [%ds], 系统: CPU %.1f%%, 内存 %.1fGB(%.1f%%)",
self.metrics.flow_name,
elapsed,
stats['cpu_percent'],
stats['memory_gb'],
stats['memory_percent']
)
def finish(
self,
success: bool = True,
error_message: Optional[str] = None
) -> None:
"""
结束追踪并记录性能日志
Args:
success: 是否成功
error_message: 错误信息
"""
# 停止采样线程
self._stop_event.set()
if self._sampler_thread and self._sampler_thread.is_alive():
self._sampler_thread.join(timeout=1.0)
# 记录结束时间和状态
self.metrics.end_time = time.time()
self.metrics.duration_seconds = self.metrics.end_time - self.metrics.start_time
self.metrics.success = success
self.metrics.error_message = error_message
# 记录结束时的系统状态
stats = _get_system_stats()
self.metrics.cpu_end = stats['cpu_percent']
self.metrics.memory_gb_end = stats['memory_gb']
self.metrics.memory_percent_end = stats['memory_percent']
# 更新峰值(最后一次采样)
if stats['cpu_percent'] > self.metrics.cpu_peak:
self.metrics.cpu_peak = stats['cpu_percent']
if stats['memory_gb'] > self.metrics.memory_gb_peak:
self.metrics.memory_gb_peak = stats['memory_gb']
if stats['memory_percent'] > self.metrics.memory_percent_peak:
self.metrics.memory_percent_peak = stats['memory_percent']
# 记录结束日志
status = "" if success else ""
perf_logger.info(
"📊 Flow 结束 - %s %s, scan_id=%d, 耗时: %.1fs, "
"CPU: %.1f%%%.1f%%(峰值%.1f%%), 内存: %.1fGB(%.1f%%)→%.1fGB(%.1f%%)(峰值%.1fGB/%.1f%%)",
self.metrics.flow_name,
status,
self.metrics.scan_id,
self.metrics.duration_seconds,
self.metrics.cpu_start,
self.metrics.cpu_end,
self.metrics.cpu_peak,
self.metrics.memory_gb_start,
self.metrics.memory_percent_start,
self.metrics.memory_gb_end,
self.metrics.memory_percent_end,
self.metrics.memory_gb_peak,
self.metrics.memory_percent_peak
)
if not success and error_message:
perf_logger.warning(
"📊 Flow 失败原因 - %s: %s",
self.metrics.flow_name,
error_message
)
def _get_process_stats(pid: int) -> dict:
"""
获取指定进程及其子进程的资源使用(类似 htop 显示)
Args:
pid: 进程 ID
Returns:
dict: {
'cpu_percent': float, # 进程 CPU 使用率
'memory_mb': float, # 进程内存使用 (MB)
'memory_percent': float # 进程内存占比
}
"""
if not psutil:
return {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
try:
process = psutil.Process(pid)
# 获取进程及所有子进程
children = process.children(recursive=True)
all_processes = [process] + children
total_cpu = 0.0
total_memory = 0
for p in all_processes:
try:
# CPU 百分比计算:
# - interval=0 使用上次调用的缓存值(需要先调用过一次初始化)
# - 如果没有缓存值,会返回 0.0
# - 这避免了每次采样都等待 0.1 秒的问题
cpu_percent = p.cpu_percent()
total_cpu += cpu_percent
mem_info = p.memory_info()
total_memory += mem_info.rss # RSS: Resident Set Size
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
# 转换为 MB
memory_mb = total_memory / (1024 * 1024)
# 计算内存占比
total_mem = psutil.virtual_memory().total
memory_percent = (total_memory / total_mem) * 100 if total_mem > 0 else 0.0
return {
'cpu_percent': total_cpu,
'memory_mb': memory_mb,
'memory_percent': memory_percent
}
except (psutil.NoSuchProcess, psutil.AccessDenied):
return {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
except Exception:
return {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
class CommandPerformanceTracker:
"""
命令执行性能追踪器
用于追踪单个命令的执行性能,包括:
- 执行耗时
- 进程级 CPU 和内存使用(类似 htop
- 系统整体资源状态
使用方式:
tracker = CommandPerformanceTracker("ffuf", command="ffuf -u http://...")
tracker.start()
tracker.set_pid(process.pid) # 进程启动后设置 PID
# ... 执行命令 ...
tracker.finish(success=True, duration=45.2)
"""
def __init__(self, tool_name: str, command: str = ""):
self.tool_name = tool_name
self.command = command
self.start_time: float = 0.0
self.pid: Optional[int] = None
# 系统级资源
self.sys_cpu_start: float = 0.0
self.sys_memory_gb_start: float = 0.0
self.sys_memory_percent_start: float = 0.0
# 进程级资源峰值
self.proc_cpu_peak: float = 0.0
self.proc_memory_mb_peak: float = 0.0
self.proc_memory_percent_peak: float = 0.0
def start(self) -> None:
"""开始追踪,记录初始系统状态"""
self.start_time = time.time()
stats = _get_system_stats()
self.sys_cpu_start = stats['cpu_percent']
self.sys_memory_gb_start = stats['memory_gb']
self.sys_memory_percent_start = stats['memory_percent']
# 截断过长的命令
cmd_display = self.command[:200] + "..." if len(self.command) > 200 else self.command
perf_logger.info(
"📊 命令开始 - %s, 系统: CPU %.1f%%, 内存 %.1fGB(%.1f%%), 命令: %s",
self.tool_name,
self.sys_cpu_start,
self.sys_memory_gb_start,
self.sys_memory_percent_start,
cmd_display
)
def set_pid(self, pid: int) -> None:
"""
设置要追踪的进程 PID
Args:
pid: 进程 ID
"""
self.pid = pid
# 初始化 CPU 采样psutil 需要先调用一次)
# CPU 百分比计算需要两次调用之间的时间间隔,第一次调用是初始化
if psutil and pid:
try:
process = psutil.Process(pid)
# 第一次调用初始化 CPU 计算基准
process.cpu_percent()
# 为所有子进程也初始化 CPU 计算
for child in process.children(recursive=True):
try:
child.cpu_percent()
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
def sample(self) -> dict:
"""
采样当前进程资源使用(可选,用于长时间运行的命令)
Returns:
dict: 进程资源使用情况
"""
if not self.pid:
return {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
stats = _get_process_stats(self.pid)
# 更新峰值
if stats['cpu_percent'] > self.proc_cpu_peak:
self.proc_cpu_peak = stats['cpu_percent']
if stats['memory_mb'] > self.proc_memory_mb_peak:
self.proc_memory_mb_peak = stats['memory_mb']
if stats['memory_percent'] > self.proc_memory_percent_peak:
self.proc_memory_percent_peak = stats['memory_percent']
return stats
def finish(
self,
success: bool = True,
duration: Optional[float] = None,
timeout: Optional[int] = None,
is_timeout: bool = False
) -> None:
"""
结束追踪并记录性能日志
Args:
success: 是否成功
duration: 执行耗时(秒),如果不传则自动计算
timeout: 超时配置(秒)
is_timeout: 是否超时
"""
# 计算耗时
if duration is None:
duration = time.time() - self.start_time
# 获取结束时的系统状态
sys_stats = _get_system_stats()
# 获取进程最终资源使用(如果进程还在)
proc_stats = {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
if self.pid:
proc_stats = _get_process_stats(self.pid)
# 更新峰值
if proc_stats['cpu_percent'] > self.proc_cpu_peak:
self.proc_cpu_peak = proc_stats['cpu_percent']
if proc_stats['memory_mb'] > self.proc_memory_mb_peak:
self.proc_memory_mb_peak = proc_stats['memory_mb']
if proc_stats['memory_percent'] > self.proc_memory_percent_peak:
self.proc_memory_percent_peak = proc_stats['memory_percent']
status = "" if success else ("⏱ 超时" if is_timeout else "")
# 截断过长的命令
cmd_display = self.command[:200] + "..." if len(self.command) > 200 else self.command
# 日志格式:进程资源 + 系统资源
if self.pid and (self.proc_cpu_peak > 0 or self.proc_memory_mb_peak > 0):
perf_logger.info(
"📊 命令结束 - %s %s, 耗时: %.2fs%s, "
"进程: CPU %.1f%%(峰值), 内存 %.1fMB(%.1f%%峰值), "
"系统: CPU %.1f%%%.1f%%, 内存 %.1fGB(%.1f%%)→%.1fGB(%.1f%%), "
"命令: %s",
self.tool_name,
status,
duration,
f", 超时配置: {timeout}s" if timeout else "",
self.proc_cpu_peak,
self.proc_memory_mb_peak,
self.proc_memory_percent_peak,
self.sys_cpu_start,
sys_stats['cpu_percent'],
self.sys_memory_gb_start,
self.sys_memory_percent_start,
sys_stats['memory_gb'],
sys_stats['memory_percent'],
cmd_display
)
else:
# 没有进程级数据,只显示系统级
perf_logger.info(
"📊 命令结束 - %s %s, 耗时: %.2fs%s, "
"系统: CPU %.1f%%%.1f%%, 内存 %.1fGB(%.1f%%)→%.1fGB(%.1f%%), "
"命令: %s",
self.tool_name,
status,
duration,
f", 超时配置: {timeout}s" if timeout else "",
self.sys_cpu_start,
sys_stats['cpu_percent'],
self.sys_memory_gb_start,
self.sys_memory_percent_start,
sys_stats['memory_gb'],
sys_stats['memory_percent'],
cmd_display
)