Files
xingrin/backend/apps/scan/utils/performance.py

486 lines
17 KiB
Python
Raw Normal View History

2025-12-13 09:41:37 +08:00
"""
性能监控工具模块
提供 Flow 层的性能监控能力
功能
1. Flow 性能监控 - 记录整体流程耗时系统资源CPU/内存
2. 定时采样 - N 秒记录一次系统资源状态
使用方式
# Flow 层(在 handlers 中使用)
from apps.scan.utils.performance import FlowPerformanceTracker
tracker = FlowPerformanceTracker(flow_name, scan_id)
tracker.start()
# ... 执行流程 ...
tracker.finish(success=True, result=result)
"""
import logging
import threading
import time
2025-12-13 18:40:36 +08:00
from dataclasses import dataclass
2025-12-13 09:41:37 +08:00
from typing import Optional
try:
import psutil
except ImportError:
psutil = None
# 性能日志使用专门的 logger
perf_logger = logging.getLogger('performance')
# 采样间隔(秒)
SAMPLE_INTERVAL = 30
def _get_system_stats() -> dict:
"""
获取当前系统资源状态
Returns:
2025-12-13 18:40:36 +08:00
dict: {'cpu_percent': float, 'memory_gb': float, 'memory_percent': float}
2025-12-13 09:41:37 +08:00
"""
if not psutil:
2025-12-13 18:40:36 +08:00
return {'cpu_percent': 0.0, 'memory_gb': 0.0, 'memory_percent': 0.0}
2025-12-13 09:41:37 +08:00
try:
cpu_percent = psutil.cpu_percent(interval=0.1)
memory = psutil.virtual_memory()
memory_gb = memory.used / (1024 ** 3)
2025-12-13 18:40:36 +08:00
memory_percent = memory.percent # psutil 直接提供内存使用百分比
2025-12-13 09:41:37 +08:00
return {
'cpu_percent': cpu_percent,
2025-12-13 18:40:36 +08:00
'memory_gb': memory_gb,
'memory_percent': memory_percent
2025-12-13 09:41:37 +08:00
}
except Exception:
2025-12-13 18:40:36 +08:00
return {'cpu_percent': 0.0, 'memory_gb': 0.0, 'memory_percent': 0.0}
2025-12-13 09:41:37 +08:00
@dataclass
class FlowPerformanceMetrics:
"""Flow 性能指标"""
flow_name: str
scan_id: int
target_id: Optional[int] = None
target_name: Optional[str] = None
# 时间指标
start_time: float = 0.0
end_time: float = 0.0
duration_seconds: float = 0.0
# 系统资源指标
cpu_start: float = 0.0
cpu_end: float = 0.0
cpu_peak: float = 0.0
memory_gb_start: float = 0.0
memory_gb_end: float = 0.0
memory_gb_peak: float = 0.0
2025-12-13 18:40:36 +08:00
memory_percent_start: float = 0.0
memory_percent_end: float = 0.0
memory_percent_peak: float = 0.0
2025-12-13 09:41:37 +08:00
# 执行结果
success: bool = False
error_message: Optional[str] = None
class FlowPerformanceTracker:
"""
Flow 性能追踪器
用于追踪 Prefect Flow 的执行性能包括
- 执行耗时
- 系统 CPU 和内存使用
- 定时采样 30
使用方式
tracker = FlowPerformanceTracker("directory_scan", scan_id=1)
tracker.start(target_id=1, target_name="example.com")
# ... flow 执行 ...
tracker.finish(success=True, result={'created_count': 100})
"""
def __init__(self, flow_name: str, scan_id: int):
self.metrics = FlowPerformanceMetrics(
flow_name=flow_name,
scan_id=scan_id
)
self._sampler_thread: Optional[threading.Thread] = None
self._stop_event = threading.Event()
self._samples: list[dict] = []
def start(
self,
target_id: Optional[int] = None,
target_name: Optional[str] = None
) -> None:
"""开始追踪"""
self.metrics.start_time = time.time()
self.metrics.target_id = target_id
self.metrics.target_name = target_name
# 记录初始系统状态
stats = _get_system_stats()
self.metrics.cpu_start = stats['cpu_percent']
self.metrics.memory_gb_start = stats['memory_gb']
2025-12-13 18:40:36 +08:00
self.metrics.memory_percent_start = stats['memory_percent']
2025-12-13 09:41:37 +08:00
self.metrics.cpu_peak = stats['cpu_percent']
self.metrics.memory_gb_peak = stats['memory_gb']
2025-12-13 18:40:36 +08:00
self.metrics.memory_percent_peak = stats['memory_percent']
2025-12-13 09:41:37 +08:00
# 记录开始日志
perf_logger.info(
2025-12-13 18:40:36 +08:00
"📊 Flow 开始 - %s, scan_id=%d, 系统: CPU %.1f%%, 内存 %.1fGB(%.1f%%)",
2025-12-13 09:41:37 +08:00
self.metrics.flow_name,
self.metrics.scan_id,
stats['cpu_percent'],
2025-12-13 18:40:36 +08:00
stats['memory_gb'],
stats['memory_percent']
2025-12-13 09:41:37 +08:00
)
# 启动采样线程
self._stop_event.clear()
self._sampler_thread = threading.Thread(
target=self._sample_loop,
daemon=True,
name=f"perf-sampler-{self.metrics.flow_name}-{self.metrics.scan_id}"
)
self._sampler_thread.start()
def _sample_loop(self) -> None:
"""定时采样循环"""
elapsed = 0
while not self._stop_event.wait(timeout=SAMPLE_INTERVAL):
elapsed += SAMPLE_INTERVAL
stats = _get_system_stats()
# 更新峰值
if stats['cpu_percent'] > self.metrics.cpu_peak:
self.metrics.cpu_peak = stats['cpu_percent']
if stats['memory_gb'] > self.metrics.memory_gb_peak:
self.metrics.memory_gb_peak = stats['memory_gb']
2025-12-13 18:40:36 +08:00
if stats['memory_percent'] > self.metrics.memory_percent_peak:
self.metrics.memory_percent_peak = stats['memory_percent']
2025-12-13 09:41:37 +08:00
# 记录采样
self._samples.append({
'elapsed': elapsed,
'cpu': stats['cpu_percent'],
2025-12-13 18:40:36 +08:00
'memory_gb': stats['memory_gb'],
'memory_percent': stats['memory_percent']
2025-12-13 09:41:37 +08:00
})
# 输出采样日志
perf_logger.info(
2025-12-13 18:40:36 +08:00
"📊 Flow 执行中 - %s [%ds], 系统: CPU %.1f%%, 内存 %.1fGB(%.1f%%)",
2025-12-13 09:41:37 +08:00
self.metrics.flow_name,
elapsed,
stats['cpu_percent'],
2025-12-13 18:40:36 +08:00
stats['memory_gb'],
stats['memory_percent']
2025-12-13 09:41:37 +08:00
)
def finish(
self,
success: bool = True,
error_message: Optional[str] = None
) -> None:
"""
结束追踪并记录性能日志
Args:
success: 是否成功
error_message: 错误信息
"""
# 停止采样线程
self._stop_event.set()
if self._sampler_thread and self._sampler_thread.is_alive():
self._sampler_thread.join(timeout=1.0)
# 记录结束时间和状态
self.metrics.end_time = time.time()
self.metrics.duration_seconds = self.metrics.end_time - self.metrics.start_time
self.metrics.success = success
self.metrics.error_message = error_message
# 记录结束时的系统状态
stats = _get_system_stats()
self.metrics.cpu_end = stats['cpu_percent']
self.metrics.memory_gb_end = stats['memory_gb']
2025-12-13 18:40:36 +08:00
self.metrics.memory_percent_end = stats['memory_percent']
2025-12-13 09:41:37 +08:00
# 更新峰值(最后一次采样)
if stats['cpu_percent'] > self.metrics.cpu_peak:
self.metrics.cpu_peak = stats['cpu_percent']
if stats['memory_gb'] > self.metrics.memory_gb_peak:
self.metrics.memory_gb_peak = stats['memory_gb']
2025-12-13 18:40:36 +08:00
if stats['memory_percent'] > self.metrics.memory_percent_peak:
self.metrics.memory_percent_peak = stats['memory_percent']
2025-12-13 09:41:37 +08:00
# 记录结束日志
status = "" if success else ""
perf_logger.info(
"📊 Flow 结束 - %s %s, scan_id=%d, 耗时: %.1fs, "
2025-12-13 18:40:36 +08:00
"CPU: %.1f%%%.1f%%(峰值%.1f%%), 内存: %.1fGB(%.1f%%)→%.1fGB(%.1f%%)(峰值%.1fGB/%.1f%%)",
2025-12-13 09:41:37 +08:00
self.metrics.flow_name,
status,
self.metrics.scan_id,
self.metrics.duration_seconds,
self.metrics.cpu_start,
self.metrics.cpu_end,
self.metrics.cpu_peak,
self.metrics.memory_gb_start,
2025-12-13 18:40:36 +08:00
self.metrics.memory_percent_start,
2025-12-13 09:41:37 +08:00
self.metrics.memory_gb_end,
2025-12-13 18:40:36 +08:00
self.metrics.memory_percent_end,
self.metrics.memory_gb_peak,
self.metrics.memory_percent_peak
2025-12-13 09:41:37 +08:00
)
if not success and error_message:
perf_logger.warning(
"📊 Flow 失败原因 - %s: %s",
self.metrics.flow_name,
error_message
)
2025-12-13 10:01:21 +08:00
def _get_process_stats(pid: int) -> dict:
"""
获取指定进程及其子进程的资源使用类似 htop 显示
Args:
pid: 进程 ID
Returns:
dict: {
'cpu_percent': float, # 进程 CPU 使用率
'memory_mb': float, # 进程内存使用 (MB)
'memory_percent': float # 进程内存占比
}
"""
if not psutil:
return {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
try:
process = psutil.Process(pid)
# 获取进程及所有子进程
children = process.children(recursive=True)
all_processes = [process] + children
total_cpu = 0.0
total_memory = 0
for p in all_processes:
try:
2025-12-13 18:40:36 +08:00
# CPU 百分比计算:
# - interval=0 使用上次调用的缓存值(需要先调用过一次初始化)
# - 如果没有缓存值,会返回 0.0
# - 这避免了每次采样都等待 0.1 秒的问题
cpu_percent = p.cpu_percent()
total_cpu += cpu_percent
2025-12-13 10:01:21 +08:00
mem_info = p.memory_info()
total_memory += mem_info.rss # RSS: Resident Set Size
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
# 转换为 MB
memory_mb = total_memory / (1024 * 1024)
# 计算内存占比
total_mem = psutil.virtual_memory().total
memory_percent = (total_memory / total_mem) * 100 if total_mem > 0 else 0.0
return {
'cpu_percent': total_cpu,
'memory_mb': memory_mb,
'memory_percent': memory_percent
}
except (psutil.NoSuchProcess, psutil.AccessDenied):
return {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
except Exception:
return {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
2025-12-13 09:41:37 +08:00
class CommandPerformanceTracker:
"""
命令执行性能追踪器
用于追踪单个命令的执行性能包括
- 执行耗时
2025-12-13 10:01:21 +08:00
- 进程级 CPU 和内存使用类似 htop
- 系统整体资源状态
2025-12-13 09:41:37 +08:00
使用方式
tracker = CommandPerformanceTracker("ffuf", command="ffuf -u http://...")
tracker.start()
2025-12-13 10:01:21 +08:00
tracker.set_pid(process.pid) # 进程启动后设置 PID
2025-12-13 09:41:37 +08:00
# ... 执行命令 ...
tracker.finish(success=True, duration=45.2)
"""
def __init__(self, tool_name: str, command: str = ""):
self.tool_name = tool_name
self.command = command
self.start_time: float = 0.0
2025-12-13 10:01:21 +08:00
self.pid: Optional[int] = None
# 系统级资源
self.sys_cpu_start: float = 0.0
self.sys_memory_gb_start: float = 0.0
2025-12-13 18:40:36 +08:00
self.sys_memory_percent_start: float = 0.0
2025-12-13 10:01:21 +08:00
# 进程级资源峰值
self.proc_cpu_peak: float = 0.0
self.proc_memory_mb_peak: float = 0.0
2025-12-13 18:40:36 +08:00
self.proc_memory_percent_peak: float = 0.0
2025-12-13 09:41:37 +08:00
def start(self) -> None:
"""开始追踪,记录初始系统状态"""
self.start_time = time.time()
stats = _get_system_stats()
2025-12-13 10:01:21 +08:00
self.sys_cpu_start = stats['cpu_percent']
self.sys_memory_gb_start = stats['memory_gb']
2025-12-13 18:40:36 +08:00
self.sys_memory_percent_start = stats['memory_percent']
2025-12-13 09:41:37 +08:00
# 截断过长的命令
cmd_display = self.command[:200] + "..." if len(self.command) > 200 else self.command
perf_logger.info(
2025-12-13 18:40:36 +08:00
"📊 命令开始 - %s, 系统: CPU %.1f%%, 内存 %.1fGB(%.1f%%), 命令: %s",
2025-12-13 09:41:37 +08:00
self.tool_name,
2025-12-13 10:01:21 +08:00
self.sys_cpu_start,
self.sys_memory_gb_start,
2025-12-13 18:40:36 +08:00
self.sys_memory_percent_start,
2025-12-13 09:41:37 +08:00
cmd_display
)
2025-12-13 10:01:21 +08:00
def set_pid(self, pid: int) -> None:
"""
设置要追踪的进程 PID
Args:
pid: 进程 ID
"""
self.pid = pid
# 初始化 CPU 采样psutil 需要先调用一次)
2025-12-13 18:40:36 +08:00
# CPU 百分比计算需要两次调用之间的时间间隔,第一次调用是初始化
2025-12-13 10:01:21 +08:00
if psutil and pid:
try:
process = psutil.Process(pid)
2025-12-13 18:40:36 +08:00
# 第一次调用初始化 CPU 计算基准
process.cpu_percent()
# 为所有子进程也初始化 CPU 计算
2025-12-13 10:01:21 +08:00
for child in process.children(recursive=True):
try:
2025-12-13 18:40:36 +08:00
child.cpu_percent()
2025-12-13 10:01:21 +08:00
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
def sample(self) -> dict:
"""
采样当前进程资源使用可选用于长时间运行的命令
Returns:
dict: 进程资源使用情况
"""
if not self.pid:
return {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
stats = _get_process_stats(self.pid)
# 更新峰值
if stats['cpu_percent'] > self.proc_cpu_peak:
self.proc_cpu_peak = stats['cpu_percent']
if stats['memory_mb'] > self.proc_memory_mb_peak:
self.proc_memory_mb_peak = stats['memory_mb']
2025-12-13 18:40:36 +08:00
if stats['memory_percent'] > self.proc_memory_percent_peak:
self.proc_memory_percent_peak = stats['memory_percent']
2025-12-13 10:01:21 +08:00
return stats
2025-12-13 09:41:37 +08:00
def finish(
self,
success: bool = True,
duration: Optional[float] = None,
timeout: Optional[int] = None,
is_timeout: bool = False
) -> None:
"""
结束追踪并记录性能日志
Args:
success: 是否成功
duration: 执行耗时如果不传则自动计算
timeout: 超时配置
is_timeout: 是否超时
"""
# 计算耗时
if duration is None:
duration = time.time() - self.start_time
# 获取结束时的系统状态
2025-12-13 10:01:21 +08:00
sys_stats = _get_system_stats()
# 获取进程最终资源使用(如果进程还在)
proc_stats = {'cpu_percent': 0.0, 'memory_mb': 0.0, 'memory_percent': 0.0}
if self.pid:
proc_stats = _get_process_stats(self.pid)
# 更新峰值
if proc_stats['cpu_percent'] > self.proc_cpu_peak:
self.proc_cpu_peak = proc_stats['cpu_percent']
if proc_stats['memory_mb'] > self.proc_memory_mb_peak:
self.proc_memory_mb_peak = proc_stats['memory_mb']
2025-12-13 18:40:36 +08:00
if proc_stats['memory_percent'] > self.proc_memory_percent_peak:
self.proc_memory_percent_peak = proc_stats['memory_percent']
2025-12-13 09:41:37 +08:00
status = "" if success else ("⏱ 超时" if is_timeout else "")
# 截断过长的命令
cmd_display = self.command[:200] + "..." if len(self.command) > 200 else self.command
2025-12-13 10:01:21 +08:00
# 日志格式:进程资源 + 系统资源
if self.pid and (self.proc_cpu_peak > 0 or self.proc_memory_mb_peak > 0):
perf_logger.info(
"📊 命令结束 - %s %s, 耗时: %.2fs%s, "
2025-12-13 18:40:36 +08:00
"进程: CPU %.1f%%(峰值), 内存 %.1fMB(%.1f%%峰值), "
"系统: CPU %.1f%%%.1f%%, 内存 %.1fGB(%.1f%%)→%.1fGB(%.1f%%), "
2025-12-13 10:01:21 +08:00
"命令: %s",
self.tool_name,
status,
duration,
f", 超时配置: {timeout}s" if timeout else "",
self.proc_cpu_peak,
self.proc_memory_mb_peak,
2025-12-13 18:40:36 +08:00
self.proc_memory_percent_peak,
2025-12-13 10:01:21 +08:00
self.sys_cpu_start,
sys_stats['cpu_percent'],
self.sys_memory_gb_start,
2025-12-13 18:40:36 +08:00
self.sys_memory_percent_start,
2025-12-13 10:01:21 +08:00
sys_stats['memory_gb'],
2025-12-13 18:40:36 +08:00
sys_stats['memory_percent'],
2025-12-13 10:01:21 +08:00
cmd_display
)
else:
# 没有进程级数据,只显示系统级
perf_logger.info(
"📊 命令结束 - %s %s, 耗时: %.2fs%s, "
2025-12-13 18:40:36 +08:00
"系统: CPU %.1f%%%.1f%%, 内存 %.1fGB(%.1f%%)→%.1fGB(%.1f%%), "
2025-12-13 10:01:21 +08:00
"命令: %s",
self.tool_name,
status,
duration,
f", 超时配置: {timeout}s" if timeout else "",
self.sys_cpu_start,
sys_stats['cpu_percent'],
self.sys_memory_gb_start,
2025-12-13 18:40:36 +08:00
self.sys_memory_percent_start,
2025-12-13 10:01:21 +08:00
sys_stats['memory_gb'],
2025-12-13 18:40:36 +08:00
sys_stats['memory_percent'],
2025-12-13 10:01:21 +08:00
cmd_display
)