Files
xingrin/backend/apps/common/utils/csv_utils.py
yyhuni adb53c9f85 feat(asset,scan): add configurable statement timeout and improve CSV export
- Add statement_timeout_ms parameter to search_service count() and stream_search() methods for long-running exports
- Replace server-side cursors with OFFSET/LIMIT batching for better Django compatibility
- Introduce create_csv_export_response() utility function to standardize CSV export handling
- Add engine-preset-selector and scan-config-editor components for enhanced scan configuration UI
- Update YAML editor component with improved styling and functionality
- Add i18n translations for new scan configuration features in English and Chinese
- Refactor CSV export endpoints to use new utility function instead of manual StreamingHttpResponse
- Remove unused uuid import from search_service.py
- Update nginx configuration for improved performance
- Enhance search service with configurable timeout support for large dataset exports
2026-01-04 08:58:31 +08:00

245 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""CSV 导出工具模块
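Provides streaming CSV generation with support for:
- UTF-8 BOM (Excel compatibility)
- RFC 4180 compliant escaping
- Streaming generation (memory friendly)
- File responses with Content-Length (so browsers can show download progress)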
"""
import csv
import io
import os
import tempfile
import logging
from datetime import datetime
from typing import Iterator, Dict, Any, List, Callable, Optional
from django.http import FileResponse, StreamingHttpResponse
logger = logging.getLogger(__name__)
# UTF-8 byte order mark, prepended to the CSV header row so that Excel
# detects the encoding correctly.
UTF8_BOM = '\ufeff'
def generate_csv_rows(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    field_formatters: Optional[Dict[str, Callable]] = None
) -> Iterator[str]:
    """
    Lazily render dictionaries as CSV text, one row per yielded string.

    The first yielded string is the UTF-8 BOM followed by the header row;
    every following string is one data row. Each string includes the line
    terminator written by ``csv.writer``.

    Args:
        data_iterator: Iterator of dictionaries, one per data row.
        headers: Column names. They are written as the header row and are
            also used as the keys looked up in every row dictionary.
        field_formatters: Optional mapping of column name to a callable that
            converts the raw value before it is written. The callable
            receives ``''`` when the row has no such key.

    Yields:
        CSV row strings (line terminator included).

    Example:
        >>> data = [{'ip': '192.168.1.1', 'hosts': ['a.com', 'b.com']}]
        >>> headers = ['ip', 'hosts']
        >>> formatters = {'hosts': format_list_field}
        >>> for row in generate_csv_rows(iter(data), headers, formatters):
        ...     print(row, end='')
    """
    def render(cells) -> str:
        # A fresh buffer per row keeps each yielded chunk self-contained.
        buffer = io.StringIO()
        csv.writer(buffer, quoting=csv.QUOTE_MINIMAL).writerow(cells)
        return buffer.getvalue()

    formatters = field_formatters or {}

    # Header row, prefixed with the BOM.
    yield UTF8_BOM + render(headers)

    # Data rows.
    for record in data_iterator:
        cells = []
        for column in headers:
            cell = record.get(column, '')
            if column in formatters:
                cell = formatters[column](cell)
            # None (raw or returned by a formatter) is written as an empty cell.
            cells.append('' if cell is None else cell)
        yield render(cells)
def format_list_field(values: List, separator: str = ';') -> str:
    """
    Join the items of a list-valued field into a single delimited string.

    Args:
        values: Items to join. ``None`` and empty containers produce an
            empty string. A plain string is treated as one already
            formatted value and is returned unchanged.
        separator: Delimiter placed between items; defaults to a semicolon.

    Returns:
        The items converted with ``str()`` and joined by ``separator``.

    Example:
        >>> format_list_field(['a.com', 'b.com'])
        'a.com;b.com'
        >>> format_list_field([80, 443])
        '80;443'
        >>> format_list_field([])
        ''
        >>> format_list_field(None)
        ''
        >>> format_list_field('a.com')
        'a.com'
    """
    if not values:
        return ''
    if isinstance(values, str):
        # Joining a str would iterate its characters and produce
        # 'a;.;c;o;m' for 'a.com'; a lone string is a single value.
        return values
    return separator.join(str(item) for item in values)
def format_datetime(dt: Optional[datetime]) -> str:
    """
    Render a datetime as ``YYYY-MM-DD HH:MM:SS`` text.

    Timezone-aware values are first converted with Django's
    ``timezone.localtime``; naive values are formatted as they are.

    Args:
        dt: A datetime object or ``None``. A value that is already a string
            is returned unchanged.

    Returns:
        The formatted timestamp, or an empty string for ``None``.

    Example:
        >>> from datetime import datetime
        >>> format_datetime(datetime(2024, 1, 15, 10, 30, 0))
        '2024-01-15 10:30:00'
        >>> format_datetime(None)
        ''
    """
    # Nothing to convert: missing values become '', strings pass through.
    if dt is None or isinstance(dt, str):
        return '' if dt is None else dt

    # Imported lazily, only once an actual datetime has to be converted.
    from django.utils import timezone

    localized = timezone.localtime(dt) if timezone.is_aware(dt) else dt
    return localized.strftime('%Y-%m-%d %H:%M:%S')
def create_csv_export_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None,
    show_progress: bool = True
) -> FileResponse | StreamingHttpResponse:
    """
    Build the HTTP response for a CSV export.

    ``show_progress`` selects the response type:
    - truthy: the CSV is buffered in a temporary file and returned as a
      ``FileResponse`` carrying Content-Length, so browsers can display
      download progress;
    - falsy: the rows are sent through a ``StreamingHttpResponse`` as they
      are generated (lighter on memory, but no download progress).

    Args:
        data_iterator: Iterator of dictionaries, one per data row.
        headers: CSV header names.
        filename: Download file name (e.g. "export_2024.csv").
        field_formatters: Optional mapping of column name to formatter.
        show_progress: Whether to favour download progress (default True).

    Returns:
        A FileResponse or a StreamingHttpResponse.

    Example:
        >>> data_iter = service.iter_data()
        >>> headers = ['url', 'host', 'created_at']
        >>> formatters = {'created_at': format_datetime}
        >>> response = create_csv_export_response(
        ...     data_iter, headers, 'websites.csv', formatters
        ... )
        >>> return response
    """
    build_response = (
        _create_file_response if show_progress else _create_streaming_response
    )
    return build_response(data_iterator, headers, filename, field_formatters)
def _create_file_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None
) -> FileResponse:
    """
    Build a file response with Content-Length (enables download progress).

    The whole CSV is first written to a temporary file so its size is known
    before the response starts, then the file is handed to ``FileResponse``.

    Args:
        data_iterator: Iterator of dictionaries, one per data row.
        headers: CSV header names.
        filename: File name offered to the browser.
        field_formatters: Optional mapping of column name to formatter.

    Returns:
        A FileResponse that streams the temporary file as an attachment.

    Raises:
        Exception: Any error raised while generating the rows or building
            the response is logged and re-raised after the temporary file
            has been released.
    """
    # An anonymous temporary file is removed as soon as it is closed, so the
    # export leaves nothing behind once the response's stream is closed.
    # A named file whose close() is patched afterwards is deliberately
    # avoided: the response captures the stream's close method when it is
    # constructed, so a replacement installed later is not guaranteed to run.
    # Binary mode also keeps the CSV line endings exactly as generated (text
    # mode without newline='' may translate them on some platforms).
    temp_file = tempfile.TemporaryFile(mode='w+b', suffix='.csv')
    try:
        # Stream the CSV rows into the temporary file.
        for row in generate_csv_rows(data_iterator, headers, field_formatters):
            temp_file.write(row.encode('utf-8'))

        # The write position equals the total size; rewind for reading.
        file_size = temp_file.tell()
        temp_file.seek(0)

        # The response takes over the open file and is responsible for
        # closing it when the response itself is closed.
        response = FileResponse(
            temp_file,
            content_type='text/csv; charset=utf-8',
            as_attachment=True,
            filename=filename
        )
        response['Content-Length'] = file_size
        return response
    except Exception as e:
        # Closing the anonymous file also deletes it.
        try:
            temp_file.close()
        except OSError:
            pass
        logger.error(f"创建 CSV 导出响应失败: {e}")
        raise
def _create_streaming_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None
) -> StreamingHttpResponse:
    """
    Build a streaming response (no Content-Length, lighter on memory).

    Rows are produced by ``generate_csv_rows`` and sent as they are
    generated.

    Args:
        data_iterator: Iterator of dictionaries, one per data row.
        headers: CSV header names.
        filename: File name offered to the browser.
        field_formatters: Optional mapping of column name to formatter.

    Returns:
        A StreamingHttpResponse with an attachment Content-Disposition.
    """
    from urllib.parse import quote

    response = StreamingHttpResponse(
        generate_csv_rows(data_iterator, headers, field_formatters),
        content_type='text/csv; charset=utf-8'
    )

    try:
        filename.encode('ascii')
    except UnicodeEncodeError:
        # Non-ASCII names cannot be carried in the quoted-string form; use
        # the percent-encoded extended parameter (RFC 6266 / RFC 5987).
        disposition = f"attachment; filename*=utf-8''{quote(filename)}"
    else:
        # Escape backslashes and quotes so the quoted-string stays
        # well-formed; plain names produce exactly the same header as before.
        escaped = filename.replace('\\', '\\\\').replace('"', '\\"')
        disposition = f'attachment; filename="{escaped}"'

    response['Content-Disposition'] = disposition
    return response