Compare commits

...

1 Commits

Author SHA1 Message Date
yyhuni
0ccd362535 优化下载逻辑 2026-01-03 13:32:58 +08:00
3 changed files with 98 additions and 66 deletions

View File

@@ -13,14 +13,14 @@
<p align="center">
<a href="#-功能特性">功能特性</a> •
<a href="#-全局资产搜索">资产搜索</a> •
<a href="#-快速开始">快速开始</a> •
<a href="#-文档">文档</a> •
<a href="#-技术栈">技术栈</a> •
<a href="#-反馈与贡献">反馈与贡献</a>
</p>
<p align="center">
<sub>🔍 关键词: ASM | 攻击面管理 | 漏洞扫描 | 资产发现 | Bug Bounty | 渗透测试 | Nuclei | 子域名枚举 | EASM</sub>
<sub>🔍 关键词: ASM | 攻击面管理 | 漏洞扫描 | 资产发现 | 资产搜索 | Bug Bounty | 渗透测试 | Nuclei | 子域名枚举 | EASM</sub>
</p>
---
@@ -162,6 +162,30 @@ flowchart TB
W3 -.心跳上报.-> REDIS
```
### 🔎 全局资产搜索
- **多类型搜索** - 支持 Website 和 Endpoint 两种资产类型
- **表达式语法** - 支持 `=`(模糊)、`==`(精确)、`!=`(不等于)操作符
- **逻辑组合** - 支持 `&&` (AND) 和 `||` (OR) 逻辑组合
- **多字段查询** - 支持 host、url、title、tech、status、body、header 字段
- **CSV 导出** - 流式导出全部搜索结果,无数量限制
#### 搜索语法示例
```bash
# 基础搜索
host="api" # host 包含 "api"
status=="200" # 状态码精确等于 200
tech="nginx" # 技术栈包含 nginx
# 组合搜索
host="api" && status=="200" # host 包含 api 且状态码为 200
tech="vue" || tech="react" # 技术栈包含 vue 或 react
# 复杂查询
host="admin" && tech="php" && status=="200"
url="/api/v1" && status!="404"
```
### 📊 可视化界面
- **数据统计** - 资产/漏洞统计仪表盘
- **实时通知** - WebSocket 消息推送
@@ -172,7 +196,7 @@ flowchart TB
### 环境要求
- **操作系统**: Ubuntu 20.04+ / Debian 11+ (推荐)
- **操作系统**: Ubuntu 20.04+ / Debian 11+
- **硬件**: 2核 4G 内存起步,20GB+ 磁盘空间
### 一键安装
@@ -197,6 +221,7 @@ sudo ./install.sh --mirror
### 访问服务
- **Web 界面**: `https://ip:8083`
- **默认账号**: admin / admin(首次登录后请修改密码)
### 常用命令

View File

@@ -394,3 +394,46 @@ class AssetSearchService:
except Exception as e:
logger.error(f"统计查询失败: {e}")
raise
def search_iter(
    self,
    query: str,
    asset_type: AssetType = 'website',
    batch_size: int = 1000
):
    """
    Stream asset search results using a named (server-side) cursor,
    so the full result set is never materialized in client memory.

    Args:
        query: Search expression string, parsed by SearchQueryParser
            into a WHERE clause plus bound parameters.
        asset_type: Asset type, 'website' or 'endpoint'; unknown values
            fall back to the website view.
        batch_size: Number of rows fetched per server round trip.

    Yields:
        dict: One result row, keyed by column name.

    Raises:
        Exception: Query/cursor failures are logged and re-raised.
            (Parse errors from SearchQueryParser propagate unlogged —
            parsing happens before the try block.)
    """
    where_clause, params = SearchQueryParser.parse(query)

    # Choose the database view and SELECT column list for the asset type.
    view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
    select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS

    # view_name / select_fields are trusted module constants and the
    # where_clause comes from the parser; user-supplied values travel
    # only in `params`, so the f-string itself is not an injection path.
    sql = f"""
        SELECT {select_fields}
        FROM {view_name}
        WHERE {where_clause}
        ORDER BY created_at DESC
    """
    try:
        # A *named* cursor requests a server-side cursor (psycopg
        # convention — TODO confirm the DB backend), keeping memory flat
        # for arbitrarily large exports.
        with connection.cursor(name='export_cursor') as cursor:
            cursor.itersize = batch_size  # rows per fetch round trip
            cursor.execute(sql, params)
            # Map positional rows to dicts via the cursor's column metadata.
            columns = [col[0] for col in cursor.description]
            for row in cursor:
                yield dict(zip(columns, row))
    except Exception as e:
        logger.error(f"流式搜索查询失败: {e}, SQL: {sql}, params: {params}")
        raise

View File

@@ -28,8 +28,6 @@
import logging
import json
import csv
from io import StringIO
from datetime import datetime
from urllib.parse import urlparse, urlunparse
from rest_framework import status
@@ -287,76 +285,37 @@ class AssetSearchExportView(APIView):
asset_type: 资产类型('website' 或 'endpoint',默认 'website')
Response:
CSV 文件流
CSV 文件流(使用服务端游标,支持大数据量导出)
"""
# 导出数量限制
MAX_EXPORT_ROWS = 10000
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = AssetSearchService()
def _parse_headers(self, headers_data) -> str:
"""解析响应头为字符串"""
if not headers_data:
return ''
try:
headers = json.loads(headers_data)
return '; '.join(f'{k}: {v}' for k, v in headers.items())
except (json.JSONDecodeError, TypeError):
return str(headers_data)
def _generate_csv(self, results: list, asset_type: str):
"""生成 CSV 内容的生成器"""
# 定义列
def _get_headers_and_formatters(self, asset_type: str):
"""获取 CSV 表头和格式化器"""
from apps.common.utils import format_datetime, format_list_field
if asset_type == 'website':
columns = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
'webserver', 'location', 'tech', 'vhost', 'created_at']
headers = ['URL', 'Host', 'Title', 'Status', 'Content-Type', 'Content-Length',
'Webserver', 'Location', 'Technologies', 'VHost', 'Created At']
else:
columns = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
'webserver', 'location', 'tech', 'matched_gf_patterns', 'vhost', 'created_at']
headers = ['URL', 'Host', 'Title', 'Status', 'Content-Type', 'Content-Length',
'Webserver', 'Location', 'Technologies', 'GF Patterns', 'VHost', 'Created At']
# 写入 BOM 和表头
output = StringIO()
writer = csv.writer(output)
formatters = {
'created_at': format_datetime,
'tech': lambda x: format_list_field(x, separator='; '),
'matched_gf_patterns': lambda x: format_list_field(x, separator='; '),
'vhost': lambda x: 'true' if x else ('false' if x is False else ''),
}
# UTF-8 BOM
yield '\ufeff'
# 表头
writer.writerow(headers)
yield output.getvalue()
output.seek(0)
output.truncate(0)
# 数据行
for result in results:
row = []
for col in columns:
value = result.get(col)
if col == 'tech' or col == 'matched_gf_patterns':
# 数组转字符串
row.append('; '.join(value) if value else '')
elif col == 'created_at':
# 日期格式化
row.append(value.strftime('%Y-%m-%d %H:%M:%S') if value else '')
elif col == 'vhost':
row.append('true' if value else 'false' if value is False else '')
else:
row.append(str(value) if value is not None else '')
writer.writerow(row)
yield output.getvalue()
output.seek(0)
output.truncate(0)
return headers, formatters
def get(self, request: Request):
"""导出搜索结果为 CSV"""
"""导出搜索结果为 CSV(流式导出,无数量限制)"""
from apps.common.utils import generate_csv_rows
# 获取搜索查询
query = request.query_params.get('q', '').strip()
@@ -376,23 +335,28 @@ class AssetSearchExportView(APIView):
status_code=status.HTTP_400_BAD_REQUEST
)
# 获取搜索结果(限制数量)
results = self.service.search(query, asset_type, limit=self.MAX_EXPORT_ROWS)
if not results:
# 检查是否有结果(快速检查,避免空导出)
total = self.service.count(query, asset_type)
if total == 0:
return error_response(
code=ErrorCodes.NOT_FOUND,
message='No results to export',
status_code=status.HTTP_404_NOT_FOUND
)
# 获取表头和格式化器
headers, formatters = self._get_headers_and_formatters(asset_type)
# 获取流式数据迭代器
data_iterator = self.service.search_iter(query, asset_type)
# 生成文件名
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'search_{asset_type}_{timestamp}.csv'
# 返回流式响应
response = StreamingHttpResponse(
self._generate_csv(results, asset_type),
generate_csv_rows(data_iterator, headers, formatters),
content_type='text/csv; charset=utf-8'
)
response['Content-Disposition'] = f'attachment; filename="{filename}"'