From 0ccd362535f6b5dc93ca2515680e8798480aa6f1 Mon Sep 17 00:00:00 2001 From: yyhuni Date: Sat, 3 Jan 2026 13:32:58 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=B8=8B=E8=BD=BD=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 31 ++++++- backend/apps/asset/services/search_service.py | 43 +++++++++ backend/apps/asset/views/search_views.py | 90 ++++++------------- 3 files changed, 98 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 83668cbd..c60ca2b8 100644 --- a/README.md +++ b/README.md @@ -13,14 +13,14 @@

功能特性 • + 资产搜索快速开始文档 • - 技术栈反馈与贡献

- 🔍 关键词: ASM | 攻击面管理 | 漏洞扫描 | 资产发现 | Bug Bounty | 渗透测试 | Nuclei | 子域名枚举 | EASM + 🔍 关键词: ASM | 攻击面管理 | 漏洞扫描 | 资产发现 | 资产搜索 | Bug Bounty | 渗透测试 | Nuclei | 子域名枚举 | EASM

--- @@ -162,6 +162,30 @@ flowchart TB W3 -.心跳上报.-> REDIS ``` +### 🔎 全局资产搜索 +- **多类型搜索** - 支持 Website 和 Endpoint 两种资产类型 +- **表达式语法** - 支持 `=`(模糊)、`==`(精确)、`!=`(不等于)操作符 +- **逻辑组合** - 支持 `&&` (AND) 和 `||` (OR) 逻辑组合 +- **多字段查询** - 支持 host、url、title、tech、status、body、header 字段 +- **CSV 导出** - 流式导出全部搜索结果,无数量限制 + +#### 搜索语法示例 + +```bash +# 基础搜索 +host="api" # host 包含 "api" +status=="200" # 状态码精确等于 200 +tech="nginx" # 技术栈包含 nginx + +# 组合搜索 +host="api" && status=="200" # host 包含 api 且状态码为 200 +tech="vue" || tech="react" # 技术栈包含 vue 或 react + +# 复杂查询 +host="admin" && tech="php" && status=="200" +url="/api/v1" && status!="404" +``` + ### 📊 可视化界面 - **数据统计** - 资产/漏洞统计仪表盘 - **实时通知** - WebSocket 消息推送 @@ -172,7 +196,7 @@ flowchart TB ### 环境要求 -- **操作系统**: Ubuntu 20.04+ / Debian 11+ (推荐) +- **操作系统**: Ubuntu 20.04+ / Debian 11+ - **硬件**: 2核 4G 内存起步,20GB+ 磁盘空间 ### 一键安装 @@ -197,6 +221,7 @@ sudo ./install.sh --mirror ### 访问服务 - **Web 界面**: `https://ip:8083` +- **默认账号**: admin / admin(首次登录后请修改密码) ### 常用命令 diff --git a/backend/apps/asset/services/search_service.py b/backend/apps/asset/services/search_service.py index cb012678..882e4853 100644 --- a/backend/apps/asset/services/search_service.py +++ b/backend/apps/asset/services/search_service.py @@ -394,3 +394,46 @@ class AssetSearchService: except Exception as e: logger.error(f"统计查询失败: {e}") raise + + def search_iter( + self, + query: str, + asset_type: AssetType = 'website', + batch_size: int = 1000 + ): + """ + 流式搜索资产(使用服务端游标,内存友好) + + Args: + query: 搜索查询字符串 + asset_type: 资产类型 ('website' 或 'endpoint') + batch_size: 每批获取的数量 + + Yields: + Dict: 单条搜索结果 + """ + where_clause, params = SearchQueryParser.parse(query) + + # 根据资产类型选择视图和字段 + view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view') + select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS + + sql = f""" + SELECT {select_fields} + FROM {view_name} + WHERE {where_clause} + ORDER BY created_at DESC + """ + + try: + # 使用服务端游标,避免一次性加载所有数据到内存 + 
with connection.chunked_cursor() as cursor: + cursor.cursor.itersize = batch_size + cursor.execute(sql, params) + columns = [col[0] for col in cursor.description] + + for row in cursor: + yield dict(zip(columns, row)) + except Exception as e: + logger.error(f"流式搜索查询失败: {e}, SQL: {sql}, params: {params}") + raise diff --git a/backend/apps/asset/views/search_views.py b/backend/apps/asset/views/search_views.py index 21a95b45..c14df841 100644 --- a/backend/apps/asset/views/search_views.py +++ b/backend/apps/asset/views/search_views.py @@ -28,8 +28,6 @@ import logging import json -import csv -from io import StringIO from datetime import datetime from urllib.parse import urlparse, urlunparse from rest_framework import status @@ -287,76 +285,37 @@ class AssetSearchExportView(APIView): asset_type: 资产类型 ('website' 或 'endpoint',默认 'website') Response: - CSV 文件流 + CSV 文件流(使用服务端游标,支持大数据量导出) """ - # 导出数量限制 - MAX_EXPORT_ROWS = 10000 - def __init__(self, **kwargs): super().__init__(**kwargs) self.service = AssetSearchService() - def _parse_headers(self, headers_data) -> str: - """解析响应头为字符串""" - if not headers_data: - return '' - try: - headers = json.loads(headers_data) - return '; '.join(f'{k}: {v}' for k, v in headers.items()) - except (json.JSONDecodeError, TypeError): - return str(headers_data) - - def _generate_csv(self, results: list, asset_type: str): - """生成 CSV 内容的生成器""" - # 定义列 + def _get_headers_and_formatters(self, asset_type: str): + """获取 CSV 表头和格式化器""" + from apps.common.utils import format_datetime, format_list_field + if asset_type == 'website': - columns = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length', + headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length', 'webserver', 'location', 'tech', 'vhost', 'created_at'] - headers = ['URL', 'Host', 'Title', 'Status', 'Content-Type', 'Content-Length', - 'Webserver', 'Location', 'Technologies', 'VHost', 'Created At'] else: - columns = ['url', 'host', 'title', 
'status_code', 'content_type', 'content_length', + headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length', 'webserver', 'location', 'tech', 'matched_gf_patterns', 'vhost', 'created_at'] - headers = ['URL', 'Host', 'Title', 'Status', 'Content-Type', 'Content-Length', - 'Webserver', 'Location', 'Technologies', 'GF Patterns', 'VHost', 'Created At'] - # 写入 BOM 和表头 - output = StringIO() - writer = csv.writer(output) + formatters = { + 'created_at': format_datetime, + 'tech': lambda x: format_list_field(x, separator='; '), + 'matched_gf_patterns': lambda x: format_list_field(x, separator='; '), + 'vhost': lambda x: 'true' if x else ('false' if x is False else ''), + } - # UTF-8 BOM - yield '\ufeff' - - # 表头 - writer.writerow(headers) - yield output.getvalue() - output.seek(0) - output.truncate(0) - - # 数据行 - for result in results: - row = [] - for col in columns: - value = result.get(col) - if col == 'tech' or col == 'matched_gf_patterns': - # 数组转字符串 - row.append('; '.join(value) if value else '') - elif col == 'created_at': - # 日期格式化 - row.append(value.strftime('%Y-%m-%d %H:%M:%S') if value else '') - elif col == 'vhost': - row.append('true' if value else 'false' if value is False else '') - else: - row.append(str(value) if value is not None else '') - - writer.writerow(row) - yield output.getvalue() - output.seek(0) - output.truncate(0) + return headers, formatters def get(self, request: Request): - """导出搜索结果为 CSV""" + """导出搜索结果为 CSV(流式导出,无数量限制)""" + from apps.common.utils import generate_csv_rows + # 获取搜索查询 query = request.query_params.get('q', '').strip() @@ -376,23 +335,28 @@ class AssetSearchExportView(APIView): status_code=status.HTTP_400_BAD_REQUEST ) - # 获取搜索结果(限制数量) - results = self.service.search(query, asset_type, limit=self.MAX_EXPORT_ROWS) - - if not results: + # 检查是否有结果(快速检查,避免空导出) + total = self.service.count(query, asset_type) + if total == 0: return error_response( code=ErrorCodes.NOT_FOUND, message='No results to export', 
status_code=status.HTTP_404_NOT_FOUND ) + # 获取表头和格式化器 + headers, formatters = self._get_headers_and_formatters(asset_type) + + # 获取流式数据迭代器 + data_iterator = self.service.search_iter(query, asset_type) + # 生成文件名 timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') filename = f'search_{asset_type}_{timestamp}.csv' # 返回流式响应 response = StreamingHttpResponse( - self._generate_csv(results, asset_type), + generate_csv_rows(data_iterator, headers, formatters), content_type='text/csv; charset=utf-8' ) response['Content-Disposition'] = f'attachment; filename="{filename}"'