Mirror of https://github.com/yyhuni/xingrin.git
Synced 2026-01-31 19:53:11 +08:00

Compare commits: v1.2.13-de ... v1.2.14-de (3 commits)
Commits:
- 857ab737b5
- ee2d99edda
- db6ce16aca
@@ -1,7 +1,7 @@
 """Endpoint DTO"""
 
 from dataclasses import dataclass
-from typing import Optional, List
+from typing import Optional, List, Dict, Any
 
 
 @dataclass
@@ -20,9 +20,12 @@ class EndpointDTO:
     vhost: Optional[bool] = None
     location: Optional[str] = None
     matched_gf_patterns: Optional[List[str]] = None
+    response_headers: Optional[Dict[str, Any]] = None
 
     def __post_init__(self):
         if self.tech is None:
             self.tech = []
         if self.matched_gf_patterns is None:
             self.matched_gf_patterns = []
+        if self.response_headers is None:
+            self.response_headers = {}
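Review note on the `__post_init__` additions above: dataclasses cannot take mutable literals (`[]`, `{}`) as field defaults, so each new container field defaults to `None` and is normalized after init. A minimal self-contained sketch of the same pattern (class names are illustrative, not from this repo):

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

@dataclass
class HeadersDTO:  # illustrative stand-in for the DTOs in this diff
    tech: Optional[List[str]] = None
    response_headers: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        # normalize None into fresh containers so instances never share state
        if self.tech is None:
            self.tech = []
        if self.response_headers is None:
            self.response_headers = {}

# equivalent stdlib alternative without __post_init__:
@dataclass
class HeadersDTO2:
    tech: List[str] = field(default_factory=list)
    response_headers: Dict[str, Any] = field(default_factory=dict)
```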
@@ -1,7 +1,7 @@
 """WebSite DTO"""
 
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 
 
 @dataclass
@@ -20,7 +20,10 @@ class WebSiteDTO:
     body_preview: str = ''
     vhost: Optional[bool] = None
     created_at: str = None
+    response_headers: Dict[str, Any] = None
 
     def __post_init__(self):
         if self.tech is None:
             self.tech = []
+        if self.response_headers is None:
+            self.response_headers = {}
@@ -1,7 +1,7 @@
 """EndpointSnapshot DTO"""
 
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 
 
 @dataclass
@@ -26,12 +26,15 @@ class EndpointSnapshotDTO:
     vhost: Optional[bool] = None
     matched_gf_patterns: List[str] = None
     target_id: Optional[int] = None  # redundant field, used when syncing to the asset table
+    response_headers: Dict[str, Any] = None
 
     def __post_init__(self):
         if self.tech is None:
             self.tech = []
         if self.matched_gf_patterns is None:
             self.matched_gf_patterns = []
+        if self.response_headers is None:
+            self.response_headers = {}
 
     def to_asset_dto(self):
         """
@@ -58,5 +61,6 @@ class EndpointSnapshotDTO:
             tech=self.tech if self.tech else [],
             vhost=self.vhost,
             location=self.location,
-            matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else []
+            matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else [],
+            response_headers=self.response_headers if self.response_headers else {},
         )
@@ -1,7 +1,7 @@
 """WebsiteSnapshot DTO"""
 
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 
 
 @dataclass
@@ -25,10 +25,13 @@ class WebsiteSnapshotDTO:
     tech: List[str] = None
     body_preview: str = ''
     vhost: Optional[bool] = None
+    response_headers: Dict[str, Any] = None
 
     def __post_init__(self):
         if self.tech is None:
             self.tech = []
+        if self.response_headers is None:
+            self.response_headers = {}
 
     def to_asset_dto(self):
         """
@@ -51,5 +54,6 @@ class WebsiteSnapshotDTO:
             content_type=self.content_type,
             tech=self.tech if self.tech else [],
             body_preview=self.body_preview,
-            vhost=self.vhost
+            vhost=self.vhost,
+            response_headers=self.response_headers if self.response_headers else {},
         )
@@ -124,6 +124,11 @@ class Endpoint(models.Model):
         default=list,
         help_text='匹配的GF模式列表,用于识别敏感端点(如api, debug, config等)'
     )
+    response_headers = models.JSONField(
+        blank=True,
+        default=dict,
+        help_text='HTTP响应头(JSON格式)'
+    )
 
     class Meta:
         db_table = 'endpoint'
@@ -138,6 +143,7 @@ class Endpoint(models.Model):
             models.Index(fields=['status_code']),  # status-code index, speeds up filtering
             models.Index(fields=['title']),  # title index, speeds up smart-filter search
             GinIndex(fields=['tech']),  # GIN index for __contains queries on the tech array
+            GinIndex(fields=['response_headers']),  # GIN index for queries on the response_headers JSON field
         ]
         constraints = [
             # plain unique constraint: url + target must be unique together
@@ -221,6 +227,11 @@ class WebSite(models.Model):
         blank=True,
         help_text='是否支持虚拟主机'
     )
+    response_headers = models.JSONField(
+        blank=True,
+        default=dict,
+        help_text='HTTP响应头(JSON格式)'
+    )
 
     class Meta:
         db_table = 'website'
@@ -235,6 +246,7 @@ class WebSite(models.Model):
             models.Index(fields=['title']),  # title index, speeds up smart-filter search
             models.Index(fields=['status_code']),  # status-code index, speeds up smart-filter search
             GinIndex(fields=['tech']),  # GIN index for __contains queries on the tech array
+            GinIndex(fields=['response_headers']),  # GIN index for queries on the response_headers JSON field
         ]
         constraints = [
             # plain unique constraint: url + target must be unique together
@@ -1,5 +1,6 @@
 from django.db import models
 from django.contrib.postgres.fields import ArrayField
+from django.contrib.postgres.indexes import GinIndex
 from django.core.validators import MinValueValidator, MaxValueValidator
 
 
@@ -70,6 +71,11 @@ class WebsiteSnapshot(models.Model):
     )
     body_preview = models.TextField(blank=True, default='', help_text='响应体预览')
     vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
+    response_headers = models.JSONField(
+        blank=True,
+        default=dict,
+        help_text='HTTP响应头(JSON格式)'
+    )
     created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
 
     class Meta:
@@ -83,6 +89,8 @@ class WebsiteSnapshot(models.Model):
             models.Index(fields=['host']),  # host index, speeds up lookup by hostname
             models.Index(fields=['title']),  # title index, speeds up title search
             models.Index(fields=['-created_at']),
+            GinIndex(fields=['tech']),  # GIN index, speeds up array-field queries
+            GinIndex(fields=['response_headers']),  # GIN index, speeds up JSON-field queries
         ]
         constraints = [
             # unique constraint: within one scan, each URL is recorded only once
@@ -259,6 +267,11 @@ class EndpointSnapshot(models.Model):
         default=list,
         help_text='匹配的GF模式列表'
     )
+    response_headers = models.JSONField(
+        blank=True,
+        default=dict,
+        help_text='HTTP响应头(JSON格式)'
+    )
     created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
 
     class Meta:
@@ -274,6 +287,8 @@ class EndpointSnapshot(models.Model):
             models.Index(fields=['status_code']),  # status-code index, speeds up filtering
             models.Index(fields=['webserver']),  # webserver index, speeds up server search
             models.Index(fields=['-created_at']),
+            GinIndex(fields=['tech']),  # GIN index, speeds up array-field queries
+            GinIndex(fields=['response_headers']),  # GIN index, speeds up JSON-field queries
         ]
         constraints = [
             # unique constraint: within one scan, each URL is recorded only once
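The new `GinIndex(fields=['response_headers'])` entries pair with the JSONField so containment lookups stay indexed. A hedged sketch of the queries these indexes are presumably meant to serve (the filter values are made up):

```python
from apps.asset.models import Endpoint, WebSite

# JSONB containment on the new response_headers column hits the GIN index
nginx_sites = WebSite.objects.filter(response_headers__contains={"server": "nginx"})

# ArrayField __contains on tech hits the existing GIN index
wordpress_endpoints = Endpoint.objects.filter(tech__contains=["WordPress"])
```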
@@ -53,7 +53,8 @@ class DjangoEndpointRepository:
                 tech=item.tech if item.tech else [],
                 vhost=item.vhost,
                 location=item.location or '',
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else {}
             )
             for item in unique_items
         ]
@@ -66,7 +67,7 @@ class DjangoEndpointRepository:
             update_fields=[
                 'host', 'title', 'status_code', 'content_length',
                 'webserver', 'body_preview', 'content_type', 'tech',
-                'vhost', 'location', 'matched_gf_patterns'
+                'vhost', 'location', 'matched_gf_patterns', 'response_headers'
             ],
             batch_size=1000
         )
@@ -143,7 +144,8 @@ class DjangoEndpointRepository:
                 tech=item.tech if item.tech else [],
                 vhost=item.vhost,
                 location=item.location or '',
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else {}
             )
             for item in unique_items
         ]
@@ -54,7 +54,8 @@ class DjangoWebSiteRepository:
                 tech=item.tech if item.tech else [],
                 status_code=item.status_code,
                 content_length=item.content_length,
-                vhost=item.vhost
+                vhost=item.vhost,
+                response_headers=item.response_headers if item.response_headers else {}
             )
             for item in unique_items
         ]
@@ -67,7 +68,7 @@ class DjangoWebSiteRepository:
             update_fields=[
                 'host', 'location', 'title', 'webserver',
                 'body_preview', 'content_type', 'tech',
-                'status_code', 'content_length', 'vhost'
+                'status_code', 'content_length', 'vhost', 'response_headers'
             ],
             batch_size=1000
         )
@@ -137,7 +138,8 @@ class DjangoWebSiteRepository:
                 tech=item.tech if item.tech else [],
                 status_code=item.status_code,
                 content_length=item.content_length,
-                vhost=item.vhost
+                vhost=item.vhost,
+                response_headers=item.response_headers if item.response_headers else {}
             )
             for item in unique_items
         ]
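Both repositories above add `response_headers` to the DTO-to-model mapping and to `update_fields`. Given the `url + target` unique constraints in the models diff, this reads like Django's `bulk_create(update_conflicts=True, ...)` upsert; a sketch under that assumption (the `unique_fields` value is inferred, not shown in the diff):

```python
from apps.asset.models import Endpoint

def upsert_endpoints(endpoint_objs: list) -> None:
    # Upsert keyed on the (url, target) unique constraint; existing rows get
    # the listed fields overwritten, now including response_headers.
    Endpoint.objects.bulk_create(
        endpoint_objs,
        update_conflicts=True,
        unique_fields=['url', 'target'],  # assumed from the models' constraint
        update_fields=[
            'host', 'title', 'status_code', 'content_length',
            'webserver', 'body_preview', 'content_type', 'tech',
            'vhost', 'location', 'matched_gf_patterns', 'response_headers',
        ],
        batch_size=1000,
    )
```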
@@ -44,6 +44,7 @@ class DjangoEndpointSnapshotRepository:
             snapshots.append(EndpointSnapshot(
                 scan_id=item.scan_id,
                 url=item.url,
+                host=item.host if item.host else '',
                 title=item.title,
                 status_code=item.status_code,
                 content_length=item.content_length,
@@ -53,7 +54,8 @@ class DjangoEndpointSnapshotRepository:
                 tech=item.tech if item.tech else [],
                 body_preview=item.body_preview,
                 vhost=item.vhost,
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else {}
             ))
 
         # bulk create (ignore conflicts; deduplicated by the unique constraint)
@@ -53,7 +53,8 @@ class DjangoWebsiteSnapshotRepository:
             content_type=item.content_type,
             tech=item.tech if item.tech else [],
             body_preview=item.body_preview,
-            vhost=item.vhost
+            vhost=item.vhost,
+            response_headers=item.response_headers if item.response_headers else {}
         ))
 
         # bulk create (ignore conflicts; deduplicated by the unique constraint)
@@ -67,9 +67,10 @@ class SubdomainListSerializer(serializers.ModelSerializer):
 
 
 class WebSiteSerializer(serializers.ModelSerializer):
-    """Site serializer"""
+    """Site serializer (target detail page)"""
 
     subdomain = serializers.CharField(source='subdomain.name', allow_blank=True, default='')
+    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)  # HTTP response headers
 
     class Meta:
         model = WebSite
@@ -86,6 +87,7 @@ class WebSiteSerializer(serializers.ModelSerializer):
             'body_preview',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP response headers
             'subdomain',
             'created_at',
         ]
@@ -140,6 +142,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
         source='matched_gf_patterns',
         read_only=True,
     )
+    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)  # HTTP response headers
 
     class Meta:
         model = Endpoint
@@ -155,6 +158,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
             'body_preview',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP response headers
             'gfPatterns',
             'created_at',
         ]
@@ -215,6 +219,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
     subdomain_name = serializers.CharField(source='subdomain.name', read_only=True)
     webserver = serializers.CharField(source='web_server', read_only=True)  # field-name mapping
     status_code = serializers.IntegerField(source='status', read_only=True)  # field-name mapping
+    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)  # HTTP response headers
 
     class Meta:
         model = WebsiteSnapshot
@@ -230,6 +235,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
             'body_preview',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP response headers
             'subdomain_name',
             'created_at',
         ]
@@ -264,6 +270,7 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
         source='matched_gf_patterns',
         read_only=True,
     )
+    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)  # HTTP response headers
 
     class Meta:
         model = EndpointSnapshot
@@ -280,6 +287,7 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
             'body_preview',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP response headers
             'gfPatterns',
             'created_at',
         ]
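Each serializer above exposes the snake_case column under a camelCase API name via `source`. A minimal self-contained sketch of that mapping (the serializer name is illustrative):

```python
from rest_framework import serializers

class ResponseHeadersSerializer(serializers.Serializer):  # illustrative
    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)

data = ResponseHeadersSerializer({'response_headers': {'server': 'nginx'}}).data
print(data)  # {'responseHeaders': {'server': 'nginx'}}
```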
@@ -15,9 +15,10 @@
 """
 
 from django.core.management.base import BaseCommand
+from io import StringIO
 from pathlib import Path
 
-import yaml
+from ruamel.yaml import YAML
 
 from apps.engine.models import ScanEngine
 
@@ -44,10 +45,12 @@ class Command(BaseCommand):
         with open(config_path, 'r', encoding='utf-8') as f:
             default_config = f.read()
 
-        # parse the YAML into a dict; used later to generate sub-engine configs
+        # parse with ruamel.yaml so comments are preserved
+        yaml_parser = YAML()
+        yaml_parser.preserve_quotes = True
         try:
-            config_dict = yaml.safe_load(default_config) or {}
-        except yaml.YAMLError as e:
+            config_dict = yaml_parser.load(default_config) or {}
+        except Exception as e:
             self.stdout.write(self.style.ERROR(f'引擎配置 YAML 解析失败: {e}'))
             return
 
@@ -83,16 +86,13 @@ class Command(BaseCommand):
             if scan_type != 'subdomain_discovery' and 'tools' not in scan_cfg:
                 continue
 
-            # build a YAML containing only the current scan type's config
+            # build a YAML containing only the current scan type's config (comments preserved)
             single_config = {scan_type: scan_cfg}
             try:
-                single_yaml = yaml.safe_dump(
-                    single_config,
-                    sort_keys=False,
-                    allow_unicode=True,
-                    default_flow_style=None,
-                )
-            except yaml.YAMLError as e:
+                stream = StringIO()
+                yaml_parser.dump(single_config, stream)
+                single_yaml = stream.getvalue()
+            except Exception as e:
                 self.stdout.write(self.style.ERROR(f'生成子引擎 {scan_type} 配置失败: {e}'))
                 continue
 
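Why the switch matters: `yaml.safe_load`/`safe_dump` discard comments, while ruamel.yaml's round-trip mode keeps comments attached to the mapping nodes they belong to, so a single section dumped on its own keeps its inline documentation. This is also why the config diff below moves the banner comments inside each top-level key. A small sketch:

```python
from io import StringIO
from ruamel.yaml import YAML

source = """\
site_scan:
  # ==================== Site scan ====================
  tools:
    httpx:
      enabled: true  # toggle the tool
"""

yaml_parser = YAML()
yaml_parser.preserve_quotes = True
config = yaml_parser.load(source)

# Dump just one section: comments nested under the key survive the round
# trip; comments that sat *above* the key at top level would not.
stream = StringIO()
yaml_parser.dump({'site_scan': config['site_scan']}, stream)
print(stream.getvalue())
```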
@@ -99,7 +99,8 @@ SITE_SCAN_COMMANDS = {
             '-status-code -content-type -content-length '
             '-location -title -server -body-preview '
             '-tech-detect -cdn -vhost '
-            '-random-agent -no-color -json'
+            '-include-response-header '
+            '-random-agent -no-color -json -silent'
         ),
         'optional': {
             'threads': '-threads {threads}',
@@ -171,7 +172,8 @@ URL_FETCH_COMMANDS = {
             '-status-code -content-type -content-length '
             '-location -title -server -body-preview '
             '-tech-detect -cdn -vhost '
-            '-random-agent -no-color -json'
+            '-include-response-header '
+            '-random-agent -no-color -json -silent'
         ),
         'optional': {
             'threads': '-threads {threads}',
@@ -4,14 +4,12 @@
 # Required parameter: enabled (whether the tool is on)
 # Optional parameter: timeout (seconds; default "auto" computes it)
 
-# ==================== Subdomain discovery ====================
-#
-# Stage 1: passive collection (parallel) - required; enable at least one tool
-# Stage 2: wordlist brute force (optional) - enumerate subdomains with a wordlist
-# Stage 3: mutation + validation (optional) - mutate known names, stream-validate liveness
-# Stage 4: DNS resolution check (optional) - verify all candidates actually resolve
-#
 subdomain_discovery:
+  # ==================== Subdomain discovery ====================
+  # Stage 1: passive collection (parallel) - required; enable at least one tool
+  # Stage 2: wordlist brute force (optional) - enumerate subdomains with a wordlist
+  # Stage 3: mutation + validation (optional) - mutate known names, stream-validate liveness
+  # Stage 4: DNS resolution check (optional) - verify all candidates actually resolve
   # === Stage 1: passive collection tools (run in parallel) ===
   passive_tools:
     subfinder:
@@ -55,8 +53,8 @@ subdomain_discovery:
   subdomain_resolve:
     timeout: auto  # computed from the number of candidate subdomains
 
-# ==================== Port scan ====================
 port_scan:
+  # ==================== Port scan ====================
   tools:
     naabu_active:
       enabled: true
@@ -70,8 +68,8 @@ port_scan:
       enabled: true
       # timeout: auto  # passive scans are usually fast
 
-# ==================== Site scan ====================
 site_scan:
+  # ==================== Site scan ====================
   tools:
     httpx:
       enabled: true
@@ -81,16 +79,16 @@ site_scan:
       # request-timeout: 10  # per-request timeout in seconds (default 10)
       # retries: 2  # retry count on request failure
 
-# ==================== Fingerprint detection ====================
-# Runs serially after site_scan to identify each WebSite's tech stack
 fingerprint_detect:
+  # ==================== Fingerprint detection ====================
+  # Runs serially after site_scan to identify each WebSite's tech stack
   tools:
     xingfinger:
       enabled: true
-      fingerprint-libs: [ehole, goby, wappalyzer, fingers, fingerprinthub, arl]  # all fingerprint libraries
+      fingerprint-libs: [ehole, goby, wappalyzer, fingers, fingerprinthub, arl]  # all fingerprint libraries enabled by default
 
-# ==================== Directory scan ====================
 directory_scan:
+  # ==================== Directory scan ====================
   tools:
     ffuf:
       enabled: true
@@ -103,8 +101,8 @@ directory_scan:
       match-codes: 200,201,301,302,401,403  # HTTP status codes to match
       # rate: 0  # requests per second (default 0 = unlimited)
 
-# ==================== URL fetch ====================
 url_fetch:
+  # ==================== URL fetch ====================
   tools:
     waymore:
       enabled: true
@@ -142,8 +140,8 @@ url_fetch:
       # request-timeout: 10  # per-request timeout in seconds (default 10)
       # retries: 2  # retry count on request failure
 
-# ==================== Vulnerability scan ====================
 vuln_scan:
+  # ==================== Vulnerability scan ====================
   tools:
     dalfox_xss:
       enabled: true
@@ -256,7 +256,8 @@ def fingerprint_detect_flow(
             'url_count': int,
             'processed_records': int,
             'updated_count': int,
-            'not_found_count': int,
+            'created_count': int,
+            'snapshot_count': int,
             'executed_tasks': list,
             'tool_stats': dict
         }
@@ -303,6 +304,7 @@ def fingerprint_detect_flow(
             'processed_records': 0,
             'updated_count': 0,
             'created_count': 0,
+            'snapshot_count': 0,
             'executed_tasks': ['export_urls_for_fingerprint'],
             'tool_stats': {
                 'total': 0,
@@ -340,6 +342,7 @@ def fingerprint_detect_flow(
     total_processed = sum(stats['result'].get('processed_records', 0) for stats in tool_stats.values())
     total_updated = sum(stats['result'].get('updated_count', 0) for stats in tool_stats.values())
     total_created = sum(stats['result'].get('created_count', 0) for stats in tool_stats.values())
+    total_snapshots = sum(stats['result'].get('snapshot_count', 0) for stats in tool_stats.values())
 
     successful_tools = [name for name in enabled_tools.keys()
                         if name not in [f['tool'] for f in failed_tools]]
@@ -354,6 +357,7 @@ def fingerprint_detect_flow(
         'processed_records': total_processed,
         'updated_count': total_updated,
         'created_count': total_created,
+        'snapshot_count': total_snapshots,
         'executed_tasks': executed_tasks,
         'tool_stats': {
             'total': len(enabled_tools),
@@ -4,7 +4,6 @@ xingfinger execution task
 Streams the xingfinger command and updates the tech field in real time
 """
 
-import importlib
 import json
 import logging
 import subprocess
@@ -15,93 +14,97 @@ from django.db import connection
 from prefect import task
 
 from apps.scan.utils import execute_stream
+from apps.asset.dtos.snapshot import WebsiteSnapshotDTO
+from apps.asset.repositories.snapshot import DjangoWebsiteSnapshotRepository
 
 logger = logging.getLogger(__name__)
 
 
-# data-source mapping: source → (module_path, model_name, url_field)
-SOURCE_MODEL_MAP = {
-    'website': ('apps.asset.models', 'WebSite', 'url'),
-    # future extensions:
-    # 'endpoint': ('apps.asset.models', 'Endpoint', 'url'),
-    # 'directory': ('apps.asset.models', 'Directory', 'url'),
-}
-
-
-def _get_model_class(source: str):
-    """Resolve the Model class for a data-source type"""
-    if source not in SOURCE_MODEL_MAP:
-        raise ValueError(f"不支持的数据源: {source}")
-
-    module_path, model_name, _ = SOURCE_MODEL_MAP[source]
-    module = importlib.import_module(module_path)
-    return getattr(module, model_name)
-
-
-def parse_xingfinger_line(line: str) -> tuple[str, list[str]] | None:
+def parse_xingfinger_line(line: str) -> dict | None:
     """
     Parse one line of xingfinger JSON output
 
-    xingfinger silent-mode output format:
-    {"url": "https://example.com", "cms": "WordPress,PHP,nginx", ...}
+    xingfinger output format:
+    {"url": "...", "cms": "...", "server": "BWS/1.1", "status_code": 200, "length": 642831, "title": "..."}
 
     Returns:
-        tuple: (url, tech_list), or None on parse failure
+        dict: contains url, techs, server, title, status_code, content_length
+        None: on parse failure or empty URL
     """
     try:
         item = json.loads(line)
         url = item.get('url', '').strip()
-        cms = item.get('cms', '')
 
-        if not url or not cms:
+        if not url:
             return None
 
-        # split the cms field on commas, strip whitespace
-        techs = [t.strip() for t in cms.split(',') if t.strip()]
+        cms = item.get('cms', '')
+        techs = [t.strip() for t in cms.split(',') if t.strip()] if cms else []
 
-        return (url, techs) if techs else None
+        return {
+            'url': url,
+            'techs': techs,
+            'server': item.get('server', ''),
+            'title': item.get('title', ''),
+            'status_code': item.get('status_code'),
+            'content_length': item.get('length'),
+        }
 
     except json.JSONDecodeError:
         return None
 
 
-def bulk_merge_tech_field(
-    source: str,
-    url_techs_map: dict[str, list[str]],
+def bulk_merge_website_fields(
+    records: list[dict],
     target_id: int
 ) -> dict:
     """
-    Bulk-merge the tech array field (native PostgreSQL SQL)
+    Bulk merge-update WebSite fields (native PostgreSQL SQL)
 
+    Merge strategy:
+    - tech: array union with deduplication
+    - title, webserver, status_code, content_length: only updated when the current value is empty/NULL
 
-    Uses native PostgreSQL SQL for efficient array merge/dedupe.
     If no record exists for the URL, a new one is created automatically.
 
+    Args:
+        records: parsed records, each {url, techs, server, title, status_code, content_length}
+        target_id: target ID
+
     Returns:
         dict: {'updated_count': int, 'created_count': int}
     """
-    Model = _get_model_class(source)
-    table_name = Model._meta.db_table
+    from apps.asset.models import WebSite
+    table_name = WebSite._meta.db_table
 
     updated_count = 0
     created_count = 0
 
     with connection.cursor() as cursor:
-        for url, techs in url_techs_map.items():
-            if not techs:
-                continue
+        for record in records:
+            url = record['url']
+            techs = record.get('techs', [])
+            server = record.get('server', '') or ''
+            title = record.get('title', '') or ''
+            status_code = record.get('status_code')
+            content_length = record.get('content_length')
 
-            # try an update first (PostgreSQL array merge/dedupe)
-            sql = f"""
+            # try an update first (merge strategy)
+            update_sql = f"""
                 UPDATE {table_name}
-                SET tech = (
-                    SELECT ARRAY(SELECT DISTINCT unnest(
+                SET
+                    tech = (SELECT ARRAY(SELECT DISTINCT unnest(
                         COALESCE(tech, ARRAY[]::varchar[]) || %s::varchar[]
-                    ))
-                )
+                    ))),
+                    title = CASE WHEN title = '' OR title IS NULL THEN %s ELSE title END,
+                    webserver = CASE WHEN webserver = '' OR webserver IS NULL THEN %s ELSE webserver END,
+                    status_code = CASE WHEN status_code IS NULL THEN %s ELSE status_code END,
+                    content_length = CASE WHEN content_length IS NULL THEN %s ELSE content_length END
                 WHERE url = %s AND target_id = %s
             """
 
-            cursor.execute(sql, [techs, url, target_id])
+            cursor.execute(update_sql, [techs, title, server, status_code, content_length, url, target_id])
 
             if cursor.rowcount > 0:
                 updated_count += cursor.rowcount
@@ -113,22 +116,27 @@ def bulk_merge_website_fields(
                 host = parsed.hostname or ''
 
                 # insert a new record (with conflict handling)
                 # pass explicit defaults for every NOT NULL column
                 insert_sql = f"""
-                    INSERT INTO {table_name} (target_id, url, host, location, title, webserver, body_preview, content_type, tech, created_at)
-                    VALUES (%s, %s, %s, '', '', '', '', '', %s::varchar[], NOW())
+                    INSERT INTO {table_name} (
+                        target_id, url, host, location, title, webserver,
+                        body_preview, content_type, tech, status_code, content_length,
+                        response_headers, created_at
+                    )
+                    VALUES (%s, %s, %s, '', %s, %s, '', '', %s::varchar[], %s, %s, '{{}}'::jsonb, NOW())
                     ON CONFLICT (target_id, url) DO UPDATE SET
-                        tech = (
-                            SELECT ARRAY(SELECT DISTINCT unnest(
-                                COALESCE({table_name}.tech, ARRAY[]::varchar[]) || EXCLUDED.tech
-                            ))
-                        )
+                        tech = (SELECT ARRAY(SELECT DISTINCT unnest(
                            COALESCE({table_name}.tech, ARRAY[]::varchar[]) || EXCLUDED.tech
                        ))),
+                        title = CASE WHEN {table_name}.title = '' OR {table_name}.title IS NULL THEN EXCLUDED.title ELSE {table_name}.title END,
+                        webserver = CASE WHEN {table_name}.webserver = '' OR {table_name}.webserver IS NULL THEN EXCLUDED.webserver ELSE {table_name}.webserver END,
+                        status_code = CASE WHEN {table_name}.status_code IS NULL THEN EXCLUDED.status_code ELSE {table_name}.status_code END,
+                        content_length = CASE WHEN {table_name}.content_length IS NULL THEN EXCLUDED.content_length ELSE {table_name}.content_length END
                 """
-                cursor.execute(insert_sql, [target_id, url, host, techs])
+                cursor.execute(insert_sql, [target_id, url, host, title, server, techs, status_code, content_length])
                 created_count += 1
 
             except Exception as e:
-                logger.warning("创建 %s 记录失败 (url=%s): %s", source, url, e)
+                logger.warning("创建 WebSite 记录失败 (url=%s): %s", url, e)
 
     return {
         'updated_count': updated_count,
@@ -142,12 +150,12 @@ def _parse_xingfinger_stream_output(
     cwd: Optional[str] = None,
     timeout: Optional[int] = None,
     log_file: Optional[str] = None
-) -> Generator[tuple[str, list[str]], None, None]:
+) -> Generator[dict, None, None]:
     """
     Stream-parse xingfinger command output
 
     Uses execute_stream to process the command's stdout in real time, turning
-    each JSON line into a (url, tech_list) tuple
+    each JSON line into a full field dict
     """
     logger.info("开始流式解析 xingfinger 命令输出 - 命令: %s", cmd)
 
@@ -194,43 +202,46 @@ def run_xingfinger_and_stream_update_tech_task(
     batch_size: int = 100
 ) -> dict:
     """
-    Stream the xingfinger command and update the tech field in real time
-
-    Updates the tech field of the table selected by `source`:
-    - website → WebSite.tech
-    - endpoint → Endpoint.tech (future extension)
+    Stream the xingfinger command, save snapshots, and merge-update the asset table
 
     Processing flow:
     1. Stream the xingfinger command
-    2. Parse JSON output in real time
-    3. Batch-update the database every batch_size records
-    4. Merge/dedupe arrays with native PostgreSQL SQL
-    5. Create records that do not exist yet
+    2. Parse JSON output in real time (all fields)
+    3. Process in batches of batch_size:
+       - save snapshots (WebsiteSnapshot)
+       - merge-update the asset table (WebSite)
+
+    Merge strategy:
+    - tech: array union with deduplication
+    - title, webserver, status_code, content_length: only updated when currently empty
 
     Returns:
         dict: {
             'processed_records': int,
            'updated_count': int,
            'created_count': int,
+            'snapshot_count': int,
            'batch_count': int
        }
     """
     logger.info(
-        "开始执行 xingfinger 并更新 tech - target_id=%s, source=%s, timeout=%s秒",
-        target_id, source, timeout
+        "开始执行 xingfinger - scan_id=%s, target_id=%s, timeout=%s秒",
+        scan_id, target_id, timeout
     )
 
     data_generator = None
+    snapshot_repo = DjangoWebsiteSnapshotRepository()
 
     try:
         # initialize counters
         processed_records = 0
         updated_count = 0
         created_count = 0
+        snapshot_count = 0
         batch_count = 0
 
-        # URL -> techs map for the current batch
-        url_techs_map = {}
+        # record list for the current batch
+        batch_records = []
 
         # stream processing
        data_generator = _parse_xingfinger_stream_output(
@@ -241,47 +252,43 @@ def run_xingfinger_and_stream_update_tech_task(
            log_file=log_file
        )
 
-        for url, techs in data_generator:
+        for record in data_generator:
            processed_records += 1
+            batch_records.append(record)
 
-            # accumulate into url_techs_map
-            if url in url_techs_map:
-                # merge repeated identifications of the same URL
-                url_techs_map[url].extend(techs)
-            else:
-                url_techs_map[url] = techs
-
-            # batch size reached: run the bulk update
-            if len(url_techs_map) >= batch_size:
+            # batch size reached: process the batch
+            if len(batch_records) >= batch_size:
                batch_count += 1
-                result = bulk_merge_tech_field(source, url_techs_map, target_id)
-                updated_count += result['updated_count']
-                created_count += result.get('created_count', 0)
-
-                logger.debug(
-                    "批次 %d 完成 - 更新: %d, 创建: %d",
-                    batch_count, result['updated_count'], result.get('created_count', 0)
+                result = _process_batch(
+                    batch_records, scan_id, target_id, batch_count, snapshot_repo
                )
+                updated_count += result['updated_count']
+                created_count += result['created_count']
+                snapshot_count += result['snapshot_count']
 
                # reset the batch
-                url_techs_map = {}
+                batch_records = []
 
         # process the final batch
-        if url_techs_map:
+        if batch_records:
            batch_count += 1
-            result = bulk_merge_tech_field(source, url_techs_map, target_id)
+            result = _process_batch(
+                batch_records, scan_id, target_id, batch_count, snapshot_repo
+            )
            updated_count += result['updated_count']
-            created_count += result.get('created_count', 0)
+            created_count += result['created_count']
+            snapshot_count += result['snapshot_count']
 
         logger.info(
-            "✓ xingfinger 执行完成 - 处理记录: %d, 更新: %d, 创建: %d, 批次: %d",
-            processed_records, updated_count, created_count, batch_count
+            "✓ xingfinger 执行完成 - 处理: %d, 更新: %d, 创建: %d, 快照: %d, 批次: %d",
+            processed_records, updated_count, created_count, snapshot_count, batch_count
        )
 
         return {
            'processed_records': processed_records,
            'updated_count': updated_count,
            'created_count': created_count,
+            'snapshot_count': snapshot_count,
            'batch_count': batch_count
        }
 
@@ -299,3 +306,67 @@ def run_xingfinger_and_stream_update_tech_task(
             data_generator.close()
         except Exception as e:
             logger.debug("关闭生成器时出错: %s", e)
+
+
+def _process_batch(
+    records: list[dict],
+    scan_id: int,
+    target_id: int,
+    batch_num: int,
+    snapshot_repo: DjangoWebsiteSnapshotRepository
+) -> dict:
+    """
+    Process one batch: save snapshots + merge-update the asset table
+
+    Args:
+        records: parsed record list
+        scan_id: scan task ID
+        target_id: target ID
+        batch_num: batch number
+        snapshot_repo: snapshot repository
+
+    Returns:
+        dict: {'updated_count': int, 'created_count': int, 'snapshot_count': int}
+    """
+    # 1. build the snapshot DTO list
+    snapshot_dtos = []
+    for record in records:
+        # extract the host from the URL
+        parsed = urlparse(record['url'])
+        host = parsed.hostname or ''
+
+        dto = WebsiteSnapshotDTO(
+            scan_id=scan_id,
+            target_id=target_id,
+            url=record['url'],
+            host=host,
+            title=record.get('title', '') or '',
+            status=record.get('status_code'),
+            content_length=record.get('content_length'),
+            web_server=record.get('server', '') or '',
+            tech=record.get('techs', []),
+        )
+        snapshot_dtos.append(dto)
+
+    # 2. save the snapshots
+    snapshot_count = 0
+    if snapshot_dtos:
+        try:
+            snapshot_repo.save_snapshots(snapshot_dtos)
+            snapshot_count = len(snapshot_dtos)
+        except Exception as e:
+            logger.warning("批次 %d 保存快照失败: %s", batch_num, e)
+
+    # 3. merge-update the asset table
+    merge_result = bulk_merge_website_fields(records, target_id)
+
+    logger.debug(
+        "批次 %d 完成 - 更新: %d, 创建: %d, 快照: %d",
+        batch_num, merge_result['updated_count'], merge_result['created_count'], snapshot_count
+    )
+
+    return {
+        'updated_count': merge_result['updated_count'],
+        'created_count': merge_result['created_count'],
+        'snapshot_count': snapshot_count
+    }
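A pure-Python sketch of the merge semantics the SQL above implements, for readability (the real work happens in PostgreSQL; `sorted` stands in for `DISTINCT unnest`, which has no ordering guarantee):

```python
def merge_website_row(existing: dict, incoming: dict) -> dict:
    merged = dict(existing)
    # tech: array union, never dropping what was already recorded
    merged['tech'] = sorted(set(existing.get('tech') or []) | set(incoming.get('techs') or []))
    # scalars: fill only when the current value is empty or NULL
    for src, dst in [('title', 'title'), ('server', 'webserver'),
                     ('status_code', 'status_code'), ('content_length', 'content_length')]:
        if not merged.get(dst) and incoming.get(src) is not None:
            merged[dst] = incoming[src]
    return merged

row = {'tech': ['nginx'], 'title': '', 'status_code': None}
print(merge_website_row(row, {'techs': ['PHP'], 'title': 'Home', 'status_code': 200}))
# {'tech': ['PHP', 'nginx'], 'title': 'Home', 'status_code': 200}
```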
@@ -134,6 +134,7 @@ class HttpxRecord:
         self.vhost = data.get('vhost')
         self.failed = data.get('failed', False)
         self.timestamp = data.get('timestamp')
+        self.response_headers = data.get('header', {})  # response headers (httpx's `header` output field)
 
         # extract the hostname from the URL
         self.host = self._extract_hostname()
@@ -359,7 +360,8 @@ def _save_batch(
             tech=record.tech if isinstance(record.tech, list) else [],
             status=record.status_code,
             content_length=record.content_length,
-            vhost=record.vhost
+            vhost=record.vhost,
+            response_headers=record.response_headers if record.response_headers else {},
         )
 
         snapshot_items.append(snapshot_dto)
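The new `response_headers` values come from the `header` object that httpx emits per result once `-include-response-header` is combined with `-json` (the sample line below is fabricated for illustration):

```python
import json

line = ('{"url": "https://example.com", "status_code": 200, '
        '"header": {"server": "nginx", "content_type": "text/html"}}')
data = json.loads(line)
response_headers = data.get('header', {})  # the same lookup HttpxRecord now performs
print(response_headers['server'])  # nginx
```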
@@ -2,8 +2,8 @@
 Streaming URL validation task built on execute_stream
 
 Main features:
-1. Run httpx in real time to verify URL liveness
-2. Stream the command output and parse the live URLs
+1. Run httpx in real time to validate URLs
+2. Stream the command output and parse URL details
 3. Bulk-save to the database (Endpoint table)
 4. Avoid loading every URL into memory at once
 
@@ -14,7 +14,7 @@
 - Uses execute_stream to process output in real time
 - Streaming avoids memory exhaustion
 - Batched operations reduce database round trips
-- Only live URLs (status 2xx/3xx) are saved
+- All valid URLs are saved (including 4xx/5xx, which are useful for security analysis)
 """
 
 import logging
@@ -73,7 +73,7 @@ def _parse_and_validate_line(line: str) -> Optional[dict]:
     Returns:
         Optional[dict]: a valid httpx record, or None if validation fails
 
-    Only live URLs (2xx/3xx status codes) are returned
+    All valid URLs are kept (no status-code filtering; 403/404/500 etc. are still worth analyzing in a security scan)
     """
     try:
         # strip NUL characters before parsing the JSON
@@ -99,24 +99,21 @@ def _parse_and_validate_line(line: str) -> Optional[dict]:
             logger.info("URL 为空,跳过 - 数据: %s", str(line_data)[:200])
             return None
 
-        # only keep live URLs (2xx or 3xx)
-        if status_code and (200 <= status_code < 400):
-            return {
-                'url': _sanitize_string(url),
-                'host': _sanitize_string(line_data.get('host', '')),
-                'status_code': status_code,
-                'title': _sanitize_string(line_data.get('title', '')),
-                'content_length': line_data.get('content_length', 0),
-                'content_type': _sanitize_string(line_data.get('content_type', '')),
-                'webserver': _sanitize_string(line_data.get('webserver', '')),
-                'location': _sanitize_string(line_data.get('location', '')),
-                'tech': line_data.get('tech', []),
-                'body_preview': _sanitize_string(line_data.get('body_preview', '')),
-                'vhost': line_data.get('vhost', False),
-            }
-        else:
-            logger.debug("URL 不存活(状态码: %s),跳过: %s", status_code, url)
-            return None
+        # keep every valid URL (no status-code filtering)
+        return {
+            'url': _sanitize_string(url),
+            'host': _sanitize_string(line_data.get('host', '')),
+            'status_code': status_code,
+            'title': _sanitize_string(line_data.get('title', '')),
+            'content_length': line_data.get('content_length', 0),
+            'content_type': _sanitize_string(line_data.get('content_type', '')),
+            'webserver': _sanitize_string(line_data.get('webserver', '')),
+            'location': _sanitize_string(line_data.get('location', '')),
+            'tech': line_data.get('tech', []),
+            'body_preview': _sanitize_string(line_data.get('body_preview', '')),
+            'vhost': line_data.get('vhost', False),
+            'response_headers': line_data.get('header', {}),
+        }
 
     except Exception:
         logger.info("跳过无法解析的行: %s", line[:100] if line else 'empty')
@@ -306,6 +303,7 @@ def _save_batch(
             vhost=record.get('vhost', False),
             matched_gf_patterns=[],
             target_id=target_id,
+            response_headers=record.get('response_headers', {}),
         )
         snapshots.append(dto)
     except Exception as e:
@@ -41,6 +41,7 @@ python-dateutil==2.9.0
 pytz==2024.1
 validators==0.22.0
 PyYAML==6.0.1
+ruamel.yaml>=0.18.0  # comment-preserving YAML parsing
 colorlog==6.8.2  # colored log output
 python-json-logger==2.0.7  # structured JSON logging
 Jinja2>=3.1.6  # command template engine
@@ -843,6 +843,18 @@ class TestDataGenerator:
             # generate a URL of fixed length 245
             url = generate_fixed_length_url(target_name, length=245, path_hint=f'website/{i:04d}')
 
+            # generate mock response-header data
+            response_headers = {
+                'server': random.choice(['nginx', 'Apache', 'cloudflare', 'Microsoft-IIS/10.0']),
+                'content_type': 'text/html; charset=utf-8',
+                'x_powered_by': random.choice(['PHP/8.2', 'ASP.NET', 'Express', None]),
+                'x_frame_options': random.choice(['DENY', 'SAMEORIGIN', None]),
+                'strict_transport_security': 'max-age=31536000; includeSubDomains' if random.choice([True, False]) else None,
+                'set_cookie': f'session={random.randint(100000, 999999)}; HttpOnly; Secure' if random.choice([True, False]) else None,
+            }
+            # drop None values
+            response_headers = {k: v for k, v in response_headers.items() if v is not None}
+
             batch_data.append((
                 url, target_id, target_name, random.choice(titles),
                 random.choice(webservers), random.choice(tech_stacks),
@@ -850,7 +862,8 @@ class TestDataGenerator:
                 random.randint(1000, 500000), 'text/html; charset=utf-8',
                 f'https://{target_name}/login' if random.choice([True, False]) else '',
                 random.choice(body_previews),
-                random.choice([True, False, None])
+                random.choice([True, False, None]),
+                json.dumps(response_headers)
             ))
 
         # bulk insert
@@ -860,11 +873,11 @@ class TestDataGenerator:
             INSERT INTO website (
                 url, target_id, host, title, webserver, tech, status_code,
                 content_length, content_type, location, body_preview, vhost,
-                created_at
+                response_headers, created_at
             ) VALUES %s
             ON CONFLICT DO NOTHING
             RETURNING id
-        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
         ids = [row[0] for row in cur.fetchall()]
 
         print(f"  ✓ 创建了 {len(batch_data)} 个网站\n")
@@ -1017,6 +1030,18 @@ class TestDataGenerator:
             # generate 10-20 tags (gf_patterns)
             tags = random.choice(gf_patterns)
 
+            # generate mock response-header data
+            response_headers = {
+                'server': random.choice(['nginx', 'gunicorn', 'uvicorn', 'Apache']),
+                'content_type': 'application/json',
+                'x_request_id': f'req_{random.randint(100000, 999999)}',
+                'x_ratelimit_limit': str(random.choice([100, 1000, 5000])),
+                'x_ratelimit_remaining': str(random.randint(0, 1000)),
+                'cache_control': random.choice(['no-cache', 'max-age=3600', 'private', None]),
+            }
+            # drop None values
+            response_headers = {k: v for k, v in response_headers.items() if v is not None}
+
             batch_data.append((
                 url, target_id, target_name, title,
                 random.choice(['nginx/1.24.0', 'gunicorn/21.2.0']),
@@ -1024,7 +1049,8 @@ class TestDataGenerator:
                 random.randint(100, 50000), 'application/json',
                 tech_list,
                 '', random.choice(body_previews),
-                random.choice([True, False, None]), tags
+                random.choice([True, False, None]), tags,
+                json.dumps(response_headers)
             ))
             count += 1
 
@@ -1034,10 +1060,10 @@ class TestDataGenerator:
             INSERT INTO endpoint (
                 url, target_id, host, title, webserver, status_code, content_length,
                 content_type, tech, location, body_preview, vhost, matched_gf_patterns,
-                created_at
+                response_headers, created_at
             ) VALUES %s
             ON CONFLICT DO NOTHING
-        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
 
         print(f"  ✓ 创建了 {count} 个端点\n")
 
@@ -1401,13 +1427,23 @@ class TestDataGenerator:
             # generate a URL of fixed length 245
             url = generate_fixed_length_url(target_name, length=245, path_hint=f'website-snap/{i:04d}')
 
+            # generate mock response-header data
+            response_headers = {
+                'server': random.choice(['nginx', 'Apache', 'cloudflare']),
+                'content_type': 'text/html; charset=utf-8',
+                'x_frame_options': random.choice(['DENY', 'SAMEORIGIN', None]),
+            }
+            # drop None values
+            response_headers = {k: v for k, v in response_headers.items() if v is not None}
+
             batch_data.append((
                 scan_id, url, target_name, random.choice(titles),
                 random.choice(webservers), random.choice(tech_stacks),
                 random.choice([200, 301, 403]),
                 random.randint(1000, 50000), 'text/html; charset=utf-8',
                 '',  # location field
-                '<!DOCTYPE html><html><head><title>Test</title></head><body>Content</body></html>'
+                '<!DOCTYPE html><html><head><title>Test</title></head><body>Content</body></html>',
+                json.dumps(response_headers)
             ))
             count += 1
 
@@ -1416,10 +1452,11 @@ class TestDataGenerator:
         execute_values(cur, """
             INSERT INTO website_snapshot (
                 scan_id, url, host, title, web_server, tech, status,
-                content_length, content_type, location, body_preview, created_at
+                content_length, content_type, location, body_preview,
+                response_headers, created_at
             ) VALUES %s
             ON CONFLICT DO NOTHING
-        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
 
         print(f"  ✓ 创建了 {count} 个网站快照\n")
 
@@ -1498,6 +1535,13 @@ class TestDataGenerator:
             num_tags = random.randint(10, 20)
             tags = random.sample(all_tags, min(num_tags, len(all_tags)))
 
+            # generate mock response-header data
+            response_headers = {
+                'server': 'nginx/1.24.0',
+                'content_type': 'application/json',
+                'x_request_id': f'req_{random.randint(100000, 999999)}',
+            }
+
             batch_data.append((
                 scan_id, url, target_name, title,
                 random.choice([200, 201, 401, 403, 404]),
@@ -1506,7 +1550,8 @@ class TestDataGenerator:
                 'nginx/1.24.0',
                 'application/json', tech_list,
                 '{"status":"ok","data":{}}',
-                tags
+                tags,
+                json.dumps(response_headers)
             ))
             count += 1
 
@@ -1516,10 +1561,10 @@ class TestDataGenerator:
             INSERT INTO endpoint_snapshot (
                 scan_id, url, host, title, status_code, content_length,
                 location, webserver, content_type, tech, body_preview,
-                matched_gf_patterns, created_at
+                matched_gf_patterns, response_headers, created_at
            ) VALUES %s
            ON CONFLICT DO NOTHING
-        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
 
         print(f"  ✓ 创建了 {count} 个端点快照\n")
 
@@ -2543,9 +2588,10 @@ class MillionDataGenerator:
                 if len(batch_data) >= batch_size:
                     execute_values(cur, """
                         INSERT INTO website (url, target_id, host, title, webserver, tech,
-                            status_code, content_length, content_type, location, body_preview, created_at)
+                            status_code, content_length, content_type, location, body_preview,
+                            vhost, response_headers, created_at)
                         VALUES %s ON CONFLICT DO NOTHING
-                    """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+                    """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NULL, '{}'::jsonb, NOW())")
                     self.conn.commit()
                     batch_data = []
                     print(f"  ✓ {count:,} / {target_count:,}")
@@ -2555,9 +2601,10 @@ class MillionDataGenerator:
             if batch_data:
                 execute_values(cur, """
                     INSERT INTO website (url, target_id, host, title, webserver, tech,
-                        status_code, content_length, content_type, location, body_preview, created_at)
+                        status_code, content_length, content_type, location, body_preview,
+                        vhost, response_headers, created_at)
                     VALUES %s ON CONFLICT DO NOTHING
-                """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+                """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NULL, '{}'::jsonb, NOW())")
                 self.conn.commit()
 
             print(f"  ✓ 创建了 {count:,} 个网站\n")
@@ -2632,9 +2679,9 @@ class MillionDataGenerator:
                     execute_values(cur, """
                         INSERT INTO endpoint (url, target_id, host, title, webserver, status_code,
                             content_length, content_type, tech, location, body_preview, vhost,
-                            matched_gf_patterns, created_at)
+                            matched_gf_patterns, response_headers, created_at)
                         VALUES %s ON CONFLICT DO NOTHING
-                    """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+                    """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '{}'::jsonb, NOW())")
                     self.conn.commit()
                     batch_data = []
                     print(f"  ✓ {count:,} / {target_count:,}")
@@ -2645,9 +2692,9 @@ class MillionDataGenerator:
                 execute_values(cur, """
                     INSERT INTO endpoint (url, target_id, host, title, webserver, status_code,
                         content_length, content_type, tech, location, body_preview, vhost,
-                        matched_gf_patterns, created_at)
+                        matched_gf_patterns, response_headers, created_at)
                    VALUES %s ON CONFLICT DO NOTHING
-                """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+                """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '{}'::jsonb, NOW())")
                 self.conn.commit()
 
             print(f"  ✓ 创建了 {count:,} 个端点\n")
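The template fixes in this generator are worth calling out: `psycopg2.extras.execute_values` substitutes each row tuple into `template` verbatim, so the `%s` placeholders plus literals (`NOW()`, `'{}'::jsonb`) must line up with the column list exactly; adding `response_headers` without extending the template is an immediate mismatch. A minimal sketch with an assumed table:

```python
from psycopg2.extras import execute_values

def insert_rows(cur, rows):
    # 3 columns <-> 2 %s from each tuple + 1 NOW() literal
    execute_values(
        cur,
        "INSERT INTO demo (name, payload, created_at) VALUES %s",  # assumed table
        rows,  # e.g. [("a", "{}"), ("b", '{"k": 1}')]
        template="(%s, %s::jsonb, NOW())",
    )
```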
@@ -8,7 +8,7 @@ export default function ScanHistoryDetailPage() {
   const router = useRouter()
 
   useEffect(() => {
-    router.replace(`/scan/history/${id}/subdomain/`)
+    router.replace(`/scan/history/${id}/websites/`)
   }, [id, router])
 
   return null
@@ -5,15 +5,15 @@ import { useEffect } from "react"
 
 /**
  * Target detail page (compatible with old routes)
- * Automatically redirects to subdomain page
+ * Automatically redirects to websites page
  */
 export default function TargetDetailsPage() {
   const { id } = useParams<{ id: string }>()
   const router = useRouter()
 
   useEffect(() => {
-    // Redirect to subdomain page
-    router.replace(`/target/${id}/subdomain/`)
+    // Redirect to websites page
+    router.replace(`/target/${id}/websites/`)
   }, [id, router])
 
   return null
@@ -5,15 +5,15 @@ import { useEffect } from "react"
 
 /**
  * Target detail default page
- * Automatically redirects to subdomain page
+ * Automatically redirects to websites page
 */
 export default function TargetDetailPage() {
   const { id } = useParams<{ id: string }>()
   const router = useRouter()
 
   useEffect(() => {
-    // Redirect to subdomain page
-    router.replace(`/target/${id}/subdomain/`)
+    // Redirect to websites page
+    router.replace(`/target/${id}/websites/`)
   }, [id, router])
 
   return null
@@ -12,6 +12,7 @@ import { ExpandableCell, ExpandableTagList } from "@/components/ui/data-table/ex
 export interface EndpointTranslations {
   columns: {
     url: string
+    host: string
     title: string
     status: string
     contentLength: string
@@ -22,6 +23,7 @@ export interface EndpointTranslations {
     bodyPreview: string
     vhost: string
     gfPatterns: string
+    responseHeaders: string
     responseTime: string
     createdAt: string
   }
@@ -112,6 +114,19 @@ export function createEndpointColumns({
       <ExpandableCell value={row.getValue("url")} />
     ),
   },
+  {
+    accessorKey: "host",
+    meta: { title: t.columns.host },
+    header: ({ column }) => (
+      <DataTableColumnHeader column={column} title={t.columns.host} />
+    ),
+    size: 200,
+    minSize: 100,
+    maxSize: 300,
+    cell: ({ row }) => (
+      <ExpandableCell value={row.getValue("host")} />
+    ),
+  },
   {
     accessorKey: "title",
     meta: { title: t.columns.title },
@@ -262,6 +277,24 @@ export function createEndpointColumns({
     },
     enableSorting: false,
   },
+  {
+    accessorKey: "responseHeaders",
+    meta: { title: t.columns.responseHeaders },
+    header: ({ column }) => (
+      <DataTableColumnHeader column={column} title={t.columns.responseHeaders} />
+    ),
+    size: 250,
+    minSize: 150,
+    maxSize: 400,
+    cell: ({ row }) => {
+      const headers = row.getValue("responseHeaders") as Record<string, unknown> | null | undefined
+      if (!headers || Object.keys(headers).length === 0) return <span className="text-muted-foreground text-sm">-</span>
+      const formatted = Object.entries(headers)
+        .map(([key, value]) => `${key}: ${value}`)
+        .join("\n")
+      return <ExpandableCell value={formatted} maxLines={3} variant="mono" />
+    },
+  },
   {
     accessorKey: "responseTime",
     meta: { title: t.columns.responseTime },
@@ -15,6 +15,7 @@ const ENDPOINT_FILTER_FIELDS: FilterField[] = [
   { key: "title", label: "Title", description: "Page title" },
   { key: "status", label: "Status", description: "HTTP status code" },
   { key: "tech", label: "Tech", description: "Technologies" },
+  { key: "responseHeaders", label: "Headers", description: "Response headers" },
 ]
 
 // Endpoint page filter examples
@@ -62,6 +62,7 @@ export function EndpointsDetailView({
   const translations = useMemo(() => ({
     columns: {
       url: tColumns("common.url"),
+      host: tColumns("endpoint.host"),
       title: tColumns("endpoint.title"),
       status: tColumns("common.status"),
       contentLength: tColumns("endpoint.contentLength"),
@@ -72,6 +73,7 @@ export function EndpointsDetailView({
       bodyPreview: tColumns("endpoint.bodyPreview"),
       vhost: tColumns("endpoint.vhost"),
       gfPatterns: tColumns("endpoint.gfPatterns"),
+      responseHeaders: tColumns("endpoint.responseHeaders"),
       responseTime: tColumns("endpoint.responseTime"),
       createdAt: tColumns("common.createdAt"),
     },
@@ -22,6 +22,7 @@ export interface WebsiteTranslations {
     contentType: string
     bodyPreview: string
     vhost: string
+    responseHeaders: string
     createdAt: string
   }
   actions: {
@@ -230,6 +231,24 @@ export function createWebSiteColumns({
       )
     },
   },
+  {
+    accessorKey: "responseHeaders",
+    meta: { title: t.columns.responseHeaders },
+    header: ({ column }) => (
+      <DataTableColumnHeader column={column} title={t.columns.responseHeaders} />
+    ),
+    size: 250,
+    minSize: 150,
+    maxSize: 400,
+    cell: ({ row }) => {
+      const headers = row.getValue("responseHeaders") as Record<string, unknown> | null
+      if (!headers || Object.keys(headers).length === 0) return "-"
+      const formatted = Object.entries(headers)
+        .map(([key, value]) => `${key}: ${value}`)
+        .join("\n")
+      return <ExpandableCell value={formatted} maxLines={3} variant="mono" />
+    },
+  },
   {
     accessorKey: "createdAt",
     meta: { title: t.columns.createdAt },
@@ -16,6 +16,7 @@ const WEBSITE_FILTER_FIELDS: FilterField[] = [
   { key: "title", label: "Title", description: "Page title" },
   { key: "status", label: "Status", description: "HTTP status code" },
   { key: "tech", label: "Tech", description: "Technologies" },
+  { key: "responseHeaders", label: "Headers", description: "Response headers" },
 ]
 
 // Website page filter examples
@@ -54,6 +54,7 @@ export function WebSitesView({
       contentType: tColumns("endpoint.contentType"),
       bodyPreview: tColumns("endpoint.bodyPreview"),
       vhost: tColumns("endpoint.vhost"),
+      responseHeaders: tColumns("website.responseHeaders"),
       createdAt: tColumns("common.createdAt"),
     },
     actions: {
@@ -48,6 +48,7 @@
     },
     "endpoint": {
       "title": "Title",
+      "host": "Host",
       "contentLength": "Content Length",
       "location": "Location",
       "webServer": "Web Server",
@@ -56,10 +57,12 @@
       "bodyPreview": "Body Preview",
       "vhost": "VHost",
       "gfPatterns": "GF Patterns",
+      "responseHeaders": "Response Headers",
       "responseTime": "Response Time"
     },
     "website": {
-      "host": "Host"
+      "host": "Host",
+      "responseHeaders": "Response Headers"
     },
     "directory": {
       "length": "Length",
@@ -48,6 +48,7 @@
     },
     "endpoint": {
       "title": "Title",
+      "host": "Host",
       "contentLength": "Content Length",
       "location": "Location",
       "webServer": "Web Server",
@@ -56,10 +57,12 @@
       "bodyPreview": "Body Preview",
       "vhost": "VHost",
       "gfPatterns": "GF Patterns",
+      "responseHeaders": "响应头",
       "responseTime": "Response Time"
     },
     "website": {
-      "host": "Host"
+      "host": "Host",
+      "responseHeaders": "响应头"
     },
     "directory": {
       "length": "Length",
@@ -23,6 +23,7 @@ export interface Endpoint {
   bodyPreview?: string
   tech?: string[]
   vhost?: boolean | null
+  responseHeaders?: Record<string, unknown>
   createdAt?: string
 
   // Legacy domain association fields (may not exist in some APIs)
@@ -18,6 +18,7 @@ export interface WebSite {
   tech: string[]
   vhost: boolean | null
   subdomain: string
+  responseHeaders?: Record<string, unknown>
   createdAt: string
 }