Compare commits

...

3 Commits

Author SHA1 Message Date
yyhuni
857ab737b5 feat(fingerprint): enhance xingfinger task with snapshot tracking and field merging
- Replace `not_found_count` with `created_count` and `snapshot_count` metrics in fingerprint detect flow
- Initialize and aggregate `snapshot_count` across tool statistics
- Refactor `parse_xingfinger_line()` to return structured dict with url, techs, server, title, status_code, and content_length
- Replace `bulk_merge_tech_field()` with `bulk_merge_website_fields()` to support merging multiple WebSite fields
- Implement smart merge strategy: arrays deduplicated, scalar fields only updated when empty/NULL
- Remove dynamic model loading via importlib in favor of direct WebSite model import
- Add WebsiteSnapshotDTO and DjangoWebsiteSnapshotRepository imports for snapshot handling
- Improve xingfinger output parsing to capture server, title, and HTTP metadata alongside technology detection
2026-01-01 12:40:49 +08:00
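
The merge semantics described in the bullets above reduce to two rules: array fields are unioned and deduplicated, scalar fields are only filled when the stored value is empty. A minimal pure-Python sketch of those rules (function and field handling are illustrative, not the repo's actual helpers):

def merge_website_fields(existing: dict, incoming: dict) -> dict:
    """Illustrative only: arrays deduplicated, scalars filled only when empty/None."""
    merged = dict(existing)
    # Array field: union while preserving first-seen order
    merged['tech'] = list(dict.fromkeys((existing.get('tech') or []) + (incoming.get('tech') or [])))
    # Scalar fields: keep the stored value unless it is empty/None
    for field in ('title', 'webserver', 'status_code', 'content_length'):
        if existing.get(field) in (None, ''):
            merged[field] = incoming.get(field)
    return merged

print(merge_website_fields(
    {'tech': ['nginx'], 'title': '', 'status_code': 200},
    {'tech': ['nginx', 'PHP'], 'title': 'Home', 'status_code': 500},
))
# {'tech': ['nginx', 'PHP'], 'title': 'Home', 'status_code': 200, 'webserver': None, 'content_length': None}
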
yyhuni
ee2d99edda feat(asset): add response headers tracking to endpoints and websites
- Add response_headers field to Endpoint and WebSite models as JSONField
- Add response_headers field to EndpointSnapshot and WebsiteSnapshot models
- Update all related DTOs to include response_headers with Dict[str, Any] type
- Add GIN indexes on response_headers fields for optimized JSON queries
- Update endpoint and website repositories to handle response_headers data
- Update serializers to include response_headers in API responses
- Update frontend components to display response headers in detail views
- Add response_headers to fingerprint detection and site scan tasks
- Update command templates and engine config to support header extraction
- Add internationalization strings for response headers in en.json and zh.json
- Update TypeScript types for endpoint and website to include response_headers
- Enhance scan history and target detail pages to show response header information
2026-01-01 12:25:22 +08:00
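
Because response_headers lands as a JSONField backed by a GIN index (see the model diffs below), header contents stay queryable server-side. A hypothetical query sketch using standard Django JSONField lookups on PostgreSQL (the header keys are illustrative):

from apps.asset.models import WebSite  # model as extended in this commit

# Containment lookup: websites whose stored headers include this key/value pair
nginx_sites = WebSite.objects.filter(response_headers__contains={'server': 'nginx'})

# Key-existence lookup (maps to the jsonb ? operator, also served by the GIN index)
hsts_sites = WebSite.objects.filter(response_headers__has_key='strict_transport_security')
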
github-actions[bot]
db6ce16aca chore: bump version to v1.2.13-dev 2026-01-01 02:24:08 +00:00
34 changed files with 435 additions and 191 deletions

View File

@@ -1 +1 @@
v1.2.12-dev
v1.2.13-dev

View File

@@ -1,7 +1,7 @@
"""Endpoint DTO"""
from dataclasses import dataclass
from typing import Optional, List
from typing import Optional, List, Dict, Any
@dataclass
@@ -20,9 +20,12 @@ class EndpointDTO:
vhost: Optional[bool] = None
location: Optional[str] = None
matched_gf_patterns: Optional[List[str]] = None
response_headers: Optional[Dict[str, Any]] = None
def __post_init__(self):
if self.tech is None:
self.tech = []
if self.matched_gf_patterns is None:
self.matched_gf_patterns = []
if self.response_headers is None:
self.response_headers = {}

View File

@@ -1,7 +1,7 @@
"""WebSite DTO"""
from dataclasses import dataclass
from typing import List, Optional
from typing import List, Optional, Dict, Any
@dataclass
@@ -20,7 +20,10 @@ class WebSiteDTO:
body_preview: str = ''
vhost: Optional[bool] = None
created_at: str = None
response_headers: Dict[str, Any] = None
def __post_init__(self):
if self.tech is None:
self.tech = []
if self.response_headers is None:
self.response_headers = {}

View File

@@ -1,7 +1,7 @@
"""EndpointSnapshot DTO"""
from dataclasses import dataclass
from typing import List, Optional
from typing import List, Optional, Dict, Any
@dataclass
@@ -26,12 +26,15 @@ class EndpointSnapshotDTO:
vhost: Optional[bool] = None
matched_gf_patterns: List[str] = None
target_id: Optional[int] = None # Redundant field, used when syncing to the asset table
response_headers: Dict[str, Any] = None
def __post_init__(self):
if self.tech is None:
self.tech = []
if self.matched_gf_patterns is None:
self.matched_gf_patterns = []
if self.response_headers is None:
self.response_headers = {}
def to_asset_dto(self):
"""
@@ -58,5 +61,6 @@ class EndpointSnapshotDTO:
tech=self.tech if self.tech else [],
vhost=self.vhost,
location=self.location,
matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else []
matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else [],
response_headers=self.response_headers if self.response_headers else {},
)

View File

@@ -1,7 +1,7 @@
"""WebsiteSnapshot DTO"""
from dataclasses import dataclass
from typing import List, Optional
from typing import List, Optional, Dict, Any
@dataclass
@@ -25,10 +25,13 @@ class WebsiteSnapshotDTO:
tech: List[str] = None
body_preview: str = ''
vhost: Optional[bool] = None
response_headers: Dict[str, Any] = None
def __post_init__(self):
if self.tech is None:
self.tech = []
if self.response_headers is None:
self.response_headers = {}
def to_asset_dto(self):
"""
@@ -51,5 +54,6 @@ class WebsiteSnapshotDTO:
content_type=self.content_type,
tech=self.tech if self.tech else [],
body_preview=self.body_preview,
vhost=self.vhost
vhost=self.vhost,
response_headers=self.response_headers if self.response_headers else {},
)

View File

@@ -124,6 +124,11 @@ class Endpoint(models.Model):
default=list,
help_text='List of matched GF patterns, used to flag sensitive endpoints (e.g. api, debug, config)'
)
response_headers = models.JSONField(
blank=True,
default=dict,
help_text='HTTP response headers (JSON format)'
)
class Meta:
db_table = 'endpoint'
@@ -138,6 +143,7 @@ class Endpoint(models.Model):
models.Index(fields=['status_code']), # Status-code index to speed up filtering
models.Index(fields=['title']), # Title index to speed up smart-filter search
GinIndex(fields=['tech']), # GIN index to speed up __contains queries on the tech array field
GinIndex(fields=['response_headers']), # GIN index to speed up queries on the response_headers JSON field
]
constraints = [
# Plain unique constraint: url + target must be unique together
@@ -221,6 +227,11 @@ class WebSite(models.Model):
blank=True,
help_text='Whether virtual hosting is supported'
)
response_headers = models.JSONField(
blank=True,
default=dict,
help_text='HTTP response headers (JSON format)'
)
class Meta:
db_table = 'website'
@@ -235,6 +246,7 @@ class WebSite(models.Model):
models.Index(fields=['title']), # Title index to speed up smart-filter search
models.Index(fields=['status_code']), # Status-code index to speed up smart-filter search
GinIndex(fields=['tech']), # GIN index to speed up __contains queries on the tech array field
GinIndex(fields=['response_headers']), # GIN index to speed up queries on the response_headers JSON field
]
constraints = [
# Plain unique constraint: url + target must be unique together

View File

@@ -1,5 +1,6 @@
from django.db import models
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.core.validators import MinValueValidator, MaxValueValidator
@@ -70,6 +71,11 @@ class WebsiteSnapshot(models.Model):
)
body_preview = models.TextField(blank=True, default='', help_text='Response body preview')
vhost = models.BooleanField(null=True, blank=True, help_text='Virtual host flag')
response_headers = models.JSONField(
blank=True,
default=dict,
help_text='HTTP response headers (JSON format)'
)
created_at = models.DateTimeField(auto_now_add=True, help_text='Creation time')
class Meta:
@@ -83,6 +89,8 @@ class WebsiteSnapshot(models.Model):
models.Index(fields=['host']), # Host index to speed up hostname lookups
models.Index(fields=['title']), # Title index to speed up title search
models.Index(fields=['-created_at']),
GinIndex(fields=['tech']), # GIN index to speed up array-field queries
GinIndex(fields=['response_headers']), # GIN index to speed up JSON-field queries
]
constraints = [
# Unique constraint: within one scan, a URL may be recorded only once
@@ -259,6 +267,11 @@ class EndpointSnapshot(models.Model):
default=list,
help_text='List of matched GF patterns'
)
response_headers = models.JSONField(
blank=True,
default=dict,
help_text='HTTP response headers (JSON format)'
)
created_at = models.DateTimeField(auto_now_add=True, help_text='Creation time')
class Meta:
@@ -274,6 +287,8 @@ class EndpointSnapshot(models.Model):
models.Index(fields=['status_code']), # Status-code index to speed up filtering
models.Index(fields=['webserver']), # Webserver index to speed up server search
models.Index(fields=['-created_at']),
GinIndex(fields=['tech']), # GIN index to speed up array-field queries
GinIndex(fields=['response_headers']), # GIN index to speed up JSON-field queries
]
constraints = [
# Unique constraint: within one scan, a URL may be recorded only once

View File

@@ -53,7 +53,8 @@ class DjangoEndpointRepository:
tech=item.tech if item.tech else [],
vhost=item.vhost,
location=item.location or '',
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
response_headers=item.response_headers if item.response_headers else {}
)
for item in unique_items
]
@@ -66,7 +67,7 @@ class DjangoEndpointRepository:
update_fields=[
'host', 'title', 'status_code', 'content_length',
'webserver', 'body_preview', 'content_type', 'tech',
'vhost', 'location', 'matched_gf_patterns'
'vhost', 'location', 'matched_gf_patterns', 'response_headers'
],
batch_size=1000
)
@@ -143,7 +144,8 @@ class DjangoEndpointRepository:
tech=item.tech if item.tech else [],
vhost=item.vhost,
location=item.location or '',
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
response_headers=item.response_headers if item.response_headers else {}
)
for item in unique_items
]
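
The update_fields list above, together with batch_size=1000, matches Django's bulk_create upsert path; a minimal sketch under that assumption (Django 4.1+, objs prepared from the DTOs; not necessarily the repository's exact call):

from apps.asset.models import Endpoint

def upsert_endpoints(objs: list) -> None:
    # Sketch only: ON CONFLICT (url, target) DO UPDATE on the listed columns
    Endpoint.objects.bulk_create(
        objs,
        update_conflicts=True,
        unique_fields=['url', 'target'],  # mirrors the url + target unique constraint
        update_fields=[
            'host', 'title', 'status_code', 'content_length',
            'webserver', 'body_preview', 'content_type', 'tech',
            'vhost', 'location', 'matched_gf_patterns', 'response_headers',
        ],
        batch_size=1000,
    )
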

View File

@@ -54,7 +54,8 @@ class DjangoWebSiteRepository:
tech=item.tech if item.tech else [],
status_code=item.status_code,
content_length=item.content_length,
vhost=item.vhost
vhost=item.vhost,
response_headers=item.response_headers if item.response_headers else {}
)
for item in unique_items
]
@@ -67,7 +68,7 @@ class DjangoWebSiteRepository:
update_fields=[
'host', 'location', 'title', 'webserver',
'body_preview', 'content_type', 'tech',
'status_code', 'content_length', 'vhost'
'status_code', 'content_length', 'vhost', 'response_headers'
],
batch_size=1000
)
@@ -137,7 +138,8 @@ class DjangoWebSiteRepository:
tech=item.tech if item.tech else [],
status_code=item.status_code,
content_length=item.content_length,
vhost=item.vhost
vhost=item.vhost,
response_headers=item.response_headers if item.response_headers else {}
)
for item in unique_items
]

View File

@@ -44,6 +44,7 @@ class DjangoEndpointSnapshotRepository:
snapshots.append(EndpointSnapshot(
scan_id=item.scan_id,
url=item.url,
host=item.host if item.host else '',
title=item.title,
status_code=item.status_code,
content_length=item.content_length,
@@ -53,7 +54,8 @@ class DjangoEndpointSnapshotRepository:
tech=item.tech if item.tech else [],
body_preview=item.body_preview,
vhost=item.vhost,
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
response_headers=item.response_headers if item.response_headers else {}
))
# Bulk create (ignore conflicts; dedupe via the unique constraint)

View File

@@ -53,7 +53,8 @@ class DjangoWebsiteSnapshotRepository:
content_type=item.content_type,
tech=item.tech if item.tech else [],
body_preview=item.body_preview,
vhost=item.vhost
vhost=item.vhost,
response_headers=item.response_headers if item.response_headers else {}
))
# Bulk create (ignore conflicts; dedupe via the unique constraint)

View File

@@ -67,9 +67,10 @@ class SubdomainListSerializer(serializers.ModelSerializer):
class WebSiteSerializer(serializers.ModelSerializer):
"""站点序列化器"""
"""站点序列化器(目标详情页)"""
subdomain = serializers.CharField(source='subdomain.name', allow_blank=True, default='')
responseHeaders = serializers.JSONField(source='response_headers', read_only=True) # HTTP响应头
class Meta:
model = WebSite
@@ -86,6 +87,7 @@ class WebSiteSerializer(serializers.ModelSerializer):
'body_preview',
'tech',
'vhost',
'responseHeaders', # HTTP response headers
'subdomain',
'created_at',
]
@@ -140,6 +142,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
source='matched_gf_patterns',
read_only=True,
)
responseHeaders = serializers.JSONField(source='response_headers', read_only=True) # HTTP response headers
class Meta:
model = Endpoint
@@ -155,6 +158,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
'body_preview',
'tech',
'vhost',
'responseHeaders', # HTTP response headers
'gfPatterns',
'created_at',
]
@@ -215,6 +219,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
subdomain_name = serializers.CharField(source='subdomain.name', read_only=True)
webserver = serializers.CharField(source='web_server', read_only=True) # maps the field name
status_code = serializers.IntegerField(source='status', read_only=True) # maps the field name
responseHeaders = serializers.JSONField(source='response_headers', read_only=True) # HTTP response headers
class Meta:
model = WebsiteSnapshot
@@ -230,6 +235,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
'body_preview',
'tech',
'vhost',
'responseHeaders', # HTTP response headers
'subdomain_name',
'created_at',
]
@@ -264,6 +270,7 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
source='matched_gf_patterns',
read_only=True,
)
responseHeaders = serializers.JSONField(source='response_headers', read_only=True) # HTTP response headers
class Meta:
model = EndpointSnapshot
@@ -280,6 +287,7 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
'body_preview',
'tech',
'vhost',
'responseHeaders', # HTTP response headers
'gfPatterns',
'created_at',
]
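
Each serializer above exposes a snake_case model column under a camelCase API name via source=; a minimal standalone sketch of that pattern (the serializer class name is hypothetical):

from rest_framework import serializers
from apps.asset.models import WebSite

class WebSiteHeadersSerializer(serializers.ModelSerializer):
    # source= maps the camelCase API field onto the snake_case model column
    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)

    class Meta:
        model = WebSite
        fields = ['url', 'responseHeaders']
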

View File

@@ -15,9 +15,10 @@
"""
from django.core.management.base import BaseCommand
from io import StringIO
from pathlib import Path
import yaml
from ruamel.yaml import YAML
from apps.engine.models import ScanEngine
@@ -44,10 +45,12 @@ class Command(BaseCommand):
with open(config_path, 'r', encoding='utf-8') as f:
default_config = f.read()
# Parse the YAML into a dict; later used to generate sub-engine configs
# Parse with ruamel.yaml to preserve comments
yaml_parser = YAML()
yaml_parser.preserve_quotes = True
try:
config_dict = yaml.safe_load(default_config) or {}
except yaml.YAMLError as e:
config_dict = yaml_parser.load(default_config) or {}
except Exception as e:
self.stdout.write(self.style.ERROR(f'Failed to parse engine config YAML: {e}'))
return
@@ -83,16 +86,13 @@ class Command(BaseCommand):
if scan_type != 'subdomain_discovery' and 'tools' not in scan_cfg:
continue
# Build a YAML document containing only the current scan type's config
# Build a YAML document containing only the current scan type's config (comments preserved)
single_config = {scan_type: scan_cfg}
try:
single_yaml = yaml.safe_dump(
single_config,
sort_keys=False,
allow_unicode=True,
default_flow_style=None,
)
except yaml.YAMLError as e:
stream = StringIO()
yaml_parser.dump(single_config, stream)
single_yaml = stream.getvalue()
except Exception as e:
self.stdout.write(self.style.ERROR(f'Failed to generate sub-engine config for {scan_type}: {e}'))
continue
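
The switch from yaml.safe_load/safe_dump to ruamel.yaml exists purely to keep comments through the load-then-dump cycle; a minimal standalone sketch of that round trip:

from io import StringIO
from ruamel.yaml import YAML

yaml_parser = YAML()               # round-trip mode: retains comments and ordering
yaml_parser.preserve_quotes = True

source = (
    "site_scan:\n"
    "  tools:\n"
    "    httpx:\n"
    "      enabled: true  # primary HTTP prober\n"
)

config = yaml_parser.load(source)
stream = StringIO()
yaml_parser.dump({'site_scan': config['site_scan']}, stream)
print(stream.getvalue())           # the inline comment survives the dump
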

View File

@@ -99,7 +99,8 @@ SITE_SCAN_COMMANDS = {
'-status-code -content-type -content-length '
'-location -title -server -body-preview '
'-tech-detect -cdn -vhost '
'-random-agent -no-color -json'
'-include-response-header '
'-random-agent -no-color -json -silent'
),
'optional': {
'threads': '-threads {threads}',
@@ -171,7 +172,8 @@ URL_FETCH_COMMANDS = {
'-status-code -content-type -content-length '
'-location -title -server -body-preview '
'-tech-detect -cdn -vhost '
'-random-agent -no-color -json'
'-include-response-header '
'-random-agent -no-color -json -silent'
),
'optional': {
'threads': '-threads {threads}',
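
Per the template change above, httpx is now invoked with -include-response-header, which adds a header object to each JSON output line (the same field the HttpxRecord parser reads later in this diff). A small parsing sketch with an illustrative line:

import json

# Illustrative httpx -json output line with -include-response-header enabled
line = '{"url": "https://example.com", "status_code": 200, "header": {"server": "nginx"}}'

record = json.loads(line)
response_headers = record.get('header', {})  # default to {} when the field is absent
print(response_headers.get('server'))        # -> nginx
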

View File

@@ -4,14 +4,12 @@
# Required parameter: enabled (whether the tool is enabled)
# Optional parameter: timeout (seconds; defaults to auto, computed automatically)
# ==================== Subdomain Discovery ====================
#
# Stage 1: Passive collection (parallel) - required; enable at least one tool
# Stage 2: Dictionary brute force (optional) - enumerate subdomains with a wordlist
# Stage 3: Mutation generation + validation (optional) - mutate discovered names and stream-validate liveness
# Stage 4: DNS resolution check (optional) - verify that every candidate domain resolves
#
subdomain_discovery:
# ==================== Subdomain Discovery ====================
# Stage 1: Passive collection (parallel) - required; enable at least one tool
# Stage 2: Dictionary brute force (optional) - enumerate subdomains with a wordlist
# Stage 3: Mutation generation + validation (optional) - mutate discovered names and stream-validate liveness
# Stage 4: DNS resolution check (optional) - verify that every candidate domain resolves
# === Stage 1: Passive collection tools (run in parallel) ===
passive_tools:
subfinder:
@@ -55,8 +53,8 @@ subdomain_discovery:
subdomain_resolve:
timeout: auto # computed automatically from the number of candidate subdomains
# ==================== Port Scan ====================
port_scan:
# ==================== Port Scan ====================
tools:
naabu_active:
enabled: true
@@ -70,8 +68,8 @@ port_scan:
enabled: true
# timeout: auto # passive scans are usually fast
# ==================== Site Scan ====================
site_scan:
# ==================== Site Scan ====================
tools:
httpx:
enabled: true
@@ -81,16 +79,16 @@ site_scan:
# request-timeout: 10 # per-request timeout in seconds (default 10)
# retries: 2 # retry count for failed requests
# ==================== Fingerprint Detection ====================
# Runs serially after site_scan to identify each WebSite's tech stack
fingerprint_detect:
# ==================== Fingerprint Detection ====================
# Runs serially after site_scan to identify each WebSite's tech stack
tools:
xingfinger:
enabled: true
fingerprint-libs: [ehole, goby, wappalyzer, fingers, fingerprinthub, arl] # all fingerprint libraries
fingerprint-libs: [ehole, goby, wappalyzer, fingers, fingerprinthub, arl] # all fingerprint libraries enabled by default
# ==================== Directory Scan ====================
directory_scan:
# ==================== Directory Scan ====================
tools:
ffuf:
enabled: true
@@ -103,8 +101,8 @@ directory_scan:
match-codes: 200,201,301,302,401,403 # HTTP status codes to match
# rate: 0 # requests per second (default 0 = unlimited)
# ==================== URL Fetch ====================
url_fetch:
# ==================== URL Fetch ====================
tools:
waymore:
enabled: true
@@ -142,8 +140,8 @@ url_fetch:
# request-timeout: 10 # per-request timeout in seconds (default 10)
# retries: 2 # retry count for failed requests
# ==================== Vulnerability Scan ====================
vuln_scan:
# ==================== Vulnerability Scan ====================
tools:
dalfox_xss:
enabled: true

View File

@@ -256,7 +256,8 @@ def fingerprint_detect_flow(
'url_count': int,
'processed_records': int,
'updated_count': int,
'not_found_count': int,
'created_count': int,
'snapshot_count': int,
'executed_tasks': list,
'tool_stats': dict
}
@@ -303,6 +304,7 @@ def fingerprint_detect_flow(
'processed_records': 0,
'updated_count': 0,
'created_count': 0,
'snapshot_count': 0,
'executed_tasks': ['export_urls_for_fingerprint'],
'tool_stats': {
'total': 0,
@@ -340,6 +342,7 @@ def fingerprint_detect_flow(
total_processed = sum(stats['result'].get('processed_records', 0) for stats in tool_stats.values())
total_updated = sum(stats['result'].get('updated_count', 0) for stats in tool_stats.values())
total_created = sum(stats['result'].get('created_count', 0) for stats in tool_stats.values())
total_snapshots = sum(stats['result'].get('snapshot_count', 0) for stats in tool_stats.values())
successful_tools = [name for name in enabled_tools.keys()
if name not in [f['tool'] for f in failed_tools]]
@@ -354,6 +357,7 @@ def fingerprint_detect_flow(
'processed_records': total_processed,
'updated_count': total_updated,
'created_count': total_created,
'snapshot_count': total_snapshots,
'executed_tasks': executed_tasks,
'tool_stats': {
'total': len(enabled_tools),
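
The flow-level totals above are plain sums over each tool's result dict; the same reduction as a standalone sketch, with illustrative numbers:

tool_stats = {
    'xingfinger': {'result': {'processed_records': 120, 'updated_count': 80,
                              'created_count': 10, 'snapshot_count': 120}},
}

totals = {
    key: sum(stats['result'].get(key, 0) for stats in tool_stats.values())
    for key in ('processed_records', 'updated_count', 'created_count', 'snapshot_count')
}
print(totals)  # {'processed_records': 120, 'updated_count': 80, 'created_count': 10, 'snapshot_count': 120}
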

View File

@@ -4,7 +4,6 @@ xingfinger execution task
Streams the xingfinger command and updates the tech field in real time
"""
import importlib
import json
import logging
import subprocess
@@ -15,93 +14,97 @@ from django.db import connection
from prefect import task
from apps.scan.utils import execute_stream
from apps.asset.dtos.snapshot import WebsiteSnapshotDTO
from apps.asset.repositories.snapshot import DjangoWebsiteSnapshotRepository
logger = logging.getLogger(__name__)
# Data-source mapping: source → (module_path, model_name, url_field)
SOURCE_MODEL_MAP = {
'website': ('apps.asset.models', 'WebSite', 'url'),
# future extensions:
# 'endpoint': ('apps.asset.models', 'Endpoint', 'url'),
# 'directory': ('apps.asset.models', 'Directory', 'url'),
}
def _get_model_class(source: str):
"""Resolve the Model class for a given data-source type"""
if source not in SOURCE_MODEL_MAP:
raise ValueError(f"Unsupported data source: {source}")
module_path, model_name, _ = SOURCE_MODEL_MAP[source]
module = importlib.import_module(module_path)
return getattr(module, model_name)
def parse_xingfinger_line(line: str) -> tuple[str, list[str]] | None:
def parse_xingfinger_line(line: str) -> dict | None:
"""
Parse a single line of xingfinger JSON output
xingfinger silent-mode output format:
{"url": "https://example.com", "cms": "WordPress,PHP,nginx", ...}
xingfinger output format:
{"url": "...", "cms": "...", "server": "BWS/1.1", "status_code": 200, "length": 642831, "title": "..."}
Returns:
tuple: (url, tech_list), or None on parse failure
dict: with keys url, techs, server, title, status_code, content_length
None: on parse failure or when the URL is empty
"""
try:
item = json.loads(line)
url = item.get('url', '').strip()
cms = item.get('cms', '')
if not url or not cms:
if not url:
return None
# Split the cms field on commas and strip whitespace
techs = [t.strip() for t in cms.split(',') if t.strip()]
cms = item.get('cms', '')
techs = [t.strip() for t in cms.split(',') if t.strip()] if cms else []
return (url, techs) if techs else None
return {
'url': url,
'techs': techs,
'server': item.get('server', ''),
'title': item.get('title', ''),
'status_code': item.get('status_code'),
'content_length': item.get('length'),
}
except json.JSONDecodeError:
return None
def bulk_merge_tech_field(
source: str,
url_techs_map: dict[str, list[str]],
def bulk_merge_website_fields(
records: list[dict],
target_id: int
) -> dict:
"""
Bulk-merge the tech array field (native PostgreSQL SQL)
Bulk merge-update WebSite fields (native PostgreSQL SQL)
Merge strategy:
- tech: arrays merged and deduplicated
- title, webserver, status_code, content_length: updated only when the existing value is empty/NULL
Uses native PostgreSQL SQL for an efficient array merge-and-dedupe.
If no record exists for a URL, a new one is created automatically.
Args:
records: list of parsed records, each containing {url, techs, server, title, status_code, content_length}
target_id: target ID
Returns:
dict: {'updated_count': int, 'created_count': int}
"""
Model = _get_model_class(source)
table_name = Model._meta.db_table
from apps.asset.models import WebSite
table_name = WebSite._meta.db_table
updated_count = 0
created_count = 0
with connection.cursor() as cursor:
for url, techs in url_techs_map.items():
if not techs:
continue
for record in records:
url = record['url']
techs = record.get('techs', [])
server = record.get('server', '') or ''
title = record.get('title', '') or ''
status_code = record.get('status_code')
content_length = record.get('content_length')
# Try UPDATE first (PostgreSQL array merge + dedupe)
sql = f"""
# Try UPDATE first (merge strategy)
update_sql = f"""
UPDATE {table_name}
SET tech = (
SELECT ARRAY(SELECT DISTINCT unnest(
SET
tech = (SELECT ARRAY(SELECT DISTINCT unnest(
COALESCE(tech, ARRAY[]::varchar[]) || %s::varchar[]
))
)
))),
title = CASE WHEN title = '' OR title IS NULL THEN %s ELSE title END,
webserver = CASE WHEN webserver = '' OR webserver IS NULL THEN %s ELSE webserver END,
status_code = CASE WHEN status_code IS NULL THEN %s ELSE status_code END,
content_length = CASE WHEN content_length IS NULL THEN %s ELSE content_length END
WHERE url = %s AND target_id = %s
"""
cursor.execute(sql, [techs, url, target_id])
cursor.execute(update_sql, [techs, title, server, status_code, content_length, url, target_id])
if cursor.rowcount > 0:
updated_count += cursor.rowcount
@@ -113,22 +116,27 @@ def bulk_merge_tech_field(
host = parsed.hostname or ''
# Insert a new record (with conflict handling)
# Pass explicit defaults for every NOT NULL field
insert_sql = f"""
INSERT INTO {table_name} (target_id, url, host, location, title, webserver, body_preview, content_type, tech, created_at)
VALUES (%s, %s, %s, '', '', '', '', '', %s::varchar[], NOW())
INSERT INTO {table_name} (
target_id, url, host, location, title, webserver,
body_preview, content_type, tech, status_code, content_length,
response_headers, created_at
)
VALUES (%s, %s, %s, '', %s, %s, '', '', %s::varchar[], %s, %s, '{{}}'::jsonb, NOW())
ON CONFLICT (target_id, url) DO UPDATE SET
tech = (
SELECT ARRAY(SELECT DISTINCT unnest(
COALESCE({table_name}.tech, ARRAY[]::varchar[]) || EXCLUDED.tech
))
)
tech = (SELECT ARRAY(SELECT DISTINCT unnest(
COALESCE({table_name}.tech, ARRAY[]::varchar[]) || EXCLUDED.tech
))),
title = CASE WHEN {table_name}.title = '' OR {table_name}.title IS NULL THEN EXCLUDED.title ELSE {table_name}.title END,
webserver = CASE WHEN {table_name}.webserver = '' OR {table_name}.webserver IS NULL THEN EXCLUDED.webserver ELSE {table_name}.webserver END,
status_code = CASE WHEN {table_name}.status_code IS NULL THEN EXCLUDED.status_code ELSE {table_name}.status_code END,
content_length = CASE WHEN {table_name}.content_length IS NULL THEN EXCLUDED.content_length ELSE {table_name}.content_length END
"""
cursor.execute(insert_sql, [target_id, url, host, techs])
cursor.execute(insert_sql, [target_id, url, host, title, server, techs, status_code, content_length])
created_count += 1
except Exception as e:
logger.warning("创建 %s 记录失败 (url=%s): %s", source, url, e)
logger.warning("创建 WebSite 记录失败 (url=%s): %s", url, e)
return {
'updated_count': updated_count,
@@ -142,12 +150,12 @@ def _parse_xingfinger_stream_output(
cwd: Optional[str] = None,
timeout: Optional[int] = None,
log_file: Optional[str] = None
) -> Generator[tuple[str, list[str]], None, None]:
) -> Generator[dict, None, None]:
"""
Stream-parse xingfinger command output
Processes the command's stdout in real time via execute_stream, converting each JSON output line
into (url, tech_list) tuples
into a dict with all fields
"""
logger.info("开始流式解析 xingfinger 命令输出 - 命令: %s", cmd)
@@ -194,43 +202,46 @@ def run_xingfinger_and_stream_update_tech_task(
batch_size: int = 100
) -> dict:
"""
Stream the xingfinger command and update the tech field in real time
Updates the tech field of the table selected by source:
- website → WebSite.tech
- endpoint → Endpoint.tech (future extension)
Stream the xingfinger command, save snapshots, and merge-update the asset table
Processing flow:
1. Stream the xingfinger command
2. Parse the JSON output in real time
3. After batch_size records accumulate, bulk-update the database
4. Merge and dedupe arrays with native PostgreSQL SQL
5. Create records automatically when they don't exist
2. Parse the JSON output in real time (all fields)
3. After batch_size records accumulate, process the batch:
- save snapshots (WebsiteSnapshot)
- merge-update the asset table (WebSite)
Merge strategy:
- tech: arrays merged and deduplicated
- title, webserver, status_code, content_length: updated only when the existing value is empty
Returns:
dict: {
'processed_records': int,
'updated_count': int,
'created_count': int,
'snapshot_count': int,
'batch_count': int
}
"""
logger.info(
"开始执行 xingfinger 并更新 tech - target_id=%s, source=%s, timeout=%s",
target_id, source, timeout
"开始执行 xingfinger - scan_id=%s, target_id=%s, timeout=%s",
scan_id, target_id, timeout
)
data_generator = None
snapshot_repo = DjangoWebsiteSnapshotRepository()
try:
# Initialize counters
processed_records = 0
updated_count = 0
created_count = 0
snapshot_count = 0
batch_count = 0
# URL -> techs map for the current batch
url_techs_map = {}
# Record list for the current batch
batch_records = []
# Stream processing
data_generator = _parse_xingfinger_stream_output(
@@ -241,47 +252,43 @@ def run_xingfinger_and_stream_update_tech_task(
log_file=log_file
)
for url, techs in data_generator:
for record in data_generator:
processed_records += 1
batch_records.append(record)
# Accumulate into url_techs_map
if url in url_techs_map:
# Merge repeated detections of the same URL
url_techs_map[url].extend(techs)
else:
url_techs_map[url] = techs
# Batch size reached: run the bulk update
if len(url_techs_map) >= batch_size:
# Batch size reached: process the batch
if len(batch_records) >= batch_size:
batch_count += 1
result = bulk_merge_tech_field(source, url_techs_map, target_id)
updated_count += result['updated_count']
created_count += result.get('created_count', 0)
logger.debug(
"批次 %d 完成 - 更新: %d, 创建: %d",
batch_count, result['updated_count'], result.get('created_count', 0)
result = _process_batch(
batch_records, scan_id, target_id, batch_count, snapshot_repo
)
updated_count += result['updated_count']
created_count += result['created_count']
snapshot_count += result['snapshot_count']
# Reset the batch
url_techs_map = {}
batch_records = []
# Process the final batch
if url_techs_map:
if batch_records:
batch_count += 1
result = bulk_merge_tech_field(source, url_techs_map, target_id)
result = _process_batch(
batch_records, scan_id, target_id, batch_count, snapshot_repo
)
updated_count += result['updated_count']
created_count += result.get('created_count', 0)
created_count += result['created_count']
snapshot_count += result['snapshot_count']
logger.info(
"✓ xingfinger 执行完成 - 处理记录: %d, 更新: %d, 创建: %d, 批次: %d",
processed_records, updated_count, created_count, batch_count
"✓ xingfinger 执行完成 - 处理: %d, 更新: %d, 创建: %d, 快照: %d, 批次: %d",
processed_records, updated_count, created_count, snapshot_count, batch_count
)
return {
'processed_records': processed_records,
'updated_count': updated_count,
'created_count': created_count,
'snapshot_count': snapshot_count,
'batch_count': batch_count
}
@@ -299,3 +306,67 @@ def run_xingfinger_and_stream_update_tech_task(
data_generator.close()
except Exception as e:
logger.debug("关闭生成器时出错: %s", e)
def _process_batch(
records: list[dict],
scan_id: int,
target_id: int,
batch_num: int,
snapshot_repo: DjangoWebsiteSnapshotRepository
) -> dict:
"""
Process one batch of records: save snapshots + merge-update the asset table
Args:
records: list of parsed records
scan_id: scan task ID
target_id: target ID
batch_num: batch number
snapshot_repo: snapshot repository
Returns:
dict: {'updated_count': int, 'created_count': int, 'snapshot_count': int}
"""
# 1. Build the snapshot DTO list
snapshot_dtos = []
for record in records:
# Extract host from the URL
parsed = urlparse(record['url'])
host = parsed.hostname or ''
dto = WebsiteSnapshotDTO(
scan_id=scan_id,
target_id=target_id,
url=record['url'],
host=host,
title=record.get('title', '') or '',
status=record.get('status_code'),
content_length=record.get('content_length'),
web_server=record.get('server', '') or '',
tech=record.get('techs', []),
)
snapshot_dtos.append(dto)
# 2. Save the snapshots
snapshot_count = 0
if snapshot_dtos:
try:
snapshot_repo.save_snapshots(snapshot_dtos)
snapshot_count = len(snapshot_dtos)
except Exception as e:
logger.warning("批次 %d 保存快照失败: %s", batch_num, e)
# 3. Merge-update the asset table
merge_result = bulk_merge_website_fields(records, target_id)
logger.debug(
"批次 %d 完成 - 更新: %d, 创建: %d, 快照: %d",
batch_num, merge_result['updated_count'], merge_result['created_count'], snapshot_count
)
return {
'updated_count': merge_result['updated_count'],
'created_count': merge_result['created_count'],
'snapshot_count': snapshot_count
}
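
A quick usage sketch of the reworked parser, feeding it a line in the format the docstring documents (the import path is assumed, not stated in this diff):

from apps.scan.tasks.xingfinger import parse_xingfinger_line  # assumed module path

line = ('{"url": "https://example.com", "cms": "WordPress,PHP,nginx", '
        '"server": "BWS/1.1", "status_code": 200, "length": 642831, "title": "Example"}')

print(parse_xingfinger_line(line))
# {'url': 'https://example.com', 'techs': ['WordPress', 'PHP', 'nginx'],
#  'server': 'BWS/1.1', 'title': 'Example', 'status_code': 200, 'content_length': 642831}
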

View File

@@ -134,6 +134,7 @@ class HttpxRecord:
self.vhost = data.get('vhost')
self.failed = data.get('failed', False)
self.timestamp = data.get('timestamp')
self.response_headers = data.get('header', {}) # Response headers (httpx's `header` output field)
# Extract the hostname from the URL
self.host = self._extract_hostname()
@@ -359,7 +360,8 @@ def _save_batch(
tech=record.tech if isinstance(record.tech, list) else [],
status=record.status_code,
content_length=record.content_length,
vhost=record.vhost
vhost=record.vhost,
response_headers=record.response_headers if record.response_headers else {},
)
snapshot_items.append(snapshot_dto)

View File

@@ -2,8 +2,8 @@
Streaming URL validation task built on execute_stream
Main features:
1. Run the httpx command in real time to verify URL liveness
2. Stream the command output and parse live URLs
1. Run the httpx command in real time to validate URLs
2. Stream the command output and parse URL details
3. Bulk-save to the database (Endpoint table)
4. Avoid loading all URLs into memory at once
@@ -14,7 +14,7 @@
- Use execute_stream to process output in real time
- Streaming avoids memory exhaustion
- Batch operations reduce database round-trips
- Save live URLs (status 2xx/3xx)
- Save all valid URLs (including 4xx/5xx) to aid security analysis
"""
import logging
@@ -73,7 +73,7 @@ def _parse_and_validate_line(line: str) -> Optional[dict]:
Returns:
Optional[dict]: a valid httpx record, or None if validation failed
Only live URLs (2xx/3xx status codes) are returned
All valid URLs are saved with no status-code filtering (403/404/500 etc. still carry analytical value in security scanning)
"""
try:
# Strip NUL characters before parsing the JSON
@@ -99,24 +99,21 @@ def _parse_and_validate_line(line: str) -> Optional[dict]:
logger.info("URL 为空,跳过 - 数据: %s", str(line_data)[:200])
return None
# Save live URLs (2xx or 3xx)
if status_code and (200 <= status_code < 400):
return {
'url': _sanitize_string(url),
'host': _sanitize_string(line_data.get('host', '')),
'status_code': status_code,
'title': _sanitize_string(line_data.get('title', '')),
'content_length': line_data.get('content_length', 0),
'content_type': _sanitize_string(line_data.get('content_type', '')),
'webserver': _sanitize_string(line_data.get('webserver', '')),
'location': _sanitize_string(line_data.get('location', '')),
'tech': line_data.get('tech', []),
'body_preview': _sanitize_string(line_data.get('body_preview', '')),
'vhost': line_data.get('vhost', False),
}
else:
logger.debug("URL 不存活(状态码: %s),跳过: %s", status_code, url)
return None
# 保存所有有效 URL不再过滤状态码
return {
'url': _sanitize_string(url),
'host': _sanitize_string(line_data.get('host', '')),
'status_code': status_code,
'title': _sanitize_string(line_data.get('title', '')),
'content_length': line_data.get('content_length', 0),
'content_type': _sanitize_string(line_data.get('content_type', '')),
'webserver': _sanitize_string(line_data.get('webserver', '')),
'location': _sanitize_string(line_data.get('location', '')),
'tech': line_data.get('tech', []),
'body_preview': _sanitize_string(line_data.get('body_preview', '')),
'vhost': line_data.get('vhost', False),
'response_headers': line_data.get('header', {}),
}
except Exception:
logger.info("跳过无法解析的行: %s", line[:100] if line else 'empty')
@@ -306,6 +303,7 @@ def _save_batch(
vhost=record.get('vhost', False),
matched_gf_patterns=[],
target_id=target_id,
response_headers=record.get('response_headers', {}),
)
snapshots.append(dto)
except Exception as e:
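
The sanitize-then-parse step above guards against NUL bytes, which PostgreSQL text columns reject; a compressed sketch of that step (the helper name is illustrative, not the module's actual _parse_and_validate_line):

import json
from typing import Optional

def parse_httpx_line(line: str) -> Optional[dict]:
    """Illustrative: strip NUL characters, parse JSON, keep every valid URL."""
    try:
        data = json.loads(line.replace('\x00', ''))
    except json.JSONDecodeError:
        return None
    if not data.get('url'):
        return None
    # Status codes are intentionally not filtered (4xx/5xx kept for analysis)
    return {'url': data['url'], 'status_code': data.get('status_code'),
            'response_headers': data.get('header', {})}

print(parse_httpx_line('{"url": "https://example.com", "status_code": 404, "header": {}}'))
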

View File

@@ -41,6 +41,7 @@ python-dateutil==2.9.0
pytz==2024.1
validators==0.22.0
PyYAML==6.0.1
ruamel.yaml>=0.18.0 # comment-preserving YAML parsing
colorlog==6.8.2 # colored log output
python-json-logger==2.0.7 # structured JSON logging
Jinja2>=3.1.6 # command template engine

View File

@@ -843,6 +843,18 @@ class TestDataGenerator:
# Generate a URL with a fixed length of 245
url = generate_fixed_length_url(target_name, length=245, path_hint=f'website/{i:04d}')
# Generate mock response-header data
response_headers = {
'server': random.choice(['nginx', 'Apache', 'cloudflare', 'Microsoft-IIS/10.0']),
'content_type': 'text/html; charset=utf-8',
'x_powered_by': random.choice(['PHP/8.2', 'ASP.NET', 'Express', None]),
'x_frame_options': random.choice(['DENY', 'SAMEORIGIN', None]),
'strict_transport_security': 'max-age=31536000; includeSubDomains' if random.choice([True, False]) else None,
'set_cookie': f'session={random.randint(100000, 999999)}; HttpOnly; Secure' if random.choice([True, False]) else None,
}
# Drop None values
response_headers = {k: v for k, v in response_headers.items() if v is not None}
batch_data.append((
url, target_id, target_name, random.choice(titles),
random.choice(webservers), random.choice(tech_stacks),
@@ -850,7 +862,8 @@ class TestDataGenerator:
random.randint(1000, 500000), 'text/html; charset=utf-8',
f'https://{target_name}/login' if random.choice([True, False]) else '',
random.choice(body_previews),
random.choice([True, False, None])
random.choice([True, False, None]),
json.dumps(response_headers)
))
# Bulk insert
@@ -860,11 +873,11 @@ class TestDataGenerator:
INSERT INTO website (
url, target_id, host, title, webserver, tech, status_code,
content_length, content_type, location, body_preview, vhost,
created_at
response_headers, created_at
) VALUES %s
ON CONFLICT DO NOTHING
RETURNING id
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
ids = [row[0] for row in cur.fetchall()]
print(f" ✓ 创建了 {len(batch_data)} 个网站\n")
@@ -1017,6 +1030,18 @@ class TestDataGenerator:
# Generate 10-20 tags (gf_patterns)
tags = random.choice(gf_patterns)
# Generate mock response-header data
response_headers = {
'server': random.choice(['nginx', 'gunicorn', 'uvicorn', 'Apache']),
'content_type': 'application/json',
'x_request_id': f'req_{random.randint(100000, 999999)}',
'x_ratelimit_limit': str(random.choice([100, 1000, 5000])),
'x_ratelimit_remaining': str(random.randint(0, 1000)),
'cache_control': random.choice(['no-cache', 'max-age=3600', 'private', None]),
}
# Drop None values
response_headers = {k: v for k, v in response_headers.items() if v is not None}
batch_data.append((
url, target_id, target_name, title,
random.choice(['nginx/1.24.0', 'gunicorn/21.2.0']),
@@ -1024,7 +1049,8 @@ class TestDataGenerator:
random.randint(100, 50000), 'application/json',
tech_list,
'', random.choice(body_previews),
random.choice([True, False, None]), tags
random.choice([True, False, None]), tags,
json.dumps(response_headers)
))
count += 1
@@ -1034,10 +1060,10 @@ class TestDataGenerator:
INSERT INTO endpoint (
url, target_id, host, title, webserver, status_code, content_length,
content_type, tech, location, body_preview, vhost, matched_gf_patterns,
created_at
response_headers, created_at
) VALUES %s
ON CONFLICT DO NOTHING
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
print(f" ✓ 创建了 {count} 个端点\n")
@@ -1401,13 +1427,23 @@ class TestDataGenerator:
# Generate a URL with a fixed length of 245
url = generate_fixed_length_url(target_name, length=245, path_hint=f'website-snap/{i:04d}')
# Generate mock response-header data
response_headers = {
'server': random.choice(['nginx', 'Apache', 'cloudflare']),
'content_type': 'text/html; charset=utf-8',
'x_frame_options': random.choice(['DENY', 'SAMEORIGIN', None]),
}
# Drop None values
response_headers = {k: v for k, v in response_headers.items() if v is not None}
batch_data.append((
scan_id, url, target_name, random.choice(titles),
random.choice(webservers), random.choice(tech_stacks),
random.choice([200, 301, 403]),
random.randint(1000, 50000), 'text/html; charset=utf-8',
'', # location field
'<!DOCTYPE html><html><head><title>Test</title></head><body>Content</body></html>'
'<!DOCTYPE html><html><head><title>Test</title></head><body>Content</body></html>',
json.dumps(response_headers)
))
count += 1
@@ -1416,10 +1452,11 @@ class TestDataGenerator:
execute_values(cur, """
INSERT INTO website_snapshot (
scan_id, url, host, title, web_server, tech, status,
content_length, content_type, location, body_preview, created_at
content_length, content_type, location, body_preview,
response_headers, created_at
) VALUES %s
ON CONFLICT DO NOTHING
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
print(f" ✓ 创建了 {count} 个网站快照\n")
@@ -1498,6 +1535,13 @@ class TestDataGenerator:
num_tags = random.randint(10, 20)
tags = random.sample(all_tags, min(num_tags, len(all_tags)))
# Generate mock response-header data
response_headers = {
'server': 'nginx/1.24.0',
'content_type': 'application/json',
'x_request_id': f'req_{random.randint(100000, 999999)}',
}
batch_data.append((
scan_id, url, target_name, title,
random.choice([200, 201, 401, 403, 404]),
@@ -1506,7 +1550,8 @@ class TestDataGenerator:
'nginx/1.24.0',
'application/json', tech_list,
'{"status":"ok","data":{}}',
tags
tags,
json.dumps(response_headers)
))
count += 1
@@ -1516,10 +1561,10 @@ class TestDataGenerator:
INSERT INTO endpoint_snapshot (
scan_id, url, host, title, status_code, content_length,
location, webserver, content_type, tech, body_preview,
matched_gf_patterns, created_at
matched_gf_patterns, response_headers, created_at
) VALUES %s
ON CONFLICT DO NOTHING
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
print(f" ✓ 创建了 {count} 个端点快照\n")
@@ -2543,9 +2588,10 @@ class MillionDataGenerator:
if len(batch_data) >= batch_size:
execute_values(cur, """
INSERT INTO website (url, target_id, host, title, webserver, tech,
status_code, content_length, content_type, location, body_preview, created_at)
status_code, content_length, content_type, location, body_preview,
vhost, response_headers, created_at)
VALUES %s ON CONFLICT DO NOTHING
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NULL, '{}'::jsonb, NOW())")
self.conn.commit()
batch_data = []
print(f"{count:,} / {target_count:,}")
@@ -2555,9 +2601,10 @@ class MillionDataGenerator:
if batch_data:
execute_values(cur, """
INSERT INTO website (url, target_id, host, title, webserver, tech,
status_code, content_length, content_type, location, body_preview, created_at)
status_code, content_length, content_type, location, body_preview,
vhost, response_headers, created_at)
VALUES %s ON CONFLICT DO NOTHING
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NULL, '{}'::jsonb, NOW())")
self.conn.commit()
print(f" ✓ 创建了 {count:,} 个网站\n")
@@ -2632,9 +2679,9 @@ class MillionDataGenerator:
execute_values(cur, """
INSERT INTO endpoint (url, target_id, host, title, webserver, status_code,
content_length, content_type, tech, location, body_preview, vhost,
matched_gf_patterns, created_at)
matched_gf_patterns, response_headers, created_at)
VALUES %s ON CONFLICT DO NOTHING
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '{}'::jsonb, NOW())")
self.conn.commit()
batch_data = []
print(f"{count:,} / {target_count:,}")
@@ -2645,9 +2692,9 @@ class MillionDataGenerator:
execute_values(cur, """
INSERT INTO endpoint (url, target_id, host, title, webserver, status_code,
content_length, content_type, tech, location, body_preview, vhost,
matched_gf_patterns, created_at)
matched_gf_patterns, response_headers, created_at)
VALUES %s ON CONFLICT DO NOTHING
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
""", batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '{}'::jsonb, NOW())")
self.conn.commit()
print(f" ✓ 创建了 {count:,} 个端点\n")

View File

@@ -8,7 +8,7 @@ export default function ScanHistoryDetailPage() {
const router = useRouter()
useEffect(() => {
router.replace(`/scan/history/${id}/subdomain/`)
router.replace(`/scan/history/${id}/websites/`)
}, [id, router])
return null

View File

@@ -5,15 +5,15 @@ import { useEffect } from "react"
/**
* Target detail page (compatible with old routes)
* Automatically redirects to subdomain page
* Automatically redirects to websites page
*/
export default function TargetDetailsPage() {
const { id } = useParams<{ id: string }>()
const router = useRouter()
useEffect(() => {
// Redirect to subdomain page
router.replace(`/target/${id}/subdomain/`)
// Redirect to websites page
router.replace(`/target/${id}/websites/`)
}, [id, router])
return null

View File

@@ -5,15 +5,15 @@ import { useEffect } from "react"
/**
* Target detail default page
* Automatically redirects to subdomain page
* Automatically redirects to websites page
*/
export default function TargetDetailPage() {
const { id } = useParams<{ id: string }>()
const router = useRouter()
useEffect(() => {
// Redirect to subdomain page
router.replace(`/target/${id}/subdomain/`)
// Redirect to websites page
router.replace(`/target/${id}/websites/`)
}, [id, router])
return null

View File

@@ -12,6 +12,7 @@ import { ExpandableCell, ExpandableTagList } from "@/components/ui/data-table/ex
export interface EndpointTranslations {
columns: {
url: string
host: string
title: string
status: string
contentLength: string
@@ -22,6 +23,7 @@ export interface EndpointTranslations {
bodyPreview: string
vhost: string
gfPatterns: string
responseHeaders: string
responseTime: string
createdAt: string
}
@@ -112,6 +114,19 @@ export function createEndpointColumns({
<ExpandableCell value={row.getValue("url")} />
),
},
{
accessorKey: "host",
meta: { title: t.columns.host },
header: ({ column }) => (
<DataTableColumnHeader column={column} title={t.columns.host} />
),
size: 200,
minSize: 100,
maxSize: 300,
cell: ({ row }) => (
<ExpandableCell value={row.getValue("host")} />
),
},
{
accessorKey: "title",
meta: { title: t.columns.title },
@@ -262,6 +277,24 @@ export function createEndpointColumns({
},
enableSorting: false,
},
{
accessorKey: "responseHeaders",
meta: { title: t.columns.responseHeaders },
header: ({ column }) => (
<DataTableColumnHeader column={column} title={t.columns.responseHeaders} />
),
size: 250,
minSize: 150,
maxSize: 400,
cell: ({ row }) => {
const headers = row.getValue("responseHeaders") as Record<string, unknown> | null | undefined
if (!headers || Object.keys(headers).length === 0) return <span className="text-muted-foreground text-sm">-</span>
const formatted = Object.entries(headers)
.map(([key, value]) => `${key}: ${value}`)
.join("\n")
return <ExpandableCell value={formatted} maxLines={3} variant="mono" />
},
},
{
accessorKey: "responseTime",
meta: { title: t.columns.responseTime },

View File

@@ -15,6 +15,7 @@ const ENDPOINT_FILTER_FIELDS: FilterField[] = [
{ key: "title", label: "Title", description: "Page title" },
{ key: "status", label: "Status", description: "HTTP status code" },
{ key: "tech", label: "Tech", description: "Technologies" },
{ key: "responseHeaders", label: "Headers", description: "Response headers" },
]
// Endpoint page filter examples

View File

@@ -62,6 +62,7 @@ export function EndpointsDetailView({
const translations = useMemo(() => ({
columns: {
url: tColumns("common.url"),
host: tColumns("endpoint.host"),
title: tColumns("endpoint.title"),
status: tColumns("common.status"),
contentLength: tColumns("endpoint.contentLength"),
@@ -72,6 +73,7 @@ export function EndpointsDetailView({
bodyPreview: tColumns("endpoint.bodyPreview"),
vhost: tColumns("endpoint.vhost"),
gfPatterns: tColumns("endpoint.gfPatterns"),
responseHeaders: tColumns("endpoint.responseHeaders"),
responseTime: tColumns("endpoint.responseTime"),
createdAt: tColumns("common.createdAt"),
},

View File

@@ -22,6 +22,7 @@ export interface WebsiteTranslations {
contentType: string
bodyPreview: string
vhost: string
responseHeaders: string
createdAt: string
}
actions: {
@@ -230,6 +231,24 @@ export function createWebSiteColumns({
)
},
},
{
accessorKey: "responseHeaders",
meta: { title: t.columns.responseHeaders },
header: ({ column }) => (
<DataTableColumnHeader column={column} title={t.columns.responseHeaders} />
),
size: 250,
minSize: 150,
maxSize: 400,
cell: ({ row }) => {
const headers = row.getValue("responseHeaders") as Record<string, unknown> | null
if (!headers || Object.keys(headers).length === 0) return "-"
const formatted = Object.entries(headers)
.map(([key, value]) => `${key}: ${value}`)
.join("\n")
return <ExpandableCell value={formatted} maxLines={3} variant="mono" />
},
},
{
accessorKey: "createdAt",
meta: { title: t.columns.createdAt },

View File

@@ -16,6 +16,7 @@ const WEBSITE_FILTER_FIELDS: FilterField[] = [
{ key: "title", label: "Title", description: "Page title" },
{ key: "status", label: "Status", description: "HTTP status code" },
{ key: "tech", label: "Tech", description: "Technologies" },
{ key: "responseHeaders", label: "Headers", description: "Response headers" },
]
// Website page filter examples

View File

@@ -54,6 +54,7 @@ export function WebSitesView({
contentType: tColumns("endpoint.contentType"),
bodyPreview: tColumns("endpoint.bodyPreview"),
vhost: tColumns("endpoint.vhost"),
responseHeaders: tColumns("website.responseHeaders"),
createdAt: tColumns("common.createdAt"),
},
actions: {

View File

@@ -48,6 +48,7 @@
},
"endpoint": {
"title": "Title",
"host": "Host",
"contentLength": "Content Length",
"location": "Location",
"webServer": "Web Server",
@@ -56,10 +57,12 @@
"bodyPreview": "Body Preview",
"vhost": "VHost",
"gfPatterns": "GF Patterns",
"responseHeaders": "Response Headers",
"responseTime": "Response Time"
},
"website": {
"host": "Host"
"host": "Host",
"responseHeaders": "Response Headers"
},
"directory": {
"length": "Length",

View File

@@ -48,6 +48,7 @@
},
"endpoint": {
"title": "Title",
"host": "Host",
"contentLength": "Content Length",
"location": "Location",
"webServer": "Web Server",
@@ -56,10 +57,12 @@
"bodyPreview": "Body Preview",
"vhost": "VHost",
"gfPatterns": "GF Patterns",
"responseHeaders": "响应头",
"responseTime": "Response Time"
},
"website": {
"host": "Host"
"host": "Host",
"responseHeaders": "响应头"
},
"directory": {
"length": "Length",

View File

@@ -23,6 +23,7 @@ export interface Endpoint {
bodyPreview?: string
tech?: string[]
vhost?: boolean | null
responseHeaders?: Record<string, unknown>
createdAt?: string
// Legacy domain association fields (may not exist in some APIs)

View File

@@ -18,6 +18,7 @@ export interface WebSite {
tech: string[]
vhost: boolean | null
subdomain: string
responseHeaders?: Record<string, unknown>
createdAt: string
}