Mirror of https://github.com/yyhuni/xingrin.git
Synced 2026-01-31 19:53:11 +08:00

Compare commits: v1.2.13-de ... v1.2.14-de (3 commits)
Commits:
- 857ab737b5
- ee2d99edda
- db6ce16aca
@@ -1,7 +1,7 @@
 """Endpoint DTO"""
 
 from dataclasses import dataclass
-from typing import Optional, List
+from typing import Optional, List, Dict, Any
 
 
 @dataclass
@@ -20,9 +20,12 @@ class EndpointDTO:
     vhost: Optional[bool] = None
     location: Optional[str] = None
     matched_gf_patterns: Optional[List[str]] = None
+    response_headers: Optional[Dict[str, Any]] = None
 
     def __post_init__(self):
         if self.tech is None:
             self.tech = []
         if self.matched_gf_patterns is None:
             self.matched_gf_patterns = []
+        if self.response_headers is None:
+            self.response_headers = {}
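Review note on the `__post_init__` additions above: dataclasses cannot take mutable literals (`[]`, `{}`) as field defaults, so each new container field defaults to `None` and is normalized after init. A minimal self-contained sketch of the same pattern (class names are illustrative, not from this repo):

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

@dataclass
class HeadersDTO:  # illustrative stand-in for the DTOs in this diff
    tech: Optional[List[str]] = None
    response_headers: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        # normalize None into fresh containers so instances never share state
        if self.tech is None:
            self.tech = []
        if self.response_headers is None:
            self.response_headers = {}

# equivalent stdlib alternative without __post_init__:
@dataclass
class HeadersDTO2:
    tech: List[str] = field(default_factory=list)
    response_headers: Dict[str, Any] = field(default_factory=dict)
```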
@@ -1,7 +1,7 @@
 """WebSite DTO"""
 
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 
 
 @dataclass
@@ -20,7 +20,10 @@ class WebSiteDTO:
     body_preview: str = ''
     vhost: Optional[bool] = None
     created_at: str = None
+    response_headers: Dict[str, Any] = None
 
     def __post_init__(self):
         if self.tech is None:
             self.tech = []
+        if self.response_headers is None:
+            self.response_headers = {}
@@ -1,7 +1,7 @@
 """EndpointSnapshot DTO"""
 
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 
 
 @dataclass
@@ -26,12 +26,15 @@ class EndpointSnapshotDTO:
     vhost: Optional[bool] = None
     matched_gf_patterns: List[str] = None
     target_id: Optional[int] = None  # redundant field, used when syncing to the asset table
+    response_headers: Dict[str, Any] = None
 
     def __post_init__(self):
         if self.tech is None:
             self.tech = []
         if self.matched_gf_patterns is None:
             self.matched_gf_patterns = []
+        if self.response_headers is None:
+            self.response_headers = {}
 
     def to_asset_dto(self):
         """
@@ -58,5 +61,6 @@ class EndpointSnapshotDTO:
             tech=self.tech if self.tech else [],
             vhost=self.vhost,
             location=self.location,
-            matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else []
+            matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else [],
+            response_headers=self.response_headers if self.response_headers else {},
         )
@@ -1,7 +1,7 @@
 """WebsiteSnapshot DTO"""
 
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 
 
 @dataclass
@@ -25,10 +25,13 @@ class WebsiteSnapshotDTO:
     tech: List[str] = None
     body_preview: str = ''
     vhost: Optional[bool] = None
+    response_headers: Dict[str, Any] = None
 
     def __post_init__(self):
         if self.tech is None:
             self.tech = []
+        if self.response_headers is None:
+            self.response_headers = {}
 
     def to_asset_dto(self):
         """
@@ -51,5 +54,6 @@ class WebsiteSnapshotDTO:
             content_type=self.content_type,
             tech=self.tech if self.tech else [],
             body_preview=self.body_preview,
-            vhost=self.vhost
+            vhost=self.vhost,
+            response_headers=self.response_headers if self.response_headers else {},
         )
@@ -124,6 +124,11 @@ class Endpoint(models.Model):
         default=list,
         help_text='匹配的GF模式列表,用于识别敏感端点(如api, debug, config等)'
     )
+    response_headers = models.JSONField(
+        blank=True,
+        default=dict,
+        help_text='HTTP响应头(JSON格式)'
+    )
 
     class Meta:
         db_table = 'endpoint'
@@ -138,6 +143,7 @@ class Endpoint(models.Model):
             models.Index(fields=['status_code']),  # status-code index, speeds up filtering
             models.Index(fields=['title']),  # title index, speeds up smart-filter search
             GinIndex(fields=['tech']),  # GIN index for __contains queries on the tech array
+            GinIndex(fields=['response_headers']),  # GIN index for queries on the response_headers JSON field
         ]
         constraints = [
             # plain unique constraint: url + target must be unique together
@@ -221,6 +227,11 @@ class WebSite(models.Model):
         blank=True,
         help_text='是否支持虚拟主机'
     )
+    response_headers = models.JSONField(
+        blank=True,
+        default=dict,
+        help_text='HTTP响应头(JSON格式)'
+    )
 
     class Meta:
         db_table = 'website'
@@ -235,6 +246,7 @@ class WebSite(models.Model):
             models.Index(fields=['title']),  # title index, speeds up smart-filter search
             models.Index(fields=['status_code']),  # status-code index, speeds up smart-filter search
             GinIndex(fields=['tech']),  # GIN index for __contains queries on the tech array
+            GinIndex(fields=['response_headers']),  # GIN index for queries on the response_headers JSON field
         ]
         constraints = [
             # plain unique constraint: url + target must be unique together
@@ -1,5 +1,6 @@
 from django.db import models
 from django.contrib.postgres.fields import ArrayField
+from django.contrib.postgres.indexes import GinIndex
 from django.core.validators import MinValueValidator, MaxValueValidator
 
 
@@ -70,6 +71,11 @@ class WebsiteSnapshot(models.Model):
     )
     body_preview = models.TextField(blank=True, default='', help_text='响应体预览')
     vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
+    response_headers = models.JSONField(
+        blank=True,
+        default=dict,
+        help_text='HTTP响应头(JSON格式)'
+    )
     created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
 
     class Meta:
@@ -83,6 +89,8 @@ class WebsiteSnapshot(models.Model):
             models.Index(fields=['host']),  # host index, speeds up lookup by hostname
             models.Index(fields=['title']),  # title index, speeds up title search
             models.Index(fields=['-created_at']),
+            GinIndex(fields=['tech']),  # GIN index, speeds up array-field queries
+            GinIndex(fields=['response_headers']),  # GIN index, speeds up JSON-field queries
         ]
         constraints = [
             # unique constraint: within one scan, each URL is recorded only once
@@ -259,6 +267,11 @@ class EndpointSnapshot(models.Model):
         default=list,
         help_text='匹配的GF模式列表'
     )
+    response_headers = models.JSONField(
+        blank=True,
+        default=dict,
+        help_text='HTTP响应头(JSON格式)'
+    )
     created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
 
     class Meta:
@@ -274,6 +287,8 @@ class EndpointSnapshot(models.Model):
             models.Index(fields=['status_code']),  # status-code index, speeds up filtering
             models.Index(fields=['webserver']),  # webserver index, speeds up server search
             models.Index(fields=['-created_at']),
+            GinIndex(fields=['tech']),  # GIN index, speeds up array-field queries
+            GinIndex(fields=['response_headers']),  # GIN index, speeds up JSON-field queries
         ]
         constraints = [
             # unique constraint: within one scan, each URL is recorded only once
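The new `GinIndex(fields=['response_headers'])` entries pair with the JSONField so containment lookups stay indexed. A hedged sketch of the queries these indexes are presumably meant to serve (the filter values are made up):

```python
from apps.asset.models import Endpoint, WebSite

# JSONB containment on the new response_headers column hits the GIN index
nginx_sites = WebSite.objects.filter(response_headers__contains={"server": "nginx"})

# ArrayField __contains on tech hits the existing GIN index
wordpress_endpoints = Endpoint.objects.filter(tech__contains=["WordPress"])
```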
@@ -53,7 +53,8 @@ class DjangoEndpointRepository:
                 tech=item.tech if item.tech else [],
                 vhost=item.vhost,
                 location=item.location or '',
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else {}
             )
             for item in unique_items
         ]
@@ -66,7 +67,7 @@ class DjangoEndpointRepository:
             update_fields=[
                 'host', 'title', 'status_code', 'content_length',
                 'webserver', 'body_preview', 'content_type', 'tech',
-                'vhost', 'location', 'matched_gf_patterns'
+                'vhost', 'location', 'matched_gf_patterns', 'response_headers'
             ],
             batch_size=1000
         )
@@ -143,7 +144,8 @@ class DjangoEndpointRepository:
                 tech=item.tech if item.tech else [],
                 vhost=item.vhost,
                 location=item.location or '',
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else {}
             )
             for item in unique_items
         ]
@@ -54,7 +54,8 @@ class DjangoWebSiteRepository:
                 tech=item.tech if item.tech else [],
                 status_code=item.status_code,
                 content_length=item.content_length,
-                vhost=item.vhost
+                vhost=item.vhost,
+                response_headers=item.response_headers if item.response_headers else {}
             )
             for item in unique_items
         ]
@@ -67,7 +68,7 @@ class DjangoWebSiteRepository:
             update_fields=[
                 'host', 'location', 'title', 'webserver',
                 'body_preview', 'content_type', 'tech',
-                'status_code', 'content_length', 'vhost'
+                'status_code', 'content_length', 'vhost', 'response_headers'
             ],
             batch_size=1000
         )
@@ -137,7 +138,8 @@ class DjangoWebSiteRepository:
                 tech=item.tech if item.tech else [],
                 status_code=item.status_code,
                 content_length=item.content_length,
-                vhost=item.vhost
+                vhost=item.vhost,
+                response_headers=item.response_headers if item.response_headers else {}
             )
             for item in unique_items
         ]
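Both repositories above add `response_headers` to the DTO-to-model mapping and to `update_fields`. Given the `url + target` unique constraints in the models diff, this reads like Django's `bulk_create(update_conflicts=True, ...)` upsert; a sketch under that assumption (the `unique_fields` value is inferred, not shown in the diff):

```python
from apps.asset.models import Endpoint

def upsert_endpoints(endpoint_objs: list) -> None:
    # Upsert keyed on the (url, target) unique constraint; existing rows get
    # the listed fields overwritten, now including response_headers.
    Endpoint.objects.bulk_create(
        endpoint_objs,
        update_conflicts=True,
        unique_fields=['url', 'target'],  # assumed from the models' constraint
        update_fields=[
            'host', 'title', 'status_code', 'content_length',
            'webserver', 'body_preview', 'content_type', 'tech',
            'vhost', 'location', 'matched_gf_patterns', 'response_headers',
        ],
        batch_size=1000,
    )
```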
@@ -44,6 +44,7 @@ class DjangoEndpointSnapshotRepository:
             snapshots.append(EndpointSnapshot(
                 scan_id=item.scan_id,
                 url=item.url,
+                host=item.host if item.host else '',
                 title=item.title,
                 status_code=item.status_code,
                 content_length=item.content_length,
@@ -53,7 +54,8 @@ class DjangoEndpointSnapshotRepository:
                 tech=item.tech if item.tech else [],
                 body_preview=item.body_preview,
                 vhost=item.vhost,
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else {}
             ))
 
         # bulk create (ignore conflicts; deduplicated by the unique constraint)
@@ -53,7 +53,8 @@ class DjangoWebsiteSnapshotRepository:
             content_type=item.content_type,
             tech=item.tech if item.tech else [],
             body_preview=item.body_preview,
-            vhost=item.vhost
+            vhost=item.vhost,
+            response_headers=item.response_headers if item.response_headers else {}
         ))
 
         # bulk create (ignore conflicts; deduplicated by the unique constraint)
@@ -67,9 +67,10 @@ class SubdomainListSerializer(serializers.ModelSerializer):
 
 
 class WebSiteSerializer(serializers.ModelSerializer):
-    """Site serializer"""
+    """Site serializer (target detail page)"""
 
     subdomain = serializers.CharField(source='subdomain.name', allow_blank=True, default='')
+    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)  # HTTP response headers
 
     class Meta:
         model = WebSite
@@ -86,6 +87,7 @@ class WebSiteSerializer(serializers.ModelSerializer):
             'body_preview',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP response headers
             'subdomain',
             'created_at',
         ]
@@ -140,6 +142,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
         source='matched_gf_patterns',
         read_only=True,
     )
+    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)  # HTTP response headers
 
     class Meta:
         model = Endpoint
@@ -155,6 +158,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
             'body_preview',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP response headers
             'gfPatterns',
             'created_at',
         ]
@@ -215,6 +219,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
     subdomain_name = serializers.CharField(source='subdomain.name', read_only=True)
     webserver = serializers.CharField(source='web_server', read_only=True)  # field-name mapping
     status_code = serializers.IntegerField(source='status', read_only=True)  # field-name mapping
+    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)  # HTTP response headers
 
     class Meta:
         model = WebsiteSnapshot
@@ -230,6 +235,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
             'body_preview',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP response headers
             'subdomain_name',
             'created_at',
         ]
@@ -264,6 +270,7 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
         source='matched_gf_patterns',
         read_only=True,
     )
+    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)  # HTTP response headers
 
     class Meta:
         model = EndpointSnapshot
@@ -280,6 +287,7 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
             'body_preview',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP response headers
             'gfPatterns',
             'created_at',
         ]
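Each serializer above exposes the snake_case column under a camelCase API name via `source`. A minimal self-contained sketch of that mapping (the serializer name is illustrative):

```python
from rest_framework import serializers

class ResponseHeadersSerializer(serializers.Serializer):  # illustrative
    responseHeaders = serializers.JSONField(source='response_headers', read_only=True)

data = ResponseHeadersSerializer({'response_headers': {'server': 'nginx'}}).data
print(data)  # {'responseHeaders': {'server': 'nginx'}}
```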
@@ -15,9 +15,10 @@
 """
 
 from django.core.management.base import BaseCommand
+from io import StringIO
 from pathlib import Path
 
-import yaml
+from ruamel.yaml import YAML
 
 from apps.engine.models import ScanEngine
 
@@ -44,10 +45,12 @@ class Command(BaseCommand):
         with open(config_path, 'r', encoding='utf-8') as f:
             default_config = f.read()
 
-        # parse the YAML into a dict; used later to generate sub-engine configs
+        # parse with ruamel.yaml so comments are preserved
+        yaml_parser = YAML()
+        yaml_parser.preserve_quotes = True
         try:
-            config_dict = yaml.safe_load(default_config) or {}
-        except yaml.YAMLError as e:
+            config_dict = yaml_parser.load(default_config) or {}
+        except Exception as e:
             self.stdout.write(self.style.ERROR(f'引擎配置 YAML 解析失败: {e}'))
             return
 
@@ -83,16 +86,13 @@ class Command(BaseCommand):
             if scan_type != 'subdomain_discovery' and 'tools' not in scan_cfg:
                 continue
 
-            # build a YAML containing only the current scan type's config
+            # build a YAML containing only the current scan type's config (comments preserved)
             single_config = {scan_type: scan_cfg}
             try:
-                single_yaml = yaml.safe_dump(
-                    single_config,
-                    sort_keys=False,
-                    allow_unicode=True,
-                    default_flow_style=None,
-                )
-            except yaml.YAMLError as e:
+                stream = StringIO()
+                yaml_parser.dump(single_config, stream)
+                single_yaml = stream.getvalue()
+            except Exception as e:
                 self.stdout.write(self.style.ERROR(f'生成子引擎 {scan_type} 配置失败: {e}'))
                 continue
 
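Why the switch matters: `yaml.safe_load`/`safe_dump` discard comments, while ruamel.yaml's round-trip mode keeps comments attached to the mapping nodes they belong to, so a single section dumped on its own keeps its inline documentation. This is also why the config diff below moves the banner comments inside each top-level key. A small sketch:

```python
from io import StringIO
from ruamel.yaml import YAML

source = """\
site_scan:
  # ==================== Site scan ====================
  tools:
    httpx:
      enabled: true  # toggle the tool
"""

yaml_parser = YAML()
yaml_parser.preserve_quotes = True
config = yaml_parser.load(source)

# Dump just one section: comments nested under the key survive the round
# trip; comments that sat *above* the key at top level would not.
stream = StringIO()
yaml_parser.dump({'site_scan': config['site_scan']}, stream)
print(stream.getvalue())
```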
@@ -99,7 +99,8 @@ SITE_SCAN_COMMANDS = {
             '-status-code -content-type -content-length '
             '-location -title -server -body-preview '
             '-tech-detect -cdn -vhost '
-            '-random-agent -no-color -json'
+            '-include-response-header '
+            '-random-agent -no-color -json -silent'
         ),
         'optional': {
             'threads': '-threads {threads}',
@@ -171,7 +172,8 @@ URL_FETCH_COMMANDS = {
             '-status-code -content-type -content-length '
             '-location -title -server -body-preview '
             '-tech-detect -cdn -vhost '
-            '-random-agent -no-color -json'
+            '-include-response-header '
+            '-random-agent -no-color -json -silent'
         ),
         'optional': {
             'threads': '-threads {threads}',
@@ -4,14 +4,12 @@
 # Required parameter: enabled (whether the tool is on)
 # Optional parameter: timeout (seconds; default "auto" computes it)
 
-# ==================== Subdomain discovery ====================
-#
-# Stage 1: passive collection (parallel) - required; enable at least one tool
-# Stage 2: wordlist brute force (optional) - enumerate subdomains with a wordlist
-# Stage 3: mutation + validation (optional) - mutate known names, stream-validate liveness
-# Stage 4: DNS resolution check (optional) - verify all candidates actually resolve
-#
 subdomain_discovery:
+  # ==================== Subdomain discovery ====================
+  # Stage 1: passive collection (parallel) - required; enable at least one tool
+  # Stage 2: wordlist brute force (optional) - enumerate subdomains with a wordlist
+  # Stage 3: mutation + validation (optional) - mutate known names, stream-validate liveness
+  # Stage 4: DNS resolution check (optional) - verify all candidates actually resolve
   # === Stage 1: passive collection tools (run in parallel) ===
   passive_tools:
     subfinder:
@@ -55,8 +53,8 @@ subdomain_discovery:
   subdomain_resolve:
     timeout: auto  # computed from the number of candidate subdomains
 
-# ==================== Port scan ====================
 port_scan:
+  # ==================== Port scan ====================
   tools:
     naabu_active:
       enabled: true
@@ -70,8 +68,8 @@ port_scan:
       enabled: true
       # timeout: auto  # passive scans are usually fast
 
-# ==================== Site scan ====================
 site_scan:
+  # ==================== Site scan ====================
   tools:
     httpx:
       enabled: true
@@ -81,16 +79,16 @@ site_scan:
       # request-timeout: 10  # per-request timeout in seconds (default 10)
       # retries: 2  # retry count on request failure
 
-# ==================== Fingerprint detection ====================
-# Runs serially after site_scan to identify each WebSite's tech stack
 fingerprint_detect:
+  # ==================== Fingerprint detection ====================
+  # Runs serially after site_scan to identify each WebSite's tech stack
   tools:
     xingfinger:
       enabled: true
-      fingerprint-libs: [ehole, goby, wappalyzer, fingers, fingerprinthub, arl]  # all fingerprint libraries
+      fingerprint-libs: [ehole, goby, wappalyzer, fingers, fingerprinthub, arl]  # all fingerprint libraries enabled by default
 
-# ==================== Directory scan ====================
 directory_scan:
+  # ==================== Directory scan ====================
   tools:
     ffuf:
       enabled: true
@@ -103,8 +101,8 @@ directory_scan:
       match-codes: 200,201,301,302,401,403  # HTTP status codes to match
       # rate: 0  # requests per second (default 0 = unlimited)
 
-# ==================== URL fetch ====================
 url_fetch:
+  # ==================== URL fetch ====================
   tools:
     waymore:
       enabled: true
@@ -142,8 +140,8 @@ url_fetch:
       # request-timeout: 10  # per-request timeout in seconds (default 10)
       # retries: 2  # retry count on request failure
 
-# ==================== Vulnerability scan ====================
 vuln_scan:
+  # ==================== Vulnerability scan ====================
   tools:
     dalfox_xss:
       enabled: true
@@ -256,7 +256,8 @@ def fingerprint_detect_flow(
             'url_count': int,
             'processed_records': int,
             'updated_count': int,
-            'not_found_count': int,
+            'created_count': int,
+            'snapshot_count': int,
             'executed_tasks': list,
             'tool_stats': dict
         }
@@ -303,6 +304,7 @@ def fingerprint_detect_flow(
             'processed_records': 0,
             'updated_count': 0,
             'created_count': 0,
+            'snapshot_count': 0,
             'executed_tasks': ['export_urls_for_fingerprint'],
             'tool_stats': {
                 'total': 0,
@@ -340,6 +342,7 @@ def fingerprint_detect_flow(
     total_processed = sum(stats['result'].get('processed_records', 0) for stats in tool_stats.values())
     total_updated = sum(stats['result'].get('updated_count', 0) for stats in tool_stats.values())
     total_created = sum(stats['result'].get('created_count', 0) for stats in tool_stats.values())
+    total_snapshots = sum(stats['result'].get('snapshot_count', 0) for stats in tool_stats.values())
 
     successful_tools = [name for name in enabled_tools.keys()
                         if name not in [f['tool'] for f in failed_tools]]
@@ -354,6 +357,7 @@ def fingerprint_detect_flow(
         'processed_records': total_processed,
         'updated_count': total_updated,
         'created_count': total_created,
+        'snapshot_count': total_snapshots,
         'executed_tasks': executed_tasks,
         'tool_stats': {
             'total': len(enabled_tools),
@@ -4,7 +4,6 @@ xingfinger execution task
 Streams the xingfinger command and updates the tech field in real time
 """
 
-import importlib
 import json
 import logging
 import subprocess
@@ -15,93 +14,97 @@ from django.db import connection
 from prefect import task
 
 from apps.scan.utils import execute_stream
+from apps.asset.dtos.snapshot import WebsiteSnapshotDTO
+from apps.asset.repositories.snapshot import DjangoWebsiteSnapshotRepository
 
 logger = logging.getLogger(__name__)
 
 
-# data-source mapping: source → (module_path, model_name, url_field)
-SOURCE_MODEL_MAP = {
-    'website': ('apps.asset.models', 'WebSite', 'url'),
-    # future extensions:
-    # 'endpoint': ('apps.asset.models', 'Endpoint', 'url'),
-    # 'directory': ('apps.asset.models', 'Directory', 'url'),
-}
-
-
-def _get_model_class(source: str):
-    """Resolve the Model class for a data-source type"""
-    if source not in SOURCE_MODEL_MAP:
-        raise ValueError(f"不支持的数据源: {source}")
-
-    module_path, model_name, _ = SOURCE_MODEL_MAP[source]
-    module = importlib.import_module(module_path)
-    return getattr(module, model_name)
-
-
-def parse_xingfinger_line(line: str) -> tuple[str, list[str]] | None:
+def parse_xingfinger_line(line: str) -> dict | None:
     """
     Parse one line of xingfinger JSON output
 
-    xingfinger silent-mode output format:
-    {"url": "https://example.com", "cms": "WordPress,PHP,nginx", ...}
+    xingfinger output format:
+    {"url": "...", "cms": "...", "server": "BWS/1.1", "status_code": 200, "length": 642831, "title": "..."}
 
     Returns:
-        tuple: (url, tech_list), or None on parse failure
+        dict: contains url, techs, server, title, status_code, content_length
+        None: on parse failure or empty URL
     """
     try:
         item = json.loads(line)
         url = item.get('url', '').strip()
-        cms = item.get('cms', '')
 
-        if not url or not cms:
+        if not url:
             return None
 
-        # split the cms field on commas, strip whitespace
-        techs = [t.strip() for t in cms.split(',') if t.strip()]
+        cms = item.get('cms', '')
+        techs = [t.strip() for t in cms.split(',') if t.strip()] if cms else []
 
-        return (url, techs) if techs else None
+        return {
+            'url': url,
+            'techs': techs,
+            'server': item.get('server', ''),
+            'title': item.get('title', ''),
+            'status_code': item.get('status_code'),
+            'content_length': item.get('length'),
+        }
 
     except json.JSONDecodeError:
         return None
 
 
-def bulk_merge_tech_field(
-    source: str,
-    url_techs_map: dict[str, list[str]],
+def bulk_merge_website_fields(
+    records: list[dict],
     target_id: int
 ) -> dict:
     """
-    Bulk-merge the tech array field (native PostgreSQL SQL)
+    Bulk merge-update WebSite fields (native PostgreSQL SQL)
 
+    Merge strategy:
+    - tech: array union with deduplication
+    - title, webserver, status_code, content_length: only updated when the current value is empty/NULL
 
-    Uses native PostgreSQL SQL for efficient array merge/dedupe.
     If no record exists for the URL, a new one is created automatically.
 
+    Args:
+        records: parsed records, each {url, techs, server, title, status_code, content_length}
+        target_id: target ID
+
     Returns:
         dict: {'updated_count': int, 'created_count': int}
     """
-    Model = _get_model_class(source)
-    table_name = Model._meta.db_table
+    from apps.asset.models import WebSite
+    table_name = WebSite._meta.db_table
 
     updated_count = 0
     created_count = 0
 
     with connection.cursor() as cursor:
-        for url, techs in url_techs_map.items():
-            if not techs:
-                continue
+        for record in records:
+            url = record['url']
+            techs = record.get('techs', [])
+            server = record.get('server', '') or ''
+            title = record.get('title', '') or ''
+            status_code = record.get('status_code')
+            content_length = record.get('content_length')
 
-            # try an update first (PostgreSQL array merge/dedupe)
-            sql = f"""
+            # try an update first (merge strategy)
+            update_sql = f"""
                 UPDATE {table_name}
-                SET tech = (
-                    SELECT ARRAY(SELECT DISTINCT unnest(
+                SET
+                    tech = (SELECT ARRAY(SELECT DISTINCT unnest(
                         COALESCE(tech, ARRAY[]::varchar[]) || %s::varchar[]
-                    ))
-                )
+                    ))),
+                    title = CASE WHEN title = '' OR title IS NULL THEN %s ELSE title END,
+                    webserver = CASE WHEN webserver = '' OR webserver IS NULL THEN %s ELSE webserver END,
+                    status_code = CASE WHEN status_code IS NULL THEN %s ELSE status_code END,
+                    content_length = CASE WHEN content_length IS NULL THEN %s ELSE content_length END
                 WHERE url = %s AND target_id = %s
             """
 
-            cursor.execute(sql, [techs, url, target_id])
+            cursor.execute(update_sql, [techs, title, server, status_code, content_length, url, target_id])
 
             if cursor.rowcount > 0:
                 updated_count += cursor.rowcount
@@ -113,22 +116,27 @@ def bulk_merge_website_fields(
                 host = parsed.hostname or ''
 
                 # insert a new record (with conflict handling)
                 # pass explicit defaults for every NOT NULL column
                 insert_sql = f"""
-                    INSERT INTO {table_name} (target_id, url, host, location, title, webserver, body_preview, content_type, tech, created_at)
-                    VALUES (%s, %s, %s, '', '', '', '', '', %s::varchar[], NOW())
+                    INSERT INTO {table_name} (
+                        target_id, url, host, location, title, webserver,
+                        body_preview, content_type, tech, status_code, content_length,
+                        response_headers, created_at
+                    )
+                    VALUES (%s, %s, %s, '', %s, %s, '', '', %s::varchar[], %s, %s, '{{}}'::jsonb, NOW())
                     ON CONFLICT (target_id, url) DO UPDATE SET
-                        tech = (
-                            SELECT ARRAY(SELECT DISTINCT unnest(
-                                COALESCE({table_name}.tech, ARRAY[]::varchar[]) || EXCLUDED.tech
-                            ))
-                        )
+                        tech = (SELECT ARRAY(SELECT DISTINCT unnest(
                            COALESCE({table_name}.tech, ARRAY[]::varchar[]) || EXCLUDED.tech
                        ))),
+                        title = CASE WHEN {table_name}.title = '' OR {table_name}.title IS NULL THEN EXCLUDED.title ELSE {table_name}.title END,
+                        webserver = CASE WHEN {table_name}.webserver = '' OR {table_name}.webserver IS NULL THEN EXCLUDED.webserver ELSE {table_name}.webserver END,
+                        status_code = CASE WHEN {table_name}.status_code IS NULL THEN EXCLUDED.status_code ELSE {table_name}.status_code END,
+                        content_length = CASE WHEN {table_name}.content_length IS NULL THEN EXCLUDED.content_length ELSE {table_name}.content_length END
                 """
-                cursor.execute(insert_sql, [target_id, url, host, techs])
+                cursor.execute(insert_sql, [target_id, url, host, title, server, techs, status_code, content_length])
                 created_count += 1
 
             except Exception as e:
-                logger.warning("创建 %s 记录失败 (url=%s): %s", source, url, e)
+                logger.warning("创建 WebSite 记录失败 (url=%s): %s", url, e)
 
     return {
         'updated_count': updated_count,
@@ -142,12 +150,12 @@ def _parse_xingfinger_stream_output(
     cwd: Optional[str] = None,
     timeout: Optional[int] = None,
     log_file: Optional[str] = None
-) -> Generator[tuple[str, list[str]], None, None]:
+) -> Generator[dict, None, None]:
     """
     Stream-parse xingfinger command output
 
     Uses execute_stream to process the command's stdout in real time, turning
-    each JSON line into a (url, tech_list) tuple
+    each JSON line into a full field dict
     """
     logger.info("开始流式解析 xingfinger 命令输出 - 命令: %s", cmd)
 
@@ -194,43 +202,46 @@ def run_xingfinger_and_stream_update_tech_task(
     batch_size: int = 100
 ) -> dict:
     """
-    Stream the xingfinger command and update the tech field in real time
-
-    Updates the tech field of the table selected by `source`:
-    - website → WebSite.tech
-    - endpoint → Endpoint.tech (future extension)
+    Stream the xingfinger command, save snapshots, and merge-update the asset table
 
     Processing flow:
     1. Stream the xingfinger command
-    2. Parse JSON output in real time
-    3. Batch-update the database every batch_size records
-    4. Merge/dedupe arrays with native PostgreSQL SQL
-    5. Create records that do not exist yet
+    2. Parse JSON output in real time (all fields)
+    3. Process in batches of batch_size:
+       - save snapshots (WebsiteSnapshot)
+       - merge-update the asset table (WebSite)
+
+    Merge strategy:
+    - tech: array union with deduplication
+    - title, webserver, status_code, content_length: only updated when currently empty
 
     Returns:
         dict: {
             'processed_records': int,
            'updated_count': int,
            'created_count': int,
+            'snapshot_count': int,
            'batch_count': int
        }
     """
     logger.info(
-        "开始执行 xingfinger 并更新 tech - target_id=%s, source=%s, timeout=%s秒",
-        target_id, source, timeout
+        "开始执行 xingfinger - scan_id=%s, target_id=%s, timeout=%s秒",
+        scan_id, target_id, timeout
     )
 
     data_generator = None
+    snapshot_repo = DjangoWebsiteSnapshotRepository()
 
     try:
         # initialize counters
         processed_records = 0
         updated_count = 0
         created_count = 0
+        snapshot_count = 0
         batch_count = 0
 
-        # URL -> techs map for the current batch
-        url_techs_map = {}
+        # record list for the current batch
+        batch_records = []
 
         # stream processing
        data_generator = _parse_xingfinger_stream_output(
@@ -241,47 +252,43 @@ def run_xingfinger_and_stream_update_tech_task(
            log_file=log_file
        )
 
-        for url, techs in data_generator:
+        for record in data_generator:
            processed_records += 1
+            batch_records.append(record)
 
-            # accumulate into url_techs_map
-            if url in url_techs_map:
-                # merge repeated identifications of the same URL
-                url_techs_map[url].extend(techs)
-            else:
-                url_techs_map[url] = techs
-
-            # batch size reached: run the bulk update
-            if len(url_techs_map) >= batch_size:
+            # batch size reached: process the batch
+            if len(batch_records) >= batch_size:
                batch_count += 1
-                result = bulk_merge_tech_field(source, url_techs_map, target_id)
-                updated_count += result['updated_count']
-                created_count += result.get('created_count', 0)
-
-                logger.debug(
-                    "批次 %d 完成 - 更新: %d, 创建: %d",
-                    batch_count, result['updated_count'], result.get('created_count', 0)
+                result = _process_batch(
+                    batch_records, scan_id, target_id, batch_count, snapshot_repo
                )
+                updated_count += result['updated_count']
+                created_count += result['created_count']
+                snapshot_count += result['snapshot_count']
 
                # reset the batch
-                url_techs_map = {}
+                batch_records = []
 
         # process the final batch
-        if url_techs_map:
+        if batch_records:
            batch_count += 1
-            result = bulk_merge_tech_field(source, url_techs_map, target_id)
+            result = _process_batch(
+                batch_records, scan_id, target_id, batch_count, snapshot_repo
+            )
            updated_count += result['updated_count']
-            created_count += result.get('created_count', 0)
+            created_count += result['created_count']
+            snapshot_count += result['snapshot_count']
 
         logger.info(
-            "✓ xingfinger 执行完成 - 处理记录: %d, 更新: %d, 创建: %d, 批次: %d",
-            processed_records, updated_count, created_count, batch_count
+            "✓ xingfinger 执行完成 - 处理: %d, 更新: %d, 创建: %d, 快照: %d, 批次: %d",
+            processed_records, updated_count, created_count, snapshot_count, batch_count
        )
 
         return {
            'processed_records': processed_records,
            'updated_count': updated_count,
            'created_count': created_count,
+            'snapshot_count': snapshot_count,
            'batch_count': batch_count
        }
 
@@ -299,3 +306,67 @@ def run_xingfinger_and_stream_update_tech_task(
             data_generator.close()
         except Exception as e:
             logger.debug("关闭生成器时出错: %s", e)
+
+
+def _process_batch(
+    records: list[dict],
+    scan_id: int,
+    target_id: int,
+    batch_num: int,
+    snapshot_repo: DjangoWebsiteSnapshotRepository
+) -> dict:
+    """
+    Process one batch: save snapshots + merge-update the asset table
+
+    Args:
+        records: parsed record list
+        scan_id: scan task ID
+        target_id: target ID
+        batch_num: batch number
+        snapshot_repo: snapshot repository
+
+    Returns:
+        dict: {'updated_count': int, 'created_count': int, 'snapshot_count': int}
+    """
+    # 1. build the snapshot DTO list
+    snapshot_dtos = []
+    for record in records:
+        # extract the host from the URL
+        parsed = urlparse(record['url'])
+        host = parsed.hostname or ''
+
+        dto = WebsiteSnapshotDTO(
+            scan_id=scan_id,
+            target_id=target_id,
+            url=record['url'],
+            host=host,
+            title=record.get('title', '') or '',
+            status=record.get('status_code'),
+            content_length=record.get('content_length'),
+            web_server=record.get('server', '') or '',
+            tech=record.get('techs', []),
+        )
+        snapshot_dtos.append(dto)
+
+    # 2. save the snapshots
+    snapshot_count = 0
+    if snapshot_dtos:
+        try:
+            snapshot_repo.save_snapshots(snapshot_dtos)
+            snapshot_count = len(snapshot_dtos)
+        except Exception as e:
+            logger.warning("批次 %d 保存快照失败: %s", batch_num, e)
+
+    # 3. merge-update the asset table
+    merge_result = bulk_merge_website_fields(records, target_id)
+
+    logger.debug(
+        "批次 %d 完成 - 更新: %d, 创建: %d, 快照: %d",
+        batch_num, merge_result['updated_count'], merge_result['created_count'], snapshot_count
+    )
+
+    return {
+        'updated_count': merge_result['updated_count'],
+        'created_count': merge_result['created_count'],
+        'snapshot_count': snapshot_count
+    }
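A pure-Python sketch of the merge semantics the SQL above implements, for readability (the real work happens in PostgreSQL; `sorted` stands in for `DISTINCT unnest`, which has no ordering guarantee):

```python
def merge_website_row(existing: dict, incoming: dict) -> dict:
    merged = dict(existing)
    # tech: array union, never dropping what was already recorded
    merged['tech'] = sorted(set(existing.get('tech') or []) | set(incoming.get('techs') or []))
    # scalars: fill only when the current value is empty or NULL
    for src, dst in [('title', 'title'), ('server', 'webserver'),
                     ('status_code', 'status_code'), ('content_length', 'content_length')]:
        if not merged.get(dst) and incoming.get(src) is not None:
            merged[dst] = incoming[src]
    return merged

row = {'tech': ['nginx'], 'title': '', 'status_code': None}
print(merge_website_row(row, {'techs': ['PHP'], 'title': 'Home', 'status_code': 200}))
# {'tech': ['PHP', 'nginx'], 'title': 'Home', 'status_code': 200}
```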
@@ -134,6 +134,7 @@ class HttpxRecord:
         self.vhost = data.get('vhost')
         self.failed = data.get('failed', False)
         self.timestamp = data.get('timestamp')
+        self.response_headers = data.get('header', {})  # response headers (httpx's `header` output field)
 
         # extract the hostname from the URL
         self.host = self._extract_hostname()
@@ -359,7 +360,8 @@ def _save_batch(
             tech=record.tech if isinstance(record.tech, list) else [],
             status=record.status_code,
             content_length=record.content_length,
-            vhost=record.vhost
+            vhost=record.vhost,
+            response_headers=record.response_headers if record.response_headers else {},
         )
 
         snapshot_items.append(snapshot_dto)
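The new `response_headers` values come from the `header` object that httpx emits per result once `-include-response-header` is combined with `-json` (the sample line below is fabricated for illustration):

```python
import json

line = ('{"url": "https://example.com", "status_code": 200, '
        '"header": {"server": "nginx", "content_type": "text/html"}}')
data = json.loads(line)
response_headers = data.get('header', {})  # the same lookup HttpxRecord now performs
print(response_headers['server'])  # nginx
```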
@@ -2,8 +2,8 @@
 Streaming URL validation task built on execute_stream
 
 Main features:
-1. Run httpx in real time to verify URL liveness
-2. Stream the command output and parse the live URLs
+1. Run httpx in real time to validate URLs
+2. Stream the command output and parse URL details
 3. Bulk-save to the database (Endpoint table)
 4. Avoid loading every URL into memory at once
 
@@ -14,7 +14,7 @@
 - Uses execute_stream to process output in real time
 - Streaming avoids memory exhaustion
 - Batched operations reduce database round trips
-- Only live URLs (status 2xx/3xx) are saved
+- All valid URLs are saved (including 4xx/5xx, which are useful for security analysis)
 """
 
 import logging
@@ -73,7 +73,7 @@ def _parse_and_validate_line(line: str) -> Optional[dict]:
     Returns:
         Optional[dict]: a valid httpx record, or None if validation fails
 
-    Only live URLs (2xx/3xx status codes) are returned
+    All valid URLs are kept (no status-code filtering; 403/404/500 etc. are still worth analyzing in a security scan)
     """
     try:
         # strip NUL characters before parsing the JSON
@@ -99,24 +99,21 @@ def _parse_and_validate_line(line: str) -> Optional[dict]:
             logger.info("URL 为空,跳过 - 数据: %s", str(line_data)[:200])
             return None
 
-        # only keep live URLs (2xx or 3xx)
-        if status_code and (200 <= status_code < 400):
-            return {
-                'url': _sanitize_string(url),
-                'host': _sanitize_string(line_data.get('host', '')),
-                'status_code': status_code,
-                'title': _sanitize_string(line_data.get('title', '')),
-                'content_length': line_data.get('content_length', 0),
-                'content_type': _sanitize_string(line_data.get('content_type', '')),
-                'webserver': _sanitize_string(line_data.get('webserver', '')),
-                'location': _sanitize_string(line_data.get('location', '')),
-                'tech': line_data.get('tech', []),
-                'body_preview': _sanitize_string(line_data.get('body_preview', '')),
-                'vhost': line_data.get('vhost', False),
-            }
-        else:
-            logger.debug("URL 不存活(状态码: %s),跳过: %s", status_code, url)
-            return None
+        # keep every valid URL (no status-code filtering)
+        return {
+            'url': _sanitize_string(url),
+            'host': _sanitize_string(line_data.get('host', '')),
+            'status_code': status_code,
+            'title': _sanitize_string(line_data.get('title', '')),
+            'content_length': line_data.get('content_length', 0),
+            'content_type': _sanitize_string(line_data.get('content_type', '')),
+            'webserver': _sanitize_string(line_data.get('webserver', '')),
+            'location': _sanitize_string(line_data.get('location', '')),
+            'tech': line_data.get('tech', []),
+            'body_preview': _sanitize_string(line_data.get('body_preview', '')),
+            'vhost': line_data.get('vhost', False),
+            'response_headers': line_data.get('header', {}),
+        }
 
     except Exception:
         logger.info("跳过无法解析的行: %s", line[:100] if line else 'empty')
@@ -306,6 +303,7 @@ def _save_batch(
             vhost=record.get('vhost', False),
             matched_gf_patterns=[],
             target_id=target_id,
+            response_headers=record.get('response_headers', {}),
         )
         snapshots.append(dto)
     except Exception as e:
@@ -41,6 +41,7 @@ python-dateutil==2.9.0
 pytz==2024.1
 validators==0.22.0
 PyYAML==6.0.1
+ruamel.yaml>=0.18.0  # comment-preserving YAML parsing
 colorlog==6.8.2  # colored log output
 python-json-logger==2.0.7  # structured JSON logging
 Jinja2>=3.1.6  # command template engine
@@ -843,6 +843,18 @@ class TestDataGenerator:
             # generate a URL of fixed length 245
             url = generate_fixed_length_url(target_name, length=245, path_hint=f'website/{i:04d}')
 
+            # generate mock response-header data
+            response_headers = {
+                'server': random.choice(['nginx', 'Apache', 'cloudflare', 'Microsoft-IIS/10.0']),
+                'content_type': 'text/html; charset=utf-8',
+                'x_powered_by': random.choice(['PHP/8.2', 'ASP.NET', 'Express', None]),
+                'x_frame_options': random.choice(['DENY', 'SAMEORIGIN', None]),
+                'strict_transport_security': 'max-age=31536000; includeSubDomains' if random.choice([True, False]) else None,
+                'set_cookie': f'session={random.randint(100000, 999999)}; HttpOnly; Secure' if random.choice([True, False]) else None,
+            }
+            # drop None values
+            response_headers = {k: v for k, v in response_headers.items() if v is not None}
+
             batch_data.append((
                 url, target_id, target_name, random.choice(titles),
                 random.choice(webservers), random.choice(tech_stacks),
@@ -850,7 +862,8 @@ class TestDataGenerator:
                 random.randint(1000, 500000), 'text/html; charset=utf-8',
                 f'https://{target_name}/login' if random.choice([True, False]) else '',
                 random.choice(body_previews),
-                random.choice([True, False, None])
+                random.choice([True, False, None]),
+                json.dumps(response_headers)
             ))
 
         # bulk insert
@@ -860,11 +873,11 @@ class TestDataGenerator:
             INSERT INTO website (
                 url, target_id, host, title, webserver, tech, status_code,
                 content_length, content_type, location, body_preview, vhost,
-                created_at
+                response_headers, created_at
             ) VALUES %s
             ON CONFLICT DO NOTHING
             RETURNING id
-        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
         ids = [row[0] for row in cur.fetchall()]
 
         print(f"  ✓ 创建了 {len(batch_data)} 个网站\n")
@@ -1017,6 +1030,18 @@ class TestDataGenerator:
             # generate 10-20 tags (gf_patterns)
             tags = random.choice(gf_patterns)
 
+            # generate mock response-header data
+            response_headers = {
+                'server': random.choice(['nginx', 'gunicorn', 'uvicorn', 'Apache']),
+                'content_type': 'application/json',
+                'x_request_id': f'req_{random.randint(100000, 999999)}',
+                'x_ratelimit_limit': str(random.choice([100, 1000, 5000])),
+                'x_ratelimit_remaining': str(random.randint(0, 1000)),
+                'cache_control': random.choice(['no-cache', 'max-age=3600', 'private', None]),
+            }
+            # drop None values
+            response_headers = {k: v for k, v in response_headers.items() if v is not None}
+
             batch_data.append((
                 url, target_id, target_name, title,
                 random.choice(['nginx/1.24.0', 'gunicorn/21.2.0']),
@@ -1024,7 +1049,8 @@ class TestDataGenerator:
                 random.randint(100, 50000), 'application/json',
                 tech_list,
                 '', random.choice(body_previews),
-                random.choice([True, False, None]), tags
+                random.choice([True, False, None]), tags,
+                json.dumps(response_headers)
             ))
             count += 1
 
@@ -1034,10 +1060,10 @@ class TestDataGenerator:
             INSERT INTO endpoint (
                 url, target_id, host, title, webserver, status_code, content_length,
                 content_type, tech, location, body_preview, vhost, matched_gf_patterns,
-                created_at
+                response_headers, created_at
             ) VALUES %s
             ON CONFLICT DO NOTHING
-        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
 
         print(f"  ✓ 创建了 {count} 个端点\n")
 
@@ -1401,13 +1427,23 @@ class TestDataGenerator:
             # generate a URL of fixed length 245
             url = generate_fixed_length_url(target_name, length=245, path_hint=f'website-snap/{i:04d}')
 
+            # generate mock response-header data
+            response_headers = {
+                'server': random.choice(['nginx', 'Apache', 'cloudflare']),
+                'content_type': 'text/html; charset=utf-8',
+                'x_frame_options': random.choice(['DENY', 'SAMEORIGIN', None]),
+            }
+            # drop None values
+            response_headers = {k: v for k, v in response_headers.items() if v is not None}
+
             batch_data.append((
                 scan_id, url, target_name, random.choice(titles),
                 random.choice(webservers), random.choice(tech_stacks),
                 random.choice([200, 301, 403]),
                 random.randint(1000, 50000), 'text/html; charset=utf-8',
                 '',  # location field
-                '<!DOCTYPE html><html><head><title>Test</title></head><body>Content</body></html>'
+                '<!DOCTYPE html><html><head><title>Test</title></head><body>Content</body></html>',
+                json.dumps(response_headers)
             ))
             count += 1
 
@@ -1416,10 +1452,11 @@ class TestDataGenerator:
         execute_values(cur, """
             INSERT INTO website_snapshot (
                 scan_id, url, host, title, web_server, tech, status,
-                content_length, content_type, location, body_preview, created_at
+                content_length, content_type, location, body_preview,
+                response_headers, created_at
             ) VALUES %s
             ON CONFLICT DO NOTHING
-        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
 
         print(f"  ✓ 创建了 {count} 个网站快照\n")
 
@@ -1498,6 +1535,13 @@ class TestDataGenerator:
             num_tags = random.randint(10, 20)
             tags = random.sample(all_tags, min(num_tags, len(all_tags)))
 
+            # generate mock response-header data
+            response_headers = {
+                'server': 'nginx/1.24.0',
+                'content_type': 'application/json',
+                'x_request_id': f'req_{random.randint(100000, 999999)}',
+            }
+
             batch_data.append((
                 scan_id, url, target_name, title,
                 random.choice([200, 201, 401, 403, 404]),
@@ -1506,7 +1550,8 @@ class TestDataGenerator:
                 'nginx/1.24.0',
                 'application/json', tech_list,
                 '{"status":"ok","data":{}}',
-                tags
+                tags,
+                json.dumps(response_headers)
             ))
             count += 1
 
@@ -1516,10 +1561,10 @@ class TestDataGenerator:
             INSERT INTO endpoint_snapshot (
                 scan_id, url, host, title, status_code, content_length,
                 location, webserver, content_type, tech, body_preview,
-                matched_gf_patterns, created_at
+                matched_gf_patterns, response_headers, created_at
            ) VALUES %s
            ON CONFLICT DO NOTHING
-        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+        """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
 
         print(f"  ✓ 创建了 {count} 个端点快照\n")
 
@@ -2543,9 +2588,10 @@ class MillionDataGenerator:
                 if len(batch_data) >= batch_size:
                     execute_values(cur, """
                         INSERT INTO website (url, target_id, host, title, webserver, tech,
-                            status_code, content_length, content_type, location, body_preview, created_at)
+                            status_code, content_length, content_type, location, body_preview,
+                            vhost, response_headers, created_at)
                         VALUES %s ON CONFLICT DO NOTHING
-                    """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+                    """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NULL, '{}'::jsonb, NOW())")
                     self.conn.commit()
                     batch_data = []
                     print(f"  ✓ {count:,} / {target_count:,}")
@@ -2555,9 +2601,10 @@ class MillionDataGenerator:
             if batch_data:
                 execute_values(cur, """
                     INSERT INTO website (url, target_id, host, title, webserver, tech,
-                        status_code, content_length, content_type, location, body_preview, created_at)
+                        status_code, content_length, content_type, location, body_preview,
+                        vhost, response_headers, created_at)
                     VALUES %s ON CONFLICT DO NOTHING
-                """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+                """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NULL, '{}'::jsonb, NOW())")
                 self.conn.commit()
 
             print(f"  ✓ 创建了 {count:,} 个网站\n")
@@ -2632,9 +2679,9 @@ class MillionDataGenerator:
                     execute_values(cur, """
                         INSERT INTO endpoint (url, target_id, host, title, webserver, status_code,
                             content_length, content_type, tech, location, body_preview, vhost,
-                            matched_gf_patterns, created_at)
+                            matched_gf_patterns, response_headers, created_at)
                         VALUES %s ON CONFLICT DO NOTHING
-                    """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+                    """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '{}'::jsonb, NOW())")
                     self.conn.commit()
                     batch_data = []
                     print(f"  ✓ {count:,} / {target_count:,}")
@@ -2645,9 +2692,9 @@ class MillionDataGenerator:
                 execute_values(cur, """
                     INSERT INTO endpoint (url, target_id, host, title, webserver, status_code,
                         content_length, content_type, tech, location, body_preview, vhost,
-                        matched_gf_patterns, created_at)
+                        matched_gf_patterns, response_headers, created_at)
                    VALUES %s ON CONFLICT DO NOTHING
-                """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())")
+                """, batch_data, template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '{}'::jsonb, NOW())")
                 self.conn.commit()
 
             print(f"  ✓ 创建了 {count:,} 个端点\n")
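The template fixes in this generator are worth calling out: `psycopg2.extras.execute_values` substitutes each row tuple into `template` verbatim, so the `%s` placeholders plus literals (`NOW()`, `'{}'::jsonb`) must line up with the column list exactly; adding `response_headers` without extending the template is an immediate mismatch. A minimal sketch with an assumed table:

```python
from psycopg2.extras import execute_values

def insert_rows(cur, rows):
    # 3 columns <-> 2 %s from each tuple + 1 NOW() literal
    execute_values(
        cur,
        "INSERT INTO demo (name, payload, created_at) VALUES %s",  # assumed table
        rows,  # e.g. [("a", "{}"), ("b", '{"k": 1}')]
        template="(%s, %s::jsonb, NOW())",
    )
```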
@@ -8,7 +8,7 @@ export default function ScanHistoryDetailPage() {
   const router = useRouter()
 
   useEffect(() => {
-    router.replace(`/scan/history/${id}/subdomain/`)
+    router.replace(`/scan/history/${id}/websites/`)
   }, [id, router])
 
   return null
@@ -5,15 +5,15 @@ import { useEffect } from "react"
 
 /**
  * Target detail page (compatible with old routes)
- * Automatically redirects to subdomain page
+ * Automatically redirects to websites page
  */
 export default function TargetDetailsPage() {
   const { id } = useParams<{ id: string }>()
   const router = useRouter()
 
   useEffect(() => {
-    // Redirect to subdomain page
-    router.replace(`/target/${id}/subdomain/`)
+    // Redirect to websites page
+    router.replace(`/target/${id}/websites/`)
   }, [id, router])
 
   return null
@@ -5,15 +5,15 @@ import { useEffect } from "react"
 
 /**
  * Target detail default page
- * Automatically redirects to subdomain page
+ * Automatically redirects to websites page
 */
 export default function TargetDetailPage() {
   const { id } = useParams<{ id: string }>()
   const router = useRouter()
 
   useEffect(() => {
-    // Redirect to subdomain page
-    router.replace(`/target/${id}/subdomain/`)
+    // Redirect to websites page
+    router.replace(`/target/${id}/websites/`)
   }, [id, router])
 
   return null
@@ -12,6 +12,7 @@ import { ExpandableCell, ExpandableTagList } from "@/components/ui/data-table/ex
 export interface EndpointTranslations {
   columns: {
     url: string
+    host: string
     title: string
     status: string
     contentLength: string
@@ -22,6 +23,7 @@ export interface EndpointTranslations {
     bodyPreview: string
     vhost: string
     gfPatterns: string
+    responseHeaders: string
     responseTime: string
     createdAt: string
   }
@@ -112,6 +114,19 @@ export function createEndpointColumns({
       <ExpandableCell value={row.getValue("url")} />
     ),
   },
+  {
+    accessorKey: "host",
+    meta: { title: t.columns.host },
+    header: ({ column }) => (
+      <DataTableColumnHeader column={column} title={t.columns.host} />
+    ),
+    size: 200,
+    minSize: 100,
+    maxSize: 300,
+    cell: ({ row }) => (
+      <ExpandableCell value={row.getValue("host")} />
+    ),
+  },
   {
     accessorKey: "title",
     meta: { title: t.columns.title },
@@ -262,6 +277,24 @@ export function createEndpointColumns({
     },
     enableSorting: false,
   },
+  {
+    accessorKey: "responseHeaders",
+    meta: { title: t.columns.responseHeaders },
+    header: ({ column }) => (
+      <DataTableColumnHeader column={column} title={t.columns.responseHeaders} />
+    ),
+    size: 250,
+    minSize: 150,
+    maxSize: 400,
+    cell: ({ row }) => {
+      const headers = row.getValue("responseHeaders") as Record<string, unknown> | null | undefined
+      if (!headers || Object.keys(headers).length === 0) return <span className="text-muted-foreground text-sm">-</span>
+      const formatted = Object.entries(headers)
+        .map(([key, value]) => `${key}: ${value}`)
+        .join("\n")
+      return <ExpandableCell value={formatted} maxLines={3} variant="mono" />
+    },
+  },
   {
     accessorKey: "responseTime",
     meta: { title: t.columns.responseTime },
@@ -15,6 +15,7 @@ const ENDPOINT_FILTER_FIELDS: FilterField[] = [
   { key: "title", label: "Title", description: "Page title" },
   { key: "status", label: "Status", description: "HTTP status code" },
   { key: "tech", label: "Tech", description: "Technologies" },
+  { key: "responseHeaders", label: "Headers", description: "Response headers" },
 ]
 
 // Endpoint page filter examples
@@ -62,6 +62,7 @@ export function EndpointsDetailView({
   const translations = useMemo(() => ({
     columns: {
       url: tColumns("common.url"),
+      host: tColumns("endpoint.host"),
       title: tColumns("endpoint.title"),
       status: tColumns("common.status"),
       contentLength: tColumns("endpoint.contentLength"),
@@ -72,6 +73,7 @@ export function EndpointsDetailView({
       bodyPreview: tColumns("endpoint.bodyPreview"),
       vhost: tColumns("endpoint.vhost"),
       gfPatterns: tColumns("endpoint.gfPatterns"),
+      responseHeaders: tColumns("endpoint.responseHeaders"),
       responseTime: tColumns("endpoint.responseTime"),
       createdAt: tColumns("common.createdAt"),
     },
@@ -22,6 +22,7 @@ export interface WebsiteTranslations {
     contentType: string
     bodyPreview: string
     vhost: string
+    responseHeaders: string
     createdAt: string
   }
   actions: {
@@ -230,6 +231,24 @@ export function createWebSiteColumns({
       )
     },
   },
+  {
+    accessorKey: "responseHeaders",
+    meta: { title: t.columns.responseHeaders },
+    header: ({ column }) => (
+      <DataTableColumnHeader column={column} title={t.columns.responseHeaders} />
+    ),
+    size: 250,
+    minSize: 150,
+    maxSize: 400,
+    cell: ({ row }) => {
+      const headers = row.getValue("responseHeaders") as Record<string, unknown> | null
+      if (!headers || Object.keys(headers).length === 0) return "-"
+      const formatted = Object.entries(headers)
+        .map(([key, value]) => `${key}: ${value}`)
+        .join("\n")
+      return <ExpandableCell value={formatted} maxLines={3} variant="mono" />
+    },
+  },
   {
     accessorKey: "createdAt",
     meta: { title: t.columns.createdAt },
@@ -16,6 +16,7 @@ const WEBSITE_FILTER_FIELDS: FilterField[] = [
   { key: "title", label: "Title", description: "Page title" },
   { key: "status", label: "Status", description: "HTTP status code" },
   { key: "tech", label: "Tech", description: "Technologies" },
+  { key: "responseHeaders", label: "Headers", description: "Response headers" },
 ]
 
 // Website page filter examples
@@ -54,6 +54,7 @@ export function WebSitesView({
       contentType: tColumns("endpoint.contentType"),
       bodyPreview: tColumns("endpoint.bodyPreview"),
       vhost: tColumns("endpoint.vhost"),
+      responseHeaders: tColumns("website.responseHeaders"),
       createdAt: tColumns("common.createdAt"),
     },
     actions: {
@@ -48,6 +48,7 @@
     },
     "endpoint": {
       "title": "Title",
+      "host": "Host",
       "contentLength": "Content Length",
       "location": "Location",
       "webServer": "Web Server",
@@ -56,10 +57,12 @@
       "bodyPreview": "Body Preview",
       "vhost": "VHost",
       "gfPatterns": "GF Patterns",
+      "responseHeaders": "Response Headers",
       "responseTime": "Response Time"
     },
     "website": {
-      "host": "Host"
+      "host": "Host",
+      "responseHeaders": "Response Headers"
     },
     "directory": {
       "length": "Length",
@@ -48,6 +48,7 @@
     },
     "endpoint": {
       "title": "Title",
+      "host": "Host",
       "contentLength": "Content Length",
       "location": "Location",
       "webServer": "Web Server",
@@ -56,10 +57,12 @@
       "bodyPreview": "Body Preview",
       "vhost": "VHost",
       "gfPatterns": "GF Patterns",
+      "responseHeaders": "响应头",
       "responseTime": "Response Time"
     },
     "website": {
-      "host": "Host"
+      "host": "Host",
+      "responseHeaders": "响应头"
     },
     "directory": {
       "length": "Length",
@@ -23,6 +23,7 @@ export interface Endpoint {
   bodyPreview?: string
   tech?: string[]
   vhost?: boolean | null
+  responseHeaders?: Record<string, unknown>
   createdAt?: string
 
   // Legacy domain association fields (may not exist in some APIs)
@@ -18,6 +18,7 @@ export interface WebSite {
   tech: string[]
   vhost: boolean | null
   subdomain: string
+  responseHeaders?: Record<string, unknown>
   createdAt: string
 }