Compare commits

...

243 Commits

Author SHA1 Message Date
yyhuni
d6d5338acb Add asset deletion feature 2026-01-07 09:29:31 +08:00
yyhuni
c521bdb511 Refactor: fallback logic 2026-01-07 08:45:27 +08:00
yyhuni
abf2d95f6f feat(targets): increase max batch size for target creation from 1000 to 5000
- Update MAX_BATCH_SIZE constant in BatchCreateTargetSerializer from 1000 to 5000
- Increase batch creation limit to support larger bulk operations
- Update documentation comment to reflect new limit
- Allows users to create up to 5000 targets in a single batch operation
2026-01-06 20:39:31 +08:00
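The guard itself is small enough to sketch. A minimal version assuming a standard DRF ListField; only MAX_BATCH_SIZE and the serializer name come from the commit, the field names are illustrative:

```python
from rest_framework import serializers

class BatchCreateTargetSerializer(serializers.Serializer):
    MAX_BATCH_SIZE = 5000  # raised from 1000 to support larger bulk imports

    targets = serializers.ListField(
        child=serializers.CharField(), allow_empty=False
    )

    def validate_targets(self, value):
        # Reject oversized batches up front, before any database work happens.
        if len(value) > self.MAX_BATCH_SIZE:
            raise serializers.ValidationError(
                f"At most {self.MAX_BATCH_SIZE} targets per batch."
            )
        return value
```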
github-actions[bot]
ab58cf0d85 chore: bump version to v1.4.0 2026-01-06 09:31:29 +00:00
yyhuni
fb0111adf2 Merge branch 'dev' 2026-01-06 17:27:35 +08:00
yyhuni
161ee9a2b1 Merge branch 'dev' 2026-01-06 17:27:16 +08:00
yyhuni
0cf75585d5 docs: add blacklist filtering notes to README 2026-01-06 17:25:31 +08:00
yyhuni
1d8d5f51d9 feat(blacklist): add mock data and service integration for blacklist management
- Create new blacklist mock data module with global and target-specific patterns
- Add mock functions for getting and updating global blacklist rules
- Add mock functions for getting and updating target-specific blacklist rules
- Integrate mock blacklist endpoints into global-blacklist.service.ts
- Integrate mock blacklist endpoints into target.service.ts
- Export blacklist mock functions from main mock index
- Enable testing of blacklist management UI without backend API
2026-01-06 17:08:51 +08:00
github-actions[bot]
3f8de07c8c chore: bump version to v1.4.0-dev 2026-01-06 09:02:31 +00:00
yyhuni
cd5c2b9f11 chore(notifications): remove test notification endpoint
- Remove test notification route from URL patterns
- Delete notifications_test view function and associated logic
- Clean up unused test endpoint that was used for development purposes
- Simplify notification API surface by removing non-production code
2026-01-06 16:57:29 +08:00
yyhuni
54786c22dd feat(scan): increase max batch size for quick scan operations
- Increase MAX_BATCH_SIZE from 1000 to 5000 in QuickScanSerializer
- Allows processing of larger batch scans in a single operation
- Improves throughput for bulk scanning workflows
2026-01-06 16:55:28 +08:00
yyhuni
d468f975ab feat(scan): implement fallback chain for endpoint URL export
- Add fallback chain for URL data sources: Endpoint → WebSite → default generation
- Import WebSite model and Path utility for enhanced file handling
- Create output directory automatically if it doesn't exist
- Add "source" field to return value indicating data origin (endpoint/website/default)
- Update docstring to document the three-tier fallback priority system
- Implement sequential export attempts with logging at each fallback stage
- Improve error handling and data source transparency for endpoint exports
2026-01-06 16:30:42 +08:00
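A minimal sketch of the three-tier fallback described above. The Endpoint and WebSite models come from the commit; the import path, function signature, and file naming are assumptions for illustration:

```python
import logging
from pathlib import Path

from apps.asset.models import Endpoint, WebSite  # assumed import path

logger = logging.getLogger(__name__)

def export_urls(target_id: int, target_name: str, output_dir: str) -> dict:
    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)  # create the output dir if missing
    path = out / f"urls_{target_id}.txt"

    # Tiers 1 and 2: try Endpoint first, then fall back to WebSite.
    for source, model in (("endpoint", Endpoint), ("website", WebSite)):
        urls = list(model.objects.filter(target_id=target_id)
                                 .values_list("url", flat=True))
        if urls:
            path.write_text("\n".join(urls))
            logger.info("Exported %d URLs from %s", len(urls), source)
            return {"file": str(path), "source": source}
        logger.info("No %s URLs found, trying next source", source)

    # Tier 3: generate a default URL from the target itself.
    path.write_text(f"https://{target_name}")
    return {"file": str(path), "source": "default"}
```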
yyhuni
a85a12b8ad feat(asset): create incremental materialized views for asset search
- Add pg_ivm extension for incremental materialized view maintenance
- Create asset_search_view for Website model with optimized columns for full-text search
- Create endpoint_search_view for Endpoint model with matching search schema
- Add database indexes on host, url, title, status_code, and created_at columns for both views
- Enable high-performance asset search queries with automatic view maintenance
2026-01-06 16:22:24 +08:00
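pg_ivm's create_immv() builds a table that Postgres keeps incrementally up to date, so it can be indexed and dropped like an ordinary table. A migration sketch assuming the database image ships pg_ivm; the view and column names follow the commit, the source table name and SELECT are illustrative:

```python
from django.db import migrations

class Migration(migrations.Migration):
    dependencies = [("asset", "0001_initial")]

    operations = [
        migrations.RunSQL("CREATE EXTENSION IF NOT EXISTS pg_ivm;"),
        migrations.RunSQL(
            """SELECT create_immv('asset_search_view',
                   'SELECT id, host, url, title, status_code, created_at
                      FROM asset_website');""",
            reverse_sql="DROP TABLE IF EXISTS asset_search_view;",
        ),
        # An IMMV is a real table, so plain indexes work on it.
        migrations.RunSQL(
            "CREATE INDEX idx_asv_host ON asset_search_view (host);"
        ),
    ]
```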
yyhuni
a8b0d97b7b feat(targets): update navigation routes and enhance add button UI
- Change target detail navigation route from `/website/` to `/overview/`
- Update TargetNameCell click handler to use new overview route
- Update TargetRowActions onView handler to use new overview route
- Add IconPlus icon import from @tabler/icons-react
- Add icon to create target button for improved visual clarity
- Improves navigation consistency and button affordance in targets table
2026-01-06 16:14:54 +08:00
yyhuni
b8504921c2 feat(fingerprints): add JSONL format support for Goby fingerprint imports
- Add support for JSONL format parsing in addition to standard JSON for Goby fingerprints
- Update GobyFingerprintService to validate both standard format (name/logic/rule) and JSONL format (product/rule)
- Implement _parse_json_content() method to handle both JSON and JSONL file formats with proper error handling
- Add JSONL parsing logic in frontend import dialog with per-line validation and error reporting
- Update file import endpoint documentation to indicate JSONL format support
- Improve error messages for encoding and parsing failures to aid user debugging
- Enable seamless import of Goby fingerprint data from multiple source formats
2026-01-06 16:10:14 +08:00
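A sketch of a dual-format parser along the lines of _parse_json_content(), assuming each JSONL line is one independent fingerprint object; the error-aggregation strategy is illustrative:

```python
import json

def parse_json_content(raw: str) -> list[dict]:
    """Accept either a standard JSON document or JSONL (one object per line)."""
    try:
        data = json.loads(raw)
        return data if isinstance(data, list) else [data]
    except json.JSONDecodeError:
        pass  # not a single JSON document; fall through to JSONL handling

    records, errors = [], []
    for lineno, line in enumerate(raw.splitlines(), start=1):
        if not line.strip():
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as exc:
            errors.append(f"line {lineno}: {exc.msg}")  # per-line error reporting
    if errors and not records:
        raise ValueError("Unrecognized fingerprint format: " + "; ".join(errors))
    return records
```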
yyhuni
ecfc1822fb style(target): update vulnerability icon color to muted foreground
- Change ShieldAlert icon color from red-500 to muted-foreground in target overview
- Improves visual consistency with design system color palette
- Reduces visual emphasis on vulnerability section for better UI balance
2026-01-06 12:01:59 +08:00
github-actions[bot]
81633642e6 chore: bump version to v1.3.16-dev 2026-01-06 03:55:16 +00:00
yyhuni
d1ec9b7f27 feat(settings): add global blacklist management page and UI integration
- Add new global blacklist settings page with pattern management interface
- Create useGlobalBlacklist and useUpdateGlobalBlacklist React Query hooks for data fetching and mutations
- Implement global-blacklist.service.ts with API integration for blacklist operations
- Add Global Blacklist navigation item to app sidebar with Ban icon
- Add internationalization support for blacklist UI with English and Chinese translations
- Include pattern matching rules documentation (domain wildcards, keywords, IP addresses, CIDR ranges)
- Add loading states, error handling, and success/error toast notifications
- Implement textarea input with change tracking and save button state management
2026-01-06 11:50:31 +08:00
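An illustrative matcher for the four documented rule types (domain wildcards, keywords, IP addresses, CIDR ranges); this is not the project's actual implementation, just a sketch of the semantics:

```python
import ipaddress

def is_blacklisted(host: str, patterns: list[str]) -> bool:
    for pat in patterns:
        if "/" in pat:  # CIDR range, e.g. 10.0.0.0/8
            try:
                if ipaddress.ip_address(host) in ipaddress.ip_network(pat, strict=False):
                    return True
            except ValueError:
                continue  # host is not an IP; CIDR rules do not apply
        elif pat.startswith("*."):  # domain wildcard, e.g. *.example.com
            if host == pat[2:] or host.endswith(pat[1:]):
                return True
        elif pat == host:  # exact domain or IP match
            return True
        elif pat in host:  # bare keyword
            return True
    return False

assert is_blacklisted("api.example.com", ["*.example.com"])
assert is_blacklisted("10.1.2.3", ["10.0.0.0/8"])
```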
yyhuni
2a3d9b4446 feat(target): add initiate scan button and improve overview layout
- Add "Initiate Scan" button to target overview header with Play icon
- Implement InitiateScanDialog component integration for quick scan initiation
- Improve scheduled scans card layout with flexbox for better vertical spacing
- Reduce displayed scheduled scans from 3 to 2 items for better UI balance
- Enhance vulnerability statistics card styling with proper flex layout
- Add state management for scan dialog open/close functionality
- Update i18n translations (en.json, zh.json) with "initiateScan" label
- Refactor target info section to accommodate new action button with justify-between layout
- Improve empty state centering in scheduled scans card using flex layout
2026-01-06 11:10:47 +08:00
yyhuni
9b63203b5a refactor(migrations,frontend,backend): reorganize app structure and enhance target management UI
- Consolidate common migrations into dedicated common app module
- Remove asset search materialized view migration (0002) and simplify migration structure
- Reorganize target detail page with new overview and settings sub-routes
- Add target overview component displaying key asset information
- Add target settings component for configuration management
- Enhance scan history UI with improved data table and column definitions
- Update scheduled scan dialog with better form handling
- Refactor target service with improved API integration
- Update scan hooks (use-scans, use-scheduled-scans) with better state management
- Add internationalization strings for new target management features
- Update Docker initialization and startup scripts for new app structure
- Bump Django to 5.2.7 and update dependencies in requirements.txt
- Add WeChat group contact information to README
- Improve UI tabs component with better accessibility and styling
2026-01-06 10:42:38 +08:00
yyhuni
6ff86e14ec Update README.md 2026-01-06 09:59:55 +08:00
yyhuni
4c1282e9bb Complete blacklist backend logic 2026-01-05 23:26:50 +08:00
yyhuni
ba3a9b709d feat(system-logs): enhance ANSI log viewer with log level colorization
- Add LOG_LEVEL_COLORS configuration mapping for DEBUG, INFO, WARNING, WARN, ERROR, and CRITICAL levels
- Implement hasAnsiCodes() function to detect presence of ANSI escape sequences in log content
- Add colorizeLogContent() function to parse plain text logs and apply color styling based on log levels
- Support dual-mode log parsing: ANSI color codes and plain text log level detection
- Rename converter to ansiConverter for clarity and consistency
- Change newline handling from true to false for manual line break control
- Apply color-coded styling to timestamps (gray), log levels (level-specific colors), and messages
- Add bold font-weight styling for CRITICAL level logs for better visibility
2026-01-05 16:27:31 +08:00
github-actions[bot]
283b28b46a chore: bump version to v1.3.15-dev 2026-01-05 02:05:29 +00:00
yyhuni
1269e5a314 refactor(scan): reorganize models and serializers into modular structure
- Split monolithic models.py into separate model files (scan_models.py, scan_log_model.py, scheduled_scan_model.py, subfinder_provider_settings_model.py)
- Split monolithic serializers.py into separate serializer files with dedicated modules for each domain
- Add SubfinderProviderSettings model to store API key configurations for subfinder data sources
- Create SubfinderProviderConfigService to generate provider configuration files dynamically
- Add subfinder_provider_settings views and serializers for API key management
- Update subdomain_discovery_flow to support provider configuration file generation and passing to subfinder
- Update command templates to use provider config file and remove recursive flag for better source coverage
- Add frontend settings page for managing API keys at /settings/api-keys
- Add frontend hooks and services for API key settings management
- Update sidebar navigation to include API keys settings link
- Add internationalization support for new API keys settings UI (English and Chinese)
- Improves code maintainability by organizing related models and serializers into logical modules
2026-01-05 10:00:19 +08:00
yyhuni
802e967906 docs: add online demo link to README
- Add new "🌐 在线 Demo" section with live demo URL
- Include disclaimer note that demo is UI-only without backend database
- Improve documentation to help users quickly access and test the application
2026-01-04 19:19:33 +08:00
github-actions[bot]
e446326416 chore: bump version to v1.3.14 2026-01-04 11:02:14 +00:00
yyhuni
e0abb3ce7b Merge branch 'dev' 2026-01-04 18:57:49 +08:00
yyhuni
d418baaf79 feat(mock,scan): add comprehensive mock data and improve system load management
- Add mock data files for directories, fingerprints, IP addresses, notification settings, nuclei templates, search, system logs, tools, and wordlists
- Update mock index to export new mock data modules
- Increase SCAN_LOAD_CHECK_INTERVAL from 30 to 180 seconds for better system stability
- Improve load check logging message to clarify OOM prevention strategy
- Enhance mock data infrastructure to support frontend development and testing
2026-01-04 18:52:08 +08:00
github-actions[bot]
f8da408580 chore: bump version to v1.3.13-dev 2026-01-04 10:24:10 +00:00
yyhuni
7cd4354d8f feat(scan,asset): add scan logging system and improve search view architecture
- Add user_logger utility for structured scan operation logging
- Create scan log views and API endpoints for retrieving scan execution logs
- Add scan-log-list component and use-scan-logs hook for frontend log display
- Refactor asset search views to remove ArrayField support from pg_ivm IMMV
- Update search_service.py to JOIN original tables for array field retrieval
- Add system architecture requirements (AMD64/ARM64) to README
- Update scan flow handlers to integrate logging system
- Enhance scan progress dialog with log viewer integration
- Add ANSI log viewer component for formatted log display
- Update scan service API to support log retrieval endpoints
- Migrate database schema to support new logging infrastructure
- Add internationalization strings for scan logs (en/zh)
This change improves observability of scan operations and resolves pg_ivm limitations with ArrayField types by fetching array data from original tables via JOIN operations.
2026-01-04 18:19:45 +08:00
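The JOIN-back pattern in miniature: the IMMV carries only scalar columns, and array fields such as tech are re-attached from the source table at query time. Table and column names here are assumptions based on the commit message:

```python
from django.db import connection

def search_websites(where_sql: str, params: list) -> list[dict]:
    # where_sql must come from a trusted query builder; values stay parameterized.
    sql = f"""
        SELECT v.id, v.host, v.url, v.title, v.status_code, w.tech
          FROM asset_search_view AS v
          JOIN asset_website     AS w ON w.id = v.id
         WHERE {where_sql}
         ORDER BY v.created_at DESC
    """
    with connection.cursor() as cur:
        cur.execute(sql, params)
        columns = [col[0] for col in cur.description]
        return [dict(zip(columns, row)) for row in cur.fetchall()]
```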
yyhuni
6bf35a760f chore(docker): configure Prefect home directory in worker image
- Add PREFECT_HOME environment variable pointing to /app/.prefect
- Create Prefect configuration directory to prevent home directory warnings
- Update step numbering in Dockerfile comments for clarity
- Ensures Prefect can properly initialize configuration without relying on user home directory
2026-01-04 10:39:11 +08:00
github-actions[bot]
be9ecadffb chore: bump version to v1.3.12-dev 2026-01-04 01:05:00 +00:00
yyhuni
adb53c9f85 feat(asset,scan): add configurable statement timeout and improve CSV export
- Add statement_timeout_ms parameter to search_service count() and stream_search() methods for long-running exports
- Replace server-side cursors with OFFSET/LIMIT batching for better Django compatibility
- Introduce create_csv_export_response() utility function to standardize CSV export handling
- Add engine-preset-selector and scan-config-editor components for enhanced scan configuration UI
- Update YAML editor component with improved styling and functionality
- Add i18n translations for new scan configuration features in English and Chinese
- Refactor CSV export endpoints to use new utility function instead of manual StreamingHttpResponse
- Remove unused uuid import from search_service.py
- Update nginx configuration for improved performance
- Enhance search service with configurable timeout support for large dataset exports
2026-01-04 08:58:31 +08:00
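A hedged sketch of the batching approach: OFFSET/LIMIT pages inside short transactions instead of one long-lived server-side cursor, with an optional per-statement timeout. The SQL shape and batch size are illustrative:

```python
from django.db import connection, transaction

def stream_rows(sql: str, params: list,
                statement_timeout_ms: int | None = None,
                batch_size: int = 1000):
    offset = 0
    while True:
        with transaction.atomic(), connection.cursor() as cur:
            if statement_timeout_ms:
                # SET LOCAL expires with the enclosing transaction.
                cur.execute(f"SET LOCAL statement_timeout = {int(statement_timeout_ms)}")
            cur.execute(f"{sql} LIMIT %s OFFSET %s",
                        [*params, batch_size, offset])
            rows = cur.fetchall()
        if not rows:
            return
        yield from rows
        if len(rows) < batch_size:
            return  # a short page means we reached the end
        offset += batch_size
```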
yyhuni
7b7bbed634 Update README.md 2026-01-03 22:15:35 +08:00
github-actions[bot]
8dd3f0536e chore: bump version to v1.3.11-dev 2026-01-03 11:54:31 +00:00
yyhuni
8a8062a12d refactor(scan): rename merged_configuration to yaml_configuration
- Rename `merged_configuration` field to `yaml_configuration` in Scan and ScheduledScan models for clarity
- Update all references across scan repositories, services, views, and serializers
- Update database migration to reflect field name change with improved help text
- Update frontend components to use new field naming convention
- Add YAML editor component for improved configuration editing in UI
- Update engine configuration retrieval in initiate_scan_flow to use new field name
- Remove unused asset tasks __init__.py module
- Simplify README feedback section for better clarity
- Update frontend type definitions and internationalization messages for consistency
2026-01-03 19:50:20 +08:00
yyhuni
55908a2da5 fix(asset,scan): improve decorator usage and dialog layout
- Fix transaction.non_atomic_requests decorator usage in AssetSearchExportView by wrapping with method_decorator for proper class-based view compatibility
- Update scan progress dialog to use flexible width (sm:max-w-fit sm:min-w-[450px]) instead of fixed width for better responsiveness
- Refactor engine names display from single Badge to grid layout with multiple badges for improved readability when multiple engines are present
- Add proper spacing and alignment adjustments (gap-4, items-start) to accommodate multi-line engine badge display
- Add text-xs and whitespace-nowrap to engine badges for consistent styling in grid layout
2026-01-03 18:46:44 +08:00
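The decorator fix in miniature: transaction.non_atomic_requests is a function decorator, so a class-based view needs method_decorator to apply it to dispatch. The view body is elided here:

```python
from django.db import transaction
from django.utils.decorators import method_decorator
from django.views import View

@method_decorator(transaction.non_atomic_requests, name="dispatch")
class AssetSearchExportView(View):
    def get(self, request, *args, **kwargs):
        ...  # stream the CSV without an ATOMIC_REQUESTS transaction around it
```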
github-actions[bot]
22a7d4f091 chore: bump version to v1.3.10-dev 2026-01-03 10:45:32 +00:00
yyhuni
f287f18134 Update pinned images 2026-01-03 18:33:25 +08:00
yyhuni
de27230b7a Update build CI 2026-01-03 18:28:57 +08:00
github-actions[bot]
15a6295189 chore: bump version to v1.3.8-dev 2026-01-03 10:24:17 +00:00
yyhuni
674acdac66 refactor(asset): move database extension initialization to migrations
- Remove pg_trgm and pg_ivm extension setup from AssetConfig.ready() method
- Move extension creation to migration 0002 using RunSQL operations
- Add pg_trgm extension creation for text search index support
- Add pg_ivm extension creation for IMMV incremental maintenance
- Generate unique cursor names in search_service to prevent concurrent request conflicts
- Add @transaction.non_atomic_requests decorator to export view for server-side cursor compatibility
- Simplify app initialization by delegating extension setup to database migrations
- Improve thread safety and concurrency handling for streaming exports
2026-01-03 18:20:27 +08:00
github-actions[bot]
c59152bedf chore: bump version to v1.3.7-dev 2026-01-03 09:56:39 +00:00
yyhuni
b4037202dc feat: use registry cache for faster builds 2026-01-03 17:35:54 +08:00
yyhuni
4b4f9862bf ci(docker): add postgres image build configuration and update image tags
- Add xingrin-postgres image build job to docker-build workflow for multi-platform support (linux/amd64,linux/arm64)
- Update docker-compose.dev.yml to use IMAGE_TAG variable with dev as default fallback
- Update docker-compose.yml to use IMAGE_TAG variable with required validation
- Replace hardcoded postgres image tag (15) with dynamic IMAGE_TAG for better version management
- Enable flexible image tagging across development and production environments
2026-01-03 17:26:34 +08:00
github-actions[bot]
1c42e4978f chore: bump version to v1.3.5-dev 2026-01-03 08:44:06 +00:00
github-actions[bot]
57bab63997 chore: bump version to v1.3.3-dev 2026-01-03 05:55:07 +00:00
github-actions[bot]
b1f0f18ac0 chore: bump version to v1.3.4-dev 2026-01-03 05:54:50 +00:00
yyhuni
ccee5471b8 docs(readme): add notification push service documentation
- Add notification push service feature to visualization interface section
- Document support for real-time WeChat Work, Telegram, and Discord message push
- Enhance feature list clarity for notification capabilities
2026-01-03 13:34:36 +08:00
yyhuni
0ccd362535 Optimize download logic 2026-01-03 13:32:58 +08:00
yyhuni
7f2af7f7e2 feat(search): add result export functionality and pagination limit support
- Add optional limit parameter to AssetSearchService.search() method for controlling result set size
- Implement AssetSearchExportView for exporting search results as CSV files with UTF-8 BOM encoding
- Add CSV export endpoint at GET /api/assets/search/export/ with configurable MAX_EXPORT_ROWS limit (10000)
- Support both website and endpoint asset types with type-specific column mappings in CSV export
- Format array fields (tech, matched_gf_patterns) and dates appropriately in exported CSV
- Update URL routing to include new search export endpoint
- Update views __init__.py to export AssetSearchExportView
- Add CSV generation with streaming response for efficient memory usage on large exports
- Update frontend search service to support export functionality
- Add internationalization strings for export feature in en.json and zh.json
- Update smart-filter-input and search-results-table components to support export UI
- Update installation and Docker startup scripts for deployment compatibility
2026-01-03 13:22:21 +08:00
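The streaming side follows the standard Django "Echo" pseudo-buffer pattern, with a UTF-8 BOM prepended so Excel detects the encoding. A minimal sketch (row capping via MAX_EXPORT_ROWS is assumed to happen upstream):

```python
import csv
from django.http import StreamingHttpResponse

class Echo:
    """A writable that returns what it is asked to write, for csv.writer."""
    def write(self, value):
        return value

def csv_export_response(rows, header, filename="export.csv"):
    writer = csv.writer(Echo())

    def generate():
        yield "\ufeff"  # UTF-8 BOM so spreadsheet apps pick the right encoding
        yield writer.writerow(header)
        for row in rows:
            yield writer.writerow(row)

    response = StreamingHttpResponse(
        generate(), content_type="text/csv; charset=utf-8"
    )
    response["Content-Disposition"] = f'attachment; filename="{filename}"'
    return response
```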
yyhuni
4bd0f9e8c1 feat(search): implement dual-view IMMV architecture for website and endpoint assets
- Add incremental materialized view (IMMV) support for both Website and Endpoint asset types using pg_ivm extension
- Create asset_search_view IMMV with optimized indexes for host, title, url, headers, body, tech, status_code, and created_at fields
- Create endpoint_search_view IMMV with identical field structure and indexing strategy for endpoint-specific searches
- Extend search_service.py to support asset type routing with VIEW_MAPPING and VALID_ASSET_TYPES configuration
- Add comprehensive field mapping and array field definitions for both asset types
- Implement dual-query execution path in search views to handle website and endpoint searches independently
- Update frontend search components to support asset type filtering and result display
- Add search results table component with improved data presentation and filtering capabilities
- Update installation scripts and Docker configuration for pg_ivm extension deployment
- Add internationalization strings for new search UI elements in English and Chinese
- Consolidate index creation and cleanup logic in migrations for maintainability
- Enable automatic incremental updates on data changes without manual view refresh
2026-01-03 12:41:20 +08:00
yyhuni
68cc996e3b refactor(asset): standardize snapshot and asset model field naming and types
- Rename `status` to `status_code` in WebsiteSnapshotDTO for consistency
- Rename `web_server` to `webserver` in WebsiteSnapshotDTO for consistency
- Make `target_id` required field in EndpointSnapshotDTO and WebsiteSnapshotDTO
- Remove optional validation check for `target_id` in EndpointSnapshotDTO
- Convert CharField to TextField for url, location, title, webserver, and content_type fields in Endpoint and EndpointSnapshot models to support longer values
- Update migration 0001_initial.py to reflect field type changes from CharField to TextField
- Update all related services and repositories to use standardized field names
- Update serializers to map renamed fields correctly
- Ensure consistent field naming across DTOs, models, and database schema
2026-01-03 09:08:25 +08:00
github-actions[bot]
f1e79d638e chore: bump version to v1.3.2-dev 2026-01-03 00:33:26 +00:00
yyhuni
d484133e4c chore(docker): optimize server dockerfile with docker-ce-cli installation
- Replace full docker.io package with lightweight docker-ce-cli to reduce image size
- Add ca-certificates and gnupg dependencies for secure package management
- Improve Docker installation process for local Worker task distribution
- Reduce unnecessary dependencies in server container build
2026-01-03 08:09:03 +08:00
yyhuni
fc977ae029 chore(docker,frontend): optimize docker installation and add auth bypass config
- Replace docker.io installation script with apt-get package manager for better reliability
- Add NEXT_PUBLIC_SKIP_AUTH environment variable to Vercel config for development
- Improve Docker build layer caching by using native package manager instead of curl script
- Simplify frontend deployment configuration for local development workflows
2026-01-03 08:08:40 +08:00
yyhuni
f328474404 feat(frontend): add comprehensive mock data infrastructure for services
- Add mock data modules for auth, engines, notifications, scheduled-scans, and workers
- Implement mock authentication data with user profiles and login/logout responses
- Create mock scan engine configurations with multiple predefined scanning profiles
- Add mock notification system with various severity levels and categories
- Implement mock scheduled scan data with cron expressions and run history
- Add mock worker node data with status and performance metrics
- Update service layer to integrate with new mock data infrastructure
- Provide helper functions for filtering and paginating mock data
- Enable frontend development and testing without backend API dependency
2026-01-03 07:59:20 +08:00
yyhuni
68e726a066 chore(docker): update base image to python 3.10-slim-bookworm
- Update Python base image from 3.10-slim to 3.10-slim-bookworm
- Ensures compatibility with latest Debian stable release
- Improves security with updated system packages and dependencies
2026-01-02 23:19:09 +08:00
yyhuni
77a6f45909 fix: search statistics count issue 2026-01-02 23:12:55 +08:00
yyhuni
49d1f1f1bb Adopt IVM incremental-update approach for search 2026-01-02 22:46:40 +08:00
yyhuni
db8ecb1644 feat(search): add mock data infrastructure and vulnerability detail integration
- Add comprehensive mock data configuration for all major entities (dashboard, endpoints, organizations, scans, subdomains, targets, vulnerabilities, websites)
- Implement mock service layer with centralized config for development and testing
- Add vulnerability detail dialog integration to search results with lazy loading
- Enhance search result card with vulnerability viewing capability
- Update search materialized view migration to include vulnerability name field
- Implement default host fuzzy search fallback for bare text queries without operators
- Add vulnerability data formatting in search view for consistent API response structure
- Configure Vercel deployment settings and update Next.js configuration
- Update all service layers to support mock data injection for development environment
- Extend search types with improved vulnerability data structure
- Add internationalization strings for vulnerability loading errors
- Enable rapid frontend development and testing without backend API dependency
2026-01-02 19:06:09 +08:00
yyhuni
18cc016268 feat(search): implement advanced query parser with expression syntax support
- Add SearchQueryParser class to parse complex search expressions with operators (=, ==, !=)
- Support logical operators && (AND) and || (OR) for combining multiple conditions
- Implement field mapping for frontend to database field translation
- Add support for array field searching (tech stack) with unnest and ANY operators
- Support fuzzy matching (=), exact matching (==), and negation (!=) operators
- Add proper SQL injection prevention through parameterized queries
- Refactor search service to use expression-based filtering instead of simple filters
- Update search views to integrate new query parser
- Enhance frontend search hook and service to support new expression syntax
- Update search types to reflect new query structure
- Improve search page UI to display expression syntax examples and help text
- Enable complex multi-condition searches like: host="api" && tech="nginx" || status=="200"
2026-01-02 17:46:31 +08:00
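A very small illustration of turning the documented expression syntax into a parameterized WHERE clause. The real SearchQueryParser is more thorough (field mapping tables, array fields via unnest/ANY, validation); this sketch handles only scalar columns, and all names are illustrative:

```python
import re

FIELDS = {"host": "host", "title": "title", "status": "status_code"}
TOKEN = re.compile(r'(\w+)\s*(==|!=|=)\s*"([^"]*)"')  # == must precede =

def parse(expr: str) -> tuple[str, list]:
    sql_parts, params = [], []
    # Split on && / || while keeping the operators as tokens.
    for piece in re.split(r"\s*(&&|\|\|)\s*", expr):
        if piece == "&&":
            sql_parts.append("AND")
        elif piece == "||":
            sql_parts.append("OR")
        else:
            m = TOKEN.fullmatch(piece.strip())
            if not m:
                raise ValueError(f"Bad condition: {piece!r}")
            field, op, value = m.groups()
            column = FIELDS[field]  # whitelist: unknown fields raise KeyError
            if op == "=":           # fuzzy match
                sql_parts.append(f"{column} ILIKE %s")
                params.append(f"%{value}%")
            elif op == "==":        # exact match
                sql_parts.append(f"{column} = %s")
                params.append(value)
            else:                   # != negation
                sql_parts.append(f"{column} <> %s")
                params.append(value)
    return " ".join(sql_parts), params

# parse('host="api" && title="login" || status=="200"')
# -> ('host ILIKE %s AND title ILIKE %s OR status_code = %s',
#     ['%api%', '%login%', '200'])
```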
yyhuni
23bc463283 feat(search): improve technology stack filtering with fuzzy matching
- Replace exact array matching with fuzzy search using ILIKE operator
- Update tech filter to search within array elements using unnest() and EXISTS
- Support partial technology name matching (e.g., "node" matches "nodejs")
- Apply consistent fuzzy matching logic across both search methods
- Enhance user experience by allowing flexible technology stack queries
2026-01-02 17:01:24 +08:00
yyhuni
7b903b91b2 feat(search): implement comprehensive search infrastructure with materialized views and pagination
- Add asset search service with materialized view support for optimized queries
- Implement search refresh service for maintaining up-to-date search indexes
- Create database migrations for AssetStatistics, StatisticsHistory, Directory, and DirectorySnapshot models
- Add PostgreSQL GIN indexes with trigram operators for full-text search capabilities
- Implement search pagination component with configurable page size and navigation
- Add search result card component with enhanced asset display formatting
- Create search API views with filtering and sorting capabilities
- Add use-search hook for client-side search state management
- Implement search service client for API communication
- Update search types with pagination metadata and result structures
- Add English and Chinese translations for search UI components
- Enhance scheduler to support search index refresh tasks
- Refactor asset views into modular search_views and asset_views
- Update URL routing to support new search endpoints
- Improve scan flow handlers for better search index integration
2026-01-02 16:57:54 +08:00
yyhuni
b3136d51b9 Complete frontend UI design for the search page 2026-01-02 10:07:26 +08:00
github-actions[bot]
08372588a4 chore: bump version to v1.2.15 2026-01-01 15:44:15 +00:00
yyhuni
236c828041 chore(fingerprints): remove deprecated ARL fingerprint rules
- Remove obsolete fingerprint detection rules from ARL.yaml
- Clean up legacy device and service signatures that are no longer maintained
- Reduce fingerprint database size by eliminating unused detection patterns
- Improve maintainability by removing outdated vendor-specific rules
2026-01-01 22:45:08 +08:00
yyhuni
fb13bb74d8 feat(filter): add array fuzzy search support with PostgreSQL array_to_string
- Add ArrayToString custom PostgreSQL function for converting arrays to delimited strings
- Implement array field annotation in QueryBuilder to support fuzzy matching on JSON array fields
- Enhance _build_single_q to handle three operators for JSON arrays: exact match (==), negation (!=), and fuzzy search (=)
- Update target navigation routes from subdomain to website view for consistency
- Enable fuzzy search on array fields by converting them to text during query building
2026-01-01 22:41:57 +08:00
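The ArrayToString annotation is standard Django ORM machinery (Func wrapping Postgres's array_to_string). A sketch of the fuzzy "=" path; the WebSite import path and delimiter are assumptions:

```python
from django.db.models import F, Func, TextField, Value

from apps.asset.models import WebSite  # assumed import path

class ArrayToString(Func):
    function = "array_to_string"
    output_field = TextField()

# Fuzzy "=" on an array field: flatten the array to one string, then ILIKE.
qs = (
    WebSite.objects
    .annotate(tech_text=ArrayToString(F("tech"), Value(",")))
    .filter(tech_text__icontains="nginx")
)
```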
yyhuni
f076c682b6 feat(scan): add multi-engine support and config merging with enhanced indexing
- Add multi-engine support to Scan model with engine_ids and engine_names fields
- Implement config_merger utility for merging multiple engine configurations
- Add merged_configuration property to Scan model for unified config access
- Update scan creation and scheduling services to handle multiple engines
- Add pg_trgm GIN indexes to asset and snapshot models for fuzzy search on url, title, and name fields
- Update scan views and serializers to support multi-engine selection and display
- Enhance frontend components for multi-engine scan initiation and scheduling
- Update test data generation script for multi-engine scan scenarios
- Add internationalization strings for multi-engine UI elements
- Refactor scan flow to use merged configuration instead of single engine config
- Update Docker compose files with latest configuration
2026-01-01 22:35:05 +08:00
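A minimal deep-merge sketch in the spirit of config_merger: later engine configs override scalars and extend nested sections. The exact precedence rules are assumptions; PyYAML is assumed available:

```python
import yaml

def merge_engine_configs(yaml_docs: list[str]) -> dict:
    merged: dict = {}
    for doc in yaml_docs:
        _deep_merge(merged, yaml.safe_load(doc) or {})
    return merged

def _deep_merge(base: dict, extra: dict) -> None:
    for key, value in extra.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            _deep_merge(base[key], value)  # recurse into nested sections
        else:
            base[key] = value  # scalars/lists: the last engine wins
```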
yyhuni
9eda2caceb feat(asset): add response headers and body tracking with pg_trgm indexing
- Rename body_preview to response_body across endpoint and website models for consistency
- Change response_headers from Dict to string type for efficient text indexing
- Add pg_trgm PostgreSQL extension initialization in AssetConfig for GIN index support
- Update all DTOs to reflect response_body and response_headers field changes
- Modify repositories to handle new response_body and response_headers formats
- Update serializers and views to work with string-based response headers
- Add response_headers and response_body columns to frontend endpoint and website tables
- Update command templates and scan tasks to populate response_body and response_headers
- Add database initialization script for pg_trgm extension in PostgreSQL setup
- Update frontend types and translations for new field names
- Enable efficient full-text search on response headers and body content through GIN indexes
2026-01-01 19:34:11 +08:00
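Trigram GIN indexes on text columns are declared with Django's GinIndex plus the gin_trgm_ops opclass, assuming pg_trgm is installed. A model excerpt sketch (field options and index name are illustrative):

```python
from django.contrib.postgres.indexes import GinIndex
from django.db import models

class WebSite(models.Model):
    response_headers = models.TextField(blank=True, default="")
    response_body = models.TextField(blank=True, default="")

    class Meta:
        indexes = [
            # Backs ILIKE '%...%' searches over response bodies.
            GinIndex(fields=["response_body"], name="website_body_trgm",
                     opclasses=["gin_trgm_ops"]),
        ]
```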
yyhuni
b1c9e202dd feat(sidebar): add feedback link to secondary navigation menu
- Import IconMessageReport icon from tabler/icons-react for feedback menu item
- Add feedback navigation item linking to GitHub issues page
- Add "feedback" translation key to English messages (en.json)
- Add "feedback" translation key to Chinese messages (zh.json) as "反馈建议"
- Improves user engagement by providing direct access to issue reporting
2026-01-01 18:31:34 +08:00
yyhuni
918669bc29 style(ui): update expandable cell whitespace handling for better formatting
- Change whitespace class from `whitespace-normal` to `whitespace-pre-wrap` in expandable cell component
- Improves text rendering by preserving whitespace and line breaks in cell content
- Ensures consistent formatting display across different content types (mono, url, muted variants)
2026-01-01 16:41:47 +08:00
yyhuni
fd70b0544d docs(frontend): update Chinese translations to English for consistency
- Change "响应头" to "Response Headers" in endpoint messages
- Change "响应头" to "Response Headers" in website messages
- Maintain consistency across frontend message translations
- Improve clarity for international users by standardizing field labels
2026-01-01 16:23:03 +08:00
github-actions[bot]
0f2df7a5f3 chore: bump version to v1.2.14-dev 2026-01-01 05:13:25 +00:00
yyhuni
857ab737b5 feat(fingerprint): enhance xingfinger task with snapshot tracking and field merging
- Replace `not_found_count` with `created_count` and `snapshot_count` metrics in fingerprint detect flow
- Initialize and aggregate `snapshot_count` across tool statistics
- Refactor `parse_xingfinger_line()` to return structured dict with url, techs, server, title, status_code, and content_length
- Replace `bulk_merge_tech_field()` with `bulk_merge_website_fields()` to support merging multiple WebSite fields
- Implement smart merge strategy: arrays deduplicated, scalar fields only updated when empty/NULL
- Remove dynamic model loading via importlib in favor of direct WebSite model import
- Add WebsiteSnapshotDTO and DjangoWebsiteSnapshotRepository imports for snapshot handling
- Improve xingfinger output parsing to capture server, title, and HTTP metadata alongside technology detection
2026-01-01 12:40:49 +08:00
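The smart-merge rule, illustrated on plain dicts (the real bulk_merge_website_fields works on model instances): array fields are unioned and deduplicated, scalar fields are only filled in when currently empty/NULL:

```python
def merge_website_fields(existing: dict, incoming: dict) -> dict:
    merged = dict(existing)
    for field, value in incoming.items():
        if isinstance(value, list):
            # Dedupe while preserving order of first appearance.
            merged[field] = list(dict.fromkeys((merged.get(field) or []) + value))
        elif not merged.get(field):  # only overwrite empty/NULL scalars
            merged[field] = value
    return merged

site = {"tech": ["nginx"], "title": "", "server": "nginx/1.24"}
found = {"tech": ["nginx", "php"], "title": "Login", "server": "apache"}
assert merge_website_fields(site, found) == {
    "tech": ["nginx", "php"], "title": "Login", "server": "nginx/1.24"
}
```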
yyhuni
ee2d99edda feat(asset): add response headers tracking to endpoints and websites
- Add response_headers field to Endpoint and WebSite models as JSONField
- Add response_headers field to EndpointSnapshot and WebsiteSnapshot models
- Update all related DTOs to include response_headers with Dict[str, Any] type
- Add GIN indexes on response_headers fields for optimized JSON queries
- Update endpoint and website repositories to handle response_headers data
- Update serializers to include response_headers in API responses
- Update frontend components to display response headers in detail views
- Add response_headers to fingerprint detection and site scan tasks
- Update command templates and engine config to support header extraction
- Add internationalization strings for response headers in en.json and zh.json
- Update TypeScript types for endpoint and website to include response_headers
- Enhance scan history and target detail pages to show response header information
2026-01-01 12:25:22 +08:00
github-actions[bot]
db6ce16aca chore: bump version to v1.2.13-dev 2026-01-01 02:24:08 +00:00
yyhuni
ab800eca06 feat(frontend): reorder navigation tabs for improved UX
- Move "Websites" tab to first position in scan history and target layouts
- Reposition "IP Addresses" tab before "Ports" for better logical flow
- Maintain consistent tab ordering across both scan history and target pages
- Improve navigation hierarchy by placing primary discovery results first
2026-01-01 09:47:30 +08:00
yyhuni
e8e5572339 perf(asset): add GIN indexes for tech array fields and improve query parser
- Add GinIndex for tech array field in Endpoint model to optimize __contains queries
- Add GinIndex for tech array field in WebSite model to optimize __contains queries
- Import GinIndex from django.contrib.postgres.indexes
- Refactor QueryParser to protect quoted filter values during tokenization
- Implement placeholder-based filter extraction to preserve spaces within quoted values
- Replace filter tokens with placeholders before logical operator normalization
- Restore original filter conditions from placeholders during parsing
- Fix spacing in comments for consistency (add space after "从")
- Improves query performance for technology stack filtering on large datasets
2026-01-01 08:58:03 +08:00
github-actions[bot]
d48d4bbcad chore: bump version to v1.2.12-dev 2025-12-31 16:01:48 +00:00
yyhuni
d1cca4c083 Set base timeout to 10s 2025-12-31 23:27:02 +08:00
yyhuni
df0810c863 feat: add fingerprint recognition feature and update documentation
- Add fingerprint recognition section to README with support for 27,000+ rules from multiple sources (EHole, Goby, Wappalyzer, Fingers, FingerPrintHub, ARL)
- Update scanning pipeline architecture diagram to include fingerprint recognition stage between site identification and deep analysis
- Add fingerprint recognition styling to mermaid diagram for visual consistency
- Include WORKER_API_KEY environment variable in task distributor for worker authentication
- Update WeChat QR code image and public account name from "洋洋的小黑屋" to "塔罗安全学苑"
- Fix import statements in nav-system.tsx to use i18n navigation utilities instead of next/link and next/navigation
- Enhance scanning workflow documentation to reflect complete pipeline: subdomain discovery → port scanning → site identification → fingerprint recognition → URL collection → directory scanning → vulnerability scanning
2025-12-31 23:09:25 +08:00
yyhuni
d33e54c440 docs: simplify quick-start guide
- Remove alternative ZIP download method, keep only Git clone approach
- Remove update.sh script reference from service management section
- Remove dedicated "定期更新" (periodic updates) section
- Streamline documentation to focus on primary installation and usage paths
2025-12-31 22:50:08 +08:00
yyhuni
35a306fe8b fix: dev environment 2025-12-31 22:46:42 +08:00
yyhuni
724df82931 chore: pin Docker base image digests and add worker API key generation
- Pin golang:1.24 base image to specific digest to prevent upstream cache invalidation
- Pin ubuntu:24.04 base image to specific digest to prevent upstream cache invalidation
- Add WORKER_API_KEY generation in install.sh auto_fill_docker_env_secrets function
- Generate random 32-character string for WORKER_API_KEY during installation
- Update installation info message to include WORKER_API_KEY in generated secrets list
- Improve build reproducibility and security by using immutable image references
2025-12-31 22:40:38 +08:00
yyhuni
8dfffdf802 fix: authentication 2025-12-31 22:21:40 +08:00
github-actions[bot]
b8cb85ce0b chore: bump version to v1.2.9-dev 2025-12-31 13:48:44 +00:00
yyhuni
da96d437a4 Add authorization and authentication 2025-12-31 20:18:34 +08:00
github-actions[bot]
feaf8062e5 chore: bump version to v1.2.8-dev 2025-12-31 11:33:14 +00:00
yyhuni
4bab76f233 fix: organization deletion issue 2025-12-31 17:50:37 +08:00
yyhuni
09416b4615 fix: Redis port 2025-12-31 17:45:25 +08:00
github-actions[bot]
bc1c5f6b0e chore: bump version to v1.2.7-dev 2025-12-31 06:16:42 +00:00
github-actions[bot]
2f2742e6fe chore: bump version to v1.2.6-dev 2025-12-31 05:29:36 +00:00
yyhuni
be3c346a74 Add search fields 2025-12-31 12:40:21 +08:00
yyhuni
0c7a6fff12 Add search on the tech field 2025-12-31 12:37:02 +08:00
yyhuni
3b4f0e3147 fix: fingerprint recognition 2025-12-31 12:30:31 +08:00
yyhuni
51212a2a0c fix: fingerprint recognition 2025-12-31 12:17:23 +08:00
yyhuni
58533bbaf6 fix: Docker API 2025-12-31 12:03:08 +08:00
github-actions[bot]
6ccca1602d chore: bump version to v1.2.5-dev 2025-12-31 03:48:32 +00:00
yyhuni
6389b0f672 feat(fingerprints): Add type annotation to getAcceptConfig function
- Add explicit return type annotation `Record<string, string[]>` to getAcceptConfig function
- Improve type safety and IDE autocomplete for file type configuration
- Enhance code clarity for accepted file types mapping in import dialog
2025-12-31 10:17:25 +08:00
yyhuni
d7599b8599 feat(fingerprints): Add database indexes and expand test data generation
- Add database indexes on 'link' field in FingersFingerprint model for improved query performance
- Add database index on 'author' field in FingerPrintHubFingerprint model for filtering optimization
- Expand test data generation to include Fingers, FingerPrintHub, and ARL fingerprint types
- Add comprehensive fingerprint data generation methods with realistic templates and patterns
- Update test data cleanup to include all fingerprint table types
- Add i18n translations for fingerprint-related UI components and labels
- Optimize route prefetching hook for better performance
- Improve fingerprint data table columns and vulnerability columns display consistency
2025-12-31 10:04:15 +08:00
yyhuni
8eff298293 Update image mirror acceleration logic 2025-12-31 08:56:55 +08:00
yyhuni
3634101c5b Add ARL and other fingerprints 2025-12-31 08:55:37 +08:00
yyhuni
163973a7df feat(i18n): Add internationalization support to dropzone component
- Add useTranslations hook to DropzoneContent component for multi-language support
- Add useTranslations hook to DropzoneEmptyState component for multi-language support
- Replace hardcoded English strings with i18n translation keys in dropzone UI
- Add comprehensive translation keys for dropzone messages in en.json:
* uploadFile, uploadFiles, dragOrClick, dragOrClickReplace
* moreFiles, supports, minimum, maximum, sizeBetween
- Add corresponding Chinese translations in zh.json for all dropzone messages
- Support dynamic content in translations using parameterized keys (files count, size ranges)
- Ensure consistent user experience across English and Chinese interfaces
2025-12-30 21:19:37 +08:00
yyhuni
80ffecba3e feat(i18n): Add UI component i18n provider and standardize translation keys
- Add UiI18nProvider component to wrap UI library translations globally
- Integrate UiI18nProvider into root layout for consistent i18n support
- Standardize download action translation keys (allEndpoints → all, selectedEndpoints → selected)
- Update ExpandableTagList component prop from maxVisible to maxLines for better layout control
- Fix color scheme in dashboard stop scan button (chart-2 → primary)
- Add DOCKER_API_VERSION configuration to backend settings for Docker client compatibility
- Update task distributor to use configurable Docker API version (default 1.40)
- Add environment variable support for Docker API version in task execution commands
- Update i18n configuration and message files with standardized keys
- Ensure UI components respect application locale settings across all data tables and dialogs
2025-12-30 21:19:28 +08:00
yyhuni
3c21ac940c Restore SSH Docker support 2025-12-30 20:35:51 +08:00
yyhuni
5c9f484d70 fix(frontend): Fix i18n translation key references and add missing labels
- Change "nav" translation namespace to "navigation" in scan engine and wordlists pages
- Replace parameterized translation calls with raw translation strings for cron schedule options in scheduled scan page and dashboard component
- Cast raw translation results to string type for proper TypeScript typing
- Add missing "name" and "type" labels to fingerprint section in English and Chinese message files
- Ensure consistent translation key usage across components for better maintainability
2025-12-30 18:21:16 +08:00
yyhuni
7567f6c25b Update text descriptions 2025-12-30 18:08:39 +08:00
yyhuni
0599a0b298 Add ansi-to-html 2025-12-30 18:01:29 +08:00
yyhuni
f7557fe90c Replace log display with ansi-to-html 2025-12-30 18:01:22 +08:00
yyhuni
13571b9772 fix(frontend): Fix xterm SSR initialization error
- Add 100ms delay for terminal initialization to ensure DOM is mounted
- Use requestAnimationFrame for fit() to avoid dimensions error
- Add try-catch for all xterm operations
- Proper cleanup on unmount

Fixes: Cannot read properties of undefined (reading 'dimensions')
2025-12-30 17:41:38 +08:00
yyhuni
8ee76eef69 feat(frontend): Add ANSI color support for system logs
- Create AnsiLogViewer component using xterm.js
- Replace Monaco Editor with xterm for log viewing
- Native ANSI escape code rendering (colors, bold, etc.)
- Auto-scroll to bottom, clickable URLs support

Benefits:
- Colorized logs for better readability
- No more escape codes like [32m[0m in UI
- Professional terminal-like experience
2025-12-30 17:39:12 +08:00
yyhuni
2a31e29aa2 fix: Add shell quoting for command arguments
- Use shlex.quote() to escape special characters in argument values
- Fixes: 'unrecognized arguments' error when values contain spaces
- Example: target_name='example.com scan' now correctly quoted
2025-12-30 17:32:09 +08:00
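The quoting fix in one line: shlex.quote() makes a value safe to embed in a shell command string. The flag name below is hypothetical, only the quoted value comes from the commit:

```python
import shlex

target_name = "example.com scan"
cmd = f"python manage.py run_scan --target-name {shlex.quote(target_name)}"
# -> python manage.py run_scan --target-name 'example.com scan'
```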
yyhuni
81abc59961 Refactor: Migrate TaskDistributor to Docker SDK
- Replace CLI subprocess with Python Docker SDK
- Add DockerClientManager for unified container management
- Remove 300+ lines of shell command building code
- Enable future features: container status monitoring, log streaming

Breaking changes: None (backward compatible with existing scans)
Rollback: git reset --hard v1.0-before-docker-sdk
2025-12-30 17:23:18 +08:00
yyhuni
ffbfec6dd5 feat(stage2): Refactor TaskDistributor to use Docker SDK
- Replace CLI subprocess calls with DockerClientManager.run_container()
- Add helper methods: _build_container_command, _build_container_environment, _build_container_volumes
- Refactor execute_scan_flow() and execute_cleanup_on_all_workers() to use SDK
- Remove old CLI methods: _build_docker_command, _execute_docker_command, _execute_local_docker, _execute_ssh_docker
- Remove paramiko import (no longer needed for local workers)

Benefits:
- 300+ lines removed (CLI string building complexity)
- Type-safe container configuration (no more shlex.quote errors)
- Structured error handling (ImageNotFound, APIError)
- Ready for container status monitoring and log streaming
2025-12-30 17:20:26 +08:00
yyhuni
a0091636a8 feat(stage1): Add DockerClientManager
- Create docker_client_manager.py with local Docker client support
- Add container lifecycle management (run, status, logs, stop, remove)
- Implement structured error handling (ImageNotFound, APIError)
- Add client connection caching and reuse
- Set Docker API version to 1.40 (compatible with Docker 19.03+)
- Add dependencies: docker>=6.0.0, packaging>=21.0

TODO: Remote worker support (Docker Context or SSH tunnel)
2025-12-30 17:17:17 +08:00
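A hedged sketch of DockerClientManager's core call using the docker SDK (docker>=6.0.0). The method name mirrors the commit; defaults and error translation are assumptions:

```python
import docker
from docker.errors import APIError, ImageNotFound

class DockerClientManager:
    def __init__(self, api_version: str = "1.40"):
        # Cache one client per manager; version pinned for Docker 19.03+.
        self.client = docker.from_env(version=api_version)

    def run_container(self, image: str, command: list[str],
                      environment: dict, volumes: dict):
        try:
            return self.client.containers.run(
                image, command,
                environment=environment,
                volumes=volumes,  # e.g. {"/opt/tools": {"bind": "/opt/tools", "mode": "ro"}}
                detach=True,      # return a Container for status/log polling
                remove=False,
            )
        except ImageNotFound:
            raise RuntimeError(f"Image not available on worker: {image}")
        except APIError as exc:
            raise RuntimeError(f"Docker API error: {exc.explanation}")
```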
yyhuni
69490ab396 feat: Add DockerClientManager for unified Docker client management
- Create docker_client_manager.py with local Docker client support
- Add container lifecycle management (run, status, logs, stop, remove)
- Implement structured error handling (ImageNotFound, APIError)
- Add client connection caching and reuse
- Set Docker API version to 1.40 (compatible with Docker 19.03+)
- Add docker>=6.0.0 and packaging>=21.0 dependencies

TODO: Remote worker support (Docker Context or SSH tunnel)
2025-12-30 17:15:29 +08:00
yyhuni
7306964abf Update README 2025-12-30 16:44:08 +08:00
yyhuni
cb6b0259e3 fix: response mismatch 2025-12-30 16:40:17 +08:00
yyhuni
e1b4618e58 refactor(worker): isolate scan tools to dedicated directory
- Move scan tools base path from `/usr/local/bin` to `/opt/xingrin-tools/bin` to avoid conflicts with system tools and Python packages
- Create dedicated `/opt/xingrin-tools/bin` directory in worker Dockerfile following FHS standards
- Update PATH environment variable to prioritize project-specific tools directory
- Add `SCAN_TOOLS_PATH` environment variable to `.env.example` with documentation
- Update settings.py to use new default path with explanatory comments
- Fix TypeScript type annotation in system-logs-view.tsx for better maintainability
- Remove frontend package-lock.json to reduce repository size
- Update task distributor comment to reflect new tool location
This change improves tool isolation and prevents naming conflicts while maintaining FHS compliance.
2025-12-30 11:42:09 +08:00
yyhuni
556dcf5f62 Refactor log UI functionality 2025-12-30 11:13:38 +08:00
yyhuni
0628eef025 Refactor responses into a standard response format 2025-12-30 10:56:26 +08:00
yyhuni
38ed8bc642 fix(scan): improve config parser validation and enable subdomain resolve timeout
- Uncomment timeout: auto setting in subdomain discovery config example
- Add validation to reject None or non-dict configuration values
- Raise ValueError with descriptive message when config is None
- Raise ValueError when config is not a dictionary type
- Update docstring to document Raises section for error conditions
- Prevent silent failures from malformed YAML configurations
2025-12-30 08:54:02 +08:00
yyhuni
2f4d6a2168 Unify tool mounts under /usr/local/bin 2025-12-30 08:45:36 +08:00
yyhuni
c25cb9e06b fix: tool mounts 2025-12-30 08:39:17 +08:00
yyhuni
b14ab71c7f fix: auth frontend 2025-12-30 08:12:04 +08:00
github-actions[bot]
8b5060e2d3 chore: bump version to v1.2.2-dev 2025-12-29 17:08:05 +00:00
yyhuni
3c9335febf refactor: determine target branch by tag location instead of naming
- Check which branch contains the tag (main or dev)
- Update VERSION file on the source branch
- Only tags from main branch update 'latest' Docker tag
- More flexible and follows standard Git workflow
2025-12-29 23:34:05 +08:00
yyhuni
1b95e4f2c3 feat: update VERSION file for dev tags on dev branch
- Dev tags (v*-dev) now update VERSION file on dev branch
- Release tags (v* without suffix) update VERSION file on main branch
- Keeps main and dev branches independent
2025-12-29 23:30:17 +08:00
yyhuni
d20a600afc refactor: use settings.GIT_MIRROR instead of os.getenv in worker_views 2025-12-29 23:13:35 +08:00
yyhuni
c29b11fd37 feat: add GIT_MIRROR to worker config center
- Add gitMirror field to worker configuration API
- Container bootstrap reads gitMirror and sets GIT_MIRROR env var
- Remove redundant GIT_MIRROR injection from task_distributor
- All environment variables are managed through config center
2025-12-29 23:11:31 +08:00
yyhuni
6caf707072 refactor: replace Chinese comments with English in frontend components
- Replace all Chinese inline comments with English equivalents across 24 frontend component files
- Update JSDoc comments to use English for better code documentation
- Improve code readability and maintainability for international development team
- Standardize comment style across directories, endpoints, ip-addresses, subdomains, and websites components
- Ensure consistency with previous frontend refactoring efforts
2025-12-29 23:01:16 +08:00
yyhuni
2627b1fc40 refactor: replace Chinese comments with English across frontend components
- Replace Chinese comments with English in fingerprint components (ehole, goby, wappalyzer)
- Update comments in scan engine, history, and scheduled scan modules
- Translate comments in worker deployment and configuration dialogs
- Update comments in subdomain management and target components
- Translate comments in tools configuration and command modules
- Replace Chinese comments in vulnerability components
- Improve code maintainability and consistency with English documentation standards
- Update Docker build workflow cache configuration with image-specific scopes for better cache isolation
2025-12-29 22:14:12 +08:00
yyhuni
ec6712b9b4 fix: add null coalescing to prevent undefined values in i18n translations
- Add null coalescing operator (?? "") to all i18n translation parameters across components
- Fix scheduled scan deletion dialog to handle undefined scheduled scan name
- Fix nuclei page to pass locale parameter to formatDateTime function
- Fix organization detail view unlink target dialog to handle undefined target name
- Fix organization list deletion dialog to handle undefined organization name
- Fix organization targets detail view unlink dialog to handle undefined target name
- Fix engine edit dialog to handle undefined engine name
- Fix scan history list deletion and stop dialogs to handle undefined target names
- Fix worker list deletion dialog to handle undefined worker name
- Fix all targets detail view deletion dialog to handle undefined target name
- Fix custom tools and opensource tools lists to handle undefined tool names
- Fix vulnerabilities detail view to handle undefined vulnerability names
- Prevents runtime errors when translation parameters are undefined or null
2025-12-29 21:03:47 +08:00
yyhuni
9d5e4d5408 fix(scan/engine): handle undefined engine name in delete confirmation
- Add nullish coalescing operator to prevent undefined value in delete confirmation message
- Ensure engineToDelete?.name defaults to empty string when undefined
- Improve robustness of alert dialog description rendering
2025-12-29 20:54:00 +08:00
yyhuni
c5d5b24c8f Update GitHub Actions so dev versions no longer bump VERSION 2025-12-29 20:48:42 +08:00
yyhuni
671cb56b62 fix: accelerate Nuclei template sync; download templates to the host and keep them updated 2025-12-29 20:43:49 +08:00
yyhuni
51025f69a8 fix: mainland China acceleration 2025-12-29 20:15:25 +08:00
yyhuni
b2403b29c4 Remove update.sh 2025-12-29 20:08:40 +08:00
yyhuni
18ef01a47b fix: CN acceleration 2025-12-29 20:03:14 +08:00
yyhuni
0bf8108fb3 fix: registry mirror acceleration 2025-12-29 19:51:33 +08:00
yyhuni
837ad19131 fix: registry mirror acceleration issue 2025-12-29 19:48:48 +08:00
yyhuni
d7de9a7129 fix: registry mirror acceleration issue 2025-12-29 19:39:59 +08:00
yyhuni
22b4e51b42 feat(xget): add Git URL acceleration support via Xget proxy
- Add xget_proxy utility module to convert Git repository URLs to Xget proxy format
- Support domain mapping for GitHub, GitLab, Gitea, and Codeberg repositories
- Integrate Xget proxy into Nuclei template repository cloning process
- Add XGET_MIRROR environment variable configuration in container bootstrap
- Export XGET_MIRROR setting to worker node configuration endpoint
- Add --mirror flag to install.sh for easy Xget acceleration setup
- Add configure_docker_mirror function to install.sh for Docker registry mirror configuration
- Enable Git clone acceleration for faster template repository downloads in air-gapped or bandwidth-limited environments
2025-12-29 19:32:05 +08:00
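A hedged sketch of the Git URL rewrite. The per-host path prefixes ("gh", "gl", ...) are assumptions about the Xget convention, not confirmed from the source; only the supported hosts and the XGET_MIRROR variable come from the commit:

```python
import os
from urllib.parse import urlparse

DOMAIN_PREFIXES = {  # prefix values are assumed, not confirmed
    "github.com": "gh",
    "gitlab.com": "gl",
    "gitea.com": "gitea",
    "codeberg.org": "codeberg",
}

def to_xget_url(repo_url: str) -> str:
    mirror = os.environ.get("XGET_MIRROR")
    parsed = urlparse(repo_url)
    prefix = DOMAIN_PREFIXES.get(parsed.hostname or "")
    if not mirror or not prefix:
        return repo_url  # no mirror configured or unsupported host: leave as-is
    return f"{mirror.rstrip('/')}/{prefix}{parsed.path}"
```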
yyhuni
d03628ee45 feat(i18n): translate Chinese comments to English in scan history component
- Replace Chinese console error messages with English equivalents
- Translate all inline code comments from Chinese to English
- Update dialog and section comments for consistency
- Improve code readability and maintainability for international development team
2025-12-29 18:42:13 +08:00
yyhuni
0baabe0753 feat(i18n): internationalize frontend components with English translations
- Replace Chinese comments with English equivalents across auth, dashboard, and scan components
- Update UI text labels and descriptions from Chinese to English in bulk-add-urls-dialog
- Translate placeholder text and dialog titles in asset management components
- Update column headers and data table labels to English in organization and engine modules
- Standardize English documentation strings in auth-guard and auth-layout components
- Improve code maintainability and accessibility for international users
- Align with existing internationalization efforts across the frontend codebase
2025-12-29 18:39:25 +08:00
yyhuni
e1191d7abf Internationalize frontend UI 2025-12-29 18:10:05 +08:00
yyhuni
82a2e9a0e7 Internationalize frontend 2025-12-29 18:09:57 +08:00
yyhuni
1ccd1bc338 Update gfPatterns 2025-12-28 20:26:32 +08:00
yyhuni
b4d42f5372 Update fingerprint management search 2025-12-28 20:18:26 +08:00
yyhuni
2c66450756 Unify UI 2025-12-28 20:10:46 +08:00
yyhuni
119d82dc89 Update UI 2025-12-28 20:06:17 +08:00
yyhuni
fba7f7c508 Update UI 2025-12-28 19:55:57 +08:00
yyhuni
99d384ce29 Fix frontend column widths 2025-12-28 16:37:35 +08:00
yyhuni
07f36718ab Refactor frontend 2025-12-28 16:27:01 +08:00
yyhuni
7e3f69c208 Refactor frontend components 2025-12-28 12:05:47 +08:00
yyhuni
5f90473c3c fix: UI 2025-12-28 08:48:25 +08:00
yyhuni
e2a815b96a feat: add Goby and Wappalyzer fingerprints 2025-12-28 08:42:37 +08:00
yyhuni
f86a1a9d47 Optimize UI 2025-12-27 22:01:40 +08:00
yyhuni
d5945679aa Add logging 2025-12-27 21:50:43 +08:00
yyhuni
51e2c51748 fix: directory creation and mounting 2025-12-27 21:44:47 +08:00
yyhuni
e2cbf98dda fix: bug where target name was stripped 2025-12-27 21:27:05 +08:00
yyhuni
cd72bdf7c3 Integrate fingerprints 2025-12-27 20:19:25 +08:00
yyhuni
35abcf7e39 Add blacklist logic 2025-12-27 20:12:01 +08:00
yyhuni
09f2d343a4 feat: refactor export logic and add blacklist filtering 2025-12-27 20:11:50 +08:00
yyhuni
54d1f86bde fix: installation error 2025-12-27 17:51:32 +08:00
yyhuni
a3997c9676 Update YAML 2025-12-27 12:52:49 +08:00
yyhuni
c90a55f85e Update load-handling logic 2025-12-27 12:49:14 +08:00
yyhuni
2eab88b452 chore(install): Add banner display and update confirmation
- Add show_banner() function to display XingRin ASCII art logo
- Call show_banner() before header in install.sh initialization
- Add experimental feature warning in update.sh with user confirmation
- Prompt user to confirm before proceeding with update operation
- Suggest full reinstall via uninstall.sh and install.sh as alternative
- Improve user experience with visual feedback and safety checks
2025-12-27 12:41:04 +08:00
yyhuni
1baf0eb5e1 fix: fingerprint scan command 2025-12-27 12:29:50 +08:00
yyhuni
b61e73f7be fix: JSON output 2025-12-27 12:14:35 +08:00
yyhuni
e896734dfc feat(scan-engine): Add fingerprint detection feature flag
- Add fingerprint_detect feature flag to engine configuration parser
- Enable fingerprint detection capability in scan engine features
- Integrate fingerprint detection into existing feature detection logic
2025-12-27 11:59:51 +08:00
yyhuni
cd83f52f35 Add fingerprint recognition 2025-12-27 11:39:26 +08:00
yyhuni
3e29554c36 feat: fingerprint recognition 2025-12-27 11:39:19 +08:00
yyhuni
18e02b536e Add fingerprint recognition 2025-12-27 10:06:23 +08:00
yyhuni
4c1c6f70ab Update fingerprints 2025-12-26 21:50:38 +08:00
yyhuni
a72e7675f5 Update UI 2025-12-26 21:40:56 +08:00
yyhuni
93c2163764 feat: import EHole fingerprints 2025-12-26 21:34:36 +08:00
yyhuni
de72c91561 Update UI 2025-12-25 18:31:09 +08:00
github-actions[bot]
3e6d060b75 chore: bump version to v1.1.14 2025-12-25 10:11:08 +00:00
yyhuni
766f045904 fix: ffuf concurrency issue 2025-12-25 18:02:25 +08:00
yyhuni
8acfe1cc33 Adjust log levels 2025-12-25 17:44:31 +08:00
github-actions[bot]
7aec3eabb2 chore: bump version to v1.1.13 2025-12-25 08:29:39 +00:00
yyhuni
b1f11c36a4 fix: wordlist download port 2025-12-25 16:21:32 +08:00
yyhuni
d97fb5245a fix: prompt text 2025-12-25 16:18:46 +08:00
github-actions[bot]
ddf9a1f5a4 chore: bump version to v1.1.12 2025-12-25 08:10:57 +00:00
yyhuni
47f9f96a4b Update documentation 2025-12-25 16:07:30 +08:00
yyhuni
6f43e73162 Update README 2025-12-25 16:06:01 +08:00
yyhuni
9b7d496f3e Update: change port to 8083 2025-12-25 16:02:55 +08:00
github-actions[bot]
6390849d52 chore: bump version to v1.1.11 2025-12-25 03:58:05 +00:00
yyhuni
7a6d2054f6 Update: UI 2025-12-25 11:50:21 +08:00
yyhuni
73ebaab232 Update: UI 2025-12-25 11:31:25 +08:00
github-actions[bot]
11899b29c2 chore: bump version to v1.1.10 2025-12-25 03:20:57 +00:00
github-actions[bot]
877d2a56d1 chore: bump version to v1.1.9 2025-12-25 03:13:58 +00:00
yyhuni
dc1e94f038 Update: UI 2025-12-25 11:12:51 +08:00
yyhuni
9c3833d13d Update: UI 2025-12-25 11:06:00 +08:00
github-actions[bot]
92f3b722ef chore: bump version to v1.1.8 2025-12-25 02:16:12 +00:00
yyhuni
9ef503c666 Update: UI 2025-12-25 10:12:06 +08:00
yyhuni
c3a43e94fa fix: UI 2025-12-25 10:08:25 +08:00
github-actions[bot]
d6d94355fb chore: bump version to v1.1.7 2025-12-25 02:02:27 +00:00
yyhuni
bc638eabf4 更新:ui 2025-12-25 10:02:13 +08:00
yyhuni
5acaada7ab 新增:支持多字段搜索功能 2025-12-25 09:54:50 +08:00
github-actions[bot]
aaad3f29cf chore: bump version to v1.1.6 2025-12-24 12:19:12 +00:00
yyhuni
f13eb2d9b2 Update UI styling 2025-12-24 20:10:12 +08:00
yyhuni
f1b3b60382 feat: EVA theme 2025-12-24 19:57:26 +08:00
yyhuni
e249056289 Update README.md 2025-12-24 19:14:22 +08:00
yyhuni
dba195b83a Update README 2025-12-24 17:28:08 +08:00
github-actions[bot]
9b494e6c67 chore: bump version to v1.1.5 2025-12-24 09:23:21 +00:00
yyhuni
2841157747 Improve font rendering 2025-12-24 17:14:45 +08:00
yyhuni
f6c1fef1a6 fix: deletion issue on the dashboard page 2025-12-24 17:10:48 +08:00
yyhuni
6ec0adf9dd Improve log output 2025-12-24 16:39:13 +08:00
yyhuni
22c6661567 Update UI 2025-12-24 16:25:41 +08:00
github-actions[bot]
d9ed004e35 chore: bump version to v1.1.4 2025-12-24 08:23:12 +00:00
yyhuni
a0d9d1f29d feat: bulk asset creation 2025-12-24 16:15:33 +08:00
yyhuni
8aa9ed2a97 feat: bulk-add assets from the target detail page 2025-12-24 16:15:22 +08:00
yyhuni
8baf29d1c3 feat: subdomain creation 2025-12-24 11:27:48 +08:00
yyhuni
248e48353a Rename database field to created_at 2025-12-24 10:35:55 +08:00
yyhuni
0d210be50b Rename subdomain field discovered_at to created_at 2025-12-24 10:19:01 +08:00
github-actions[bot]
f7c0d0b215 chore: bump version to v1.1.3 2025-12-24 02:11:23 +00:00
github-actions[bot]
d83428f27b chore: bump version to v1.1.2 2025-12-24 02:08:28 +00:00
yyhuni
45a09b8173 Improve database connection stability 2025-12-24 10:03:24 +08:00
yyhuni
11dfdee6fd Update UI 2025-12-24 09:57:39 +08:00
yyhuni
e53a884d13 Update UI 2025-12-24 09:54:48 +08:00
yyhuni
3b318c89e3 fix: theme UI 2025-12-24 09:46:51 +08:00
github-actions[bot]
e564bc116a chore: bump version to v1.1.1 2025-12-23 12:17:52 +00:00
yyhuni
410c543066 Polish a large batch of UI details 2025-12-23 20:03:27 +08:00
github-actions[bot]
66da140801 chore: bump version to v1.1.0 2025-12-23 11:20:10 +00:00
yyhuni
e60aac3622 Update input field height 2025-12-23 19:18:58 +08:00
yyhuni
14aaa71cb1 Adjust scan parameters 2025-12-23 19:09:09 +08:00
yyhuni
0309dba510 Improve compatibility 2025-12-23 19:08:12 +08:00
yyhuni
967ff8a69f Adjust scan parameters 2025-12-23 19:05:01 +08:00
yyhuni
9ac23d50b6 fix: vulnerability scan issue 2025-12-23 18:59:40 +08:00
yyhuni
265525c61e fix: vulnerability scan issue 2025-12-23 18:59:27 +08:00
yyhuni
1b9d05ce62 fix: increase vulnerability scan timeout to 10 minutes 2025-12-23 16:54:26 +08:00
yyhuni
737980b30f feat: download page data as CSV 2025-12-23 16:34:24 +08:00
yyhuni
494ee81478 feat: CSV download on the IP address page 2025-12-23 12:34:41 +08:00
yyhuni
452686b282 fix: ffuf path concatenation issue 2025-12-23 11:14:31 +08:00
yyhuni
c95c68f4e9 refactor(asset): Extract deduplication logic into reusable utility
- Create new `deduplicate_for_bulk` utility function in `apps/common/utils/dedup.py`
- Move hash utility from `apps/common/utils/hash.py` to `apps/common/utils/__init__.py`
- Update all asset repositories to use centralized deduplication before bulk operations
- Apply deduplication to directory, endpoint, host_port_mapping, subdomain, and website repositories
- Apply deduplication to all snapshot repositories for consistency
- Update vulnerability service to use new deduplication utility
- Update wordlist service and related helpers to use new utility structure
- Update organization and target repositories to use new utility
- Automatically deduplicate records by model unique constraints, keeping last occurrence
- Improve code reusability and reduce duplication across repositories
2025-12-23 11:09:17 +08:00
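A minimal sketch of what a helper like `deduplicate_for_bulk` could look like, deduplicating rows by a model's unique-constraint fields while keeping the last occurrence (the exact signature in `apps/common/utils/dedup.py` is not shown here and may differ):

```python
from typing import Iterable, Sequence, TypeVar

T = TypeVar("T")

def deduplicate_for_bulk(objs: Iterable[T], unique_fields: Sequence[str]) -> list[T]:
    """Drop duplicates keyed by unique_fields, keeping the last occurrence of each key."""
    seen: dict[tuple, T] = {}
    for obj in objs:
        key = tuple(getattr(obj, field) for field in unique_fields)
        seen[key] = obj  # a later duplicate overwrites the earlier one
    return list(seen.values())

# e.g. before Subdomain.objects.bulk_create(...):
# rows = deduplicate_for_bulk(rows, unique_fields=("name", "target_id"))
```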
yyhuni
b02f38606d fix(scan): Add quotes to file paths in command templates
- Wrap all file path variables with single quotes to handle paths with spaces
- Update subfinder, amass, sublist3r, assetfinder commands with quoted output paths
- Quote wordlist and input file paths in subdomain bruteforce and resolve commands
- Add quotes to dnsgen pipeline input/output file paths
- Quote domains_file parameter in naabu port scan commands
- Wrap url_file and scan_tools_base paths in httpx site scan command
- Quote wordlist and url parameters in ffuf directory scan command
- Add quotes to output file paths in waymore and katana URL fetch commands
- Quote input/output file paths in uro command
- Add quotes to endpoints_file in dalfox and nuclei vulnerability scan commands
- Prevents command execution failures when file paths contain spaces or special characters
2025-12-23 10:45:37 +08:00
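The quoting this commit adds lives in the command templates themselves; as a point of comparison, the same protection can be expressed in Python with `shlex.quote`. A minimal sketch (illustrative only, not the project's actual template engine):

```python
import shlex

def render_command(template: str, **params) -> str:
    """Fill a command template, shell-quoting every parameter value."""
    quoted = {key: shlex.quote(str(value)) for key, value in params.items()}
    return template.format(**quoted)

cmd = render_command(
    "subfinder -d {domain} -o {output_file}",
    domain="example.com",
    output_file="/tmp/scan results/subdomains.txt",  # path containing a space
)
print(cmd)
# subfinder -d example.com -o '/tmp/scan results/subdomains.txt'
```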
yyhuni
b543f3d2b7 feat(scan): add quick scan service and batch operation support
- Add QuickScanService for fast scans over minimal data
- Add bulk_create_ignore_conflicts to EndpointRepository for efficient batch endpoint creation
- Add bulk_create_ignore_conflicts to WebSiteRepository for efficient batch site creation
- Add validate_url, which requires URLs to include a protocol scheme
- Add is_valid_ip helper that validates IP addresses without raising exceptions
- Add detect_input_type to auto-detect the input type (url/domain/ip/cidr)
- Update the validators module docs with URL validation notes
- Improve the frontend quick-scan-dialog component with stronger input validation
- Update the frontend target-validator utility with better input handling
- Add a directory-scan max-workers setting to engine_config_example.yaml
- Allow directory_scan_flow to configure concurrency per tool
- Update scan_views to support the quick scan endpoint
2025-12-23 10:15:39 +08:00
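A minimal sketch of how `is_valid_ip` and `detect_input_type` could be implemented with the standard library (the real functions may differ; this just mirrors the behavior the commit describes):

```python
import ipaddress
from urllib.parse import urlparse

def is_valid_ip(value: str) -> bool:
    """Validate an IP address without raising exceptions."""
    try:
        ipaddress.ip_address(value)
        return True
    except ValueError:
        return False

def detect_input_type(value: str) -> str:
    """Classify raw input as url / ip / cidr / domain."""
    if urlparse(value).scheme in ("http", "https"):
        return "url"
    if is_valid_ip(value):
        return "ip"
    try:
        ipaddress.ip_network(value, strict=False)
        return "cidr"
    except ValueError:
        return "domain"

for sample in ("https://example.com", "10.0.0.1", "10.0.0.0/8", "api.example.com"):
    print(sample, "->", detect_input_type(sample))
```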
yyhuni
a18fb46906 Update README 2025-12-23 08:09:06 +08:00
github-actions[bot]
bb74f61ea2 chore: bump version to v1.0.36 2025-12-22 23:50:38 +00:00
592 changed files with 215049 additions and 25715 deletions

View File

@@ -19,7 +19,8 @@ permissions:
contents: write
jobs:
build:
# AMD64 build (native x64 runner)
build-amd64:
runs-on: ubuntu-latest
strategy:
matrix:
@@ -27,39 +28,30 @@ jobs:
- image: xingrin-server
dockerfile: docker/server/Dockerfile
context: .
platforms: linux/amd64,linux/arm64
- image: xingrin-frontend
dockerfile: docker/frontend/Dockerfile
context: .
platforms: linux/amd64 # Next.js crashes under QEMU when building for ARM64
- image: xingrin-worker
dockerfile: docker/worker/Dockerfile
context: .
platforms: linux/amd64,linux/arm64
- image: xingrin-nginx
dockerfile: docker/nginx/Dockerfile
context: .
platforms: linux/amd64,linux/arm64
- image: xingrin-agent
dockerfile: docker/agent/Dockerfile
context: .
platforms: linux/amd64,linux/arm64
- image: xingrin-postgres
dockerfile: docker/postgres/Dockerfile
context: docker/postgres
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Free disk space (for large builds like worker)
- name: Free disk space
run: |
echo "=== Before cleanup ==="
df -h
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
sudo docker image prune -af
echo "=== After cleanup ==="
df -h
- name: Generate SSL certificates for nginx build
if: matrix.image == 'xingrin-nginx'
@@ -69,10 +61,6 @@ jobs:
-keyout docker/nginx/ssl/privkey.pem \
-out docker/nginx/ssl/fullchain.pem \
-subj "/CN=localhost"
echo "SSL certificates generated for CI build"
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -83,7 +71,120 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Get version from git tag
- name: Get version
id: version
run: |
if [[ $GITHUB_REF == refs/tags/* ]]; then
echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
else
echo "VERSION=dev-$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
fi
- name: Build and push AMD64
uses: docker/build-push-action@v5
with:
context: ${{ matrix.context }}
file: ${{ matrix.dockerfile }}
platforms: linux/amd64
push: true
tags: ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}-amd64
build-args: IMAGE_TAG=${{ steps.version.outputs.VERSION }}
cache-from: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-amd64
cache-to: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-amd64,mode=max
provenance: false
sbom: false
# ARM64 build (native ARM64 runner)
build-arm64:
runs-on: ubuntu-22.04-arm
strategy:
matrix:
include:
- image: xingrin-server
dockerfile: docker/server/Dockerfile
context: .
- image: xingrin-frontend
dockerfile: docker/frontend/Dockerfile
context: .
- image: xingrin-worker
dockerfile: docker/worker/Dockerfile
context: .
- image: xingrin-nginx
dockerfile: docker/nginx/Dockerfile
context: .
- image: xingrin-agent
dockerfile: docker/agent/Dockerfile
context: .
- image: xingrin-postgres
dockerfile: docker/postgres/Dockerfile
context: docker/postgres
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Generate SSL certificates for nginx build
if: matrix.image == 'xingrin-nginx'
run: |
mkdir -p docker/nginx/ssl
openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
-keyout docker/nginx/ssl/privkey.pem \
-out docker/nginx/ssl/fullchain.pem \
-subj "/CN=localhost"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Get version
id: version
run: |
if [[ $GITHUB_REF == refs/tags/* ]]; then
echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
else
echo "VERSION=dev-$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
fi
- name: Build and push ARM64
uses: docker/build-push-action@v5
with:
context: ${{ matrix.context }}
file: ${{ matrix.dockerfile }}
platforms: linux/arm64
push: true
tags: ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}-arm64
build-args: IMAGE_TAG=${{ steps.version.outputs.VERSION }}
cache-from: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-arm64
cache-to: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-arm64,mode=max
provenance: false
sbom: false
# Merge the multi-arch manifests
merge-manifests:
runs-on: ubuntu-latest
needs: [build-amd64, build-arm64]
strategy:
matrix:
image:
- xingrin-server
- xingrin-frontend
- xingrin-worker
- xingrin-nginx
- xingrin-agent
- xingrin-postgres
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Get version
id: version
run: |
if [[ $GITHUB_REF == refs/tags/* ]]; then
@@ -94,45 +195,76 @@ jobs:
echo "IS_RELEASE=false" >> $GITHUB_OUTPUT
fi
- name: Build and push
uses: docker/build-push-action@v5
with:
context: ${{ matrix.context }}
file: ${{ matrix.dockerfile }}
platforms: ${{ matrix.platforms }}
push: true
tags: |
${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}
${{ steps.version.outputs.IS_RELEASE == 'true' && format('{0}/{1}:latest', env.IMAGE_PREFIX, matrix.image) || '' }}
build-args: |
IMAGE_TAG=${{ steps.version.outputs.VERSION }}
cache-from: type=gha
cache-to: type=gha,mode=max
provenance: false
sbom: false
- name: Create and push multi-arch manifest
run: |
VERSION=${{ steps.version.outputs.VERSION }}
IMAGE=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}
docker manifest create ${IMAGE}:${VERSION} \
${IMAGE}:${VERSION}-amd64 \
${IMAGE}:${VERSION}-arm64
docker manifest push ${IMAGE}:${VERSION}
if [[ "${{ steps.version.outputs.IS_RELEASE }}" == "true" ]]; then
docker manifest create ${IMAGE}:latest \
${IMAGE}:${VERSION}-amd64 \
${IMAGE}:${VERSION}-arm64
docker manifest push ${IMAGE}:latest
fi
# After all images build successfully, update the VERSION file
# Update the VERSION file
update-version:
runs-on: ubuntu-latest
needs: build
needs: merge-manifests
if: startsWith(github.ref, 'refs/tags/v')
steps:
- name: Checkout
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: main
fetch-depth: 0 # full history, needed to determine which branch the tag is on
token: ${{ secrets.GITHUB_TOKEN }}
- name: Determine source branch and version
id: branch
run: |
VERSION="${GITHUB_REF#refs/tags/}"
# Find the branches that contain this tag
BRANCHES=$(git branch -r --contains ${{ github.ref_name }})
echo "Branches containing tag: $BRANCHES"
# Determine which branch the tag came from
if echo "$BRANCHES" | grep -q "origin/main"; then
TARGET_BRANCH="main"
UPDATE_LATEST="true"
elif echo "$BRANCHES" | grep -q "origin/dev"; then
TARGET_BRANCH="dev"
UPDATE_LATEST="false"
else
echo "Warning: Tag not found in main or dev branch, defaulting to main"
TARGET_BRANCH="main"
UPDATE_LATEST="false"
fi
echo "BRANCH=$TARGET_BRANCH" >> $GITHUB_OUTPUT
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
echo "UPDATE_LATEST=$UPDATE_LATEST" >> $GITHUB_OUTPUT
echo "Will update VERSION on branch: $TARGET_BRANCH"
- name: Checkout target branch
run: |
git checkout ${{ steps.branch.outputs.BRANCH }}
- name: Update VERSION file
run: |
VERSION="${GITHUB_REF#refs/tags/}"
VERSION="${{ steps.branch.outputs.VERSION }}"
echo "$VERSION" > VERSION
echo "Updated VERSION to $VERSION"
echo "Updated VERSION to $VERSION on branch ${{ steps.branch.outputs.BRANCH }}"
- name: Commit and push
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add VERSION
git diff --staged --quiet || git commit -m "chore: bump version to ${GITHUB_REF#refs/tags/}"
git push
git diff --staged --quiet || git commit -m "chore: bump version to ${{ steps.branch.outputs.VERSION }}"
git push origin ${{ steps.branch.outputs.BRANCH }}
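The merge job above is the key idea of this workflow rewrite: each architecture is built natively and pushed under a `-amd64`/`-arm64` suffix, then stitched into a single tag. A minimal Python sketch of the equivalent merge logic (assumes a logged-in docker CLI; the image name below is a placeholder):

```python
import subprocess

def merge_manifest(image: str, version: str, push_latest: bool = False) -> None:
    """Combine per-arch tags into a single multi-arch manifest and push it."""
    def create_and_push(tag: str) -> None:
        subprocess.run(
            ["docker", "manifest", "create", f"{image}:{tag}",
             f"{image}:{version}-amd64", f"{image}:{version}-arm64"],
            check=True,
        )
        subprocess.run(["docker", "manifest", "push", f"{image}:{tag}"], check=True)

    create_and_push(version)
    if push_latest:  # release tags also update :latest
        create_and_push("latest")

# merge_manifest("example/xingrin-server", "v1.4.0", push_latest=True)
```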

.gitignore vendored (2 lines changed)
View File

@@ -132,3 +132,5 @@ temp/
HGETALL
KEYS
vuln_scan/input_endpoints.txt
open-in-v0

README.md (109 lines changed)
View File

@@ -13,35 +13,35 @@
<p align="center">
<a href="#-功能特性">功能特性</a> •
<a href="#-全局资产搜索">资产搜索</a> •
<a href="#-快速开始">快速开始</a> •
<a href="#-文档">文档</a> •
<a href="#-技术栈">技术栈</a> •
<a href="#-反馈与贡献">反馈与贡献</a>
</p>
<p align="center">
<sub>🔍 Keywords: ASM | Attack Surface Management | Vulnerability Scanning | Asset Discovery | Bug Bounty | Penetration Testing | Nuclei | Subdomain Enumeration | EASM</sub>
<sub>🔍 Keywords: ASM | Attack Surface Management | Vulnerability Scanning | Asset Discovery | Asset Search | Bug Bounty | Penetration Testing | Nuclei | Subdomain Enumeration | EASM</sub>
</p>
---
## 🌐 Live Demo
👉 **[https://xingrin.vercel.app/](https://xingrin.vercel.app/)**
> ⚠️ UI showcase only; no backend database attached
---
<p align="center">
<b>🌗 Light/Dark Mode Toggle</b>
<b>🎨 Modern UI</b>
</p>
<p align="center">
<img src="docs/screenshots/light.png" alt="Light Mode" width="49%">
<img src="docs/screenshots/dark.png" alt="Dark Mode" width="49%">
</p>
<p align="center">
<b>🎨 Multiple UI Themes</b>
</p>
<p align="center">
<img src="docs/screenshots/bubblegum.png" alt="Bubblegum" width="32%">
<img src="docs/screenshots/cosmic-night.png" alt="Cosmic Night" width="32%">
<img src="docs/screenshots/quantum-rose.png" alt="Quantum Rose" width="32%">
<img src="docs/screenshots/light.png" alt="Light Mode" width="24%">
<img src="docs/screenshots/bubblegum.png" alt="Bubblegum" width="24%">
<img src="docs/screenshots/cosmic-night.png" alt="Cosmic Night" width="24%">
<img src="docs/screenshots/quantum-rose.png" alt="Quantum Rose" width="24%">
</p>
## 📚 Documentation
@@ -69,9 +69,20 @@
- **Custom workflows** - Scan flows defined in YAML for flexible orchestration
- **Scheduled scans** - Cron expressions for automated, recurring scans
### 🚫 Blacklist Filtering
- **Two-tier blacklist** - Global plus per-target blacklists for fine-grained scan scope control
- **Smart rule detection** - Automatically recognizes wildcard domains (`*.gov`), IPs, and CIDR ranges
- **Sensitive-target protection** - Filters government, military, education, and other sensitive domains to prevent accidental scanning
- **Internal-network filtering** - Supports private ranges such as `10.0.0.0/8`, `172.16.0.0/12`, and `192.168.0.0/16` (see the matching sketch below)
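A minimal sketch of how rules of these three shapes (wildcard domain, IP, CIDR) can be matched with only the standard library (illustrative; not the project's actual rule engine):

```python
import fnmatch
import ipaddress

def matches_blacklist(host: str, rules: list[str]) -> bool:
    """Return True if host matches any wildcard-domain, IP, or CIDR rule."""
    for rule in rules:
        # IP / CIDR rules: match by network membership
        try:
            network = ipaddress.ip_network(rule, strict=False)
        except ValueError:
            network = None
        if network is not None:
            try:
                if ipaddress.ip_address(host) in network:
                    return True
            except ValueError:
                pass  # host is a domain name; an IP rule cannot match it
            continue
        # Wildcard domain rules such as *.gov (also match the bare apex)
        if fnmatch.fnmatch(host, rule) or host == rule.lstrip("*."):
            return True
    return False

rules = ["*.gov", "10.0.0.0/8", "192.168.0.0/16"]
print(matches_blacklist("city.example.gov", rules))  # True
print(matches_blacklist("10.1.2.3", rules))          # True
print(matches_blacklist("example.com", rules))       # False
```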
### 🔖 Fingerprinting
- **Multi-source fingerprint library** - Ships with 27,000+ fingerprint rules from EHole, Goby, Wappalyzer, Fingers, FingerPrintHub, ARL, and more
- **Automatic detection** - Runs automatically during the scan flow to identify web application tech stacks
- **Fingerprint management** - Query, import, and export fingerprint rules
#### Scan Flow Architecture
The full scan flow covers subdomain discovery, port scanning, site discovery, URL collection, directory scanning, and vulnerability scanning
The full scan flow covers subdomain discovery, port scanning, site discovery, fingerprinting, URL collection, directory scanning, and vulnerability scanning
```mermaid
flowchart LR
@@ -82,7 +93,8 @@ flowchart LR
SUB["子域名发现<br/>subfinder, amass, puredns"]
PORT["端口扫描<br/>naabu"]
SITE["站点识别<br/>httpx"]
SUB --> PORT --> SITE
FINGER["指纹识别<br/>xingfinger"]
SUB --> PORT --> SITE --> FINGER
end
subgraph STAGE2["Stage 2: Deep Analysis"]
@@ -98,7 +110,7 @@ flowchart LR
FINISH["扫描完成"]
START --> STAGE1
SITE --> STAGE2
FINGER --> STAGE2
STAGE2 --> STAGE3
STAGE3 --> FINISH
@@ -110,6 +122,7 @@ flowchart LR
style SUB fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
style PORT fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
style SITE fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
style FINGER fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
style URL fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
style DIR fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
style VULN fill:#f0b27a,stroke:#e67e22,stroke-width:1px,color:#fff
@@ -162,25 +175,43 @@ flowchart TB
W3 -.heartbeat reports.-> REDIS
```
### 🔎 Global Asset Search
- **Multi-type search** - Covers both Website and Endpoint asset types
- **Expression syntax** - Supports the `=` (fuzzy), `==` (exact), and `!=` (not equal) operators
- **Logical composition** - Combine conditions with `&&` (AND) and `||` (OR)
- **Multi-field queries** - Search across the host, url, title, tech, status, body, and header fields
- **CSV export** - Streams the full result set to CSV with no row limit
#### Search Syntax Examples
```bash
# Basic searches
host="api" # host contains "api"
status=="200" # status code is exactly 200
tech="nginx" # tech stack contains nginx
# Combined searches
host="api" && status=="200" # host contains api AND status code is 200
tech="vue" || tech="react" # tech stack contains vue OR react
# Complex queries
host="admin" && tech="php" && status=="200"
url="/api/v1" && status!="404"
```
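In a Django backend like this one, expressions of that shape map naturally onto `Q` objects. A minimal sketch of such a translation (illustrative; the project's real parser and field mapping are not shown, and `status` is assumed to map to the `status_code` column):

```python
import re
from django.db.models import Q

TERM = re.compile(r'(\w+)\s*(==|!=|=)\s*"([^"]*)"')
FIELD_MAP = {"status": "status_code"}  # assumed mapping

def term_to_q(field: str, op: str, value: str) -> Q:
    """Map one term to a Q object: = fuzzy, == exact, != not equal."""
    column = FIELD_MAP.get(field, field)
    if op == "=":
        return Q(**{f"{column}__icontains": value})
    if op == "==":
        return Q(**{f"{column}__exact": value})
    return ~Q(**{f"{column}__exact": value})

def parse_expression(expr: str) -> Q:
    """Flat grammar: && binds terms inside each ||-separated group."""
    result = Q()
    for or_part in expr.split("||"):
        group = Q()
        for and_part in or_part.split("&&"):
            match = TERM.search(and_part)
            if match:
                group &= term_to_q(*match.groups())
        result |= group
    return result

# WebSite.objects.filter(parse_expression('host="api" && status=="200"'))
```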
### 📊 Visual Interface
- **Statistics** - Asset and vulnerability dashboards
- **Real-time notifications** - WebSocket message push
- **Dark theme** - Light/dark theme switching
- **Notification push** - Real-time delivery to WeCom, Telegram, and Discord
---
## 🛠️ Tech Stack
- **Frontend**: Next.js + React + TailwindCSS
- **Backend**: Django + Django REST Framework
- **Database**: PostgreSQL + Redis
- **Deployment**: Docker + Nginx
## 📦 Quick Start
### Requirements
- **OS**: Ubuntu 20.04+ / Debian 11+ (recommended)
- **OS**: Ubuntu 20.04+ / Debian 11+
- **Architecture**: AMD64 (x86_64) / ARM64 (aarch64)
- **Hardware**: 2 CPU cores and 4 GB RAM minimum; 20 GB+ disk space
### One-Click Install
@@ -192,11 +223,20 @@ cd xingrin
# Install and start (production mode)
sudo ./install.sh
# 🇨🇳 Mirror acceleration recommended for users in mainland China (third-party mirror services may break; long-term availability is not guaranteed)
sudo ./install.sh --mirror
```
> **💡 About the --mirror flag**
> - Automatically configures Docker registry mirrors (Chinese mirror sources)
> - Accelerates Git repository clones (Nuclei templates, etc.)
> - Greatly speeds up installation and avoids network timeouts
### Accessing the Service
- **Web UI**: `https://localhost`
- **Web UI**: `https://ip:8083`
- **Default account**: admin / admin (change the password after first login)
### Common Commands
@@ -212,22 +252,15 @@ sudo ./restart.sh
# Uninstall
sudo ./uninstall.sh
# Update
sudo ./update.sh
```
## 🤝 Feedback & Contributing
- 🐛 **Found a bug?** Report it via an [Issue](https://github.com/yyhuni/xingrin/issues)
- 💡 **Have new ideas (UI design, feature design, etc.)?** Submit suggestions via an [Issue](https://github.com/yyhuni/xingrin/issues)
- 🔧 **Want to contribute?** Follow the WeChat official account below and get in touch
- 💡 **Found a bug, or have new ideas (UI design, feature design, etc.)?** Submit them via an [Issue](https://github.com/yyhuni/xingrin/issues) or message the official account
## 📧 Contact
- The current version is mostly used by me alone, so there may be many edge-case issues
- For questions, suggestions, or anything else, prefer an [Issue](https://github.com/yyhuni/xingrin/issues); you can also message my WeChat official account directly and I will reply
- WeChat official account: **洋洋的小黑屋**
- WeChat official account: **塔罗安全学苑**
- WeChat group: see the menu at the bottom of the official account for the group link; if it has expired, DM me and I will add you
<img src="docs/wechat-qrcode.png" alt="WeChat official account" width="200">

View File

@@ -1 +1 @@
v1.0.35
v1.4.0

View File

@@ -4,7 +4,3 @@ from django.apps import AppConfig
class AssetConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'apps.asset'
def ready(self):
# Import all models so Django discovers and registers them
from . import models

View File

@@ -14,12 +14,13 @@ class EndpointDTO:
status_code: Optional[int] = None
content_length: Optional[int] = None
webserver: Optional[str] = None
body_preview: Optional[str] = None
response_body: Optional[str] = None
content_type: Optional[str] = None
tech: Optional[List[str]] = None
vhost: Optional[bool] = None
location: Optional[str] = None
matched_gf_patterns: Optional[List[str]] = None
response_headers: Optional[str] = None
def __post_init__(self):
if self.tech is None:

View File

@@ -9,7 +9,7 @@ class WebSiteDTO:
"""网站数据传输对象"""
target_id: int
url: str
host: str
host: str = ''
title: str = ''
status_code: Optional[int] = None
content_length: Optional[int] = None
@@ -17,9 +17,10 @@ class WebSiteDTO:
webserver: str = ''
content_type: str = ''
tech: List[str] = None
body_preview: str = ''
response_body: str = ''
vhost: Optional[bool] = None
created_at: str = None
response_headers: str = ''
def __post_init__(self):
if self.tech is None:

View File

@@ -13,6 +13,7 @@ class EndpointSnapshotDTO:
Snapshots belong only to a scan.
"""
scan_id: int
target_id: int # required; used to sync to the asset tables
url: str
host: str = '' # hostname (domain or IP address)
title: str = ''
@@ -22,10 +23,10 @@ class EndpointSnapshotDTO:
webserver: str = ''
content_type: str = ''
tech: List[str] = None
body_preview: str = ''
response_body: str = ''
vhost: Optional[bool] = None
matched_gf_patterns: List[str] = None
target_id: Optional[int] = None # redundant field, used to sync to the asset tables
response_headers: str = ''
def __post_init__(self):
if self.tech is None:
@@ -42,9 +43,6 @@ class EndpointSnapshotDTO:
"""
from apps.asset.dtos.asset import EndpointDTO
if self.target_id is None:
raise ValueError("target_id 不能为 None无法同步到资产表")
return EndpointDTO(
target_id=self.target_id,
url=self.url,
@@ -53,10 +51,11 @@ class EndpointSnapshotDTO:
status_code=self.status_code,
content_length=self.content_length,
webserver=self.webserver,
body_preview=self.body_preview,
response_body=self.response_body,
content_type=self.content_type,
tech=self.tech if self.tech else [],
vhost=self.vhost,
location=self.location,
matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else []
matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else [],
response_headers=self.response_headers,
)

View File

@@ -13,18 +13,19 @@ class WebsiteSnapshotDTO:
Snapshots belong only to a scan; target info is obtained via scan.target.
"""
scan_id: int
target_id: int # only used to pass data; not saved to the database
target_id: int # required; used to sync to the asset tables
url: str
host: str
title: str = ''
status: Optional[int] = None
status_code: Optional[int] = None # naming unified: status -> status_code
content_length: Optional[int] = None
location: str = ''
web_server: str = ''
webserver: str = '' # naming unified: web_server -> webserver
content_type: str = ''
tech: List[str] = None
body_preview: str = ''
response_body: str = ''
vhost: Optional[bool] = None
response_headers: str = ''
def __post_init__(self):
if self.tech is None:
@@ -44,12 +45,13 @@ class WebsiteSnapshotDTO:
url=self.url,
host=self.host,
title=self.title,
status_code=self.status,
status_code=self.status_code,
content_length=self.content_length,
location=self.location,
webserver=self.web_server,
webserver=self.webserver,
content_type=self.content_type,
tech=self.tech if self.tech else [],
body_preview=self.body_preview,
vhost=self.vhost
response_body=self.response_body,
vhost=self.vhost,
response_headers=self.response_headers,
)

View File

@@ -0,0 +1,345 @@
# Generated by Django 5.2.7 on 2026-01-06 00:55
import django.contrib.postgres.fields
import django.contrib.postgres.indexes
import django.core.validators
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
initial = True
dependencies = [
('scan', '0001_initial'),
('targets', '0001_initial'),
]
operations = [
migrations.CreateModel(
name='AssetStatistics',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('total_targets', models.IntegerField(default=0, help_text='目标总数')),
('total_subdomains', models.IntegerField(default=0, help_text='子域名总数')),
('total_ips', models.IntegerField(default=0, help_text='IP地址总数')),
('total_endpoints', models.IntegerField(default=0, help_text='端点总数')),
('total_websites', models.IntegerField(default=0, help_text='网站总数')),
('total_vulns', models.IntegerField(default=0, help_text='漏洞总数')),
('total_assets', models.IntegerField(default=0, help_text='总资产数(子域名+IP+端点+网站)')),
('prev_targets', models.IntegerField(default=0, help_text='上次目标总数')),
('prev_subdomains', models.IntegerField(default=0, help_text='上次子域名总数')),
('prev_ips', models.IntegerField(default=0, help_text='上次IP地址总数')),
('prev_endpoints', models.IntegerField(default=0, help_text='上次端点总数')),
('prev_websites', models.IntegerField(default=0, help_text='上次网站总数')),
('prev_vulns', models.IntegerField(default=0, help_text='上次漏洞总数')),
('prev_assets', models.IntegerField(default=0, help_text='上次总资产数')),
('updated_at', models.DateTimeField(auto_now=True, help_text='最后更新时间')),
],
options={
'verbose_name': '资产统计',
'verbose_name_plural': '资产统计',
'db_table': 'asset_statistics',
},
),
migrations.CreateModel(
name='StatisticsHistory',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('date', models.DateField(help_text='统计日期', unique=True)),
('total_targets', models.IntegerField(default=0, help_text='目标总数')),
('total_subdomains', models.IntegerField(default=0, help_text='子域名总数')),
('total_ips', models.IntegerField(default=0, help_text='IP地址总数')),
('total_endpoints', models.IntegerField(default=0, help_text='端点总数')),
('total_websites', models.IntegerField(default=0, help_text='网站总数')),
('total_vulns', models.IntegerField(default=0, help_text='漏洞总数')),
('total_assets', models.IntegerField(default=0, help_text='总资产数')),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('updated_at', models.DateTimeField(auto_now=True, help_text='更新时间')),
],
options={
'verbose_name': '统计历史',
'verbose_name_plural': '统计历史',
'db_table': 'statistics_history',
'ordering': ['-date'],
'indexes': [models.Index(fields=['date'], name='statistics__date_1d29cd_idx')],
},
),
migrations.CreateModel(
name='Directory',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('url', models.CharField(help_text='完整请求 URL', max_length=2000)),
('status', models.IntegerField(blank=True, help_text='HTTP 响应状态码', null=True)),
('content_length', models.BigIntegerField(blank=True, help_text='响应体字节大小Content-Length 或实际长度)', null=True)),
('words', models.IntegerField(blank=True, help_text='响应体中单词数量(按空格分割)', null=True)),
('lines', models.IntegerField(blank=True, help_text='响应体行数(按换行符分割)', null=True)),
('content_type', models.CharField(blank=True, default='', help_text='响应头 Content-Type 值', max_length=200)),
('duration', models.BigIntegerField(blank=True, help_text='请求耗时(单位:纳秒)', null=True)),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='directories', to='targets.target')),
],
options={
'verbose_name': '目录',
'verbose_name_plural': '目录',
'db_table': 'directory',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['-created_at'], name='directory_created_2cef03_idx'), models.Index(fields=['target'], name='directory_target__e310c8_idx'), models.Index(fields=['url'], name='directory_url_ba40cd_idx'), models.Index(fields=['status'], name='directory_status_40bbe6_idx'), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='directory_url_trgm_idx', opclasses=['gin_trgm_ops'])],
'constraints': [models.UniqueConstraint(fields=('target', 'url'), name='unique_directory_url_target')],
},
),
migrations.CreateModel(
name='DirectorySnapshot',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('url', models.CharField(help_text='目录URL', max_length=2000)),
('status', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
('content_length', models.BigIntegerField(blank=True, help_text='内容长度', null=True)),
('words', models.IntegerField(blank=True, help_text='响应体中单词数量(按空格分割)', null=True)),
('lines', models.IntegerField(blank=True, help_text='响应体行数(按换行符分割)', null=True)),
('content_type', models.CharField(blank=True, default='', help_text='响应头 Content-Type 值', max_length=200)),
('duration', models.BigIntegerField(blank=True, help_text='请求耗时(单位:纳秒)', null=True)),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='directory_snapshots', to='scan.scan')),
],
options={
'verbose_name': '目录快照',
'verbose_name_plural': '目录快照',
'db_table': 'directory_snapshot',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['scan'], name='directory_s_scan_id_c45900_idx'), models.Index(fields=['url'], name='directory_s_url_b4b72b_idx'), models.Index(fields=['status'], name='directory_s_status_e9f57e_idx'), models.Index(fields=['content_type'], name='directory_s_content_45e864_idx'), models.Index(fields=['-created_at'], name='directory_s_created_eb9d27_idx'), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='dir_snap_url_trgm', opclasses=['gin_trgm_ops'])],
'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_directory_per_scan_snapshot')],
},
),
migrations.CreateModel(
name='Endpoint',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('url', models.TextField(help_text='最终访问的完整URL')),
('host', models.CharField(blank=True, default='', help_text='主机名域名或IP地址', max_length=253)),
('location', models.TextField(blank=True, default='', help_text='重定向地址HTTP 3xx 响应头 Location')),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('title', models.TextField(blank=True, default='', help_text='网页标题HTML <title> 标签内容)')),
('webserver', models.TextField(blank=True, default='', help_text='服务器类型HTTP 响应头 Server 值)')),
('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
('content_type', models.TextField(blank=True, default='', help_text='响应类型HTTP Content-Type 响应头)')),
('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈(服务器/框架/语言等)', size=None)),
('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
('content_length', models.IntegerField(blank=True, help_text='响应体大小(单位字节)', null=True)),
('vhost', models.BooleanField(blank=True, help_text='是否支持虚拟主机', null=True)),
('matched_gf_patterns', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='匹配的GF模式列表用于识别敏感端点如api, debug, config等', size=None)),
('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='endpoints', to='targets.target')),
],
options={
'verbose_name': '端点',
'verbose_name_plural': '端点',
'db_table': 'endpoint',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['-created_at'], name='endpoint_created_44fe9c_idx'), models.Index(fields=['target'], name='endpoint_target__7f9065_idx'), models.Index(fields=['url'], name='endpoint_url_30f66e_idx'), models.Index(fields=['host'], name='endpoint_host_5b4cc8_idx'), models.Index(fields=['status_code'], name='endpoint_status__5d4fdd_idx'), models.Index(fields=['title'], name='endpoint_title_29e26c_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='endpoint_tech_2bfa7c_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='endpoint_resp_headers_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='endpoint_url_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='endpoint_title_trgm_idx', opclasses=['gin_trgm_ops'])],
'constraints': [models.UniqueConstraint(fields=('url', 'target'), name='unique_endpoint_url_target')],
},
),
migrations.CreateModel(
name='EndpointSnapshot',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('url', models.TextField(help_text='端点URL')),
('host', models.CharField(blank=True, default='', help_text='主机名域名或IP地址', max_length=253)),
('title', models.TextField(blank=True, default='', help_text='页面标题')),
('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
('content_length', models.IntegerField(blank=True, help_text='内容长度', null=True)),
('location', models.TextField(blank=True, default='', help_text='重定向位置')),
('webserver', models.TextField(blank=True, default='', help_text='Web服务器')),
('content_type', models.TextField(blank=True, default='', help_text='内容类型')),
('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈', size=None)),
('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
('vhost', models.BooleanField(blank=True, help_text='虚拟主机标志', null=True)),
('matched_gf_patterns', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='匹配的GF模式列表', size=None)),
('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='endpoint_snapshots', to='scan.scan')),
],
options={
'verbose_name': '端点快照',
'verbose_name_plural': '端点快照',
'db_table': 'endpoint_snapshot',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['scan'], name='endpoint_sn_scan_id_6ac9a7_idx'), models.Index(fields=['url'], name='endpoint_sn_url_205160_idx'), models.Index(fields=['host'], name='endpoint_sn_host_577bfd_idx'), models.Index(fields=['title'], name='endpoint_sn_title_516a05_idx'), models.Index(fields=['status_code'], name='endpoint_sn_status__83efb0_idx'), models.Index(fields=['webserver'], name='endpoint_sn_webserv_66be83_idx'), models.Index(fields=['-created_at'], name='endpoint_sn_created_21fb5b_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='endpoint_sn_tech_0d0752_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='ep_snap_resp_hdr_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='ep_snap_url_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='ep_snap_title_trgm', opclasses=['gin_trgm_ops'])],
'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_endpoint_per_scan_snapshot')],
},
),
migrations.CreateModel(
name='HostPortMapping',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('host', models.CharField(help_text='主机名域名或IP', max_length=1000)),
('ip', models.GenericIPAddressField(help_text='IP地址')),
('port', models.IntegerField(help_text='端口号1-65535', validators=[django.core.validators.MinValueValidator(1, message='端口号必须大于等于1'), django.core.validators.MaxValueValidator(65535, message='端口号必须小于等于65535')])),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='host_port_mappings', to='targets.target')),
],
options={
'verbose_name': '主机端口映射',
'verbose_name_plural': '主机端口映射',
'db_table': 'host_port_mapping',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['target'], name='host_port_m_target__943e9b_idx'), models.Index(fields=['host'], name='host_port_m_host_f78363_idx'), models.Index(fields=['ip'], name='host_port_m_ip_2e6f02_idx'), models.Index(fields=['port'], name='host_port_m_port_9fb9ff_idx'), models.Index(fields=['host', 'ip'], name='host_port_m_host_3ce245_idx'), models.Index(fields=['-created_at'], name='host_port_m_created_11cd22_idx')],
'constraints': [models.UniqueConstraint(fields=('target', 'host', 'ip', 'port'), name='unique_target_host_ip_port')],
},
),
migrations.CreateModel(
name='HostPortMappingSnapshot',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('host', models.CharField(help_text='主机名域名或IP', max_length=1000)),
('ip', models.GenericIPAddressField(help_text='IP地址')),
('port', models.IntegerField(help_text='端口号1-65535', validators=[django.core.validators.MinValueValidator(1, message='端口号必须大于等于1'), django.core.validators.MaxValueValidator(65535, message='端口号必须小于等于65535')])),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('scan', models.ForeignKey(help_text='所属的扫描任务(主关联)', on_delete=django.db.models.deletion.CASCADE, related_name='host_port_mapping_snapshots', to='scan.scan')),
],
options={
'verbose_name': '主机端口映射快照',
'verbose_name_plural': '主机端口映射快照',
'db_table': 'host_port_mapping_snapshot',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['scan'], name='host_port_m_scan_id_50ba0b_idx'), models.Index(fields=['host'], name='host_port_m_host_e99054_idx'), models.Index(fields=['ip'], name='host_port_m_ip_54818c_idx'), models.Index(fields=['port'], name='host_port_m_port_ed7b48_idx'), models.Index(fields=['host', 'ip'], name='host_port_m_host_8a463a_idx'), models.Index(fields=['scan', 'host'], name='host_port_m_scan_id_426fdb_idx'), models.Index(fields=['-created_at'], name='host_port_m_created_fb28b8_idx')],
'constraints': [models.UniqueConstraint(fields=('scan', 'host', 'ip', 'port'), name='unique_scan_host_ip_port_snapshot')],
},
),
migrations.CreateModel(
name='Subdomain',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('name', models.CharField(help_text='子域名名称', max_length=1000)),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='subdomains', to='targets.target')),
],
options={
'verbose_name': '子域名',
'verbose_name_plural': '子域名',
'db_table': 'subdomain',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['-created_at'], name='subdomain_created_e187a8_idx'), models.Index(fields=['name', 'target'], name='subdomain_name_60e1d0_idx'), models.Index(fields=['target'], name='subdomain_target__e409f0_idx'), models.Index(fields=['name'], name='subdomain_name_d40ba7_idx'), django.contrib.postgres.indexes.GinIndex(fields=['name'], name='subdomain_name_trgm_idx', opclasses=['gin_trgm_ops'])],
'constraints': [models.UniqueConstraint(fields=('name', 'target'), name='unique_subdomain_name_target')],
},
),
migrations.CreateModel(
name='SubdomainSnapshot',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('name', models.CharField(help_text='子域名名称', max_length=1000)),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='subdomain_snapshots', to='scan.scan')),
],
options={
'verbose_name': '子域名快照',
'verbose_name_plural': '子域名快照',
'db_table': 'subdomain_snapshot',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['scan'], name='subdomain_s_scan_id_68c253_idx'), models.Index(fields=['name'], name='subdomain_s_name_2da42b_idx'), models.Index(fields=['-created_at'], name='subdomain_s_created_d2b48e_idx'), django.contrib.postgres.indexes.GinIndex(fields=['name'], name='subdomain_snap_name_trgm', opclasses=['gin_trgm_ops'])],
'constraints': [models.UniqueConstraint(fields=('scan', 'name'), name='unique_subdomain_per_scan_snapshot')],
},
),
migrations.CreateModel(
name='Vulnerability',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('url', models.CharField(help_text='漏洞所在的URL', max_length=2000)),
('vuln_type', models.CharField(help_text='漏洞类型(如 xss, sqli', max_length=100)),
('severity', models.CharField(choices=[('unknown', '未知'), ('info', '信息'), ('low', '低'), ('medium', '中'), ('high', '高'), ('critical', '危急')], default='unknown', help_text='严重性(未知/信息/低/中/高/危急)', max_length=20)),
('source', models.CharField(blank=True, default='', help_text='来源工具(如 dalfox, nuclei, crlfuzz', max_length=50)),
('cvss_score', models.DecimalField(blank=True, decimal_places=1, help_text='CVSS 评分0.0-10.0', max_digits=3, null=True)),
('description', models.TextField(blank=True, default='', help_text='漏洞描述')),
('raw_output', models.JSONField(blank=True, default=dict, help_text='工具原始输出')),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='vulnerabilities', to='targets.target')),
],
options={
'verbose_name': '漏洞',
'verbose_name_plural': '漏洞',
'db_table': 'vulnerability',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['target'], name='vulnerabili_target__755a02_idx'), models.Index(fields=['vuln_type'], name='vulnerabili_vuln_ty_3010cd_idx'), models.Index(fields=['severity'], name='vulnerabili_severit_1a798b_idx'), models.Index(fields=['source'], name='vulnerabili_source_7c7552_idx'), models.Index(fields=['url'], name='vulnerabili_url_4dcc4d_idx'), models.Index(fields=['-created_at'], name='vulnerabili_created_e25ff7_idx')],
},
),
migrations.CreateModel(
name='VulnerabilitySnapshot',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('url', models.CharField(help_text='漏洞所在的URL', max_length=2000)),
('vuln_type', models.CharField(help_text='漏洞类型(如 xss, sqli', max_length=100)),
('severity', models.CharField(choices=[('unknown', '未知'), ('info', '信息'), ('low', '低'), ('medium', '中'), ('high', '高'), ('critical', '危急')], default='unknown', help_text='严重性(未知/信息/低/中/高/危急)', max_length=20)),
('source', models.CharField(blank=True, default='', help_text='来源工具(如 dalfox, nuclei, crlfuzz', max_length=50)),
('cvss_score', models.DecimalField(blank=True, decimal_places=1, help_text='CVSS 评分0.0-10.0', max_digits=3, null=True)),
('description', models.TextField(blank=True, default='', help_text='漏洞描述')),
('raw_output', models.JSONField(blank=True, default=dict, help_text='工具原始输出')),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='vulnerability_snapshots', to='scan.scan')),
],
options={
'verbose_name': '漏洞快照',
'verbose_name_plural': '漏洞快照',
'db_table': 'vulnerability_snapshot',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['scan'], name='vulnerabili_scan_id_7b81c9_idx'), models.Index(fields=['url'], name='vulnerabili_url_11a707_idx'), models.Index(fields=['vuln_type'], name='vulnerabili_vuln_ty_6b90ee_idx'), models.Index(fields=['severity'], name='vulnerabili_severit_4eae0d_idx'), models.Index(fields=['source'], name='vulnerabili_source_968b1f_idx'), models.Index(fields=['-created_at'], name='vulnerabili_created_53a12e_idx')],
},
),
migrations.CreateModel(
name='WebSite',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('url', models.TextField(help_text='最终访问的完整URL')),
('host', models.CharField(blank=True, default='', help_text='主机名域名或IP地址', max_length=253)),
('location', models.TextField(blank=True, default='', help_text='重定向地址HTTP 3xx 响应头 Location')),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('title', models.TextField(blank=True, default='', help_text='网页标题HTML <title> 标签内容)')),
('webserver', models.TextField(blank=True, default='', help_text='服务器类型HTTP 响应头 Server 值)')),
('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
('content_type', models.TextField(blank=True, default='', help_text='响应类型HTTP Content-Type 响应头)')),
('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈(服务器/框架/语言等)', size=None)),
('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
('content_length', models.IntegerField(blank=True, help_text='响应体大小(单位字节)', null=True)),
('vhost', models.BooleanField(blank=True, help_text='是否支持虚拟主机', null=True)),
('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='websites', to='targets.target')),
],
options={
'verbose_name': '站点',
'verbose_name_plural': '站点',
'db_table': 'website',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['-created_at'], name='website_created_c9cfd2_idx'), models.Index(fields=['url'], name='website_url_b18883_idx'), models.Index(fields=['host'], name='website_host_996b50_idx'), models.Index(fields=['target'], name='website_target__2a353b_idx'), models.Index(fields=['title'], name='website_title_c2775b_idx'), models.Index(fields=['status_code'], name='website_status__51663d_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='website_tech_e3f0cb_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='website_resp_headers_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='website_url_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='website_title_trgm_idx', opclasses=['gin_trgm_ops'])],
'constraints': [models.UniqueConstraint(fields=('url', 'target'), name='unique_website_url_target')],
},
),
migrations.CreateModel(
name='WebsiteSnapshot',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('url', models.TextField(help_text='站点URL')),
('host', models.CharField(blank=True, default='', help_text='主机名域名或IP地址', max_length=253)),
('title', models.TextField(blank=True, default='', help_text='页面标题')),
('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
('content_length', models.BigIntegerField(blank=True, help_text='内容长度', null=True)),
('location', models.TextField(blank=True, default='', help_text='重定向位置')),
('webserver', models.TextField(blank=True, default='', help_text='Web服务器')),
('content_type', models.TextField(blank=True, default='', help_text='内容类型')),
('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈', size=None)),
('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
('vhost', models.BooleanField(blank=True, help_text='虚拟主机标志', null=True)),
('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='website_snapshots', to='scan.scan')),
],
options={
'verbose_name': '网站快照',
'verbose_name_plural': '网站快照',
'db_table': 'website_snapshot',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['scan'], name='website_sna_scan_id_26b6dc_idx'), models.Index(fields=['url'], name='website_sna_url_801a70_idx'), models.Index(fields=['host'], name='website_sna_host_348fe1_idx'), models.Index(fields=['title'], name='website_sna_title_b1a5ee_idx'), models.Index(fields=['-created_at'], name='website_sna_created_2c149a_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='website_sna_tech_3d6d2f_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='ws_snap_resp_hdr_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='ws_snap_url_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='ws_snap_title_trgm', opclasses=['gin_trgm_ops'])],
'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_website_per_scan_snapshot')],
},
),
]

View File

@@ -0,0 +1,104 @@
"""
Create asset-search materialized views (incrementally maintained via pg_ivm).
These views power the asset search feature with high-performance full-text queries.
"""
from django.db import migrations
class Migration(migrations.Migration):
"""创建资产搜索所需的增量物化视图"""
dependencies = [
('asset', '0001_initial'),
]
operations = [
# 1. Ensure the pg_ivm extension is installed
migrations.RunSQL(
sql="CREATE EXTENSION IF NOT EXISTS pg_ivm;",
reverse_sql="DROP EXTENSION IF EXISTS pg_ivm;",
),
# 2. Create the Website search view
# Note: pg_ivm does not support ArrayField, so the tech field must be JOINed from the source table
migrations.RunSQL(
sql="""
SELECT pgivm.create_immv('asset_search_view', $$
SELECT
w.id,
w.url,
w.host,
w.title,
w.status_code,
w.response_headers,
w.response_body,
w.content_type,
w.content_length,
w.webserver,
w.location,
w.vhost,
w.created_at,
w.target_id
FROM website w
$$);
""",
reverse_sql="DROP TABLE IF EXISTS asset_search_view CASCADE;",
),
# 3. Create the Endpoint search view
migrations.RunSQL(
sql="""
SELECT pgivm.create_immv('endpoint_search_view', $$
SELECT
e.id,
e.url,
e.host,
e.title,
e.status_code,
e.response_headers,
e.response_body,
e.content_type,
e.content_length,
e.webserver,
e.location,
e.vhost,
e.created_at,
e.target_id
FROM endpoint e
$$);
""",
reverse_sql="DROP TABLE IF EXISTS endpoint_search_view CASCADE;",
),
# 4. Create indexes on the search views (to speed up queries)
migrations.RunSQL(
sql=[
# Website search view indexes
"CREATE INDEX IF NOT EXISTS asset_search_view_host_idx ON asset_search_view (host);",
"CREATE INDEX IF NOT EXISTS asset_search_view_url_idx ON asset_search_view (url);",
"CREATE INDEX IF NOT EXISTS asset_search_view_title_idx ON asset_search_view (title);",
"CREATE INDEX IF NOT EXISTS asset_search_view_status_idx ON asset_search_view (status_code);",
"CREATE INDEX IF NOT EXISTS asset_search_view_created_idx ON asset_search_view (created_at DESC);",
# Endpoint search view indexes
"CREATE INDEX IF NOT EXISTS endpoint_search_view_host_idx ON endpoint_search_view (host);",
"CREATE INDEX IF NOT EXISTS endpoint_search_view_url_idx ON endpoint_search_view (url);",
"CREATE INDEX IF NOT EXISTS endpoint_search_view_title_idx ON endpoint_search_view (title);",
"CREATE INDEX IF NOT EXISTS endpoint_search_view_status_idx ON endpoint_search_view (status_code);",
"CREATE INDEX IF NOT EXISTS endpoint_search_view_created_idx ON endpoint_search_view (created_at DESC);",
],
reverse_sql=[
"DROP INDEX IF EXISTS asset_search_view_host_idx;",
"DROP INDEX IF EXISTS asset_search_view_url_idx;",
"DROP INDEX IF EXISTS asset_search_view_title_idx;",
"DROP INDEX IF EXISTS asset_search_view_status_idx;",
"DROP INDEX IF EXISTS asset_search_view_created_idx;",
"DROP INDEX IF EXISTS endpoint_search_view_host_idx;",
"DROP INDEX IF EXISTS endpoint_search_view_url_idx;",
"DROP INDEX IF EXISTS endpoint_search_view_title_idx;",
"DROP INDEX IF EXISTS endpoint_search_view_status_idx;",
"DROP INDEX IF EXISTS endpoint_search_view_created_idx;",
],
),
]
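Because pg_ivm keeps these views in sync automatically, application code can query them like ordinary tables. A minimal sketch of a raw-SQL lookup from Django (illustrative; the project's actual search service is not shown):

```python
from django.db import connection

def search_websites_by_host(keyword: str, limit: int = 50) -> list[dict]:
    """Query the incrementally maintained asset_search_view directly."""
    sql = """
        SELECT id, url, host, title, status_code
        FROM asset_search_view
        WHERE host LIKE %s
        ORDER BY created_at DESC
        LIMIT %s
    """
    with connection.cursor() as cursor:
        cursor.execute(sql, [f"%{keyword}%", limit])
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in cursor.fetchall()]
```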

View File

@@ -1,6 +1,7 @@
from django.db import models
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.core.validators import MinValueValidator, MaxValueValidator
@@ -22,18 +23,24 @@ class Subdomain(models.Model):
help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
)
name = models.CharField(max_length=1000, help_text='子域名名称')
discovered_at = models.DateTimeField(auto_now_add=True, help_text='首次发现时间')
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
class Meta:
db_table = 'subdomain'
verbose_name = '子域名'
verbose_name_plural = '子域名'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['-discovered_at']),
models.Index(fields=['-created_at']),
models.Index(fields=['name', 'target']), # composite index; speeds up get_by_names_and_target_id batch queries
models.Index(fields=['target']), # speeds up finding subdomains under a target_id
models.Index(fields=['name']), # speeds up lookups by name (search scenarios)
# pg_trgm GIN index; supports LIKE '%keyword%' fuzzy search
GinIndex(
name='subdomain_name_trgm_idx',
fields=['name'],
opclasses=['gin_trgm_ops']
),
]
constraints = [
# plain unique constraint: the name + target combination is unique
@@ -58,40 +65,35 @@ class Endpoint(models.Model):
help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
)
url = models.CharField(max_length=2000, help_text='最终访问的完整URL')
url = models.TextField(help_text='最终访问的完整URL')
host = models.CharField(
max_length=253,
blank=True,
default='',
help_text='主机名域名或IP地址'
)
location = models.CharField(
max_length=1000,
location = models.TextField(
blank=True,
default='',
help_text='重定向地址HTTP 3xx 响应头 Location'
)
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
title = models.CharField(
max_length=1000,
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
title = models.TextField(
blank=True,
default='',
help_text='网页标题HTML <title> 标签内容)'
)
webserver = models.CharField(
max_length=200,
webserver = models.TextField(
blank=True,
default='',
help_text='服务器类型HTTP 响应头 Server 值)'
)
body_preview = models.CharField(
max_length=1000,
response_body = models.TextField(
blank=True,
default='',
help_text='响应正文前N个字符默认100个字符'
help_text='HTTP响应体'
)
content_type = models.CharField(
max_length=200,
content_type = models.TextField(
blank=True,
default='',
help_text='响应类型HTTP Content-Type 响应头)'
@@ -123,18 +125,41 @@ class Endpoint(models.Model):
default=list,
help_text='匹配的GF模式列表用于识别敏感端点如api, debug, config等'
)
response_headers = models.TextField(
blank=True,
default='',
help_text='原始HTTP响应头'
)
class Meta:
db_table = 'endpoint'
verbose_name = '端点'
verbose_name_plural = '端点'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['-discovered_at']),
models.Index(fields=['target']), # speeds up finding endpoints under a target_id (primary association field)
models.Index(fields=['-created_at']),
models.Index(fields=['target']), # speeds up finding endpoints under a target_id (primary association field)
models.Index(fields=['url']), # URL index; improves query performance
models.Index(fields=['host']), # host index; speeds up lookups by hostname
models.Index(fields=['status_code']), # status-code index; speeds up filtering
models.Index(fields=['title']), # title index; speeds up smart-filter searches
GinIndex(fields=['tech']), # GIN index; speeds up __contains queries on the tech array field
# pg_trgm GIN index; supports LIKE '%keyword%' fuzzy search
GinIndex(
name='endpoint_resp_headers_trgm_idx',
fields=['response_headers'],
opclasses=['gin_trgm_ops']
),
GinIndex(
name='endpoint_url_trgm_idx',
fields=['url'],
opclasses=['gin_trgm_ops']
),
GinIndex(
name='endpoint_title_trgm_idx',
fields=['title'],
opclasses=['gin_trgm_ops']
),
]
constraints = [
# plain unique constraint: the url + target combination is unique
@@ -159,40 +184,35 @@ class WebSite(models.Model):
help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
)
url = models.CharField(max_length=2000, help_text='最终访问的完整URL')
url = models.TextField(help_text='最终访问的完整URL')
host = models.CharField(
max_length=253,
blank=True,
default='',
help_text='主机名域名或IP地址'
)
location = models.CharField(
max_length=1000,
location = models.TextField(
blank=True,
default='',
help_text='重定向地址HTTP 3xx 响应头 Location'
)
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
title = models.CharField(
max_length=1000,
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
title = models.TextField(
blank=True,
default='',
help_text='网页标题HTML <title> 标签内容)'
)
webserver = models.CharField(
max_length=200,
webserver = models.TextField(
blank=True,
default='',
help_text='服务器类型HTTP 响应头 Server 值)'
)
body_preview = models.CharField(
max_length=1000,
response_body = models.TextField(
blank=True,
default='',
help_text='响应正文前N个字符默认100个字符'
help_text='HTTP响应体'
)
content_type = models.CharField(
max_length=200,
content_type = models.TextField(
blank=True,
default='',
help_text='响应类型HTTP Content-Type 响应头)'
@@ -218,17 +238,41 @@ class WebSite(models.Model):
blank=True,
help_text='是否支持虚拟主机'
)
response_headers = models.TextField(
blank=True,
default='',
help_text='原始HTTP响应头'
)
class Meta:
db_table = 'website'
verbose_name = '站点'
verbose_name_plural = '站点'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['-discovered_at']),
models.Index(fields=['-created_at']),
models.Index(fields=['url']), # URL index; improves query performance
models.Index(fields=['host']), # host index; speeds up lookups by hostname
models.Index(fields=['target']), # speeds up finding websites under a target_id
models.Index(fields=['target']), # speeds up finding websites under a target_id
models.Index(fields=['title']), # title index; speeds up smart-filter searches
models.Index(fields=['status_code']), # status-code index; speeds up smart-filter searches
GinIndex(fields=['tech']), # GIN index; speeds up __contains queries on the tech array field
# pg_trgm GIN index; supports LIKE '%keyword%' fuzzy search
GinIndex(
name='website_resp_headers_trgm_idx',
fields=['response_headers'],
opclasses=['gin_trgm_ops']
),
GinIndex(
name='website_url_trgm_idx',
fields=['url'],
opclasses=['gin_trgm_ops']
),
GinIndex(
name='website_title_trgm_idx',
fields=['title'],
opclasses=['gin_trgm_ops']
),
]
constraints = [
# plain unique constraint: the url + target combination is unique
@@ -293,18 +337,24 @@ class Directory(models.Model):
help_text='请求耗时(单位:纳秒)'
)
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
class Meta:
db_table = 'directory'
verbose_name = '目录'
verbose_name_plural = '目录'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['-discovered_at']),
models.Index(fields=['-created_at']),
models.Index(fields=['target']), # speeds up finding directories under a target_id
models.Index(fields=['url']), # URL index; supports search and the unique constraint
models.Index(fields=['status']), # status-code index; speeds up filtering
# pg_trgm GIN index; supports LIKE '%keyword%' fuzzy search
GinIndex(
name='directory_url_trgm_idx',
fields=['url'],
opclasses=['gin_trgm_ops']
),
]
constraints = [
# plain unique constraint: the target + url combination is unique
@@ -358,23 +408,23 @@ class HostPortMapping(models.Model):
)
# ==================== Timestamp fields ====================
discovered_at = models.DateTimeField(
created_at = models.DateTimeField(
auto_now_add=True,
help_text='发现时间'
help_text='创建时间'
)
class Meta:
db_table = 'host_port_mapping'
verbose_name = '主机端口映射'
verbose_name_plural = '主机端口映射'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['target']), # speeds up queries by target
models.Index(fields=['host']), # speeds up queries by hostname
models.Index(fields=['ip']), # speeds up queries by IP
models.Index(fields=['port']), # speeds up queries by port
models.Index(fields=['host', 'ip']), # speeds up combined queries
models.Index(fields=['-discovered_at']), # speeds up time-based ordering
models.Index(fields=['-created_at']), # speeds up time-based ordering
]
constraints = [
# composite unique constraint: the target + host + ip + port combination is unique
@@ -408,7 +458,7 @@ class Vulnerability(models.Model):
)
# ==================== Core fields ====================
url = models.TextField(help_text='漏洞所在的URL')
url = models.CharField(max_length=2000, help_text='漏洞所在的URL')
vuln_type = models.CharField(max_length=100, help_text='漏洞类型(如 xss, sqli')
severity = models.CharField(
max_length=20,
@@ -422,19 +472,20 @@ class Vulnerability(models.Model):
raw_output = models.JSONField(blank=True, default=dict, help_text='工具原始输出')
# ==================== Timestamp fields ====================
discovered_at = models.DateTimeField(auto_now_add=True, help_text='首次发现时间')
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
class Meta:
db_table = 'vulnerability'
verbose_name = '漏洞'
verbose_name_plural = '漏洞'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['target']),
models.Index(fields=['vuln_type']),
models.Index(fields=['severity']),
models.Index(fields=['source']),
models.Index(fields=['-discovered_at']),
models.Index(fields=['url']), # url索引优化智能过滤搜索
models.Index(fields=['-created_at']),
]
def __str__(self):
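
Note: the gin_trgm_ops operator classes used by the GinIndex definitions above require the pg_trgm PostgreSQL extension. A minimal sketch of a migration that enables it — app label and dependency are placeholders, not part of this diff:

# Hypothetical migration sketch: enable pg_trgm before the trigram GIN
# indexes above are created. App label and dependency are placeholders.
from django.contrib.postgres.operations import TrigramExtension
from django.db import migrations

class Migration(migrations.Migration):
    dependencies = [
        ('asset', '0001_initial'),  # placeholder for the previous migration
    ]
    operations = [
        TrigramExtension(),  # runs CREATE EXTENSION IF NOT EXISTS pg_trgm
    ]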

View File

@@ -1,5 +1,6 @@
from django.db import models
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.core.validators import MinValueValidator, MaxValueValidator
@@ -15,17 +16,23 @@ class SubdomainSnapshot(models.Model):
)
name = models.CharField(max_length=1000, help_text='子域名名称')
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
class Meta:
db_table = 'subdomain_snapshot'
verbose_name = '子域名快照'
verbose_name_plural = '子域名快照'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['scan']),
models.Index(fields=['name']),
models.Index(fields=['-discovered_at']),
models.Index(fields=['-created_at']),
# pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
GinIndex(
name='subdomain_snap_name_trgm',
fields=['name'],
opclasses=['gin_trgm_ops']
),
]
constraints = [
# 唯一约束:同一次扫描中,同一个子域名只能记录一次
@@ -54,34 +61,57 @@ class WebsiteSnapshot(models.Model):
)
# 扫描结果数据
url = models.CharField(max_length=2000, help_text='站点URL')
url = models.TextField(help_text='站点URL')
host = models.CharField(max_length=253, blank=True, default='', help_text='主机名(域名或IP地址)')
title = models.CharField(max_length=500, blank=True, default='', help_text='页面标题')
status = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
title = models.TextField(blank=True, default='', help_text='页面标题')
status_code = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
content_length = models.BigIntegerField(null=True, blank=True, help_text='内容长度')
location = models.CharField(max_length=1000, blank=True, default='', help_text='重定向位置')
web_server = models.CharField(max_length=200, blank=True, default='', help_text='Web服务器')
content_type = models.CharField(max_length=200, blank=True, default='', help_text='内容类型')
location = models.TextField(blank=True, default='', help_text='重定向位置')
webserver = models.TextField(blank=True, default='', help_text='Web服务器')
content_type = models.TextField(blank=True, default='', help_text='内容类型')
tech = ArrayField(
models.CharField(max_length=100),
blank=True,
default=list,
help_text='技术栈'
)
body_preview = models.TextField(blank=True, default='', help_text='响应体预览')
response_body = models.TextField(blank=True, default='', help_text='HTTP响应体')
vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
response_headers = models.TextField(
blank=True,
default='',
help_text='原始HTTP响应头'
)
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
class Meta:
db_table = 'website_snapshot'
verbose_name = '网站快照'
verbose_name_plural = '网站快照'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['scan']),
models.Index(fields=['url']),
models.Index(fields=['host']), # host索引优化根据主机名查询
models.Index(fields=['-discovered_at']),
models.Index(fields=['title']), # title索引优化标题搜索
models.Index(fields=['-created_at']),
GinIndex(fields=['tech']), # GIN索引优化数组字段查询
# pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
GinIndex(
name='ws_snap_resp_hdr_trgm',
fields=['response_headers'],
opclasses=['gin_trgm_ops']
),
GinIndex(
name='ws_snap_url_trgm',
fields=['url'],
opclasses=['gin_trgm_ops']
),
GinIndex(
name='ws_snap_title_trgm',
fields=['title'],
opclasses=['gin_trgm_ops']
),
]
constraints = [
# 唯一约束:同一次扫描中,同一个URL只能记录一次
@@ -118,18 +148,25 @@ class DirectorySnapshot(models.Model):
lines = models.IntegerField(null=True, blank=True, help_text='响应体行数(按换行符分割)')
content_type = models.CharField(max_length=200, blank=True, default='', help_text='响应头 Content-Type 值')
duration = models.BigIntegerField(null=True, blank=True, help_text='请求耗时(单位:纳秒)')
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
class Meta:
db_table = 'directory_snapshot'
verbose_name = '目录快照'
verbose_name_plural = '目录快照'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['scan']),
models.Index(fields=['url']),
models.Index(fields=['status']), # 状态码索引,优化筛选
models.Index(fields=['-discovered_at']),
models.Index(fields=['content_type']), # content_type索引优化内容类型搜索
models.Index(fields=['-created_at']),
# pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
GinIndex(
name='dir_snap_url_trgm',
fields=['url'],
opclasses=['gin_trgm_ops']
),
]
constraints = [
# 唯一约束:同一次扫描中,同一个目录URL只能记录一次
@@ -183,16 +220,16 @@ class HostPortMappingSnapshot(models.Model):
)
# ==================== 时间字段 ====================
discovered_at = models.DateTimeField(
created_at = models.DateTimeField(
auto_now_add=True,
help_text='发现时间'
help_text='创建时间'
)
class Meta:
db_table = 'host_port_mapping_snapshot'
verbose_name = '主机端口映射快照'
verbose_name_plural = '主机端口映射快照'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['scan']), # 优化按扫描查询
models.Index(fields=['host']), # 优化按主机名查询
@@ -200,7 +237,7 @@ class HostPortMappingSnapshot(models.Model):
models.Index(fields=['port']), # 优化按端口查询
models.Index(fields=['host', 'ip']), # 优化组合查询
models.Index(fields=['scan', 'host']), # 优化扫描+主机查询
models.Index(fields=['-discovered_at']), # 优化时间排序
models.Index(fields=['-created_at']), # 优化时间排序
]
constraints = [
# 复合唯一约束:同一次扫描中,scan + host + ip + port 组合唯一
@@ -230,26 +267,26 @@ class EndpointSnapshot(models.Model):
)
# 扫描结果数据
url = models.CharField(max_length=2000, help_text='端点URL')
url = models.TextField(help_text='端点URL')
host = models.CharField(
max_length=253,
blank=True,
default='',
help_text='主机名(域名或IP地址)'
)
title = models.CharField(max_length=1000, blank=True, default='', help_text='页面标题')
title = models.TextField(blank=True, default='', help_text='页面标题')
status_code = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
content_length = models.IntegerField(null=True, blank=True, help_text='内容长度')
location = models.CharField(max_length=1000, blank=True, default='', help_text='重定向位置')
webserver = models.CharField(max_length=200, blank=True, default='', help_text='Web服务器')
content_type = models.CharField(max_length=200, blank=True, default='', help_text='内容类型')
location = models.TextField(blank=True, default='', help_text='重定向位置')
webserver = models.TextField(blank=True, default='', help_text='Web服务器')
content_type = models.TextField(blank=True, default='', help_text='内容类型')
tech = ArrayField(
models.CharField(max_length=100),
blank=True,
default=list,
help_text='技术栈'
)
body_preview = models.CharField(max_length=1000, blank=True, default='', help_text='响应体预览')
response_body = models.TextField(blank=True, default='', help_text='HTTP响应体')
vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
matched_gf_patterns = ArrayField(
models.CharField(max_length=100),
@@ -257,19 +294,43 @@ class EndpointSnapshot(models.Model):
default=list,
help_text='匹配的GF模式列表'
)
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
response_headers = models.TextField(
blank=True,
default='',
help_text='原始HTTP响应头'
)
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
class Meta:
db_table = 'endpoint_snapshot'
verbose_name = '端点快照'
verbose_name_plural = '端点快照'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['scan']),
models.Index(fields=['url']),
models.Index(fields=['host']), # host索引优化根据主机名查询
models.Index(fields=['title']), # title索引优化标题搜索
models.Index(fields=['status_code']), # 状态码索引,优化筛选
models.Index(fields=['-discovered_at']),
models.Index(fields=['webserver']), # webserver索引优化服务器搜索
models.Index(fields=['-created_at']),
GinIndex(fields=['tech']), # GIN索引优化数组字段查询
# pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
GinIndex(
name='ep_snap_resp_hdr_trgm',
fields=['response_headers'],
opclasses=['gin_trgm_ops']
),
GinIndex(
name='ep_snap_url_trgm',
fields=['url'],
opclasses=['gin_trgm_ops']
),
GinIndex(
name='ep_snap_title_trgm',
fields=['title'],
opclasses=['gin_trgm_ops']
),
]
constraints = [
# 唯一约束:同一次扫描中,同一个URL只能记录一次
@@ -302,7 +363,7 @@ class VulnerabilitySnapshot(models.Model):
)
# ==================== 核心字段 ====================
url = models.TextField(help_text='漏洞所在的URL')
url = models.CharField(max_length=2000, help_text='漏洞所在的URL')
vuln_type = models.CharField(max_length=100, help_text='漏洞类型(如 xss, sqli)')
severity = models.CharField(
max_length=20,
@@ -316,19 +377,20 @@ class VulnerabilitySnapshot(models.Model):
raw_output = models.JSONField(blank=True, default=dict, help_text='工具原始输出')
# ==================== 时间字段 ====================
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
class Meta:
db_table = 'vulnerability_snapshot'
verbose_name = '漏洞快照'
verbose_name_plural = '漏洞快照'
ordering = ['-discovered_at']
ordering = ['-created_at']
indexes = [
models.Index(fields=['scan']),
models.Index(fields=['url']), # url索引优化URL搜索
models.Index(fields=['vuln_type']),
models.Index(fields=['severity']),
models.Index(fields=['source']),
models.Index(fields=['-discovered_at']),
models.Index(fields=['-created_at']),
]
def __str__(self):
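
For reference, a sketch of the queries these snapshot indexes are meant to serve (filter values are made up; whether the planner actually uses an index depends on table statistics):

# Illustrative queries against the indexes defined above.
from apps.asset.models.snapshot_models import WebsiteSnapshot

# LIKE '%admin%' on url — a candidate for the ws_snap_url_trgm trigram index
qs = WebsiteSnapshot.objects.filter(url__contains='admin')

# Array membership on tech — served by the GIN index on the tech ArrayField
qs = qs.filter(tech__contains=['nginx'])

# Plain b-tree indexes cover equality filters and the default ordering
qs = qs.filter(status_code=200).order_by('-created_at')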

View File

@@ -9,6 +9,7 @@ from django.db import transaction
from apps.asset.models.asset_models import Directory
from apps.asset.dtos import DirectoryDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -24,6 +25,8 @@ class DjangoDirectoryRepository:
存在则更新所有字段,不存在则创建。
使用 Django 原生 update_conflicts。
注意:自动按模型唯一约束去重,保留最后一条记录。
Args:
items: Directory DTO 列表
@@ -34,6 +37,9 @@ class DjangoDirectoryRepository:
return 0
try:
# 自动按模型唯一约束去重
unique_items = deduplicate_for_bulk(items, Directory)
# 直接从 DTO 字段构建 Model
directories = [
Directory(
@@ -46,7 +52,7 @@ class DjangoDirectoryRepository:
content_type=item.content_type or '',
duration=item.duration
)
for item in items
for item in unique_items
]
with transaction.atomic():
@@ -61,20 +67,74 @@ class DjangoDirectoryRepository:
batch_size=1000
)
logger.debug(f"批量 upsert Directory 成功: {len(items)}")
return len(items)
logger.debug(f"批量 upsert Directory 成功: {len(unique_items)}")
return len(unique_items)
except Exception as e:
logger.error(f"批量 upsert Directory 失败: {e}")
raise
def bulk_create_ignore_conflicts(self, items: List[DirectoryDTO]) -> int:
"""
批量创建 Directory(存在即跳过)
与 bulk_upsert 不同,此方法不会更新已存在的记录。
适用于批量添加场景,只提供 URL,没有其他字段数据。
注意:自动按模型唯一约束去重,保留最后一条记录。
Args:
items: Directory DTO 列表
Returns:
int: 处理的记录数
"""
if not items:
return 0
try:
# 自动按模型唯一约束去重
unique_items = deduplicate_for_bulk(items, Directory)
directories = [
Directory(
target_id=item.target_id,
url=item.url,
status=item.status,
content_length=item.content_length,
words=item.words,
lines=item.lines,
content_type=item.content_type or '',
duration=item.duration
)
for item in unique_items
]
with transaction.atomic():
Directory.objects.bulk_create(
directories,
ignore_conflicts=True,
batch_size=1000
)
logger.debug(f"批量创建 Directory 成功ignore_conflicts: {len(unique_items)}")
return len(unique_items)
except Exception as e:
logger.error(f"批量创建 Directory 失败: {e}")
raise
def count_by_target(self, target_id: int) -> int:
"""统计目标下的目录总数"""
return Directory.objects.filter(target_id=target_id).count()
def get_all(self):
"""获取所有目录"""
return Directory.objects.all().order_by('-discovered_at')
return Directory.objects.all().order_by('-created_at')
def get_by_target(self, target_id: int):
"""获取目标下的所有目录"""
return Directory.objects.filter(target_id=target_id).order_by('-discovered_at')
return Directory.objects.filter(target_id=target_id).order_by('-created_at')
def get_urls_for_export(self, target_id: int, batch_size: int = 1000) -> Iterator[str]:
"""流式导出目标下的所有目录 URL"""
@@ -91,3 +151,31 @@ class DjangoDirectoryRepository:
except Exception as e:
logger.error("流式导出目录 URL 失败 - Target ID: %s, 错误: %s", target_id, e)
raise
def iter_raw_data_for_export(
self,
target_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
batch_size: 每批数据量
Yields:
包含所有目录字段的字典
"""
qs = (
Directory.objects
.filter(target_id=target_id)
.values(
'url', 'status', 'content_length', 'words',
'lines', 'content_type', 'duration', 'created_at'
)
.order_by('url')
)
for row in qs.iterator(chunk_size=batch_size):
yield row
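
deduplicate_for_bulk itself is not shown in this diff. Judging from its call sites (a DTO list plus the model class, deduplicating on the model's unique constraint and keeping the last record), a plausible sketch might look like the following — names and details are assumptions, not the actual implementation:

# Hypothetical sketch of deduplicate_for_bulk, inferred from its call sites.
from typing import List, Sequence, TypeVar

T = TypeVar('T')

def deduplicate_for_bulk(items: Sequence[T], model) -> List[T]:
    """Drop items that would collide on the model's unique constraint,
    keeping the LAST occurrence, so a single bulk_create cannot hit
    'ON CONFLICT DO UPDATE command cannot affect row a second time'."""
    constraint = model._meta.constraints[0]  # assumes one UniqueConstraint
    deduped = {}
    for item in items:  # later items overwrite earlier ones -> keep last
        key = tuple(
            # FK constraint fields like 'target' map to DTO attributes
            # like 'target_id'; the real helper presumably handles this too
            getattr(item, f, getattr(item, f + '_id', None))
            for f in constraint.fields
        )
        deduped[key] = item
    return list(deduped.values())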

View File

@@ -1,11 +1,12 @@
"""Endpoint Repository - Django ORM 实现"""
import logging
from typing import List
from typing import List, Iterator
from apps.asset.models import Endpoint
from apps.asset.dtos.asset import EndpointDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
from django.db import transaction
logger = logging.getLogger(__name__)
@@ -22,6 +23,8 @@ class DjangoEndpointRepository:
存在则更新所有字段,不存在则创建。
使用 Django 原生 update_conflicts。
注意:自动按模型唯一约束去重,保留最后一条记录。
Args:
items: 端点 DTO 列表
@@ -32,6 +35,9 @@ class DjangoEndpointRepository:
return 0
try:
# 自动按模型唯一约束去重
unique_items = deduplicate_for_bulk(items, Endpoint)
# 直接从 DTO 字段构建 Model
endpoints = [
Endpoint(
@@ -42,14 +48,15 @@ class DjangoEndpointRepository:
status_code=item.status_code,
content_length=item.content_length,
webserver=item.webserver or '',
body_preview=item.body_preview or '',
response_body=item.response_body or '',
content_type=item.content_type or '',
tech=item.tech if item.tech else [],
vhost=item.vhost,
location=item.location or '',
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
response_headers=item.response_headers if item.response_headers else ''
)
for item in items
for item in unique_items
]
with transaction.atomic():
@@ -59,14 +66,14 @@ class DjangoEndpointRepository:
unique_fields=['url', 'target'],
update_fields=[
'host', 'title', 'status_code', 'content_length',
'webserver', 'body_preview', 'content_type', 'tech',
'vhost', 'location', 'matched_gf_patterns'
'webserver', 'response_body', 'content_type', 'tech',
'vhost', 'location', 'matched_gf_patterns', 'response_headers'
],
batch_size=1000
)
logger.debug(f"批量 upsert 端点成功: {len(items)}")
return len(items)
logger.debug(f"批量 upsert 端点成功: {len(unique_items)}")
return len(unique_items)
except Exception as e:
logger.error(f"批量 upsert 端点失败: {e}")
@@ -74,7 +81,7 @@ class DjangoEndpointRepository:
def get_all(self):
"""获取所有端点(全局查询)"""
return Endpoint.objects.all().order_by('-discovered_at')
return Endpoint.objects.all().order_by('-created_at')
def get_by_target(self, target_id: int):
"""
@@ -86,7 +93,7 @@ class DjangoEndpointRepository:
Returns:
QuerySet: 端点查询集
"""
return Endpoint.objects.filter(target_id=target_id).order_by('-discovered_at')
return Endpoint.objects.filter(target_id=target_id).order_by('-created_at')
def count_by_target(self, target_id: int) -> int:
"""
@@ -99,3 +106,89 @@ class DjangoEndpointRepository:
int: 端点数量
"""
return Endpoint.objects.filter(target_id=target_id).count()
def bulk_create_ignore_conflicts(self, items: List[EndpointDTO]) -> int:
"""
批量创建端点(存在即跳过)
与 bulk_upsert 不同,此方法不会更新已存在的记录。
适用于快速扫描场景,只提供 URL,没有其他字段数据。
注意:自动按模型唯一约束去重,保留最后一条记录。
Args:
items: 端点 DTO 列表
Returns:
int: 处理的记录数
"""
if not items:
return 0
try:
# 自动按模型唯一约束去重
unique_items = deduplicate_for_bulk(items, Endpoint)
# 直接从 DTO 字段构建 Model
endpoints = [
Endpoint(
target_id=item.target_id,
url=item.url,
host=item.host or '',
title=item.title or '',
status_code=item.status_code,
content_length=item.content_length,
webserver=item.webserver or '',
response_body=item.response_body or '',
content_type=item.content_type or '',
tech=item.tech if item.tech else [],
vhost=item.vhost,
location=item.location or '',
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
response_headers=item.response_headers if item.response_headers else ''
)
for item in unique_items
]
with transaction.atomic():
Endpoint.objects.bulk_create(
endpoints,
ignore_conflicts=True,
batch_size=1000
)
logger.debug(f"批量创建端点成功ignore_conflicts: {len(unique_items)}")
return len(unique_items)
except Exception as e:
logger.error(f"批量创建端点失败: {e}")
raise
def iter_raw_data_for_export(
self,
target_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
batch_size: 每批数据量
Yields:
包含所有端点字段的字典
"""
qs = (
Endpoint.objects
.filter(target_id=target_id)
.values(
'url', 'host', 'location', 'title', 'status_code',
'content_length', 'content_type', 'webserver', 'tech',
'response_body', 'response_headers', 'vhost', 'matched_gf_patterns', 'created_at'
)
.order_by('url')
)
for row in qs.iterator(chunk_size=batch_size):
yield row
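
iter_raw_data_for_export pairs naturally with a streaming CSV response, so the full result set never has to sit in memory. A minimal sketch of a consumer — view name and column selection are illustrative:

# Illustrative consumer: stream iter_raw_data_for_export rows as CSV.
import csv
from django.http import StreamingHttpResponse

class _Echo:
    """Pseudo file object whose write() just returns the value."""
    def write(self, value):
        return value

def export_endpoints_csv(repo, target_id: int) -> StreamingHttpResponse:
    fields = ['url', 'host', 'title', 'status_code', 'created_at']
    writer = csv.DictWriter(_Echo(), fieldnames=fields, extrasaction='ignore')

    def rows():
        yield writer.writeheader()
        for row in repo.iter_raw_data_for_export(target_id):
            yield writer.writerow(row)

    response = StreamingHttpResponse(rows(), content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="endpoints.csv"'
    return response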

View File

@@ -1,32 +1,36 @@
"""HostPortMapping Repository - Django ORM 实现"""
import logging
from typing import List, Iterator
from typing import List, Iterator, Dict, Optional
from django.db.models import QuerySet, Min
from apps.asset.models.asset_models import HostPortMapping
from apps.asset.dtos.asset import HostPortMappingDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@auto_ensure_db_connection
class DjangoHostPortMappingRepository:
"""HostPortMapping Repository - Django ORM 实现"""
"""HostPortMapping Repository - Django ORM 实现
职责:纯数据访问,不包含业务逻辑
"""
def bulk_create_ignore_conflicts(self, items: List[HostPortMappingDTO]) -> int:
"""
批量创建主机端口关联(忽略冲突)
注意:自动按模型唯一约束去重,保留最后一条记录。
Args:
items: 主机端口关联 DTO 列表
Returns:
int: 实际创建的记录数(注意:ignore_conflicts 时可能为 0)
Note:
- 基于唯一约束 (target + host + ip + port) 自动去重
- 忽略已存在的记录,不更新
int: 实际创建的记录数
"""
try:
logger.debug("准备批量创建主机端口关联 - 数量: %d", len(items))
@@ -34,8 +38,10 @@ class DjangoHostPortMappingRepository:
if not items:
logger.debug("主机端口关联为空,跳过创建")
return 0
# 自动按模型唯一约束去重
unique_items = deduplicate_for_bulk(items, HostPortMapping)
# 构建记录对象
records = [
HostPortMapping(
target_id=item.target_id,
@@ -43,10 +49,9 @@ class DjangoHostPortMappingRepository:
ip=item.ip,
port=item.port
)
for item in items
for item in unique_items
]
# 批量创建(忽略冲突,基于唯一约束去重)
created = HostPortMapping.objects.bulk_create(
records,
ignore_conflicts=True
@@ -90,79 +95,47 @@ class DjangoHostPortMappingRepository:
for ip in queryset:
yield ip
def get_ip_aggregation_by_target(self, target_id: int, search: str = None):
from django.db.models import Min
def get_queryset_by_target(self, target_id: int) -> QuerySet:
"""获取目标下的 QuerySet"""
return HostPortMapping.objects.filter(target_id=target_id)
qs = HostPortMapping.objects.filter(target_id=target_id)
if search:
qs = qs.filter(ip__icontains=search)
def get_all_queryset(self) -> QuerySet:
"""获取所有记录的 QuerySet"""
return HostPortMapping.objects.all()
ip_aggregated = (
qs
.values('ip')
.annotate(
discovered_at=Min('discovered_at')
)
.order_by('-discovered_at')
def get_queryset_by_ip(self, ip: str, target_id: Optional[int] = None) -> QuerySet:
"""获取指定 IP 的 QuerySet"""
qs = HostPortMapping.objects.filter(ip=ip)
if target_id:
qs = qs.filter(target_id=target_id)
return qs
def iter_raw_data_for_export(
self,
target_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
batch_size: 每批数据量
Yields:
{
'ip': '192.168.1.1',
'host': 'example.com',
'port': 80,
'created_at': datetime
}
"""
qs = (
HostPortMapping.objects
.filter(target_id=target_id)
.values('ip', 'host', 'port', 'created_at')
.order_by('ip', 'host', 'port')
)
results = []
for item in ip_aggregated:
ip = item['ip']
mappings = (
HostPortMapping.objects
.filter(target_id=target_id, ip=ip)
.values('host', 'port')
.distinct()
)
hosts = sorted({m['host'] for m in mappings})
ports = sorted({m['port'] for m in mappings})
results.append({
'ip': ip,
'hosts': hosts,
'ports': ports,
'discovered_at': item['discovered_at'],
})
return results
def get_all_ip_aggregation(self, search: str = None):
"""获取所有 IP 聚合数据(全局查询)"""
from django.db.models import Min
qs = HostPortMapping.objects.all()
if search:
qs = qs.filter(ip__icontains=search)
ip_aggregated = (
qs
.values('ip')
.annotate(
discovered_at=Min('discovered_at')
)
.order_by('-discovered_at')
)
results = []
for item in ip_aggregated:
ip = item['ip']
mappings = (
HostPortMapping.objects
.filter(ip=ip)
.values('host', 'port')
.distinct()
)
hosts = sorted({m['host'] for m in mappings})
ports = sorted({m['port'] for m in mappings})
results.append({
'ip': ip,
'hosts': hosts,
'ports': ports,
'discovered_at': item['discovered_at'],
})
return results
for row in qs.iterator(chunk_size=batch_size):
yield row
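
One subtlety behind this repository: on PostgreSQL, bulk_create(..., ignore_conflicts=True) returns the full list of objects passed in whether or not rows were actually inserted, so len() of the result is not a count of new rows. A small sketch of the difference, with placeholder values:

# Sketch: why len(bulk_create(...)) is not "rows actually inserted" when
# ignore_conflicts=True. Values are placeholders.
from apps.asset.models.asset_models import HostPortMapping

objs = [HostPortMapping(target_id=1, host='a.example.com', ip='192.0.2.1', port=80)]
created = HostPortMapping.objects.bulk_create(objs, ignore_conflicts=True)
len(created)  # == len(objs), even if every row already existed

# Callers that need an exact created count therefore diff counts around the
# call, as the service-layer bulk_create_* methods further below do.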

View File

@@ -8,6 +8,7 @@ from django.db import transaction
from apps.asset.models.asset_models import Subdomain
from apps.asset.dtos import SubdomainDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -20,6 +21,8 @@ class DjangoSubdomainRepository:
"""
批量创建子域名,忽略冲突
注意:自动按模型唯一约束去重,保留最后一条记录。
Args:
items: 子域名 DTO 列表
"""
@@ -27,12 +30,15 @@ class DjangoSubdomainRepository:
return
try:
# 自动按模型唯一约束去重
unique_items = deduplicate_for_bulk(items, Subdomain)
subdomain_objects = [
Subdomain(
name=item.name,
target_id=item.target_id,
)
for item in items
for item in unique_items
]
with transaction.atomic():
@@ -41,7 +47,7 @@ class DjangoSubdomainRepository:
ignore_conflicts=True,
)
logger.debug(f"成功处理 {len(items)} 条子域名记录")
logger.debug(f"成功处理 {len(unique_items)} 条子域名记录")
except Exception as e:
logger.error(f"批量插入子域名失败: {e}")
@@ -49,11 +55,11 @@ class DjangoSubdomainRepository:
def get_all(self):
"""获取所有子域名"""
return Subdomain.objects.all().order_by('-discovered_at')
return Subdomain.objects.all().order_by('-created_at')
def get_by_target(self, target_id: int):
"""获取目标下的所有子域名"""
return Subdomain.objects.filter(target_id=target_id).order_by('-discovered_at')
return Subdomain.objects.filter(target_id=target_id).order_by('-created_at')
def count_by_target(self, target_id: int) -> int:
"""统计目标下的域名数量"""
@@ -76,3 +82,28 @@ class DjangoSubdomainRepository:
).only('id', 'name')
return {sd.name: sd for sd in subdomains}
def iter_raw_data_for_export(
self,
target_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
batch_size: 每批数据量
Yields:
{'name': 'sub.example.com', 'created_at': datetime}
"""
qs = (
Subdomain.objects
.filter(target_id=target_id)
.values('name', 'created_at')
.order_by('name')
)
for row in qs.iterator(chunk_size=batch_size):
yield row

View File

@@ -3,12 +3,13 @@ Django ORM 实现的 WebSite Repository
"""
import logging
from typing import List, Generator, Optional
from typing import List, Generator, Optional, Iterator
from django.db import transaction
from apps.asset.models.asset_models import WebSite
from apps.asset.dtos import WebSiteDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -24,6 +25,8 @@ class DjangoWebSiteRepository:
存在则更新所有字段,不存在则创建。
使用 Django 原生 update_conflicts。
注意:自动按模型唯一约束去重,保留最后一条记录。
Args:
items: WebSite DTO 列表
@@ -34,6 +37,9 @@ class DjangoWebSiteRepository:
return 0
try:
# 自动按模型唯一约束去重
unique_items = deduplicate_for_bulk(items, WebSite)
# 直接从 DTO 字段构建 Model
websites = [
WebSite(
@@ -43,14 +49,15 @@ class DjangoWebSiteRepository:
location=item.location or '',
title=item.title or '',
webserver=item.webserver or '',
body_preview=item.body_preview or '',
response_body=item.response_body or '',
content_type=item.content_type or '',
tech=item.tech if item.tech else [],
status_code=item.status_code,
content_length=item.content_length,
vhost=item.vhost
vhost=item.vhost,
response_headers=item.response_headers if item.response_headers else ''
)
for item in items
for item in unique_items
]
with transaction.atomic():
@@ -60,14 +67,14 @@ class DjangoWebSiteRepository:
unique_fields=['url', 'target'],
update_fields=[
'host', 'location', 'title', 'webserver',
'body_preview', 'content_type', 'tech',
'status_code', 'content_length', 'vhost'
'response_body', 'content_type', 'tech',
'status_code', 'content_length', 'vhost', 'response_headers'
],
batch_size=1000
)
logger.debug(f"批量 upsert WebSite 成功: {len(items)}")
return len(items)
logger.debug(f"批量 upsert WebSite 成功: {len(unique_items)}")
return len(unique_items)
except Exception as e:
logger.error(f"批量 upsert WebSite 失败: {e}")
@@ -76,13 +83,6 @@ class DjangoWebSiteRepository:
def get_urls_for_export(self, target_id: int, batch_size: int = 1000) -> Generator[str, None, None]:
"""
流式导出目标下的所有站点 URL
Args:
target_id: 目标 ID
batch_size: 批次大小
Yields:
str: 站点 URL
"""
try:
queryset = WebSite.objects.filter(
@@ -97,26 +97,92 @@ class DjangoWebSiteRepository:
def get_all(self):
"""获取所有网站"""
return WebSite.objects.all().order_by('-discovered_at')
return WebSite.objects.all().order_by('-created_at')
def get_by_target(self, target_id: int):
"""获取目标下的所有网站"""
return WebSite.objects.filter(target_id=target_id).order_by('-discovered_at')
return WebSite.objects.filter(target_id=target_id).order_by('-created_at')
def count_by_target(self, target_id: int) -> int:
"""统计目标下的站点总数"""
return WebSite.objects.filter(target_id=target_id).count()
def get_by_url(self, url: str, target_id: int) -> Optional[int]:
"""
根据 URL 和 target_id 查找站点 ID
Args:
url: 站点 URL
target_id: 目标 ID
Returns:
Optional[int]: 站点 ID如果不存在返回 None
"""
"""根据 URL 和 target_id 查找站点 ID"""
website = WebSite.objects.filter(url=url, target_id=target_id).first()
return website.id if website else None
def bulk_create_ignore_conflicts(self, items: List[WebSiteDTO]) -> int:
"""
批量创建 WebSite(存在即跳过)
注意:自动按模型唯一约束去重,保留最后一条记录。
"""
if not items:
return 0
try:
# 自动按模型唯一约束去重
unique_items = deduplicate_for_bulk(items, WebSite)
websites = [
WebSite(
target_id=item.target_id,
url=item.url,
host=item.host or '',
location=item.location or '',
title=item.title or '',
webserver=item.webserver or '',
response_body=item.response_body or '',
content_type=item.content_type or '',
tech=item.tech if item.tech else [],
status_code=item.status_code,
content_length=item.content_length,
vhost=item.vhost,
response_headers=item.response_headers if item.response_headers else ''
)
for item in unique_items
]
with transaction.atomic():
WebSite.objects.bulk_create(
websites,
ignore_conflicts=True,
batch_size=1000
)
logger.debug(f"批量创建 WebSite 成功ignore_conflicts: {len(unique_items)}")
return len(unique_items)
except Exception as e:
logger.error(f"批量创建 WebSite 失败: {e}")
raise
def iter_raw_data_for_export(
self,
target_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
batch_size: 每批数据量
Yields:
包含所有网站字段的字典
"""
qs = (
WebSite.objects
.filter(target_id=target_id)
.values(
'url', 'host', 'location', 'title', 'status_code',
'content_length', 'content_type', 'webserver', 'tech',
'response_body', 'response_headers', 'vhost', 'created_at'
)
.order_by('url')
)
for row in qs.iterator(chunk_size=batch_size):
yield row
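
The upsert path relies on Django's update_conflicts support (Django 4.1+ on PostgreSQL), which compiles to INSERT ... ON CONFLICT ... DO UPDATE. Stripped to its essentials, the pattern used above is:

# Core upsert pattern behind bulk_upsert (requires Django 4.1+ and
# PostgreSQL); `websites` is the list of WebSite instances built from DTOs.
WebSite.objects.bulk_create(
    websites,
    update_conflicts=True,            # INSERT ... ON CONFLICT DO UPDATE
    unique_fields=['url', 'target'],  # conflict target = the unique constraint
    update_fields=[                   # columns refreshed on conflict
        'host', 'location', 'title', 'webserver',
        'response_body', 'content_type', 'tech',
        'status_code', 'content_length', 'vhost', 'response_headers',
    ],
    batch_size=1000,
)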

View File

@@ -1,12 +1,13 @@
"""Directory Snapshot Repository - 目录快照数据访问层"""
import logging
from typing import List
from typing import List, Iterator
from django.db import transaction
from apps.asset.models import DirectorySnapshot
from apps.asset.dtos.snapshot import DirectorySnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -25,6 +26,8 @@ class DjangoDirectorySnapshotRepository:
使用 ignore_conflicts 策略,如果快照已存在(相同 scan + url)则跳过
注意:会自动按 (scan_id, url) 去重,保留最后一条记录。
Args:
items: 目录快照 DTO 列表
@@ -37,6 +40,9 @@ class DjangoDirectorySnapshotRepository:
return
try:
# 根据模型唯一约束自动去重
unique_items = deduplicate_for_bulk(items, DirectorySnapshot)
# 转换为 Django 模型对象
snapshot_objects = [
DirectorySnapshot(
@@ -49,7 +55,7 @@ class DjangoDirectorySnapshotRepository:
content_type=item.content_type,
duration=item.duration
)
for item in items
for item in unique_items
]
with transaction.atomic():
@@ -60,7 +66,7 @@ class DjangoDirectorySnapshotRepository:
ignore_conflicts=True
)
logger.debug("成功保存 %d 条目录快照记录", len(items))
logger.debug("成功保存 %d 条目录快照记录", len(unique_items))
except Exception as e:
logger.error(
@@ -72,7 +78,35 @@ class DjangoDirectorySnapshotRepository:
raise
def get_by_scan(self, scan_id: int):
return DirectorySnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
return DirectorySnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')
def get_all(self):
return DirectorySnapshot.objects.all().order_by('-discovered_at')
return DirectorySnapshot.objects.all().order_by('-created_at')
def iter_raw_data_for_export(
self,
scan_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
batch_size: 每批数据量
Yields:
包含所有目录字段的字典
"""
qs = (
DirectorySnapshot.objects
.filter(scan_id=scan_id)
.values(
'url', 'status', 'content_length', 'words',
'lines', 'content_type', 'duration', 'created_at'
)
.order_by('url')
)
for row in qs.iterator(chunk_size=batch_size):
yield row

View File

@@ -1,11 +1,12 @@
"""EndpointSnapshot Repository - Django ORM 实现"""
import logging
from typing import List
from typing import List, Iterator
from apps.asset.models.snapshot_models import EndpointSnapshot
from apps.asset.dtos.snapshot import EndpointSnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -18,6 +19,8 @@ class DjangoEndpointSnapshotRepository:
"""
保存端点快照
注意:会自动按 (scan_id, url) 去重,保留最后一条记录。
Args:
items: 端点快照 DTO 列表
@@ -31,13 +34,17 @@ class DjangoEndpointSnapshotRepository:
if not items:
logger.debug("端点快照为空,跳过保存")
return
# 根据模型唯一约束自动去重
unique_items = deduplicate_for_bulk(items, EndpointSnapshot)
# 构建快照对象
snapshots = []
for item in items:
for item in unique_items:
snapshots.append(EndpointSnapshot(
scan_id=item.scan_id,
url=item.url,
host=item.host if item.host else '',
title=item.title,
status_code=item.status_code,
content_length=item.content_length,
@@ -45,9 +52,10 @@ class DjangoEndpointSnapshotRepository:
webserver=item.webserver,
content_type=item.content_type,
tech=item.tech if item.tech else [],
body_preview=item.body_preview,
response_body=item.response_body,
vhost=item.vhost,
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
response_headers=item.response_headers if item.response_headers else ''
))
# 批量创建(忽略冲突,基于唯一约束去重)
@@ -68,7 +76,36 @@ class DjangoEndpointSnapshotRepository:
raise
def get_by_scan(self, scan_id: int):
return EndpointSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
return EndpointSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')
def get_all(self):
return EndpointSnapshot.objects.all().order_by('-discovered_at')
return EndpointSnapshot.objects.all().order_by('-created_at')
def iter_raw_data_for_export(
self,
scan_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
batch_size: 每批数据量
Yields:
包含所有端点字段的字典
"""
qs = (
EndpointSnapshot.objects
.filter(scan_id=scan_id)
.values(
'url', 'host', 'location', 'title', 'status_code',
'content_length', 'content_type', 'webserver', 'tech',
'response_body', 'response_headers', 'vhost', 'matched_gf_patterns', 'created_at'
)
.order_by('url')
)
for row in qs.iterator(chunk_size=batch_size):
yield row

View File

@@ -6,6 +6,7 @@ from typing import List, Iterator
from apps.asset.models.snapshot_models import HostPortMappingSnapshot
from apps.asset.dtos.snapshot import HostPortMappingSnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -18,6 +19,8 @@ class DjangoHostPortMappingSnapshotRepository:
"""
保存主机端口关联快照
注意:会自动按 (scan_id, host, ip, port) 去重,保留最后一条记录。
Args:
items: 主机端口关联快照 DTO 列表
@@ -31,10 +34,13 @@ class DjangoHostPortMappingSnapshotRepository:
if not items:
logger.debug("主机端口关联快照为空,跳过保存")
return
# 根据模型唯一约束自动去重
unique_items = deduplicate_for_bulk(items, HostPortMappingSnapshot)
# 构建快照对象
snapshots = []
for item in items:
for item in unique_items:
snapshots.append(HostPortMappingSnapshot(
scan_id=item.scan_id,
host=item.host,
@@ -59,20 +65,28 @@ class DjangoHostPortMappingSnapshotRepository:
)
raise
def get_ip_aggregation_by_scan(self, scan_id: int, search: str = None):
def get_ip_aggregation_by_scan(self, scan_id: int, filter_query: str = None):
from django.db.models import Min
from apps.common.utils.filter_utils import apply_filters
qs = HostPortMappingSnapshot.objects.filter(scan_id=scan_id)
if search:
qs = qs.filter(ip__icontains=search)
# 应用智能过滤
if filter_query:
field_mapping = {
'ip': 'ip',
'port': 'port',
'host': 'host',
}
qs = apply_filters(qs, filter_query, field_mapping)
ip_aggregated = (
qs
.values('ip')
.annotate(
discovered_at=Min('discovered_at')
created_at=Min('created_at')
)
.order_by('-discovered_at')
.order_by('-created_at')
)
results = []
@@ -92,24 +106,32 @@ class DjangoHostPortMappingSnapshotRepository:
'ip': ip,
'hosts': hosts,
'ports': ports,
'discovered_at': item['discovered_at'],
'created_at': item['created_at'],
})
return results
def get_all_ip_aggregation(self, search: str = None):
def get_all_ip_aggregation(self, filter_query: str = None):
"""获取所有 IP 聚合数据"""
from django.db.models import Min
from apps.common.utils.filter_utils import apply_filters
qs = HostPortMappingSnapshot.objects.all()
if search:
qs = qs.filter(ip__icontains=search)
# 应用智能过滤
if filter_query:
field_mapping = {
'ip': 'ip',
'port': 'port',
'host': 'host',
}
qs = apply_filters(qs, filter_query, field_mapping)
ip_aggregated = (
qs
.values('ip')
.annotate(discovered_at=Min('discovered_at'))
.order_by('-discovered_at')
.annotate(created_at=Min('created_at'))
.order_by('-created_at')
)
results = []
@@ -127,7 +149,7 @@ class DjangoHostPortMappingSnapshotRepository:
'ip': ip,
'hosts': hosts,
'ports': ports,
'discovered_at': item['discovered_at'],
'created_at': item['created_at'],
})
return results
@@ -143,3 +165,33 @@ class DjangoHostPortMappingSnapshotRepository:
)
for ip in queryset:
yield ip
def iter_raw_data_for_export(
self,
scan_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
batch_size: 每批数据量
Yields:
{
'ip': '192.168.1.1',
'host': 'example.com',
'port': 80,
'created_at': datetime
}
"""
qs = (
HostPortMappingSnapshot.objects
.filter(scan_id=scan_id)
.values('ip', 'host', 'port', 'created_at')
.order_by('ip', 'host', 'port')
)
for row in qs.iterator(chunk_size=batch_size):
yield row
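
apply_filters and the smart-filter grammar live in apps.common.utils.filter_utils, which this diff does not include. Purely to illustrate the shape such a helper could take — assuming a simple space-separated key=value grammar, which may not match the real implementation:

# Hypothetical sketch only; the real apply_filters is not shown in this diff.
def apply_filters(qs, filter_query: str, field_mapping: dict):
    for token in filter_query.split():
        if '=' not in token:
            continue
        key, _, value = token.partition('=')
        orm_field = field_mapping.get(key)
        if orm_field:  # unknown keys are silently ignored
            qs = qs.filter(**{orm_field: value})
    return qs

# e.g. filter_query='ip=192.0.2.1 port=443' with the mapping above would
# translate to .filter(ip='192.0.2.1').filter(port='443')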

View File

@@ -1,11 +1,12 @@
"""Django ORM 实现的 SubdomainSnapshot Repository"""
import logging
from typing import List
from typing import List, Iterator
from apps.asset.models.snapshot_models import SubdomainSnapshot
from apps.asset.dtos import SubdomainSnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -18,6 +19,8 @@ class DjangoSubdomainSnapshotRepository:
"""
保存子域名快照
注意:会自动按 (scan_id, name) 去重,保留最后一条记录。
Args:
items: 子域名快照 DTO 列表
@@ -31,10 +34,13 @@ class DjangoSubdomainSnapshotRepository:
if not items:
logger.debug("子域名快照为空,跳过保存")
return
# 根据模型唯一约束自动去重
unique_items = deduplicate_for_bulk(items, SubdomainSnapshot)
# 构建快照对象
snapshots = []
for item in items:
for item in unique_items:
snapshots.append(SubdomainSnapshot(
scan_id=item.scan_id,
name=item.name,
@@ -55,7 +61,32 @@ class DjangoSubdomainSnapshotRepository:
raise
def get_by_scan(self, scan_id: int):
return SubdomainSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
return SubdomainSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')
def get_all(self):
return SubdomainSnapshot.objects.all().order_by('-discovered_at')
return SubdomainSnapshot.objects.all().order_by('-created_at')
def iter_raw_data_for_export(
self,
scan_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
batch_size: 每批数据量
Yields:
{'name': 'sub.example.com', 'created_at': datetime}
"""
qs = (
SubdomainSnapshot.objects
.filter(scan_id=scan_id)
.values('name', 'created_at')
.order_by('name')
)
for row in qs.iterator(chunk_size=batch_size):
yield row

View File

@@ -8,6 +8,7 @@ from django.db import transaction
from apps.asset.models import VulnerabilitySnapshot
from apps.asset.dtos.snapshot import VulnerabilitySnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -21,12 +22,17 @@ class DjangoVulnerabilitySnapshotRepository:
使用 ``ignore_conflicts`` 策略,如果快照已存在则跳过。
具体唯一约束由数据库模型控制。
注意:会自动按唯一约束字段去重,保留最后一条记录。
"""
if not items:
logger.warning("漏洞快照列表为空,跳过保存")
return
try:
# 根据模型唯一约束自动去重
unique_items = deduplicate_for_bulk(items, VulnerabilitySnapshot)
snapshot_objects = [
VulnerabilitySnapshot(
scan_id=item.scan_id,
@@ -38,7 +44,7 @@ class DjangoVulnerabilitySnapshotRepository:
description=item.description,
raw_output=item.raw_output,
)
for item in items
for item in unique_items
]
with transaction.atomic():
@@ -47,7 +53,7 @@ class DjangoVulnerabilitySnapshotRepository:
ignore_conflicts=True,
)
logger.debug("成功保存 %d 条漏洞快照记录", len(items))
logger.debug("成功保存 %d 条漏洞快照记录", len(unique_items))
except Exception as e:
logger.error(
@@ -60,7 +66,7 @@ class DjangoVulnerabilitySnapshotRepository:
def get_by_scan(self, scan_id: int):
"""按扫描任务获取漏洞快照 QuerySet。"""
return VulnerabilitySnapshot.objects.filter(scan_id=scan_id).order_by("-discovered_at")
return VulnerabilitySnapshot.objects.filter(scan_id=scan_id).order_by("-created_at")
def get_all(self):
return VulnerabilitySnapshot.objects.all().order_by('-discovered_at')
return VulnerabilitySnapshot.objects.all().order_by('-created_at')

View File

@@ -1,11 +1,12 @@
"""WebsiteSnapshot Repository - Django ORM 实现"""
import logging
from typing import List
from typing import List, Iterator
from apps.asset.models.snapshot_models import WebsiteSnapshot
from apps.asset.dtos.snapshot import WebsiteSnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
logger = logging.getLogger(__name__)
@@ -18,6 +19,8 @@ class DjangoWebsiteSnapshotRepository:
"""
保存网站快照
注意:会自动按 (scan_id, url) 去重,保留最后一条记录。
Args:
items: 网站快照 DTO 列表
@@ -31,23 +34,27 @@ class DjangoWebsiteSnapshotRepository:
if not items:
logger.debug("网站快照为空,跳过保存")
return
# 根据模型唯一约束自动去重
unique_items = deduplicate_for_bulk(items, WebsiteSnapshot)
# 构建快照对象
snapshots = []
for item in items:
for item in unique_items:
snapshots.append(WebsiteSnapshot(
scan_id=item.scan_id,
url=item.url,
host=item.host,
title=item.title,
status=item.status,
status_code=item.status_code,
content_length=item.content_length,
location=item.location,
web_server=item.web_server,
webserver=item.webserver,
content_type=item.content_type,
tech=item.tech if item.tech else [],
body_preview=item.body_preview,
vhost=item.vhost
response_body=item.response_body,
vhost=item.vhost,
response_headers=item.response_headers if item.response_headers else ''
))
# 批量创建(忽略冲突,基于唯一约束去重)
@@ -68,7 +75,36 @@ class DjangoWebsiteSnapshotRepository:
raise
def get_by_scan(self, scan_id: int):
return WebsiteSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
return WebsiteSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')
def get_all(self):
return WebsiteSnapshot.objects.all().order_by('-discovered_at')
return WebsiteSnapshot.objects.all().order_by('-created_at')
def iter_raw_data_for_export(
self,
scan_id: int,
batch_size: int = 1000
) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
batch_size: 每批数据量
Yields:
包含所有网站字段的字典
"""
qs = (
WebsiteSnapshot.objects
.filter(scan_id=scan_id)
.values(
'url', 'host', 'location', 'title', 'status_code',
'content_length', 'content_type', 'webserver', 'tech',
'response_body', 'response_headers', 'vhost', 'created_at'
)
.order_by('url')
)
for row in qs.iterator(chunk_size=batch_size):
yield row

View File

@@ -26,9 +26,9 @@ class SubdomainSerializer(serializers.ModelSerializer):
class Meta:
model = Subdomain
fields = [
'id', 'name', 'discovered_at', 'target'
'id', 'name', 'created_at', 'target'
]
read_only_fields = ['id', 'discovered_at']
read_only_fields = ['id', 'created_at']
class SubdomainListSerializer(serializers.ModelSerializer):
@@ -41,9 +41,9 @@ class SubdomainListSerializer(serializers.ModelSerializer):
class Meta:
model = Subdomain
fields = [
'id', 'name', 'discovered_at'
'id', 'name', 'created_at'
]
read_only_fields = ['id', 'discovered_at']
read_only_fields = ['id', 'created_at']
# class IPAddressListSerializer(serializers.ModelSerializer):
@@ -67,9 +67,10 @@ class SubdomainListSerializer(serializers.ModelSerializer):
class WebSiteSerializer(serializers.ModelSerializer):
"""站点序列化器"""
"""站点序列化器(目标详情页)"""
subdomain = serializers.CharField(source='subdomain.name', allow_blank=True, default='')
responseHeaders = serializers.CharField(source='response_headers', read_only=True) # 原始HTTP响应头
class Meta:
model = WebSite
@@ -83,11 +84,12 @@ class WebSiteSerializer(serializers.ModelSerializer):
'content_type',
'status_code',
'content_length',
'body_preview',
'response_body',
'tech',
'vhost',
'responseHeaders', # HTTP响应头
'subdomain',
'discovered_at',
'created_at',
]
read_only_fields = fields
@@ -107,7 +109,7 @@ class VulnerabilitySerializer(serializers.ModelSerializer):
'cvss_score',
'description',
'raw_output',
'discovered_at',
'created_at',
]
read_only_fields = fields
@@ -126,7 +128,7 @@ class VulnerabilitySnapshotSerializer(serializers.ModelSerializer):
'cvss_score',
'description',
'raw_output',
'discovered_at',
'created_at',
]
read_only_fields = fields
@@ -134,12 +136,13 @@ class VulnerabilitySnapshotSerializer(serializers.ModelSerializer):
class EndpointListSerializer(serializers.ModelSerializer):
"""端点列表序列化器(用于目标端点列表页)"""
# GF 匹配模式,映射为前端使用的 tags 字段
tags = serializers.ListField(
# GF 匹配模式(gf-patterns 工具匹配的敏感 URL 模式)
gfPatterns = serializers.ListField(
child=serializers.CharField(),
source='matched_gf_patterns',
read_only=True,
)
responseHeaders = serializers.CharField(source='response_headers', read_only=True) # 原始HTTP响应头
class Meta:
model = Endpoint
@@ -152,11 +155,12 @@ class EndpointListSerializer(serializers.ModelSerializer):
'content_length',
'content_type',
'webserver',
'body_preview',
'response_body',
'tech',
'vhost',
'tags',
'discovered_at',
'responseHeaders', # HTTP响应头
'gfPatterns',
'created_at',
]
read_only_fields = fields
@@ -164,7 +168,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
class DirectorySerializer(serializers.ModelSerializer):
"""目录序列化器"""
discovered_at = serializers.DateTimeField(read_only=True)
created_at = serializers.DateTimeField(read_only=True)
class Meta:
model = Directory
@@ -177,7 +181,7 @@ class DirectorySerializer(serializers.ModelSerializer):
'lines',
'content_type',
'duration',
'discovered_at',
'created_at',
]
read_only_fields = fields
@@ -190,12 +194,12 @@ class IPAddressAggregatedSerializer(serializers.Serializer):
- ip: IP 地址
- hosts: 该 IP 关联的所有主机名列表
- ports: 该 IP 关联的所有端口列表
- discovered_at: 首次发现时间
- created_at: 创建时间
"""
ip = serializers.IPAddressField(read_only=True)
hosts = serializers.ListField(child=serializers.CharField(), read_only=True)
ports = serializers.ListField(child=serializers.IntegerField(), read_only=True)
discovered_at = serializers.DateTimeField(read_only=True)
created_at = serializers.DateTimeField(read_only=True)
# ==================== 快照序列化器 ====================
@@ -205,7 +209,7 @@ class SubdomainSnapshotSerializer(serializers.ModelSerializer):
class Meta:
model = SubdomainSnapshot
fields = ['id', 'name', 'discovered_at']
fields = ['id', 'name', 'created_at']
read_only_fields = fields
@@ -213,8 +217,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
"""网站快照序列化器(用于扫描历史)"""
subdomain_name = serializers.CharField(source='subdomain.name', read_only=True)
webserver = serializers.CharField(source='web_server', read_only=True) # 映射字段名
status_code = serializers.IntegerField(source='status', read_only=True) # 映射字段名
responseHeaders = serializers.CharField(source='response_headers', read_only=True) # 原始HTTP响应头
class Meta:
model = WebsiteSnapshot
@@ -223,15 +226,16 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
'url',
'location',
'title',
'webserver', # 使用映射后的字段名
'webserver',
'content_type',
'status_code', # 使用映射后的字段名
'status_code',
'content_length',
'body_preview',
'response_body',
'tech',
'vhost',
'responseHeaders', # HTTP响应头
'subdomain_name',
'discovered_at',
'created_at',
]
read_only_fields = fields
@@ -250,7 +254,7 @@ class DirectorySnapshotSerializer(serializers.ModelSerializer):
'lines',
'content_type',
'duration',
'discovered_at',
'created_at',
]
read_only_fields = fields
@@ -258,12 +262,13 @@ class DirectorySnapshotSerializer(serializers.ModelSerializer):
class EndpointSnapshotSerializer(serializers.ModelSerializer):
"""端点快照序列化器(用于扫描历史)"""
# GF 匹配模式,映射为前端使用的 tags 字段
tags = serializers.ListField(
# GF 匹配模式(gf-patterns 工具匹配的敏感 URL 模式)
gfPatterns = serializers.ListField(
child=serializers.CharField(),
source='matched_gf_patterns',
read_only=True,
)
responseHeaders = serializers.CharField(source='response_headers', read_only=True) # 原始HTTP响应头
class Meta:
model = EndpointSnapshot
@@ -277,10 +282,11 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
'content_type',
'status_code',
'content_length',
'body_preview',
'response_body',
'tech',
'vhost',
'tags',
'discovered_at',
'responseHeaders', # HTTP响应头
'gfPatterns',
'created_at',
]
read_only_fields = fields
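
The camelCase fields introduced here (responseHeaders, gfPatterns) rely on DRF's source argument, which renames a model attribute in the serialized output without touching the model. In isolation, the pattern looks like this:

# Sketch of the renaming pattern used above: `source` maps a snake_case
# attribute to a camelCase key in the JSON output.
from rest_framework import serializers

class ExampleSerializer(serializers.Serializer):
    responseHeaders = serializers.CharField(source='response_headers', read_only=True)
    gfPatterns = serializers.ListField(
        child=serializers.CharField(),
        source='matched_gf_patterns',
        read_only=True,
    )

# Serializing {'response_headers': 'Server: nginx', 'matched_gf_patterns': ['xss']}
# yields {'responseHeaders': 'Server: nginx', 'gfPatterns': ['xss']}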

View File

@@ -1,10 +1,12 @@
"""Directory Service - 目录业务逻辑层"""
import logging
from typing import List, Iterator
from typing import List, Iterator, Optional
from apps.asset.repositories import DjangoDirectoryRepository
from apps.asset.dtos import DirectoryDTO
from apps.common.validators import is_valid_url, is_url_match_target
from apps.common.utils.filter_utils import apply_filters
logger = logging.getLogger(__name__)
@@ -12,6 +14,12 @@ logger = logging.getLogger(__name__)
class DirectoryService:
"""目录业务逻辑层"""
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'url': 'url',
'status': 'status',
}
def __init__(self, repository=None):
"""初始化目录服务"""
self.repo = repository or DjangoDirectoryRepository()
@@ -37,17 +45,91 @@ class DirectoryService:
logger.error(f"批量 upsert 目录失败: {e}")
raise
def get_directories_by_target(self, target_id: int):
"""获取目标下的所有目录"""
return self.repo.get_by_target(target_id)
def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
"""
批量创建目录(仅 URL,使用 ignore_conflicts)
验证 URL 格式和匹配,过滤无效/不匹配 URL去重后批量创建。
已存在的记录会被跳过。
Args:
target_id: 目标 ID
target_name: 目标名称(用于匹配验证)
target_type: 目标类型 ('domain', 'ip', 'cidr')
urls: URL 列表
Returns:
int: 实际创建的记录数
"""
if not urls:
return 0
# 过滤有效 URL 并去重
valid_urls = []
seen = set()
for url in urls:
if not isinstance(url, str):
continue
url = url.strip()
if not url or url in seen:
continue
if not is_valid_url(url):
continue
# 匹配验证(前端已阻止不匹配的提交,后端作为双重保障)
if not is_url_match_target(url, target_name, target_type):
continue
seen.add(url)
valid_urls.append(url)
if not valid_urls:
return 0
# 获取创建前的数量
count_before = self.repo.count_by_target(target_id)
# 创建 DTO 列表并批量创建
directory_dtos = [
DirectoryDTO(url=url, target_id=target_id)
for url in valid_urls
]
self.repo.bulk_create_ignore_conflicts(directory_dtos)
# 获取创建后的数量
count_after = self.repo.count_by_target(target_id)
return count_after - count_before
def get_all(self):
def get_directories_by_target(self, target_id: int, filter_query: Optional[str] = None):
"""获取目标下的所有目录"""
queryset = self.repo.get_by_target(target_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def get_all(self, filter_query: Optional[str] = None):
"""获取所有目录"""
return self.repo.get_all()
queryset = self.repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def iter_directory_urls_by_target(self, target_id: int, chunk_size: int = 1000) -> Iterator[str]:
"""流式获取目标下的所有目录 URL"""
return self.repo.get_urls_for_export(target_id=target_id, batch_size=chunk_size)
def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
Yields:
原始数据字典
"""
return self.repo.iter_raw_data_for_export(target_id=target_id)
__all__ = ['DirectoryService']
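
A sketch of how bulk_create_urls is meant to be called (argument values are illustrative). Note the returned count is derived from a count-before/count-after diff, so concurrent writers on the same target could skew it slightly:

# Example call with illustrative values.
service = DirectoryService()
created = service.bulk_create_urls(
    target_id=1,
    target_name='example.com',
    target_type='domain',
    urls=['https://example.com/admin/', 'not a url', 'https://other.org/x'],
)
# 'not a url' fails is_valid_url and 'https://other.org/x' fails
# is_url_match_target, so at most one record is created here.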

View File

@@ -5,10 +5,12 @@ Endpoint 服务层
"""
import logging
from typing import List, Iterator
from typing import List, Iterator, Optional
from apps.asset.dtos.asset import EndpointDTO
from apps.asset.repositories.asset import DjangoEndpointRepository
from apps.common.validators import is_valid_url, is_url_match_target
from apps.common.utils.filter_utils import apply_filters
logger = logging.getLogger(__name__)
@@ -20,6 +22,15 @@ class EndpointService:
提供 Endpoint(URL/端点)相关的业务逻辑
"""
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'url': 'url',
'host': 'host',
'title': 'title',
'status_code': 'status_code',
'tech': 'tech',
}
def __init__(self):
"""初始化 Endpoint 服务"""
self.repo = DjangoEndpointRepository()
@@ -45,9 +56,68 @@ class EndpointService:
logger.error(f"批量 upsert 端点失败: {e}")
raise
def get_endpoints_by_target(self, target_id: int):
def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
"""
批量创建端点(仅 URL,使用 ignore_conflicts)
验证 URL 格式和匹配,过滤无效/不匹配 URL去重后批量创建。
已存在的记录会被跳过。
Args:
target_id: 目标 ID
target_name: 目标名称(用于匹配验证)
target_type: 目标类型 ('domain', 'ip', 'cidr')
urls: URL 列表
Returns:
int: 实际创建的记录数
"""
if not urls:
return 0
# 过滤有效 URL 并去重
valid_urls = []
seen = set()
for url in urls:
if not isinstance(url, str):
continue
url = url.strip()
if not url or url in seen:
continue
if not is_valid_url(url):
continue
# 匹配验证(前端已阻止不匹配的提交,后端作为双重保障)
if not is_url_match_target(url, target_name, target_type):
continue
seen.add(url)
valid_urls.append(url)
if not valid_urls:
return 0
# 获取创建前的数量
count_before = self.repo.count_by_target(target_id)
# 创建 DTO 列表并批量创建
endpoint_dtos = [
EndpointDTO(url=url, target_id=target_id)
for url in valid_urls
]
self.repo.bulk_create_ignore_conflicts(endpoint_dtos)
# 获取创建后的数量
count_after = self.repo.count_by_target(target_id)
return count_after - count_before
def get_endpoints_by_target(self, target_id: int, filter_query: Optional[str] = None):
"""获取目标下的所有端点"""
return self.repo.get_by_target(target_id)
queryset = self.repo.get_by_target(target_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
return queryset
def count_endpoints_by_target(self, target_id: int) -> int:
"""
@@ -61,12 +131,27 @@ class EndpointService:
"""
return self.repo.count_by_target(target_id)
def get_all(self):
def get_all(self, filter_query: Optional[str] = None):
"""获取所有端点(全局查询)"""
return self.repo.get_all()
queryset = self.repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
return queryset
def iter_endpoint_urls_by_target(self, target_id: int, chunk_size: int = 1000) -> Iterator[str]:
"""流式获取目标下的所有端点 URL用于导出。"""
queryset = self.repo.get_by_target(target_id)
for url in queryset.values_list('url', flat=True).iterator(chunk_size=chunk_size):
yield url
def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
Yields:
原始数据字典
"""
return self.repo.iter_raw_data_for_export(target_id=target_id)
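
get_endpoints_by_target passes json_array_fields=['tech'] so that tech filters can use array semantics instead of plain text matching. In ORM terms this presumably ends up as an ArrayField containment lookup, which the GinIndex on tech can serve — the exact translation is up to apply_filters, which this diff does not show:

# Presumed ORM shape of a tech filter.
from apps.asset.models import Endpoint

Endpoint.objects.filter(tech__contains=['nginx'])           # array containment
Endpoint.objects.filter(tech__overlap=['nginx', 'apache'])  # any-of match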

View File

@@ -1,16 +1,31 @@
"""HostPortMapping Service - 业务逻辑层"""
import logging
from typing import List, Iterator
from typing import List, Iterator, Optional, Dict
from django.db.models import Min
from apps.asset.repositories.asset import DjangoHostPortMappingRepository
from apps.asset.dtos.asset import HostPortMappingDTO
from apps.common.utils.filter_utils import apply_filters
logger = logging.getLogger(__name__)
class HostPortMappingService:
"""主机端口映射服务 - 负责主机端口映射数据的业务逻辑"""
"""主机端口映射服务 - 负责主机端口映射数据的业务逻辑
职责:
- 业务逻辑处理(过滤、聚合)
- 调用 Repository 进行数据访问
"""
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'ip': 'ip',
'port': 'port',
'host': 'host',
}
def __init__(self):
self.repo = DjangoHostPortMappingRepository()
@@ -49,13 +64,106 @@ class HostPortMappingService:
def iter_host_port_by_target(self, target_id: int, batch_size: int = 1000):
return self.repo.get_for_export(target_id=target_id, batch_size=batch_size)
def get_ip_aggregation_by_target(self, target_id: int, search: str = None):
return self.repo.get_ip_aggregation_by_target(target_id, search=search)
def get_ip_aggregation_by_target(
self,
target_id: int,
filter_query: Optional[str] = None
) -> List[Dict]:
"""获取目标下的 IP 聚合数据
Args:
target_id: 目标 ID
filter_query: 智能过滤语法字符串
Returns:
聚合后的 IP 数据列表
"""
# 从 Repository 获取基础 QuerySet
qs = self.repo.get_queryset_by_target(target_id)
# Service 层应用过滤逻辑
if filter_query:
qs = apply_filters(qs, filter_query, self.FILTER_FIELD_MAPPING)
# Service 层处理聚合逻辑
return self._aggregate_by_ip(qs, filter_query, target_id=target_id)
def get_all_ip_aggregation(self, search: str = None):
"""获取所有 IP 聚合数据(全局查询)"""
return self.repo.get_all_ip_aggregation(search=search)
def get_all_ip_aggregation(self, filter_query: Optional[str] = None) -> List[Dict]:
"""获取所有 IP 聚合数据(全局查询)
Args:
filter_query: 智能过滤语法字符串
Returns:
聚合后的 IP 数据列表
"""
# 从 Repository 获取基础 QuerySet
qs = self.repo.get_all_queryset()
# Service 层应用过滤逻辑
if filter_query:
qs = apply_filters(qs, filter_query, self.FILTER_FIELD_MAPPING)
# Service 层处理聚合逻辑
return self._aggregate_by_ip(qs, filter_query)
def _aggregate_by_ip(
self,
qs,
filter_query: Optional[str] = None,
target_id: Optional[int] = None
) -> List[Dict]:
"""按 IP 聚合数据
Args:
qs: 已过滤的 QuerySet
filter_query: 过滤条件(用于子查询)
target_id: 目标 ID用于子查询限定范围
Returns:
聚合后的数据列表
"""
ip_aggregated = (
qs
.values('ip')
.annotate(created_at=Min('created_at'))
.order_by('-created_at')
)
results = []
for item in ip_aggregated:
ip = item['ip']
# 获取该 IP 的所有 host 和 port也需要应用过滤条件
mappings_qs = self.repo.get_queryset_by_ip(ip, target_id=target_id)
if filter_query:
mappings_qs = apply_filters(mappings_qs, filter_query, self.FILTER_FIELD_MAPPING)
mappings = mappings_qs.values('host', 'port').distinct()
hosts = sorted({m['host'] for m in mappings})
ports = sorted({m['port'] for m in mappings})
results.append({
'ip': ip,
'hosts': hosts,
'ports': ports,
'created_at': item['created_at'],
})
return results
def iter_ips_by_target(self, target_id: int, batch_size: int = 1000) -> Iterator[str]:
"""流式获取目标下的所有唯一 IP 地址。"""
return self.repo.get_ips_for_export(target_id=target_id, batch_size=batch_size)
def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
Yields:
原始数据字典 {ip, host, port, created_at}
"""
return self.repo.iter_raw_data_for_export(target_id=target_id)
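
_aggregate_by_ip issues one extra query per IP to collect its hosts and ports. Should that ever become a bottleneck, PostgreSQL can do the same aggregation in a single query with ArrayAgg — a possible optimization sketch, not what this diff implements:

# Single-query alternative to the per-IP loop, using PostgreSQL ArrayAgg.
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import Min
from apps.asset.models.asset_models import HostPortMapping

qs = HostPortMapping.objects.filter(target_id=1)  # illustrative target
rows = (
    qs.values('ip')
    .annotate(
        created_at=Min('created_at'),
        hosts=ArrayAgg('host', distinct=True),
        ports=ArrayAgg('port', distinct=True),
    )
    .order_by('-created_at')
)
# Each row: {'ip': ..., 'created_at': ..., 'hosts': [...], 'ports': [...]};
# unlike the loop above, the aggregated arrays come back unsorted.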

View File

@@ -1,15 +1,33 @@
import logging
from typing import Tuple, List, Dict
from typing import List, Dict, Optional
from dataclasses import dataclass
from apps.asset.repositories import DjangoSubdomainRepository
from apps.asset.dtos import SubdomainDTO
from apps.common.validators import is_valid_domain
from apps.common.utils.filter_utils import apply_filters
logger = logging.getLogger(__name__)
@dataclass
class BulkCreateResult:
"""批量创建结果"""
created_count: int
skipped_count: int
invalid_count: int
mismatched_count: int
total_received: int
class SubdomainService:
"""子域名业务逻辑层"""
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'name': 'name',
}
def __init__(self, repository=None):
"""
初始化子域名服务
@@ -21,30 +39,50 @@ class SubdomainService:
# ==================== 查询操作 ====================
def get_all(self):
def get_all(self, filter_query: Optional[str] = None):
"""
获取所有子域名
Args:
filter_query: 智能过滤语法字符串
Returns:
QuerySet: 子域名查询集
"""
logger.debug("获取所有子域名")
return self.repo.get_all()
queryset = self.repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
# ==================== 创建操作 ====================
def bulk_create_ignore_conflicts(self, items: List[SubdomainDTO]) -> None:
def get_subdomains_by_target(self, target_id: int, filter_query: Optional[str] = None):
"""
批量创建子域名,忽略冲突
获取目标下的子域名
Args:
items: 子域名 DTO 列表
target_id: 目标 ID
filter_query: 智能过滤语法字符串
Note:
使用 ignore_conflicts 策略,重复记录会被跳过
Returns:
QuerySet: 子域名查询集
"""
logger.debug("批量创建子域名 - 数量: %d", len(items))
return self.repo.bulk_create_ignore_conflicts(items)
queryset = self.repo.get_by_target(target_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def count_subdomains_by_target(self, target_id: int) -> int:
"""
统计目标下的子域名数量
Args:
target_id: 目标 ID
Returns:
int: 子域名数量
"""
logger.debug("统计目标下子域名数量 - Target ID: %d", target_id)
return self.repo.count_by_target(target_id)
def get_by_names_and_target_id(self, names: set, target_id: int) -> dict:
"""
@@ -71,25 +109,8 @@ class SubdomainService:
List[str]: 子域名名称列表
"""
logger.debug("获取目标下所有子域名 - Target ID: %d", target_id)
# 通过仓储层统一访问数据库,内部已使用 iterator() 做流式查询
return list(self.repo.get_domains_for_export(target_id=target_id))
def get_subdomains_by_target(self, target_id: int):
return self.repo.get_by_target(target_id)
def count_subdomains_by_target(self, target_id: int) -> int:
"""
统计目标下的子域名数量
Args:
target_id: 目标 ID
Returns:
int: 子域名数量
"""
logger.debug("统计目标下子域名数量 - Target ID: %d", target_id)
return self.repo.count_by_target(target_id)
def iter_subdomain_names_by_target(self, target_id: int, chunk_size: int = 1000):
"""
流式获取目标下的所有子域名名称(内存优化)
@@ -102,8 +123,123 @@ class SubdomainService:
str: 子域名名称
"""
logger.debug("流式获取目标下所有子域名 - Target ID: %d, 批次大小: %d", target_id, chunk_size)
# 通过仓储层统一访问数据库,内部已使用 iterator() 做流式查询
return self.repo.get_domains_for_export(target_id=target_id, batch_size=chunk_size)
def iter_raw_data_for_csv_export(self, target_id: int):
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
Yields:
原始数据字典 {name, created_at}
"""
return self.repo.iter_raw_data_for_export(target_id=target_id)
__all__ = ['SubdomainService']
# ==================== 创建操作 ====================
def bulk_create_ignore_conflicts(self, items: List[SubdomainDTO]) -> None:
"""
批量创建子域名,忽略冲突
Args:
items: 子域名 DTO 列表
Note:
使用 ignore_conflicts 策略,重复记录会被跳过
"""
logger.debug("批量创建子域名 - 数量: %d", len(items))
return self.repo.bulk_create_ignore_conflicts(items)
def bulk_create_subdomains(
self,
target_id: int,
target_name: str,
subdomains: List[str]
) -> BulkCreateResult:
"""
批量创建子域名(带验证)
Args:
target_id: 目标 ID
target_name: 目标域名(用于匹配验证)
subdomains: 子域名列表
Returns:
BulkCreateResult: 创建结果统计
"""
total_received = len(subdomains)
target_name = target_name.lower().strip()
def is_subdomain_match(subdomain: str) -> bool:
"""验证子域名是否匹配目标域名"""
if subdomain == target_name:
return True
if subdomain.endswith('.' + target_name):
return True
return False
# 过滤有效的子域名
valid_subdomains = []
invalid_count = 0
mismatched_count = 0
for subdomain in subdomains:
if not isinstance(subdomain, str) or not subdomain.strip():
continue
subdomain = subdomain.lower().strip()
# 验证格式
if not is_valid_domain(subdomain):
invalid_count += 1
continue
# 验证匹配
if not is_subdomain_match(subdomain):
mismatched_count += 1
continue
valid_subdomains.append(subdomain)
# 去重
unique_subdomains = list(set(valid_subdomains))
duplicate_count = len(valid_subdomains) - len(unique_subdomains)
if not unique_subdomains:
return BulkCreateResult(
created_count=0,
skipped_count=duplicate_count,
invalid_count=invalid_count,
mismatched_count=mismatched_count,
total_received=total_received,
)
# 获取创建前的数量
count_before = self.repo.count_by_target(target_id)
# 创建 DTO 列表并批量创建
subdomain_dtos = [
SubdomainDTO(name=name, target_id=target_id)
for name in unique_subdomains
]
self.repo.bulk_create_ignore_conflicts(subdomain_dtos)
# 获取创建后的数量
count_after = self.repo.count_by_target(target_id)
created_count = count_after - count_before
# 计算因数据库冲突跳过的数量
db_skipped = len(unique_subdomains) - created_count
return BulkCreateResult(
created_count=created_count,
skipped_count=duplicate_count + db_skipped,
invalid_count=invalid_count,
mismatched_count=mismatched_count,
total_received=total_received,
)
__all__ = ['SubdomainService', 'BulkCreateResult']
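
A hedged usage sketch of bulk_create_subdomains above; the target values are illustrative. Note that blank or non-string entries are dropped without being counted, so the four counters do not always sum to total_received.

# Hedged sketch: exercising the validation/accounting logic above.
service = SubdomainService()
result = service.bulk_create_subdomains(
    target_id=1,
    target_name="Example.COM",   # lower-cased and stripped internally
    subdomains=[
        "api.example.com",       # valid, matches target
        "API.example.com",       # same entry after normalization -> 1 duplicate
        "evil.other.com",        # mismatched_count += 1
        "not a domain",          # invalid_count += 1
    ],
)
assert result.total_received == 4
assert result.invalid_count == 1 and result.mismatched_count == 1
# skipped_count covers both in-batch duplicates and rows the database
# already held (ignore_conflicts), so it is >= 1 here.
assert result.skipped_count >= 1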

View File

@@ -1,11 +1,13 @@
"""Vulnerability Service - 漏洞资产业务逻辑层"""
import logging
from typing import List
from typing import List, Optional
from apps.asset.models import Vulnerability
from apps.asset.dtos.asset import VulnerabilityDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
from apps.common.utils.filter_utils import apply_filters
logger = logging.getLogger(__name__)
@@ -16,10 +18,20 @@ class VulnerabilityService:
当前提供基础的批量创建能力,使用 ignore_conflicts 依赖数据库唯一约束去重。
"""
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'type': 'vuln_type',
'severity': 'severity',
'source': 'source',
'url': 'url',
}
def bulk_create_ignore_conflicts(self, items: List[VulnerabilityDTO]) -> None:
"""批量创建漏洞资产记录,忽略冲突。
注意:会自动按 (target_id, url, vuln_type, source) 去重,保留最后一条记录。
Note:
- 是否去重取决于模型上的唯一/部分唯一约束;
- 当前 Vulnerability 模型未定义唯一约束,因此会保留全部记录。
@@ -29,6 +41,9 @@ class VulnerabilityService:
return
try:
# 根据模型唯一约束自动去重(如果模型没有唯一约束则跳过)
unique_items = deduplicate_for_bulk(items, Vulnerability)
vulns = [
Vulnerability(
target_id=item.target_id,
@@ -40,7 +55,7 @@ class VulnerabilityService:
description=item.description,
raw_output=item.raw_output,
)
for item in items
for item in unique_items
]
Vulnerability.objects.bulk_create(vulns, ignore_conflicts=True)
@@ -57,24 +72,34 @@ class VulnerabilityService:
# ==================== 查询方法 ====================
def get_all(self):
def get_all(self, filter_query: Optional[str] = None):
"""获取所有漏洞 QuerySet用于全局漏洞列表
Returns:
QuerySet[Vulnerability]: 所有漏洞,按发现时间倒序
"""
return Vulnerability.objects.all().order_by("-discovered_at")
Args:
filter_query: 智能过滤语法字符串
def get_vulnerabilities_by_target(self, target_id: int):
Returns:
QuerySet[Vulnerability]: 所有漏洞,按创建时间倒序
"""
queryset = Vulnerability.objects.all().order_by("-created_at")
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def get_vulnerabilities_by_target(self, target_id: int, filter_query: Optional[str] = None):
"""按目标获取漏洞 QuerySet用于分页
Args:
target_id: 目标 ID
filter_query: 智能过滤语法字符串
Returns:
QuerySet[Vulnerability]: 目标下的所有漏洞,按发现时间倒序
QuerySet[Vulnerability]: 目标下的所有漏洞,按创建时间倒序
"""
return Vulnerability.objects.filter(target_id=target_id).order_by("-discovered_at")
queryset = Vulnerability.objects.filter(target_id=target_id).order_by("-created_at")
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def count_by_target(self, target_id: int) -> int:
"""统计目标下的漏洞数量。"""

View File

@@ -1,10 +1,12 @@
"""WebSite Service - 网站业务逻辑层"""
import logging
from typing import List
from typing import List, Iterator, Optional
from apps.asset.repositories import DjangoWebSiteRepository
from apps.asset.dtos import WebSiteDTO
from apps.common.validators import is_valid_url, is_url_match_target
from apps.common.utils.filter_utils import apply_filters
logger = logging.getLogger(__name__)
@@ -12,6 +14,15 @@ logger = logging.getLogger(__name__)
class WebSiteService:
"""网站业务逻辑层"""
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'url': 'url',
'host': 'host',
'title': 'title',
'status_code': 'status_code',
'tech': 'tech',
}
def __init__(self, repository=None):
"""初始化网站服务"""
self.repo = repository or DjangoWebSiteRepository()
@@ -37,13 +48,75 @@ class WebSiteService:
logger.error(f"批量 upsert 网站失败: {e}")
raise
def get_websites_by_target(self, target_id: int):
"""获取目标下的所有网站"""
return self.repo.get_by_target(target_id)
def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
"""
批量创建网站(仅 URL使用 ignore_conflicts
验证 URL 格式和匹配,过滤无效/不匹配 URL去重后批量创建。
已存在的记录会被跳过。
Args:
target_id: 目标 ID
target_name: 目标名称(用于匹配验证)
target_type: 目标类型 ('domain', 'ip', 'cidr')
urls: URL 列表
Returns:
int: 实际创建的记录数
"""
if not urls:
return 0
# 过滤有效 URL 并去重
valid_urls = []
seen = set()
for url in urls:
if not isinstance(url, str):
continue
url = url.strip()
if not url or url in seen:
continue
if not is_valid_url(url):
continue
# 匹配验证(前端已阻止不匹配的提交,后端作为双重保障)
if not is_url_match_target(url, target_name, target_type):
continue
seen.add(url)
valid_urls.append(url)
if not valid_urls:
return 0
# 获取创建前的数量
count_before = self.repo.count_by_target(target_id)
# 创建 DTO 列表并批量创建
website_dtos = [
WebSiteDTO(url=url, target_id=target_id)
for url in valid_urls
]
self.repo.bulk_create_ignore_conflicts(website_dtos)
# 获取创建后的数量
count_after = self.repo.count_by_target(target_id)
return count_after - count_before
def get_all(self):
def get_websites_by_target(self, target_id: int, filter_query: Optional[str] = None):
"""获取目标下的所有网站"""
queryset = self.repo.get_by_target(target_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
return queryset
def get_all(self, filter_query: Optional[str] = None):
"""获取所有网站"""
return self.repo.get_all()
queryset = self.repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
return queryset
def get_by_url(self, url: str, target_id: int) -> int:
"""根据 URL 和 target_id 查找网站 ID"""
@@ -53,5 +126,17 @@ class WebSiteService:
"""流式获取目标下的所有站点 URL"""
return self.repo.get_urls_for_export(target_id=target_id, batch_size=chunk_size)
def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
target_id: 目标 ID
Yields:
原始数据字典
"""
return self.repo.iter_raw_data_for_export(target_id=target_id)
__all__ = ['WebSiteService']
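
A hedged usage sketch of bulk_create_urls above; the URLs and target values are illustrative. Because invalid or non-matching URLs are dropped silently and existing rows are skipped via ignore_conflicts, the return value can be smaller than len(urls).

# Hedged sketch: exercising the URL validation flow above.
service = WebSiteService()
created = service.bulk_create_urls(
    target_id=1,
    target_name="example.com",
    target_type="domain",
    urls=[
        "https://app.example.com/login",  # valid and matches the target
        "https://other.org/",             # dropped: fails is_url_match_target()
        "not-a-url",                      # dropped: fails is_valid_url()
    ],
)
print(f"created {created} website record(s)")  # at most 1 here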

View File

@@ -0,0 +1,477 @@
"""
资产搜索服务
提供资产搜索的核心业务逻辑:
- 从物化视图查询数据
- 支持表达式语法解析
- 支持 =(模糊)、==(精确)、!=(不等于)操作符
- 支持 && (AND) 和 || (OR) 逻辑组合
- 支持 Website 和 Endpoint 两种资产类型
"""
import logging
import re
from typing import Optional, List, Dict, Any, Tuple, Literal, Iterator
from django.db import connection
logger = logging.getLogger(__name__)
# 支持的字段映射(前端字段名 -> 数据库字段名)
FIELD_MAPPING = {
'host': 'host',
'url': 'url',
'title': 'title',
'tech': 'tech',
'status': 'status_code',
'body': 'response_body',
'header': 'response_headers',
}
# 数组类型字段
ARRAY_FIELDS = {'tech'}
# 资产类型到视图名的映射
VIEW_MAPPING = {
'website': 'asset_search_view',
'endpoint': 'endpoint_search_view',
}
# 资产类型到原表名的映射(用于 JOIN 获取数组字段)
# ⚠️ 重要pg_ivm 不支持 ArrayField所有数组字段必须从原表 JOIN 获取
TABLE_MAPPING = {
'website': 'website',
'endpoint': 'endpoint',
}
# 有效的资产类型
VALID_ASSET_TYPES = {'website', 'endpoint'}
# Website 查询字段v=视图t=原表)
# ⚠️ 注意t.tech 从原表获取,因为 pg_ivm 不支持 ArrayField
WEBSITE_SELECT_FIELDS = """
v.id,
v.url,
v.host,
v.title,
t.tech, -- ArrayField从 website 表 JOIN 获取
v.status_code,
v.response_headers,
v.response_body,
v.content_type,
v.content_length,
v.webserver,
v.location,
v.vhost,
v.created_at,
v.target_id
"""
# Endpoint 查询字段
# ⚠️ 注意t.tech 和 t.matched_gf_patterns 从原表获取,因为 pg_ivm 不支持 ArrayField
ENDPOINT_SELECT_FIELDS = """
v.id,
v.url,
v.host,
v.title,
t.tech, -- ArrayField从 endpoint 表 JOIN 获取
v.status_code,
v.response_headers,
v.response_body,
v.content_type,
v.content_length,
v.webserver,
v.location,
v.vhost,
t.matched_gf_patterns, -- ArrayField从 endpoint 表 JOIN 获取
v.created_at,
v.target_id
"""
class SearchQueryParser:
"""
搜索查询解析器
支持语法:
- field="value" 模糊匹配ILIKE %value%
- field=="value" 精确匹配
- field!="value" 不等于
- && AND 连接
- || OR 连接
- () 分组(暂不支持嵌套)
示例:
- host="api" && tech="nginx"
- tech="vue" || tech="react"
- status=="200" && host!="test"
"""
# 匹配单个条件: field="value" 或 field=="value" 或 field!="value"
CONDITION_PATTERN = re.compile(r'(\w+)\s*(==|!=|=)\s*"([^"]*)"')
@classmethod
def parse(cls, query: str) -> Tuple[str, List[Any]]:
"""
解析查询字符串,返回 SQL WHERE 子句和参数
Args:
query: 搜索查询字符串
Returns:
(where_clause, params) 元组
"""
if not query or not query.strip():
return "1=1", []
query = query.strip()
# 检查是否包含操作符语法,如果不包含则作为 host 模糊搜索
if not cls.CONDITION_PATTERN.search(query):
# 裸文本,默认作为 host 模糊搜索v 是视图别名)
return "v.host ILIKE %s", [f"%{query}%"]
# 按 || 分割为 OR 组
or_groups = cls._split_by_or(query)
if len(or_groups) == 1:
# 没有 OR直接解析 AND 条件
return cls._parse_and_group(or_groups[0])
# 多个 OR 组
or_clauses = []
all_params = []
for group in or_groups:
clause, params = cls._parse_and_group(group)
if clause and clause != "1=1":
or_clauses.append(f"({clause})")
all_params.extend(params)
if not or_clauses:
return "1=1", []
return " OR ".join(or_clauses), all_params
@classmethod
def _split_by_or(cls, query: str) -> List[str]:
"""按 || 分割查询,但忽略引号内的 ||"""
parts = []
current = ""
in_quotes = False
i = 0
while i < len(query):
char = query[i]
if char == '"':
in_quotes = not in_quotes
current += char
elif not in_quotes and i + 1 < len(query) and query[i:i+2] == '||':
if current.strip():
parts.append(current.strip())
current = ""
i += 1 # 跳过第二个 |
else:
current += char
i += 1
if current.strip():
parts.append(current.strip())
return parts if parts else [query]
@classmethod
def _parse_and_group(cls, group: str) -> Tuple[str, List[Any]]:
"""解析 AND 组(用 && 连接的条件)"""
# 移除外层括号
group = group.strip()
if group.startswith('(') and group.endswith(')'):
group = group[1:-1].strip()
# 按 && 分割
parts = cls._split_by_and(group)
and_clauses = []
all_params = []
for part in parts:
clause, params = cls._parse_condition(part.strip())
if clause:
and_clauses.append(clause)
all_params.extend(params)
if not and_clauses:
return "1=1", []
return " AND ".join(and_clauses), all_params
@classmethod
def _split_by_and(cls, query: str) -> List[str]:
"""按 && 分割查询,但忽略引号内的 &&"""
parts = []
current = ""
in_quotes = False
i = 0
while i < len(query):
char = query[i]
if char == '"':
in_quotes = not in_quotes
current += char
elif not in_quotes and i + 1 < len(query) and query[i:i+2] == '&&':
if current.strip():
parts.append(current.strip())
current = ""
i += 1 # 跳过第二个 &
else:
current += char
i += 1
if current.strip():
parts.append(current.strip())
return parts if parts else [query]
@classmethod
def _parse_condition(cls, condition: str) -> Tuple[Optional[str], List[Any]]:
"""
解析单个条件
Returns:
(sql_clause, params) 或 (None, []) 如果解析失败
"""
# 移除括号
condition = condition.strip()
if condition.startswith('(') and condition.endswith(')'):
condition = condition[1:-1].strip()
match = cls.CONDITION_PATTERN.match(condition)
if not match:
logger.warning(f"无法解析条件: {condition}")
return None, []
field, operator, value = match.groups()
field = field.lower()
# 验证字段
if field not in FIELD_MAPPING:
logger.warning(f"未知字段: {field}")
return None, []
db_field = FIELD_MAPPING[field]
is_array = field in ARRAY_FIELDS
# 根据操作符生成 SQL
if operator == '=':
# 模糊匹配
return cls._build_like_condition(db_field, value, is_array)
elif operator == '==':
# 精确匹配
return cls._build_exact_condition(db_field, value, is_array)
elif operator == '!=':
# 不等于
return cls._build_not_equal_condition(db_field, value, is_array)
return None, []
@classmethod
def _build_like_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
"""构建模糊匹配条件"""
if is_array:
# 数组字段:检查数组中是否有元素包含该值(从原表 t 获取)
return f"EXISTS (SELECT 1 FROM unnest(t.{field}) AS elem WHERE elem ILIKE %s)", [f"%{value}%"]
elif field == 'status_code':
# 状态码是整数,模糊匹配转为精确匹配
try:
return f"v.{field} = %s", [int(value)]
except ValueError:
return f"v.{field}::text ILIKE %s", [f"%{value}%"]
else:
return f"v.{field} ILIKE %s", [f"%{value}%"]
@classmethod
def _build_exact_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
"""构建精确匹配条件"""
if is_array:
# 数组字段:检查数组中是否包含该精确值(从原表 t 获取)
return f"%s = ANY(t.{field})", [value]
elif field == 'status_code':
# 状态码是整数
try:
return f"v.{field} = %s", [int(value)]
except ValueError:
return f"v.{field}::text = %s", [value]
else:
return f"v.{field} = %s", [value]
@classmethod
def _build_not_equal_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
"""构建不等于条件"""
if is_array:
# 数组字段:检查数组中不包含该值(从原表 t 获取)
return f"NOT (%s = ANY(t.{field}))", [value]
elif field == 'status_code':
try:
return f"(v.{field} IS NULL OR v.{field} != %s)", [int(value)]
except ValueError:
return f"(v.{field} IS NULL OR v.{field}::text != %s)", [value]
else:
return f"(v.{field} IS NULL OR v.{field} != %s)", [value]
AssetType = Literal['website', 'endpoint']
class AssetSearchService:
"""资产搜索服务"""
def search(
self,
query: str,
asset_type: AssetType = 'website',
limit: Optional[int] = None
) -> List[Dict[str, Any]]:
"""
搜索资产
Args:
query: 搜索查询字符串
asset_type: 资产类型 ('website' 或 'endpoint')
limit: 最大返回数量(可选)
Returns:
List[Dict]: 搜索结果列表
"""
where_clause, params = SearchQueryParser.parse(query)
# 根据资产类型选择视图、原表和字段
view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
table_name = TABLE_MAPPING.get(asset_type, 'website')
select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS
# JOIN 原表获取数组字段tech, matched_gf_patterns
sql = f"""
SELECT {select_fields}
FROM {view_name} v
JOIN {table_name} t ON v.id = t.id
WHERE {where_clause}
ORDER BY v.created_at DESC
"""
# 添加 LIMIT
if limit is not None and limit > 0:
sql += f" LIMIT {int(limit)}"
try:
with connection.cursor() as cursor:
cursor.execute(sql, params)
columns = [col[0] for col in cursor.description]
results = []
for row in cursor.fetchall():
result = dict(zip(columns, row))
results.append(result)
return results
except Exception as e:
logger.error(f"搜索查询失败: {e}, SQL: {sql}, params: {params}")
raise
def count(self, query: str, asset_type: AssetType = 'website', statement_timeout_ms: int = 300000) -> int:
"""
统计搜索结果数量
Args:
query: 搜索查询字符串
asset_type: 资产类型 ('website' 或 'endpoint')
statement_timeout_ms: SQL 语句超时时间(毫秒),默认 5 分钟
Returns:
int: 结果总数
"""
where_clause, params = SearchQueryParser.parse(query)
# 根据资产类型选择视图和原表
view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
table_name = TABLE_MAPPING.get(asset_type, 'website')
# JOIN 原表以支持数组字段查询
sql = f"SELECT COUNT(*) FROM {view_name} v JOIN {table_name} t ON v.id = t.id WHERE {where_clause}"
try:
with connection.cursor() as cursor:
# 为导出设置更长的超时时间(仅影响当前会话)
cursor.execute(f"SET LOCAL statement_timeout = {statement_timeout_ms}")
cursor.execute(sql, params)
return cursor.fetchone()[0]
except Exception as e:
logger.error(f"统计查询失败: {e}")
raise
def search_iter(
self,
query: str,
asset_type: AssetType = 'website',
batch_size: int = 1000,
statement_timeout_ms: int = 300000
) -> Iterator[Dict[str, Any]]:
"""
流式搜索资产(使用分批查询,内存友好)
Args:
query: 搜索查询字符串
asset_type: 资产类型 ('website' 或 'endpoint')
batch_size: 每批获取的数量
statement_timeout_ms: SQL 语句超时时间(毫秒),默认 5 分钟
Yields:
Dict: 单条搜索结果
"""
where_clause, params = SearchQueryParser.parse(query)
# 根据资产类型选择视图、原表和字段
view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
table_name = TABLE_MAPPING.get(asset_type, 'website')
select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS
# 使用 OFFSET/LIMIT 分批查询Django 不支持命名游标)
offset = 0
try:
while True:
# JOIN 原表获取数组字段
sql = f"""
SELECT {select_fields}
FROM {view_name} v
JOIN {table_name} t ON v.id = t.id
WHERE {where_clause}
ORDER BY v.created_at DESC
LIMIT {batch_size} OFFSET {offset}
"""
with connection.cursor() as cursor:
# 为导出设置更长的超时时间(仅影响当前会话)
cursor.execute(f"SET LOCAL statement_timeout = {statement_timeout_ms}")
cursor.execute(sql, params)
columns = [col[0] for col in cursor.description]
rows = cursor.fetchall()
if not rows:
break
for row in rows:
yield dict(zip(columns, row))
# 如果返回的行数少于 batch_size说明已经是最后一批
if len(rows) < batch_size:
break
offset += batch_size
except Exception as e:
logger.error(f"流式搜索查询失败: {e}, SQL: {sql}, params: {params}")
raise
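
Two notes on the service above. First, a hedged sketch of what SearchQueryParser.parse() produces for the documented syntax, traced through the branch logic above. Second, a caveat: in PostgreSQL, SET LOCAL only takes effect inside a transaction block, so under Django's default autocommit the statement_timeout settings in count() and search_iter() may be no-ops unless the callers run inside transaction.atomic().

# Hedged sketch of parser output; the WHERE fragments come straight from
# _build_like_condition / _build_exact_condition / _build_not_equal_condition.
where, params = SearchQueryParser.parse('host="api" && tech="nginx"')
# where  == 'v.host ILIKE %s AND EXISTS (SELECT 1 FROM unnest(t.tech) AS elem WHERE elem ILIKE %s)'
# params == ['%api%', '%nginx%']

where, params = SearchQueryParser.parse('status=="200" || host!="test"')
# OR groups are parenthesized individually:
# where  == '(v.status_code = %s) OR ((v.host IS NULL OR v.host != %s))'
# params == [200, 'test']

# Bare text (no operator) falls back to a host search:
assert SearchQueryParser.parse("example") == ("v.host ILIKE %s", ["%example%"])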

View File

@@ -50,7 +50,7 @@ class DirectorySnapshotsService:
# 步骤 2: 转换为资产 DTO 并保存到资产表upsert
# - 新记录:插入资产表
# - 已存在的记录:更新字段(discovered_at 不更新,保留首次发现时间)
# - 已存在的记录:更新字段(created_at 不更新,保留创建时间)
logger.debug("步骤 2: 同步到资产表(通过 Service 层upsert")
asset_items = [item.to_asset_dto() for item in items]
@@ -67,15 +67,44 @@ class DirectorySnapshotsService:
)
raise
def get_by_scan(self, scan_id: int):
return self.snapshot_repo.get_by_scan(scan_id)
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'url': 'url',
'status': 'status',
'content_type': 'content_type',
}
def get_by_scan(self, scan_id: int, filter_query: str = None):
from apps.common.utils.filter_utils import apply_filters
queryset = self.snapshot_repo.get_by_scan(scan_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def get_all(self):
def get_all(self, filter_query: str = None):
"""获取所有目录快照"""
return self.snapshot_repo.get_all()
from apps.common.utils.filter_utils import apply_filters
queryset = self.snapshot_repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def iter_directory_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
"""流式获取某次扫描下的所有目录 URL。"""
queryset = self.snapshot_repo.get_by_scan(scan_id)
for snapshot in queryset.iterator(chunk_size=chunk_size):
yield snapshot.url
def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
Yields:
原始数据字典
"""
return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

View File

@@ -67,15 +67,47 @@ class EndpointSnapshotsService:
)
raise
def get_by_scan(self, scan_id: int):
return self.snapshot_repo.get_by_scan(scan_id)
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'url': 'url',
'host': 'host',
'title': 'title',
'status_code': 'status_code',
'webserver': 'webserver',
'tech': 'tech',
}
def get_by_scan(self, scan_id: int, filter_query: str = None):
from apps.common.utils.filter_utils import apply_filters
queryset = self.snapshot_repo.get_by_scan(scan_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def get_all(self):
def get_all(self, filter_query: str = None):
"""获取所有端点快照"""
return self.snapshot_repo.get_all()
from apps.common.utils.filter_utils import apply_filters
queryset = self.snapshot_repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def iter_endpoint_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
"""流式获取某次扫描下的所有端点 URL。"""
queryset = self.snapshot_repo.get_by_scan(scan_id)
for snapshot in queryset.iterator(chunk_size=chunk_size):
yield snapshot.url
def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
Yields:
原始数据字典
"""
return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

View File

@@ -69,13 +69,25 @@ class HostPortMappingSnapshotsService:
)
raise
def get_ip_aggregation_by_scan(self, scan_id: int, search: str = None):
return self.snapshot_repo.get_ip_aggregation_by_scan(scan_id, search=search)
def get_ip_aggregation_by_scan(self, scan_id: int, filter_query: str = None):
return self.snapshot_repo.get_ip_aggregation_by_scan(scan_id, filter_query=filter_query)
def get_all_ip_aggregation(self, search: str = None):
def get_all_ip_aggregation(self, filter_query: str = None):
"""获取所有 IP 聚合数据"""
return self.snapshot_repo.get_all_ip_aggregation(search=search)
return self.snapshot_repo.get_all_ip_aggregation(filter_query=filter_query)
def iter_ips_by_scan(self, scan_id: int, batch_size: int = 1000) -> Iterator[str]:
"""流式获取某次扫描下的所有唯一 IP 地址。"""
return self.snapshot_repo.get_ips_for_export(scan_id=scan_id, batch_size=batch_size)
def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
Yields:
原始数据字典 {ip, host, port, created_at}
"""
return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

View File

@@ -66,14 +66,41 @@ class SubdomainSnapshotsService:
)
raise
def get_by_scan(self, scan_id: int):
return self.subdomain_snapshot_repo.get_by_scan(scan_id)
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'name': 'name',
}
def get_by_scan(self, scan_id: int, filter_query: str = None):
from apps.common.utils.filter_utils import apply_filters
queryset = self.subdomain_snapshot_repo.get_by_scan(scan_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def get_all(self):
def get_all(self, filter_query: str = None):
"""获取所有子域名快照"""
return self.subdomain_snapshot_repo.get_all()
from apps.common.utils.filter_utils import apply_filters
queryset = self.subdomain_snapshot_repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def iter_subdomain_names_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
queryset = self.subdomain_snapshot_repo.get_by_scan(scan_id)
for snapshot in queryset.iterator(chunk_size=chunk_size):
yield snapshot.name
def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
Yields:
原始数据字典 {name, created_at}
"""
return self.subdomain_snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

View File

@@ -66,13 +66,31 @@ class VulnerabilitySnapshotsService:
)
raise
def get_by_scan(self, scan_id: int):
"""按扫描任务获取所有漏洞快照。"""
return self.snapshot_repo.get_by_scan(scan_id)
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'type': 'vuln_type',
'url': 'url',
'severity': 'severity',
'source': 'source',
}
def get_all(self):
def get_by_scan(self, scan_id: int, filter_query: str = None):
"""按扫描任务获取所有漏洞快照。"""
from apps.common.utils.filter_utils import apply_filters
queryset = self.snapshot_repo.get_by_scan(scan_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def get_all(self, filter_query: str = None):
"""获取所有漏洞快照"""
return self.snapshot_repo.get_all()
from apps.common.utils.filter_utils import apply_filters
queryset = self.snapshot_repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def iter_vuln_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
"""流式获取某次扫描下的所有漏洞 URL。"""

View File

@@ -51,7 +51,7 @@ class WebsiteSnapshotsService:
# 步骤 2: 转换为资产 DTO 并保存到资产表upsert
# - 新记录:插入资产表
# - 已存在的记录:更新字段(discovered_at 不更新,保留首次发现时间)
# - 已存在的记录:更新字段(created_at 不更新,保留创建时间)
logger.debug("步骤 2: 同步到资产表(通过 Service 层upsert")
asset_items = [item.to_asset_dto() for item in items]
@@ -68,15 +68,47 @@ class WebsiteSnapshotsService:
)
raise
def get_by_scan(self, scan_id: int):
return self.snapshot_repo.get_by_scan(scan_id)
# 智能过滤字段映射
FILTER_FIELD_MAPPING = {
'url': 'url',
'host': 'host',
'title': 'title',
'status_code': 'status_code',
'webserver': 'webserver',
'tech': 'tech',
}
def get_by_scan(self, scan_id: int, filter_query: str = None):
from apps.common.utils.filter_utils import apply_filters
queryset = self.snapshot_repo.get_by_scan(scan_id)
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def get_all(self):
def get_all(self, filter_query: str = None):
"""获取所有网站快照"""
return self.snapshot_repo.get_all()
from apps.common.utils.filter_utils import apply_filters
queryset = self.snapshot_repo.get_all()
if filter_query:
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
return queryset
def iter_website_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
"""流式获取某次扫描下的所有站点 URL发现时间倒序)。"""
"""流式获取某次扫描下的所有站点 URL创建时间倒序)。"""
queryset = self.snapshot_repo.get_by_scan(scan_id)
for snapshot in queryset.iterator(chunk_size=chunk_size):
yield snapshot.url
def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
"""
流式获取原始数据用于 CSV 导出
Args:
scan_id: 扫描 ID
Yields:
原始数据字典
"""
return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)
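
A hedged sketch of wiring iter_raw_data_for_csv_export() into a streaming download with plain Django primitives; create_csv_export_response, used elsewhere in this diff, presumably wraps something similar. The column names are assumptions based on the snapshot fields.

import csv
import io

from django.http import StreamingHttpResponse

def stream_website_snapshot_csv(service, scan_id: int) -> StreamingHttpResponse:
    """Hedged sketch: stream snapshot rows as CSV without buffering them all."""
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    columns = ("url", "host", "title", "status_code", "created_at")  # assumed

    def drain() -> str:
        data = buffer.getvalue()
        buffer.seek(0)
        buffer.truncate(0)
        return data

    def rows():
        writer.writerow(columns)
        yield drain()
        for item in service.iter_raw_data_for_csv_export(scan_id=scan_id):
            writer.writerow([item.get(col, "") for col in columns])
            yield drain()

    response = StreamingHttpResponse(rows(), content_type="text/csv")
    response["Content-Disposition"] = f'attachment; filename="scan-{scan_id}-websites.csv"'
    return response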

View File

@@ -10,19 +10,26 @@ from .views import (
DirectoryViewSet,
VulnerabilityViewSet,
AssetStatisticsViewSet,
AssetSearchView,
AssetSearchExportView,
EndpointViewSet,
HostPortMappingViewSet,
)
# 创建 DRF 路由器
router = DefaultRouter()
# 注册 ViewSet
# 注意IPAddress 模型已被重构为 HostPortMapping相关路由已移除
router.register(r'subdomains', SubdomainViewSet, basename='subdomain')
router.register(r'websites', WebSiteViewSet, basename='website')
router.register(r'directories', DirectoryViewSet, basename='directory')
router.register(r'endpoints', EndpointViewSet, basename='endpoint')
router.register(r'ip-addresses', HostPortMappingViewSet, basename='ip-address')
router.register(r'vulnerabilities', VulnerabilityViewSet, basename='vulnerability')
router.register(r'statistics', AssetStatisticsViewSet, basename='asset-statistics')
urlpatterns = [
path('assets/', include(router.urls)),
path('assets/search/', AssetSearchView.as_view(), name='asset-search'),
path('assets/search/export/', AssetSearchExportView.as_view(), name='asset-search-export'),
]

View File

@@ -1,562 +0,0 @@
import logging
from rest_framework import viewsets, status, filters
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.request import Request
from rest_framework.exceptions import NotFound, ValidationError as DRFValidationError
from django.core.exceptions import ValidationError, ObjectDoesNotExist
from django.db import DatabaseError, IntegrityError, OperationalError
from django.http import StreamingHttpResponse
from .serializers import (
SubdomainListSerializer, WebSiteSerializer, DirectorySerializer,
VulnerabilitySerializer, EndpointListSerializer, IPAddressAggregatedSerializer,
SubdomainSnapshotSerializer, WebsiteSnapshotSerializer, DirectorySnapshotSerializer,
EndpointSnapshotSerializer, VulnerabilitySnapshotSerializer
)
from .services import (
SubdomainService, WebSiteService, DirectoryService,
VulnerabilityService, AssetStatisticsService, EndpointService, HostPortMappingService
)
from .services.snapshot import (
SubdomainSnapshotsService, WebsiteSnapshotsService, DirectorySnapshotsService,
EndpointSnapshotsService, HostPortMappingSnapshotsService, VulnerabilitySnapshotsService
)
from apps.common.pagination import BasePagination
logger = logging.getLogger(__name__)
class AssetStatisticsViewSet(viewsets.ViewSet):
"""
资产统计 API
提供仪表盘所需的统计数据(预聚合,读取缓存表)
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = AssetStatisticsService()
def list(self, request):
"""
获取资产统计数据
GET /assets/statistics/
返回:
- totalTargets: 目标总数
- totalSubdomains: 子域名总数
- totalIps: IP 总数
- totalEndpoints: 端点总数
- totalWebsites: 网站总数
- totalVulns: 漏洞总数
- totalAssets: 总资产数
- runningScans: 运行中的扫描数
- updatedAt: 统计更新时间
"""
try:
stats = self.service.get_statistics()
return Response({
'totalTargets': stats['total_targets'],
'totalSubdomains': stats['total_subdomains'],
'totalIps': stats['total_ips'],
'totalEndpoints': stats['total_endpoints'],
'totalWebsites': stats['total_websites'],
'totalVulns': stats['total_vulns'],
'totalAssets': stats['total_assets'],
'runningScans': stats['running_scans'],
'updatedAt': stats['updated_at'],
# 变化值
'changeTargets': stats['change_targets'],
'changeSubdomains': stats['change_subdomains'],
'changeIps': stats['change_ips'],
'changeEndpoints': stats['change_endpoints'],
'changeWebsites': stats['change_websites'],
'changeVulns': stats['change_vulns'],
'changeAssets': stats['change_assets'],
# 漏洞严重程度分布
'vulnBySeverity': stats['vuln_by_severity'],
})
except (DatabaseError, OperationalError) as e:
logger.exception("获取资产统计数据失败")
return Response(
{'error': '获取统计数据失败'},
status=status.HTTP_500_INTERNAL_SERVER_ERROR
)
@action(detail=False, methods=['get'], url_path='history')
def history(self, request: Request):
"""
获取统计历史数据(用于折线图)
GET /assets/statistics/history/?days=7
Query Parameters:
days: 获取最近多少天的数据,默认 7最大 90
Returns:
历史数据列表
"""
try:
days_param = request.query_params.get('days', '7')
try:
days = int(days_param)
except (ValueError, TypeError):
days = 7
days = min(max(days, 1), 90) # 限制在 1-90 天
history = self.service.get_statistics_history(days=days)
return Response(history)
except (DatabaseError, OperationalError) as e:
logger.exception("获取统计历史数据失败")
return Response(
{'error': '获取历史数据失败'},
status=status.HTTP_500_INTERNAL_SERVER_ERROR
)
# 注意IPAddress 模型已被重构为 HostPortMapping
# IPAddressViewSet 已删除,需要根据新架构重新实现
class SubdomainViewSet(viewsets.ModelViewSet):
"""子域名管理 ViewSet
支持两种访问方式:
1. 嵌套路由GET /api/targets/{target_pk}/subdomains/
2. 独立路由GET /api/subdomains/(全局查询)
"""
serializer_class = SubdomainListSerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['name']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = SubdomainService()
def get_queryset(self):
"""根据是否有 target_pk 参数决定查询范围"""
target_pk = self.kwargs.get('target_pk')
if target_pk:
return self.service.get_subdomains_by_target(target_pk)
return self.service.get_all()
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
"""导出子域名(纯文本,一行一个)"""
target_pk = self.kwargs.get('target_pk')
if not target_pk:
raise DRFValidationError('必须在目标下导出')
def line_iterator():
for name in self.service.iter_subdomain_names_by_target(target_pk):
yield f"{name}\n"
response = StreamingHttpResponse(
line_iterator(),
content_type='text/plain; charset=utf-8',
)
response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-subdomains.txt"'
return response
class WebSiteViewSet(viewsets.ModelViewSet):
"""站点管理 ViewSet
支持两种访问方式:
1. 嵌套路由GET /api/targets/{target_pk}/websites/
2. 独立路由GET /api/websites/(全局查询)
"""
serializer_class = WebSiteSerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['host']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = WebSiteService()
def get_queryset(self):
"""根据是否有 target_pk 参数决定查询范围"""
target_pk = self.kwargs.get('target_pk')
if target_pk:
return self.service.get_websites_by_target(target_pk)
return self.service.get_all()
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
"""导出站点 URL纯文本一行一个"""
target_pk = self.kwargs.get('target_pk')
if not target_pk:
raise DRFValidationError('必须在目标下导出')
def line_iterator():
for url in self.service.iter_website_urls_by_target(target_pk):
yield f"{url}\n"
response = StreamingHttpResponse(
line_iterator(),
content_type='text/plain; charset=utf-8',
)
response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-websites.txt"'
return response
class DirectoryViewSet(viewsets.ModelViewSet):
"""目录管理 ViewSet
支持两种访问方式:
1. 嵌套路由GET /api/targets/{target_pk}/directories/
2. 独立路由GET /api/directories/(全局查询)
"""
serializer_class = DirectorySerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['url']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = DirectoryService()
def get_queryset(self):
"""根据是否有 target_pk 参数决定查询范围"""
target_pk = self.kwargs.get('target_pk')
if target_pk:
return self.service.get_directories_by_target(target_pk)
return self.service.get_all()
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
"""导出目录 URL纯文本一行一个"""
target_pk = self.kwargs.get('target_pk')
if not target_pk:
raise DRFValidationError('必须在目标下导出')
def line_iterator():
for url in self.service.iter_directory_urls_by_target(target_pk):
yield f"{url}\n"
response = StreamingHttpResponse(
line_iterator(),
content_type='text/plain; charset=utf-8',
)
response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-directories.txt"'
return response
class EndpointViewSet(viewsets.ModelViewSet):
"""端点管理 ViewSet
支持两种访问方式:
1. 嵌套路由GET /api/targets/{target_pk}/endpoints/
2. 独立路由GET /api/endpoints/(全局查询)
"""
serializer_class = EndpointListSerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['host']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = EndpointService()
def get_queryset(self):
"""根据是否有 target_pk 参数决定查询范围"""
target_pk = self.kwargs.get('target_pk')
if target_pk:
return self.service.get_endpoints_by_target(target_pk)
return self.service.get_all()
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
"""导出端点 URL纯文本一行一个"""
target_pk = self.kwargs.get('target_pk')
if not target_pk:
raise DRFValidationError('必须在目标下导出')
def line_iterator():
for url in self.service.iter_endpoint_urls_by_target(target_pk):
yield f"{url}\n"
response = StreamingHttpResponse(
line_iterator(),
content_type='text/plain; charset=utf-8',
)
response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-endpoints.txt"'
return response
class HostPortMappingViewSet(viewsets.ModelViewSet):
"""主机端口映射管理 ViewSetIP 地址聚合视图)
支持两种访问方式:
1. 嵌套路由GET /api/targets/{target_pk}/ip-addresses/
2. 独立路由GET /api/ip-addresses/(全局查询)
返回按 IP 聚合的数据,每个 IP 显示其关联的所有 hosts 和 ports
注意:由于返回的是聚合数据(字典列表),不支持 DRF SearchFilter
"""
serializer_class = IPAddressAggregatedSerializer
pagination_class = BasePagination
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = HostPortMappingService()
def get_queryset(self):
"""根据是否有 target_pk 参数决定查询范围,返回按 IP 聚合的数据"""
target_pk = self.kwargs.get('target_pk')
search = self.request.query_params.get('search', None)
if target_pk:
return self.service.get_ip_aggregation_by_target(target_pk, search=search)
return self.service.get_all_ip_aggregation(search=search)
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
"""导出 IP 地址(纯文本,一行一个)"""
target_pk = self.kwargs.get('target_pk')
if not target_pk:
raise DRFValidationError('必须在目标下导出')
def line_iterator():
for ip in self.service.iter_ips_by_target(target_pk):
yield f"{ip}\n"
response = StreamingHttpResponse(
line_iterator(),
content_type='text/plain; charset=utf-8',
)
response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-ip-addresses.txt"'
return response
class VulnerabilityViewSet(viewsets.ModelViewSet):
"""漏洞资产管理 ViewSet只读
支持两种访问方式:
1. 嵌套路由GET /api/targets/{target_pk}/vulnerabilities/
2. 独立路由GET /api/vulnerabilities/(全局查询)
"""
serializer_class = VulnerabilitySerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['vuln_type']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = VulnerabilityService()
def get_queryset(self):
"""根据是否有 target_pk 参数决定查询范围"""
target_pk = self.kwargs.get('target_pk')
if target_pk:
return self.service.get_vulnerabilities_by_target(target_pk)
return self.service.get_all()
# ==================== 快照 ViewSetScan 嵌套路由) ====================
class SubdomainSnapshotViewSet(viewsets.ModelViewSet):
"""子域名快照 ViewSet - 嵌套路由GET /api/scans/{scan_pk}/subdomains/"""
serializer_class = SubdomainSnapshotSerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['name']
ordering_fields = ['name', 'discovered_at']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = SubdomainSnapshotsService()
def get_queryset(self):
scan_pk = self.kwargs.get('scan_pk')
if scan_pk:
return self.service.get_by_scan(scan_pk)
return self.service.get_all()
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
scan_pk = self.kwargs.get('scan_pk')
if not scan_pk:
raise DRFValidationError('必须在扫描下导出')
def line_iterator():
for name in self.service.iter_subdomain_names_by_scan(scan_pk):
yield f"{name}\n"
response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-subdomains.txt"'
return response
class WebsiteSnapshotViewSet(viewsets.ModelViewSet):
"""网站快照 ViewSet - 嵌套路由GET /api/scans/{scan_pk}/websites/"""
serializer_class = WebsiteSnapshotSerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['host']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = WebsiteSnapshotsService()
def get_queryset(self):
scan_pk = self.kwargs.get('scan_pk')
if scan_pk:
return self.service.get_by_scan(scan_pk)
return self.service.get_all()
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
scan_pk = self.kwargs.get('scan_pk')
if not scan_pk:
raise DRFValidationError('必须在扫描下导出')
def line_iterator():
for url in self.service.iter_website_urls_by_scan(scan_pk):
yield f"{url}\n"
response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-websites.txt"'
return response
class DirectorySnapshotViewSet(viewsets.ModelViewSet):
"""目录快照 ViewSet - 嵌套路由GET /api/scans/{scan_pk}/directories/"""
serializer_class = DirectorySnapshotSerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['url']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = DirectorySnapshotsService()
def get_queryset(self):
scan_pk = self.kwargs.get('scan_pk')
if scan_pk:
return self.service.get_by_scan(scan_pk)
return self.service.get_all()
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
scan_pk = self.kwargs.get('scan_pk')
if not scan_pk:
raise DRFValidationError('必须在扫描下导出')
def line_iterator():
for url in self.service.iter_directory_urls_by_scan(scan_pk):
yield f"{url}\n"
response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-directories.txt"'
return response
class EndpointSnapshotViewSet(viewsets.ModelViewSet):
"""端点快照 ViewSet - 嵌套路由GET /api/scans/{scan_pk}/endpoints/"""
serializer_class = EndpointSnapshotSerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['host']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = EndpointSnapshotsService()
def get_queryset(self):
scan_pk = self.kwargs.get('scan_pk')
if scan_pk:
return self.service.get_by_scan(scan_pk)
return self.service.get_all()
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
scan_pk = self.kwargs.get('scan_pk')
if not scan_pk:
raise DRFValidationError('必须在扫描下导出')
def line_iterator():
for url in self.service.iter_endpoint_urls_by_scan(scan_pk):
yield f"{url}\n"
response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-endpoints.txt"'
return response
class HostPortMappingSnapshotViewSet(viewsets.ModelViewSet):
"""主机端口映射快照 ViewSet - 嵌套路由GET /api/scans/{scan_pk}/ip-addresses/
注意:由于返回的是聚合数据(字典列表),不支持 DRF SearchFilter
"""
serializer_class = IPAddressAggregatedSerializer
pagination_class = BasePagination
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = HostPortMappingSnapshotsService()
def get_queryset(self):
scan_pk = self.kwargs.get('scan_pk')
search = self.request.query_params.get('search', None)
if scan_pk:
return self.service.get_ip_aggregation_by_scan(scan_pk, search=search)
return self.service.get_all_ip_aggregation(search=search)
@action(detail=False, methods=['get'], url_path='export')
def export(self, request):
scan_pk = self.kwargs.get('scan_pk')
if not scan_pk:
raise DRFValidationError('必须在扫描下导出')
def line_iterator():
for ip in self.service.iter_ips_by_scan(scan_pk):
yield f"{ip}\n"
response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-ip-addresses.txt"'
return response
class VulnerabilitySnapshotViewSet(viewsets.ModelViewSet):
"""漏洞快照 ViewSet - 嵌套路由GET /api/scans/{scan_pk}/vulnerabilities/"""
serializer_class = VulnerabilitySnapshotSerializer
pagination_class = BasePagination
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
search_fields = ['vuln_type']
ordering = ['-discovered_at']
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = VulnerabilitySnapshotsService()
def get_queryset(self):
scan_pk = self.kwargs.get('scan_pk')
if scan_pk:
return self.service.get_by_scan(scan_pk)
return self.service.get_all()

View File

@@ -0,0 +1,40 @@
"""
Asset 应用视图模块
重新导出所有视图类以保持向后兼容
"""
from .asset_views import (
AssetStatisticsViewSet,
SubdomainViewSet,
WebSiteViewSet,
DirectoryViewSet,
EndpointViewSet,
HostPortMappingViewSet,
VulnerabilityViewSet,
SubdomainSnapshotViewSet,
WebsiteSnapshotViewSet,
DirectorySnapshotViewSet,
EndpointSnapshotViewSet,
HostPortMappingSnapshotViewSet,
VulnerabilitySnapshotViewSet,
)
from .search_views import AssetSearchView, AssetSearchExportView
__all__ = [
'AssetStatisticsViewSet',
'SubdomainViewSet',
'WebSiteViewSet',
'DirectoryViewSet',
'EndpointViewSet',
'HostPortMappingViewSet',
'VulnerabilityViewSet',
'SubdomainSnapshotViewSet',
'WebsiteSnapshotViewSet',
'DirectorySnapshotViewSet',
'EndpointSnapshotViewSet',
'HostPortMappingSnapshotViewSet',
'VulnerabilitySnapshotViewSet',
'AssetSearchView',
'AssetSearchExportView',
]

File diff suppressed because it is too large

View File

@@ -0,0 +1,361 @@
"""
资产搜索 API 视图
提供资产搜索的 REST API 接口:
- GET /api/assets/search/ - 搜索资产
- GET /api/assets/search/export/ - 导出搜索结果为 CSV
搜索语法:
- field="value" 模糊匹配ILIKE %value%
- field=="value" 精确匹配
- field!="value" 不等于
- && AND 连接
- || OR 连接
支持的字段:
- host: 主机名
- url: URL
- title: 标题
- tech: 技术栈
- status: 状态码
- body: 响应体
- header: 响应头
支持的资产类型:
- website: 站点(默认)
- endpoint: 端点
"""
import logging
import json
from datetime import datetime
from urllib.parse import urlparse, urlunparse
from rest_framework import status
from rest_framework.views import APIView
from rest_framework.request import Request
from django.db import connection
from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.asset.services.search_service import AssetSearchService, VALID_ASSET_TYPES
logger = logging.getLogger(__name__)
class AssetSearchView(APIView):
"""
资产搜索 API
GET /api/assets/search/
Query Parameters:
q: 搜索查询表达式
asset_type: 资产类型 ('website' 或 'endpoint',默认 'website')
page: 页码(从 1 开始,默认 1
pageSize: 每页数量(默认 10最大 100
示例查询:
?q=host="api" && tech="nginx"
?q=tech="vue" || tech="react"&asset_type=endpoint
?q=status=="200" && host!="test"
Response:
{
"results": [...],
"total": 100,
"page": 1,
"pageSize": 10,
"totalPages": 10,
"assetType": "website"
}
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = AssetSearchService()
def _parse_headers(self, headers_data) -> dict:
"""解析响应头为字典"""
if not headers_data:
return {}
try:
return json.loads(headers_data)
except (json.JSONDecodeError, TypeError):
result = {}
for line in str(headers_data).split('\n'):
if ':' in line:
key, value = line.split(':', 1)
result[key.strip()] = value.strip()
return result
def _format_result(self, result: dict, vulnerabilities_by_url: dict, asset_type: str) -> dict:
"""格式化单个搜索结果"""
url = result.get('url', '')
vulns = vulnerabilities_by_url.get(url, [])
# 基础字段Website 和 Endpoint 共有)
formatted = {
'id': result.get('id'),
'url': url,
'host': result.get('host', ''),
'title': result.get('title', ''),
'technologies': result.get('tech', []) or [],
'statusCode': result.get('status_code'),
'contentLength': result.get('content_length'),
'contentType': result.get('content_type', ''),
'webserver': result.get('webserver', ''),
'location': result.get('location', ''),
'vhost': result.get('vhost'),
'responseHeaders': self._parse_headers(result.get('response_headers')),
'responseBody': result.get('response_body', ''),
'createdAt': result.get('created_at').isoformat() if result.get('created_at') else None,
'targetId': result.get('target_id'),
}
# Website 特有字段:漏洞关联
if asset_type == 'website':
formatted['vulnerabilities'] = [
{
'id': v.get('id'),
'name': v.get('vuln_type', ''),
'vulnType': v.get('vuln_type', ''),
'severity': v.get('severity', 'info'),
}
for v in vulns
]
# Endpoint 特有字段
if asset_type == 'endpoint':
formatted['matchedGfPatterns'] = result.get('matched_gf_patterns', []) or []
return formatted
def _get_vulnerabilities_by_url_prefix(self, website_urls: list) -> dict:
"""
根据 URL 前缀批量查询漏洞数据
漏洞 URL 是 website URL 的子路径,使用前缀匹配:
- website.url: https://example.com/path?query=1
- vulnerability.url: https://example.com/path/api/users
Args:
website_urls: website URL 列表,格式为 [(url, target_id), ...]
Returns:
dict: {website_url: [vulnerability_list]}
"""
if not website_urls:
return {}
try:
with connection.cursor() as cursor:
# 构建 OR 条件:每个 website URL去掉查询参数作为前缀匹配
conditions = []
params = []
url_mapping = {} # base_url -> original_url
for url, target_id in website_urls:
if not url or target_id is None:
continue
# 使用 urlparse 去掉查询参数和片段,只保留 scheme://netloc/path
parsed = urlparse(url)
base_url = urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
url_mapping[base_url] = url
conditions.append("(v.url LIKE %s AND v.target_id = %s)")
params.extend([base_url + '%', target_id])
if not conditions:
return {}
where_clause = " OR ".join(conditions)
sql = f"""
SELECT v.id, v.vuln_type, v.severity, v.url, v.target_id
FROM vulnerability v
WHERE {where_clause}
ORDER BY
CASE v.severity
WHEN 'critical' THEN 1
WHEN 'high' THEN 2
WHEN 'medium' THEN 3
WHEN 'low' THEN 4
ELSE 5
END
"""
cursor.execute(sql, params)
# 获取所有漏洞
all_vulns = []
for row in cursor.fetchall():
all_vulns.append({
'id': row[0],
'vuln_type': row[1],
'name': row[1],
'severity': row[2],
'url': row[3],
'target_id': row[4],
})
# 按原始 website URL 分组(用于返回结果)
result = {url: [] for url, _ in website_urls}
for vuln in all_vulns:
vuln_url = vuln['url']
# 找到匹配的 website URL(按列表顺序取第一个匹配的前缀)
for website_url, target_id in website_urls:
parsed = urlparse(website_url)
base_url = urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
if vuln_url.startswith(base_url) and vuln['target_id'] == target_id:
result[website_url].append(vuln)
break
return result
except Exception as e:
logger.error(f"批量查询漏洞失败: {e}")
return {}
def get(self, request: Request):
"""搜索资产"""
# 获取搜索查询
query = request.query_params.get('q', '').strip()
if not query:
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message='Search query (q) is required',
status_code=status.HTTP_400_BAD_REQUEST
)
# 获取并验证资产类型
asset_type = request.query_params.get('asset_type', 'website').strip().lower()
if asset_type not in VALID_ASSET_TYPES:
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message=f'Invalid asset_type. Must be one of: {", ".join(VALID_ASSET_TYPES)}',
status_code=status.HTTP_400_BAD_REQUEST
)
# 获取分页参数
try:
page = int(request.query_params.get('page', 1))
page_size = int(request.query_params.get('pageSize', 10))
except (ValueError, TypeError):
page = 1
page_size = 10
# 限制分页参数
page = max(1, page)
page_size = min(max(1, page_size), 100)
# 获取总数和搜索结果
total = self.service.count(query, asset_type)
total_pages = (total + page_size - 1) // page_size if total > 0 else 1
offset = (page - 1) * page_size
all_results = self.service.search(query, asset_type)
results = all_results[offset:offset + page_size]
# 批量查询漏洞数据(仅 Website 类型需要)
vulnerabilities_by_url = {}
if asset_type == 'website':
website_urls = [(r.get('url'), r.get('target_id')) for r in results if r.get('url') and r.get('target_id')]
vulnerabilities_by_url = self._get_vulnerabilities_by_url_prefix(website_urls) if website_urls else {}
# 格式化结果
formatted_results = [self._format_result(r, vulnerabilities_by_url, asset_type) for r in results]
return success_response(data={
'results': formatted_results,
'total': total,
'page': page,
'pageSize': page_size,
'totalPages': total_pages,
'assetType': asset_type,
})
class AssetSearchExportView(APIView):
"""
资产搜索导出 API
GET /api/assets/search/export/
Query Parameters:
q: 搜索查询表达式
asset_type: 资产类型 ('website' 或 'endpoint',默认 'website')
Response:
CSV 文件(带 Content-Length支持浏览器显示下载进度
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = AssetSearchService()
def _get_headers_and_formatters(self, asset_type: str):
"""获取 CSV 表头和格式化器"""
from apps.common.utils import format_datetime, format_list_field
if asset_type == 'website':
headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
'webserver', 'location', 'tech', 'vhost', 'created_at']
else:
headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
'webserver', 'location', 'tech', 'matched_gf_patterns', 'vhost', 'created_at']
formatters = {
'created_at': format_datetime,
'tech': lambda x: format_list_field(x, separator='; '),
'matched_gf_patterns': lambda x: format_list_field(x, separator='; '),
'vhost': lambda x: 'true' if x else ('false' if x is False else ''),
}
return headers, formatters
def get(self, request: Request):
"""导出搜索结果为 CSV带 Content-Length支持下载进度显示"""
from apps.common.utils import create_csv_export_response
# 获取搜索查询
query = request.query_params.get('q', '').strip()
if not query:
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message='Search query (q) is required',
status_code=status.HTTP_400_BAD_REQUEST
)
# 获取并验证资产类型
asset_type = request.query_params.get('asset_type', 'website').strip().lower()
if asset_type not in VALID_ASSET_TYPES:
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message=f'Invalid asset_type. Must be one of: {", ".join(VALID_ASSET_TYPES)}',
status_code=status.HTTP_400_BAD_REQUEST
)
# 检查是否有结果(快速检查,避免空导出)
total = self.service.count(query, asset_type)
if total == 0:
return error_response(
code=ErrorCodes.NOT_FOUND,
message='No results to export',
status_code=status.HTTP_404_NOT_FOUND
)
# 获取表头和格式化器
headers, formatters = self._get_headers_and_formatters(asset_type)
# 生成文件名
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'search_{asset_type}_{timestamp}.csv'
# 使用通用导出工具
data_iterator = self.service.search_iter(query, asset_type)
return create_csv_export_response(
data_iterator=data_iterator,
headers=headers,
filename=filename,
field_formatters=formatters,
show_progress=True # 显示下载进度
)
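
A hedged sketch of calling the endpoints above; the host, the authentication details, and the exact response envelope produced by success_response (not shown in this diff) are assumptions.

import requests

# Hedged sketch: search websites running nginx whose host contains "api".
resp = requests.get(
    "http://localhost:8000/api/assets/search/",   # host is illustrative
    params={
        "q": 'host="api" && tech="nginx"',
        "asset_type": "website",
        "page": 1,
        "pageSize": 10,
    },
    # Session cookies omitted; business endpoints require Session auth per
    # the permission rules elsewhere in this diff.
)
payload = resp.json()
# Per the view docstring, the data carries: results, total, page,
# pageSize, totalPages, assetType (possibly nested under an envelope key).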

View File

@@ -40,8 +40,14 @@ def fetch_config_and_setup_django():
print(f"[CONFIG] 正在从配置中心获取配置: {config_url}")
print(f"[CONFIG] IS_LOCAL={is_local}")
try:
# 构建请求头(包含 Worker API Key
headers = {}
worker_api_key = os.environ.get("WORKER_API_KEY", "")
if worker_api_key:
headers["X-Worker-API-Key"] = worker_api_key
# verify=False: 远程 Worker 通过 HTTPS 访问时可能使用自签名证书
resp = requests.get(config_url, timeout=10, verify=False)
resp = requests.get(config_url, headers=headers, timeout=10, verify=False)
resp.raise_for_status()
config = resp.json()
@@ -57,9 +63,6 @@ def fetch_config_and_setup_django():
os.environ.setdefault("DB_USER", db_user)
os.environ.setdefault("DB_PASSWORD", config['db']['password'])
# Redis 配置
os.environ.setdefault("REDIS_URL", config['redisUrl'])
# 日志配置
os.environ.setdefault("LOG_DIR", config['paths']['logs'])
os.environ.setdefault("LOG_LEVEL", config['logging']['level'])
@@ -71,7 +74,6 @@ def fetch_config_and_setup_django():
print(f"[CONFIG] DB_PORT: {db_port}")
print(f"[CONFIG] DB_NAME: {db_name}")
print(f"[CONFIG] DB_USER: {db_user}")
print(f"[CONFIG] REDIS_URL: {config['redisUrl']}")
except Exception as e:
print(f"[ERROR] 获取配置失败: {config_url} - {e}", file=sys.stderr)

View File

@@ -0,0 +1,31 @@
"""
标准化错误码定义
采用简化方案(参考 Stripe、GitHub 等大厂做法):
- 只定义 5-10 个通用错误码
- 未知错误使用通用错误码
- 错误码格式:大写字母和下划线组成
"""
class ErrorCodes:
"""标准化错误码
只定义通用错误码,其他错误使用通用消息。
这是 Stripe、GitHub 等大厂的标准做法。
错误码格式规范:
- 使用大写字母和下划线
- 简洁明了,易于理解
- 前端通过错误码映射到 i18n 键
"""
# 通用错误码8 个)
VALIDATION_ERROR = 'VALIDATION_ERROR' # 输入验证失败
NOT_FOUND = 'NOT_FOUND' # 资源未找到
PERMISSION_DENIED = 'PERMISSION_DENIED' # 权限不足
SERVER_ERROR = 'SERVER_ERROR' # 服务器内部错误
BAD_REQUEST = 'BAD_REQUEST' # 请求格式错误
CONFLICT = 'CONFLICT' # 资源冲突(如重复创建)
UNAUTHORIZED = 'UNAUTHORIZED' # 未认证
RATE_LIMITED = 'RATE_LIMITED' # 请求过于频繁
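
error_response itself is not shown in this diff; a hedged sketch of one plausible implementation, reconstructed from its call sites (code, message, status_code), follows. The envelope keys are assumptions.

from rest_framework.response import Response

def error_response(code: str, message: str, status_code: int) -> Response:
    # Hedged sketch: only the keyword arguments are confirmed by call sites;
    # the payload shape below is an assumption.
    return Response(
        {"success": False, "error": {"code": code, "message": message}},
        status=status_code,
    )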

View File

@@ -0,0 +1,49 @@
"""
自定义异常处理器
统一处理 DRF 异常,确保错误响应格式一致
"""
from rest_framework.views import exception_handler
from rest_framework import status
from rest_framework.exceptions import AuthenticationFailed, NotAuthenticated
from apps.common.response_helpers import error_response
from apps.common.error_codes import ErrorCodes
def custom_exception_handler(exc, context):
"""
自定义异常处理器
处理认证相关异常,返回统一格式的错误响应
"""
# 先调用 DRF 默认的异常处理器
response = exception_handler(exc, context)
if response is not None:
# 处理 401 未认证错误
if response.status_code == status.HTTP_401_UNAUTHORIZED:
return error_response(
code=ErrorCodes.UNAUTHORIZED,
message='Authentication required',
status_code=status.HTTP_401_UNAUTHORIZED
)
# 处理 403 权限不足错误
if response.status_code == status.HTTP_403_FORBIDDEN:
return error_response(
code=ErrorCodes.PERMISSION_DENIED,
message='Permission denied',
status_code=status.HTTP_403_FORBIDDEN
)
# 处理 NotAuthenticated 和 AuthenticationFailed 异常
if isinstance(exc, (NotAuthenticated, AuthenticationFailed)):
return error_response(
code=ErrorCodes.UNAUTHORIZED,
message='Authentication required',
status_code=status.HTTP_401_UNAUTHORIZED
)
return response
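
To take effect, the handler above must be registered in the DRF settings; a minimal sketch follows (the dotted path is hypothetical and should point at wherever custom_exception_handler actually lives).

# settings.py (sketch)
REST_FRAMEWORK = {
    "EXCEPTION_HANDLER": "apps.common.exception_handler.custom_exception_handler",
}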

View File

@@ -0,0 +1,34 @@
# Generated by Django 5.2.7 on 2026-01-06 00:55
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
initial = True
dependencies = [
('targets', '0001_initial'),
]
operations = [
migrations.CreateModel(
name='BlacklistRule',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('pattern', models.CharField(help_text='规则模式,如 *.gov, 10.0.0.0/8, 192.168.1.1', max_length=255)),
('rule_type', models.CharField(choices=[('domain', '域名'), ('ip', 'IP地址'), ('cidr', 'CIDR范围'), ('keyword', '关键词')], help_text='规则类型:domain, ip, cidr, keyword', max_length=20)),
('scope', models.CharField(choices=[('global', '全局规则'), ('target', 'Target规则')], db_index=True, help_text='作用域global 或 target', max_length=20)),
('description', models.CharField(blank=True, default='', help_text='规则描述', max_length=500)),
('created_at', models.DateTimeField(auto_now_add=True)),
('target', models.ForeignKey(blank=True, help_text='关联的 Target(仅 scope=target 时有值)', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='blacklist_rules', to='targets.target')),
],
options={
'db_table': 'blacklist_rule',
'ordering': ['-created_at'],
'indexes': [models.Index(fields=['scope', 'rule_type'], name='blacklist_r_scope_6ff77f_idx'), models.Index(fields=['target', 'scope'], name='blacklist_r_target__191441_idx')],
'constraints': [models.UniqueConstraint(fields=('pattern', 'scope', 'target'), name='unique_blacklist_rule')],
},
),
]

View File

@@ -0,0 +1,4 @@
"""Common models"""
from apps.common.models.blacklist import BlacklistRule
__all__ = ['BlacklistRule']

View File

@@ -0,0 +1,71 @@
"""黑名单规则模型"""
from django.db import models
class BlacklistRule(models.Model):
"""黑名单规则模型
用于存储黑名单过滤规则,支持域名、IP、CIDR、关键词四种类型。
支持两层作用域:全局规则和 Target 级规则。
"""
class RuleType(models.TextChoices):
DOMAIN = 'domain', '域名'
IP = 'ip', 'IP地址'
CIDR = 'cidr', 'CIDR范围'
KEYWORD = 'keyword', '关键词'
class Scope(models.TextChoices):
GLOBAL = 'global', '全局规则'
TARGET = 'target', 'Target规则'
id = models.AutoField(primary_key=True)
pattern = models.CharField(
max_length=255,
help_text='规则模式,如 *.gov, 10.0.0.0/8, 192.168.1.1'
)
rule_type = models.CharField(
max_length=20,
choices=RuleType.choices,
help_text='规则类型(domain, ip, cidr, keyword)'
)
scope = models.CharField(
max_length=20,
choices=Scope.choices,
db_index=True,
help_text='作用域(global 或 target)'
)
target = models.ForeignKey(
'targets.Target',
on_delete=models.CASCADE,
null=True,
blank=True,
related_name='blacklist_rules',
help_text='关联的 Target(仅 scope=target 时有值)'
)
description = models.CharField(
max_length=500,
blank=True,
default='',
help_text='规则描述'
)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = 'blacklist_rule'
indexes = [
models.Index(fields=['scope', 'rule_type']),
models.Index(fields=['target', 'scope']),
]
constraints = [
models.UniqueConstraint(
fields=['pattern', 'scope', 'target'],
name='unique_blacklist_rule'
),
]
ordering = ['-created_at']
def __str__(self):
if self.scope == self.Scope.TARGET and self.target:
return f"[{self.scope}:{self.target_id}] {self.pattern}"
return f"[{self.scope}] {self.pattern}"

View File

@@ -0,0 +1,80 @@
"""
集中式权限管理
实现三类端点的认证逻辑:
1. 公开端点(无需认证):登录、登出、获取当前用户状态
2. Worker 端点(API Key 认证):注册、配置、心跳、回调、资源同步
3. 业务端点(Session 认证):其他所有 API
"""
import re
import logging
from django.conf import settings
from rest_framework.permissions import BasePermission
logger = logging.getLogger(__name__)
# 公开端点白名单(无需任何认证)
PUBLIC_ENDPOINTS = [
r'^/api/auth/login/$',
r'^/api/auth/logout/$',
r'^/api/auth/me/$',
]
# Worker API 端点(需要 API Key 认证)
# 包括:注册、配置、心跳、回调、资源同步(字典下载)
WORKER_ENDPOINTS = [
r'^/api/workers/register/$',
r'^/api/workers/config/$',
r'^/api/workers/\d+/heartbeat/$',
r'^/api/callbacks/',
# 资源同步端点(Worker 需要下载字典文件)
r'^/api/wordlists/download/$',
# 注意:指纹导出 API 使用 Session 认证(前端用户导出用)
# Worker 通过数据库直接获取指纹数据,不需要 HTTP API
]
class IsAuthenticatedOrPublic(BasePermission):
"""
自定义权限类:
- 白名单内的端点公开访问
- Worker 端点需要 API Key 认证
- 其他端点需要 Session 认证
"""
def has_permission(self, request, view):
path = request.path
# 检查是否在公开白名单内
for pattern in PUBLIC_ENDPOINTS:
if re.match(pattern, path):
return True
# 检查是否是 Worker 端点
for pattern in WORKER_ENDPOINTS:
if re.match(pattern, path):
return self._check_worker_api_key(request)
# 其他路径需要 Session 认证
return request.user and request.user.is_authenticated
def _check_worker_api_key(self, request):
"""验证 Worker API Key"""
api_key = request.headers.get('X-Worker-API-Key')
expected_key = getattr(settings, 'WORKER_API_KEY', None)
if not expected_key:
# 未配置 API Key 时,拒绝所有 Worker 请求
logger.warning("WORKER_API_KEY 未配置,拒绝 Worker 请求")
return False
if not api_key:
logger.warning(f"Worker 请求缺少 X-Worker-API-Key Header: {request.path}")
return False
if api_key != expected_key:
logger.warning(f"Worker API Key 无效: {request.path}")
return False
return True
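Wiring the class in as the DRF default, plus what an authenticated Worker call looks like — a sketch (the settings module path and host are assumptions, not shown in this diff):
REST_FRAMEWORK = {
    'DEFAULT_PERMISSION_CLASSES': [
        'apps.common.permissions.IsAuthenticatedOrPublic',
    ],
}
# Worker-side call, authenticated by the shared key header:
# requests.get('https://server/api/workers/config/', headers={'X-Worker-API-Key': key})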

View File

@@ -16,6 +16,7 @@ def setup_django_for_prefect():
1. 添加项目根目录到 Python 路径
2. 设置 DJANGO_SETTINGS_MODULE 环境变量
3. 调用 django.setup() 初始化 Django
4. 关闭旧的数据库连接,确保使用新连接
使用方式:
from apps.common.prefect_django_setup import setup_django_for_prefect
@@ -36,6 +37,25 @@ def setup_django_for_prefect():
# 初始化 Django
import django
django.setup()
# 关闭所有旧的数据库连接,确保 Worker 进程使用新连接
# 解决 "server closed the connection unexpectedly" 问题
from django.db import connections
connections.close_all()
def close_old_db_connections():
"""
关闭旧的数据库连接
在长时间运行的任务中调用此函数,可以确保使用有效的数据库连接。
适用于:
- Flow 开始前
- Task 开始前
- 长时间空闲后恢复操作前
"""
from django.db import connections
connections.close_all()
# 自动执行初始化(导入即生效)
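In a long-running Prefect task the helper is typically called before the first ORM access. A minimal sketch, assuming the task body and model import path (both illustrative):
from prefect import task
from apps.common.prefect_django_setup import close_old_db_connections  # importing the module also runs setup

@task
def count_targets() -> int:
    close_old_db_connections()  # drop stale connections before the first query
    from apps.targets.models import Target  # illustrative model import
    return Target.objects.count()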

View File

@@ -0,0 +1,88 @@
"""
标准化 API 响应辅助函数
遵循行业标准(RFC 9457 Problem Details)和大厂实践(Google、Stripe、GitHub):
- 成功响应只包含数据,不包含 message 字段
- 错误响应使用机器可读的错误码,前端映射到 i18n 消息
"""
from typing import Any, Dict, List, Optional, Union
from rest_framework import status
from rest_framework.response import Response
def success_response(
data: Optional[Union[Dict[str, Any], List[Any]]] = None,
status_code: int = status.HTTP_200_OK
) -> Response:
"""
标准化成功响应
直接返回数据,不做包装,符合 Stripe/GitHub 等大厂标准。
Args:
data: 响应数据(dict 或 list)
status_code: HTTP 状态码,默认 200
Returns:
Response: DRF Response 对象
Examples:
# 单个资源
>>> success_response(data={'id': 1, 'name': 'Test'})
{'id': 1, 'name': 'Test'}
# 操作结果
>>> success_response(data={'count': 3, 'scans': [...]})
{'count': 3, 'scans': [...]}
# 创建资源
>>> success_response(data={'id': 1}, status_code=201)
"""
# 注意:不能使用 data or {},因为空列表 [] 会被转换为 {}
if data is None:
data = {}
return Response(data, status=status_code)
def error_response(
code: str,
message: Optional[str] = None,
details: Optional[List[Dict[str, Any]]] = None,
status_code: int = status.HTTP_400_BAD_REQUEST
) -> Response:
"""
标准化错误响应
Args:
code: 错误码(如 'VALIDATION_ERROR', 'NOT_FOUND')
格式:大写字母和下划线组成
message: 开发者调试信息(非用户显示)
details: 详细错误信息(如字段级验证错误)
status_code: HTTP 状态码,默认 400
Returns:
Response: DRF Response 对象
Examples:
# 简单错误
>>> error_response(code='NOT_FOUND', status_code=404)
{'error': {'code': 'NOT_FOUND'}}
# 带调试信息
>>> error_response(
... code='VALIDATION_ERROR',
... message='Invalid input data',
... details=[{'field': 'name', 'message': 'Required'}]
... )
{'error': {'code': 'VALIDATION_ERROR', 'message': '...', 'details': [...]}}
"""
error_body: Dict[str, Any] = {'code': code}
if message:
error_body['message'] = message
if details:
error_body['details'] = details
return Response({'error': error_body}, status=status_code)

View File

@@ -0,0 +1,12 @@
"""Common serializers"""
from .blacklist_serializers import (
BlacklistRuleSerializer,
GlobalBlacklistRuleSerializer,
TargetBlacklistRuleSerializer,
)
__all__ = [
'BlacklistRuleSerializer',
'GlobalBlacklistRuleSerializer',
'TargetBlacklistRuleSerializer',
]

View File

@@ -0,0 +1,68 @@
"""黑名单规则序列化器"""
from rest_framework import serializers
from apps.common.models import BlacklistRule
from apps.common.utils import detect_rule_type
class BlacklistRuleSerializer(serializers.ModelSerializer):
"""黑名单规则序列化器"""
class Meta:
model = BlacklistRule
fields = [
'id',
'pattern',
'rule_type',
'scope',
'target',
'description',
'created_at',
]
read_only_fields = ['id', 'rule_type', 'created_at']
def validate_pattern(self, value):
"""验证规则模式"""
if not value or not value.strip():
raise serializers.ValidationError("规则模式不能为空")
return value.strip()
def create(self, validated_data):
"""创建规则时自动识别规则类型"""
pattern = validated_data.get('pattern', '')
validated_data['rule_type'] = detect_rule_type(pattern)
return super().create(validated_data)
def update(self, instance, validated_data):
"""更新规则时重新识别规则类型"""
if 'pattern' in validated_data:
pattern = validated_data['pattern']
validated_data['rule_type'] = detect_rule_type(pattern)
return super().update(instance, validated_data)
class GlobalBlacklistRuleSerializer(BlacklistRuleSerializer):
"""全局黑名单规则序列化器"""
class Meta(BlacklistRuleSerializer.Meta):
fields = ['id', 'pattern', 'rule_type', 'description', 'created_at']
read_only_fields = ['id', 'rule_type', 'created_at']
def create(self, validated_data):
"""创建全局规则"""
validated_data['scope'] = BlacklistRule.Scope.GLOBAL
validated_data['target'] = None
return super().create(validated_data)
class TargetBlacklistRuleSerializer(BlacklistRuleSerializer):
"""Target 黑名单规则序列化器"""
class Meta(BlacklistRuleSerializer.Meta):
fields = ['id', 'pattern', 'rule_type', 'description', 'created_at']
read_only_fields = ['id', 'rule_type', 'created_at']
def create(self, validated_data):
"""创建 Target 规则target_id 由 view 设置)"""
validated_data['scope'] = BlacklistRule.Scope.TARGET
return super().create(validated_data)

View File

@@ -3,8 +3,16 @@
提供系统级别的公共服务,包括:
- SystemLogService: 系统日志读取服务
- BlacklistService: 黑名单过滤服务
注意:FilterService 已移至 apps.common.utils.filter_utils
推荐使用: from apps.common.utils.filter_utils import apply_filters
"""
from .system_log_service import SystemLogService
from .blacklist_service import BlacklistService
__all__ = ['SystemLogService']
__all__ = [
'SystemLogService',
'BlacklistService',
]

View File

@@ -0,0 +1,176 @@
"""
黑名单规则管理服务
负责黑名单规则的 CRUD 操作(数据库层面)。
过滤逻辑请使用 apps.common.utils.BlacklistFilter。
架构说明:
- Model: BlacklistRule (apps.common.models.blacklist)
- Service: BlacklistService (本文件) - 规则 CRUD
- Utils: BlacklistFilter (apps.common.utils.blacklist_filter) - 过滤逻辑
- View: GlobalBlacklistView, TargetViewSet.blacklist
"""
import logging
from typing import List, Dict, Any, Optional
from django.db.models import QuerySet
from apps.common.utils import detect_rule_type
logger = logging.getLogger(__name__)
def _normalize_patterns(patterns: List[str]) -> List[str]:
"""
规范化规则列表:去重 + 过滤空行
Args:
patterns: 原始规则列表
Returns:
List[str]: 去重后的规则列表(保持顺序)
"""
return list(dict.fromkeys(filter(None, (p.strip() for p in patterns))))
class BlacklistService:
"""
黑名单规则管理服务
只负责规则的 CRUD 操作,不包含过滤逻辑。
过滤逻辑请使用 BlacklistFilter 工具类。
"""
def get_global_rules(self) -> QuerySet:
"""
获取全局黑名单规则列表
Returns:
QuerySet: 全局规则查询集
"""
from apps.common.models import BlacklistRule
return BlacklistRule.objects.filter(scope=BlacklistRule.Scope.GLOBAL)
def get_target_rules(self, target_id: int) -> QuerySet:
"""
获取 Target 级黑名单规则列表
Args:
target_id: Target ID
Returns:
QuerySet: Target 级规则查询集
"""
from apps.common.models import BlacklistRule
return BlacklistRule.objects.filter(
scope=BlacklistRule.Scope.TARGET,
target_id=target_id
)
def get_rules(self, target_id: Optional[int] = None) -> List:
"""
获取黑名单规则(全局 + Target 级)
Args:
target_id: Target ID(用于加载 Target 级规则)
Returns:
List[BlacklistRule]: 规则列表
"""
from apps.common.models import BlacklistRule
# 加载全局规则
rules = list(BlacklistRule.objects.filter(scope=BlacklistRule.Scope.GLOBAL))
# 加载 Target 级规则
if target_id:
target_rules = BlacklistRule.objects.filter(
scope=BlacklistRule.Scope.TARGET,
target_id=target_id
)
rules.extend(target_rules)
return rules
def replace_global_rules(self, patterns: List[str]) -> Dict[str, Any]:
"""
全量替换全局黑名单规则(PUT 语义)
Args:
patterns: 新的规则模式列表
Returns:
Dict: {'count': int} 最终规则数量
"""
from apps.common.models import BlacklistRule
count = self._replace_rules(
patterns=patterns,
scope=BlacklistRule.Scope.GLOBAL,
target=None
)
logger.info("全量替换全局黑名单规则: %d", count)
return {'count': count}
def replace_target_rules(self, target, patterns: List[str]) -> Dict[str, Any]:
"""
全量替换 Target 级黑名单规则(PUT 语义)
Args:
target: Target 对象
patterns: 新的规则模式列表
Returns:
Dict: {'count': int} 最终规则数量
"""
from apps.common.models import BlacklistRule
count = self._replace_rules(
patterns=patterns,
scope=BlacklistRule.Scope.TARGET,
target=target
)
logger.info("全量替换 Target 黑名单规则: %d 条 (Target: %s)", count, target.name)
return {'count': count}
def _replace_rules(self, patterns: List[str], scope: str, target=None) -> int:
"""
内部方法:全量替换规则
Args:
patterns: 规则模式列表
scope: 规则作用域 (GLOBAL/TARGET)
target: Target 对象(仅 TARGET 作用域需要)
Returns:
int: 最终规则数量
"""
from apps.common.models import BlacklistRule
from django.db import transaction
patterns = _normalize_patterns(patterns)
with transaction.atomic():
# 1. 删除旧规则
delete_filter = {'scope': scope}
if target:
delete_filter['target'] = target
BlacklistRule.objects.filter(**delete_filter).delete()
# 2. 创建新规则
if patterns:
rules = [
BlacklistRule(
pattern=pattern,
rule_type=detect_rule_type(pattern),
scope=scope,
target=target
)
for pattern in patterns
]
BlacklistRule.objects.bulk_create(rules)
return len(patterns)
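How the CRUD layer and the filter layer compose — a sketch assuming a Django shell session and an existing target with id 1:
from apps.common.services import BlacklistService
from apps.common.utils import BlacklistFilter

rules = BlacklistService().get_rules(target_id=1)  # global + target-level rules
bf = BlacklistFilter(rules)
urls = ['http://example.com', 'http://city.gov']
allowed = [u for u in urls if bf.is_allowed(u)]  # blacklisted hosts are dropped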

View File

@@ -4,15 +4,28 @@
提供系统日志的读取功能,支持:
- 从日志目录读取日志文件
- 限制返回行数,防止内存溢出
- 列出可用的日志文件
"""
import fnmatch
import logging
import os
import subprocess
from datetime import datetime, timezone
from typing import TypedDict
logger = logging.getLogger(__name__)
class LogFileInfo(TypedDict):
"""日志文件信息"""
filename: str
category: str # 'system' | 'error' | 'performance' | 'container'
size: int
modifiedAt: str # ISO 8601 格式
class SystemLogService:
"""
系统日志服务类
@@ -20,23 +33,131 @@ class SystemLogService:
负责读取系统日志文件,支持从容器内路径或宿主机挂载路径读取日志。
"""
# 日志文件分类规则
CATEGORY_RULES = [
('xingrin.log', 'system'),
('xingrin_error.log', 'error'),
('performance.log', 'performance'),
('container_*.log', 'container'),
]
def __init__(self):
# 日志文件路径(容器内路径,通过 volume 挂载到宿主机 /opt/xingrin/logs)
self.log_file = "/app/backend/logs/xingrin.log"
self.default_lines = 200 # 默认返回行数
self.max_lines = 10000 # 最大返回行数限制
self.timeout_seconds = 3 # tail 命令超时时间
# 日志目录路径
self.log_dir = "/opt/xingrin/logs"
self.default_file = "xingrin.log" # 默认日志文件
self.default_lines = 200 # 默认返回行数
self.max_lines = 10000 # 最大返回行数限制
self.timeout_seconds = 3 # tail 命令超时时间
def get_logs_content(self, lines: int | None = None) -> str:
def _categorize_file(self, filename: str) -> str | None:
"""
根据文件名判断日志分类
Returns:
分类名称,如果不是日志文件则返回 None
"""
for pattern, category in self.CATEGORY_RULES:
if fnmatch.fnmatch(filename, pattern):
return category
return None
def _validate_filename(self, filename: str) -> bool:
"""
验证文件名是否合法(防止路径遍历攻击)
Args:
filename: 要验证的文件名
Returns:
bool: 文件名是否合法
"""
# 不允许包含路径分隔符
if '/' in filename or '\\' in filename:
return False
# 不允许 .. 路径遍历
if '..' in filename:
return False
# 必须是已知的日志文件类型
return self._categorize_file(filename) is not None
def get_log_files(self) -> list[LogFileInfo]:
"""
获取所有可用的日志文件列表
Returns:
日志文件信息列表,按分类和文件名排序
"""
files: list[LogFileInfo] = []
if not os.path.isdir(self.log_dir):
logger.warning("日志目录不存在: %s", self.log_dir)
return files
for filename in os.listdir(self.log_dir):
filepath = os.path.join(self.log_dir, filename)
# 只处理文件,跳过目录
if not os.path.isfile(filepath):
continue
# 判断分类
category = self._categorize_file(filename)
if category is None:
continue
# 获取文件信息
try:
stat = os.stat(filepath)
modified_at = datetime.fromtimestamp(
stat.st_mtime, tz=timezone.utc
).isoformat()
files.append({
'filename': filename,
'category': category,
'size': stat.st_size,
'modifiedAt': modified_at,
})
except OSError as e:
logger.warning("获取文件信息失败 %s: %s", filepath, e)
continue
# 排序:按分类优先级(system > error > performance > container),然后按文件名
category_order = {'system': 0, 'error': 1, 'performance': 2, 'container': 3}
files.sort(key=lambda f: (category_order.get(f['category'], 99), f['filename']))
return files
def get_logs_content(self, filename: str | None = None, lines: int | None = None) -> str:
"""
获取系统日志内容
Args:
filename: 日志文件名,默认为 xingrin.log
lines: 返回的日志行数,默认 200 行,最大 10000 行
Returns:
str: 日志内容,每行以换行符分隔,保持原始顺序
Raises:
ValueError: 文件名不合法
FileNotFoundError: 日志文件不存在
"""
# 文件名处理
if filename is None:
filename = self.default_file
# 验证文件名
if not self._validate_filename(filename):
raise ValueError(f"无效的文件名: {filename}")
# 构建完整路径
log_file = os.path.join(self.log_dir, filename)
# 检查文件是否存在
if not os.path.isfile(log_file):
raise FileNotFoundError(f"日志文件不存在: {filename}")
# 参数校验和默认值处理
if lines is None:
lines = self.default_lines
@@ -48,7 +169,7 @@ class SystemLogService:
lines = self.max_lines
# 使用 tail 命令读取日志文件末尾内容
cmd = ["tail", "-n", str(lines), self.log_file]
cmd = ["tail", "-n", str(lines), log_file]
result = subprocess.run(
cmd,

View File

@@ -2,14 +2,24 @@
通用模块 URL 配置
路由说明:
- /api/auth/* 认证相关接口(登录、登出、用户信息
- /api/system/* 系统管理接口(日志查看等
- /api/health/ 健康检查接口(无需认证)
- /api/auth/* 认证相关接口(登录、登出、用户信息
- /api/system/* 系统管理接口(日志查看等)
- /api/blacklist/* 黑名单管理接口
"""
from django.urls import path
from .views import LoginView, LogoutView, MeView, ChangePasswordView, SystemLogsView
from .views import (
LoginView, LogoutView, MeView, ChangePasswordView,
SystemLogsView, SystemLogFilesView, HealthCheckView,
GlobalBlacklistView,
)
urlpatterns = [
# 健康检查(无需认证)
path('health/', HealthCheckView.as_view(), name='health-check'),
# 认证相关
path('auth/login/', LoginView.as_view(), name='auth-login'),
path('auth/logout/', LogoutView.as_view(), name='auth-logout'),
@@ -18,4 +28,8 @@ urlpatterns = [
# 系统管理
path('system/logs/', SystemLogsView.as_view(), name='system-logs'),
path('system/logs/files/', SystemLogFilesView.as_view(), name='system-log-files'),
# 黑名单管理(PUT 全量替换模式)
path('blacklist/rules/', GlobalBlacklistView.as_view(), name='blacklist-rules'),
]
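The PUT-is-full-replacement contract can be exercised directly. A sketch (host is a placeholder; assumes a logged-in session, CSRF handling omitted for brevity):
import requests

s = requests.Session()
base = 'https://server/api/blacklist/rules/'
print(s.get(base).json())  # -> {'patterns': ['*.gov', '10.0.0.0/8']}
s.put(base, json={'patterns': ['*.gov', '*.edu']})  # full replacement; [] clears all rules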

View File

@@ -0,0 +1,38 @@
"""Common utilities"""
from .dedup import deduplicate_for_bulk, get_unique_fields
from .hash import (
calc_file_sha256,
calc_stream_sha256,
safe_calc_file_sha256,
is_file_hash_match,
)
from .csv_utils import (
generate_csv_rows,
format_list_field,
format_datetime,
create_csv_export_response,
UTF8_BOM,
)
from .blacklist_filter import (
BlacklistFilter,
detect_rule_type,
extract_host,
)
__all__ = [
'deduplicate_for_bulk',
'get_unique_fields',
'calc_file_sha256',
'calc_stream_sha256',
'safe_calc_file_sha256',
'is_file_hash_match',
'generate_csv_rows',
'format_list_field',
'format_datetime',
'create_csv_export_response',
'UTF8_BOM',
'BlacklistFilter',
'detect_rule_type',
'extract_host',
]

View File

@@ -0,0 +1,246 @@
"""
黑名单过滤工具
提供域名、IP、CIDR、关键词的黑名单匹配功能。
纯工具类,不涉及数据库操作。
支持的规则类型:
1. 域名精确匹配: example.com
- 规则: example.com
- 匹配: example.com
- 不匹配: sub.example.com, other.com
2. 域名后缀匹配: *.example.com
- 规则: *.example.com
- 匹配: sub.example.com, a.b.example.com, example.com
- 不匹配: other.com, example.com.cn
3. 关键词匹配: *cdn*
- 规则: *cdn*
- 匹配: cdn.example.com, a.cdn.b.com, mycdn123.com
- 不匹配: example.com (不包含 cdn)
4. IP 精确匹配: 192.168.1.1
- 规则: 192.168.1.1
- 匹配: 192.168.1.1
- 不匹配: 192.168.1.2
5. CIDR 范围匹配: 192.168.0.0/24
- 规则: 192.168.0.0/24
- 匹配: 192.168.0.1, 192.168.0.255
- 不匹配: 192.168.1.1
使用方式:
from apps.common.utils import BlacklistFilter
# 创建过滤器(传入规则列表)
rules = BlacklistRule.objects.filter(...)
bf = BlacklistFilter(rules)
# 检查单个目标
if bf.is_allowed('http://example.com'):
process('http://example.com')
# 流式处理
for url in urls:
if bf.is_allowed(url):
process(url)
"""
import ipaddress
import logging
from typing import List, Optional
from urllib.parse import urlparse
from apps.common.validators import is_valid_ip, validate_cidr
logger = logging.getLogger(__name__)
def detect_rule_type(pattern: str) -> str:
"""
自动识别规则类型
支持的模式:
- 域名精确匹配: example.com
- 域名后缀匹配: *.example.com
- 关键词匹配: *cdn* (匹配包含 cdn 的域名)
- IP 精确匹配: 192.168.1.1
- CIDR 范围: 192.168.0.0/24
Args:
pattern: 规则模式字符串
Returns:
str: 规则类型 ('domain', 'ip', 'cidr', 'keyword')
"""
if not pattern:
return 'domain'
pattern = pattern.strip()
# 检查关键词模式: *keyword* (前后都有星号,中间无点)
if pattern.startswith('*') and pattern.endswith('*') and len(pattern) > 2:
keyword = pattern[1:-1]
# 关键词中不能有点(否则可能是域名模式)
if '.' not in keyword:
return 'keyword'
# 检查 CIDR(包含 /)
if '/' in pattern:
try:
validate_cidr(pattern)
return 'cidr'
except ValueError:
pass
# 检查 IP(去掉通配符前缀后验证)
clean_pattern = pattern.lstrip('*').lstrip('.')
if is_valid_ip(clean_pattern):
return 'ip'
# 默认为域名
return 'domain'
def extract_host(target: str) -> str:
"""
从目标字符串中提取主机名
支持:
- 纯域名:example.com
- 纯 IP:192.168.1.1
- URL:http://example.com/path
Args:
target: 目标字符串
Returns:
str: 提取的主机名
"""
if not target:
return ''
target = target.strip()
# 如果是 URL,提取 hostname
if '://' in target:
try:
parsed = urlparse(target)
return parsed.hostname or target
except Exception:
return target
return target
class BlacklistFilter:
"""
黑名单过滤器
预编译规则,提供高效的匹配功能。
"""
def __init__(self, rules: List):
"""
初始化过滤器
Args:
rules: BlacklistRule 对象列表
"""
from apps.common.models import BlacklistRule
# 预解析:按类型分类 + CIDR 预编译
self._domain_rules = [] # (pattern, is_wildcard, suffix)
self._ip_rules = set() # 精确 IP 用 set,O(1) 查找
self._cidr_rules = [] # (pattern, network_obj)
self._keyword_rules = [] # 关键词列表(小写)
# 去重:跨 scope 可能有重复规则
seen_patterns = set()
for rule in rules:
if rule.pattern in seen_patterns:
continue
seen_patterns.add(rule.pattern)
if rule.rule_type == BlacklistRule.RuleType.DOMAIN:
pattern = rule.pattern.lower()
if pattern.startswith('*.'):
self._domain_rules.append((pattern, True, pattern[1:]))
else:
self._domain_rules.append((pattern, False, None))
elif rule.rule_type == BlacklistRule.RuleType.IP:
self._ip_rules.add(rule.pattern)
elif rule.rule_type == BlacklistRule.RuleType.CIDR:
try:
network = ipaddress.ip_network(rule.pattern, strict=False)
self._cidr_rules.append((rule.pattern, network))
except ValueError:
pass
elif rule.rule_type == BlacklistRule.RuleType.KEYWORD:
# *cdn* -> cdn
keyword = rule.pattern[1:-1].lower()
self._keyword_rules.append(keyword)
def is_allowed(self, target: str) -> bool:
"""
检查目标是否通过过滤
Args:
target: 要检查的目标(域名/IP/URL)
Returns:
bool: True 表示通过(不在黑名单),False 表示被过滤
"""
if not target:
return True
host = extract_host(target)
if not host:
return True
# 先判断输入类型,再走对应分支
if is_valid_ip(host):
return self._check_ip_rules(host)
else:
return self._check_domain_rules(host)
def _check_domain_rules(self, host: str) -> bool:
"""检查域名规则(精确匹配 + 后缀匹配 + 关键词匹配)"""
host_lower = host.lower()
# 1. 域名规则(精确 + 后缀)
for pattern, is_wildcard, suffix in self._domain_rules:
if is_wildcard:
if host_lower.endswith(suffix) or host_lower == pattern[2:]:
return False
else:
if host_lower == pattern:
return False
# 2. 关键词匹配(字符串 in 操作,O(n*m))
for keyword in self._keyword_rules:
if keyword in host_lower:
return False
return True
def _check_ip_rules(self, host: str) -> bool:
"""检查 IP 规则(精确匹配 + CIDR"""
# 1. IP 精确匹配(O(1))
if host in self._ip_rules:
return False
# 2. CIDR 匹配
if self._cidr_rules:
try:
ip_obj = ipaddress.ip_address(host)
for _, network in self._cidr_rules:
if ip_obj in network:
return False
except ValueError:
pass
return True
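What the auto-detection resolves to for typical patterns — a sketch (Django only needs to be importable for the package import):
from apps.common.utils import detect_rule_type

detect_rule_type('example.com')    # -> 'domain'
detect_rule_type('*.example.com')  # -> 'domain' (wildcard suffix form)
detect_rule_type('*cdn*')          # -> 'keyword'
detect_rule_type('192.168.1.1')    # -> 'ip'
detect_rule_type('10.0.0.0/8')     # -> 'cidr'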

View File

@@ -0,0 +1,244 @@
"""CSV 导出工具模块
提供流式 CSV 生成功能,支持:
- UTF-8 BOMExcel 兼容)
- RFC 4180 规范转义
- 流式生成(内存友好)
- 带 Content-Length 的文件响应(支持浏览器下载进度显示)
"""
import csv
import io
import os
import tempfile
import logging
from datetime import datetime
from typing import Iterator, Dict, Any, List, Callable, Optional
from django.http import FileResponse, StreamingHttpResponse
logger = logging.getLogger(__name__)
# UTF-8 BOM,确保 Excel 正确识别编码
UTF8_BOM = '\ufeff'
def generate_csv_rows(
data_iterator: Iterator[Dict[str, Any]],
headers: List[str],
field_formatters: Optional[Dict[str, Callable]] = None
) -> Iterator[str]:
"""
流式生成 CSV 行
Args:
data_iterator: 数据迭代器,每个元素是一个字典
headers: CSV 表头列表
field_formatters: 字段格式化函数字典(key 为字段名,value 为格式化函数)
Yields:
CSV 行字符串(包含换行符)
Example:
>>> data = [{'ip': '192.168.1.1', 'hosts': ['a.com', 'b.com']}]
>>> headers = ['ip', 'hosts']
>>> formatters = {'hosts': format_list_field}
>>> for row in generate_csv_rows(iter(data), headers, formatters):
... print(row, end='')
"""
# 输出 BOM + 表头
output = io.StringIO()
writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
writer.writerow(headers)
yield UTF8_BOM + output.getvalue()
# 输出数据行
for row_data in data_iterator:
output = io.StringIO()
writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
row = []
for header in headers:
value = row_data.get(header, '')
if field_formatters and header in field_formatters:
value = field_formatters[header](value)
row.append(value if value is not None else '')
writer.writerow(row)
yield output.getvalue()
def format_list_field(values: List, separator: str = ';') -> str:
"""
将列表字段格式化为分号分隔的字符串
Args:
values: 值列表
separator: 分隔符,默认为分号
Returns:
分隔符连接的字符串
Example:
>>> format_list_field(['a.com', 'b.com'])
'a.com;b.com'
>>> format_list_field([80, 443])
'80;443'
>>> format_list_field([])
''
>>> format_list_field(None)
''
"""
if not values:
return ''
return separator.join(str(v) for v in values)
def format_datetime(dt: Optional[datetime]) -> str:
"""
格式化日期时间为字符串(转换为本地时区)
Args:
dt: datetime 对象或 None
Returns:
格式化的日期时间字符串,格式为 YYYY-MM-DD HH:MM:SS(本地时区)
Example:
>>> from datetime import datetime
>>> format_datetime(datetime(2024, 1, 15, 10, 30, 0))
'2024-01-15 10:30:00'
>>> format_datetime(None)
''
"""
if dt is None:
return ''
if isinstance(dt, str):
return dt
# 转换为本地时区(从 Django settings 获取)
from django.utils import timezone
if timezone.is_aware(dt):
dt = timezone.localtime(dt)
return dt.strftime('%Y-%m-%d %H:%M:%S')
def create_csv_export_response(
data_iterator: Iterator[Dict[str, Any]],
headers: List[str],
filename: str,
field_formatters: Optional[Dict[str, Callable]] = None,
show_progress: bool = True
) -> FileResponse | StreamingHttpResponse:
"""
创建 CSV 导出响应
根据 show_progress 参数选择响应类型:
- True: 使用临时文件 + FileResponse(带 Content-Length,浏览器显示下载进度)
- False: 使用 StreamingHttpResponse(内存更友好但无下载进度)
Args:
data_iterator: 数据迭代器,每个元素是一个字典
headers: CSV 表头列表
filename: 下载文件名(如 "export_2024.csv")
field_formatters: 字段格式化函数字典
show_progress: 是否显示下载进度(默认 True)
Returns:
FileResponse 或 StreamingHttpResponse
Example:
>>> data_iter = service.iter_data()
>>> headers = ['url', 'host', 'created_at']
>>> formatters = {'created_at': format_datetime}
>>> response = create_csv_export_response(
... data_iter, headers, 'websites.csv', formatters
... )
>>> return response
"""
if show_progress:
return _create_file_response(data_iterator, headers, filename, field_formatters)
else:
return _create_streaming_response(data_iterator, headers, filename, field_formatters)
def _create_file_response(
data_iterator: Iterator[Dict[str, Any]],
headers: List[str],
filename: str,
field_formatters: Optional[Dict[str, Callable]] = None
) -> FileResponse:
"""
创建带 Content-Length 的文件响应(支持浏览器下载进度)
实现方式:先写入临时文件,再返回 FileResponse
"""
# 创建临时文件
temp_file = tempfile.NamedTemporaryFile(
mode='w',
suffix='.csv',
delete=False,
encoding='utf-8'
)
temp_path = temp_file.name
try:
# 流式写入 CSV 数据到临时文件
for row in generate_csv_rows(data_iterator, headers, field_formatters):
temp_file.write(row)
temp_file.close()
# 获取文件大小
file_size = os.path.getsize(temp_path)
# 创建文件响应
response = FileResponse(
open(temp_path, 'rb'),
content_type='text/csv; charset=utf-8',
as_attachment=True,
filename=filename
)
response['Content-Length'] = file_size
# 设置清理回调:响应完成后删除临时文件
original_close = response.file_to_stream.close
def close_and_cleanup():
original_close()
try:
os.unlink(temp_path)
except OSError:
pass
response.file_to_stream.close = close_and_cleanup
return response
except Exception as e:
# 清理临时文件
try:
temp_file.close()
except Exception:
pass
try:
os.unlink(temp_path)
except OSError:
pass
logger.error(f"创建 CSV 导出响应失败: {e}")
raise
def _create_streaming_response(
data_iterator: Iterator[Dict[str, Any]],
headers: List[str],
filename: str,
field_formatters: Optional[Dict[str, Callable]] = None
) -> StreamingHttpResponse:
"""
创建流式响应(无 Content-Length,内存更友好)
"""
response = StreamingHttpResponse(
generate_csv_rows(data_iterator, headers, field_formatters),
content_type='text/csv; charset=utf-8'
)
response['Content-Disposition'] = f'attachment; filename="{filename}"'
return response
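A typical endpoint built on the helper — a sketch (the WebSite model, its field names, and the import path are illustrative, not taken from this diff):
from apps.common.utils import create_csv_export_response, format_datetime, format_list_field
from apps.asset.models import WebSite  # illustrative import path

def export_websites(request):
    # .values() yields dicts, so rows stream straight off the DB cursor
    data_iter = WebSite.objects.values('url', 'host', 'tech', 'created_at').iterator()
    headers = ['url', 'host', 'tech', 'created_at']
    formatters = {'tech': format_list_field, 'created_at': format_datetime}
    return create_csv_export_response(data_iter, headers, 'websites.csv', formatters)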

View File

@@ -0,0 +1,101 @@
"""
批量数据去重工具
用于 bulk_create 前的批次内去重,避免 PostgreSQL ON CONFLICT 错误。
自动从 Django 模型读取唯一约束字段,无需手动指定。
"""
import logging
from typing import List, TypeVar, Tuple, Optional
from django.db import models
logger = logging.getLogger(__name__)
T = TypeVar('T')
def get_unique_fields(model: type[models.Model]) -> Optional[Tuple[str, ...]]:
"""
从 Django 模型获取唯一约束字段
按优先级查找:
1. Meta.constraints 中的 UniqueConstraint
2. Meta.unique_together
Args:
model: Django 模型类
Returns:
唯一约束字段元组,如果没有则返回 None
"""
meta = model._meta
# 1. 优先查找 UniqueConstraint
for constraint in getattr(meta, 'constraints', []):
if isinstance(constraint, models.UniqueConstraint):
# 跳过条件约束(partial unique)
if getattr(constraint, 'condition', None) is None:
return tuple(constraint.fields)
# 2. 回退到 unique_together
unique_together = getattr(meta, 'unique_together', None)
if unique_together:
# unique_together 可能是 (('a', 'b'),) 或 ('a', 'b')
if unique_together and isinstance(unique_together[0], (list, tuple)):
return tuple(unique_together[0])
return tuple(unique_together)
return None
def deduplicate_for_bulk(items: List[T], model: type[models.Model]) -> List[T]:
"""
根据模型唯一约束对数据去重
自动从模型读取唯一约束字段,生成去重 key。
保留最后一条记录(后面的数据通常是更新的)。
Args:
items: 待去重的数据列表DTO 或 Model 对象)
model: Django 模型类(用于读取唯一约束)
Returns:
去重后的数据列表
Example:
# 自动从 Endpoint 模型读取唯一约束 (url, target)
unique_items = deduplicate_for_bulk(items, Endpoint)
"""
if not items:
return items
unique_fields = get_unique_fields(model)
if unique_fields is None:
# 模型没有唯一约束,无需去重
logger.debug(f"{model.__name__} 没有唯一约束,跳过去重")
return items
# 处理外键字段名:target -> target_id
def make_key(item: T) -> tuple:
key_parts = []
for field in unique_fields:
# 尝试 field_id(外键)和 field 两种形式
value = getattr(item, f'{field}_id', None)
if value is None:
value = getattr(item, field, None)
key_parts.append(value)
return tuple(key_parts)
# 使用字典去重,保留最后一条
seen = {}
for item in items:
key = make_key(item)
seen[key] = item
unique_items = list(seen.values())
if len(unique_items) < len(items):
logger.debug(f"{model.__name__} 去重: {len(items)} -> {len(unique_items)}")
return unique_items

View File

@@ -0,0 +1,331 @@
"""智能过滤工具 - 通用查询语法解析和 Django ORM 查询构建
支持的语法:
- field="value" 模糊匹配(包含)
- field=="value" 精确匹配
- field!="value" 不等于
逻辑运算符:
- AND: && 或 and 或 空格(默认)
- OR: || 或 or
示例:
type="xss" || type="sqli" # OR
type="xss" or type="sqli" # OR等价
severity="high" && source="nuclei" # AND
severity="high" source="nuclei" # AND空格默认为 AND
severity="high" and source="nuclei" # AND等价
使用示例:
from apps.common.utils.filter_utils import apply_filters
field_mapping = {'ip': 'ip', 'port': 'port', 'host': 'host'}
queryset = apply_filters(queryset, 'ip="192" || port="80"', field_mapping)
"""
import re
import logging
from dataclasses import dataclass
from typing import List, Dict, Optional, Union
from enum import Enum
from django.db.models import QuerySet, Q, F, Func, CharField
from django.db.models.functions import Cast
logger = logging.getLogger(__name__)
class ArrayToString(Func):
"""PostgreSQL array_to_string 函数"""
function = 'array_to_string'
template = "%(function)s(%(expressions)s, ',')"
output_field = CharField()
class LogicalOp(Enum):
"""逻辑运算符"""
AND = 'AND'
OR = 'OR'
@dataclass
class ParsedFilter:
"""解析后的过滤条件"""
field: str # 字段名
operator: str # 操作符: '=', '==', '!='
value: str # 原始值
@dataclass
class FilterGroup:
"""过滤条件组(带逻辑运算符)"""
filter: ParsedFilter
logical_op: LogicalOp # 与前一个条件的逻辑关系
class QueryParser:
"""查询语法解析器
支持 ||/or (OR) 和 &&/and/空格 (AND) 逻辑运算符
"""
# 正则匹配: field="value", field=="value", field!="value"
FILTER_PATTERN = re.compile(r'(\w+)(==|!=|=)"([^"]*)"')
# 逻辑运算符模式(带空格)
OR_PATTERN = re.compile(r'\s*(\|\||(?<![a-zA-Z])or(?![a-zA-Z]))\s*', re.IGNORECASE)
AND_PATTERN = re.compile(r'\s*(&&|(?<![a-zA-Z])and(?![a-zA-Z]))\s*', re.IGNORECASE)
@classmethod
def parse(cls, query_string: str) -> List[FilterGroup]:
"""解析查询语法字符串
Args:
query_string: 查询语法字符串
Returns:
解析后的过滤条件组列表
Examples:
>>> QueryParser.parse('type="xss" || type="sqli"')
[FilterGroup(filter=..., logical_op=AND), # 第一个默认 AND
FilterGroup(filter=..., logical_op=OR)]
"""
if not query_string or not query_string.strip():
return []
# 第一步:提取所有过滤条件并用占位符替换,保护引号内的空格
filters_found = []
placeholder_pattern = '__FILTER_{}__'
def replace_filter(match):
idx = len(filters_found)
filters_found.append(match.group(0))
return placeholder_pattern.format(idx)
# 先用正则提取所有 field="value" 形式的条件
protected = cls.FILTER_PATTERN.sub(replace_filter, query_string)
# 标准化逻辑运算符
# 先处理 || 和 or -> __OR__
normalized = cls.OR_PATTERN.sub(' __OR__ ', protected)
# 再处理 && 和 and -> __AND__
normalized = cls.AND_PATTERN.sub(' __AND__ ', normalized)
# 分词:按空格分割,保留逻辑运算符标记
tokens = normalized.split()
groups = []
pending_op = LogicalOp.AND # 默认 AND
for token in tokens:
if token == '__OR__':
pending_op = LogicalOp.OR
elif token == '__AND__':
pending_op = LogicalOp.AND
elif token.startswith('__FILTER_') and token.endswith('__'):
# 还原占位符为原始过滤条件
try:
idx = int(token[9:-2]) # 提取索引
original_filter = filters_found[idx]
match = cls.FILTER_PATTERN.match(original_filter)
if match:
field, operator, value = match.groups()
groups.append(FilterGroup(
filter=ParsedFilter(
field=field.lower(),
operator=operator,
value=value
),
logical_op=pending_op if groups else LogicalOp.AND
))
pending_op = LogicalOp.AND # 重置为默认 AND
except (ValueError, IndexError):
pass
# 其他 token 忽略(无效输入)
return groups
class QueryBuilder:
"""Django ORM 查询构建器
将解析后的过滤条件转换为 Django ORM 查询,支持 AND/OR 逻辑
"""
@classmethod
def build_query(
cls,
queryset: QuerySet,
filter_groups: List[FilterGroup],
field_mapping: Dict[str, str],
json_array_fields: List[str] = None
) -> QuerySet:
"""构建 Django ORM 查询
Args:
queryset: Django QuerySet
filter_groups: 解析后的过滤条件组列表
field_mapping: 字段映射
json_array_fields: JSON 数组字段列表(使用 __contains 查询)
Returns:
过滤后的 QuerySet
"""
if not filter_groups:
return queryset
json_array_fields = json_array_fields or []
# 收集需要 annotate 的数组模糊搜索字段
array_fuzzy_fields = set()
# 第一遍:检查是否有数组模糊匹配
for group in filter_groups:
f = group.filter
db_field = field_mapping.get(f.field)
if db_field and db_field in json_array_fields and f.operator == '=':
array_fuzzy_fields.add(db_field)
# 对数组模糊搜索字段做 annotate
for field in array_fuzzy_fields:
annotate_name = f'{field}_text'
queryset = queryset.annotate(**{annotate_name: ArrayToString(F(field))})
# 构建 Q 对象
combined_q = None
for group in filter_groups:
f = group.filter
# 字段映射
db_field = field_mapping.get(f.field)
if not db_field:
logger.debug(f"忽略未知字段: {f.field}")
continue
# 判断是否为 JSON 数组字段
is_json_array = db_field in json_array_fields
# 构建单个条件的 Q 对象
q = cls._build_single_q(db_field, f.operator, f.value, is_json_array)
if q is None:
continue
# 组合 Q 对象
if combined_q is None:
combined_q = q
elif group.logical_op == LogicalOp.OR:
combined_q = combined_q | q
else: # AND
combined_q = combined_q & q
if combined_q is not None:
return queryset.filter(combined_q)
return queryset
@classmethod
def _build_single_q(cls, field: str, operator: str, value: str, is_json_array: bool = False) -> Optional[Q]:
"""构建单个条件的 Q 对象"""
if is_json_array:
if operator == '==':
# 精确匹配:数组中包含完全等于 value 的元素
return Q(**{f'{field}__contains': [value]})
elif operator == '!=':
# 不包含:数组中不包含完全等于 value 的元素
return ~Q(**{f'{field}__contains': [value]})
else: # '=' 模糊匹配
# 使用 annotate 后的字段进行模糊搜索
# 字段已在 build_query 中通过 ArrayToString 转换为文本
annotate_name = f'{field}_text'
return Q(**{f'{annotate_name}__icontains': value})
if operator == '!=':
return cls._build_not_equal_q(field, value)
elif operator == '==':
return cls._build_exact_q(field, value)
else: # '='
return cls._build_fuzzy_q(field, value)
@classmethod
def _try_convert_to_int(cls, value: str) -> Optional[int]:
"""尝试将值转换为整数"""
try:
return int(value.strip())
except (ValueError, TypeError):
return None
@classmethod
def _build_fuzzy_q(cls, field: str, value: str) -> Q:
"""模糊匹配: 包含"""
return Q(**{f'{field}__icontains': value})
@classmethod
def _build_exact_q(cls, field: str, value: str) -> Q:
"""精确匹配"""
int_val = cls._try_convert_to_int(value)
if int_val is not None:
return Q(**{f'{field}__exact': int_val})
return Q(**{f'{field}__exact': value})
@classmethod
def _build_not_equal_q(cls, field: str, value: str) -> Q:
"""不等于"""
int_val = cls._try_convert_to_int(value)
if int_val is not None:
return ~Q(**{f'{field}__exact': int_val})
return ~Q(**{f'{field}__exact': value})
def apply_filters(
queryset: QuerySet,
query_string: str,
field_mapping: Dict[str, str],
json_array_fields: List[str] = None
) -> QuerySet:
"""应用过滤条件到 QuerySet
Args:
queryset: Django QuerySet
query_string: 查询语法字符串
field_mapping: 字段映射
json_array_fields: JSON 数组字段列表(使用 __contains 查询)
Returns:
过滤后的 QuerySet
Examples:
# OR 查询
apply_filters(qs, 'type="xss" || type="sqli"', mapping)
apply_filters(qs, 'type="xss" or type="sqli"', mapping)
# AND 查询
apply_filters(qs, 'severity="high" && source="nuclei"', mapping)
apply_filters(qs, 'severity="high" source="nuclei"', mapping)
# 混合查询
apply_filters(qs, 'type="xss" || type="sqli" && severity="high"', mapping)
# JSON 数组字段查询
apply_filters(qs, 'implies="PHP"', mapping, json_array_fields=['implies'])
"""
if not query_string or not query_string.strip():
return queryset
try:
filter_groups = QueryParser.parse(query_string)
if not filter_groups:
logger.debug(f"未解析到有效过滤条件: {query_string}")
return queryset
logger.debug(f"解析过滤条件: {filter_groups}")
return QueryBuilder.build_query(
queryset,
filter_groups,
field_mapping,
json_array_fields=json_array_fields
)
except Exception as e:
logger.warning(f"过滤解析错误: {e}, query: {query_string}")
return queryset # 静默降级
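What the parser emits before the ORM step — a REPL-style sketch:
from apps.common.utils.filter_utils import QueryParser

groups = QueryParser.parse('type="xss" || type="sqli" && severity="high"')
for g in groups:
    print(g.filter.field, g.filter.operator, g.filter.value, g.logical_op)
# type = xss LogicalOp.AND     (first condition defaults to AND)
# type = sqli LogicalOp.OR
# severity = high LogicalOp.AND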

View File

@@ -7,7 +7,6 @@
import hashlib
import logging
from pathlib import Path
from typing import Optional, BinaryIO
logger = logging.getLogger(__name__)
@@ -91,11 +90,3 @@ def is_file_hash_match(file_path: str, expected_hash: str) -> bool:
return False
return actual_hash.lower() == expected_hash.lower()
__all__ = [
"calc_file_sha256",
"calc_stream_sha256",
"safe_calc_file_sha256",
"is_file_hash_match",
]

View File

@@ -1,6 +1,8 @@
"""域名、IP、端口和目标验证工具函数"""
"""域名、IP、端口、URL 和目标验证工具函数"""
import ipaddress
import logging
from urllib.parse import urlparse
import validators
logger = logging.getLogger(__name__)
@@ -25,6 +27,21 @@ def validate_domain(domain: str) -> None:
raise ValueError(f"域名格式无效: {domain}")
def is_valid_domain(domain: str) -> bool:
"""
判断是否为有效域名(不抛异常)
Args:
domain: 域名字符串
Returns:
bool: 是否为有效域名
"""
if not domain or len(domain) > 253:
return False
return bool(validators.domain(domain))
def validate_ip(ip: str) -> None:
"""
验证 IP 地址格式(支持 IPv4 和 IPv6
@@ -44,6 +61,25 @@ def validate_ip(ip: str) -> None:
raise ValueError(f"IP 地址格式无效: {ip}")
def is_valid_ip(ip: str) -> bool:
"""
判断是否为有效 IP 地址(不抛异常)
Args:
ip: IP 地址字符串
Returns:
bool: 是否为有效 IP 地址
"""
if not ip:
return False
try:
ipaddress.ip_address(ip)
return True
except ValueError:
return False
def validate_cidr(cidr: str) -> None:
"""
验证 CIDR 格式(支持 IPv4 和 IPv6
@@ -140,3 +176,136 @@ def validate_port(port: any) -> tuple[bool, int | None]:
except (ValueError, TypeError):
logger.warning("端口号格式错误,无法转换为整数: %s", port)
return False, None
# ==================== URL 验证函数 ====================
def validate_url(url: str) -> None:
"""
验证 URL 格式,必须包含 scheme(http:// 或 https://)
Args:
url: URL 字符串
Raises:
ValueError: URL 格式无效或缺少 scheme
"""
if not url:
raise ValueError("URL 不能为空")
# 检查是否包含 scheme
if not url.startswith('http://') and not url.startswith('https://'):
raise ValueError("URL 必须包含协议http:// 或 https://")
try:
parsed = urlparse(url)
if not parsed.hostname:
raise ValueError("URL 必须包含主机名")
except Exception:
raise ValueError(f"URL 格式无效: {url}")
def is_valid_url(url: str, max_length: int = 2000) -> bool:
"""
判断是否为有效 URL(不抛异常)
Args:
url: URL 字符串
max_length: URL 最大长度,默认 2000
Returns:
bool: 是否为有效 URL
"""
if not url or len(url) > max_length:
return False
try:
validate_url(url)
return True
except ValueError:
return False
def is_url_match_target(url: str, target_name: str, target_type: str) -> bool:
"""
判断 URL 是否匹配目标
Args:
url: URL 字符串
target_name: 目标名称(域名、IP 或 CIDR)
target_type: 目标类型 ('domain', 'ip', 'cidr')
Returns:
bool: 是否匹配
"""
try:
parsed = urlparse(url)
hostname = parsed.hostname
if not hostname:
return False
hostname = hostname.lower()
target_name = target_name.lower()
if target_type == 'domain':
# 域名类型:hostname 等于 target_name 或以 .target_name 结尾
return hostname == target_name or hostname.endswith('.' + target_name)
elif target_type == 'ip':
# IP 类型:hostname 必须完全等于 target_name
return hostname == target_name
elif target_type == 'cidr':
# CIDR 类型:hostname 必须是 IP 且在 CIDR 范围内
try:
ip = ipaddress.ip_address(hostname)
network = ipaddress.ip_network(target_name, strict=False)
return ip in network
except ValueError:
# hostname 不是有效 IP
return False
return False
except Exception:
return False
def detect_input_type(input_str: str) -> str:
"""
检测输入类型(用于快速扫描输入解析)
Args:
input_str: 输入字符串(应该已经 strip)
Returns:
str: 输入类型 ('url', 'domain', 'ip', 'cidr')
"""
if not input_str:
raise ValueError("输入不能为空")
# 1. 包含 :// 一定是 URL
if '://' in input_str:
return 'url'
# 2. 包含 / 需要判断是 CIDR 还是 URL(缺少 scheme)
if '/' in input_str:
# CIDR 格式: IP/prefix(如 10.0.0.0/8)
parts = input_str.split('/')
if len(parts) == 2:
ip_part, prefix_part = parts
# 如果斜杠后是纯数字且在 0-32 范围内,检查是否是 CIDR
if prefix_part.isdigit() and 0 <= int(prefix_part) <= 32:
ip_parts = ip_part.split('.')
if len(ip_parts) == 4 and all(p.isdigit() for p in ip_parts):
return 'cidr'
# 不是 CIDR,视为 URL(缺少 scheme,后续验证会报错)
return 'url'
# 3. 检查是否是 IP 地址
try:
ipaddress.ip_address(input_str)
return 'ip'
except ValueError:
pass
# 4. 默认为域名
return 'domain'
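The detection order in practice — a sketch:
from apps.common.validators import detect_input_type

detect_input_type('https://a.example.com/x')  # -> 'url'
detect_input_type('10.0.0.0/8')               # -> 'cidr'
detect_input_type('example.com/path')         # -> 'url' (has /, but not a CIDR)
detect_input_type('192.168.1.1')              # -> 'ip'
detect_input_type('example.com')              # -> 'domain'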

View File

@@ -2,11 +2,20 @@
通用模块视图导出
包含:
- 健康检查视图Docker 健康检查
- 认证相关视图:登录、登出、用户信息、修改密码
- 系统日志视图:实时日志查看
- 黑名单视图:全局黑名单规则管理
"""
from .health_views import HealthCheckView
from .auth_views import LoginView, LogoutView, MeView, ChangePasswordView
from .system_log_views import SystemLogsView
from .system_log_views import SystemLogsView, SystemLogFilesView
from .blacklist_views import GlobalBlacklistView
__all__ = ['LoginView', 'LogoutView', 'MeView', 'ChangePasswordView', 'SystemLogsView']
__all__ = [
'HealthCheckView',
'LoginView', 'LogoutView', 'MeView', 'ChangePasswordView',
'SystemLogsView', 'SystemLogFilesView',
'GlobalBlacklistView',
]

View File

@@ -9,7 +9,10 @@ from django.utils.decorators import method_decorator
from rest_framework import status
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.permissions import AllowAny, IsAuthenticated
from rest_framework.permissions import AllowAny
from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
logger = logging.getLogger(__name__)
@@ -28,9 +31,10 @@ class LoginView(APIView):
password = request.data.get('password')
if not username or not password:
return Response(
{'error': '请提供用户名和密码'},
status=status.HTTP_400_BAD_REQUEST
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message='Username and password are required',
status_code=status.HTTP_400_BAD_REQUEST
)
user = authenticate(request, username=username, password=password)
@@ -38,20 +42,22 @@ class LoginView(APIView):
if user is not None:
login(request, user)
logger.info(f"用户 {username} 登录成功")
return Response({
'message': '登录成功',
'user': {
'id': user.id,
'username': user.username,
'isStaff': user.is_staff,
'isSuperuser': user.is_superuser,
return success_response(
data={
'user': {
'id': user.id,
'username': user.username,
'isStaff': user.is_staff,
'isSuperuser': user.is_superuser,
}
}
})
)
else:
logger.warning(f"用户 {username} 登录失败:用户名或密码错误")
return Response(
{'error': '用户名或密码错误'},
status=status.HTTP_401_UNAUTHORIZED
return error_response(
code=ErrorCodes.UNAUTHORIZED,
message='Invalid username or password',
status_code=status.HTTP_401_UNAUTHORIZED
)
@@ -79,7 +85,7 @@ class LogoutView(APIView):
logout(request)
else:
logout(request)
return Response({'message': '已登出'})
return success_response()
@method_decorator(csrf_exempt, name='dispatch')
@@ -100,22 +106,26 @@ class MeView(APIView):
if user_id:
try:
user = User.objects.get(pk=user_id)
return Response({
'authenticated': True,
'user': {
'id': user.id,
'username': user.username,
'isStaff': user.is_staff,
'isSuperuser': user.is_superuser,
return success_response(
data={
'authenticated': True,
'user': {
'id': user.id,
'username': user.username,
'isStaff': user.is_staff,
'isSuperuser': user.is_superuser,
}
}
})
)
except User.DoesNotExist:
pass
return Response({
'authenticated': False,
'user': None
})
return success_response(
data={
'authenticated': False,
'user': None
}
)
@method_decorator(csrf_exempt, name='dispatch')
@@ -124,43 +134,27 @@ class ChangePasswordView(APIView):
修改密码
POST /api/auth/change-password/
"""
authentication_classes = [] # 禁用认证(绕过 CSRF)
permission_classes = [AllowAny] # 手动检查登录状态
def post(self, request):
# 手动检查登录状态(从 session 获取用户
from django.contrib.auth import get_user_model
User = get_user_model()
user_id = request.session.get('_auth_user_id')
if not user_id:
return Response(
{'error': '请先登录'},
status=status.HTTP_401_UNAUTHORIZED
)
try:
user = User.objects.get(pk=user_id)
except User.DoesNotExist:
return Response(
{'error': '用户不存在'},
status=status.HTTP_401_UNAUTHORIZED
)
# 使用全局权限类验证request.user 已经是认证用户
user = request.user
# CamelCaseParser 将 oldPassword -> old_password
old_password = request.data.get('old_password')
new_password = request.data.get('new_password')
if not old_password or not new_password:
return Response(
{'error': '请提供旧密码和新密码'},
status=status.HTTP_400_BAD_REQUEST
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message='Old password and new password are required',
status_code=status.HTTP_400_BAD_REQUEST
)
if not user.check_password(old_password):
return Response(
{'error': '旧密码错误'},
status=status.HTTP_400_BAD_REQUEST
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message='Old password is incorrect',
status_code=status.HTTP_400_BAD_REQUEST
)
user.set_password(new_password)
@@ -170,4 +164,4 @@ class ChangePasswordView(APIView):
update_session_auth_hash(request, user)
logger.info(f"用户 {user.username} 已修改密码")
return Response({'message': '密码修改成功'})
return success_response()
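End to end, the response helpers change the login contract to the bare-data / error-envelope pair — a sketch (host is a placeholder):
import requests

resp = requests.post('https://server/api/auth/login/',
                     json={'username': 'admin', 'password': 'secret'})
# 200 -> {'user': {'id': 1, 'username': 'admin', 'isStaff': True, 'isSuperuser': True}}
# 401 -> {'error': {'code': 'UNAUTHORIZED', 'message': 'Invalid username or password'}}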

View File

@@ -0,0 +1,80 @@
"""全局黑名单 API 视图"""
import logging
from rest_framework import status
from rest_framework.views import APIView
from rest_framework.permissions import IsAuthenticated
from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.common.services import BlacklistService
logger = logging.getLogger(__name__)
class GlobalBlacklistView(APIView):
"""
全局黑名单规则 API
Endpoints:
- GET /api/blacklist/rules/ - 获取全局黑名单列表
- PUT /api/blacklist/rules/ - 全量替换规则(文本框保存场景)
设计说明:
- 使用 PUT 全量替换模式,适合"文本框每行一个规则"的前端场景
- 用户编辑文本框 -> 点击保存 -> 后端全量替换
架构(MVS 模式):
- View: 参数验证、响应格式化
- Service: 业务逻辑(BlacklistService)
- Model: 数据持久化(BlacklistRule)
"""
permission_classes = [IsAuthenticated]
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.blacklist_service = BlacklistService()
def get(self, request):
"""
获取全局黑名单规则列表
返回格式:
{
"patterns": ["*.gov", "*.edu", "10.0.0.0/8"]
}
"""
rules = self.blacklist_service.get_global_rules()
patterns = list(rules.values_list('pattern', flat=True))
return success_response(data={'patterns': patterns})
def put(self, request):
"""
全量替换全局黑名单规则
请求格式:
{
"patterns": ["*.gov", "*.edu", "10.0.0.0/8"]
}
或者空数组清空所有规则:
{
"patterns": []
}
"""
patterns = request.data.get('patterns', [])
# 兼容字符串输入(换行分隔)
if isinstance(patterns, str):
patterns = [p for p in patterns.split('\n') if p.strip()]
if not isinstance(patterns, list):
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message='patterns must be a list'
)
# 调用 Service 层全量替换
result = self.blacklist_service.replace_global_rules(patterns)
return success_response(data=result)

View File

@@ -0,0 +1,24 @@
"""
健康检查视图
提供 Docker 健康检查端点,无需认证。
"""
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.permissions import AllowAny
class HealthCheckView(APIView):
"""
健康检查端点
GET /api/health/
返回服务状态,用于 Docker 健康检查。
此端点无需认证。
"""
permission_classes = [AllowAny]
def get(self, request):
return Response({'status': 'ok'})
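Probing it is a one-liner — a sketch (the port is deployment-specific):
import requests
assert requests.get('http://localhost:8000/api/health/', timeout=3).json() == {'status': 'ok'}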

View File

@@ -9,16 +9,57 @@ import logging
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt
from rest_framework import status
from rest_framework.permissions import AllowAny
from rest_framework.response import Response
from rest_framework.views import APIView
from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.common.services.system_log_service import SystemLogService
logger = logging.getLogger(__name__)
@method_decorator(csrf_exempt, name="dispatch")
class SystemLogFilesView(APIView):
"""
日志文件列表 API 视图
GET /api/system/logs/files/
获取所有可用的日志文件列表
Response:
{
"files": [
{
"filename": "xingrin.log",
"category": "system",
"size": 1048576,
"modifiedAt": "2025-01-15T10:30:00+00:00"
},
...
]
}
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.service = SystemLogService()
def get(self, request):
"""获取日志文件列表"""
try:
files = self.service.get_log_files()
return success_response(data={"files": files})
except Exception:
logger.exception("获取日志文件列表失败")
return error_response(
code=ErrorCodes.SERVER_ERROR,
message='Failed to get log files',
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
)
@method_decorator(csrf_exempt, name="dispatch")
class SystemLogsView(APIView):
"""
@@ -28,21 +69,14 @@ class SystemLogsView(APIView):
获取系统日志内容
Query Parameters:
file (str, optional): 日志文件名,默认 xingrin.log
lines (int, optional): 返回的日志行数,默认 200最大 10000
Response:
{
"content": "日志内容字符串..."
}
Note:
- 当前为开发阶段,暂时允许匿名访问
- 生产环境应添加管理员权限验证
"""
# TODO: 生产环境应改为 IsAdminUser 权限
authentication_classes = []
permission_classes = [AllowAny]
def __init__(self, **kwargs):
super().__init__(**kwargs)
@@ -52,18 +86,33 @@ class SystemLogsView(APIView):
"""
获取系统日志
支持通过 lines 参数控制返回行数,用于前端分页或实时刷新场景
支持通过 file 和 lines 参数控制返回内容
"""
try:
# 解析 lines 参数
# 解析参数
filename = request.query_params.get("file")
lines_raw = request.query_params.get("lines")
lines = int(lines_raw) if lines_raw is not None else None
# 调用服务获取日志内容
content = self.service.get_logs_content(lines=lines)
return Response({"content": content})
except ValueError:
return Response({"error": "lines 参数必须是整数"}, status=status.HTTP_400_BAD_REQUEST)
content = self.service.get_logs_content(filename=filename, lines=lines)
return success_response(data={"content": content})
except ValueError as e:
# 文件名校验失败时透传服务层消息(消息含"文件名"字样);否则是 lines 参数解析失败
message = str(e) if '文件名' in str(e) else 'lines must be an integer'
return error_response(
code=ErrorCodes.VALIDATION_ERROR,
message=message,
status_code=status.HTTP_400_BAD_REQUEST
)
except FileNotFoundError as e:
return error_response(
code=ErrorCodes.NOT_FOUND,
message=str(e),
status_code=status.HTTP_404_NOT_FOUND
)
except Exception:
logger.exception("获取系统日志失败")
return Response({"error": "获取系统日志失败"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
return error_response(
code=ErrorCodes.SERVER_ERROR,
message='Failed to get system logs',
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
)
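Exercising both endpoints — a sketch (host is a placeholder; assumes a logged-in session):
import requests

s = requests.Session()
files = s.get('https://server/api/system/logs/files/').json()['files']
content = s.get('https://server/api/system/logs/',
                params={'file': 'xingrin_error.log', 'lines': 50}).json()['content']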

View File

@@ -0,0 +1,44 @@
"""
WebSocket 认证基类
提供需要认证的 WebSocket Consumer 基类
"""
import logging
from channels.generic.websocket import AsyncWebsocketConsumer
logger = logging.getLogger(__name__)
class AuthenticatedWebsocketConsumer(AsyncWebsocketConsumer):
"""
需要认证的 WebSocket Consumer 基类
子类应该重写 on_connect() 方法实现具体的连接逻辑
"""
async def connect(self):
"""
连接时验证用户认证状态
未认证时使用 close(code=4001) 拒绝连接
"""
user = self.scope.get('user')
if not user or not user.is_authenticated:
logger.warning(
f"WebSocket 连接被拒绝:用户未认证 - Path: {self.scope.get('path')}"
)
await self.close(code=4001)
return
# 调用子类的连接逻辑
await self.on_connect()
async def on_connect(self):
"""
子类实现具体的连接逻辑
默认实现:接受连接
"""
await self.accept()

View File

@@ -6,17 +6,17 @@ import json
import logging
import asyncio
import os
from channels.generic.websocket import AsyncWebsocketConsumer
from asgiref.sync import sync_to_async
from django.conf import settings
from apps.common.websocket_auth import AuthenticatedWebsocketConsumer
from apps.engine.services import WorkerService
logger = logging.getLogger(__name__)
class WorkerDeployConsumer(AsyncWebsocketConsumer):
class WorkerDeployConsumer(AuthenticatedWebsocketConsumer):
"""
Worker 交互式终端 WebSocket Consumer
@@ -31,8 +31,8 @@ class WorkerDeployConsumer(AsyncWebsocketConsumer):
self.read_task = None
self.worker_service = WorkerService()
async def connect(self):
"""连接时加入对应 Worker 的组并自动建立 SSH 连接"""
async def on_connect(self):
"""连接时加入对应 Worker 的组并自动建立 SSH 连接(已通过认证)"""
self.worker_id = self.scope['url_route']['kwargs']['worker_id']
self.group_name = f'worker_deploy_{self.worker_id}'
@@ -242,8 +242,9 @@ class WorkerDeployConsumer(AsyncWebsocketConsumer):
return
# 远程 Worker 通过 nginx HTTPS 访问(nginx 反代到后端 8888)
# 使用 https://{PUBLIC_HOST} 而不是直连 8888 端口
heartbeat_api_url = f"https://{public_host}" # 基础 URL,agent 会加 /api/...
# 使用 https://{PUBLIC_HOST}:{PUBLIC_PORT} 而不是直连 8888 端口
public_port = getattr(settings, 'PUBLIC_PORT', '8083')
heartbeat_api_url = f"https://{public_host}:{public_port}"
session_name = f'xingrin_deploy_{self.worker_id}'
remote_script_path = '/tmp/xingrin_deploy.sh'

View File

@@ -15,9 +15,10 @@
"""
from django.core.management.base import BaseCommand
from io import StringIO
from pathlib import Path
import yaml
from ruamel.yaml import YAML
from apps.engine.models import ScanEngine
@@ -44,10 +45,12 @@ class Command(BaseCommand):
with open(config_path, 'r', encoding='utf-8') as f:
default_config = f.read()
# 解析 YAML 为字典,后续用于生成子引擎配置
# 使用 ruamel.yaml 解析,保留注释
yaml_parser = YAML()
yaml_parser.preserve_quotes = True
try:
config_dict = yaml.safe_load(default_config) or {}
except yaml.YAMLError as e:
config_dict = yaml_parser.load(default_config) or {}
except Exception as e:
self.stdout.write(self.style.ERROR(f'引擎配置 YAML 解析失败: {e}'))
return
@@ -83,15 +86,13 @@ class Command(BaseCommand):
if scan_type != 'subdomain_discovery' and 'tools' not in scan_cfg:
continue
# 构造只包含当前扫描类型配置的 YAML
# 构造只包含当前扫描类型配置的 YAML(保留注释)
single_config = {scan_type: scan_cfg}
try:
single_yaml = yaml.safe_dump(
single_config,
sort_keys=False,
allow_unicode=True,
)
except yaml.YAMLError as e:
stream = StringIO()
yaml_parser.dump(single_config, stream)
single_yaml = stream.getvalue()
except Exception as e:
self.stdout.write(self.style.ERROR(f'生成子引擎 {scan_type} 配置失败: {e}'))
continue
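The reason for swapping PyYAML's safe_dump for ruamel.yaml: a round-trip keeps inline comments that safe_dump would discard. A minimal sketch:
from io import StringIO
from ruamel.yaml import YAML

yaml_parser = YAML()
yaml_parser.preserve_quotes = True
doc = yaml_parser.load('subdomain_discovery:\n  tools: [subfinder]  # default tool\n')
stream = StringIO()
yaml_parser.dump(doc, stream)
print(stream.getvalue())  # the "# default tool" comment survives the round-trip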

View File

@@ -0,0 +1,205 @@
"""初始化内置指纹库
- EHole 指纹: ehole.json -> 导入到数据库
- Goby 指纹: goby.json -> 导入到数据库
- Wappalyzer 指纹: wappalyzer.json -> 导入到数据库
- Fingers 指纹: fingers_http.json -> 导入到数据库
- FingerPrintHub 指纹: fingerprinthub_web.json -> 导入到数据库
- ARL 指纹: ARL.yaml -> 导入到数据库
可重复执行:如果数据库已有数据则跳过,只在空库时导入。
"""
import json
import logging
from pathlib import Path
import yaml
from django.conf import settings
from django.core.management.base import BaseCommand
from apps.engine.models import (
EholeFingerprint,
GobyFingerprint,
WappalyzerFingerprint,
FingersFingerprint,
FingerPrintHubFingerprint,
ARLFingerprint,
)
from apps.engine.services.fingerprints import (
EholeFingerprintService,
GobyFingerprintService,
WappalyzerFingerprintService,
FingersFingerprintService,
FingerPrintHubFingerprintService,
ARLFingerprintService,
)
logger = logging.getLogger(__name__)
# 内置指纹配置
DEFAULT_FINGERPRINTS = [
{
"type": "ehole",
"filename": "ehole.json",
"model": EholeFingerprint,
"service": EholeFingerprintService,
"data_key": "fingerprint", # JSON 中指纹数组的 key
"file_format": "json",
},
{
"type": "goby",
"filename": "goby.json",
"model": GobyFingerprint,
"service": GobyFingerprintService,
"data_key": None, # Goby 是数组格式,直接使用整个 JSON
"file_format": "json",
},
{
"type": "wappalyzer",
"filename": "wappalyzer.json",
"model": WappalyzerFingerprint,
"service": WappalyzerFingerprintService,
"data_key": "apps", # Wappalyzer 使用 apps 对象
"file_format": "json",
},
{
"type": "fingers",
"filename": "fingers_http.json",
"model": FingersFingerprint,
"service": FingersFingerprintService,
"data_key": None, # Fingers 是数组格式
"file_format": "json",
},
{
"type": "fingerprinthub",
"filename": "fingerprinthub_web.json",
"model": FingerPrintHubFingerprint,
"service": FingerPrintHubFingerprintService,
"data_key": None, # FingerPrintHub 是数组格式
"file_format": "json",
},
{
"type": "arl",
"filename": "ARL.yaml",
"model": ARLFingerprint,
"service": ARLFingerprintService,
"data_key": None, # ARL 是 YAML 数组格式
"file_format": "yaml",
},
]
class Command(BaseCommand):
help = "初始化内置指纹库"
def handle(self, *args, **options):
project_base = Path(settings.BASE_DIR).parent # /app/backend -> /app
fingerprints_dir = project_base / "backend" / "fingerprints"
initialized = 0
skipped = 0
failed = 0
for item in DEFAULT_FINGERPRINTS:
fp_type = item["type"]
filename = item["filename"]
model = item["model"]
service_class = item["service"]
data_key = item["data_key"]
file_format = item.get("file_format", "json")
# 检查数据库是否已有数据
existing_count = model.objects.count()
if existing_count > 0:
self.stdout.write(self.style.SUCCESS(
f"[{fp_type}] 数据库已有 {existing_count} 条记录,跳过初始化"
))
skipped += 1
continue
# 查找源文件
src_path = fingerprints_dir / filename
if not src_path.exists():
self.stdout.write(self.style.WARNING(
f"[{fp_type}] 未找到内置指纹文件: {src_path},跳过"
))
failed += 1
continue
# 读取并解析文件(支持 JSON 和 YAML)
try:
with open(src_path, "r", encoding="utf-8") as f:
if file_format == "yaml":
file_data = yaml.safe_load(f)
else:
file_data = json.load(f)
except (json.JSONDecodeError, yaml.YAMLError, OSError) as exc:
self.stdout.write(self.style.ERROR(
f"[{fp_type}] 读取指纹文件失败: {exc}"
))
failed += 1
continue
# 提取指纹数据(根据不同格式处理)
fingerprints = self._extract_fingerprints(file_data, data_key, fp_type)
if not fingerprints:
self.stdout.write(self.style.WARNING(
f"[{fp_type}] 指纹文件中没有有效数据,跳过"
))
failed += 1
continue
# 使用 Service 批量导入
try:
service = service_class()
result = service.batch_create_fingerprints(fingerprints)
created = result.get("created", 0)
failed_count = result.get("failed", 0)
self.stdout.write(self.style.SUCCESS(
f"[{fp_type}] 导入成功: 创建 {created} 条,失败 {failed_count}"
))
initialized += 1
except Exception as exc:
self.stdout.write(self.style.ERROR(
f"[{fp_type}] 导入失败: {exc}"
))
failed += 1
continue
self.stdout.write(self.style.SUCCESS(
f"指纹初始化完成: 成功 {initialized}, 已存在跳过 {skipped}, 失败 {failed}"
))
def _extract_fingerprints(self, json_data, data_key, fp_type):
"""
根据不同格式提取指纹数据,兼容数组和对象两种格式
支持的格式:
- 数组格式: [...] 或 {"key": [...]}
- 对象格式: {...} 或 {"key": {...}} -> 转换为 [{"name": k, ...v}]
"""
# 获取目标数据
if data_key is None:
# 直接使用整个 JSON
target = json_data
else:
# 从指定 key 获取,支持多个可能的 key如 apps/technologies
if data_key == "apps":
target = json_data.get("apps") or json_data.get("technologies") or {}
else:
target = json_data.get(data_key, [])
# 根据数据类型处理
if isinstance(target, list):
# 已经是数组格式,直接返回
return target
elif isinstance(target, dict):
# 对象格式,转换为数组 [{"name": key, ...value}]
return [{"name": name, **data} if isinstance(data, dict) else {"name": name}
for name, data in target.items()]
return []
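
A quick illustration of the normalization _extract_fingerprints performs (hypothetical sample data, not taken from the shipped fingerprint files):

cmd = Command()
# Array layout under a key (EHole style) is returned as-is.
assert cmd._extract_fingerprints(
    {"fingerprint": [{"cms": "DemoCMS", "keyword": ["demo"]}]}, "fingerprint", "ehole"
) == [{"cms": "DemoCMS", "keyword": ["demo"]}]
# Object layout (Wappalyzer style): each key becomes the "name" field.
assert cmd._extract_fingerprints(
    {"apps": {"DemoApp": {"cats": [1]}}}, "apps", "wappalyzer"
) == [{"name": "DemoApp", "cats": [1]}]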

View File

@@ -3,12 +3,17 @@
Run this command after installation to create the official template repository records.
Usage:
python manage.py init_nuclei_templates          # create records only
python manage.py init_nuclei_templates          # create records only (detects an existing local repo)
python manage.py init_nuclei_templates --sync   # create and sync (git clone)
"""
import logging
import subprocess
from pathlib import Path
from django.conf import settings
from django.core.management.base import BaseCommand
from django.utils import timezone
from apps.engine.models import NucleiTemplateRepo
from apps.engine.services import NucleiTemplateRepoService
@@ -26,6 +31,20 @@ DEFAULT_REPOS = [
]
def get_local_commit_hash(local_path: Path) -> str:
    """Return the commit hash of a local Git repository."""
    if not (local_path / ".git").is_dir():
        return ""
    result = subprocess.run(
        ["git", "-C", str(local_path), "rev-parse", "HEAD"],
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return result.stdout.strip() if result.returncode == 0 else ""
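
A minimal sketch of how this helper behaves (the repository directory name below is an assumption for illustration):

repo_dir = Path("/opt/xingrin/nuclei-repos/nuclei-templates")  # assumed checkout location
print(get_local_commit_hash(repo_dir) or "not cloned yet")     # returns "" when missing or not a git repo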
class Command(BaseCommand):
    help = "Initialize Nuclei template repositories (create the official template repo records)"
@@ -46,6 +65,8 @@ class Command(BaseCommand):
        force = options.get("force", False)
        service = NucleiTemplateRepoService()
        base_dir = Path(getattr(settings, "NUCLEI_TEMPLATES_REPOS_BASE_DIR", "/opt/xingrin/nuclei-repos"))
        created = 0
        skipped = 0
        synced = 0
@@ -87,20 +108,30 @@ class Command(BaseCommand):
        # Create a new repository record
        try:
            # Check for a local repo pre-downloaded by install.sh
            local_path = base_dir / name
            local_commit = get_local_commit_hash(local_path)
            repo = NucleiTemplateRepo.objects.create(
                name=name,
                repo_url=repo_url,
                local_path=str(local_path) if local_commit else "",
                commit_hash=local_commit,
                last_synced_at=timezone.now() if local_commit else None,
            )
            self.stdout.write(self.style.SUCCESS(
                f"[{name}] created: id={repo.id}"
            ))
            if local_commit:
                self.stdout.write(self.style.SUCCESS(
                    f"[{name}] created (local repo detected): commit={local_commit[:8]}"
                ))
            else:
                self.stdout.write(self.style.SUCCESS(
                    f"[{name}] created: id={repo.id}"
                ))
            created += 1
            # Initialize the local path
            service.ensure_local_path(repo)
            # Sync if requested
            if do_sync:
            # Sync only when there is no local repo and sync was requested
            if not local_commit and do_sync:
                try:
                    self.stdout.write(self.style.WARNING(
                        f"[{name}] syncing (the first run may take a few minutes)..."

View File

@@ -1,7 +1,8 @@
"""初始化所有内置字典 Wordlist 记录
- 目录扫描默认字典: dir_default.txt -> /app/backend/wordlist/dir_default.txt
- 子域名爆破默认字典: subdomains-top1million-110000.txt -> /app/backend/wordlist/subdomains-top1million-110000.txt
内置字典从镜像内 /app/backend/wordlist/ 复制到运行时目录 /opt/xingrin/wordlists/
- 目录扫描默认字典: dir_default.txt
- 子域名爆破默认字典: subdomains-top1million-110000.txt
可重复执行:如果已存在同名记录且文件有效则跳过,只在缺失或文件丢失时创建/修复。
"""
@@ -13,7 +14,7 @@ from pathlib import Path
from django.conf import settings
from django.core.management.base import BaseCommand
from apps.common.hash_utils import safe_calc_file_sha256
from apps.common.utils import safe_calc_file_sha256
from apps.engine.models import Wordlist

View File

@@ -0,0 +1,213 @@
# Generated by Django 5.2.7 on 2026-01-06 00:55
from django.db import migrations, models
class Migration(migrations.Migration):
    initial = True
    dependencies = [
    ]
    operations = [
        migrations.CreateModel(
            name='NucleiTemplateRepo',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Repository name, used for frontend display and config references', max_length=200, unique=True)),
                ('repo_url', models.CharField(help_text='Git repository URL', max_length=500)),
                ('local_path', models.CharField(blank=True, default='', help_text='Absolute path of the local working directory', max_length=500)),
                ('commit_hash', models.CharField(blank=True, default='', help_text='Git commit hash of the last sync (used for worker version checks)', max_length=40)),
                ('last_synced_at', models.DateTimeField(blank=True, help_text='Time of the last successful sync', null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time')),
            ],
            options={
                'verbose_name': 'Nuclei template repository',
                'verbose_name_plural': 'Nuclei template repositories',
                'db_table': 'nuclei_template_repo',
            },
        ),
        migrations.CreateModel(
            name='ARLFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Fingerprint name', max_length=300, unique=True)),
                ('rule', models.TextField(help_text='Match rule expression')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'ARL fingerprint',
                'verbose_name_plural': 'ARL fingerprints',
                'db_table': 'arl_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='arl_fingerp_name_c3a305_idx'), models.Index(fields=['-created_at'], name='arl_fingerp_created_ed1060_idx')],
            },
        ),
        migrations.CreateModel(
            name='EholeFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('cms', models.CharField(help_text='Product/CMS name', max_length=200)),
                ('method', models.CharField(default='keyword', help_text='Match method', max_length=200)),
                ('location', models.CharField(default='body', help_text='Match location', max_length=200)),
                ('keyword', models.JSONField(default=list, help_text='Keyword list')),
                ('is_important', models.BooleanField(default=False, help_text='Whether this is a key asset')),
                ('type', models.CharField(blank=True, default='-', help_text='Category', max_length=100)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'EHole fingerprint',
                'verbose_name_plural': 'EHole fingerprints',
                'db_table': 'ehole_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['cms'], name='ehole_finge_cms_72ca2c_idx'), models.Index(fields=['method'], name='ehole_finge_method_17f0db_idx'), models.Index(fields=['location'], name='ehole_finge_locatio_7bb82b_idx'), models.Index(fields=['type'], name='ehole_finge_type_ca2bce_idx'), models.Index(fields=['is_important'], name='ehole_finge_is_impo_d56e64_idx'), models.Index(fields=['-created_at'], name='ehole_finge_created_d862b0_idx')],
                'constraints': [models.UniqueConstraint(fields=('cms', 'method', 'location'), name='unique_ehole_fingerprint')],
            },
        ),
        migrations.CreateModel(
            name='FingerPrintHubFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('fp_id', models.CharField(help_text='Fingerprint ID', max_length=200, unique=True)),
                ('name', models.CharField(help_text='Fingerprint name', max_length=300)),
                ('author', models.CharField(blank=True, default='', help_text='Author', max_length=200)),
                ('tags', models.CharField(blank=True, default='', help_text='Tags', max_length=500)),
                ('severity', models.CharField(blank=True, default='info', help_text='Severity', max_length=50)),
                ('metadata', models.JSONField(blank=True, default=dict, help_text='Metadata')),
                ('http', models.JSONField(default=list, help_text='HTTP match rules')),
                ('source_file', models.CharField(blank=True, default='', help_text='Source file', max_length=500)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'FingerPrintHub fingerprint',
                'verbose_name_plural': 'FingerPrintHub fingerprints',
                'db_table': 'fingerprinthub_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['fp_id'], name='fingerprint_fp_id_df467f_idx'), models.Index(fields=['name'], name='fingerprint_name_95b6fb_idx'), models.Index(fields=['author'], name='fingerprint_author_80f54b_idx'), models.Index(fields=['severity'], name='fingerprint_severit_f70422_idx'), models.Index(fields=['-created_at'], name='fingerprint_created_bec16c_idx')],
            },
        ),
        migrations.CreateModel(
            name='FingersFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Fingerprint name', max_length=300, unique=True)),
                ('link', models.URLField(blank=True, default='', help_text='Related link', max_length=500)),
                ('rule', models.JSONField(default=list, help_text='Array of match rules')),
                ('tag', models.JSONField(default=list, help_text='Array of tags')),
                ('focus', models.BooleanField(default=False, help_text='Whether flagged as a focus item')),
                ('default_port', models.JSONField(blank=True, default=list, help_text='Array of default ports')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Fingers fingerprint',
                'verbose_name_plural': 'Fingers fingerprints',
                'db_table': 'fingers_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='fingers_fin_name_952de0_idx'), models.Index(fields=['link'], name='fingers_fin_link_4c6b7f_idx'), models.Index(fields=['focus'], name='fingers_fin_focus_568c7f_idx'), models.Index(fields=['-created_at'], name='fingers_fin_created_46fc91_idx')],
            },
        ),
        migrations.CreateModel(
            name='GobyFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Product name', max_length=300, unique=True)),
                ('logic', models.CharField(help_text='Logic expression', max_length=500)),
                ('rule', models.JSONField(default=list, help_text='Array of rules')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Goby fingerprint',
                'verbose_name_plural': 'Goby fingerprints',
                'db_table': 'goby_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='goby_finger_name_82084c_idx'), models.Index(fields=['logic'], name='goby_finger_logic_a63226_idx'), models.Index(fields=['-created_at'], name='goby_finger_created_50e000_idx')],
            },
        ),
        migrations.CreateModel(
            name='ScanEngine',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='Engine name', max_length=200, unique=True)),
                ('configuration', models.CharField(blank=True, default='', help_text='Engine configuration (YAML format)', max_length=10000)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time')),
            ],
            options={
                'verbose_name': 'Scan engine',
                'verbose_name_plural': 'Scan engines',
                'db_table': 'scan_engine',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='scan_engine_created_da4870_idx')],
            },
        ),
        migrations.CreateModel(
            name='WappalyzerFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Application name', max_length=300, unique=True)),
                ('cats', models.JSONField(default=list, help_text='Array of category IDs')),
                ('cookies', models.JSONField(blank=True, default=dict, help_text='Cookie detection rules')),
                ('headers', models.JSONField(blank=True, default=dict, help_text='HTTP header detection rules')),
                ('script_src', models.JSONField(blank=True, default=list, help_text='Array of script URL regexes')),
                ('js', models.JSONField(blank=True, default=list, help_text='JavaScript variable detection rules')),
                ('implies', models.JSONField(blank=True, default=list, help_text='Array of implied technologies')),
                ('meta', models.JSONField(blank=True, default=dict, help_text='HTML meta tag detection rules')),
                ('html', models.JSONField(blank=True, default=list, help_text='Array of HTML content regexes')),
                ('description', models.TextField(blank=True, default='', help_text='Application description')),
                ('website', models.URLField(blank=True, default='', help_text='Official website URL', max_length=500)),
                ('cpe', models.CharField(blank=True, default='', help_text='CPE identifier', max_length=300)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Wappalyzer fingerprint',
                'verbose_name_plural': 'Wappalyzer fingerprints',
                'db_table': 'wappalyzer_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='wappalyzer__name_63c669_idx'), models.Index(fields=['website'], name='wappalyzer__website_88de1c_idx'), models.Index(fields=['cpe'], name='wappalyzer__cpe_30c761_idx'), models.Index(fields=['-created_at'], name='wappalyzer__created_8e6c21_idx')],
            },
        ),
        migrations.CreateModel(
            name='Wordlist',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='Wordlist name, unique', max_length=200, unique=True)),
                ('description', models.CharField(blank=True, default='', help_text='Wordlist description', max_length=200)),
                ('file_path', models.CharField(help_text='Absolute path of the wordlist file stored on the backend', max_length=500)),
                ('file_size', models.BigIntegerField(default=0, help_text='File size in bytes')),
                ('line_count', models.IntegerField(default=0, help_text='Number of lines in the wordlist')),
                ('file_hash', models.CharField(blank=True, default='', help_text='SHA-256 hash of the file, used for cache validation', max_length=64)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time')),
            ],
            options={
                'verbose_name': 'Wordlist file',
                'verbose_name_plural': 'Wordlist files',
                'db_table': 'wordlist',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='wordlist_created_4afb02_idx')],
            },
        ),
        migrations.CreateModel(
            name='WorkerNode',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Node name', max_length=100)),
                ('ip_address', models.GenericIPAddressField(help_text='IP address (127.0.0.1 for the local node)')),
                ('ssh_port', models.IntegerField(default=22, help_text='SSH port')),
                ('username', models.CharField(default='root', help_text='SSH username', max_length=50)),
                ('password', models.CharField(blank=True, default='', help_text='SSH password', max_length=200)),
                ('is_local', models.BooleanField(default=False, help_text='Whether this is the local node (inside the Docker container)')),
                ('status', models.CharField(choices=[('pending', 'Pending'), ('deploying', 'Deploying'), ('online', 'Online'), ('offline', 'Offline'), ('updating', 'Updating'), ('outdated', 'Outdated')], default='pending', help_text='Status: pending/deploying/online/offline', max_length=20)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
            ],
            options={
                'verbose_name': 'Worker node',
                'db_table': 'worker_node',
                'ordering': ['-created_at'],
                'constraints': [models.UniqueConstraint(condition=models.Q(('is_local', False)), fields=('ip_address',), name='unique_remote_worker_ip'), models.UniqueConstraint(fields=('name',), name='unique_worker_name')],
            },
        ),
    ]

View File

@@ -0,0 +1,29 @@
"""Engine Models
导出所有 Engine 模块的 Models
"""
from .engine import WorkerNode, ScanEngine, Wordlist, NucleiTemplateRepo
from .fingerprints import (
EholeFingerprint,
GobyFingerprint,
WappalyzerFingerprint,
FingersFingerprint,
FingerPrintHubFingerprint,
ARLFingerprint,
)
__all__ = [
# 核心 Models
"WorkerNode",
"ScanEngine",
"Wordlist",
"NucleiTemplateRepo",
# 指纹 Models
"EholeFingerprint",
"GobyFingerprint",
"WappalyzerFingerprint",
"FingersFingerprint",
"FingerPrintHubFingerprint",
"ARLFingerprint",
]

View File

@@ -1,3 +1,8 @@
"""Engine 模块核心 Models
包含 WorkerNode, ScanEngine, Wordlist, NucleiTemplateRepo
"""
from django.db import models
@@ -78,6 +83,7 @@ class ScanEngine(models.Model):
indexes = [
models.Index(fields=['-created_at']),
]
def __str__(self):
return str(self.name or f'ScanEngine {self.id}')

View File

@@ -0,0 +1,195 @@
"""指纹相关 Models
包含 EHole、Goby、Wappalyzer 等指纹格式的数据模型
"""
from django.db import models
class GobyFingerprint(models.Model):
"""Goby 格式指纹规则
Goby 使用逻辑表达式和规则数组进行匹配:
- logic: 逻辑表达式,如 "a||b", "(a&&b)||c"
- rule: 规则数组,每条规则包含 label, feature, is_equal
"""
name = models.CharField(max_length=300, unique=True, help_text='产品名称')
logic = models.CharField(max_length=500, help_text='逻辑表达式')
rule = models.JSONField(default=list, help_text='规则数组')
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = 'goby_fingerprint'
verbose_name = 'Goby 指纹'
verbose_name_plural = 'Goby 指纹'
ordering = ['-created_at']
indexes = [
models.Index(fields=['name']),
models.Index(fields=['logic']),
models.Index(fields=['-created_at']),
]
def __str__(self) -> str:
return f"{self.name} ({self.logic})"
class EholeFingerprint(models.Model):
"""EHole 格式指纹规则(字段与 ehole.json 一致)"""
cms = models.CharField(max_length=200, help_text='产品/CMS名称')
method = models.CharField(max_length=200, default='keyword', help_text='匹配方式')
location = models.CharField(max_length=200, default='body', help_text='匹配位置')
keyword = models.JSONField(default=list, help_text='关键词列表')
is_important = models.BooleanField(default=False, help_text='是否重点资产')
type = models.CharField(max_length=100, blank=True, default='-', help_text='分类')
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = 'ehole_fingerprint'
verbose_name = 'EHole 指纹'
verbose_name_plural = 'EHole 指纹'
ordering = ['-created_at']
indexes = [
# 搜索过滤字段索引
models.Index(fields=['cms']),
models.Index(fields=['method']),
models.Index(fields=['location']),
models.Index(fields=['type']),
models.Index(fields=['is_important']),
# 排序字段索引
models.Index(fields=['-created_at']),
]
constraints = [
# 唯一约束cms + method + location 组合不能重复
models.UniqueConstraint(
fields=['cms', 'method', 'location'],
name='unique_ehole_fingerprint'
),
]
def __str__(self) -> str:
return f"{self.cms} ({self.method}@{self.location})"
class WappalyzerFingerprint(models.Model):
"""Wappalyzer 格式指纹规则
Wappalyzer 支持多种检测方式cookies, headers, scriptSrc, js, meta, html 等
"""
name = models.CharField(max_length=300, unique=True, help_text='应用名称')
cats = models.JSONField(default=list, help_text='分类 ID 数组')
cookies = models.JSONField(default=dict, blank=True, help_text='Cookie 检测规则')
headers = models.JSONField(default=dict, blank=True, help_text='HTTP Header 检测规则')
script_src = models.JSONField(default=list, blank=True, help_text='脚本 URL 正则数组')
js = models.JSONField(default=list, blank=True, help_text='JavaScript 变量检测规则')
implies = models.JSONField(default=list, blank=True, help_text='依赖关系数组')
meta = models.JSONField(default=dict, blank=True, help_text='HTML meta 标签检测规则')
html = models.JSONField(default=list, blank=True, help_text='HTML 内容正则数组')
description = models.TextField(blank=True, default='', help_text='应用描述')
website = models.URLField(max_length=500, blank=True, default='', help_text='官网链接')
cpe = models.CharField(max_length=300, blank=True, default='', help_text='CPE 标识符')
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = 'wappalyzer_fingerprint'
verbose_name = 'Wappalyzer 指纹'
verbose_name_plural = 'Wappalyzer 指纹'
ordering = ['-created_at']
indexes = [
models.Index(fields=['name']),
models.Index(fields=['website']),
models.Index(fields=['cpe']),
models.Index(fields=['-created_at']),
]
def __str__(self) -> str:
return f"{self.name}"
class FingersFingerprint(models.Model):
"""Fingers 格式指纹规则 (fingers_http.json)
使用正则表达式和标签进行匹配,支持 favicon hash、header、body 等多种检测方式
"""
name = models.CharField(max_length=300, unique=True, help_text='指纹名称')
link = models.URLField(max_length=500, blank=True, default='', help_text='相关链接')
rule = models.JSONField(default=list, help_text='匹配规则数组')
tag = models.JSONField(default=list, help_text='标签数组')
focus = models.BooleanField(default=False, help_text='是否重点关注')
default_port = models.JSONField(default=list, blank=True, help_text='默认端口数组')
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = 'fingers_fingerprint'
verbose_name = 'Fingers 指纹'
verbose_name_plural = 'Fingers 指纹'
ordering = ['-created_at']
indexes = [
models.Index(fields=['name']),
models.Index(fields=['link']),
models.Index(fields=['focus']),
models.Index(fields=['-created_at']),
]
def __str__(self) -> str:
return f"{self.name}"
class FingerPrintHubFingerprint(models.Model):
"""FingerPrintHub 格式指纹规则 (fingerprinthub_web.json)
基于 nuclei 模板格式,使用 HTTP 请求和响应特征进行匹配
"""
fp_id = models.CharField(max_length=200, unique=True, help_text='指纹ID')
name = models.CharField(max_length=300, help_text='指纹名称')
author = models.CharField(max_length=200, blank=True, default='', help_text='作者')
tags = models.CharField(max_length=500, blank=True, default='', help_text='标签')
severity = models.CharField(max_length=50, blank=True, default='info', help_text='严重程度')
metadata = models.JSONField(default=dict, blank=True, help_text='元数据')
http = models.JSONField(default=list, help_text='HTTP 匹配规则')
source_file = models.CharField(max_length=500, blank=True, default='', help_text='来源文件')
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = 'fingerprinthub_fingerprint'
verbose_name = 'FingerPrintHub 指纹'
verbose_name_plural = 'FingerPrintHub 指纹'
ordering = ['-created_at']
indexes = [
models.Index(fields=['fp_id']),
models.Index(fields=['name']),
models.Index(fields=['author']),
models.Index(fields=['severity']),
models.Index(fields=['-created_at']),
]
def __str__(self) -> str:
return f"{self.name} ({self.fp_id})"
class ARLFingerprint(models.Model):
"""ARL 格式指纹规则 (ARL.yaml)
使用简单的 name + rule 表达式格式
"""
name = models.CharField(max_length=300, unique=True, help_text='指纹名称')
rule = models.TextField(help_text='匹配规则表达式')
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = 'arl_fingerprint'
verbose_name = 'ARL 指纹'
verbose_name_plural = 'ARL 指纹'
ordering = ['-created_at']
indexes = [
models.Index(fields=['name']),
models.Index(fields=['-created_at']),
]
def __str__(self) -> str:
return f"{self.name}"

View File

@@ -88,6 +88,8 @@ def _register_scheduled_jobs(scheduler: BackgroundScheduler):
        replace_existing=True,
    )
    logger.info(" - registered: scan result cleanup (daily at 03:00)")
    # Note: refreshing the search materialized views has moved to pg_ivm incremental maintenance; no scheduled job is needed
def _trigger_scheduled_scans():

View File

@@ -0,0 +1,20 @@
"""指纹管理 Serializers
导出所有指纹相关的 Serializer 类
"""
from .ehole import EholeFingerprintSerializer
from .goby import GobyFingerprintSerializer
from .wappalyzer import WappalyzerFingerprintSerializer
from .fingers import FingersFingerprintSerializer
from .fingerprinthub import FingerPrintHubFingerprintSerializer
from .arl import ARLFingerprintSerializer
__all__ = [
"EholeFingerprintSerializer",
"GobyFingerprintSerializer",
"WappalyzerFingerprintSerializer",
"FingersFingerprintSerializer",
"FingerPrintHubFingerprintSerializer",
"ARLFingerprintSerializer",
]

View File

@@ -0,0 +1,31 @@
"""ARL 指纹 Serializer"""
from rest_framework import serializers
from apps.engine.models import ARLFingerprint
class ARLFingerprintSerializer(serializers.ModelSerializer):
"""ARL 指纹序列化器
字段映射:
- name: 指纹名称 (必填, 唯一)
- rule: 匹配规则表达式 (必填)
"""
class Meta:
model = ARLFingerprint
fields = ['id', 'name', 'rule', 'created_at']
read_only_fields = ['id', 'created_at']
def validate_name(self, value):
"""校验 name 字段"""
if not value or not value.strip():
raise serializers.ValidationError("name 字段不能为空")
return value.strip()
def validate_rule(self, value):
"""校验 rule 字段"""
if not value or not value.strip():
raise serializers.ValidationError("rule 字段不能为空")
return value.strip()
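
A minimal usage sketch of the validation above (illustrative values only):

ser = ARLFingerprintSerializer(data={"name": "  DemoApp  ", "rule": 'body="demo"'})
if ser.is_valid():
    print(ser.validated_data["name"])  # "DemoApp" -- surrounding whitespace stripped
else:
    print(ser.errors)                  # e.g. {"name": ["name must not be empty"]}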

View File

@@ -0,0 +1,27 @@
"""EHole 指纹 Serializer"""
from rest_framework import serializers
from apps.engine.models import EholeFingerprint
class EholeFingerprintSerializer(serializers.ModelSerializer):
"""EHole 指纹序列化器"""
class Meta:
model = EholeFingerprint
fields = ['id', 'cms', 'method', 'location', 'keyword',
'is_important', 'type', 'created_at']
read_only_fields = ['id', 'created_at']
def validate_cms(self, value):
"""校验 cms 字段"""
if not value or not value.strip():
raise serializers.ValidationError("cms 字段不能为空")
return value.strip()
def validate_keyword(self, value):
"""校验 keyword 字段"""
if not isinstance(value, list):
raise serializers.ValidationError("keyword 必须是数组")
return value

View File

@@ -0,0 +1,50 @@
"""FingerPrintHub 指纹 Serializer"""
from rest_framework import serializers
from apps.engine.models import FingerPrintHubFingerprint
class FingerPrintHubFingerprintSerializer(serializers.ModelSerializer):
"""FingerPrintHub 指纹序列化器
字段映射:
- fp_id: 指纹ID (必填, 唯一)
- name: 指纹名称 (必填)
- author: 作者 (可选)
- tags: 标签字符串 (可选)
- severity: 严重程度 (可选, 默认 'info')
- metadata: 元数据 JSON (可选)
- http: HTTP 匹配规则数组 (必填)
- source_file: 来源文件 (可选)
"""
class Meta:
model = FingerPrintHubFingerprint
fields = ['id', 'fp_id', 'name', 'author', 'tags', 'severity',
'metadata', 'http', 'source_file', 'created_at']
read_only_fields = ['id', 'created_at']
def validate_fp_id(self, value):
"""校验 fp_id 字段"""
if not value or not value.strip():
raise serializers.ValidationError("fp_id 字段不能为空")
return value.strip()
def validate_name(self, value):
"""校验 name 字段"""
if not value or not value.strip():
raise serializers.ValidationError("name 字段不能为空")
return value.strip()
def validate_http(self, value):
"""校验 http 字段"""
if not isinstance(value, list):
raise serializers.ValidationError("http 必须是数组")
return value
def validate_metadata(self, value):
"""校验 metadata 字段"""
if not isinstance(value, dict):
raise serializers.ValidationError("metadata 必须是对象")
return value

View File

@@ -0,0 +1,48 @@
"""Fingers 指纹 Serializer"""
from rest_framework import serializers
from apps.engine.models import FingersFingerprint
class FingersFingerprintSerializer(serializers.ModelSerializer):
"""Fingers 指纹序列化器
字段映射:
- name: 指纹名称 (必填, 唯一)
- link: 相关链接 (可选)
- rule: 匹配规则数组 (必填)
- tag: 标签数组 (可选)
- focus: 是否重点关注 (可选, 默认 False)
- default_port: 默认端口数组 (可选)
"""
class Meta:
model = FingersFingerprint
fields = ['id', 'name', 'link', 'rule', 'tag', 'focus',
'default_port', 'created_at']
read_only_fields = ['id', 'created_at']
def validate_name(self, value):
"""校验 name 字段"""
if not value or not value.strip():
raise serializers.ValidationError("name 字段不能为空")
return value.strip()
def validate_rule(self, value):
"""校验 rule 字段"""
if not isinstance(value, list):
raise serializers.ValidationError("rule 必须是数组")
return value
def validate_tag(self, value):
"""校验 tag 字段"""
if not isinstance(value, list):
raise serializers.ValidationError("tag 必须是数组")
return value
def validate_default_port(self, value):
"""校验 default_port 字段"""
if not isinstance(value, list):
raise serializers.ValidationError("default_port 必须是数组")
return value

View File

@@ -0,0 +1,26 @@
"""Goby 指纹 Serializer"""
from rest_framework import serializers
from apps.engine.models import GobyFingerprint
class GobyFingerprintSerializer(serializers.ModelSerializer):
"""Goby 指纹序列化器"""
class Meta:
model = GobyFingerprint
fields = ['id', 'name', 'logic', 'rule', 'created_at']
read_only_fields = ['id', 'created_at']
def validate_name(self, value):
"""校验 name 字段"""
if not value or not value.strip():
raise serializers.ValidationError("name 字段不能为空")
return value.strip()
def validate_rule(self, value):
"""校验 rule 字段"""
if not isinstance(value, list):
raise serializers.ValidationError("rule 必须是数组")
return value

View File

@@ -0,0 +1,24 @@
"""Wappalyzer 指纹 Serializer"""
from rest_framework import serializers
from apps.engine.models import WappalyzerFingerprint
class WappalyzerFingerprintSerializer(serializers.ModelSerializer):
"""Wappalyzer 指纹序列化器"""
class Meta:
model = WappalyzerFingerprint
fields = [
'id', 'name', 'cats', 'cookies', 'headers', 'script_src',
'js', 'implies', 'meta', 'html', 'description', 'website',
'cpe', 'created_at'
]
read_only_fields = ['id', 'created_at']
def validate_name(self, value):
"""校验 name 字段"""
if not value or not value.strip():
raise serializers.ValidationError("name 字段不能为空")
return value.strip()

View File

@@ -66,6 +66,7 @@ def get_start_agent_script(
    # Substitute template variables
    script = script.replace("{{HEARTBEAT_API_URL}}", heartbeat_api_url or '')
    script = script.replace("{{WORKER_ID}}", str(worker_id) if worker_id else '')
    script = script.replace("{{WORKER_API_KEY}}", getattr(settings, 'WORKER_API_KEY', ''))
    # Inject the image version config (ensures remote nodes use the same version)
    docker_user = getattr(settings, 'DOCKER_USER', 'yyhuni')

View File

@@ -0,0 +1,22 @@
"""指纹管理 Services
导出所有指纹相关的 Service 类
"""
from .base import BaseFingerprintService
from .ehole import EholeFingerprintService
from .goby import GobyFingerprintService
from .wappalyzer import WappalyzerFingerprintService
from .fingers_service import FingersFingerprintService
from .fingerprinthub_service import FingerPrintHubFingerprintService
from .arl_service import ARLFingerprintService
__all__ = [
"BaseFingerprintService",
"EholeFingerprintService",
"GobyFingerprintService",
"WappalyzerFingerprintService",
"FingersFingerprintService",
"FingerPrintHubFingerprintService",
"ARLFingerprintService",
]

View File

@@ -0,0 +1,110 @@
"""ARL 指纹管理 Service
实现 ARL 格式指纹的校验、转换和导出逻辑
支持 YAML 格式的导入导出
"""
import logging
import yaml
from apps.engine.models import ARLFingerprint
from .base import BaseFingerprintService
logger = logging.getLogger(__name__)
class ARLFingerprintService(BaseFingerprintService):
"""ARL 指纹管理服务(继承基类,实现 ARL 特定逻辑)"""
model = ARLFingerprint
def validate_fingerprint(self, item: dict) -> bool:
"""
校验单条 ARL 指纹
校验规则:
- name 字段必须存在且非空
- rule 字段必须存在且非空
Args:
item: 单条指纹数据
Returns:
bool: 是否有效
"""
name = item.get('name', '')
rule = item.get('rule', '')
return bool(name and str(name).strip()) and bool(rule and str(rule).strip())
def to_model_data(self, item: dict) -> dict:
"""
转换 ARL YAML 格式为 Model 字段
Args:
item: 原始 ARL YAML 数据
Returns:
dict: Model 字段数据
"""
return {
'name': str(item.get('name', '')).strip(),
'rule': str(item.get('rule', '')).strip(),
}
def get_export_data(self) -> list:
"""
获取导出数据ARL 格式 - 数组,用于 YAML 导出)
Returns:
list: ARL 格式的数据(数组格式)
[
{"name": "...", "rule": "..."},
...
]
"""
fingerprints = self.model.objects.all()
return [
{
'name': fp.name,
'rule': fp.rule,
}
for fp in fingerprints
]
def export_to_yaml(self, output_path: str) -> int:
"""
导出所有指纹到 YAML 文件
Args:
output_path: 输出文件路径
Returns:
int: 导出的指纹数量
"""
data = self.get_export_data()
with open(output_path, 'w', encoding='utf-8') as f:
yaml.dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
count = len(data)
logger.info("导出 ARL 指纹文件: %s, 数量: %d", output_path, count)
return count
def parse_yaml_import(self, yaml_content: str) -> list:
"""
解析 YAML 格式的导入内容
Args:
yaml_content: YAML 格式的字符串内容
Returns:
list: 解析后的指纹数据列表
Raises:
ValueError: 当 YAML 格式无效时
"""
try:
data = yaml.safe_load(yaml_content)
if not isinstance(data, list):
raise ValueError("ARL YAML 文件必须是数组格式")
return data
except yaml.YAMLError as e:
raise ValueError(f"无效的 YAML 格式: {e}")

View File

@@ -0,0 +1,144 @@
"""指纹管理基类 Service
提供通用的批量操作和缓存逻辑,供 EHole/Goby/Wappalyzer 等子类继承
"""
import json
import logging
from typing import Any
logger = logging.getLogger(__name__)
class BaseFingerprintService:
"""指纹管理基类 Service提供通用的批量操作和缓存逻辑"""
model = None # 子类必须指定
BATCH_SIZE = 1000 # 每批处理数量
def validate_fingerprint(self, item: dict) -> bool:
"""
校验单条指纹,子类必须实现
Args:
item: 单条指纹数据
Returns:
bool: 是否有效
"""
raise NotImplementedError("子类必须实现 validate_fingerprint 方法")
def validate_fingerprints(self, raw_data: list) -> tuple[list, list]:
"""
批量校验指纹数据
Args:
raw_data: 原始指纹数据列表
Returns:
tuple: (valid_items, invalid_items)
"""
valid, invalid = [], []
for item in raw_data:
if self.validate_fingerprint(item):
valid.append(item)
else:
invalid.append(item)
return valid, invalid
def to_model_data(self, item: dict) -> dict:
"""
转换为 Model 字段,子类必须实现
Args:
item: 原始指纹数据
Returns:
dict: Model 字段数据
"""
raise NotImplementedError("子类必须实现 to_model_data 方法")
def bulk_create(self, fingerprints: list) -> int:
"""
批量创建指纹记录(已校验的数据)
Args:
fingerprints: 已校验的指纹数据列表
Returns:
int: 成功创建数量
"""
if not fingerprints:
return 0
objects = [self.model(**self.to_model_data(item)) for item in fingerprints]
created = self.model.objects.bulk_create(objects, ignore_conflicts=True)
return len(created)
def batch_create_fingerprints(self, raw_data: list) -> dict:
"""
完整流程:分批校验 + 批量创建
Args:
raw_data: 原始指纹数据列表
Returns:
dict: {'created': int, 'failed': int}
"""
total_created = 0
total_failed = 0
for i in range(0, len(raw_data), self.BATCH_SIZE):
batch = raw_data[i:i + self.BATCH_SIZE]
valid, invalid = self.validate_fingerprints(batch)
total_created += self.bulk_create(valid)
total_failed += len(invalid)
logger.info(
"批量创建指纹完成: created=%d, failed=%d, total=%d",
total_created, total_failed, len(raw_data)
)
return {'created': total_created, 'failed': total_failed}
def get_export_data(self) -> dict:
"""
获取导出数据,子类必须实现
Returns:
dict: 导出的 JSON 数据
"""
raise NotImplementedError("子类必须实现 get_export_data 方法")
def export_to_file(self, output_path: str) -> int:
"""
导出所有指纹到 JSON 文件
Args:
output_path: 输出文件路径
Returns:
int: 导出的指纹数量
"""
data = self.get_export_data()
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(data, f, ensure_ascii=False)
count = len(data.get('fingerprint', []))
logger.info("导出指纹文件: %s, 数量: %d", output_path, count)
return count
def get_fingerprint_version(self) -> str:
"""
获取指纹库版本标识(用于缓存校验)
Returns:
str: 版本标识,格式 "{count}_{latest_timestamp}"
版本变化场景:
- 新增记录 → count 变化
- 删除记录 → count 变化
- 清空全部 → count 变为 0
"""
count = self.model.objects.count()
latest = self.model.objects.order_by('-created_at').first()
latest_ts = int(latest.created_at.timestamp()) if latest else 0
return f"{count}_{latest_ts}"

View File

@@ -0,0 +1,84 @@
"""EHole 指纹管理 Service
实现 EHole 格式指纹的校验、转换和导出逻辑
"""
from apps.engine.models import EholeFingerprint
from .base import BaseFingerprintService
class EholeFingerprintService(BaseFingerprintService):
"""EHole 指纹管理服务(继承基类,实现 EHole 特定逻辑)"""
model = EholeFingerprint
def validate_fingerprint(self, item: dict) -> bool:
"""
校验单条 EHole 指纹
校验规则:
- cms 字段必须存在且非空
- keyword 字段必须是数组
Args:
item: 单条指纹数据
Returns:
bool: 是否有效
"""
cms = item.get('cms', '')
keyword = item.get('keyword')
return bool(cms and str(cms).strip()) and isinstance(keyword, list)
def to_model_data(self, item: dict) -> dict:
"""
转换 EHole JSON 格式为 Model 字段
字段映射:
- isImportant (JSON) → is_important (Model)
Args:
item: 原始 EHole JSON 数据
Returns:
dict: Model 字段数据
"""
return {
'cms': str(item.get('cms', '')).strip(),
'method': item.get('method', 'keyword'),
'location': item.get('location', 'body'),
'keyword': item.get('keyword', []),
'is_important': item.get('isImportant', False),
'type': item.get('type', '-'),
}
def get_export_data(self) -> dict:
"""
获取导出数据EHole JSON 格式)
Returns:
dict: EHole 格式的 JSON 数据
{
"fingerprint": [
{"cms": "...", "method": "...", "location": "...",
"keyword": [...], "isImportant": false, "type": "..."},
...
],
"version": "1000_1703836800"
}
"""
fingerprints = self.model.objects.all()
data = []
for fp in fingerprints:
data.append({
'cms': fp.cms,
'method': fp.method,
'location': fp.location,
'keyword': fp.keyword,
'isImportant': fp.is_important, # 转回 JSON 格式
'type': fp.type,
})
return {
'fingerprint': data,
'version': self.get_fingerprint_version(),
}
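
For example (illustrative record, not taken from ehole.json), the source casing maps to the model casing like so:

svc = EholeFingerprintService()
item = {"cms": "DemoCMS", "method": "keyword", "location": "body",
        "keyword": ["demo"], "isImportant": True, "type": "cms"}
assert svc.validate_fingerprint(item)
assert svc.to_model_data(item)["is_important"] is True  # isImportant -> is_important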

View File

@@ -0,0 +1,110 @@
"""FingerPrintHub 指纹管理 Service
实现 FingerPrintHub 格式指纹的校验、转换和导出逻辑
"""
from apps.engine.models import FingerPrintHubFingerprint
from .base import BaseFingerprintService
class FingerPrintHubFingerprintService(BaseFingerprintService):
"""FingerPrintHub 指纹管理服务(继承基类,实现 FingerPrintHub 特定逻辑)"""
model = FingerPrintHubFingerprint
def validate_fingerprint(self, item: dict) -> bool:
"""
校验单条 FingerPrintHub 指纹
校验规则:
- id 字段必须存在且非空
- info 字段必须存在且包含 name
- http 字段必须是数组
Args:
item: 单条指纹数据
Returns:
bool: 是否有效
"""
fp_id = item.get('id', '')
info = item.get('info', {})
http = item.get('http')
if not fp_id or not str(fp_id).strip():
return False
if not isinstance(info, dict) or not info.get('name'):
return False
if not isinstance(http, list):
return False
return True
def to_model_data(self, item: dict) -> dict:
"""
转换 FingerPrintHub JSON 格式为 Model 字段
字段映射(嵌套结构转扁平):
- id (JSON) → fp_id (Model)
- info.name (JSON) → name (Model)
- info.author (JSON) → author (Model)
- info.tags (JSON) → tags (Model)
- info.severity (JSON) → severity (Model)
- info.metadata (JSON) → metadata (Model)
- http (JSON) → http (Model)
- _source_file (JSON) → source_file (Model)
Args:
item: 原始 FingerPrintHub JSON 数据
Returns:
dict: Model 字段数据
"""
info = item.get('info', {})
return {
'fp_id': str(item.get('id', '')).strip(),
'name': str(info.get('name', '')).strip(),
'author': info.get('author', ''),
'tags': info.get('tags', ''),
'severity': info.get('severity', 'info'),
'metadata': info.get('metadata', {}),
'http': item.get('http', []),
'source_file': item.get('_source_file', ''),
}
def get_export_data(self) -> list:
"""
获取导出数据FingerPrintHub JSON 格式 - 数组)
Returns:
list: FingerPrintHub 格式的 JSON 数据(数组格式)
[
{
"id": "...",
"info": {"name": "...", "author": "...", "tags": "...",
"severity": "...", "metadata": {...}},
"http": [...],
"_source_file": "..."
},
...
]
"""
fingerprints = self.model.objects.all()
data = []
for fp in fingerprints:
item = {
'id': fp.fp_id,
'info': {
'name': fp.name,
'author': fp.author,
'tags': fp.tags,
'severity': fp.severity,
'metadata': fp.metadata,
},
'http': fp.http,
}
# 只有当 source_file 非空时才添加该字段
if fp.source_file:
item['_source_file'] = fp.source_file
data.append(item)
return data
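
A small sketch of the nested-to-flat conversion (made-up template data):

svc = FingerPrintHubFingerprintService()
item = {"id": "demo-app", "info": {"name": "DemoApp", "severity": "info"}, "http": []}
data = svc.to_model_data(item)
print(data["fp_id"], data["name"], data["severity"])  # demo-app DemoApp info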

View File

@@ -0,0 +1,83 @@
"""Fingers 指纹管理 Service
实现 Fingers 格式指纹的校验、转换和导出逻辑
"""
from apps.engine.models import FingersFingerprint
from .base import BaseFingerprintService
class FingersFingerprintService(BaseFingerprintService):
"""Fingers 指纹管理服务(继承基类,实现 Fingers 特定逻辑)"""
model = FingersFingerprint
def validate_fingerprint(self, item: dict) -> bool:
"""
校验单条 Fingers 指纹
校验规则:
- name 字段必须存在且非空
- rule 字段必须是数组
Args:
item: 单条指纹数据
Returns:
bool: 是否有效
"""
name = item.get('name', '')
rule = item.get('rule')
return bool(name and str(name).strip()) and isinstance(rule, list)
def to_model_data(self, item: dict) -> dict:
"""
转换 Fingers JSON 格式为 Model 字段
字段映射:
- default_port (JSON) → default_port (Model)
Args:
item: 原始 Fingers JSON 数据
Returns:
dict: Model 字段数据
"""
return {
'name': str(item.get('name', '')).strip(),
'link': item.get('link', ''),
'rule': item.get('rule', []),
'tag': item.get('tag', []),
'focus': item.get('focus', False),
'default_port': item.get('default_port', []),
}
def get_export_data(self) -> list:
"""
获取导出数据Fingers JSON 格式 - 数组)
Returns:
list: Fingers 格式的 JSON 数据(数组格式)
[
{"name": "...", "link": "...", "rule": [...], "tag": [...],
"focus": false, "default_port": [...]},
...
]
"""
fingerprints = self.model.objects.all()
data = []
for fp in fingerprints:
item = {
'name': fp.name,
'link': fp.link,
'rule': fp.rule,
'tag': fp.tag,
}
# 只有当 focus 为 True 时才添加该字段(保持与原始格式一致)
if fp.focus:
item['focus'] = fp.focus
# 只有当 default_port 非空时才添加该字段
if fp.default_port:
item['default_port'] = fp.default_port
data.append(item)
return data

View File

@@ -0,0 +1,87 @@
"""Goby 指纹管理 Service
实现 Goby 格式指纹的校验、转换和导出逻辑
"""
from apps.engine.models import GobyFingerprint
from .base import BaseFingerprintService
class GobyFingerprintService(BaseFingerprintService):
"""Goby 指纹管理服务(继承基类,实现 Goby 特定逻辑)"""
model = GobyFingerprint
def validate_fingerprint(self, item: dict) -> bool:
"""
校验单条 Goby 指纹
支持两种格式:
1. 标准格式: {"name": "...", "logic": "...", "rule": [...]}
2. JSONL 格式: {"product": "...", "rule": "..."}
Args:
item: 单条指纹数据
Returns:
bool: 是否有效
"""
# 标准格式name + logic + rule(数组)
name = item.get('name', '')
if name and item.get('logic') is not None and isinstance(item.get('rule'), list):
return bool(str(name).strip())
# JSONL 格式product + rule(字符串)
product = item.get('product', '')
rule = item.get('rule', '')
return bool(product and str(product).strip() and rule and str(rule).strip())
def to_model_data(self, item: dict) -> dict:
"""
转换 Goby JSON 格式为 Model 字段
支持两种输入格式:
1. 标准格式: {"name": "...", "logic": "...", "rule": [...]}
2. JSONL 格式: {"product": "...", "rule": "..."}
Args:
item: 原始 Goby JSON 数据
Returns:
dict: Model 字段数据
"""
# 标准格式
if 'name' in item and isinstance(item.get('rule'), list):
return {
'name': str(item.get('name', '')).strip(),
'logic': item.get('logic', ''),
'rule': item.get('rule', []),
}
# JSONL 格式:将 rule 字符串转为单元素数组
return {
'name': str(item.get('product', '')).strip(),
'logic': 'or', # JSONL 格式默认 or 逻辑
'rule': [item.get('rule', '')] if item.get('rule') else [],
}
def get_export_data(self) -> list:
"""
获取导出数据Goby JSON 格式 - 数组)
Returns:
list: Goby 格式的 JSON 数据(数组格式)
[
{"name": "...", "logic": "...", "rule": [...]},
...
]
"""
fingerprints = self.model.objects.all()
return [
{
'name': fp.name,
'logic': fp.logic,
'rule': fp.rule,
}
for fp in fingerprints
]
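
Both accepted input shapes normalize to the same model fields (made-up data):

svc = GobyFingerprintService()
standard = {"name": "DemoApp", "logic": "a||b", "rule": [{"label": "a"}, {"label": "b"}]}
jsonl = {"product": "DemoApp", "rule": 'header="demo"'}
print(svc.to_model_data(standard)["logic"])  # a||b
print(svc.to_model_data(jsonl))              # {'name': 'DemoApp', 'logic': 'or', 'rule': ['header="demo"']}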

View File

@@ -0,0 +1,99 @@
"""Wappalyzer 指纹管理 Service
实现 Wappalyzer 格式指纹的校验、转换和导出逻辑
"""
from apps.engine.models import WappalyzerFingerprint
from .base import BaseFingerprintService
class WappalyzerFingerprintService(BaseFingerprintService):
"""Wappalyzer 指纹管理服务(继承基类,实现 Wappalyzer 特定逻辑)"""
model = WappalyzerFingerprint
def validate_fingerprint(self, item: dict) -> bool:
"""
校验单条 Wappalyzer 指纹
校验规则:
- name 字段必须存在且非空(从 apps 对象的 key 传入)
Args:
item: 单条指纹数据
Returns:
bool: 是否有效
"""
name = item.get('name', '')
return bool(name and str(name).strip())
def to_model_data(self, item: dict) -> dict:
"""
转换 Wappalyzer JSON 格式为 Model 字段
字段映射:
- scriptSrc (JSON) → script_src (Model)
Args:
item: 原始 Wappalyzer JSON 数据
Returns:
dict: Model 字段数据
"""
return {
'name': str(item.get('name', '')).strip(),
'cats': item.get('cats', []),
'cookies': item.get('cookies', {}),
'headers': item.get('headers', {}),
'script_src': item.get('scriptSrc', []), # JSON: scriptSrc -> Model: script_src
'js': item.get('js', []),
'implies': item.get('implies', []),
'meta': item.get('meta', {}),
'html': item.get('html', []),
'description': item.get('description', ''),
'website': item.get('website', ''),
'cpe': item.get('cpe', ''),
}
def get_export_data(self) -> dict:
"""
获取导出数据Wappalyzer JSON 格式)
Returns:
dict: Wappalyzer 格式的 JSON 数据
{
"apps": {
"AppName": {"cats": [...], "cookies": {...}, ...},
...
}
}
"""
fingerprints = self.model.objects.all()
apps = {}
for fp in fingerprints:
app_data = {}
if fp.cats:
app_data['cats'] = fp.cats
if fp.cookies:
app_data['cookies'] = fp.cookies
if fp.headers:
app_data['headers'] = fp.headers
if fp.script_src:
app_data['scriptSrc'] = fp.script_src # Model: script_src -> JSON: scriptSrc
if fp.js:
app_data['js'] = fp.js
if fp.implies:
app_data['implies'] = fp.implies
if fp.meta:
app_data['meta'] = fp.meta
if fp.html:
app_data['html'] = fp.html
if fp.description:
app_data['description'] = fp.description
if fp.website:
app_data['website'] = fp.website
if fp.cpe:
app_data['cpe'] = fp.cpe
apps[fp.name] = app_data
return {'apps': apps}
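
A one-liner showing the camelCase/snake_case key mapping on import (illustrative data):

svc = WappalyzerFingerprintService()
print(svc.to_model_data({"name": "DemoApp", "scriptSrc": ["demo\\.js"]})["script_src"])  # ['demo\\.js']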

View File

@@ -196,6 +196,9 @@ class NucleiTemplateRepoService:
        cmd: List[str]
        action: str
        # Use the original URL directly (Git mirror acceleration is no longer used)
        repo_url = obj.repo_url
        # Decide between clone and pull
        if git_dir.is_dir():
            # Check whether the remote URL has changed
@@ -208,12 +211,13 @@ class NucleiTemplateRepoService:
            )
            current_url = current_remote.stdout.strip() if current_remote.returncode == 0 else ""
            if current_url != obj.repo_url:
            # Check whether a fresh clone is needed
            if current_url != repo_url:
                # Remote URL changed: delete the old directory and re-clone
                logger.info("nuclei template repo %s remote URL changed, re-cloning: %s -> %s", obj.id, current_url, obj.repo_url)
                logger.info("nuclei template repo %s remote URL changed, re-cloning: %s -> %s", obj.id, current_url, repo_url)
                shutil.rmtree(local_path)
                local_path.mkdir(parents=True, exist_ok=True)
                cmd = ["git", "clone", "--depth", "1", obj.repo_url, str(local_path)]
                cmd = ["git", "clone", "--depth", "1", repo_url, str(local_path)]
                action = "clone"
            else:
                # Repository exists and the URL is unchanged: pull
@@ -224,7 +228,7 @@ class NucleiTemplateRepoService:
        if local_path.exists() and not local_path.is_dir():
            raise RuntimeError(f"Local path exists and is not a directory: {local_path}")
        # --depth 1 shallow clone: fetch only the latest commit to save space and time
        cmd = ["git", "clone", "--depth", "1", obj.repo_url, str(local_path)]
        cmd = ["git", "clone", "--depth", "1", repo_url, str(local_path)]
        action = "clone"
        # Run the Git command

View File

@@ -76,8 +76,8 @@ class TaskDistributor:
        self.docker_image = settings.TASK_EXECUTOR_IMAGE
        if not self.docker_image:
            raise ValueError("TASK_EXECUTOR_IMAGE is not configured; make sure the IMAGE_TAG environment variable is set")
        self.results_mount = getattr(settings, 'CONTAINER_RESULTS_MOUNT', '/app/backend/results')
        self.logs_mount = getattr(settings, 'CONTAINER_LOGS_MOUNT', '/app/backend/logs')
        # Use paths under /opt/xingrin consistently
        self.logs_mount = "/opt/xingrin/logs"
        self.submit_interval = getattr(settings, 'TASK_SUBMIT_INTERVAL', 5)
def get_online_workers(self) -> list[WorkerNode]:
@@ -153,30 +153,68 @@ class TaskDistributor:
            else:
                scored_workers.append((worker, score, cpu, mem))
        # Fallback: if nothing has normal load, wait and then re-select
        # Fallback: if nothing has normal load, wait in a loop and re-check
        if not scored_workers:
            if high_load_workers:
                # Under high load, wait first to give the system breathing room (default 60 seconds)
                # High-load wait parameters (default: check every 60 seconds, up to 10 times)
                high_load_wait = getattr(settings, 'HIGH_LOAD_WAIT_SECONDS', 60)
                logger.warning("All workers under high load, retrying in %d seconds...", high_load_wait)
                time.sleep(high_load_wait)
                high_load_max_retries = getattr(settings, 'HIGH_LOAD_MAX_RETRIES', 10)
                # Re-select (recursive call; the load may have dropped by now)
                # To avoid unbounded recursion, use the least loaded of the high-load workers directly
                # Send a high-load notification before starting to wait
                high_load_workers.sort(key=lambda x: x[1])
                best_worker, _, cpu, mem = high_load_workers[0]
                # Send a high-load notification
                _, _, first_cpu, first_mem = high_load_workers[0]
                from apps.common.signals import all_workers_high_load
                all_workers_high_load.send(
                    sender=self.__class__,
                    worker_name=best_worker.name,
                    cpu=cpu,
                    mem=mem
                    worker_name="all nodes",
                    cpu=first_cpu,
                    mem=first_mem
                )
                logger.info("Selected worker: %s (CPU: %.1f%%, MEM: %.1f%%)", best_worker.name, cpu, mem)
                return best_worker
                for retry in range(high_load_max_retries):
                    logger.warning(
                        "All workers under high load, retrying in %d seconds... (%d/%d)",
                        high_load_wait, retry + 1, high_load_max_retries
                    )
                    time.sleep(high_load_wait)
                    # Refresh the load data
                    loads = worker_load_service.get_all_loads(worker_ids)
                    # Re-evaluate
                    scored_workers = []
                    high_load_workers = []
                    for worker in workers:
                        load = loads.get(worker.id)
                        if not load:
                            continue
                        cpu = load.get('cpu', 0)
                        mem = load.get('mem', 0)
                        score = cpu * 0.7 + mem * 0.3
                        if cpu > 85 or mem > 85:
                            high_load_workers.append((worker, score, cpu, mem))
                        else:
                            scored_workers.append((worker, score, cpu, mem))
                    # Break out of the loop once a normally loaded worker appears
                    if scored_workers:
                        logger.info("Normally loaded worker detected; ending the wait")
                        break
                # Timed out or still under high load: pick the least loaded
                if not scored_workers and high_load_workers:
                    high_load_workers.sort(key=lambda x: x[1])
                    best_worker, _, cpu, mem = high_load_workers[0]
                    logger.warning(
                        "Wait timed out; forcing dispatch to high-load worker: %s (CPU: %.1f%%, MEM: %.1f%%)",
                        best_worker.name, cpu, mem
                    )
                    return best_worker
                return best_worker
            else:
                logger.warning("No workers available")
                return None
@@ -234,11 +272,10 @@ class TaskDistributor:
        else:
            # Remote: accessed through the Nginx reverse proxy (HTTPS; no direct connection to port 8888)
            network_arg = ""
            server_url = f"https://{settings.PUBLIC_HOST}"
            server_url = f"https://{settings.PUBLIC_HOST}:{settings.PUBLIC_PORT}"
        # Mount paths (all nodes use the same fixed paths)
        host_results_dir = settings.HOST_RESULTS_DIR  # /opt/xingrin/results
        host_logs_dir = settings.HOST_LOGS_DIR  # /opt/xingrin/logs
        # Mount paths (mount /opt/xingrin as a whole; scan tools under /opt/xingrin-tools/bin are unaffected)
        host_xingrin_dir = "/opt/xingrin"
        # Environment variables: SERVER_URL + IS_LOCAL (other settings are fetched from the config center at container startup)
        # IS_LOCAL lets the worker declare its identity to the config center, which decides the database address to return
@@ -247,19 +284,17 @@ class TaskDistributor:
        env_vars = [
            f"-e SERVER_URL={shlex.quote(server_url)}",
            f"-e IS_LOCAL={is_local_str}",
            f"-e WORKER_API_KEY={shlex.quote(settings.WORKER_API_KEY)}",  # Worker API auth key
            "-e PREFECT_HOME=/tmp/.prefect",  # put the Prefect data directory in a writable location
            "-e PREFECT_SERVER_EPHEMERAL_ENABLED=true",  # enable the ephemeral server (local temporary server)
            "-e PREFECT_SERVER_EPHEMERAL_STARTUP_TIMEOUT_SECONDS=120",  # increase the startup timeout
            "-e PREFECT_SERVER_DATABASE_CONNECTION_URL=sqlite+aiosqlite:////tmp/.prefect/prefect.db",  # use SQLite under /tmp
            "-e PREFECT_LOGGING_LEVEL=DEBUG",  # enable DEBUG-level logging
            "-e PREFECT_LOGGING_SERVER_LEVEL=DEBUG",  # server log level
            "-e PREFECT_DEBUG_MODE=true",  # enable debug mode
            "-e PREFECT_LOGGING_LEVEL=WARNING",  # log level (cuts DEBUG noise)
        ]
        # Mount volumes
        # Mount volumes (mount the whole /opt/xingrin directory)
        volumes = [
            f"-v {host_results_dir}:{self.results_mount}",
            f"-v {host_logs_dir}:{self.logs_mount}",
            f"-v {host_xingrin_dir}:{host_xingrin_dir}",
        ]
        # Build the command-line arguments
@@ -277,11 +312,10 @@ class TaskDistributor:
        # - Local worker: install.sh has already pre-pulled the image; use the local copy
        # - Remote worker: the image was pre-pulled at deploy time; use the local copy
        # - Avoids checking Docker Hub on every task (better performance and stability)
        # Wrap the sh -c command in double quotes so the single-quoted args produced by shlex.quote parse correctly
        cmd = f'''docker run --rm -d --pull=missing {network_arg} \
{' '.join(env_vars)} \
{' '.join(volumes)} \
{self.docker_image} \
        cmd = f'''docker run --rm -d --pull=missing {network_arg} \\
{' '.join(env_vars)} \\
{' '.join(volumes)} \\
{self.docker_image} \\
sh -c "{inner_cmd}"'''
        return cmd
@@ -520,7 +554,7 @@ class TaskDistributor:
        try:
            # Build the docker run command (cleans up expired scan result directories)
            script_args = {
                'results_dir': '/app/backend/results',
                'results_dir': '/opt/xingrin/results',
                'retention_days': retention_days,
            }

Some files were not shown because too many files have changed in this diff.