Mirror of https://github.com/yyhuni/xingrin.git (synced 2026-02-01 12:13:12 +08:00)

Compare commits: v1.2.1-dev ... v1.5.10-de (175 commits)
Commit SHAs in this range:

2d2ec93626, ced9f811f4, aa99b26f50, 8342f196db, 1bd2a6ed88, 033ff89aee, 4284a0cd9a, 943a4cb960, eb2d853b76, 1184c18b74, 8a6f1b6f24, 255d505aba, d06a9bab1f, 6d5c776bf7, bf058dd67b, 0532d7c8b8, 2ee9b5ffa2, 648a1888d4, 2508268a45, c60383940c, 47298c294a, eba394e14e, 592a1958c4, 38e2856c08, f5ad8e68e9, d5f91a236c, 24ae8b5aeb, 86f43f94a0, 53ba03d1e5, 89c44ebd05, e0e3419edb, 52ee4684a7, ce8cebf11d, ec006d8f54, 48976a570f, 5da7229873, 8bb737a9fa, 2d018d33f3, 0c07cc8497, 225b039985, d1624627bc, 7bb15e4ae4, 8e8cc29669, d6d5338acb, c521bdb511, abf2d95f6f, ab58cf0d85, fb0111adf2, 161ee9a2b1, 0cf75585d5, 1d8d5f51d9, 3f8de07c8c, cd5c2b9f11, 54786c22dd, d468f975ab, a85a12b8ad, a8b0d97b7b, b8504921c2, ecfc1822fb, 81633642e6, d1ec9b7f27, 2a3d9b4446, 9b63203b5a, 6ff86e14ec, 4c1282e9bb, ba3a9b709d, 283b28b46a, 1269e5a314, 802e967906, e446326416, e0abb3ce7b, d418baaf79, f8da408580, 7cd4354d8f, 6bf35a760f, be9ecadffb, adb53c9f85, 7b7bbed634, 8dd3f0536e, 8a8062a12d, 55908a2da5, 22a7d4f091, f287f18134, de27230b7a, 15a6295189, 674acdac66, c59152bedf, b4037202dc, 4b4f9862bf, 1c42e4978f, 57bab63997, b1f0f18ac0, ccee5471b8, 0ccd362535, 7f2af7f7e2, 4bd0f9e8c1, 68cc996e3b, f1e79d638e, d484133e4c, fc977ae029, f328474404, 68e726a066, 77a6f45909, 49d1f1f1bb, db8ecb1644, 18cc016268, 23bc463283, 7b903b91b2, b3136d51b9, 08372588a4, 236c828041, fb13bb74d8, f076c682b6, 9eda2caceb, b1c9e202dd, 918669bc29, fd70b0544d, 0f2df7a5f3, 857ab737b5, ee2d99edda, db6ce16aca, ab800eca06, e8e5572339, d48d4bbcad, d1cca4c083, df0810c863, d33e54c440, 35a306fe8b, 724df82931, 8dfffdf802, b8cb85ce0b, da96d437a4, feaf8062e5, 4bab76f233, 09416b4615, bc1c5f6b0e, 2f2742e6fe, be3c346a74, 0c7a6fff12, 3b4f0e3147, 51212a2a0c, 58533bbaf6, 6ccca1602d, 6389b0f672, d7599b8599, 8eff298293, 3634101c5b, 163973a7df, 80ffecba3e, 3c21ac940c, 5c9f484d70, 7567f6c25b, 0599a0b298, f7557fe90c, 13571b9772, 8ee76eef69, 2a31e29aa2, 81abc59961, ffbfec6dd5, a0091636a8, 69490ab396, 7306964abf, cb6b0259e3, e1b4618e58, 556dcf5f62, 0628eef025, 38ed8bc642, 2f4d6a2168, c25cb9e06b, b14ab71c7f, 8b5060e2d3, 3c9335febf, 1b95e4f2c3, d20a600afc, c29b11fd37
.github/workflows/docker-build.yml (vendored): 223 changed lines
```diff
@@ -19,7 +19,8 @@ permissions:
   contents: write
 
 jobs:
-  build:
+  # AMD64 build (native x64 runner)
+  build-amd64:
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -27,39 +28,30 @@ jobs:
         - image: xingrin-server
           dockerfile: docker/server/Dockerfile
           context: .
-          platforms: linux/amd64,linux/arm64
         - image: xingrin-frontend
           dockerfile: docker/frontend/Dockerfile
           context: .
-          platforms: linux/amd64 # Next.js crashes under QEMU when building for ARM64
         - image: xingrin-worker
           dockerfile: docker/worker/Dockerfile
           context: .
-          platforms: linux/amd64,linux/arm64
         - image: xingrin-nginx
           dockerfile: docker/nginx/Dockerfile
           context: .
-          platforms: linux/amd64,linux/arm64
         - image: xingrin-agent
           dockerfile: docker/agent/Dockerfile
           context: .
-          platforms: linux/amd64,linux/arm64
         - image: xingrin-postgres
           dockerfile: docker/postgres/Dockerfile
           context: docker/postgres
 
     steps:
       - name: Checkout
         uses: actions/checkout@v4
 
-      - name: Free disk space (for large builds like worker)
+      - name: Free disk space
         run: |
           echo "=== Before cleanup ==="
           df -h
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
           sudo docker image prune -af
           echo "=== After cleanup ==="
           df -h
 
       - name: Generate SSL certificates for nginx build
         if: matrix.image == 'xingrin-nginx'
@@ -69,10 +61,6 @@ jobs:
             -keyout docker/nginx/ssl/privkey.pem \
             -out docker/nginx/ssl/fullchain.pem \
             -subj "/CN=localhost"
-          echo "SSL certificates generated for CI build"
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
@@ -83,7 +71,120 @@ jobs:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
-      - name: Get version from git tag
+      - name: Get version
         id: version
         run: |
           if [[ $GITHUB_REF == refs/tags/* ]]; then
+            echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
+          else
+            echo "VERSION=dev-$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Build and push AMD64
+        uses: docker/build-push-action@v5
+        with:
+          context: ${{ matrix.context }}
+          file: ${{ matrix.dockerfile }}
+          platforms: linux/amd64
+          push: true
+          tags: ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}-amd64
+          build-args: IMAGE_TAG=${{ steps.version.outputs.VERSION }}
+          cache-from: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-amd64
+          cache-to: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-amd64,mode=max
+          provenance: false
+          sbom: false
+
+  # ARM64 build (native ARM64 runner)
+  build-arm64:
+    runs-on: ubuntu-22.04-arm
+    strategy:
+      matrix:
+        include:
+          - image: xingrin-server
+            dockerfile: docker/server/Dockerfile
+            context: .
+          - image: xingrin-frontend
+            dockerfile: docker/frontend/Dockerfile
+            context: .
+          - image: xingrin-worker
+            dockerfile: docker/worker/Dockerfile
+            context: .
+          - image: xingrin-nginx
+            dockerfile: docker/nginx/Dockerfile
+            context: .
+          - image: xingrin-agent
+            dockerfile: docker/agent/Dockerfile
+            context: .
+          - image: xingrin-postgres
+            dockerfile: docker/postgres/Dockerfile
+            context: docker/postgres
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Generate SSL certificates for nginx build
+        if: matrix.image == 'xingrin-nginx'
+        run: |
+          mkdir -p docker/nginx/ssl
+          openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
+            -keyout docker/nginx/ssl/privkey.pem \
+            -out docker/nginx/ssl/fullchain.pem \
+            -subj "/CN=localhost"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Get version
+        id: version
+        run: |
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
+            echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
+          else
+            echo "VERSION=dev-$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Build and push ARM64
+        uses: docker/build-push-action@v5
+        with:
+          context: ${{ matrix.context }}
+          file: ${{ matrix.dockerfile }}
+          platforms: linux/arm64
+          push: true
+          tags: ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}-arm64
+          build-args: IMAGE_TAG=${{ steps.version.outputs.VERSION }}
+          cache-from: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-arm64
+          cache-to: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-arm64,mode=max
+          provenance: false
+          sbom: false
+
+  # Merge multi-arch manifests
+  merge-manifests:
+    runs-on: ubuntu-latest
+    needs: [build-amd64, build-arm64]
+    strategy:
+      matrix:
+        image:
+          - xingrin-server
+          - xingrin-frontend
+          - xingrin-worker
+          - xingrin-nginx
+          - xingrin-agent
+          - xingrin-postgres
+    steps:
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Get version
+        id: version
+        run: |
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
@@ -94,46 +195,76 @@ jobs:
             echo "IS_RELEASE=false" >> $GITHUB_OUTPUT
           fi
 
-      - name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ${{ matrix.context }}
-          file: ${{ matrix.dockerfile }}
-          platforms: ${{ matrix.platforms }}
-          push: true
-          tags: |
-            ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}
-            ${{ steps.version.outputs.IS_RELEASE == 'true' && format('{0}/{1}:latest', env.IMAGE_PREFIX, matrix.image) || '' }}
-          build-args: |
-            IMAGE_TAG=${{ steps.version.outputs.VERSION }}
-          cache-from: type=gha,scope=${{ matrix.image }}
-          cache-to: type=gha,mode=max,scope=${{ matrix.image }}
-          provenance: false
-          sbom: false
+      - name: Create and push multi-arch manifest
+        run: |
+          VERSION=${{ steps.version.outputs.VERSION }}
+          IMAGE=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}
+
+          docker manifest create ${IMAGE}:${VERSION} \
+            ${IMAGE}:${VERSION}-amd64 \
+            ${IMAGE}:${VERSION}-arm64
+          docker manifest push ${IMAGE}:${VERSION}
+
+          if [[ "${{ steps.version.outputs.IS_RELEASE }}" == "true" ]]; then
+            docker manifest create ${IMAGE}:latest \
+              ${IMAGE}:${VERSION}-amd64 \
+              ${IMAGE}:${VERSION}-arm64
+            docker manifest push ${IMAGE}:latest
+          fi
 
-  # After all images build successfully, update the VERSION file
-  # Only formal releases (no -dev, -alpha, -beta, -rc suffixes) trigger the update
+  # Update the VERSION file
   update-version:
     runs-on: ubuntu-latest
-    needs: build
-    if: startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, '-')
+    needs: merge-manifests
+    if: startsWith(github.ref, 'refs/tags/v')
     steps:
-      - name: Checkout
+      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: main
+          fetch-depth: 0 # fetch full history so we can tell which branch the tag is on
          token: ${{ secrets.GITHUB_TOKEN }}
 
+      - name: Determine source branch and version
+        id: branch
+        run: |
+          VERSION="${GITHUB_REF#refs/tags/}"
+
+          # Find the branches that contain this tag
+          BRANCHES=$(git branch -r --contains ${{ github.ref_name }})
+          echo "Branches containing tag: $BRANCHES"
+
+          # Decide which branch the tag came from
+          if echo "$BRANCHES" | grep -q "origin/main"; then
+            TARGET_BRANCH="main"
+            UPDATE_LATEST="true"
+          elif echo "$BRANCHES" | grep -q "origin/dev"; then
+            TARGET_BRANCH="dev"
+            UPDATE_LATEST="false"
+          else
+            echo "Warning: Tag not found in main or dev branch, defaulting to main"
+            TARGET_BRANCH="main"
+            UPDATE_LATEST="false"
+          fi
+
+          echo "BRANCH=$TARGET_BRANCH" >> $GITHUB_OUTPUT
+          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
+          echo "UPDATE_LATEST=$UPDATE_LATEST" >> $GITHUB_OUTPUT
+          echo "Will update VERSION on branch: $TARGET_BRANCH"
+
+      - name: Checkout target branch
+        run: |
+          git checkout ${{ steps.branch.outputs.BRANCH }}
+
       - name: Update VERSION file
         run: |
-          VERSION="${GITHUB_REF#refs/tags/}"
+          VERSION="${{ steps.branch.outputs.VERSION }}"
           echo "$VERSION" > VERSION
-          echo "Updated VERSION to $VERSION"
+          echo "Updated VERSION to $VERSION on branch ${{ steps.branch.outputs.BRANCH }}"
 
       - name: Commit and push
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
           git add VERSION
-          git diff --staged --quiet || git commit -m "chore: bump version to ${GITHUB_REF#refs/tags/}"
-          git push
+          git diff --staged --quiet || git commit -m "chore: bump version to ${{ steps.branch.outputs.VERSION }}"
+          git push origin ${{ steps.branch.outputs.BRANCH }}
```
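For what it's worth, both halves of this flow are easy to sanity-check locally. The sketch below is hedged: the tag ref and image name are hypothetical placeholders, not values taken from the workflow's `IMAGE_PREFIX` configuration.

```bash
# Reproduce the workflow's VERSION derivation for a tag build.
GITHUB_REF="refs/tags/v1.5.0"            # hypothetical tag ref
echo "VERSION=${GITHUB_REF#refs/tags/}"  # prints VERSION=v1.5.0

# Inspect a merged manifest: it should list one entry per platform
# (linux/amd64 and linux/arm64), each pointing at a per-arch image.
docker buildx imagetools inspect example/xingrin-server:v1.5.0
```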
.gitignore (vendored): 1 changed line
```diff
@@ -64,6 +64,7 @@ backend/.env.local
 .coverage
 htmlcov/
 *.cover
+.hypothesis/
 
 # ============================
 # Backend (Go) related
```
README.md: 189 changed lines
````diff
@@ -1,7 +1,7 @@
 <h1 align="center">XingRin - 星环</h1>
 
 <p align="center">
-  <b>🛡️ Attack Surface Management (ASM) Platform | Automated Asset Discovery & Vulnerability Scanning</b>
+  <b>Attack Surface Management (ASM) Platform | Automated Asset Discovery & Vulnerability Scanning</b>
 </p>
 
 <p align="center">
@@ -12,22 +12,29 @@
 </p>
 
 <p align="center">
-  <a href="#-功能特性">Features</a> •
-  <a href="#-快速开始">Quick Start</a> •
-  <a href="#-文档">Docs</a> •
-  <a href="#-技术栈">Tech Stack</a> •
-  <a href="#-反馈与贡献">Feedback & Contributing</a>
+  <a href="#功能特性">Features</a> •
+  <a href="#全局资产搜索">Asset Search</a> •
+  <a href="#快速开始">Quick Start</a> •
+  <a href="#文档">Docs</a> •
+  <a href="#反馈与贡献">Feedback & Contributing</a>
 </p>
 
 <p align="center">
-  <sub>🔍 Keywords: ASM | attack surface management | vulnerability scanning | asset discovery | Bug Bounty | penetration testing | Nuclei | subdomain enumeration | EASM</sub>
+  <sub>Keywords: ASM | attack surface management | vulnerability scanning | asset discovery | asset search | Bug Bounty | penetration testing | Nuclei | subdomain enumeration | EASM</sub>
 </p>
 
+---
+
+## Online Demo
+
+**[https://xingrin.vercel.app/](https://xingrin.vercel.app/)**
+
+> UI showcase only; not connected to a backend database
+
 ---
 
 <p align="center">
-  <b>🎨 Modern UI</b>
+  <b>Modern UI</b>
 </p>
 
 <p align="center">
@@ -37,34 +44,49 @@
   <img src="docs/screenshots/quantum-rose.png" alt="Quantum Rose" width="24%">
 </p>
 
-## 📚 Docs
+## Docs
 
-- [📖 Technical Docs](./docs/README.md) - documentation index (🚧 work in progress)
-- [🚀 Quick Start](./docs/quick-start.md) - one-command install and deployment guide
-- [🔄 Version Management](./docs/version-management.md) - Git-tag-driven automated version management
-- [📦 Nuclei Template Architecture](./docs/nuclei-template-architecture.md) - template repo storage and sync
-- [📖 Wordlist Architecture](./docs/wordlist-architecture.md) - wordlist storage and sync
-- [🔍 Scan Flow Architecture](./docs/scan-flow-architecture.md) - full scan pipeline and tool orchestration
+- [Technical Docs](./docs/README.md) - documentation index (work in progress)
+- [Quick Start](./docs/quick-start.md) - one-command install and deployment guide
+- [Version Management](./docs/version-management.md) - Git-tag-driven automated version management
+- [Nuclei Template Architecture](./docs/nuclei-template-architecture.md) - template repo storage and sync
+- [Wordlist Architecture](./docs/wordlist-architecture.md) - wordlist storage and sync
+- [Scan Flow Architecture](./docs/scan-flow-architecture.md) - full scan pipeline and tool orchestration
 
 
 ---
 
-## ✨ Features
+## Features
 
-### 🎯 Target & Asset Management
-- **Organization management** - multi-level target organizations, flexible grouping
-- **Target management** - domain and IP target types
-- **Asset discovery** - automatic discovery of subdomains, websites, endpoints, and directories
-- **Asset snapshots** - snapshot diffing of scan results to track asset changes
+### Scanning Capabilities
 
-### 🔍 Vulnerability Scanning
-- **Multi-engine support** - integrates Nuclei and other mainstream scan engines
-- **Custom pipelines** - YAML-configured scan flows, flexible orchestration
-- **Scheduled scans** - cron expressions for automated periodic scanning
+| Feature | Status | Tools | Notes |
+|------|------|------|------|
+| Subdomain scanning | Done | Subfinder, Amass, PureDNS | passive collection + active brute force, 50+ aggregated data sources |
+| Port scanning | Done | Naabu | custom port ranges |
+| Site discovery | Done | HTTPX | HTTP probing; auto-collects title, status code, tech stack |
+| Fingerprinting | Done | XingFinger | 27,000+ fingerprint rules from multiple sources |
+| URL collection | Done | Waymore, Katana | historical data + active crawling |
+| Directory scanning | Done | FFUF | fast brute forcing, smart wordlists |
+| Vulnerability scanning | Done | Nuclei, Dalfox | 9,000+ POC templates, XSS detection |
+| Site screenshots | Done | Playwright | high-compression WebP storage |
 
-#### Scan Flow Architecture
+### Platform Capabilities
 
-The full scan flow includes stages for subdomain discovery, port scanning, site discovery, URL collection, directory scanning, and vulnerability scanning
+| Feature | Status | Notes |
+|------|------|------|
+| Target management | Done | multi-level organizations; domain/IP targets |
+| Asset snapshots | Done | scan result diffing to track asset changes |
+| Blacklist filtering | Done | global + per-target; wildcard/CIDR support |
+| Scheduled tasks | Done | cron expressions, automated periodic scans |
+| Distributed scanning | Done | multiple worker nodes, load-aware scheduling |
+| Global search | Done | expression syntax, multi-field combined queries |
+| Notifications | Done | WeCom, Telegram, Discord |
+| API key management | Done | visual configuration of data-source API keys |
+
+### Scan Flow Architecture
+
+The full scan flow includes stages for subdomain discovery, port scanning, site discovery, fingerprinting, URL collection, directory scanning, and vulnerability scanning
 
 ```mermaid
 flowchart LR
@@ -75,13 +97,15 @@ flowchart LR
     SUB["Subdomain Discovery<br/>subfinder, amass, puredns"]
     PORT["Port Scan<br/>naabu"]
     SITE["Site Probing<br/>httpx"]
-    SUB --> PORT --> SITE
+    FINGER["Fingerprinting<br/>xingfinger"]
+    SUB --> PORT --> SITE --> FINGER
   end
 
   subgraph STAGE2["Stage 2: Deep Analysis"]
     direction TB
     URL["URL Collection<br/>waymore, katana"]
    DIR["Directory Scan<br/>ffuf"]
+    SCREENSHOT["Screenshots<br/>playwright"]
   end
 
   subgraph STAGE3["Stage 3: Vulnerability Detection"]
@@ -91,7 +115,7 @@ flowchart LR
   FINISH["Scan Complete"]
 
   START --> STAGE1
-  SITE --> STAGE2
+  FINGER --> STAGE2
   STAGE2 --> STAGE3
   STAGE3 --> FINISH
 
@@ -103,14 +127,16 @@ flowchart LR
   style SUB fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
   style PORT fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
   style SITE fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
+  style FINGER fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
   style URL fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
   style DIR fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
+  style SCREENSHOT fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
   style VULN fill:#f0b27a,stroke:#e67e22,stroke-width:1px,color:#fff
 ```
 
 See the [scan flow architecture doc](./docs/scan-flow-architecture.md) for details
 
-### 🖥️ Distributed Architecture
+### Distributed Architecture
 - **Multi-node scanning** - deploy multiple worker nodes to scale scanning horizontally
 - **Local node** - zero config; installation auto-registers a local Docker worker
 - **Remote nodes** - one-click SSH deployment of remote VPSes as scan nodes
@@ -155,17 +181,43 @@ flowchart TB
   W3 -.heartbeat.-> REDIS
 ```
 
-### 📊 Visual Interface
+### Global Asset Search
+- **Multi-type search** - covers both Website and Endpoint asset types
+- **Expression syntax** - `=` (fuzzy), `==` (exact), and `!=` (not equal) operators
+- **Logical combination** - `&&` (AND) and `||` (OR)
+- **Multi-field queries** - host, url, title, tech, status, body, and header fields
+- **CSV export** - streaming export of all search results, no size limit
+
+#### Search Syntax Examples
+
+```bash
+# Basic searches
+host="api"       # host contains "api"
+status=="200"    # status code is exactly 200
+tech="nginx"     # tech stack contains nginx
+
+# Combined searches
+host="api" && status=="200"   # host contains api AND status code is 200
+tech="vue" || tech="react"    # tech stack contains vue OR react
+
+# Complex queries
+host="admin" && tech="php" && status=="200"
+url="/api/v1" && status!="404"
+```
+
+### Visual Interface
 - **Statistics** - asset/vulnerability dashboards
 - **Real-time notifications** - WebSocket message push
-- **Notification push** - real-time WeCom, Telegram, Discord message push
 
 ---
 
-## 📦 Quick Start
+## Quick Start
 
 ### Requirements
 
-- **OS**: Ubuntu 20.04+ / Debian 11+ (recommended)
+- **OS**: Ubuntu 20.04+ / Debian 11+
 - **Architecture**: AMD64 (x86_64) / ARM64 (aarch64)
 - **Hardware**: 2 cores / 4 GB RAM minimum, 20 GB+ disk
 
 ### One-Command Install
@@ -178,18 +230,18 @@ cd xingrin
 # Install and start (production mode)
 sudo ./install.sh
 
-# 🇨🇳 Mainland China users: mirror acceleration recommended
+# Mainland China users: mirror acceleration recommended (third-party mirrors may stop working; long-term availability is not guaranteed)
 sudo ./install.sh --mirror
 ```
 
-> **💡 --mirror flag notes**
+> **--mirror flag notes**
 > - auto-configures Docker registry mirrors (CN mirror sources)
 > - accelerates Git clones (Nuclei templates, etc.)
 > - greatly speeds up installation and avoids network timeouts
 
 ### Access
 
 - **Web UI**: `https://ip:8083`
 - **Default account**: admin / admin (change the password after first login)
 
 ### Common Commands
 
@@ -207,22 +259,38 @@ sudo ./restart.sh
 sudo ./uninstall.sh
 ```
 
-## 🤝 Feedback & Contributing
+## Feedback & Contributing
 
-- 🐛 **Found a bug?** Submit it via [Issue](https://github.com/yyhuni/xingrin/issues)
-- 💡 **Have ideas, e.g. UI or feature design?** Suggestions welcome via [Issue](https://github.com/yyhuni/xingrin/issues)
-- 🔧 **Want to contribute?** Follow my WeChat official account and contact me
+- **Found a bug, or have ideas such as UI or feature design?** Submit them via [Issue](https://github.com/yyhuni/xingrin/issues) or DM the WeChat official account
 
-## 📧 Contact
+- The current version is used mostly by me alone, so there may be many edge-case issues
+- For questions, suggestions, or anything else, prefer an [Issue](https://github.com/yyhuni/xingrin/issues); you can also message my WeChat official account directly and I will reply
 
-- WeChat official account: **洋洋的小黑屋**
+## Contact
+- WeChat official account: **塔罗安全学苑**
+- The WeChat group link is in the official account's bottom menu; if it has expired, DM me and I'll add you
 
 <img src="docs/wechat-qrcode.png" alt="WeChat Official Account" width="200">
 
+### Follow the Official Account for Free Fingerprint Libraries
 
-## ⚠️ Disclaimer
+| Fingerprint library | Entries |
+|--------|------|
+| ehole.json | 21,977 |
+| ARL.yaml | 9,264 |
+| goby.json | 7,086 |
+| FingerprintHub.json | 3,147 |
+
+> Reply 「指纹」 (fingerprint) to the official account to get them
+
+## Sponsorship
+
+If this project helps you, consider buying me a Mixue drink; your stars and sponsorship keep the free updates coming
+
+<p>
+  <img src="docs/wx_pay.jpg" alt="WeChat Pay" width="200">
+  <img src="docs/zfb_pay.jpg" alt="Alipay" width="200">
+</p>
+
+## Disclaimer
 
 **Important: read carefully before use**
 
@@ -237,30 +305,29 @@ sudo ./uninstall.sh
 - Comply with the laws and regulations of your jurisdiction
 - Bear all consequences arising from misuse
 
-## 🌟 Star History
+## Star History
 
-If this project helps you, please support it with a ⭐ Star!
+If this project helps you, please support it with a Star!
 
 [](https://star-history.com/#yyhuni/xingrin&Date)
 
-## 📄 License
+## License
 
 This project is licensed under the [GNU General Public License v3.0](LICENSE).
 
 ### Permitted Uses
 
-- ✅ Personal learning and research
-- ✅ Commercial and non-commercial use
-- ✅ Modification and distribution
-- ✅ Patent use
-- ✅ Private use
+- Personal learning and research
+- Commercial and non-commercial use
+- Modification and distribution
+- Patent use
+- Private use
 
 ### Obligations and Restrictions
 
-- 📋 **Source disclosure**: source code must be provided when distributing
-- 📋 **Same license**: derivative works must use the same license
-- 📋 **Copyright notice**: original copyright and license notices must be kept
-- ❌ **No warranty**: provided without any warranty
-- ❌ Unauthorized penetration testing
-- ❌ Any illegal activity
+- **Source disclosure**: source code must be provided when distributing
+- **Same license**: derivative works must use the same license
+- **Copyright notice**: original copyright and license notices must be kept
+- **No warranty**: provided without any warranty
+- Unauthorized penetration testing
+- Any illegal activity
````
backend/.gitignore (vendored): 1 changed line
```diff
@@ -7,6 +7,7 @@ __pycache__/
 *.egg-info/
 dist/
 build/
+.hypothesis/  # Hypothesis property-based testing cache
 
 # Virtual environments
 venv/
```
```diff
@@ -4,7 +4,3 @@ from django.apps import AppConfig
 class AssetConfig(AppConfig):
     default_auto_field = 'django.db.models.BigAutoField'
     name = 'apps.asset'
-
-    def ready(self):
-        # Import all models so Django discovers and registers them
-        from . import models
```
```diff
@@ -14,12 +14,13 @@ class EndpointDTO:
     status_code: Optional[int] = None
     content_length: Optional[int] = None
     webserver: Optional[str] = None
-    body_preview: Optional[str] = None
+    response_body: Optional[str] = None
     content_type: Optional[str] = None
     tech: Optional[List[str]] = None
     vhost: Optional[bool] = None
     location: Optional[str] = None
     matched_gf_patterns: Optional[List[str]] = None
+    response_headers: Optional[str] = None
 
     def __post_init__(self):
         if self.tech is None:
```
```diff
@@ -17,9 +17,10 @@ class WebSiteDTO:
     webserver: str = ''
     content_type: str = ''
     tech: List[str] = None
-    body_preview: str = ''
+    response_body: str = ''
     vhost: Optional[bool] = None
     created_at: str = None
+    response_headers: str = ''
 
     def __post_init__(self):
         if self.tech is None:
```
```diff
@@ -13,6 +13,7 @@ class EndpointSnapshotDTO:
     Snapshots belong only to a scan.
     """
     scan_id: int
+    target_id: int  # required; used to sync into the asset tables
     url: str
     host: str = ''  # hostname (domain or IP address)
     title: str = ''
@@ -22,10 +23,10 @@ class EndpointSnapshotDTO:
     webserver: str = ''
     content_type: str = ''
     tech: List[str] = None
-    body_preview: str = ''
+    response_body: str = ''
     vhost: Optional[bool] = None
     matched_gf_patterns: List[str] = None
-    target_id: Optional[int] = None  # redundant field used to sync into the asset tables
+    response_headers: str = ''
 
     def __post_init__(self):
         if self.tech is None:
@@ -42,9 +43,6 @@ class EndpointSnapshotDTO:
         """
         from apps.asset.dtos.asset import EndpointDTO
 
-        if self.target_id is None:
-            raise ValueError("target_id must not be None; cannot sync into the asset tables")
-
         return EndpointDTO(
             target_id=self.target_id,
             url=self.url,
@@ -53,10 +51,11 @@ class EndpointSnapshotDTO:
             status_code=self.status_code,
             content_length=self.content_length,
             webserver=self.webserver,
-            body_preview=self.body_preview,
+            response_body=self.response_body,
             content_type=self.content_type,
             tech=self.tech if self.tech else [],
             vhost=self.vhost,
             location=self.location,
-            matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else []
+            matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else [],
+            response_headers=self.response_headers,
         )
```
```diff
@@ -13,18 +13,19 @@ class WebsiteSnapshotDTO:
     Snapshots belong only to a scan; target info comes from scan.target.
     """
     scan_id: int
-    target_id: int  # only used to pass data; not saved to the database
+    target_id: int  # required; used to sync into the asset tables
     url: str
     host: str
     title: str = ''
-    status: Optional[int] = None
+    status_code: Optional[int] = None  # unified naming: status -> status_code
     content_length: Optional[int] = None
     location: str = ''
-    web_server: str = ''
+    webserver: str = ''  # unified naming: web_server -> webserver
     content_type: str = ''
     tech: List[str] = None
-    body_preview: str = ''
+    response_body: str = ''
     vhost: Optional[bool] = None
+    response_headers: str = ''
 
     def __post_init__(self):
         if self.tech is None:
@@ -44,12 +45,13 @@ class WebsiteSnapshotDTO:
             url=self.url,
             host=self.host,
             title=self.title,
-            status_code=self.status,
+            status_code=self.status_code,
             content_length=self.content_length,
             location=self.location,
-            webserver=self.web_server,
+            webserver=self.webserver,
             content_type=self.content_type,
             tech=self.tech if self.tech else [],
-            body_preview=self.body_preview,
-            vhost=self.vhost
+            response_body=self.response_body,
+            vhost=self.vhost,
+            response_headers=self.response_headers,
         )
```
backend/apps/asset/migrations/0001_initial.py (new file): 345 lines
```python
# Generated by Django 5.2.7 on 2026-01-06 00:55

import django.contrib.postgres.fields
import django.contrib.postgres.indexes
import django.core.validators
import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

    initial = True

    dependencies = [
        ('scan', '0001_initial'),
        ('targets', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='AssetStatistics',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('total_targets', models.IntegerField(default=0, help_text='目标总数')),
                ('total_subdomains', models.IntegerField(default=0, help_text='子域名总数')),
                ('total_ips', models.IntegerField(default=0, help_text='IP地址总数')),
                ('total_endpoints', models.IntegerField(default=0, help_text='端点总数')),
                ('total_websites', models.IntegerField(default=0, help_text='网站总数')),
                ('total_vulns', models.IntegerField(default=0, help_text='漏洞总数')),
                ('total_assets', models.IntegerField(default=0, help_text='总资产数(子域名+IP+端点+网站)')),
                ('prev_targets', models.IntegerField(default=0, help_text='上次目标总数')),
                ('prev_subdomains', models.IntegerField(default=0, help_text='上次子域名总数')),
                ('prev_ips', models.IntegerField(default=0, help_text='上次IP地址总数')),
                ('prev_endpoints', models.IntegerField(default=0, help_text='上次端点总数')),
                ('prev_websites', models.IntegerField(default=0, help_text='上次网站总数')),
                ('prev_vulns', models.IntegerField(default=0, help_text='上次漏洞总数')),
                ('prev_assets', models.IntegerField(default=0, help_text='上次总资产数')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='最后更新时间')),
            ],
            options={
                'verbose_name': '资产统计',
                'verbose_name_plural': '资产统计',
                'db_table': 'asset_statistics',
            },
        ),
        migrations.CreateModel(
            name='StatisticsHistory',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('date', models.DateField(help_text='统计日期', unique=True)),
                ('total_targets', models.IntegerField(default=0, help_text='目标总数')),
                ('total_subdomains', models.IntegerField(default=0, help_text='子域名总数')),
                ('total_ips', models.IntegerField(default=0, help_text='IP地址总数')),
                ('total_endpoints', models.IntegerField(default=0, help_text='端点总数')),
                ('total_websites', models.IntegerField(default=0, help_text='网站总数')),
                ('total_vulns', models.IntegerField(default=0, help_text='漏洞总数')),
                ('total_assets', models.IntegerField(default=0, help_text='总资产数')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='更新时间')),
            ],
            options={
                'verbose_name': '统计历史',
                'verbose_name_plural': '统计历史',
                'db_table': 'statistics_history',
                'ordering': ['-date'],
                'indexes': [models.Index(fields=['date'], name='statistics__date_1d29cd_idx')],
            },
        ),
        migrations.CreateModel(
            name='Directory',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.CharField(help_text='完整请求 URL', max_length=2000)),
                ('status', models.IntegerField(blank=True, help_text='HTTP 响应状态码', null=True)),
                ('content_length', models.BigIntegerField(blank=True, help_text='响应体字节大小(Content-Length 或实际长度)', null=True)),
                ('words', models.IntegerField(blank=True, help_text='响应体中单词数量(按空格分割)', null=True)),
                ('lines', models.IntegerField(blank=True, help_text='响应体行数(按换行符分割)', null=True)),
                ('content_type', models.CharField(blank=True, default='', help_text='响应头 Content-Type 值', max_length=200)),
                ('duration', models.BigIntegerField(blank=True, help_text='请求耗时(单位:纳秒)', null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='directories', to='targets.target')),
            ],
            options={
                'verbose_name': '目录',
                'verbose_name_plural': '目录',
                'db_table': 'directory',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='directory_created_2cef03_idx'), models.Index(fields=['target'], name='directory_target__e310c8_idx'), models.Index(fields=['url'], name='directory_url_ba40cd_idx'), models.Index(fields=['status'], name='directory_status_40bbe6_idx'), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='directory_url_trgm_idx', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('target', 'url'), name='unique_directory_url_target')],
            },
        ),
        migrations.CreateModel(
            name='DirectorySnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.CharField(help_text='目录URL', max_length=2000)),
                ('status', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.BigIntegerField(blank=True, help_text='内容长度', null=True)),
                ('words', models.IntegerField(blank=True, help_text='响应体中单词数量(按空格分割)', null=True)),
                ('lines', models.IntegerField(blank=True, help_text='响应体行数(按换行符分割)', null=True)),
                ('content_type', models.CharField(blank=True, default='', help_text='响应头 Content-Type 值', max_length=200)),
                ('duration', models.BigIntegerField(blank=True, help_text='请求耗时(单位:纳秒)', null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='directory_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '目录快照',
                'verbose_name_plural': '目录快照',
                'db_table': 'directory_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='directory_s_scan_id_c45900_idx'), models.Index(fields=['url'], name='directory_s_url_b4b72b_idx'), models.Index(fields=['status'], name='directory_s_status_e9f57e_idx'), models.Index(fields=['content_type'], name='directory_s_content_45e864_idx'), models.Index(fields=['-created_at'], name='directory_s_created_eb9d27_idx'), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='dir_snap_url_trgm', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_directory_per_scan_snapshot')],
            },
        ),
        migrations.CreateModel(
            name='Endpoint',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='最终访问的完整URL')),
                ('host', models.CharField(blank=True, default='', help_text='主机名(域名或IP地址)', max_length=253)),
                ('location', models.TextField(blank=True, default='', help_text='重定向地址(HTTP 3xx 响应头 Location)')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('title', models.TextField(blank=True, default='', help_text='网页标题(HTML <title> 标签内容)')),
                ('webserver', models.TextField(blank=True, default='', help_text='服务器类型(HTTP 响应头 Server 值)')),
                ('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
                ('content_type', models.TextField(blank=True, default='', help_text='响应类型(HTTP Content-Type 响应头)')),
                ('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈(服务器/框架/语言等)', size=None)),
                ('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.IntegerField(blank=True, help_text='响应体大小(单位字节)', null=True)),
                ('vhost', models.BooleanField(blank=True, help_text='是否支持虚拟主机', null=True)),
                ('matched_gf_patterns', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='匹配的GF模式列表,用于识别敏感端点(如api, debug, config等)', size=None)),
                ('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
                ('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='endpoints', to='targets.target')),
            ],
            options={
                'verbose_name': '端点',
                'verbose_name_plural': '端点',
                'db_table': 'endpoint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='endpoint_created_44fe9c_idx'), models.Index(fields=['target'], name='endpoint_target__7f9065_idx'), models.Index(fields=['url'], name='endpoint_url_30f66e_idx'), models.Index(fields=['host'], name='endpoint_host_5b4cc8_idx'), models.Index(fields=['status_code'], name='endpoint_status__5d4fdd_idx'), models.Index(fields=['title'], name='endpoint_title_29e26c_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='endpoint_tech_2bfa7c_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='endpoint_resp_headers_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='endpoint_url_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='endpoint_title_trgm_idx', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('url', 'target'), name='unique_endpoint_url_target')],
            },
        ),
        migrations.CreateModel(
            name='EndpointSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='端点URL')),
                ('host', models.CharField(blank=True, default='', help_text='主机名(域名或IP地址)', max_length=253)),
                ('title', models.TextField(blank=True, default='', help_text='页面标题')),
                ('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.IntegerField(blank=True, help_text='内容长度', null=True)),
                ('location', models.TextField(blank=True, default='', help_text='重定向位置')),
                ('webserver', models.TextField(blank=True, default='', help_text='Web服务器')),
                ('content_type', models.TextField(blank=True, default='', help_text='内容类型')),
                ('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈', size=None)),
                ('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
                ('vhost', models.BooleanField(blank=True, help_text='虚拟主机标志', null=True)),
                ('matched_gf_patterns', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='匹配的GF模式列表', size=None)),
                ('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='endpoint_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '端点快照',
                'verbose_name_plural': '端点快照',
                'db_table': 'endpoint_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='endpoint_sn_scan_id_6ac9a7_idx'), models.Index(fields=['url'], name='endpoint_sn_url_205160_idx'), models.Index(fields=['host'], name='endpoint_sn_host_577bfd_idx'), models.Index(fields=['title'], name='endpoint_sn_title_516a05_idx'), models.Index(fields=['status_code'], name='endpoint_sn_status__83efb0_idx'), models.Index(fields=['webserver'], name='endpoint_sn_webserv_66be83_idx'), models.Index(fields=['-created_at'], name='endpoint_sn_created_21fb5b_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='endpoint_sn_tech_0d0752_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='ep_snap_resp_hdr_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='ep_snap_url_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='ep_snap_title_trgm', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_endpoint_per_scan_snapshot')],
            },
        ),
        migrations.CreateModel(
            name='HostPortMapping',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('host', models.CharField(help_text='主机名(域名或IP)', max_length=1000)),
                ('ip', models.GenericIPAddressField(help_text='IP地址')),
                ('port', models.IntegerField(help_text='端口号(1-65535)', validators=[django.core.validators.MinValueValidator(1, message='端口号必须大于等于1'), django.core.validators.MaxValueValidator(65535, message='端口号必须小于等于65535')])),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='host_port_mappings', to='targets.target')),
            ],
            options={
                'verbose_name': '主机端口映射',
                'verbose_name_plural': '主机端口映射',
                'db_table': 'host_port_mapping',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['target'], name='host_port_m_target__943e9b_idx'), models.Index(fields=['host'], name='host_port_m_host_f78363_idx'), models.Index(fields=['ip'], name='host_port_m_ip_2e6f02_idx'), models.Index(fields=['port'], name='host_port_m_port_9fb9ff_idx'), models.Index(fields=['host', 'ip'], name='host_port_m_host_3ce245_idx'), models.Index(fields=['-created_at'], name='host_port_m_created_11cd22_idx')],
                'constraints': [models.UniqueConstraint(fields=('target', 'host', 'ip', 'port'), name='unique_target_host_ip_port')],
            },
        ),
        migrations.CreateModel(
            name='HostPortMappingSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('host', models.CharField(help_text='主机名(域名或IP)', max_length=1000)),
                ('ip', models.GenericIPAddressField(help_text='IP地址')),
                ('port', models.IntegerField(help_text='端口号(1-65535)', validators=[django.core.validators.MinValueValidator(1, message='端口号必须大于等于1'), django.core.validators.MaxValueValidator(65535, message='端口号必须小于等于65535')])),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务(主关联)', on_delete=django.db.models.deletion.CASCADE, related_name='host_port_mapping_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '主机端口映射快照',
                'verbose_name_plural': '主机端口映射快照',
                'db_table': 'host_port_mapping_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='host_port_m_scan_id_50ba0b_idx'), models.Index(fields=['host'], name='host_port_m_host_e99054_idx'), models.Index(fields=['ip'], name='host_port_m_ip_54818c_idx'), models.Index(fields=['port'], name='host_port_m_port_ed7b48_idx'), models.Index(fields=['host', 'ip'], name='host_port_m_host_8a463a_idx'), models.Index(fields=['scan', 'host'], name='host_port_m_scan_id_426fdb_idx'), models.Index(fields=['-created_at'], name='host_port_m_created_fb28b8_idx')],
                'constraints': [models.UniqueConstraint(fields=('scan', 'host', 'ip', 'port'), name='unique_scan_host_ip_port_snapshot')],
            },
        ),
        migrations.CreateModel(
            name='Subdomain',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='子域名名称', max_length=1000)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='subdomains', to='targets.target')),
            ],
            options={
                'verbose_name': '子域名',
                'verbose_name_plural': '子域名',
                'db_table': 'subdomain',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='subdomain_created_e187a8_idx'), models.Index(fields=['name', 'target'], name='subdomain_name_60e1d0_idx'), models.Index(fields=['target'], name='subdomain_target__e409f0_idx'), models.Index(fields=['name'], name='subdomain_name_d40ba7_idx'), django.contrib.postgres.indexes.GinIndex(fields=['name'], name='subdomain_name_trgm_idx', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('name', 'target'), name='unique_subdomain_name_target')],
            },
        ),
        migrations.CreateModel(
            name='SubdomainSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='子域名名称', max_length=1000)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='subdomain_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '子域名快照',
                'verbose_name_plural': '子域名快照',
                'db_table': 'subdomain_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='subdomain_s_scan_id_68c253_idx'), models.Index(fields=['name'], name='subdomain_s_name_2da42b_idx'), models.Index(fields=['-created_at'], name='subdomain_s_created_d2b48e_idx'), django.contrib.postgres.indexes.GinIndex(fields=['name'], name='subdomain_snap_name_trgm', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('scan', 'name'), name='unique_subdomain_per_scan_snapshot')],
            },
        ),
        migrations.CreateModel(
            name='Vulnerability',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.CharField(help_text='漏洞所在的URL', max_length=2000)),
                ('vuln_type', models.CharField(help_text='漏洞类型(如 xss, sqli)', max_length=100)),
                ('severity', models.CharField(choices=[('unknown', '未知'), ('info', '信息'), ('low', '低'), ('medium', '中'), ('high', '高'), ('critical', '危急')], default='unknown', help_text='严重性(未知/信息/低/中/高/危急)', max_length=20)),
                ('source', models.CharField(blank=True, default='', help_text='来源工具(如 dalfox, nuclei, crlfuzz)', max_length=50)),
                ('cvss_score', models.DecimalField(blank=True, decimal_places=1, help_text='CVSS 评分(0.0-10.0)', max_digits=3, null=True)),
                ('description', models.TextField(blank=True, default='', help_text='漏洞描述')),
                ('raw_output', models.JSONField(blank=True, default=dict, help_text='工具原始输出')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='vulnerabilities', to='targets.target')),
            ],
            options={
                'verbose_name': '漏洞',
                'verbose_name_plural': '漏洞',
                'db_table': 'vulnerability',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['target'], name='vulnerabili_target__755a02_idx'), models.Index(fields=['vuln_type'], name='vulnerabili_vuln_ty_3010cd_idx'), models.Index(fields=['severity'], name='vulnerabili_severit_1a798b_idx'), models.Index(fields=['source'], name='vulnerabili_source_7c7552_idx'), models.Index(fields=['url'], name='vulnerabili_url_4dcc4d_idx'), models.Index(fields=['-created_at'], name='vulnerabili_created_e25ff7_idx')],
            },
        ),
        migrations.CreateModel(
            name='VulnerabilitySnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.CharField(help_text='漏洞所在的URL', max_length=2000)),
                ('vuln_type', models.CharField(help_text='漏洞类型(如 xss, sqli)', max_length=100)),
                ('severity', models.CharField(choices=[('unknown', '未知'), ('info', '信息'), ('low', '低'), ('medium', '中'), ('high', '高'), ('critical', '危急')], default='unknown', help_text='严重性(未知/信息/低/中/高/危急)', max_length=20)),
                ('source', models.CharField(blank=True, default='', help_text='来源工具(如 dalfox, nuclei, crlfuzz)', max_length=50)),
                ('cvss_score', models.DecimalField(blank=True, decimal_places=1, help_text='CVSS 评分(0.0-10.0)', max_digits=3, null=True)),
                ('description', models.TextField(blank=True, default='', help_text='漏洞描述')),
                ('raw_output', models.JSONField(blank=True, default=dict, help_text='工具原始输出')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='vulnerability_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '漏洞快照',
                'verbose_name_plural': '漏洞快照',
                'db_table': 'vulnerability_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='vulnerabili_scan_id_7b81c9_idx'), models.Index(fields=['url'], name='vulnerabili_url_11a707_idx'), models.Index(fields=['vuln_type'], name='vulnerabili_vuln_ty_6b90ee_idx'), models.Index(fields=['severity'], name='vulnerabili_severit_4eae0d_idx'), models.Index(fields=['source'], name='vulnerabili_source_968b1f_idx'), models.Index(fields=['-created_at'], name='vulnerabili_created_53a12e_idx')],
            },
        ),
        migrations.CreateModel(
            name='WebSite',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='最终访问的完整URL')),
                ('host', models.CharField(blank=True, default='', help_text='主机名(域名或IP地址)', max_length=253)),
                ('location', models.TextField(blank=True, default='', help_text='重定向地址(HTTP 3xx 响应头 Location)')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('title', models.TextField(blank=True, default='', help_text='网页标题(HTML <title> 标签内容)')),
                ('webserver', models.TextField(blank=True, default='', help_text='服务器类型(HTTP 响应头 Server 值)')),
                ('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
                ('content_type', models.TextField(blank=True, default='', help_text='响应类型(HTTP Content-Type 响应头)')),
                ('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈(服务器/框架/语言等)', size=None)),
                ('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.IntegerField(blank=True, help_text='响应体大小(单位字节)', null=True)),
                ('vhost', models.BooleanField(blank=True, help_text='是否支持虚拟主机', null=True)),
                ('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
                ('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='websites', to='targets.target')),
            ],
            options={
                'verbose_name': '站点',
                'verbose_name_plural': '站点',
                'db_table': 'website',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='website_created_c9cfd2_idx'), models.Index(fields=['url'], name='website_url_b18883_idx'), models.Index(fields=['host'], name='website_host_996b50_idx'), models.Index(fields=['target'], name='website_target__2a353b_idx'), models.Index(fields=['title'], name='website_title_c2775b_idx'), models.Index(fields=['status_code'], name='website_status__51663d_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='website_tech_e3f0cb_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='website_resp_headers_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='website_url_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='website_title_trgm_idx', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('url', 'target'), name='unique_website_url_target')],
            },
        ),
        migrations.CreateModel(
            name='WebsiteSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='站点URL')),
                ('host', models.CharField(blank=True, default='', help_text='主机名(域名或IP地址)', max_length=253)),
                ('title', models.TextField(blank=True, default='', help_text='页面标题')),
                ('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.BigIntegerField(blank=True, help_text='内容长度', null=True)),
                ('location', models.TextField(blank=True, default='', help_text='重定向位置')),
                ('webserver', models.TextField(blank=True, default='', help_text='Web服务器')),
                ('content_type', models.TextField(blank=True, default='', help_text='内容类型')),
                ('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈', size=None)),
                ('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
                ('vhost', models.BooleanField(blank=True, help_text='虚拟主机标志', null=True)),
                ('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='website_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '网站快照',
                'verbose_name_plural': '网站快照',
                'db_table': 'website_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='website_sna_scan_id_26b6dc_idx'), models.Index(fields=['url'], name='website_sna_url_801a70_idx'), models.Index(fields=['host'], name='website_sna_host_348fe1_idx'), models.Index(fields=['title'], name='website_sna_title_b1a5ee_idx'), models.Index(fields=['-created_at'], name='website_sna_created_2c149a_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='website_sna_tech_3d6d2f_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='ws_snap_resp_hdr_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='ws_snap_url_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='ws_snap_title_trgm', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_website_per_scan_snapshot')],
            },
        ),
    ]
```
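One operational note on the indexes above: `gin_trgm_ops` is provided by PostgreSQL's `pg_trgm` extension, so that extension must already exist in the database when this migration runs. A quick check, with connection details as placeholder values:

```bash
# Confirm pg_trgm is installed in the application database
# (host, user, and dbname below are placeholders).
psql -h localhost -U postgres -d xingrin \
  -c "SELECT extname, extversion FROM pg_extension WHERE extname = 'pg_trgm';"

# Install it once per database if the query returns no rows.
psql -h localhost -U postgres -d xingrin \
  -c "CREATE EXTENSION IF NOT EXISTS pg_trgm;"
```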
backend/apps/asset/migrations/0002_create_search_views.py (new file): 104 lines
@@ -0,0 +1,104 @@
"""
创建资产搜索物化视图(使用 pg_ivm 增量维护)

这些视图用于资产搜索功能,提供高性能的全文搜索能力。
"""

from django.db import migrations


class Migration(migrations.Migration):
    """创建资产搜索所需的增量物化视图"""

    dependencies = [
        ('asset', '0001_initial'),
    ]

    operations = [
        # 1. 确保 pg_ivm 扩展已安装
        migrations.RunSQL(
            sql="CREATE EXTENSION IF NOT EXISTS pg_ivm;",
            reverse_sql="DROP EXTENSION IF EXISTS pg_ivm;",
        ),

        # 2. 创建 Website 搜索视图
        # 注意:pg_ivm 不支持 ArrayField,所以 tech 字段需要从原表 JOIN 获取
        migrations.RunSQL(
            sql="""
            SELECT pgivm.create_immv('asset_search_view', $$
                SELECT
                    w.id,
                    w.url,
                    w.host,
                    w.title,
                    w.status_code,
                    w.response_headers,
                    w.response_body,
                    w.content_type,
                    w.content_length,
                    w.webserver,
                    w.location,
                    w.vhost,
                    w.created_at,
                    w.target_id
                FROM website w
            $$);
            """,
            reverse_sql="DROP TABLE IF EXISTS asset_search_view CASCADE;",
        ),

        # 3. 创建 Endpoint 搜索视图
        migrations.RunSQL(
            sql="""
            SELECT pgivm.create_immv('endpoint_search_view', $$
                SELECT
                    e.id,
                    e.url,
                    e.host,
                    e.title,
                    e.status_code,
                    e.response_headers,
                    e.response_body,
                    e.content_type,
                    e.content_length,
                    e.webserver,
                    e.location,
                    e.vhost,
                    e.created_at,
                    e.target_id
                FROM endpoint e
            $$);
            """,
            reverse_sql="DROP TABLE IF EXISTS endpoint_search_view CASCADE;",
        ),

        # 4. 为搜索视图创建索引(加速查询)
        migrations.RunSQL(
            sql=[
                # Website 搜索视图索引
                "CREATE INDEX IF NOT EXISTS asset_search_view_host_idx ON asset_search_view (host);",
                "CREATE INDEX IF NOT EXISTS asset_search_view_url_idx ON asset_search_view (url);",
                "CREATE INDEX IF NOT EXISTS asset_search_view_title_idx ON asset_search_view (title);",
                "CREATE INDEX IF NOT EXISTS asset_search_view_status_idx ON asset_search_view (status_code);",
                "CREATE INDEX IF NOT EXISTS asset_search_view_created_idx ON asset_search_view (created_at DESC);",
                # Endpoint 搜索视图索引
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_host_idx ON endpoint_search_view (host);",
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_url_idx ON endpoint_search_view (url);",
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_title_idx ON endpoint_search_view (title);",
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_status_idx ON endpoint_search_view (status_code);",
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_created_idx ON endpoint_search_view (created_at DESC);",
            ],
            reverse_sql=[
                "DROP INDEX IF EXISTS asset_search_view_host_idx;",
                "DROP INDEX IF EXISTS asset_search_view_url_idx;",
                "DROP INDEX IF EXISTS asset_search_view_title_idx;",
                "DROP INDEX IF EXISTS asset_search_view_status_idx;",
                "DROP INDEX IF EXISTS asset_search_view_created_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_host_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_url_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_title_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_status_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_created_idx;",
            ],
        ),
    ]
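The migration above only defines the views; the read path is not shown here. As a minimal sketch (assuming the asset_search_view IMMV and the website table created above), a direct query has to JOIN back to the source table, because pg_ivm cannot maintain the tech ArrayField:

# Minimal sketch (not part of this diff): querying the incrementally
# maintained view. The JOIN recovers the ArrayField column `tech`.
from django.db import connection

def find_hosts_by_title(keyword: str, limit: int = 20) -> list[tuple]:
    sql = """
        SELECT v.host, v.title, t.tech
        FROM asset_search_view v
        JOIN website t ON v.id = t.id
        WHERE v.title ILIKE %s
        ORDER BY v.created_at DESC
        LIMIT %s
    """
    with connection.cursor() as cursor:
        cursor.execute(sql, [f"%{keyword}%", limit])
        return cursor.fetchall()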
53 backend/apps/asset/migrations/0003_add_screenshot_models.py Normal file
@@ -0,0 +1,53 @@
# Generated by Django 5.2.7 on 2026-01-07 02:21

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('asset', '0002_create_search_views'),
        ('scan', '0001_initial'),
        ('targets', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='Screenshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='截图对应的 URL')),
                ('image', models.BinaryField(help_text='截图 WebP 二进制数据(压缩后)')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='更新时间')),
                ('target', models.ForeignKey(help_text='所属目标', on_delete=django.db.models.deletion.CASCADE, related_name='screenshots', to='targets.target')),
            ],
            options={
                'verbose_name': '截图',
                'verbose_name_plural': '截图',
                'db_table': 'screenshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['target'], name='screenshot_target__2f01f6_idx'), models.Index(fields=['-created_at'], name='screenshot_created_c0ad4b_idx')],
                'constraints': [models.UniqueConstraint(fields=('target', 'url'), name='unique_screenshot_per_target')],
            },
        ),
        migrations.CreateModel(
            name='ScreenshotSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='截图对应的 URL')),
                ('image', models.BinaryField(help_text='截图 WebP 二进制数据(压缩后)')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='screenshot_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '截图快照',
                'verbose_name_plural': '截图快照',
                'db_table': 'screenshot_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='screenshot__scan_id_fb8c4d_idx'), models.Index(fields=['-created_at'], name='screenshot__created_804117_idx')],
                'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_screenshot_per_scan_snapshot')],
            },
        ),
    ]
@@ -0,0 +1,23 @@
# Generated by Django 5.2.7 on 2026-01-07 13:29

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('asset', '0003_add_screenshot_models'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='status_code',
            field=models.SmallIntegerField(blank=True, help_text='HTTP 响应状态码', null=True),
        ),
        migrations.AddField(
            model_name='screenshotsnapshot',
            name='status_code',
            field=models.SmallIntegerField(blank=True, help_text='HTTP 响应状态码', null=True),
        ),
    ]
@@ -20,6 +20,12 @@ from .snapshot_models import (
     VulnerabilitySnapshot,
 )

+# 截图模型
+from .screenshot_models import (
+    Screenshot,
+    ScreenshotSnapshot,
+)
+
 # 统计模型
 from .statistics_models import AssetStatistics, StatisticsHistory

@@ -39,6 +45,9 @@ __all__ = [
     'HostPortMappingSnapshot',
     'EndpointSnapshot',
     'VulnerabilitySnapshot',
+    # 截图模型
+    'Screenshot',
+    'ScreenshotSnapshot',
     # 统计模型
     'AssetStatistics',
     'StatisticsHistory',
@@ -1,6 +1,7 @@

 from django.db import models
 from django.contrib.postgres.fields import ArrayField
+from django.contrib.postgres.indexes import GinIndex
 from django.core.validators import MinValueValidator, MaxValueValidator


@@ -34,6 +35,12 @@ class Subdomain(models.Model):
             models.Index(fields=['name', 'target']),  # 复合索引,优化 get_by_names_and_target_id 批量查询
             models.Index(fields=['target']),  # 优化从target_id快速查找下面的子域名
             models.Index(fields=['name']),  # 优化从name快速查找子域名,搜索场景
+            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
+            GinIndex(
+                name='subdomain_name_trgm_idx',
+                fields=['name'],
+                opclasses=['gin_trgm_ops']
+            ),
         ]
         constraints = [
             # 普通唯一约束:name + target 组合唯一

@@ -58,40 +65,35 @@ class Endpoint(models.Model):
         help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
     )

-    url = models.CharField(max_length=2000, help_text='最终访问的完整URL')
+    url = models.TextField(help_text='最终访问的完整URL')
     host = models.CharField(
         max_length=253,
         blank=True,
         default='',
         help_text='主机名(域名或IP地址)'
     )
-    location = models.CharField(
-        max_length=1000,
+    location = models.TextField(
         blank=True,
         default='',
         help_text='重定向地址(HTTP 3xx 响应头 Location)'
     )
     created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
-    title = models.CharField(
-        max_length=1000,
+    title = models.TextField(
         blank=True,
         default='',
         help_text='网页标题(HTML <title> 标签内容)'
     )
-    webserver = models.CharField(
-        max_length=200,
+    webserver = models.TextField(
         blank=True,
         default='',
         help_text='服务器类型(HTTP 响应头 Server 值)'
     )
-    body_preview = models.CharField(
-        max_length=1000,
+    response_body = models.TextField(
         blank=True,
         default='',
-        help_text='响应正文前N个字符(默认100个字符)'
+        help_text='HTTP响应体'
     )
-    content_type = models.CharField(
-        max_length=200,
+    content_type = models.TextField(
         blank=True,
         default='',
         help_text='响应类型(HTTP Content-Type 响应头)'

@@ -123,6 +125,11 @@ class Endpoint(models.Model):
         default=list,
         help_text='匹配的GF模式列表,用于识别敏感端点(如api, debug, config等)'
     )
+    response_headers = models.TextField(
+        blank=True,
+        default='',
+        help_text='原始HTTP响应头'
+    )

     class Meta:
         db_table = 'endpoint'

@@ -131,11 +138,28 @@ class Endpoint(models.Model):
         ordering = ['-created_at']
         indexes = [
             models.Index(fields=['-created_at']),
-            models.Index(fields=['target']),  # 优化从target_id快速查找下面的端点(主关联字段)
+            models.Index(fields=['target']),  # 优化从 target_id快速查找下面的端点(主关联字段)
             models.Index(fields=['url']),  # URL索引,优化查询性能
             models.Index(fields=['host']),  # host索引,优化根据主机名查询
             models.Index(fields=['status_code']),  # 状态码索引,优化筛选
             models.Index(fields=['title']),  # title索引,优化智能过滤搜索
+            GinIndex(fields=['tech']),  # GIN索引,优化 tech 数组字段的 __contains 查询
+            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
+            GinIndex(
+                name='endpoint_resp_headers_trgm_idx',
+                fields=['response_headers'],
+                opclasses=['gin_trgm_ops']
+            ),
+            GinIndex(
+                name='endpoint_url_trgm_idx',
+                fields=['url'],
+                opclasses=['gin_trgm_ops']
+            ),
+            GinIndex(
+                name='endpoint_title_trgm_idx',
+                fields=['title'],
+                opclasses=['gin_trgm_ops']
+            ),
         ]
         constraints = [
             # 普通唯一约束:url + target 组合唯一

@@ -160,40 +184,35 @@ class WebSite(models.Model):
         help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
     )

-    url = models.CharField(max_length=2000, help_text='最终访问的完整URL')
+    url = models.TextField(help_text='最终访问的完整URL')
     host = models.CharField(
         max_length=253,
         blank=True,
         default='',
         help_text='主机名(域名或IP地址)'
     )
-    location = models.CharField(
-        max_length=1000,
+    location = models.TextField(
         blank=True,
         default='',
         help_text='重定向地址(HTTP 3xx 响应头 Location)'
     )
     created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
-    title = models.CharField(
-        max_length=1000,
+    title = models.TextField(
         blank=True,
         default='',
         help_text='网页标题(HTML <title> 标签内容)'
     )
-    webserver = models.CharField(
-        max_length=200,
+    webserver = models.TextField(
         blank=True,
         default='',
         help_text='服务器类型(HTTP 响应头 Server 值)'
     )
-    body_preview = models.CharField(
-        max_length=1000,
+    response_body = models.TextField(
         blank=True,
         default='',
-        help_text='响应正文前N个字符(默认100个字符)'
+        help_text='HTTP响应体'
     )
-    content_type = models.CharField(
-        max_length=200,
+    content_type = models.TextField(
         blank=True,
         default='',
         help_text='响应类型(HTTP Content-Type 响应头)'

@@ -219,6 +238,11 @@ class WebSite(models.Model):
         blank=True,
         help_text='是否支持虚拟主机'
     )
+    response_headers = models.TextField(
+        blank=True,
+        default='',
+        help_text='原始HTTP响应头'
+    )

     class Meta:
         db_table = 'website'

@@ -229,9 +253,26 @@ class WebSite(models.Model):
             models.Index(fields=['-created_at']),
             models.Index(fields=['url']),  # URL索引,优化查询性能
             models.Index(fields=['host']),  # host索引,优化根据主机名查询
-            models.Index(fields=['target']),  # 优化从target_id快速查找下面的站点
+            models.Index(fields=['target']),  # 优化从 target_id快速查找下面的站点
             models.Index(fields=['title']),  # title索引,优化智能过滤搜索
             models.Index(fields=['status_code']),  # 状态码索引,优化智能过滤搜索
+            GinIndex(fields=['tech']),  # GIN索引,优化 tech 数组字段的 __contains 查询
+            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
+            GinIndex(
+                name='website_resp_headers_trgm_idx',
+                fields=['response_headers'],
+                opclasses=['gin_trgm_ops']
+            ),
+            GinIndex(
+                name='website_url_trgm_idx',
+                fields=['url'],
+                opclasses=['gin_trgm_ops']
+            ),
+            GinIndex(
+                name='website_title_trgm_idx',
+                fields=['title'],
+                opclasses=['gin_trgm_ops']
+            ),
         ]
         constraints = [
             # 普通唯一约束:url + target 组合唯一

@@ -308,6 +349,12 @@ class Directory(models.Model):
             models.Index(fields=['target']),  # 优化从target_id快速查找下面的目录
             models.Index(fields=['url']),  # URL索引,优化搜索和唯一约束
             models.Index(fields=['status']),  # 状态码索引,优化筛选
+            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
+            GinIndex(
+                name='directory_url_trgm_idx',
+                fields=['url'],
+                opclasses=['gin_trgm_ops']
+            ),
         ]
         constraints = [
             # 普通唯一约束:target + url 组合唯一
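The hunks above only define the indexes. As a rough sketch of the query side (assuming the WebSite model above), Django's __icontains lookups compile to ILIKE '%...%', which the gin_trgm_ops indexes can serve without a sequential scan, while ArrayField __contains uses the plain GIN index on tech:

# Rough sketch (not part of this diff) of the ORM lookups these indexes serve.
from apps.asset.models import WebSite

# ILIKE '%login%' on title/url/response_headers can use the trigram indexes:
suspicious = WebSite.objects.filter(title__icontains='login')

# ArrayField containment uses the plain GIN index on tech:
nginx_sites = WebSite.objects.filter(tech__contains=['nginx'])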
80 backend/apps/asset/models/screenshot_models.py Normal file
@@ -0,0 +1,80 @@
from django.db import models


class ScreenshotSnapshot(models.Model):
    """
    截图快照

    记录:某次扫描中捕获的网站截图
    """

    id = models.AutoField(primary_key=True)
    scan = models.ForeignKey(
        'scan.Scan',
        on_delete=models.CASCADE,
        related_name='screenshot_snapshots',
        help_text='所属的扫描任务'
    )
    url = models.TextField(help_text='截图对应的 URL')
    status_code = models.SmallIntegerField(null=True, blank=True, help_text='HTTP 响应状态码')
    image = models.BinaryField(help_text='截图 WebP 二进制数据(压缩后)')
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'screenshot_snapshot'
        verbose_name = '截图快照'
        verbose_name_plural = '截图快照'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['scan']),
            models.Index(fields=['-created_at']),
        ]
        constraints = [
            models.UniqueConstraint(
                fields=['scan', 'url'],
                name='unique_screenshot_per_scan_snapshot'
            ),
        ]

    def __str__(self):
        return f'{self.url} (Scan #{self.scan_id})'


class Screenshot(models.Model):
    """
    截图资产

    存储:目标的最新截图(从快照同步)
    """

    id = models.AutoField(primary_key=True)
    target = models.ForeignKey(
        'targets.Target',
        on_delete=models.CASCADE,
        related_name='screenshots',
        help_text='所属目标'
    )
    url = models.TextField(help_text='截图对应的 URL')
    status_code = models.SmallIntegerField(null=True, blank=True, help_text='HTTP 响应状态码')
    image = models.BinaryField(help_text='截图 WebP 二进制数据(压缩后)')
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
    updated_at = models.DateTimeField(auto_now=True, help_text='更新时间')

    class Meta:
        db_table = 'screenshot'
        verbose_name = '截图'
        verbose_name_plural = '截图'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['target']),
            models.Index(fields=['-created_at']),
        ]
        constraints = [
            models.UniqueConstraint(
                fields=['target', 'url'],
                name='unique_screenshot_per_target'
            ),
        ]

    def __str__(self):
        return f'{self.url} (Target #{self.target_id})'
@@ -1,5 +1,6 @@
 from django.db import models
 from django.contrib.postgres.fields import ArrayField
+from django.contrib.postgres.indexes import GinIndex
 from django.core.validators import MinValueValidator, MaxValueValidator


@@ -26,6 +27,12 @@ class SubdomainSnapshot(models.Model):
             models.Index(fields=['scan']),
             models.Index(fields=['name']),
             models.Index(fields=['-created_at']),
+            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
+            GinIndex(
+                name='subdomain_snap_name_trgm',
+                fields=['name'],
+                opclasses=['gin_trgm_ops']
+            ),
         ]
         constraints = [
             # 唯一约束:同一次扫描中,同一个子域名只能记录一次

@@ -54,22 +61,27 @@ class WebsiteSnapshot(models.Model):
     )

     # 扫描结果数据
-    url = models.CharField(max_length=2000, help_text='站点URL')
+    url = models.TextField(help_text='站点URL')
     host = models.CharField(max_length=253, blank=True, default='', help_text='主机名(域名或IP地址)')
-    title = models.CharField(max_length=500, blank=True, default='', help_text='页面标题')
-    status = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
+    title = models.TextField(blank=True, default='', help_text='页面标题')
+    status_code = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
     content_length = models.BigIntegerField(null=True, blank=True, help_text='内容长度')
-    location = models.CharField(max_length=1000, blank=True, default='', help_text='重定向位置')
-    web_server = models.CharField(max_length=200, blank=True, default='', help_text='Web服务器')
-    content_type = models.CharField(max_length=200, blank=True, default='', help_text='内容类型')
+    location = models.TextField(blank=True, default='', help_text='重定向位置')
+    webserver = models.TextField(blank=True, default='', help_text='Web服务器')
+    content_type = models.TextField(blank=True, default='', help_text='内容类型')
     tech = ArrayField(
         models.CharField(max_length=100),
         blank=True,
         default=list,
         help_text='技术栈'
     )
-    body_preview = models.TextField(blank=True, default='', help_text='响应体预览')
+    response_body = models.TextField(blank=True, default='', help_text='HTTP响应体')
     vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
+    response_headers = models.TextField(
+        blank=True,
+        default='',
+        help_text='原始HTTP响应头'
+    )
     created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

     class Meta:

@@ -83,6 +95,23 @@ class WebsiteSnapshot(models.Model):
             models.Index(fields=['host']),  # host索引,优化根据主机名查询
             models.Index(fields=['title']),  # title索引,优化标题搜索
             models.Index(fields=['-created_at']),
+            GinIndex(fields=['tech']),  # GIN索引,优化数组字段查询
+            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
+            GinIndex(
+                name='ws_snap_resp_hdr_trgm',
+                fields=['response_headers'],
+                opclasses=['gin_trgm_ops']
+            ),
+            GinIndex(
+                name='ws_snap_url_trgm',
+                fields=['url'],
+                opclasses=['gin_trgm_ops']
+            ),
+            GinIndex(
+                name='ws_snap_title_trgm',
+                fields=['title'],
+                opclasses=['gin_trgm_ops']
+            ),
         ]
         constraints = [
             # 唯一约束:同一次扫描中,同一个URL只能记录一次

@@ -132,6 +161,12 @@ class DirectorySnapshot(models.Model):
             models.Index(fields=['status']),  # 状态码索引,优化筛选
             models.Index(fields=['content_type']),  # content_type索引,优化内容类型搜索
             models.Index(fields=['-created_at']),
+            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
+            GinIndex(
+                name='dir_snap_url_trgm',
+                fields=['url'],
+                opclasses=['gin_trgm_ops']
+            ),
         ]
         constraints = [
             # 唯一约束:同一次扫描中,同一个目录URL只能记录一次

@@ -232,26 +267,26 @@ class EndpointSnapshot(models.Model):
     )

     # 扫描结果数据
-    url = models.CharField(max_length=2000, help_text='端点URL')
+    url = models.TextField(help_text='端点URL')
     host = models.CharField(
         max_length=253,
         blank=True,
         default='',
         help_text='主机名(域名或IP地址)'
     )
-    title = models.CharField(max_length=1000, blank=True, default='', help_text='页面标题')
+    title = models.TextField(blank=True, default='', help_text='页面标题')
     status_code = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
     content_length = models.IntegerField(null=True, blank=True, help_text='内容长度')
-    location = models.CharField(max_length=1000, blank=True, default='', help_text='重定向位置')
-    webserver = models.CharField(max_length=200, blank=True, default='', help_text='Web服务器')
-    content_type = models.CharField(max_length=200, blank=True, default='', help_text='内容类型')
+    location = models.TextField(blank=True, default='', help_text='重定向位置')
+    webserver = models.TextField(blank=True, default='', help_text='Web服务器')
+    content_type = models.TextField(blank=True, default='', help_text='内容类型')
     tech = ArrayField(
         models.CharField(max_length=100),
         blank=True,
         default=list,
         help_text='技术栈'
     )
-    body_preview = models.CharField(max_length=1000, blank=True, default='', help_text='响应体预览')
+    response_body = models.TextField(blank=True, default='', help_text='HTTP响应体')
     vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
     matched_gf_patterns = ArrayField(
         models.CharField(max_length=100),

@@ -259,6 +294,11 @@ class EndpointSnapshot(models.Model):
         default=list,
         help_text='匹配的GF模式列表'
     )
+    response_headers = models.TextField(
+        blank=True,
+        default='',
+        help_text='原始HTTP响应头'
+    )
     created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

     class Meta:

@@ -274,6 +314,23 @@ class EndpointSnapshot(models.Model):
             models.Index(fields=['status_code']),  # 状态码索引,优化筛选
             models.Index(fields=['webserver']),  # webserver索引,优化服务器搜索
             models.Index(fields=['-created_at']),
+            GinIndex(fields=['tech']),  # GIN索引,优化数组字段查询
+            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
+            GinIndex(
+                name='ep_snap_resp_hdr_trgm',
+                fields=['response_headers'],
+                opclasses=['gin_trgm_ops']
+            ),
+            GinIndex(
+                name='ep_snap_url_trgm',
+                fields=['url'],
+                opclasses=['gin_trgm_ops']
+            ),
+            GinIndex(
+                name='ep_snap_title_trgm',
+                fields=['title'],
+                opclasses=['gin_trgm_ops']
+            ),
         ]
         constraints = [
             # 唯一约束:同一次扫描中,同一个URL只能记录一次
@@ -48,12 +48,13 @@ class DjangoEndpointRepository:
                 status_code=item.status_code,
                 content_length=item.content_length,
                 webserver=item.webserver or '',
-                body_preview=item.body_preview or '',
+                response_body=item.response_body or '',
                 content_type=item.content_type or '',
                 tech=item.tech if item.tech else [],
                 vhost=item.vhost,
                 location=item.location or '',
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else ''
             )
             for item in unique_items
         ]

@@ -65,8 +66,8 @@ class DjangoEndpointRepository:
             unique_fields=['url', 'target'],
             update_fields=[
                 'host', 'title', 'status_code', 'content_length',
-                'webserver', 'body_preview', 'content_type', 'tech',
-                'vhost', 'location', 'matched_gf_patterns'
+                'webserver', 'response_body', 'content_type', 'tech',
+                'vhost', 'location', 'matched_gf_patterns', 'response_headers'
             ],
             batch_size=1000
         )

@@ -138,12 +139,13 @@ class DjangoEndpointRepository:
                 status_code=item.status_code,
                 content_length=item.content_length,
                 webserver=item.webserver or '',
-                body_preview=item.body_preview or '',
+                response_body=item.response_body or '',
                 content_type=item.content_type or '',
                 tech=item.tech if item.tech else [],
                 vhost=item.vhost,
                 location=item.location or '',
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else ''
             )
             for item in unique_items
         ]

@@ -183,7 +185,7 @@ class DjangoEndpointRepository:
             .values(
                 'url', 'host', 'location', 'title', 'status_code',
                 'content_length', 'content_type', 'webserver', 'tech',
-                'body_preview', 'vhost', 'matched_gf_patterns', 'created_at'
+                'response_body', 'response_headers', 'vhost', 'matched_gf_patterns', 'created_at'
             )
             .order_by('url')
         )
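These hunks thread the renamed response_body and new response_headers columns through the existing bulk_create upsert. For reference, a minimal sketch of that upsert pattern (Django 4.1+; the shortened update_fields list here is illustrative):

# Minimal sketch of the upsert pattern used above: on a (url, target)
# conflict the listed update_fields are overwritten in place.
from apps.asset.models import Endpoint

def upsert_endpoints(rows: list[Endpoint]) -> None:
    Endpoint.objects.bulk_create(
        rows,
        update_conflicts=True,            # INSERT ... ON CONFLICT DO UPDATE
        unique_fields=['url', 'target'],  # must match a unique constraint
        update_fields=['host', 'title', 'status_code', 'response_body', 'response_headers'],
        batch_size=1000,
    )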
@@ -49,12 +49,13 @@ class DjangoWebSiteRepository:
                 location=item.location or '',
                 title=item.title or '',
                 webserver=item.webserver or '',
-                body_preview=item.body_preview or '',
+                response_body=item.response_body or '',
                 content_type=item.content_type or '',
                 tech=item.tech if item.tech else [],
                 status_code=item.status_code,
                 content_length=item.content_length,
-                vhost=item.vhost
+                vhost=item.vhost,
+                response_headers=item.response_headers if item.response_headers else ''
             )
             for item in unique_items
         ]

@@ -66,8 +67,8 @@ class DjangoWebSiteRepository:
             unique_fields=['url', 'target'],
             update_fields=[
                 'host', 'location', 'title', 'webserver',
-                'body_preview', 'content_type', 'tech',
-                'status_code', 'content_length', 'vhost'
+                'response_body', 'content_type', 'tech',
+                'status_code', 'content_length', 'vhost', 'response_headers'
             ],
             batch_size=1000
         )

@@ -132,12 +133,13 @@ class DjangoWebSiteRepository:
                 location=item.location or '',
                 title=item.title or '',
                 webserver=item.webserver or '',
-                body_preview=item.body_preview or '',
+                response_body=item.response_body or '',
                 content_type=item.content_type or '',
                 tech=item.tech if item.tech else [],
                 status_code=item.status_code,
                 content_length=item.content_length,
-                vhost=item.vhost
+                vhost=item.vhost,
+                response_headers=item.response_headers if item.response_headers else ''
             )
             for item in unique_items
         ]

@@ -177,7 +179,7 @@ class DjangoWebSiteRepository:
             .values(
                 'url', 'host', 'location', 'title', 'status_code',
                 'content_length', 'content_type', 'webserver', 'tech',
-                'body_preview', 'vhost', 'created_at'
+                'response_body', 'response_headers', 'vhost', 'created_at'
             )
             .order_by('url')
         )
@@ -44,6 +44,7 @@ class DjangoEndpointSnapshotRepository:
             snapshots.append(EndpointSnapshot(
                 scan_id=item.scan_id,
                 url=item.url,
+                host=item.host if item.host else '',
                 title=item.title,
                 status_code=item.status_code,
                 content_length=item.content_length,

@@ -51,9 +52,10 @@ class DjangoEndpointSnapshotRepository:
                 webserver=item.webserver,
                 content_type=item.content_type,
                 tech=item.tech if item.tech else [],
-                body_preview=item.body_preview,
+                response_body=item.response_body,
                 vhost=item.vhost,
-                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
+                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
+                response_headers=item.response_headers if item.response_headers else ''
             ))

         # 批量创建(忽略冲突,基于唯一约束去重)

@@ -100,7 +102,7 @@ class DjangoEndpointSnapshotRepository:
             .values(
                 'url', 'host', 'location', 'title', 'status_code',
                 'content_length', 'content_type', 'webserver', 'tech',
-                'body_preview', 'vhost', 'matched_gf_patterns', 'created_at'
+                'response_body', 'response_headers', 'vhost', 'matched_gf_patterns', 'created_at'
             )
             .order_by('url')
         )
@@ -195,3 +195,32 @@ class DjangoHostPortMappingSnapshotRepository:

         for row in qs.iterator(chunk_size=batch_size):
             yield row
+
+    def iter_unique_host_ports_by_scan(
+        self,
+        scan_id: int,
+        batch_size: int = 1000
+    ) -> Iterator[dict]:
+        """
+        流式获取扫描下的唯一 host:port 组合(去重)
+
+        用于生成 URL 时避免重复,同一个 host:port 可能对应多个 IP,
+        但生成 URL 时只需要一个。
+
+        Args:
+            scan_id: 扫描 ID
+            batch_size: 每批数据量
+
+        Yields:
+            {'host': 'example.com', 'port': 80}
+        """
+        qs = (
+            HostPortMappingSnapshot.objects
+            .filter(scan_id=scan_id)
+            .values('host', 'port')
+            .distinct()
+            .order_by('host', 'port')
+        )
+
+        for row in qs.iterator(chunk_size=batch_size):
+            yield row
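The new iterator exists to feed URL generation. An illustrative consumer (the import path and the scheme-selection rule here are assumptions, not taken from this diff):

# Illustrative consumer: turn unique host:port pairs into candidate URLs,
# mapping conventional TLS ports to https.
from apps.asset.repositories.snapshot import DjangoHostPortMappingSnapshotRepository  # path assumed

def candidate_urls(scan_id: int):
    repo = DjangoHostPortMappingSnapshotRepository()
    for row in repo.iter_unique_host_ports_by_scan(scan_id):
        scheme = 'https' if row['port'] in (443, 8443) else 'http'
        yield f"{scheme}://{row['host']}:{row['port']}"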
@@ -46,14 +46,15 @@ class DjangoWebsiteSnapshotRepository:
                 url=item.url,
                 host=item.host,
                 title=item.title,
-                status=item.status,
+                status_code=item.status_code,
                 content_length=item.content_length,
                 location=item.location,
-                web_server=item.web_server,
+                webserver=item.webserver,
                 content_type=item.content_type,
                 tech=item.tech if item.tech else [],
-                body_preview=item.body_preview,
-                vhost=item.vhost
+                response_body=item.response_body,
+                vhost=item.vhost,
+                response_headers=item.response_headers if item.response_headers else ''
             ))

         # 批量创建(忽略冲突,基于唯一约束去重)

@@ -98,26 +99,12 @@ class DjangoWebsiteSnapshotRepository:
             WebsiteSnapshot.objects
             .filter(scan_id=scan_id)
             .values(
-                'url', 'host', 'location', 'title', 'status',
-                'content_length', 'content_type', 'web_server', 'tech',
-                'body_preview', 'vhost', 'created_at'
+                'url', 'host', 'location', 'title', 'status_code',
+                'content_length', 'content_type', 'webserver', 'tech',
+                'response_body', 'response_headers', 'vhost', 'created_at'
             )
             .order_by('url')
         )

         for row in qs.iterator(chunk_size=batch_size):
-            # 重命名字段以匹配 CSV 表头
-            yield {
-                'url': row['url'],
-                'host': row['host'],
-                'location': row['location'],
-                'title': row['title'],
-                'status_code': row['status'],
-                'content_length': row['content_length'],
-                'content_type': row['content_type'],
-                'webserver': row['web_server'],
-                'tech': row['tech'],
-                'body_preview': row['body_preview'],
-                'vhost': row['vhost'],
-                'created_at': row['created_at'],
-            }
+            yield row
@@ -7,6 +7,7 @@ from .models.snapshot_models import (
     EndpointSnapshot,
     VulnerabilitySnapshot,
 )
+from .models.screenshot_models import Screenshot, ScreenshotSnapshot


 # 注意:IPAddress 和 Port 模型已被重构为 HostPortMapping

@@ -67,9 +68,10 @@ class SubdomainListSerializer(serializers.ModelSerializer):


 class WebSiteSerializer(serializers.ModelSerializer):
-    """站点序列化器"""
+    """站点序列化器(目标详情页)"""

     subdomain = serializers.CharField(source='subdomain.name', allow_blank=True, default='')
+    responseHeaders = serializers.CharField(source='response_headers', read_only=True)  # 原始HTTP响应头

     class Meta:
         model = WebSite

@@ -83,9 +85,10 @@ class WebSiteSerializer(serializers.ModelSerializer):
             'content_type',
             'status_code',
             'content_length',
-            'body_preview',
+            'response_body',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP响应头
             'subdomain',
             'created_at',
         ]

@@ -140,6 +143,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
         source='matched_gf_patterns',
         read_only=True,
     )
+    responseHeaders = serializers.CharField(source='response_headers', read_only=True)  # 原始HTTP响应头

     class Meta:
         model = Endpoint

@@ -152,9 +156,10 @@ class EndpointListSerializer(serializers.ModelSerializer):
             'content_length',
             'content_type',
             'webserver',
-            'body_preview',
+            'response_body',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP响应头
             'gfPatterns',
             'created_at',
         ]

@@ -213,8 +218,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
     """网站快照序列化器(用于扫描历史)"""

     subdomain_name = serializers.CharField(source='subdomain.name', read_only=True)
-    webserver = serializers.CharField(source='web_server', read_only=True)  # 映射字段名
-    status_code = serializers.IntegerField(source='status', read_only=True)  # 映射字段名
+    responseHeaders = serializers.CharField(source='response_headers', read_only=True)  # 原始HTTP响应头

     class Meta:
         model = WebsiteSnapshot

@@ -223,13 +227,14 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
             'url',
             'location',
             'title',
-            'webserver',  # 使用映射后的字段名
+            'webserver',
             'content_type',
-            'status_code',  # 使用映射后的字段名
+            'status_code',
             'content_length',
-            'body_preview',
+            'response_body',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP响应头
             'subdomain_name',
             'created_at',
         ]

@@ -264,6 +269,7 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
         source='matched_gf_patterns',
         read_only=True,
     )
+    responseHeaders = serializers.CharField(source='response_headers', read_only=True)  # 原始HTTP响应头

     class Meta:
         model = EndpointSnapshot

@@ -277,10 +283,31 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
             'content_type',
             'status_code',
             'content_length',
-            'body_preview',
+            'response_body',
             'tech',
             'vhost',
+            'responseHeaders',  # HTTP响应头
             'gfPatterns',
             'created_at',
         ]
         read_only_fields = fields
+
+
+# ==================== 截图序列化器 ====================
+
+class ScreenshotListSerializer(serializers.ModelSerializer):
+    """截图资产列表序列化器(不包含 image 字段)"""
+
+    class Meta:
+        model = Screenshot
+        fields = ['id', 'url', 'status_code', 'created_at', 'updated_at']
+        read_only_fields = fields
+
+
+class ScreenshotSnapshotListSerializer(serializers.ModelSerializer):
+    """截图快照列表序列化器(不包含 image 字段)"""
+
+    class Meta:
+        model = ScreenshotSnapshot
+        fields = ['id', 'url', 'status_code', 'created_at']
+        read_only_fields = fields
@@ -27,7 +27,8 @@ class EndpointService:
         'url': 'url',
         'host': 'host',
         'title': 'title',
         'status': 'status_code',
+        'status_code': 'status_code',
         'tech': 'tech',
     }

     def __init__(self):

@@ -115,7 +116,7 @@ class EndpointService:
         """获取目标下的所有端点"""
         queryset = self.repo.get_by_target(target_id)
         if filter_query:
-            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
+            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
         return queryset

     def count_endpoints_by_target(self, target_id: int) -> int:

@@ -134,7 +135,7 @@ class EndpointService:
         """获取所有端点(全局查询)"""
         queryset = self.repo.get_all()
         if filter_query:
-            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
+            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
         return queryset

     def iter_endpoint_urls_by_target(self, target_id: int, chunk_size: int = 1000) -> Iterator[str]:

@@ -19,7 +19,8 @@ class WebSiteService:
         'url': 'url',
         'host': 'host',
         'title': 'title',
         'status': 'status_code',
+        'status_code': 'status_code',
         'tech': 'tech',
     }

     def __init__(self, repository=None):

@@ -107,14 +108,14 @@ class WebSiteService:
         """获取目标下的所有网站"""
         queryset = self.repo.get_by_target(target_id)
         if filter_query:
-            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
+            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
         return queryset

     def get_all(self, filter_query: Optional[str] = None):
         """获取所有网站"""
         queryset = self.repo.get_all()
         if filter_query:
-            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
+            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
         return queryset

     def get_by_url(self, url: str, target_id: int) -> int:
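apply_filters itself is not part of this diff, so the exact meaning of json_array_fields is not shown; presumably it routes those keys to ArrayField containment instead of plain string matching. A hedged sketch of that behavior (the function below is illustrative, not the project's implementation):

# Hedged sketch of what json_array_fields likely enables in apply_filters.
def apply_filters_sketch(queryset, parsed: dict, mapping: dict, json_array_fields=()):
    for key, value in parsed.items():
        field = mapping.get(key)
        if field is None:
            continue
        if field in json_array_fields:
            # ArrayField containment, e.g. tech=nginx -> tech__contains=['nginx']
            queryset = queryset.filter(**{f'{field}__contains': [value]})
        else:
            queryset = queryset.filter(**{f'{field}__icontains': value})
    return queryset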
186 backend/apps/asset/services/playwright_screenshot_service.py Normal file
@@ -0,0 +1,186 @@
"""
Playwright 截图服务

使用 Playwright 异步批量捕获网站截图
"""
import asyncio
import logging
from typing import Optional, AsyncGenerator

logger = logging.getLogger(__name__)


class PlaywrightScreenshotService:
    """Playwright 截图服务 - 异步多 Page 并发截图"""

    # 内置默认值(不暴露给用户)
    DEFAULT_VIEWPORT_WIDTH = 1920
    DEFAULT_VIEWPORT_HEIGHT = 1080
    DEFAULT_TIMEOUT = 30000  # 毫秒
    DEFAULT_JPEG_QUALITY = 85

    def __init__(
        self,
        viewport_width: int = DEFAULT_VIEWPORT_WIDTH,
        viewport_height: int = DEFAULT_VIEWPORT_HEIGHT,
        timeout: int = DEFAULT_TIMEOUT,
        concurrency: int = 5
    ):
        """
        初始化 Playwright 截图服务

        Args:
            viewport_width: 视口宽度(像素)
            viewport_height: 视口高度(像素)
            timeout: 页面加载超时时间(毫秒)
            concurrency: 并发截图数
        """
        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
        self.timeout = timeout
        self.concurrency = concurrency

    async def capture_screenshot(self, url: str, page) -> tuple[Optional[bytes], Optional[int]]:
        """
        捕获单个 URL 的截图

        Args:
            url: 目标 URL
            page: Playwright Page 对象

        Returns:
            (screenshot_bytes, status_code) 元组
            - screenshot_bytes: JPEG 格式的截图字节数据,失败返回 None
            - status_code: HTTP 响应状态码,失败返回 None
        """
        status_code = None
        try:
            # 尝试加载页面,即使返回错误状态码也继续截图
            try:
                response = await page.goto(url, timeout=self.timeout, wait_until='networkidle')
                if response:
                    status_code = response.status
            except Exception as goto_error:
                # 页面加载失败(4xx/5xx 或其他错误),但页面可能已部分渲染
                # 仍然尝试截图以捕获错误页面
                logger.debug("页面加载异常但尝试截图: %s, 错误: %s", url, str(goto_error)[:50])

            # 尝试截图(即使 goto 失败)
            screenshot_bytes = await page.screenshot(
                type='jpeg',
                quality=self.DEFAULT_JPEG_QUALITY,
                full_page=False
            )
            return (screenshot_bytes, status_code)
        except asyncio.TimeoutError:
            logger.warning("截图超时: %s", url)
            return (None, None)
        except Exception as e:
            logger.warning("截图失败: %s, 错误: %s", url, str(e)[:100])
            return (None, None)

    async def _capture_with_semaphore(
        self,
        url: str,
        context,
        semaphore: asyncio.Semaphore
    ) -> tuple[str, Optional[bytes], Optional[int]]:
        """
        使用信号量控制并发的截图任务

        Args:
            url: 目标 URL
            context: Playwright BrowserContext
            semaphore: 并发控制信号量

        Returns:
            (url, screenshot_bytes, status_code) 元组
        """
        async with semaphore:
            page = await context.new_page()
            try:
                screenshot_bytes, status_code = await self.capture_screenshot(url, page)
                return (url, screenshot_bytes, status_code)
            finally:
                await page.close()

    async def capture_batch(
        self,
        urls: list[str]
    ) -> AsyncGenerator[tuple[str, Optional[bytes], Optional[int]], None]:
        """
        批量捕获截图(异步生成器)

        使用单个 BrowserContext + 多 Page 并发模式
        通过 Semaphore 控制并发数

        Args:
            urls: URL 列表

        Yields:
            (url, screenshot_bytes, status_code) 元组
        """
        if not urls:
            return

        from playwright.async_api import async_playwright

        async with async_playwright() as p:
            # 启动浏览器(headless 模式)
            browser = await p.chromium.launch(
                headless=True,
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-gpu'
                ]
            )

            try:
                # 创建单个 context
                context = await browser.new_context(
                    viewport={
                        'width': self.viewport_width,
                        'height': self.viewport_height
                    },
                    ignore_https_errors=True,
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                )

                # 使用 Semaphore 控制并发
                semaphore = asyncio.Semaphore(self.concurrency)

                # 创建所有任务
                tasks = [
                    self._capture_with_semaphore(url, context, semaphore)
                    for url in urls
                ]

                # 使用 as_completed 实现流式返回
                for coro in asyncio.as_completed(tasks):
                    result = await coro
                    yield result

                await context.close()

            finally:
                await browser.close()

    async def capture_batch_collect(
        self,
        urls: list[str]
    ) -> list[tuple[str, Optional[bytes], Optional[int]]]:
        """
        批量捕获截图(收集所有结果)

        Args:
            urls: URL 列表

        Returns:
            [(url, screenshot_bytes, status_code), ...] 列表
        """
        results = []
        async for result in self.capture_batch(urls):
            results.append(result)
        return results
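A minimal driver for the service above (the URLs are placeholders):

# Illustrative driver for PlaywrightScreenshotService.
import asyncio

async def main() -> None:
    service = PlaywrightScreenshotService(concurrency=3)
    async for url, image_bytes, status in service.capture_batch(
        ['https://example.com', 'https://example.org']
    ):
        print(url, status, len(image_bytes) if image_bytes else 'failed')

asyncio.run(main())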
187 backend/apps/asset/services/screenshot_service.py Normal file
@@ -0,0 +1,187 @@
"""
截图服务

负责截图的压缩、保存和同步
"""
import io
import logging
import os
from typing import Optional

from PIL import Image

logger = logging.getLogger(__name__)


class ScreenshotService:
    """截图服务 - 负责压缩、保存和同步"""

    def __init__(self, max_width: int = 800, target_kb: int = 100):
        """
        初始化截图服务

        Args:
            max_width: 最大宽度(像素)
            target_kb: 目标文件大小(KB)
        """
        self.max_width = max_width
        self.target_kb = target_kb

    def compress_screenshot(self, image_path: str) -> Optional[bytes]:
        """
        压缩截图为 WebP 格式

        Args:
            image_path: PNG 截图文件路径

        Returns:
            压缩后的 WebP 二进制数据,失败返回 None
        """
        if not os.path.exists(image_path):
            logger.warning(f"截图文件不存在: {image_path}")
            return None

        try:
            with Image.open(image_path) as img:
                return self._compress_image(img)
        except Exception as e:
            logger.error(f"压缩截图失败: {image_path}, 错误: {e}")
            return None

    def compress_from_bytes(self, image_bytes: bytes) -> Optional[bytes]:
        """
        从字节数据压缩截图为 WebP 格式

        Args:
            image_bytes: JPEG/PNG 图片字节数据

        Returns:
            压缩后的 WebP 二进制数据,失败返回 None
        """
        if not image_bytes:
            return None

        try:
            img = Image.open(io.BytesIO(image_bytes))
            return self._compress_image(img)
        except Exception as e:
            logger.error(f"从字节压缩截图失败: {e}")
            return None

    def _compress_image(self, img: Image.Image) -> Optional[bytes]:
        """
        压缩 PIL Image 对象为 WebP 格式

        Args:
            img: PIL Image 对象

        Returns:
            压缩后的 WebP 二进制数据
        """
        try:
            if img.mode in ('RGBA', 'P'):
                img = img.convert('RGB')

            width, height = img.size
            if width > self.max_width:
                ratio = self.max_width / width
                new_size = (self.max_width, int(height * ratio))
                img = img.resize(new_size, Image.Resampling.LANCZOS)

            quality = 80
            while quality >= 10:
                buffer = io.BytesIO()
                img.save(buffer, format='WEBP', quality=quality, method=6)
                if len(buffer.getvalue()) <= self.target_kb * 1024:
                    return buffer.getvalue()
                quality -= 10

            return buffer.getvalue()
        except Exception as e:
            logger.error(f"压缩图片失败: {e}")
            return None

    def save_screenshot_snapshot(
        self,
        scan_id: int,
        url: str,
        image_data: bytes,
        status_code: int | None = None
    ) -> bool:
        """
        保存截图快照到 ScreenshotSnapshot 表

        Args:
            scan_id: 扫描 ID
            url: 截图对应的 URL
            image_data: 压缩后的图片二进制数据
            status_code: HTTP 响应状态码

        Returns:
            是否保存成功
        """
        from apps.asset.models import ScreenshotSnapshot

        try:
            ScreenshotSnapshot.objects.update_or_create(
                scan_id=scan_id,
                url=url,
                defaults={'image': image_data, 'status_code': status_code}
            )
            return True
        except Exception as e:
            logger.error(f"保存截图快照失败: scan_id={scan_id}, url={url}, 错误: {e}")
            return False

    def sync_screenshots_to_asset(self, scan_id: int, target_id: int) -> int:
        """
        将扫描的截图快照同步到资产表

        Args:
            scan_id: 扫描 ID
            target_id: 目标 ID

        Returns:
            同步的截图数量
        """
        from apps.asset.models import Screenshot, ScreenshotSnapshot

        # 使用 iterator() 避免 QuerySet 缓存大量 BinaryField 数据导致内存飙升
        # chunk_size=50: 每次只加载 50 条记录,处理完后释放内存
        snapshots = ScreenshotSnapshot.objects.filter(scan_id=scan_id).iterator(chunk_size=50)
        count = 0

        for snapshot in snapshots:
            try:
                Screenshot.objects.update_or_create(
                    target_id=target_id,
                    url=snapshot.url,
                    defaults={
                        'image': snapshot.image,
                        'status_code': snapshot.status_code
                    }
                )
                count += 1
            except Exception as e:
                logger.error(f"同步截图到资产表失败: url={snapshot.url}, 错误: {e}")

        logger.info(f"同步截图完成: scan_id={scan_id}, target_id={target_id}, 数量={count}")
        return count

    def process_and_save_screenshot(self, scan_id: int, url: str, image_path: str) -> bool:
        """
        处理并保存截图(压缩 + 保存快照)

        Args:
            scan_id: 扫描 ID
            url: 截图对应的 URL
            image_path: PNG 截图文件路径

        Returns:
            是否处理成功
        """
        image_data = self.compress_screenshot(image_path)
        if image_data is None:
            return False

        return self.save_screenshot_snapshot(scan_id, url, image_data)
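Putting the two services together, an illustrative pipeline might look like the sketch below; the ORM calls are shown synchronously for brevity, although a real async caller would wrap them (for example with asgiref's sync_to_async):

# Illustrative pipeline (not from this diff); import paths match the files above.
import asyncio

from apps.asset.services.playwright_screenshot_service import PlaywrightScreenshotService
from apps.asset.services.screenshot_service import ScreenshotService

async def screenshot_scan(scan_id: int, target_id: int, urls: list[str]) -> None:
    capturer = PlaywrightScreenshotService(concurrency=5)
    store = ScreenshotService(max_width=800, target_kb=100)

    async for url, jpeg_bytes, status in capturer.capture_batch(urls):
        webp = store.compress_from_bytes(jpeg_bytes) if jpeg_bytes else None
        if webp is not None:
            # Sync ORM call, shown inline for brevity.
            store.save_screenshot_snapshot(scan_id, url, webp, status_code=status)

    store.sync_screenshots_to_asset(scan_id, target_id)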
477 backend/apps/asset/services/search_service.py Normal file
@@ -0,0 +1,477 @@
"""
资产搜索服务

提供资产搜索的核心业务逻辑:
- 从物化视图查询数据
- 支持表达式语法解析
- 支持 =(模糊)、==(精确)、!=(不等于)操作符
- 支持 && (AND) 和 || (OR) 逻辑组合
- 支持 Website 和 Endpoint 两种资产类型
"""

import logging
import re
from typing import Optional, List, Dict, Any, Tuple, Literal, Iterator

from django.db import connection

logger = logging.getLogger(__name__)

# 支持的字段映射(前端字段名 -> 数据库字段名)
FIELD_MAPPING = {
    'host': 'host',
    'url': 'url',
    'title': 'title',
    'tech': 'tech',
    'status': 'status_code',
    'body': 'response_body',
    'header': 'response_headers',
}

# 数组类型字段
ARRAY_FIELDS = {'tech'}

# 资产类型到视图名的映射
VIEW_MAPPING = {
    'website': 'asset_search_view',
    'endpoint': 'endpoint_search_view',
}

# 资产类型到原表名的映射(用于 JOIN 获取数组字段)
# ⚠️ 重要:pg_ivm 不支持 ArrayField,所有数组字段必须从原表 JOIN 获取
TABLE_MAPPING = {
    'website': 'website',
    'endpoint': 'endpoint',
}

# 有效的资产类型
VALID_ASSET_TYPES = {'website', 'endpoint'}

# Website 查询字段(v=视图,t=原表)
# ⚠️ 注意:t.tech 从原表获取,因为 pg_ivm 不支持 ArrayField
WEBSITE_SELECT_FIELDS = """
    v.id,
    v.url,
    v.host,
    v.title,
    t.tech,  -- ArrayField,从 website 表 JOIN 获取
    v.status_code,
    v.response_headers,
    v.response_body,
    v.content_type,
    v.content_length,
    v.webserver,
    v.location,
    v.vhost,
    v.created_at,
    v.target_id
"""

# Endpoint 查询字段
# ⚠️ 注意:t.tech 和 t.matched_gf_patterns 从原表获取,因为 pg_ivm 不支持 ArrayField
ENDPOINT_SELECT_FIELDS = """
    v.id,
    v.url,
    v.host,
    v.title,
    t.tech,  -- ArrayField,从 endpoint 表 JOIN 获取
    v.status_code,
    v.response_headers,
    v.response_body,
    v.content_type,
    v.content_length,
    v.webserver,
    v.location,
    v.vhost,
    t.matched_gf_patterns,  -- ArrayField,从 endpoint 表 JOIN 获取
    v.created_at,
    v.target_id
"""


class SearchQueryParser:
    """
    搜索查询解析器

    支持语法:
    - field="value"   模糊匹配(ILIKE %value%)
    - field=="value"  精确匹配
    - field!="value"  不等于
    - &&              AND 连接
    - ||              OR 连接
    - ()              分组(暂不支持嵌套)

    示例:
    - host="api" && tech="nginx"
    - tech="vue" || tech="react"
    - status=="200" && host!="test"
    """

    # 匹配单个条件: field="value" 或 field=="value" 或 field!="value"
    CONDITION_PATTERN = re.compile(r'(\w+)\s*(==|!=|=)\s*"([^"]*)"')

    @classmethod
    def parse(cls, query: str) -> Tuple[str, List[Any]]:
        """
        解析查询字符串,返回 SQL WHERE 子句和参数

        Args:
            query: 搜索查询字符串

        Returns:
            (where_clause, params) 元组
        """
        if not query or not query.strip():
            return "1=1", []

        query = query.strip()

        # 检查是否包含操作符语法,如果不包含则作为 host 模糊搜索
        if not cls.CONDITION_PATTERN.search(query):
            # 裸文本,默认作为 host 模糊搜索(v 是视图别名)
            return "v.host ILIKE %s", [f"%{query}%"]

        # 按 || 分割为 OR 组
        or_groups = cls._split_by_or(query)

        if len(or_groups) == 1:
            # 没有 OR,直接解析 AND 条件
            return cls._parse_and_group(or_groups[0])

        # 多个 OR 组
        or_clauses = []
        all_params = []

        for group in or_groups:
            clause, params = cls._parse_and_group(group)
            if clause and clause != "1=1":
                or_clauses.append(f"({clause})")
                all_params.extend(params)

        if not or_clauses:
            return "1=1", []

        return " OR ".join(or_clauses), all_params

    @classmethod
    def _split_by_or(cls, query: str) -> List[str]:
        """按 || 分割查询,但忽略引号内的 ||"""
        parts = []
        current = ""
        in_quotes = False
        i = 0

        while i < len(query):
            char = query[i]

            if char == '"':
                in_quotes = not in_quotes
                current += char
            elif not in_quotes and i + 1 < len(query) and query[i:i+2] == '||':
                if current.strip():
                    parts.append(current.strip())
                current = ""
                i += 1  # 跳过第二个 |
            else:
                current += char

            i += 1

        if current.strip():
            parts.append(current.strip())

        return parts if parts else [query]

    @classmethod
    def _parse_and_group(cls, group: str) -> Tuple[str, List[Any]]:
        """解析 AND 组(用 && 连接的条件)"""
        # 移除外层括号
        group = group.strip()
        if group.startswith('(') and group.endswith(')'):
            group = group[1:-1].strip()

        # 按 && 分割
        parts = cls._split_by_and(group)

        and_clauses = []
        all_params = []

        for part in parts:
            clause, params = cls._parse_condition(part.strip())
            if clause:
                and_clauses.append(clause)
                all_params.extend(params)

        if not and_clauses:
            return "1=1", []

        return " AND ".join(and_clauses), all_params

    @classmethod
    def _split_by_and(cls, query: str) -> List[str]:
        """按 && 分割查询,但忽略引号内的 &&"""
        parts = []
        current = ""
        in_quotes = False
        i = 0

        while i < len(query):
            char = query[i]

            if char == '"':
                in_quotes = not in_quotes
                current += char
            elif not in_quotes and i + 1 < len(query) and query[i:i+2] == '&&':
                if current.strip():
                    parts.append(current.strip())
                current = ""
                i += 1  # 跳过第二个 &
            else:
                current += char

            i += 1

        if current.strip():
            parts.append(current.strip())

        return parts if parts else [query]

    @classmethod
    def _parse_condition(cls, condition: str) -> Tuple[Optional[str], List[Any]]:
        """
        解析单个条件

        Returns:
            (sql_clause, params) 或 (None, []) 如果解析失败
        """
        # 移除括号
        condition = condition.strip()
        if condition.startswith('(') and condition.endswith(')'):
            condition = condition[1:-1].strip()

        match = cls.CONDITION_PATTERN.match(condition)
        if not match:
            logger.warning(f"无法解析条件: {condition}")
            return None, []

        field, operator, value = match.groups()
        field = field.lower()

        # 验证字段
        if field not in FIELD_MAPPING:
            logger.warning(f"未知字段: {field}")
            return None, []

        db_field = FIELD_MAPPING[field]
        is_array = field in ARRAY_FIELDS

        # 根据操作符生成 SQL
        if operator == '=':
            # 模糊匹配
            return cls._build_like_condition(db_field, value, is_array)
        elif operator == '==':
            # 精确匹配
            return cls._build_exact_condition(db_field, value, is_array)
        elif operator == '!=':
            # 不等于
            return cls._build_not_equal_condition(db_field, value, is_array)

        return None, []

    @classmethod
    def _build_like_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
        """构建模糊匹配条件"""
        if is_array:
            # 数组字段:检查数组中是否有元素包含该值(从原表 t 获取)
            return f"EXISTS (SELECT 1 FROM unnest(t.{field}) AS elem WHERE elem ILIKE %s)", [f"%{value}%"]
        elif field == 'status_code':
            # 状态码是整数,模糊匹配转为精确匹配
            try:
                return f"v.{field} = %s", [int(value)]
            except ValueError:
                return f"v.{field}::text ILIKE %s", [f"%{value}%"]
        else:
            return f"v.{field} ILIKE %s", [f"%{value}%"]

    @classmethod
    def _build_exact_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
        """构建精确匹配条件"""
        if is_array:
            # 数组字段:检查数组中是否包含该精确值(从原表 t 获取)
            return f"%s = ANY(t.{field})", [value]
        elif field == 'status_code':
            # 状态码是整数
            try:
                return f"v.{field} = %s", [int(value)]
            except ValueError:
                return f"v.{field}::text = %s", [value]
        else:
            return f"v.{field} = %s", [value]

    @classmethod
    def _build_not_equal_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
        """构建不等于条件"""
        if is_array:
            # 数组字段:检查数组中不包含该值(从原表 t 获取)
            return f"NOT (%s = ANY(t.{field}))", [value]
        elif field == 'status_code':
            try:
                return f"(v.{field} IS NULL OR v.{field} != %s)", [int(value)]
            except ValueError:
                return f"(v.{field} IS NULL OR v.{field}::text != %s)", [value]
        else:
            return f"(v.{field} IS NULL OR v.{field} != %s)", [value]
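A quick illustration of the parser contract described in the docstring (expected output shown in comments):

# Demonstration of SearchQueryParser.parse on a compound query.
where, params = SearchQueryParser.parse('host="api" && tech="nginx"')
# where  -> 'v.host ILIKE %s AND EXISTS (SELECT 1 FROM unnest(t.tech) AS elem WHERE elem ILIKE %s)'
# params -> ['%api%', '%nginx%']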
AssetType = Literal['website', 'endpoint']
|
||||
|
||||
|
||||
class AssetSearchService:
|
||||
"""资产搜索服务"""
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
asset_type: AssetType = 'website',
|
||||
limit: Optional[int] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
搜索资产
|
||||
|
||||
Args:
|
||||
query: 搜索查询字符串
|
||||
asset_type: 资产类型 ('website' 或 'endpoint')
|
||||
limit: 最大返回数量(可选)
|
||||
|
||||
Returns:
|
||||
List[Dict]: 搜索结果列表
|
||||
"""
|
||||
where_clause, params = SearchQueryParser.parse(query)
|
||||
|
||||
# 根据资产类型选择视图、原表和字段
|
||||
view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
|
||||
table_name = TABLE_MAPPING.get(asset_type, 'website')
|
||||
select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS
|
||||
|
||||
# JOIN 原表获取数组字段(tech, matched_gf_patterns)
|
||||
sql = f"""
|
||||
SELECT {select_fields}
|
||||
FROM {view_name} v
|
||||
JOIN {table_name} t ON v.id = t.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY v.created_at DESC
|
||||
"""
|
||||
|
||||
# 添加 LIMIT
|
||||
if limit is not None and limit > 0:
|
||||
sql += f" LIMIT {int(limit)}"
|
||||
|
||||
try:
|
||||
with connection.cursor() as cursor:
|
||||
cursor.execute(sql, params)
|
||||
columns = [col[0] for col in cursor.description]
|
||||
results = []
|
||||
|
||||
for row in cursor.fetchall():
|
||||
result = dict(zip(columns, row))
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
except Exception as e:
|
||||
logger.error(f"搜索查询失败: {e}, SQL: {sql}, params: {params}")
|
||||
raise
|
||||
|
||||
    def count(self, query: str, asset_type: AssetType = 'website', statement_timeout_ms: int = 300000) -> int:
        """
        Count the number of search results

        Args:
            query: search query string
            asset_type: asset type ('website' or 'endpoint')
            statement_timeout_ms: SQL statement timeout in milliseconds (default 5 minutes)

        Returns:
            int: total number of results
        """
        where_clause, params = SearchQueryParser.parse(query)

        # Pick the view and source table for the asset type
        view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
        table_name = TABLE_MAPPING.get(asset_type, 'website')

        # JOIN the source table so array-field conditions can be evaluated
        sql = f"SELECT COUNT(*) FROM {view_name} v JOIN {table_name} t ON v.id = t.id WHERE {where_clause}"

        try:
            with connection.cursor() as cursor:
                # Raise the timeout for exports (SET LOCAL is scoped to the current transaction)
                cursor.execute(f"SET LOCAL statement_timeout = {statement_timeout_ms}")
                cursor.execute(sql, params)
                return cursor.fetchone()[0]
        except Exception as e:
            logger.error(f"Count query failed: {e}")
            raise

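One caveat worth noting: PostgreSQL only honors SET LOCAL inside a transaction block, and under Django's default autocommit mode it is ignored with a warning. A minimal sketch of how the timeout could be made to stick, assuming the same cursor usage as above:

from django.db import connection, transaction

def count_with_timeout(sql: str, params: list, timeout_ms: int = 300000) -> int:
    # Illustrative sketch -- open an explicit transaction so SET LOCAL applies.
    with transaction.atomic():
        with connection.cursor() as cursor:
            # Scoped to this transaction only; resets automatically on commit/rollback.
            cursor.execute(f"SET LOCAL statement_timeout = {int(timeout_ms)}")
            cursor.execute(sql, params)
            return cursor.fetchone()[0]
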
    def search_iter(
        self,
        query: str,
        asset_type: AssetType = 'website',
        batch_size: int = 1000,
        statement_timeout_ms: int = 300000
    ) -> Iterator[Dict[str, Any]]:
        """
        Stream search results in batches (memory-friendly)

        Args:
            query: search query string
            asset_type: asset type ('website' or 'endpoint')
            batch_size: number of rows fetched per batch
            statement_timeout_ms: SQL statement timeout in milliseconds (default 5 minutes)

        Yields:
            Dict: a single search result
        """
        where_clause, params = SearchQueryParser.parse(query)

        # Pick the view, source table and select fields for the asset type
        view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
        table_name = TABLE_MAPPING.get(asset_type, 'website')
        select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS

        # Page through with OFFSET/LIMIT (Django's cursor wrapper does not expose named server-side cursors)
        offset = 0

        try:
            while True:
                # JOIN the source table to fetch array fields
                sql = f"""
                    SELECT {select_fields}
                    FROM {view_name} v
                    JOIN {table_name} t ON v.id = t.id
                    WHERE {where_clause}
                    ORDER BY v.created_at DESC
                    LIMIT {batch_size} OFFSET {offset}
                """

                with connection.cursor() as cursor:
                    # Raise the timeout for exports (SET LOCAL is scoped to the current transaction)
                    cursor.execute(f"SET LOCAL statement_timeout = {statement_timeout_ms}")
                    cursor.execute(sql, params)
                    columns = [col[0] for col in cursor.description]
                    rows = cursor.fetchall()

                    if not rows:
                        break

                    for row in rows:
                        yield dict(zip(columns, row))

                    # Fewer rows than batch_size means this was the last batch
                    if len(rows) < batch_size:
                        break

                    offset += batch_size

        except Exception as e:
            logger.error(f"Streaming search query failed: {e}, SQL: {sql}, params: {params}")
            raise

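Because each batch re-runs the query with a growing OFFSET, the database rescans and discards all skipped rows, and concurrent inserts can shift rows between batches. A hedged keyset-pagination sketch that avoids both problems, assuming the select fields include created_at and id as they do above:

def search_iter_keyset(self, query, asset_type='website', batch_size=1000):
    # Illustrative sketch only -- reuses the same mappings as search_iter above.
    where_clause, params = SearchQueryParser.parse(query)
    view = VIEW_MAPPING.get(asset_type, 'asset_search_view')
    table = TABLE_MAPPING.get(asset_type, 'website')
    fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS
    last_key = None
    while True:
        cursor_cond = "AND (v.created_at, v.id) < (%s, %s)" if last_key else ""
        sql = (f"SELECT {fields} FROM {view} v JOIN {table} t ON v.id = t.id "
               f"WHERE ({where_clause}) {cursor_cond} "
               f"ORDER BY v.created_at DESC, v.id DESC LIMIT {int(batch_size)}")
        batch_params = params + list(last_key) if last_key else params
        with connection.cursor() as cursor:
            cursor.execute(sql, batch_params)
            columns = [col[0] for col in cursor.description]
            rows = cursor.fetchall()
        if not rows:
            break
        for row in rows:
            yield dict(zip(columns, row))
        key_idx = {name: i for i, name in enumerate(columns)}
        last_key = (rows[-1][key_idx['created_at']], rows[-1][key_idx['id']])
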
@@ -1,101 +1,110 @@
"""Endpoint Snapshots Service - business logic layer"""

import logging
from typing import List, Iterator
from typing import Iterator, List, Optional

from apps.asset.dtos.snapshot import EndpointSnapshotDTO
from apps.asset.repositories.snapshot import DjangoEndpointSnapshotRepository
from apps.asset.services.asset import EndpointService
from apps.asset.dtos.snapshot import EndpointSnapshotDTO

logger = logging.getLogger(__name__)


class EndpointSnapshotsService:
    """Endpoint snapshot service - manages snapshots and asset sync in one place"""

    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status_code': 'status_code',
        'webserver': 'webserver',
        'tech': 'tech',
    }

    def __init__(self):
        self.snapshot_repo = DjangoEndpointSnapshotRepository()
        self.asset_service = EndpointService()

    def save_and_sync(self, items: List[EndpointSnapshotDTO]) -> None:
        """
        Save endpoint snapshots and sync them to the asset table (single entry point)

        Flow:
        1. Save to the snapshot table (full record)
        2. Sync to the asset table (deduplicated)

        Args:
            items: list of endpoint snapshot DTOs (target_id is required)

        Raises:
            ValueError: if an item's target_id is None
            Exception: on database failure
        """
        if not items:
            return

        # Check that the Scan still exists (guards against racy writes after deletion)
        scan_id = items[0].scan_id
        from apps.scan.repositories import DjangoScanRepository
        if not DjangoScanRepository().exists(scan_id):
            logger.warning("Scan deleted, skipping endpoint snapshot save - scan_id=%s, count=%d", scan_id, len(items))
            return

        try:
            logger.debug("Saving endpoint snapshots and syncing to asset table - count: %d", len(items))

            # Step 1: save to the snapshot table
            logger.debug("Step 1: save to the snapshot table")
            self.snapshot_repo.save_snapshots(items)

            # Step 2: convert to asset DTOs and save to the asset table
            # Uses upsert: new records are inserted, existing ones updated
            logger.debug("Step 2: sync to the asset table (via the service layer)")

            # Step 2: convert to asset DTOs and save to the asset table (upsert)
            asset_items = [item.to_asset_dto() for item in items]

            self.asset_service.bulk_upsert(asset_items)

            logger.info("Endpoint snapshots and asset data saved - count: %d", len(items))

        except Exception as e:
            logger.error(
                "Failed to save endpoint snapshots - count: %d, error: %s",
                len(items),
                str(e),
                exc_info=True
            )
            logger.error("Failed to save endpoint snapshots - count: %d, error: %s", len(items), str(e), exc_info=True)
            raise

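As a usage sketch, a scan worker hands the service a batch of DTOs and lets it handle both writes; the constructor keyword arguments below are illustrative, while scan_id/target_id follow the DTO usage above:

# Illustrative only -- assumes EndpointSnapshotDTO carries scan_id/target_id as used above.
service = EndpointSnapshotsService()
snapshots = [
    EndpointSnapshotDTO(scan_id=42, target_id=7, url='https://example.com/api/users'),
]
service.save_and_sync(snapshots)  # snapshot insert + asset upsert, or raises
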
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status': 'status_code',
        'webserver': 'webserver',
        'tech': 'tech',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):

    def get_by_scan(self, scan_id: int, filter_query: Optional[str] = None):
        """
        Fetch the endpoint snapshots for a given scan

        Args:
            scan_id: scan ID
            filter_query: filter query string

        Returns:
            QuerySet: endpoint snapshot queryset
        """
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self, filter_query: str = None):
        """Fetch all endpoint snapshots"""
    def get_all(self, filter_query: Optional[str] = None):
        """
        Fetch all endpoint snapshots

        Args:
            filter_query: filter query string

        Returns:
            QuerySet: endpoint snapshot queryset
        """
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_endpoint_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all endpoint URLs for a scan."""
        """Stream all endpoint URLs for a scan"""
        queryset = self.snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.url

@@ -103,10 +112,10 @@ class EndpointSnapshotsService:
    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID

        Yields:
            raw data dicts
        """

@@ -91,3 +91,25 @@ class HostPortMappingSnapshotsService:
            raw data dicts {ip, host, port, created_at}
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

    def iter_unique_host_ports_by_scan(
        self,
        scan_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        Stream the unique host:port combinations for a scan (deduplicated)

        Used to avoid duplicates when generating URLs.

        Args:
            scan_id: scan ID
            batch_size: rows per batch

        Yields:
            {'host': 'example.com', 'port': 80}
        """
        return self.snapshot_repo.iter_unique_host_ports_by_scan(
            scan_id=scan_id,
            batch_size=batch_size
        )

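A consumer sketch for the iterator above -- the URL-building logic is illustrative, while the iterator contract ({'host': ..., 'port': ...}) comes from the docstring:

# Illustrative only.
service = HostPortMappingSnapshotsService()
for pair in service.iter_unique_host_ports_by_scan(scan_id=42, batch_size=500):
    scheme = 'https' if pair['port'] == 443 else 'http'
    url = f"{scheme}://{pair['host']}:{pair['port']}"
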
@@ -73,8 +73,8 @@ class WebsiteSnapshotsService:
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status': 'status',
        'webserver': 'web_server',
        'status_code': 'status_code',
        'webserver': 'webserver',
        'tech': 'tech',
    }

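The mapping translates public filter keys into model field names for apply_filters. A hedged sketch of how a query string flows through it (apply_filters' exact parsing lives in apps.common.utils.filter_utils; the model name below is an assumption, the call shape mirrors the service code):

# Illustrative only.
queryset = WebsiteSnapshot.objects.all()           # model name assumed
queryset = apply_filters(
    queryset,
    'status_code="200" tech="nginx"',              # public filter keys
    WebsiteSnapshotsService.FILTER_FIELD_MAPPING,  # -> model fields status_code / tech
)
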
@@ -10,19 +10,28 @@ from .views import (
    DirectoryViewSet,
    VulnerabilityViewSet,
    AssetStatisticsViewSet,
    AssetSearchView,
    AssetSearchExportView,
    EndpointViewSet,
    HostPortMappingViewSet,
    ScreenshotViewSet,
)

# Create the DRF router
router = DefaultRouter()

# Register ViewSets
# Note: the IPAddress model was refactored into HostPortMapping; its routes were removed
router.register(r'subdomains', SubdomainViewSet, basename='subdomain')
router.register(r'websites', WebSiteViewSet, basename='website')
router.register(r'directories', DirectoryViewSet, basename='directory')
router.register(r'endpoints', EndpointViewSet, basename='endpoint')
router.register(r'ip-addresses', HostPortMappingViewSet, basename='ip-address')
router.register(r'vulnerabilities', VulnerabilityViewSet, basename='vulnerability')
router.register(r'screenshots', ScreenshotViewSet, basename='screenshot')
router.register(r'statistics', AssetStatisticsViewSet, basename='asset-statistics')

urlpatterns = [
    path('assets/', include(router.urls)),
    path('assets/search/', AssetSearchView.as_view(), name='asset-search'),
    path('assets/search/export/', AssetSearchExportView.as_view(), name='asset-search-export'),
]

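With those routes registered, the new search endpoint can be exercised like this (a sketch using Django's test client; the query values are placeholders, the path follows the /api/assets/search/ route documented below):

# Illustrative only.
from django.test import Client

client = Client()
resp = client.get('/api/assets/search/', {
    'q': 'host="api" && tech="nginx"',
    'asset_type': 'website',
    'page': 1,
    'pageSize': 10,
})
assert resp.status_code == 200
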
44  backend/apps/asset/views/__init__.py  Normal file
@@ -0,0 +1,44 @@
"""
Asset app views module

Re-exports all view classes for backward compatibility
"""

from .asset_views import (
    AssetStatisticsViewSet,
    SubdomainViewSet,
    WebSiteViewSet,
    DirectoryViewSet,
    EndpointViewSet,
    HostPortMappingViewSet,
    VulnerabilityViewSet,
    SubdomainSnapshotViewSet,
    WebsiteSnapshotViewSet,
    DirectorySnapshotViewSet,
    EndpointSnapshotViewSet,
    HostPortMappingSnapshotViewSet,
    VulnerabilitySnapshotViewSet,
    ScreenshotViewSet,
    ScreenshotSnapshotViewSet,
)
from .search_views import AssetSearchView, AssetSearchExportView

__all__ = [
    'AssetStatisticsViewSet',
    'SubdomainViewSet',
    'WebSiteViewSet',
    'DirectoryViewSet',
    'EndpointViewSet',
    'HostPortMappingViewSet',
    'VulnerabilityViewSet',
    'SubdomainSnapshotViewSet',
    'WebsiteSnapshotViewSet',
    'DirectorySnapshotViewSet',
    'EndpointSnapshotViewSet',
    'HostPortMappingSnapshotViewSet',
    'VulnerabilitySnapshotViewSet',
    'ScreenshotViewSet',
    'ScreenshotSnapshotViewSet',
    'AssetSearchView',
    'AssetSearchExportView',
]

@@ -2,23 +2,24 @@ import logging
from rest_framework import viewsets, status, filters
from rest_framework.decorators import action
from rest_framework.response import Response
from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from rest_framework.request import Request
from rest_framework.exceptions import NotFound, ValidationError as DRFValidationError
from django.core.exceptions import ValidationError, ObjectDoesNotExist
from django.db import DatabaseError, IntegrityError, OperationalError
from django.http import StreamingHttpResponse

from .serializers import (
from ..serializers import (
    SubdomainListSerializer, WebSiteSerializer, DirectorySerializer,
    VulnerabilitySerializer, EndpointListSerializer, IPAddressAggregatedSerializer,
    SubdomainSnapshotSerializer, WebsiteSnapshotSerializer, DirectorySnapshotSerializer,
    EndpointSnapshotSerializer, VulnerabilitySnapshotSerializer
)
from .services import (
from ..services import (
    SubdomainService, WebSiteService, DirectoryService,
    VulnerabilityService, AssetStatisticsService, EndpointService, HostPortMappingService
)
from .services.snapshot import (
from ..services.snapshot import (
    SubdomainSnapshotsService, WebsiteSnapshotsService, DirectorySnapshotsService,
    EndpointSnapshotsService, HostPortMappingSnapshotsService, VulnerabilitySnapshotsService
)

@@ -57,7 +58,7 @@ class AssetStatisticsViewSet(viewsets.ViewSet):
        """
        try:
            stats = self.service.get_statistics()
            return Response({
            return success_response(data={
                'totalTargets': stats['total_targets'],
                'totalSubdomains': stats['total_subdomains'],
                'totalIps': stats['total_ips'],
@@ -80,9 +81,10 @@ class AssetStatisticsViewSet(viewsets.ViewSet):
            })
        except (DatabaseError, OperationalError) as e:
            logger.exception("Failed to fetch asset statistics")
            return Response(
                {'error': 'Failed to fetch statistics'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to get statistics',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

    @action(detail=False, methods=['get'], url_path='history')
@@ -107,12 +109,13 @@ class AssetStatisticsViewSet(viewsets.ViewSet):
            days = min(max(days, 1), 90)  # clamp to 1-90 days

            history = self.service.get_statistics_history(days=days)
            return Response(history)
            return success_response(data=history)
        except (DatabaseError, OperationalError) as e:
            logger.exception("Failed to fetch statistics history")
            return Response(
                {'error': 'Failed to fetch history data'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to get history data',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

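The recurring change in these hunks swaps bare Response objects for a shared envelope. A sketch of what the helpers plausibly do, inferred from the call sites (the real implementations live in apps.common.response_helpers and may differ):

# Illustrative only -- inferred from usage, not the actual helper source.
def success_response(data=None, status_code=200):
    return Response({'code': 0, 'data': data}, status=status_code)

def error_response(code, message, status_code):
    return Response({'code': code, 'message': message}, status=status_code)
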
@@ -164,45 +167,50 @@ class SubdomainViewSet(viewsets.ModelViewSet):

        Response:
        {
            "message": "Bulk creation finished",
            "createdCount": 10,
            "skippedCount": 2,
            "invalidCount": 1,
            "mismatchedCount": 1,
            "totalReceived": 14
            "data": {
                "createdCount": 10,
                "skippedCount": 2,
                "invalidCount": 1,
                "mismatchedCount": 1,
                "totalReceived": 14
            }
        }
        """
        from apps.targets.models import Target

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            return Response(
                {'error': 'Subdomains must be bulk-created under a target'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Must create subdomains under a target',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Fetch the target
        try:
            target = Target.objects.get(pk=target_pk)
        except Target.DoesNotExist:
            return Response(
                {'error': 'Target not found'},
                status=status.HTTP_404_NOT_FOUND
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message='Target not found',
                status_code=status.HTTP_404_NOT_FOUND
            )

        # The target type must be a domain
        if target.type != Target.TargetType.DOMAIN:
            return Response(
                {'error': 'Only domain-type targets support subdomain import'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Only domain type targets support subdomain import',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Read the subdomain list from the request body
        subdomains = request.data.get('subdomains', [])
        if not subdomains or not isinstance(subdomains, list):
            return Response(
                {'error': 'Request body must not be empty or malformed'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Request body cannot be empty or invalid format',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Delegate to the service layer

@@ -214,19 +222,19 @@ class SubdomainViewSet(viewsets.ModelViewSet):
            )
        except Exception as e:
            logger.exception("Bulk subdomain creation failed")
            return Response(
                {'error': 'Internal server error'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Server internal error',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

        return Response({
            'message': 'Bulk creation finished',
        return success_response(data={
            'createdCount': result.created_count,
            'skippedCount': result.skipped_count,
            'invalidCount': result.invalid_count,
            'mismatchedCount': result.mismatched_count,
            'totalReceived': result.total_received,
        }, status=status.HTTP_200_OK)
        })

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request, **kwargs):
@@ -234,7 +242,7 @@ class SubdomainViewSet(viewsets.ModelViewSet):

        CSV columns: name, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime
        from apps.common.utils import create_csv_export_response, format_datetime

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
@@ -245,12 +253,41 @@ class SubdomainViewSet(viewsets.ModelViewSet):
        headers = ['name', 'created_at']
        formatters = {'created_at': format_datetime}

        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"target-{target_pk}-subdomains.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-subdomains.csv"'
        return response

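Every export in this changeset migrates from a hand-rolled StreamingHttpResponse to create_csv_export_response. A sketch of what such a helper minimally does, inferred from the call sites (the real one in apps.common.utils may differ, e.g. it also accepts show_progress and can emit Content-Length):

# Illustrative only -- inferred from usage, not the actual helper source.
from django.http import StreamingHttpResponse

def create_csv_export_response(data_iterator, headers, filename, field_formatters=None, show_progress=False):
    response = StreamingHttpResponse(
        generate_csv_rows(data_iterator, headers, field_formatters or {}),
        content_type='text/csv; charset=utf-8',
    )
    # Centralizes the Content-Disposition header each call site used to set by hand.
    response['Content-Disposition'] = f'attachment; filename="{filename}"'
    return response
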
    @action(detail=False, methods=['post'], url_path='bulk-delete')
    def bulk_delete(self, request, **kwargs):
        """Bulk-delete subdomains

        POST /api/assets/subdomains/bulk-delete/

        Request body: {"ids": [1, 2, 3]}
        Response: {"deletedCount": 3}
        """
        ids = request.data.get('ids', [])
        if not ids or not isinstance(ids, list):
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='ids is required and must be a list',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        try:
            from ..models import Subdomain
            deleted_count, _ = Subdomain.objects.filter(id__in=ids).delete()
            return success_response(data={'deletedCount': deleted_count})
        except Exception as e:
            logger.exception("Bulk subdomain deletion failed")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to delete subdomains',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

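One practical caveat for the bulk-delete endpoints repeated below: id__in with a very large list produces one enormous SQL IN clause. A hedged sketch of chunking the delete, should that ever become a problem (the chunk size is arbitrary):

# Illustrative only.
def bulk_delete_chunked(model, ids, chunk_size=1000):
    deleted = 0
    for i in range(0, len(ids), chunk_size):
        count, _ = model.objects.filter(id__in=ids[i:i + chunk_size]).delete()
        deleted += count
    return deleted
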
class WebSiteViewSet(viewsets.ModelViewSet):
@@ -265,6 +302,7 @@ class WebSiteViewSet(viewsets.ModelViewSet):
    - host="example"      fuzzy host match
    - title="login"       fuzzy title match
    - status="200,301"    multi-value status-code match
    - tech="nginx"        technology match (array field)
    - multiple space-separated conditions are ANDed
    """

@@ -299,35 +337,38 @@ class WebSiteViewSet(viewsets.ModelViewSet):

        Response:
        {
            "message": "Bulk creation finished",
            "createdCount": 10,
            "mismatchedCount": 2
            "data": {
                "createdCount": 10
            }
        }
        """
        from apps.targets.models import Target

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            return Response(
                {'error': 'Websites must be bulk-created under a target'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Must create websites under a target',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Fetch the target
        try:
            target = Target.objects.get(pk=target_pk)
        except Target.DoesNotExist:
            return Response(
                {'error': 'Target not found'},
                status=status.HTTP_404_NOT_FOUND
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message='Target not found',
                status_code=status.HTTP_404_NOT_FOUND
            )

        # Read the URL list from the request body
        urls = request.data.get('urls', [])
        if not urls or not isinstance(urls, list):
            return Response(
                {'error': 'Request body must not be empty or malformed'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Request body cannot be empty or invalid format',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Delegate to the service layer

@@ -340,23 +381,23 @@ class WebSiteViewSet(viewsets.ModelViewSet):
            )
        except Exception as e:
            logger.exception("Bulk website creation failed")
            return Response(
                {'error': 'Internal server error'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Server internal error',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

        return Response({
            'message': 'Bulk creation finished',
        return success_response(data={
            'createdCount': created_count,
        }, status=status.HTTP_200_OK)
        })

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request, **kwargs):
        """Export websites as CSV

        CSV columns: url, host, location, title, status_code, content_length, content_type, webserver, tech, body_preview, vhost, created_at
        CSV columns: url, host, location, title, status_code, content_length, content_type, webserver, tech, response_body, response_headers, vhost, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime, format_list_field
        from apps.common.utils import create_csv_export_response, format_datetime, format_list_field

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
@@ -367,19 +408,48 @@ class WebSiteViewSet(viewsets.ModelViewSet):
        headers = [
            'url', 'host', 'location', 'title', 'status_code',
            'content_length', 'content_type', 'webserver', 'tech',
            'body_preview', 'vhost', 'created_at'
            'response_body', 'response_headers', 'vhost', 'created_at'
        ]
        formatters = {
            'created_at': format_datetime,
            'tech': lambda x: format_list_field(x, separator=','),
        }

        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"target-{target_pk}-websites.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-websites.csv"'
        return response

    @action(detail=False, methods=['post'], url_path='bulk-delete')
    def bulk_delete(self, request, **kwargs):
        """Bulk-delete websites

        POST /api/assets/websites/bulk-delete/

        Request body: {"ids": [1, 2, 3]}
        Response: {"deletedCount": 3}
        """
        ids = request.data.get('ids', [])
        if not ids or not isinstance(ids, list):
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='ids is required and must be a list',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        try:
            from ..models import WebSite
            deleted_count, _ = WebSite.objects.filter(id__in=ids).delete()
            return success_response(data={'deletedCount': deleted_count})
        except Exception as e:
            logger.exception("Bulk website deletion failed")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to delete websites',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )


class DirectoryViewSet(viewsets.ModelViewSet):
@@ -426,35 +496,38 @@ class DirectoryViewSet(viewsets.ModelViewSet):

        Response:
        {
            "message": "Bulk creation finished",
            "createdCount": 10,
            "mismatchedCount": 2
            "data": {
                "createdCount": 10
            }
        }
        """
        from apps.targets.models import Target

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            return Response(
                {'error': 'Directories must be bulk-created under a target'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Must create directories under a target',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Fetch the target
        try:
            target = Target.objects.get(pk=target_pk)
        except Target.DoesNotExist:
            return Response(
                {'error': 'Target not found'},
                status=status.HTTP_404_NOT_FOUND
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message='Target not found',
                status_code=status.HTTP_404_NOT_FOUND
            )

        # Read the URL list from the request body
        urls = request.data.get('urls', [])
        if not urls or not isinstance(urls, list):
            return Response(
                {'error': 'Request body must not be empty or malformed'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Request body cannot be empty or invalid format',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Delegate to the service layer

@@ -467,15 +540,15 @@ class DirectoryViewSet(viewsets.ModelViewSet):
            )
        except Exception as e:
            logger.exception("Bulk directory creation failed")
            return Response(
                {'error': 'Internal server error'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Server internal error',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

        return Response({
            'message': 'Bulk creation finished',
        return success_response(data={
            'createdCount': created_count,
        }, status=status.HTTP_200_OK)
        })

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request, **kwargs):
@@ -483,7 +556,7 @@ class DirectoryViewSet(viewsets.ModelViewSet):

        CSV columns: url, status, content_length, words, lines, content_type, duration, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime
        from apps.common.utils import create_csv_export_response, format_datetime

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
@@ -499,12 +572,41 @@ class DirectoryViewSet(viewsets.ModelViewSet):
            'created_at': format_datetime,
        }

        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"target-{target_pk}-directories.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-directories.csv"'
        return response

    @action(detail=False, methods=['post'], url_path='bulk-delete')
    def bulk_delete(self, request, **kwargs):
        """Bulk-delete directories

        POST /api/assets/directories/bulk-delete/

        Request body: {"ids": [1, 2, 3]}
        Response: {"deletedCount": 3}
        """
        ids = request.data.get('ids', [])
        if not ids or not isinstance(ids, list):
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='ids is required and must be a list',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        try:
            from ..models import Directory
            deleted_count, _ = Directory.objects.filter(id__in=ids).delete()
            return success_response(data={'deletedCount': deleted_count})
        except Exception as e:
            logger.exception("Bulk directory deletion failed")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to delete directories',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )


class EndpointViewSet(viewsets.ModelViewSet):
@@ -519,6 +621,7 @@ class EndpointViewSet(viewsets.ModelViewSet):
    - host="example"      fuzzy host match
    - title="login"       fuzzy title match
    - status="200,301"    multi-value status-code match
    - tech="nginx"        technology match (array field)
    - multiple space-separated conditions are ANDed
    """

@@ -553,35 +656,38 @@ class EndpointViewSet(viewsets.ModelViewSet):

        Response:
        {
            "message": "Bulk creation finished",
            "createdCount": 10,
            "mismatchedCount": 2
            "data": {
                "createdCount": 10
            }
        }
        """
        from apps.targets.models import Target

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            return Response(
                {'error': 'Endpoints must be bulk-created under a target'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Must create endpoints under a target',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Fetch the target
        try:
            target = Target.objects.get(pk=target_pk)
        except Target.DoesNotExist:
            return Response(
                {'error': 'Target not found'},
                status=status.HTTP_404_NOT_FOUND
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message='Target not found',
                status_code=status.HTTP_404_NOT_FOUND
            )

        # Read the URL list from the request body
        urls = request.data.get('urls', [])
        if not urls or not isinstance(urls, list):
            return Response(
                {'error': 'Request body must not be empty or malformed'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Request body cannot be empty or invalid format',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Delegate to the service layer

@@ -594,23 +700,23 @@ class EndpointViewSet(viewsets.ModelViewSet):
            )
        except Exception as e:
            logger.exception("Bulk endpoint creation failed")
            return Response(
                {'error': 'Internal server error'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Server internal error',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

        return Response({
            'message': 'Bulk creation finished',
        return success_response(data={
            'createdCount': created_count,
        }, status=status.HTTP_200_OK)
        })

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request, **kwargs):
        """Export endpoints as CSV

        CSV columns: url, host, location, title, status_code, content_length, content_type, webserver, tech, body_preview, vhost, matched_gf_patterns, created_at
        CSV columns: url, host, location, title, status_code, content_length, content_type, webserver, tech, response_body, response_headers, vhost, matched_gf_patterns, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime, format_list_field
        from apps.common.utils import create_csv_export_response, format_datetime, format_list_field

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
@@ -621,7 +727,7 @@ class EndpointViewSet(viewsets.ModelViewSet):
        headers = [
            'url', 'host', 'location', 'title', 'status_code',
            'content_length', 'content_type', 'webserver', 'tech',
            'body_preview', 'vhost', 'matched_gf_patterns', 'created_at'
            'response_body', 'response_headers', 'vhost', 'matched_gf_patterns', 'created_at'
        ]
        formatters = {
            'created_at': format_datetime,
@@ -629,12 +735,41 @@ class EndpointViewSet(viewsets.ModelViewSet):
            'matched_gf_patterns': lambda x: format_list_field(x, separator=','),
        }

        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"target-{target_pk}-endpoints.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-endpoints.csv"'
        return response

    @action(detail=False, methods=['post'], url_path='bulk-delete')
    def bulk_delete(self, request, **kwargs):
        """Bulk-delete endpoints

        POST /api/assets/endpoints/bulk-delete/

        Request body: {"ids": [1, 2, 3]}
        Response: {"deletedCount": 3}
        """
        ids = request.data.get('ids', [])
        if not ids or not isinstance(ids, list):
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='ids is required and must be a list',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        try:
            from ..models import Endpoint
            deleted_count, _ = Endpoint.objects.filter(id__in=ids).delete()
            return success_response(data={'deletedCount': deleted_count})
        except Exception as e:
            logger.exception("Bulk endpoint deletion failed")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to delete endpoints',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )


class HostPortMappingViewSet(viewsets.ModelViewSet):
@@ -687,7 +822,7 @@ class HostPortMappingViewSet(viewsets.ModelViewSet):

        CSV columns: ip, host, port, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime
        from apps.common.utils import create_csv_export_response, format_datetime

        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
@@ -702,14 +837,44 @@ class HostPortMappingViewSet(viewsets.ModelViewSet):
            'created_at': format_datetime
        }

        # Build the streaming response
        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"target-{target_pk}-ip-addresses.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-ip-addresses.csv"'
        return response

    @action(detail=False, methods=['post'], url_path='bulk-delete')
    def bulk_delete(self, request, **kwargs):
        """Bulk-delete IP address mappings

        POST /api/assets/ip-addresses/bulk-delete/

        Request body: {"ips": ["192.168.1.1", "10.0.0.1"]}
        Response: {"deletedCount": 3}

        Note: because IP addresses are displayed aggregated, deletion takes a list
        of IPs and removes every host:port mapping record under each IP.
        """
        ips = request.data.get('ips', [])
        if not ips or not isinstance(ips, list):
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='ips is required and must be a list',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        try:
            from ..models import HostPortMapping
            deleted_count, _ = HostPortMapping.objects.filter(ip__in=ips).delete()
            return success_response(data={'deletedCount': deleted_count})
        except Exception as e:
            logger.exception("Bulk IP address mapping deletion failed")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to delete ip addresses',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )


class VulnerabilityViewSet(viewsets.ModelViewSet):
@@ -781,7 +946,7 @@ class SubdomainSnapshotViewSet(viewsets.ModelViewSet):

        CSV columns: name, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime
        from apps.common.utils import create_csv_export_response, format_datetime

        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
@@ -792,12 +957,12 @@ class SubdomainSnapshotViewSet(viewsets.ModelViewSet):
        headers = ['name', 'created_at']
        formatters = {'created_at': format_datetime}

        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"scan-{scan_pk}-subdomains.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-subdomains.csv"'
        return response


class WebsiteSnapshotViewSet(viewsets.ModelViewSet):
@@ -833,9 +998,9 @@ class WebsiteSnapshotViewSet(viewsets.ModelViewSet):
    def export(self, request, **kwargs):
        """Export website snapshots as CSV

        CSV columns: url, host, location, title, status_code, content_length, content_type, webserver, tech, body_preview, vhost, created_at
        CSV columns: url, host, location, title, status_code, content_length, content_type, webserver, tech, response_body, response_headers, vhost, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime, format_list_field
        from apps.common.utils import create_csv_export_response, format_datetime, format_list_field

        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
@@ -846,19 +1011,19 @@ class WebsiteSnapshotViewSet(viewsets.ModelViewSet):
        headers = [
            'url', 'host', 'location', 'title', 'status_code',
            'content_length', 'content_type', 'webserver', 'tech',
            'body_preview', 'vhost', 'created_at'
            'response_body', 'response_headers', 'vhost', 'created_at'
        ]
        formatters = {
            'created_at': format_datetime,
            'tech': lambda x: format_list_field(x, separator=','),
        }

        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"scan-{scan_pk}-websites.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-websites.csv"'
        return response


class DirectorySnapshotViewSet(viewsets.ModelViewSet):
@@ -893,7 +1058,7 @@ class DirectorySnapshotViewSet(viewsets.ModelViewSet):

        CSV columns: url, status, content_length, words, lines, content_type, duration, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime
        from apps.common.utils import create_csv_export_response, format_datetime

        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
@@ -909,12 +1074,12 @@ class DirectorySnapshotViewSet(viewsets.ModelViewSet):
            'created_at': format_datetime,
        }

        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"scan-{scan_pk}-directories.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-directories.csv"'
        return response


class EndpointSnapshotViewSet(viewsets.ModelViewSet):
@@ -950,9 +1115,9 @@ class EndpointSnapshotViewSet(viewsets.ModelViewSet):
    def export(self, request, **kwargs):
        """Export endpoint snapshots as CSV

        CSV columns: url, host, location, title, status_code, content_length, content_type, webserver, tech, body_preview, vhost, matched_gf_patterns, created_at
        CSV columns: url, host, location, title, status_code, content_length, content_type, webserver, tech, response_body, response_headers, vhost, matched_gf_patterns, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime, format_list_field
        from apps.common.utils import create_csv_export_response, format_datetime, format_list_field

        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
@@ -963,7 +1128,7 @@ class EndpointSnapshotViewSet(viewsets.ModelViewSet):
        headers = [
            'url', 'host', 'location', 'title', 'status_code',
            'content_length', 'content_type', 'webserver', 'tech',
            'body_preview', 'vhost', 'matched_gf_patterns', 'created_at'
            'response_body', 'response_headers', 'vhost', 'matched_gf_patterns', 'created_at'
        ]
        formatters = {
            'created_at': format_datetime,
@@ -971,12 +1136,12 @@ class EndpointSnapshotViewSet(viewsets.ModelViewSet):
            'matched_gf_patterns': lambda x: format_list_field(x, separator=','),
        }

        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"scan-{scan_pk}-endpoints.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-endpoints.csv"'
        return response


class HostPortMappingSnapshotViewSet(viewsets.ModelViewSet):
@@ -1011,7 +1176,7 @@ class HostPortMappingSnapshotViewSet(viewsets.ModelViewSet):

        CSV columns: ip, host, port, created_at
        """
        from apps.common.utils import generate_csv_rows, format_datetime
        from apps.common.utils import create_csv_export_response, format_datetime

        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
@@ -1026,14 +1191,12 @@ class HostPortMappingSnapshotViewSet(viewsets.ModelViewSet):
            'created_at': format_datetime
        }

        # Build the streaming response
        response = StreamingHttpResponse(
            generate_csv_rows(data_iterator, headers, formatters),
            content_type='text/csv; charset=utf-8'
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=f"scan-{scan_pk}-ip-addresses.csv",
            field_formatters=formatters
        )
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-ip-addresses.csv"'

        return response


class VulnerabilitySnapshotViewSet(viewsets.ModelViewSet):
@@ -1062,3 +1225,162 @@ class VulnerabilitySnapshotViewSet(viewsets.ModelViewSet):
        if scan_pk:
            return self.service.get_by_scan(scan_pk, filter_query=filter_query)
        return self.service.get_all(filter_query=filter_query)


# ==================== Screenshot ViewSets ====================

class ScreenshotViewSet(viewsets.ModelViewSet):
    """Screenshot asset ViewSet

    Supports two access paths:
    1. Nested route: GET /api/targets/{target_pk}/screenshots/
    2. Standalone route: GET /api/screenshots/ (global query)

    Supports the smart-filter syntax (filter parameter):
    - url="example"   fuzzy URL match
    """

    from ..serializers import ScreenshotListSerializer

    serializer_class = ScreenshotListSerializer
    pagination_class = BasePagination
    filter_backends = [filters.OrderingFilter]
    ordering = ['-created_at']

    def get_queryset(self):
        """Scope the query based on whether target_pk is present"""
        from ..models import Screenshot

        target_pk = self.kwargs.get('target_pk')
        filter_query = self.request.query_params.get('filter', None)

        queryset = Screenshot.objects.all()
        if target_pk:
            queryset = queryset.filter(target_id=target_pk)

        if filter_query:
            # Simple fuzzy URL match
            queryset = queryset.filter(url__icontains=filter_query)

        return queryset.order_by('-created_at')

    @action(detail=True, methods=['get'], url_path='image')
    def image(self, request, pk=None, **kwargs):
        """Fetch a screenshot image

        GET /api/assets/screenshots/{id}/image/

        Returns the image as WebP binary data
        """
        from django.http import HttpResponse
        from ..models import Screenshot

        try:
            screenshot = Screenshot.objects.get(pk=pk)
            if not screenshot.image:
                return error_response(
                    code=ErrorCodes.NOT_FOUND,
                    message='Screenshot image not found',
                    status_code=status.HTTP_404_NOT_FOUND
                )

            response = HttpResponse(screenshot.image, content_type='image/webp')
            response['Content-Disposition'] = f'inline; filename="screenshot_{pk}.webp"'
            return response
        except Screenshot.DoesNotExist:
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message='Screenshot not found',
                status_code=status.HTTP_404_NOT_FOUND
            )

    @action(detail=False, methods=['post'], url_path='bulk-delete')
    def bulk_delete(self, request, **kwargs):
        """Bulk-delete screenshots

        POST /api/assets/screenshots/bulk-delete/

        Request body: {"ids": [1, 2, 3]}
        Response: {"deletedCount": 3}
        """
        ids = request.data.get('ids', [])
        if not ids or not isinstance(ids, list):
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='ids is required and must be a list',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        try:
            from ..models import Screenshot
            deleted_count, _ = Screenshot.objects.filter(id__in=ids).delete()
            return success_response(data={'deletedCount': deleted_count})
        except Exception as e:
            logger.exception("Bulk screenshot deletion failed")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to delete screenshots',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )


class ScreenshotSnapshotViewSet(viewsets.ModelViewSet):
    """Screenshot snapshot ViewSet - nested route: GET /api/scans/{scan_pk}/screenshots/

    Supports the smart-filter syntax (filter parameter):
    - url="example"   fuzzy URL match
    """

    from ..serializers import ScreenshotSnapshotListSerializer

    serializer_class = ScreenshotSnapshotListSerializer
    pagination_class = BasePagination
    filter_backends = [filters.OrderingFilter]
    ordering = ['-created_at']

    def get_queryset(self):
        """Query by the scan_pk parameter"""
        from ..models import ScreenshotSnapshot

        scan_pk = self.kwargs.get('scan_pk')
        filter_query = self.request.query_params.get('filter', None)

        queryset = ScreenshotSnapshot.objects.all()
        if scan_pk:
            queryset = queryset.filter(scan_id=scan_pk)

        if filter_query:
            # Simple fuzzy URL match
            queryset = queryset.filter(url__icontains=filter_query)

        return queryset.order_by('-created_at')

    @action(detail=True, methods=['get'], url_path='image')
    def image(self, request, pk=None, **kwargs):
        """Fetch a screenshot snapshot image

        GET /api/scans/{scan_pk}/screenshots/{id}/image/

        Returns the image as WebP binary data
        """
        from django.http import HttpResponse
        from ..models import ScreenshotSnapshot

        try:
            screenshot = ScreenshotSnapshot.objects.get(pk=pk)
            if not screenshot.image:
                return error_response(
                    code=ErrorCodes.NOT_FOUND,
                    message='Screenshot image not found',
                    status_code=status.HTTP_404_NOT_FOUND
                )

            response = HttpResponse(screenshot.image, content_type='image/webp')
            response['Content-Disposition'] = f'inline; filename="screenshot_snapshot_{pk}.webp"'
            return response
        except ScreenshotSnapshot.DoesNotExist:
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message='Screenshot snapshot not found',
                status_code=status.HTTP_404_NOT_FOUND
            )
361  backend/apps/asset/views/search_views.py  Normal file
@@ -0,0 +1,361 @@
"""
Asset search API views

REST endpoints for asset search:
- GET /api/assets/search/ - search assets
- GET /api/assets/search/export/ - export search results as CSV

Search syntax:
- field="value"   fuzzy match (ILIKE %value%)
- field=="value"  exact match
- field!="value"  not equal
- &&              AND
- ||              OR

Supported fields:
- host: hostname
- url: URL
- title: page title
- tech: technology stack
- status: status code
- body: response body
- header: response headers

Supported asset types:
- website: website (default)
- endpoint: endpoint
"""

import logging
import json
from datetime import datetime
from urllib.parse import urlparse, urlunparse
from rest_framework import status
from rest_framework.views import APIView
from rest_framework.request import Request
from django.db import connection

from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.asset.services.search_service import AssetSearchService, VALID_ASSET_TYPES

logger = logging.getLogger(__name__)

class AssetSearchView(APIView):
    """
    Asset search API

    GET /api/assets/search/

    Query Parameters:
        q: search query expression
        asset_type: asset type ('website' or 'endpoint', default 'website')
        page: page number (1-based, default 1)
        pageSize: page size (default 10, max 100)

    Example queries:
        ?q=host="api" && tech="nginx"
        ?q=tech="vue" || tech="react"&asset_type=endpoint
        ?q=status=="200" && host!="test"

    Response:
        {
            "results": [...],
            "total": 100,
            "page": 1,
            "pageSize": 10,
            "totalPages": 10,
            "assetType": "website"
        }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = AssetSearchService()

    def _parse_headers(self, headers_data) -> dict:
        """Parse response headers into a dict"""
        if not headers_data:
            return {}
        try:
            return json.loads(headers_data)
        except (json.JSONDecodeError, TypeError):
            result = {}
            for line in str(headers_data).split('\n'):
                if ':' in line:
                    key, value = line.split(':', 1)
                    result[key.strip()] = value.strip()
            return result

    def _format_result(self, result: dict, vulnerabilities_by_url: dict, asset_type: str) -> dict:
        """Format a single search result"""
        url = result.get('url', '')
        vulns = vulnerabilities_by_url.get(url, [])

        # Base fields (shared by Website and Endpoint)
        formatted = {
            'id': result.get('id'),
            'url': url,
            'host': result.get('host', ''),
            'title': result.get('title', ''),
            'technologies': result.get('tech', []) or [],
            'statusCode': result.get('status_code'),
            'contentLength': result.get('content_length'),
            'contentType': result.get('content_type', ''),
            'webserver': result.get('webserver', ''),
            'location': result.get('location', ''),
            'vhost': result.get('vhost'),
            'responseHeaders': self._parse_headers(result.get('response_headers')),
            'responseBody': result.get('response_body', ''),
            'createdAt': result.get('created_at').isoformat() if result.get('created_at') else None,
            'targetId': result.get('target_id'),
        }

        # Website-specific field: associated vulnerabilities
        if asset_type == 'website':
            formatted['vulnerabilities'] = [
                {
                    'id': v.get('id'),
                    'name': v.get('vuln_type', ''),
                    'vulnType': v.get('vuln_type', ''),
                    'severity': v.get('severity', 'info'),
                }
                for v in vulns
            ]

        # Endpoint-specific fields
        if asset_type == 'endpoint':
            formatted['matchedGfPatterns'] = result.get('matched_gf_patterns', []) or []

        return formatted

    def _get_vulnerabilities_by_url_prefix(self, website_urls: list) -> dict:
        """
        Batch-fetch vulnerability data by URL prefix

        A vulnerability URL is a sub-path of its website URL, so prefix matching is used:
        - website.url: https://example.com/path?query=1
        - vulnerability.url: https://example.com/path/api/users

        Args:
            website_urls: list of website URLs as [(url, target_id), ...]

        Returns:
            dict: {website_url: [vulnerability_list]}
        """
        if not website_urls:
            return {}

        try:
            with connection.cursor() as cursor:
                # Build OR conditions: each website URL (query string stripped) becomes a prefix match
                conditions = []
                params = []
                url_mapping = {}  # base_url -> original_url

                for url, target_id in website_urls:
                    if not url or target_id is None:
                        continue
                    # Use urlparse to drop the query string and fragment, keeping scheme://netloc/path
                    parsed = urlparse(url)
                    base_url = urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
                    url_mapping[base_url] = url
                    conditions.append("(v.url LIKE %s AND v.target_id = %s)")
                    params.extend([base_url + '%', target_id])

                if not conditions:
                    return {}

                where_clause = " OR ".join(conditions)

                sql = f"""
                    SELECT v.id, v.vuln_type, v.severity, v.url, v.target_id
                    FROM vulnerability v
                    WHERE {where_clause}
                    ORDER BY
                        CASE v.severity
                            WHEN 'critical' THEN 1
                            WHEN 'high' THEN 2
                            WHEN 'medium' THEN 3
                            WHEN 'low' THEN 4
                            ELSE 5
                        END
                """
                cursor.execute(sql, params)

                # Collect all vulnerabilities
                all_vulns = []
                for row in cursor.fetchall():
                    all_vulns.append({
                        'id': row[0],
                        'vuln_type': row[1],
                        'name': row[1],
                        'severity': row[2],
                        'url': row[3],
                        'target_id': row[4],
                    })

                # Group by the original website URL (for the returned mapping)
                result = {url: [] for url, _ in website_urls}
                for vuln in all_vulns:
                    vuln_url = vuln['url']
                    # Assign to the first matching website URL in list order
                    # (note: not a true longest-prefix match)
                    for website_url, target_id in website_urls:
                        parsed = urlparse(website_url)
                        base_url = urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
                        if vuln_url.startswith(base_url) and vuln['target_id'] == target_id:
                            result[website_url].append(vuln)
                            break

                return result
        except Exception as e:
            logger.error(f"Batch vulnerability lookup failed: {e}")
            return {}

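The grouping loop above takes the first match in list order, so when one website URL is a prefix of another the vulnerability can land on the shorter one. A hedged sketch of restoring true longest-prefix semantics by sorting the candidates once before the loop:

# Illustrative only.
candidates = sorted(
    website_urls,
    key=lambda pair: len(pair[0]),
    reverse=True,  # longest URL first, so the most specific prefix wins
)
for vuln in all_vulns:
    for website_url, target_id in candidates:
        ...  # same startswith/target_id check as above, then break
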
    def get(self, request: Request):
        """Search assets"""
        # Read the search query
        query = request.query_params.get('q', '').strip()

        if not query:
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Search query (q) is required',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Read and validate the asset type
        asset_type = request.query_params.get('asset_type', 'website').strip().lower()
        if asset_type not in VALID_ASSET_TYPES:
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message=f'Invalid asset_type. Must be one of: {", ".join(VALID_ASSET_TYPES)}',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Read pagination parameters
        try:
            page = int(request.query_params.get('page', 1))
            page_size = int(request.query_params.get('pageSize', 10))
        except (ValueError, TypeError):
            page = 1
            page_size = 10

        # Clamp pagination parameters
        page = max(1, page)
        page_size = min(max(1, page_size), 100)

        # Fetch the total and the search results
        total = self.service.count(query, asset_type)
        total_pages = (total + page_size - 1) // page_size if total > 0 else 1
        offset = (page - 1) * page_size

        all_results = self.service.search(query, asset_type)
        results = all_results[offset:offset + page_size]

        # Batch-fetch vulnerability data (only needed for websites)
        vulnerabilities_by_url = {}
        if asset_type == 'website':
            website_urls = [(r.get('url'), r.get('target_id')) for r in results if r.get('url') and r.get('target_id')]
            vulnerabilities_by_url = self._get_vulnerabilities_by_url_prefix(website_urls) if website_urls else {}

        # Format the results
        formatted_results = [self._format_result(r, vulnerabilities_by_url, asset_type) for r in results]

        return success_response(data={
            'results': formatted_results,
            'total': total,
            'page': page,
            'pageSize': page_size,
            'totalPages': total_pages,
            'assetType': asset_type,
        })

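Note that the handler above materializes every matching row (self.service.search with no limit) and slices one page out in Python. A hedged sketch of pushing the page into SQL instead, reusing the service's existing limit support plus an offset it does not yet take:

# Illustrative only -- AssetSearchService.search would need an offset parameter for this.
results = self.service.search(
    query,
    asset_type,
    limit=page_size,   # supported today
    # offset=offset,   # hypothetical parameter; the SQL already orders by created_at
)
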
class AssetSearchExportView(APIView):
|
||||
"""
|
||||
资产搜索导出 API
|
||||
|
||||
GET /api/assets/search/export/
|
||||
|
||||
Query Parameters:
|
||||
q: 搜索查询表达式
|
||||
asset_type: 资产类型 ('website' 或 'endpoint',默认 'website')
|
||||
|
||||
Response:
|
||||
CSV 文件(带 Content-Length,支持浏览器显示下载进度)
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.service = AssetSearchService()
|
||||
|
||||
def _get_headers_and_formatters(self, asset_type: str):
|
||||
"""获取 CSV 表头和格式化器"""
|
||||
from apps.common.utils import format_datetime, format_list_field
|
||||
|
||||
if asset_type == 'website':
|
||||
headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
|
||||
'webserver', 'location', 'tech', 'vhost', 'created_at']
|
||||
else:
|
||||
headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
|
||||
'webserver', 'location', 'tech', 'matched_gf_patterns', 'vhost', 'created_at']
|
||||
|
||||
formatters = {
|
||||
'created_at': format_datetime,
|
||||
'tech': lambda x: format_list_field(x, separator='; '),
|
||||
'matched_gf_patterns': lambda x: format_list_field(x, separator='; '),
|
||||
'vhost': lambda x: 'true' if x else ('false' if x is False else ''),
|
||||
}
|
||||
|
||||
return headers, formatters
|
||||
|
||||
def get(self, request: Request):
|
||||
"""导出搜索结果为 CSV(带 Content-Length,支持下载进度显示)"""
|
||||
from apps.common.utils import create_csv_export_response
|
||||
|
||||
# 获取搜索查询
|
||||
query = request.query_params.get('q', '').strip()
|
||||
|
||||
if not query:
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Search query (q) is required',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
# 获取并验证资产类型
|
||||
asset_type = request.query_params.get('asset_type', 'website').strip().lower()
|
||||
if asset_type not in VALID_ASSET_TYPES:
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message=f'Invalid asset_type. Must be one of: {", ".join(VALID_ASSET_TYPES)}',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
# 检查是否有结果(快速检查,避免空导出)
|
||||
total = self.service.count(query, asset_type)
|
||||
if total == 0:
|
||||
return error_response(
|
||||
code=ErrorCodes.NOT_FOUND,
|
||||
message='No results to export',
|
||||
status_code=status.HTTP_404_NOT_FOUND
|
||||
)
|
||||
|
||||
# 获取表头和格式化器
|
||||
headers, formatters = self._get_headers_and_formatters(asset_type)
|
||||
|
||||
# 生成文件名
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||
filename = f'search_{asset_type}_{timestamp}.csv'
|
||||
|
||||
# 使用通用导出工具
|
||||
data_iterator = self.service.search_iter(query, asset_type)
|
||||
return create_csv_export_response(
|
||||
data_iterator=data_iterator,
|
||||
headers=headers,
|
||||
filename=filename,
|
||||
field_formatters=formatters,
|
||||
show_progress=True # 显示下载进度
|
||||
)
|
||||
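Note: a minimal client-side sketch of the pagination contract above. The base URL and credentials are assumptions for illustration; the endpoint paths and parameter names (q, asset_type, page, pageSize) come from the views in this diff.

import requests

BASE = 'http://localhost:8000'  # hypothetical deployment address
session = requests.Session()
session.post(f'{BASE}/api/auth/login/', json={'username': 'admin', 'password': 'secret'})

# Page 2 of website results, 25 per page
resp = session.get(f'{BASE}/api/assets/search/', params={
    'q': 'tech="nginx"',
    'asset_type': 'website',
    'page': 2,
    'pageSize': 25,
})
data = resp.json()
print(data['total'], data['totalPages'], len(data['results']))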
@@ -40,8 +40,14 @@ def fetch_config_and_setup_django():
    print(f"[CONFIG] 正在从配置中心获取配置: {config_url}")
    print(f"[CONFIG] IS_LOCAL={is_local}")
    try:
        # Build request headers (include the Worker API key)
        headers = {}
        worker_api_key = os.environ.get("WORKER_API_KEY", "")
        if worker_api_key:
            headers["X-Worker-API-Key"] = worker_api_key

        # verify=False: remote workers reaching the server over HTTPS may use a self-signed certificate
-        resp = requests.get(config_url, timeout=10, verify=False)
+        resp = requests.get(config_url, headers=headers, timeout=10, verify=False)
        resp.raise_for_status()
        config = resp.json()

@@ -57,28 +63,17 @@ def fetch_config_and_setup_django():
        os.environ.setdefault("DB_USER", db_user)
        os.environ.setdefault("DB_PASSWORD", config['db']['password'])

        # Redis configuration
        os.environ.setdefault("REDIS_URL", config['redisUrl'])

        # Logging configuration
        os.environ.setdefault("LOG_DIR", config['paths']['logs'])
        os.environ.setdefault("LOG_LEVEL", config['logging']['level'])
        os.environ.setdefault("ENABLE_COMMAND_LOGGING", str(config['logging']['enableCommandLogging']).lower())
        os.environ.setdefault("DEBUG", str(config['debug']))

        # Git mirror configuration (used to accelerate git clone)
        git_mirror = config.get('gitMirror', '')
        if git_mirror:
            os.environ.setdefault("GIT_MIRROR", git_mirror)

        print(f"[CONFIG] ✓ 配置获取成功")
        print(f"[CONFIG] DB_HOST: {db_host}")
        print(f"[CONFIG] DB_PORT: {db_port}")
        print(f"[CONFIG] DB_NAME: {db_name}")
        print(f"[CONFIG] DB_USER: {db_user}")
        print(f"[CONFIG] REDIS_URL: {config['redisUrl']}")
        if git_mirror:
            print(f"[CONFIG] GIT_MIRROR: {git_mirror}")

    except Exception as e:
        print(f"[ERROR] 获取配置失败: {config_url} - {e}", file=sys.stderr)
backend/apps/common/error_codes.py (new file, 31 lines)
@@ -0,0 +1,31 @@
"""
Standardized error code definitions

A deliberately simple scheme (following common practice at Stripe, GitHub, etc.):
- define only 5-10 generic error codes
- unknown errors fall back to a generic code
- code format: uppercase letters and underscores
"""


class ErrorCodes:
    """Standardized error codes

    Only generic codes are defined; other errors use a generic message.
    This mirrors standard practice at Stripe, GitHub and similar large APIs.

    Code format conventions:
    - uppercase letters and underscores
    - short and self-explanatory
    - the frontend maps each code to an i18n key
    """

    # Generic error codes (8 in total)
    VALIDATION_ERROR = 'VALIDATION_ERROR'    # input validation failed
    NOT_FOUND = 'NOT_FOUND'                  # resource not found
    PERMISSION_DENIED = 'PERMISSION_DENIED'  # insufficient permissions
    SERVER_ERROR = 'SERVER_ERROR'            # internal server error
    BAD_REQUEST = 'BAD_REQUEST'              # malformed request
    CONFLICT = 'CONFLICT'                    # resource conflict (e.g. duplicate creation)
    UNAUTHORIZED = 'UNAUTHORIZED'            # not authenticated
    RATE_LIMITED = 'RATE_LIMITED'            # too many requests
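Note: a minimal sketch of how a view pairs these codes with the response helpers defined later in this changeset. The view name and the scan lookup are illustrative stand-ins, not this repo's real model access.

from rest_framework import status
from rest_framework.views import APIView
from apps.common.error_codes import ErrorCodes
from apps.common.response_helpers import error_response, success_response

class ScanDetailView(APIView):  # hypothetical view name
    def get(self, request, scan_id):
        scan = None  # stand-in for a real Scan.objects.filter(pk=scan_id).first()
        if scan is None:
            # Client receives {"error": {"code": "NOT_FOUND", "message": "Scan not found"}}
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message='Scan not found',
                status_code=status.HTTP_404_NOT_FOUND,
            )
        return success_response(data={'id': scan_id})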
backend/apps/common/exception_handlers.py (new file, 49 lines)
@@ -0,0 +1,49 @@
"""
Custom exception handler

Normalizes DRF exceptions so error responses share a single format.
"""

from rest_framework.views import exception_handler
from rest_framework import status
from rest_framework.exceptions import AuthenticationFailed, NotAuthenticated

from apps.common.response_helpers import error_response
from apps.common.error_codes import ErrorCodes


def custom_exception_handler(exc, context):
    """
    Custom exception handler

    Handles authentication-related exceptions and returns errors in the unified format.
    """
    # Run DRF's default exception handler first
    response = exception_handler(exc, context)

    if response is not None:
        # 401: not authenticated
        if response.status_code == status.HTTP_401_UNAUTHORIZED:
            return error_response(
                code=ErrorCodes.UNAUTHORIZED,
                message='Authentication required',
                status_code=status.HTTP_401_UNAUTHORIZED
            )

        # 403: permission denied
        if response.status_code == status.HTTP_403_FORBIDDEN:
            return error_response(
                code=ErrorCodes.PERMISSION_DENIED,
                message='Permission denied',
                status_code=status.HTTP_403_FORBIDDEN
            )

    # Handle NotAuthenticated and AuthenticationFailed exceptions
    if isinstance(exc, (NotAuthenticated, AuthenticationFailed)):
        return error_response(
            code=ErrorCodes.UNAUTHORIZED,
            message='Authentication required',
            status_code=status.HTTP_401_UNAUTHORIZED
        )

    return response
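Note: the handler only takes effect once DRF is pointed at it. A minimal settings sketch; EXCEPTION_HANDLER is a standard DRF settings key, and the dotted path follows this repo's module layout.

# settings.py (sketch)
REST_FRAMEWORK = {
    'EXCEPTION_HANDLER': 'apps.common.exception_handlers.custom_exception_handler',
    # ... other DRF settings elided ...
}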
backend/apps/common/migrations/0001_initial.py (new file, 34 lines)
@@ -0,0 +1,34 @@
# Generated by Django 5.2.7 on 2026-01-06 00:55

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

    initial = True

    dependencies = [
        ('targets', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='BlacklistRule',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('pattern', models.CharField(help_text='规则模式,如 *.gov, 10.0.0.0/8, 192.168.1.1', max_length=255)),
                ('rule_type', models.CharField(choices=[('domain', '域名'), ('ip', 'IP地址'), ('cidr', 'CIDR范围'), ('keyword', '关键词')], help_text='规则类型:domain, ip, cidr', max_length=20)),
                ('scope', models.CharField(choices=[('global', '全局规则'), ('target', 'Target规则')], db_index=True, help_text='作用域:global 或 target', max_length=20)),
                ('description', models.CharField(blank=True, default='', help_text='规则描述', max_length=500)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('target', models.ForeignKey(blank=True, help_text='关联的 Target(仅 scope=target 时有值)', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='blacklist_rules', to='targets.target')),
            ],
            options={
                'db_table': 'blacklist_rule',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scope', 'rule_type'], name='blacklist_r_scope_6ff77f_idx'), models.Index(fields=['target', 'scope'], name='blacklist_r_target__191441_idx')],
                'constraints': [models.UniqueConstraint(fields=('pattern', 'scope', 'target'), name='unique_blacklist_rule')],
            },
        ),
    ]
backend/apps/common/migrations/__init__.py (new file, empty)
backend/apps/common/models/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
"""Common models"""
from apps.common.models.blacklist import BlacklistRule

__all__ = ['BlacklistRule']
backend/apps/common/models/blacklist.py (new file, 71 lines)
@@ -0,0 +1,71 @@
"""Blacklist rule model"""
from django.db import models


class BlacklistRule(models.Model):
    """Blacklist rule model

    Stores blacklist filtering rules; domain, IP, CIDR and keyword rule types
    are supported. Rules come in two scopes: global rules and per-target rules.
    """

    class RuleType(models.TextChoices):
        DOMAIN = 'domain', '域名'
        IP = 'ip', 'IP地址'
        CIDR = 'cidr', 'CIDR范围'
        KEYWORD = 'keyword', '关键词'

    class Scope(models.TextChoices):
        GLOBAL = 'global', '全局规则'
        TARGET = 'target', 'Target规则'

    id = models.AutoField(primary_key=True)
    pattern = models.CharField(
        max_length=255,
        help_text='规则模式,如 *.gov, 10.0.0.0/8, 192.168.1.1'
    )
    rule_type = models.CharField(
        max_length=20,
        choices=RuleType.choices,
        help_text='规则类型:domain, ip, cidr'
    )
    scope = models.CharField(
        max_length=20,
        choices=Scope.choices,
        db_index=True,
        help_text='作用域:global 或 target'
    )
    target = models.ForeignKey(
        'targets.Target',
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='blacklist_rules',
        help_text='关联的 Target(仅 scope=target 时有值)'
    )
    description = models.CharField(
        max_length=500,
        blank=True,
        default='',
        help_text='规则描述'
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'blacklist_rule'
        indexes = [
            models.Index(fields=['scope', 'rule_type']),
            models.Index(fields=['target', 'scope']),
        ]
        constraints = [
            models.UniqueConstraint(
                fields=['pattern', 'scope', 'target'],
                name='unique_blacklist_rule'
            ),
        ]
        ordering = ['-created_at']

    def __str__(self):
        if self.scope == self.Scope.TARGET and self.target:
            return f"[{self.scope}:{self.target_id}] {self.pattern}"
        return f"[{self.scope}] {self.pattern}"
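Note: a minimal usage sketch (e.g. from `python manage.py shell`); the pattern value is illustrative.

from apps.common.models import BlacklistRule

BlacklistRule.objects.create(
    pattern='*.gov',
    rule_type=BlacklistRule.RuleType.DOMAIN,
    scope=BlacklistRule.Scope.GLOBAL,
)
# The unique_blacklist_rule constraint rejects an identical (pattern, scope, target)
# triple, so re-running the same create() raises IntegrityError.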
backend/apps/common/permissions.py (new file, 80 lines)
@@ -0,0 +1,80 @@
"""
Centralized permission management

Implements authentication for three classes of endpoints:
1. Public endpoints (no auth): login, logout, current-user status
2. Worker endpoints (API key auth): registration, config, heartbeat, callbacks, resource sync
3. Business endpoints (session auth): every other API
"""

import re
import logging
from django.conf import settings
from rest_framework.permissions import BasePermission

logger = logging.getLogger(__name__)

# Public endpoint whitelist (no authentication required)
PUBLIC_ENDPOINTS = [
    r'^/api/auth/login/$',
    r'^/api/auth/logout/$',
    r'^/api/auth/me/$',
]

# Worker API endpoints (require API key authentication)
# Covers: registration, config, heartbeat, callbacks, resource sync (wordlist download)
WORKER_ENDPOINTS = [
    r'^/api/workers/register/$',
    r'^/api/workers/config/$',
    r'^/api/workers/\d+/heartbeat/$',
    r'^/api/callbacks/',
    # Resource sync endpoint (workers need to download wordlist files)
    r'^/api/wordlists/download/$',
    # Note: the fingerprint export API uses session auth (frontend user exports);
    # workers read fingerprint data straight from the database, not over HTTP.
]


class IsAuthenticatedOrPublic(BasePermission):
    """
    Custom permission class:
    - whitelisted endpoints are publicly accessible
    - worker endpoints require API key authentication
    - everything else requires session authentication
    """

    def has_permission(self, request, view):
        path = request.path

        # Is the path on the public whitelist?
        for pattern in PUBLIC_ENDPOINTS:
            if re.match(pattern, path):
                return True

        # Is it a worker endpoint?
        for pattern in WORKER_ENDPOINTS:
            if re.match(pattern, path):
                return self._check_worker_api_key(request)

        # Everything else needs session authentication
        return request.user and request.user.is_authenticated

    def _check_worker_api_key(self, request):
        """Validate the worker API key"""
        api_key = request.headers.get('X-Worker-API-Key')
        expected_key = getattr(settings, 'WORKER_API_KEY', None)

        if not expected_key:
            # Reject all worker requests when no API key is configured
            logger.warning("WORKER_API_KEY 未配置,拒绝 Worker 请求")
            return False

        if not api_key:
            logger.warning(f"Worker 请求缺少 X-Worker-API-Key Header: {request.path}")
            return False

        if api_key != expected_key:
            logger.warning(f"Worker API Key 无效: {request.path}")
            return False

        return True
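Note: to enforce this class on every view it must be registered as the global default. A settings sketch; DEFAULT_PERMISSION_CLASSES is a standard DRF settings key, and the dotted path matches this file's location.

# settings.py (sketch)
REST_FRAMEWORK = {
    'DEFAULT_PERMISSION_CLASSES': [
        'apps.common.permissions.IsAuthenticatedOrPublic',
    ],
}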
backend/apps/common/response_helpers.py (new file, 88 lines)
@@ -0,0 +1,88 @@
"""
Standardized API response helpers

Follows industry standards (RFC 9457 Problem Details) and common practice
at Google, Stripe and GitHub:
- success responses carry only data, no message field
- error responses use machine-readable codes; the frontend maps them to i18n messages
"""
from typing import Any, Dict, List, Optional, Union

from rest_framework import status
from rest_framework.response import Response


def success_response(
    data: Optional[Union[Dict[str, Any], List[Any]]] = None,
    status_code: int = status.HTTP_200_OK
) -> Response:
    """
    Standardized success response

    Returns the data unwrapped, in line with Stripe/GitHub-style APIs.

    Args:
        data: response payload (dict or list)
        status_code: HTTP status code, default 200

    Returns:
        Response: a DRF Response object

    Examples:
        # Single resource
        >>> success_response(data={'id': 1, 'name': 'Test'})
        {'id': 1, 'name': 'Test'}

        # Operation result
        >>> success_response(data={'count': 3, 'scans': [...]})
        {'count': 3, 'scans': [...]}

        # Resource creation
        >>> success_response(data={'id': 1}, status_code=201)
    """
    # Note: `data or {}` would be wrong here, because an empty list [] would become {}
    if data is None:
        data = {}
    return Response(data, status=status_code)


def error_response(
    code: str,
    message: Optional[str] = None,
    details: Optional[List[Dict[str, Any]]] = None,
    status_code: int = status.HTTP_400_BAD_REQUEST
) -> Response:
    """
    Standardized error response

    Args:
        code: error code (e.g. 'VALIDATION_ERROR', 'NOT_FOUND'),
            formatted as uppercase letters and underscores
        message: developer-facing debug message (not shown to users)
        details: detailed error entries (e.g. field-level validation errors)
        status_code: HTTP status code, default 400

    Returns:
        Response: a DRF Response object

    Examples:
        # Simple error
        >>> error_response(code='NOT_FOUND', status_code=404)
        {'error': {'code': 'NOT_FOUND'}}

        # With debug info
        >>> error_response(
        ...     code='VALIDATION_ERROR',
        ...     message='Invalid input data',
        ...     details=[{'field': 'name', 'message': 'Required'}]
        ... )
        {'error': {'code': 'VALIDATION_ERROR', 'message': '...', 'details': [...]}}
    """
    error_body: Dict[str, Any] = {'code': code}

    if message:
        error_body['message'] = message

    if details:
        error_body['details'] = details

    return Response({'error': error_body}, status=status_code)
backend/apps/common/serializers/__init__.py (new file, 12 lines)
@@ -0,0 +1,12 @@
"""Common serializers"""
from .blacklist_serializers import (
    BlacklistRuleSerializer,
    GlobalBlacklistRuleSerializer,
    TargetBlacklistRuleSerializer,
)

__all__ = [
    'BlacklistRuleSerializer',
    'GlobalBlacklistRuleSerializer',
    'TargetBlacklistRuleSerializer',
]
backend/apps/common/serializers/blacklist_serializers.py (new file, 68 lines)
@@ -0,0 +1,68 @@
"""Blacklist rule serializers"""
from rest_framework import serializers

from apps.common.models import BlacklistRule
from apps.common.utils import detect_rule_type


class BlacklistRuleSerializer(serializers.ModelSerializer):
    """Blacklist rule serializer"""

    class Meta:
        model = BlacklistRule
        fields = [
            'id',
            'pattern',
            'rule_type',
            'scope',
            'target',
            'description',
            'created_at',
        ]
        read_only_fields = ['id', 'rule_type', 'created_at']

    def validate_pattern(self, value):
        """Validate the rule pattern"""
        if not value or not value.strip():
            raise serializers.ValidationError("规则模式不能为空")
        return value.strip()

    def create(self, validated_data):
        """Auto-detect the rule type on creation"""
        pattern = validated_data.get('pattern', '')
        validated_data['rule_type'] = detect_rule_type(pattern)
        return super().create(validated_data)

    def update(self, instance, validated_data):
        """Re-detect the rule type on update"""
        if 'pattern' in validated_data:
            pattern = validated_data['pattern']
            validated_data['rule_type'] = detect_rule_type(pattern)
        return super().update(instance, validated_data)


class GlobalBlacklistRuleSerializer(BlacklistRuleSerializer):
    """Global blacklist rule serializer"""

    class Meta(BlacklistRuleSerializer.Meta):
        fields = ['id', 'pattern', 'rule_type', 'description', 'created_at']
        read_only_fields = ['id', 'rule_type', 'created_at']

    def create(self, validated_data):
        """Create a global rule"""
        validated_data['scope'] = BlacklistRule.Scope.GLOBAL
        validated_data['target'] = None
        return super().create(validated_data)


class TargetBlacklistRuleSerializer(BlacklistRuleSerializer):
    """Target blacklist rule serializer"""

    class Meta(BlacklistRuleSerializer.Meta):
        fields = ['id', 'pattern', 'rule_type', 'description', 'created_at']
        read_only_fields = ['id', 'rule_type', 'created_at']

    def create(self, validated_data):
        """Create a target-scoped rule (target_id is set by the view)"""
        validated_data['scope'] = BlacklistRule.Scope.TARGET
        return super().create(validated_data)
@@ -3,13 +3,16 @@

Provides system-level shared services, including:
- SystemLogService: system log reading service
- BlacklistService: blacklist rule service

Note: FilterService has moved to apps.common.utils.filter_utils.
Preferred usage: from apps.common.utils.filter_utils import apply_filters
"""

from .system_log_service import SystemLogService
from .blacklist_service import BlacklistService

__all__ = [
    'SystemLogService',
    'BlacklistService',
]
backend/apps/common/services/blacklist_service.py (new file, 176 lines)
@@ -0,0 +1,176 @@
"""
Blacklist rule management service

Handles CRUD for blacklist rules (database layer).
For filtering logic use apps.common.utils.BlacklistFilter.

Architecture:
- Model: BlacklistRule (apps.common.models.blacklist)
- Service: BlacklistService (this file) - rule CRUD
- Utils: BlacklistFilter (apps.common.utils.blacklist_filter) - filtering logic
- View: GlobalBlacklistView, TargetViewSet.blacklist
"""

import logging
from typing import List, Dict, Any, Optional

from django.db.models import QuerySet

from apps.common.utils import detect_rule_type

logger = logging.getLogger(__name__)


def _normalize_patterns(patterns: List[str]) -> List[str]:
    """
    Normalize a pattern list: deduplicate and drop blank lines

    Args:
        patterns: raw pattern list

    Returns:
        List[str]: deduplicated patterns (original order preserved)
    """
    return list(dict.fromkeys(filter(None, (p.strip() for p in patterns))))


class BlacklistService:
    """
    Blacklist rule management service

    Only handles rule CRUD; contains no filtering logic.
    For filtering use the BlacklistFilter utility class.
    """

    def get_global_rules(self) -> QuerySet:
        """
        Fetch the list of global blacklist rules

        Returns:
            QuerySet: global rule queryset
        """
        from apps.common.models import BlacklistRule
        return BlacklistRule.objects.filter(scope=BlacklistRule.Scope.GLOBAL)

    def get_target_rules(self, target_id: int) -> QuerySet:
        """
        Fetch target-scoped blacklist rules

        Args:
            target_id: Target ID

        Returns:
            QuerySet: target-scoped rule queryset
        """
        from apps.common.models import BlacklistRule
        return BlacklistRule.objects.filter(
            scope=BlacklistRule.Scope.TARGET,
            target_id=target_id
        )

    def get_rules(self, target_id: Optional[int] = None) -> List:
        """
        Fetch blacklist rules (global plus target-scoped)

        Args:
            target_id: Target ID used to load target-scoped rules

        Returns:
            List[BlacklistRule]: rule list
        """
        from apps.common.models import BlacklistRule

        # Load global rules
        rules = list(BlacklistRule.objects.filter(scope=BlacklistRule.Scope.GLOBAL))

        # Load target-scoped rules
        if target_id:
            target_rules = BlacklistRule.objects.filter(
                scope=BlacklistRule.Scope.TARGET,
                target_id=target_id
            )
            rules.extend(target_rules)

        return rules

    def replace_global_rules(self, patterns: List[str]) -> Dict[str, Any]:
        """
        Replace all global blacklist rules (PUT semantics)

        Args:
            patterns: new pattern list

        Returns:
            Dict: {'count': int} the final rule count
        """
        from apps.common.models import BlacklistRule

        count = self._replace_rules(
            patterns=patterns,
            scope=BlacklistRule.Scope.GLOBAL,
            target=None
        )

        logger.info("全量替换全局黑名单规则: %d 条", count)
        return {'count': count}

    def replace_target_rules(self, target, patterns: List[str]) -> Dict[str, Any]:
        """
        Replace all target-scoped blacklist rules (PUT semantics)

        Args:
            target: Target object
            patterns: new pattern list

        Returns:
            Dict: {'count': int} the final rule count
        """
        from apps.common.models import BlacklistRule

        count = self._replace_rules(
            patterns=patterns,
            scope=BlacklistRule.Scope.TARGET,
            target=target
        )

        logger.info("全量替换 Target 黑名单规则: %d 条 (Target: %s)", count, target.name)
        return {'count': count}

    def _replace_rules(self, patterns: List[str], scope: str, target=None) -> int:
        """
        Internal helper: replace all rules in a scope

        Args:
            patterns: pattern list
            scope: rule scope (GLOBAL/TARGET)
            target: Target object (only required for the TARGET scope)

        Returns:
            int: the final rule count
        """
        from apps.common.models import BlacklistRule
        from django.db import transaction

        patterns = _normalize_patterns(patterns)

        with transaction.atomic():
            # 1. Delete the old rules
            delete_filter = {'scope': scope}
            if target:
                delete_filter['target'] = target
            BlacklistRule.objects.filter(**delete_filter).delete()

            # 2. Create the new rules
            if patterns:
                rules = [
                    BlacklistRule(
                        pattern=pattern,
                        rule_type=detect_rule_type(pattern),
                        scope=scope,
                        target=target
                    )
                    for pattern in patterns
                ]
                BlacklistRule.objects.bulk_create(rules)

        return len(patterns)
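Note: a standalone sketch of what _normalize_patterns does, safe to run outside the project.

def _normalize_patterns(patterns):
    return list(dict.fromkeys(filter(None, (p.strip() for p in patterns))))

# dict.fromkeys deduplicates while preserving first-seen order (dicts keep
# insertion order since Python 3.7); filter(None, ...) drops entries that
# strip down to the empty string.
print(_normalize_patterns(['*.gov', ' *.gov ', '', '10.0.0.0/8', '*.gov']))
# -> ['*.gov', '10.0.0.0/8']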
@@ -4,15 +4,28 @@
Provides read access to system logs, supporting:
- reading log files from the log directory
- capping the number of returned lines to avoid memory blow-ups
- listing the available log files
"""

import fnmatch
import logging
import os
import subprocess
from datetime import datetime, timezone
from typing import TypedDict


logger = logging.getLogger(__name__)


class LogFileInfo(TypedDict):
    """Log file metadata"""
    filename: str
    category: str  # 'system' | 'error' | 'performance' | 'container'
    size: int
    modifiedAt: str  # ISO 8601 format


class SystemLogService:
    """
    System log service class
@@ -20,23 +33,131 @@ class SystemLogService:

    Reads system log files, either from the in-container path or from a host-mounted path.
    """

    # Log file categorization rules
    CATEGORY_RULES = [
        ('xingrin.log', 'system'),
        ('xingrin_error.log', 'error'),
        ('performance.log', 'performance'),
        ('container_*.log', 'container'),
    ]

    def __init__(self):
-        # Log file path (unified under /opt/xingrin/logs)
-        self.log_file = "/opt/xingrin/logs/xingrin.log"
-        self.default_lines = 200  # default number of lines returned
-        self.max_lines = 10000  # hard cap on returned lines
-        self.timeout_seconds = 3  # timeout for the tail command
+        # Log directory path
+        self.log_dir = "/opt/xingrin/logs"
+        self.default_file = "xingrin.log"  # default log file
+        self.default_lines = 200  # default number of lines returned
+        self.max_lines = 10000  # hard cap on returned lines
+        self.timeout_seconds = 3  # timeout for the tail command

-    def get_logs_content(self, lines: int | None = None) -> str:
+    def _categorize_file(self, filename: str) -> str | None:
        """
        Derive the log category from a filename

        Returns:
            The category name, or None if this is not a log file
        """
        for pattern, category in self.CATEGORY_RULES:
            if fnmatch.fnmatch(filename, pattern):
                return category
        return None

    def _validate_filename(self, filename: str) -> bool:
        """
        Check that a filename is safe (guards against path traversal)

        Args:
            filename: the filename to validate

        Returns:
            bool: whether the filename is acceptable
        """
        # No path separators allowed
        if '/' in filename or '\\' in filename:
            return False
        # No .. traversal allowed
        if '..' in filename:
            return False
        # Must be a known log file type
        return self._categorize_file(filename) is not None

    def get_log_files(self) -> list[LogFileInfo]:
        """
        List all available log files

        Returns:
            Log file metadata, sorted by category then filename
        """
        files: list[LogFileInfo] = []

        if not os.path.isdir(self.log_dir):
            logger.warning("日志目录不存在: %s", self.log_dir)
            return files

        for filename in os.listdir(self.log_dir):
            filepath = os.path.join(self.log_dir, filename)

            # Only consider files; skip directories
            if not os.path.isfile(filepath):
                continue

            # Derive the category
            category = self._categorize_file(filename)
            if category is None:
                continue

            # Collect file metadata
            try:
                stat = os.stat(filepath)
                modified_at = datetime.fromtimestamp(
                    stat.st_mtime, tz=timezone.utc
                ).isoformat()

                files.append({
                    'filename': filename,
                    'category': category,
                    'size': stat.st_size,
                    'modifiedAt': modified_at,
                })
            except OSError as e:
                logger.warning("获取文件信息失败 %s: %s", filepath, e)
                continue

        # Sort by category priority (system > error > performance > container), then filename
        category_order = {'system': 0, 'error': 1, 'performance': 2, 'container': 3}
        files.sort(key=lambda f: (category_order.get(f['category'], 99), f['filename']))

        return files

    def get_logs_content(self, filename: str | None = None, lines: int | None = None) -> str:
        """
        Fetch system log content

        Args:
            filename: log file name, defaults to xingrin.log
            lines: number of lines to return, default 200, capped at 10000

        Returns:
            str: log content, newline-separated, in original order

        Raises:
            ValueError: the filename is invalid
            FileNotFoundError: the log file does not exist
        """
        # Resolve the filename
        if filename is None:
            filename = self.default_file

        # Validate the filename
        if not self._validate_filename(filename):
            raise ValueError(f"无效的文件名: {filename}")

        # Build the full path
        log_file = os.path.join(self.log_dir, filename)

        # Ensure the file exists
        if not os.path.isfile(log_file):
            raise FileNotFoundError(f"日志文件不存在: {filename}")

        # Validate the lines argument and apply defaults
        if lines is None:
            lines = self.default_lines
@@ -48,7 +169,7 @@ class SystemLogService:
            lines = self.max_lines

        # Read the tail of the log file with the tail command
-        cmd = ["tail", "-n", str(lines), self.log_file]
+        cmd = ["tail", "-n", str(lines), log_file]

        result = subprocess.run(
            cmd,
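Note: a quick standalone sketch of the traversal guard's behavior; it mirrors _validate_filename without the Django service around it, using a trimmed rule list.

import fnmatch

CATEGORY_RULES = [('xingrin.log', 'system'), ('container_*.log', 'container')]

def validate(filename: str) -> bool:
    if '/' in filename or '\\' in filename or '..' in filename:
        return False
    return any(fnmatch.fnmatch(filename, pat) for pat, _ in CATEGORY_RULES)

print(validate('container_worker1.log'))   # True: matches a known pattern
print(validate('../../etc/passwd'))        # False: traversal rejected
print(validate('secrets.txt'))             # False: not a known log file type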
@@ -2,14 +2,25 @@
Common module URL configuration

Route overview:
-- /api/auth/*      authentication (login, logout, user info)
-- /api/system/*    system management (log viewing, etc.)
+- /api/health/     health check (no auth required)
+- /api/auth/*      authentication (login, logout, user info)
+- /api/system/*    system management (log viewing, etc.)
+- /api/blacklist/* blacklist management
"""

from django.urls import path
-from .views import LoginView, LogoutView, MeView, ChangePasswordView, SystemLogsView
+from .views import (
+    LoginView, LogoutView, MeView, ChangePasswordView,
+    SystemLogsView, SystemLogFilesView, HealthCheckView,
+    GlobalBlacklistView,
+    VersionView, CheckUpdateView,
+)

urlpatterns = [
    # Health check (no auth required)
    path('health/', HealthCheckView.as_view(), name='health-check'),

    # Authentication
    path('auth/login/', LoginView.as_view(), name='auth-login'),
    path('auth/logout/', LogoutView.as_view(), name='auth-logout'),
@@ -18,4 +29,10 @@ urlpatterns = [

    # System management
    path('system/logs/', SystemLogsView.as_view(), name='system-logs'),
    path('system/logs/files/', SystemLogFilesView.as_view(), name='system-log-files'),
    path('system/version/', VersionView.as_view(), name='system-version'),
    path('system/check-update/', CheckUpdateView.as_view(), name='system-check-update'),

    # Blacklist management (PUT full-replacement semantics)
    path('blacklist/rules/', GlobalBlacklistView.as_view(), name='blacklist-rules'),
]
@@ -11,9 +11,14 @@ from .csv_utils import (
    generate_csv_rows,
    format_list_field,
    format_datetime,
    create_csv_export_response,
    UTF8_BOM,
)
from .git_proxy import get_git_proxy_url
from .blacklist_filter import (
    BlacklistFilter,
    detect_rule_type,
    extract_host,
)

__all__ = [
    'deduplicate_for_bulk',
@@ -25,6 +30,9 @@ __all__ = [
    'generate_csv_rows',
    'format_list_field',
    'format_datetime',
    'create_csv_export_response',
    'UTF8_BOM',
    'get_git_proxy_url',
    'BlacklistFilter',
    'detect_rule_type',
    'extract_host',
]
backend/apps/common/utils/blacklist_filter.py (new file, 246 lines)
@@ -0,0 +1,246 @@
"""
Blacklist filtering utilities

Provides domain, IP, CIDR and keyword blacklist matching.
Pure utility code; no database access.

Supported rule types:
1. Exact domain match: example.com
   - rule: example.com
   - matches: example.com
   - does not match: sub.example.com, other.com

2. Domain suffix match: *.example.com
   - rule: *.example.com
   - matches: sub.example.com, a.b.example.com, example.com
   - does not match: other.com, example.com.cn

3. Keyword match: *cdn*
   - rule: *cdn*
   - matches: cdn.example.com, a.cdn.b.com, mycdn123.com
   - does not match: example.com (does not contain cdn)

4. Exact IP match: 192.168.1.1
   - rule: 192.168.1.1
   - matches: 192.168.1.1
   - does not match: 192.168.1.2

5. CIDR range match: 192.168.0.0/24
   - rule: 192.168.0.0/24
   - matches: 192.168.0.1, 192.168.0.255
   - does not match: 192.168.1.1

Usage:
    from apps.common.utils import BlacklistFilter

    # Build a filter from a rule list
    rules = BlacklistRule.objects.filter(...)
    filter = BlacklistFilter(rules)

    # Check a single target
    if filter.is_allowed('http://example.com'):
        process(url)

    # Streaming use
    for url in urls:
        if filter.is_allowed(url):
            process(url)
"""

import ipaddress
import logging
from typing import List, Optional
from urllib.parse import urlparse

from apps.common.validators import is_valid_ip, validate_cidr

logger = logging.getLogger(__name__)


def detect_rule_type(pattern: str) -> str:
    """
    Auto-detect the rule type

    Supported patterns:
    - exact domain match: example.com
    - domain suffix match: *.example.com
    - keyword match: *cdn* (matches domains containing cdn)
    - exact IP match: 192.168.1.1
    - CIDR range: 192.168.0.0/24

    Args:
        pattern: the rule pattern string

    Returns:
        str: rule type ('domain', 'ip', 'cidr', 'keyword')
    """
    if not pattern:
        return 'domain'

    pattern = pattern.strip()

    # Keyword pattern: *keyword* (leading and trailing asterisk, no dot inside)
    if pattern.startswith('*') and pattern.endswith('*') and len(pattern) > 2:
        keyword = pattern[1:-1]
        # Keywords must not contain a dot (otherwise it is likely a domain pattern)
        if '.' not in keyword:
            return 'keyword'

    # CIDR check (contains /)
    if '/' in pattern:
        try:
            validate_cidr(pattern)
            return 'cidr'
        except ValueError:
            pass

    # IP check (validate after stripping any wildcard prefix)
    clean_pattern = pattern.lstrip('*').lstrip('.')
    if is_valid_ip(clean_pattern):
        return 'ip'

    # Fall back to domain
    return 'domain'


def extract_host(target: str) -> str:
    """
    Extract the hostname from a target string

    Supports:
    - bare domains: example.com
    - bare IPs: 192.168.1.1
    - URLs: http://example.com/path

    Args:
        target: the target string

    Returns:
        str: the extracted hostname
    """
    if not target:
        return ''

    target = target.strip()

    # For URLs, pull out the hostname
    if '://' in target:
        try:
            parsed = urlparse(target)
            return parsed.hostname or target
        except Exception:
            return target

    return target


class BlacklistFilter:
    """
    Blacklist filter

    Pre-compiles the rules and offers efficient matching.
    """

    def __init__(self, rules: List):
        """
        Initialize the filter

        Args:
            rules: a list of BlacklistRule objects
        """
        from apps.common.models import BlacklistRule

        # Pre-parse: bucket rules by type and pre-compile CIDR networks
        self._domain_rules = []   # (pattern, is_wildcard, suffix)
        self._ip_rules = set()    # exact IPs in a set for O(1) lookups
        self._cidr_rules = []     # (pattern, network_obj)
        self._keyword_rules = []  # lowercase keyword list

        # Deduplicate: the same rule may appear in several scopes
        seen_patterns = set()

        for rule in rules:
            if rule.pattern in seen_patterns:
                continue
            seen_patterns.add(rule.pattern)
            if rule.rule_type == BlacklistRule.RuleType.DOMAIN:
                pattern = rule.pattern.lower()
                if pattern.startswith('*.'):
                    self._domain_rules.append((pattern, True, pattern[1:]))
                else:
                    self._domain_rules.append((pattern, False, None))
            elif rule.rule_type == BlacklistRule.RuleType.IP:
                self._ip_rules.add(rule.pattern)
            elif rule.rule_type == BlacklistRule.RuleType.CIDR:
                try:
                    network = ipaddress.ip_network(rule.pattern, strict=False)
                    self._cidr_rules.append((rule.pattern, network))
                except ValueError:
                    pass
            elif rule.rule_type == BlacklistRule.RuleType.KEYWORD:
                # *cdn* -> cdn
                keyword = rule.pattern[1:-1].lower()
                self._keyword_rules.append(keyword)

    def is_allowed(self, target: str) -> bool:
        """
        Check whether a target passes the filter

        Args:
            target: the target to check (domain/IP/URL)

        Returns:
            bool: True if allowed (not blacklisted), False if filtered out
        """
        if not target:
            return True

        host = extract_host(target)
        if not host:
            return True

        # Classify the input first, then dispatch to the matching branch
        if is_valid_ip(host):
            return self._check_ip_rules(host)
        else:
            return self._check_domain_rules(host)

    def _check_domain_rules(self, host: str) -> bool:
        """Apply domain rules (exact + suffix + keyword matching)"""
        host_lower = host.lower()

        # 1. Domain rules (exact and suffix)
        for pattern, is_wildcard, suffix in self._domain_rules:
            if is_wildcard:
                if host_lower.endswith(suffix) or host_lower == pattern[2:]:
                    return False
            else:
                if host_lower == pattern:
                    return False

        # 2. Keyword matching (string `in` checks, O(n*m))
        for keyword in self._keyword_rules:
            if keyword in host_lower:
                return False

        return True

    def _check_ip_rules(self, host: str) -> bool:
        """Apply IP rules (exact match + CIDR)"""
        # 1. Exact IP match (O(1))
        if host in self._ip_rules:
            return False

        # 2. CIDR matching
        if self._cidr_rules:
            try:
                ip_obj = ipaddress.ip_address(host)
                for _, network in self._cidr_rules:
                    if ip_obj in network:
                        return False
            except ValueError:
                pass

        return True
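Note: a standalone sketch of the detection cascade above. The project's validate_cidr / is_valid_ip helpers are approximated with the stdlib ipaddress module so the snippet runs outside the repo.

import ipaddress

def detect_rule_type(pattern: str) -> str:
    pattern = pattern.strip()
    # *keyword* with no dot inside -> keyword rule
    if pattern.startswith('*') and pattern.endswith('*') and len(pattern) > 2 \
            and '.' not in pattern[1:-1]:
        return 'keyword'
    # Anything with a slash that parses as a network -> CIDR rule
    if '/' in pattern:
        try:
            ipaddress.ip_network(pattern, strict=False)
            return 'cidr'
        except ValueError:
            pass
    # Strip wildcard prefixes, then try to parse as an address
    try:
        ipaddress.ip_address(pattern.lstrip('*').lstrip('.'))
        return 'ip'
    except ValueError:
        return 'domain'

for p in ['*cdn*', '10.0.0.0/8', '192.168.1.1', '*.example.com']:
    print(p, '->', detect_rule_type(p))
# *cdn* -> keyword, 10.0.0.0/8 -> cidr, 192.168.1.1 -> ip, *.example.com -> domain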
@@ -4,13 +4,21 @@
- UTF-8 BOM (Excel compatibility)
- RFC 4180-compliant escaping
- streaming generation (memory friendly)
- file responses with Content-Length (browsers can show download progress)
"""

import csv
import io
import os
import tempfile
import logging
from datetime import datetime
from typing import Iterator, Dict, Any, List, Callable, Optional

from django.http import FileResponse, StreamingHttpResponse

logger = logging.getLogger(__name__)

# UTF-8 BOM so Excel detects the encoding correctly
UTF8_BOM = '\ufeff'

@@ -114,3 +122,123 @@ def format_datetime(dt: Optional[datetime]) -> str:
        dt = timezone.localtime(dt)

    return dt.strftime('%Y-%m-%d %H:%M:%S')


def create_csv_export_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None,
    show_progress: bool = True
) -> FileResponse | StreamingHttpResponse:
    """
    Build a CSV export response

    The response type depends on show_progress:
    - True: temp file + FileResponse with Content-Length (browser shows download progress)
    - False: StreamingHttpResponse (lighter on memory, but no download progress)

    Args:
        data_iterator: data iterator yielding one dict per row
        headers: CSV header list
        filename: download filename (e.g. "export_2024.csv")
        field_formatters: per-field formatting functions
        show_progress: whether to enable download progress (default True)

    Returns:
        FileResponse or StreamingHttpResponse

    Example:
        >>> data_iter = service.iter_data()
        >>> headers = ['url', 'host', 'created_at']
        >>> formatters = {'created_at': format_datetime}
        >>> response = create_csv_export_response(
        ...     data_iter, headers, 'websites.csv', formatters
        ... )
        >>> return response
    """
    if show_progress:
        return _create_file_response(data_iterator, headers, filename, field_formatters)
    else:
        return _create_streaming_response(data_iterator, headers, filename, field_formatters)


def _create_file_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None
) -> FileResponse:
    """
    Build a file response with Content-Length (enables browser download progress)

    Implementation: write to a temporary file first, then return a FileResponse
    """
    # Create the temporary file
    temp_file = tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.csv',
        delete=False,
        encoding='utf-8'
    )
    temp_path = temp_file.name

    try:
        # Stream the CSV rows into the temporary file
        for row in generate_csv_rows(data_iterator, headers, field_formatters):
            temp_file.write(row)
        temp_file.close()

        # Measure the file size
        file_size = os.path.getsize(temp_path)

        # Build the file response
        response = FileResponse(
            open(temp_path, 'rb'),
            content_type='text/csv; charset=utf-8',
            as_attachment=True,
            filename=filename
        )
        response['Content-Length'] = file_size

        # Cleanup hook: delete the temporary file once the response is done
        original_close = response.file_to_stream.close
        def close_and_cleanup():
            original_close()
            try:
                os.unlink(temp_path)
            except OSError:
                pass
        response.file_to_stream.close = close_and_cleanup

        return response

    except Exception as e:
        # Clean up the temporary file
        try:
            temp_file.close()
        except Exception:
            pass
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        logger.error(f"创建 CSV 导出响应失败: {e}")
        raise


def _create_streaming_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None
) -> StreamingHttpResponse:
    """
    Build a streaming response (no Content-Length, lighter on memory)
    """
    response = StreamingHttpResponse(
        generate_csv_rows(data_iterator, headers, field_formatters),
        content_type='text/csv; charset=utf-8'
    )
    response['Content-Disposition'] = f'attachment; filename="{filename}"'
    return response
@@ -29,11 +29,19 @@ from dataclasses import dataclass
from typing import List, Dict, Optional, Union
from enum import Enum

-from django.db.models import QuerySet, Q
+from django.db.models import QuerySet, Q, F, Func, CharField
+from django.db.models.functions import Cast

logger = logging.getLogger(__name__)


class ArrayToString(Func):
    """PostgreSQL array_to_string function"""
    function = 'array_to_string'
    template = "%(function)s(%(expressions)s, ',')"
    output_field = CharField()


class LogicalOp(Enum):
    """Logical operators"""
    AND = 'AND'
@@ -86,9 +94,21 @@ class QueryParser:
        if not query_string or not query_string.strip():
            return []

        # Step 1: extract every filter condition and swap in a placeholder,
        # protecting spaces inside quoted values
        filters_found = []
        placeholder_pattern = '__FILTER_{}__'

        def replace_filter(match):
            idx = len(filters_found)
            filters_found.append(match.group(0))
            return placeholder_pattern.format(idx)

        # Extract all field="value" conditions with the regex first
        protected = cls.FILTER_PATTERN.sub(replace_filter, query_string)

        # Normalize the logical operators
        # First map || and or -> __OR__
-        normalized = cls.OR_PATTERN.sub(' __OR__ ', query_string)
+        normalized = cls.OR_PATTERN.sub(' __OR__ ', protected)
        # Then map && and and -> __AND__
        normalized = cls.AND_PATTERN.sub(' __AND__ ', normalized)

@@ -103,20 +123,26 @@
                pending_op = LogicalOp.OR
            elif token == '__AND__':
                pending_op = LogicalOp.AND
-            else:
-                # Try to parse the token as a filter condition
-                match = cls.FILTER_PATTERN.match(token)
-                if match:
-                    field, operator, value = match.groups()
-                    groups.append(FilterGroup(
-                        filter=ParsedFilter(
-                            field=field.lower(),
-                            operator=operator,
-                            value=value
-                        ),
-                        logical_op=pending_op if groups else LogicalOp.AND  # the first condition defaults to AND
-                    ))
-                    pending_op = LogicalOp.AND  # reset to the default AND
+            elif token.startswith('__FILTER_') and token.endswith('__'):
+                # Resolve the placeholder back to the original filter condition
+                try:
+                    idx = int(token[9:-2])  # extract the index
+                    original_filter = filters_found[idx]
+                    match = cls.FILTER_PATTERN.match(original_filter)
+                    if match:
+                        field, operator, value = match.groups()
+                        groups.append(FilterGroup(
+                            filter=ParsedFilter(
+                                field=field.lower(),
+                                operator=operator,
+                                value=value
+                            ),
+                            logical_op=pending_op if groups else LogicalOp.AND
+                        ))
+                        pending_op = LogicalOp.AND  # reset to the default AND
+                except (ValueError, IndexError):
+                    pass
            # Any other token is ignored (invalid input)

        return groups

@@ -151,6 +177,21 @@ class QueryBuilder:

        json_array_fields = json_array_fields or []

        # Collect the array fields that need fuzzy-search annotations
        array_fuzzy_fields = set()

        # First pass: detect fuzzy matches against array fields
        for group in filter_groups:
            f = group.filter
            db_field = field_mapping.get(f.field)
            if db_field and db_field in json_array_fields and f.operator == '=':
                array_fuzzy_fields.add(db_field)

        # Annotate each array field that needs fuzzy search
        for field in array_fuzzy_fields:
            annotate_name = f'{field}_text'
            queryset = queryset.annotate(**{annotate_name: ArrayToString(F(field))})

        # Build the Q objects
        combined_q = None

@@ -187,8 +228,17 @@
    def _build_single_q(cls, field: str, operator: str, value: str, is_json_array: bool = False) -> Optional[Q]:
        """Build a Q object for a single condition"""
        if is_json_array:
-            # JSON array fields use a __contains lookup
-            return Q(**{f'{field}__contains': [value]})
+            if operator == '==':
+                # Exact match: the array contains an element equal to value
+                return Q(**{f'{field}__contains': [value]})
+            elif operator == '!=':
+                # Negation: the array contains no element equal to value
+                return ~Q(**{f'{field}__contains': [value]})
+            else:  # '=' fuzzy match
+                # Fuzzy search against the annotated text field; build_query
+                # already cast the array to text via ArrayToString
+                annotate_name = f'{field}_text'
+                return Q(**{f'{annotate_name}__icontains': value})

        if operator == '!=':
            return cls._build_not_equal_q(field, value)
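Note: a toy illustration of the placeholder trick QueryParser uses above — pull out the quoted field="value" conditions first so whitespace normalization cannot split values containing spaces. FILTER_PATTERN here only approximates the real one.

import re

FILTER_PATTERN = re.compile(r'(\w+)(==|!=|=)"([^"]*)"')

filters_found = []
def replace_filter(match):
    filters_found.append(match.group(0))
    return f'__FILTER_{len(filters_found) - 1}__'

query = 'title="hello world" && tech="nginx"'
protected = FILTER_PATTERN.sub(replace_filter, query)
print(protected)       # __FILTER_0__ && __FILTER_1__
print(filters_found)   # ['title="hello world"', 'tech="nginx"']
# Splitting `protected` on whitespace now yields whole-condition tokens, and each
# placeholder is later resolved back to its original condition by index.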
@@ -1,39 +0,0 @@
-"""Git proxy utilities for URL acceleration."""
-
-import os
-from urllib.parse import urlparse
-
-
-def get_git_proxy_url(original_url: str) -> str:
-    """
-    Convert Git repository URL to proxy format for acceleration.
-
-    Supports multiple mirror services (standard format):
-    - gh-proxy.org: https://gh-proxy.org/https://github.com/user/repo.git
-    - ghproxy.com: https://ghproxy.com/https://github.com/user/repo.git
-    - mirror.ghproxy.com: https://mirror.ghproxy.com/https://github.com/user/repo.git
-    - ghps.cc: https://ghps.cc/https://github.com/user/repo.git
-
-    Args:
-        original_url: Original repository URL, e.g., https://github.com/user/repo.git
-
-    Returns:
-        Converted URL based on GIT_MIRROR setting.
-        If GIT_MIRROR is not set, returns the original URL unchanged.
-    """
-    git_mirror = os.getenv("GIT_MIRROR", "").strip()
-    if not git_mirror:
-        return original_url
-
-    # Remove trailing slash from mirror URL if present
-    git_mirror = git_mirror.rstrip("/")
-
-    parsed = urlparse(original_url)
-    host = parsed.netloc.lower()
-
-    # Only support GitHub for now
-    if "github.com" not in host:
-        return original_url
-
-    # Standard format: https://mirror.example.com/https://github.com/user/repo.git
-    return f"{git_mirror}/{original_url}"
@@ -2,11 +2,23 @@
Common module view exports

Includes:
- health check view: Docker health checks
- auth views: login, logout, user info, password change
- system log views: live log viewing
- blacklist view: global blacklist rule management
- version views: system version and update checks
"""

from .health_views import HealthCheckView
from .auth_views import LoginView, LogoutView, MeView, ChangePasswordView
-from .system_log_views import SystemLogsView
+from .system_log_views import SystemLogsView, SystemLogFilesView
from .blacklist_views import GlobalBlacklistView
from .version_views import VersionView, CheckUpdateView

-__all__ = ['LoginView', 'LogoutView', 'MeView', 'ChangePasswordView', 'SystemLogsView']
+__all__ = [
+    'HealthCheckView',
+    'LoginView', 'LogoutView', 'MeView', 'ChangePasswordView',
+    'SystemLogsView', 'SystemLogFilesView',
+    'GlobalBlacklistView',
+    'VersionView', 'CheckUpdateView',
+]
@@ -9,7 +9,10 @@ from django.utils.decorators import method_decorator
from rest_framework import status
from rest_framework.views import APIView
from rest_framework.response import Response
-from rest_framework.permissions import AllowAny, IsAuthenticated
+from rest_framework.permissions import AllowAny

+from apps.common.response_helpers import success_response, error_response
+from apps.common.error_codes import ErrorCodes

logger = logging.getLogger(__name__)

@@ -28,9 +31,10 @@ class LoginView(APIView):
        password = request.data.get('password')

        if not username or not password:
-            return Response(
-                {'error': '请提供用户名和密码'},
-                status=status.HTTP_400_BAD_REQUEST
+            return error_response(
+                code=ErrorCodes.VALIDATION_ERROR,
+                message='Username and password are required',
+                status_code=status.HTTP_400_BAD_REQUEST
            )

        user = authenticate(request, username=username, password=password)

@@ -38,20 +42,22 @@
        if user is not None:
            login(request, user)
            logger.info(f"用户 {username} 登录成功")
-            return Response({
-                'message': '登录成功',
-                'user': {
-                    'id': user.id,
-                    'username': user.username,
-                    'isStaff': user.is_staff,
-                    'isSuperuser': user.is_superuser,
+            return success_response(
+                data={
+                    'user': {
+                        'id': user.id,
+                        'username': user.username,
+                        'isStaff': user.is_staff,
+                        'isSuperuser': user.is_superuser,
+                    }
                }
-            })
+            )
        else:
            logger.warning(f"用户 {username} 登录失败:用户名或密码错误")
-            return Response(
-                {'error': '用户名或密码错误'},
-                status=status.HTTP_401_UNAUTHORIZED
+            return error_response(
+                code=ErrorCodes.UNAUTHORIZED,
+                message='Invalid username or password',
+                status_code=status.HTTP_401_UNAUTHORIZED
            )


@@ -79,7 +85,7 @@ class LogoutView(APIView):
            logout(request)
        else:
            logout(request)
-        return Response({'message': '已登出'})
+        return success_response()


@method_decorator(csrf_exempt, name='dispatch')
@@ -100,22 +106,26 @@ class MeView(APIView):
        if user_id:
            try:
                user = User.objects.get(pk=user_id)
-                return Response({
-                    'authenticated': True,
-                    'user': {
-                        'id': user.id,
-                        'username': user.username,
-                        'isStaff': user.is_staff,
-                        'isSuperuser': user.is_superuser,
+                return success_response(
+                    data={
+                        'authenticated': True,
+                        'user': {
+                            'id': user.id,
+                            'username': user.username,
+                            'isStaff': user.is_staff,
+                            'isSuperuser': user.is_superuser,
+                        }
                    }
-                })
+                )
            except User.DoesNotExist:
                pass

-        return Response({
-            'authenticated': False,
-            'user': None
-        })
+        return success_response(
+            data={
+                'authenticated': False,
+                'user': None
+            }
+        )


@method_decorator(csrf_exempt, name='dispatch')
@@ -124,43 +134,27 @@ class ChangePasswordView(APIView):
    Change password
    POST /api/auth/change-password/
    """
-    authentication_classes = []  # disable authentication (bypass CSRF)
-    permission_classes = [AllowAny]  # check login state manually

    def post(self, request):
-        # Manually check login state (load the user from the session)
-        from django.contrib.auth import get_user_model
-        User = get_user_model()
-
-        user_id = request.session.get('_auth_user_id')
-        if not user_id:
-            return Response(
-                {'error': '请先登录'},
-                status=status.HTTP_401_UNAUTHORIZED
-            )
-
-        try:
-            user = User.objects.get(pk=user_id)
-        except User.DoesNotExist:
-            return Response(
-                {'error': '用户不存在'},
-                status=status.HTTP_401_UNAUTHORIZED
-            )
+        # The global permission class has already authenticated; request.user is the user
+        user = request.user

        # CamelCaseParser turns oldPassword -> old_password
        old_password = request.data.get('old_password')
        new_password = request.data.get('new_password')

        if not old_password or not new_password:
-            return Response(
-                {'error': '请提供旧密码和新密码'},
-                status=status.HTTP_400_BAD_REQUEST
+            return error_response(
+                code=ErrorCodes.VALIDATION_ERROR,
+                message='Old password and new password are required',
+                status_code=status.HTTP_400_BAD_REQUEST
            )

        if not user.check_password(old_password):
-            return Response(
-                {'error': '旧密码错误'},
-                status=status.HTTP_400_BAD_REQUEST
+            return error_response(
+                code=ErrorCodes.VALIDATION_ERROR,
+                message='Old password is incorrect',
+                status_code=status.HTTP_400_BAD_REQUEST
            )

        user.set_password(new_password)
@@ -170,4 +164,4 @@ class ChangePasswordView(APIView):
        update_session_auth_hash(request, user)

        logger.info(f"用户 {user.username} 已修改密码")
-        return Response({'message': '密码修改成功'})
+        return success_response()
backend/apps/common/views/blacklist_views.py (new file, 80 lines)
@@ -0,0 +1,80 @@
"""Global blacklist API views"""
import logging

from rest_framework import status
from rest_framework.views import APIView
from rest_framework.permissions import IsAuthenticated

from apps.common.response_helpers import success_response, error_response
from apps.common.services import BlacklistService

logger = logging.getLogger(__name__)


class GlobalBlacklistView(APIView):
    """
    Global blacklist rule API

    Endpoints:
    - GET /api/blacklist/rules/ - list the global blacklist rules
    - PUT /api/blacklist/rules/ - replace all rules (textarea save flow)

    Design notes:
    - PUT full-replacement fits the "textarea with one rule per line" frontend:
      the user edits the textarea, clicks save, and the backend swaps in the new set

    Architecture: MVS pattern
    - View: parameter validation, response formatting
    - Service: business logic (BlacklistService)
    - Model: persistence (BlacklistRule)
    """

    permission_classes = [IsAuthenticated]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.blacklist_service = BlacklistService()

    def get(self, request):
        """
        List the global blacklist rules

        Response shape:
        {
            "patterns": ["*.gov", "*.edu", "10.0.0.0/8"]
        }
        """
        rules = self.blacklist_service.get_global_rules()
        patterns = list(rules.values_list('pattern', flat=True))
        return success_response(data={'patterns': patterns})

    def put(self, request):
        """
        Replace all global blacklist rules

        Request shape:
        {
            "patterns": ["*.gov", "*.edu", "10.0.0.0/8"]
        }

        An empty array clears every rule:
        {
            "patterns": []
        }
        """
        patterns = request.data.get('patterns', [])

        # Also accept a newline-separated string
        if isinstance(patterns, str):
            patterns = [p for p in patterns.split('\n') if p.strip()]

        if not isinstance(patterns, list):
            return error_response(
                code='VALIDATION_ERROR',
                message='patterns 必须是数组'
            )

        # Delegate the full replacement to the service layer
        result = self.blacklist_service.replace_global_rules(patterns)

        return success_response(data=result)
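Note: a hypothetical client-side save flow for the textarea UI. The base URL and the pre-authenticated session are assumptions, and depending on configuration a session-authenticated PUT may additionally require a CSRF token; the endpoint path and payload shape come from the view above.

import requests

BASE = 'http://localhost:8000'  # hypothetical deployment address
session = requests.Session()    # assumed to already hold a logged-in session cookie

textarea = "*.gov\n*.edu\n\n10.0.0.0/8\n*.gov\n"
patterns = [line for line in textarea.split('\n') if line.strip()]

resp = session.put(f'{BASE}/api/blacklist/rules/', json={'patterns': patterns})
print(resp.json())  # e.g. {'count': 3} -- duplicates and blanks are dropped server-side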
backend/apps/common/views/health_views.py (new file, 24 lines)
@@ -0,0 +1,24 @@
"""
Health check view

Provides the Docker health check endpoint; no authentication required.
"""

from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.permissions import AllowAny


class HealthCheckView(APIView):
    """
    Health check endpoint

    GET /api/health/

    Returns the service status for Docker health checks.
    This endpoint requires no authentication.
    """
    permission_classes = [AllowAny]

    def get(self, request):
        return Response({'status': 'ok'})
@@ -9,16 +9,57 @@ import logging
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt
from rest_framework import status
from rest_framework.permissions import AllowAny
from rest_framework.response import Response
from rest_framework.views import APIView

from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.common.services.system_log_service import SystemLogService


logger = logging.getLogger(__name__)


@method_decorator(csrf_exempt, name="dispatch")
class SystemLogFilesView(APIView):
    """
    Log file list API view

    GET /api/system/logs/files/
    List all available log files

    Response:
    {
        "files": [
            {
                "filename": "xingrin.log",
                "category": "system",
                "size": 1048576,
                "modifiedAt": "2025-01-15T10:30:00+00:00"
            },
            ...
        ]
    }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = SystemLogService()

    def get(self, request):
        """List the log files"""
        try:
            files = self.service.get_log_files()
            return success_response(data={"files": files})
        except Exception:
            logger.exception("Failed to get the log file list")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to get log files',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )


@method_decorator(csrf_exempt, name="dispatch")
class SystemLogsView(APIView):
    """
@@ -28,21 +69,14 @@ class SystemLogsView(APIView):
    Get system log content

    Query Parameters:
        file (str, optional): log file name, defaults to xingrin.log
        lines (int, optional): number of log lines to return, default 200, max 10000

    Response:
    {
        "content": "log content string..."
    }

    Note:
    - Anonymous access is allowed for now during development
    - Production should add an admin permission check
    """

    # TODO: switch to the IsAdminUser permission in production
    authentication_classes = []
    permission_classes = [AllowAny]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
@@ -52,18 +86,33 @@ class SystemLogsView(APIView):
        """
        Get the system logs

-       Supports a lines parameter to control how many lines are returned, for frontend pagination or live refresh.
+       Supports file and lines parameters to control what is returned.
        """
        try:
-           # Parse the lines parameter
+           # Parse parameters
+           filename = request.query_params.get("file")
            lines_raw = request.query_params.get("lines")
            lines = int(lines_raw) if lines_raw is not None else None

-           # Fetch the log content from the service
-           content = self.service.get_logs_content(lines=lines)
-           return Response({"content": content})
-       except ValueError:
-           return Response({"error": "lines must be an integer"}, status=status.HTTP_400_BAD_REQUEST)
+           content = self.service.get_logs_content(filename=filename, lines=lines)
+           return success_response(data={"content": content})
+       except ValueError as e:
+           return error_response(
+               code=ErrorCodes.VALIDATION_ERROR,
+               message=str(e) if 'file' in str(e).lower() else 'lines must be an integer',
+               status_code=status.HTTP_400_BAD_REQUEST
+           )
+       except FileNotFoundError as e:
+           return error_response(
+               code=ErrorCodes.NOT_FOUND,
+               message=str(e),
+               status_code=status.HTTP_404_NOT_FOUND
+           )
        except Exception:
            logger.exception("Failed to get system logs")
-           return Response({"error": "Failed to get system logs"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+           return error_response(
+               code=ErrorCodes.SERVER_ERROR,
+               message='Failed to get system logs',
+               status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
+           )
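A client sketch for the updated endpoint; the host and the success-envelope shape produced by success_response are assumptions:

    # Hypothetical client sketch for the endpoint above; host and envelope shape assumed.
    import requests

    resp = requests.get(
        "http://localhost:8000/api/system/logs/",
        params={"file": "xingrin.log", "lines": 200},
        timeout=10,
    )
    print(resp.json()["data"]["content"][:500])  # "data" wrapper assumed from success_response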
backend/apps/common/views/version_views.py (new file, 136 lines)
@@ -0,0 +1,136 @@
"""
System version views
"""

import logging
from pathlib import Path

import requests
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.views import APIView

from apps.common.error_codes import ErrorCodes
from apps.common.response_helpers import error_response, success_response

logger = logging.getLogger(__name__)

# GitHub repository info
GITHUB_REPO = "yyhuni/xingrin"
GITHUB_API_URL = f"https://api.github.com/repos/{GITHUB_REPO}/releases/latest"
GITHUB_RELEASES_URL = f"https://github.com/{GITHUB_REPO}/releases"


def get_current_version() -> str:
    """Read the current version string"""
    import os

    # Option 1: read from the environment (recommended inside Docker)
    version = os.environ.get('IMAGE_TAG', '')
    if version:
        return version

    # Option 2: read from a VERSION file (development)
    possible_paths = [
        Path('/app/VERSION'),
        Path(__file__).parent.parent.parent.parent.parent / 'VERSION',
    ]

    for path in possible_paths:
        try:
            return path.read_text(encoding='utf-8').strip()
        except (FileNotFoundError, OSError):
            continue

    return "unknown"


def compare_versions(current: str, latest: str) -> bool:
    """
    Compare version strings and decide whether an update is available

    Returns:
        True if an update is available
    """
    def parse_version(v: str) -> tuple:
        v = v.lstrip('v')
        parts = v.split('.')
        result = []
        for part in parts:
            if '-' in part:
                num, _ = part.split('-', 1)
                result.append(int(num))
            else:
                result.append(int(part))
        return tuple(result)

    try:
        return parse_version(latest) > parse_version(current)
    except (ValueError, AttributeError):
        return False


class VersionView(APIView):
    """Return the current system version"""

    def get(self, _request: Request) -> Response:
        """Get the current version info"""
        return success_response(data={
            'version': get_current_version(),
            'github_repo': GITHUB_REPO,
        })


class CheckUpdateView(APIView):
    """Check for system updates"""

    def get(self, _request: Request) -> Response:
        """
        Check whether a newer version exists

        Returns:
            - current_version: the running version
            - latest_version: the newest release
            - has_update: whether an update is available
            - release_url: release page URL
            - release_notes: release notes (if any)
        """
        current_version = get_current_version()

        try:
            response = requests.get(
                GITHUB_API_URL,
                headers={'Accept': 'application/vnd.github.v3+json'},
                timeout=10
            )

            if response.status_code == 404:
                return success_response(data={
                    'current_version': current_version,
                    'latest_version': current_version,
                    'has_update': False,
                    'release_url': GITHUB_RELEASES_URL,
                    'release_notes': None,
                })

            response.raise_for_status()
            release_data = response.json()

            latest_version = release_data.get('tag_name', current_version)
            has_update = compare_versions(current_version, latest_version)

            return success_response(data={
                'current_version': current_version,
                'latest_version': latest_version,
                'has_update': has_update,
                'release_url': release_data.get('html_url', GITHUB_RELEASES_URL),
                'release_notes': release_data.get('body'),
                'published_at': release_data.get('published_at'),
            })

        except requests.RequestException as e:
            logger.warning("Update check failed: %s", e)
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message="Could not reach GitHub, please try again later",
            )
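A few worked checks of the comparison semantics defined by parse_version above. Note the parser keeps only the numeric prefix of a segment like "10-dev", so pre-release suffixes are ignored and versions compare as integer tuples:

    assert compare_versions("v1.5.9", "v1.5.10")           # 10 > 9 numerically, not lexically
    assert not compare_versions("v1.5.10-dev", "v1.5.10")  # "-dev" suffix stripped, tuples equal
    assert not compare_versions("v2.0.0", "v1.9.9")        # latest older than current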
backend/apps/common/websocket_auth.py (new file, 44 lines)
@@ -0,0 +1,44 @@
"""
WebSocket authentication base class

Provides a base class for WebSocket consumers that require authentication
"""

import logging
from channels.generic.websocket import AsyncWebsocketConsumer

logger = logging.getLogger(__name__)


class AuthenticatedWebsocketConsumer(AsyncWebsocketConsumer):
    """
    Base class for WebSocket consumers that require authentication

    Subclasses should override on_connect() to implement their connection logic
    """

    async def connect(self):
        """
        Verify the user's authentication state on connect

        Rejects unauthenticated connections with close(code=4001)
        """
        user = self.scope.get('user')

        if not user or not user.is_authenticated:
            logger.warning(
                f"WebSocket connection rejected: user not authenticated - Path: {self.scope.get('path')}"
            )
            await self.close(code=4001)
            return

        # Hand off to the subclass's connection logic
        await self.on_connect()

    async def on_connect(self):
        """
        Subclass hook for the actual connection logic

        Default implementation: accept the connection
        """
        await self.accept()
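A minimal sketch of a consumer built on this base class, following the on_connect contract documented above; the class name and payload are illustrative, not from this repo:

    # Illustrative subclass; connect() in the base class has already rejected
    # unauthenticated users before on_connect() runs.
    from apps.common.websocket_auth import AuthenticatedWebsocketConsumer


    class EchoConsumer(AuthenticatedWebsocketConsumer):
        async def on_connect(self):
            await self.accept()
            await self.send(text_data='{"status": "connected"}')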
@@ -6,17 +6,17 @@ import json
import logging
import asyncio
import os
from channels.generic.websocket import AsyncWebsocketConsumer
from asgiref.sync import sync_to_async

from django.conf import settings

from apps.common.websocket_auth import AuthenticatedWebsocketConsumer
from apps.engine.services import WorkerService

logger = logging.getLogger(__name__)


-class WorkerDeployConsumer(AsyncWebsocketConsumer):
+class WorkerDeployConsumer(AuthenticatedWebsocketConsumer):
    """
    Worker interactive terminal WebSocket Consumer

@@ -31,8 +31,8 @@ class WorkerDeployConsumer(AsyncWebsocketConsumer):
        self.read_task = None
        self.worker_service = WorkerService()

-   async def connect(self):
-       """On connect, join the Worker's group and automatically open the SSH connection"""
+   async def on_connect(self):
+       """On connect (already authenticated), join the Worker's group and automatically open the SSH connection"""
        self.worker_id = self.scope['url_route']['kwargs']['worker_id']
        self.group_name = f'worker_deploy_{self.worker_id}'
@@ -2,8 +2,9 @@
Initialize the default scan engines

Usage:
-    python manage.py init_default_engine          # only create missing engines (do not overwrite)
-    python manage.py init_default_engine --force  # force-overwrite all engine configs
+    python manage.py init_default_engine              # only create missing engines (do not overwrite)
+    python manage.py init_default_engine --force      # force-overwrite all engine configs
+    python manage.py init_default_engine --force-sub  # only overwrite sub-engines, keep full scan

cd /root/my-vulun-scan/docker
docker compose exec server python backend/manage.py init_default_engine --force
@@ -12,12 +13,14 @@
- Reads engine_config_example.yaml as the default configuration
- Creates full scan (the default engine) plus a sub-engine per scan type
- Does not overwrite existing configs by default; --force overwrites
+- --force-sub overwrites only the sub-engine configs, keeping the user's custom full scan
"""

from django.core.management.base import BaseCommand
+from io import StringIO
from pathlib import Path

-import yaml
+from ruamel.yaml import YAML

from apps.engine.models import ScanEngine

@@ -29,11 +32,18 @@ class Command(BaseCommand):
        parser.add_argument(
            '--force',
            action='store_true',
-           help='force-overwrite existing engine configs',
+           help='force-overwrite existing engine configs (including full scan and sub-engines)',
        )
+       parser.add_argument(
+           '--force-sub',
+           action='store_true',
+           help='only overwrite sub-engine configs, keep full scan (for upgrades)',
+       )

    def handle(self, *args, **options):
        force = options.get('force', False)
+       force_sub = options.get('force_sub', False)

        # Read the default config file
        config_path = Path(__file__).resolve().parent.parent.parent.parent / 'scan' / 'configs' / 'engine_config_example.yaml'

@@ -44,10 +54,12 @@
        with open(config_path, 'r', encoding='utf-8') as f:
            default_config = f.read()

-       # Parse the YAML into a dict, used later to build the sub-engine configs
+       # Parse with ruamel.yaml to preserve comments
+       yaml_parser = YAML()
+       yaml_parser.preserve_quotes = True
        try:
-           config_dict = yaml.safe_load(default_config) or {}
-       except yaml.YAMLError as e:
+           config_dict = yaml_parser.load(default_config) or {}
+       except Exception as e:
            self.stdout.write(self.style.ERROR(f'Failed to parse engine config YAML: {e}'))
            return

@@ -83,30 +95,35 @@
            if scan_type != 'subdomain_discovery' and 'tools' not in scan_cfg:
                continue

-           # Build a YAML fragment containing only the current scan type's config
+           # Build a YAML fragment containing only the current scan type's config (comments preserved)
            single_config = {scan_type: scan_cfg}
            try:
-               single_yaml = yaml.safe_dump(
-                   single_config,
-                   sort_keys=False,
-                   allow_unicode=True,
-               )
-           except yaml.YAMLError as e:
+               stream = StringIO()
+               yaml_parser.dump(single_config, stream)
+               single_yaml = stream.getvalue()
+           except Exception as e:
                self.stdout.write(self.style.ERROR(f'Failed to generate sub-engine {scan_type} config: {e}'))
                continue

            engine_name = f"{scan_type}"
            sub_engine = ScanEngine.objects.filter(name=engine_name).first()
            if sub_engine:
-               if force:
+               # Either force or force_sub overwrites sub-engines
+               if force or force_sub:
                    sub_engine.configuration = single_yaml
                    sub_engine.save()
-                   self.stdout.write(self.style.SUCCESS(f' ✓ Sub-engine {engine_name} config updated (ID: {sub_engine.id})'))
+                   self.stdout.write(self.style.SUCCESS(
+                       f' ✓ Sub-engine {engine_name} config updated (ID: {sub_engine.id})'
+                   ))
                else:
-                   self.stdout.write(self.style.WARNING(f' ⊘ {engine_name} already exists, skipping (use --force to overwrite)'))
+                   self.stdout.write(self.style.WARNING(
+                       f' ⊘ {engine_name} already exists, skipping (use --force to overwrite)'
+                   ))
            else:
                sub_engine = ScanEngine.objects.create(
                    name=engine_name,
                    configuration=single_yaml,
                )
-               self.stdout.write(self.style.SUCCESS(f' ✓ Sub-engine {engine_name} created (ID: {sub_engine.id})'))
+               self.stdout.write(self.style.SUCCESS(
+                   f' ✓ Sub-engine {engine_name} created (ID: {sub_engine.id})'
+               ))
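A standalone sketch of what the switch to ruamel.yaml buys: round-tripping a config fragment keeps comments that yaml.safe_load/safe_dump would drop. The sample config keys here are illustrative:

    # Round-trip sketch; the sample YAML is made up, only the technique matches the diff.
    from io import StringIO
    from ruamel.yaml import YAML

    source = """\
    subdomain_discovery:
      # tools run in this order
      tools: [subfinder]
    """

    parser = YAML()
    parser.preserve_quotes = True
    data = parser.load(source)

    out = StringIO()
    parser.dump({"subdomain_discovery": data["subdomain_discovery"]}, out)
    print(out.getvalue())  # the "# tools run in this order" comment survives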
@@ -3,6 +3,9 @@
- EHole fingerprints: ehole.json -> imported into the database
- Goby fingerprints: goby.json -> imported into the database
- Wappalyzer fingerprints: wappalyzer.json -> imported into the database
+- Fingers fingerprints: fingers_http.json -> imported into the database
+- FingerPrintHub fingerprints: fingerprinthub_web.json -> imported into the database
+- ARL fingerprints: ARL.yaml -> imported into the database

Safe to re-run: skips when the database already has data, imports only into an empty database.
"""
@@ -11,14 +14,25 @@ import json
import logging
from pathlib import Path

+import yaml
from django.conf import settings
from django.core.management.base import BaseCommand

-from apps.engine.models import EholeFingerprint, GobyFingerprint, WappalyzerFingerprint
+from apps.engine.models import (
+    EholeFingerprint,
+    GobyFingerprint,
+    WappalyzerFingerprint,
+    FingersFingerprint,
+    FingerPrintHubFingerprint,
+    ARLFingerprint,
+)
from apps.engine.services.fingerprints import (
    EholeFingerprintService,
    GobyFingerprintService,
    WappalyzerFingerprintService,
+   FingersFingerprintService,
+   FingerPrintHubFingerprintService,
+   ARLFingerprintService,
)


@@ -33,6 +47,7 @@ DEFAULT_FINGERPRINTS = [
        "model": EholeFingerprint,
        "service": EholeFingerprintService,
        "data_key": "fingerprint",  # key of the fingerprint array in the JSON
+       "file_format": "json",
    },
    {
        "type": "goby",
@@ -40,6 +55,7 @@ DEFAULT_FINGERPRINTS = [
        "model": GobyFingerprint,
        "service": GobyFingerprintService,
        "data_key": None,  # Goby is an array; use the whole JSON
+       "file_format": "json",
    },
    {
        "type": "wappalyzer",
@@ -47,6 +63,31 @@ DEFAULT_FINGERPRINTS = [
        "model": WappalyzerFingerprint,
        "service": WappalyzerFingerprintService,
        "data_key": "apps",  # Wappalyzer uses an "apps" object
+       "file_format": "json",
    },
+   {
+       "type": "fingers",
+       "filename": "fingers_http.json",
+       "model": FingersFingerprint,
+       "service": FingersFingerprintService,
+       "data_key": None,  # Fingers is an array
+       "file_format": "json",
+   },
+   {
+       "type": "fingerprinthub",
+       "filename": "fingerprinthub_web.json",
+       "model": FingerPrintHubFingerprint,
+       "service": FingerPrintHubFingerprintService,
+       "data_key": None,  # FingerPrintHub is an array
+       "file_format": "json",
+   },
+   {
+       "type": "arl",
+       "filename": "ARL.yaml",
+       "model": ARLFingerprint,
+       "service": ARLFingerprintService,
+       "data_key": None,  # ARL is a YAML array
+       "file_format": "yaml",
+   },
]

@@ -68,6 +109,7 @@ class Command(BaseCommand):
            model = item["model"]
            service_class = item["service"]
            data_key = item["data_key"]
+           file_format = item.get("file_format", "json")

            # Check whether the database already has data
            existing_count = model.objects.count()
@@ -87,11 +129,14 @@ class Command(BaseCommand):
                failed += 1
                continue

-           # Read and parse the JSON
+           # Read and parse the file (JSON and YAML supported)
            try:
                with open(src_path, "r", encoding="utf-8") as f:
-                   json_data = json.load(f)
-           except (json.JSONDecodeError, OSError) as exc:
+                   if file_format == "yaml":
+                       file_data = yaml.safe_load(f)
+                   else:
+                       file_data = json.load(f)
+           except (json.JSONDecodeError, yaml.YAMLError, OSError) as exc:
                self.stdout.write(self.style.ERROR(
                    f"[{fp_type}] Failed to read the fingerprint file: {exc}"
                ))
@@ -99,7 +144,7 @@ class Command(BaseCommand):
                continue

            # Extract the fingerprint data (handled per format)
-           fingerprints = self._extract_fingerprints(json_data, data_key, fp_type)
+           fingerprints = self._extract_fingerprints(file_data, data_key, fp_type)
            if not fingerprints:
                self.stdout.write(self.style.WARNING(
                    f"[{fp_type}] No valid data in the fingerprint file, skipping"
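For reference, the array-of-name/rule shape the importer expects from ARL.yaml, per the registry entry above; the concrete values are made up:

    # Shape check for ARL.yaml (YAML array of name/rule pairs); values illustrative.
    import yaml

    arl_yaml = """\
    - name: Example-CMS
      rule: body="powered by example"
    """

    items = yaml.safe_load(arl_yaml)
    assert isinstance(items, list) and items[0]["name"] == "Example-CMS"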
backend/apps/engine/migrations/0001_initial.py (new file, 213 lines)
@@ -0,0 +1,213 @@
# Generated by Django 5.2.7 on 2026-01-06 00:55

from django.db import migrations, models


class Migration(migrations.Migration):

    initial = True

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='NucleiTemplateRepo',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Repository name, used for frontend display and config references', max_length=200, unique=True)),
                ('repo_url', models.CharField(help_text='Git repository URL', max_length=500)),
                ('local_path', models.CharField(blank=True, default='', help_text='Absolute path of the local working directory', max_length=500)),
                ('commit_hash', models.CharField(blank=True, default='', help_text='Last synced Git commit hash, used for Worker version checks', max_length=40)),
                ('last_synced_at', models.DateTimeField(blank=True, help_text='Time of the last successful sync', null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time')),
            ],
            options={
                'verbose_name': 'Nuclei template repo',
                'verbose_name_plural': 'Nuclei template repos',
                'db_table': 'nuclei_template_repo',
            },
        ),
        migrations.CreateModel(
            name='ARLFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Fingerprint name', max_length=300, unique=True)),
                ('rule', models.TextField(help_text='Matching rule expression')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'ARL fingerprint',
                'verbose_name_plural': 'ARL fingerprints',
                'db_table': 'arl_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='arl_fingerp_name_c3a305_idx'), models.Index(fields=['-created_at'], name='arl_fingerp_created_ed1060_idx')],
            },
        ),
        migrations.CreateModel(
            name='EholeFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('cms', models.CharField(help_text='Product/CMS name', max_length=200)),
                ('method', models.CharField(default='keyword', help_text='Match method', max_length=200)),
                ('location', models.CharField(default='body', help_text='Match location', max_length=200)),
                ('keyword', models.JSONField(default=list, help_text='Keyword list')),
                ('is_important', models.BooleanField(default=False, help_text='Whether this is a key asset')),
                ('type', models.CharField(blank=True, default='-', help_text='Category', max_length=100)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'EHole fingerprint',
                'verbose_name_plural': 'EHole fingerprints',
                'db_table': 'ehole_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['cms'], name='ehole_finge_cms_72ca2c_idx'), models.Index(fields=['method'], name='ehole_finge_method_17f0db_idx'), models.Index(fields=['location'], name='ehole_finge_locatio_7bb82b_idx'), models.Index(fields=['type'], name='ehole_finge_type_ca2bce_idx'), models.Index(fields=['is_important'], name='ehole_finge_is_impo_d56e64_idx'), models.Index(fields=['-created_at'], name='ehole_finge_created_d862b0_idx')],
                'constraints': [models.UniqueConstraint(fields=('cms', 'method', 'location'), name='unique_ehole_fingerprint')],
            },
        ),
        migrations.CreateModel(
            name='FingerPrintHubFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('fp_id', models.CharField(help_text='Fingerprint ID', max_length=200, unique=True)),
                ('name', models.CharField(help_text='Fingerprint name', max_length=300)),
                ('author', models.CharField(blank=True, default='', help_text='Author', max_length=200)),
                ('tags', models.CharField(blank=True, default='', help_text='Tags', max_length=500)),
                ('severity', models.CharField(blank=True, default='info', help_text='Severity', max_length=50)),
                ('metadata', models.JSONField(blank=True, default=dict, help_text='Metadata')),
                ('http', models.JSONField(default=list, help_text='HTTP matching rules')),
                ('source_file', models.CharField(blank=True, default='', help_text='Source file', max_length=500)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'FingerPrintHub fingerprint',
                'verbose_name_plural': 'FingerPrintHub fingerprints',
                'db_table': 'fingerprinthub_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['fp_id'], name='fingerprint_fp_id_df467f_idx'), models.Index(fields=['name'], name='fingerprint_name_95b6fb_idx'), models.Index(fields=['author'], name='fingerprint_author_80f54b_idx'), models.Index(fields=['severity'], name='fingerprint_severit_f70422_idx'), models.Index(fields=['-created_at'], name='fingerprint_created_bec16c_idx')],
            },
        ),
        migrations.CreateModel(
            name='FingersFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Fingerprint name', max_length=300, unique=True)),
                ('link', models.URLField(blank=True, default='', help_text='Related link', max_length=500)),
                ('rule', models.JSONField(default=list, help_text='Matching rule array')),
                ('tag', models.JSONField(default=list, help_text='Tag array')),
                ('focus', models.BooleanField(default=False, help_text='Whether to prioritize')),
                ('default_port', models.JSONField(blank=True, default=list, help_text='Default port array')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Fingers fingerprint',
                'verbose_name_plural': 'Fingers fingerprints',
                'db_table': 'fingers_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='fingers_fin_name_952de0_idx'), models.Index(fields=['link'], name='fingers_fin_link_4c6b7f_idx'), models.Index(fields=['focus'], name='fingers_fin_focus_568c7f_idx'), models.Index(fields=['-created_at'], name='fingers_fin_created_46fc91_idx')],
            },
        ),
        migrations.CreateModel(
            name='GobyFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Product name', max_length=300, unique=True)),
                ('logic', models.CharField(help_text='Logic expression', max_length=500)),
                ('rule', models.JSONField(default=list, help_text='Rule array')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Goby fingerprint',
                'verbose_name_plural': 'Goby fingerprints',
                'db_table': 'goby_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='goby_finger_name_82084c_idx'), models.Index(fields=['logic'], name='goby_finger_logic_a63226_idx'), models.Index(fields=['-created_at'], name='goby_finger_created_50e000_idx')],
            },
        ),
        migrations.CreateModel(
            name='ScanEngine',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='Engine name', max_length=200, unique=True)),
                ('configuration', models.CharField(blank=True, default='', help_text='Engine configuration in YAML format', max_length=10000)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time')),
            ],
            options={
                'verbose_name': 'Scan engine',
                'verbose_name_plural': 'Scan engines',
                'db_table': 'scan_engine',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='scan_engine_created_da4870_idx')],
            },
        ),
        migrations.CreateModel(
            name='WappalyzerFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Application name', max_length=300, unique=True)),
                ('cats', models.JSONField(default=list, help_text='Category ID array')),
                ('cookies', models.JSONField(blank=True, default=dict, help_text='Cookie detection rules')),
                ('headers', models.JSONField(blank=True, default=dict, help_text='HTTP header detection rules')),
                ('script_src', models.JSONField(blank=True, default=list, help_text='Script URL regex array')),
                ('js', models.JSONField(blank=True, default=list, help_text='JavaScript variable detection rules')),
                ('implies', models.JSONField(blank=True, default=list, help_text='Implied technology array')),
                ('meta', models.JSONField(blank=True, default=dict, help_text='HTML meta tag detection rules')),
                ('html', models.JSONField(blank=True, default=list, help_text='HTML content regex array')),
                ('description', models.TextField(blank=True, default='', help_text='Application description')),
                ('website', models.URLField(blank=True, default='', help_text='Official website', max_length=500)),
                ('cpe', models.CharField(blank=True, default='', help_text='CPE identifier', max_length=300)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Wappalyzer fingerprint',
                'verbose_name_plural': 'Wappalyzer fingerprints',
                'db_table': 'wappalyzer_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='wappalyzer__name_63c669_idx'), models.Index(fields=['website'], name='wappalyzer__website_88de1c_idx'), models.Index(fields=['cpe'], name='wappalyzer__cpe_30c761_idx'), models.Index(fields=['-created_at'], name='wappalyzer__created_8e6c21_idx')],
            },
        ),
        migrations.CreateModel(
            name='Wordlist',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='Wordlist name, unique', max_length=200, unique=True)),
                ('description', models.CharField(blank=True, default='', help_text='Wordlist description', max_length=200)),
                ('file_path', models.CharField(help_text='Absolute path of the wordlist file stored by the backend', max_length=500)),
                ('file_size', models.BigIntegerField(default=0, help_text='File size in bytes')),
                ('line_count', models.IntegerField(default=0, help_text='Number of lines in the wordlist')),
                ('file_hash', models.CharField(blank=True, default='', help_text='SHA-256 hash of the file, used for cache validation', max_length=64)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time')),
            ],
            options={
                'verbose_name': 'Wordlist file',
                'verbose_name_plural': 'Wordlist files',
                'db_table': 'wordlist',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='wordlist_created_4afb02_idx')],
            },
        ),
        migrations.CreateModel(
            name='WorkerNode',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Node name', max_length=100)),
                ('ip_address', models.GenericIPAddressField(help_text='IP address (127.0.0.1 for the local node)')),
                ('ssh_port', models.IntegerField(default=22, help_text='SSH port')),
                ('username', models.CharField(default='root', help_text='SSH username', max_length=50)),
                ('password', models.CharField(blank=True, default='', help_text='SSH password', max_length=200)),
                ('is_local', models.BooleanField(default=False, help_text='Whether this is the local node (inside the Docker container)')),
                ('status', models.CharField(choices=[('pending', 'Pending deploy'), ('deploying', 'Deploying'), ('online', 'Online'), ('offline', 'Offline'), ('updating', 'Updating'), ('outdated', 'Outdated version')], default='pending', help_text='Status: pending/deploying/online/offline', max_length=20)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
            ],
            options={
                'verbose_name': 'Worker node',
                'db_table': 'worker_node',
                'ordering': ['-created_at'],
                'constraints': [models.UniqueConstraint(condition=models.Q(('is_local', False)), fields=('ip_address',), name='unique_remote_worker_ip'), models.UniqueConstraint(fields=('name',), name='unique_worker_name')],
            },
        ),
    ]
@@ -4,7 +4,14 @@
"""

from .engine import WorkerNode, ScanEngine, Wordlist, NucleiTemplateRepo
-from .fingerprints import EholeFingerprint, GobyFingerprint, WappalyzerFingerprint
+from .fingerprints import (
+    EholeFingerprint,
+    GobyFingerprint,
+    WappalyzerFingerprint,
+    FingersFingerprint,
+    FingerPrintHubFingerprint,
+    ARLFingerprint,
+)

__all__ = [
    # Core models
@@ -16,4 +23,7 @@ __all__ = [
    "EholeFingerprint",
    "GobyFingerprint",
    "WappalyzerFingerprint",
+   "FingersFingerprint",
+   "FingerPrintHubFingerprint",
+   "ARLFingerprint",
]
@@ -106,3 +106,90 @@ class WappalyzerFingerprint(models.Model):

    def __str__(self) -> str:
        return f"{self.name}"


class FingersFingerprint(models.Model):
    """Fingers-format fingerprint rules (fingers_http.json)

    Matches via regular expressions and tags; supports favicon hash, header,
    body, and other detection methods
    """

    name = models.CharField(max_length=300, unique=True, help_text='Fingerprint name')
    link = models.URLField(max_length=500, blank=True, default='', help_text='Related link')
    rule = models.JSONField(default=list, help_text='Matching rule array')
    tag = models.JSONField(default=list, help_text='Tag array')
    focus = models.BooleanField(default=False, help_text='Whether to prioritize')
    default_port = models.JSONField(default=list, blank=True, help_text='Default port array')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'fingers_fingerprint'
        verbose_name = 'Fingers fingerprint'
        verbose_name_plural = 'Fingers fingerprints'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['name']),
            models.Index(fields=['link']),
            models.Index(fields=['focus']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self) -> str:
        return f"{self.name}"


class FingerPrintHubFingerprint(models.Model):
    """FingerPrintHub-format fingerprint rules (fingerprinthub_web.json)

    Based on the nuclei template format; matches on HTTP request and response features
    """

    fp_id = models.CharField(max_length=200, unique=True, help_text='Fingerprint ID')
    name = models.CharField(max_length=300, help_text='Fingerprint name')
    author = models.CharField(max_length=200, blank=True, default='', help_text='Author')
    tags = models.CharField(max_length=500, blank=True, default='', help_text='Tags')
    severity = models.CharField(max_length=50, blank=True, default='info', help_text='Severity')
    metadata = models.JSONField(default=dict, blank=True, help_text='Metadata')
    http = models.JSONField(default=list, help_text='HTTP matching rules')
    source_file = models.CharField(max_length=500, blank=True, default='', help_text='Source file')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'fingerprinthub_fingerprint'
        verbose_name = 'FingerPrintHub fingerprint'
        verbose_name_plural = 'FingerPrintHub fingerprints'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['fp_id']),
            models.Index(fields=['name']),
            models.Index(fields=['author']),
            models.Index(fields=['severity']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self) -> str:
        return f"{self.name} ({self.fp_id})"


class ARLFingerprint(models.Model):
    """ARL-format fingerprint rules (ARL.yaml)

    Uses a simple name + rule expression format
    """

    name = models.CharField(max_length=300, unique=True, help_text='Fingerprint name')
    rule = models.TextField(help_text='Matching rule expression')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'arl_fingerprint'
        verbose_name = 'ARL fingerprint'
        verbose_name_plural = 'ARL fingerprints'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['name']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self) -> str:
        return f"{self.name}"
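A query sketch over the new models above (standard Django ORM, run inside the project):

    from apps.engine.models import FingersFingerprint, ARLFingerprint

    # Names of fingerprints flagged for priority attention
    priority = FingersFingerprint.objects.filter(focus=True).values_list('name', flat=True)
    # Most recently imported ARL rule (also the default ordering)
    latest_arl = ARLFingerprint.objects.order_by('-created_at').first()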
@@ -88,6 +88,8 @@ def _register_scheduled_jobs(scheduler: BackgroundScheduler):
        replace_existing=True,
    )
    logger.info("    - Registered: scan result cleanup (daily at 03:00)")

+   # Note: the search materialized view refresh has moved to pg_ivm incremental
+   # maintenance, so no scheduled job is needed


def _trigger_scheduled_scans():
@@ -6,9 +6,15 @@
from .ehole import EholeFingerprintSerializer
from .goby import GobyFingerprintSerializer
from .wappalyzer import WappalyzerFingerprintSerializer
+from .fingers import FingersFingerprintSerializer
+from .fingerprinthub import FingerPrintHubFingerprintSerializer
+from .arl import ARLFingerprintSerializer

__all__ = [
    "EholeFingerprintSerializer",
    "GobyFingerprintSerializer",
    "WappalyzerFingerprintSerializer",
+   "FingersFingerprintSerializer",
+   "FingerPrintHubFingerprintSerializer",
+   "ARLFingerprintSerializer",
]
backend/apps/engine/serializers/fingerprints/arl.py (new file, 31 lines)
@@ -0,0 +1,31 @@
"""ARL fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import ARLFingerprint


class ARLFingerprintSerializer(serializers.ModelSerializer):
    """ARL fingerprint serializer

    Field mapping:
    - name: fingerprint name (required, unique)
    - rule: matching rule expression (required)
    """

    class Meta:
        model = ARLFingerprint
        fields = ['id', 'name', 'rule', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_name(self, value):
        """Validate the name field"""
        if not value or not value.strip():
            raise serializers.ValidationError("name must not be empty")
        return value.strip()

    def validate_rule(self, value):
        """Validate the rule field"""
        if not value or not value.strip():
            raise serializers.ValidationError("rule must not be empty")
        return value.strip()
backend/apps/engine/serializers/fingerprints/fingerprinthub.py (new file, 50 lines)
@@ -0,0 +1,50 @@
"""FingerPrintHub fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import FingerPrintHubFingerprint


class FingerPrintHubFingerprintSerializer(serializers.ModelSerializer):
    """FingerPrintHub fingerprint serializer

    Field mapping:
    - fp_id: fingerprint ID (required, unique)
    - name: fingerprint name (required)
    - author: author (optional)
    - tags: tag string (optional)
    - severity: severity (optional, default 'info')
    - metadata: metadata JSON (optional)
    - http: HTTP matching rule array (required)
    - source_file: source file (optional)
    """

    class Meta:
        model = FingerPrintHubFingerprint
        fields = ['id', 'fp_id', 'name', 'author', 'tags', 'severity',
                  'metadata', 'http', 'source_file', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_fp_id(self, value):
        """Validate the fp_id field"""
        if not value or not value.strip():
            raise serializers.ValidationError("fp_id must not be empty")
        return value.strip()

    def validate_name(self, value):
        """Validate the name field"""
        if not value or not value.strip():
            raise serializers.ValidationError("name must not be empty")
        return value.strip()

    def validate_http(self, value):
        """Validate the http field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("http must be an array")
        return value

    def validate_metadata(self, value):
        """Validate the metadata field"""
        if not isinstance(value, dict):
            raise serializers.ValidationError("metadata must be an object")
        return value
backend/apps/engine/serializers/fingerprints/fingers.py (new file, 48 lines)
@@ -0,0 +1,48 @@
"""Fingers fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import FingersFingerprint


class FingersFingerprintSerializer(serializers.ModelSerializer):
    """Fingers fingerprint serializer

    Field mapping:
    - name: fingerprint name (required, unique)
    - link: related link (optional)
    - rule: matching rule array (required)
    - tag: tag array (optional)
    - focus: whether to prioritize (optional, default False)
    - default_port: default port array (optional)
    """

    class Meta:
        model = FingersFingerprint
        fields = ['id', 'name', 'link', 'rule', 'tag', 'focus',
                  'default_port', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_name(self, value):
        """Validate the name field"""
        if not value or not value.strip():
            raise serializers.ValidationError("name must not be empty")
        return value.strip()

    def validate_rule(self, value):
        """Validate the rule field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("rule must be an array")
        return value

    def validate_tag(self, value):
        """Validate the tag field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("tag must be an array")
        return value

    def validate_default_port(self, value):
        """Validate the default_port field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("default_port must be an array")
        return value
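A validation sketch with the serializer above, using standard DRF usage; the payload is made up:

    from apps.engine.serializers.fingerprints import FingersFingerprintSerializer

    payload = {"name": "Example", "rule": ["body=\"example\""], "tag": ["cms"]}
    serializer = FingersFingerprintSerializer(data=payload)
    if serializer.is_valid():
        obj = serializer.save()  # creates a FingersFingerprint row
    else:
        print(serializer.errors)  # e.g. {"rule": ["rule must be an array"]} for bad input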
@@ -66,6 +66,7 @@ def get_start_agent_script(
    # Substitute variables
    script = script.replace("{{HEARTBEAT_API_URL}}", heartbeat_api_url or '')
    script = script.replace("{{WORKER_ID}}", str(worker_id) if worker_id else '')
+   script = script.replace("{{WORKER_API_KEY}}", getattr(settings, 'WORKER_API_KEY', ''))

    # Inject the image version config (so remote nodes use the same version)
    docker_user = getattr(settings, 'DOCKER_USER', 'yyhuni')
@@ -7,10 +7,16 @@ from .base import BaseFingerprintService
from .ehole import EholeFingerprintService
from .goby import GobyFingerprintService
from .wappalyzer import WappalyzerFingerprintService
+from .fingers_service import FingersFingerprintService
+from .fingerprinthub_service import FingerPrintHubFingerprintService
+from .arl_service import ARLFingerprintService

__all__ = [
    "BaseFingerprintService",
    "EholeFingerprintService",
    "GobyFingerprintService",
    "WappalyzerFingerprintService",
+   "FingersFingerprintService",
+   "FingerPrintHubFingerprintService",
+   "ARLFingerprintService",
]
backend/apps/engine/services/fingerprints/arl_service.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""ARL fingerprint management service

Implements validation, conversion, and export logic for ARL-format fingerprints
Supports YAML import and export
"""

import logging
import yaml

from apps.engine.models import ARLFingerprint
from .base import BaseFingerprintService

logger = logging.getLogger(__name__)


class ARLFingerprintService(BaseFingerprintService):
    """ARL fingerprint management service (extends the base class with ARL-specific logic)"""

    model = ARLFingerprint

    def validate_fingerprint(self, item: dict) -> bool:
        """
        Validate a single ARL fingerprint

        Rules:
        - name must be present and non-empty
        - rule must be present and non-empty

        Args:
            item: one fingerprint record

        Returns:
            bool: whether the record is valid
        """
        name = item.get('name', '')
        rule = item.get('rule', '')
        return bool(name and str(name).strip()) and bool(rule and str(rule).strip())

    def to_model_data(self, item: dict) -> dict:
        """
        Convert the ARL YAML format into model fields

        Args:
            item: raw ARL YAML record

        Returns:
            dict: model field data
        """
        return {
            'name': str(item.get('name', '')).strip(),
            'rule': str(item.get('rule', '')).strip(),
        }

    def get_export_data(self) -> list:
        """
        Build the export payload (ARL format - an array, used for YAML export)

        Returns:
            list: ARL-format data (array form)
            [
                {"name": "...", "rule": "..."},
                ...
            ]
        """
        fingerprints = self.model.objects.all()
        return [
            {
                'name': fp.name,
                'rule': fp.rule,
            }
            for fp in fingerprints
        ]

    def export_to_yaml(self, output_path: str) -> int:
        """
        Export all fingerprints to a YAML file

        Args:
            output_path: output file path

        Returns:
            int: number of exported fingerprints
        """
        data = self.get_export_data()
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
        count = len(data)
        logger.info("Exported ARL fingerprint file: %s, count: %d", output_path, count)
        return count

    def parse_yaml_import(self, yaml_content: str) -> list:
        """
        Parse YAML import content

        Args:
            yaml_content: YAML string

        Returns:
            list: parsed fingerprint records

        Raises:
            ValueError: when the YAML is invalid
        """
        try:
            data = yaml.safe_load(yaml_content)
            if not isinstance(data, list):
                raise ValueError("an ARL YAML file must be an array")
            return data
        except yaml.YAMLError as e:
            raise ValueError(f"invalid YAML format: {e}")
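A round-trip sketch with the service above; the sample rule and the output path are illustrative:

    from apps.engine.services.fingerprints import ARLFingerprintService

    service = ARLFingerprintService()
    yaml_text = """\
    - name: Example-CMS
      rule: body="example"
    """
    records = service.parse_yaml_import(yaml_text)
    assert service.validate_fingerprint(records[0])
    count = service.export_to_yaml('/tmp/ARL-export.yaml')  # path is illustrative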
backend/apps/engine/services/fingerprints/fingerprinthub_service.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""FingerPrintHub fingerprint management service

Implements validation, conversion, and export logic for FingerPrintHub-format fingerprints
"""

from apps.engine.models import FingerPrintHubFingerprint
from .base import BaseFingerprintService


class FingerPrintHubFingerprintService(BaseFingerprintService):
    """FingerPrintHub fingerprint management service (extends the base class with FingerPrintHub-specific logic)"""

    model = FingerPrintHubFingerprint

    def validate_fingerprint(self, item: dict) -> bool:
        """
        Validate a single FingerPrintHub fingerprint

        Rules:
        - id must be present and non-empty
        - info must be present and contain name
        - http must be an array

        Args:
            item: one fingerprint record

        Returns:
            bool: whether the record is valid
        """
        fp_id = item.get('id', '')
        info = item.get('info', {})
        http = item.get('http')

        if not fp_id or not str(fp_id).strip():
            return False
        if not isinstance(info, dict) or not info.get('name'):
            return False
        if not isinstance(http, list):
            return False

        return True

    def to_model_data(self, item: dict) -> dict:
        """
        Convert the FingerPrintHub JSON format into model fields

        Field mapping (nested structure flattened):
        - id (JSON) → fp_id (Model)
        - info.name (JSON) → name (Model)
        - info.author (JSON) → author (Model)
        - info.tags (JSON) → tags (Model)
        - info.severity (JSON) → severity (Model)
        - info.metadata (JSON) → metadata (Model)
        - http (JSON) → http (Model)
        - _source_file (JSON) → source_file (Model)

        Args:
            item: raw FingerPrintHub JSON record

        Returns:
            dict: model field data
        """
        info = item.get('info', {})
        return {
            'fp_id': str(item.get('id', '')).strip(),
            'name': str(info.get('name', '')).strip(),
            'author': info.get('author', ''),
            'tags': info.get('tags', ''),
            'severity': info.get('severity', 'info'),
            'metadata': info.get('metadata', {}),
            'http': item.get('http', []),
            'source_file': item.get('_source_file', ''),
        }

    def get_export_data(self) -> list:
        """
        Build the export payload (FingerPrintHub JSON format - an array)

        Returns:
            list: FingerPrintHub-format JSON data (array form)
            [
                {
                    "id": "...",
                    "info": {"name": "...", "author": "...", "tags": "...",
                             "severity": "...", "metadata": {...}},
                    "http": [...],
                    "_source_file": "..."
                },
                ...
            ]
        """
        fingerprints = self.model.objects.all()
        data = []
        for fp in fingerprints:
            item = {
                'id': fp.fp_id,
                'info': {
                    'name': fp.name,
                    'author': fp.author,
                    'tags': fp.tags,
                    'severity': fp.severity,
                    'metadata': fp.metadata,
                },
                'http': fp.http,
            }
            # Only include source_file when it is non-empty
            if fp.source_file:
                item['_source_file'] = fp.source_file
            data.append(item)
        return data
backend/apps/engine/services/fingerprints/fingers_service.py (new file, 83 lines)
@@ -0,0 +1,83 @@
"""Fingers fingerprint management service

Implements validation, conversion, and export logic for Fingers-format fingerprints
"""

from apps.engine.models import FingersFingerprint
from .base import BaseFingerprintService


class FingersFingerprintService(BaseFingerprintService):
    """Fingers fingerprint management service (extends the base class with Fingers-specific logic)"""

    model = FingersFingerprint

    def validate_fingerprint(self, item: dict) -> bool:
        """
        Validate a single Fingers fingerprint

        Rules:
        - name must be present and non-empty
        - rule must be an array

        Args:
            item: one fingerprint record

        Returns:
            bool: whether the record is valid
        """
        name = item.get('name', '')
        rule = item.get('rule')
        return bool(name and str(name).strip()) and isinstance(rule, list)

    def to_model_data(self, item: dict) -> dict:
        """
        Convert the Fingers JSON format into model fields

        Field mapping:
        - default_port (JSON) → default_port (Model)

        Args:
            item: raw Fingers JSON record

        Returns:
            dict: model field data
        """
        return {
            'name': str(item.get('name', '')).strip(),
            'link': item.get('link', ''),
            'rule': item.get('rule', []),
            'tag': item.get('tag', []),
            'focus': item.get('focus', False),
            'default_port': item.get('default_port', []),
        }

    def get_export_data(self) -> list:
        """
        Build the export payload (Fingers JSON format - an array)

        Returns:
            list: Fingers-format JSON data (array form)
            [
                {"name": "...", "link": "...", "rule": [...], "tag": [...],
                 "focus": false, "default_port": [...]},
                ...
            ]
        """
        fingerprints = self.model.objects.all()
        data = []
        for fp in fingerprints:
            item = {
                'name': fp.name,
                'link': fp.link,
                'rule': fp.rule,
                'tag': fp.tag,
            }
            # Only include focus when it is True (matches the original format)
            if fp.focus:
                item['focus'] = fp.focus
            # Only include default_port when it is non-empty
            if fp.default_port:
                item['default_port'] = fp.default_port
            data.append(item)
        return data
@@ -16,10 +16,9 @@ class GobyFingerprintService(BaseFingerprintService):
        """
        Validate a single Goby fingerprint

-       Rules:
-       - name must be present and non-empty
-       - logic must be present
-       - rule must be an array
+       Two formats are supported:
+       1. Standard format: {"name": "...", "logic": "...", "rule": [...]}
+       2. JSONL format: {"product": "...", "rule": "..."}

        Args:
            item: one fingerprint record

@@ -27,25 +26,43 @@ class GobyFingerprintService(BaseFingerprintService):
        Returns:
            bool: whether the record is valid
        """
+       # Standard format: name + logic + rule (array)
        name = item.get('name', '')
-       logic = item.get('logic', '')
-       rule = item.get('rule')
-       return bool(name and str(name).strip()) and bool(logic) and isinstance(rule, list)
+       if name and item.get('logic') is not None and isinstance(item.get('rule'), list):
+           return bool(str(name).strip())
+
+       # JSONL format: product + rule (string)
+       product = item.get('product', '')
+       rule = item.get('rule', '')
+       return bool(product and str(product).strip() and rule and str(rule).strip())

    def to_model_data(self, item: dict) -> dict:
        """
        Convert the Goby JSON format into model fields

+       Two input formats are supported:
+       1. Standard format: {"name": "...", "logic": "...", "rule": [...]}
+       2. JSONL format: {"product": "...", "rule": "..."}
+
        Args:
            item: raw Goby JSON record

        Returns:
            dict: model field data
        """
+       # Standard format
+       if 'name' in item and isinstance(item.get('rule'), list):
+           return {
+               'name': str(item.get('name', '')).strip(),
+               'logic': item.get('logic', ''),
+               'rule': item.get('rule', []),
+           }
+
+       # JSONL format: wrap the rule string in a single-element array
        return {
-           'name': str(item.get('name', '')).strip(),
-           'logic': item.get('logic', ''),
-           'rule': item.get('rule', []),
+           'name': str(item.get('product', '')).strip(),
+           'logic': 'or',  # JSONL format defaults to "or" logic
+           'rule': [item.get('rule', '')] if item.get('rule') else [],
        }

    def get_export_data(self) -> list:
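The two input shapes the updated validator accepts, checked directly against the methods shown above:

    from apps.engine.services.fingerprints import GobyFingerprintService

    standard = {"name": "Example", "logic": "a or b", "rule": ["a", "b"]}
    jsonl = {"product": "Example", "rule": 'title="example"'}

    svc = GobyFingerprintService()
    assert svc.validate_fingerprint(standard)
    assert svc.validate_fingerprint(jsonl)
    assert svc.to_model_data(jsonl)["rule"] == ['title="example"']  # string wrapped into an array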
@@ -186,7 +186,6 @@ class NucleiTemplateRepoService:
            RuntimeError: Git command execution failed
        """
        import subprocess
-       from apps.common.utils.git_proxy import get_git_proxy_url

        obj = self._get_repo_obj(repo_id)

@@ -197,14 +196,12 @@ class NucleiTemplateRepoService:
        cmd: List[str]
        action: str

-       # Get the proxied URL (when Git acceleration is enabled)
-       proxied_url = get_git_proxy_url(obj.repo_url)
-       if proxied_url != obj.repo_url:
-           logger.info("Using Git acceleration: %s -> %s", obj.repo_url, proxied_url)
+       # Use the original URL directly (Git acceleration no longer used)
+       repo_url = obj.repo_url

        # Decide between clone and pull
        if git_dir.is_dir():
-           # Check whether the remote URL changed (compare the original URL, not the proxied one)
+           # Check whether the remote URL changed
            current_remote = subprocess.run(
                ["git", "-C", str(local_path), "remote", "get-url", "origin"],
                check=False,
@@ -214,13 +211,13 @@ class NucleiTemplateRepoService:
            )
            current_url = current_remote.stdout.strip() if current_remote.returncode == 0 else ""

-           # Check whether a re-clone is needed (either the original or the proxied URL changed)
-           if current_url not in [obj.repo_url, proxied_url]:
+           # Check whether a re-clone is needed
+           if current_url != repo_url:
                # Remote URL changed: remove the old directory and clone again
-               logger.info("nuclei template repo %s remote URL changed, re-cloning: %s -> %s", obj.id, current_url, obj.repo_url)
+               logger.info("nuclei template repo %s remote URL changed, re-cloning: %s -> %s", obj.id, current_url, repo_url)
                shutil.rmtree(local_path)
                local_path.mkdir(parents=True, exist_ok=True)
-               cmd = ["git", "clone", "--depth", "1", proxied_url, str(local_path)]
+               cmd = ["git", "clone", "--depth", "1", repo_url, str(local_path)]
                action = "clone"
            else:
                # Repo exists and the URL is unchanged: pull
@@ -231,7 +228,7 @@ class NucleiTemplateRepoService:
            if local_path.exists() and not local_path.is_dir():
                raise RuntimeError(f"Local path exists and is not a directory: {local_path}")
            # --depth 1 shallow clone: fetch only the latest commit to save space and time
-           cmd = ["git", "clone", "--depth", "1", proxied_url, str(local_path)]
+           cmd = ["git", "clone", "--depth", "1", repo_url, str(local_path)]
            action = "clone"

        # Run the Git command
@@ -274,7 +274,7 @@ class TaskDistributor:
        network_arg = ""
        server_url = f"https://{settings.PUBLIC_HOST}:{settings.PUBLIC_PORT}"

-       # Mount paths (mount /opt/xingrin uniformly)
+       # Mount paths (mount /opt/xingrin uniformly; scan tools under /opt/xingrin-tools/bin are unaffected)
        host_xingrin_dir = "/opt/xingrin"

        # Environment variables: SERVER_URL + IS_LOCAL; other settings are fetched from the config center at container start
@@ -284,6 +284,7 @@ class TaskDistributor:
        env_vars = [
            f"-e SERVER_URL={shlex.quote(server_url)}",
            f"-e IS_LOCAL={is_local_str}",
+           f"-e WORKER_API_KEY={shlex.quote(settings.WORKER_API_KEY)}",  # Worker API auth key
            "-e PREFECT_HOME=/tmp/.prefect",  # put the Prefect data directory somewhere writable
            "-e PREFECT_SERVER_EPHEMERAL_ENABLED=true",  # enable the ephemeral server (local temporary server)
            "-e PREFECT_SERVER_EPHEMERAL_STARTUP_TIMEOUT_SECONDS=120",  # raise the startup timeout
@@ -311,11 +312,14 @@ class TaskDistributor:
        # - local Worker: install.sh pre-pulls the image; use the local version directly
        # - remote Worker: deploy pre-pulls the image; use the local version directly
        # - avoids checking Docker Hub on every task, improving performance and stability
-       # Wrap the sh -c command in double quotes so the single-quoted args produced by shlex.quote parse correctly
-       cmd = f'''docker run --rm -d --pull=missing {network_arg} \
-   {' '.join(env_vars)} \
-   {' '.join(volumes)} \
-   {self.docker_image} \
+       # OOM priority: --oom-score-adj=1000 makes the Worker the first to be killed under memory pressure
+       # - range -1000 to 1000; higher values are picked first by the OOM killer
+       # - protects core services (server/nginx/frontend) so the web UI stays available
+       cmd = f'''docker run --rm -d --pull=missing {network_arg} \\
+   --oom-score-adj=1000 \\
+   {' '.join(env_vars)} \\
+   {' '.join(volumes)} \\
+   {self.docker_image} \\
        sh -c "{inner_cmd}"'''

        return cmd
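A quick illustration of the shlex.quote escaping the env_vars entries rely on; the values here are made up:

    # shlex.quote leaves shell-safe strings untouched and single-quotes unsafe ones.
    import shlex

    print(f"-e SERVER_URL={shlex.quote('https://10.0.0.5:8443')}")  # safe chars: left unquoted
    print(f"-e WORKER_API_KEY={shlex.quote('k3y with spaces$')}")   # unsafe chars: single-quoted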
@@ -445,34 +449,33 @@ class TaskDistributor:
    def execute_scan_flow(
        self,
        scan_id: int,
-       target_name: str,
        target_id: int,
+       target_name: str,
        scan_workspace_dir: str,
        engine_name: str,
        scheduled_scan_name: str | None = None,
    ) -> tuple[bool, str, Optional[str], Optional[int]]:
        """
        Run a scan Flow on a remote or local Worker

        Args:
            scan_id: scan task ID
-           target_name: target name
            target_id: target ID
+           target_name: target name
            scan_workspace_dir: scan workspace directory
            engine_name: engine name
            scheduled_scan_name: scheduled scan name (optional)

        Returns:
            (success, message, container_id, worker_id) tuple

        Note:
            engine_config is fetched by the Flow itself from the database via scan_id
        """
        logger.info("="*60)
        logger.info("execute_scan_flow start")
        logger.info("  scan_id: %s", scan_id)
-       logger.info("  target_name: %s", target_name)
        logger.info("  target_id: %s", target_id)
+       logger.info("  target_name: %s", target_name)
        logger.info("  scan_workspace_dir: %s", scan_workspace_dir)
        logger.info("  engine_name: %s", engine_name)
        logger.info("  docker_image: %s", self.docker_image)
@@ -491,23 +494,22 @@ class TaskDistributor:
        # 3. Build the docker run command
        script_args = {
            'scan_id': scan_id,
            'target_name': target_name,
            'target_id': target_id,
            'scan_workspace_dir': scan_workspace_dir,
            'engine_name': engine_name,
        }
        if scheduled_scan_name:
            script_args['scheduled_scan_name'] = scheduled_scan_name

        docker_cmd = self._build_docker_command(
            worker=worker,
            script_module='apps.scan.scripts.run_initiate_scan',
            script_args=script_args,
        )

        logger.info(
-           "Submitting scan task to Worker: %s - Scan ID: %d, Target: %s",
-           worker.name, scan_id, target_name
+           "Submitting scan task to Worker: %s - Scan ID: %d, Target: %s (ID: %d)",
+           worker.name, scan_id, target_name, target_id
        )

        # 4. Run docker run (directly for local, via SSH for remote)
@@ -11,6 +11,9 @@ from .views.fingerprints import (
    EholeFingerprintViewSet,
    GobyFingerprintViewSet,
    WappalyzerFingerprintViewSet,
+   FingersFingerprintViewSet,
+   FingerPrintHubFingerprintViewSet,
+   ARLFingerprintViewSet,
)


@@ -24,6 +27,9 @@ router.register(r"nuclei/repos", NucleiTemplateRepoViewSet, basename="nuclei-rep
router.register(r"fingerprints/ehole", EholeFingerprintViewSet, basename="ehole-fingerprint")
router.register(r"fingerprints/goby", GobyFingerprintViewSet, basename="goby-fingerprint")
router.register(r"fingerprints/wappalyzer", WappalyzerFingerprintViewSet, basename="wappalyzer-fingerprint")
+router.register(r"fingerprints/fingers", FingersFingerprintViewSet, basename="fingers-fingerprint")
+router.register(r"fingerprints/fingerprinthub", FingerPrintHubFingerprintViewSet, basename="fingerprinthub-fingerprint")
+router.register(r"fingerprints/arl", ARLFingerprintViewSet, basename="arl-fingerprint")

urlpatterns = [
    path("", include(router.urls)),
@@ -7,10 +7,16 @@ from .base import BaseFingerprintViewSet
from .ehole import EholeFingerprintViewSet
from .goby import GobyFingerprintViewSet
from .wappalyzer import WappalyzerFingerprintViewSet
from .fingers import FingersFingerprintViewSet
from .fingerprinthub import FingerPrintHubFingerprintViewSet
from .arl import ARLFingerprintViewSet

__all__ = [
    "BaseFingerprintViewSet",
    "EholeFingerprintViewSet",
    "GobyFingerprintViewSet",
    "WappalyzerFingerprintViewSet",
    "FingersFingerprintViewSet",
    "FingerPrintHubFingerprintViewSet",
    "ARLFingerprintViewSet",
]
backend/apps/engine/views/fingerprints/arl.py (new file, 122 lines)
@@ -0,0 +1,122 @@
"""ARL fingerprint management ViewSet"""

import json
import yaml
from django.http import HttpResponse
from rest_framework.decorators import action
from rest_framework.exceptions import ValidationError

from apps.common.pagination import BasePagination
from apps.common.response_helpers import success_response
from apps.engine.models import ARLFingerprint
from apps.engine.serializers.fingerprints import ARLFingerprintSerializer
from apps.engine.services.fingerprints import ARLFingerprintService

from .base import BaseFingerprintViewSet


class ARLFingerprintViewSet(BaseFingerprintViewSet):
    """ARL fingerprint management ViewSet

    Inherits from BaseFingerprintViewSet and provides the following API:

    Standard CRUD (ModelViewSet):
    - GET /          list (paginated)
    - POST /         create one
    - GET /{id}/     retrieve
    - PUT /{id}/     update
    - DELETE /{id}/  delete

    Bulk operations (inherited from the base class):
    - POST /batch_create/  bulk create (JSON body)
    - POST /import_file/   file import (multipart/form-data, YAML supported)
    - POST /bulk-delete/   bulk delete
    - POST /delete-all/    delete all
    - GET /export/         export download (YAML format)

    Smart filter syntax (filter parameter):
    - name="word"        fuzzy match on the name field
    - name=="WordPress"  exact match
    - rule="body="       filter by rule content
    """

    queryset = ARLFingerprint.objects.all()
    serializer_class = ARLFingerprintSerializer
    pagination_class = BasePagination
    service_class = ARLFingerprintService

    # Ordering configuration
    ordering_fields = ['created_at', 'name']
    ordering = ['-created_at']

    # ARL filter field mapping
    FILTER_FIELD_MAPPING = {
        'name': 'name',
        'rule': 'rule',
    }

    def parse_import_data(self, json_data) -> list:
        """
        Parse ARL-format import data (JSON)

        Input format: a [{...}, {...}] array
        Returns: list of fingerprints
        """
        if isinstance(json_data, list):
            return json_data
        return []

    def get_export_filename(self) -> str:
        """Export filename"""
        return 'ARL.yaml'

    @action(detail=False, methods=['post'])
    def import_file(self, request):
        """
        File import (YAML and JSON supported)
        POST /api/engine/fingerprints/arl/import_file/

        Request format: multipart/form-data
        - file: a YAML or JSON file

        Returns: same as batch_create
        """
        file = request.FILES.get('file')
        if not file:
            raise ValidationError('Missing file')

        filename = file.name.lower()
        content = file.read().decode('utf-8')

        try:
            if filename.endswith('.yaml') or filename.endswith('.yml'):
                # YAML format
                fingerprints = yaml.safe_load(content)
            else:
                # JSON format (json imported at module level so the except
                # clause below never hits an unbound name on the YAML path)
                fingerprints = json.loads(content)
        except (yaml.YAMLError, json.JSONDecodeError) as e:
            raise ValidationError(f'Invalid file format: {e}')

        if not isinstance(fingerprints, list):
            raise ValidationError('File content must be an array')

        if not fingerprints:
            raise ValidationError('No valid fingerprint data in file')

        result = self.get_service().batch_create_fingerprints(fingerprints)
        return success_response(data=result)

    @action(detail=False, methods=['get'])
    def export(self, request):
        """
        Export fingerprints (YAML format)
        GET /api/engine/fingerprints/arl/export/

        Returns: YAML file download
        """
        data = self.get_service().get_export_data()
        content = yaml.dump(data, allow_unicode=True, default_flow_style=False, sort_keys=False)
        response = HttpResponse(content, content_type='application/x-yaml')
        response['Content-Disposition'] = f'attachment; filename="{self.get_export_filename()}"'
        return response
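A quick way to exercise the two endpoints above; host, port and authentication are assumptions to adapt to your deployment:

# Sketch: drive the ARL import/export endpoints shown above.
import requests

BASE = "http://localhost:8000/api/engine/fingerprints/arl"

# Import: multipart upload of a YAML fingerprint list
with open("ARL.yaml", "rb") as fh:
    r = requests.post(f"{BASE}/import_file/", files={"file": fh})
    r.raise_for_status()
    print(r.json())  # same shape as batch_create's result

# Export: download the current set as YAML
r = requests.get(f"{BASE}/export/")
open("ARL_export.yaml", "wb").write(r.content)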
@@ -13,6 +13,7 @@ from rest_framework.response import Response
from rest_framework.exceptions import ValidationError

from apps.common.pagination import BasePagination
from apps.common.response_helpers import success_response
from apps.common.utils.filter_utils import apply_filters

logger = logging.getLogger(__name__)
@@ -129,7 +130,7 @@ class BaseFingerprintViewSet(viewsets.ModelViewSet):
            raise ValidationError('fingerprints must be an array')

        result = self.get_service().batch_create_fingerprints(fingerprints)
        return Response(result, status=status.HTTP_201_CREATED)
        return success_response(data=result, status_code=status.HTTP_201_CREATED)

    @action(detail=False, methods=['post'])
    def import_file(self, request):
@@ -138,7 +139,7 @@ class BaseFingerprintViewSet(viewsets.ModelViewSet):
        POST /api/engine/fingerprints/{type}/import_file/

        Request format: multipart/form-data
        - file: JSON file
        - file: JSON file (standard JSON and JSONL supported)

        Returns: same as batch_create
        """
@@ -147,16 +148,54 @@ class BaseFingerprintViewSet(viewsets.ModelViewSet):
            raise ValidationError('Missing file')

        try:
            json_data = json.load(file)
            content = file.read().decode('utf-8')
            json_data = self._parse_json_content(content)
        except json.JSONDecodeError as e:
            raise ValidationError(f'Invalid JSON format: {e}')
        except UnicodeDecodeError as e:
            raise ValidationError(f'File encoding error: {e}')

        fingerprints = self.parse_import_data(json_data)
        if not fingerprints:
            raise ValidationError('No valid fingerprint data in file')

        result = self.get_service().batch_create_fingerprints(fingerprints)
        return Response(result, status=status.HTTP_201_CREATED)
        return success_response(data=result, status_code=status.HTTP_201_CREATED)

    def _parse_json_content(self, content: str):
        """
        Parse JSON content; supports standard JSON and JSONL

        Args:
            content: file content string

        Returns:
            the parsed data (list or dict)
        """
        content = content.strip()

        # Try standard JSON first
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            pass

        # Fall back to JSONL (one JSON object per line)
        lines = content.split('\n')
        result = []
        for i, line in enumerate(lines):
            line = line.strip()
            if not line:
                continue
            try:
                result.append(json.loads(line))
            except json.JSONDecodeError as e:
                raise json.JSONDecodeError(f'Line {i + 1} failed to parse: {e.msg}', e.doc, e.pos)

        if not result:
            raise json.JSONDecodeError('File is empty or invalid', content, 0)

        return result
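The fallback order matters: well-formed JSON parses on the first attempt, so the line-by-line path only ever runs for JSONL input. The same two-step parse as a self-contained snippet:

# Sketch: the JSON-then-JSONL fallback used by _parse_json_content above.
import json

def parse_json_or_jsonl(content: str):
    content = content.strip()
    try:
        return json.loads(content)           # standard JSON (array or object)
    except json.JSONDecodeError:
        pass
    return [json.loads(line) for line in content.splitlines() if line.strip()]

print(parse_json_or_jsonl('[{"name": "a"}, {"name": "b"}]'))  # standard JSON
print(parse_json_or_jsonl('{"name": "a"}\n{"name": "b"}'))    # JSONL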
    @action(detail=False, methods=['post'], url_path='bulk-delete')
    def bulk_delete(self, request):
@@ -174,7 +213,7 @@ class BaseFingerprintViewSet(viewsets.ModelViewSet):
            raise ValidationError('ids must be an array')

        deleted_count = self.queryset.model.objects.filter(id__in=ids).delete()[0]
        return Response({'deleted': deleted_count})
        return success_response(data={'deleted': deleted_count})

    @action(detail=False, methods=['post'], url_path='delete-all')
    def delete_all(self, request):
@@ -185,7 +224,7 @@ class BaseFingerprintViewSet(viewsets.ModelViewSet):
        Returns: {"deleted": 1000}
        """
        deleted_count = self.queryset.model.objects.all().delete()[0]
        return Response({'deleted': deleted_count})
        return success_response(data={'deleted': deleted_count})

    @action(detail=False, methods=['get'])
    def export(self, request):
backend/apps/engine/views/fingerprints/fingerprinthub.py (new file, 73 lines)
@@ -0,0 +1,73 @@
"""FingerPrintHub fingerprint management ViewSet"""

from apps.common.pagination import BasePagination
from apps.engine.models import FingerPrintHubFingerprint
from apps.engine.serializers.fingerprints import FingerPrintHubFingerprintSerializer
from apps.engine.services.fingerprints import FingerPrintHubFingerprintService

from .base import BaseFingerprintViewSet


class FingerPrintHubFingerprintViewSet(BaseFingerprintViewSet):
    """FingerPrintHub fingerprint management ViewSet

    Inherits from BaseFingerprintViewSet and provides the following API:

    Standard CRUD (ModelViewSet):
    - GET /          list (paginated)
    - POST /         create one
    - GET /{id}/     retrieve
    - PUT /{id}/     update
    - DELETE /{id}/  delete

    Bulk operations (inherited from the base class):
    - POST /batch_create/  bulk create (JSON body)
    - POST /import_file/   file import (multipart/form-data)
    - POST /bulk-delete/   bulk delete
    - POST /delete-all/    delete all
    - GET /export/         export download

    Smart filter syntax (filter parameter):
    - name="word"       fuzzy match on the name field
    - fp_id=="xxx"      exact match on the fingerprint ID
    - author="xxx"      filter by author
    - severity="info"   filter by severity
    - tags="cms"        filter by tag
    """

    queryset = FingerPrintHubFingerprint.objects.all()
    serializer_class = FingerPrintHubFingerprintSerializer
    pagination_class = BasePagination
    service_class = FingerPrintHubFingerprintService

    # Ordering configuration
    ordering_fields = ['created_at', 'name', 'severity']
    ordering = ['-created_at']

    # FingerPrintHub filter field mapping
    FILTER_FIELD_MAPPING = {
        'fp_id': 'fp_id',
        'name': 'name',
        'author': 'author',
        'tags': 'tags',
        'severity': 'severity',
        'source_file': 'source_file',
    }

    # JSON array fields (queried with __contains)
    JSON_ARRAY_FIELDS = ['http']

    def parse_import_data(self, json_data) -> list:
        """
        Parse FingerPrintHub JSON-format import data

        Input format: a [{...}, {...}] array
        Returns: list of fingerprints
        """
        if isinstance(json_data, list):
            return json_data
        return []

    def get_export_filename(self) -> str:
        """Export filename"""
        return 'fingerprinthub_web.json'
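`JSON_ARRAY_FIELDS` marks columns stored as JSON arrays, which the base class presumably queries with the ORM's `__contains` lookup rather than plain text matching. A toy sketch of the difference (the model and field names here are hypothetical):

# Sketch: filtering a JSONField array column vs. a scalar text column.
from django.db.models import Q

def build_filter(field: str, value: str, json_array_fields: set) -> Q:
    if field in json_array_fields:
        # JSON array membership, e.g. tags __contains ["cms"]
        return Q(**{f"{field}__contains": [value]})
    # plain fuzzy match for scalar text columns
    return Q(**{f"{field}__icontains": value})

q = build_filter("tags", "cms", json_array_fields={"tags", "http"})
# Fingerprint.objects.filter(q)  # `Fingerprint` is a stand-in model name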
backend/apps/engine/views/fingerprints/fingers.py (new file, 69 lines)
@@ -0,0 +1,69 @@
"""Fingers fingerprint management ViewSet"""

from apps.common.pagination import BasePagination
from apps.engine.models import FingersFingerprint
from apps.engine.serializers.fingerprints import FingersFingerprintSerializer
from apps.engine.services.fingerprints import FingersFingerprintService

from .base import BaseFingerprintViewSet


class FingersFingerprintViewSet(BaseFingerprintViewSet):
    """Fingers fingerprint management ViewSet

    Inherits from BaseFingerprintViewSet and provides the following API:

    Standard CRUD (ModelViewSet):
    - GET /          list (paginated)
    - POST /         create one
    - GET /{id}/     retrieve
    - PUT /{id}/     update
    - DELETE /{id}/  delete

    Bulk operations (inherited from the base class):
    - POST /batch_create/  bulk create (JSON body)
    - POST /import_file/   file import (multipart/form-data)
    - POST /bulk-delete/   bulk delete
    - POST /delete-all/    delete all
    - GET /export/         export download

    Smart filter syntax (filter parameter):
    - name="word"        fuzzy match on the name field
    - name=="WordPress"  exact match
    - tag="cms"          filter by tag
    - focus="true"       filter by the focus flag
    """

    queryset = FingersFingerprint.objects.all()
    serializer_class = FingersFingerprintSerializer
    pagination_class = BasePagination
    service_class = FingersFingerprintService

    # Ordering configuration
    ordering_fields = ['created_at', 'name']
    ordering = ['-created_at']

    # Fingers filter field mapping
    FILTER_FIELD_MAPPING = {
        'name': 'name',
        'link': 'link',
        'focus': 'focus',
    }

    # JSON array fields (queried with __contains)
    JSON_ARRAY_FIELDS = ['tag', 'rule', 'default_port']

    def parse_import_data(self, json_data) -> list:
        """
        Parse Fingers JSON-format import data

        Input format: a [{...}, {...}] array
        Returns: list of fingerprints
        """
        if isinstance(json_data, list):
            return json_data
        return []

    def get_export_filename(self) -> str:
        """Export filename"""
        return 'fingers_http.json'
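Each ViewSet docstring advertises the same `filter` mini-language: `==` for exact match, `=` for fuzzy match, with `FILTER_FIELD_MAPPING` whitelisting the queryable fields. The real parser is `apps.common.utils.filter_utils.apply_filters`; a toy reconstruction of the grammar, for illustration only:

# Sketch: parse name=="WordPress" / rule="body=" style filter expressions
# into Django ORM lookups. A reconstruction, not the project's code.
import re

FILTER_RE = re.compile(r'^(?P<field>\w+)(?P<op>==|=)"(?P<value>[^"]*)"$')

def parse_filter(expr: str, field_mapping: dict) -> dict:
    m = FILTER_RE.match(expr.strip())
    if not m or m["field"] not in field_mapping:
        raise ValueError(f"unsupported filter: {expr}")
    column = field_mapping[m["field"]]
    lookup = "exact" if m["op"] == "==" else "icontains"
    return {f"{column}__{lookup}": m["value"]}

print(parse_filter('name=="WordPress"', {"name": "name"}))  # {'name__exact': 'WordPress'}
print(parse_filter('rule="body="', {"rule": "rule"}))       # {'rule__icontains': 'body='}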
@@ -31,6 +31,8 @@ from rest_framework.decorators import action
from rest_framework.request import Request
from rest_framework.response import Response

from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.engine.models import NucleiTemplateRepo
from apps.engine.serializers import NucleiTemplateRepoSerializer
from apps.engine.services import NucleiTemplateRepoService
@@ -107,18 +109,30 @@ class NucleiTemplateRepoViewSet(viewsets.ModelViewSet):
        try:
            repo_id = int(pk) if pk is not None else None
        except (TypeError, ValueError):
            return Response({"message": "Invalid repository ID"}, status=status.HTTP_400_BAD_REQUEST)
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Invalid repository ID',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Call the Service layer
        try:
            result = self.service.refresh_repo(repo_id)
        except ValidationError as exc:
            return Response({"message": str(exc)}, status=status.HTTP_400_BAD_REQUEST)
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message=str(exc),
                status_code=status.HTTP_400_BAD_REQUEST
            )
        except Exception as exc:  # noqa: BLE001
            logger.error("Failed to refresh Nuclei template repo: %s", exc, exc_info=True)
            return Response({"message": f"Repo refresh failed: {exc}"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message=f'Refresh failed: {exc}',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

        return Response({"message": "Refresh succeeded", "result": result}, status=status.HTTP_200_OK)
        return success_response(data={'result': result})
|
||||
|
||||
@@ -142,18 +156,30 @@ class NucleiTemplateRepoViewSet(viewsets.ModelViewSet):
|
||||
try:
|
||||
repo_id = int(pk) if pk is not None else None
|
||||
except (TypeError, ValueError):
|
||||
return Response({"message": "无效的仓库 ID"}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Invalid repository ID',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
# 调用 Service 层,仅从当前本地目录读取目录树
|
||||
try:
|
||||
roots = self.service.get_template_tree(repo_id)
|
||||
except ValidationError as exc:
|
||||
return Response({"message": str(exc)}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message=str(exc),
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.error("获取 Nuclei 模板目录树失败: %s", exc, exc_info=True)
|
||||
return Response({"message": "获取模板目录树失败"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||
return error_response(
|
||||
code=ErrorCodes.SERVER_ERROR,
|
||||
message='Failed to get template tree',
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
|
||||
)
|
||||
|
||||
return Response({"roots": roots})
|
||||
return success_response(data={'roots': roots})
|
||||
|
||||
@action(detail=True, methods=["get"], url_path="templates/content")
|
||||
def templates_content(self, request: Request, pk: str | None = None) -> Response:
|
||||
@@ -174,23 +200,43 @@ class NucleiTemplateRepoViewSet(viewsets.ModelViewSet):
|
||||
try:
|
||||
repo_id = int(pk) if pk is not None else None
|
||||
except (TypeError, ValueError):
|
||||
return Response({"message": "无效的仓库 ID"}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Invalid repository ID',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
# 解析 path 参数
|
||||
rel_path = (request.query_params.get("path", "") or "").strip()
|
||||
if not rel_path:
|
||||
return Response({"message": "缺少 path 参数"}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Missing path parameter',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
# 调用 Service 层
|
||||
try:
|
||||
result = self.service.get_template_content(repo_id, rel_path)
|
||||
except ValidationError as exc:
|
||||
return Response({"message": str(exc)}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message=str(exc),
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.error("获取 Nuclei 模板内容失败: %s", exc, exc_info=True)
|
||||
return Response({"message": "获取模板内容失败"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||
return error_response(
|
||||
code=ErrorCodes.SERVER_ERROR,
|
||||
message='Failed to get template content',
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
|
||||
)
|
||||
|
||||
# 文件不存在
|
||||
if result is None:
|
||||
return Response({"message": "模板不存在或无法读取"}, status=status.HTTP_404_NOT_FOUND)
|
||||
return Response(result)
|
||||
return error_response(
|
||||
code=ErrorCodes.NOT_FOUND,
|
||||
message='Template not found or unreadable',
|
||||
status_code=status.HTTP_404_NOT_FOUND
|
||||
)
|
||||
return success_response(data=result)
|
||||
|
||||
@@ -9,6 +9,8 @@ from rest_framework.decorators import action
|
||||
from rest_framework.response import Response
|
||||
|
||||
from apps.common.pagination import BasePagination
|
||||
from apps.common.response_helpers import success_response, error_response
|
||||
from apps.common.error_codes import ErrorCodes
|
||||
from apps.engine.serializers.wordlist_serializer import WordlistSerializer
|
||||
from apps.engine.services.wordlist_service import WordlistService
|
||||
|
||||
@@ -46,7 +48,11 @@ class WordlistViewSet(viewsets.ViewSet):
|
||||
uploaded_file = request.FILES.get("file")
|
||||
|
||||
if not uploaded_file:
|
||||
return Response({"error": "缺少字典文件"}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Missing wordlist file',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
try:
|
||||
wordlist = self.service.create_wordlist(
|
||||
@@ -55,21 +61,32 @@ class WordlistViewSet(viewsets.ViewSet):
|
||||
uploaded_file=uploaded_file,
|
||||
)
|
||||
except ValidationError as exc:
|
||||
return Response({"error": str(exc)}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message=str(exc),
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
serializer = WordlistSerializer(wordlist)
|
||||
return Response(serializer.data, status=status.HTTP_201_CREATED)
|
||||
return success_response(data=serializer.data, status_code=status.HTTP_201_CREATED)
|
||||
|
||||
def destroy(self, request, pk=None):
|
||||
"""删除字典记录"""
|
||||
try:
|
||||
wordlist_id = int(pk)
|
||||
except (TypeError, ValueError):
|
||||
return Response({"error": "无效的 ID"}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Invalid ID',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
success = self.service.delete_wordlist(wordlist_id)
|
||||
if not success:
|
||||
return Response({"error": "字典不存在"}, status=status.HTTP_404_NOT_FOUND)
|
||||
return error_response(
|
||||
code=ErrorCodes.NOT_FOUND,
|
||||
status_code=status.HTTP_404_NOT_FOUND
|
||||
)
|
||||
|
||||
return Response(status=status.HTTP_204_NO_CONTENT)
|
||||
|
||||
@@ -82,15 +99,27 @@ class WordlistViewSet(viewsets.ViewSet):
|
||||
"""
|
||||
name = (request.query_params.get("wordlist", "") or "").strip()
|
||||
if not name:
|
||||
return Response({"error": "缺少参数 wordlist"}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Missing parameter: wordlist',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
wordlist = self.service.get_wordlist_by_name(name)
|
||||
if not wordlist:
|
||||
return Response({"error": "字典不存在"}, status=status.HTTP_404_NOT_FOUND)
|
||||
return error_response(
|
||||
code=ErrorCodes.NOT_FOUND,
|
||||
message='Wordlist not found',
|
||||
status_code=status.HTTP_404_NOT_FOUND
|
||||
)
|
||||
|
||||
file_path = wordlist.file_path
|
||||
if not file_path or not os.path.exists(file_path):
|
||||
return Response({"error": "字典文件不存在"}, status=status.HTTP_404_NOT_FOUND)
|
||||
return error_response(
|
||||
code=ErrorCodes.NOT_FOUND,
|
||||
message='Wordlist file not found',
|
||||
status_code=status.HTTP_404_NOT_FOUND
|
||||
)
|
||||
|
||||
filename = os.path.basename(file_path)
|
||||
response = FileResponse(open(file_path, "rb"), as_attachment=True, filename=filename)
|
||||
@@ -106,22 +135,38 @@ class WordlistViewSet(viewsets.ViewSet):
|
||||
try:
|
||||
wordlist_id = int(pk)
|
||||
except (TypeError, ValueError):
|
||||
return Response({"error": "无效的 ID"}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Invalid ID',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
if request.method == "GET":
|
||||
content = self.service.get_wordlist_content(wordlist_id)
|
||||
if content is None:
|
||||
return Response({"error": "字典不存在或文件无法读取"}, status=status.HTTP_404_NOT_FOUND)
|
||||
return Response({"content": content})
|
||||
return error_response(
|
||||
code=ErrorCodes.NOT_FOUND,
|
||||
message='Wordlist not found or file unreadable',
|
||||
status_code=status.HTTP_404_NOT_FOUND
|
||||
)
|
||||
return success_response(data={"content": content})
|
||||
|
||||
elif request.method == "PUT":
|
||||
content = request.data.get("content")
|
||||
if content is None:
|
||||
return Response({"error": "缺少 content 参数"}, status=status.HTTP_400_BAD_REQUEST)
|
||||
return error_response(
|
||||
code=ErrorCodes.VALIDATION_ERROR,
|
||||
message='Missing content parameter',
|
||||
status_code=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
wordlist = self.service.update_wordlist_content(wordlist_id, content)
|
||||
if not wordlist:
|
||||
return Response({"error": "字典不存在或更新失败"}, status=status.HTTP_404_NOT_FOUND)
|
||||
return error_response(
|
||||
code=ErrorCodes.NOT_FOUND,
|
||||
message='Wordlist not found or update failed',
|
||||
status_code=status.HTTP_404_NOT_FOUND
|
||||
)
|
||||
|
||||
serializer = WordlistSerializer(wordlist)
|
||||
return Response(serializer.data)
|
||||
return success_response(data=serializer.data)
|
||||
|
||||
@@ -9,6 +9,8 @@ from rest_framework import viewsets, status
from rest_framework.decorators import action
from rest_framework.response import Response

from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.engine.serializers import WorkerNodeSerializer
from apps.engine.services import WorkerService
from apps.common.signals import worker_delete_failed
@@ -111,9 +113,8 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
        threading.Thread(target=_async_remote_uninstall, daemon=True).start()

        # 3. Return success immediately
        return Response(
            {"message": f"Node {worker_name} deleted"},
            status=status.HTTP_200_OK
        return success_response(
            data={'name': worker_name}
        )

    @action(detail=True, methods=['post'])
@@ -190,11 +191,13 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
            worker.status = 'online'
            worker.save(update_fields=['status'])

        return Response({
            'status': 'ok',
            'need_update': need_update,
            'server_version': server_version
        })
        return success_response(
            data={
                'status': 'ok',
                'needUpdate': need_update,
                'serverVersion': server_version
            }
        )

    def _trigger_remote_agent_update(self, worker, target_version: str):
        """
@@ -304,9 +307,10 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
        is_local = request.data.get('is_local', True)

        if not name:
            return Response(
                {'error': 'Missing name parameter'},
                status=status.HTTP_400_BAD_REQUEST
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Missing name parameter',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        worker, created = self.worker_service.register_worker(
@@ -314,11 +318,13 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
            is_local=is_local
        )

        return Response({
            'worker_id': worker.id,
            'name': worker.name,
            'created': created
        })
        return success_response(
            data={
                'workerId': worker.id,
                'name': worker.name,
                'created': created
            }
        )

    @action(detail=False, methods=['get'])
    def config(self, request):
@@ -334,13 +340,12 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
        Returns:
            {
                "db": {"host": "...", "port": "...", ...},
                "redisUrl": "...",
                "paths": {"results": "...", "logs": "..."}
            }

        Config logic:
        - local Worker (is_local=true): db_host=postgres, redis=redis:6379
        - remote Worker (is_local=false): db_host=PUBLIC_HOST, redis=PUBLIC_HOST:6379
        - local Worker (is_local=true): db_host=postgres
        - remote Worker (is_local=false): db_host=PUBLIC_HOST
        """
        from django.conf import settings
        import logging
@@ -365,39 +370,35 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
            if is_local_worker:
                # Local Worker: use the internal Docker service name directly
                worker_db_host = 'postgres'
                worker_redis_url = 'redis://redis:6379/0'
            else:
                # Remote Worker: reach the server via its public IP
                public_host = settings.PUBLIC_HOST
                if public_host in ('server', 'localhost', '127.0.0.1'):
                    logger.warning("Remote Worker requested config, but PUBLIC_HOST=%s is not a valid public address", public_host)
                worker_db_host = public_host
                worker_redis_url = f'redis://{public_host}:6379/0'
        else:
            # Remote-database scenario: every Worker uses DB_HOST
            worker_db_host = db_host
            worker_redis_url = getattr(settings, 'WORKER_REDIS_URL', 'redis://redis:6379/0')

        logger.info("Returning Worker config - db_host: %s, redis_url: %s", worker_db_host, worker_redis_url)
        logger.info("Returning Worker config - db_host: %s", worker_db_host)

        return Response({
            'db': {
                'host': worker_db_host,
                'port': str(settings.DATABASES['default']['PORT']),
                'name': settings.DATABASES['default']['NAME'],
                'user': settings.DATABASES['default']['USER'],
                'password': settings.DATABASES['default']['PASSWORD'],
            },
            'redisUrl': worker_redis_url,
            'paths': {
                'results': getattr(settings, 'CONTAINER_RESULTS_MOUNT', '/opt/xingrin/results'),
                'logs': getattr(settings, 'CONTAINER_LOGS_MOUNT', '/opt/xingrin/logs'),
            },
            'logging': {
                'level': os.getenv('LOG_LEVEL', 'INFO'),
                'enableCommandLogging': os.getenv('ENABLE_COMMAND_LOGGING', 'true').lower() == 'true',
            },
            'debug': settings.DEBUG,
            # Git mirror config (speeds up git clone, e.g. for Nuclei template repos)
            'gitMirror': os.getenv('GIT_MIRROR', ''),
        })
        return success_response(
            data={
                'db': {
                    'host': worker_db_host,
                    'port': str(settings.DATABASES['default']['PORT']),
                    'name': settings.DATABASES['default']['NAME'],
                    'user': settings.DATABASES['default']['USER'],
                    'password': settings.DATABASES['default']['PASSWORD'],
                },
                'paths': {
                    'results': getattr(settings, 'CONTAINER_RESULTS_MOUNT', '/opt/xingrin/results'),
                    'logs': getattr(settings, 'CONTAINER_LOGS_MOUNT', '/opt/xingrin/logs'),
                },
                'logging': {
                    'level': os.getenv('LOG_LEVEL', 'INFO'),
                    'enableCommandLogging': os.getenv('ENABLE_COMMAND_LOGGING', 'true').lower() == 'true',
                },
                'debug': settings.DEBUG,
            }
        )
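Assuming the envelope sketched earlier, a remote worker calling GET /config now receives roughly the following (all values are illustrative placeholders; note that `redisUrl` and `gitMirror` are gone from the payload, matching the removals above):

{
  "success": true,
  "data": {
    "db": {"host": "203.0.113.10", "port": "5432", "name": "xingrin", "user": "xingrin", "password": "..."},
    "paths": {"results": "/opt/xingrin/results", "logs": "/opt/xingrin/logs"},
    "logging": {"level": "INFO", "enableCommandLogging": true},
    "debug": false
  }
}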
@@ -7,35 +7,25 @@
from django.conf import settings

# ==================== Path configuration ====================
SCAN_TOOLS_BASE_PATH = getattr(settings, 'SCAN_TOOLS_BASE_PATH', '/opt/xingrin/tools')
SCAN_TOOLS_BASE_PATH = getattr(settings, 'SCAN_TOOLS_BASE_PATH', '/usr/local/bin')

# ==================== Subdomain discovery ====================

SUBDOMAIN_DISCOVERY_COMMANDS = {
    'subfinder': {
        # Default to all data sources (more thorough, slightly slower), recursion always on
        # -all        use all data sources
        # -recursive  enable recursive enumeration on sources that support it (on by default)
        'base': "subfinder -d {domain} -all -recursive -o '{output_file}' -silent",
        # Use all data sources (including paid ones, provided an API key is configured)
        # -all  use all data sources (slow but thorough)
        # -v    verbose output, including which sources were used (for debugging)
        # Note: do NOT add -recursive; it excludes sources that do not support recursion (e.g. fofa)
        'base': "subfinder -d {domain} -all -o '{output_file}' -v",
        'optional': {
            'threads': '-t {threads}',  # number of concurrent goroutines
            'provider_config': "-pc '{provider_config}'",  # provider config file path
        }
    },

    'amass_passive': {
        # Run passive enumeration into amass's internal database first, then export plain names to output_file
        # -silent  disable the progress bar and other output
        'base': "amass enum -passive -silent -d {domain} && amass subs -names -d {domain} > '{output_file}'"
    },

    'amass_active': {
        # Run active enumeration + brute force into amass's internal database first, then export plain names to output_file
        # -silent  disable the progress bar and other output
        'base': "amass enum -active -silent -d {domain} -brute && amass subs -names -d {domain} > '{output_file}'"
    },

    'sublist3r': {
        'base': "python3 '{scan_tools_base}/Sublist3r/sublist3r.py' -d {domain} -o '{output_file}'",
        'base': "python3 '/usr/local/share/Sublist3r/sublist3r.py' -d {domain} -o '{output_file}'",
        'optional': {
            'threads': '-t {threads}'
        }
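Each entry pairs a `base` template with `optional` flag templates that are appended only when the tool's YAML config provides a value. The project's real builder is `apps.scan.utils.build_scan_command`; this sketch only illustrates the substitution pattern:

# Sketch: how a 'base' + 'optional' command template might be rendered.
def render_command(template: dict, params: dict, tool_config: dict) -> str:
    parts = [template['base'].format(**params)]
    for key, flag_tpl in template.get('optional', {}).items():
        if tool_config.get(key) is not None:
            parts.append(flag_tpl.format(**{key: tool_config[key]}))
    return ' '.join(parts)

subfinder = {
    'base': "subfinder -d {domain} -all -o '{output_file}' -v",
    'optional': {'threads': '-t {threads}'},
}
print(render_command(subfinder,
                     {'domain': 'example.com', 'output_file': '/tmp/subs.txt'},
                     {'threads': 10}))
# subfinder -d example.com -all -o '/tmp/subs.txt' -v -t 10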
@@ -97,9 +87,11 @@ SITE_SCAN_COMMANDS = {
        'base': (
            "'{scan_tools_base}/httpx' -l '{url_file}' "
            '-status-code -content-type -content-length '
            '-location -title -server -body-preview '
            '-location -title -server '
            '-tech-detect -cdn -vhost '
            '-random-agent -no-color -json'
            '-include-response '
            '-rstr 2000 '
            '-random-agent -no-color -json -silent'
        ),
        'optional': {
            'threads': '-threads {threads}',
@@ -115,7 +107,7 @@ SITE_SCAN_COMMANDS = {

DIRECTORY_SCAN_COMMANDS = {
    'ffuf': {
        'base': "ffuf -u '{url}FUZZ' -se -ac -sf -json -w '{wordlist}'",
        'base': "'{scan_tools_base}/ffuf' -u '{url}FUZZ' -se -ac -sf -json -w '{wordlist}'",
        'optional': {
            'delay': '-p {delay}',
            'threads': '-t {threads}',
@@ -169,9 +161,11 @@ URL_FETCH_COMMANDS = {
        'base': (
            "'{scan_tools_base}/httpx' -l '{url_file}' "
            '-status-code -content-type -content-length '
            '-location -title -server -body-preview '
            '-location -title -server '
            '-tech-detect -cdn -vhost '
            '-random-agent -no-color -json'
            '-include-response '
            '-rstr 2000 '
            '-random-agent -no-color -json -silent'
        ),
        'optional': {
            'threads': '-threads {threads}',
@@ -209,7 +203,7 @@ VULN_SCAN_COMMANDS = {
        # -silent: silent mode
        # -l: input URL list file
        # -t: template directory path (multiple repos supported; repeated -t flags are concatenated via template_args)
        'base': "nuclei -j -silent -l '{endpoints_file}' {template_args}",
        'base': "nuclei -j -silent -l '{input_file}' {template_args}",
        'optional': {
            'concurrency': '-c {concurrency}',  # concurrency (default 25)
            'rate_limit': '-rl {rate_limit}',  # requests-per-second limit
@@ -220,7 +214,12 @@ VULN_SCAN_COMMANDS = {
            'tags': '-tags {tags}',  # filter by tags
            'exclude_tags': '-etags {exclude_tags}',  # exclude tags
        },
        'input_type': 'endpoints_file',
        # Multiple input types supported; the user chooses via scan_endpoints/scan_websites
        'input_types': ['endpoints_file', 'websites_file'],
        'defaults': {
            'scan_endpoints': False,  # do not scan endpoints by default
            'scan_websites': True,  # scan websites by default
        },
    },
}
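Replacing the single `input_type` with an `input_types` list means the flow must decide per run which exported file fills `{input_file}`. A sketch of that selection, merging the template's `defaults` with user overrides (assuming the YAML's `scan-endpoints`/`scan-websites` keys are normalized to underscores, as noted elsewhere for `max-workers`):

# Sketch: pick nuclei input files from input_types + defaults + user config.
def select_input_files(template: dict, tool_config: dict) -> list:
    merged = {**template.get('defaults', {}), **tool_config}
    selected = []
    if merged.get('scan_endpoints') and 'endpoints_file' in template['input_types']:
        selected.append('endpoints_file')
    if merged.get('scan_websites') and 'websites_file' in template['input_types']:
        selected.append('websites_file')
    return selected

nuclei_tpl = {'input_types': ['endpoints_file', 'websites_file'],
              'defaults': {'scan_endpoints': False, 'scan_websites': True}}
print(select_input_files(nuclei_tpl, {}))                        # ['websites_file']
print(select_input_files(nuclei_tpl, {'scan_endpoints': True}))  # both files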
@@ -239,6 +238,9 @@ FINGERPRINT_DETECT_COMMANDS = {
            'ehole': '--ehole {ehole}',
            'goby': '--goby {goby}',
            'wappalyzer': '--wappalyzer {wappalyzer}',
            'fingers': '--fingers {fingers}',
            'fingerprinthub': '--fingerprint {fingerprinthub}',
            'arl': '--arl {arl}',
        }
    },
}
@@ -254,11 +256,16 @@ COMMAND_TEMPLATES = {
    'directory_scan': DIRECTORY_SCAN_COMMANDS,
    'url_fetch': URL_FETCH_COMMANDS,
    'vuln_scan': VULN_SCAN_COMMANDS,
    'screenshot': {},  # uses a native Python library (Playwright); no command template
}

# ==================== Scan type configuration ====================

# Execution stage definitions (run in order)
# Stage 1: asset discovery - subdomains → ports → site probing → fingerprinting
# Stage 2: URL collection - URL fetch + directory scan (parallel)
# Stage 3: screenshots - runs after URL collection to capture newly discovered pages
# Stage 4: vulnerability scan - runs last
EXECUTION_STAGES = [
    {
        'mode': 'sequential',
@@ -268,6 +275,10 @@ EXECUTION_STAGES = [
        'mode': 'parallel',
        'flows': ['url_fetch', 'directory_scan']
    },
    {
        'mode': 'sequential',
        'flows': ['screenshot']
    },
    {
        'mode': 'sequential',
        'flows': ['vuln_scan']
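A stage list like this implies a tiny interpreter: walk the stages in order, run a stage's flows concurrently when `mode` is `'parallel'` and one after another otherwise. A minimal sketch, with `run_flow` standing in for whatever actually launches a Prefect flow:

# Sketch: interpret EXECUTION_STAGES. `run_flow` is a placeholder.
from concurrent.futures import ThreadPoolExecutor
from typing import Callable

def run_stages(stages: list, run_flow: Callable[[str], None]) -> None:
    for stage in stages:
        flows = stage['flows']
        if stage['mode'] == 'parallel' and len(flows) > 1:
            with ThreadPoolExecutor(max_workers=len(flows)) as pool:
                list(pool.map(run_flow, flows))   # wait for the whole stage
        else:
            for flow_name in flows:
                run_flow(flow_name)

run_stages(
    [{'mode': 'parallel', 'flows': ['url_fetch', 'directory_scan']},
     {'mode': 'sequential', 'flows': ['screenshot']}],
    run_flow=lambda name: print(f"running {name}"),
)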
@@ -4,14 +4,12 @@
# Required parameter: enabled (whether the tool is on)
# Optional parameter: timeout (seconds; default auto = computed automatically)

# ==================== Subdomain discovery ====================
#
# Stage 1: passive collection (parallel) - required; enable at least one tool
# Stage 2: dictionary brute force (optional) - enumerate subdomains with a wordlist
# Stage 3: mutation generation + validation (optional) - mutate discovered names, validate liveness as a stream
# Stage 4: DNS liveness validation (optional) - check that all candidate names resolve
#
subdomain_discovery:
  # ==================== Subdomain discovery ====================
  # Stage 1: passive collection (parallel) - required; enable at least one tool
  # Stage 2: dictionary brute force (optional) - enumerate subdomains with a wordlist
  # Stage 3: mutation generation + validation (optional) - mutate discovered names, validate liveness as a stream
  # Stage 4: DNS liveness validation (optional) - check that all candidate names resolve
  # === Stage 1: passive collection tools (run in parallel) ===
  passive_tools:
    subfinder:
@@ -19,14 +17,6 @@ subdomain_discovery:
      timeout: 3600  # 1 hour
      # threads: 10  # concurrent goroutines

    amass_passive:
      enabled: true
      timeout: 3600

    amass_active:
      enabled: true  # active enumeration + brute force
      timeout: 3600

    sublist3r:
      enabled: true
      timeout: 3600
@@ -53,10 +43,10 @@ subdomain_discovery:
  resolve:
    enabled: true
  subdomain_resolve:
    # timeout: auto  # computed automatically from the number of candidate subdomains
    timeout: auto  # computed automatically from the number of candidate subdomains

# ==================== Port scan ====================
port_scan:
  # ==================== Port scan ====================
  tools:
    naabu_active:
      enabled: true
@@ -64,14 +54,14 @@ port_scan:
      threads: 200  # concurrent connections (default 5)
      # ports: 1-65535  # port range to scan (default 1-65535)
      top-ports: 100  # scan the nmap top 100 ports
      rate: 10  # scan rate (default 10)
      rate: 50  # scan rate

    naabu_passive:
      enabled: true
      # timeout: auto  # passive scans are usually fast

# ==================== Site scan ====================
site_scan:
  # ==================== Site scan ====================
  tools:
    httpx:
      enabled: true
@@ -81,16 +71,16 @@ site_scan:
      # request-timeout: 10  # per-request timeout in seconds (default 10)
      # retries: 2  # retries on request failure

# ==================== Fingerprinting ====================
# Runs serially after site_scan; identifies each WebSite's tech stack
fingerprint_detect:
  # ==================== Fingerprinting ====================
  # Runs serially after the site scan; identifies each WebSite's tech stack
  tools:
    xingfinger:
      enabled: true
      fingerprint-libs: [ehole, goby, wappalyzer]  # enabled fingerprint libraries: ehole, goby, wappalyzer, fingers, fingerprinthub
      fingerprint-libs: [ehole, goby, wappalyzer, fingers, fingerprinthub, arl]  # all fingerprint libraries enabled by default

# ==================== Directory scan ====================
directory_scan:
  # ==================== Directory scan ====================
  tools:
    ffuf:
      enabled: true
@@ -103,8 +93,18 @@ directory_scan:
      match-codes: 200,201,301,302,401,403  # HTTP status codes to match
      # rate: 0  # requests per second (default 0 = unlimited)

# ==================== URL fetch ====================
screenshot:
  # ==================== Website screenshots ====================
  # Uses Playwright to capture websites, saved in WebP format
  # Runs as its own stage after url_fetch and directory_scan finish (see EXECUTION_STAGES)
  tools:
    playwright:
      enabled: true
      concurrency: 5  # concurrent screenshots (default 5)
      url_sources: [websites]  # URL sources; currently websites only, [websites, endpoints] also works

url_fetch:
  # ==================== URL fetch ====================
  tools:
    waymore:
      enabled: true
@@ -142,8 +142,8 @@ url_fetch:
      # request-timeout: 10  # per-request timeout in seconds (default 10)
      # retries: 2  # retries on request failure

# ==================== Vulnerability scan ====================
vuln_scan:
  # ==================== Vulnerability scan ====================
  tools:
    dalfox_xss:
      enabled: true
@@ -158,7 +158,9 @@ vuln_scan:

    nuclei:
      enabled: true
      # timeout: auto  # computed automatically (from the endpoints line count)
      # timeout: auto  # computed automatically (from the input URL line count)
      scan-endpoints: false  # whether to scan endpoints (off by default)
      scan-websites: true  # whether to scan websites (on by default)
      template-repo-names:  # template repo list; names match the "Nuclei templates" page
        - nuclei-templates
        # - nuclei-custom  # additional custom repos may be appended
@@ -10,30 +10,30 @@
- Configuration parsed from YAML
"""

# Django environment initialization (takes effect on import)
from apps.common.prefect_django_setup import setup_django_for_prefect

from prefect import flow
from prefect.task_runners import ThreadPoolTaskRunner

import hashlib
import logging
import os
import subprocess
from datetime import datetime
from pathlib import Path
from typing import List, Tuple

from apps.scan.tasks.directory_scan import (
    export_sites_task,
    run_and_stream_save_directories_task
)
from prefect import flow

from apps.scan.handlers.scan_flow_handlers import (
    on_scan_flow_running,
    on_scan_flow_completed,
    on_scan_flow_failed,
    on_scan_flow_running,
)
from apps.scan.tasks.directory_scan import (
    export_sites_task,
    run_and_stream_save_directories_task,
)
from apps.scan.utils import (
    build_scan_command,
    ensure_wordlist_local,
    user_log,
    wait_for_system_load,
)
from apps.scan.utils import config_parser, build_scan_command, ensure_wordlist_local

logger = logging.getLogger(__name__)
@@ -45,496 +45,344 @@ def calculate_directory_scan_timeout(
    tool_config: dict,
    base_per_word: float = 1.0,
    min_timeout: int = 60,
    max_timeout: int = 7200
) -> int:
    """
    Compute the directory-scan timeout from the wordlist line count

    Formula: timeout = wordlist line count × base time per word
    Timeout range: 60 seconds to 2 hours (7200 seconds)

    Timeout range: minimum 60 seconds, no upper bound

    Args:
        tool_config: tool config dict containing the wordlist path
        base_per_word: base time per word in seconds (default 1.0)
        min_timeout: minimum timeout in seconds (default 60)
        max_timeout: maximum timeout in seconds (default 7200, i.e. 2 hours)

    Returns:
        int: the computed timeout in seconds, in the range 60..7200

    Example:
        # a 1000-line wordlist × 1.0s = 1000s → under the 7200s cap, so 1000s
        # a 10000-line wordlist × 1.0s = 10000s → capped at 7200s (the maximum)
        timeout = calculate_directory_scan_timeout(
            tool_config={'wordlist': '/path/to/wordlist.txt'}
        )
        int: the computed timeout in seconds
    """
    import os

    wordlist_path = tool_config.get('wordlist')
    if not wordlist_path:
        logger.warning("No wordlist in tool config; using default timeout: %ds", min_timeout)
        return min_timeout

    wordlist_path = os.path.expanduser(wordlist_path)

    if not os.path.exists(wordlist_path):
        logger.warning("Wordlist file not found: %s; using default timeout: %ds", wordlist_path, min_timeout)
        return min_timeout

    try:
        # Get the wordlist path from tool_config
        wordlist_path = tool_config.get('wordlist')
        if not wordlist_path:
            logger.warning("No wordlist in tool config; using default timeout: %ds", min_timeout)
            return min_timeout

        # Expand the user directory (~)
        wordlist_path = os.path.expanduser(wordlist_path)

        # Check that the file exists
        if not os.path.exists(wordlist_path):
            logger.warning("Wordlist file not found: %s; using default timeout: %ds", wordlist_path, min_timeout)
            return min_timeout

        # Count wordlist lines quickly with wc -l
        result = subprocess.run(
            ['wc', '-l', wordlist_path],
            capture_output=True,
            text=True,
            check=True
        )
        # wc -l output format: line count + space + filename
        line_count = int(result.stdout.strip().split()[0])

        # Compute the timeout
        timeout = int(line_count * base_per_word)

        # Apply a sane lower bound (no upper bound any more)
        timeout = max(min_timeout, timeout)

        timeout = max(min_timeout, int(line_count * base_per_word))

        logger.info(
            "Directory-scan timeout - wordlist: %s, lines: %d, base: %.3fs/word, timeout: %ds",
            wordlist_path, line_count, base_per_word, timeout
        )

        return timeout

    except subprocess.CalledProcessError as e:
        logger.error("Failed to count wordlist lines: %s", e)
        # Fall back to the default timeout on failure
        return min_timeout
    except (ValueError, IndexError) as e:
        logger.error("Failed to parse the wordlist line count: %s", e)
        return min_timeout
    except Exception as e:
        logger.error("Unexpected error computing the timeout: %s", e)

    except (subprocess.CalledProcessError, ValueError, IndexError) as e:
        logger.error("Failed to compute the timeout: %s", e)
        return min_timeout
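With the defaults (`base_per_word=1.0`, `min_timeout=60`) the simplified version reduces to `max(60, line_count)`: a 40-line wordlist still gets 60 s, a 5,000-line one gets 5,000 s, and nothing is capped any more. A quick check:

# Sketch: the timeout arithmetic after the cap was removed.
def timeout_for(line_count: int, base_per_word: float = 1.0, min_timeout: int = 60) -> int:
    return max(min_timeout, int(line_count * base_per_word))

assert timeout_for(40) == 60          # floor applies
assert timeout_for(5000) == 5000      # linear in wordlist size
assert timeout_for(100000) == 100000  # no 7200s ceiling any more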
def _get_max_workers(tool_config: dict, default: int = DEFAULT_MAX_WORKERS) -> int:
    """
    Get the max_workers parameter from a single tool's config

    Args:
        tool_config: a single tool's config dict, e.g. {'max_workers': 10, 'threads': 5, ...}
        default: default value (5)

    Returns:
        int: the max_workers value
    """
    """Get the max_workers parameter from a single tool's config"""
    if not isinstance(tool_config, dict):
        return default

    # Both max_workers and max-workers are accepted (YAML dashes get converted)
    max_workers = tool_config.get('max_workers') or tool_config.get('max-workers')
    if max_workers is not None and isinstance(max_workers, int) and max_workers > 0:
    if isinstance(max_workers, int) and max_workers > 0:
        return max_workers
    return default
def _export_site_urls(target_id: int, target_name: str, directory_scan_dir: Path) -> tuple[str, int]:
def _export_site_urls(
    target_id: int,
    directory_scan_dir: Path,
    provider,
) -> Tuple[str, int]:
    """
    Export all site URLs under a target to a file (supports lazy loading)

    Export all site URLs under a target to a file

    Args:
        target_id: target ID
        target_name: target name (used by lazy loading to create a default site)
        directory_scan_dir: directory-scan working directory

        provider: a TargetProvider instance

    Returns:
        tuple: (sites_file, site_count)

    Raises:
        ValueError: when the site count is 0
    """
    logger.info("Step 1: exporting all site URLs for the target")

    sites_file = str(directory_scan_dir / 'sites.txt')
    export_result = export_sites_task(
        target_id=target_id,
        output_file=sites_file,
        batch_size=1000  # read 1000 rows at a time to limit memory usage
        provider=provider,
    )

    site_count = export_result['total_count']

    logger.info(
        "✓ Site URL export finished - file: %s, count: %d",
        export_result['output_file'],
        site_count
    )

    if site_count == 0:
        logger.warning("Target has no sites; directory scan cannot run")
        # Do not raise; let the caller decide how to handle this
        # raise ValueError("Target has no sites; directory scan cannot run")

    return export_result['output_file'], site_count
def _run_scans_sequentially(
    enabled_tools: dict,
    sites_file: str,
    directory_scan_dir: Path,
    scan_id: int,
    target_id: int,
    site_count: int,
    target_name: str
) -> tuple[int, int, list]:
    """
    Run directory-scan tasks serially (multi-tool) - deprecated, kept for compatibility

    Args:
        enabled_tools: dict of enabled tool configs
        sites_file: path to the sites file
        directory_scan_dir: directory-scan working directory
        scan_id: scan task ID
        target_id: target ID
        site_count: number of sites
        target_name: target name (for error logs)

    Returns:
        tuple: (total_directories, processed_sites, failed_sites)
    """
    # Read the site list
    sites = []
    with open(sites_file, 'r', encoding='utf-8') as f:
        for line in f:
            site_url = line.strip()
            if site_url:
                sites.append(site_url)

    logger.info("About to scan %d sites with tools: %s", len(sites), ', '.join(enabled_tools.keys()))

    total_directories = 0
    processed_sites_set = set()  # a set avoids double counting
    failed_sites = []

    # Iterate over the tools
    for tool_name, tool_config in enabled_tools.items():
        logger.info("="*60)
        logger.info("Using tool: %s", tool_name)
        logger.info("="*60)

        # If wordlist_name is configured, make sure the wordlist exists locally first (with hash check)
        wordlist_name = tool_config.get('wordlist_name')
        if wordlist_name:
            try:
                local_wordlist_path = ensure_wordlist_local(wordlist_name)
                tool_config['wordlist'] = local_wordlist_path
            except Exception as exc:
                logger.error("Failed to prepare wordlist for tool %s: %s", tool_name, exc)
                # This tool cannot run; count every site as failed and move on to the next tool
                failed_sites.extend(sites)
                continue

        # Scan the sites one by one
        for idx, site_url in enumerate(sites, 1):
            logger.info(
                "[%d/%d] Scanning site: %s (tool: %s)",
                idx, len(sites), site_url, tool_name
            )

            # Use the shared command builder
            try:
                command = build_scan_command(
                    tool_name=tool_name,
                    scan_type='directory_scan',
                    command_params={
                        'url': site_url
                    },
                    tool_config=tool_config
                )
            except Exception as e:
                logger.error(
                    "✗ [%d/%d] Failed to build %s command: %s - site: %s",
                    idx, len(sites), tool_name, e, site_url
                )
                failed_sites.append(site_url)
                continue

            # Per-site timeout: taken from the config ('auto' = computed dynamically)
            # ffuf scans sites one at a time, so timeout is the per-site timeout
            site_timeout = tool_config.get('timeout', 300)
            if site_timeout == 'auto':
                # Compute the timeout dynamically (from the wordlist line count)
                site_timeout = calculate_directory_scan_timeout(tool_config)
                logger.info(f"✓ Tool {tool_name} computed timeout dynamically: {site_timeout}s")

            # Build the log file path
            from datetime import datetime
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            log_file = directory_scan_dir / f"{tool_name}_{timestamp}_{idx}.log"

            try:
                # Call the task directly (serial execution)
                result = run_and_stream_save_directories_task(
                    cmd=command,
                    tool_name=tool_name,  # new: tool name
                    scan_id=scan_id,
                    target_id=target_id,
                    site_url=site_url,
                    cwd=str(directory_scan_dir),
                    shell=True,
                    batch_size=1000,
                    timeout=site_timeout,
                    log_file=str(log_file)  # new: log file path
                )

                total_directories += result.get('created_directories', 0)
                processed_sites_set.add(site_url)  # track successful sites in a set

                logger.info(
                    "✓ [%d/%d] Site scan finished: %s - %d directories found",
                    idx, len(sites), site_url,
                    result.get('created_directories', 0)
                )

            except subprocess.TimeoutExpired as exc:
                # Handle timeouts separately
                failed_sites.append(site_url)
                logger.warning(
                    "⚠️ [%d/%d] Site scan timed out: %s - configured timeout: %ds\n"
                    "Note: directories parsed before the timeout were saved to the database, but the scan did not finish.",
                    idx, len(sites), site_url, site_timeout
                )
            except Exception as exc:
                # Any other error
                failed_sites.append(site_url)
                logger.error(
                    "✗ [%d/%d] Site scan failed: %s - error: %s",
                    idx, len(sites), site_url, exc
                )

            # Log progress every 10 sites
            if idx % 10 == 0:
                logger.info(
                    "Progress: %d/%d (%.1f%%) - %d directories found so far",
                    idx, len(sites), idx/len(sites)*100, total_directories
                )

    # Tally successes and failures
    processed_count = len(processed_sites_set)

    if failed_sites:
        logger.warning(
            "Some sites failed to scan: %d/%d",
            len(failed_sites), len(sites)
        )

    logger.info(
        "✓ Serial directory scan finished - succeeded: %d/%d, failed: %d, total directories: %d",
        processed_count, len(sites), len(failed_sites), total_directories
    )

    return total_directories, processed_count, failed_sites
def _generate_log_filename(tool_name: str, site_url: str, directory_scan_dir: Path) -> Path:
    """
    Generate a unique log filename

    A hash of the URL avoids collisions under concurrency

    Args:
        tool_name: tool name
        site_url: site URL
        directory_scan_dir: directory-scan working directory

    Returns:
        Path: the log file path
    """
    url_hash = hashlib.md5(site_url.encode()).hexdigest()[:8]
def _generate_log_filename(
    tool_name: str,
    site_url: str,
    directory_scan_dir: Path
) -> Path:
    """Generate a unique log filename (a hash of the URL avoids collisions under concurrency)"""
    url_hash = hashlib.md5(
        site_url.encode(),
        usedforsecurity=False
    ).hexdigest()[:8]
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
    return directory_scan_dir / f"{tool_name}_{url_hash}_{timestamp}.log"
def _prepare_tool_wordlist(tool_name: str, tool_config: dict) -> bool:
    """Prepare the tool's wordlist file; returns whether it succeeded"""
    wordlist_name = tool_config.get('wordlist_name')
    if not wordlist_name:
        return True

    try:
        local_wordlist_path = ensure_wordlist_local(wordlist_name)
        tool_config['wordlist'] = local_wordlist_path
        return True
    except Exception as exc:
        logger.error("Failed to prepare wordlist for tool %s: %s", tool_name, exc)
        return False
def _build_scan_params(
    tool_name: str,
    tool_config: dict,
    sites: List[str],
    directory_scan_dir: Path,
    site_timeout: int
) -> Tuple[List[dict], List[str]]:
    """Build scan parameters for every site; returns (scan_params_list, failed_sites)"""
    scan_params_list = []
    failed_sites = []

    for idx, site_url in enumerate(sites, 1):
        try:
            command = build_scan_command(
                tool_name=tool_name,
                scan_type='directory_scan',
                command_params={'url': site_url},
                tool_config=tool_config
            )
            log_file = _generate_log_filename(tool_name, site_url, directory_scan_dir)
            scan_params_list.append({
                'idx': idx,
                'site_url': site_url,
                'command': command,
                'log_file': str(log_file),
                'timeout': site_timeout
            })
        except Exception as e:
            logger.error(
                "✗ [%d/%d] Failed to build %s command: %s - site: %s",
                idx, len(sites), tool_name, e, site_url
            )
            failed_sites.append(site_url)

    return scan_params_list, failed_sites
def _execute_batch(
    batch_params: List[dict],
    tool_name: str,
    scan_id: int,
    target_id: int,
    directory_scan_dir: Path,
    total_sites: int
) -> Tuple[int, List[str]]:
    """Run one batch of scan tasks; returns (directories_found, failed_sites)"""
    directories_found = 0
    failed_sites = []

    # Submit the tasks
    futures = []
    for params in batch_params:
        future = run_and_stream_save_directories_task.submit(
            cmd=params['command'],
            tool_name=tool_name,
            scan_id=scan_id,
            target_id=target_id,
            site_url=params['site_url'],
            cwd=str(directory_scan_dir),
            shell=True,
            batch_size=1000,
            timeout=params['timeout'],
            log_file=params['log_file']
        )
        futures.append((params['idx'], params['site_url'], future))

    # Wait for the results
    for idx, site_url, future in futures:
        try:
            result = future.result()
            dirs_count = result.get('created_directories', 0)
            directories_found += dirs_count
            logger.info(
                "✓ [%d/%d] Site scan finished: %s - %d directories found",
                idx, total_sites, site_url, dirs_count
            )
        except Exception as exc:
            failed_sites.append(site_url)
            if 'timeout' in str(exc).lower():
                logger.warning(
                    "⚠️ [%d/%d] Site scan timed out: %s - error: %s",
                    idx, total_sites, site_url, exc
                )
            else:
                logger.error(
                    "✗ [%d/%d] Site scan failed: %s - error: %s",
                    idx, total_sites, site_url, exc
                )

    return directories_found, failed_sites
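`_execute_batch` is driven below in fixed-size windows, so at most `max_workers` subprocesses run at once and each batch completes before the next starts. The slicing arithmetic in isolation:

# Sketch: the batch windows used by the concurrent runner below.
def batches(total_tasks: int, max_workers: int):
    for batch_start in range(0, total_tasks, max_workers):
        batch_end = min(batch_start + max_workers, total_tasks)
        yield batch_start // max_workers + 1, batch_start, batch_end

print(list(batches(7, 3)))  # [(1, 0, 3), (2, 3, 6), (3, 6, 7)]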
def _run_scans_concurrently(
    enabled_tools: dict,
    sites_file: str,
    directory_scan_dir: Path,
    scan_id: int,
    target_id: int,
    site_count: int,
    target_name: str
) -> Tuple[int, int, List[str]]:
    """
    Run directory-scan tasks concurrently (via ThreadPoolTaskRunner)

    Args:
        enabled_tools: dict of enabled tool configs
        sites_file: path to the sites file
        directory_scan_dir: directory-scan working directory
        scan_id: scan task ID
        target_id: target ID
        site_count: number of sites
        target_name: target name (for error logs)

    Run directory-scan tasks concurrently

    Returns:
        tuple: (total_directories, processed_sites, failed_sites)
    """
    # Read the site list
    sites: List[str] = []
    with open(sites_file, 'r', encoding='utf-8') as f:
        for line in f:
            site_url = line.strip()
            if site_url:
                sites.append(site_url)

        sites = [line.strip() for line in f if line.strip()]

    if not sites:
        logger.warning("Site list is empty")
        return 0, 0, []

    logger.info(
        "About to scan %d sites concurrently with tools: %s",
        len(sites), ', '.join(enabled_tools.keys())
    )

    total_directories = 0
    processed_sites_count = 0
    failed_sites: List[str] = []

    # Iterate over the tools
    for tool_name, tool_config in enabled_tools.items():
        # Each tool reads its own max_workers config
        max_workers = _get_max_workers(tool_config)

        logger.info("="*60)
        logger.info("Using tool: %s (concurrent mode, max_workers=%d)", tool_name, max_workers)
        logger.info("="*60)

        # If wordlist_name is configured, make sure the wordlist exists locally first (with hash check)
        wordlist_name = tool_config.get('wordlist_name')
        if wordlist_name:
            try:
                local_wordlist_path = ensure_wordlist_local(wordlist_name)
                tool_config['wordlist'] = local_wordlist_path
            except Exception as exc:
                logger.error("Failed to prepare wordlist for tool %s: %s", tool_name, exc)
                # This tool cannot run; count every site as failed and move on to the next tool
                failed_sites.extend(sites)
                continue

        # Compute the timeout (shared by all sites)
    for tool_name, tool_config in enabled_tools.items():
        max_workers = _get_max_workers(tool_config)

        logger.info("=" * 60)
        logger.info("Using tool: %s (concurrent mode, max_workers=%d)", tool_name, max_workers)
        logger.info("=" * 60)
        user_log(scan_id, "directory_scan", f"Running {tool_name}")

        # Prepare the wordlist
        if not _prepare_tool_wordlist(tool_name, tool_config):
            failed_sites.extend(sites)
            continue

        # Compute the timeout
        site_timeout = tool_config.get('timeout', 300)
        if site_timeout == 'auto':
            site_timeout = calculate_directory_scan_timeout(tool_config)
            logger.info(f"✓ Tool {tool_name} computed timeout dynamically: {site_timeout}s")

        # Build scan parameters for every site
        scan_params_list = []
        for idx, site_url in enumerate(sites, 1):
            try:
                command = build_scan_command(
                    tool_name=tool_name,
                    scan_type='directory_scan',
                    command_params={'url': site_url},
                    tool_config=tool_config
                )
                log_file = _generate_log_filename(tool_name, site_url, directory_scan_dir)
                scan_params_list.append({
                    'idx': idx,
                    'site_url': site_url,
                    'command': command,
                    'log_file': str(log_file),
                    'timeout': site_timeout
                })
            except Exception as e:
                logger.error(
                    "✗ [%d/%d] Failed to build %s command: %s - site: %s",
                    idx, len(sites), tool_name, e, site_url
                )
                failed_sites.append(site_url)

            logger.info("✓ Tool %s computed timeout dynamically: %ds", tool_name, site_timeout)

        # Build the scan parameters
        scan_params_list, build_failed = _build_scan_params(
            tool_name, tool_config, sites, directory_scan_dir, site_timeout
        )
        failed_sites.extend(build_failed)

        if not scan_params_list:
            logger.warning("No valid scan tasks")
            continue

        # ============================================================
        # Batching strategy: bound the number of ffuf processes actually running
        # ============================================================

        # Run in batches
        total_tasks = len(scan_params_list)
        logger.info("Running %d scan tasks in batches of %d...", total_tasks, max_workers)

        batch_num = 0

        last_progress_percent = 0
        tool_directories = 0
        tool_processed = 0

        for batch_start in range(0, total_tasks, max_workers):
            batch_end = min(batch_start + max_workers, total_tasks)
            batch_params = scan_params_list[batch_start:batch_end]
            batch_num += 1

            logger.info("Running batch %d (%d-%d/%d)...", batch_num, batch_start + 1, batch_end, total_tasks)

            # Submit this batch's tasks (non-blocking; returns futures immediately)
            futures = []
            for params in batch_params:
                future = run_and_stream_save_directories_task.submit(
                    cmd=params['command'],
                    tool_name=tool_name,
                    scan_id=scan_id,
                    target_id=target_id,
                    site_url=params['site_url'],
                    cwd=str(directory_scan_dir),
                    shell=True,
                    batch_size=1000,
                    timeout=params['timeout'],
                    log_file=params['log_file']
            batch_num = batch_start // max_workers + 1

            logger.info(
                "Running batch %d (%d-%d/%d)...",
                batch_num, batch_start + 1, batch_end, total_tasks
            )

            dirs_found, batch_failed = _execute_batch(
                batch_params, tool_name, scan_id, target_id,
                directory_scan_dir, len(sites)
            )

            total_directories += dirs_found
            tool_directories += dirs_found
            tool_processed += len(batch_params) - len(batch_failed)
            processed_sites_count += len(batch_params) - len(batch_failed)
            failed_sites.extend(batch_failed)

            # Progress milestone: log every 20%
            current_progress = int((batch_end / total_tasks) * 100)
            if current_progress >= last_progress_percent + 20:
                user_log(
                    scan_id, "directory_scan",
                    f"Progress: {batch_end}/{total_tasks} sites scanned"
                )
                futures.append((params['idx'], params['site_url'], future))

            # Wait for every task in this batch (blocking, so one batch finishes before the next starts)
            for idx, site_url, future in futures:
                try:
                    result = future.result()  # block until this task finishes
                    directories_found = result.get('created_directories', 0)
                    total_directories += directories_found
                    processed_sites_count += 1

                    logger.info(
                        "✓ [%d/%d] Site scan finished: %s - %d directories found",
                        idx, len(sites), site_url, directories_found
                    )

                except Exception as exc:
                    failed_sites.append(site_url)
                    if 'timeout' in str(exc).lower() or isinstance(exc, subprocess.TimeoutExpired):
                        logger.warning(
                            "⚠️ [%d/%d] Site scan timed out: %s - error: %s",
                            idx, len(sites), site_url, exc
                        )
                    else:
                        logger.error(
                            "✗ [%d/%d] Site scan failed: %s - error: %s",
                            idx, len(sites), site_url, exc
                        )

        # Summary
        if failed_sites:
            logger.warning(
                "Some sites failed to scan: %d/%d",
                len(failed_sites), len(sites)
                last_progress_percent = (current_progress // 20) * 20

        logger.info(
            "✓ Tool %s finished - sites processed: %d/%d, directories found: %d",
            tool_name, tool_processed, total_tasks, tool_directories
|
||||
)
|
||||
|
||||
user_log(
|
||||
scan_id, "directory_scan",
|
||||
f"{tool_name} completed: found {tool_directories} directories"
|
||||
)
|
||||
|
||||
if failed_sites:
|
||||
logger.warning("部分站点扫描失败: %d/%d", len(failed_sites), len(sites))
|
||||
|
||||
logger.info(
|
||||
"✓ 并发目录扫描执行完成 - 成功: %d/%d, 失败: %d, 总目录数: %d",
|
||||
processed_sites_count, len(sites), len(failed_sites), total_directories
|
||||
)
|
||||
|
||||
|
||||
return total_directories, processed_sites_count, failed_sites
|
||||
|
||||
|
||||
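# A minimal sketch of the batch pattern above, using a plain ThreadPoolExecutor in
# place of Prefect task submission (scan_one and its return shape are hypothetical,
# not part of this repository):
from concurrent.futures import ThreadPoolExecutor

def run_in_batches(params_list: list, max_workers: int, scan_one) -> tuple[int, list]:
    """Run scan_one(p) over params_list, at most max_workers at a time."""
    found, failed = 0, []
    for start in range(0, len(params_list), max_workers):
        batch = params_list[start:start + max_workers]
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            # Submit the whole batch, then block on results before the next batch
            futures = {pool.submit(scan_one, p): p for p in batch}
            for fut, p in futures.items():
                try:
                    found += fut.result().get('created_directories', 0)
                except Exception:
                    failed.append(p['site_url'])
    return found, failed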
@flow(
    name="directory_scan",
    log_prints=True,
    on_running=[on_scan_flow_running],
    on_completion=[on_scan_flow_completed],
    on_failure=[on_scan_flow_failed],
)
def directory_scan_flow(
    scan_id: int,
    target_name: str,
    target_id: int,
    scan_workspace_dir: str,
    enabled_tools: dict,
    provider,
) -> dict:
    """
    Directory-scan Flow.

    What it does:
    1. Fetch the URLs of all sites under the target
    2. Run a directory scan against each site URL (ffuf and similar tools)
    3. Stream the scan results into the Directory table

    Workflow:
    Step 0: create the working directory
    Step 1: export the site URL list to a file (consumed by the scan tools)
    Step 2: validate the tool configuration
    Step 3: run the scan tools concurrently and save results in real time
            (via ThreadPoolTaskRunner)

    ffuf output fields:
    - url: discovered directory/file URL
    - length: response body length
    - status: HTTP status code
    - words: response word count
    - lines: response line count
    - content_type: content type
    - duration: request duration

    Args:
        scan_id: scan task ID
        target_name: target name
        target_id: target ID
        scan_workspace_dir: scan workspace directory
        enabled_tools: dict of enabled tool configurations
        provider: TargetProvider instance

    Returns:
        dict: {
            'success': bool,
            'scan_id': int,
            'target': str,
            'scan_workspace_dir': str,
            'sites_file': str,
            'site_count': int,
            'total_directories': int,   # total directories discovered
            'processed_sites': int,     # sites processed successfully
            'failed_sites_count': int,  # sites that failed
            'executed_tasks': list
        }
    """
    try:
        wait_for_system_load(context="directory_scan_flow")

        # Fetch target_name from the provider
        target_name = provider.get_target_name()
        if not target_name:
            raise ValueError("Unable to resolve Target name")

        logger.info(
            "Starting directory scan - Scan ID: %s, Target: %s, Workspace: %s",
            scan_id, target_name, scan_workspace_dir
        )
        user_log(scan_id, "directory_scan", "Starting directory scan")

        # Parameter validation
        if scan_id is None:
            raise ValueError("scan_id must not be empty")
        if target_id is None:
            raise ValueError("target_id must not be empty")
        if not scan_workspace_dir:
            raise ValueError("scan_workspace_dir must not be empty")
        if not enabled_tools:
            raise ValueError("enabled_tools must not be empty")

        # Step 0: create the working directory
        from apps.scan.utils import setup_scan_directory
        directory_scan_dir = setup_scan_directory(scan_workspace_dir, 'directory_scan')

        # Step 1: export the site URLs
        sites_file, site_count = _export_site_urls(
            target_id, directory_scan_dir, provider
        )

        if site_count == 0:
            logger.warning("Skipping directory scan: no sites to scan - Scan ID: %s", scan_id)
            user_log(scan_id, "directory_scan", "Skipped: no sites to scan", "warning")
            return {
                'success': True,
                'scan_id': scan_id,
                'target': target_name,
                'scan_workspace_dir': scan_workspace_dir,
                'sites_file': sites_file,
                'site_count': 0,
                'total_directories': 0,
                'processed_sites': 0,
                'failed_sites_count': 0,
                'executed_tasks': ['export_sites']
            }

        # Step 2: tool configuration summary
        logger.info("Step 2: tool configuration")
        tool_info = [
            f"{name}(max_workers={_get_max_workers(cfg)})"
            for name, cfg in enabled_tools.items()
        ]
        logger.info("✓ Enabled tools: %s", ', '.join(tool_info))

        # Step 3: run the scans concurrently
        logger.info("Step 3: run the scan tools concurrently and save results in real time")
        total_directories, processed_sites, failed_sites = _run_scans_concurrently(
            enabled_tools=enabled_tools,
            sites_file=sites_file,
            directory_scan_dir=directory_scan_dir,
            scan_id=scan_id,
            target_id=target_id,
            site_count=site_count,
            target_name=target_name
        )

        # Check whether every site failed
        if processed_sites == 0 and site_count > 0:
            logger.warning(
                "All site scans failed - total sites: %d, failures: %d",
                site_count, len(failed_sites)
            )
            # Do not raise; let the scan continue

        logger.info("✓ Directory scan finished - directories found: %d", total_directories)
        user_log(
            scan_id, "directory_scan",
            f"directory_scan completed: found {total_directories} directories"
        )

        return {
            'success': True,
            'scan_id': scan_id,
            'target': target_name,
            'scan_workspace_dir': scan_workspace_dir,
            'sites_file': sites_file,
            'site_count': site_count,
            'total_directories': total_directories,
            'processed_sites': processed_sites,
            'failed_sites_count': len(failed_sites),
            'executed_tasks': ['export_sites', 'run_and_stream_save_directories']
        }

    except Exception as e:
        logger.exception("Directory scan failed: %s", e)
        raise
@@ -10,211 +10,189 @@
- Stream tool output and update the database in batches
"""

# Django environment initialization (takes effect on import)
from apps.common.prefect_django_setup import setup_django_for_prefect

import logging
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Optional

from prefect import flow

from apps.scan.handlers.scan_flow_handlers import (
    on_scan_flow_completed,
    on_scan_flow_failed,
    on_scan_flow_running,
)
from apps.scan.tasks.fingerprint_detect import (
    export_site_urls_for_fingerprint_task,
    run_xingfinger_and_stream_update_tech_task,
)
from apps.scan.utils import build_scan_command, setup_scan_directory, user_log, wait_for_system_load
from apps.scan.utils.fingerprint_helpers import get_fingerprint_paths

logger = logging.getLogger(__name__)


@dataclass
class FingerprintContext:
    """Fingerprint-detection context, used to pass state between functions"""
    scan_id: int
    target_id: int
    target_name: str
    scan_workspace_dir: str
    fingerprint_dir: Optional[Path] = None
    urls_file: str = ""
    url_count: int = 0
    source: str = "website"
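# Construction sketch (illustrative values; the flow fills fingerprint_dir, urls_file
# and url_count in later steps, and source defaults to "website"):
# ctx = FingerprintContext(scan_id=1, target_id=2, target_name="example.com",
#                          scan_workspace_dir="/scans/1")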
def calculate_fingerprint_detect_timeout(
    url_count: int,
    base_per_url: float = 10.0,
    min_timeout: int = 300
) -> int:
    """Compute the timeout from the URL count (minimum 300 seconds, no upper bound)"""
    return max(min_timeout, int(url_count * base_per_url))
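# Usage sketch, assuming the defaults above (base_per_url=10.0, min_timeout=300):
# calculate_fingerprint_detect_timeout(10)     -> 300     (10 × 10 = 100, below the 300 s floor)
# calculate_fingerprint_detect_timeout(100)    -> 1000    (~17 minutes)
# calculate_fingerprint_detect_timeout(10000)  -> 100000  (~27.8 hours; no upper bound)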
def _export_urls(fingerprint_dir: Path, provider) -> tuple[str, int]:
    """Export the URLs to a file; returns (urls_file, total_count)"""
    logger.info("Step 1: export the URL list")

    urls_file = str(fingerprint_dir / 'urls.txt')
    export_result = export_site_urls_for_fingerprint_task(
        output_file=urls_file,
        provider=provider,
    )

    total_count = export_result['total_count']

    logger.info("✓ URL export finished - file: %s, count: %d", export_result['output_file'], total_count)

    return export_result['output_file'], total_count
def _run_single_tool(
    tool_name: str,
    tool_config: dict,
    ctx: FingerprintContext
) -> tuple[Optional[dict], Optional[dict]]:
    """Run a single fingerprint tool; returns (stats, failed_info)"""
    # Resolve the fingerprint library paths
    lib_names = tool_config.get('fingerprint_libs', ['ehole'])
    fingerprint_paths = get_fingerprint_paths(lib_names)

    if not fingerprint_paths:
        reason = f"No usable fingerprint libraries: {lib_names}"
        logger.warning(reason)
        return None, {'tool': tool_name, 'reason': reason}

    # Build the command (library paths are merged into tool_config for command building)
    tool_config_with_paths = {**tool_config, **fingerprint_paths}
    try:
        command = build_scan_command(
            tool_name=tool_name,
            scan_type='fingerprint_detect',
            command_params={'urls_file': ctx.urls_file},
            tool_config=tool_config_with_paths
        )
    except Exception as e:
        reason = f"Command build failed: {e}"
        logger.error("Failed to build %s command: %s", tool_name, e)
        return None, {'tool': tool_name, 'reason': reason}

    # Compute the timeout and the log-file path
    timeout = calculate_fingerprint_detect_timeout(ctx.url_count)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = ctx.fingerprint_dir / f"{tool_name}_{timestamp}.log"

    logger.info(
        "Starting %s fingerprint detection - URLs: %d, timeout: %ds, libraries: %s",
        tool_name, ctx.url_count, timeout, list(fingerprint_paths.keys())
    )
    user_log(ctx.scan_id, "fingerprint_detect", f"Running {tool_name}: {command}")

    # Run the scan task
    try:
        result = run_xingfinger_and_stream_update_tech_task(
            cmd=command,
            tool_name=tool_name,
            scan_id=ctx.scan_id,
            target_id=ctx.target_id,
            source=ctx.source,
            cwd=str(ctx.fingerprint_dir),
            timeout=timeout,
            log_file=str(log_file),
            batch_size=100
        )

        stats = {
            'command': command,
            'result': result,
            'timeout': timeout,
            'fingerprint_libs': list(fingerprint_paths.keys())
        }

        tool_updated = result.get('updated_count', 0)
        logger.info(
            "✓ Tool %s finished - records processed: %d, updated: %d, not found: %d",
            tool_name,
            result.get('processed_records', 0),
            tool_updated,
            result.get('not_found_count', 0)
        )
        user_log(
            ctx.scan_id, "fingerprint_detect",
            f"{tool_name} completed: identified {tool_updated} fingerprints"
        )
        return stats, None

    except Exception as exc:
        reason = str(exc)
        logger.error("Tool %s failed: %s", tool_name, exc, exc_info=True)
        user_log(ctx.scan_id, "fingerprint_detect", f"{tool_name} failed: {reason}", "error")
        return None, {'tool': tool_name, 'reason': reason}


def _run_fingerprint_detect(enabled_tools: dict, ctx: FingerprintContext) -> tuple[dict, list]:
    """Run the fingerprint-detection tasks; returns (tool_stats, failed_tools)"""
    tool_stats = {}
    failed_tools = []

    for tool_name, tool_config in enabled_tools.items():
        stats, failed_info = _run_single_tool(tool_name, tool_config, ctx)
        if stats:
            tool_stats[tool_name] = stats
        if failed_info:
            failed_tools.append(failed_info)

    if failed_tools:
        logger.warning(
            "The following fingerprint tools failed: %s",
            ', '.join([f['tool'] for f in failed_tools])
        )

    return tool_stats, failed_tools
def _aggregate_results(tool_stats: dict) -> dict:
    """Aggregate the results of all tools"""
    return {
        'processed_records': sum(
            s['result'].get('processed_records', 0) for s in tool_stats.values()
        ),
        'updated_count': sum(
            s['result'].get('updated_count', 0) for s in tool_stats.values()
        ),
        'created_count': sum(
            s['result'].get('created_count', 0) for s in tool_stats.values()
        ),
        'snapshot_count': sum(
            s['result'].get('snapshot_count', 0) for s in tool_stats.values()
        ),
    }
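# Shape sketch for _aggregate_results, assuming two tools ran (illustrative values only;
# 'other_tool' is hypothetical):
# _aggregate_results({
#     'xingfinger': {'result': {'processed_records': 120, 'updated_count': 80}},
#     'other_tool': {'result': {'processed_records': 100, 'updated_count': 40}},
# })
# -> {'processed_records': 220, 'updated_count': 120, 'created_count': 0, 'snapshot_count': 0}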
@flow(
    name="fingerprint_detect",
    log_prints=True,
    on_running=[on_scan_flow_running],
    on_completion=[on_scan_flow_completed],
    on_failure=[on_scan_flow_failed],
)
def fingerprint_detect_flow(
    scan_id: int,
    target_name: str,
    target_id: int,
    scan_workspace_dir: str,
    enabled_tools: dict,
    provider,
) -> dict:
    """
    Fingerprint-detection Flow.

    What it does:
    1. Export all WebSite URLs under the target from the database to a file
    2. Identify the technology stack with xingfinger
    3. Parse the results and update the WebSite.tech field (merged, deduplicated)

    Workflow:
    Step 0: create the working directory
    Step 1: export the URL list
    Step 2: parse the configuration and resolve the enabled tools
    Step 3: run xingfinger and parse the results

    Args:
        scan_id: scan task ID
        target_name: target name
        target_id: target ID
        scan_workspace_dir: scan workspace directory
        enabled_tools: enabled tool configuration (xingfinger)
        provider: TargetProvider instance

    Returns:
        dict: {
            'success': bool,
            'scan_id': int,
            'target': str,
            'scan_workspace_dir': str,
            'urls_file': str,
            'url_count': int,
            'processed_records': int,
            'updated_count': int,
            'not_found_count': int,
            'executed_tasks': list,
            'tool_stats': dict
        }
    """
    try:
        wait_for_system_load(context="fingerprint_detect_flow")

        # Fetch target_name from the provider
        target_name = provider.get_target_name()
        if not target_name:
            raise ValueError("Unable to resolve Target name")

        # Parameter validation
        if scan_id is None:
            raise ValueError("scan_id must not be empty")
        if target_id is None:
            raise ValueError("target_id must not be empty")
        if not scan_workspace_dir:
            raise ValueError("scan_workspace_dir must not be empty")

        logger.info(
            "Starting fingerprint detection - Scan ID: %s, Target: %s, Workspace: %s",
            scan_id, target_name, scan_workspace_dir
        )
        user_log(scan_id, "fingerprint_detect", "Starting fingerprint detection")

        # Create the context
        ctx = FingerprintContext(
            scan_id=scan_id,
            target_id=target_id,
            target_name=target_name,
            scan_workspace_dir=scan_workspace_dir,
            fingerprint_dir=setup_scan_directory(scan_workspace_dir, 'fingerprint_detect')
        )

        # Step 1: export the URLs
        ctx.urls_file, ctx.url_count = _export_urls(ctx.fingerprint_dir, provider)

        if ctx.url_count == 0:
            logger.warning("Skipping fingerprint detection: no URLs to scan - Scan ID: %s", scan_id)
            user_log(scan_id, "fingerprint_detect", "Skipped: no URLs to scan", "warning")
            return _build_empty_result(scan_id, target_name, scan_workspace_dir, ctx.urls_file)

        # Step 2: tool configuration summary
        logger.info("Step 2: tool configuration")
        logger.info("✓ Enabled tools: %s", ', '.join(enabled_tools.keys()))

        # Step 3: run the fingerprint detection
        logger.info("Step 3: run the fingerprint detection")
        tool_stats, failed_tools = _run_fingerprint_detect(enabled_tools, ctx)

        # Aggregate the results
        totals = _aggregate_results(tool_stats)
        failed_tool_names = {f['tool'] for f in failed_tools}
        successful_tools = [name for name in enabled_tools if name not in failed_tool_names]

        logger.info("✓ Fingerprint detection finished - fingerprints identified: %d", totals['updated_count'])
        user_log(
            scan_id, "fingerprint_detect",
            f"fingerprint_detect completed: identified {totals['updated_count']} fingerprints"
        )

        # Build the list of executed tasks dynamically
        executed_tasks = ['export_site_urls_for_fingerprint']
        executed_tasks.extend([f'run_xingfinger ({tool})' for tool in tool_stats])

        return {
            'success': True,
            'scan_id': scan_id,
            'target': target_name,
            'scan_workspace_dir': scan_workspace_dir,
            'urls_file': ctx.urls_file,
            'url_count': ctx.url_count,
            **totals,
            'executed_tasks': executed_tasks,
            'tool_stats': {
                'total': len(enabled_tools),
                'successful': len(successful_tools),
                'failed': len(failed_tools),
                'successful_tools': successful_tools,
                'failed_tools': failed_tools,
                'details': tool_stats
            }
        }

    except ValueError as e:
        logger.error("Configuration error: %s", e)
        raise
    except Exception as e:
        logger.exception("Fingerprint detection failed: %s", e)
        raise


def _build_empty_result(
    scan_id: int,
    target_name: str,
    scan_workspace_dir: str,
    urls_file: str
) -> dict:
    """Build an empty result (used when there are no URLs to scan)"""
    return {
        'success': True,
        'scan_id': scan_id,
        'target': target_name,
        'scan_workspace_dir': scan_workspace_dir,
        'urls_file': urls_file,
        'url_count': 0,
        'processed_records': 0,
        'updated_count': 0,
        'created_count': 0,
        'snapshot_count': 0,
        'executed_tasks': ['export_site_urls_for_fingerprint'],
        'tool_stats': {
            'total': 0,
            'successful': 0,
            'failed': 0,
            'successful_tools': [],
            'failed_tools': [],
            'details': {}
        }
    }
@@ -7,6 +7,7 @@
- Parse the YAML configuration with FlowOrchestrator
- Run sub-flows (Subflows) inside the Prefect Flow
- Orchestrate the workflow in YAML order
- Create the matching Provider based on scan_mode
- No business logic here (that lives in Tasks and FlowOrchestrator)

Architecture:
@@ -18,20 +19,20 @@

# Django environment initialization (takes effect on import)
# Note: dynamic scan containers should start via run_initiate_scan.py so the
# environment variables are set before import
import apps.common.prefect_django_setup  # noqa: F401

import logging
from pathlib import Path

from prefect import flow, task
from prefect.futures import wait

from apps.scan.handlers import (
    on_initiate_scan_flow_running,
    on_initiate_scan_flow_completed,
    on_initiate_scan_flow_failed,
)
from apps.scan.orchestrators import FlowOrchestrator
from apps.scan.utils import setup_scan_workspace

logger = logging.getLogger(__name__)
@@ -43,6 +44,75 @@ def _run_subflow_task(scan_type: str, flow_func, flow_kwargs: dict):
    return flow_func(**flow_kwargs)


def _create_provider(scan, target_id: int, scan_id: int):
    """Create the matching Provider based on scan_mode"""
    from apps.scan.models import Scan
    from apps.scan.providers import (
        DatabaseTargetProvider,
        SnapshotTargetProvider,
        ProviderContext,
    )

    provider_context = ProviderContext(target_id=target_id, scan_id=scan_id)

    if scan.scan_mode == Scan.ScanMode.QUICK:
        provider = SnapshotTargetProvider(scan_id=scan_id, context=provider_context)
        logger.info("✓ Quick scan mode - created SnapshotTargetProvider")
    else:
        provider = DatabaseTargetProvider(target_id=target_id, context=provider_context)
        logger.info("✓ Full scan mode - using DatabaseTargetProvider")

    return provider


def _execute_sequential_flows(valid_flows: list, results: dict, executed_flows: list):
    """Run a list of Flows sequentially"""
    for scan_type, flow_func, flow_kwargs in valid_flows:
        logger.info("=" * 60)
        logger.info("Running Flow: %s", scan_type)
        logger.info("=" * 60)
        try:
            result = flow_func(**flow_kwargs)
            executed_flows.append(scan_type)
            results[scan_type] = result
            logger.info("✓ %s succeeded", scan_type)
        except Exception as e:
            logger.warning("%s failed: %s", scan_type, e)
            executed_flows.append(f"{scan_type} (failed)")
            results[scan_type] = {'success': False, 'error': str(e)}


def _execute_parallel_flows(valid_flows: list, results: dict, executed_flows: list):
    """Run a list of Flows in parallel"""
    futures = []
    for scan_type, flow_func, flow_kwargs in valid_flows:
        logger.info("=" * 60)
        logger.info("Submitting parallel sub-flow task: %s", scan_type)
        logger.info("=" * 60)
        future = _run_subflow_task.submit(
            scan_type=scan_type,
            flow_func=flow_func,
            flow_kwargs=flow_kwargs,
        )
        futures.append((scan_type, future))

    if not futures:
        return

    wait([f for _, f in futures])

    for scan_type, future in futures:
        try:
            result = future.result()
            executed_flows.append(scan_type)
            results[scan_type] = result
            logger.info("✓ %s succeeded", scan_type)
        except Exception as e:
            logger.warning("%s failed: %s", scan_type, e)
            executed_flows.append(f"{scan_type} (failed)")
            results[scan_type] = {'success': False, 'error': str(e)}
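# Usage sketch for the two helpers above (hypothetical flow list; assumes the kwargs
# dicts were prepared the way get_valid_flows below prepares them):
# results, executed = {}, []
# stage = [('port_scan', port_scan_flow, kwargs_a), ('directory_scan', directory_scan_flow, kwargs_b)]
# _execute_sequential_flows(stage, results, executed)  # runs one flow at a time, in order
# _execute_parallel_flows(stage, results, executed)    # submits all, then waits on the futures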
@flow(
    name='initiate_scan',
    description='Scan initialization flow',
    on_running=[on_initiate_scan_flow_running],
    on_completion=[on_initiate_scan_flow_completed],
    on_failure=[on_initiate_scan_flow_failed],
)
def initiate_scan_flow(
    scan_id: int,
    target_id: int,
    scan_workspace_dir: str,
    engine_name: str,
    scheduled_scan_name: str | None = None,  # noqa: ARG001
) -> dict:
    """
    Initialize a scan task (dynamic workflow orchestration).

    Orchestrates the workflow dynamically from the YAML configuration:
    - Fetch engine_config (YAML) from the database
    - Detect the enabled scan types
    ...
    Stage 2: Analysis (run in parallel)
    - url_fetch
    - directory_scan

    Args:
        scan_id: scan task ID
        target_id: target ID
        scan_workspace_dir: scan workspace directory path
        engine_name: engine name (for display)
        scheduled_scan_name: scheduled-scan name (optional, shown in notifications)

    Returns:
        dict: execution summary

    Raises:
        ValueError: parameter validation failed or configuration invalid
        RuntimeError: execution failed
    """
    try:
        # Parameter validation
        if not scan_id:
            raise ValueError("scan_id is required")
        if not scan_workspace_dir:
            raise ValueError("scan_workspace_dir is required")
        if not engine_name:
            raise ValueError("engine_name is required")

        # Create the workspace
        scan_workspace_path = setup_scan_workspace(scan_workspace_dir)

        # Fetch the engine configuration
        from apps.scan.models import Scan
        scan = Scan.objects.get(id=scan_id)
        engine_config = scan.yaml_configuration

        # Create the Provider
        provider = _create_provider(scan, target_id, scan_id)

        # Fetch target_name for log display
        target_name = provider.get_target_name()
        if not target_name:
            raise ValueError("Unable to resolve Target name")

        logger.info("=" * 60)
        logger.info("Initializing scan task")
        logger.info("Scan ID: %s, Target: %s, Engine: %s", scan_id, target_name, engine_name)
        logger.info("Workspace: %s", scan_workspace_dir)
        logger.info("=" * 60)

        # Parse the configuration and build the execution plan
        orchestrator = FlowOrchestrator(engine_config)

        # FlowOrchestrator has already parsed every tool configuration
        enabled_tools_by_type = orchestrator.enabled_tools_by_type

        logger.info("Execution plan: %s (%d Flows in total)",
                    ' → '.join(orchestrator.scan_types), len(orchestrator.scan_types))

        # Initialize the stage progress
        from apps.scan.services import ScanService
        ScanService().init_stage_progress(scan_id, orchestrator.scan_types)
        logger.info("✓ Stage progress initialized - Stages: %s", orchestrator.scan_types)

        # Update the target's last-scanned timestamp
        from apps.targets.services import TargetService
        TargetService().update_last_scanned_at(target_id)
        logger.info("✓ Target last-scanned time updated - Target ID: %s", target_id)

        # Run the Flows
        # Note: per-stage status updates are handled automatically by
        # scan_flow_handlers.py (running/completed/failed)
        executed_flows = []
        results = {}

        # Shared execution parameters
        base_kwargs = {
            'scan_id': scan_id,
            'target_name': target_name,
            'target_id': target_id,
            'scan_workspace_dir': str(scan_workspace_path)
        }

        def get_valid_flows(flow_names: list) -> list:
            """Resolve the list of valid Flow functions"""
            valid = []
            for scan_type in flow_names:
                flow_func = orchestrator.get_flow_function(scan_type)
                if not flow_func:
                    logger.warning("Skipping unimplemented Flow: %s", scan_type)
                    continue
                kwargs = dict(base_kwargs)
                kwargs['enabled_tools'] = enabled_tools_by_type.get(scan_type, {})
                kwargs['provider'] = provider
                valid.append((scan_type, flow_func, kwargs))
            return valid

        # Dynamic stage execution
        for mode, enabled_flows in orchestrator.get_execution_stages():
            valid_flows = get_valid_flows(enabled_flows)
            if not valid_flows:
                continue

            logger.info("=" * 60)
            logger.info("%s stage: %s", "Sequential" if mode == 'sequential' else "Parallel",
                        ', '.join(enabled_flows))
            logger.info("=" * 60)

            if mode == 'sequential':
                _execute_sequential_flows(valid_flows, results, executed_flows)
            else:
                _execute_parallel_flows(valid_flows, results, executed_flows)

        logger.info("=" * 60)
        logger.info("✓ Scan initialization finished - executed Flows: %s", ', '.join(executed_flows))
        logger.info("=" * 60)

        return {
            'success': True,
            'scan_id': scan_id,
@@ -259,21 +256,16 @@ def initiate_scan_flow(
            'executed_flows': executed_flows,
            'results': results
        }

    except ValueError as e:
        logger.error("Parameter error: %s", e)
        raise
    except RuntimeError as e:
        logger.error("Runtime error: %s", e)
        raise
    except OSError as e:
        # Filesystem error (workspace creation failed)
        logger.error("Filesystem error: %s", e)
        raise
    except Exception as e:
        logger.exception("Scan initialization failed: %s", e)
        # Note: failure-state updates are handled automatically by Prefect State Handlers
        raise
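# Shape sketch for get_valid_flows (illustrative values; assumes 'port_scan' is enabled
# in the engine YAML and registered with the orchestrator):
# get_valid_flows(['port_scan'])
#   -> [('port_scan', port_scan_flow,
#        {'scan_id': 1, 'target_name': 'example.com', 'target_id': 2,
#         'scan_workspace_dir': '/scans/1', 'enabled_tools': {...}, 'provider': provider})]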
@@ -1,4 +1,4 @@
"""
Port-scan Flow

Orchestrates the complete port-scan pipeline
@@ -10,25 +10,23 @@
- Configuration comes from the parsed YAML
"""

# Django environment initialization (takes effect on import)
from apps.common.prefect_django_setup import setup_django_for_prefect

import logging
import os
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Callable

from prefect import flow

from apps.scan.handlers.scan_flow_handlers import (
    on_scan_flow_completed,
    on_scan_flow_failed,
    on_scan_flow_running,
)
from apps.scan.tasks.port_scan import (
    export_hosts_task,
    run_and_stream_save_ports_task,
)
from apps.scan.utils import build_scan_command, user_log, wait_for_system_load

logger = logging.getLogger(__name__)
@@ -40,28 +38,19 @@ def calculate_port_scan_timeout(
) -> int:
    """
    Compute the timeout from the target count and port count.

    Formula: timeout = targets × ports × base_per_pair
    Range: 60 seconds to unbounded

    Args:
        tool_config: tool configuration dict, including the port settings (ports, top-ports, ...)
        file_path: path to the target file (domain/IP list)
        base_per_pair: base time per "port-target pair" in seconds, default 0.5

    Returns:
        int: the computed timeout in seconds, at least 60
    """
    try:
        # 1. Count the targets
        result = subprocess.run(
            ['wc', '-l', file_path],
            capture_output=True,
            text=True,
            check=True
        )
        target_count = int(result.stdout.strip().split()[0])

        # 2. Parse the port count
        port_count = _parse_port_count(tool_config)

        # 3. Compute the timeout (total work = targets × ports)
        total_work = target_count * port_count
        timeout = max(60, int(total_work * base_per_pair))

        logger.info(
            "Computed port-scan timeout - targets: %d, ports: %d, total work: %d, timeout: %d s",
            target_count, port_count, total_work, timeout
        )
        return timeout

    except Exception as e:
        logger.warning("Timeout computation failed: %s; falling back to 600 s", e)
        return 600
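# Usage sketch, assuming a hosts file of 100 lines and the default base_per_pair=0.5
# (the path is hypothetical):
# calculate_port_scan_timeout({'top-ports': 100}, '/tmp/hosts.txt')
#   -> 100 targets × 100 ports × 0.5 s = 5000 s
# calculate_port_scan_timeout({'ports': '1-1000'}, '/tmp/hosts.txt')
#   -> 100 × 1000 × 0.5 = 50000 s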
def _parse_port_count(tool_config: dict) -> int:
    """
    Parse the port count from the tool configuration.

    Priority:
    1. top-ports: N → return N
    2. ports: "80,443,8080" → return the number of comma-separated entries
    3. ports: "1-1000" → return the size of the range
    4. ports: "1-65535" → return 65535
    5. default → return 100 (naabu scans the top 100 by default)

    Args:
        tool_config: tool configuration dict

    Returns:
        int: the port count
    """
    # Check the top-ports setting
    if 'top-ports' in tool_config:
        top_ports = tool_config['top-ports']
        if isinstance(top_ports, int) and top_ports > 0:
            return top_ports
        logger.warning("Invalid top-ports setting: %s; using the default", top_ports)

    # Check the ports setting
    if 'ports' in tool_config:
        ports_str = str(tool_config['ports']).strip()

        # Comma-separated port list: 80,443,8080
        if ',' in ports_str:
            return len([p.strip() for p in ports_str.split(',') if p.strip()])

        # Port range: 1-1000
        if '-' in ports_str:
            try:
                start, end = ports_str.split('-', 1)
                start_port = int(start.strip())
                end_port = int(end.strip())
                if 1 <= start_port <= end_port <= 65535:
                    return end_port - start_port + 1
                logger.warning("Invalid port range: %s; using the default", ports_str)
            except ValueError:
                logger.warning("Failed to parse port range: %s; using the default", ports_str)

        # Single port
        try:
            port = int(ports_str)
            if 1 <= port <= 65535:
                return 1
        except ValueError:
            logger.warning("Failed to parse ports setting: %s; using the default", ports_str)

    # Default: naabu scans the top 100 ports
    return 100
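# Priority illustration (each value follows directly from the rules above):
# _parse_port_count({'top-ports': 1000})       -> 1000
# _parse_port_count({'ports': '80,443,8080'})  -> 3
# _parse_port_count({'ports': '1-1000'})       -> 1000
# _parse_port_count({'ports': '443'})          -> 1
# _parse_port_count({})                        -> 100  (naabu's default top 100)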
def _export_hosts(port_scan_dir: Path, provider) -> tuple[str, int]:
    """
    Export the host list to a file.

    Args:
        port_scan_dir: port-scan working directory
        provider: TargetProvider instance

    Returns:
        tuple: (hosts_file, host_count)
    """
    logger.info("Step 1: export the host list")

    hosts_file = str(port_scan_dir / 'hosts.txt')
    export_result = export_hosts_task(
        output_file=hosts_file,
        provider=provider,
    )

    host_count = export_result['total_count']

    logger.info(
        "✓ Host list exported - file: %s, count: %d",
        export_result['output_file'], host_count
    )

    if host_count == 0:
        logger.warning("The target has no scannable hosts; the port scan cannot run")

    return export_result['output_file'], host_count
def _run_scans_sequentially(
    enabled_tools: dict,
    domains_file: str,
    port_scan_dir: Path,
    scan_id: int,
    target_id: int,
    target_name: str,
) -> tuple[dict, int, list, list]:
    """
    Run the port-scan tasks sequentially.

    Args:
        enabled_tools: dict of enabled tool configurations
        domains_file: path to the domain file
        port_scan_dir: port-scan working directory
        scan_id: scan task ID
        target_id: target ID
        target_name: target name (for log display)

    Returns:
        tuple: (tool_stats, processed_records, successful_tool_names, failed_tools)
        Note: the port scan streams its output; no result file is produced.
    """
    tool_stats = {}
    processed_records = 0
    failed_tools = []

    for tool_name, tool_config in enabled_tools.items():
        # Build the command
        try:
            command = build_scan_command(
                tool_name=tool_name,
                scan_type='port_scan',
                command_params={'domains_file': domains_file},
                tool_config=tool_config
            )
        except Exception as e:
            reason = f"Command build failed: {e}"
            logger.error("Failed to build %s command: %s", tool_name, e)
            failed_tools.append({'tool': tool_name, 'reason': reason})
            continue

        # Resolve the timeout ('auto' triggers dynamic computation)
        config_timeout = tool_config['timeout']
        if config_timeout == 'auto':
            config_timeout = calculate_port_scan_timeout(tool_config, str(domains_file))
            logger.info("✓ Tool %s computed timeout dynamically: %d s", tool_name, config_timeout)

        # Build the log-file path
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        log_file = port_scan_dir / f"{tool_name}_{timestamp}.log"

        logger.info("Starting %s scan (timeout: %d s)...", tool_name, config_timeout)
        user_log(scan_id, "port_scan", f"Running {tool_name}: {command}")

        # Run the scan task (streams to stdout; no output_file is used)
        try:
            result = run_and_stream_save_ports_task(
                cmd=command,
                tool_name=tool_name,
                scan_id=scan_id,
                target_id=target_id,
                cwd=str(port_scan_dir),
                shell=True,
                batch_size=1000,
                timeout=config_timeout,
                log_file=str(log_file)
            )

            tool_stats[tool_name] = {
                'command': command,
                'result': result,
                'timeout': config_timeout
            }
            tool_records = result.get('processed_records', 0)
            processed_records += tool_records
            logger.info("✓ Tool %s streaming finished - records: %d", tool_name, tool_records)
            user_log(scan_id, "port_scan", f"{tool_name} completed: found {tool_records} ports")

        except subprocess.TimeoutExpired:
            # Note: on timeout, data already parsed by the streaming task is in the database
            reason = f"timeout after {config_timeout}s"
            failed_tools.append({'tool': tool_name, 'reason': reason})
            logger.warning(
                "⚠️ Tool %s timed out - configured timeout: %d s\n"
                "Note: ports parsed before the timeout were saved to the database, "
                "but the scan did not fully complete.",
                tool_name, config_timeout
            )
            user_log(scan_id, "port_scan", f"{tool_name} failed: {reason}", "error")
        except Exception as exc:
            reason = str(exc)
            failed_tools.append({'tool': tool_name, 'reason': reason})
            logger.error("Tool %s failed: %s", tool_name, exc, exc_info=True)
            user_log(scan_id, "port_scan", f"{tool_name} failed: {reason}", "error")

    if failed_tools:
        logger.warning(
            "The following scan tools failed: %s",
            ', '.join([f['tool'] for f in failed_tools])
        )

    if not tool_stats:
        error_details = "; ".join([f"{f['tool']}: {f['reason']}" for f in failed_tools])
        logger.warning("All port-scan tools failed - Target: %s, failed tools: %s", target_name, error_details)
        # Return an empty result instead of raising, so the scan can continue
        return {}, 0, [], failed_tools

    # Compute the list of successful tools dynamically
    successful_tool_names = [
        name for name in enabled_tools
        if name not in [f['tool'] for f in failed_tools]
    ]

    logger.info(
        "✓ Sequential port scan finished - succeeded: %d/%d (succeeded: %s, failed: %s)",
        len(tool_stats), len(enabled_tools),
        ', '.join(successful_tool_names) if successful_tool_names else 'none',
        ', '.join([f['tool'] for f in failed_tools]) if failed_tools else 'none'
    )

    return tool_stats, processed_records, successful_tool_names, failed_tools
@flow(
|
||||
name="port_scan",
|
||||
name="port_scan",
|
||||
log_prints=True,
|
||||
on_running=[on_scan_flow_running],
|
||||
on_completion=[on_scan_flow_completed],
|
||||
@@ -342,103 +292,83 @@ def _run_scans_sequentially(
 )
 def port_scan_flow(
     scan_id: int,
-    target_name: str,
     target_id: int,
     scan_workspace_dir: str,
-    enabled_tools: dict
+    enabled_tools: dict,
+    provider,
 ) -> dict:
     """
     Port scan flow

     Main features:
     1. Scan the open ports of the target domains/IPs
     2. Save host + ip + port triple mappings to the HostPortMapping table

     Output assets:
     - HostPortMapping: host-port mapping (host + ip + port triples)

     Workflow:
     Step 0: Create the working directory
     Step 1: Export the domain list to a file (for the scan tools)
     Step 2: Parse the config and get the enabled tools
-    Step 3: Run the scan tools sequentially, streaming their output into the database (→ HostPortMapping)
+    Step 3: Run the scan tools sequentially, streaming their output into the database

     Args:
         scan_id: Scan task ID
-        target_name: domain name
         target_id: Target ID
         scan_workspace_dir: Scan workspace directory
         enabled_tools: dict of enabled tool configs
+        provider: TargetProvider instance

     Returns:
-        dict: {
-            'success': bool,
-            'scan_id': int,
-            'target': str,
-            'scan_workspace_dir': str,
-            'domains_file': str,
-            'domain_count': int,
-            'processed_records': int,
-            'executed_tasks': list,
-            'tool_stats': {
-                'total': int,                   # total number of tools
-                'successful': int,              # number of successful tools
-                'failed': int,                  # number of failed tools
-                'successful_tools': list[str],  # successful tools, e.g. ['naabu_active']
-                'failed_tools': list[dict],     # failed tools, e.g. [{'tool': 'naabu_passive', 'reason': 'timeout'}]
-                'details': dict                 # detailed execution results (kept for backward compatibility)
-            }
-        }
+        dict: scan result

     Raises:
         ValueError: configuration error
         RuntimeError: execution failure

     Note:
         Port scan tools (such as naabu) resolve domains to IPs and output host + ip + port triples.
         One host may map to multiple IPs (CDN, load balancing), hence the triple mapping table.
     """
     try:
-        # Parameter validation
+        wait_for_system_load(context="port_scan_flow")
+
+        # Get target_name from the provider
+        target_name = provider.get_target_name()
+        if not target_name:
+            raise ValueError("Unable to get the Target name")
+
         if scan_id is None:
             raise ValueError("scan_id must not be empty")
-        if not target_name:
-            raise ValueError("target_name must not be empty")
         if target_id is None:
             raise ValueError("target_id must not be empty")
         if not scan_workspace_dir:
             raise ValueError("scan_workspace_dir must not be empty")
         if not enabled_tools:
             raise ValueError("enabled_tools must not be empty")

         logger.info(
-            "="*60 + "\n" +
-            "Starting port scan\n" +
-            f"  Scan ID: {scan_id}\n" +
-            f"  Target: {target_name}\n" +
-            f"  Workspace: {scan_workspace_dir}\n" +
-            "="*60
+            "Starting port scan - Scan ID: %s, Target: %s, Workspace: %s",
+            scan_id, target_name, scan_workspace_dir
         )

+        user_log(scan_id, "port_scan", "Starting port scan")
+
         # Step 0: create the working directory
         from apps.scan.utils import setup_scan_directory
         port_scan_dir = setup_scan_directory(scan_workspace_dir, 'port_scan')

-        # Step 1: export the scan target list to a file (content decided by the Target type)
-        targets_file, target_count, target_type = _export_scan_targets(target_id, port_scan_dir)
-
-        if target_count == 0:
-            logger.warning("The target has no scannable addresses; skipping the port scan")
+        # Step 1: export the host list
+        hosts_file, host_count = _export_hosts(port_scan_dir, provider)
+
+        if host_count == 0:
+            logger.warning("Skipping port scan: no hosts to scan - Scan ID: %s", scan_id)
+            user_log(scan_id, "port_scan", "Skipped: no hosts to scan", "warning")
             return {
                 'success': True,
                 'scan_id': scan_id,
                 'target': target_name,
                 'scan_workspace_dir': scan_workspace_dir,
-                'targets_file': targets_file,
-                'target_count': 0,
-                'target_type': target_type,
+                'hosts_file': hosts_file,
+                'host_count': 0,
                 'processed_records': 0,
-                'executed_tasks': ['export_scan_targets'],
+                'executed_tasks': ['export_hosts'],
                 'tool_stats': {
                     'total': 0,
                     'successful': 0,
@@ -448,39 +378,35 @@ def port_scan_flow(
                     'details': {}
                 }
             }

         # Step 2: tool configuration
-        logger.info("Step 2: tool configuration")
-        logger.info(
-            "✓ Enabled tools: %s",
-            ', '.join(enabled_tools.keys())
-        )
-
+        logger.info("✓ Enabled tools: %s", ', '.join(enabled_tools.keys()))

         # Step 3: run the scan tools sequentially
-        logger.info("Step 3: run the scan tools sequentially")
         tool_stats, processed_records, successful_tool_names, failed_tools = _run_scans_sequentially(
             enabled_tools=enabled_tools,
-            domains_file=targets_file,  # now targets_file; the original parameter name is kept
+            domains_file=hosts_file,
             port_scan_dir=port_scan_dir,
             scan_id=scan_id,
             target_id=target_id,
-            target_name=target_name
+            target_name=target_name,
         )

-        logger.info("="*60 + "\n✓ Port scan completed\n" + "="*60)
-
-        # Dynamically build the list of executed tasks
-        executed_tasks = ['export_scan_targets', 'parse_config']
-        executed_tasks.extend([f'run_and_stream_save_ports ({tool})' for tool in tool_stats.keys()])
-
+        logger.info("✓ Port scan completed - ports found: %d", processed_records)
+        user_log(scan_id, "port_scan", f"port_scan completed: found {processed_records} ports")
+
+        executed_tasks = ['export_hosts', 'parse_config']
+        executed_tasks.extend([f'run_and_stream_save_ports ({tool})' for tool in tool_stats])

         return {
             'success': True,
             'scan_id': scan_id,
             'target': target_name,
             'scan_workspace_dir': scan_workspace_dir,
-            'targets_file': targets_file,
-            'target_count': target_count,
-            'target_type': target_type,
+            'hosts_file': hosts_file,
+            'host_count': host_count,
             'processed_records': processed_records,
             'executed_tasks': executed_tasks,
             'tool_stats': {
@@ -488,8 +414,8 @@ def port_scan_flow(
                 'successful': len(successful_tool_names),
                 'failed': len(failed_tools),
                 'successful_tools': successful_tool_names,
-                'failed_tools': failed_tools,  # [{'tool': 'naabu_active', 'reason': 'timeout'}]
-                'details': tool_stats  # detailed results (kept for backward compatibility)
+                'failed_tools': failed_tools,
+                'details': tool_stats
             }
         }
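For context on the Note above: each record emitted by a port scanner carries the host, the IP it resolved to, and the open port, and the same host can legitimately appear with several IPs. A minimal sketch of the triple handling (hypothetical JSON record format and helper name, not part of the diff):

import json

def parse_port_triples(lines):
    # Deduplicate (host, ip, port) triples; one host may resolve to many IPs (CDN, load balancing).
    seen = set()
    for line in lines:
        try:
            rec = json.loads(line)
            triple = (rec["host"], rec["ip"], int(rec["port"]))
        except (json.JSONDecodeError, KeyError, TypeError, ValueError):
            continue  # skip malformed tool output
        if triple not in seen:
            seen.add(triple)
            yield triple

# The same host behind a CDN yields two distinct mapping rows:
rows = list(parse_port_triples([
    '{"host": "www.example.com", "ip": "203.0.113.1", "port": 443}',
    '{"host": "www.example.com", "ip": "203.0.113.2", "port": 443}',
]))
assert len(rows) == 2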
backend/apps/scan/flows/screenshot_flow.py (new file, +175 lines)
@@ -0,0 +1,175 @@
+"""
+Screenshot Flow
+
+Orchestrates the complete screenshot pipeline:
+1. Fetch the URL list from the Provider
+2. Capture screenshots in batches and save the snapshots
+3. Sync them to the asset tables
+"""
+
+import logging
+
+from prefect import flow
+
+from apps.scan.handlers.scan_flow_handlers import (
+    on_scan_flow_completed,
+    on_scan_flow_failed,
+    on_scan_flow_running,
+)
+from apps.scan.providers import TargetProvider
+from apps.scan.tasks.screenshot import capture_screenshots_task
+from apps.scan.utils import user_log, wait_for_system_load
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_screenshot_config(enabled_tools: dict) -> dict:
+    """Parse the screenshot config"""
+    playwright_config = enabled_tools.get('playwright', {})
+    return {
+        'concurrency': playwright_config.get('concurrency', 5),
+    }
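For illustration, the parser above reads only the playwright.concurrency key and falls back to a concurrency of 5 when it is absent (hypothetical values):

# Exercising _parse_screenshot_config's default fallback (illustrative only).
assert _parse_screenshot_config({}) == {'concurrency': 5}
assert _parse_screenshot_config({'playwright': {'concurrency': 12}}) == {'concurrency': 12}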
+def _collect_urls_from_provider(provider: TargetProvider) -> tuple[list[str], str]:
+    """
+    Collect website URLs from the Provider (with fallback logic)
+
+    Priority: WebSite → HostPortMapping → Default URL
+
+    Returns:
+        tuple: (urls, source)
+        - urls: list of URLs
+        - source: data source ('website' | 'host_port' | 'default')
+    """
+    logger.info("Fetching website URLs from the Provider - Provider: %s", type(provider).__name__)
+
+    # Prefer WebSite records
+    urls = list(provider.iter_websites())
+    if urls:
+        logger.info("Using the WebSite data source - count: %d", len(urls))
+        return urls, "website"
+
+    # Fall back to HostPortMapping
+    urls = list(provider.iter_host_port_urls())
+    if urls:
+        logger.info("WebSite is empty; falling back to HostPortMapping - count: %d", len(urls))
+        return urls, "host_port"
+
+    # Final fallback: the default URLs
+    urls = list(provider.iter_default_urls())
+    logger.info("HostPortMapping is empty; falling back to the default URLs - count: %d", len(urls))
+    return urls, "default"
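The fallback order can be exercised with a minimal stub provider; this sketch assumes only the three iterator methods the function actually calls (hypothetical stub, not part of the diff):

class _StubProvider:
    # Mimics a target with no WebSite records but one host:port mapping.
    def iter_websites(self):
        return iter([])

    def iter_host_port_urls(self):
        return iter(["http://example.com:8080"])

    def iter_default_urls(self):
        return iter(["http://example.com"])

urls, source = _collect_urls_from_provider(_StubProvider())
assert (urls, source) == (["http://example.com:8080"], "host_port")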
+def _build_empty_result(scan_id: int, target_name: str) -> dict:
+    """Build an empty result"""
+    return {
+        'success': True,
+        'scan_id': scan_id,
+        'target': target_name,
+        'total_urls': 0,
+        'successful': 0,
+        'failed': 0,
+        'synced': 0
+    }
+
+
+@flow(
+    name="screenshot",
+    log_prints=True,
+    on_running=[on_scan_flow_running],
+    on_completion=[on_scan_flow_completed],
+    on_failure=[on_scan_flow_failed],
+)
+def screenshot_flow(
+    scan_id: int,
+    target_id: int,
+    scan_workspace_dir: str,
+    enabled_tools: dict,
+    provider: TargetProvider,
+) -> dict:
+    """
+    Screenshot flow
+
+    Args:
+        scan_id: Scan task ID
+        target_id: Target ID
+        scan_workspace_dir: scan workspace directory
+        enabled_tools: enabled tool configs
+        provider: TargetProvider instance
+
+    Returns:
+        dict with the screenshot results
+    """
+    try:
+        wait_for_system_load(context="screenshot_flow")
+
+        # Get target_name from the provider
+        target_name = provider.get_target_name()
+        if not target_name:
+            raise ValueError("Unable to get the Target name")
+
+        logger.info(
+            "Starting screenshot scan - Scan ID: %s, Target: %s",
+            scan_id, target_name
+        )
+        user_log(scan_id, "screenshot", "Starting screenshot capture")
+
+        # Step 1: parse the config
+        config = _parse_screenshot_config(enabled_tools)
+        concurrency = config['concurrency']
+        logger.info("Screenshot config - concurrency: %d", concurrency)
+
+        # Step 2: collect the URL list from the Provider (with fallback)
+        urls, source = _collect_urls_from_provider(provider)
+        logger.info("URL collection done - source: %s, count: %d", source, len(urls))
+
+        if not urls:
+            logger.warning("No URLs to capture; skipping the screenshot task")
+            user_log(scan_id, "screenshot", "Skipped: no URLs to capture", "warning")
+            return _build_empty_result(scan_id, target_name)
+
+        user_log(scan_id, "screenshot", f"Found {len(urls)} URLs to capture")
+
+        # Step 3: capture screenshots in batches
+        logger.info("Batch screenshot capture - %d URLs", len(urls))
+        capture_result = capture_screenshots_task(
+            urls=urls,
+            scan_id=scan_id,
+            target_id=target_id,
+            config={'concurrency': concurrency}
+        )
+
+        # Step 4: sync to the asset tables
+        logger.info("Syncing screenshots to the asset tables")
+        from apps.asset.services.screenshot_service import ScreenshotService
+        synced = ScreenshotService().sync_screenshots_to_asset(scan_id, target_id)
+
+        total = capture_result['total']
+        successful = capture_result['successful']
+        failed = capture_result['failed']
+
+        logger.info(
+            "✓ Screenshots done - total: %d, successful: %d, failed: %d, synced: %d",
+            total, successful, failed, synced
+        )
+        user_log(
+            scan_id, "screenshot",
+            f"Screenshot completed: {successful}/{total} captured, {synced} synced"
+        )
+
+        return {
+            'success': True,
+            'scan_id': scan_id,
+            'target': target_name,
+            'total_urls': total,
+            'successful': successful,
+            'failed': failed,
+            'synced': synced
+        }
+
+    except Exception:
+        logger.exception("Screenshot flow failed")
+        user_log(scan_id, "screenshot", "Screenshot failed", "error")
+        raise
@@ -1,4 +1,3 @@
-
 """
 Site Scan Flow
 
@@ -11,296 +10,312 @@
 - Config is parsed from YAML
 """
 
-# Django environment initialization (takes effect on import)
-from apps.common.prefect_django_setup import setup_django_for_prefect
-
 import logging
 import os
 import subprocess
+from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
 from typing import Callable
+from typing import Optional
 
 from prefect import flow
-from apps.scan.tasks.site_scan import export_site_urls_task, run_and_stream_save_websites_task
 
+# Django environment initialization (takes effect on import)
+from apps.common.prefect_django_setup import setup_django_for_prefect  # noqa: F401
 from apps.scan.handlers.scan_flow_handlers import (
-    on_scan_flow_running,
     on_scan_flow_completed,
     on_scan_flow_failed,
+    on_scan_flow_running,
 )
-from apps.scan.utils import config_parser, build_scan_command
+from apps.scan.tasks.site_scan import (
+    export_site_urls_task,
+    run_and_stream_save_websites_task,
+)
+from apps.scan.utils import build_scan_command, user_log, wait_for_system_load
 
 logger = logging.getLogger(__name__)
 
 
-def calculate_timeout_by_line_count(
-    tool_config: dict,
-    file_path: str,
-    base_per_time: int = 1,
-    min_timeout: int = 60
-) -> int:
-    """
-    Compute the timeout from the file line count
-
-    Uses wc -l to count the file's lines and derives the timeout from the count and a per-line base time
-
-    Args:
-        tool_config: tool config dict (unused here, kept for interface consistency)
-        file_path: path of the file whose lines are counted
-        base_per_time: base time per line in seconds, default 1
-        min_timeout: minimum timeout in seconds, default 60
-
-    Returns:
-        int: the computed timeout in seconds, no less than min_timeout
-
-    Example:
-        timeout = calculate_timeout_by_line_count(
-            tool_config={},
-            file_path='/path/to/urls.txt',
-            base_per_time=2
-        )
-    """
+@dataclass
+class ScanContext:
+    """Scan context that bundles the scan parameters"""
+    scan_id: int
+    target_id: int
+    target_name: str
+    site_scan_dir: Path
+    urls_file: str
+    total_urls: int
+
+
+def _count_file_lines(file_path: str) -> int:
+    """Count the file's lines with wc -l"""
     try:
-        # Count the lines quickly with wc -l
         result = subprocess.run(
             ['wc', '-l', file_path],
             capture_output=True,
             text=True,
             check=True
         )
-        # wc -l output format: line count + space + file name
-        line_count = int(result.stdout.strip().split()[0])
-
-        # Compute the timeout: line count × per-line base time, no less than the minimum
-        timeout = max(line_count * base_per_time, min_timeout)
-
-        logger.info(
-            f"Auto-computed timeout: file={file_path}, "
-            f"lines={line_count}, per-line={base_per_time}s, minimum={min_timeout}s, timeout={timeout}s"
-        )
-
-        return timeout
-
-    except Exception as e:
-        # Fall back to the default when wc -l fails
-        logger.warning(f"wc -l line counting failed: {e}; using the default timeout: {min_timeout}s")
-        return min_timeout
+        return int(result.stdout.strip().split()[0])
+    except (subprocess.CalledProcessError, ValueError, IndexError) as e:
+        logger.warning("wc -l line counting failed: %s; returning 0", e)
+        return 0
 
 
+def _calculate_timeout_by_line_count(
+    file_path: str,
+    base_per_time: int = 1,
+    min_timeout: int = 60
+) -> int:
+    """
+    Compute the timeout from the file line count
+
+    Args:
+        file_path: path of the file whose lines are counted
+        base_per_time: base time per line in seconds, default 1
+        min_timeout: minimum timeout in seconds, default 60
+
+    Returns:
+        int: the computed timeout in seconds, no less than min_timeout
+    """
+    line_count = _count_file_lines(file_path)
+    timeout = max(line_count * base_per_time, min_timeout)
+
+    logger.info(
+        "Auto-computed timeout: file=%s, lines=%d, per-line=%ds, timeout=%ds",
+        file_path, line_count, base_per_time, timeout
+    )
+    return timeout
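The helper above is just a floor on a linear estimate, so small inputs hit the minimum and large inputs scale with the line count; for example:

# 45-line URL file at 1 s/line: the 60 s floor applies; 500 lines scale linearly.
assert max(45 * 1, 60) == 60
assert max(500 * 1, 60) == 500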
-def _export_site_urls(target_id: int, site_scan_dir: Path, target_name: str = None) -> tuple[str, int, int]:
+def _export_site_urls(
+    site_scan_dir: Path,
+    provider,
+) -> tuple[str, int]:
     """
     Export the site URLs to a file
 
     Args:
-        target_id: Target ID
         site_scan_dir: site scan directory
-        target_name: target name (used as the default when lazily loading)
-
+        provider: TargetProvider instance
 
     Returns:
-        tuple: (urls_file, total_urls, association_count)
-
-    Raises:
-        ValueError: the URL count is 0
+        tuple: (urls_file, total_urls)
     """
     logger.info("Step 1: export the site URL list")
 
     urls_file = str(site_scan_dir / 'site_urls.txt')
     export_result = export_site_urls_task(
-        target_id=target_id,
         output_file=urls_file,
-        batch_size=1000  # process 1000 subdomains per batch
+        provider=provider,
     )
 
     total_urls = export_result['total_urls']
-    association_count = export_result['association_count']  # host-port association count
 
     logger.info(
-        "✓ Site URL export done - file: %s, URL count: %d, associations: %d",
-        export_result['output_file'],
-        total_urls,
-        association_count
+        "✓ Site URL export done - file: %s, URL count: %d",
+        export_result['output_file'], total_urls
     )
 
     if total_urls == 0:
         logger.warning("The target has no usable site URLs; the site scan cannot run")
-        # Do not raise; the caller decides how to handle it
-        # raise ValueError("The target has no usable site URLs; the site scan cannot run")
-
-    return export_result['output_file'], total_urls, association_count
+
+    return export_result['output_file'], total_urls
+def _get_tool_timeout(tool_config: dict, urls_file: str) -> int:
+    """Get the tool timeout (supports 'auto' for dynamic computation)"""
+    config_timeout = tool_config.get('timeout', 300)
+
+    if config_timeout == 'auto':
+        return _calculate_timeout_by_line_count(urls_file, base_per_time=1)
+
+    dynamic_timeout = _calculate_timeout_by_line_count(urls_file, base_per_time=1)
+    return max(dynamic_timeout, config_timeout)
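Note that a numeric timeout in the tool config acts as a floor rather than a cap: the dynamic estimate can only raise it. A sketch mirroring the logic for a known line count (hypothetical helper, illustrative values):

def _preview_timeout(config_timeout, line_count, min_timeout=60):
    # Mirrors _get_tool_timeout when the URL file has line_count lines.
    dynamic = max(line_count * 1, min_timeout)
    return dynamic if config_timeout == 'auto' else max(dynamic, config_timeout)

assert _preview_timeout('auto', 1000) == 1000
assert _preview_timeout(300, 1000) == 1000   # config is a floor, not a cap
assert _preview_timeout(3600, 1000) == 3600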
+def _execute_single_tool(
+    tool_name: str,
+    tool_config: dict,
+    ctx: ScanContext
+) -> Optional[dict]:
+    """
+    Execute a single scan tool
+
+    Returns:
+        the result dict on success, None on failure
+    """
+    # Build the command
+    try:
+        command = build_scan_command(
+            tool_name=tool_name,
+            scan_type='site_scan',
+            command_params={'url_file': ctx.urls_file},
+            tool_config=tool_config
+        )
+    except (ValueError, KeyError) as e:
+        logger.error("Failed to build the %s command: %s", tool_name, e)
+        return None
+
+    timeout = _get_tool_timeout(tool_config, ctx.urls_file)
+    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    log_file = ctx.site_scan_dir / f"{tool_name}_{timestamp}.log"
+
+    logger.info(
+        "Starting the %s site scan - URLs: %d, timeout: %ds",
+        tool_name, ctx.total_urls, timeout
+    )
+    user_log(ctx.scan_id, "site_scan", f"Running {tool_name}: {command}")
+
+    try:
+        result = run_and_stream_save_websites_task(
+            cmd=command,
+            tool_name=tool_name,
+            scan_id=ctx.scan_id,
+            target_id=ctx.target_id,
+            cwd=str(ctx.site_scan_dir),
+            shell=True,
+            timeout=timeout,
+            log_file=str(log_file)
+        )
+
+        tool_created = result.get('created_websites', 0)
+        skipped = result.get('skipped_no_subdomain', 0) + result.get('skipped_failed', 0)
+
+        logger.info(
+            "✓ Tool %s done - processed: %d, created: %d, skipped: %d",
+            tool_name, result.get('processed_records', 0), tool_created, skipped
+        )
+        user_log(
+            ctx.scan_id, "site_scan",
+            f"{tool_name} completed: found {tool_created} websites"
+        )
+
+        return {'command': command, 'result': result, 'timeout': timeout}
+
+    except subprocess.TimeoutExpired:
+        logger.warning(
+            "⚠️ Tool %s timed out - configured timeout: %d seconds (data parsed before the timeout was saved)",
+            tool_name, timeout
+        )
+        user_log(
+            ctx.scan_id, "site_scan",
+            f"{tool_name} failed: timeout after {timeout}s", "error"
+        )
+    except (OSError, RuntimeError) as exc:
+        logger.error("Tool %s failed: %s", tool_name, exc, exc_info=True)
+        user_log(ctx.scan_id, "site_scan", f"{tool_name} failed: {exc}", "error")
+
+    return None
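With all per-scan parameters bundled in ScanContext, one tool run is self-contained; a hypothetical invocation (made-up IDs and paths) would look like:

from pathlib import Path

ctx = ScanContext(
    scan_id=1,
    target_id=42,
    target_name="example.com",
    site_scan_dir=Path("/tmp/scan_1/site_scan"),  # hypothetical workspace
    urls_file="/tmp/scan_1/site_scan/site_urls.txt",
    total_urls=120,
)
outcome = _execute_single_tool("httpx", {"timeout": "auto"}, ctx)
# outcome is {'command': ..., 'result': ..., 'timeout': ...} on success, or None on any failure.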
 def _run_scans_sequentially(
     enabled_tools: dict,
-    urls_file: str,
-    total_urls: int,
-    site_scan_dir: Path,
-    scan_id: int,
-    target_id: int,
-    target_name: str
+    ctx: ScanContext
 ) -> tuple[dict, int, list, list]:
     """
     Run the site scan tasks sequentially
 
     Args:
         enabled_tools: dict of enabled tool configs
-        urls_file: URL file path
-        total_urls: total URL count
-        site_scan_dir: site scan directory
-        scan_id: scan task ID
-        target_id: target ID
-        target_name: target name (for error logs)
-
 
     Returns:
-        tuple: (tool_stats, processed_records, successful_tool_names, failed_tools)
-
-    Raises:
-        RuntimeError: all tools failed
+        tuple: (tool_stats, processed_records, successful_tools, failed_tools)
     """
     tool_stats = {}
     processed_records = 0
     failed_tools = []
 
     for tool_name, tool_config in enabled_tools.items():
-        # 1. Build the full command (variable substitution)
-        try:
-            command = build_scan_command(
-                tool_name=tool_name,
-                scan_type='site_scan',
-                command_params={
-                    'url_file': urls_file
-                },
-                tool_config=tool_config
-            )
-        except Exception as e:
-            reason = f"Command build failed: {str(e)}"
-            logger.error(f"Failed to build the {tool_name} command: {e}")
-            failed_tools.append({'tool': tool_name, 'reason': reason})
-            continue
-
-        # 2. Get the timeout (supports 'auto' for dynamic computation)
-        config_timeout = tool_config.get('timeout', 300)
-        if config_timeout == 'auto':
-            # Compute the timeout dynamically
-            timeout = calculate_timeout_by_line_count(tool_config, urls_file, base_per_time=1)
-            logger.info(f"✓ Tool {tool_name} dynamically computed timeout: {timeout}s")
+        result = _execute_single_tool(tool_name, tool_config, ctx)
+
+        if result:
+            tool_stats[tool_name] = result
+            processed_records += result['result'].get('processed_records', 0)
         else:
-            # Use the larger of the configured and the dynamically computed timeout
-            dynamic_timeout = calculate_timeout_by_line_count(tool_config, urls_file, base_per_time=1)
-            timeout = max(dynamic_timeout, config_timeout)
-
-        # 2.1 Build the log file path (similar to the port scan)
-        from datetime import datetime
-        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-        log_file = site_scan_dir / f"{tool_name}_{timestamp}.log"
-
-        logger.info(
-            "Starting the %s site scan - URLs: %d, final timeout: %ds",
-            tool_name, total_urls, timeout
-        )
-
-        # 3. Run the scan task
-        try:
-            # Stream the scan and save results in real time
-            result = run_and_stream_save_websites_task(
-                cmd=command,
-                tool_name=tool_name,  # new: tool name
-                scan_id=scan_id,
-                target_id=target_id,
-                cwd=str(site_scan_dir),
-                shell=True,
-                batch_size=1000,
-                timeout=timeout,
-                log_file=str(log_file)  # new: log file path
-            )
-
-            tool_stats[tool_name] = {
-                'command': command,
-                'result': result,
-                'timeout': timeout
-            }
-            processed_records += result.get('processed_records', 0)
-
-            logger.info(
-                "✓ Tool %s streaming done - processed records: %d, created sites: %d, skipped: %d",
-                tool_name,
-                result.get('processed_records', 0),
-                result.get('created_websites', 0),
-                result.get('skipped_no_subdomain', 0) + result.get('skipped_failed', 0)
-            )
-
-        except subprocess.TimeoutExpired as exc:
-            # Handle timeouts separately
-            reason = f"Timed out (configured: {timeout}s)"
-            failed_tools.append({'tool': tool_name, 'reason': reason})
-            logger.warning(
-                "⚠️ Tool %s timed out - configured timeout: %d seconds\n"
-                "Note: site data parsed before the timeout was already saved to the database, but the scan did not fully complete.",
-                tool_name, timeout
-            )
-        except Exception as exc:
-            # Other exceptions
-            failed_tools.append({'tool': tool_name, 'reason': str(exc)})
-            logger.error("Tool %s failed: %s", tool_name, exc, exc_info=True)
+            failed_tools.append({'tool': tool_name, 'reason': 'execution failed'})
 
     if failed_tools:
         logger.warning(
             "The following scan tools failed: %s",
-            ', '.join([f['tool'] for f in failed_tools])
+            ', '.join(f['tool'] for f in failed_tools)
         )
 
     if not tool_stats:
-        error_details = "; ".join([f"{f['tool']}: {f['reason']}" for f in failed_tools])
-        logger.warning("All site scan tools failed - target: %s, failed tools: %s", target_name, error_details)
-        # Return an empty result instead of raising, so the scan can continue
+        logger.warning(
+            "All site scan tools failed - target: %s", ctx.target_name
+        )
         return {}, 0, [], failed_tools
 
-    # Dynamically compute the list of successful tools
-    successful_tool_names = [name for name in enabled_tools.keys()
-                             if name not in [f['tool'] for f in failed_tools]]
-
+    successful_tools = [
+        name for name in enabled_tools
+        if name not in {f['tool'] for f in failed_tools}
+    ]
 
     logger.info(
-        "✓ Sequential site scan done - successful: %d/%d (successful: %s, failed: %s)",
-        len(tool_stats), len(enabled_tools),
-        ', '.join(successful_tool_names) if successful_tool_names else 'none',
-        ', '.join([f['tool'] for f in failed_tools]) if failed_tools else 'none'
+        "✓ Site scan execution done - successful: %d/%d",
+        len(tool_stats), len(enabled_tools)
    )
 
-    return tool_stats, processed_records, successful_tool_names, failed_tools
+    return tool_stats, processed_records, successful_tools, failed_tools
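The successful list is derived by set difference against the failed names rather than tracked incrementally, which preserves the enabled_tools order; a small illustration with hypothetical tool names:

enabled = {'httpx': {}, 'katana': {}}
failed = [{'tool': 'katana', 'reason': 'execution failed'}]
successful = [n for n in enabled if n not in {f['tool'] for f in failed}]
assert successful == ['httpx']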
-def calculate_timeout(url_count: int, base: int = 600, per_url: int = 1) -> int:
-    """
-    Compute the scan timeout dynamically from the URL count
+def _build_empty_result(
+    scan_id: int,
+    target_name: str,
+    scan_workspace_dir: str,
+    urls_file: str,
+) -> dict:
+    """Build an empty result (when there are no URLs to scan)"""
+    return {
+        'success': True,
+        'scan_id': scan_id,
+        'target': target_name,
+        'scan_workspace_dir': scan_workspace_dir,
+        'urls_file': urls_file,
+        'total_urls': 0,
+        'processed_records': 0,
+        'created_websites': 0,
+        'skipped_no_subdomain': 0,
+        'skipped_failed': 0,
+        'executed_tasks': ['export_site_urls'],
+        'tool_stats': {
+            'total': 0,
+            'successful': 0,
+            'failed': 0,
+            'successful_tools': [],
+            'failed_tools': [],
+            'details': {}
+        }
+    }
 
-    Rules:
-    - base time: 600 seconds by default (10 minutes)
-    - extra per URL: 1 second by default
-
-    Args:
-        url_count: URL count, must be a positive integer
-        base: base timeout in seconds, default 600
-        per_url: extra seconds per URL, default 1
+
+def _aggregate_tool_results(tool_stats: dict) -> tuple[int, int, int]:
+    """Aggregate the per-tool results"""
+    total_created = sum(
+        s['result'].get('created_websites', 0) for s in tool_stats.values()
+    )
+    total_skipped_no_subdomain = sum(
+        s['result'].get('skipped_no_subdomain', 0) for s in tool_stats.values()
+    )
+    total_skipped_failed = sum(
+        s['result'].get('skipped_failed', 0) for s in tool_stats.values()
+    )
+    return total_created, total_skipped_no_subdomain, total_skipped_failed
 
-    Returns:
-        int: the computed timeout in seconds, at most max_timeout
-
-    Raises:
-        ValueError: raised when url_count is negative or 0
-    """
-    if url_count < 0:
-        raise ValueError(f"URL count must not be negative: {url_count}")
-    if url_count == 0:
-        raise ValueError("URL count must not be 0")
-
-    timeout = base + int(url_count * per_url)
-
-    # No upper bound here; the caller controls it as needed
-    return timeout
+
+def _validate_flow_params(
+    scan_id: int,
+    target_id: int,
+    scan_workspace_dir: str
+) -> None:
+    """Validate the flow parameters"""
+    if scan_id is None:
+        raise ValueError("scan_id must not be empty")
+    if target_id is None:
+        raise ValueError("target_id must not be empty")
+    if not scan_workspace_dir:
+        raise ValueError("scan_workspace_dir must not be empty")
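The aggregation helper just sums the per-tool counters across tool_stats; for example, with two hypothetical tool entries:

stats = {
    'httpx': {'result': {'created_websites': 10, 'skipped_no_subdomain': 1, 'skipped_failed': 0}},
    'other': {'result': {'created_websites': 3, 'skipped_no_subdomain': 0, 'skipped_failed': 2}},
}
assert _aggregate_tool_results(stats) == (13, 1, 2)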
 @flow(
-    name="site_scan",
+    name="site_scan",
     log_prints=True,
     on_running=[on_scan_flow_running],
     on_completion=[on_scan_flow_completed],
@@ -308,142 +323,95 @@ def calculate_timeout(url_count: int, base: int = 600, per_url: int = 1) -> int:
 )
 def site_scan_flow(
     scan_id: int,
-    target_name: str,
     target_id: int,
     scan_workspace_dir: str,
-    enabled_tools: dict
+    enabled_tools: dict,
+    provider,
 ) -> dict:
     """
     Site scan flow
 
     Main features:
     1. Get every subdomain and its ports from the target, join them into URLs, and write them to a file
     2. Batch-request them with httpx and save the results to the database in real time (streaming)
 
     Workflow:
     Step 0: Create the working directory
     Step 1: Export the site URL list
     Step 2: Parse the config and get the enabled tools
     Step 3: Run the scan tools sequentially and save results in real time
 
     Args:
         scan_id: Scan task ID
-        target_name: target name
         target_id: Target ID
         scan_workspace_dir: scan workspace directory
         enabled_tools: dict of enabled tool configs
-
+        provider: TargetProvider instance
 
     Returns:
-        dict: {
-            'success': bool,
-            'scan_id': int,
-            'target': str,
-            'scan_workspace_dir': str,
-            'urls_file': str,
-            'total_urls': int,
-            'association_count': int,
-            'processed_records': int,
-            'created_websites': int,
-            'skipped_no_subdomain': int,
-            'skipped_failed': int,
-            'executed_tasks': list,
-            'tool_stats': {
-                'total': int,
-                'successful': int,
-                'failed': int,
-                'successful_tools': list[str],
-                'failed_tools': list[dict]
-            }
-        }
-
+        dict: scan result
 
     Raises:
         ValueError: configuration error
         RuntimeError: execution failure
     """
     try:
-        logger.info(
-            "="*60 + "\n" +
-            "Starting site scan\n" +
-            f"  Scan ID: {scan_id}\n" +
-            f"  Target: {target_name}\n" +
-            f"  Workspace: {scan_workspace_dir}\n" +
-            "="*60
-        )
-
-        # Parameter validation
-        if scan_id is None:
-            raise ValueError("scan_id must not be empty")
+        wait_for_system_load(context="site_scan_flow")
+
+        # Get target_name from the provider
+        target_name = provider.get_target_name()
         if not target_name:
-            raise ValueError("target_name must not be empty")
-        if target_id is None:
-            raise ValueError("target_id must not be empty")
-        if not scan_workspace_dir:
-            raise ValueError("scan_workspace_dir must not be empty")
-
+            raise ValueError("Unable to get the Target name")
+
+        logger.info(
+            "Starting site scan - Scan ID: %s, Target: %s, Workspace: %s",
+            scan_id, target_name, scan_workspace_dir
+        )
+
+        _validate_flow_params(scan_id, target_id, scan_workspace_dir)
+        user_log(scan_id, "site_scan", "Starting site scan")
 
         # Step 0: create the working directory
         from apps.scan.utils import setup_scan_directory
         site_scan_dir = setup_scan_directory(scan_workspace_dir, 'site_scan')
 
         # Step 1: export the site URLs
-        urls_file, total_urls, association_count = _export_site_urls(
-            target_id, site_scan_dir, target_name
+        urls_file, total_urls = _export_site_urls(
+            site_scan_dir, provider
         )
 
         if total_urls == 0:
-            logger.warning("The target has no usable site URLs; skipping the site scan")
-            return {
-                'success': True,
-                'scan_id': scan_id,
-                'target': target_name,
-                'scan_workspace_dir': scan_workspace_dir,
-                'urls_file': urls_file,
-                'total_urls': 0,
-                'association_count': association_count,
-                'processed_records': 0,
-                'created_websites': 0,
-                'skipped_no_subdomain': 0,
-                'skipped_failed': 0,
-                'executed_tasks': ['export_site_urls'],
-                'tool_stats': {
-                    'total': 0,
-                    'successful': 0,
-                    'failed': 0,
-                    'successful_tools': [],
-                    'failed_tools': [],
-                    'details': {}
-                }
-            }
-
+            logger.warning("Skipping site scan: no site URLs to scan - Scan ID: %s", scan_id)
+            user_log(scan_id, "site_scan", "Skipped: no site URLs to scan", "warning")
+            return _build_empty_result(
+                scan_id, target_name, scan_workspace_dir, urls_file
+            )
 
         # Step 2: tool configuration
-        logger.info("Step 2: tool configuration")
-        logger.info(
-            "✓ Enabled tools: %s",
-            ', '.join(enabled_tools.keys())
-        )
-
+        logger.info("✓ Enabled tools: %s", ', '.join(enabled_tools))
 
         # Step 3: run the scan tools sequentially
-        logger.info("Step 3: run the scan tools sequentially and save results in real time")
-        tool_stats, processed_records, successful_tool_names, failed_tools = _run_scans_sequentially(
-            enabled_tools=enabled_tools,
-            urls_file=urls_file,
-            total_urls=total_urls,
-            site_scan_dir=site_scan_dir,
+        ctx = ScanContext(
             scan_id=scan_id,
             target_id=target_id,
-            target_name=target_name
+            target_name=target_name,
+            site_scan_dir=site_scan_dir,
+            urls_file=urls_file,
+            total_urls=total_urls
        )
 
-        logger.info("="*60 + "\n✓ Site scan completed\n" + "="*60)
-
-        # Dynamically build the list of executed tasks
+        tool_stats, processed_records, successful_tools, failed_tools = \
+            _run_scans_sequentially(enabled_tools, ctx)
+
+        # Aggregate the results
         executed_tasks = ['export_site_urls', 'parse_config']
-        executed_tasks.extend([f'run_and_stream_save_websites ({tool})' for tool in tool_stats.keys()])
-
-        # Aggregate the results of all tools
-        total_created = sum(stats['result'].get('created_websites', 0) for stats in tool_stats.values())
-        total_skipped_no_subdomain = sum(stats['result'].get('skipped_no_subdomain', 0) for stats in tool_stats.values())
-        total_skipped_failed = sum(stats['result'].get('skipped_failed', 0) for stats in tool_stats.values())
+        executed_tasks.extend(
+            f'run_and_stream_save_websites ({tool})' for tool in tool_stats
+        )
+
+        total_created, total_skipped_no_sub, total_skipped_failed = \
+            _aggregate_tool_results(tool_stats)
+
+        logger.info("✓ Site scan completed - websites created: %d", total_created)
+        user_log(
+            scan_id, "site_scan",
+            f"site_scan completed: found {total_created} websites"
+        )
 
         return {
             'success': True,
             'scan_id': scan_id,
@@ -451,28 +419,22 @@ def site_scan_flow(
             'scan_workspace_dir': scan_workspace_dir,
             'urls_file': urls_file,
             'total_urls': total_urls,
-            'association_count': association_count,
             'processed_records': processed_records,
             'created_websites': total_created,
-            'skipped_no_subdomain': total_skipped_no_subdomain,
+            'skipped_no_subdomain': total_skipped_no_sub,
             'skipped_failed': total_skipped_failed,
             'executed_tasks': executed_tasks,
             'tool_stats': {
                 'total': len(enabled_tools),
-                'successful': len(successful_tool_names),
+                'successful': len(successful_tools),
                 'failed': len(failed_tools),
-                'successful_tools': successful_tool_names,
+                'successful_tools': successful_tools,
                 'failed_tools': failed_tools,
                 'details': tool_stats
             }
         }
 
-    except ValueError as e:
-        logger.error("Configuration error: %s", e)
+    except ValueError:
         raise
-    except RuntimeError as e:
-        logger.error("Runtime error: %s", e)
+    except RuntimeError:
         raise
     except Exception as e:
         logger.exception("Site scan failed: %s", e)
         raise
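The bare re-raises at the end are what let the registered handlers run: Prefect's on_failure hooks fire only when the flow exits with an exception. A minimal sketch of such a hook, assuming the Prefect 2-style (flow, flow_run, state) signature (illustrative, not the project's actual handler):

def on_scan_flow_failed_example(flow, flow_run, state):
    # Invoked by Prefect once the flow run enters a failed state.
    print(f"{flow.name} run {flow_run.id} failed: {state.message}")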
File diff suppressed because it is too large.
Some files were not shown because too many files have changed in this diff.