Mirror of https://github.com/yyhuni/xingrin.git (synced 2026-01-31 19:53:11 +08:00)

Compare commits: v1.0.3 ... v1.4.0-dev (342 commits)
Commits in this range (SHA1):

cd5c2b9f11 54786c22dd d468f975ab a85a12b8ad a8b0d97b7b b8504921c2 ecfc1822fb 81633642e6 d1ec9b7f27 2a3d9b4446
9b63203b5a 4c1282e9bb ba3a9b709d 283b28b46a 1269e5a314 802e967906 e446326416 e0abb3ce7b d418baaf79 f8da408580
7cd4354d8f 6bf35a760f be9ecadffb adb53c9f85 7b7bbed634 8dd3f0536e 8a8062a12d 55908a2da5 22a7d4f091 f287f18134
de27230b7a 15a6295189 674acdac66 c59152bedf b4037202dc 4b4f9862bf 1c42e4978f 57bab63997 b1f0f18ac0 ccee5471b8
0ccd362535 7f2af7f7e2 4bd0f9e8c1 68cc996e3b f1e79d638e d484133e4c fc977ae029 f328474404 68e726a066 77a6f45909
49d1f1f1bb db8ecb1644 18cc016268 23bc463283 7b903b91b2 b3136d51b9 08372588a4 236c828041 fb13bb74d8 f076c682b6
9eda2caceb b1c9e202dd 918669bc29 fd70b0544d 0f2df7a5f3 857ab737b5 ee2d99edda db6ce16aca ab800eca06 e8e5572339
d48d4bbcad d1cca4c083 df0810c863 d33e54c440 35a306fe8b 724df82931 8dfffdf802 b8cb85ce0b da96d437a4 feaf8062e5
4bab76f233 09416b4615 bc1c5f6b0e 2f2742e6fe be3c346a74 0c7a6fff12 3b4f0e3147 51212a2a0c 58533bbaf6 6ccca1602d
6389b0f672 d7599b8599 8eff298293 3634101c5b 163973a7df 80ffecba3e 3c21ac940c 5c9f484d70 7567f6c25b 0599a0b298
f7557fe90c 13571b9772 8ee76eef69 2a31e29aa2 81abc59961 ffbfec6dd5 a0091636a8 69490ab396 7306964abf cb6b0259e3
e1b4618e58 556dcf5f62 0628eef025 38ed8bc642 2f4d6a2168 c25cb9e06b b14ab71c7f 8b5060e2d3 3c9335febf 1b95e4f2c3
d20a600afc c29b11fd37 6caf707072 2627b1fc40 ec6712b9b4 9d5e4d5408 c5d5b24c8f 671cb56b62 51025f69a8 b2403b29c4
18ef01a47b 0bf8108fb3 837ad19131 d7de9a7129 22b4e51b42 d03628ee45 0baabe0753 e1191d7abf 82a2e9a0e7 1ccd1bc338
b4d42f5372 2c66450756 119d82dc89 fba7f7c508 99d384ce29 07f36718ab 7e3f69c208 5f90473c3c e2a815b96a f86a1a9d47
d5945679aa 51e2c51748 e2cbf98dda cd72bdf7c3 35abcf7e39 09f2d343a4 54d1f86bde a3997c9676 c90a55f85e 2eab88b452
1baf0eb5e1 b61e73f7be e896734dfc cd83f52f35 3e29554c36 18e02b536e 4c1c6f70ab a72e7675f5 93c2163764 de72c91561
3e6d060b75 766f045904 8acfe1cc33 7aec3eabb2 b1f11c36a4 d97fb5245a ddf9a1f5a4 47f9f96a4b 6f43e73162 9b7d496f3e
6390849d52 7a6d2054f6 73ebaab232 11899b29c2 877d2a56d1 dc1e94f038 9c3833d13d 92f3b722ef 9ef503c666 c3a43e94fa
d6d94355fb bc638eabf4 5acaada7ab aaad3f29cf f13eb2d9b2 f1b3b60382 e249056289 dba195b83a 9b494e6c67 2841157747
f6c1fef1a6 6ec0adf9dd 22c6661567 d9ed004e35 a0d9d1f29d 8aa9ed2a97 8baf29d1c3 248e48353a 0d210be50b f7c0d0b215
d83428f27b 45a09b8173 11dfdee6fd e53a884d13 3b318c89e3 e564bc116a 410c543066 66da140801 e60aac3622 14aaa71cb1
0309dba510 967ff8a69f 9ac23d50b6 265525c61e 1b9d05ce62 737980b30f 494ee81478 452686b282 c95c68f4e9 b02f38606d
b543f3d2b7 a18fb46906 bb74f61ea2 654f3664f8 30defe08d2 41266bd931 9eebd0a47c e7f4d25e58 56cc810783 efe20bbf69
d88cf19a68 8e74f842f0 5e9773a183 a952ef5b6b 815c409a9e 7ca85b8d7d 73291e6c4c dcafe03ea2 0390e05397 088b69b61a
de34567b53 bf40532ce4 252759c822 2d43204639 7715d0cf01 6d0d87d8ef 25074f9191 b06f33db5b a116755087 cddc4c244d
9e7089a8c2 6f543072fd 196058384a e076ea6849 abdc580a7a 17134ceb4b 98fba31118 75eff9929e 9baa615623 69a598e789
54017d0334 8ac97b4451 0a4f1d45be bce310a4b0 8502daf8a0 d0066dd9fc 3407a98cac 3d189431fc 1cbb6350c4 20a22f98d0
a96ab79891 3744a724be f63e40fbba 54573e210a 6179dd2ed3 34ac706fbc 3ba1ba427e 6019555729 750f52c515 bb5ce66a31
ac958571a5 bcb321f883 fd3cdf8033 f3f9718df2 984c34dbca e9dcbf510d 65deb8c5d0 5a93ad878c 51f25d0976 fe1579e7fb
ef117d2245 39cea5a918 0d477ce269 1bb6e90c3d 9004c77031 71de0b4b1b 1ef1f9709e 3323bd2a4f df602dd1ae 372bab5267
bed80e4ba7 3b014bd04c 5e60911cb3 5de7ea9dbc 971641cdeb e5a74faf9f e9a58e89aa 3d9d520dc7 8d814b5864 c16b7afabe
fa55167989 55a2762c71 5532f1e63a 948568e950 873b6893f1 dbb30f7c78 38eced3814 68fc7cee3b 6e23824a45 a88cceb4f4
81164621d2 379abaeca7 de77057679 630747ed2b 98c418ee8b cd54089c34 8fcda537a3 3ca94be7b7 eb70692843 3d20623b41
2c45b3baa8 0cd2215f9d
.github/workflows/docker-build.yml (vendored): 214 lines changed
@@ -16,10 +16,11 @@ env:
   IMAGE_PREFIX: yyhuni

 permissions:
-  contents: write  # allow modifying repository contents
+  contents: write

 jobs:
-  build:
+  # AMD64 build (native x64 runner)
+  build-amd64:
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -39,23 +40,18 @@ jobs:
         - image: xingrin-agent
           dockerfile: docker/agent/Dockerfile
           context: .
         - image: xingrin-postgres
           dockerfile: docker/postgres/Dockerfile
           context: docker/postgres

     steps:
       - name: Checkout
         uses: actions/checkout@v4

-      - name: Free disk space (for large builds like worker)
+      - name: Free disk space
         run: |
           echo "=== Before cleanup ==="
           df -h
-          # remove large packages we do not need
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
           sudo docker image prune -af
           echo "=== After cleanup ==="
           df -h

       - name: Generate SSL certificates for nginx build
         if: matrix.image == 'xingrin-nginx'
@@ -65,10 +61,6 @@ jobs:
             -keyout docker/nginx/ssl/privkey.pem \
             -out docker/nginx/ssl/fullchain.pem \
             -subj "/CN=localhost"
-          echo "SSL certificates generated for CI build"

-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3

       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
@@ -79,7 +71,120 @@ jobs:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}

-      - name: Get version from git tag
+      - name: Get version
         id: version
         run: |
           if [[ $GITHUB_REF == refs/tags/* ]]; then
             echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
           else
             echo "VERSION=dev-$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
           fi

+      - name: Build and push AMD64
+        uses: docker/build-push-action@v5
+        with:
+          context: ${{ matrix.context }}
+          file: ${{ matrix.dockerfile }}
+          platforms: linux/amd64
+          push: true
+          tags: ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}-amd64
+          build-args: IMAGE_TAG=${{ steps.version.outputs.VERSION }}
+          cache-from: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-amd64
+          cache-to: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-amd64,mode=max
+          provenance: false
+          sbom: false
+
+  # ARM64 build (native ARM64 runner)
+  build-arm64:
+    runs-on: ubuntu-22.04-arm
+    strategy:
+      matrix:
+        include:
+          - image: xingrin-server
+            dockerfile: docker/server/Dockerfile
+            context: .
+          - image: xingrin-frontend
+            dockerfile: docker/frontend/Dockerfile
+            context: .
+          - image: xingrin-worker
+            dockerfile: docker/worker/Dockerfile
+            context: .
+          - image: xingrin-nginx
+            dockerfile: docker/nginx/Dockerfile
+            context: .
+          - image: xingrin-agent
+            dockerfile: docker/agent/Dockerfile
+            context: .
+          - image: xingrin-postgres
+            dockerfile: docker/postgres/Dockerfile
+            context: docker/postgres
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Generate SSL certificates for nginx build
+        if: matrix.image == 'xingrin-nginx'
+        run: |
+          mkdir -p docker/nginx/ssl
+          openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
+            -keyout docker/nginx/ssl/privkey.pem \
+            -out docker/nginx/ssl/fullchain.pem \
+            -subj "/CN=localhost"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Get version
+        id: version
+        run: |
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
+            echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
+          else
+            echo "VERSION=dev-$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Build and push ARM64
+        uses: docker/build-push-action@v5
+        with:
+          context: ${{ matrix.context }}
+          file: ${{ matrix.dockerfile }}
+          platforms: linux/arm64
+          push: true
+          tags: ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}-arm64
+          build-args: IMAGE_TAG=${{ steps.version.outputs.VERSION }}
+          cache-from: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-arm64
+          cache-to: type=registry,ref=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:cache-arm64,mode=max
+          provenance: false
+          sbom: false
+
+  # merge the per-architecture images into multi-arch manifests
+  merge-manifests:
+    runs-on: ubuntu-latest
+    needs: [build-amd64, build-arm64]
+    strategy:
+      matrix:
+        image:
+          - xingrin-server
+          - xingrin-frontend
+          - xingrin-worker
+          - xingrin-nginx
+          - xingrin-agent
+          - xingrin-postgres
+    steps:
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Get version
+        id: version
+        run: |
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
@@ -90,41 +195,76 @@ jobs:
             echo "IS_RELEASE=false" >> $GITHUB_OUTPUT
           fi

-      - name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ${{ matrix.context }}
-          file: ${{ matrix.dockerfile }}
-          platforms: linux/amd64,linux/arm64
-          push: true
-          tags: |
-            ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}
-            ${{ steps.version.outputs.IS_RELEASE == 'true' && format('{0}/{1}:latest', env.IMAGE_PREFIX, matrix.image) || '' }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+      - name: Create and push multi-arch manifest
+        run: |
+          VERSION=${{ steps.version.outputs.VERSION }}
+          IMAGE=${{ env.IMAGE_PREFIX }}/${{ matrix.image }}
+
+          docker manifest create ${IMAGE}:${VERSION} \
+            ${IMAGE}:${VERSION}-amd64 \
+            ${IMAGE}:${VERSION}-arm64
+          docker manifest push ${IMAGE}:${VERSION}
+
+          if [[ "${{ steps.version.outputs.IS_RELEASE }}" == "true" ]]; then
+            docker manifest create ${IMAGE}:latest \
+              ${IMAGE}:${VERSION}-amd64 \
+              ${IMAGE}:${VERSION}-arm64
+            docker manifest push ${IMAGE}:latest
+          fi

-  # after all images build successfully, update the VERSION file
+  # update the VERSION file
   update-version:
     runs-on: ubuntu-latest
-    needs: build  # wait for all build jobs to finish
+    needs: merge-manifests
     if: startsWith(github.ref, 'refs/tags/v')
     steps:
-      - name: Checkout
+      - name: Checkout repository
         uses: actions/checkout@v4
         with:
           ref: main
+          fetch-depth: 0  # full history, needed to determine which branch the tag is on
           token: ${{ secrets.GITHUB_TOKEN }}

+      - name: Determine source branch and version
+        id: branch
+        run: |
+          VERSION="${GITHUB_REF#refs/tags/}"
+
+          # find the branches that contain this tag
+          BRANCHES=$(git branch -r --contains ${{ github.ref_name }})
+          echo "Branches containing tag: $BRANCHES"
+
+          # decide which branch the tag came from
+          if echo "$BRANCHES" | grep -q "origin/main"; then
+            TARGET_BRANCH="main"
+            UPDATE_LATEST="true"
+          elif echo "$BRANCHES" | grep -q "origin/dev"; then
+            TARGET_BRANCH="dev"
+            UPDATE_LATEST="false"
+          else
+            echo "Warning: Tag not found in main or dev branch, defaulting to main"
+            TARGET_BRANCH="main"
+            UPDATE_LATEST="false"
+          fi
+
+          echo "BRANCH=$TARGET_BRANCH" >> $GITHUB_OUTPUT
+          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
+          echo "UPDATE_LATEST=$UPDATE_LATEST" >> $GITHUB_OUTPUT
+          echo "Will update VERSION on branch: $TARGET_BRANCH"
+
+      - name: Checkout target branch
+        run: |
+          git checkout ${{ steps.branch.outputs.BRANCH }}

       - name: Update VERSION file
         run: |
-          VERSION="${GITHUB_REF#refs/tags/}"
+          VERSION="${{ steps.branch.outputs.VERSION }}"
           echo "$VERSION" > VERSION
-          echo "Updated VERSION to $VERSION"
+          echo "Updated VERSION to $VERSION on branch ${{ steps.branch.outputs.BRANCH }}"

       - name: Commit and push
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
           git add VERSION
-          git diff --staged --quiet || git commit -m "chore: bump version to ${GITHUB_REF#refs/tags/}"
-          git push
+          git diff --staged --quiet || git commit -m "chore: bump version to ${{ steps.branch.outputs.VERSION }}"
+          git push origin ${{ steps.branch.outputs.BRANCH }}
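The pattern this workflow adopts (push one tag per architecture, then stitch them together with `docker manifest`) can also be exercised outside CI. A minimal sketch in Python, assuming the per-arch tags already exist on the registry and you are logged in; the helper name `merge_manifest` is invented here for illustration:

```python
import subprocess

def merge_manifest(image: str, version: str, is_release: bool) -> None:
    """Create and push a multi-arch manifest from per-arch tags.

    `image` is a fully qualified repository, e.g. "yyhuni/xingrin-server".
    Mirrors the workflow's merge-manifests step.
    """
    arch_tags = [f"{image}:{version}-amd64", f"{image}:{version}-arm64"]

    # docker manifest create <target> <arch-tag>... then push the target
    subprocess.run(["docker", "manifest", "create", f"{image}:{version}", *arch_tags], check=True)
    subprocess.run(["docker", "manifest", "push", f"{image}:{version}"], check=True)

    if is_release:
        # tagged releases additionally get ':latest'
        subprocess.run(["docker", "manifest", "create", f"{image}:latest", *arch_tags], check=True)
        subprocess.run(["docker", "manifest", "push", f"{image}:latest"], check=True)
```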
.gitignore (vendored): 3 lines changed
@@ -96,6 +96,7 @@ backend/vendor/
 .idea/
 .cursor/
 .claude/
+.kiro/
 .playwright-mcp/
 *.swp
 *.swo
@@ -131,3 +132,5 @@ temp/

 HGETALL
 KEYS
+vuln_scan/input_endpoints.txt
+open-in-v0
@@ -1,12 +0,0 @@
---
trigger: always_on
---

1. The backend web app should run on port 8888.
3. All frontend routes get a trailing slash, to match Django DRF's routing rules.
4. Pages can be tested with curl.
8. All frontend API calls go in @services; all types go in @types.
10. Frontend loading and similar state logic uses React Query, which manages it automatically.
17. Toasts for business operations belong in hooks.
23. Avoid window.location.href for navigation in the frontend unless necessary; use Next.js client-side routing instead.
24. For UI work, check through MCP whether a shared, polished component already exists before building one.
@@ -1,85 +0,0 @@
---
trigger: manual
description: This rule must be invoked when performing code review
---

### **0. Logical correctness & bug hunting** *(highest priority; must be traced by hand)*

**Goal**: without relying on tests, actively find logic errors where the code runs but produces the wrong result.

1. **Trace the key paths by hand**:
   - Pick 2-3 representative inputs (including boundaries) and **step through the execution flow in your head or on paper**.
   - Does the output match expectations? Is every variable change along the way correct?
2. **Check for common logic bugs** (see the sketch after this section):
   - **Off-by-one**: loops, array indexing, pagination
   - **Broken conditionals**: `and`/`or` precedence, misused short-circuit evaluation
   - **State confusion**: uninitialized variables, values overwritten unexpectedly
   - **Algorithm drift**: sorting, searching, binary-search midpoint handling
   - **Floating-point precision**: are floats mistakenly compared with `==`?
3. **Review control flow**:
   - Are all `if/else` branches covered? Any unreachable code?
   - Does every `switch`/`match` have a `default`? Any missing cases?
   - What do error paths return? Is any `finally` cleanup missing?
4. **Business-logic consistency**:
   - Does the code follow the **business rules**? (e.g. "order total = item price × quantity + shipping - discount")
   - Are implicit constraints missed? (e.g. "users may only review completed orders")
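A few of the bug classes above, condensed into runnable Python; this is illustrative only, and the function names are made up for the sketch:

```python
import math

def page_count(total: int, page_size: int) -> int:
    # off-by-one trap: total // page_size undercounts when the division
    # is inexact; ceiling division handles every boundary case
    return (total + page_size - 1) // page_size

def midpoint(lo: int, hi: int) -> int:
    # binary-search midpoint: lo + (hi - lo) // 2 cannot overflow in
    # fixed-width languages, and is still the idiomatic form in Python
    return lo + (hi - lo) // 2

def floats_equal(a: float, b: float) -> bool:
    # never compare floats with ==; 0.1 + 0.2 != 0.3 exactly
    return math.isclose(a, b, rel_tol=1e-9, abs_tol=1e-12)

assert page_count(100, 10) == 10 and page_count(101, 10) == 11
assert floats_equal(0.1 + 0.2, 0.3)
```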
### **I. Functionality & correctness** *(blocking issues must be fixed)*

1. **Requirement coverage**: does it cover 100% of the requirements? Any missing or extra features?
2. **Boundary conditions**:
   - Inputs: `null`, empty, extreme values, malformed data
   - Collections: empty, single element, very large (e.g. 10⁶)
   - Loops: termination conditions, off-by-one
3. **Error handling**:
   - Are exceptions caught comprehensively? Do failure paths degrade gracefully?
   - Are error messages clear, without leaking stack traces?
4. **Concurrency safety**:
   - Race or deadlock risks? Are shared resources synchronized?
   - Is `volatile`/`synchronized`/`Lock`/`atomic` used where needed?
5. **Unit tests**:
   - Coverage ≥ 80%? Positive, boundary, and failure cases included?
   - Are tests independent, with no external dependencies?

### **II. Code quality & readability**

1. **Naming**: self-explanatory? Follows conventions?
2. **Function design**:
   - **Single responsibility**? ≤ 4 parameters? Suggested length < 50 lines (adjust per language)
   - Could parts be extracted into utility functions?
3. **Structure & complexity**:
   - No duplicated code? Cyclomatic complexity < 10?
   - Nesting ≤ 3 levels? Use guard clauses to return early
4. **Comments**: explain **why**, not **what**; complex logic must be commented
5. **Consistent style**: auto-format with `Prettier`/`ESLint`/`Spotless`

### **III. Architecture & design**

1. **SOLID**: single responsibility, open/closed, dependency inversion?
2. **Dependencies**: depend on interfaces rather than implementations? No circular dependencies?
3. **Testability**: does the code support dependency injection? Avoid hard-coded `new`
4. **Extensibility**: does adding a feature require changing only one place?

### **IV. Performance**

- **N+1 queries**? IO, logging, or allocation inside loops? (see the sketch after this section)
- Is the algorithmic complexity reasonable? (e.g. can an O(n²) pass be improved?)
- Memory: no leaks? Large objects released promptly? Do caches expire?

### **V. Other**

1. **Maintainability**: do logs carry context? Is the code cleaner after the change?
2. **Compatibility**: are API and database changes backward compatible?
3. **Dependency management**: is a new library necessary? Is its license compliant?

---

### **Review best practices**

- **Review in small batches**: ≤ 200 lines at a time
- **Phrase as suggestions**: "Consider splitting this function for readability" rather than "This function is too long"
- **Automate first**: style, null-pointer, and security scanning belong in CI tooling
- **Triage findings by severity**:
  - 🛑 **Blocking**: functional errors, security vulnerabilities
  - ⚠️ **Must fix**: design flaws, performance bottlenecks
  - 💡 **Suggestion**: style, naming, readability
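The N+1 item is the one most specific to this Django codebase. A minimal sketch; the `organization` foreign key on `Target` is assumed here purely for illustration:

```python
# N+1: one query for the targets, then one extra query per row
for target in Target.objects.all():
    print(target.organization.name)

# Fixed: select_related folds the lookup into a single JOIN
for target in Target.objects.select_related("organization").all():
    print(target.organization.name)
```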
@@ -1,195 +0,0 @@
---
trigger: always_on
---

## Standard layered-architecture call order

Following **DDD (domain-driven design) and clean-architecture** principles, the call order should be:

```
HTTP request → Views → Tasks → Services → Repositories → Models
```

---

### 📊 Full call chain

```
┌──────────────────────────────┐
│   HTTP Request (frontend)    │
└──────────────┬───────────────┘
               ↓
┌──────────────────────────────┐
│      Views (HTTP layer)      │
│  - parameter validation      │
│  - permission checks         │
│  - calls Tasks/Services      │
│  - returns HTTP response     │
└──────────────┬───────────────┘
               ↓
     ┌─────────┴──────────┐
     ↓ (async)            ↓ (sync)
┌────────────────────┐  ┌──────────────────────────────┐
│ Tasks (task layer) │  │  Services (business layer)   │
│ - async execution  │  │  - business logic            │
│ - background jobs  │─>│  - transaction management    │
│ - notifications    │  │  - data validation           │
└────────────────────┘  └──────────────┬───────────────┘
                                       ↓
                        ┌──────────────────────────────┐
                        │ Repositories (storage layer) │
                        │  - data access               │
                        │  - query encapsulation       │
                        │  - bulk operations           │
                        └──────────────┬───────────────┘
                                       ↓
                        ┌──────────────────────────────┐
                        │    Models (model layer)      │
                        │  - ORM definitions           │
                        │  - data structures           │
                        │  - relationship mappings     │
                        └──────────────────────────────┘
```

---

### 🔄 Concrete call examples

### **Scenario 1: synchronous delete (Views → Services → Repositories → Models)**

```python
# 1. Views layer (views.py)
def some_sync_delete(self, request):
    # validate parameters
    target_ids = request.data.get('ids')

    # call the Service layer
    service = TargetService()
    result = service.bulk_delete_targets(target_ids)

    # return the response
    return Response({'message': 'deleted'})

# 2. Services layer (services/target_service.py)
class TargetService:
    def bulk_delete_targets(self, target_ids):
        # business-logic validation
        logger.info("Preparing to delete...")

        # call the Repository layer
        deleted_count = self.repo.bulk_delete_by_ids(target_ids)

        # return the result
        return deleted_count

# 3. Repositories layer (repositories/django_target_repository.py)
class DjangoTargetRepository:
    def bulk_delete_by_ids(self, target_ids):
        # data-access operation
        return Target.objects.filter(id__in=target_ids).delete()

# 4. Models layer (models.py)
class Target(models.Model):
    # ORM definition
    name = models.CharField(...)
```

---

### **Scenario 2: asynchronous delete (Views → Tasks → Services → Repositories → Models)**

```python
# 1. Views layer (views.py)
def destroy(self, request, *args, **kwargs):
    target = self.get_object()

    # call the Tasks layer (async)
    async_bulk_delete_targets([target.id], [target.name])

    # return 202 immediately
    return Response(status=202)

# 2. Tasks layer (tasks/target_tasks.py)
def async_bulk_delete_targets(target_ids, target_names):
    def _delete():
        # send a progress notification
        create_notification("Deleting...")

        # call the Service layer
        service = TargetService()
        result = service.bulk_delete_targets(target_ids)

        # send a completion notification
        create_notification("Delete succeeded")

    # run in a background thread
    threading.Thread(target=_delete).start()

# 3. Services layer (services/target_service.py)
class TargetService:
    def bulk_delete_targets(self, target_ids):
        # business logic
        return self.repo.bulk_delete_by_ids(target_ids)

# 4. Repositories layer (repositories/django_target_repository.py)
class DjangoTargetRepository:
    def bulk_delete_by_ids(self, target_ids):
        # data access
        return Target.objects.filter(id__in=target_ids).delete()

# 5. Models layer (models.py)
class Target(models.Model):
    # ORM definition
    ...
```

---

### 📋 Per-layer responsibilities

| Layer | Responsibilities | Must not do |
| --- | --- | --- |
| **Views** | HTTP request handling, parameter validation, permission checks | ❌ access Models directly<br>❌ business logic |
| **Tasks** | async execution, background jobs, sending notifications | ❌ access Models directly<br>❌ HTTP responses |
| **Services** | business logic, transaction management, data validation | ❌ raw SQL<br>❌ anything HTTP-related |
| **Repositories** | data access, query encapsulation, bulk operations | ❌ business logic<br>❌ sending notifications |
| **Models** | ORM definitions, data structures, relationship mappings | ❌ business logic<br>❌ complex queries |

---

### ✅ Best-practice principles

1. **One-way dependencies**: layers may only call downward, never upward

    ```
    Views → Tasks → Services → Repositories → Models
    (upper)                                  (lower)
    ```

2. **Layer isolation**: only adjacent layers interact; skipping layers is forbidden
    - ✅ Views → Services
    - ✅ Tasks → Services
    - ✅ Services → Repositories
    - ❌ Views → Repositories (skips a layer)
    - ❌ Tasks → Models (skips a layer)
3. **Dependency injection**: pass dependencies in through the constructor

    ```python
    class TargetService:
        def __init__(self, repo=None):
            # injected; falls back to the default implementation
            self.repo = repo or DjangoTargetRepository()
    ```

4. **Interface abstraction**: define interfaces with Protocol (see the test sketch after this section)

    ```python
    from typing import Protocol

    class TargetRepository(Protocol):
        def bulk_delete_by_ids(self, ids): ...
    ```
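A minimal sketch of why the Protocol matters for testability, assuming the injectable `TargetService.__init__` shown above; `FakeTargetRepository` is invented here for illustration:

```python
class FakeTargetRepository:
    # satisfies TargetRepository structurally; no Django required
    def __init__(self):
        self.deleted = []

    def bulk_delete_by_ids(self, ids):
        self.deleted.extend(ids)
        return len(ids)

# in a unit test, inject the fake instead of DjangoTargetRepository
fake = FakeTargetRepository()
service = TargetService(repo=fake)
assert service.bulk_delete_targets([1, 2]) == 2
assert fake.deleted == [1, 2]
```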
LICENSE: 733 lines changed
@@ -1,131 +1,674 @@

Removed (the previous LICENSE, PolyForm Noncommercial License 1.0.0):

# PolyForm Noncommercial License 1.0.0

<https://polyformproject.org/licenses/noncommercial/1.0.0>

## Acceptance

In order to get any license under these terms, you must agree
to them as both strict obligations and conditions to all
your licenses.

## Copyright License

The licensor grants you a copyright license for the
software to do everything you might do with the software
that would otherwise infringe the licensor's copyright
in it for any permitted purpose. However, you may
only distribute the software according to [Distribution
License](#distribution-license) and make changes or new works
based on the software according to [Changes and New Works
License](#changes-and-new-works-license).

## Distribution License

The licensor grants you an additional copyright license
to distribute copies of the software. Your license
to distribute covers distributing the software with
changes and new works permitted by [Changes and New Works
License](#changes-and-new-works-license).

## Notices

You must ensure that anyone who gets a copy of any part of
the software from you also gets a copy of these terms or the
URL for them above, as well as copies of any plain-text lines
beginning with `Required Notice:` that the licensor provided
with the software. For example:

> Required Notice: Copyright Yuhang Yang (yyhuni)

## Changes and New Works License

The licensor grants you an additional copyright license to
make changes and new works based on the software for any
permitted purpose.

## Patent License

The licensor grants you a patent license for the software that
covers patent claims the licensor can license, or becomes able
to license, that you would infringe by using the software.

## Noncommercial Purposes

Any noncommercial purpose is a permitted purpose.

## Personal Uses

Personal use for research, experiment, and testing for
the benefit of public knowledge, personal study, private
entertainment, hobby projects, amateur pursuits, or religious
observance, without any anticipated commercial application,
is use for a permitted purpose.

## Noncommercial Organizations

Use by any charitable organization, educational institution,
public research organization, public safety or health
organization, environmental protection organization,
or government institution is use for a permitted purpose
regardless of the source of funding or obligations resulting
from the funding.

## Fair Use

You may have "fair use" rights for the software under the
law. These terms do not limit them.

## No Other Rights

These terms do not allow you to sublicense or transfer any of
your licenses to anyone else, or prevent the licensor from
granting licenses to anyone else. These terms do not imply
any other licenses.

## Patent Defense

If you make any written claim that the software infringes or
contributes to infringement of any patent, your patent license
for the software granted under these terms ends immediately. If
your company makes such a claim, your patent license ends
immediately for work on behalf of your company.

## Violations

The first time you are notified in writing that you have
violated any of these terms, or done anything with the software
not covered by your licenses, your licenses can nonetheless
continue if you come into full compliance with these terms,
and take practical steps to correct past violations, within
32 days of receiving notice. Otherwise, all your licenses
end immediately.

## No Liability

***As far as the law allows, the software comes as is, without
any warranty or condition, and the licensor will not be liable
to you for any damages arising out of these terms or the use
or nature of the software, under any kind of legal claim.***

## Definitions

The **licensor** is the individual or entity offering these
terms, and the **software** is the software the licensor makes
available under these terms.

**You** refers to the individual or entity agreeing to these
terms.

**Your company** is any legal entity, sole proprietorship,
or other kind of organization that you work for, plus all
organizations that have control over, are under the control of,
or are under common control with that organization. **Control**
means ownership of substantially all the assets of an entity,
or the power to direct its management and policies by vote,
contract, or otherwise. Control can be direct or indirect.

**Your licenses** are all the licenses granted to you for the
software under these terms.

**Use** means anything you do with the software requiring one
of your licenses.

Added (the new LICENSE, GNU General Public License version 3):

GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

Preamble

The GNU General Public License is a free, copyleft license for
software and other kinds of works.

The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.

When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.

For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.

Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.

For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.

Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.

Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.

The precise terms and conditions for copying, distribution and
modification follow.

TERMS AND CONDITIONS

0. Definitions.

"This License" refers to version 3 of the GNU General Public License.

"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.

To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

A "covered work" means either the unmodified Program or a work based
on the Program.

To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

1. Source Code.

The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.

A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

The Corresponding Source for a work in source code form is that
same work.

2. Basic Permissions.

All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.

3. Protecting Users' Legal Rights From Anti-Circumvention Law.

No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

4. Conveying Verbatim Copies.

You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

5. Conveying Modified Source Versions.

You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.

b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".

c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.

d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.

A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

6. Conveying Non-Source Forms.

You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.

b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.

c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.

d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.

e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.

A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

7. Additional Terms.

"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or

b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or

c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or

d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or

e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or

f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.

All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

8. Termination.

You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

9. Acceptance Not Required for Having Copies.

You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

10. Automatic Licensing of Downstream Recipients.

Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.

An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

11. Patents.

A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".

A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<https://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<https://www.gnu.org/licenses/why-not-lgpl.html>.
|
||||
|
||||
README.md (264 changed lines)
@@ -1,27 +1,47 @@

<h1 align="center">Xingrin - 星环</h1>
<h1 align="center">XingRin - 星环</h1>

<p align="center">
<b>A modern, enterprise-grade vulnerability scanning and asset management platform</b><br>
Automated security testing, asset discovery, vulnerability management, and more
<b>🛡️ Attack Surface Management (ASM) platform | Automated asset discovery and vulnerability scanning</b>
</p>

<p align="center">
<b>🌗 Light/dark mode toggle</b>
<a href="https://github.com/yyhuni/xingrin/stargazers"><img src="https://img.shields.io/github/stars/yyhuni/xingrin?style=flat-square&logo=github" alt="GitHub stars"></a>
<a href="https://github.com/yyhuni/xingrin/network/members"><img src="https://img.shields.io/github/forks/yyhuni/xingrin?style=flat-square&logo=github" alt="GitHub forks"></a>
<a href="https://github.com/yyhuni/xingrin/issues"><img src="https://img.shields.io/github/issues/yyhuni/xingrin?style=flat-square&logo=github" alt="GitHub issues"></a>
<a href="https://github.com/yyhuni/xingrin/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-PolyForm%20NC-blue?style=flat-square" alt="License"></a>
</p>

<p align="center">
<img src="docs/screenshots/light.png" alt="Light Mode" width="49%">
<img src="docs/screenshots/dark.png" alt="Dark Mode" width="49%">
<a href="#-功能特性">Features</a> •
<a href="#-全局资产搜索">Asset Search</a> •
<a href="#-快速开始">Quick Start</a> •
<a href="#-文档">Docs</a> •
<a href="#-反馈与贡献">Feedback & Contributing</a>
</p>

<p align="center">
<b>🎨 Multiple UI themes</b>
<sub>🔍 Keywords: ASM | attack surface management | vulnerability scanning | asset discovery | asset search | Bug Bounty | penetration testing | Nuclei | subdomain enumeration | EASM</sub>
</p>

---

## 🌐 Live Demo

👉 **[https://xingrin.vercel.app/](https://xingrin.vercel.app/)**

> ⚠️ UI showcase only; not connected to a backend database

---

<p align="center">
<b>🎨 Modern UI</b>
</p>

<p align="center">
<img src="docs/screenshots/bubblegum.png" alt="Bubblegum" width="32%">
<img src="docs/screenshots/cosmic-night.png" alt="Cosmic Night" width="32%">
<img src="docs/screenshots/quantum-rose.png" alt="Quantum Rose" width="32%">
<img src="docs/screenshots/light.png" alt="Light Mode" width="24%">
<img src="docs/screenshots/bubblegum.png" alt="Bubblegum" width="24%">
<img src="docs/screenshots/cosmic-night.png" alt="Cosmic Night" width="24%">
<img src="docs/screenshots/quantum-rose.png" alt="Quantum Rose" width="24%">
</p>

## 📚 Documentation

@@ -29,6 +49,9 @@

- [📖 Technical docs](./docs/README.md) - documentation index (🚧 continuously improving)
- [🚀 Quick Start](./docs/quick-start.md) - one-click installation and deployment guide
- [🔄 Version Management](./docs/version-management.md) - Git-tag-driven automated version management
- [📦 Nuclei Template Architecture](./docs/nuclei-template-architecture.md) - storage and sync of the template repository
- [📖 Wordlist Architecture](./docs/wordlist-architecture.md) - storage and sync of wordlist files
- [🔍 Scan Flow Architecture](./docs/scan-flow-architecture.md) - the full scan pipeline and tool orchestration

---

@@ -46,6 +69,61 @@

- **Custom pipelines** - scan flows configured in YAML for flexible orchestration
- **Scheduled scans** - cron-expression based, automated periodic scanning

### 🔖 Fingerprinting
- **Multi-source fingerprint library** - 27,000+ built-in rules from EHole, Goby, Wappalyzer, Fingers, FingerPrintHub, ARL, and more
- **Automatic detection** - runs as part of the scan pipeline to identify web application tech stacks
- **Fingerprint management** - query, import, and export fingerprint rules

#### Scan Flow Architecture

The full scan pipeline covers subdomain discovery, port scanning, site discovery, fingerprinting, URL collection, directory scanning, and vulnerability scanning

```mermaid
flowchart LR
    START["Start scan"]

    subgraph STAGE1["Stage 1: Asset Discovery"]
        direction TB
        SUB["Subdomain discovery<br/>subfinder, amass, puredns"]
        PORT["Port scan<br/>naabu"]
        SITE["Site detection<br/>httpx"]
        FINGER["Fingerprinting<br/>xingfinger"]
        SUB --> PORT --> SITE --> FINGER
    end

    subgraph STAGE2["Stage 2: Deep Analysis"]
        direction TB
        URL["URL collection<br/>waymore, katana"]
        DIR["Directory scan<br/>ffuf"]
    end

    subgraph STAGE3["Stage 3: Vulnerability Detection"]
        VULN["Vulnerability scan<br/>nuclei, dalfox"]
    end

    FINISH["Scan complete"]

    START --> STAGE1
    FINGER --> STAGE2
    STAGE2 --> STAGE3
    STAGE3 --> FINISH

    style START fill:#34495e,stroke:#2c3e50,stroke-width:2px,color:#fff
    style FINISH fill:#27ae60,stroke:#229954,stroke-width:2px,color:#fff
    style STAGE1 fill:#3498db,stroke:#2980b9,stroke-width:2px,color:#fff
    style STAGE2 fill:#9b59b6,stroke:#8e44ad,stroke-width:2px,color:#fff
    style STAGE3 fill:#e67e22,stroke:#d35400,stroke-width:2px,color:#fff
    style SUB fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
    style PORT fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
    style SITE fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
    style FINGER fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
    style URL fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
    style DIR fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
    style VULN fill:#f0b27a,stroke:#e67e22,stroke-width:1px,color:#fff
```

See the [scan flow architecture doc](./docs/scan-flow-architecture.md) for details

### 🖥️ Distributed Architecture
- **Multi-node scanning** - deploy multiple worker nodes to scale scanning capacity horizontally
- **Local node** - zero configuration; installation auto-registers a local Docker worker
@@ -54,62 +132,71 @@
- **Node monitoring** - real-time heartbeat checks with CPU/memory/disk status
- **Auto-reconnect** - offline nodes are detected automatically and rejoin on recovery

```mermaid
flowchart TB
    subgraph MASTER["Master Server"]
        direction TB

        REDIS["Redis load cache"]

        subgraph SCHEDULER["Task Distributor"]
            direction TB
            SUBMIT["Receive scan tasks"]
            SELECT["Load-aware selection"]
            DISPATCH["Smart dispatch"]

            SUBMIT --> SELECT
            SELECT --> DISPATCH
        end

        REDIS -.load data.-> SELECT
    end

    subgraph WORKERS["Worker node cluster"]
        direction TB

        W1["Worker 1 (local)<br/>CPU: 45% | MEM: 60%"]
        W2["Worker 2 (remote)<br/>CPU: 30% | MEM: 40%"]
        W3["Worker N (remote)<br/>CPU: 90% | MEM: 85%"]
    end

    DISPATCH -->|dispatch task| W1
    DISPATCH -->|dispatch task| W2
    DISPATCH -->|skip: high load| W3

    W1 -.heartbeat.-> REDIS
    W2 -.heartbeat.-> REDIS
    W3 -.heartbeat.-> REDIS
```

┌─────────────────────────────────────────────────────────────────┐
│ 主服务器 (Master) │
│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
│ │ Next.js │ │ Django │ │ Postgres│ │ Redis │ │
│ │ 前端 │ │ 后端 │ │ 数据库 │ │ 缓存 │ │
│ └─────────┘ └────┬────┘ └─────────┘ └─────────┘ │
│ │ │
│ ┌─────┴─────┐ │
│ │ 任务调度器 │ │
│ │ Scheduler │ │
│ └─────┬─────┘ │
└────────────────────┼────────────────────────────────────────────┘
│
┌────────────┼────────────┐
│ │ │
▼ ▼ ▼
┌───────────┐ ┌───────────┐ ┌───────────┐
│ Worker 1 │ │ Worker 2 │ │ Worker N │
│ (本地) │ │ (远程) │ │ (远程) │
├───────────┤ ├───────────┤ ├───────────┤
│ • Nuclei │ │ • Nuclei │ │ • Nuclei │
│ • httpx │ │ • httpx │ │ • httpx │
│ • naabu │ │ • naabu │ │ • naabu │
│ • ... │ │ • ... │ │ • ... │
├───────────┤ ├───────────┤ ├───────────┤
│ 心跳上报 │ │ 心跳上报 │ │ 心跳上报 │
└───────────┘ └───────────┘ └───────────┘
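The load-aware dispatch shown above (workers report CPU/memory heartbeats into Redis; the scheduler skips overloaded nodes) can be sketched minimally as follows. The `worker:*` key layout, the JSON payload shape, and the 80% threshold are illustrative assumptions, not the project's actual implementation:

```python
import json

import redis  # heartbeat data lives in Redis, per the architecture above

r = redis.Redis(host="localhost", port=6379, db=0)

def pick_worker(max_load: float = 80.0):
    """Return the least-loaded worker id below the threshold, or None."""
    candidates = []
    for key in r.scan_iter("worker:*"):  # hypothetical key layout
        stats = json.loads(r.get(key) or "{}")
        # Treat the busier of CPU/MEM as the node's effective load
        load = max(stats.get("cpu", 100.0), stats.get("mem", 100.0))
        if load < max_load:  # e.g. Worker N at CPU 90% gets skipped
            candidates.append((load, key.decode()))
    return min(candidates)[1] if candidates else None
```

If every node is over the threshold, returning `None` lets the caller keep the task queued until a heartbeat shows capacity again, which matches the "skip on high load" behavior in the diagram.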
### 🔎 Global Asset Search
- **Multi-type search** - covers both Website and Endpoint asset types
- **Expression syntax** - supports the `=` (fuzzy), `==` (exact), and `!=` (not equal) operators
- **Logical combination** - combine conditions with `&&` (AND) and `||` (OR)
- **Multi-field queries** - search the host, url, title, tech, status, body, and header fields
- **CSV export** - streams the full result set with no row limit

#### Search Syntax Examples

```bash
# Basic searches
host="api"                    # host contains "api"
status=="200"                 # status code is exactly 200
tech="nginx"                  # tech stack contains nginx

# Combined searches
host="api" && status=="200"   # host contains api AND status code is 200
tech="vue" || tech="react"    # tech stack contains vue OR react

# Complex queries
host="admin" && tech="php" && status=="200"
url="/api/v1" && status!="404"
```
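As a minimal sketch of how such expressions could translate into ORM filters — the parsing rules and the `status` → `status_code` field mapping are assumptions for illustration; the project's actual parser is not part of this diff:

```python
import re

from django.db.models import Q

# `=` is fuzzy (icontains), `==` exact, `!=` negated exact;
# the search field `status` is assumed to map to the model's status_code.
FIELD_MAP = {"status": "status_code"}

def term_to_q(term: str) -> Q:
    """Turn one 'field OP "value"' term into a Q object."""
    field, op, value = re.match(r'(\w+)\s*(==|!=|=)\s*"([^"]*)"', term.strip()).groups()
    field = FIELD_MAP.get(field, field)
    if op == "=":
        return Q(**{f"{field}__icontains": value})
    q = Q(**{field: value})
    return ~q if op == "!=" else q

def parse(expr: str) -> Q:
    """'&&' binds tighter than '||', so split on '||' first."""
    q_or = Q()
    for clause in expr.split("||"):
        q_and = Q()
        for term in clause.split("&&"):
            q_and &= term_to_q(term)
        q_or |= q_and
    return q_or

# Usage: WebSite.objects.filter(parse('host="api" && status=="200"'))
```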
### 📊 Visual Interface
- **Statistics** - asset/vulnerability dashboards
- **Real-time notifications** - WebSocket message push
- **Dark theme** - light/dark theme switching

---

## 🛠️ Tech Stack

- **Frontend**: Next.js + React + TailwindCSS
- **Backend**: Django + Django REST Framework
- **Database**: PostgreSQL + Redis
- **Deployment**: Docker + Nginx

### 🔧 Built-in Scanning Tools

| Category | Tools |
|------|------|
| Subdomain brute-forcing | puredns, massdns, dnsgen |
| Passive discovery | subfinder, amass, assetfinder, Sublist3r |
| Port scanning | naabu |
| Site discovery | httpx |
| Directory scanning | ffuf |
| Crawling | katana |
| Passive URL collection | waymore, uro |
| Vulnerability scanning | nuclei, dalfox |
- **Notifications** - real-time message push to WeCom, Telegram, and Discord

---

@@ -117,8 +204,9 @@

### Requirements

- **OS**: Ubuntu 20.04+ / Debian 11+ (recommended)
- **Hardware**: at least 2 cores and 4 GB RAM, 10 GB+ disk space
- **OS**: Ubuntu 20.04+ / Debian 11+
- **Architecture**: AMD64 (x86_64) / ARM64 (aarch64)
- **Hardware**: at least 2 cores and 4 GB RAM, 20 GB+ disk space

### One-Click Install

@@ -130,13 +218,19 @@ cd xingrin
# Install and start (production mode)
sudo ./install.sh

# Development mode
sudo ./install.sh --dev
# 🇨🇳 Mirror acceleration recommended for users in mainland China (third-party mirrors may stop working; long-term availability is not guaranteed)
sudo ./install.sh --mirror
```

> **💡 About the --mirror flag**
> - Automatically configures Docker registry mirrors (China-based sources)
> - Accelerates Git repository cloning (Nuclei templates, etc.)
> - Greatly speeds up installation and avoids network timeouts

### Accessing the Service

- **Web UI**: `https://localhost` or `http://localhost`
- **Web UI**: `https://ip:8083`
- **Default account**: admin / admin (change the password after first login)

### Common Commands

@@ -152,25 +246,15 @@ sudo ./restart.sh

# Uninstall
sudo ./uninstall.sh

# Update
sudo ./update.sh
```
## Logs
- Project logs: /opt/xingrin/logs stores the project's runtime logs; the error file holds error-related entries, and xingrin.log holds all project logs, errors included
- Tool logs: /opt/xingrin/results stores the output of tool runs, e.g. invocation results from naabu, httpx, and others

## 🤝 Feedback & Contributing

- 🐛 **Found a bug?** Submit an [Issue](https://github.com/yyhuni/xingrin/issues)
- 💡 **Have ideas, e.g. UI or feature design?** Suggestions are welcome via [Issue](https://github.com/yyhuni/xingrin/issues)
- 🔧 **Want to contribute?** Follow my WeChat official account and get in touch
- 💡 **Found a bug, or have ideas such as UI or feature design?** Submit an [Issue](https://github.com/yyhuni/xingrin/issues) or message the WeChat official account

## 📧 Contact
- The current version has mostly been used by me alone, so there may be many edge-case issues
- For questions, suggestions, or anything else, prefer an [Issue](https://github.com/yyhuni/xingrin/issues); you can also message my WeChat official account and I will reply

- WeChat official account: **洋洋的小黑屋**
- WeChat official account: **塔罗安全学苑**
- For the WeChat group, see the menu at the bottom of the official account; there is a group-chat entry there. If the link has expired, message me and I will add you

<img src="docs/wechat-qrcode.png" alt="微信公众号" width="200">

@@ -190,22 +274,30 @@ sudo ./update.sh

- Comply with the laws and regulations of your jurisdiction
- Bear all consequences arising from misuse

## 🌟 Star History

If this project helps you, please support it with a ⭐ Star!

[](https://star-history.com/#yyhuni/xingrin&Date)

## 📄 License

This project is licensed under the [PolyForm Noncommercial License 1.0.0](LICENSE).
This project is licensed under the [GNU General Public License v3.0](LICENSE).

### Permitted Uses

- ✅ Personal learning and research
- ✅ Non-commercial security testing
- ✅ Use by educational institutions
- ✅ Use by non-profit organizations
- ✅ Commercial and non-commercial use
- ✅ Modification and distribution
- ✅ Patent use
- ✅ Private use

### Prohibited Uses
### Obligations and Restrictions

- ❌ **Commercial use** (including but not limited to selling, commercial services, SaaS, etc.)
- 📋 **Source obligation**: source code must be provided when distributing
- 📋 **Same license**: derivative works must be released under the same license
- 📋 **Copyright notice**: the original copyright and license notices must be retained
- ❌ **No warranty**: provided without warranty of any kind
- ❌ Unauthorized penetration testing
- ❌ Any illegal activity

For commercial licensing, contact the author.
@@ -4,7 +4,3 @@ from django.apps import AppConfig

class AssetConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'apps.asset'

    def ready(self):
        # Import all models so Django discovers and registers them
        from . import models
@@ -7,7 +7,6 @@ from typing import Optional

@dataclass
class DirectoryDTO:
    """Directory data transfer object"""
    website_id: int
    target_id: int
    url: str
    status: Optional[int] = None
@@ -14,12 +14,13 @@ class EndpointDTO:
    status_code: Optional[int] = None
    content_length: Optional[int] = None
    webserver: Optional[str] = None
    body_preview: Optional[str] = None
    response_body: Optional[str] = None
    content_type: Optional[str] = None
    tech: Optional[List[str]] = None
    vhost: Optional[bool] = None
    location: Optional[str] = None
    matched_gf_patterns: Optional[List[str]] = None
    response_headers: Optional[str] = None

    def __post_init__(self):
        if self.tech is None:
@@ -9,7 +9,7 @@ class WebSiteDTO:
    """Website data transfer object"""
    target_id: int
    url: str
    host: str
    host: str = ''
    title: str = ''
    status_code: Optional[int] = None
    content_length: Optional[int] = None
@@ -17,9 +17,10 @@ class WebSiteDTO:
    webserver: str = ''
    content_type: str = ''
    tech: List[str] = None
    body_preview: str = ''
    response_body: str = ''
    vhost: Optional[bool] = None
    created_at: str = None
    response_headers: str = ''

    def __post_init__(self):
        if self.tech is None:
@@ -12,11 +12,10 @@ class DirectorySnapshotDTO:

    Used to save directory information discovered during a scan into the snapshot table

    Note: website_id and target_id are only used to pass data and convert to the asset DTO; they are not saved to the snapshot table.
    Note: target_id is only used to pass data and convert to the asset DTO; it is not saved to the snapshot table.
    Snapshots belong only to the scan.
    """
    scan_id: int
    website_id: int  # Only used to pass data; not saved to the database
    target_id: int  # Only used to pass data; not saved to the database
    url: str
    status: Optional[int] = None
@@ -36,7 +35,6 @@ class DirectorySnapshotDTO:
            DirectoryDTO: asset-table DTO
        """
        return DirectoryDTO(
            website_id=self.website_id,
            target_id=self.target_id,
            url=self.url,
            status=self.status,
@@ -13,6 +13,7 @@ class EndpointSnapshotDTO:
    Snapshots belong only to the scan.
    """
    scan_id: int
    target_id: int  # Required; used to sync to the asset table
    url: str
    host: str = ''  # Hostname (domain or IP address)
    title: str = ''
@@ -22,10 +23,10 @@ class EndpointSnapshotDTO:
    webserver: str = ''
    content_type: str = ''
    tech: List[str] = None
    body_preview: str = ''
    response_body: str = ''
    vhost: Optional[bool] = None
    matched_gf_patterns: List[str] = None
    target_id: Optional[int] = None  # Redundant field, used to sync to the asset table
    response_headers: str = ''

    def __post_init__(self):
        if self.tech is None:
@@ -42,9 +43,6 @@ class EndpointSnapshotDTO:
        """
        from apps.asset.dtos.asset import EndpointDTO

        if self.target_id is None:
            raise ValueError("target_id 不能为 None,无法同步到资产表")

        return EndpointDTO(
            target_id=self.target_id,
            url=self.url,
@@ -53,10 +51,11 @@ class EndpointSnapshotDTO:
            status_code=self.status_code,
            content_length=self.content_length,
            webserver=self.webserver,
            body_preview=self.body_preview,
            response_body=self.response_body,
            content_type=self.content_type,
            tech=self.tech if self.tech else [],
            vhost=self.vhost,
            location=self.location,
            matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else []
            matched_gf_patterns=self.matched_gf_patterns if self.matched_gf_patterns else [],
            response_headers=self.response_headers,
        )
@@ -13,18 +13,19 @@ class WebsiteSnapshotDTO:
    Snapshots belong only to the scan; target information is obtained via scan.target.
    """
    scan_id: int
    target_id: int  # Only used to pass data; not saved to the database
    target_id: int  # Required; used to sync to the asset table
    url: str
    host: str
    title: str = ''
    status: Optional[int] = None
    status_code: Optional[int] = None  # Unified naming: status -> status_code
    content_length: Optional[int] = None
    location: str = ''
    web_server: str = ''
    webserver: str = ''  # Unified naming: web_server -> webserver
    content_type: str = ''
    tech: List[str] = None
    body_preview: str = ''
    response_body: str = ''
    vhost: Optional[bool] = None
    response_headers: str = ''

    def __post_init__(self):
        if self.tech is None:
@@ -44,12 +45,13 @@ class WebsiteSnapshotDTO:
            url=self.url,
            host=self.host,
            title=self.title,
            status_code=self.status,
            status_code=self.status_code,
            content_length=self.content_length,
            location=self.location,
            webserver=self.web_server,
            webserver=self.webserver,
            content_type=self.content_type,
            tech=self.tech if self.tech else [],
            body_preview=self.body_preview,
            vhost=self.vhost
            response_body=self.response_body,
            vhost=self.vhost,
            response_headers=self.response_headers,
        )
backend/apps/asset/migrations/0001_initial.py (new file, 345 lines)
@@ -0,0 +1,345 @@

# Generated by Django 5.2.7 on 2026-01-06 00:55

import django.contrib.postgres.fields
import django.contrib.postgres.indexes
import django.core.validators
import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

    initial = True

    dependencies = [
        ('scan', '0001_initial'),
        ('targets', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='AssetStatistics',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('total_targets', models.IntegerField(default=0, help_text='目标总数')),
                ('total_subdomains', models.IntegerField(default=0, help_text='子域名总数')),
                ('total_ips', models.IntegerField(default=0, help_text='IP地址总数')),
                ('total_endpoints', models.IntegerField(default=0, help_text='端点总数')),
                ('total_websites', models.IntegerField(default=0, help_text='网站总数')),
                ('total_vulns', models.IntegerField(default=0, help_text='漏洞总数')),
                ('total_assets', models.IntegerField(default=0, help_text='总资产数(子域名+IP+端点+网站)')),
                ('prev_targets', models.IntegerField(default=0, help_text='上次目标总数')),
                ('prev_subdomains', models.IntegerField(default=0, help_text='上次子域名总数')),
                ('prev_ips', models.IntegerField(default=0, help_text='上次IP地址总数')),
                ('prev_endpoints', models.IntegerField(default=0, help_text='上次端点总数')),
                ('prev_websites', models.IntegerField(default=0, help_text='上次网站总数')),
                ('prev_vulns', models.IntegerField(default=0, help_text='上次漏洞总数')),
                ('prev_assets', models.IntegerField(default=0, help_text='上次总资产数')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='最后更新时间')),
            ],
            options={
                'verbose_name': '资产统计',
                'verbose_name_plural': '资产统计',
                'db_table': 'asset_statistics',
            },
        ),
        migrations.CreateModel(
            name='StatisticsHistory',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('date', models.DateField(help_text='统计日期', unique=True)),
                ('total_targets', models.IntegerField(default=0, help_text='目标总数')),
                ('total_subdomains', models.IntegerField(default=0, help_text='子域名总数')),
                ('total_ips', models.IntegerField(default=0, help_text='IP地址总数')),
                ('total_endpoints', models.IntegerField(default=0, help_text='端点总数')),
                ('total_websites', models.IntegerField(default=0, help_text='网站总数')),
                ('total_vulns', models.IntegerField(default=0, help_text='漏洞总数')),
                ('total_assets', models.IntegerField(default=0, help_text='总资产数')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='更新时间')),
            ],
            options={
                'verbose_name': '统计历史',
                'verbose_name_plural': '统计历史',
                'db_table': 'statistics_history',
                'ordering': ['-date'],
                'indexes': [models.Index(fields=['date'], name='statistics__date_1d29cd_idx')],
            },
        ),
        migrations.CreateModel(
            name='Directory',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.CharField(help_text='完整请求 URL', max_length=2000)),
                ('status', models.IntegerField(blank=True, help_text='HTTP 响应状态码', null=True)),
                ('content_length', models.BigIntegerField(blank=True, help_text='响应体字节大小(Content-Length 或实际长度)', null=True)),
                ('words', models.IntegerField(blank=True, help_text='响应体中单词数量(按空格分割)', null=True)),
                ('lines', models.IntegerField(blank=True, help_text='响应体行数(按换行符分割)', null=True)),
                ('content_type', models.CharField(blank=True, default='', help_text='响应头 Content-Type 值', max_length=200)),
                ('duration', models.BigIntegerField(blank=True, help_text='请求耗时(单位:纳秒)', null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='directories', to='targets.target')),
            ],
            options={
                'verbose_name': '目录',
                'verbose_name_plural': '目录',
                'db_table': 'directory',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='directory_created_2cef03_idx'), models.Index(fields=['target'], name='directory_target__e310c8_idx'), models.Index(fields=['url'], name='directory_url_ba40cd_idx'), models.Index(fields=['status'], name='directory_status_40bbe6_idx'), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='directory_url_trgm_idx', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('target', 'url'), name='unique_directory_url_target')],
            },
        ),
        migrations.CreateModel(
            name='DirectorySnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.CharField(help_text='目录URL', max_length=2000)),
                ('status', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.BigIntegerField(blank=True, help_text='内容长度', null=True)),
                ('words', models.IntegerField(blank=True, help_text='响应体中单词数量(按空格分割)', null=True)),
                ('lines', models.IntegerField(blank=True, help_text='响应体行数(按换行符分割)', null=True)),
                ('content_type', models.CharField(blank=True, default='', help_text='响应头 Content-Type 值', max_length=200)),
                ('duration', models.BigIntegerField(blank=True, help_text='请求耗时(单位:纳秒)', null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='directory_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '目录快照',
                'verbose_name_plural': '目录快照',
                'db_table': 'directory_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='directory_s_scan_id_c45900_idx'), models.Index(fields=['url'], name='directory_s_url_b4b72b_idx'), models.Index(fields=['status'], name='directory_s_status_e9f57e_idx'), models.Index(fields=['content_type'], name='directory_s_content_45e864_idx'), models.Index(fields=['-created_at'], name='directory_s_created_eb9d27_idx'), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='dir_snap_url_trgm', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_directory_per_scan_snapshot')],
            },
        ),
        migrations.CreateModel(
            name='Endpoint',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='最终访问的完整URL')),
                ('host', models.CharField(blank=True, default='', help_text='主机名(域名或IP地址)', max_length=253)),
                ('location', models.TextField(blank=True, default='', help_text='重定向地址(HTTP 3xx 响应头 Location)')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('title', models.TextField(blank=True, default='', help_text='网页标题(HTML <title> 标签内容)')),
                ('webserver', models.TextField(blank=True, default='', help_text='服务器类型(HTTP 响应头 Server 值)')),
                ('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
                ('content_type', models.TextField(blank=True, default='', help_text='响应类型(HTTP Content-Type 响应头)')),
                ('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈(服务器/框架/语言等)', size=None)),
                ('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.IntegerField(blank=True, help_text='响应体大小(单位字节)', null=True)),
                ('vhost', models.BooleanField(blank=True, help_text='是否支持虚拟主机', null=True)),
                ('matched_gf_patterns', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='匹配的GF模式列表,用于识别敏感端点(如api, debug, config等)', size=None)),
                ('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
                ('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='endpoints', to='targets.target')),
            ],
            options={
                'verbose_name': '端点',
                'verbose_name_plural': '端点',
                'db_table': 'endpoint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='endpoint_created_44fe9c_idx'), models.Index(fields=['target'], name='endpoint_target__7f9065_idx'), models.Index(fields=['url'], name='endpoint_url_30f66e_idx'), models.Index(fields=['host'], name='endpoint_host_5b4cc8_idx'), models.Index(fields=['status_code'], name='endpoint_status__5d4fdd_idx'), models.Index(fields=['title'], name='endpoint_title_29e26c_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='endpoint_tech_2bfa7c_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='endpoint_resp_headers_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='endpoint_url_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='endpoint_title_trgm_idx', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('url', 'target'), name='unique_endpoint_url_target')],
            },
        ),
        migrations.CreateModel(
            name='EndpointSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='端点URL')),
                ('host', models.CharField(blank=True, default='', help_text='主机名(域名或IP地址)', max_length=253)),
                ('title', models.TextField(blank=True, default='', help_text='页面标题')),
                ('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.IntegerField(blank=True, help_text='内容长度', null=True)),
                ('location', models.TextField(blank=True, default='', help_text='重定向位置')),
                ('webserver', models.TextField(blank=True, default='', help_text='Web服务器')),
                ('content_type', models.TextField(blank=True, default='', help_text='内容类型')),
                ('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈', size=None)),
                ('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
                ('vhost', models.BooleanField(blank=True, help_text='虚拟主机标志', null=True)),
                ('matched_gf_patterns', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='匹配的GF模式列表', size=None)),
                ('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='endpoint_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '端点快照',
                'verbose_name_plural': '端点快照',
                'db_table': 'endpoint_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='endpoint_sn_scan_id_6ac9a7_idx'), models.Index(fields=['url'], name='endpoint_sn_url_205160_idx'), models.Index(fields=['host'], name='endpoint_sn_host_577bfd_idx'), models.Index(fields=['title'], name='endpoint_sn_title_516a05_idx'), models.Index(fields=['status_code'], name='endpoint_sn_status__83efb0_idx'), models.Index(fields=['webserver'], name='endpoint_sn_webserv_66be83_idx'), models.Index(fields=['-created_at'], name='endpoint_sn_created_21fb5b_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='endpoint_sn_tech_0d0752_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='ep_snap_resp_hdr_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='ep_snap_url_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='ep_snap_title_trgm', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_endpoint_per_scan_snapshot')],
            },
        ),
        migrations.CreateModel(
            name='HostPortMapping',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('host', models.CharField(help_text='主机名(域名或IP)', max_length=1000)),
                ('ip', models.GenericIPAddressField(help_text='IP地址')),
                ('port', models.IntegerField(help_text='端口号(1-65535)', validators=[django.core.validators.MinValueValidator(1, message='端口号必须大于等于1'), django.core.validators.MaxValueValidator(65535, message='端口号必须小于等于65535')])),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='host_port_mappings', to='targets.target')),
            ],
            options={
                'verbose_name': '主机端口映射',
                'verbose_name_plural': '主机端口映射',
                'db_table': 'host_port_mapping',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['target'], name='host_port_m_target__943e9b_idx'), models.Index(fields=['host'], name='host_port_m_host_f78363_idx'), models.Index(fields=['ip'], name='host_port_m_ip_2e6f02_idx'), models.Index(fields=['port'], name='host_port_m_port_9fb9ff_idx'), models.Index(fields=['host', 'ip'], name='host_port_m_host_3ce245_idx'), models.Index(fields=['-created_at'], name='host_port_m_created_11cd22_idx')],
                'constraints': [models.UniqueConstraint(fields=('target', 'host', 'ip', 'port'), name='unique_target_host_ip_port')],
            },
        ),
        migrations.CreateModel(
            name='HostPortMappingSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('host', models.CharField(help_text='主机名(域名或IP)', max_length=1000)),
                ('ip', models.GenericIPAddressField(help_text='IP地址')),
                ('port', models.IntegerField(help_text='端口号(1-65535)', validators=[django.core.validators.MinValueValidator(1, message='端口号必须大于等于1'), django.core.validators.MaxValueValidator(65535, message='端口号必须小于等于65535')])),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务(主关联)', on_delete=django.db.models.deletion.CASCADE, related_name='host_port_mapping_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '主机端口映射快照',
                'verbose_name_plural': '主机端口映射快照',
                'db_table': 'host_port_mapping_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='host_port_m_scan_id_50ba0b_idx'), models.Index(fields=['host'], name='host_port_m_host_e99054_idx'), models.Index(fields=['ip'], name='host_port_m_ip_54818c_idx'), models.Index(fields=['port'], name='host_port_m_port_ed7b48_idx'), models.Index(fields=['host', 'ip'], name='host_port_m_host_8a463a_idx'), models.Index(fields=['scan', 'host'], name='host_port_m_scan_id_426fdb_idx'), models.Index(fields=['-created_at'], name='host_port_m_created_fb28b8_idx')],
                'constraints': [models.UniqueConstraint(fields=('scan', 'host', 'ip', 'port'), name='unique_scan_host_ip_port_snapshot')],
            },
        ),
        migrations.CreateModel(
            name='Subdomain',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='子域名名称', max_length=1000)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='subdomains', to='targets.target')),
            ],
            options={
                'verbose_name': '子域名',
                'verbose_name_plural': '子域名',
                'db_table': 'subdomain',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='subdomain_created_e187a8_idx'), models.Index(fields=['name', 'target'], name='subdomain_name_60e1d0_idx'), models.Index(fields=['target'], name='subdomain_target__e409f0_idx'), models.Index(fields=['name'], name='subdomain_name_d40ba7_idx'), django.contrib.postgres.indexes.GinIndex(fields=['name'], name='subdomain_name_trgm_idx', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('name', 'target'), name='unique_subdomain_name_target')],
            },
        ),
        migrations.CreateModel(
            name='SubdomainSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='子域名名称', max_length=1000)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='subdomain_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '子域名快照',
                'verbose_name_plural': '子域名快照',
                'db_table': 'subdomain_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='subdomain_s_scan_id_68c253_idx'), models.Index(fields=['name'], name='subdomain_s_name_2da42b_idx'), models.Index(fields=['-created_at'], name='subdomain_s_created_d2b48e_idx'), django.contrib.postgres.indexes.GinIndex(fields=['name'], name='subdomain_snap_name_trgm', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('scan', 'name'), name='unique_subdomain_per_scan_snapshot')],
            },
        ),
        migrations.CreateModel(
            name='Vulnerability',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.CharField(help_text='漏洞所在的URL', max_length=2000)),
                ('vuln_type', models.CharField(help_text='漏洞类型(如 xss, sqli)', max_length=100)),
                ('severity', models.CharField(choices=[('unknown', '未知'), ('info', '信息'), ('low', '低'), ('medium', '中'), ('high', '高'), ('critical', '危急')], default='unknown', help_text='严重性(未知/信息/低/中/高/危急)', max_length=20)),
                ('source', models.CharField(blank=True, default='', help_text='来源工具(如 dalfox, nuclei, crlfuzz)', max_length=50)),
                ('cvss_score', models.DecimalField(blank=True, decimal_places=1, help_text='CVSS 评分(0.0-10.0)', max_digits=3, null=True)),
                ('description', models.TextField(blank=True, default='', help_text='漏洞描述')),
                ('raw_output', models.JSONField(blank=True, default=dict, help_text='工具原始输出')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('target', models.ForeignKey(help_text='所属的扫描目标', on_delete=django.db.models.deletion.CASCADE, related_name='vulnerabilities', to='targets.target')),
            ],
            options={
                'verbose_name': '漏洞',
                'verbose_name_plural': '漏洞',
                'db_table': 'vulnerability',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['target'], name='vulnerabili_target__755a02_idx'), models.Index(fields=['vuln_type'], name='vulnerabili_vuln_ty_3010cd_idx'), models.Index(fields=['severity'], name='vulnerabili_severit_1a798b_idx'), models.Index(fields=['source'], name='vulnerabili_source_7c7552_idx'), models.Index(fields=['url'], name='vulnerabili_url_4dcc4d_idx'), models.Index(fields=['-created_at'], name='vulnerabili_created_e25ff7_idx')],
            },
        ),
        migrations.CreateModel(
            name='VulnerabilitySnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.CharField(help_text='漏洞所在的URL', max_length=2000)),
                ('vuln_type', models.CharField(help_text='漏洞类型(如 xss, sqli)', max_length=100)),
                ('severity', models.CharField(choices=[('unknown', '未知'), ('info', '信息'), ('low', '低'), ('medium', '中'), ('high', '高'), ('critical', '危急')], default='unknown', help_text='严重性(未知/信息/低/中/高/危急)', max_length=20)),
                ('source', models.CharField(blank=True, default='', help_text='来源工具(如 dalfox, nuclei, crlfuzz)', max_length=50)),
                ('cvss_score', models.DecimalField(blank=True, decimal_places=1, help_text='CVSS 评分(0.0-10.0)', max_digits=3, null=True)),
                ('description', models.TextField(blank=True, default='', help_text='漏洞描述')),
                ('raw_output', models.JSONField(blank=True, default=dict, help_text='工具原始输出')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='vulnerability_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '漏洞快照',
                'verbose_name_plural': '漏洞快照',
                'db_table': 'vulnerability_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='vulnerabili_scan_id_7b81c9_idx'), models.Index(fields=['url'], name='vulnerabili_url_11a707_idx'), models.Index(fields=['vuln_type'], name='vulnerabili_vuln_ty_6b90ee_idx'), models.Index(fields=['severity'], name='vulnerabili_severit_4eae0d_idx'), models.Index(fields=['source'], name='vulnerabili_source_968b1f_idx'), models.Index(fields=['-created_at'], name='vulnerabili_created_53a12e_idx')],
            },
        ),
        migrations.CreateModel(
            name='WebSite',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='最终访问的完整URL')),
                ('host', models.CharField(blank=True, default='', help_text='主机名(域名或IP地址)', max_length=253)),
                ('location', models.TextField(blank=True, default='', help_text='重定向地址(HTTP 3xx 响应头 Location)')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('title', models.TextField(blank=True, default='', help_text='网页标题(HTML <title> 标签内容)')),
                ('webserver', models.TextField(blank=True, default='', help_text='服务器类型(HTTP 响应头 Server 值)')),
                ('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
                ('content_type', models.TextField(blank=True, default='', help_text='响应类型(HTTP Content-Type 响应头)')),
                ('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈(服务器/框架/语言等)', size=None)),
                ('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.IntegerField(blank=True, help_text='响应体大小(单位字节)', null=True)),
                ('vhost', models.BooleanField(blank=True, help_text='是否支持虚拟主机', null=True)),
                ('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
                ('target', models.ForeignKey(help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)', on_delete=django.db.models.deletion.CASCADE, related_name='websites', to='targets.target')),
            ],
            options={
                'verbose_name': '站点',
                'verbose_name_plural': '站点',
                'db_table': 'website',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='website_created_c9cfd2_idx'), models.Index(fields=['url'], name='website_url_b18883_idx'), models.Index(fields=['host'], name='website_host_996b50_idx'), models.Index(fields=['target'], name='website_target__2a353b_idx'), models.Index(fields=['title'], name='website_title_c2775b_idx'), models.Index(fields=['status_code'], name='website_status__51663d_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='website_tech_e3f0cb_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='website_resp_headers_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='website_url_trgm_idx', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='website_title_trgm_idx', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('url', 'target'), name='unique_website_url_target')],
            },
        ),
        migrations.CreateModel(
            name='WebsiteSnapshot',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('url', models.TextField(help_text='站点URL')),
                ('host', models.CharField(blank=True, default='', help_text='主机名(域名或IP地址)', max_length=253)),
                ('title', models.TextField(blank=True, default='', help_text='页面标题')),
                ('status_code', models.IntegerField(blank=True, help_text='HTTP状态码', null=True)),
                ('content_length', models.BigIntegerField(blank=True, help_text='内容长度', null=True)),
                ('location', models.TextField(blank=True, default='', help_text='重定向位置')),
                ('webserver', models.TextField(blank=True, default='', help_text='Web服务器')),
                ('content_type', models.TextField(blank=True, default='', help_text='内容类型')),
                ('tech', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=100), blank=True, default=list, help_text='技术栈', size=None)),
                ('response_body', models.TextField(blank=True, default='', help_text='HTTP响应体')),
                ('vhost', models.BooleanField(blank=True, help_text='虚拟主机标志', null=True)),
                ('response_headers', models.TextField(blank=True, default='', help_text='原始HTTP响应头')),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
                ('scan', models.ForeignKey(help_text='所属的扫描任务', on_delete=django.db.models.deletion.CASCADE, related_name='website_snapshots', to='scan.scan')),
            ],
            options={
                'verbose_name': '网站快照',
                'verbose_name_plural': '网站快照',
                'db_table': 'website_snapshot',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scan'], name='website_sna_scan_id_26b6dc_idx'), models.Index(fields=['url'], name='website_sna_url_801a70_idx'), models.Index(fields=['host'], name='website_sna_host_348fe1_idx'), models.Index(fields=['title'], name='website_sna_title_b1a5ee_idx'), models.Index(fields=['-created_at'], name='website_sna_created_2c149a_idx'), django.contrib.postgres.indexes.GinIndex(fields=['tech'], name='website_sna_tech_3d6d2f_gin'), django.contrib.postgres.indexes.GinIndex(fields=['response_headers'], name='ws_snap_resp_hdr_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['url'], name='ws_snap_url_trgm', opclasses=['gin_trgm_ops']), django.contrib.postgres.indexes.GinIndex(fields=['title'], name='ws_snap_title_trgm', opclasses=['gin_trgm_ops'])],
                'constraints': [models.UniqueConstraint(fields=('scan', 'url'), name='unique_website_per_scan_snapshot')],
            },
        ),
    ]
104
backend/apps/asset/migrations/0002_create_search_views.py
Normal file
104
backend/apps/asset/migrations/0002_create_search_views.py
Normal file
@@ -0,0 +1,104 @@
"""
创建资产搜索物化视图(使用 pg_ivm 增量维护)

这些视图用于资产搜索功能,提供高性能的全文搜索能力。
"""

from django.db import migrations


class Migration(migrations.Migration):
    """创建资产搜索所需的增量物化视图"""

    dependencies = [
        ('asset', '0001_initial'),
    ]

    operations = [
        # 1. 确保 pg_ivm 扩展已安装
        migrations.RunSQL(
            sql="CREATE EXTENSION IF NOT EXISTS pg_ivm;",
            reverse_sql="DROP EXTENSION IF EXISTS pg_ivm;",
        ),

        # 2. 创建 Website 搜索视图
        # 注意:pg_ivm 不支持 ArrayField,所以 tech 字段需要从原表 JOIN 获取
        migrations.RunSQL(
            sql="""
            SELECT pgivm.create_immv('asset_search_view', $$
                SELECT
                    w.id,
                    w.url,
                    w.host,
                    w.title,
                    w.status_code,
                    w.response_headers,
                    w.response_body,
                    w.content_type,
                    w.content_length,
                    w.webserver,
                    w.location,
                    w.vhost,
                    w.created_at,
                    w.target_id
                FROM website w
            $$);
            """,
            reverse_sql="DROP TABLE IF EXISTS asset_search_view CASCADE;",
        ),

        # 3. 创建 Endpoint 搜索视图
        migrations.RunSQL(
            sql="""
            SELECT pgivm.create_immv('endpoint_search_view', $$
                SELECT
                    e.id,
                    e.url,
                    e.host,
                    e.title,
                    e.status_code,
                    e.response_headers,
                    e.response_body,
                    e.content_type,
                    e.content_length,
                    e.webserver,
                    e.location,
                    e.vhost,
                    e.created_at,
                    e.target_id
                FROM endpoint e
            $$);
            """,
            reverse_sql="DROP TABLE IF EXISTS endpoint_search_view CASCADE;",
        ),

        # 4. 为搜索视图创建索引(加速查询)
        migrations.RunSQL(
            sql=[
                # Website 搜索视图索引
                "CREATE INDEX IF NOT EXISTS asset_search_view_host_idx ON asset_search_view (host);",
                "CREATE INDEX IF NOT EXISTS asset_search_view_url_idx ON asset_search_view (url);",
                "CREATE INDEX IF NOT EXISTS asset_search_view_title_idx ON asset_search_view (title);",
                "CREATE INDEX IF NOT EXISTS asset_search_view_status_idx ON asset_search_view (status_code);",
                "CREATE INDEX IF NOT EXISTS asset_search_view_created_idx ON asset_search_view (created_at DESC);",
                # Endpoint 搜索视图索引
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_host_idx ON endpoint_search_view (host);",
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_url_idx ON endpoint_search_view (url);",
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_title_idx ON endpoint_search_view (title);",
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_status_idx ON endpoint_search_view (status_code);",
                "CREATE INDEX IF NOT EXISTS endpoint_search_view_created_idx ON endpoint_search_view (created_at DESC);",
            ],
            reverse_sql=[
                "DROP INDEX IF EXISTS asset_search_view_host_idx;",
                "DROP INDEX IF EXISTS asset_search_view_url_idx;",
                "DROP INDEX IF EXISTS asset_search_view_title_idx;",
                "DROP INDEX IF EXISTS asset_search_view_status_idx;",
                "DROP INDEX IF EXISTS asset_search_view_created_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_host_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_url_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_title_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_status_idx;",
                "DROP INDEX IF EXISTS endpoint_search_view_created_idx;",
            ],
        ),
    ]
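Reading from an incrementally maintained view created this way is plain SQL. A minimal caller-side sketch (the view and column names come from the migration above; the function itself is illustrative and not part of the diff):

from django.db import connection

def search_websites_by_host(host: str, limit: int = 50):
    # pg_ivm keeps asset_search_view in sync on writes to `website`,
    # so this read needs no REFRESH step; the btree index on (host)
    # created in step 4 can serve the equality filter.
    with connection.cursor() as cursor:
        cursor.execute(
            "SELECT id, url, title FROM asset_search_view"
            " WHERE host = %s ORDER BY created_at DESC LIMIT %s",
            [host, limit],
        )
        return cursor.fetchall()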
@@ -1,16 +1,10 @@

from django.db import models
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.core.validators import MinValueValidator, MaxValueValidator


class SoftDeleteManager(models.Manager):
    """软删除管理器:默认只返回未删除的记录"""

    def get_queryset(self):
        return super().get_queryset().filter(deleted_at__isnull=True)


class Subdomain(models.Model):
    """
    子域名模型(纯资产表)
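For context on the manager this hunk removes: the split between objects and all_objects is what made soft deletion transparent to ordinary queries. A hypothetical usage sketch against the old model (names as above; not part of this diff):

from django.utils import timezone
from apps.asset.models.asset_models import Subdomain

active = Subdomain.objects.filter(target_id=1)          # SoftDeleteManager: only rows with deleted_at IS NULL
everything = Subdomain.all_objects.filter(target_id=1)  # plain Manager: includes soft-deleted rows
Subdomain.objects.filter(id__in=[1, 2]).update(deleted_at=timezone.now())  # soft delete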
@@ -29,33 +23,30 @@ class Subdomain(models.Model):
        help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
    )
    name = models.CharField(max_length=1000, help_text='子域名名称')
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='首次发现时间')

    # ==================== 软删除字段 ====================
    deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')

    # ==================== 管理器 ====================
    objects = SoftDeleteManager()  # 默认管理器:只返回未删除的记录
    all_objects = models.Manager()  # 全量管理器:包括已删除的记录(用于硬删除)
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'subdomain'
        verbose_name = '子域名'
        verbose_name_plural = '子域名'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['-created_at']),
            models.Index(fields=['name', 'target']),  # 复合索引,优化 get_by_names_and_target_id 批量查询
            models.Index(fields=['target']),  # 优化从target_id快速查找下面的子域名
            models.Index(fields=['name']),  # 优化从name快速查找子域名,搜索场景
            models.Index(fields=['deleted_at', '-discovered_at']),  # 软删除 + 时间索引
            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
            GinIndex(
                name='subdomain_name_trgm_idx',
                fields=['name'],
                opclasses=['gin_trgm_ops']
            ),
        ]
        constraints = [
            # 部分唯一约束:只对未删除记录生效
            # 普通唯一约束:name + target 组合唯一
            models.UniqueConstraint(
                fields=['name', 'target'],
                condition=models.Q(deleted_at__isnull=True),
                name='unique_name_target_active'
                name='unique_subdomain_name_target'
            )
        ]
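The trigram index declared above is what keeps substring search indexed. A small sketch of a query it can serve (assuming the pg_trgm extension is installed; note that a case-sensitive contains lookup maps to LIKE '%...%', which gin_trgm_ops covers, whereas icontains wraps the column in UPPER() and may bypass this particular index):

from apps.asset.models.asset_models import Subdomain

hits = Subdomain.objects.filter(name__contains='admin')  # LIKE '%admin%', served by subdomain_name_trgm_idx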
@@ -74,40 +65,35 @@ class Endpoint(models.Model):
        help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
    )

    url = models.CharField(max_length=2000, help_text='最终访问的完整URL')
    url = models.TextField(help_text='最终访问的完整URL')
    host = models.CharField(
        max_length=253,
        blank=True,
        default='',
        help_text='主机名(域名或IP地址)'
    )
    location = models.CharField(
        max_length=1000,
    location = models.TextField(
        blank=True,
        default='',
        help_text='重定向地址(HTTP 3xx 响应头 Location)'
    )
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
    title = models.CharField(
        max_length=1000,
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
    title = models.TextField(
        blank=True,
        default='',
        help_text='网页标题(HTML <title> 标签内容)'
    )
    webserver = models.CharField(
        max_length=200,
    webserver = models.TextField(
        blank=True,
        default='',
        help_text='服务器类型(HTTP 响应头 Server 值)'
    )
    body_preview = models.CharField(
        max_length=1000,
    response_body = models.TextField(
        blank=True,
        default='',
        help_text='响应正文前N个字符(默认100个字符)'
        help_text='HTTP响应体'
    )
    content_type = models.CharField(
        max_length=200,
    content_type = models.TextField(
        blank=True,
        default='',
        help_text='响应类型(HTTP Content-Type 响应头)'
@@ -139,33 +125,47 @@ class Endpoint(models.Model):
        default=list,
        help_text='匹配的GF模式列表,用于识别敏感端点(如api, debug, config等)'
    )

    # ==================== 软删除字段 ====================
    deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')

    # ==================== 管理器 ====================
    objects = SoftDeleteManager()  # 默认管理器:只返回未删除的记录
    all_objects = models.Manager()  # 全量管理器:包括已删除的记录(用于硬删除)
    response_headers = models.TextField(
        blank=True,
        default='',
        help_text='原始HTTP响应头'
    )

    class Meta:
        db_table = 'endpoint'
        verbose_name = '端点'
        verbose_name_plural = '端点'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['target']),  # 优化从target_id快速查找下面的端点(主关联字段)
            models.Index(fields=['-created_at']),
            models.Index(fields=['target']),  # 优化从 target_id快速查找下面的端点(主关联字段)
            models.Index(fields=['url']),  # URL索引,优化查询性能
            models.Index(fields=['host']),  # host索引,优化根据主机名查询
            models.Index(fields=['status_code']),  # 状态码索引,优化筛选
            models.Index(fields=['deleted_at', '-discovered_at']),  # 软删除 + 时间索引
            models.Index(fields=['title']),  # title索引,优化智能过滤搜索
            GinIndex(fields=['tech']),  # GIN索引,优化 tech 数组字段的 __contains 查询
            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
            GinIndex(
                name='endpoint_resp_headers_trgm_idx',
                fields=['response_headers'],
                opclasses=['gin_trgm_ops']
            ),
            GinIndex(
                name='endpoint_url_trgm_idx',
                fields=['url'],
                opclasses=['gin_trgm_ops']
            ),
            GinIndex(
                name='endpoint_title_trgm_idx',
                fields=['title'],
                opclasses=['gin_trgm_ops']
            ),
        ]
        constraints = [
            # 部分唯一约束:只对未删除记录生效
            # 普通唯一约束:url + target 组合唯一
            models.UniqueConstraint(
                fields=['url', 'target'],
                condition=models.Q(deleted_at__isnull=True),
                name='unique_endpoint_url_target_active'
                name='unique_endpoint_url_target'
            )
        ]
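Similarly, the plain GinIndex(fields=['tech']) above targets array containment. A one-line sketch of the lookup it accelerates:

from apps.asset.models import Endpoint

nginx_endpoints = Endpoint.objects.filter(tech__contains=['nginx'])  # compiles to tech @> ARRAY['nginx'], served by the GIN index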
@@ -184,40 +184,35 @@ class WebSite(models.Model):
        help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
    )

    url = models.CharField(max_length=2000, help_text='最终访问的完整URL')
    url = models.TextField(help_text='最终访问的完整URL')
    host = models.CharField(
        max_length=253,
        blank=True,
        default='',
        help_text='主机名(域名或IP地址)'
    )
    location = models.CharField(
        max_length=1000,
    location = models.TextField(
        blank=True,
        default='',
        help_text='重定向地址(HTTP 3xx 响应头 Location)'
    )
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
    title = models.CharField(
        max_length=1000,
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
    title = models.TextField(
        blank=True,
        default='',
        help_text='网页标题(HTML <title> 标签内容)'
    )
    webserver = models.CharField(
        max_length=200,
    webserver = models.TextField(
        blank=True,
        default='',
        help_text='服务器类型(HTTP 响应头 Server 值)'
    )
    body_preview = models.CharField(
        max_length=1000,
    response_body = models.TextField(
        blank=True,
        default='',
        help_text='响应正文前N个字符(默认100个字符)'
        help_text='HTTP响应体'
    )
    content_type = models.CharField(
        max_length=200,
    content_type = models.TextField(
        blank=True,
        default='',
        help_text='响应类型(HTTP Content-Type 响应头)'
@@ -243,32 +238,47 @@ class WebSite(models.Model):
        blank=True,
        help_text='是否支持虚拟主机'
    )

    # ==================== 软删除字段 ====================
    deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')

    # ==================== 管理器 ====================
    objects = SoftDeleteManager()  # 默认管理器:只返回未删除的记录
    all_objects = models.Manager()  # 全量管理器:包括已删除的记录(用于硬删除)
    response_headers = models.TextField(
        blank=True,
        default='',
        help_text='原始HTTP响应头'
    )

    class Meta:
        db_table = 'website'
        verbose_name = '站点'
        verbose_name_plural = '站点'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['-created_at']),
            models.Index(fields=['url']),  # URL索引,优化查询性能
            models.Index(fields=['host']),  # host索引,优化根据主机名查询
            models.Index(fields=['target']),  # 优化从target_id快速查找下面的站点
            models.Index(fields=['deleted_at', '-discovered_at']),  # 软删除 + 时间索引
            models.Index(fields=['target']),  # 优化从 target_id快速查找下面的站点
            models.Index(fields=['title']),  # title索引,优化智能过滤搜索
            models.Index(fields=['status_code']),  # 状态码索引,优化智能过滤搜索
            GinIndex(fields=['tech']),  # GIN索引,优化 tech 数组字段的 __contains 查询
            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
            GinIndex(
                name='website_resp_headers_trgm_idx',
                fields=['response_headers'],
                opclasses=['gin_trgm_ops']
            ),
            GinIndex(
                name='website_url_trgm_idx',
                fields=['url'],
                opclasses=['gin_trgm_ops']
            ),
            GinIndex(
                name='website_title_trgm_idx',
                fields=['title'],
                opclasses=['gin_trgm_ops']
            ),
        ]
        constraints = [
            # 部分唯一约束:只对未删除记录生效
            # 普通唯一约束:url + target 组合唯一
            models.UniqueConstraint(
                fields=['url', 'target'],
                condition=models.Q(deleted_at__isnull=True),
                name='unique_website_url_target_active'
                name='unique_website_url_target'
            )
        ]
@@ -282,19 +292,11 @@ class Directory(models.Model):
    """

    id = models.AutoField(primary_key=True)
    website = models.ForeignKey(
        'Website',
        on_delete=models.CASCADE,
        related_name='directories',
        help_text='所属的站点(主关联字段,表示所属关系,不能为空)'
    )
    target = models.ForeignKey(
        'targets.Target',  # 使用字符串引用
        'targets.Target',
        on_delete=models.CASCADE,
        related_name='directories',
        null=True,
        blank=True,
        help_text='所属的扫描目标(冗余字段,用于快速查询)'
        help_text='所属的扫描目标'
    )

    url = models.CharField(
@@ -335,34 +337,30 @@ class Directory(models.Model):
        help_text='请求耗时(单位:纳秒)'
    )

    discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')

    # ==================== 软删除字段 ====================
    deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')

    # ==================== 管理器 ====================
    objects = SoftDeleteManager()  # 默认管理器:只返回未删除的记录
    all_objects = models.Manager()  # 全量管理器:包括已删除的记录(用于硬删除)
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'directory'
        verbose_name = '目录'
        verbose_name_plural = '目录'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['-created_at']),
            models.Index(fields=['target']),  # 优化从target_id快速查找下面的目录
            models.Index(fields=['url']),  # URL索引,优化搜索和唯一约束
            models.Index(fields=['website']),  # 站点索引,优化按站点查询
            models.Index(fields=['status']),  # 状态码索引,优化筛选
            models.Index(fields=['deleted_at', '-discovered_at']),  # 软删除 + 时间索引
            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
            GinIndex(
                name='directory_url_trgm_idx',
                fields=['url'],
                opclasses=['gin_trgm_ops']
            ),
        ]
        constraints = [
            # 部分唯一约束:只对未删除记录生效
            # 普通唯一约束:target + url 组合唯一
            models.UniqueConstraint(
                fields=['website', 'url'],
                condition=models.Q(deleted_at__isnull=True),
                name='unique_directory_url_website_active'
                fields=['target', 'url'],
                name='unique_directory_url_target'
            ),
        ]
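A plausible reading of why the condition=Q(deleted_at__isnull=True) constraints give way to plain ones throughout this file (an inference, not stated in the diff): the repositories below move to bulk_create(update_conflicts=True, unique_fields=...), and Django's unique_fields maps to a bare ON CONFLICT (col, ...) clause, which PostgreSQL can only match against a non-partial unique constraint. A sketch of the call shape this enables:

Directory.objects.bulk_create(
    rows,
    update_conflicts=True,
    unique_fields=['target', 'url'],   # must correspond to a plain (non-partial) unique constraint
    update_fields=['status', 'content_length', 'words', 'lines', 'content_type', 'duration'],
)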
@@ -410,43 +408,29 @@ class HostPortMapping(models.Model):
    )

    # ==================== 时间字段 ====================
    discovered_at = models.DateTimeField(
    created_at = models.DateTimeField(
        auto_now_add=True,
        help_text='发现时间'
        help_text='创建时间'
    )

    # ==================== 软删除字段 ====================
    deleted_at = models.DateTimeField(
        null=True,
        blank=True,
        db_index=True,
        help_text='删除时间(NULL表示未删除)'
    )

    # ==================== 管理器 ====================
    objects = SoftDeleteManager()  # 默认管理器:只返回未删除的记录
    all_objects = models.Manager()  # 全量管理器:包括已删除的记录(用于硬删除)

    class Meta:
        db_table = 'host_port_mapping'
        verbose_name = '主机端口映射'
        verbose_name_plural = '主机端口映射'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['target']),  # 优化按目标查询
            models.Index(fields=['host']),  # 优化按主机名查询
            models.Index(fields=['ip']),  # 优化按IP查询
            models.Index(fields=['port']),  # 优化按端口查询
            models.Index(fields=['host', 'ip']),  # 优化组合查询
            models.Index(fields=['-discovered_at']),  # 优化时间排序
            models.Index(fields=['deleted_at', '-discovered_at']),  # 软删除 + 时间索引
            models.Index(fields=['-created_at']),  # 优化时间排序
        ]
        constraints = [
            # 复合唯一约束:target + host + ip + port 组合唯一(只对未删除记录生效)
            # 复合唯一约束:target + host + ip + port 组合唯一
            models.UniqueConstraint(
                fields=['target', 'host', 'ip', 'port'],
                condition=models.Q(deleted_at__isnull=True),
                name='unique_target_host_ip_port_active'
                name='unique_target_host_ip_port'
            ),
        ]
@@ -474,7 +458,7 @@ class Vulnerability(models.Model):
    )

    # ==================== 核心字段 ====================
    url = models.TextField(help_text='漏洞所在的URL')
    url = models.CharField(max_length=2000, help_text='漏洞所在的URL')
    vuln_type = models.CharField(max_length=100, help_text='漏洞类型(如 xss, sqli)')
    severity = models.CharField(
        max_length=20,
@@ -488,27 +472,20 @@ class Vulnerability(models.Model):
    raw_output = models.JSONField(blank=True, default=dict, help_text='工具原始输出')

    # ==================== 时间字段 ====================
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='首次发现时间')

    # ==================== 软删除字段 ====================
    deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')

    # ==================== 管理器 ====================
    objects = SoftDeleteManager()
    all_objects = models.Manager()
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'vulnerability'
        verbose_name = '漏洞'
        verbose_name_plural = '漏洞'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['target']),
            models.Index(fields=['vuln_type']),
            models.Index(fields=['severity']),
            models.Index(fields=['source']),
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['deleted_at', '-discovered_at']),
            models.Index(fields=['url']),  # url索引,优化智能过滤搜索
            models.Index(fields=['-created_at']),
        ]

    def __str__(self):
@@ -1,5 +1,6 @@
from django.db import models
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.core.validators import MinValueValidator, MaxValueValidator


@@ -15,17 +16,23 @@ class SubdomainSnapshot(models.Model):
    )

    name = models.CharField(max_length=1000, help_text='子域名名称')
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'subdomain_snapshot'
        verbose_name = '子域名快照'
        verbose_name_plural = '子域名快照'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['scan']),
            models.Index(fields=['name']),
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['-created_at']),
            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
            GinIndex(
                name='subdomain_snap_name_trgm',
                fields=['name'],
                opclasses=['gin_trgm_ops']
            ),
        ]
        constraints = [
            # 唯一约束:同一次扫描中,同一个子域名只能记录一次
@@ -54,34 +61,57 @@ class WebsiteSnapshot(models.Model):
    )

    # 扫描结果数据
    url = models.CharField(max_length=2000, help_text='站点URL')
    url = models.TextField(help_text='站点URL')
    host = models.CharField(max_length=253, blank=True, default='', help_text='主机名(域名或IP地址)')
    title = models.CharField(max_length=500, blank=True, default='', help_text='页面标题')
    status = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
    title = models.TextField(blank=True, default='', help_text='页面标题')
    status_code = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
    content_length = models.BigIntegerField(null=True, blank=True, help_text='内容长度')
    location = models.CharField(max_length=1000, blank=True, default='', help_text='重定向位置')
    web_server = models.CharField(max_length=200, blank=True, default='', help_text='Web服务器')
    content_type = models.CharField(max_length=200, blank=True, default='', help_text='内容类型')
    location = models.TextField(blank=True, default='', help_text='重定向位置')
    webserver = models.TextField(blank=True, default='', help_text='Web服务器')
    content_type = models.TextField(blank=True, default='', help_text='内容类型')
    tech = ArrayField(
        models.CharField(max_length=100),
        blank=True,
        default=list,
        help_text='技术栈'
    )
    body_preview = models.TextField(blank=True, default='', help_text='响应体预览')
    response_body = models.TextField(blank=True, default='', help_text='HTTP响应体')
    vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
    response_headers = models.TextField(
        blank=True,
        default='',
        help_text='原始HTTP响应头'
    )
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'website_snapshot'
        verbose_name = '网站快照'
        verbose_name_plural = '网站快照'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['scan']),
            models.Index(fields=['url']),
            models.Index(fields=['host']),  # host索引,优化根据主机名查询
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['title']),  # title索引,优化标题搜索
            models.Index(fields=['-created_at']),
            GinIndex(fields=['tech']),  # GIN索引,优化数组字段查询
            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
            GinIndex(
                name='ws_snap_resp_hdr_trgm',
                fields=['response_headers'],
                opclasses=['gin_trgm_ops']
            ),
            GinIndex(
                name='ws_snap_url_trgm',
                fields=['url'],
                opclasses=['gin_trgm_ops']
            ),
            GinIndex(
                name='ws_snap_title_trgm',
                fields=['title'],
                opclasses=['gin_trgm_ops']
            ),
        ]
        constraints = [
            # 唯一约束:同一次扫描中,同一个URL只能记录一次
@@ -118,18 +148,25 @@ class DirectorySnapshot(models.Model):
    lines = models.IntegerField(null=True, blank=True, help_text='响应体行数(按换行符分割)')
    content_type = models.CharField(max_length=200, blank=True, default='', help_text='响应头 Content-Type 值')
    duration = models.BigIntegerField(null=True, blank=True, help_text='请求耗时(单位:纳秒)')
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'directory_snapshot'
        verbose_name = '目录快照'
        verbose_name_plural = '目录快照'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['scan']),
            models.Index(fields=['url']),
            models.Index(fields=['status']),  # 状态码索引,优化筛选
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['content_type']),  # content_type索引,优化内容类型搜索
            models.Index(fields=['-created_at']),
            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
            GinIndex(
                name='dir_snap_url_trgm',
                fields=['url'],
                opclasses=['gin_trgm_ops']
            ),
        ]
        constraints = [
            # 唯一约束:同一次扫描中,同一个目录URL只能记录一次
@@ -183,16 +220,16 @@ class HostPortMappingSnapshot(models.Model):
    )

    # ==================== 时间字段 ====================
    discovered_at = models.DateTimeField(
    created_at = models.DateTimeField(
        auto_now_add=True,
        help_text='发现时间'
        help_text='创建时间'
    )

    class Meta:
        db_table = 'host_port_mapping_snapshot'
        verbose_name = '主机端口映射快照'
        verbose_name_plural = '主机端口映射快照'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['scan']),  # 优化按扫描查询
            models.Index(fields=['host']),  # 优化按主机名查询
@@ -200,7 +237,7 @@ class HostPortMappingSnapshot(models.Model):
            models.Index(fields=['port']),  # 优化按端口查询
            models.Index(fields=['host', 'ip']),  # 优化组合查询
            models.Index(fields=['scan', 'host']),  # 优化扫描+主机查询
            models.Index(fields=['-discovered_at']),  # 优化时间排序
            models.Index(fields=['-created_at']),  # 优化时间排序
        ]
        constraints = [
            # 复合唯一约束:同一次扫描中,scan + host + ip + port 组合唯一
@@ -230,26 +267,26 @@ class EndpointSnapshot(models.Model):
    )

    # 扫描结果数据
    url = models.CharField(max_length=2000, help_text='端点URL')
    url = models.TextField(help_text='端点URL')
    host = models.CharField(
        max_length=253,
        blank=True,
        default='',
        help_text='主机名(域名或IP地址)'
    )
    title = models.CharField(max_length=1000, blank=True, default='', help_text='页面标题')
    title = models.TextField(blank=True, default='', help_text='页面标题')
    status_code = models.IntegerField(null=True, blank=True, help_text='HTTP状态码')
    content_length = models.IntegerField(null=True, blank=True, help_text='内容长度')
    location = models.CharField(max_length=1000, blank=True, default='', help_text='重定向位置')
    webserver = models.CharField(max_length=200, blank=True, default='', help_text='Web服务器')
    content_type = models.CharField(max_length=200, blank=True, default='', help_text='内容类型')
    location = models.TextField(blank=True, default='', help_text='重定向位置')
    webserver = models.TextField(blank=True, default='', help_text='Web服务器')
    content_type = models.TextField(blank=True, default='', help_text='内容类型')
    tech = ArrayField(
        models.CharField(max_length=100),
        blank=True,
        default=list,
        help_text='技术栈'
    )
    body_preview = models.CharField(max_length=1000, blank=True, default='', help_text='响应体预览')
    response_body = models.TextField(blank=True, default='', help_text='HTTP响应体')
    vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
    matched_gf_patterns = ArrayField(
        models.CharField(max_length=100),
@@ -257,19 +294,43 @@ class EndpointSnapshot(models.Model):
        default=list,
        help_text='匹配的GF模式列表'
    )
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
    response_headers = models.TextField(
        blank=True,
        default='',
        help_text='原始HTTP响应头'
    )
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'endpoint_snapshot'
        verbose_name = '端点快照'
        verbose_name_plural = '端点快照'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['scan']),
            models.Index(fields=['url']),
            models.Index(fields=['host']),  # host索引,优化根据主机名查询
            models.Index(fields=['title']),  # title索引,优化标题搜索
            models.Index(fields=['status_code']),  # 状态码索引,优化筛选
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['webserver']),  # webserver索引,优化服务器搜索
            models.Index(fields=['-created_at']),
            GinIndex(fields=['tech']),  # GIN索引,优化数组字段查询
            # pg_trgm GIN 索引,支持 LIKE '%keyword%' 模糊搜索
            GinIndex(
                name='ep_snap_resp_hdr_trgm',
                fields=['response_headers'],
                opclasses=['gin_trgm_ops']
            ),
            GinIndex(
                name='ep_snap_url_trgm',
                fields=['url'],
                opclasses=['gin_trgm_ops']
            ),
            GinIndex(
                name='ep_snap_title_trgm',
                fields=['title'],
                opclasses=['gin_trgm_ops']
            ),
        ]
        constraints = [
            # 唯一约束:同一次扫描中,同一个URL只能记录一次
@@ -302,7 +363,7 @@ class VulnerabilitySnapshot(models.Model):
    )

    # ==================== 核心字段 ====================
    url = models.TextField(help_text='漏洞所在的URL')
    url = models.CharField(max_length=2000, help_text='漏洞所在的URL')
    vuln_type = models.CharField(max_length=100, help_text='漏洞类型(如 xss, sqli)')
    severity = models.CharField(
        max_length=20,
@@ -316,19 +377,20 @@ class VulnerabilitySnapshot(models.Model):
    raw_output = models.JSONField(blank=True, default=dict, help_text='工具原始输出')

    # ==================== 时间字段 ====================
    discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
    created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')

    class Meta:
        db_table = 'vulnerability_snapshot'
        verbose_name = '漏洞快照'
        verbose_name_plural = '漏洞快照'
        ordering = ['-discovered_at']
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['scan']),
            models.Index(fields=['url']),  # url索引,优化URL搜索
            models.Index(fields=['vuln_type']),
            models.Index(fields=['severity']),
            models.Index(fields=['source']),
            models.Index(fields=['-discovered_at']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self):
@@ -3,162 +3,141 @@ Django ORM 实现的 Directory Repository
"""

import logging
from typing import List, Tuple, Dict, Iterator
from django.db import transaction, IntegrityError, OperationalError, DatabaseError
from django.utils import timezone
from typing import List, Iterator
from django.db import transaction

from apps.asset.models.asset_models import Directory
from apps.asset.dtos import DirectoryDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)


@auto_ensure_db_connection
class DjangoDirectoryRepository:
    """Django ORM 实现的 Directory Repository"""

    def bulk_create_ignore_conflicts(self, items: List[DirectoryDTO]) -> int:
    def bulk_upsert(self, items: List[DirectoryDTO]) -> int:
        """
        批量创建 Directory,忽略冲突
        批量创建或更新 Directory(upsert)

        存在则更新所有字段,不存在则创建。
        使用 Django 原生 update_conflicts。

        注意:自动按模型唯一约束去重,保留最后一条记录。

        Args:
            items: Directory DTO 列表

        Returns:
            int: 实际创建的记录数

        Raises:
            IntegrityError: 数据完整性错误
            OperationalError: 数据库操作错误
            DatabaseError: 数据库错误
            int: 处理的记录数
        """
        if not items:
            return 0

        try:
            # 转换为 Django 模型对象
            directory_objects = [
            # 自动按模型唯一约束去重
            unique_items = deduplicate_for_bulk(items, Directory)

            # 直接从 DTO 字段构建 Model
            directories = [
                Directory(
                    website_id=item.website_id,
                    target_id=item.target_id,
                    url=item.url,
                    status=item.status,
                    content_length=item.content_length,
                    words=item.words,
                    lines=item.lines,
                    content_type=item.content_type,
                    content_type=item.content_type or '',
                    duration=item.duration
                )
                for item in items
                for item in unique_items
            ]

            with transaction.atomic():
                # 批量插入或忽略冲突
                # 如果 website + url 已存在,忽略冲突
                Directory.objects.bulk_create(
                    directory_objects,
                    ignore_conflicts=True
                    directories,
                    update_conflicts=True,
                    unique_fields=['target', 'url'],
                    update_fields=[
                        'status', 'content_length', 'words',
                        'lines', 'content_type', 'duration'
                    ],
                    batch_size=1000
                )

            logger.debug(f"成功处理 {len(items)} 条 Directory 记录")
            return len(items)

        except IntegrityError as e:
            logger.error(
                f"批量插入 Directory 失败 - 数据完整性错误: {e}, "
                f"记录数: {len(items)}"
            )
            raise

        except OperationalError as e:
            logger.error(
                f"批量插入 Directory 失败 - 数据库操作错误: {e}, "
                f"记录数: {len(items)}"
            )
            raise

        except DatabaseError as e:
            logger.error(
                f"批量插入 Directory 失败 - 数据库错误: {e}, "
                f"记录数: {len(items)}"
            )
            raise

            logger.debug(f"批量 upsert Directory 成功: {len(unique_items)} 条")
            return len(unique_items)

        except Exception as e:
            logger.error(
                f"批量插入 Directory 失败 - 未知错误: {e}, "
                f"记录数: {len(items)}, "
                f"错误类型: {type(e).__name__}",
                exc_info=True
            )
            logger.error(f"批量 upsert Directory 失败: {e}")
            raise

    def get_by_website(self, website_id: int) -> List[DirectoryDTO]:
    def bulk_create_ignore_conflicts(self, items: List[DirectoryDTO]) -> int:
        """
        获取指定站点的所有目录
        批量创建 Directory(存在即跳过)

        与 bulk_upsert 不同,此方法不会更新已存在的记录。
        适用于批量添加场景,只提供 URL,没有其他字段数据。

        注意:自动按模型唯一约束去重,保留最后一条记录。

        Args:
            website_id: 站点 ID
            items: Directory DTO 列表

        Returns:
            List[DirectoryDTO]: 目录列表
            int: 处理的记录数
        """
        if not items:
            return 0

        try:
            directories = Directory.objects.filter(website_id=website_id)
            return [
                DirectoryDTO(
                    website_id=d.website_id,
                    target_id=d.target_id,
                    url=d.url,
                    status=d.status,
                    content_length=d.content_length,
                    words=d.words,
                    lines=d.lines,
                    content_type=d.content_type,
                    duration=d.duration
            # 自动按模型唯一约束去重
            unique_items = deduplicate_for_bulk(items, Directory)

            directories = [
                Directory(
                    target_id=item.target_id,
                    url=item.url,
                    status=item.status,
                    content_length=item.content_length,
                    words=item.words,
                    lines=item.lines,
                    content_type=item.content_type or '',
                    duration=item.duration
                )
                for d in directories
                for item in unique_items
            ]

        except Exception as e:
            logger.error(f"获取目录列表失败 - Website ID: {website_id}, 错误: {e}")
            raise

    def count_by_website(self, website_id: int) -> int:
        """
        统计指定站点的目录总数

        Args:
            website_id: 站点 ID

        Returns:
            int: 目录总数
        """
        try:
            count = Directory.objects.filter(website_id=website_id).count()
            logger.debug(f"Website {website_id} 的目录总数: {count}")
            return count

            with transaction.atomic():
                Directory.objects.bulk_create(
                    directories,
                    ignore_conflicts=True,
                    batch_size=1000
                )

            logger.debug(f"批量创建 Directory 成功(ignore_conflicts): {len(unique_items)} 条")
            return len(unique_items)

        except Exception as e:
            logger.error(f"统计目录数量失败 - Website ID: {website_id}, 错误: {e}")
            logger.error(f"批量创建 Directory 失败: {e}")
            raise

    def count_by_target(self, target_id: int) -> int:
        """统计目标下的目录总数"""
        return Directory.objects.filter(target_id=target_id).count()

    def get_all(self):
        """
        获取所有目录

        Returns:
            QuerySet: 目录查询集
        """
        return Directory.objects.all()

        """获取所有目录"""
        return Directory.objects.all().order_by('-created_at')

    def get_by_target(self, target_id: int):
        return Directory.objects.filter(target_id=target_id).select_related('website').order_by('-discovered_at')
        """获取目标下的所有目录"""
        return Directory.objects.filter(target_id=target_id).order_by('-created_at')

    def get_urls_for_export(self, target_id: int, batch_size: int = 1000) -> Iterator[str]:
        """流式导出目标下的所有目录 URL(只查 url 字段,避免加载多余数据)。"""
        """流式导出目标下的所有目录 URL"""
        try:
            queryset = (
                Directory.objects
@@ -172,78 +151,31 @@ class DjangoDirectoryRepository:
        except Exception as e:
            logger.error("流式导出目录 URL 失败 - Target ID: %s, 错误: %s", target_id, e)
            raise

    def soft_delete_by_ids(self, directory_ids: List[int]) -> int:

    def iter_raw_data_for_export(
        self,
        target_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        根据 ID 列表批量软删除Directory
        流式获取原始数据用于 CSV 导出

        Args:
            directory_ids: Directory ID 列表
            target_id: 目标 ID
            batch_size: 每批数据量

        Returns:
            软删除的记录数
        Yields:
            包含所有目录字段的字典
        """
        try:
            updated_count = (
                Directory.objects
                .filter(id__in=directory_ids)
                .update(deleted_at=timezone.now())
        qs = (
            Directory.objects
            .filter(target_id=target_id)
            .values(
                'url', 'status', 'content_length', 'words',
                'lines', 'content_type', 'duration', 'created_at'
            )
            logger.debug(
                "批量软删除Directory成功 - Count: %s, 更新记录: %s",
                len(directory_ids),
                updated_count
            )
            return updated_count
        except Exception as e:
            logger.error(
                "批量软删除Directory失败 - IDs: %s, 错误: %s",
                directory_ids,
                e
            )
            raise

    def hard_delete_by_ids(self, directory_ids: List[int]) -> Tuple[int, Dict[str, int]]:
        """
        根据 ID 列表硬删除Directory(使用数据库级 CASCADE)
            .order_by('url')
        )

        Args:
            directory_ids: Directory ID 列表

        Returns:
            (删除的记录数, 删除详情字典)
        """
        try:
            batch_size = 1000
            total_deleted = 0

            logger.debug(f"开始批量删除 {len(directory_ids)} 个Directory(数据库 CASCADE)...")

            for i in range(0, len(directory_ids), batch_size):
                batch_ids = directory_ids[i:i + batch_size]
                count, _ = Directory.all_objects.filter(id__in=batch_ids).delete()
                total_deleted += count
                logger.debug(f"批次删除完成: {len(batch_ids)} 个Directory,删除 {count} 条记录")

            deleted_details = {
                'directories': len(directory_ids),
                'total': total_deleted,
                'note': 'Database CASCADE - detailed stats unavailable'
            }

            logger.debug(
                "批量硬删除成功(CASCADE)- Directory数: %s, 总删除记录: %s",
                len(directory_ids),
                total_deleted
            )

            return total_deleted, deleted_details

        except Exception as e:
            logger.error(
                "批量硬删除失败(CASCADE)- Directory数: %s, 错误: %s",
                len(directory_ids),
                str(e),
                exc_info=True
            )
            raise
        for row in qs.iterator(chunk_size=batch_size):
            yield row
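A sketch of how a caller might drain iter_raw_data_for_export into a CSV without materialising the queryset (the helper name and repo wiring are illustrative, not part of the diff):

import csv
import sys

def export_directories_csv(repo, target_id: int, out=sys.stdout) -> None:
    writer = None
    for row in repo.iter_raw_data_for_export(target_id):
        if writer is None:
            # values() rows are dicts, so the first row fixes the header order
            writer = csv.DictWriter(out, fieldnames=list(row.keys()))
            writer.writeheader()
        writer.writerow(row)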
@@ -1,11 +1,12 @@
"""Endpoint Repository - Django ORM 实现"""

import logging
from typing import List, Optional, Tuple, Dict, Any
from typing import List, Iterator

from apps.asset.models import Endpoint
from apps.asset.dtos.asset import EndpointDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
from django.db import transaction

logger = logging.getLogger(__name__)
@@ -15,25 +16,31 @@ logger = logging.getLogger(__name__)
class DjangoEndpointRepository:
    """端点 Repository - 负责端点表的数据访问"""

    def bulk_create_ignore_conflicts(self, items: List[EndpointDTO]) -> int:
    def bulk_upsert(self, items: List[EndpointDTO]) -> int:
        """
        批量创建端点(忽略冲突)
        批量创建或更新端点(upsert)

        存在则更新所有字段,不存在则创建。
        使用 Django 原生 update_conflicts。

        注意:自动按模型唯一约束去重,保留最后一条记录。

        Args:
            items: 端点 DTO 列表

        Returns:
            int: 创建的记录数
            int: 处理的记录数
        """
        if not items:
            return 0

        try:
            endpoints = []
            for item in items:
                # Endpoint 模型当前只关联 target,不再依赖 website 外键
                # 这里按照 EndpointDTO 的字段映射构造 Endpoint 实例
                endpoints.append(Endpoint(
            # 自动按模型唯一约束去重
            unique_items = deduplicate_for_bulk(items, Endpoint)

            # 直接从 DTO 字段构建 Model
            endpoints = [
                Endpoint(
                    target_id=item.target_id,
                    url=item.url,
                    host=item.host or '',
@@ -41,68 +48,42 @@ class DjangoEndpointRepository:
                    status_code=item.status_code,
                    content_length=item.content_length,
                    webserver=item.webserver or '',
                    body_preview=item.body_preview or '',
                    response_body=item.response_body or '',
                    content_type=item.content_type or '',
                    tech=item.tech if item.tech else [],
                    vhost=item.vhost,
                    location=item.location or '',
                    matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
                ))
                    matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
                    response_headers=item.response_headers if item.response_headers else ''
                )
                for item in unique_items
            ]

            with transaction.atomic():
                created = Endpoint.objects.bulk_create(
                Endpoint.objects.bulk_create(
                    endpoints,
                    ignore_conflicts=True,
                    update_conflicts=True,
                    unique_fields=['url', 'target'],
                    update_fields=[
                        'host', 'title', 'status_code', 'content_length',
                        'webserver', 'response_body', 'content_type', 'tech',
                        'vhost', 'location', 'matched_gf_patterns', 'response_headers'
                    ],
                    batch_size=1000
                )
                return len(created)

            logger.debug(f"批量 upsert 端点成功: {len(unique_items)} 条")
            return len(unique_items)

        except Exception as e:
            logger.error(f"批量创建端点失败: {e}")
            logger.error(f"批量 upsert 端点失败: {e}")
            raise

    def get_by_website(self, website_id: int) -> List[EndpointDTO]:
        """
        获取网站下的所有端点

        Args:
            website_id: 网站 ID

        Returns:
            List[EndpointDTO]: 端点列表
        """
        endpoints = Endpoint.objects.filter(
            website_id=website_id
        ).order_by('-discovered_at')

        result = []
        for endpoint in endpoints:
            result.append(EndpointDTO(
                website_id=endpoint.website_id,
                target_id=endpoint.target_id,
                url=endpoint.url,
                title=endpoint.title,
                status_code=endpoint.status_code,
                content_length=endpoint.content_length,
                webserver=endpoint.webserver,
                body_preview=endpoint.body_preview,
                content_type=endpoint.content_type,
                tech=endpoint.tech,
                vhost=endpoint.vhost,
                location=endpoint.location,
                matched_gf_patterns=endpoint.matched_gf_patterns
            ))

        return result

    def get_queryset_by_target(self, target_id: int):
        return Endpoint.objects.filter(target_id=target_id).order_by('-discovered_at')

    def get_all(self):
        """获取所有端点(全局查询)"""
        return Endpoint.objects.all().order_by('-discovered_at')
        return Endpoint.objects.all().order_by('-created_at')

    def get_by_target(self, target_id: int) -> List[EndpointDTO]:
    def get_by_target(self, target_id: int):
        """
        获取目标下的所有端点

@@ -110,43 +91,9 @@ class DjangoEndpointRepository:
            target_id: 目标 ID

        Returns:
            List[EndpointDTO]: 端点列表
            QuerySet: 端点查询集
        """
        endpoints = Endpoint.objects.filter(
            target_id=target_id
        ).order_by('-discovered_at')

        result = []
        for endpoint in endpoints:
            result.append(EndpointDTO(
                website_id=endpoint.website_id,
                target_id=endpoint.target_id,
                url=endpoint.url,
                title=endpoint.title,
                status_code=endpoint.status_code,
                content_length=endpoint.content_length,
                webserver=endpoint.webserver,
                body_preview=endpoint.body_preview,
                content_type=endpoint.content_type,
                tech=endpoint.tech,
                vhost=endpoint.vhost,
                location=endpoint.location,
                matched_gf_patterns=endpoint.matched_gf_patterns
            ))

        return result

    def count_by_website(self, website_id: int) -> int:
        """
        统计网站下的端点数量

        Args:
            website_id: 网站 ID

        Returns:
            int: 端点数量
        """
        return Endpoint.objects.filter(website_id=website_id).count()
        return Endpoint.objects.filter(target_id=target_id).order_by('-created_at')

    def count_by_target(self, target_id: int) -> int:
        """
@@ -159,34 +106,89 @@ class DjangoEndpointRepository:
            int: 端点数量
        """
        return Endpoint.objects.filter(target_id=target_id).count()

    def soft_delete_by_ids(self, ids: List[int]) -> int:

    def bulk_create_ignore_conflicts(self, items: List[EndpointDTO]) -> int:
        """
        软删除端点(批量)
        批量创建端点(存在即跳过)

        与 bulk_upsert 不同,此方法不会更新已存在的记录。
        适用于快速扫描场景,只提供 URL,没有其他字段数据。

        注意:自动按模型唯一约束去重,保留最后一条记录。

        Args:
            ids: 端点 ID 列表
            items: 端点 DTO 列表

        Returns:
            int: 更新的记录数
            int: 处理的记录数
        """
        from django.utils import timezone
        return Endpoint.objects.filter(
            id__in=ids
        ).update(deleted_at=timezone.now())

    def hard_delete_by_ids(self, ids: List[int]) -> Tuple[int, Dict[str, int]]:
        if not items:
            return 0

        try:
            # 自动按模型唯一约束去重
            unique_items = deduplicate_for_bulk(items, Endpoint)

            # 直接从 DTO 字段构建 Model
            endpoints = [
                Endpoint(
                    target_id=item.target_id,
                    url=item.url,
                    host=item.host or '',
                    title=item.title or '',
                    status_code=item.status_code,
                    content_length=item.content_length,
                    webserver=item.webserver or '',
                    response_body=item.response_body or '',
                    content_type=item.content_type or '',
                    tech=item.tech if item.tech else [],
                    vhost=item.vhost,
                    location=item.location or '',
                    matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
                    response_headers=item.response_headers if item.response_headers else ''
                )
                for item in unique_items
            ]

            with transaction.atomic():
                Endpoint.objects.bulk_create(
                    endpoints,
                    ignore_conflicts=True,
                    batch_size=1000
                )

            logger.debug(f"批量创建端点成功(ignore_conflicts): {len(unique_items)} 条")
            return len(unique_items)

        except Exception as e:
            logger.error(f"批量创建端点失败: {e}")
            raise

    def iter_raw_data_for_export(
        self,
        target_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        硬删除端点(批量)
        流式获取原始数据用于 CSV 导出

        Args:
            ids: 端点 ID 列表

        Returns:
            Tuple[int, Dict[str, int]]: (删除总数, 详细信息)
        """
        deleted_count, details = Endpoint.all_objects.filter(
            id__in=ids
        ).delete()
            target_id: 目标 ID
            batch_size: 每批数据量

        return deleted_count, details
        Yields:
            包含所有端点字段的字典
        """
        qs = (
            Endpoint.objects
            .filter(target_id=target_id)
            .values(
                'url', 'host', 'location', 'title', 'status_code',
                'content_length', 'content_type', 'webserver', 'tech',
                'response_body', 'response_headers', 'vhost', 'matched_gf_patterns', 'created_at'
            )
            .order_by('url')
        )

        for row in qs.iterator(chunk_size=batch_size):
            yield row
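deduplicate_for_bulk is imported from apps.common.utils but its body is not part of this diff. Given the documented contract ("dedupe on the model's unique constraint, keep the last record"), a rough sketch of what such a helper could look like (the constraint introspection and DTO attribute fallback are assumptions, not the project's actual implementation):

def deduplicate_for_bulk(items, model):
    # e.g. ('url', 'target') from the model's first UniqueConstraint
    fields = model._meta.constraints[0].fields
    seen = {}
    for item in items:
        # DTOs carry target_id rather than target, hence the *_id fallback
        key = tuple(getattr(item, f + '_id', getattr(item, f, None)) for f in fields)
        seen[key] = item  # later items overwrite earlier ones -> keep last
    return list(seen.values())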
@@ -1,32 +1,36 @@
"""HostPortMapping Repository - Django ORM 实现"""

import logging
from typing import List, Iterator
from typing import List, Iterator, Dict, Optional

from django.db.models import QuerySet, Min

from apps.asset.models.asset_models import HostPortMapping
from apps.asset.dtos.asset import HostPortMappingDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)


@auto_ensure_db_connection
class DjangoHostPortMappingRepository:
    """HostPortMapping Repository - Django ORM 实现"""
    """HostPortMapping Repository - Django ORM 实现

    职责:纯数据访问,不包含业务逻辑
    """

    def bulk_create_ignore_conflicts(self, items: List[HostPortMappingDTO]) -> int:
        """
        批量创建主机端口关联(忽略冲突)

        注意:自动按模型唯一约束去重,保留最后一条记录。

        Args:
            items: 主机端口关联 DTO 列表

        Returns:
            int: 实际创建的记录数(注意:ignore_conflicts 时可能为 0)

        Note:
            - 基于唯一约束 (target + host + ip + port) 自动去重
            - 忽略已存在的记录,不更新
            int: 实际创建的记录数
        """
        try:
            logger.debug("准备批量创建主机端口关联 - 数量: %d", len(items))
@@ -34,18 +38,20 @@ class DjangoHostPortMappingRepository:
            if not items:
                logger.debug("主机端口关联为空,跳过创建")
                return 0

            # 自动按模型唯一约束去重
            unique_items = deduplicate_for_bulk(items, HostPortMapping)

            # 构建记录对象
            records = []
            for item in items:
                records.append(HostPortMapping(
            records = [
                HostPortMapping(
                    target_id=item.target_id,
                    host=item.host,
                    ip=item.ip,
                    port=item.port
                ))
                )
                for item in unique_items
            ]

            # 批量创建(忽略冲突,基于唯一约束去重)
            created = HostPortMapping.objects.bulk_create(
                records,
                ignore_conflicts=True
@@ -89,79 +95,47 @@ class DjangoHostPortMappingRepository:
            for ip in queryset:
                yield ip

    def get_ip_aggregation_by_target(self, target_id: int, search: str = None):
        from django.db.models import Min
    def get_queryset_by_target(self, target_id: int) -> QuerySet:
        """获取目标下的 QuerySet"""
        return HostPortMapping.objects.filter(target_id=target_id)

        qs = HostPortMapping.objects.filter(target_id=target_id)
        if search:
            qs = qs.filter(ip__icontains=search)
    def get_all_queryset(self) -> QuerySet:
        """获取所有记录的 QuerySet"""
        return HostPortMapping.objects.all()

        ip_aggregated = (
            qs
            .values('ip')
            .annotate(
                discovered_at=Min('discovered_at')
            )
            .order_by('-discovered_at')
    def get_queryset_by_ip(self, ip: str, target_id: Optional[int] = None) -> QuerySet:
        """获取指定 IP 的 QuerySet"""
        qs = HostPortMapping.objects.filter(ip=ip)
        if target_id:
            qs = qs.filter(target_id=target_id)
        return qs

    def iter_raw_data_for_export(
        self,
        target_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        流式获取原始数据用于 CSV 导出

        Args:
            target_id: 目标 ID
            batch_size: 每批数据量

        Yields:
            {
                'ip': '192.168.1.1',
                'host': 'example.com',
                'port': 80,
                'created_at': datetime
            }
        """
        qs = (
            HostPortMapping.objects
            .filter(target_id=target_id)
            .values('ip', 'host', 'port', 'created_at')
            .order_by('ip', 'host', 'port')
        )

        results = []
        for item in ip_aggregated:
            ip = item['ip']
            mappings = (
                HostPortMapping.objects
                .filter(target_id=target_id, ip=ip)
                .values('host', 'port')
                .distinct()
            )

            hosts = sorted({m['host'] for m in mappings})
            ports = sorted({m['port'] for m in mappings})

            results.append({
                'ip': ip,
                'hosts': hosts,
                'ports': ports,
                'discovered_at': item['discovered_at'],
            })

        return results

    def get_all_ip_aggregation(self, search: str = None):
        """获取所有 IP 聚合数据(全局查询)"""
        from django.db.models import Min

        qs = HostPortMapping.objects.all()
        if search:
            qs = qs.filter(ip__icontains=search)

        ip_aggregated = (
            qs
            .values('ip')
            .annotate(
                discovered_at=Min('discovered_at')
            )
            .order_by('-discovered_at')
        )

        results = []
        for item in ip_aggregated:
            ip = item['ip']
            mappings = (
                HostPortMapping.objects
                .filter(ip=ip)
                .values('host', 'port')
                .distinct()
            )

            hosts = sorted({m['host'] for m in mappings})
            ports = sorted({m['port'] for m in mappings})

            results.append({
                'ip': ip,
                'hosts': hosts,
                'ports': ports,
                'discovered_at': item['discovered_at'],
            })

        return results

        for row in qs.iterator(chunk_size=batch_size):
            yield row
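The per-IP aggregation that used to live in this repository can still be expressed as a single queryset at the service layer. A sketch against the renamed timestamp field (first_seen is an illustrative alias, not a model field):

from django.db.models import Min
from apps.asset.models.asset_models import HostPortMapping

ip_summary = (
    HostPortMapping.objects
    .filter(target_id=target_id)  # target_id supplied by the caller
    .values('ip')
    .annotate(first_seen=Min('created_at'))
    .order_by('-first_seen')
)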
@@ -1,117 +1,72 @@
|
||||
"""Subdomain Repository - Django ORM 实现"""
|
||||
|
||||
import logging
|
||||
from typing import List, Iterator
|
||||
|
||||
from django.db import transaction, IntegrityError, OperationalError, DatabaseError
|
||||
from django.utils import timezone
|
||||
from typing import Tuple, Dict
|
||||
from django.db import transaction
|
||||
|
||||
from apps.asset.models.asset_models import Subdomain
|
||||
from apps.asset.dtos import SubdomainDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@auto_ensure_db_connection
|
||||
class DjangoSubdomainRepository:
|
||||
"""基于 Django ORM 的子域名仓储实现。"""
|
||||
"""基于 Django ORM 的子域名仓储实现"""
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[SubdomainDTO]) -> None:
|
||||
"""
|
||||
批量创建子域名,忽略冲突
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 子域名 DTO 列表
|
||||
|
||||
Raises:
|
||||
IntegrityError: 数据完整性错误(如唯一约束冲突)
|
||||
OperationalError: 数据库操作错误(如连接失败)
|
||||
DatabaseError: 其他数据库错误
|
||||
"""
|
||||
if not items:
|
||||
return
|
||||
|
||||
try:
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, Subdomain)
|
||||
|
||||
subdomain_objects = [
|
||||
Subdomain(
|
||||
name=item.name,
|
||||
target_id=item.target_id,
|
||||
)
|
||||
for item in items
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
with transaction.atomic():
|
||||
# 使用 ignore_conflicts 策略:
|
||||
# - 新子域名:INSERT 完整记录
|
||||
# - 已存在子域名:忽略(不更新,因为没有探测字段数据)
|
||||
# 注意:ignore_conflicts 无法返回实际创建的数量
|
||||
Subdomain.objects.bulk_create( # type: ignore[attr-defined]
|
||||
Subdomain.objects.bulk_create(
|
||||
subdomain_objects,
|
||||
ignore_conflicts=True, # 忽略重复记录
|
||||
ignore_conflicts=True,
|
||||
)
|
||||
|
||||
logger.debug(f"成功处理 {len(items)} 条子域名记录")
|
||||
|
||||
except IntegrityError as e:
|
||||
logger.error(
|
||||
f"批量插入子域名失败 - 数据完整性错误: {e}, "
|
||||
f"记录数: {len(items)}, "
|
||||
f"示例域名: {items[0].name if items else 'N/A'}"
|
||||
)
|
||||
raise
|
||||
|
||||
except OperationalError as e:
|
||||
logger.error(
|
||||
f"批量插入子域名失败 - 数据库操作错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
|
||||
except DatabaseError as e:
|
||||
logger.error(
|
||||
f"批量插入子域名失败 - 数据库错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
logger.debug(f"成功处理 {len(unique_items)} 条子域名记录")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"批量插入子域名失败 - 未知错误: {e}, "
|
||||
f"记录数: {len(items)}, "
|
||||
f"错误类型: {type(e).__name__}",
|
||||
exc_info=True
|
||||
)
|
||||
logger.error(f"批量插入子域名失败: {e}")
|
||||
raise
|
||||
|
||||
def get_or_create(self, name: str, target_id: int) -> Tuple[Subdomain, bool]:
|
||||
"""
|
||||
获取或创建子域名
|
||||
|
||||
Args:
|
||||
name: 子域名名称
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
(Subdomain对象, 是否新创建)
|
||||
"""
|
||||
return Subdomain.objects.get_or_create(
|
||||
name=name,
|
||||
target_id=target_id,
|
||||
)
|
||||
def get_all(self):
|
||||
"""获取所有子域名"""
|
||||
return Subdomain.objects.all().order_by('-created_at')
|
||||
|
||||
def get_by_target(self, target_id: int):
|
||||
"""获取目标下的所有子域名"""
|
||||
return Subdomain.objects.filter(target_id=target_id).order_by('-created_at')
|
||||
|
||||
def count_by_target(self, target_id: int) -> int:
|
||||
"""统计目标下的域名数量"""
|
||||
return Subdomain.objects.filter(target_id=target_id).count()
|
||||
|
||||
def get_domains_for_export(self, target_id: int, batch_size: int = 1000) -> Iterator[str]:
|
||||
"""
|
||||
流式导出域名(用于生成扫描工具输入文件)
|
||||
|
||||
使用 iterator() 进行流式查询,避免一次性加载所有数据到内存
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
batch_size: 每次从数据库读取的行数
|
||||
|
||||
Yields:
|
||||
str: 域名
|
||||
"""
|
||||
"""流式导出域名"""
|
||||
queryset = Subdomain.objects.filter(
|
||||
target_id=target_id
|
||||
).only('name').iterator(chunk_size=batch_size)
|
||||
@@ -119,138 +74,36 @@ class DjangoSubdomainRepository:
|
||||
for subdomain in queryset:
|
||||
yield subdomain.name
|
||||
|
    def get_by_target(self, target_id: int):
        return Subdomain.objects.filter(target_id=target_id).order_by('-discovered_at')

    def count_by_target(self, target_id: int) -> int:
        """
        Count the domains under a target

        Args:
            target_id: target ID

        Returns:
            int: domain count
        """
        return Subdomain.objects.filter(target_id=target_id).count()

    def get_by_names_and_target_id(self, names: set, target_id: int) -> dict:
        """
        Bulk-query Subdomain by a list of names and a target ID

        Args:
            names: set of domain names
            target_id: target ID

        Returns:
            dict: {domain_name: Subdomain instance}
        """
        """Bulk-query Subdomain by a list of names and a target ID"""
        subdomains = Subdomain.objects.filter(
            name__in=names,
            target_id=target_id
        ).only('id', 'name')

        return {sd.name: sd for sd in subdomains}

    def get_all(self):

    def iter_raw_data_for_export(
        self,
        target_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        Get all subdomains

        Returns:
            QuerySet: subdomain queryset
        """
        return Subdomain.objects.all()

    def soft_delete_by_ids(self, subdomain_ids: List[int]) -> int:
        """
        Bulk soft-delete subdomains by a list of IDs
        Stream raw data for CSV export

        Args:
            subdomain_ids: list of subdomain IDs
            target_id: target ID
            batch_size: rows per batch

        Returns:
            number of soft-deleted records

        Note:
            - Soft delete: rows are only flagged as deleted, never physically removed
            - All related data is preserved and recoverable
        Yields:
            {'name': 'sub.example.com', 'created_at': datetime}
        """
        try:
            updated_count = (
                Subdomain.objects
                .filter(id__in=subdomain_ids)
                .update(deleted_at=timezone.now())
            )
            logger.debug(
                "Bulk soft-delete of subdomains succeeded - Count: %s, updated records: %s",
                len(subdomain_ids),
                updated_count
            )
            return updated_count
        except Exception as e:
            logger.error(
                "Bulk soft-delete of subdomains failed - IDs: %s, error: %s",
                subdomain_ids,
                e
            )
            raise

    def hard_delete_by_ids(self, subdomain_ids: List[int]) -> Tuple[int, Dict[str, int]]:
        """
        Hard-delete subdomains by a list of IDs (database-level CASCADE)
        qs = (
            Subdomain.objects
            .filter(target_id=target_id)
            .values('name', 'created_at')
            .order_by('name')
        )

        Args:
            subdomain_ids: list of subdomain IDs

        Returns:
            (number of deleted records, deletion details dict)

        Strategy:
            Uses database-level CASCADE deletion for best performance

        Note:
            - Hard delete: permanently removed from the database
            - The database handles all foreign-key cascade deletions automatically
            - Django signals (pre_delete/post_delete) are not fired
        """
        try:
            batch_size = 1000  # process 1000 subdomains per batch
            total_deleted = 0

            logger.debug(f"Starting bulk deletion of {len(subdomain_ids)} subdomains (database CASCADE)...")

            # Process subdomain IDs in batches to avoid deleting too many at once
            for i in range(0, len(subdomain_ids), batch_size):
                batch_ids = subdomain_ids[i:i + batch_size]

                # Delete the subdomains directly; the database cascades to all related data
                count, _ = Subdomain.all_objects.filter(id__in=batch_ids).delete()
                total_deleted += count

                logger.debug(f"Batch deletion finished: {len(batch_ids)} subdomains, {count} records deleted")

            # Detailed stats are unavailable when using database CASCADE
            deleted_details = {
                'subdomains': len(subdomain_ids),
                'total': total_deleted,
                'note': 'Database CASCADE - detailed stats unavailable'
            }

            logger.debug(
                "Bulk hard delete succeeded (CASCADE) - subdomains: %s, total records deleted: %s",
                len(subdomain_ids),
                total_deleted
            )

            return total_deleted, deleted_details

        except Exception as e:
            logger.error(
                "Bulk hard delete failed (CASCADE) - subdomains: %s, error: %s",
                len(subdomain_ids),
                str(e),
                exc_info=True
            )
            raise

        for row in qs.iterator(chunk_size=batch_size):
            yield row
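The code above distinguishes Subdomain.objects from Subdomain.all_objects and soft-deletes by stamping deleted_at; the model definitions are not part of this diff. A plausible base-model sketch consistent with that usage (the manager and class names are assumptions):

from django.db import models

class ActiveManager(models.Manager):
    """Default manager that hides soft-deleted rows."""
    def get_queryset(self):
        return super().get_queryset().filter(deleted_at__isnull=True)

class SoftDeleteModel(models.Model):
    deleted_at = models.DateTimeField(null=True, blank=True)

    objects = ActiveManager()       # filtered: excludes soft-deleted rows
    all_objects = models.Manager()  # unfiltered: what hard_delete_by_ids uses

    class Meta:
        abstract = True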
@@ -3,110 +3,88 @@ Django ORM implementation of the WebSite Repository
"""

import logging
from typing import List, Generator, Tuple, Dict, Optional
from django.db import transaction, IntegrityError, OperationalError, DatabaseError
from django.utils import timezone
from typing import List, Generator, Optional, Iterator
from django.db import transaction

from apps.asset.models.asset_models import WebSite
from apps.asset.dtos import WebSiteDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)


@auto_ensure_db_connection
class DjangoWebSiteRepository:
    """Django ORM implementation of the WebSite Repository"""

    def bulk_create_ignore_conflicts(self, items: List[WebSiteDTO]) -> None:
    def bulk_upsert(self, items: List[WebSiteDTO]) -> int:
        """
        Bulk-create WebSite records, ignoring conflicts
        Bulk-create or update WebSite records (upsert)

        Existing rows have all fields updated; missing rows are created.
        Uses Django's native update_conflicts.

        Note: the batch is deduplicated automatically by the model's unique constraint, keeping the last record.

        Args:
            items: list of WebSite DTOs

        Raises:
            IntegrityError: data integrity error
            OperationalError: database operational error
            DatabaseError: database error
        Returns:
            int: number of records processed
        """
        if not items:
            return

            return 0

        try:
            # Convert to Django model objects
            website_objects = [
            # Deduplicate automatically by the model's unique constraint
            unique_items = deduplicate_for_bulk(items, WebSite)

            # Build models directly from the DTO fields
            websites = [
                WebSite(
                    target_id=item.target_id,
                    url=item.url,
                    host=item.host,
                    location=item.location,
                    title=item.title,
                    webserver=item.webserver,
                    body_preview=item.body_preview,
                    content_type=item.content_type,
                    tech=item.tech,
                    host=item.host or '',
                    location=item.location or '',
                    title=item.title or '',
                    webserver=item.webserver or '',
                    response_body=item.response_body or '',
                    content_type=item.content_type or '',
                    tech=item.tech if item.tech else [],
                    status_code=item.status_code,
                    content_length=item.content_length,
                    vhost=item.vhost
                    vhost=item.vhost,
                    response_headers=item.response_headers if item.response_headers else ''
                )
                for item in items
                for item in unique_items
            ]

            with transaction.atomic():
                # Bulk insert or update
                # If the URL and target already exist, ignore the conflict
                WebSite.objects.bulk_create(
                    website_objects,
                    ignore_conflicts=True
                    websites,
                    update_conflicts=True,
                    unique_fields=['url', 'target'],
                    update_fields=[
                        'host', 'location', 'title', 'webserver',
                        'response_body', 'content_type', 'tech',
                        'status_code', 'content_length', 'vhost', 'response_headers'
                    ],
                    batch_size=1000
                )

            logger.debug(f"Processed {len(items)} WebSite records")

        except IntegrityError as e:
            logger.error(
                f"Bulk insert of WebSite failed - data integrity error: {e}, "
                f"record count: {len(items)}"
            )
            raise

        except OperationalError as e:
            logger.error(
                f"Bulk insert of WebSite failed - database operational error: {e}, "
                f"record count: {len(items)}"
            )
            raise

        except DatabaseError as e:
            logger.error(
                f"Bulk insert of WebSite failed - database error: {e}, "
                f"record count: {len(items)}"
            )
            raise

            logger.debug(f"Bulk upsert of WebSite succeeded: {len(unique_items)} records")
            return len(unique_items)

        except Exception as e:
            logger.error(
                f"Bulk insert of WebSite failed - unknown error: {e}, "
                f"record count: {len(items)}, "
                f"error type: {type(e).__name__}",
                exc_info=True
            )
            logger.error(f"Bulk upsert of WebSite failed: {e}")
            raise
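On PostgreSQL, bulk_create(update_conflicts=True, ...) compiles to INSERT ... ON CONFLICT ... DO UPDATE, and the in-batch dedup above matters: Postgres rejects a statement that tries to update the same conflicting row twice ("ON CONFLICT DO UPDATE command cannot affect row a second time"). A minimal standalone illustration of that interaction (the model and fields are hypothetical, not from this project):

from myapp.models import Gadget  # hypothetical model with a unique 'slug' field

rows = [Gadget(slug="a", title="first"), Gadget(slug="a", title="second")]

# Without dedup the statement below can raise on PostgreSQL because both rows
# conflict on slug="a"; keeping only the last occurrence avoids that.
deduped = list({g.slug: g for g in rows}.values())

Gadget.objects.bulk_create(
    deduped,
    update_conflicts=True,
    unique_fields=["slug"],
    update_fields=["title"],
)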
    def get_urls_for_export(self, target_id: int, batch_size: int = 1000) -> Generator[str, None, None]:
        """
        Stream-export all site URLs under a target

        Args:
            target_id: target ID
            batch_size: batch size

        Yields:
            str: site URL
        """
        try:
            # Query the sites under the target, selecting only the URL field to avoid unnecessary data transfer
            queryset = WebSite.objects.filter(
                target_id=target_id
            ).values_list('url', flat=True).iterator(chunk_size=batch_size)

@@ -117,144 +95,94 @@ class DjangoWebSiteRepository:

            logger.error(f"Streaming export of site URLs failed - Target ID: {target_id}, error: {e}")
            raise

    def get_all(self):
        """Get all websites"""
        return WebSite.objects.all().order_by('-created_at')

    def get_by_target(self, target_id: int):
        return WebSite.objects.filter(target_id=target_id).order_by('-discovered_at')
        """Get all websites under a target"""
        return WebSite.objects.filter(target_id=target_id).order_by('-created_at')

    def count_by_target(self, target_id: int) -> int:
        """
        Count the total sites under a target

        Args:
            target_id: target ID

        Returns:
            int: total site count
        """
        try:
            count = WebSite.objects.filter(target_id=target_id).count()
            logger.debug(f"Total sites for target {target_id}: {count}")
            return count

        except Exception as e:
            logger.error(f"Counting sites failed - Target ID: {target_id}, error: {e}")
            raise

    def count_by_scan(self, scan_id: int) -> int:
        """
        Count the total sites under a scan
        """
        try:
            count = WebSite.objects.filter(scan_id=scan_id).count()
            logger.debug(f"Total sites for scan {scan_id}: {count}")
            return count
        except Exception as e:
            logger.error(f"Counting sites failed - Scan ID: {scan_id}, error: {e}")
            raise
        """Count the total sites under a target"""
        return WebSite.objects.filter(target_id=target_id).count()

    def get_by_url(self, url: str, target_id: int) -> Optional[int]:
        """Look up a site ID by URL and target_id"""
        website = WebSite.objects.filter(url=url, target_id=target_id).first()
        return website.id if website else None

    def bulk_create_ignore_conflicts(self, items: List[WebSiteDTO]) -> int:
        """
        Look up a site ID by URL and target_id
        Bulk-create WebSite records (existing rows are skipped)

        Note: the batch is deduplicated automatically by the model's unique constraint, keeping the last record.
        """
        if not items:
            return 0

        try:
            # Deduplicate automatically by the model's unique constraint
            unique_items = deduplicate_for_bulk(items, WebSite)

            websites = [
                WebSite(
                    target_id=item.target_id,
                    url=item.url,
                    host=item.host or '',
                    location=item.location or '',
                    title=item.title or '',
                    webserver=item.webserver or '',
                    response_body=item.response_body or '',
                    content_type=item.content_type or '',
                    tech=item.tech if item.tech else [],
                    status_code=item.status_code,
                    content_length=item.content_length,
                    vhost=item.vhost,
                    response_headers=item.response_headers if item.response_headers else ''
                )
                for item in unique_items
            ]

            with transaction.atomic():
                WebSite.objects.bulk_create(
                    websites,
                    ignore_conflicts=True,
                    batch_size=1000
                )

            logger.debug(f"Bulk creation of WebSite succeeded (ignore_conflicts): {len(unique_items)} records")
            return len(unique_items)

        except Exception as e:
            logger.error(f"Bulk creation of WebSite failed: {e}")
            raise

    def iter_raw_data_for_export(
        self,
        target_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            url: site URL
            target_id: target ID

        Returns:
            Optional[int]: the site ID, or None if not found

        Raises:
            ValueError: if multiple sites are found
        """
        try:
            website = WebSite.objects.filter(url=url, target_id=target_id).first()
            if website:
                return website.id
            return None

        except Exception as e:
            logger.error(f"Site lookup failed - URL: {url}, Target ID: {target_id}, error: {e}")
            raise

    def get_all(self):
        """
        Get all websites
            batch_size: rows per batch

        Returns:
            QuerySet: website queryset
        Yields:
            dict containing all website fields
        """
        return WebSite.objects.all()

    def soft_delete_by_ids(self, website_ids: List[int]) -> int:
        """
        Bulk soft-delete WebSite records by a list of IDs

        Args:
            website_ids: list of WebSite IDs

        Returns:
            number of soft-deleted records
        """
        try:
            updated_count = (
                WebSite.objects
                .filter(id__in=website_ids)
                .update(deleted_at=timezone.now())
        qs = (
            WebSite.objects
            .filter(target_id=target_id)
            .values(
                'url', 'host', 'location', 'title', 'status_code',
                'content_length', 'content_type', 'webserver', 'tech',
                'response_body', 'response_headers', 'vhost', 'created_at'
            )
            logger.debug(
                "Bulk soft-delete of WebSite succeeded - Count: %s, updated records: %s",
                len(website_ids),
                updated_count
            )
            return updated_count
        except Exception as e:
            logger.error(
                "Bulk soft-delete of WebSite failed - IDs: %s, error: %s",
                website_ids,
                e
            )
            raise

    def hard_delete_by_ids(self, website_ids: List[int]) -> Tuple[int, Dict[str, int]]:
        """
        Hard-delete WebSite records by a list of IDs (database-level CASCADE)
            .order_by('url')
        )

        Args:
            website_ids: list of WebSite IDs

        Returns:
            (number of deleted records, deletion details dict)
        """
        try:
            batch_size = 1000
            total_deleted = 0

            logger.debug(f"Starting bulk deletion of {len(website_ids)} WebSite records (database CASCADE)...")

            for i in range(0, len(website_ids), batch_size):
                batch_ids = website_ids[i:i + batch_size]
                count, _ = WebSite.all_objects.filter(id__in=batch_ids).delete()
                total_deleted += count
                logger.debug(f"Batch deletion finished: {len(batch_ids)} WebSite records, {count} rows deleted")

            deleted_details = {
                'websites': len(website_ids),
                'total': total_deleted,
                'note': 'Database CASCADE - detailed stats unavailable'
            }

            logger.debug(
                "Bulk hard delete succeeded (CASCADE) - WebSites: %s, total records deleted: %s",
                len(website_ids),
                total_deleted
            )

            return total_deleted, deleted_details

        except Exception as e:
            logger.error(
                "Bulk hard delete failed (CASCADE) - WebSites: %s, error: %s",
                len(website_ids),
                str(e),
                exc_info=True
            )
            raise
        for row in qs.iterator(chunk_size=batch_size):
            yield row
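iter_raw_data_for_export yields plain dicts, which pairs naturally with Python's csv module and Django's StreamingHttpResponse. A hedged sketch of how a view layer might consume it (the view name and URL wiring are assumptions, not code from this diff):

import csv
from django.http import StreamingHttpResponse

class _Echo:
    """File-like object whose write() just returns the value, for csv.writer."""
    def write(self, value):
        return value

def export_websites_csv(request, target_id: int):
    repo = DjangoWebSiteRepository()
    fields = ['url', 'host', 'location', 'title', 'status_code',
              'content_length', 'content_type', 'webserver', 'tech',
              'response_body', 'response_headers', 'vhost', 'created_at']
    writer = csv.DictWriter(_Echo(), fieldnames=fields)

    def rows():
        yield writer.writerow(dict(zip(fields, fields)))  # header row
        # Each yielded chunk is flushed to the client; nothing is buffered
        # beyond the iterator's chunk_size worth of rows.
        for row in repo.iter_raw_data_for_export(target_id=target_id):
            yield writer.writerow(row)

    response = StreamingHttpResponse(rows(), content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="websites.csv"'
    return response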
@@ -1,12 +1,13 @@
"""Directory Snapshot Repository - directory snapshot data-access layer"""

import logging
from typing import List
from typing import List, Iterator
from django.db import transaction

from apps.asset.models import DirectorySnapshot
from apps.asset.dtos.snapshot import DirectorySnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)

@@ -25,6 +26,8 @@ class DjangoDirectorySnapshotRepository:

        Uses the ignore_conflicts strategy; existing snapshots (same scan + url) are skipped

        Note: the batch is deduplicated by (scan_id, url) automatically, keeping the last record.

        Args:
            items: list of directory snapshot DTOs

@@ -37,6 +40,9 @@ class DjangoDirectorySnapshotRepository:
            return

        try:
            # Deduplicate automatically by the model's unique constraint
            unique_items = deduplicate_for_bulk(items, DirectorySnapshot)

            # Convert to Django model objects
            snapshot_objects = [
                DirectorySnapshot(
@@ -49,7 +55,7 @@ class DjangoDirectorySnapshotRepository:
                    content_type=item.content_type,
                    duration=item.duration
                )
                for item in items
                for item in unique_items
            ]

            with transaction.atomic():
@@ -60,7 +66,7 @@ class DjangoDirectorySnapshotRepository:
                    ignore_conflicts=True
                )

            logger.debug("Saved %d directory snapshot records", len(items))
            logger.debug("Saved %d directory snapshot records", len(unique_items))

        except Exception as e:
            logger.error(
@@ -72,7 +78,35 @@ class DjangoDirectorySnapshotRepository:
            raise

    def get_by_scan(self, scan_id: int):
        return DirectorySnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
        return DirectorySnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')

    def get_all(self):
        return DirectorySnapshot.objects.all().order_by('-discovered_at')
        return DirectorySnapshot.objects.all().order_by('-created_at')

    def iter_raw_data_for_export(
        self,
        scan_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID
            batch_size: rows per batch

        Yields:
            dict containing all directory fields
        """
        qs = (
            DirectorySnapshot.objects
            .filter(scan_id=scan_id)
            .values(
                'url', 'status', 'content_length', 'words',
                'lines', 'content_type', 'duration', 'created_at'
            )
            .order_by('url')
        )

        for row in qs.iterator(chunk_size=batch_size):
            yield row
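Worth noting: ignore_conflicts only suppresses duplicates that the database itself can detect, so it presupposes a real unique constraint on the table. The snapshot models are not shown in this diff; a sketch of the kind of Meta declaration the "(scan_id, url)" notes imply (field types and the 'scan' app label are assumptions):

from django.db import models

class DirectorySnapshotSketch(models.Model):
    # Illustrative only: without a constraint like this, ignore_conflicts
    # has nothing to conflict against and every row would be inserted.
    scan = models.ForeignKey('scan.Scan', on_delete=models.CASCADE)  # hypothetical app label
    url = models.URLField(max_length=2048)

    class Meta:
        constraints = [
            models.UniqueConstraint(fields=['scan', 'url'], name='uniq_dirsnap_scan_url'),
        ]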
@@ -1,11 +1,12 @@
"""EndpointSnapshot Repository - Django ORM implementation"""

import logging
from typing import List
from typing import List, Iterator

from apps.asset.models.snapshot_models import EndpointSnapshot
from apps.asset.dtos.snapshot import EndpointSnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)

@@ -18,6 +19,8 @@ class DjangoEndpointSnapshotRepository:
        """
        Save endpoint snapshots

        Note: the batch is deduplicated by (scan_id, url) automatically, keeping the last record.

        Args:
            items: list of endpoint snapshot DTOs

@@ -31,13 +34,17 @@ class DjangoEndpointSnapshotRepository:
        if not items:
            logger.debug("Endpoint snapshots empty, skipping save")
            return

        # Deduplicate automatically by the model's unique constraint
        unique_items = deduplicate_for_bulk(items, EndpointSnapshot)

        # Build the snapshot objects
        snapshots = []
        for item in items:
        for item in unique_items:
            snapshots.append(EndpointSnapshot(
                scan_id=item.scan_id,
                url=item.url,
                host=item.host if item.host else '',
                title=item.title,
                status_code=item.status_code,
                content_length=item.content_length,
@@ -45,9 +52,10 @@ class DjangoEndpointSnapshotRepository:
                webserver=item.webserver,
                content_type=item.content_type,
                tech=item.tech if item.tech else [],
                body_preview=item.body_preview,
                response_body=item.response_body,
                vhost=item.vhost,
                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
                matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else [],
                response_headers=item.response_headers if item.response_headers else ''
            ))

        # Bulk create (ignore conflicts; deduped by the unique constraint)
@@ -68,7 +76,36 @@ class DjangoEndpointSnapshotRepository:
            raise

    def get_by_scan(self, scan_id: int):
        return EndpointSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
        return EndpointSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')

    def get_all(self):
        return EndpointSnapshot.objects.all().order_by('-discovered_at')
        return EndpointSnapshot.objects.all().order_by('-created_at')

    def iter_raw_data_for_export(
        self,
        scan_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID
            batch_size: rows per batch

        Yields:
            dict containing all endpoint fields
        """
        qs = (
            EndpointSnapshot.objects
            .filter(scan_id=scan_id)
            .values(
                'url', 'host', 'location', 'title', 'status_code',
                'content_length', 'content_type', 'webserver', 'tech',
                'response_body', 'response_headers', 'vhost', 'matched_gf_patterns', 'created_at'
            )
            .order_by('url')
        )

        for row in qs.iterator(chunk_size=batch_size):
            yield row
@@ -6,6 +6,7 @@ from typing import List, Iterator
from apps.asset.models.snapshot_models import HostPortMappingSnapshot
from apps.asset.dtos.snapshot import HostPortMappingSnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)

@@ -18,6 +19,8 @@ class DjangoHostPortMappingSnapshotRepository:
        """
        Save host-port mapping snapshots

        Note: the batch is deduplicated by (scan_id, host, ip, port) automatically, keeping the last record.

        Args:
            items: list of host-port mapping snapshot DTOs

@@ -31,10 +34,13 @@ class DjangoHostPortMappingSnapshotRepository:
        if not items:
            logger.debug("Host-port mapping snapshots empty, skipping save")
            return

        # Deduplicate automatically by the model's unique constraint
        unique_items = deduplicate_for_bulk(items, HostPortMappingSnapshot)

        # Build the snapshot objects
        snapshots = []
        for item in items:
        for item in unique_items:
            snapshots.append(HostPortMappingSnapshot(
                scan_id=item.scan_id,
                host=item.host,
@@ -59,20 +65,28 @@ class DjangoHostPortMappingSnapshotRepository:
            )
            raise

    def get_ip_aggregation_by_scan(self, scan_id: int, search: str = None):
    def get_ip_aggregation_by_scan(self, scan_id: int, filter_query: str = None):
        from django.db.models import Min
        from apps.common.utils.filter_utils import apply_filters

        qs = HostPortMappingSnapshot.objects.filter(scan_id=scan_id)
        if search:
            qs = qs.filter(ip__icontains=search)

        # Apply smart filtering
        if filter_query:
            field_mapping = {
                'ip': 'ip',
                'port': 'port',
                'host': 'host',
            }
            qs = apply_filters(qs, filter_query, field_mapping)

        ip_aggregated = (
            qs
            .values('ip')
            .annotate(
                discovered_at=Min('discovered_at')
                created_at=Min('created_at')
            )
            .order_by('-discovered_at')
            .order_by('-created_at')
        )

        results = []
@@ -92,24 +106,32 @@ class DjangoHostPortMappingSnapshotRepository:
                'ip': ip,
                'hosts': hosts,
                'ports': ports,
                'discovered_at': item['discovered_at'],
                'created_at': item['created_at'],
            })

        return results

    def get_all_ip_aggregation(self, search: str = None):
    def get_all_ip_aggregation(self, filter_query: str = None):
        """Get all aggregated IP data"""
        from django.db.models import Min
        from apps.common.utils.filter_utils import apply_filters

        qs = HostPortMappingSnapshot.objects.all()
        if search:
            qs = qs.filter(ip__icontains=search)

        # Apply smart filtering
        if filter_query:
            field_mapping = {
                'ip': 'ip',
                'port': 'port',
                'host': 'host',
            }
            qs = apply_filters(qs, filter_query, field_mapping)

        ip_aggregated = (
            qs
            .values('ip')
            .annotate(discovered_at=Min('discovered_at'))
            .order_by('-discovered_at')
            .annotate(created_at=Min('created_at'))
            .order_by('-created_at')
        )

        results = []
@@ -127,7 +149,7 @@ class DjangoHostPortMappingSnapshotRepository:
                'ip': ip,
                'hosts': hosts,
                'ports': ports,
                'discovered_at': item['discovered_at'],
                'created_at': item['created_at'],
            })
        return results

@@ -143,3 +165,33 @@ class DjangoHostPortMappingSnapshotRepository:
        )
        for ip in queryset:
            yield ip

    def iter_raw_data_for_export(
        self,
        scan_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID
            batch_size: rows per batch

        Yields:
            {
                'ip': '192.168.1.1',
                'host': 'example.com',
                'port': 80,
                'created_at': datetime
            }
        """
        qs = (
            HostPortMappingSnapshot.objects
            .filter(scan_id=scan_id)
            .values('ip', 'host', 'port', 'created_at')
            .order_by('ip', 'host', 'port')
        )

        for row in qs.iterator(chunk_size=batch_size):
            yield row
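The diff repeatedly swaps a plain `search` substring match for `apply_filters(qs, filter_query, field_mapping)`, whose grammar is defined elsewhere in apps.common.utils.filter_utils. Purely to make the shape of the call concrete, a toy stand-in under the assumption that the query is a space-separated list of field=value terms (the real syntax may differ):

from django.db.models import Q, QuerySet

def apply_filters_sketch(qs: QuerySet, filter_query: str, field_mapping: dict) -> QuerySet:
    """Toy stand-in for apply_filters: AND together case-insensitive matches."""
    condition = Q()
    for term in filter_query.split():
        key, _, value = term.partition('=')
        orm_field = field_mapping.get(key)
        if orm_field and value:
            condition &= Q(**{f"{orm_field}__icontains": value})
    return qs.filter(condition)

# e.g. apply_filters_sketch(qs, "ip=10.0 host=example", {'ip': 'ip', 'host': 'host'})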
@@ -1,11 +1,12 @@
"""Django ORM implementation of the SubdomainSnapshot Repository"""

import logging
from typing import List
from typing import List, Iterator

from apps.asset.models.snapshot_models import SubdomainSnapshot
from apps.asset.dtos import SubdomainSnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)

@@ -18,6 +19,8 @@ class DjangoSubdomainSnapshotRepository:
        """
        Save subdomain snapshots

        Note: the batch is deduplicated by (scan_id, name) automatically, keeping the last record.

        Args:
            items: list of subdomain snapshot DTOs

@@ -31,10 +34,13 @@ class DjangoSubdomainSnapshotRepository:
        if not items:
            logger.debug("Subdomain snapshots empty, skipping save")
            return

        # Deduplicate automatically by the model's unique constraint
        unique_items = deduplicate_for_bulk(items, SubdomainSnapshot)

        # Build the snapshot objects
        snapshots = []
        for item in items:
        for item in unique_items:
            snapshots.append(SubdomainSnapshot(
                scan_id=item.scan_id,
                name=item.name,
@@ -55,7 +61,32 @@ class DjangoSubdomainSnapshotRepository:
            raise

    def get_by_scan(self, scan_id: int):
        return SubdomainSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
        return SubdomainSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')

    def get_all(self):
        return SubdomainSnapshot.objects.all().order_by('-discovered_at')
        return SubdomainSnapshot.objects.all().order_by('-created_at')

    def iter_raw_data_for_export(
        self,
        scan_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID
            batch_size: rows per batch

        Yields:
            {'name': 'sub.example.com', 'created_at': datetime}
        """
        qs = (
            SubdomainSnapshot.objects
            .filter(scan_id=scan_id)
            .values('name', 'created_at')
            .order_by('name')
        )

        for row in qs.iterator(chunk_size=batch_size):
            yield row
@@ -8,6 +8,7 @@ from django.db import transaction
from apps.asset.models import VulnerabilitySnapshot
from apps.asset.dtos.snapshot import VulnerabilitySnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)

@@ -21,12 +22,17 @@ class DjangoVulnerabilitySnapshotRepository:

        Uses the ``ignore_conflicts`` strategy; existing snapshots are skipped.
        The exact unique constraint is governed by the database model.

        Note: the batch is deduplicated by the unique-constraint fields automatically, keeping the last record.
        """
        if not items:
            logger.warning("Vulnerability snapshot list empty, skipping save")
            return

        try:
            # Deduplicate automatically by the model's unique constraint
            unique_items = deduplicate_for_bulk(items, VulnerabilitySnapshot)

            snapshot_objects = [
                VulnerabilitySnapshot(
                    scan_id=item.scan_id,
@@ -38,7 +44,7 @@ class DjangoVulnerabilitySnapshotRepository:
                    description=item.description,
                    raw_output=item.raw_output,
                )
                for item in items
                for item in unique_items
            ]

            with transaction.atomic():
@@ -47,7 +53,7 @@ class DjangoVulnerabilitySnapshotRepository:
                    ignore_conflicts=True,
                )

            logger.debug("Saved %d vulnerability snapshot records", len(items))
            logger.debug("Saved %d vulnerability snapshot records", len(unique_items))

        except Exception as e:
            logger.error(
@@ -60,7 +66,7 @@ class DjangoVulnerabilitySnapshotRepository:

    def get_by_scan(self, scan_id: int):
        """Get the vulnerability snapshot QuerySet for a scan."""
        return VulnerabilitySnapshot.objects.filter(scan_id=scan_id).order_by("-discovered_at")
        return VulnerabilitySnapshot.objects.filter(scan_id=scan_id).order_by("-created_at")

    def get_all(self):
        return VulnerabilitySnapshot.objects.all().order_by('-discovered_at')
        return VulnerabilitySnapshot.objects.all().order_by('-created_at')
@@ -1,11 +1,12 @@
"""WebsiteSnapshot Repository - Django ORM implementation"""

import logging
from typing import List
from typing import List, Iterator

from apps.asset.models.snapshot_models import WebsiteSnapshot
from apps.asset.dtos.snapshot import WebsiteSnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)

@@ -18,6 +19,8 @@ class DjangoWebsiteSnapshotRepository:
        """
        Save website snapshots

        Note: the batch is deduplicated by (scan_id, url) automatically, keeping the last record.

        Args:
            items: list of website snapshot DTOs

@@ -31,23 +34,27 @@ class DjangoWebsiteSnapshotRepository:
        if not items:
            logger.debug("Website snapshots empty, skipping save")
            return

        # Deduplicate automatically by the model's unique constraint
        unique_items = deduplicate_for_bulk(items, WebsiteSnapshot)

        # Build the snapshot objects
        snapshots = []
        for item in items:
        for item in unique_items:
            snapshots.append(WebsiteSnapshot(
                scan_id=item.scan_id,
                url=item.url,
                host=item.host,
                title=item.title,
                status=item.status,
                status_code=item.status_code,
                content_length=item.content_length,
                location=item.location,
                web_server=item.web_server,
                webserver=item.webserver,
                content_type=item.content_type,
                tech=item.tech if item.tech else [],
                body_preview=item.body_preview,
                vhost=item.vhost
                response_body=item.response_body,
                vhost=item.vhost,
                response_headers=item.response_headers if item.response_headers else ''
            ))

        # Bulk create (ignore conflicts; deduped by the unique constraint)
@@ -68,7 +75,36 @@ class DjangoWebsiteSnapshotRepository:
            raise

    def get_by_scan(self, scan_id: int):
        return WebsiteSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
        return WebsiteSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')

    def get_all(self):
        return WebsiteSnapshot.objects.all().order_by('-discovered_at')
        return WebsiteSnapshot.objects.all().order_by('-created_at')

    def iter_raw_data_for_export(
        self,
        scan_id: int,
        batch_size: int = 1000
    ) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID
            batch_size: rows per batch

        Yields:
            dict containing all website fields
        """
        qs = (
            WebsiteSnapshot.objects
            .filter(scan_id=scan_id)
            .values(
                'url', 'host', 'location', 'title', 'status_code',
                'content_length', 'content_type', 'webserver', 'tech',
                'response_body', 'response_headers', 'vhost', 'created_at'
            )
            .order_by('url')
        )

        for row in qs.iterator(chunk_size=batch_size):
            yield row
@@ -26,9 +26,9 @@ class SubdomainSerializer(serializers.ModelSerializer):
    class Meta:
        model = Subdomain
        fields = [
            'id', 'name', 'discovered_at', 'target'
            'id', 'name', 'created_at', 'target'
        ]
        read_only_fields = ['id', 'discovered_at']
        read_only_fields = ['id', 'created_at']


class SubdomainListSerializer(serializers.ModelSerializer):
@@ -41,9 +41,9 @@ class SubdomainListSerializer(serializers.ModelSerializer):
    class Meta:
        model = Subdomain
        fields = [
            'id', 'name', 'discovered_at'
            'id', 'name', 'created_at'
        ]
        read_only_fields = ['id', 'discovered_at']
        read_only_fields = ['id', 'created_at']


# class IPAddressListSerializer(serializers.ModelSerializer):
@@ -67,9 +67,10 @@ class SubdomainListSerializer(serializers.ModelSerializer):


class WebSiteSerializer(serializers.ModelSerializer):
    """Site serializer"""
    """Site serializer (target detail page)"""

    subdomain = serializers.CharField(source='subdomain.name', allow_blank=True, default='')
    responseHeaders = serializers.CharField(source='response_headers', read_only=True)  # raw HTTP response headers

    class Meta:
        model = WebSite
@@ -83,11 +84,12 @@ class WebSiteSerializer(serializers.ModelSerializer):
            'content_type',
            'status_code',
            'content_length',
            'body_preview',
            'response_body',
            'tech',
            'vhost',
            'responseHeaders',  # HTTP response headers
            'subdomain',
            'discovered_at',
            'created_at',
        ]
        read_only_fields = fields

@@ -107,7 +109,7 @@ class VulnerabilitySerializer(serializers.ModelSerializer):
            'cvss_score',
            'description',
            'raw_output',
            'discovered_at',
            'created_at',
        ]
        read_only_fields = fields

@@ -126,7 +128,7 @@ class VulnerabilitySnapshotSerializer(serializers.ModelSerializer):
            'cvss_score',
            'description',
            'raw_output',
            'discovered_at',
            'created_at',
        ]
        read_only_fields = fields

@@ -134,12 +136,13 @@ class VulnerabilitySnapshotSerializer(serializers.ModelSerializer):
class EndpointListSerializer(serializers.ModelSerializer):
    """Endpoint list serializer (for the target endpoint list page)"""

    # Map GF match patterns to the 'tags' field used by the frontend
    tags = serializers.ListField(
    # GF match patterns (sensitive URL patterns matched by the gf-patterns tool)
    gfPatterns = serializers.ListField(
        child=serializers.CharField(),
        source='matched_gf_patterns',
        read_only=True,
    )
    responseHeaders = serializers.CharField(source='response_headers', read_only=True)  # raw HTTP response headers

    class Meta:
        model = Endpoint
@@ -152,11 +155,12 @@ class EndpointListSerializer(serializers.ModelSerializer):
            'content_length',
            'content_type',
            'webserver',
            'body_preview',
            'response_body',
            'tech',
            'vhost',
            'tags',
            'discovered_at',
            'responseHeaders',  # HTTP response headers
            'gfPatterns',
            'created_at',
        ]
        read_only_fields = fields

@@ -164,8 +168,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
class DirectorySerializer(serializers.ModelSerializer):
    """Directory serializer"""

    website_url = serializers.CharField(source='website.url', read_only=True)
    discovered_at = serializers.DateTimeField(read_only=True)
    created_at = serializers.DateTimeField(read_only=True)

    class Meta:
        model = Directory
@@ -178,8 +181,7 @@ class DirectorySerializer(serializers.ModelSerializer):
            'lines',
            'content_type',
            'duration',
            'website_url',
            'discovered_at',
            'created_at',
        ]
        read_only_fields = fields

@@ -192,12 +194,12 @@ class IPAddressAggregatedSerializer(serializers.Serializer):
    - ip: IP address
    - hosts: list of all hostnames associated with the IP
    - ports: list of all ports associated with the IP
    - discovered_at: first-seen time
    - created_at: creation time
    """
    ip = serializers.IPAddressField(read_only=True)
    hosts = serializers.ListField(child=serializers.CharField(), read_only=True)
    ports = serializers.ListField(child=serializers.IntegerField(), read_only=True)
    discovered_at = serializers.DateTimeField(read_only=True)
    created_at = serializers.DateTimeField(read_only=True)


# ==================== Snapshot serializers ====================
@@ -207,7 +209,7 @@ class SubdomainSnapshotSerializer(serializers.ModelSerializer):

    class Meta:
        model = SubdomainSnapshot
        fields = ['id', 'name', 'discovered_at']
        fields = ['id', 'name', 'created_at']
        read_only_fields = fields


@@ -215,8 +217,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
    """Website snapshot serializer (for scan history)"""

    subdomain_name = serializers.CharField(source='subdomain.name', read_only=True)
    webserver = serializers.CharField(source='web_server', read_only=True)  # mapped field name
    status_code = serializers.IntegerField(source='status', read_only=True)  # mapped field name
    responseHeaders = serializers.CharField(source='response_headers', read_only=True)  # raw HTTP response headers

    class Meta:
        model = WebsiteSnapshot
@@ -225,15 +226,16 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
            'url',
            'location',
            'title',
            'webserver',  # uses the mapped field name
            'webserver',
            'content_type',
            'status_code',  # uses the mapped field name
            'status_code',
            'content_length',
            'body_preview',
            'response_body',
            'tech',
            'vhost',
            'responseHeaders',  # HTTP response headers
            'subdomain_name',
            'discovered_at',
            'created_at',
        ]
        read_only_fields = fields

@@ -241,9 +243,6 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
class DirectorySnapshotSerializer(serializers.ModelSerializer):
    """Directory snapshot serializer (for scan history)"""

    # DirectorySnapshot is no longer linked to Website; website_url is temporarily mapped to its own url to keep the field compatible
    website_url = serializers.CharField(source='url', read_only=True)

    class Meta:
        model = DirectorySnapshot
        fields = [
@@ -255,8 +254,7 @@ class DirectorySnapshotSerializer(serializers.ModelSerializer):
            'lines',
            'content_type',
            'duration',
            'website_url',
            'discovered_at',
            'created_at',
        ]
        read_only_fields = fields

@@ -264,12 +262,13 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
class EndpointSnapshotSerializer(serializers.ModelSerializer):
    """Endpoint snapshot serializer (for scan history)"""

    # Map GF match patterns to the 'tags' field used by the frontend
    tags = serializers.ListField(
    # GF match patterns (sensitive URL patterns matched by the gf-patterns tool)
    gfPatterns = serializers.ListField(
        child=serializers.CharField(),
        source='matched_gf_patterns',
        read_only=True,
    )
    responseHeaders = serializers.CharField(source='response_headers', read_only=True)  # raw HTTP response headers

    class Meta:
        model = EndpointSnapshot
@@ -283,10 +282,11 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
            'content_type',
            'status_code',
            'content_length',
            'body_preview',
            'response_body',
            'tech',
            'vhost',
            'tags',
            'discovered_at',
            'responseHeaders',  # HTTP response headers
            'gfPatterns',
            'created_at',
        ]
        read_only_fields = fields
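The recurring pattern in these serializers is renaming snake_case model attributes to the camelCase names the frontend expects via source=. A self-contained illustration of that mechanism (the serializer name and sample data are made up):

from rest_framework import serializers

class ExampleEndpointSerializer(serializers.Serializer):
    # Expose model attribute 'matched_gf_patterns' as 'gfPatterns' in the JSON payload.
    gfPatterns = serializers.ListField(
        child=serializers.CharField(),
        source='matched_gf_patterns',
        read_only=True,
    )
    responseHeaders = serializers.CharField(source='response_headers', read_only=True)

data = ExampleEndpointSerializer(instance={
    'matched_gf_patterns': ['ssrf'],
    'response_headers': 'Server: nginx',
}).data
# data == {'gfPatterns': ['ssrf'], 'responseHeaders': 'Server: nginx'}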
@@ -1,8 +1,12 @@
import logging
from typing import Tuple, Iterator
"""Directory Service - directory business-logic layer"""

import logging
from typing import List, Iterator, Optional

from apps.asset.models.asset_models import Directory
from apps.asset.repositories import DjangoDirectoryRepository
from apps.asset.dtos import DirectoryDTO
from apps.common.validators import is_valid_url, is_url_match_target
from apps.common.utils.filter_utils import apply_filters

logger = logging.getLogger(__name__)

@@ -10,46 +14,122 @@ logger = logging.getLogger(__name__)
class DirectoryService:
    """Directory business-logic layer"""

    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'status': 'status',
    }

    def __init__(self, repository=None):
        """
        Initialize the directory service

        Args:
            repository: directory repository instance (for dependency injection)
        """
        """Initialize the directory service"""
        self.repo = repository or DjangoDirectoryRepository()

    # ==================== Create operations ====================

    def bulk_create_ignore_conflicts(self, directory_dtos: list) -> None:
    def bulk_upsert(self, directory_dtos: List[DirectoryDTO]) -> int:
        """
        Bulk-create directory records, ignoring conflicts (used by scan tasks)
        Bulk-create or update directories (upsert)

        Existing rows have all fields updated; missing rows are created.

        Args:
            directory_dtos: list of DirectoryDTO
        """
        return self.repo.bulk_create_ignore_conflicts(directory_dtos)

    # ==================== Query operations ====================

    def get_all(self):
        """
        Get all directories


        Returns:
            QuerySet: directory queryset
            int: number of records processed
        """
        logger.debug("Fetching all directories")
        return self.repo.get_all()
        if not directory_dtos:
            return 0

        try:
            return self.repo.bulk_upsert(directory_dtos)
        except Exception as e:
            logger.error(f"Bulk upsert of directories failed: {e}")
            raise

    def get_directories_by_target(self, target_id: int):
        logger.debug("Fetching all directories under target - Target ID: %d", target_id)
        return self.repo.get_by_target(target_id)
    def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
        """
        Bulk-create directories (URL only, using ignore_conflicts)

        Validates URL format and target match, filters out invalid/mismatched URLs, deduplicates, then bulk-creates.
        Existing records are skipped.

        Args:
            target_id: target ID
            target_name: target name (used for match validation)
            target_type: target type ('domain', 'ip', 'cidr')
            urls: list of URLs

        Returns:
            int: number of records actually created
        """
        if not urls:
            return 0

        # Filter valid URLs and deduplicate
        valid_urls = []
        seen = set()

        for url in urls:
            if not isinstance(url, str):
                continue
            url = url.strip()
            if not url or url in seen:
                continue
            if not is_valid_url(url):
                continue

            # Match validation (the frontend already blocks mismatched submissions; the backend double-checks)
            if not is_url_match_target(url, target_name, target_type):
                continue

            seen.add(url)
            valid_urls.append(url)

        if not valid_urls:
            return 0

        # Count before creation
        count_before = self.repo.count_by_target(target_id)

        # Build the DTO list and bulk-create
        directory_dtos = [
            DirectoryDTO(url=url, target_id=target_id)
            for url in valid_urls
        ]
        self.repo.bulk_create_ignore_conflicts(directory_dtos)

        # Count after creation
        count_after = self.repo.count_by_target(target_id)
        return count_after - count_before
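A hypothetical call, to show the filtering in action (IDs and URLs invented for illustration). Note the design trade-off in the method itself: the created count is derived from count_after - count_before, so a concurrent writer on the same target can skew the number slightly — a deliberate simplicity choice given that ignore_conflicts cannot report how many rows it inserted.

service = DirectoryService()
created = service.bulk_create_urls(
    target_id=7,
    target_name="example.com",
    target_type="domain",
    urls=[
        "https://example.com/admin/",   # valid and matching -> kept
        "https://other.org/x",          # does not match the target -> dropped
        "not a url",                    # fails is_valid_url -> dropped
        "https://example.com/admin/",   # in-batch duplicate -> dropped
    ],
)
# created == 1 on a quiet database (assuming the URL did not already exist)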
    def get_directories_by_target(self, target_id: int, filter_query: Optional[str] = None):
        """Get all directories under a target"""
        queryset = self.repo.get_by_target(target_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self, filter_query: Optional[str] = None):
        """Get all directories"""
        queryset = self.repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_directory_urls_by_target(self, target_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all directory URLs under a target, for exporting large data sets."""
        logger.debug("Streaming directory URLs under target - Target ID: %d", target_id)
        """Stream all directory URLs under a target"""
        return self.repo.get_urls_for_export(target_id=target_id, batch_size=chunk_size)

    def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            target_id: target ID

        Yields:
            raw data dict
        """
        return self.repo.iter_raw_data_for_export(target_id=target_id)


__all__ = ['DirectoryService']
@@ -5,10 +5,12 @@ Endpoint service layer
"""

import logging
from typing import List, Optional, Dict, Any, Iterator
from typing import List, Iterator, Optional

from apps.asset.dtos.asset import EndpointDTO
from apps.asset.repositories.asset import DjangoEndpointRepository
from apps.common.validators import is_valid_url, is_url_match_target
from apps.common.utils.filter_utils import apply_filters

logger = logging.getLogger(__name__)

@@ -20,101 +22,102 @@ class EndpointService:
    Provides the business logic around Endpoints (URLs)
    """

    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status_code': 'status_code',
        'tech': 'tech',
    }

    def __init__(self):
        """Initialize the Endpoint service"""
        self.repo = DjangoEndpointRepository()

    def bulk_create_endpoints(
        self,
        endpoints: List[EndpointDTO],
        ignore_conflicts: bool = True
    ) -> int:
    def bulk_upsert(self, endpoints: List[EndpointDTO]) -> int:
        """
        Bulk-create endpoint records
        Bulk-create or update endpoints (upsert)

        Existing rows have all fields updated; missing rows are created.

        Args:
            endpoints: list of endpoint data
            ignore_conflicts: whether to ignore conflicts (dedupe)

        Returns:
            int: number of records created
            int: number of records processed
        """
        if not endpoints:
            return 0

        try:
            if ignore_conflicts:
                return self.repo.bulk_create_ignore_conflicts(endpoints)
            else:
                # If a non-ignore-conflicts variant is needed, it can be added to the repository
                return self.repo.bulk_create_ignore_conflicts(endpoints)
            return self.repo.bulk_upsert(endpoints)
        except Exception as e:
            logger.error(f"Bulk creation of endpoints failed: {e}")
            logger.error(f"Bulk upsert of endpoints failed: {e}")
            raise

    def get_endpoints_by_website(
        self,
        website_id: int,
        limit: Optional[int] = None
    ) -> List[Dict[str, Any]]:
    def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
        """
        Get the endpoints under a website
        Bulk-create endpoints (URL only, using ignore_conflicts)

        Args:
            website_id: website ID
            limit: limit on the number of results

        Returns:
            List[Dict]: endpoint list
        """
        endpoints_dto = self.repo.get_by_website(website_id)

        if limit:
            endpoints_dto = endpoints_dto[:limit]

        endpoints = []
        for dto in endpoints_dto:
            endpoints.append({
                'url': dto.url,
                'title': dto.title,
                'status_code': dto.status_code,
                'content_length': dto.content_length,
                'webserver': dto.webserver
            })

        return endpoints

    def get_endpoints_by_target(
        self,
        target_id: int,
        limit: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """
        Get the endpoints under a target
        Validates URL format and target match, filters out invalid/mismatched URLs, deduplicates, then bulk-creates.
        Existing records are skipped.

        Args:
            target_id: target ID
            limit: limit on the number of results
            target_name: target name (used for match validation)
            target_type: target type ('domain', 'ip', 'cidr')
            urls: list of URLs

        Returns:
            List[Dict]: endpoint list
            int: number of records actually created
        """
        endpoints_dto = self.repo.get_by_target(target_id)
        if not urls:
            return 0

        if limit:
            endpoints_dto = endpoints_dto[:limit]
        # Filter valid URLs and deduplicate
        valid_urls = []
        seen = set()

        endpoints = []
        for dto in endpoints_dto:
            endpoints.append({
                'url': dto.url,
                'title': dto.title,
                'status_code': dto.status_code,
                'content_length': dto.content_length,
                'webserver': dto.webserver
            })
        for url in urls:
            if not isinstance(url, str):
                continue
            url = url.strip()
            if not url or url in seen:
                continue
            if not is_valid_url(url):
                continue

            # Match validation (the frontend already blocks mismatched submissions; the backend double-checks)
            if not is_url_match_target(url, target_name, target_type):
                continue

            seen.add(url)
            valid_urls.append(url)

        return endpoints
        if not valid_urls:
            return 0

        # Count before creation
        count_before = self.repo.count_by_target(target_id)

        # Build the DTO list and bulk-create
        endpoint_dtos = [
            EndpointDTO(url=url, target_id=target_id)
            for url in valid_urls
        ]
        self.repo.bulk_create_ignore_conflicts(endpoint_dtos)

        # Count after creation
        count_after = self.repo.count_by_target(target_id)
        return count_after - count_before

    def get_endpoints_by_target(self, target_id: int, filter_query: Optional[str] = None):
        """Get all endpoints under a target"""
        queryset = self.repo.get_by_target(target_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
        return queryset
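The json_array_fields=['tech'] argument suggests apply_filters treats tech specially, presumably because it is stored as a JSON list rather than text. For context, the ORM-level building block that makes this possible: on PostgreSQL, Django's JSONField supports a containment lookup, so a membership test against a list-valued field can look like the sketch below (the exact lookup apply_filters generates is an assumption):

# Membership test on a JSONField that stores a list, e.g. tech = ["nginx", "php"].
qs = DjangoEndpointRepository().get_by_target(target_id=7)
qs = qs.filter(tech__contains=["nginx"])  # rows whose tech list includes "nginx"
# Compiles roughly to: WHERE tech @> '["nginx"]' (PostgreSQL; not supported on SQLite)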
    def count_endpoints_by_target(self, target_id: int) -> int:
        """
@@ -127,52 +130,28 @@ class EndpointService:
            int: endpoint count
        """
        return self.repo.count_by_target(target_id)

    def get_queryset_by_target(self, target_id: int):
        return self.repo.get_queryset_by_target(target_id)

    def get_all(self):
    def get_all(self, filter_query: Optional[str] = None):
        """Get all endpoints (global query)"""
        return self.repo.get_all()
        queryset = self.repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
        return queryset

    def iter_endpoint_urls_by_target(self, target_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all endpoint URLs under a target, for export."""
        queryset = self.repo.get_queryset_by_target(target_id)
        queryset = self.repo.get_by_target(target_id)
        for url in queryset.values_list('url', flat=True).iterator(chunk_size=chunk_size):
            yield url

    def count_endpoints_by_website(self, website_id: int) -> int:

    def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
        """
        Count the endpoints under a website
        Stream raw data for CSV export

        Args:
            website_id: website ID

        Returns:
            int: endpoint count
        """
        return self.repo.count_by_website(website_id)

    def soft_delete_endpoints(self, endpoint_ids: List[int]) -> int:
        """
        Soft-delete endpoints
            target_id: target ID

        Args:
            endpoint_ids: list of endpoint IDs

        Returns:
            int: number of rows updated
        Yields:
            raw data dict
        """
        return self.repo.soft_delete_by_ids(endpoint_ids)

    def hard_delete_endpoints(self, endpoint_ids: List[int]) -> tuple:
        """
        Hard-delete endpoints

        Args:
            endpoint_ids: list of endpoint IDs

        Returns:
            tuple: (total deleted, details)
        """
        return self.repo.hard_delete_by_ids(endpoint_ids)
        return self.repo.iter_raw_data_for_export(target_id=target_id)
@@ -1,16 +1,31 @@
"""HostPortMapping Service - business-logic layer"""

import logging
from typing import List, Iterator
from typing import List, Iterator, Optional, Dict

from django.db.models import Min

from apps.asset.repositories.asset import DjangoHostPortMappingRepository
from apps.asset.dtos.asset import HostPortMappingDTO
from apps.common.utils.filter_utils import apply_filters

logger = logging.getLogger(__name__)


class HostPortMappingService:
    """Host-port mapping service - business logic for host-port mapping data"""
    """Host-port mapping service - business logic for host-port mapping data

    Responsibilities:
    - business-logic processing (filtering, aggregation)
    - delegating data access to the Repository
    """

    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'ip': 'ip',
        'port': 'port',
        'host': 'host',
    }

    def __init__(self):
        self.repo = DjangoHostPortMappingRepository()
@@ -49,13 +64,106 @@ class HostPortMappingService:
    def iter_host_port_by_target(self, target_id: int, batch_size: int = 1000):
        return self.repo.get_for_export(target_id=target_id, batch_size=batch_size)

    def get_ip_aggregation_by_target(self, target_id: int, search: str = None):
        return self.repo.get_ip_aggregation_by_target(target_id, search=search)
    def get_ip_aggregation_by_target(
        self,
        target_id: int,
        filter_query: Optional[str] = None
    ) -> List[Dict]:
        """Get aggregated IP data under a target

        Args:
            target_id: target ID
            filter_query: smart-filter syntax string

        Returns:
            list of aggregated IP data
        """
        # Fetch the base QuerySet from the Repository
        qs = self.repo.get_queryset_by_target(target_id)

        # The service layer applies the filtering logic
        if filter_query:
            qs = apply_filters(qs, filter_query, self.FILTER_FIELD_MAPPING)

        # The service layer handles the aggregation logic
        return self._aggregate_by_ip(qs, filter_query, target_id=target_id)

    def get_all_ip_aggregation(self, search: str = None):
        """Get all aggregated IP data (global query)"""
        return self.repo.get_all_ip_aggregation(search=search)
    def get_all_ip_aggregation(self, filter_query: Optional[str] = None) -> List[Dict]:
        """Get all aggregated IP data (global query)

        Args:
            filter_query: smart-filter syntax string

        Returns:
            list of aggregated IP data
        """
        # Fetch the base QuerySet from the Repository
        qs = self.repo.get_all_queryset()

        # The service layer applies the filtering logic
        if filter_query:
            qs = apply_filters(qs, filter_query, self.FILTER_FIELD_MAPPING)

        # The service layer handles the aggregation logic
        return self._aggregate_by_ip(qs, filter_query)

    def _aggregate_by_ip(
        self,
        qs,
        filter_query: Optional[str] = None,
        target_id: Optional[int] = None
    ) -> List[Dict]:
        """Aggregate data by IP

        Args:
            qs: the already-filtered QuerySet
            filter_query: filter condition (for the sub-queries)
            target_id: target ID (to scope the sub-queries)

        Returns:
            list of aggregated data
        """
        ip_aggregated = (
            qs
            .values('ip')
            .annotate(created_at=Min('created_at'))
            .order_by('-created_at')
        )

        results = []
        for item in ip_aggregated:
            ip = item['ip']

            # Fetch all hosts and ports for the IP (the filter must be applied here too)
            mappings_qs = self.repo.get_queryset_by_ip(ip, target_id=target_id)
            if filter_query:
                mappings_qs = apply_filters(mappings_qs, filter_query, self.FILTER_FIELD_MAPPING)

            mappings = mappings_qs.values('host', 'port').distinct()
            hosts = sorted({m['host'] for m in mappings})
            ports = sorted({m['port'] for m in mappings})

            results.append({
                'ip': ip,
                'hosts': hosts,
                'ports': ports,
                'created_at': item['created_at'],
            })

        return results
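As written, _aggregate_by_ip issues one extra host/port query per aggregated IP — a classic N+1 shape that is fine for small result sets but grows linearly with distinct IPs. If the backend is PostgreSQL, the same result can be collected in a single query with ArrayAgg; this is a possible refinement, not the project's actual approach:

from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import Min

rows = (
    qs.values('ip')
      .annotate(
          created_at=Min('created_at'),
          hosts=ArrayAgg('host', distinct=True),
          ports=ArrayAgg('port', distinct=True),
      )
      .order_by('-created_at')
)
# Each row: {'ip': ..., 'created_at': ..., 'hosts': [...], 'ports': [...]}
# (apply sorted() to hosts/ports afterwards if a stable order is required)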
    def iter_ips_by_target(self, target_id: int, batch_size: int = 1000) -> Iterator[str]:
        """Stream all unique IP addresses under a target."""
        return self.repo.get_ips_for_export(target_id=target_id, batch_size=batch_size)

    def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            target_id: target ID

        Yields:
            raw data dict {ip, host, port, created_at}
        """
        return self.repo.iter_raw_data_for_export(target_id=target_id)
@@ -1,15 +1,33 @@
import logging
from typing import Tuple, List, Dict
from typing import List, Dict, Optional
from dataclasses import dataclass

from apps.asset.repositories import DjangoSubdomainRepository
from apps.asset.dtos import SubdomainDTO
from apps.common.validators import is_valid_domain
from apps.common.utils.filter_utils import apply_filters

logger = logging.getLogger(__name__)


@dataclass
class BulkCreateResult:
    """Result of a bulk create"""
    created_count: int
    skipped_count: int
    invalid_count: int
    mismatched_count: int
    total_received: int

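BulkCreateResult being a dataclass keeps the API layer trivial; one hedged possibility for how a view might flatten it into a JSON payload (this consumer is an assumption, not shown in the diff):

from dataclasses import asdict

result = BulkCreateResult(
    created_count=8, skipped_count=1, invalid_count=2,
    mismatched_count=1, total_received=12,
)
payload = asdict(result)
# {'created_count': 8, 'skipped_count': 1, 'invalid_count': 2,
#  'mismatched_count': 1, 'total_received': 12}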
||||
|
||||
class SubdomainService:
|
||||
"""子域名业务逻辑层"""
|
||||
|
||||
# 智能过滤字段映射
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'name': 'name',
|
||||
}
|
||||
|
||||
def __init__(self, repository=None):
|
||||
"""
|
||||
初始化子域名服务
|
||||
@@ -21,44 +39,50 @@ class SubdomainService:
|
||||
|
||||
# ==================== 查询操作 ====================
|
||||
|
||||
def get_all(self):
|
||||
def get_all(self, filter_query: Optional[str] = None):
|
||||
"""
|
||||
获取所有子域名
|
||||
|
||||
Args:
|
||||
filter_query: 智能过滤语法字符串
|
||||
|
||||
Returns:
|
||||
QuerySet: 子域名查询集
|
||||
"""
|
||||
logger.debug("获取所有子域名")
|
||||
return self.repo.get_all()
|
||||
queryset = self.repo.get_all()
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset

    # ==================== Create operations ====================

    def get_or_create(self, name: str, target_id: int) -> Tuple[any, bool]:
    def get_subdomains_by_target(self, target_id: int, filter_query: Optional[str] = None):
        """
        Get or create a subdomain.
        Get the subdomains under a target.

        Args:
            target_id: target ID
            filter_query: smart-filter syntax string

        Returns:
            QuerySet: subdomain queryset
        """
        queryset = self.repo.get_by_target(target_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def count_subdomains_by_target(self, target_id: int) -> int:
        """
        Count the subdomains under a target.

        Args:
            name: subdomain name
            target_id: target ID

        Returns:
            (Subdomain instance, whether newly created)
            int: subdomain count
        """
        logger.debug("Get-or-create subdomain - Name: %s, Target ID: %d", name, target_id)
        return self.repo.get_or_create(name, target_id)

    def bulk_create_ignore_conflicts(self, items: List[SubdomainDTO]) -> None:
        """
        Bulk-create subdomains, ignoring conflicts.

        Args:
            items: list of subdomain DTOs

        Note:
            Uses the ignore_conflicts strategy; duplicate records are skipped.
        """
        logger.debug("Bulk-creating subdomains - count: %d", len(items))
        return self.repo.bulk_create_ignore_conflicts(items)
        logger.debug("Counting subdomains under target - Target ID: %d", target_id)
        return self.repo.count_by_target(target_id)

    def get_by_names_and_target_id(self, names: set, target_id: int) -> dict:
        """
@@ -85,25 +109,8 @@ class SubdomainService:
            List[str]: list of subdomain names
        """
        logger.debug("Fetching all subdomains under target - Target ID: %d", target_id)
        # Database access goes through the repository layer, which already streams via iterator()
        return list(self.repo.get_domains_for_export(target_id=target_id))

    def get_subdomains_by_target(self, target_id: int):
        return self.repo.get_by_target(target_id)

    def count_subdomains_by_target(self, target_id: int) -> int:
        """
        Count the subdomains under a target.

        Args:
            target_id: target ID

        Returns:
            int: subdomain count
        """
        logger.debug("Counting subdomains under target - Target ID: %d", target_id)
        return self.repo.count_by_target(target_id)

    def iter_subdomain_names_by_target(self, target_id: int, chunk_size: int = 1000):
        """
        Stream all subdomain names under a target (memory-friendly).
@@ -116,8 +123,123 @@ class SubdomainService:
            str: subdomain name
        """
        logger.debug("Streaming all subdomains under target - Target ID: %d, batch size: %d", target_id, chunk_size)
        # Database access goes through the repository layer, which already streams via iterator()
        return self.repo.get_domains_for_export(target_id=target_id, batch_size=chunk_size)

    def iter_raw_data_for_csv_export(self, target_id: int):
        """
        Stream raw data for CSV export.

        Args:
            target_id: target ID

        Yields:
            raw data dicts: {name, created_at}
        """
        return self.repo.iter_raw_data_for_export(target_id=target_id)

__all__ = ['SubdomainService']

    # ==================== Create operations ====================

    def bulk_create_ignore_conflicts(self, items: List[SubdomainDTO]) -> None:
        """
        Bulk-create subdomains, ignoring conflicts.

        Args:
            items: list of subdomain DTOs

        Note:
            Uses the ignore_conflicts strategy; duplicate records are skipped.
        """
        logger.debug("Bulk-creating subdomains - count: %d", len(items))
        return self.repo.bulk_create_ignore_conflicts(items)

    def bulk_create_subdomains(
        self,
        target_id: int,
        target_name: str,
        subdomains: List[str]
    ) -> BulkCreateResult:
        """
        Bulk-create subdomains (with validation).

        Args:
            target_id: target ID
            target_name: target domain (used for match validation)
            subdomains: list of subdomain strings

        Returns:
            BulkCreateResult: statistics for the create operation
        """
        total_received = len(subdomains)
        target_name = target_name.lower().strip()

        def is_subdomain_match(subdomain: str) -> bool:
            """Check whether the subdomain belongs to the target domain."""
            if subdomain == target_name:
                return True
            if subdomain.endswith('.' + target_name):
                return True
            return False

        # Keep only valid subdomains
        valid_subdomains = []
        invalid_count = 0
        mismatched_count = 0

        for subdomain in subdomains:
            if not isinstance(subdomain, str) or not subdomain.strip():
                continue

            subdomain = subdomain.lower().strip()

            # Validate format
            if not is_valid_domain(subdomain):
                invalid_count += 1
                continue

            # Validate target match
            if not is_subdomain_match(subdomain):
                mismatched_count += 1
                continue

            valid_subdomains.append(subdomain)

        # Deduplicate
        unique_subdomains = list(set(valid_subdomains))
        duplicate_count = len(valid_subdomains) - len(unique_subdomains)

        if not unique_subdomains:
            return BulkCreateResult(
                created_count=0,
                skipped_count=duplicate_count,
                invalid_count=invalid_count,
                mismatched_count=mismatched_count,
                total_received=total_received,
            )

        # Row count before the insert
        count_before = self.repo.count_by_target(target_id)

        # Build the DTO list and bulk-create
        subdomain_dtos = [
            SubdomainDTO(name=name, target_id=target_id)
            for name in unique_subdomains
        ]
        self.repo.bulk_create_ignore_conflicts(subdomain_dtos)

        # Row count after the insert
        count_after = self.repo.count_by_target(target_id)
        created_count = count_after - count_before

        # Rows skipped due to database conflicts
        db_skipped = len(unique_subdomains) - created_count

        return BulkCreateResult(
            created_count=created_count,
            skipped_count=duplicate_count + db_skipped,
            invalid_count=invalid_count,
            mismatched_count=mismatched_count,
            total_received=total_received,
        )


__all__ = ['SubdomainService', 'BulkCreateResult']
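
A usage sketch showing how the counters line up (inputs invented). Note that created_count comes from comparing row counts before and after the insert, so concurrent writers on the same target can distort it:

# Hypothetical caller; assumes a target "example.com" with id=1.
service = SubdomainService()
result = service.bulk_create_subdomains(
    target_id=1,
    target_name="example.com",
    subdomains=["a.example.com", "b.example.com", "b.example.com",
                "not a domain", "c.other.com"],
)
assert result.total_received == 5
assert result.invalid_count == 1      # "not a domain"
assert result.mismatched_count == 1   # "c.other.com"
# The two remaining valid names split between created_count and
# skipped_count, which also absorbs the in-batch duplicate.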

@@ -1,11 +1,13 @@
"""Vulnerability Service - vulnerability asset business-logic layer"""

import logging
from typing import List
from typing import List, Optional

from apps.asset.models import Vulnerability
from apps.asset.dtos.asset import VulnerabilityDTO
from apps.common.decorators import auto_ensure_db_connection
from apps.common.utils import deduplicate_for_bulk
from apps.common.utils.filter_utils import apply_filters

logger = logging.getLogger(__name__)

@@ -16,10 +18,20 @@ class VulnerabilityService:

    Currently provides basic bulk-create support, relying on ignore_conflicts and the database's unique constraints for deduplication.
    """

    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'type': 'vuln_type',
        'severity': 'severity',
        'source': 'source',
        'url': 'url',
    }

    def bulk_create_ignore_conflicts(self, items: List[VulnerabilityDTO]) -> None:
        """Bulk-create vulnerability asset records, ignoring conflicts.

        Note: automatically deduplicates on (target_id, url, vuln_type, source), keeping the last record.

        Note:
            - Whether deduplication happens depends on the model's unique/partial-unique constraints;
            - the current Vulnerability model defines no unique constraint, so all records are kept.
@@ -29,6 +41,9 @@ class VulnerabilityService:
            return

        try:
            # Deduplicate automatically based on the model's unique constraint (skipped if the model has none)
            unique_items = deduplicate_for_bulk(items, Vulnerability)

            vulns = [
                Vulnerability(
                    target_id=item.target_id,
@@ -40,7 +55,7 @@ class VulnerabilityService:
                    description=item.description,
                    raw_output=item.raw_output,
                )
                for item in items
                for item in unique_items
            ]

            Vulnerability.objects.bulk_create(vulns, ignore_conflicts=True)
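
The call above fixes deduplicate_for_bulk's signature as (items, Model); how it derives keys is not shown, so the following is only a sketch of the idea, keeping the last item per conflict key as the surrounding comments describe:

# Assumed behaviour, not the project's actual helper.
def deduplicate_for_bulk_sketch(items, key_fields=('target_id', 'url', 'vuln_type', 'source')):
    seen = {}
    for item in items:
        key = tuple(getattr(item, f) for f in key_fields)
        seen[key] = item  # later items overwrite earlier ones
    return list(seen.values())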

@@ -57,24 +72,34 @@ class VulnerabilityService:

    # ==================== Query methods ====================

    def get_all(self):
    def get_all(self, filter_query: Optional[str] = None):
        """Get the QuerySet of all vulnerabilities (for the global vulnerability list).

        Returns:
            QuerySet[Vulnerability]: all vulnerabilities, newest discovery first
        """
        return Vulnerability.objects.filter(deleted_at__isnull=True).order_by("-discovered_at")
        Args:
            filter_query: smart-filter syntax string

    def get_queryset_by_target(self, target_id: int):
        Returns:
            QuerySet[Vulnerability]: all vulnerabilities, newest creation first
        """
        queryset = Vulnerability.objects.all().order_by("-created_at")
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_vulnerabilities_by_target(self, target_id: int, filter_query: Optional[str] = None):
        """Get the vulnerability QuerySet for a target (for pagination).

        Args:
            target_id: target ID
            filter_query: smart-filter syntax string

        Returns:
            QuerySet[Vulnerability]: all vulnerabilities under the target, newest discovery first
            QuerySet[Vulnerability]: all vulnerabilities under the target, newest creation first
        """
        return Vulnerability.objects.filter(target_id=target_id).order_by("-discovered_at")
        queryset = Vulnerability.objects.filter(target_id=target_id).order_by("-created_at")
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def count_by_target(self, target_id: int) -> int:
        """Count the vulnerabilities under a target."""

@@ -1,8 +1,12 @@
"""WebSite Service - website business-logic layer"""

import logging
from typing import Tuple, List
from typing import List, Iterator, Optional

from apps.asset.repositories import DjangoWebSiteRepository
from apps.asset.dtos import WebSiteDTO
from apps.common.validators import is_valid_url, is_url_match_target
from apps.common.utils.filter_utils import apply_filters

logger = logging.getLogger(__name__)

@@ -10,82 +14,129 @@ logger = logging.getLogger(__name__)
class WebSiteService:
    """Website business-logic layer"""

    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status_code': 'status_code',
        'tech': 'tech',
    }

    def __init__(self, repository=None):
        """
        Initialize the website service.

        Args:
            repository: website repository instance (for dependency injection)
        """
        """Initialize the website service."""
        self.repo = repository or DjangoWebSiteRepository()

    # ==================== Create operations ====================

    def bulk_create_ignore_conflicts(self, website_dtos: List[WebSiteDTO]) -> None:
    def bulk_upsert(self, website_dtos: List[WebSiteDTO]) -> int:
        """
        Bulk-create website records, ignoring conflicts (used by scan tasks).
        Bulk-create or update websites (upsert).

        Existing rows have all fields updated; missing rows are created.

        Args:
            website_dtos: list of WebSiteDTO

        Note:
            Uses the ignore_conflicts strategy; duplicate records are skipped.
        """
        logger.debug("Bulk-creating websites - count: %d", len(website_dtos))
        return self.repo.bulk_create_ignore_conflicts(website_dtos)

    # ==================== Query operations ====================

    def get_by_url(self, url: str, target_id: int) -> int:
        """
        Look up a website ID by URL and target_id.

        Args:
            url: website URL
            target_id: target ID

        Returns:
            int: website ID, or None if it does not exist
            int: number of records processed
        """
        return self.repo.get_by_url(url=url, target_id=target_id)

    # ==================== Query operations ====================

    def get_all(self):
        """
        Get all websites.
        if not website_dtos:
            return 0

        Returns:
            QuerySet: website queryset
        """
        logger.debug("Fetching all websites")
        return self.repo.get_all()
        try:
            return self.repo.bulk_upsert(website_dtos)
        except Exception as e:
            logger.error(f"Bulk website upsert failed: {e}")
            raise
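
bulk_upsert lives in the repository; its contract (insert new rows, update existing ones, return a processed count) suggests a PostgreSQL INSERT ... ON CONFLICT DO UPDATE. A hedged sketch of such a repository method using Django 4.1+'s bulk_create conflict handling (model, constraint, and field names are assumptions):

# Assumed repository internals, not the project's actual implementation.
from apps.asset.models import WebSite  # model name assumed

def bulk_upsert(self, website_dtos):
    rows = [WebSite(url=d.url, target_id=d.target_id) for d in website_dtos]
    WebSite.objects.bulk_create(
        rows,
        update_conflicts=True,
        unique_fields=['url', 'target_id'],        # assumed unique constraint
        update_fields=['title', 'status_code'],    # assumed updatable columns
    )
    return len(rows)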

    def get_websites_by_target(self, target_id: int):
        return self.repo.get_by_target(target_id)

    def count_websites_by_scan(self, scan_id: int) -> int:
    def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
        """
        Count the websites under a scan.
        Bulk-create websites (URL only, using ignore_conflicts).

        Validates URL format and target match, filters out invalid/mismatched URLs, deduplicates, then bulk-creates.
        Existing records are skipped.

        Args:
            scan_id: scan ID

            target_id: target ID
            target_name: target name (used for match validation)
            target_type: target type ('domain', 'ip', 'cidr')
            urls: list of URLs

        Returns:
            int: website count
            int: number of records actually created
        """
        logger.debug("Counting websites under scan - Scan ID: %d", scan_id)
        return self.repo.count_by_scan(scan_id)
        if not urls:
            return 0

        # Filter valid URLs and deduplicate
        valid_urls = []
        seen = set()

        for url in urls:
            if not isinstance(url, str):
                continue
            url = url.strip()
            if not url or url in seen:
                continue
            if not is_valid_url(url):
                continue

            # Match validation (the frontend already blocks mismatched submissions; this is a backend safety net)
            if not is_url_match_target(url, target_name, target_type):
                continue

            seen.add(url)
            valid_urls.append(url)

        if not valid_urls:
            return 0

        # Row count before the insert
        count_before = self.repo.count_by_target(target_id)

        # Build the DTO list and bulk-create
        website_dtos = [
            WebSiteDTO(url=url, target_id=target_id)
            for url in valid_urls
        ]
        self.repo.bulk_create_ignore_conflicts(website_dtos)

        # Row count after the insert
        count_after = self.repo.count_by_target(target_id)
        return count_after - count_before
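
A brief usage sketch (IDs invented). As with bulk_create_subdomains above, the return value is derived from before/after row counts, so concurrent inserts into the same target can skew it:

# Hypothetical caller.
service = WebSiteService()
created = service.bulk_create_urls(
    target_id=1,
    target_name="example.com",
    target_type="domain",
    urls=["https://a.example.com/", "https://a.example.com/", "not-a-url"],
)
# The duplicate collapses via the seen set, and the malformed string is
# presumably rejected by is_valid_url, so at most one row is created.
print(created)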

    def get_websites_by_target(self, target_id: int, filter_query: Optional[str] = None):
        """Get all websites under a target."""
        queryset = self.repo.get_by_target(target_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
        return queryset

    def get_all(self, filter_query: Optional[str] = None):
        """Get all websites."""
        queryset = self.repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING, json_array_fields=['tech'])
        return queryset

    def get_by_url(self, url: str, target_id: int) -> int:
        """Look up a website ID by URL and target_id."""
        return self.repo.get_by_url(url=url, target_id=target_id)

    def iter_website_urls_by_target(self, target_id: int, chunk_size: int = 1000):
        """Stream all site URLs under a target (memory-friendly, delegated to the repository layer)."""
        logger.debug(
            "Streaming all site URLs under target - Target ID: %d, batch size: %d",
            target_id,
            chunk_size,
        )
        # Database access goes through the repository layer so the service does not depend on the ORM directly
        """Stream all site URLs under a target."""
        return self.repo.get_urls_for_export(target_id=target_id, batch_size=chunk_size)

    def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export.

        Args:
            target_id: target ID

        Yields:
            raw data dicts
        """
        return self.repo.iter_raw_data_for_export(target_id=target_id)


__all__ = ['WebSiteService']

backend/apps/asset/services/search_service.py (new file, 477 lines)
@@ -0,0 +1,477 @@
"""
Asset search service

Core business logic for asset search:
- queries data from materialized views
- parses an expression syntax
- supports the = (fuzzy), == (exact), and != (not-equal) operators
- supports && (AND) and || (OR) combinations
- supports both Website and Endpoint asset types
"""

import logging
import re
from typing import Optional, List, Dict, Any, Tuple, Literal, Iterator

from django.db import connection

logger = logging.getLogger(__name__)

# Supported field mapping (frontend field name -> database field name)
FIELD_MAPPING = {
    'host': 'host',
    'url': 'url',
    'title': 'title',
    'tech': 'tech',
    'status': 'status_code',
    'body': 'response_body',
    'header': 'response_headers',
}

# Array-typed fields
ARRAY_FIELDS = {'tech'}

# Asset type -> view name
VIEW_MAPPING = {
    'website': 'asset_search_view',
    'endpoint': 'endpoint_search_view',
}

# Asset type -> source table name (used to JOIN for array fields)
# ⚠️ Important: pg_ivm does not support ArrayField, so every array field must come from a JOIN against the source table
TABLE_MAPPING = {
    'website': 'website',
    'endpoint': 'endpoint',
}

# Valid asset types
VALID_ASSET_TYPES = {'website', 'endpoint'}

# Website select fields (v = view, t = source table)
# ⚠️ Note: t.tech comes from the source table because pg_ivm does not support ArrayField
WEBSITE_SELECT_FIELDS = """
    v.id,
    v.url,
    v.host,
    v.title,
    t.tech,  -- ArrayField, JOINed from the website table
    v.status_code,
    v.response_headers,
    v.response_body,
    v.content_type,
    v.content_length,
    v.webserver,
    v.location,
    v.vhost,
    v.created_at,
    v.target_id
"""

# Endpoint select fields
# ⚠️ Note: t.tech and t.matched_gf_patterns come from the source table because pg_ivm does not support ArrayField
ENDPOINT_SELECT_FIELDS = """
    v.id,
    v.url,
    v.host,
    v.title,
    t.tech,  -- ArrayField, JOINed from the endpoint table
    v.status_code,
    v.response_headers,
    v.response_body,
    v.content_type,
    v.content_length,
    v.webserver,
    v.location,
    v.vhost,
    t.matched_gf_patterns,  -- ArrayField, JOINed from the endpoint table
    v.created_at,
    v.target_id
"""


class SearchQueryParser:
    """
    Search query parser.

    Supported syntax:
    - field="value"   fuzzy match (ILIKE %value%)
    - field=="value"  exact match
    - field!="value"  not equal
    - &&  AND
    - ||  OR
    - ()  grouping (nesting not yet supported)

    Examples:
    - host="api" && tech="nginx"
    - tech="vue" || tech="react"
    - status=="200" && host!="test"
    """

    # Matches a single condition: field="value", field=="value", or field!="value"
    CONDITION_PATTERN = re.compile(r'(\w+)\s*(==|!=|=)\s*"([^"]*)"')

    @classmethod
    def parse(cls, query: str) -> Tuple[str, List[Any]]:
        """
        Parse a query string into a SQL WHERE clause and parameters.

        Args:
            query: search query string

        Returns:
            a (where_clause, params) tuple
        """
        if not query or not query.strip():
            return "1=1", []

        query = query.strip()

        # If the query contains no operator syntax, treat it as a fuzzy host search
        if not cls.CONDITION_PATTERN.search(query):
            # Bare text defaults to a fuzzy host match (v is the view alias)
            return "v.host ILIKE %s", [f"%{query}%"]

        # Split into OR groups on ||
        or_groups = cls._split_by_or(query)

        if len(or_groups) == 1:
            # No OR; parse the AND conditions directly
            return cls._parse_and_group(or_groups[0])

        # Multiple OR groups
        or_clauses = []
        all_params = []

        for group in or_groups:
            clause, params = cls._parse_and_group(group)
            if clause and clause != "1=1":
                or_clauses.append(f"({clause})")
                all_params.extend(params)

        if not or_clauses:
            return "1=1", []

        return " OR ".join(or_clauses), all_params

    @classmethod
    def _split_by_or(cls, query: str) -> List[str]:
        """Split the query on ||, ignoring || inside quotes."""
        parts = []
        current = ""
        in_quotes = False
        i = 0

        while i < len(query):
            char = query[i]

            if char == '"':
                in_quotes = not in_quotes
                current += char
            elif not in_quotes and i + 1 < len(query) and query[i:i+2] == '||':
                if current.strip():
                    parts.append(current.strip())
                current = ""
                i += 1  # skip the second |
            else:
                current += char

            i += 1

        if current.strip():
            parts.append(current.strip())

        return parts if parts else [query]
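
For instance, the quote-aware scan keeps a || that appears inside a quoted value intact:

# Illustrative behaviour of the quote-aware scanner:
parts = SearchQueryParser._split_by_or('title="a||b" || host="api"')
assert parts == ['title="a||b"', 'host="api"']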

    @classmethod
    def _parse_and_group(cls, group: str) -> Tuple[str, List[Any]]:
        """Parse an AND group (conditions joined by &&)."""
        # Strip outer parentheses
        group = group.strip()
        if group.startswith('(') and group.endswith(')'):
            group = group[1:-1].strip()

        # Split on &&
        parts = cls._split_by_and(group)

        and_clauses = []
        all_params = []

        for part in parts:
            clause, params = cls._parse_condition(part.strip())
            if clause:
                and_clauses.append(clause)
                all_params.extend(params)

        if not and_clauses:
            return "1=1", []

        return " AND ".join(and_clauses), all_params

    @classmethod
    def _split_by_and(cls, query: str) -> List[str]:
        """Split the query on &&, ignoring && inside quotes."""
        parts = []
        current = ""
        in_quotes = False
        i = 0

        while i < len(query):
            char = query[i]

            if char == '"':
                in_quotes = not in_quotes
                current += char
            elif not in_quotes and i + 1 < len(query) and query[i:i+2] == '&&':
                if current.strip():
                    parts.append(current.strip())
                current = ""
                i += 1  # skip the second &
            else:
                current += char

            i += 1

        if current.strip():
            parts.append(current.strip())

        return parts if parts else [query]

    @classmethod
    def _parse_condition(cls, condition: str) -> Tuple[Optional[str], List[Any]]:
        """
        Parse a single condition.

        Returns:
            (sql_clause, params), or (None, []) if parsing fails
        """
        # Strip parentheses
        condition = condition.strip()
        if condition.startswith('(') and condition.endswith(')'):
            condition = condition[1:-1].strip()

        match = cls.CONDITION_PATTERN.match(condition)
        if not match:
            logger.warning(f"Could not parse condition: {condition}")
            return None, []

        field, operator, value = match.groups()
        field = field.lower()

        # Validate the field
        if field not in FIELD_MAPPING:
            logger.warning(f"Unknown field: {field}")
            return None, []

        db_field = FIELD_MAPPING[field]
        is_array = field in ARRAY_FIELDS

        # Generate SQL based on the operator
        if operator == '=':
            # Fuzzy match
            return cls._build_like_condition(db_field, value, is_array)
        elif operator == '==':
            # Exact match
            return cls._build_exact_condition(db_field, value, is_array)
        elif operator == '!=':
            # Not equal
            return cls._build_not_equal_condition(db_field, value, is_array)

        return None, []

    @classmethod
    def _build_like_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
        """Build a fuzzy-match condition."""
        if is_array:
            # Array field: check whether any element contains the value (from the source table t)
            return f"EXISTS (SELECT 1 FROM unnest(t.{field}) AS elem WHERE elem ILIKE %s)", [f"%{value}%"]
        elif field == 'status_code':
            # Status code is an integer; a fuzzy match degrades to an exact match
            try:
                return f"v.{field} = %s", [int(value)]
            except ValueError:
                return f"v.{field}::text ILIKE %s", [f"%{value}%"]
        else:
            return f"v.{field} ILIKE %s", [f"%{value}%"]

    @classmethod
    def _build_exact_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
        """Build an exact-match condition."""
        if is_array:
            # Array field: check whether the array contains the exact value (from the source table t)
            return f"%s = ANY(t.{field})", [value]
        elif field == 'status_code':
            # Status code is an integer
            try:
                return f"v.{field} = %s", [int(value)]
            except ValueError:
                return f"v.{field}::text = %s", [value]
        else:
            return f"v.{field} = %s", [value]

    @classmethod
    def _build_not_equal_condition(cls, field: str, value: str, is_array: bool) -> Tuple[str, List[Any]]:
        """Build a not-equal condition."""
        if is_array:
            # Array field: check that the array does not contain the value (from the source table t)
            return f"NOT (%s = ANY(t.{field}))", [value]
        elif field == 'status_code':
            try:
                return f"(v.{field} IS NULL OR v.{field} != %s)", [int(value)]
            except ValueError:
                return f"(v.{field} IS NULL OR v.{field}::text != %s)", [value]
        else:
            return f"(v.{field} IS NULL OR v.{field} != %s)", [value]


AssetType = Literal['website', 'endpoint']


class AssetSearchService:
    """Asset search service"""

    def search(
        self,
        query: str,
        asset_type: AssetType = 'website',
        limit: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """
        Search assets.

        Args:
            query: search query string
            asset_type: asset type ('website' or 'endpoint')
            limit: maximum number of results (optional)

        Returns:
            List[Dict]: search results
        """
        where_clause, params = SearchQueryParser.parse(query)

        # Pick the view, source table, and fields by asset type
        view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
        table_name = TABLE_MAPPING.get(asset_type, 'website')
        select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS

        # JOIN the source table for array fields (tech, matched_gf_patterns)
        sql = f"""
            SELECT {select_fields}
            FROM {view_name} v
            JOIN {table_name} t ON v.id = t.id
            WHERE {where_clause}
            ORDER BY v.created_at DESC
        """

        # Append LIMIT
        if limit is not None and limit > 0:
            sql += f" LIMIT {int(limit)}"

        try:
            with connection.cursor() as cursor:
                cursor.execute(sql, params)
                columns = [col[0] for col in cursor.description]
                results = []

                for row in cursor.fetchall():
                    result = dict(zip(columns, row))
                    results.append(result)

                return results
        except Exception as e:
            logger.error(f"Search query failed: {e}, SQL: {sql}, params: {params}")
            raise
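
A minimal usage sketch (call site assumed); each row comes back as a plain dict keyed by column name, ready for serialization:

# Hypothetical caller.
service = AssetSearchService()
rows = service.search('tech="nginx" && status=="200"', asset_type='website', limit=100)
for row in rows:
    print(row['url'], row['status_code'])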

    def count(self, query: str, asset_type: AssetType = 'website', statement_timeout_ms: int = 300000) -> int:
        """
        Count search results.

        Args:
            query: search query string
            asset_type: asset type ('website' or 'endpoint')
            statement_timeout_ms: SQL statement timeout in milliseconds, default 5 minutes

        Returns:
            int: total number of results
        """
        where_clause, params = SearchQueryParser.parse(query)

        # Pick the view and source table by asset type
        view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
        table_name = TABLE_MAPPING.get(asset_type, 'website')

        # JOIN the source table to support array-field queries
        sql = f"SELECT COUNT(*) FROM {view_name} v JOIN {table_name} t ON v.id = t.id WHERE {where_clause}"

        try:
            with connection.cursor() as cursor:
                # Use a longer timeout for exports (affects only the current session)
                cursor.execute(f"SET LOCAL statement_timeout = {statement_timeout_ms}")
                cursor.execute(sql, params)
                return cursor.fetchone()[0]
        except Exception as e:
            logger.error(f"Count query failed: {e}")
            raise
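
One PostgreSQL subtlety worth flagging: SET LOCAL only takes effect inside an explicit transaction block; in autocommit mode it is a no-op apart from a warning. If these queries run outside ATOMIC_REQUESTS, a wrapper along these lines would make the timeout stick (a sketch, not the project's code):

from django.db import connection, transaction

# Sketch: open a transaction so SET LOCAL actually applies to the query that follows.
with transaction.atomic():
    with connection.cursor() as cursor:
        cursor.execute("SET LOCAL statement_timeout = 300000")
        cursor.execute("SELECT COUNT(*) FROM asset_search_view")
        total = cursor.fetchone()[0]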

    def search_iter(
        self,
        query: str,
        asset_type: AssetType = 'website',
        batch_size: int = 1000,
        statement_timeout_ms: int = 300000
    ) -> Iterator[Dict[str, Any]]:
        """
        Stream search results (batched queries, memory-friendly).

        Args:
            query: search query string
            asset_type: asset type ('website' or 'endpoint')
            batch_size: number of rows fetched per batch
            statement_timeout_ms: SQL statement timeout in milliseconds, default 5 minutes

        Yields:
            Dict: a single search result
        """
        where_clause, params = SearchQueryParser.parse(query)

        # Pick the view, source table, and fields by asset type
        view_name = VIEW_MAPPING.get(asset_type, 'asset_search_view')
        table_name = TABLE_MAPPING.get(asset_type, 'website')
        select_fields = ENDPOINT_SELECT_FIELDS if asset_type == 'endpoint' else WEBSITE_SELECT_FIELDS

        # Batch with OFFSET/LIMIT (Django does not support named cursors)
        offset = 0

        try:
            while True:
                # JOIN the source table for array fields
                sql = f"""
                    SELECT {select_fields}
                    FROM {view_name} v
                    JOIN {table_name} t ON v.id = t.id
                    WHERE {where_clause}
                    ORDER BY v.created_at DESC
                    LIMIT {batch_size} OFFSET {offset}
                """

                with connection.cursor() as cursor:
                    # Use a longer timeout for exports (affects only the current session)
                    cursor.execute(f"SET LOCAL statement_timeout = {statement_timeout_ms}")
                    cursor.execute(sql, params)
                    columns = [col[0] for col in cursor.description]
                    rows = cursor.fetchall()

                if not rows:
                    break

                for row in rows:
                    yield dict(zip(columns, row))

                # Fewer rows than batch_size means this was the last batch
                if len(rows) < batch_size:
                    break

                offset += batch_size

        except Exception as e:
            logger.error(f"Streaming search query failed: {e}, SQL: {sql}, params: {params}")
            raise
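
A consumption sketch; note that OFFSET pagination rereads the ordered result from the top each batch, so rows inserted or removed mid-export can shift pages (the code does not guard against this):

# Hypothetical caller for a CSV-style export.
service = AssetSearchService()
for row in service.search_iter('host="api"', asset_type='endpoint', batch_size=500):
    print(row['url'])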

@@ -26,10 +26,9 @@ class DirectorySnapshotsService:
        2. syncs to the asset table (deduplicated, without scan_id)

        Args:
            items: list of directory snapshot DTOs (must include website_id)
            items: list of directory snapshot DTOs (must include target_id)

        Raises:
            ValueError: if an item's website_id is None
            Exception: on database failure
        """
        if not items:
@@ -49,14 +48,13 @@ class DirectorySnapshotsService:
            logger.debug("Step 1: save to the snapshot table")
            self.snapshot_repo.save_snapshots(items)

            # Step 2: convert to asset DTOs and save to the asset table
            # Note: deduplication relies on the database UNIQUE constraint + ignore_conflicts
            # Step 2: convert to asset DTOs and save to the asset table (upsert)
            # - new records: inserted into the asset table
            # - existing records: skipped automatically
            logger.debug("Step 2: sync to the asset table (via the service layer)")
            # - existing records: fields updated (created_at is preserved)
            logger.debug("Step 2: sync to the asset table (via the service layer, upsert)")
            asset_items = [item.to_asset_dto() for item in items]

            self.asset_service.bulk_create_ignore_conflicts(asset_items)
            self.asset_service.bulk_upsert(asset_items)

            logger.info("Directory snapshots and asset data saved - count: %d", len(items))

@@ -69,15 +67,44 @@ class DirectorySnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        return self.snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'status': 'status',
        'content_type': 'content_type',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self):
    def get_all(self, filter_query: str = None):
        """Get all directory snapshots."""
        return self.snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_directory_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all directory URLs under a scan."""
        queryset = self.snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.url

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export.

        Args:
            scan_id: scan ID

        Yields:
            raw data dicts
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

@@ -50,13 +50,11 @@ class EndpointSnapshotsService:
            self.snapshot_repo.save_snapshots(items)

            # Step 2: convert to asset DTOs and save to the asset table
            # Note: deduplication relies on the database UNIQUE constraint + ignore_conflicts
            # - new records: inserted into the asset table
            # - existing records: skipped automatically
            # Use upsert: new records are inserted, existing records are updated
            logger.debug("Step 2: sync to the asset table (via the service layer)")
            asset_items = [item.to_asset_dto() for item in items]

            self.asset_service.bulk_create_endpoints(asset_items)
            self.asset_service.bulk_upsert(asset_items)

            logger.info("Endpoint snapshots and asset data saved - count: %d", len(items))

@@ -69,15 +67,47 @@ class EndpointSnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        return self.snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status_code': 'status_code',
        'webserver': 'webserver',
        'tech': 'tech',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self):
    def get_all(self, filter_query: str = None):
        """Get all endpoint snapshots."""
        return self.snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_endpoint_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all endpoint URLs under a scan."""
        queryset = self.snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.url

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export.

        Args:
            scan_id: scan ID

        Yields:
            raw data dicts
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

@@ -69,13 +69,25 @@ class HostPortMappingSnapshotsService:
            )
            raise

    def get_ip_aggregation_by_scan(self, scan_id: int, search: str = None):
        return self.snapshot_repo.get_ip_aggregation_by_scan(scan_id, search=search)
    def get_ip_aggregation_by_scan(self, scan_id: int, filter_query: str = None):
        return self.snapshot_repo.get_ip_aggregation_by_scan(scan_id, filter_query=filter_query)

    def get_all_ip_aggregation(self, search: str = None):
    def get_all_ip_aggregation(self, filter_query: str = None):
        """Get all IP aggregation data."""
        return self.snapshot_repo.get_all_ip_aggregation(search=search)
        return self.snapshot_repo.get_all_ip_aggregation(filter_query=filter_query)

    def iter_ips_by_scan(self, scan_id: int, batch_size: int = 1000) -> Iterator[str]:
        """Stream all unique IP addresses under a scan."""
        return self.snapshot_repo.get_ips_for_export(scan_id=scan_id, batch_size=batch_size)

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export.

        Args:
            scan_id: scan ID

        Yields:
            raw data dicts: {ip, host, port, created_at}
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

@@ -66,14 +66,41 @@ class SubdomainSnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        return self.subdomain_snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'name': 'name',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.subdomain_snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self):
    def get_all(self, filter_query: str = None):
        """Get all subdomain snapshots."""
        return self.subdomain_snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.subdomain_snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_subdomain_names_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        queryset = self.subdomain_snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.name

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export.

        Args:
            scan_id: scan ID

        Yields:
            raw data dicts: {name, created_at}
        """
        return self.subdomain_snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)
@@ -66,13 +66,31 @@ class VulnerabilitySnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        """Get all vulnerability snapshots for a scan."""
        return self.snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'type': 'vuln_type',
        'url': 'url',
        'severity': 'severity',
        'source': 'source',
    }

    def get_all(self):
    def get_by_scan(self, scan_id: int, filter_query: str = None):
        """Get all vulnerability snapshots for a scan."""
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self, filter_query: str = None):
        """Get all vulnerability snapshots."""
        return self.snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_vuln_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all vulnerability URLs under a scan."""

@@ -49,14 +49,13 @@ class WebsiteSnapshotsService:
            logger.debug("Step 1: save to the snapshot table")
            self.snapshot_repo.save_snapshots(items)

            # Step 2: convert to asset DTOs and save to the asset table
            # Note: deduplication relies on the database UNIQUE constraint + ignore_conflicts
            # Step 2: convert to asset DTOs and save to the asset table (upsert)
            # - new records: inserted into the asset table
            # - existing records: skipped automatically
            logger.debug("Step 2: sync to the asset table (via the service layer)")
            # - existing records: fields updated (created_at is preserved)
            logger.debug("Step 2: sync to the asset table (via the service layer, upsert)")
            asset_items = [item.to_asset_dto() for item in items]

            self.asset_service.bulk_create_ignore_conflicts(asset_items)
            self.asset_service.bulk_upsert(asset_items)

            logger.info("Website snapshots and asset data saved - count: %d", len(items))

@@ -69,15 +68,47 @@ class WebsiteSnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        return self.snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status_code': 'status_code',
        'webserver': 'webserver',
        'tech': 'tech',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self):
    def get_all(self, filter_query: str = None):
        """Get all website snapshots."""
        return self.snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_website_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all site URLs under a scan (newest discovery first)."""
        """Stream all site URLs under a scan (newest creation first)."""
        queryset = self.snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.url

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export.

        Args:
            scan_id: scan ID

        Yields:
            raw data dicts
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)

@@ -10,6 +10,8 @@ from .views import (
    DirectoryViewSet,
    VulnerabilityViewSet,
    AssetStatisticsViewSet,
    AssetSearchView,
    AssetSearchExportView,
)

# Create the DRF router
@@ -25,4 +27,6 @@ router.register(r'statistics', AssetStatisticsViewSet, basename='asset-statistic

urlpatterns = [
    path('assets/', include(router.urls)),
    path('assets/search/', AssetSearchView.as_view(), name='asset-search'),
    path('assets/search/export/', AssetSearchExportView.as_view(), name='asset-search-export'),
]
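
Assuming this module is mounted under /api/ like the nested routes referenced elsewhere in the diff, the two additions expose a search endpoint and a streaming export twin; the query-parameter names below are assumptions, not confirmed by the diff:

GET /api/assets/search/?q=host%3D%22api%22&type=website
GET /api/assets/search/export/?q=host%3D%22api%22&type=endpoint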

@@ -1,562 +0,0 @@
import logging
from rest_framework import viewsets, status, filters
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.request import Request
from rest_framework.exceptions import NotFound, ValidationError as DRFValidationError
from django.core.exceptions import ValidationError, ObjectDoesNotExist
from django.db import DatabaseError, IntegrityError, OperationalError
from django.http import StreamingHttpResponse

from .serializers import (
    SubdomainListSerializer, WebSiteSerializer, DirectorySerializer,
    VulnerabilitySerializer, EndpointListSerializer, IPAddressAggregatedSerializer,
    SubdomainSnapshotSerializer, WebsiteSnapshotSerializer, DirectorySnapshotSerializer,
    EndpointSnapshotSerializer, VulnerabilitySnapshotSerializer
)
from .services import (
    SubdomainService, WebSiteService, DirectoryService,
    VulnerabilityService, AssetStatisticsService, EndpointService, HostPortMappingService
)
from .services.snapshot import (
    SubdomainSnapshotsService, WebsiteSnapshotsService, DirectorySnapshotsService,
    EndpointSnapshotsService, HostPortMappingSnapshotsService, VulnerabilitySnapshotsService
)
from apps.common.pagination import BasePagination

logger = logging.getLogger(__name__)


class AssetStatisticsViewSet(viewsets.ViewSet):
    """
    Asset statistics API

    Serves the dashboard's statistics (pre-aggregated, read from a cache table)
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = AssetStatisticsService()

    def list(self, request):
        """
        Get asset statistics.

        GET /assets/statistics/

        Returns:
        - totalTargets: total number of targets
        - totalSubdomains: total number of subdomains
        - totalIps: total number of IPs
        - totalEndpoints: total number of endpoints
        - totalWebsites: total number of websites
        - totalVulns: total number of vulnerabilities
        - totalAssets: total number of assets
        - runningScans: number of running scans
        - updatedAt: when the statistics were last updated
        """
        try:
            stats = self.service.get_statistics()
            return Response({
                'totalTargets': stats['total_targets'],
                'totalSubdomains': stats['total_subdomains'],
                'totalIps': stats['total_ips'],
                'totalEndpoints': stats['total_endpoints'],
                'totalWebsites': stats['total_websites'],
                'totalVulns': stats['total_vulns'],
                'totalAssets': stats['total_assets'],
                'runningScans': stats['running_scans'],
                'updatedAt': stats['updated_at'],
                # Deltas
                'changeTargets': stats['change_targets'],
                'changeSubdomains': stats['change_subdomains'],
                'changeIps': stats['change_ips'],
                'changeEndpoints': stats['change_endpoints'],
                'changeWebsites': stats['change_websites'],
                'changeVulns': stats['change_vulns'],
                'changeAssets': stats['change_assets'],
                # Vulnerability severity distribution
                'vulnBySeverity': stats['vuln_by_severity'],
            })
        except (DatabaseError, OperationalError) as e:
            logger.exception("Failed to fetch asset statistics")
            return Response(
                {'error': 'Failed to fetch statistics'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

    @action(detail=False, methods=['get'], url_path='history')
    def history(self, request: Request):
        """
        Get historical statistics (for the line chart).

        GET /assets/statistics/history/?days=7

        Query Parameters:
            days: how many recent days to fetch; default 7, maximum 90

        Returns:
            list of historical data points
        """
        try:
            days_param = request.query_params.get('days', '7')
            try:
                days = int(days_param)
            except (ValueError, TypeError):
                days = 7
            days = min(max(days, 1), 90)  # clamp to 1-90 days

            history = self.service.get_statistics_history(days=days)
            return Response(history)
        except (DatabaseError, OperationalError) as e:
            logger.exception("Failed to fetch statistics history")
            return Response(
                {'error': 'Failed to fetch history'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )


# Note: the IPAddress model has been refactored into HostPortMapping.
# IPAddressViewSet has been removed and needs to be reimplemented for the new architecture.


class SubdomainViewSet(viewsets.ModelViewSet):
    """Subdomain management ViewSet

    Accessible two ways:
    1. Nested route: GET /api/targets/{target_pk}/subdomains/
    2. Standalone route: GET /api/subdomains/ (global query)
    """

    serializer_class = SubdomainListSerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['name']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = SubdomainService()

    def get_queryset(self):
        """Scope the query depending on whether target_pk is present."""
        target_pk = self.kwargs.get('target_pk')
        if target_pk:
            return self.service.get_subdomains_by_target(target_pk)
        return self.service.get_all()

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        """Export subdomains (plain text, one per line)."""
        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            raise DRFValidationError('Export must be scoped to a target')

        def line_iterator():
            for name in self.service.iter_subdomain_names_by_target(target_pk):
                yield f"{name}\n"

        response = StreamingHttpResponse(
            line_iterator(),
            content_type='text/plain; charset=utf-8',
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-subdomains.txt"'
        return response
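
The target_pk kwarg used throughout these ViewSets implies registration through a nested router; drf-nested-routers is one common way to get exactly that URL shape (the package choice and TargetViewSet are assumptions):

# Hypothetical wiring, assuming the drf-nested-routers package.
from rest_framework_nested import routers

router = routers.SimpleRouter()
router.register(r'targets', TargetViewSet)  # TargetViewSet assumed to exist
targets_router = routers.NestedSimpleRouter(router, r'targets', lookup='target')
targets_router.register(r'subdomains', SubdomainViewSet, basename='target-subdomains')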


class WebSiteViewSet(viewsets.ModelViewSet):
    """Website management ViewSet

    Accessible two ways:
    1. Nested route: GET /api/targets/{target_pk}/websites/
    2. Standalone route: GET /api/websites/ (global query)
    """

    serializer_class = WebSiteSerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['host']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = WebSiteService()

    def get_queryset(self):
        """Scope the query depending on whether target_pk is present."""
        target_pk = self.kwargs.get('target_pk')
        if target_pk:
            return self.service.get_websites_by_target(target_pk)
        return self.service.get_all()

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        """Export site URLs (plain text, one per line)."""
        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            raise DRFValidationError('Export must be scoped to a target')

        def line_iterator():
            for url in self.service.iter_website_urls_by_target(target_pk):
                yield f"{url}\n"

        response = StreamingHttpResponse(
            line_iterator(),
            content_type='text/plain; charset=utf-8',
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-websites.txt"'
        return response


class DirectoryViewSet(viewsets.ModelViewSet):
    """Directory management ViewSet

    Accessible two ways:
    1. Nested route: GET /api/targets/{target_pk}/directories/
    2. Standalone route: GET /api/directories/ (global query)
    """

    serializer_class = DirectorySerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['url']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = DirectoryService()

    def get_queryset(self):
        """Scope the query depending on whether target_pk is present."""
        target_pk = self.kwargs.get('target_pk')
        if target_pk:
            return self.service.get_directories_by_target(target_pk)
        return self.service.get_all()

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        """Export directory URLs (plain text, one per line)."""
        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            raise DRFValidationError('Export must be scoped to a target')

        def line_iterator():
            for url in self.service.iter_directory_urls_by_target(target_pk):
                yield f"{url}\n"

        response = StreamingHttpResponse(
            line_iterator(),
            content_type='text/plain; charset=utf-8',
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-directories.txt"'
        return response


class EndpointViewSet(viewsets.ModelViewSet):
    """Endpoint management ViewSet

    Accessible two ways:
    1. Nested route: GET /api/targets/{target_pk}/endpoints/
    2. Standalone route: GET /api/endpoints/ (global query)
    """

    serializer_class = EndpointListSerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['host']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = EndpointService()

    def get_queryset(self):
        """Scope the query depending on whether target_pk is present."""
        target_pk = self.kwargs.get('target_pk')
        if target_pk:
            return self.service.get_queryset_by_target(target_pk)
        return self.service.get_all()

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        """Export endpoint URLs (plain text, one per line)."""
        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            raise DRFValidationError('Export must be scoped to a target')

        def line_iterator():
            for url in self.service.iter_endpoint_urls_by_target(target_pk):
                yield f"{url}\n"

        response = StreamingHttpResponse(
            line_iterator(),
            content_type='text/plain; charset=utf-8',
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-endpoints.txt"'
        return response


class HostPortMappingViewSet(viewsets.ModelViewSet):
    """Host-port mapping management ViewSet (IP-address aggregated view)

    Accessible two ways:
    1. Nested route: GET /api/targets/{target_pk}/ip-addresses/
    2. Standalone route: GET /api/ip-addresses/ (global query)

    Returns data aggregated by IP; each IP lists all of its associated hosts and ports.

    Note: because the result is aggregated data (a list of dicts), the DRF SearchFilter is not supported.
    """

    serializer_class = IPAddressAggregatedSerializer
    pagination_class = BasePagination

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = HostPortMappingService()

    def get_queryset(self):
        """Scope the query depending on whether target_pk is present; returns IP-aggregated data."""
        target_pk = self.kwargs.get('target_pk')
        search = self.request.query_params.get('search', None)
        if target_pk:
            return self.service.get_ip_aggregation_by_target(target_pk, search=search)
        return self.service.get_all_ip_aggregation(search=search)

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        """Export IP addresses (plain text, one per line)."""
        target_pk = self.kwargs.get('target_pk')
        if not target_pk:
            raise DRFValidationError('Export must be scoped to a target')

        def line_iterator():
            for ip in self.service.iter_ips_by_target(target_pk):
                yield f"{ip}\n"

        response = StreamingHttpResponse(
            line_iterator(),
            content_type='text/plain; charset=utf-8',
        )
        response['Content-Disposition'] = f'attachment; filename="target-{target_pk}-ip-addresses.txt"'
        return response


class VulnerabilityViewSet(viewsets.ModelViewSet):
    """Vulnerability asset management ViewSet (read-only)

    Accessible two ways:
    1. Nested route: GET /api/targets/{target_pk}/vulnerabilities/
    2. Standalone route: GET /api/vulnerabilities/ (global query)
    """

    serializer_class = VulnerabilitySerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['vuln_type']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = VulnerabilityService()

    def get_queryset(self):
        """Scope the query depending on whether target_pk is present."""
        target_pk = self.kwargs.get('target_pk')
        if target_pk:
            return self.service.get_queryset_by_target(target_pk)
        return self.service.get_all()
|
||||
|
||||
|
||||
# ==================== 快照 ViewSet(Scan 嵌套路由) ====================
|
||||
|
||||
class SubdomainSnapshotViewSet(viewsets.ModelViewSet):
|
||||
"""子域名快照 ViewSet - 嵌套路由:GET /api/scans/{scan_pk}/subdomains/"""
|
||||
|
||||
serializer_class = SubdomainSnapshotSerializer
|
||||
pagination_class = BasePagination
|
||||
filter_backends = [filters.SearchFilter, filters.OrderingFilter]
|
||||
search_fields = ['name']
|
||||
ordering_fields = ['name', 'discovered_at']
|
||||
ordering = ['-discovered_at']
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.service = SubdomainSnapshotsService()
|
||||
|
||||
def get_queryset(self):
|
||||
scan_pk = self.kwargs.get('scan_pk')
|
||||
if scan_pk:
|
||||
return self.service.get_by_scan(scan_pk)
|
||||
return self.service.get_all()
|
||||
|
||||
@action(detail=False, methods=['get'], url_path='export')
|
||||
def export(self, request):
|
||||
scan_pk = self.kwargs.get('scan_pk')
|
||||
if not scan_pk:
|
||||
raise DRFValidationError('必须在扫描下导出')
|
||||
|
||||
def line_iterator():
|
||||
for name in self.service.iter_subdomain_names_by_scan(scan_pk):
|
||||
yield f"{name}\n"
|
||||
|
||||
response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
|
||||
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-subdomains.txt"'
        return response


class WebsiteSnapshotViewSet(viewsets.ModelViewSet):
    """Website snapshot ViewSet - nested route: GET /api/scans/{scan_pk}/websites/"""

    serializer_class = WebsiteSnapshotSerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['host']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = WebsiteSnapshotsService()

    def get_queryset(self):
        scan_pk = self.kwargs.get('scan_pk')
        if scan_pk:
            return self.service.get_by_scan(scan_pk)
        return self.service.get_all()

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
            raise DRFValidationError('Export is only available under a scan')

        def line_iterator():
            for url in self.service.iter_website_urls_by_scan(scan_pk):
                yield f"{url}\n"

        response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-websites.txt"'
        return response


class DirectorySnapshotViewSet(viewsets.ModelViewSet):
    """Directory snapshot ViewSet - nested route: GET /api/scans/{scan_pk}/directories/"""

    serializer_class = DirectorySnapshotSerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['url']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = DirectorySnapshotsService()

    def get_queryset(self):
        scan_pk = self.kwargs.get('scan_pk')
        if scan_pk:
            return self.service.get_by_scan(scan_pk)
        return self.service.get_all()

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
            raise DRFValidationError('Export is only available under a scan')

        def line_iterator():
            for url in self.service.iter_directory_urls_by_scan(scan_pk):
                yield f"{url}\n"

        response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-directories.txt"'
        return response


class EndpointSnapshotViewSet(viewsets.ModelViewSet):
    """Endpoint snapshot ViewSet - nested route: GET /api/scans/{scan_pk}/endpoints/"""

    serializer_class = EndpointSnapshotSerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['host']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = EndpointSnapshotsService()

    def get_queryset(self):
        scan_pk = self.kwargs.get('scan_pk')
        if scan_pk:
            return self.service.get_by_scan(scan_pk)
        return self.service.get_all()

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
            raise DRFValidationError('Export is only available under a scan')

        def line_iterator():
            for url in self.service.iter_endpoint_urls_by_scan(scan_pk):
                yield f"{url}\n"

        response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-endpoints.txt"'
        return response


class HostPortMappingSnapshotViewSet(viewsets.ModelViewSet):
    """Host-port mapping snapshot ViewSet - nested route: GET /api/scans/{scan_pk}/ip-addresses/

    Note: since the queryset returns aggregated data (a list of dicts), DRF's SearchFilter is not supported.
    """

    serializer_class = IPAddressAggregatedSerializer
    pagination_class = BasePagination

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = HostPortMappingSnapshotsService()

    def get_queryset(self):
        scan_pk = self.kwargs.get('scan_pk')
        search = self.request.query_params.get('search', None)
        if scan_pk:
            return self.service.get_ip_aggregation_by_scan(scan_pk, search=search)
        return self.service.get_all_ip_aggregation(search=search)

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request):
        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
            raise DRFValidationError('Export is only available under a scan')

        def line_iterator():
            for ip in self.service.iter_ips_by_scan(scan_pk):
                yield f"{ip}\n"

        response = StreamingHttpResponse(line_iterator(), content_type='text/plain; charset=utf-8')
        response['Content-Disposition'] = f'attachment; filename="scan-{scan_pk}-ip-addresses.txt"'
        return response


class VulnerabilitySnapshotViewSet(viewsets.ModelViewSet):
    """Vulnerability snapshot ViewSet - nested route: GET /api/scans/{scan_pk}/vulnerabilities/"""

    serializer_class = VulnerabilitySnapshotSerializer
    pagination_class = BasePagination
    filter_backends = [filters.SearchFilter, filters.OrderingFilter]
    search_fields = ['vuln_type']
    ordering = ['-discovered_at']

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = VulnerabilitySnapshotsService()

    def get_queryset(self):
        scan_pk = self.kwargs.get('scan_pk')
        if scan_pk:
            return self.service.get_by_scan(scan_pk)
        return self.service.get_all()
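The six snapshot ViewSets above repeat the same scan-scoped streaming-export boilerplate. A minimal refactoring sketch, not part of this diff, of how the pattern could be pulled into one mixin (the `iter_export_lines` hook and `export_filename_suffix` attribute are hypothetical names):

# Refactoring sketch only; hook names are hypothetical.
from django.http import StreamingHttpResponse
from rest_framework.decorators import action
from rest_framework.exceptions import ValidationError as DRFValidationError


class ScanExportMixin:
    export_filename_suffix = 'items'  # e.g. 'websites', 'endpoints'

    def iter_export_lines(self, scan_pk):
        # Each ViewSet would delegate to its service here
        raise NotImplementedError

    @action(detail=False, methods=['get'], url_path='export')
    def export(self, request, **kwargs):
        scan_pk = self.kwargs.get('scan_pk')
        if not scan_pk:
            raise DRFValidationError('Export is only available under a scan')
        # Stream one line per item so large scans never load fully into memory
        body = (f"{line}\n" for line in self.iter_export_lines(scan_pk))
        response = StreamingHttpResponse(body, content_type='text/plain; charset=utf-8')
        response['Content-Disposition'] = (
            f'attachment; filename="scan-{scan_pk}-{self.export_filename_suffix}.txt"'
        )
        return response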
backend/apps/asset/views/__init__.py (new file, 40 lines)
@@ -0,0 +1,40 @@
"""
Asset application view module

Re-exports all view classes for backward compatibility
"""

from .asset_views import (
    AssetStatisticsViewSet,
    SubdomainViewSet,
    WebSiteViewSet,
    DirectoryViewSet,
    EndpointViewSet,
    HostPortMappingViewSet,
    VulnerabilityViewSet,
    SubdomainSnapshotViewSet,
    WebsiteSnapshotViewSet,
    DirectorySnapshotViewSet,
    EndpointSnapshotViewSet,
    HostPortMappingSnapshotViewSet,
    VulnerabilitySnapshotViewSet,
)
from .search_views import AssetSearchView, AssetSearchExportView

__all__ = [
    'AssetStatisticsViewSet',
    'SubdomainViewSet',
    'WebSiteViewSet',
    'DirectoryViewSet',
    'EndpointViewSet',
    'HostPortMappingViewSet',
    'VulnerabilityViewSet',
    'SubdomainSnapshotViewSet',
    'WebsiteSnapshotViewSet',
    'DirectorySnapshotViewSet',
    'EndpointSnapshotViewSet',
    'HostPortMappingSnapshotViewSet',
    'VulnerabilitySnapshotViewSet',
    'AssetSearchView',
    'AssetSearchExportView',
]
backend/apps/asset/views/asset_views.py (new file, 1079 lines)
File diff suppressed because it is too large
backend/apps/asset/views/search_views.py (new file, 361 lines)
@@ -0,0 +1,361 @@
"""
Asset search API views

REST API endpoints for asset search:
- GET /api/assets/search/ - search assets
- GET /api/assets/search/export/ - export search results as CSV

Search syntax:
- field="value"   fuzzy match (ILIKE %value%)
- field=="value"  exact match
- field!="value"  not equal
- &&              AND
- ||              OR

Supported fields:
- host: hostname
- url: URL
- title: page title
- tech: technology stack
- status: status code
- body: response body
- header: response headers

Supported asset types:
- website: websites (default)
- endpoint: endpoints
"""

import logging
import json
from datetime import datetime
from urllib.parse import urlparse, urlunparse
from rest_framework import status
from rest_framework.views import APIView
from rest_framework.request import Request
from django.db import connection

from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.asset.services.search_service import AssetSearchService, VALID_ASSET_TYPES

logger = logging.getLogger(__name__)


class AssetSearchView(APIView):
    """
    Asset search API

    GET /api/assets/search/

    Query Parameters:
        q: search query expression
        asset_type: asset type ('website' or 'endpoint', default 'website')
        page: page number (1-based, default 1)
        pageSize: page size (default 10, max 100)

    Example queries:
        ?q=host="api" && tech="nginx"
        ?q=tech="vue" || tech="react"&asset_type=endpoint
        ?q=status=="200" && host!="test"

    Response:
        {
            "results": [...],
            "total": 100,
            "page": 1,
            "pageSize": 10,
            "totalPages": 10,
            "assetType": "website"
        }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = AssetSearchService()

    def _parse_headers(self, headers_data) -> dict:
        """Parse response headers into a dict"""
        if not headers_data:
            return {}
        try:
            return json.loads(headers_data)
        except (json.JSONDecodeError, TypeError):
            result = {}
            for line in str(headers_data).split('\n'):
                if ':' in line:
                    key, value = line.split(':', 1)
                    result[key.strip()] = value.strip()
            return result

    def _format_result(self, result: dict, vulnerabilities_by_url: dict, asset_type: str) -> dict:
        """Format a single search result"""
        url = result.get('url', '')
        vulns = vulnerabilities_by_url.get(url, [])

        # Base fields shared by Website and Endpoint
        formatted = {
            'id': result.get('id'),
            'url': url,
            'host': result.get('host', ''),
            'title': result.get('title', ''),
            'technologies': result.get('tech', []) or [],
            'statusCode': result.get('status_code'),
            'contentLength': result.get('content_length'),
            'contentType': result.get('content_type', ''),
            'webserver': result.get('webserver', ''),
            'location': result.get('location', ''),
            'vhost': result.get('vhost'),
            'responseHeaders': self._parse_headers(result.get('response_headers')),
            'responseBody': result.get('response_body', ''),
            'createdAt': result.get('created_at').isoformat() if result.get('created_at') else None,
            'targetId': result.get('target_id'),
        }

        # Website-only field: associated vulnerabilities
        if asset_type == 'website':
            formatted['vulnerabilities'] = [
                {
                    'id': v.get('id'),
                    'name': v.get('vuln_type', ''),
                    'vulnType': v.get('vuln_type', ''),
                    'severity': v.get('severity', 'info'),
                }
                for v in vulns
            ]

        # Endpoint-only field
        if asset_type == 'endpoint':
            formatted['matchedGfPatterns'] = result.get('matched_gf_patterns', []) or []

        return formatted

    def _get_vulnerabilities_by_url_prefix(self, website_urls: list) -> dict:
        """
        Batch-query vulnerability data by URL prefix

        A vulnerability URL is a sub-path of a website URL, so prefix matching is used:
        - website.url: https://example.com/path?query=1
        - vulnerability.url: https://example.com/path/api/users

        Args:
            website_urls: list of website URLs as [(url, target_id), ...]

        Returns:
            dict: {website_url: [vulnerability_list]}
        """
        if not website_urls:
            return {}

        try:
            with connection.cursor() as cursor:
                # Build OR conditions: each website URL (query string stripped) becomes a prefix match
                conditions = []
                params = []
                url_mapping = {}  # base_url -> original_url

                for url, target_id in website_urls:
                    if not url or target_id is None:
                        continue
                    # Use urlparse to drop query string and fragment, keeping only scheme://netloc/path
                    parsed = urlparse(url)
                    base_url = urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
                    url_mapping[base_url] = url
                    conditions.append("(v.url LIKE %s AND v.target_id = %s)")
                    params.extend([base_url + '%', target_id])

                if not conditions:
                    return {}

                where_clause = " OR ".join(conditions)

                sql = f"""
                    SELECT v.id, v.vuln_type, v.severity, v.url, v.target_id
                    FROM vulnerability v
                    WHERE {where_clause}
                    ORDER BY
                        CASE v.severity
                            WHEN 'critical' THEN 1
                            WHEN 'high' THEN 2
                            WHEN 'medium' THEN 3
                            WHEN 'low' THEN 4
                            ELSE 5
                        END
                """
                cursor.execute(sql, params)

                # Fetch all vulnerabilities
                all_vulns = []
                for row in cursor.fetchall():
                    all_vulns.append({
                        'id': row[0],
                        'vuln_type': row[1],
                        'name': row[1],
                        'severity': row[2],
                        'url': row[3],
                        'target_id': row[4],
                    })

                # Group by the original website URL (for the returned mapping)
                result = {url: [] for url, _ in website_urls}
                for vuln in all_vulns:
                    vuln_url = vuln['url']
                    # Find the matching website URL (longest-prefix match)
                    for website_url, target_id in website_urls:
                        parsed = urlparse(website_url)
                        base_url = urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
                        if vuln_url.startswith(base_url) and vuln['target_id'] == target_id:
                            result[website_url].append(vuln)
                            break

                return result
        except Exception as e:
            logger.error(f"Batch vulnerability lookup failed: {e}")
            return {}

    def get(self, request: Request):
        """Search assets"""
        # Read the search query
        query = request.query_params.get('q', '').strip()

        if not query:
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Search query (q) is required',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Read and validate the asset type
        asset_type = request.query_params.get('asset_type', 'website').strip().lower()
        if asset_type not in VALID_ASSET_TYPES:
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message=f'Invalid asset_type. Must be one of: {", ".join(VALID_ASSET_TYPES)}',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Read pagination parameters
        try:
            page = int(request.query_params.get('page', 1))
            page_size = int(request.query_params.get('pageSize', 10))
        except (ValueError, TypeError):
            page = 1
            page_size = 10

        # Clamp pagination parameters
        page = max(1, page)
        page_size = min(max(1, page_size), 100)

        # Fetch the total count and the search results
        total = self.service.count(query, asset_type)
        total_pages = (total + page_size - 1) // page_size if total > 0 else 1
        offset = (page - 1) * page_size

        all_results = self.service.search(query, asset_type)
        results = all_results[offset:offset + page_size]

        # Batch-load vulnerability data (only needed for the website type)
        vulnerabilities_by_url = {}
        if asset_type == 'website':
            website_urls = [(r.get('url'), r.get('target_id')) for r in results if r.get('url') and r.get('target_id')]
            vulnerabilities_by_url = self._get_vulnerabilities_by_url_prefix(website_urls) if website_urls else {}

        # Format the results
        formatted_results = [self._format_result(r, vulnerabilities_by_url, asset_type) for r in results]

        return success_response(data={
            'results': formatted_results,
            'total': total,
            'page': page,
            'pageSize': page_size,
            'totalPages': total_pages,
            'assetType': asset_type,
        })


class AssetSearchExportView(APIView):
    """
    Asset search export API

    GET /api/assets/search/export/

    Query Parameters:
        q: search query expression
        asset_type: asset type ('website' or 'endpoint', default 'website')

    Response:
        CSV file (with Content-Length, so browsers can show download progress)
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = AssetSearchService()

    def _get_headers_and_formatters(self, asset_type: str):
        """Build the CSV headers and field formatters"""
        from apps.common.utils import format_datetime, format_list_field

        if asset_type == 'website':
            headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
                       'webserver', 'location', 'tech', 'vhost', 'created_at']
        else:
            headers = ['url', 'host', 'title', 'status_code', 'content_type', 'content_length',
                       'webserver', 'location', 'tech', 'matched_gf_patterns', 'vhost', 'created_at']

        formatters = {
            'created_at': format_datetime,
            'tech': lambda x: format_list_field(x, separator='; '),
            'matched_gf_patterns': lambda x: format_list_field(x, separator='; '),
            'vhost': lambda x: 'true' if x else ('false' if x is False else ''),
        }

        return headers, formatters

    def get(self, request: Request):
        """Export search results as CSV (with Content-Length for download progress)"""
        from apps.common.utils import create_csv_export_response

        # Read the search query
        query = request.query_params.get('q', '').strip()

        if not query:
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message='Search query (q) is required',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Read and validate the asset type
        asset_type = request.query_params.get('asset_type', 'website').strip().lower()
        if asset_type not in VALID_ASSET_TYPES:
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message=f'Invalid asset_type. Must be one of: {", ".join(VALID_ASSET_TYPES)}',
                status_code=status.HTTP_400_BAD_REQUEST
            )

        # Quick existence check to avoid exporting an empty file
        total = self.service.count(query, asset_type)
        if total == 0:
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message='No results to export',
                status_code=status.HTTP_404_NOT_FOUND
            )

        # Build headers and formatters
        headers, formatters = self._get_headers_and_formatters(asset_type)

        # Build the filename
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f'search_{asset_type}_{timestamp}.csv'

        # Use the shared export helper
        data_iterator = self.service.search_iter(query, asset_type)
        return create_csv_export_response(
            data_iterator=data_iterator,
            headers=headers,
            filename=filename,
            field_formatters=formatters,
            show_progress=True  # show download progress
        )
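A usage sketch for the search API above; the base URL is a placeholder and the session is assumed to already carry a logged-in cookie:

import requests

session = requests.Session()  # assumed to already be authenticated
resp = session.get(
    'https://xingrin.example.com/api/assets/search/',  # placeholder host
    params={
        'q': 'host="api" && tech="nginx"',  # fuzzy host match AND-ed with tech
        'asset_type': 'website',
        'page': 1,
        'pageSize': 20,
    },
)
payload = resp.json()
print(payload['total'], 'matches across', payload['totalPages'], 'pages')
for item in payload['results']:
    print(item['statusCode'], item['url'], item['technologies'])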
@@ -14,6 +14,10 @@ import os
import sys
import requests
import logging
import urllib3

# Disable SSL warnings for self-signed certificates (remote worker scenario)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

logger = logging.getLogger(__name__)

@@ -30,21 +34,34 @@ def fetch_config_and_setup_django():
        print("[ERROR] Missing SERVER_URL environment variable", file=sys.stderr)
        sys.exit(1)

    config_url = f"{server_url}/api/workers/config/"
    # Declare the worker identity (local/remote) via an environment variable
    is_local = os.environ.get("IS_LOCAL", "false").lower() == "true"
    config_url = f"{server_url}/api/workers/config/?is_local={str(is_local).lower()}"
    print(f"[CONFIG] Fetching configuration from the config center: {config_url}")
    print(f"[CONFIG] IS_LOCAL={is_local}")
    try:
        resp = requests.get(config_url, timeout=10)
        # Build request headers (including the Worker API key)
        headers = {}
        worker_api_key = os.environ.get("WORKER_API_KEY", "")
        if worker_api_key:
            headers["X-Worker-API-Key"] = worker_api_key

        # verify=False: remote workers may reach HTTPS endpoints with self-signed certificates
        resp = requests.get(config_url, headers=headers, timeout=10, verify=False)
        resp.raise_for_status()
        config = resp.json()

        # Database configuration (required)
        os.environ.setdefault("DB_HOST", config['db']['host'])
        os.environ.setdefault("DB_PORT", config['db']['port'])
        os.environ.setdefault("DB_NAME", config['db']['name'])
        os.environ.setdefault("DB_USER", config['db']['user'])
        os.environ.setdefault("DB_PASSWORD", config['db']['password'])
        db_host = config['db']['host']
        db_port = config['db']['port']
        db_name = config['db']['name']
        db_user = config['db']['user']

        # Redis configuration
        os.environ.setdefault("REDIS_URL", config['redisUrl'])
        os.environ.setdefault("DB_HOST", db_host)
        os.environ.setdefault("DB_PORT", db_port)
        os.environ.setdefault("DB_NAME", db_name)
        os.environ.setdefault("DB_USER", db_user)
        os.environ.setdefault("DB_PASSWORD", config['db']['password'])

        # Logging configuration
        os.environ.setdefault("LOG_DIR", config['paths']['logs'])
@@ -52,7 +69,11 @@ def fetch_config_and_setup_django():
        os.environ.setdefault("ENABLE_COMMAND_LOGGING", str(config['logging']['enableCommandLogging']).lower())
        os.environ.setdefault("DEBUG", str(config['debug']))

        print(f"[CONFIG] Configuration fetched successfully from the config center: {config_url}")
        print(f"[CONFIG] ✓ Configuration fetched successfully")
        print(f"[CONFIG] DB_HOST: {db_host}")
        print(f"[CONFIG] DB_PORT: {db_port}")
        print(f"[CONFIG] DB_NAME: {db_name}")
        print(f"[CONFIG] DB_USER: {db_user}")

    except Exception as e:
        print(f"[ERROR] Failed to fetch configuration: {config_url} - {e}", file=sys.stderr)
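For reference, a minimal sketch of the JSON payload this fetch expects. The field names are taken from the accesses above; the values are illustrative only:

# Shape implied by the config accesses above; values are placeholders.
example_config = {
    "db": {
        "host": "10.0.0.5",
        "port": "5432",
        "name": "xingrin",
        "user": "xingrin",
        "password": "secret",
    },
    "redisUrl": "redis://10.0.0.5:6379/0",
    "paths": {"logs": "/opt/xingrin/logs"},
    "logging": {"enableCommandLogging": True},
    "debug": False,
}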
backend/apps/common/error_codes.py (new file, 31 lines)
@@ -0,0 +1,31 @@
"""
Standardized error code definitions

Deliberately minimal (following the practice of Stripe, GitHub, and similar APIs):
- Define only 5-10 generic error codes
- Unknown errors fall back to a generic code
- Error code format: uppercase letters and underscores
"""


class ErrorCodes:
    """Standardized error codes

    Only generic error codes are defined; everything else uses a generic message.
    This mirrors the standard practice of Stripe, GitHub, and similar APIs.

    Format conventions:
    - Uppercase letters and underscores
    - Short and self-explanatory
    - The frontend maps each code to an i18n key
    """

    # Generic error codes (8 total)
    VALIDATION_ERROR = 'VALIDATION_ERROR'    # input validation failed
    NOT_FOUND = 'NOT_FOUND'                  # resource not found
    PERMISSION_DENIED = 'PERMISSION_DENIED'  # insufficient permissions
    SERVER_ERROR = 'SERVER_ERROR'            # internal server error
    BAD_REQUEST = 'BAD_REQUEST'              # malformed request
    CONFLICT = 'CONFLICT'                    # resource conflict (e.g. duplicate creation)
    UNAUTHORIZED = 'UNAUTHORIZED'            # not authenticated
    RATE_LIMITED = 'RATE_LIMITED'            # too many requests
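Since the frontend maps these codes to i18n keys, a minimal sketch of what such a mapping could look like; the key names here are hypothetical, not taken from the frontend code:

# Hypothetical code-to-i18n-key mapping; the real frontend keys may differ.
ERROR_I18N_KEYS = {
    'VALIDATION_ERROR': 'errors.validation',
    'NOT_FOUND': 'errors.notFound',
    'PERMISSION_DENIED': 'errors.permissionDenied',
    'SERVER_ERROR': 'errors.serverError',
    'BAD_REQUEST': 'errors.badRequest',
    'CONFLICT': 'errors.conflict',
    'UNAUTHORIZED': 'errors.unauthorized',
    'RATE_LIMITED': 'errors.rateLimited',
}

def i18n_key_for(code: str) -> str:
    # Unknown codes fall back to the generic server error key
    return ERROR_I18N_KEYS.get(code, ERROR_I18N_KEYS['SERVER_ERROR'])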
backend/apps/common/exception_handlers.py (new file, 49 lines)
@@ -0,0 +1,49 @@
"""
Custom exception handler

Handles DRF exceptions uniformly so error responses share a consistent format
"""

from rest_framework.views import exception_handler
from rest_framework import status
from rest_framework.exceptions import AuthenticationFailed, NotAuthenticated

from apps.common.response_helpers import error_response
from apps.common.error_codes import ErrorCodes


def custom_exception_handler(exc, context):
    """
    Custom exception handler

    Handles authentication-related exceptions and returns error responses
    in the standard format
    """
    # Run DRF's default exception handler first
    response = exception_handler(exc, context)

    if response is not None:
        # Handle 401 unauthenticated errors
        if response.status_code == status.HTTP_401_UNAUTHORIZED:
            return error_response(
                code=ErrorCodes.UNAUTHORIZED,
                message='Authentication required',
                status_code=status.HTTP_401_UNAUTHORIZED
            )

        # Handle 403 permission errors
        if response.status_code == status.HTTP_403_FORBIDDEN:
            return error_response(
                code=ErrorCodes.PERMISSION_DENIED,
                message='Permission denied',
                status_code=status.HTTP_403_FORBIDDEN
            )

    # Handle NotAuthenticated and AuthenticationFailed exceptions
    if isinstance(exc, (NotAuthenticated, AuthenticationFailed)):
        return error_response(
            code=ErrorCodes.UNAUTHORIZED,
            message='Authentication required',
            status_code=status.HTTP_401_UNAUTHORIZED
        )

    return response
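DRF only invokes a custom handler when it is registered in settings; a minimal sketch of the wiring, assuming the project's standard Django settings module:

# In the Django settings module, register the handler via DRF's
# standard EXCEPTION_HANDLER key:
REST_FRAMEWORK = {
    'EXCEPTION_HANDLER': 'apps.common.exception_handlers.custom_exception_handler',
}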
backend/apps/common/migrations/0001_initial.py (new file, 34 lines)
@@ -0,0 +1,34 @@
# Generated by Django 5.2.7 on 2026-01-06 00:55

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

    initial = True

    dependencies = [
        ('targets', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='BlacklistRule',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('pattern', models.CharField(help_text='Rule pattern, e.g. *.gov, 10.0.0.0/8, 192.168.1.1', max_length=255)),
                ('rule_type', models.CharField(choices=[('domain', 'Domain'), ('ip', 'IP address'), ('cidr', 'CIDR range'), ('keyword', 'Keyword')], help_text='Rule type: domain, ip, cidr', max_length=20)),
                ('scope', models.CharField(choices=[('global', 'Global rule'), ('target', 'Target rule')], db_index=True, help_text='Scope: global or target', max_length=20)),
                ('description', models.CharField(blank=True, default='', help_text='Rule description', max_length=500)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('target', models.ForeignKey(blank=True, help_text='Associated Target (set only when scope=target)', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='blacklist_rules', to='targets.target')),
            ],
            options={
                'db_table': 'blacklist_rule',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['scope', 'rule_type'], name='blacklist_r_scope_6ff77f_idx'), models.Index(fields=['target', 'scope'], name='blacklist_r_target__191441_idx')],
                'constraints': [models.UniqueConstraint(fields=('pattern', 'scope', 'target'), name='unique_blacklist_rule')],
            },
        ),
    ]
backend/apps/common/migrations/__init__.py (new file, 0 lines)
backend/apps/common/models/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
"""Common models"""
from apps.common.models.blacklist import BlacklistRule

__all__ = ['BlacklistRule']
backend/apps/common/models/blacklist.py (new file, 71 lines)
@@ -0,0 +1,71 @@
"""Blacklist rule model"""
from django.db import models


class BlacklistRule(models.Model):
    """Blacklist rule model

    Stores blacklist filtering rules for domains, IPs, and CIDR ranges.
    Supports two scopes: global rules and Target-level rules.
    """

    class RuleType(models.TextChoices):
        DOMAIN = 'domain', 'Domain'
        IP = 'ip', 'IP address'
        CIDR = 'cidr', 'CIDR range'
        KEYWORD = 'keyword', 'Keyword'

    class Scope(models.TextChoices):
        GLOBAL = 'global', 'Global rule'
        TARGET = 'target', 'Target rule'

    id = models.AutoField(primary_key=True)
    pattern = models.CharField(
        max_length=255,
        help_text='Rule pattern, e.g. *.gov, 10.0.0.0/8, 192.168.1.1'
    )
    rule_type = models.CharField(
        max_length=20,
        choices=RuleType.choices,
        help_text='Rule type: domain, ip, cidr'
    )
    scope = models.CharField(
        max_length=20,
        choices=Scope.choices,
        db_index=True,
        help_text='Scope: global or target'
    )
    target = models.ForeignKey(
        'targets.Target',
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='blacklist_rules',
        help_text='Associated Target (set only when scope=target)'
    )
    description = models.CharField(
        max_length=500,
        blank=True,
        default='',
        help_text='Rule description'
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'blacklist_rule'
        indexes = [
            models.Index(fields=['scope', 'rule_type']),
            models.Index(fields=['target', 'scope']),
        ]
        constraints = [
            models.UniqueConstraint(
                fields=['pattern', 'scope', 'target'],
                name='unique_blacklist_rule'
            ),
        ]
        ordering = ['-created_at']

    def __str__(self):
        if self.scope == self.Scope.TARGET and self.target:
            return f"[{self.scope}:{self.target_id}] {self.pattern}"
        return f"[{self.scope}] {self.pattern}"
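A quick ORM sketch of the two scopes in practice; the Target import path and lookup are illustrative assumptions:

from apps.common.models import BlacklistRule
from apps.targets.models import Target  # import path assumed

# Global rule: applies to every scan
BlacklistRule.objects.create(
    pattern='*.gov',
    rule_type=BlacklistRule.RuleType.DOMAIN,
    scope=BlacklistRule.Scope.GLOBAL,
)

# Target-level rule: only filters assets under one target
target = Target.objects.get(name='example.com')  # hypothetical target
BlacklistRule.objects.create(
    pattern='10.0.0.0/8',
    rule_type=BlacklistRule.RuleType.CIDR,
    scope=BlacklistRule.Scope.TARGET,
    target=target,
)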
backend/apps/common/permissions.py (new file, 80 lines)
@@ -0,0 +1,80 @@
"""
Centralized permission management

Implements the authentication logic for three classes of endpoints:
1. Public endpoints (no auth): login, logout, current-user status
2. Worker endpoints (API key auth): register, config, heartbeat, callbacks, resource sync
3. Business endpoints (session auth): every other API
"""

import re
import logging
from django.conf import settings
from rest_framework.permissions import BasePermission

logger = logging.getLogger(__name__)

# Public endpoint whitelist (no authentication required)
PUBLIC_ENDPOINTS = [
    r'^/api/auth/login/$',
    r'^/api/auth/logout/$',
    r'^/api/auth/me/$',
]

# Worker API endpoints (require API key authentication)
# Covers: register, config, heartbeat, callbacks, resource sync (wordlist download)
WORKER_ENDPOINTS = [
    r'^/api/workers/register/$',
    r'^/api/workers/config/$',
    r'^/api/workers/\d+/heartbeat/$',
    r'^/api/callbacks/',
    # Resource sync endpoint (workers need to download wordlist files)
    r'^/api/wordlists/download/$',
    # Note: the fingerprint export API uses session auth (frontend user exports);
    # workers read fingerprint data directly from the database, not over HTTP
]


class IsAuthenticatedOrPublic(BasePermission):
    """
    Custom permission class:
    - Whitelisted endpoints are publicly accessible
    - Worker endpoints require API key authentication
    - Everything else requires session authentication
    """

    def has_permission(self, request, view):
        path = request.path

        # Check the public whitelist
        for pattern in PUBLIC_ENDPOINTS:
            if re.match(pattern, path):
                return True

        # Check whether this is a worker endpoint
        for pattern in WORKER_ENDPOINTS:
            if re.match(pattern, path):
                return self._check_worker_api_key(request)

        # All other paths require session authentication
        return request.user and request.user.is_authenticated

    def _check_worker_api_key(self, request):
        """Validate the Worker API key"""
        api_key = request.headers.get('X-Worker-API-Key')
        expected_key = getattr(settings, 'WORKER_API_KEY', None)

        if not expected_key:
            # Reject all worker requests when no API key is configured
            logger.warning("WORKER_API_KEY is not configured; rejecting worker request")
            return False

        if not api_key:
            logger.warning(f"Worker request missing X-Worker-API-Key header: {request.path}")
            return False

        if api_key != expected_key:
            logger.warning(f"Invalid Worker API key: {request.path}")
            return False

        return True
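A sketch of how a worker authenticates against these endpoints, mirroring the config fetch shown earlier; the base URL is a placeholder:

import os
import requests

resp = requests.get(
    'https://xingrin.example.com/api/workers/config/',  # placeholder host
    headers={'X-Worker-API-Key': os.environ['WORKER_API_KEY']},
    timeout=10,
    verify=False,  # remote workers may face self-signed certificates
)
resp.raise_for_status()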
@@ -16,6 +16,7 @@ def setup_django_for_prefect():
    1. Add the project root to the Python path
    2. Set the DJANGO_SETTINGS_MODULE environment variable
    3. Call django.setup() to initialize Django
    4. Close stale database connections so fresh connections are used

    Usage:
        from apps.common.prefect_django_setup import setup_django_for_prefect
@@ -36,6 +37,25 @@ def setup_django_for_prefect():
    # Initialize Django
    import django
    django.setup()

    # Close all stale database connections so worker processes use fresh ones.
    # Fixes the "server closed the connection unexpectedly" problem.
    from django.db import connections
    connections.close_all()


def close_old_db_connections():
    """
    Close stale database connections

    Call this in long-running tasks to ensure a valid database connection is used.
    Useful:
    - before a Flow starts
    - before a Task starts
    - before resuming work after a long idle period
    """
    from django.db import connections
    connections.close_all()


# Auto-initialize on import (importing the module performs the setup)
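A sketch of using the helper at the start of a Prefect task; the task body and model import are illustrative assumptions:

from prefect import task

from apps.common.prefect_django_setup import close_old_db_connections


@task
def persist_scan_results(rows: list) -> int:
    # A long-lived worker process may hold a dead connection; refresh first
    close_old_db_connections()
    from apps.asset.models import Subdomain  # hypothetical model import
    created = Subdomain.objects.bulk_create(rows, ignore_conflicts=True)
    return len(created)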
backend/apps/common/response_helpers.py (new file, 88 lines)
@@ -0,0 +1,88 @@
"""
Standardized API response helpers

Follows industry standards (RFC 9457 Problem Details) and common practice at
Google, Stripe, and GitHub:
- Success responses contain only data, no message field
- Error responses use machine-readable error codes that the frontend maps to i18n messages
"""
from typing import Any, Dict, List, Optional, Union

from rest_framework import status
from rest_framework.response import Response


def success_response(
    data: Optional[Union[Dict[str, Any], List[Any]]] = None,
    status_code: int = status.HTTP_200_OK
) -> Response:
    """
    Standardized success response

    Returns the data unwrapped, matching the Stripe/GitHub style.

    Args:
        data: response data (dict or list)
        status_code: HTTP status code, default 200

    Returns:
        Response: a DRF Response object

    Examples:
        # Single resource
        >>> success_response(data={'id': 1, 'name': 'Test'})
        {'id': 1, 'name': 'Test'}

        # Operation result
        >>> success_response(data={'count': 3, 'scans': [...]})
        {'count': 3, 'scans': [...]}

        # Resource creation
        >>> success_response(data={'id': 1}, status_code=201)
    """
    # Note: `data or {}` would be wrong here, because an empty list [] would become {}
    if data is None:
        data = {}
    return Response(data, status=status_code)


def error_response(
    code: str,
    message: Optional[str] = None,
    details: Optional[List[Dict[str, Any]]] = None,
    status_code: int = status.HTTP_400_BAD_REQUEST
) -> Response:
    """
    Standardized error response

    Args:
        code: error code (e.g. 'VALIDATION_ERROR', 'NOT_FOUND'),
            formatted as uppercase letters and underscores
        message: debugging information for developers (not shown to users)
        details: detailed error information (e.g. field-level validation errors)
        status_code: HTTP status code, default 400

    Returns:
        Response: a DRF Response object

    Examples:
        # Simple error
        >>> error_response(code='NOT_FOUND', status_code=404)
        {'error': {'code': 'NOT_FOUND'}}

        # With debugging info
        >>> error_response(
        ...     code='VALIDATION_ERROR',
        ...     message='Invalid input data',
        ...     details=[{'field': 'name', 'message': 'Required'}]
        ... )
        {'error': {'code': 'VALIDATION_ERROR', 'message': '...', 'details': [...]}}
    """
    error_body: Dict[str, Any] = {'code': code}

    if message:
        error_body['message'] = message

    if details:
        error_body['details'] = details

    return Response({'error': error_body}, status=status_code)
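A sketch of the helpers inside a DRF view; the view, model, and lookup helper names are illustrative:

from rest_framework import status
from rest_framework.views import APIView

from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes


class ScanDetailView(APIView):  # hypothetical view
    def get(self, request, scan_id: int):
        scan = self.get_scan_or_none(scan_id)  # hypothetical lookup helper
        if scan is None:
            return error_response(code=ErrorCodes.NOT_FOUND,
                                  status_code=status.HTTP_404_NOT_FOUND)
        return success_response(data={'id': scan.id, 'status': scan.status})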
backend/apps/common/serializers/__init__.py (new file, 12 lines)
@@ -0,0 +1,12 @@
"""Common serializers"""
from .blacklist_serializers import (
    BlacklistRuleSerializer,
    GlobalBlacklistRuleSerializer,
    TargetBlacklistRuleSerializer,
)

__all__ = [
    'BlacklistRuleSerializer',
    'GlobalBlacklistRuleSerializer',
    'TargetBlacklistRuleSerializer',
]
backend/apps/common/serializers/blacklist_serializers.py (new file, 68 lines)
@@ -0,0 +1,68 @@
"""Blacklist rule serializers"""
from rest_framework import serializers

from apps.common.models import BlacklistRule
from apps.common.utils import detect_rule_type


class BlacklistRuleSerializer(serializers.ModelSerializer):
    """Blacklist rule serializer"""

    class Meta:
        model = BlacklistRule
        fields = [
            'id',
            'pattern',
            'rule_type',
            'scope',
            'target',
            'description',
            'created_at',
        ]
        read_only_fields = ['id', 'rule_type', 'created_at']

    def validate_pattern(self, value):
        """Validate the rule pattern"""
        if not value or not value.strip():
            raise serializers.ValidationError("Rule pattern must not be empty")
        return value.strip()

    def create(self, validated_data):
        """Detect the rule type automatically on creation"""
        pattern = validated_data.get('pattern', '')
        validated_data['rule_type'] = detect_rule_type(pattern)
        return super().create(validated_data)

    def update(self, instance, validated_data):
        """Re-detect the rule type on update"""
        if 'pattern' in validated_data:
            pattern = validated_data['pattern']
            validated_data['rule_type'] = detect_rule_type(pattern)
        return super().update(instance, validated_data)


class GlobalBlacklistRuleSerializer(BlacklistRuleSerializer):
    """Global blacklist rule serializer"""

    class Meta(BlacklistRuleSerializer.Meta):
        fields = ['id', 'pattern', 'rule_type', 'description', 'created_at']
        read_only_fields = ['id', 'rule_type', 'created_at']

    def create(self, validated_data):
        """Create a global rule"""
        validated_data['scope'] = BlacklistRule.Scope.GLOBAL
        validated_data['target'] = None
        return super().create(validated_data)


class TargetBlacklistRuleSerializer(BlacklistRuleSerializer):
    """Target blacklist rule serializer"""

    class Meta(BlacklistRuleSerializer.Meta):
        fields = ['id', 'pattern', 'rule_type', 'description', 'created_at']
        read_only_fields = ['id', 'rule_type', 'created_at']

    def create(self, validated_data):
        """Create a Target rule (target_id is set by the view)"""
        validated_data['scope'] = BlacklistRule.Scope.TARGET
        return super().create(validated_data)
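A sketch of the automatic rule_type detection in action, using the serializers above:

from apps.common.serializers import GlobalBlacklistRuleSerializer

serializer = GlobalBlacklistRuleSerializer(data={'pattern': '10.0.0.0/8'})
serializer.is_valid(raise_exception=True)
rule = serializer.save()   # scope is forced to GLOBAL by create()
print(rule.rule_type)      # 'cidr', detected by detect_rule_type()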
backend/apps/common/services/__init__.py (new file, 18 lines)
@@ -0,0 +1,18 @@
"""
Common service module

System-level shared services, including:
- SystemLogService: system log reading service
- BlacklistService: blacklist filtering service

Note: FilterService has moved to apps.common.utils.filter_utils
Preferred import: from apps.common.utils.filter_utils import apply_filters
"""

from .system_log_service import SystemLogService
from .blacklist_service import BlacklistService

__all__ = [
    'SystemLogService',
    'BlacklistService',
]
backend/apps/common/services/blacklist_service.py (new file, 176 lines)
@@ -0,0 +1,176 @@
"""
Blacklist rule management service

Handles CRUD operations for blacklist rules (database layer).
For filtering logic, use apps.common.utils.BlacklistFilter.

Architecture:
- Model: BlacklistRule (apps.common.models.blacklist)
- Service: BlacklistService (this file) - rule CRUD
- Utils: BlacklistFilter (apps.common.utils.blacklist_filter) - filtering logic
- View: GlobalBlacklistView, TargetViewSet.blacklist
"""

import logging
from typing import List, Dict, Any, Optional

from django.db.models import QuerySet

from apps.common.utils import detect_rule_type

logger = logging.getLogger(__name__)


def _normalize_patterns(patterns: List[str]) -> List[str]:
    """
    Normalize a pattern list: deduplicate and drop empty lines

    Args:
        patterns: raw pattern list

    Returns:
        List[str]: deduplicated pattern list (order preserved)
    """
    return list(dict.fromkeys(filter(None, (p.strip() for p in patterns))))


class BlacklistService:
    """
    Blacklist rule management service

    Only handles rule CRUD; contains no filtering logic.
    For filtering, use the BlacklistFilter utility class.
    """

    def get_global_rules(self) -> QuerySet:
        """
        List global blacklist rules

        Returns:
            QuerySet: global rule queryset
        """
        from apps.common.models import BlacklistRule
        return BlacklistRule.objects.filter(scope=BlacklistRule.Scope.GLOBAL)

    def get_target_rules(self, target_id: int) -> QuerySet:
        """
        List Target-level blacklist rules

        Args:
            target_id: Target ID

        Returns:
            QuerySet: Target-level rule queryset
        """
        from apps.common.models import BlacklistRule
        return BlacklistRule.objects.filter(
            scope=BlacklistRule.Scope.TARGET,
            target_id=target_id
        )

    def get_rules(self, target_id: Optional[int] = None) -> List:
        """
        Load blacklist rules (global plus Target-level)

        Args:
            target_id: Target ID used to load Target-level rules

        Returns:
            List[BlacklistRule]: rule list
        """
        from apps.common.models import BlacklistRule

        # Load global rules
        rules = list(BlacklistRule.objects.filter(scope=BlacklistRule.Scope.GLOBAL))

        # Load Target-level rules
        if target_id:
            target_rules = BlacklistRule.objects.filter(
                scope=BlacklistRule.Scope.TARGET,
                target_id=target_id
            )
            rules.extend(target_rules)

        return rules

    def replace_global_rules(self, patterns: List[str]) -> Dict[str, Any]:
        """
        Replace all global blacklist rules (PUT semantics)

        Args:
            patterns: new rule pattern list

        Returns:
            Dict: {'count': int} final rule count
        """
        from apps.common.models import BlacklistRule

        count = self._replace_rules(
            patterns=patterns,
            scope=BlacklistRule.Scope.GLOBAL,
            target=None
        )

        logger.info("Replaced all global blacklist rules: %d rules", count)
        return {'count': count}

    def replace_target_rules(self, target, patterns: List[str]) -> Dict[str, Any]:
        """
        Replace all Target-level blacklist rules (PUT semantics)

        Args:
            target: Target object
            patterns: new rule pattern list

        Returns:
            Dict: {'count': int} final rule count
        """
        from apps.common.models import BlacklistRule

        count = self._replace_rules(
            patterns=patterns,
            scope=BlacklistRule.Scope.TARGET,
            target=target
        )

        logger.info("Replaced all Target blacklist rules: %d rules (Target: %s)", count, target.name)
        return {'count': count}

    def _replace_rules(self, patterns: List[str], scope: str, target=None) -> int:
        """
        Internal helper: replace rules wholesale

        Args:
            patterns: rule pattern list
            scope: rule scope (GLOBAL/TARGET)
            target: Target object (required only for the TARGET scope)

        Returns:
            int: final rule count
        """
        from apps.common.models import BlacklistRule
        from django.db import transaction

        patterns = _normalize_patterns(patterns)

        with transaction.atomic():
            # 1. Delete the old rules
            delete_filter = {'scope': scope}
            if target:
                delete_filter['target'] = target
            BlacklistRule.objects.filter(**delete_filter).delete()

            # 2. Create the new rules
            if patterns:
                rules = [
                    BlacklistRule(
                        pattern=pattern,
                        rule_type=detect_rule_type(pattern),
                        scope=scope,
                        target=target
                    )
                    for pattern in patterns
                ]
                BlacklistRule.objects.bulk_create(rules)

        return len(patterns)
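A short sketch of the PUT semantics: after the call, the stored rule set is exactly the (normalized) submitted list.

from apps.common.services import BlacklistService

service = BlacklistService()
result = service.replace_global_rules([
    '*.gov',
    '10.0.0.0/8',
    '10.0.0.0/8',   # duplicates are dropped by _normalize_patterns
    '',             # empty lines are dropped too
])
print(result)       # {'count': 2}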
backend/apps/common/services/system_log_service.py (new file, 190 lines)
@@ -0,0 +1,190 @@
"""
System log service module

Provides system log reading, supporting:
- Reading log files from the log directory
- Capping the number of returned lines to prevent memory exhaustion
- Listing available log files
"""

import fnmatch
import logging
import os
import subprocess
from datetime import datetime, timezone
from typing import TypedDict


logger = logging.getLogger(__name__)


class LogFileInfo(TypedDict):
    """Log file information"""
    filename: str
    category: str  # 'system' | 'error' | 'performance' | 'container'
    size: int
    modifiedAt: str  # ISO 8601 format


class SystemLogService:
    """
    System log service

    Reads system log files, supporting both in-container paths and host-mounted paths.
    """

    # Log file categorization rules
    CATEGORY_RULES = [
        ('xingrin.log', 'system'),
        ('xingrin_error.log', 'error'),
        ('performance.log', 'performance'),
        ('container_*.log', 'container'),
    ]

    def __init__(self):
        # Log directory path
        self.log_dir = "/opt/xingrin/logs"
        self.default_file = "xingrin.log"  # default log file
        self.default_lines = 200           # default number of lines returned
        self.max_lines = 10000             # hard cap on returned lines
        self.timeout_seconds = 3           # tail command timeout

    def _categorize_file(self, filename: str) -> str | None:
        """
        Determine the log category from the filename

        Returns:
            The category name, or None if the file is not a log file
        """
        for pattern, category in self.CATEGORY_RULES:
            if fnmatch.fnmatch(filename, pattern):
                return category
        return None

    def _validate_filename(self, filename: str) -> bool:
        """
        Validate the filename (guards against path traversal attacks)

        Args:
            filename: filename to validate

        Returns:
            bool: whether the filename is valid
        """
        # Path separators are not allowed
        if '/' in filename or '\\' in filename:
            return False
        # .. traversal is not allowed
        if '..' in filename:
            return False
        # Must be a known log file type
        return self._categorize_file(filename) is not None

    def get_log_files(self) -> list[LogFileInfo]:
        """
        List all available log files

        Returns:
            Log file info list, sorted by category and filename
        """
        files: list[LogFileInfo] = []

        if not os.path.isdir(self.log_dir):
            logger.warning("Log directory does not exist: %s", self.log_dir)
            return files

        for filename in os.listdir(self.log_dir):
            filepath = os.path.join(self.log_dir, filename)

            # Only handle files; skip directories
            if not os.path.isfile(filepath):
                continue

            # Determine the category
            category = self._categorize_file(filename)
            if category is None:
                continue

            # Collect file metadata
            try:
                stat = os.stat(filepath)
                modified_at = datetime.fromtimestamp(
                    stat.st_mtime, tz=timezone.utc
                ).isoformat()

                files.append({
                    'filename': filename,
                    'category': category,
                    'size': stat.st_size,
                    'modifiedAt': modified_at,
                })
            except OSError as e:
                logger.warning("Failed to stat file %s: %s", filepath, e)
                continue

        # Sort by category priority (system > error > performance > container), then filename
        category_order = {'system': 0, 'error': 1, 'performance': 2, 'container': 3}
        files.sort(key=lambda f: (category_order.get(f['category'], 99), f['filename']))

        return files

    def get_logs_content(self, filename: str | None = None, lines: int | None = None) -> str:
        """
        Read system log content

        Args:
            filename: log filename, defaults to xingrin.log
            lines: number of log lines to return, default 200, max 10000

        Returns:
            str: log content, newline-separated, in original order

        Raises:
            ValueError: invalid filename
            FileNotFoundError: log file does not exist
        """
        # Resolve the filename
        if filename is None:
            filename = self.default_file

        # Validate the filename
        if not self._validate_filename(filename):
            raise ValueError(f"Invalid filename: {filename}")

        # Build the full path
        log_file = os.path.join(self.log_dir, filename)

        # Check that the file exists
        if not os.path.isfile(log_file):
            raise FileNotFoundError(f"Log file does not exist: {filename}")

        # Validate the line count and apply defaults
        if lines is None:
            lines = self.default_lines

        lines = int(lines)
        if lines < 1:
            lines = 1
        if lines > self.max_lines:
            lines = self.max_lines

        # Use tail to read the end of the log file
        cmd = ["tail", "-n", str(lines), log_file]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=self.timeout_seconds,
            check=False,
        )

        if result.returncode != 0:
            logger.warning(
                "tail command failed: returncode=%s stderr=%s",
                result.returncode,
                (result.stderr or "").strip(),
            )

        # Return the raw content, preserving file order
        return result.stdout or ""
@@ -27,3 +27,10 @@ vulnerabilities_saved = Signal()
# - worker_name: str  worker name
# - message: str      failure reason
worker_delete_failed = Signal()

# Signal: all workers are under high load
# Arguments:
# - worker_name: str  name of the worker selected despite the load
# - cpu: float        CPU usage
# - mem: float        memory usage
all_workers_high_load = Signal()
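A sketch of subscribing to the new signal; the import path for the signals module and the notification logic are assumptions:

from django.dispatch import receiver

from apps.scan.signals import all_workers_high_load  # import path assumed


@receiver(all_workers_high_load)
def warn_on_high_load(sender, worker_name: str, cpu: float, mem: float, **kwargs):
    # React when every worker is busy and one was dispatched to anyway
    print(f"All workers busy; dispatched to {worker_name} (cpu={cpu:.0f}%, mem={mem:.0f}%)")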
@@ -1,12 +1,35 @@
"""
Common module URL configuration

Routes:
- /api/health/      health check (no auth required)
- /api/auth/*       authentication (login, logout, user info)
- /api/system/*     system administration (log viewing, etc.)
- /api/blacklist/*  blacklist management
"""

from django.urls import path
from .views import LoginView, LogoutView, MeView, ChangePasswordView

from .views import (
    LoginView, LogoutView, MeView, ChangePasswordView,
    SystemLogsView, SystemLogFilesView, HealthCheckView,
    GlobalBlacklistView,
)

urlpatterns = [
    # Health check (no auth required)
    path('health/', HealthCheckView.as_view(), name='health-check'),

    # Authentication
    path('auth/login/', LoginView.as_view(), name='auth-login'),
    path('auth/logout/', LogoutView.as_view(), name='auth-logout'),
    path('auth/me/', MeView.as_view(), name='auth-me'),
    path('auth/change-password/', ChangePasswordView.as_view(), name='auth-change-password'),

    # System administration
    path('system/logs/', SystemLogsView.as_view(), name='system-logs'),
    path('system/logs/files/', SystemLogFilesView.as_view(), name='system-log-files'),

    # Blacklist management (PUT full-replacement mode)
    path('blacklist/rules/', GlobalBlacklistView.as_view(), name='blacklist-rules'),
]
backend/apps/common/utils/__init__.py (new file, 38 lines)
@@ -0,0 +1,38 @@
"""Common utilities"""

from .dedup import deduplicate_for_bulk, get_unique_fields
from .hash import (
    calc_file_sha256,
    calc_stream_sha256,
    safe_calc_file_sha256,
    is_file_hash_match,
)
from .csv_utils import (
    generate_csv_rows,
    format_list_field,
    format_datetime,
    create_csv_export_response,
    UTF8_BOM,
)
from .blacklist_filter import (
    BlacklistFilter,
    detect_rule_type,
    extract_host,
)

__all__ = [
    'deduplicate_for_bulk',
    'get_unique_fields',
    'calc_file_sha256',
    'calc_stream_sha256',
    'safe_calc_file_sha256',
    'is_file_hash_match',
    'generate_csv_rows',
    'format_list_field',
    'format_datetime',
    'create_csv_export_response',
    'UTF8_BOM',
    'BlacklistFilter',
    'detect_rule_type',
    'extract_host',
]
backend/apps/common/utils/blacklist_filter.py (new file, 246 lines)
@@ -0,0 +1,246 @@
"""
Blacklist filtering utilities

Blacklist matching for domains, IPs, CIDR ranges, and keywords.
Pure utility class; no database access.

Supported rule types:
1. Exact domain match: example.com
   - rule: example.com
   - matches: example.com
   - does not match: sub.example.com, other.com

2. Domain suffix match: *.example.com
   - rule: *.example.com
   - matches: sub.example.com, a.b.example.com, example.com
   - does not match: other.com, example.com.cn

3. Keyword match: *cdn*
   - rule: *cdn*
   - matches: cdn.example.com, a.cdn.b.com, mycdn123.com
   - does not match: example.com (does not contain cdn)

4. Exact IP match: 192.168.1.1
   - rule: 192.168.1.1
   - matches: 192.168.1.1
   - does not match: 192.168.1.2

5. CIDR range match: 192.168.0.0/24
   - rule: 192.168.0.0/24
   - matches: 192.168.0.1, 192.168.0.255
   - does not match: 192.168.1.1

Usage:
    from apps.common.utils import BlacklistFilter

    # Create a filter (pass in a rule list)
    rules = BlacklistRule.objects.filter(...)
    filter = BlacklistFilter(rules)

    # Check a single target
    if filter.is_allowed('http://example.com'):
        process(url)

    # Streaming use
    for url in urls:
        if filter.is_allowed(url):
            process(url)
"""

import ipaddress
import logging
from typing import List, Optional
from urllib.parse import urlparse

from apps.common.validators import is_valid_ip, validate_cidr

logger = logging.getLogger(__name__)


def detect_rule_type(pattern: str) -> str:
    """
    Auto-detect the rule type

    Supported patterns:
    - exact domain match: example.com
    - domain suffix match: *.example.com
    - keyword match: *cdn* (matches domains containing cdn)
    - exact IP match: 192.168.1.1
    - CIDR range: 192.168.0.0/24

    Args:
        pattern: rule pattern string

    Returns:
        str: rule type ('domain', 'ip', 'cidr', 'keyword')
    """
    if not pattern:
        return 'domain'

    pattern = pattern.strip()

    # Keyword pattern: *keyword* (leading and trailing asterisks, no dot inside)
    if pattern.startswith('*') and pattern.endswith('*') and len(pattern) > 2:
        keyword = pattern[1:-1]
        # Keywords must not contain a dot (a dot suggests a domain pattern)
        if '.' not in keyword:
            return 'keyword'

    # CIDR check (contains /)
    if '/' in pattern:
        try:
            validate_cidr(pattern)
            return 'cidr'
        except ValueError:
            pass

    # IP check (validate after stripping any wildcard prefix)
    clean_pattern = pattern.lstrip('*').lstrip('.')
    if is_valid_ip(clean_pattern):
        return 'ip'

    # Default to domain
    return 'domain'


def extract_host(target: str) -> str:
    """
    Extract the hostname from a target string

    Supports:
    - bare domains: example.com
    - bare IPs: 192.168.1.1
    - URLs: http://example.com/path

    Args:
        target: target string

    Returns:
        str: the extracted hostname
    """
    if not target:
        return ''

    target = target.strip()

    # If it's a URL, extract the hostname
    if '://' in target:
        try:
            parsed = urlparse(target)
            return parsed.hostname or target
        except Exception:
            return target

    return target


class BlacklistFilter:
    """
    Blacklist filter

    Pre-compiles the rules for efficient matching.
    """

    def __init__(self, rules: List):
        """
        Initialize the filter

        Args:
            rules: list of BlacklistRule objects
        """
        from apps.common.models import BlacklistRule

        # Pre-parse: bucket rules by type and pre-compile CIDR networks
        self._domain_rules = []   # (pattern, is_wildcard, suffix)
        self._ip_rules = set()    # exact IPs in a set for O(1) lookups
        self._cidr_rules = []     # (pattern, network_obj)
        self._keyword_rules = []  # keyword list (lowercase)

        # Deduplicate: the same pattern may appear in multiple scopes
        seen_patterns = set()

        for rule in rules:
            if rule.pattern in seen_patterns:
                continue
            seen_patterns.add(rule.pattern)
            if rule.rule_type == BlacklistRule.RuleType.DOMAIN:
                pattern = rule.pattern.lower()
                if pattern.startswith('*.'):
                    self._domain_rules.append((pattern, True, pattern[1:]))
                else:
                    self._domain_rules.append((pattern, False, None))
            elif rule.rule_type == BlacklistRule.RuleType.IP:
                self._ip_rules.add(rule.pattern)
            elif rule.rule_type == BlacklistRule.RuleType.CIDR:
                try:
                    network = ipaddress.ip_network(rule.pattern, strict=False)
                    self._cidr_rules.append((rule.pattern, network))
                except ValueError:
                    pass
            elif rule.rule_type == BlacklistRule.RuleType.KEYWORD:
                # *cdn* -> cdn
                keyword = rule.pattern[1:-1].lower()
                self._keyword_rules.append(keyword)

    def is_allowed(self, target: str) -> bool:
        """
        Check whether a target passes the filter

        Args:
            target: the target to check (domain/IP/URL)

        Returns:
            bool: True if allowed (not blacklisted), False if filtered out
        """
        if not target:
            return True

        host = extract_host(target)
        if not host:
            return True

        # Determine the input type first, then take the matching branch
        if is_valid_ip(host):
            return self._check_ip_rules(host)
        else:
            return self._check_domain_rules(host)

    def _check_domain_rules(self, host: str) -> bool:
        """Check domain rules (exact match + suffix match + keyword match)"""
        host_lower = host.lower()

        # 1. Domain rules (exact + suffix)
        for pattern, is_wildcard, suffix in self._domain_rules:
            if is_wildcard:
                if host_lower.endswith(suffix) or host_lower == pattern[2:]:
                    return False
            else:
                if host_lower == pattern:
                    return False

        # 2. Keyword match (substring check, O(n*m))
        for keyword in self._keyword_rules:
            if keyword in host_lower:
                return False

        return True

    def _check_ip_rules(self, host: str) -> bool:
        """Check IP rules (exact match + CIDR)"""
        # 1. Exact IP match (O(1))
        if host in self._ip_rules:
            return False

        # 2. CIDR match
        if self._cidr_rules:
            try:
                ip_obj = ipaddress.ip_address(host)
                for _, network in self._cidr_rules:
                    if ip_obj in network:
                        return False
            except ValueError:
                pass

        return True
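A sketch exercising the documented matching semantics with lightweight in-memory rule stand-ins (a real caller would pass BlacklistRule model instances; only the pattern and rule_type attributes are needed here):

from types import SimpleNamespace

from apps.common.models import BlacklistRule
from apps.common.utils import BlacklistFilter

rules = [
    SimpleNamespace(pattern='*.example.com', rule_type=BlacklistRule.RuleType.DOMAIN),
    SimpleNamespace(pattern='*cdn*', rule_type=BlacklistRule.RuleType.KEYWORD),
    SimpleNamespace(pattern='192.168.0.0/24', rule_type=BlacklistRule.RuleType.CIDR),
]
f = BlacklistFilter(rules)

assert not f.is_allowed('sub.example.com')            # suffix match
assert not f.is_allowed('https://cdn1.other.com/x')   # keyword match on the URL host
assert not f.is_allowed('192.168.0.42')               # inside the CIDR range
assert f.is_allowed('other.com')                      # no rule matches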
backend/apps/common/utils/csv_utils.py (new file, 244 lines)
@@ -0,0 +1,244 @@
|
||||
"""CSV 导出工具模块
|
||||
|
||||
提供流式 CSV 生成功能,支持:
|
||||
- UTF-8 BOM(Excel 兼容)
|
||||
- RFC 4180 规范转义
|
||||
- 流式生成(内存友好)
|
||||
- 带 Content-Length 的文件响应(支持浏览器下载进度显示)
|
||||
"""
|
||||
|
||||
import csv
import io
import os
import tempfile
import logging
from datetime import datetime
from typing import Iterator, Dict, Any, List, Callable, Optional

from django.http import FileResponse, StreamingHttpResponse

logger = logging.getLogger(__name__)

# UTF-8 BOM so that Excel detects the encoding correctly
UTF8_BOM = '\ufeff'


def generate_csv_rows(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    field_formatters: Optional[Dict[str, Callable]] = None
) -> Iterator[str]:
    """
    Generate CSV rows as a stream.

    Args:
        data_iterator: iterator of dictionaries, one per row
        headers: list of CSV header names
        field_formatters: mapping of field name -> formatter function

    Yields:
        CSV row strings (including the line terminator)

    Example:
        >>> data = [{'ip': '192.168.1.1', 'hosts': ['a.com', 'b.com']}]
        >>> headers = ['ip', 'hosts']
        >>> formatters = {'hosts': format_list_field}
        >>> for row in generate_csv_rows(iter(data), headers, formatters):
        ...     print(row, end='')
    """
    # Emit BOM + header row
    output = io.StringIO()
    writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
    writer.writerow(headers)
    yield UTF8_BOM + output.getvalue()

    # Emit data rows
    for row_data in data_iterator:
        output = io.StringIO()
        writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)

        row = []
        for header in headers:
            value = row_data.get(header, '')
            if field_formatters and header in field_formatters:
                value = field_formatters[header](value)
            row.append(value if value is not None else '')

        writer.writerow(row)
        yield output.getvalue()


def format_list_field(values: List, separator: str = ';') -> str:
    """
    Format a list field as a separator-joined string.

    Args:
        values: list of values
        separator: separator, defaults to semicolon

    Returns:
        Separator-joined string

    Example:
        >>> format_list_field(['a.com', 'b.com'])
        'a.com;b.com'
        >>> format_list_field([80, 443])
        '80;443'
        >>> format_list_field([])
        ''
        >>> format_list_field(None)
        ''
    """
    if not values:
        return ''
    return separator.join(str(v) for v in values)


def format_datetime(dt: Optional[datetime]) -> str:
    """
    Format a datetime as a string (converted to the local timezone).

    Args:
        dt: datetime object or None

    Returns:
        Formatted datetime string, YYYY-MM-DD HH:MM:SS (local timezone)

    Example:
        >>> from datetime import datetime
        >>> format_datetime(datetime(2024, 1, 15, 10, 30, 0))
        '2024-01-15 10:30:00'
        >>> format_datetime(None)
        ''
    """
    if dt is None:
        return ''
    if isinstance(dt, str):
        return dt

    # Convert to the local timezone (taken from Django settings)
    from django.utils import timezone
    if timezone.is_aware(dt):
        dt = timezone.localtime(dt)

    return dt.strftime('%Y-%m-%d %H:%M:%S')


def create_csv_export_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None,
    show_progress: bool = True
) -> FileResponse | StreamingHttpResponse:
    """
    Create a CSV export response.

    The response type depends on show_progress:
    - True: temp file + FileResponse with Content-Length (browser shows download progress)
    - False: StreamingHttpResponse (memory friendly, but no download progress)

    Args:
        data_iterator: iterator of dictionaries, one per row
        headers: list of CSV header names
        filename: download file name (e.g. "export_2024.csv")
        field_formatters: mapping of field name -> formatter function
        show_progress: whether to show download progress (defaults to True)

    Returns:
        FileResponse or StreamingHttpResponse

    Example:
        >>> data_iter = service.iter_data()
        >>> headers = ['url', 'host', 'created_at']
        >>> formatters = {'created_at': format_datetime}
        >>> response = create_csv_export_response(
        ...     data_iter, headers, 'websites.csv', formatters
        ... )
        >>> return response
    """
    if show_progress:
        return _create_file_response(data_iterator, headers, filename, field_formatters)
    else:
        return _create_streaming_response(data_iterator, headers, filename, field_formatters)


def _create_file_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None
) -> FileResponse:
    """
    Create a file response with Content-Length (enables browser download progress).

    Implementation: write to a temp file first, then return a FileResponse.
    """
    # Create the temp file
    temp_file = tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.csv',
        delete=False,
        encoding='utf-8'
    )
    temp_path = temp_file.name

    try:
        # Stream the CSV data into the temp file
        for row in generate_csv_rows(data_iterator, headers, field_formatters):
            temp_file.write(row)
        temp_file.close()

        # Get the file size
        file_size = os.path.getsize(temp_path)

        # Build the file response
        response = FileResponse(
            open(temp_path, 'rb'),
            content_type='text/csv; charset=utf-8',
            as_attachment=True,
            filename=filename
        )
        response['Content-Length'] = file_size

        # Cleanup callback: delete the temp file once the response is done
        original_close = response.file_to_stream.close
        def close_and_cleanup():
            original_close()
            try:
                os.unlink(temp_path)
            except OSError:
                pass
        response.file_to_stream.close = close_and_cleanup

        return response

    except Exception as e:
        # Clean up the temp file
        try:
            temp_file.close()
        except Exception:
            pass
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        logger.error(f"创建 CSV 导出响应失败: {e}")
        raise


def _create_streaming_response(
    data_iterator: Iterator[Dict[str, Any]],
    headers: List[str],
    filename: str,
    field_formatters: Optional[Dict[str, Callable]] = None
) -> StreamingHttpResponse:
    """
    Create a streaming response (no Content-Length, memory friendly).
    """
    response = StreamingHttpResponse(
        generate_csv_rows(data_iterator, headers, field_formatters),
        content_type='text/csv; charset=utf-8'
    )
    response['Content-Disposition'] = f'attachment; filename="{filename}"'
    return response
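For orientation, a minimal sketch of how a view might wire these helpers together. The module path apps.common.utils.csv_export, the Website model, and its field names are assumptions for illustration, not part of this diff:

# Hypothetical wiring sketch; Website and the field names are illustrative.
from apps.common.utils.csv_export import (  # assumed module path
    create_csv_export_response, format_datetime, format_list_field,
)

def export_websites(request):
    data_iter = (
        {'url': w.url, 'hosts': w.hosts, 'created_at': w.created_at}
        for w in Website.objects.iterator()  # streams rows, keeps memory flat
    )
    return create_csv_export_response(
        data_iter,
        headers=['url', 'hosts', 'created_at'],
        filename='websites.csv',
        field_formatters={'hosts': format_list_field,
                          'created_at': format_datetime},
    )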
101 backend/apps/common/utils/dedup.py Normal file
@@ -0,0 +1,101 @@
"""
Batch deduplication helpers

Used before bulk_create to deduplicate within a batch and avoid PostgreSQL
ON CONFLICT errors. The unique-constraint fields are read from the Django
model automatically, so they never need to be specified by hand.
"""

import logging
from typing import List, TypeVar, Tuple, Optional

from django.db import models

logger = logging.getLogger(__name__)

T = TypeVar('T')


def get_unique_fields(model: type[models.Model]) -> Optional[Tuple[str, ...]]:
    """
    Read the unique-constraint fields from a Django model.

    Lookup order:
    1. UniqueConstraint entries in Meta.constraints
    2. Meta.unique_together

    Args:
        model: Django model class

    Returns:
        Tuple of unique-constraint field names, or None if there are none
    """
    meta = model._meta

    # 1. Prefer UniqueConstraint
    for constraint in getattr(meta, 'constraints', []):
        if isinstance(constraint, models.UniqueConstraint):
            # Skip conditional (partial unique) constraints
            if getattr(constraint, 'condition', None) is None:
                return tuple(constraint.fields)

    # 2. Fall back to unique_together
    unique_together = getattr(meta, 'unique_together', None)
    if unique_together:
        # unique_together may be (('a', 'b'),) or ('a', 'b')
        if isinstance(unique_together[0], (list, tuple)):
            return tuple(unique_together[0])
        return tuple(unique_together)

    return None


def deduplicate_for_bulk(items: List[T], model: type[models.Model]) -> List[T]:
    """
    Deduplicate items according to the model's unique constraint.

    Reads the unique-constraint fields from the model and builds a dedup key.
    Keeps the last record for each key (later entries are usually newer).

    Args:
        items: items to deduplicate (DTOs or Model instances)
        model: Django model class (used to read the unique constraint)

    Returns:
        Deduplicated list of items

    Example:
        # Reads the (url, target) unique constraint from the Endpoint model
        unique_items = deduplicate_for_bulk(items, Endpoint)
    """
    if not items:
        return items

    unique_fields = get_unique_fields(model)
    if unique_fields is None:
        # Model has no unique constraint, nothing to deduplicate
        logger.debug(f"{model.__name__} 没有唯一约束,跳过去重")
        return items

    # Handle foreign-key field names (target -> target_id)
    def make_key(item: T) -> tuple:
        key_parts = []
        for field in unique_fields:
            # Try both the field_id (foreign key) and plain field forms
            value = getattr(item, f'{field}_id', None)
            if value is None:
                value = getattr(item, field, None)
            key_parts.append(value)
        return tuple(key_parts)

    # Deduplicate via a dict, keeping the last entry per key
    seen = {}
    for item in items:
        key = make_key(item)
        seen[key] = item

    unique_items = list(seen.values())

    if len(unique_items) < len(items):
        logger.debug(f"{model.__name__} 去重: {len(items)} -> {len(unique_items)} 条")

    return unique_items
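A sketch of the intended call site, deduplicating a batch before bulk_create. The Endpoint model and the parsed_results variable are assumptions for illustration; only deduplicate_for_bulk comes from this diff:

# Hypothetical bulk-insert sketch.
from apps.common.utils.dedup import deduplicate_for_bulk

rows = [Endpoint(url=url, target_id=target_id) for url, target_id in parsed_results]
rows = deduplicate_for_bulk(rows, Endpoint)  # drop in-batch duplicates first
Endpoint.objects.bulk_create(
    rows,
    ignore_conflicts=True,  # rows already in the table are skipped, not errored
    batch_size=1000,
)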
331 backend/apps/common/utils/filter_utils.py Normal file
@@ -0,0 +1,331 @@
"""Smart filter helpers - generic query-syntax parsing and Django ORM query building

Supported syntax:
- field="value"   fuzzy match (contains)
- field=="value"  exact match
- field!="value"  not equal

Logical operators:
- AND: && or and or whitespace (the default)
- OR: || or or

Examples:
    type="xss" || type="sqli"            # OR
    type="xss" or type="sqli"            # OR (equivalent)
    severity="high" && source="nuclei"   # AND
    severity="high" source="nuclei"      # AND (whitespace defaults to AND)
    severity="high" and source="nuclei"  # AND (equivalent)

Usage:
    from apps.common.utils.filter_utils import apply_filters

    field_mapping = {'ip': 'ip', 'port': 'port', 'host': 'host'}
    queryset = apply_filters(queryset, 'ip="192" || port="80"', field_mapping)
"""

import re
import logging
from dataclasses import dataclass
from typing import List, Dict, Optional, Union
from enum import Enum

from django.db.models import QuerySet, Q, F, Func, CharField
from django.db.models.functions import Cast

logger = logging.getLogger(__name__)


class ArrayToString(Func):
    """PostgreSQL array_to_string function"""
    function = 'array_to_string'
    template = "%(function)s(%(expressions)s, ',')"
    output_field = CharField()


class LogicalOp(Enum):
    """Logical operator"""
    AND = 'AND'
    OR = 'OR'


@dataclass
class ParsedFilter:
    """A parsed filter condition"""
    field: str      # field name
    operator: str   # operator: '=', '==', '!='
    value: str      # raw value


@dataclass
class FilterGroup:
    """A filter condition plus its logical operator"""
    filter: ParsedFilter
    logical_op: LogicalOp  # relationship to the previous condition


class QueryParser:
    """Query-syntax parser

    Supports ||/or (OR) and &&/and/whitespace (AND) logical operators.
    """

    # Matches: field="value", field=="value", field!="value"
    FILTER_PATTERN = re.compile(r'(\w+)(==|!=|=)"([^"]*)"')

    # Logical operator patterns (with surrounding whitespace)
    OR_PATTERN = re.compile(r'\s*(\|\||(?<![a-zA-Z])or(?![a-zA-Z]))\s*', re.IGNORECASE)
    AND_PATTERN = re.compile(r'\s*(&&|(?<![a-zA-Z])and(?![a-zA-Z]))\s*', re.IGNORECASE)

    @classmethod
    def parse(cls, query_string: str) -> List[FilterGroup]:
        """Parse a query-syntax string.

        Args:
            query_string: query-syntax string

        Returns:
            List of parsed filter groups

        Examples:
            >>> QueryParser.parse('type="xss" || type="sqli"')
            [FilterGroup(filter=..., logical_op=AND),  # the first defaults to AND
             FilterGroup(filter=..., logical_op=OR)]
        """
        if not query_string or not query_string.strip():
            return []

        # Step 1: extract every filter condition and replace it with a
        # placeholder, protecting whitespace inside quotes
        filters_found = []
        placeholder_pattern = '__FILTER_{}__'

        def replace_filter(match):
            idx = len(filters_found)
            filters_found.append(match.group(0))
            return placeholder_pattern.format(idx)

        # Extract every field="value"-style condition first
        protected = cls.FILTER_PATTERN.sub(replace_filter, query_string)

        # Normalize the logical operators
        # || and or -> __OR__
        normalized = cls.OR_PATTERN.sub(' __OR__ ', protected)
        # && and and -> __AND__
        normalized = cls.AND_PATTERN.sub(' __AND__ ', normalized)

        # Tokenize: split on whitespace, keeping the operator markers
        tokens = normalized.split()

        groups = []
        pending_op = LogicalOp.AND  # default AND

        for token in tokens:
            if token == '__OR__':
                pending_op = LogicalOp.OR
            elif token == '__AND__':
                pending_op = LogicalOp.AND
            elif token.startswith('__FILTER_') and token.endswith('__'):
                # Resolve the placeholder back to the original condition
                try:
                    idx = int(token[9:-2])  # extract the index
                    original_filter = filters_found[idx]
                    match = cls.FILTER_PATTERN.match(original_filter)
                    if match:
                        field, operator, value = match.groups()
                        groups.append(FilterGroup(
                            filter=ParsedFilter(
                                field=field.lower(),
                                operator=operator,
                                value=value
                            ),
                            logical_op=pending_op if groups else LogicalOp.AND
                        ))
                        pending_op = LogicalOp.AND  # reset to the default AND
                except (ValueError, IndexError):
                    pass
            # Any other token is ignored (invalid input)

        return groups


class QueryBuilder:
    """Django ORM query builder

    Turns parsed filter conditions into a Django ORM query with AND/OR logic.
    """

    @classmethod
    def build_query(
        cls,
        queryset: QuerySet,
        filter_groups: List[FilterGroup],
        field_mapping: Dict[str, str],
        json_array_fields: List[str] = None
    ) -> QuerySet:
        """Build the Django ORM query.

        Args:
            queryset: Django QuerySet
            filter_groups: list of parsed filter groups
            field_mapping: field name mapping
            json_array_fields: JSON array fields (queried with __contains)

        Returns:
            Filtered QuerySet
        """
        if not filter_groups:
            return queryset

        json_array_fields = json_array_fields or []

        # Collect array fields that need an annotation for fuzzy search
        array_fuzzy_fields = set()

        # First pass: check for fuzzy matches on array fields
        for group in filter_groups:
            f = group.filter
            db_field = field_mapping.get(f.field)
            if db_field and db_field in json_array_fields and f.operator == '=':
                array_fuzzy_fields.add(db_field)

        # Annotate the array fields used for fuzzy search
        for field in array_fuzzy_fields:
            annotate_name = f'{field}_text'
            queryset = queryset.annotate(**{annotate_name: ArrayToString(F(field))})

        # Build the combined Q object
        combined_q = None

        for group in filter_groups:
            f = group.filter

            # Map the field name
            db_field = field_mapping.get(f.field)
            if not db_field:
                logger.debug(f"忽略未知字段: {f.field}")
                continue

            # Is this a JSON array field?
            is_json_array = db_field in json_array_fields

            # Build the Q object for this single condition
            q = cls._build_single_q(db_field, f.operator, f.value, is_json_array)
            if q is None:
                continue

            # Combine the Q objects
            if combined_q is None:
                combined_q = q
            elif group.logical_op == LogicalOp.OR:
                combined_q = combined_q | q
            else:  # AND
                combined_q = combined_q & q

        if combined_q is not None:
            return queryset.filter(combined_q)
        return queryset

    @classmethod
    def _build_single_q(cls, field: str, operator: str, value: str, is_json_array: bool = False) -> Optional[Q]:
        """Build the Q object for a single condition"""
        if is_json_array:
            if operator == '==':
                # Exact match: the array contains an element equal to value
                return Q(**{f'{field}__contains': [value]})
            elif operator == '!=':
                # Not contained: the array has no element equal to value
                return ~Q(**{f'{field}__contains': [value]})
            else:  # '=' fuzzy match
                # Fuzzy search against the annotated field; build_query has
                # already cast the array to text via ArrayToString
                annotate_name = f'{field}_text'
                return Q(**{f'{annotate_name}__icontains': value})

        if operator == '!=':
            return cls._build_not_equal_q(field, value)
        elif operator == '==':
            return cls._build_exact_q(field, value)
        else:  # '='
            return cls._build_fuzzy_q(field, value)

    @classmethod
    def _try_convert_to_int(cls, value: str) -> Optional[int]:
        """Try to convert a value to an integer"""
        try:
            return int(value.strip())
        except (ValueError, TypeError):
            return None

    @classmethod
    def _build_fuzzy_q(cls, field: str, value: str) -> Q:
        """Fuzzy match: contains"""
        return Q(**{f'{field}__icontains': value})

    @classmethod
    def _build_exact_q(cls, field: str, value: str) -> Q:
        """Exact match"""
        int_val = cls._try_convert_to_int(value)
        if int_val is not None:
            return Q(**{f'{field}__exact': int_val})
        return Q(**{f'{field}__exact': value})

    @classmethod
    def _build_not_equal_q(cls, field: str, value: str) -> Q:
        """Not equal"""
        int_val = cls._try_convert_to_int(value)
        if int_val is not None:
            return ~Q(**{f'{field}__exact': int_val})
        return ~Q(**{f'{field}__exact': value})


def apply_filters(
    queryset: QuerySet,
    query_string: str,
    field_mapping: Dict[str, str],
    json_array_fields: List[str] = None
) -> QuerySet:
    """Apply filter conditions to a QuerySet.

    Args:
        queryset: Django QuerySet
        query_string: query-syntax string
        field_mapping: field name mapping
        json_array_fields: JSON array fields (queried with __contains)

    Returns:
        Filtered QuerySet

    Examples:
        # OR queries
        apply_filters(qs, 'type="xss" || type="sqli"', mapping)
        apply_filters(qs, 'type="xss" or type="sqli"', mapping)

        # AND queries
        apply_filters(qs, 'severity="high" && source="nuclei"', mapping)
        apply_filters(qs, 'severity="high" source="nuclei"', mapping)

        # Mixed queries
        apply_filters(qs, 'type="xss" || type="sqli" && severity="high"', mapping)

        # JSON array fields
        apply_filters(qs, 'implies="PHP"', mapping, json_array_fields=['implies'])
    """
    if not query_string or not query_string.strip():
        return queryset

    try:
        filter_groups = QueryParser.parse(query_string)
        if not filter_groups:
            logger.debug(f"未解析到有效过滤条件: {query_string}")
            return queryset

        logger.debug(f"解析过滤条件: {filter_groups}")
        return QueryBuilder.build_query(
            queryset,
            filter_groups,
            field_mapping,
            json_array_fields=json_array_fields
        )

    except Exception as e:
        logger.warning(f"过滤解析错误: {e}, query: {query_string}")
        return queryset  # fail open: fall back to the unfiltered queryset
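A sketch of the view-layer wiring. The query-parameter name and the Vulnerability model are illustrative assumptions; only apply_filters comes from this diff:

# Hypothetical view wiring.
from apps.common.utils.filter_utils import apply_filters

FIELD_MAPPING = {'type': 'vuln_type', 'severity': 'severity', 'source': 'source'}

def filtered_vulnerabilities(request):
    qs = Vulnerability.objects.all()
    # e.g. ?filter=type="xss" || type="sqli" && severity="high"
    return apply_filters(qs, request.query_params.get('filter', ''), FIELD_MAPPING)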
@@ -7,7 +7,6 @@
 import hashlib
 import logging
 from pathlib import Path
 from typing import Optional, BinaryIO

 logger = logging.getLogger(__name__)
@@ -91,11 +90,3 @@ def is_file_hash_match(file_path: str, expected_hash: str) -> bool:
         return False

     return actual_hash.lower() == expected_hash.lower()
-
-
-__all__ = [
-    "calc_file_sha256",
-    "calc_stream_sha256",
-    "safe_calc_file_sha256",
-    "is_file_hash_match",
-]
@@ -1,6 +1,8 @@
-"""Validation helpers for domains, IPs, ports and targets"""
+"""Validation helpers for domains, IPs, ports, URLs and targets"""
 import ipaddress
 import logging
+from urllib.parse import urlparse

 import validators

 logger = logging.getLogger(__name__)
@@ -25,6 +27,21 @@ def validate_domain(domain: str) -> None:
     raise ValueError(f"域名格式无效: {domain}")


+def is_valid_domain(domain: str) -> bool:
+    """
+    Check whether a string is a valid domain (does not raise).
+
+    Args:
+        domain: domain string
+
+    Returns:
+        bool: whether the domain is valid
+    """
+    if not domain or len(domain) > 253:
+        return False
+    return bool(validators.domain(domain))
+
+
 def validate_ip(ip: str) -> None:
     """
     Validate an IP address (IPv4 and IPv6 supported)
@@ -44,6 +61,25 @@
     raise ValueError(f"IP 地址格式无效: {ip}")


+def is_valid_ip(ip: str) -> bool:
+    """
+    Check whether a string is a valid IP address (does not raise).
+
+    Args:
+        ip: IP address string
+
+    Returns:
+        bool: whether the IP address is valid
+    """
+    if not ip:
+        return False
+    try:
+        ipaddress.ip_address(ip)
+        return True
+    except ValueError:
+        return False
+
+
 def validate_cidr(cidr: str) -> None:
     """
     Validate CIDR notation (IPv4 and IPv6 supported)
@@ -140,3 +176,136 @@ def validate_port(port: any) -> tuple[bool, int | None]:
     except (ValueError, TypeError):
         logger.warning("端口号格式错误,无法转换为整数: %s", port)
         return False, None
+
+
+# ==================== URL validation helpers ====================
+
+def validate_url(url: str) -> None:
+    """
+    Validate a URL; it must include a scheme (http:// or https://).
+
+    Args:
+        url: URL string
+
+    Raises:
+        ValueError: the URL is malformed or missing a scheme
+    """
+    if not url:
+        raise ValueError("URL 不能为空")
+
+    # Require an explicit scheme
+    if not url.startswith('http://') and not url.startswith('https://'):
+        raise ValueError("URL 必须包含协议(http:// 或 https://)")
+
+    try:
+        parsed = urlparse(url)
+        if not parsed.hostname:
+            raise ValueError("URL 必须包含主机名")
+    except Exception:
+        raise ValueError(f"URL 格式无效: {url}")
+
+
+def is_valid_url(url: str, max_length: int = 2000) -> bool:
+    """
+    Check whether a string is a valid URL (does not raise).
+
+    Args:
+        url: URL string
+        max_length: maximum URL length, defaults to 2000
+
+    Returns:
+        bool: whether the URL is valid
+    """
+    if not url or len(url) > max_length:
+        return False
+    try:
+        validate_url(url)
+        return True
+    except ValueError:
+        return False
+
+
+def is_url_match_target(url: str, target_name: str, target_type: str) -> bool:
+    """
+    Check whether a URL belongs to a target.
+
+    Args:
+        url: URL string
+        target_name: target name (domain, IP or CIDR)
+        target_type: target type ('domain', 'ip', 'cidr')
+
+    Returns:
+        bool: whether the URL matches the target
+    """
+    try:
+        parsed = urlparse(url)
+        hostname = parsed.hostname
+        if not hostname:
+            return False
+
+        hostname = hostname.lower()
+        target_name = target_name.lower()
+
+        if target_type == 'domain':
+            # Domain target: hostname equals target_name or ends with .target_name
+            return hostname == target_name or hostname.endswith('.' + target_name)
+
+        elif target_type == 'ip':
+            # IP target: hostname must equal target_name exactly
+            return hostname == target_name
+
+        elif target_type == 'cidr':
+            # CIDR target: hostname must be an IP inside the CIDR range
+            try:
+                ip = ipaddress.ip_address(hostname)
+                network = ipaddress.ip_network(target_name, strict=False)
+                return ip in network
+            except ValueError:
+                # hostname is not a valid IP
+                return False
+
+        return False
+    except Exception:
+        return False
+
+
+def detect_input_type(input_str: str) -> str:
+    """
+    Detect the input type (used when parsing quick-scan input).
+
+    Args:
+        input_str: input string (expected to be stripped already)
+
+    Returns:
+        str: input type ('url', 'domain', 'ip', 'cidr')
+    """
+    if not input_str:
+        raise ValueError("输入不能为空")
+
+    # 1. Anything containing :// is a URL
+    if '://' in input_str:
+        return 'url'
+
+    # 2. Anything containing / is either a CIDR or a URL missing its scheme
+    if '/' in input_str:
+        # CIDR format: IP/prefix, e.g. 10.0.0.0/8
+        parts = input_str.split('/')
+        if len(parts) == 2:
+            ip_part, prefix_part = parts
+            # If the part after the slash is a number in 0-32, check for CIDR
+            if prefix_part.isdigit() and 0 <= int(prefix_part) <= 32:
+                ip_parts = ip_part.split('.')
+                if len(ip_parts) == 4 and all(p.isdigit() for p in ip_parts):
+                    return 'cidr'
+        # Not a CIDR: treat it as a URL (missing scheme; later validation rejects it)
+        return 'url'
+
+    # 3. Check for an IP address
+    try:
+        ipaddress.ip_address(input_str)
+        return 'ip'
+    except ValueError:
+        pass
+
+    # 4. Default to domain
+    return 'domain'
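The expected behaviour of the two new entry points, shown as a quick interpreter session (inputs chosen for illustration):

>>> detect_input_type('https://a.example.com/login')
'url'
>>> detect_input_type('10.0.0.0/8')
'cidr'
>>> detect_input_type('a.example.com')
'domain'
>>> is_url_match_target('https://a.example.com/x', 'example.com', 'domain')
True
>>> is_url_match_target('http://10.1.2.3:8080/', '10.0.0.0/8', 'cidr')
True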
@@ -1,3 +1,21 @@
-from .auth_views import LoginView, LogoutView, MeView, ChangePasswordView
-
-__all__ = ['LoginView', 'LogoutView', 'MeView', 'ChangePasswordView']
+"""
+Common module view exports
+
+Includes:
+- health check view: Docker health checks
+- auth views: login, logout, current user, change password
+- system log views: live log viewing
+- blacklist view: global blacklist rule management
+"""
+
+from .health_views import HealthCheckView
+from .auth_views import LoginView, LogoutView, MeView, ChangePasswordView
+from .system_log_views import SystemLogsView, SystemLogFilesView
+from .blacklist_views import GlobalBlacklistView
+
+__all__ = [
+    'HealthCheckView',
+    'LoginView', 'LogoutView', 'MeView', 'ChangePasswordView',
+    'SystemLogsView', 'SystemLogFilesView',
+    'GlobalBlacklistView',
+]
@@ -9,7 +9,10 @@ from django.utils.decorators import method_decorator
 from rest_framework import status
 from rest_framework.views import APIView
 from rest_framework.response import Response
-from rest_framework.permissions import AllowAny, IsAuthenticated
+from rest_framework.permissions import AllowAny
+
+from apps.common.response_helpers import success_response, error_response
+from apps.common.error_codes import ErrorCodes

 logger = logging.getLogger(__name__)

@@ -28,9 +31,10 @@ class LoginView(APIView):
         password = request.data.get('password')

         if not username or not password:
-            return Response(
-                {'error': '请提供用户名和密码'},
-                status=status.HTTP_400_BAD_REQUEST
+            return error_response(
+                code=ErrorCodes.VALIDATION_ERROR,
+                message='Username and password are required',
+                status_code=status.HTTP_400_BAD_REQUEST
             )

         user = authenticate(request, username=username, password=password)

@@ -38,20 +42,22 @@
         if user is not None:
             login(request, user)
             logger.info(f"用户 {username} 登录成功")
-            return Response({
-                'message': '登录成功',
-                'user': {
-                    'id': user.id,
-                    'username': user.username,
-                    'isStaff': user.is_staff,
-                    'isSuperuser': user.is_superuser,
-                }
-            })
+            return success_response(
+                data={
+                    'user': {
+                        'id': user.id,
+                        'username': user.username,
+                        'isStaff': user.is_staff,
+                        'isSuperuser': user.is_superuser,
+                    }
+                }
+            )
         else:
             logger.warning(f"用户 {username} 登录失败:用户名或密码错误")
-            return Response(
-                {'error': '用户名或密码错误'},
-                status=status.HTTP_401_UNAUTHORIZED
-            )
+            return error_response(
+                code=ErrorCodes.UNAUTHORIZED,
+                message='Invalid username or password',
+                status_code=status.HTTP_401_UNAUTHORIZED
+            )

@@ -79,7 +85,7 @@ class LogoutView(APIView):
             logout(request)
         else:
             logout(request)
-        return Response({'message': '已登出'})
+        return success_response()


@method_decorator(csrf_exempt, name='dispatch')
@@ -100,22 +106,26 @@ class MeView(APIView):
         if user_id:
             try:
                 user = User.objects.get(pk=user_id)
-                return Response({
-                    'authenticated': True,
-                    'user': {
-                        'id': user.id,
-                        'username': user.username,
-                        'isStaff': user.is_staff,
-                        'isSuperuser': user.is_superuser,
-                    }
-                })
+                return success_response(
+                    data={
+                        'authenticated': True,
+                        'user': {
+                            'id': user.id,
+                            'username': user.username,
+                            'isStaff': user.is_staff,
+                            'isSuperuser': user.is_superuser,
+                        }
+                    }
+                )
             except User.DoesNotExist:
                 pass

-        return Response({
-            'authenticated': False,
-            'user': None
-        })
+        return success_response(
+            data={
+                'authenticated': False,
+                'user': None
+            }
+        )


@method_decorator(csrf_exempt, name='dispatch')
@@ -124,43 +134,27 @@ class ChangePasswordView(APIView):
     """
     Change password
     POST /api/auth/change-password/
     """
-    authentication_classes = []  # disable authentication (bypasses CSRF)
-    permission_classes = [AllowAny]  # login state checked manually

     def post(self, request):
-        # Check the login state manually (user comes from the session)
-        from django.contrib.auth import get_user_model
-        User = get_user_model()
-
-        user_id = request.session.get('_auth_user_id')
-        if not user_id:
-            return Response(
-                {'error': '请先登录'},
-                status=status.HTTP_401_UNAUTHORIZED
-            )
-
-        try:
-            user = User.objects.get(pk=user_id)
-        except User.DoesNotExist:
-            return Response(
-                {'error': '用户不存在'},
-                status=status.HTTP_401_UNAUTHORIZED
-            )
+        # Global permission classes handle auth; request.user is already authenticated
+        user = request.user

         # CamelCaseParser maps oldPassword -> old_password
         old_password = request.data.get('old_password')
         new_password = request.data.get('new_password')

         if not old_password or not new_password:
-            return Response(
-                {'error': '请提供旧密码和新密码'},
-                status=status.HTTP_400_BAD_REQUEST
-            )
+            return error_response(
+                code=ErrorCodes.VALIDATION_ERROR,
+                message='Old password and new password are required',
+                status_code=status.HTTP_400_BAD_REQUEST
+            )

         if not user.check_password(old_password):
-            return Response(
-                {'error': '旧密码错误'},
-                status=status.HTTP_400_BAD_REQUEST
-            )
+            return error_response(
+                code=ErrorCodes.VALIDATION_ERROR,
+                message='Old password is incorrect',
+                status_code=status.HTTP_400_BAD_REQUEST
+            )

         user.set_password(new_password)
@@ -170,4 +164,4 @@ class ChangePasswordView(APIView):
         update_session_auth_hash(request, user)

         logger.info(f"用户 {user.username} 已修改密码")
-        return Response({'message': '密码修改成功'})
+        return success_response()
80 backend/apps/common/views/blacklist_views.py Normal file
@@ -0,0 +1,80 @@
"""Global blacklist API views"""
import logging

from rest_framework import status
from rest_framework.views import APIView
from rest_framework.permissions import IsAuthenticated

from apps.common.response_helpers import success_response, error_response
from apps.common.services import BlacklistService

logger = logging.getLogger(__name__)


class GlobalBlacklistView(APIView):
    """
    Global blacklist rule API

    Endpoints:
    - GET /api/blacklist/rules/ - list the global blacklist rules
    - PUT /api/blacklist/rules/ - replace all rules (textarea-save scenario)

    Design notes:
    - PUT uses full-replace semantics, which fits the "one rule per line in a
      textarea" frontend: the user edits the textarea, clicks save, and the
      backend replaces the whole rule set.

    Architecture: MVS pattern
    - View: parameter validation, response formatting
    - Service: business logic (BlacklistService)
    - Model: persistence (BlacklistRule)
    """

    permission_classes = [IsAuthenticated]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.blacklist_service = BlacklistService()

    def get(self, request):
        """
        List the global blacklist rules.

        Response format:
            {
                "patterns": ["*.gov", "*.edu", "10.0.0.0/8"]
            }
        """
        rules = self.blacklist_service.get_global_rules()
        patterns = list(rules.values_list('pattern', flat=True))
        return success_response(data={'patterns': patterns})

    def put(self, request):
        """
        Replace all global blacklist rules.

        Request format:
            {
                "patterns": ["*.gov", "*.edu", "10.0.0.0/8"]
            }

        An empty array clears every rule:
            {
                "patterns": []
            }
        """
        patterns = request.data.get('patterns', [])

        # Also accept a newline-separated string
        if isinstance(patterns, str):
            patterns = [p for p in patterns.split('\n') if p.strip()]

        if not isinstance(patterns, list):
            return error_response(
                code='VALIDATION_ERROR',
                message='patterns 必须是数组'
            )

        # Delegate the full replacement to the service layer
        result = self.blacklist_service.replace_global_rules(patterns)

        return success_response(data=result)
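A client-side sketch of the full-replace flow against the endpoints documented above. The base URL, session handling, and the response envelope shape are assumptions for illustration:

# Hypothetical client sketch using requests; authentication is
# environment-specific and omitted here.
import requests

session = requests.Session()  # assumed to carry a logged-in session cookie
base = 'https://xingrin.example.com'  # illustrative host

resp = session.get(f'{base}/api/blacklist/rules/')
patterns = resp.json().get('data', {}).get('patterns', [])  # envelope shape assumed
patterns.append('*.mil')
session.put(f'{base}/api/blacklist/rules/', json={'patterns': patterns})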
24 backend/apps/common/views/health_views.py Normal file
@@ -0,0 +1,24 @@
"""
Health check view

Docker health-check endpoint; no authentication required.
"""

from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.permissions import AllowAny


class HealthCheckView(APIView):
    """
    Health check endpoint

    GET /api/health/

    Returns the service status for Docker health checks.
    This endpoint requires no authentication.
    """
    permission_classes = [AllowAny]

    def get(self, request):
        return Response({'status': 'ok'})
118 backend/apps/common/views/system_log_views.py Normal file
@@ -0,0 +1,118 @@
"""
System log views

REST API endpoints that let the frontend view the system's runtime logs live.
"""

import logging

from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt
from rest_framework import status
from rest_framework.response import Response
from rest_framework.views import APIView

from apps.common.response_helpers import success_response, error_response
from apps.common.error_codes import ErrorCodes
from apps.common.services.system_log_service import SystemLogService


logger = logging.getLogger(__name__)


@method_decorator(csrf_exempt, name="dispatch")
class SystemLogFilesView(APIView):
    """
    Log file list API view

    GET /api/system/logs/files/
    List every available log file.

    Response:
        {
            "files": [
                {
                    "filename": "xingrin.log",
                    "category": "system",
                    "size": 1048576,
                    "modifiedAt": "2025-01-15T10:30:00+00:00"
                },
                ...
            ]
        }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = SystemLogService()

    def get(self, request):
        """List the log files"""
        try:
            files = self.service.get_log_files()
            return success_response(data={"files": files})
        except Exception:
            logger.exception("获取日志文件列表失败")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to get log files',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )


@method_decorator(csrf_exempt, name="dispatch")
class SystemLogsView(APIView):
    """
    System log API view

    GET /api/system/logs/
    Fetch system log content.

    Query Parameters:
        file (str, optional): log file name, defaults to xingrin.log
        lines (int, optional): number of lines to return, default 200, max 10000

    Response:
        {
            "content": "log content string..."
        }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.service = SystemLogService()

    def get(self, request):
        """
        Fetch the system log.

        The file and lines parameters control what is returned.
        """
        try:
            # Parse the parameters
            filename = request.query_params.get("file")
            lines_raw = request.query_params.get("lines")
            lines = int(lines_raw) if lines_raw is not None else None

            # Delegate to the service for the log content
            content = self.service.get_logs_content(filename=filename, lines=lines)
            return success_response(data={"content": content})
        except ValueError as e:
            return error_response(
                code=ErrorCodes.VALIDATION_ERROR,
                message=str(e) if 'file' in str(e).lower() else 'lines must be an integer',
                status_code=status.HTTP_400_BAD_REQUEST
            )
        except FileNotFoundError as e:
            return error_response(
                code=ErrorCodes.NOT_FOUND,
                message=str(e),
                status_code=status.HTTP_404_NOT_FOUND
            )
        except Exception:
            logger.exception("获取系统日志失败")
            return error_response(
                code=ErrorCodes.SERVER_ERROR,
                message='Failed to get system logs',
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
            )
44 backend/apps/common/websocket_auth.py Normal file
@@ -0,0 +1,44 @@
"""
WebSocket authentication base class

Base class for WebSocket consumers that require an authenticated user.
"""

import logging
from channels.generic.websocket import AsyncWebsocketConsumer

logger = logging.getLogger(__name__)


class AuthenticatedWebsocketConsumer(AsyncWebsocketConsumer):
    """
    Base consumer that requires authentication.

    Subclasses should override on_connect() with their own connection logic.
    """

    async def connect(self):
        """
        Verify the user's authentication state on connect.

        Unauthenticated connections are rejected with close(code=4001).
        """
        user = self.scope.get('user')

        if not user or not user.is_authenticated:
            logger.warning(
                f"WebSocket 连接被拒绝:用户未认证 - Path: {self.scope.get('path')}"
            )
            await self.close(code=4001)
            return

        # Hand over to the subclass's connection logic
        await self.on_connect()

    async def on_connect(self):
        """
        Subclass hook for the connection logic.

        Default implementation: accept the connection.
        """
        await self.accept()
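A minimal subclass sketch showing the intended extension point (the consumer name, group name, and payload are illustrative, not part of this diff):

# Hypothetical consumer built on the base class above.
from apps.common.websocket_auth import AuthenticatedWebsocketConsumer

class NotificationsConsumer(AuthenticatedWebsocketConsumer):
    async def on_connect(self):
        # Runs only for authenticated users; unauthenticated sockets are
        # already closed with code 4001 by the base class.
        self.group_name = 'notifications'  # illustrative group name
        await self.channel_layer.group_add(self.group_name, self.channel_name)
        await self.accept()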
@@ -6,17 +6,17 @@ import json
 import logging
 import asyncio
 import os
-from channels.generic.websocket import AsyncWebsocketConsumer
 from asgiref.sync import sync_to_async

 from django.conf import settings

+from apps.common.websocket_auth import AuthenticatedWebsocketConsumer
 from apps.engine.services import WorkerService

 logger = logging.getLogger(__name__)


-class WorkerDeployConsumer(AsyncWebsocketConsumer):
+class WorkerDeployConsumer(AuthenticatedWebsocketConsumer):
    """
    Interactive worker terminal WebSocket consumer

@@ -31,8 +31,8 @@ class WorkerDeployConsumer(AsyncWebsocketConsumer):
         self.read_task = None
         self.worker_service = WorkerService()

-    async def connect(self):
-        """Join the worker's group on connect and open the SSH connection"""
+    async def on_connect(self):
+        """Join the worker's group and open the SSH connection (already authenticated)"""
         self.worker_id = self.scope['url_route']['kwargs']['worker_id']
         self.group_name = f'worker_deploy_{self.worker_id}'

@@ -241,8 +241,10 @@ class WorkerDeployConsumer(AsyncWebsocketConsumer):
             }))
             return

-        django_host = f"{public_host}:{server_port}"  # used by Django / heartbeat reporting
-        heartbeat_api_url = f"http://{django_host}"  # base URL; the agent appends /api/...
+        # Remote workers reach the backend through nginx over HTTPS
+        # (nginx proxies to the backend on 8888), so use
+        # https://{PUBLIC_HOST}:{PUBLIC_PORT} instead of port 8888 directly
+        public_port = getattr(settings, 'PUBLIC_PORT', '8083')
+        heartbeat_api_url = f"https://{public_host}:{public_port}"

         session_name = f'xingrin_deploy_{self.worker_id}'
         remote_script_path = '/tmp/xingrin_deploy.sh'
@@ -15,9 +15,10 @@
 """

 from django.core.management.base import BaseCommand
+from io import StringIO
 from pathlib import Path

-import yaml
+from ruamel.yaml import YAML

 from apps.engine.models import ScanEngine

@@ -44,10 +45,12 @@ class Command(BaseCommand):
         with open(config_path, 'r', encoding='utf-8') as f:
             default_config = f.read()

-        # Parse the YAML into a dict for generating sub-engine configs later
+        # Parse with ruamel.yaml so comments are preserved
+        yaml_parser = YAML()
+        yaml_parser.preserve_quotes = True
         try:
-            config_dict = yaml.safe_load(default_config) or {}
-        except yaml.YAMLError as e:
+            config_dict = yaml_parser.load(default_config) or {}
+        except Exception as e:
             self.stdout.write(self.style.ERROR(f'引擎配置 YAML 解析失败: {e}'))
             return

@@ -83,15 +86,13 @@ class Command(BaseCommand):
         if scan_type != 'subdomain_discovery' and 'tools' not in scan_cfg:
             continue

-        # Build a YAML snippet containing only this scan type's config
+        # Build a YAML snippet containing only this scan type's config (comments preserved)
         single_config = {scan_type: scan_cfg}
         try:
-            single_yaml = yaml.safe_dump(
-                single_config,
-                sort_keys=False,
-                allow_unicode=True,
-            )
-        except yaml.YAMLError as e:
+            stream = StringIO()
+            yaml_parser.dump(single_config, stream)
+            single_yaml = stream.getvalue()
+        except Exception as e:
             self.stdout.write(self.style.ERROR(f'生成子引擎 {scan_type} 配置失败: {e}'))
             continue
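The point of swapping PyYAML for ruamel.yaml here, as a standalone round-trip sketch: safe_load/safe_dump discards comments, while ruamel.yaml's round-trip mode keeps them attached to the parsed nodes, so the per-scan-type sub-configs retain their comments. The sample YAML below is invented for illustration:

from io import StringIO
from ruamel.yaml import YAML

source = "port_scan:\n  ports: top-1000  # keep scanner defaults\n"

parser = YAML()                 # round-trip mode by default
parser.preserve_quotes = True
config = parser.load(source)    # CommentedMap, comments retained

stream = StringIO()
parser.dump({'port_scan': config['port_scan']}, stream)
print(stream.getvalue())        # the '# keep scanner defaults' comment survives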
205 backend/apps/engine/management/commands/init_fingerprints.py Normal file
@@ -0,0 +1,205 @@
"""Initialize the built-in fingerprint libraries

- EHole fingerprints: ehole.json -> imported into the database
- Goby fingerprints: goby.json -> imported into the database
- Wappalyzer fingerprints: wappalyzer.json -> imported into the database
- Fingers fingerprints: fingers_http.json -> imported into the database
- FingerPrintHub fingerprints: fingerprinthub_web.json -> imported into the database
- ARL fingerprints: ARL.yaml -> imported into the database

Idempotent: skips any library that already has rows in the database and only
imports into an empty table.
"""

import json
import logging
from pathlib import Path

import yaml
from django.conf import settings
from django.core.management.base import BaseCommand

from apps.engine.models import (
    EholeFingerprint,
    GobyFingerprint,
    WappalyzerFingerprint,
    FingersFingerprint,
    FingerPrintHubFingerprint,
    ARLFingerprint,
)
from apps.engine.services.fingerprints import (
    EholeFingerprintService,
    GobyFingerprintService,
    WappalyzerFingerprintService,
    FingersFingerprintService,
    FingerPrintHubFingerprintService,
    ARLFingerprintService,
)


logger = logging.getLogger(__name__)


# Built-in fingerprint configuration
DEFAULT_FINGERPRINTS = [
    {
        "type": "ehole",
        "filename": "ehole.json",
        "model": EholeFingerprint,
        "service": EholeFingerprintService,
        "data_key": "fingerprint",  # key of the fingerprint array in the JSON
        "file_format": "json",
    },
    {
        "type": "goby",
        "filename": "goby.json",
        "model": GobyFingerprint,
        "service": GobyFingerprintService,
        "data_key": None,  # Goby is an array; use the whole JSON
        "file_format": "json",
    },
    {
        "type": "wappalyzer",
        "filename": "wappalyzer.json",
        "model": WappalyzerFingerprint,
        "service": WappalyzerFingerprintService,
        "data_key": "apps",  # Wappalyzer uses an apps object
        "file_format": "json",
    },
    {
        "type": "fingers",
        "filename": "fingers_http.json",
        "model": FingersFingerprint,
        "service": FingersFingerprintService,
        "data_key": None,  # Fingers is an array
        "file_format": "json",
    },
    {
        "type": "fingerprinthub",
        "filename": "fingerprinthub_web.json",
        "model": FingerPrintHubFingerprint,
        "service": FingerPrintHubFingerprintService,
        "data_key": None,  # FingerPrintHub is an array
        "file_format": "json",
    },
    {
        "type": "arl",
        "filename": "ARL.yaml",
        "model": ARLFingerprint,
        "service": ARLFingerprintService,
        "data_key": None,  # ARL is a YAML array
        "file_format": "yaml",
    },
]


class Command(BaseCommand):
    help = "初始化内置指纹库"

    def handle(self, *args, **options):
        project_base = Path(settings.BASE_DIR).parent  # /app/backend -> /app
        fingerprints_dir = project_base / "backend" / "fingerprints"

        initialized = 0
        skipped = 0
        failed = 0

        for item in DEFAULT_FINGERPRINTS:
            fp_type = item["type"]
            filename = item["filename"]
            model = item["model"]
            service_class = item["service"]
            data_key = item["data_key"]
            file_format = item.get("file_format", "json")

            # Skip if the table already has rows
            existing_count = model.objects.count()
            if existing_count > 0:
                self.stdout.write(self.style.SUCCESS(
                    f"[{fp_type}] 数据库已有 {existing_count} 条记录,跳过初始化"
                ))
                skipped += 1
                continue

            # Locate the source file
            src_path = fingerprints_dir / filename
            if not src_path.exists():
                self.stdout.write(self.style.WARNING(
                    f"[{fp_type}] 未找到内置指纹文件: {src_path},跳过"
                ))
                failed += 1
                continue

            # Read and parse the file (JSON and YAML supported)
            try:
                with open(src_path, "r", encoding="utf-8") as f:
                    if file_format == "yaml":
                        file_data = yaml.safe_load(f)
                    else:
                        file_data = json.load(f)
            except (json.JSONDecodeError, yaml.YAMLError, OSError) as exc:
                self.stdout.write(self.style.ERROR(
                    f"[{fp_type}] 读取指纹文件失败: {exc}"
                ))
                failed += 1
                continue

            # Extract the fingerprint data (handling the per-format shapes)
            fingerprints = self._extract_fingerprints(file_data, data_key, fp_type)
            if not fingerprints:
                self.stdout.write(self.style.WARNING(
                    f"[{fp_type}] 指纹文件中没有有效数据,跳过"
                ))
                failed += 1
                continue

            # Bulk import through the service
            try:
                service = service_class()
                result = service.batch_create_fingerprints(fingerprints)
                created = result.get("created", 0)
                failed_count = result.get("failed", 0)

                self.stdout.write(self.style.SUCCESS(
                    f"[{fp_type}] 导入成功: 创建 {created} 条,失败 {failed_count} 条"
                ))
                initialized += 1
            except Exception as exc:
                self.stdout.write(self.style.ERROR(
                    f"[{fp_type}] 导入失败: {exc}"
                ))
                failed += 1
                continue

        self.stdout.write(self.style.SUCCESS(
            f"指纹初始化完成: 成功 {initialized}, 已存在跳过 {skipped}, 失败 {failed}"
        ))

    def _extract_fingerprints(self, json_data, data_key, fp_type):
        """
        Extract fingerprint data per source format, accepting both array and
        object shapes.

        Supported shapes:
        - array: [...] or {"key": [...]}
        - object: {...} or {"key": {...}} -> converted to [{"name": k, ...v}]
        """
        # Pick the target data
        if data_key is None:
            # Use the whole JSON document
            target = json_data
        else:
            # Read the given key, allowing alternates (apps/technologies)
            if data_key == "apps":
                target = json_data.get("apps") or json_data.get("technologies") or {}
            else:
                target = json_data.get(data_key, [])

        # Handle by data type
        if isinstance(target, list):
            # Already an array: return as-is
            return target
        elif isinstance(target, dict):
            # Object shape: convert to an array [{"name": key, ...value}]
            return [{"name": name, **data} if isinstance(data, dict) else {"name": name}
                    for name, data in target.items()]

        return []
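The object-to-array normalization done by _extract_fingerprints, isolated on a Wappalyzer-style sample (sample data invented for illustration):

apps = {"WordPress": {"cats": [1], "implies": ["PHP"]}, "Caddy": {"cats": [22]}}
rows = [{"name": name, **data} if isinstance(data, dict) else {"name": name}
        for name, data in apps.items()]
# rows == [{'name': 'WordPress', 'cats': [1], 'implies': ['PHP']},
#          {'name': 'Caddy', 'cats': [22]}]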
@@ -3,12 +3,17 @@
 Run this command after installation; it creates the official template repo records automatically.

 Usage:
-    python manage.py init_nuclei_templates          # create records only
+    python manage.py init_nuclei_templates          # create records only (detects pre-existing local repos)
     python manage.py init_nuclei_templates --sync   # create and sync (git clone)
 """

 import logging
+import subprocess
+from pathlib import Path

+from django.conf import settings
 from django.core.management.base import BaseCommand
+from django.utils import timezone

 from apps.engine.models import NucleiTemplateRepo
 from apps.engine.services import NucleiTemplateRepoService
@@ -26,6 +31,20 @@ DEFAULT_REPOS = [
 ]


+def get_local_commit_hash(local_path: Path) -> str:
+    """Return the commit hash of a local Git repository."""
+    if not (local_path / ".git").is_dir():
+        return ""
+    result = subprocess.run(
+        ["git", "-C", str(local_path), "rev-parse", "HEAD"],
+        check=False,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+    )
+    return result.stdout.strip() if result.returncode == 0 else ""
+
+
 class Command(BaseCommand):
     help = "初始化 Nuclei 模板仓库(创建官方模板仓库记录)"

@@ -46,6 +65,8 @@ class Command(BaseCommand):
         force = options.get("force", False)

         service = NucleiTemplateRepoService()
+        base_dir = Path(getattr(settings, "NUCLEI_TEMPLATES_REPOS_BASE_DIR", "/opt/xingrin/nuclei-repos"))

         created = 0
         skipped = 0
         synced = 0
@@ -87,20 +108,30 @@ class Command(BaseCommand):

         # Create the new repo record
         try:
+            # Check for a repo already on disk (pre-downloaded by install.sh)
+            local_path = base_dir / name
+            local_commit = get_local_commit_hash(local_path)
+
             repo = NucleiTemplateRepo.objects.create(
                 name=name,
                 repo_url=repo_url,
+                local_path=str(local_path) if local_commit else "",
+                commit_hash=local_commit,
+                last_synced_at=timezone.now() if local_commit else None,
             )
-            self.stdout.write(self.style.SUCCESS(
-                f"[{name}] 创建成功: id={repo.id}"
-            ))
+            if local_commit:
+                self.stdout.write(self.style.SUCCESS(
+                    f"[{name}] 创建成功(检测到本地仓库): commit={local_commit[:8]}"
+                ))
+            else:
+                self.stdout.write(self.style.SUCCESS(
+                    f"[{name}] 创建成功: id={repo.id}"
+                ))
             created += 1

             # Initialize the local path
             service.ensure_local_path(repo)

-            # Sync if requested
-            if do_sync:
+            # Sync if requested and there is no local repo yet
+            if not local_commit and do_sync:
                 try:
                     self.stdout.write(self.style.WARNING(
                         f"[{name}] 正在同步(首次可能需要几分钟)..."
@@ -1,7 +1,8 @@
 """Initialize all built-in Wordlist records

-- default directory-scan wordlist: dir_default.txt -> /app/backend/wordlist/dir_default.txt
-- default subdomain-bruteforce wordlist: subdomains-top1million-110000.txt -> /app/backend/wordlist/subdomains-top1million-110000.txt
+Built-in wordlists are copied from /app/backend/wordlist/ inside the image to the runtime directory /opt/xingrin/wordlists/:
+- default directory-scan wordlist: dir_default.txt
+- default subdomain-bruteforce wordlist: subdomains-top1million-110000.txt

 Idempotent: if a record with the same name exists and its file is valid it is skipped; records are only created or repaired when missing or when the file is gone.
 """
@@ -13,7 +14,7 @@ from pathlib import Path
 from django.conf import settings
 from django.core.management.base import BaseCommand

-from apps.common.hash_utils import safe_calc_file_sha256
+from apps.common.utils import safe_calc_file_sha256
 from apps.engine.models import Wordlist
213
backend/apps/engine/migrations/0001_initial.py
Normal file
213
backend/apps/engine/migrations/0001_initial.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Generated by Django 5.2.7 on 2026-01-06 00:55
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='NucleiTemplateRepo',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(help_text='仓库名称,用于前端展示和配置引用', max_length=200, unique=True)),
|
||||
('repo_url', models.CharField(help_text='Git 仓库地址', max_length=500)),
|
||||
('local_path', models.CharField(blank=True, default='', help_text='本地工作目录绝对路径', max_length=500)),
|
||||
('commit_hash', models.CharField(blank=True, default='', help_text='最后同步的 Git commit hash,用于 Worker 版本校验', max_length=40)),
|
||||
('last_synced_at', models.DateTimeField(blank=True, help_text='最后一次成功同步时间', null=True)),
|
||||
('created_at', models.DateTimeField(auto_now_add=True, help_text='创建时间')),
|
||||
('updated_at', models.DateTimeField(auto_now=True, help_text='更新时间')),
|
||||
],
|
||||
options={
|
||||
'verbose_name': 'Nuclei 模板仓库',
|
||||
'verbose_name_plural': 'Nuclei 模板仓库',
|
||||
'db_table': 'nuclei_template_repo',
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='ARLFingerprint',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(help_text='指纹名称', max_length=300, unique=True)),
|
||||
('rule', models.TextField(help_text='匹配规则表达式')),
|
||||
('created_at', models.DateTimeField(auto_now_add=True)),
|
||||
],
|
||||
options={
|
||||
'verbose_name': 'ARL 指纹',
|
||||
'verbose_name_plural': 'ARL 指纹',
|
||||
'db_table': 'arl_fingerprint',
|
||||
'ordering': ['-created_at'],
|
||||
'indexes': [models.Index(fields=['name'], name='arl_fingerp_name_c3a305_idx'), models.Index(fields=['-created_at'], name='arl_fingerp_created_ed1060_idx')],
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='EholeFingerprint',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('cms', models.CharField(help_text='产品/CMS名称', max_length=200)),
|
||||
('method', models.CharField(default='keyword', help_text='匹配方式', max_length=200)),
|
||||
('location', models.CharField(default='body', help_text='匹配位置', max_length=200)),
|
||||
('keyword', models.JSONField(default=list, help_text='关键词列表')),
|
||||
('is_important', models.BooleanField(default=False, help_text='是否重点资产')),
|
||||
('type', models.CharField(blank=True, default='-', help_text='分类', max_length=100)),
|
||||
('created_at', models.DateTimeField(auto_now_add=True)),
|
||||
],
|
||||
options={
|
||||
'verbose_name': 'EHole 指纹',
|
||||
'verbose_name_plural': 'EHole 指纹',
|
||||
'db_table': 'ehole_fingerprint',
|
||||
'ordering': ['-created_at'],
|
||||
'indexes': [models.Index(fields=['cms'], name='ehole_finge_cms_72ca2c_idx'), models.Index(fields=['method'], name='ehole_finge_method_17f0db_idx'), models.Index(fields=['location'], name='ehole_finge_locatio_7bb82b_idx'), models.Index(fields=['type'], name='ehole_finge_type_ca2bce_idx'), models.Index(fields=['is_important'], name='ehole_finge_is_impo_d56e64_idx'), models.Index(fields=['-created_at'], name='ehole_finge_created_d862b0_idx')],
|
||||
'constraints': [models.UniqueConstraint(fields=('cms', 'method', 'location'), name='unique_ehole_fingerprint')],
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='FingerPrintHubFingerprint',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('fp_id', models.CharField(help_text='指纹ID', max_length=200, unique=True)),
|
||||
('name', models.CharField(help_text='指纹名称', max_length=300)),
|
||||
('author', models.CharField(blank=True, default='', help_text='作者', max_length=200)),
|
||||
('tags', models.CharField(blank=True, default='', help_text='标签', max_length=500)),
|
||||
('severity', models.CharField(blank=True, default='info', help_text='严重程度', max_length=50)),
|
||||
('metadata', models.JSONField(blank=True, default=dict, help_text='元数据')),
|
||||
('http', models.JSONField(default=list, help_text='HTTP 匹配规则')),
|
||||
('source_file', models.CharField(blank=True, default='', help_text='来源文件', max_length=500)),
|
||||
('created_at', models.DateTimeField(auto_now_add=True)),
|
||||
],
|
||||
options={
|
||||
'verbose_name': 'FingerPrintHub 指纹',
|
||||
'verbose_name_plural': 'FingerPrintHub 指纹',
|
||||
'db_table': 'fingerprinthub_fingerprint',
|
||||
'ordering': ['-created_at'],
|
||||
'indexes': [models.Index(fields=['fp_id'], name='fingerprint_fp_id_df467f_idx'), models.Index(fields=['name'], name='fingerprint_name_95b6fb_idx'), models.Index(fields=['author'], name='fingerprint_author_80f54b_idx'), models.Index(fields=['severity'], name='fingerprint_severit_f70422_idx'), models.Index(fields=['-created_at'], name='fingerprint_created_bec16c_idx')],
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='FingersFingerprint',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(help_text='指纹名称', max_length=300, unique=True)),
|
||||
('link', models.URLField(blank=True, default='', help_text='相关链接', max_length=500)),
|
||||
('rule', models.JSONField(default=list, help_text='匹配规则数组')),
|
||||
('tag', models.JSONField(default=list, help_text='标签数组')),
|
||||
('focus', models.BooleanField(default=False, help_text='是否重点关注')),
|
||||
                ('default_port', models.JSONField(blank=True, default=list, help_text='Default port array')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Fingers fingerprint',
                'verbose_name_plural': 'Fingers fingerprints',
                'db_table': 'fingers_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='fingers_fin_name_952de0_idx'), models.Index(fields=['link'], name='fingers_fin_link_4c6b7f_idx'), models.Index(fields=['focus'], name='fingers_fin_focus_568c7f_idx'), models.Index(fields=['-created_at'], name='fingers_fin_created_46fc91_idx')],
            },
        ),
        migrations.CreateModel(
            name='GobyFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Product name', max_length=300, unique=True)),
                ('logic', models.CharField(help_text='Logic expression', max_length=500)),
                ('rule', models.JSONField(default=list, help_text='Rule array')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Goby fingerprint',
                'verbose_name_plural': 'Goby fingerprints',
                'db_table': 'goby_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='goby_finger_name_82084c_idx'), models.Index(fields=['logic'], name='goby_finger_logic_a63226_idx'), models.Index(fields=['-created_at'], name='goby_finger_created_50e000_idx')],
            },
        ),
        migrations.CreateModel(
            name='ScanEngine',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='Engine name', max_length=200, unique=True)),
                ('configuration', models.CharField(blank=True, default='', help_text='Engine configuration, YAML format', max_length=10000)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time')),
            ],
            options={
                'verbose_name': 'Scan engine',
                'verbose_name_plural': 'Scan engines',
                'db_table': 'scan_engine',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='scan_engine_created_da4870_idx')],
            },
        ),
        migrations.CreateModel(
            name='WappalyzerFingerprint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Application name', max_length=300, unique=True)),
                ('cats', models.JSONField(default=list, help_text='Category ID array')),
                ('cookies', models.JSONField(blank=True, default=dict, help_text='Cookie detection rules')),
                ('headers', models.JSONField(blank=True, default=dict, help_text='HTTP header detection rules')),
                ('script_src', models.JSONField(blank=True, default=list, help_text='Script URL regex array')),
                ('js', models.JSONField(blank=True, default=list, help_text='JavaScript variable detection rules')),
                ('implies', models.JSONField(blank=True, default=list, help_text='Implied-technology array')),
                ('meta', models.JSONField(blank=True, default=dict, help_text='HTML meta tag detection rules')),
                ('html', models.JSONField(blank=True, default=list, help_text='HTML content regex array')),
                ('description', models.TextField(blank=True, default='', help_text='Application description')),
                ('website', models.URLField(blank=True, default='', help_text='Official website link', max_length=500)),
                ('cpe', models.CharField(blank=True, default='', help_text='CPE identifier', max_length=300)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Wappalyzer fingerprint',
                'verbose_name_plural': 'Wappalyzer fingerprints',
                'db_table': 'wappalyzer_fingerprint',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['name'], name='wappalyzer__name_63c669_idx'), models.Index(fields=['website'], name='wappalyzer__website_88de1c_idx'), models.Index(fields=['cpe'], name='wappalyzer__cpe_30c761_idx'), models.Index(fields=['-created_at'], name='wappalyzer__created_8e6c21_idx')],
            },
        ),
        migrations.CreateModel(
            name='Wordlist',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                ('name', models.CharField(help_text='Wordlist name, unique', max_length=200, unique=True)),
                ('description', models.CharField(blank=True, default='', help_text='Wordlist description', max_length=200)),
                ('file_path', models.CharField(help_text='Absolute path of the wordlist file stored on the backend', max_length=500)),
                ('file_size', models.BigIntegerField(default=0, help_text='File size in bytes')),
                ('line_count', models.IntegerField(default=0, help_text='Number of lines in the wordlist')),
                ('file_hash', models.CharField(blank=True, default='', help_text='SHA-256 hash of the file, used for cache validation', max_length=64)),
                ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time')),
                ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time')),
            ],
            options={
                'verbose_name': 'Wordlist file',
                'verbose_name_plural': 'Wordlist files',
                'db_table': 'wordlist',
                'ordering': ['-created_at'],
                'indexes': [models.Index(fields=['-created_at'], name='wordlist_created_4afb02_idx')],
            },
        ),
        migrations.CreateModel(
            name='WorkerNode',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='Node name', max_length=100)),
                ('ip_address', models.GenericIPAddressField(help_text='IP address (127.0.0.1 for the local node)')),
                ('ssh_port', models.IntegerField(default=22, help_text='SSH port')),
                ('username', models.CharField(default='root', help_text='SSH username', max_length=50)),
                ('password', models.CharField(blank=True, default='', help_text='SSH password', max_length=200)),
                ('is_local', models.BooleanField(default=False, help_text='Whether this is the local node (inside the Docker container)')),
                ('status', models.CharField(choices=[('pending', 'Pending deployment'), ('deploying', 'Deploying'), ('online', 'Online'), ('offline', 'Offline'), ('updating', 'Updating'), ('outdated', 'Outdated version')], default='pending', help_text='Status: pending/deploying/online/offline', max_length=20)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
            ],
            options={
                'verbose_name': 'Worker node',
                'db_table': 'worker_node',
                'ordering': ['-created_at'],
                'constraints': [models.UniqueConstraint(condition=models.Q(('is_local', False)), fields=('ip_address',), name='unique_remote_worker_ip'), models.UniqueConstraint(fields=('name',), name='unique_worker_name')],
            },
        ),
    ]
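The WorkerNode constraints above are worth a note: the first UniqueConstraint is conditional (condition=Q(is_local=False)), so IP uniqueness is enforced only for remote workers. A minimal sketch of what that permits, assuming the model is importable as shown:

# Sketch only: any number of local nodes may share 127.0.0.1, while remote
# worker IPs must stay unique.
from django.db import IntegrityError
from apps.engine.models import WorkerNode

WorkerNode.objects.create(name='local-a', ip_address='127.0.0.1', is_local=True)
WorkerNode.objects.create(name='local-b', ip_address='127.0.0.1', is_local=True)  # allowed: condition excludes local nodes
WorkerNode.objects.create(name='remote-1', ip_address='10.0.0.5')
try:
    WorkerNode.objects.create(name='remote-2', ip_address='10.0.0.5')  # rejected by unique_remote_worker_ip
except IntegrityError:
    pass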
29 backend/apps/engine/models/__init__.py Normal file
@@ -0,0 +1,29 @@
"""Engine Models

Exports all models of the Engine module
"""

from .engine import WorkerNode, ScanEngine, Wordlist, NucleiTemplateRepo
from .fingerprints import (
    EholeFingerprint,
    GobyFingerprint,
    WappalyzerFingerprint,
    FingersFingerprint,
    FingerPrintHubFingerprint,
    ARLFingerprint,
)

__all__ = [
    # Core models
    "WorkerNode",
    "ScanEngine",
    "Wordlist",
    "NucleiTemplateRepo",
    # Fingerprint models
    "EholeFingerprint",
    "GobyFingerprint",
    "WappalyzerFingerprint",
    "FingersFingerprint",
    "FingerPrintHubFingerprint",
    "ARLFingerprint",
]
backend/apps/engine/models/engine.py
@@ -1,3 +1,8 @@
"""Engine module core models

Contains WorkerNode, ScanEngine, Wordlist, NucleiTemplateRepo
"""

from django.db import models


@@ -10,6 +15,8 @@ class WorkerNode(models.Model):
        ('deploying', 'Deploying'),
        ('online', 'Online'),
        ('offline', 'Offline'),
        ('updating', 'Updating'),
        ('outdated', 'Outdated version'),
    ]

    name = models.CharField(max_length=100, help_text='Node name')
@@ -76,6 +83,7 @@ class ScanEngine(models.Model):
        indexes = [
            models.Index(fields=['-created_at']),
        ]

    def __str__(self):
        return str(self.name or f'ScanEngine {self.id}')
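Since status carries choices, Django auto-generates get_status_display(); a quick hedged sketch of how the two new states surface (the sample values are assumptions):

from apps.engine.models import WorkerNode

node = WorkerNode(name='w1', ip_address='10.0.0.5', status='outdated')
node.get_status_display()  # -> 'Outdated version'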
195 backend/apps/engine/models/fingerprints.py Normal file
@@ -0,0 +1,195 @@
"""Fingerprint-related models

Contains data models for the EHole, Goby, Wappalyzer and other fingerprint formats
"""

from django.db import models


class GobyFingerprint(models.Model):
    """Goby-format fingerprint rule

    Goby matches with a logic expression over a rule array:
    - logic: logic expression, e.g. "a||b", "(a&&b)||c"
    - rule: rule array; each rule contains label, feature, is_equal
    """

    name = models.CharField(max_length=300, unique=True, help_text='Product name')
    logic = models.CharField(max_length=500, help_text='Logic expression')
    rule = models.JSONField(default=list, help_text='Rule array')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'goby_fingerprint'
        verbose_name = 'Goby fingerprint'
        verbose_name_plural = 'Goby fingerprints'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['name']),
            models.Index(fields=['logic']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self) -> str:
        return f"{self.name} ({self.logic})"

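To make the logic field concrete, here is a hedged sketch (not repo code) of how such an expression could be evaluated once each rule label has produced a boolean match result; the labels and results are illustrative assumptions:

def evaluate_logic(logic: str, match_results: dict) -> bool:
    # Map Goby's && / || operators onto Python booleans and evaluate the
    # expression with only the per-label match results visible.
    expr = logic.replace('&&', ' and ').replace('||', ' or ')
    return bool(eval(expr, {"__builtins__": {}}, match_results))

assert evaluate_logic("(a&&b)||c", {"a": True, "b": False, "c": True})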
class EholeFingerprint(models.Model):
    """EHole-format fingerprint rule (fields match ehole.json)"""

    cms = models.CharField(max_length=200, help_text='Product/CMS name')
    method = models.CharField(max_length=200, default='keyword', help_text='Match method')
    location = models.CharField(max_length=200, default='body', help_text='Match location')
    keyword = models.JSONField(default=list, help_text='Keyword list')
    is_important = models.BooleanField(default=False, help_text='Whether this is a key asset')
    type = models.CharField(max_length=100, blank=True, default='-', help_text='Category')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'ehole_fingerprint'
        verbose_name = 'EHole fingerprint'
        verbose_name_plural = 'EHole fingerprints'
        ordering = ['-created_at']
        indexes = [
            # Indexes on search/filter fields
            models.Index(fields=['cms']),
            models.Index(fields=['method']),
            models.Index(fields=['location']),
            models.Index(fields=['type']),
            models.Index(fields=['is_important']),
            # Index on the ordering field
            models.Index(fields=['-created_at']),
        ]
        constraints = [
            # Unique constraint: the cms + method + location combination must not repeat
            models.UniqueConstraint(
                fields=['cms', 'method', 'location'],
                name='unique_ehole_fingerprint'
            ),
        ]

    def __str__(self) -> str:
        return f"{self.cms} ({self.method}@{self.location})"

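A hedged sketch of the matching semantics these fields imply (every keyword must appear at the given location; this reading of EHole is an assumption, not the project's matcher):

def ehole_match(keywords: list, location: str, response: dict) -> bool:
    # response maps locations ('body', 'header', 'title', ...) to raw text.
    haystack = response.get(location, '')
    return all(kw in haystack for kw in keywords)

ehole_match(['ThinkPHP'], 'body', {'body': '<html>Powered by ThinkPHP</html>'})  # True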
class WappalyzerFingerprint(models.Model):
    """Wappalyzer-format fingerprint rule

    Wappalyzer supports multiple detection channels: cookies, headers, scriptSrc, js, meta, html, etc.
    """

    name = models.CharField(max_length=300, unique=True, help_text='Application name')
    cats = models.JSONField(default=list, help_text='Category ID array')
    cookies = models.JSONField(default=dict, blank=True, help_text='Cookie detection rules')
    headers = models.JSONField(default=dict, blank=True, help_text='HTTP header detection rules')
    script_src = models.JSONField(default=list, blank=True, help_text='Script URL regex array')
    js = models.JSONField(default=list, blank=True, help_text='JavaScript variable detection rules')
    implies = models.JSONField(default=list, blank=True, help_text='Implied-technology array')
    meta = models.JSONField(default=dict, blank=True, help_text='HTML meta tag detection rules')
    html = models.JSONField(default=list, blank=True, help_text='HTML content regex array')
    description = models.TextField(blank=True, default='', help_text='Application description')
    website = models.URLField(max_length=500, blank=True, default='', help_text='Official website link')
    cpe = models.CharField(max_length=300, blank=True, default='', help_text='CPE identifier')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'wappalyzer_fingerprint'
        verbose_name = 'Wappalyzer fingerprint'
        verbose_name_plural = 'Wappalyzer fingerprints'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['name']),
            models.Index(fields=['website']),
            models.Index(fields=['cpe']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self) -> str:
        return f"{self.name}"

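One plausible reading of the headers rules (header name mapped to a regex), sketched under that assumption rather than taken from the project's matcher:

import re

def headers_match(rules: dict, headers: dict) -> bool:
    # Case-insensitive header lookup; each rule value is applied as a regex.
    lowered = {k.lower(): v for k, v in headers.items()}
    return all(
        name.lower() in lowered and re.search(pattern, lowered[name.lower()])
        for name, pattern in rules.items()
    )

headers_match({'Server': r'nginx'}, {'Server': 'nginx/1.25.3'})  # True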
class FingersFingerprint(models.Model):
    """Fingers-format fingerprint rule (fingers_http.json)

    Matches with regular expressions and tags; supports favicon hash, header, body and other detection channels
    """

    name = models.CharField(max_length=300, unique=True, help_text='Fingerprint name')
    link = models.URLField(max_length=500, blank=True, default='', help_text='Related link')
    rule = models.JSONField(default=list, help_text='Match rule array')
    tag = models.JSONField(default=list, help_text='Tag array')
    focus = models.BooleanField(default=False, help_text='Whether to prioritize this fingerprint')
    default_port = models.JSONField(default=list, blank=True, help_text='Default port array')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'fingers_fingerprint'
        verbose_name = 'Fingers fingerprint'
        verbose_name_plural = 'Fingers fingerprints'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['name']),
            models.Index(fields=['link']),
            models.Index(fields=['focus']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self) -> str:
        return f"{self.name}"

class FingerPrintHubFingerprint(models.Model):
    """FingerPrintHub-format fingerprint rule (fingerprinthub_web.json)

    Based on the nuclei template format; matches on HTTP request and response features
    """

    fp_id = models.CharField(max_length=200, unique=True, help_text='Fingerprint ID')
    name = models.CharField(max_length=300, help_text='Fingerprint name')
    author = models.CharField(max_length=200, blank=True, default='', help_text='Author')
    tags = models.CharField(max_length=500, blank=True, default='', help_text='Tags')
    severity = models.CharField(max_length=50, blank=True, default='info', help_text='Severity')
    metadata = models.JSONField(default=dict, blank=True, help_text='Metadata')
    http = models.JSONField(default=list, help_text='HTTP match rules')
    source_file = models.CharField(max_length=500, blank=True, default='', help_text='Source file')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'fingerprinthub_fingerprint'
        verbose_name = 'FingerPrintHub fingerprint'
        verbose_name_plural = 'FingerPrintHub fingerprints'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['fp_id']),
            models.Index(fields=['name']),
            models.Index(fields=['author']),
            models.Index(fields=['severity']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self) -> str:
        return f"{self.name} ({self.fp_id})"

class ARLFingerprint(models.Model):
    """ARL-format fingerprint rule (ARL.yaml)

    Uses a simple name + rule expression format
    """

    name = models.CharField(max_length=300, unique=True, help_text='Fingerprint name')
    rule = models.TextField(help_text='Match rule expression')
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'arl_fingerprint'
        verbose_name = 'ARL fingerprint'
        verbose_name_plural = 'ARL fingerprints'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['name']),
            models.Index(fields=['-created_at']),
        ]

    def __str__(self) -> str:
        return f"{self.name}"
@@ -88,6 +88,8 @@ def _register_scheduled_jobs(scheduler: BackgroundScheduler):
        replace_existing=True,
    )
    logger.info(" - Registered: scan result cleanup (daily at 03:00)")

    # Note: the search materialized-view refresh now relies on pg_ivm incremental maintenance, so no scheduled job is needed


def _trigger_scheduled_scans():
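For context, APScheduler registration follows this shape; the function and job id below are illustrative assumptions, and replace_existing=True is what makes re-registration on every startup idempotent:

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

def _cleanup_scan_results():
    ...  # placeholder for the actual cleanup logic

scheduler = BackgroundScheduler()
scheduler.add_job(
    _cleanup_scan_results,
    CronTrigger(hour=3, minute=0),  # daily at 03:00
    id='cleanup_scan_results',
    replace_existing=True,          # idempotent across restarts
)
scheduler.start()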
20 backend/apps/engine/serializers/fingerprints/__init__.py Normal file
@@ -0,0 +1,20 @@
"""Fingerprint management serializers

Exports all fingerprint-related Serializer classes
"""

from .ehole import EholeFingerprintSerializer
from .goby import GobyFingerprintSerializer
from .wappalyzer import WappalyzerFingerprintSerializer
from .fingers import FingersFingerprintSerializer
from .fingerprinthub import FingerPrintHubFingerprintSerializer
from .arl import ARLFingerprintSerializer

__all__ = [
    "EholeFingerprintSerializer",
    "GobyFingerprintSerializer",
    "WappalyzerFingerprintSerializer",
    "FingersFingerprintSerializer",
    "FingerPrintHubFingerprintSerializer",
    "ARLFingerprintSerializer",
]
31 backend/apps/engine/serializers/fingerprints/arl.py Normal file
@@ -0,0 +1,31 @@
"""ARL fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import ARLFingerprint


class ARLFingerprintSerializer(serializers.ModelSerializer):
    """ARL fingerprint serializer

    Field mapping:
    - name: fingerprint name (required, unique)
    - rule: match rule expression (required)
    """

    class Meta:
        model = ARLFingerprint
        fields = ['id', 'name', 'rule', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_name(self, value):
        """Validate the name field"""
        if not value or not value.strip():
            raise serializers.ValidationError("name must not be empty")
        return value.strip()

    def validate_rule(self, value):
        """Validate the rule field"""
        if not value or not value.strip():
            raise serializers.ValidationError("rule must not be empty")
        return value.strip()
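A quick usage sketch, exercising the validators outside any view (the payload is an assumption):

serializer = ARLFingerprintSerializer(data={'name': '  Nginx ', 'rule': 'header="nginx"'})
if serializer.is_valid():
    serializer.validated_data  # {'name': 'Nginx', 'rule': 'header="nginx"'} - whitespace stripped
else:
    serializer.errors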
27 backend/apps/engine/serializers/fingerprints/ehole.py Normal file
@@ -0,0 +1,27 @@
"""EHole fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import EholeFingerprint


class EholeFingerprintSerializer(serializers.ModelSerializer):
    """EHole fingerprint serializer"""

    class Meta:
        model = EholeFingerprint
        fields = ['id', 'cms', 'method', 'location', 'keyword',
                  'is_important', 'type', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_cms(self, value):
        """Validate the cms field"""
        if not value or not value.strip():
            raise serializers.ValidationError("cms must not be empty")
        return value.strip()

    def validate_keyword(self, value):
        """Validate the keyword field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("keyword must be a list")
        return value
50 backend/apps/engine/serializers/fingerprints/fingerprinthub.py Normal file
@@ -0,0 +1,50 @@
"""FingerPrintHub fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import FingerPrintHubFingerprint


class FingerPrintHubFingerprintSerializer(serializers.ModelSerializer):
    """FingerPrintHub fingerprint serializer

    Field mapping:
    - fp_id: fingerprint ID (required, unique)
    - name: fingerprint name (required)
    - author: author (optional)
    - tags: tag string (optional)
    - severity: severity (optional, defaults to 'info')
    - metadata: metadata JSON (optional)
    - http: HTTP match rule array (required)
    - source_file: source file (optional)
    """

    class Meta:
        model = FingerPrintHubFingerprint
        fields = ['id', 'fp_id', 'name', 'author', 'tags', 'severity',
                  'metadata', 'http', 'source_file', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_fp_id(self, value):
        """Validate the fp_id field"""
        if not value or not value.strip():
            raise serializers.ValidationError("fp_id must not be empty")
        return value.strip()

    def validate_name(self, value):
        """Validate the name field"""
        if not value or not value.strip():
            raise serializers.ValidationError("name must not be empty")
        return value.strip()

    def validate_http(self, value):
        """Validate the http field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("http must be a list")
        return value

    def validate_metadata(self, value):
        """Validate the metadata field"""
        if not isinstance(value, dict):
            raise serializers.ValidationError("metadata must be an object")
        return value
48 backend/apps/engine/serializers/fingerprints/fingers.py Normal file
@@ -0,0 +1,48 @@
"""Fingers fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import FingersFingerprint


class FingersFingerprintSerializer(serializers.ModelSerializer):
    """Fingers fingerprint serializer

    Field mapping:
    - name: fingerprint name (required, unique)
    - link: related link (optional)
    - rule: match rule array (required)
    - tag: tag array (optional)
    - focus: whether to prioritize (optional, defaults to False)
    - default_port: default port array (optional)
    """

    class Meta:
        model = FingersFingerprint
        fields = ['id', 'name', 'link', 'rule', 'tag', 'focus',
                  'default_port', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_name(self, value):
        """Validate the name field"""
        if not value or not value.strip():
            raise serializers.ValidationError("name must not be empty")
        return value.strip()

    def validate_rule(self, value):
        """Validate the rule field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("rule must be a list")
        return value

    def validate_tag(self, value):
        """Validate the tag field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("tag must be a list")
        return value

    def validate_default_port(self, value):
        """Validate the default_port field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("default_port must be a list")
        return value
26 backend/apps/engine/serializers/fingerprints/goby.py Normal file
@@ -0,0 +1,26 @@
"""Goby fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import GobyFingerprint


class GobyFingerprintSerializer(serializers.ModelSerializer):
    """Goby fingerprint serializer"""

    class Meta:
        model = GobyFingerprint
        fields = ['id', 'name', 'logic', 'rule', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_name(self, value):
        """Validate the name field"""
        if not value or not value.strip():
            raise serializers.ValidationError("name must not be empty")
        return value.strip()

    def validate_rule(self, value):
        """Validate the rule field"""
        if not isinstance(value, list):
            raise serializers.ValidationError("rule must be a list")
        return value
24 backend/apps/engine/serializers/fingerprints/wappalyzer.py Normal file
@@ -0,0 +1,24 @@
"""Wappalyzer fingerprint serializer"""

from rest_framework import serializers

from apps.engine.models import WappalyzerFingerprint


class WappalyzerFingerprintSerializer(serializers.ModelSerializer):
    """Wappalyzer fingerprint serializer"""

    class Meta:
        model = WappalyzerFingerprint
        fields = [
            'id', 'name', 'cats', 'cookies', 'headers', 'script_src',
            'js', 'implies', 'meta', 'html', 'description', 'website',
            'cpe', 'created_at'
        ]
        read_only_fields = ['id', 'created_at']

    def validate_name(self, value):
        """Validate the name field"""
        if not value or not value.strip():
            raise serializers.ValidationError("name must not be empty")
        return value.strip()
@@ -66,6 +66,7 @@ def get_start_agent_script(
    # Substitute variables
    script = script.replace("{{HEARTBEAT_API_URL}}", heartbeat_api_url or '')
    script = script.replace("{{WORKER_ID}}", str(worker_id) if worker_id else '')
    script = script.replace("{{WORKER_API_KEY}}", getattr(settings, 'WORKER_API_KEY', ''))

    # Inject the image version configuration (ensures remote nodes use the same version)
    docker_user = getattr(settings, 'DOCKER_USER', 'yyhuni')
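The substitution above is plain placeholder replacement; generalized as a hedged sketch (the render helper is an assumption, not repo code):

def render(template: str, variables: dict) -> str:
    # Replace each {{KEY}} placeholder with its value.
    for key, value in variables.items():
        template = template.replace('{{' + key + '}}', str(value))
    return template

render('URL={{HEARTBEAT_API_URL}} ID={{WORKER_ID}}',
       {'HEARTBEAT_API_URL': 'http://server:8000/api/heartbeat/', 'WORKER_ID': 3})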
22 backend/apps/engine/services/fingerprints/__init__.py Normal file
@@ -0,0 +1,22 @@
"""Fingerprint management services

Exports all fingerprint-related Service classes
"""

from .base import BaseFingerprintService
from .ehole import EholeFingerprintService
from .goby import GobyFingerprintService
from .wappalyzer import WappalyzerFingerprintService
from .fingers_service import FingersFingerprintService
from .fingerprinthub_service import FingerPrintHubFingerprintService
from .arl_service import ARLFingerprintService

__all__ = [
    "BaseFingerprintService",
    "EholeFingerprintService",
    "GobyFingerprintService",
    "WappalyzerFingerprintService",
    "FingersFingerprintService",
    "FingerPrintHubFingerprintService",
    "ARLFingerprintService",
]
110 backend/apps/engine/services/fingerprints/arl_service.py Normal file
@@ -0,0 +1,110 @@
"""ARL fingerprint management service

Implements validation, conversion and export logic for ARL-format fingerprints
Supports import and export in YAML format
"""

import logging
import yaml

from apps.engine.models import ARLFingerprint
from .base import BaseFingerprintService

logger = logging.getLogger(__name__)


class ARLFingerprintService(BaseFingerprintService):
    """ARL fingerprint management service (inherits the base class, implements ARL-specific logic)"""

    model = ARLFingerprint

    def validate_fingerprint(self, item: dict) -> bool:
        """
        Validate a single ARL fingerprint

        Validation rules:
        - the name field must exist and be non-empty
        - the rule field must exist and be non-empty

        Args:
            item: a single fingerprint record

        Returns:
            bool: whether the record is valid
        """
        name = item.get('name', '')
        rule = item.get('rule', '')
        return bool(name and str(name).strip()) and bool(rule and str(rule).strip())

    def to_model_data(self, item: dict) -> dict:
        """
        Convert an ARL YAML record into model fields

        Args:
            item: raw ARL YAML record

        Returns:
            dict: model field data
        """
        return {
            'name': str(item.get('name', '')).strip(),
            'rule': str(item.get('rule', '')).strip(),
        }

    def get_export_data(self) -> list:
        """
        Get export data (ARL format - a list, used for YAML export)

        Returns:
            list: data in ARL format (list form)
                [
                    {"name": "...", "rule": "..."},
                    ...
                ]
        """
        fingerprints = self.model.objects.all()
        return [
            {
                'name': fp.name,
                'rule': fp.rule,
            }
            for fp in fingerprints
        ]

    def export_to_yaml(self, output_path: str) -> int:
        """
        Export all fingerprints to a YAML file

        Args:
            output_path: output file path

        Returns:
            int: number of fingerprints exported
        """
        data = self.get_export_data()
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
        count = len(data)
        logger.info("Exported ARL fingerprint file: %s, count: %d", output_path, count)
        return count

    def parse_yaml_import(self, yaml_content: str) -> list:
        """
        Parse YAML import content

        Args:
            yaml_content: YAML string content

        Returns:
            list: parsed fingerprint records

        Raises:
            ValueError: when the YAML is invalid
        """
        try:
            data = yaml.safe_load(yaml_content)
            if not isinstance(data, list):
                raise ValueError("An ARL YAML file must be a list")
            return data
        except yaml.YAMLError as e:
            raise ValueError(f"Invalid YAML format: {e}") from e
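A hedged round-trip sketch using only the methods shown here and in base.py below (the sample YAML and output path are assumptions):

svc = ARLFingerprintService()
records = svc.parse_yaml_import('- name: Nginx\n  rule: header="nginx"\n')
result = svc.batch_create_fingerprints(records)  # e.g. {'created': 1, 'failed': 0}
svc.export_to_yaml('/tmp/ARL.yaml')              # writes the stored rules back out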
144 backend/apps/engine/services/fingerprints/base.py Normal file
@@ -0,0 +1,144 @@
"""Fingerprint management base service

Provides shared bulk operations and caching logic for the EHole/Goby/Wappalyzer subclasses to inherit
"""

import json
import logging
from typing import Any

logger = logging.getLogger(__name__)


class BaseFingerprintService:
    """Fingerprint management base service providing shared bulk operations and caching logic"""

    model = None  # subclasses must set this
    BATCH_SIZE = 1000  # number of records per batch

    def validate_fingerprint(self, item: dict) -> bool:
        """
        Validate a single fingerprint; subclasses must implement

        Args:
            item: a single fingerprint record

        Returns:
            bool: whether the record is valid
        """
        raise NotImplementedError("Subclasses must implement validate_fingerprint")

    def validate_fingerprints(self, raw_data: list) -> tuple[list, list]:
        """
        Validate fingerprint records in bulk

        Args:
            raw_data: raw fingerprint records

        Returns:
            tuple: (valid_items, invalid_items)
        """
        valid, invalid = [], []
        for item in raw_data:
            if self.validate_fingerprint(item):
                valid.append(item)
            else:
                invalid.append(item)
        return valid, invalid

    def to_model_data(self, item: dict) -> dict:
        """
        Convert a record into model fields; subclasses must implement

        Args:
            item: raw fingerprint record

        Returns:
            dict: model field data
        """
        raise NotImplementedError("Subclasses must implement to_model_data")

    def bulk_create(self, fingerprints: list) -> int:
        """
        Bulk-create fingerprint records (already validated)

        Args:
            fingerprints: validated fingerprint records

        Returns:
            int: number of records created
        """
        if not fingerprints:
            return 0

        objects = [self.model(**self.to_model_data(item)) for item in fingerprints]
        created = self.model.objects.bulk_create(objects, ignore_conflicts=True)
        return len(created)

    def batch_create_fingerprints(self, raw_data: list) -> dict:
        """
        Full pipeline: validate in batches + bulk create

        Args:
            raw_data: raw fingerprint records

        Returns:
            dict: {'created': int, 'failed': int}
        """
        total_created = 0
        total_failed = 0

        for i in range(0, len(raw_data), self.BATCH_SIZE):
            batch = raw_data[i:i + self.BATCH_SIZE]
            valid, invalid = self.validate_fingerprints(batch)
            total_created += self.bulk_create(valid)
            total_failed += len(invalid)

        logger.info(
            "Bulk fingerprint creation finished: created=%d, failed=%d, total=%d",
            total_created, total_failed, len(raw_data)
        )
        return {'created': total_created, 'failed': total_failed}

    def get_export_data(self) -> dict:
        """
        Get export data; subclasses must implement

        Returns:
            dict: JSON data to export
        """
        raise NotImplementedError("Subclasses must implement get_export_data")

    def export_to_file(self, output_path: str) -> int:
        """
        Export all fingerprints to a JSON file

        Args:
            output_path: output file path

        Returns:
            int: number of fingerprints exported
        """
        data = self.get_export_data()
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False)
        count = len(data.get('fingerprint', []))
        logger.info("Exported fingerprint file: %s, count: %d", output_path, count)
        return count

    def get_fingerprint_version(self) -> str:
        """
        Get a fingerprint library version marker (used for cache validation)

        Returns:
            str: version marker in the format "{count}_{latest_timestamp}"

        Scenarios where the version changes:
        - adding records -> count changes
        - deleting records -> count changes
        - clearing everything -> count becomes 0
        """
        count = self.model.objects.count()
        latest = self.model.objects.order_by('-created_at').first()
        latest_ts = int(latest.created_at.timestamp()) if latest else 0
        return f"{count}_{latest_ts}"
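A hedged sketch of how that version marker might gate a cached export file; the cache layout and helper name are assumptions, not repo code:

import os

def export_with_cache(svc, path: str, version_file: str) -> None:
    # Regenerate the export only when the version marker has moved.
    version = svc.get_fingerprint_version()
    cached = None
    if os.path.exists(version_file):
        with open(version_file) as f:
            cached = f.read().strip()
    if cached != version or not os.path.exists(path):
        svc.export_to_file(path)
        with open(version_file, 'w') as f:
            f.write(version)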
Some files were not shown because too many files have changed in this diff.