Mirror of https://github.com/yyhuni/xingrin.git (synced 2026-01-31 19:53:11 +08:00)

Compare commits: v1.0.13 ... v1.2.1-dev (190 commits)
Commits in this comparison (SHA1):

6caf707072, 2627b1fc40, ec6712b9b4, 9d5e4d5408, c5d5b24c8f, 671cb56b62, 51025f69a8, b2403b29c4, 18ef01a47b, 0bf8108fb3,
837ad19131, d7de9a7129, 22b4e51b42, d03628ee45, 0baabe0753, e1191d7abf, 82a2e9a0e7, 1ccd1bc338, b4d42f5372, 2c66450756,
119d82dc89, fba7f7c508, 99d384ce29, 07f36718ab, 7e3f69c208, 5f90473c3c, e2a815b96a, f86a1a9d47, d5945679aa, 51e2c51748,
e2cbf98dda, cd72bdf7c3, 35abcf7e39, 09f2d343a4, 54d1f86bde, a3997c9676, c90a55f85e, 2eab88b452, 1baf0eb5e1, b61e73f7be,
e896734dfc, cd83f52f35, 3e29554c36, 18e02b536e, 4c1c6f70ab, a72e7675f5, 93c2163764, de72c91561, 3e6d060b75, 766f045904,
8acfe1cc33, 7aec3eabb2, b1f11c36a4, d97fb5245a, ddf9a1f5a4, 47f9f96a4b, 6f43e73162, 9b7d496f3e, 6390849d52, 7a6d2054f6,
73ebaab232, 11899b29c2, 877d2a56d1, dc1e94f038, 9c3833d13d, 92f3b722ef, 9ef503c666, c3a43e94fa, d6d94355fb, bc638eabf4,
5acaada7ab, aaad3f29cf, f13eb2d9b2, f1b3b60382, e249056289, dba195b83a, 9b494e6c67, 2841157747, f6c1fef1a6, 6ec0adf9dd,
22c6661567, d9ed004e35, a0d9d1f29d, 8aa9ed2a97, 8baf29d1c3, 248e48353a, 0d210be50b, f7c0d0b215, d83428f27b, 45a09b8173,
11dfdee6fd, e53a884d13, 3b318c89e3, e564bc116a, 410c543066, 66da140801, e60aac3622, 14aaa71cb1, 0309dba510, 967ff8a69f,
9ac23d50b6, 265525c61e, 1b9d05ce62, 737980b30f, 494ee81478, 452686b282, c95c68f4e9, b02f38606d, b543f3d2b7, a18fb46906,
bb74f61ea2, 654f3664f8, 30defe08d2, 41266bd931, 9eebd0a47c, e7f4d25e58, 56cc810783, efe20bbf69, d88cf19a68, 8e74f842f0,
5e9773a183, a952ef5b6b, 815c409a9e, 7ca85b8d7d, 73291e6c4c, dcafe03ea2, 0390e05397, 088b69b61a, de34567b53, bf40532ce4,
252759c822, 2d43204639, 7715d0cf01, 6d0d87d8ef, 25074f9191, b06f33db5b, a116755087, cddc4c244d, 9e7089a8c2, 6f543072fd,
196058384a, e076ea6849, abdc580a7a, 17134ceb4b, 98fba31118, 75eff9929e, 9baa615623, 69a598e789, 54017d0334, 8ac97b4451,
0a4f1d45be, bce310a4b0, 8502daf8a0, d0066dd9fc, 3407a98cac, 3d189431fc, 1cbb6350c4, 20a22f98d0, a96ab79891, 3744a724be,
f63e40fbba, 54573e210a, 6179dd2ed3, 34ac706fbc, 3ba1ba427e, 6019555729, 750f52c515, bb5ce66a31, ac958571a5, bcb321f883,
fd3cdf8033, f3f9718df2, 984c34dbca, e9dcbf510d, 65deb8c5d0, 5a93ad878c, 51f25d0976, fe1579e7fb, ef117d2245, 39cea5a918,
0d477ce269, 1bb6e90c3d, 9004c77031, 71de0b4b1b, 1ef1f9709e, 3323bd2a4f, df602dd1ae, 372bab5267, bed80e4ba7, 3b014bd04c
.github/workflows/docker-build.yml (vendored, 21 lines changed)
@@ -16,7 +16,7 @@ env:
  IMAGE_PREFIX: yyhuni

permissions:
  contents: write  # allow the workflow to modify repository contents
  contents: write

jobs:
  build:

@@ -27,18 +27,23 @@ jobs:
      - image: xingrin-server
        dockerfile: docker/server/Dockerfile
        context: .
        platforms: linux/amd64,linux/arm64
      - image: xingrin-frontend
        dockerfile: docker/frontend/Dockerfile
        context: .
        platforms: linux/amd64  # Next.js crashes under QEMU when building for ARM64
      - image: xingrin-worker
        dockerfile: docker/worker/Dockerfile
        context: .
        platforms: linux/amd64,linux/arm64
      - image: xingrin-nginx
        dockerfile: docker/nginx/Dockerfile
        context: .
        platforms: linux/amd64,linux/arm64
      - image: xingrin-agent
        dockerfile: docker/agent/Dockerfile
        context: .
        platforms: linux/amd64,linux/arm64

    steps:
      - name: Checkout

@@ -48,7 +53,6 @@ jobs:
      run: |
        echo "=== Before cleanup ==="
        df -h
        # remove large packages that are not needed
        sudo rm -rf /usr/share/dotnet
        sudo rm -rf /usr/local/lib/android
        sudo rm -rf /opt/ghc

@@ -95,21 +99,24 @@
      with:
        context: ${{ matrix.context }}
        file: ${{ matrix.dockerfile }}
        platforms: linux/amd64,linux/arm64
        platforms: ${{ matrix.platforms }}
        push: true
        tags: |
          ${{ env.IMAGE_PREFIX }}/${{ matrix.image }}:${{ steps.version.outputs.VERSION }}
          ${{ steps.version.outputs.IS_RELEASE == 'true' && format('{0}/{1}:latest', env.IMAGE_PREFIX, matrix.image) || '' }}
        cache-from: type=gha
        cache-to: type=gha,mode=max
        build-args: |
          IMAGE_TAG=${{ steps.version.outputs.VERSION }}
        cache-from: type=gha,scope=${{ matrix.image }}
        cache-to: type=gha,mode=max,scope=${{ matrix.image }}
        provenance: false
        sbom: false

  # after all images build successfully, update the VERSION file
  # only stable releases (no -dev, -alpha, -beta, -rc suffix) trigger the update
  update-version:
    runs-on: ubuntu-latest
    needs: build  # wait for all build jobs to finish
    if: startsWith(github.ref, 'refs/tags/v')
    needs: build
    if: startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, '-')
    steps:
      - name: Checkout
        uses: actions/checkout@v4
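The tightened `if:` condition is what keeps pre-release tags such as `v1.2.1-dev` from updating the VERSION file; only plain `vX.Y.Z` tags pass. The same gate, written out as a small Python sketch (illustrative only, not code from the repository):

```python
def is_stable_release(ref: str) -> bool:
    """Mirror of the workflow gate: a version tag with no pre-release suffix
    (-dev, -alpha, -beta, -rc, ...) and nothing that is not a tag at all."""
    return ref.startswith("refs/tags/v") and "-" not in ref


assert is_stable_release("refs/tags/v1.2.1")
assert not is_stable_release("refs/tags/v1.2.1-dev")  # pre-release: skipped
assert not is_stable_release("refs/heads/main")       # not a tag: skipped
```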
.gitignore (vendored, 3 lines changed)
@@ -96,6 +96,7 @@ backend/vendor/
.idea/
.cursor/
.claude/
.kiro/
.playwright-mcp/
*.swp
*.swo

@@ -131,3 +132,5 @@ temp/

HGETALL
KEYS
vuln_scan/input_endpoints.txt
open-in-v0
LICENSE (733 lines changed)

@@ -1,131 +1,674 @@

The license file is replaced wholesale: the PolyForm Noncommercial License 1.0.0 text (131 lines, <https://polyformproject.org/licenses/noncommercial/1.0.0>, including the line "Required Notice: Copyright Yuhang Yang (yyhuni)") is removed, and the full standard text of the GNU General Public License, Version 3, 29 June 2007 (674 lines, Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>) takes its place.
README.md (211 lines changed)
@@ -1,27 +1,40 @@
<h1 align="center">Xingrin - 星环</h1>
<h1 align="center">XingRin - 星环</h1>

<p align="center">
  <b>A modern, enterprise-grade vulnerability scanning and asset management platform</b><br>
  Automated security testing, asset discovery, vulnerability management, and more
  <b>🛡️ Attack Surface Management (ASM) platform | automated asset discovery and vulnerability scanning</b>
</p>

<p align="center">
  <b>🌗 Light/dark mode toggle</b>
  <a href="https://github.com/yyhuni/xingrin/stargazers"><img src="https://img.shields.io/github/stars/yyhuni/xingrin?style=flat-square&logo=github" alt="GitHub stars"></a>
  <a href="https://github.com/yyhuni/xingrin/network/members"><img src="https://img.shields.io/github/forks/yyhuni/xingrin?style=flat-square&logo=github" alt="GitHub forks"></a>
  <a href="https://github.com/yyhuni/xingrin/issues"><img src="https://img.shields.io/github/issues/yyhuni/xingrin?style=flat-square&logo=github" alt="GitHub issues"></a>
  <a href="https://github.com/yyhuni/xingrin/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-PolyForm%20NC-blue?style=flat-square" alt="License"></a>
</p>

<p align="center">
  <img src="docs/screenshots/light.png" alt="Light Mode" width="49%">
  <img src="docs/screenshots/dark.png" alt="Dark Mode" width="49%">
  <a href="#-功能特性">Features</a> •
  <a href="#-快速开始">Quick Start</a> •
  <a href="#-文档">Documentation</a> •
  <a href="#-技术栈">Tech Stack</a> •
  <a href="#-反馈与贡献">Feedback & Contributing</a>
</p>

<p align="center">
  <b>🎨 Multiple UI themes</b>
  <sub>🔍 Keywords: ASM | Attack Surface Management | Vulnerability Scanning | Asset Discovery | Bug Bounty | Penetration Testing | Nuclei | Subdomain Enumeration | EASM</sub>
</p>

---

<p align="center">
  <b>🎨 Modern UI</b>
</p>

<p align="center">
  <img src="docs/screenshots/bubblegum.png" alt="Bubblegum" width="32%">
  <img src="docs/screenshots/cosmic-night.png" alt="Cosmic Night" width="32%">
  <img src="docs/screenshots/quantum-rose.png" alt="Quantum Rose" width="32%">
  <img src="docs/screenshots/light.png" alt="Light Mode" width="24%">
  <img src="docs/screenshots/bubblegum.png" alt="Bubblegum" width="24%">
  <img src="docs/screenshots/cosmic-night.png" alt="Cosmic Night" width="24%">
  <img src="docs/screenshots/quantum-rose.png" alt="Quantum Rose" width="24%">
</p>

## 📚 Documentation

@@ -31,6 +44,7 @@
- [🔄 Version Management](./docs/version-management.md) - Git-tag-driven automated version management system
- [📦 Nuclei Template Architecture](./docs/nuclei-template-architecture.md) - storage and synchronization of the template repository
- [📖 Wordlist Architecture](./docs/wordlist-architecture.md) - storage and synchronization of wordlist files
- [🔍 Scan Flow Architecture](./docs/scan-flow-architecture.md) - the complete scan flow and tool orchestration

---
@@ -48,6 +62,54 @@
- **Custom pipelines** - scan flows configured in YAML for flexible orchestration
- **Scheduled scans** - cron-expression configuration for automated periodic scanning

#### Scan Flow Architecture

The complete scan flow covers subdomain discovery, port scanning, site discovery, URL collection, directory scanning, and vulnerability scanning stages.

```mermaid
flowchart LR
    START["Start scan"]

    subgraph STAGE1["Stage 1: Asset discovery"]
        direction TB
        SUB["Subdomain discovery<br/>subfinder, amass, puredns"]
        PORT["Port scanning<br/>naabu"]
        SITE["Site identification<br/>httpx"]
        SUB --> PORT --> SITE
    end

    subgraph STAGE2["Stage 2: Deep analysis"]
        direction TB
        URL["URL collection<br/>waymore, katana"]
        DIR["Directory scanning<br/>ffuf"]
    end

    subgraph STAGE3["Stage 3: Vulnerability detection"]
        VULN["Vulnerability scanning<br/>nuclei, dalfox"]
    end

    FINISH["Scan complete"]

    START --> STAGE1
    SITE --> STAGE2
    STAGE2 --> STAGE3
    STAGE3 --> FINISH

    style START fill:#34495e,stroke:#2c3e50,stroke-width:2px,color:#fff
    style FINISH fill:#27ae60,stroke:#229954,stroke-width:2px,color:#fff
    style STAGE1 fill:#3498db,stroke:#2980b9,stroke-width:2px,color:#fff
    style STAGE2 fill:#9b59b6,stroke:#8e44ad,stroke-width:2px,color:#fff
    style STAGE3 fill:#e67e22,stroke:#d35400,stroke-width:2px,color:#fff
    style SUB fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
    style PORT fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
    style SITE fill:#5dade2,stroke:#3498db,stroke-width:1px,color:#fff
    style URL fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
    style DIR fill:#bb8fce,stroke:#9b59b6,stroke-width:1px,color:#fff
    style VULN fill:#f0b27a,stroke:#e67e22,stroke-width:1px,color:#fff
```

See the [scan flow architecture document](./docs/scan-flow-architecture.md) for details.
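The three stages above are chained so that each stage's output feeds the next. As a rough illustration only (the stage functions below are stand-ins, not the project's actual tool wrappers; only the stage order and the tool names in the comments come from the diagram):

```python
from typing import Callable, Iterable

# A stage takes a list of items (domains, hosts, URLs, ...) and returns the next list.
Stage = Callable[[list[str]], list[str]]

def run_pipeline(root_domains: list[str], stages: Iterable[tuple[str, Stage]]) -> list[str]:
    """Feed the output of each stage into the next, mirroring the flowchart."""
    data = root_domains
    for name, stage in stages:
        data = stage(data)
        print(f"[{name}] produced {len(data)} items")
    return data

# Stage order taken from the diagram: asset discovery -> deep analysis -> vulnerability detection.
findings = run_pipeline(
    ["example.com"],
    [
        ("subdomain discovery", lambda d: d),   # subfinder, amass, puredns
        ("port scanning",       lambda d: d),   # naabu
        ("site identification", lambda d: d),   # httpx
        ("url collection",      lambda d: d),   # waymore, katana
        ("directory scanning",  lambda d: d),   # ffuf
        ("vulnerability scan",  lambda d: d),   # nuclei, dalfox
    ],
)
```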
### 🖥️ 分布式架构
|
||||
- **多节点扫描** - 支持部署多个 Worker 节点,横向扩展扫描能力
|
||||
- **本地节点** - 零配置,安装即自动注册本地 Docker Worker
|
||||
@@ -56,62 +118,46 @@
|
||||
- **节点监控** - 实时心跳检测,CPU/内存/磁盘状态监控
|
||||
- **断线重连** - 节点离线自动检测,恢复后自动重新接入
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ 主服务器 (Master) │
|
||||
│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
|
||||
│ │ Next.js │ │ Django │ │ Postgres│ │ Redis │ │
|
||||
│ │ 前端 │ │ 后端 │ │ 数据库 │ │ 缓存 │ │
|
||||
│ └─────────┘ └────┬────┘ └─────────┘ └─────────┘ │
|
||||
│ │ │
|
||||
│ ┌─────┴─────┐ │
|
||||
│ │ 任务调度器 │ │
|
||||
│ │ Scheduler │ │
|
||||
│ └─────┬─────┘ │
|
||||
└────────────────────┼────────────────────────────────────────────┘
|
||||
│
|
||||
┌────────────┼────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌───────────┐ ┌───────────┐ ┌───────────┐
|
||||
│ Worker 1 │ │ Worker 2 │ │ Worker N │
|
||||
│ (本地) │ │ (远程) │ │ (远程) │
|
||||
├───────────┤ ├───────────┤ ├───────────┤
|
||||
│ • Nuclei │ │ • Nuclei │ │ • Nuclei │
|
||||
│ • httpx │ │ • httpx │ │ • httpx │
|
||||
│ • naabu │ │ • naabu │ │ • naabu │
|
||||
│ • ... │ │ • ... │ │ • ... │
|
||||
├───────────┤ ├───────────┤ ├───────────┤
|
||||
│ 心跳上报 │ │ 心跳上报 │ │ 心跳上报 │
|
||||
└───────────┘ └───────────┘ └───────────┘
|
||||
```mermaid
flowchart TB
    subgraph MASTER["Master Server"]
        direction TB

        REDIS["Redis load cache"]

        subgraph SCHEDULER["Task Distributor"]
            direction TB
            SUBMIT["Receive scan tasks"]
            SELECT["Load-aware selection"]
            DISPATCH["Smart dispatch"]

            SUBMIT --> SELECT
            SELECT --> DISPATCH
        end

        REDIS -.load data.-> SELECT
    end

    subgraph WORKERS["Worker node cluster"]
        direction TB

        W1["Worker 1 (local)<br/>CPU: 45% | MEM: 60%"]
        W2["Worker 2 (remote)<br/>CPU: 30% | MEM: 40%"]
        W3["Worker N (remote)<br/>CPU: 90% | MEM: 85%"]
    end

    DISPATCH -->|dispatch task| W1
    DISPATCH -->|dispatch task| W2
    DISPATCH -->|skip: high load| W3

    W1 -.heartbeat.-> REDIS
    W2 -.heartbeat.-> REDIS
    W3 -.heartbeat.-> REDIS
```

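The load-aware selection step can be pictured as filtering out overloaded workers and handing the task to the least-loaded remaining node, based on the heartbeat metrics cached in Redis. A minimal sketch under those assumptions follows; the field names and thresholds are illustrative, not the project's actual scheduler code.

```python
# Minimal sketch: skip overloaded workers and pick the least-loaded candidate.
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class WorkerLoad:
    name: str
    cpu: float  # percent, as reported by the node's heartbeat
    mem: float  # percent

def pick_worker(workers: List[WorkerLoad],
                cpu_limit: float = 80.0,
                mem_limit: float = 80.0) -> Optional[WorkerLoad]:
    """Return the least-loaded worker under both limits, or None if all are overloaded."""
    candidates = [w for w in workers if w.cpu < cpu_limit and w.mem < mem_limit]
    if not candidates:
        return None  # caller should queue the task and retry later
    return min(candidates, key=lambda w: w.cpu + w.mem)

if __name__ == "__main__":
    cluster = [
        WorkerLoad("worker-1", cpu=45, mem=60),
        WorkerLoad("worker-2", cpu=30, mem=40),
        WorkerLoad("worker-n", cpu=90, mem=85),  # skipped: above the limits
    ]
    print(pick_worker(cluster))  # -> worker-2
```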
### 📊 Visual Interface
- **Statistics** - Asset and vulnerability dashboards
- **Real-time notifications** - WebSocket message push
- **Dark theme** - Switch between light and dark themes

---

## 🛠️ Tech Stack

- **Frontend**: Next.js + React + TailwindCSS
- **Backend**: Django + Django REST Framework
- **Database**: PostgreSQL + Redis
- **Deployment**: Docker + Nginx

### 🔧 Built-in Scanning Tools

| Category | Tools |
|------|------|
| Subdomain brute forcing | puredns, massdns, dnsgen |
| Passive discovery | subfinder, amass, assetfinder, Sublist3r |
| Port scanning | naabu |
| Site discovery | httpx |
| Directory scanning | ffuf |
| Crawling | katana |
| Passive URL collection | waymore, uro |
| Vulnerability scanning | nuclei, dalfox |

---

@@ -120,7 +166,7 @@

### Requirements

- **OS**: Ubuntu 20.04+ / Debian 11+ (recommended)
- **Hardware**: 2 cores, 4 GB RAM minimum, 10GB+ disk space
- **Hardware**: 2 cores, 4 GB RAM minimum, 20GB+ disk space

### One-Click Installation
@@ -132,13 +178,18 @@ cd xingrin

# Install and start (production mode)
sudo ./install.sh

# Development mode
sudo ./install.sh --dev
# 🇨🇳 Mirror acceleration recommended for users in mainland China
sudo ./install.sh --mirror
```

> **💡 About the --mirror flag**
> - Automatically configures Docker registry mirrors (mainland China sources)
> - Speeds up Git repository clones (Nuclei templates, etc.)
> - Greatly shortens install time and avoids network timeouts
### Accessing the Service

- **Web UI**: `https://localhost` or `http://localhost`
- **Web UI**: `https://ip:8083`

### Common Commands

@@ -154,13 +205,7 @@ sudo ./restart.sh

# Uninstall
sudo ./uninstall.sh

# Update
sudo ./update.sh
```

## Logs
- Project logs: /opt/xingrin/logs holds the project's runtime logs; the error file contains error-level entries, and xingrin.log contains all project logs, errors included
- Tool invocation logs: /opt/xingrin/results holds the output of the scanning tools, e.g. the result logs of naabu, httpx, and others
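For quick troubleshooting, the error-level entries can be pulled straight out of the main log file. A small sketch follows; the path comes from the list above, and the "ERROR" level name is an assumption based on standard Python logging conventions.

```python
# Small sketch: print the most recent error entries from xingrin.log.
from pathlib import Path
from typing import List

LOG_FILE = Path("/opt/xingrin/logs/xingrin.log")

def recent_errors(limit: int = 20) -> List[str]:
    lines = LOG_FILE.read_text(encoding="utf-8", errors="replace").splitlines()
    return [line for line in lines if "ERROR" in line][-limit:]

if __name__ == "__main__":
    print("\n".join(recent_errors()))
```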
## 🤝 Feedback & Contributions
@@ -192,22 +237,30 @@ sudo ./update.sh

- Comply with the laws and regulations of your region
- Accept full responsibility for any consequences of misuse

## 🌟 Star History

If this project helps you, please give it a ⭐ Star!

[](https://star-history.com/#yyhuni/xingrin&Date)

## 📄 License

This project is licensed under the [PolyForm Noncommercial License 1.0.0](LICENSE).
This project is licensed under the [GNU General Public License v3.0](LICENSE).

### Permitted Uses

- ✅ Personal learning and research
- ✅ Non-commercial security testing
- ✅ Use by educational institutions
- ✅ Use by non-profit organizations
- ✅ Commercial and non-commercial use
- ✅ Modification and distribution
- ✅ Patent use
- ✅ Private use

### Prohibited Uses
### Obligations and Restrictions

- ❌ **Commercial use** (including but not limited to: sale, commercial services, SaaS, etc.)
- 📋 **Open-source obligation**: source code must be provided when distributing
- 📋 **Same license**: derivative works must use the same license
- 📋 **Copyright notice**: the original copyright and license notices must be retained
- ❌ **No warranty**: provided without any warranty of any kind
- ❌ Unauthorized penetration testing
- ❌ Any illegal activity

For commercial licensing, please contact the author.
@@ -7,7 +7,6 @@ from typing import Optional
@dataclass
class DirectoryDTO:
    """Directory data transfer object"""
    website_id: int
    target_id: int
    url: str
    status: Optional[int] = None

@@ -9,7 +9,7 @@ class WebSiteDTO:
    """Website data transfer object"""
    target_id: int
    url: str
    host: str
    host: str = ''
    title: str = ''
    status_code: Optional[int] = None
    content_length: Optional[int] = None

@@ -12,11 +12,10 @@ class DirectorySnapshotDTO:

    Saves directory information discovered during a scan into the snapshot table.

    Note: website_id and target_id are only used to pass data along and convert to the asset DTO; they are not saved to the snapshot table.
    Note: target_id is only used to pass data along and convert to the asset DTO; it is not saved to the snapshot table.
    A snapshot belongs only to its scan.
    """
    scan_id: int
    website_id: int  # passed through only; not saved to the database
    target_id: int  # passed through only; not saved to the database
    url: str
    status: Optional[int] = None
@@ -36,7 +35,6 @@ class DirectorySnapshotDTO:
            DirectoryDTO: asset-table DTO
        """
        return DirectoryDTO(
            website_id=self.website_id,
            target_id=self.target_id,
            url=self.url,
            status=self.status,
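These DTOs are what the repository layer further down in this diff consumes: the repositories gain `bulk_upsert` methods that write DTO batches with Django's `bulk_create(update_conflicts=True)` keyed on the `(url, target)` unique constraint. A minimal call-site sketch follows; the import path and the shape of the probe-result dicts are assumptions, not taken from this diff.

```python
# Call-site sketch: build WebSiteDTO objects from probe output and upsert them.
from typing import List
from apps.asset.dtos import WebSiteDTO
from apps.asset.repositories import DjangoWebSiteRepository  # assumed module path

def save_probe_results(target_id: int, results: List[dict]) -> int:
    dtos = [
        WebSiteDTO(
            target_id=target_id,
            url=row["url"],
            host=row.get("host", ""),
            title=row.get("title", ""),
            status_code=row.get("status_code"),
            content_length=row.get("content_length"),
        )
        for row in results
    ]
    # Re-running a scan updates existing rows instead of raising duplicate errors.
    return DjangoWebSiteRepository().bulk_upsert(dtos)
```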
@@ -4,13 +4,6 @@ from django.contrib.postgres.fields import ArrayField
|
||||
from django.core.validators import MinValueValidator, MaxValueValidator
|
||||
|
||||
|
||||
class SoftDeleteManager(models.Manager):
|
||||
"""软删除管理器:默认只返回未删除的记录"""
|
||||
|
||||
def get_queryset(self):
|
||||
return super().get_queryset().filter(deleted_at__isnull=True)
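
For context, the pattern being removed throughout this file combined a soft-delete manager like the one above with partial unique constraints that only apply to live rows. A self-contained sketch of that combination follows (an illustrative model, not part of this codebase).

```python
# Illustrative model: soft-delete manager plus a partial unique constraint,
# so uniqueness is enforced only for rows that have not been soft-deleted.
from django.db import models

class SoftDeleteManager(models.Manager):
    def get_queryset(self):
        # Hide soft-deleted rows from the default queryset
        return super().get_queryset().filter(deleted_at__isnull=True)

class Item(models.Model):
    name = models.CharField(max_length=255)
    deleted_at = models.DateTimeField(null=True, blank=True, db_index=True)

    objects = SoftDeleteManager()   # default manager: live rows only
    all_objects = models.Manager()  # full manager: includes soft-deleted rows

    class Meta:
        app_label = 'asset'  # assumed label so the sketch is self-contained
        constraints = [
            # Enforced only while deleted_at IS NULL; the plain constraints that
            # replace this pattern below apply to every row.
            models.UniqueConstraint(
                fields=['name'],
                condition=models.Q(deleted_at__isnull=True),
                name='unique_item_name_active',
            ),
        ]
```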
|
||||
|
||||
|
||||
class Subdomain(models.Model):
|
||||
"""
|
||||
子域名模型(纯资产表)
|
||||
@@ -29,33 +22,24 @@ class Subdomain(models.Model):
|
||||
help_text='所属的扫描目标(主关联字段,表示所属关系,不能为空)'
|
||||
)
|
||||
name = models.CharField(max_length=1000, help_text='子域名名称')
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='首次发现时间')
|
||||
|
||||
# ==================== 软删除字段 ====================
|
||||
deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')
|
||||
|
||||
# ==================== 管理器 ====================
|
||||
objects = SoftDeleteManager() # 默认管理器:只返回未删除的记录
|
||||
all_objects = models.Manager() # 全量管理器:包括已删除的记录(用于硬删除)
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
|
||||
class Meta:
|
||||
db_table = 'subdomain'
|
||||
verbose_name = '子域名'
|
||||
verbose_name_plural = '子域名'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['-created_at']),
|
||||
models.Index(fields=['name', 'target']), # 复合索引,优化 get_by_names_and_target_id 批量查询
|
||||
models.Index(fields=['target']), # 优化从target_id快速查找下面的子域名
|
||||
models.Index(fields=['name']), # 优化从name快速查找子域名,搜索场景
|
||||
models.Index(fields=['deleted_at', '-discovered_at']), # 软删除 + 时间索引
|
||||
]
|
||||
constraints = [
|
||||
# 部分唯一约束:只对未删除记录生效
|
||||
# 普通唯一约束:name + target 组合唯一
|
||||
models.UniqueConstraint(
|
||||
fields=['name', 'target'],
|
||||
condition=models.Q(deleted_at__isnull=True),
|
||||
name='unique_name_target_active'
|
||||
name='unique_subdomain_name_target'
|
||||
)
|
||||
]
|
||||
|
||||
@@ -87,7 +71,7 @@ class Endpoint(models.Model):
|
||||
default='',
|
||||
help_text='重定向地址(HTTP 3xx 响应头 Location)'
|
||||
)
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
title = models.CharField(
|
||||
max_length=1000,
|
||||
blank=True,
|
||||
@@ -139,33 +123,25 @@ class Endpoint(models.Model):
|
||||
default=list,
|
||||
help_text='匹配的GF模式列表,用于识别敏感端点(如api, debug, config等)'
|
||||
)
|
||||
|
||||
# ==================== 软删除字段 ====================
|
||||
deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')
|
||||
|
||||
# ==================== 管理器 ====================
|
||||
objects = SoftDeleteManager() # 默认管理器:只返回未删除的记录
|
||||
all_objects = models.Manager() # 全量管理器:包括已删除的记录(用于硬删除)
|
||||
|
||||
class Meta:
|
||||
db_table = 'endpoint'
|
||||
verbose_name = '端点'
|
||||
verbose_name_plural = '端点'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['-created_at']),
|
||||
models.Index(fields=['target']), # 优化从target_id快速查找下面的端点(主关联字段)
|
||||
models.Index(fields=['url']), # URL索引,优化查询性能
|
||||
models.Index(fields=['host']), # host索引,优化根据主机名查询
|
||||
models.Index(fields=['status_code']), # 状态码索引,优化筛选
|
||||
models.Index(fields=['deleted_at', '-discovered_at']), # 软删除 + 时间索引
|
||||
models.Index(fields=['title']), # title索引,优化智能过滤搜索
|
||||
]
|
||||
constraints = [
|
||||
# 部分唯一约束:只对未删除记录生效
|
||||
# 普通唯一约束:url + target 组合唯一
|
||||
models.UniqueConstraint(
|
||||
fields=['url', 'target'],
|
||||
condition=models.Q(deleted_at__isnull=True),
|
||||
name='unique_endpoint_url_target_active'
|
||||
name='unique_endpoint_url_target'
|
||||
)
|
||||
]
|
||||
|
||||
@@ -197,7 +173,7 @@ class WebSite(models.Model):
|
||||
default='',
|
||||
help_text='重定向地址(HTTP 3xx 响应头 Location)'
|
||||
)
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
title = models.CharField(
|
||||
max_length=1000,
|
||||
blank=True,
|
||||
@@ -243,32 +219,25 @@ class WebSite(models.Model):
|
||||
blank=True,
|
||||
help_text='是否支持虚拟主机'
|
||||
)
|
||||
|
||||
# ==================== 软删除字段 ====================
|
||||
deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')
|
||||
|
||||
# ==================== 管理器 ====================
|
||||
objects = SoftDeleteManager() # 默认管理器:只返回未删除的记录
|
||||
all_objects = models.Manager() # 全量管理器:包括已删除的记录(用于硬删除)
|
||||
|
||||
class Meta:
|
||||
db_table = 'website'
|
||||
verbose_name = '站点'
|
||||
verbose_name_plural = '站点'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['-created_at']),
|
||||
models.Index(fields=['url']), # URL索引,优化查询性能
|
||||
models.Index(fields=['host']), # host索引,优化根据主机名查询
|
||||
models.Index(fields=['target']), # 优化从target_id快速查找下面的站点
|
||||
models.Index(fields=['deleted_at', '-discovered_at']), # 软删除 + 时间索引
|
||||
models.Index(fields=['title']), # title索引,优化智能过滤搜索
|
||||
models.Index(fields=['status_code']), # 状态码索引,优化智能过滤搜索
|
||||
]
|
||||
constraints = [
|
||||
# 部分唯一约束:只对未删除记录生效
|
||||
# 普通唯一约束:url + target 组合唯一
|
||||
models.UniqueConstraint(
|
||||
fields=['url', 'target'],
|
||||
condition=models.Q(deleted_at__isnull=True),
|
||||
name='unique_website_url_target_active'
|
||||
name='unique_website_url_target'
|
||||
)
|
||||
]
|
||||
|
||||
@@ -282,19 +251,11 @@ class Directory(models.Model):
|
||||
"""
|
||||
|
||||
id = models.AutoField(primary_key=True)
|
||||
website = models.ForeignKey(
|
||||
'Website',
|
||||
on_delete=models.CASCADE,
|
||||
related_name='directories',
|
||||
help_text='所属的站点(主关联字段,表示所属关系,不能为空)'
|
||||
)
|
||||
target = models.ForeignKey(
|
||||
'targets.Target', # 使用字符串引用
|
||||
'targets.Target',
|
||||
on_delete=models.CASCADE,
|
||||
related_name='directories',
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text='所属的扫描目标(冗余字段,用于快速查询)'
|
||||
help_text='所属的扫描目标'
|
||||
)
|
||||
|
||||
url = models.CharField(
|
||||
@@ -335,34 +296,24 @@ class Directory(models.Model):
|
||||
help_text='请求耗时(单位:纳秒)'
|
||||
)
|
||||
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
|
||||
|
||||
# ==================== 软删除字段 ====================
|
||||
deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')
|
||||
|
||||
# ==================== 管理器 ====================
|
||||
objects = SoftDeleteManager() # 默认管理器:只返回未删除的记录
|
||||
all_objects = models.Manager() # 全量管理器:包括已删除的记录(用于硬删除)
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
|
||||
class Meta:
|
||||
db_table = 'directory'
|
||||
verbose_name = '目录'
|
||||
verbose_name_plural = '目录'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['-created_at']),
|
||||
models.Index(fields=['target']), # 优化从target_id快速查找下面的目录
|
||||
models.Index(fields=['url']), # URL索引,优化搜索和唯一约束
|
||||
models.Index(fields=['website']), # 站点索引,优化按站点查询
|
||||
models.Index(fields=['status']), # 状态码索引,优化筛选
|
||||
models.Index(fields=['deleted_at', '-discovered_at']), # 软删除 + 时间索引
|
||||
]
|
||||
constraints = [
|
||||
# 部分唯一约束:只对未删除记录生效
|
||||
# 普通唯一约束:target + url 组合唯一
|
||||
models.UniqueConstraint(
|
||||
fields=['website', 'url'],
|
||||
condition=models.Q(deleted_at__isnull=True),
|
||||
name='unique_directory_url_website_active'
|
||||
fields=['target', 'url'],
|
||||
name='unique_directory_url_target'
|
||||
),
|
||||
]
|
||||
|
||||
@@ -410,43 +361,29 @@ class HostPortMapping(models.Model):
|
||||
)
|
||||
|
||||
# ==================== 时间字段 ====================
|
||||
discovered_at = models.DateTimeField(
|
||||
created_at = models.DateTimeField(
|
||||
auto_now_add=True,
|
||||
help_text='发现时间'
|
||||
help_text='创建时间'
|
||||
)
|
||||
|
||||
# ==================== 软删除字段 ====================
|
||||
deleted_at = models.DateTimeField(
|
||||
null=True,
|
||||
blank=True,
|
||||
db_index=True,
|
||||
help_text='删除时间(NULL表示未删除)'
|
||||
)
|
||||
|
||||
# ==================== 管理器 ====================
|
||||
objects = SoftDeleteManager() # 默认管理器:只返回未删除的记录
|
||||
all_objects = models.Manager() # 全量管理器:包括已删除的记录(用于硬删除)
|
||||
|
||||
class Meta:
|
||||
db_table = 'host_port_mapping'
|
||||
verbose_name = '主机端口映射'
|
||||
verbose_name_plural = '主机端口映射'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['target']), # 优化按目标查询
|
||||
models.Index(fields=['host']), # 优化按主机名查询
|
||||
models.Index(fields=['ip']), # 优化按IP查询
|
||||
models.Index(fields=['port']), # 优化按端口查询
|
||||
models.Index(fields=['host', 'ip']), # 优化组合查询
|
||||
models.Index(fields=['-discovered_at']), # 优化时间排序
|
||||
models.Index(fields=['deleted_at', '-discovered_at']), # 软删除 + 时间索引
|
||||
models.Index(fields=['-created_at']), # 优化时间排序
|
||||
]
|
||||
constraints = [
|
||||
# 复合唯一约束:target + host + ip + port 组合唯一(只对未删除记录生效)
|
||||
# 复合唯一约束:target + host + ip + port 组合唯一
|
||||
models.UniqueConstraint(
|
||||
fields=['target', 'host', 'ip', 'port'],
|
||||
condition=models.Q(deleted_at__isnull=True),
|
||||
name='unique_target_host_ip_port_active'
|
||||
name='unique_target_host_ip_port'
|
||||
),
|
||||
]
|
||||
|
||||
@@ -474,7 +411,7 @@ class Vulnerability(models.Model):
|
||||
)
|
||||
|
||||
# ==================== 核心字段 ====================
|
||||
url = models.TextField(help_text='漏洞所在的URL')
|
||||
url = models.CharField(max_length=2000, help_text='漏洞所在的URL')
|
||||
vuln_type = models.CharField(max_length=100, help_text='漏洞类型(如 xss, sqli)')
|
||||
severity = models.CharField(
|
||||
max_length=20,
|
||||
@@ -488,27 +425,20 @@ class Vulnerability(models.Model):
|
||||
raw_output = models.JSONField(blank=True, default=dict, help_text='工具原始输出')
|
||||
|
||||
# ==================== 时间字段 ====================
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='首次发现时间')
|
||||
|
||||
# ==================== 软删除字段 ====================
|
||||
deleted_at = models.DateTimeField(null=True, blank=True, db_index=True, help_text='删除时间(NULL表示未删除)')
|
||||
|
||||
# ==================== 管理器 ====================
|
||||
objects = SoftDeleteManager()
|
||||
all_objects = models.Manager()
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
|
||||
class Meta:
|
||||
db_table = 'vulnerability'
|
||||
verbose_name = '漏洞'
|
||||
verbose_name_plural = '漏洞'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['target']),
|
||||
models.Index(fields=['vuln_type']),
|
||||
models.Index(fields=['severity']),
|
||||
models.Index(fields=['source']),
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['deleted_at', '-discovered_at']),
|
||||
models.Index(fields=['url']), # url索引,优化智能过滤搜索
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
|
||||
@@ -15,17 +15,17 @@ class SubdomainSnapshot(models.Model):
|
||||
)
|
||||
|
||||
name = models.CharField(max_length=1000, help_text='子域名名称')
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
|
||||
class Meta:
|
||||
db_table = 'subdomain_snapshot'
|
||||
verbose_name = '子域名快照'
|
||||
verbose_name_plural = '子域名快照'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['scan']),
|
||||
models.Index(fields=['name']),
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
constraints = [
|
||||
# 唯一约束:同一次扫描中,同一个子域名只能记录一次
|
||||
@@ -70,18 +70,19 @@ class WebsiteSnapshot(models.Model):
|
||||
)
|
||||
body_preview = models.TextField(blank=True, default='', help_text='响应体预览')
|
||||
vhost = models.BooleanField(null=True, blank=True, help_text='虚拟主机标志')
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
|
||||
class Meta:
|
||||
db_table = 'website_snapshot'
|
||||
verbose_name = '网站快照'
|
||||
verbose_name_plural = '网站快照'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['scan']),
|
||||
models.Index(fields=['url']),
|
||||
models.Index(fields=['host']), # host索引,优化根据主机名查询
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['title']), # title索引,优化标题搜索
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
constraints = [
|
||||
# 唯一约束:同一次扫描中,同一个URL只能记录一次
|
||||
@@ -118,18 +119,19 @@ class DirectorySnapshot(models.Model):
|
||||
lines = models.IntegerField(null=True, blank=True, help_text='响应体行数(按换行符分割)')
|
||||
content_type = models.CharField(max_length=200, blank=True, default='', help_text='响应头 Content-Type 值')
|
||||
duration = models.BigIntegerField(null=True, blank=True, help_text='请求耗时(单位:纳秒)')
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
|
||||
class Meta:
|
||||
db_table = 'directory_snapshot'
|
||||
verbose_name = '目录快照'
|
||||
verbose_name_plural = '目录快照'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['scan']),
|
||||
models.Index(fields=['url']),
|
||||
models.Index(fields=['status']), # 状态码索引,优化筛选
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['content_type']), # content_type索引,优化内容类型搜索
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
constraints = [
|
||||
# 唯一约束:同一次扫描中,同一个目录URL只能记录一次
|
||||
@@ -183,16 +185,16 @@ class HostPortMappingSnapshot(models.Model):
|
||||
)
|
||||
|
||||
# ==================== 时间字段 ====================
|
||||
discovered_at = models.DateTimeField(
|
||||
created_at = models.DateTimeField(
|
||||
auto_now_add=True,
|
||||
help_text='发现时间'
|
||||
help_text='创建时间'
|
||||
)
|
||||
|
||||
class Meta:
|
||||
db_table = 'host_port_mapping_snapshot'
|
||||
verbose_name = '主机端口映射快照'
|
||||
verbose_name_plural = '主机端口映射快照'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['scan']), # 优化按扫描查询
|
||||
models.Index(fields=['host']), # 优化按主机名查询
|
||||
@@ -200,7 +202,7 @@ class HostPortMappingSnapshot(models.Model):
|
||||
models.Index(fields=['port']), # 优化按端口查询
|
||||
models.Index(fields=['host', 'ip']), # 优化组合查询
|
||||
models.Index(fields=['scan', 'host']), # 优化扫描+主机查询
|
||||
models.Index(fields=['-discovered_at']), # 优化时间排序
|
||||
models.Index(fields=['-created_at']), # 优化时间排序
|
||||
]
|
||||
constraints = [
|
||||
# 复合唯一约束:同一次扫描中,scan + host + ip + port 组合唯一
|
||||
@@ -257,19 +259,21 @@ class EndpointSnapshot(models.Model):
|
||||
default=list,
|
||||
help_text='匹配的GF模式列表'
|
||||
)
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
|
||||
class Meta:
|
||||
db_table = 'endpoint_snapshot'
|
||||
verbose_name = '端点快照'
|
||||
verbose_name_plural = '端点快照'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['scan']),
|
||||
models.Index(fields=['url']),
|
||||
models.Index(fields=['host']), # host索引,优化根据主机名查询
|
||||
models.Index(fields=['title']), # title索引,优化标题搜索
|
||||
models.Index(fields=['status_code']), # 状态码索引,优化筛选
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['webserver']), # webserver索引,优化服务器搜索
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
constraints = [
|
||||
# 唯一约束:同一次扫描中,同一个URL只能记录一次
|
||||
@@ -302,7 +306,7 @@ class VulnerabilitySnapshot(models.Model):
|
||||
)
|
||||
|
||||
# ==================== 核心字段 ====================
|
||||
url = models.TextField(help_text='漏洞所在的URL')
|
||||
url = models.CharField(max_length=2000, help_text='漏洞所在的URL')
|
||||
vuln_type = models.CharField(max_length=100, help_text='漏洞类型(如 xss, sqli)')
|
||||
severity = models.CharField(
|
||||
max_length=20,
|
||||
@@ -316,19 +320,20 @@ class VulnerabilitySnapshot(models.Model):
|
||||
raw_output = models.JSONField(blank=True, default=dict, help_text='工具原始输出')
|
||||
|
||||
# ==================== 时间字段 ====================
|
||||
discovered_at = models.DateTimeField(auto_now_add=True, help_text='发现时间')
|
||||
created_at = models.DateTimeField(auto_now_add=True, help_text='创建时间')
|
||||
|
||||
class Meta:
|
||||
db_table = 'vulnerability_snapshot'
|
||||
verbose_name = '漏洞快照'
|
||||
verbose_name_plural = '漏洞快照'
|
||||
ordering = ['-discovered_at']
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['scan']),
|
||||
models.Index(fields=['url']), # url索引,优化URL搜索
|
||||
models.Index(fields=['vuln_type']),
|
||||
models.Index(fields=['severity']),
|
||||
models.Index(fields=['source']),
|
||||
models.Index(fields=['-discovered_at']),
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
|
||||
@@ -3,162 +3,141 @@ Django ORM 实现的 Directory Repository
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Tuple, Dict, Iterator
|
||||
from django.db import transaction, IntegrityError, OperationalError, DatabaseError
|
||||
from django.utils import timezone
|
||||
from typing import List, Iterator
|
||||
from django.db import transaction
|
||||
|
||||
from apps.asset.models.asset_models import Directory
|
||||
from apps.asset.dtos import DirectoryDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@auto_ensure_db_connection
|
||||
class DjangoDirectoryRepository:
|
||||
"""Django ORM 实现的 Directory Repository"""
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[DirectoryDTO]) -> int:
|
||||
def bulk_upsert(self, items: List[DirectoryDTO]) -> int:
|
||||
"""
|
||||
批量创建 Directory,忽略冲突
|
||||
批量创建或更新 Directory(upsert)
|
||||
|
||||
存在则更新所有字段,不存在则创建。
|
||||
使用 Django 原生 update_conflicts。
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: Directory DTO 列表
|
||||
|
||||
Returns:
|
||||
int: 实际创建的记录数
|
||||
|
||||
Raises:
|
||||
IntegrityError: 数据完整性错误
|
||||
OperationalError: 数据库操作错误
|
||||
DatabaseError: 数据库错误
|
||||
int: 处理的记录数
|
||||
"""
|
||||
if not items:
|
||||
return 0
|
||||
|
||||
|
||||
try:
|
||||
# 转换为 Django 模型对象
|
||||
directory_objects = [
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, Directory)
|
||||
|
||||
# 直接从 DTO 字段构建 Model
|
||||
directories = [
|
||||
Directory(
|
||||
website_id=item.website_id,
|
||||
target_id=item.target_id,
|
||||
url=item.url,
|
||||
status=item.status,
|
||||
content_length=item.content_length,
|
||||
words=item.words,
|
||||
lines=item.lines,
|
||||
content_type=item.content_type,
|
||||
content_type=item.content_type or '',
|
||||
duration=item.duration
|
||||
)
|
||||
for item in items
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
|
||||
with transaction.atomic():
|
||||
# 批量插入或忽略冲突
|
||||
# 如果 website + url 已存在,忽略冲突
|
||||
Directory.objects.bulk_create(
|
||||
directory_objects,
|
||||
ignore_conflicts=True
|
||||
directories,
|
||||
update_conflicts=True,
|
||||
unique_fields=['target', 'url'],
|
||||
update_fields=[
|
||||
'status', 'content_length', 'words',
|
||||
'lines', 'content_type', 'duration'
|
||||
],
|
||||
batch_size=1000
|
||||
)
|
||||
|
||||
logger.debug(f"成功处理 {len(items)} 条 Directory 记录")
|
||||
return len(items)
|
||||
|
||||
except IntegrityError as e:
|
||||
logger.error(
|
||||
f"批量插入 Directory 失败 - 数据完整性错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
|
||||
except OperationalError as e:
|
||||
logger.error(
|
||||
f"批量插入 Directory 失败 - 数据库操作错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
|
||||
except DatabaseError as e:
|
||||
logger.error(
|
||||
f"批量插入 Directory 失败 - 数据库错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
logger.debug(f"批量 upsert Directory 成功: {len(unique_items)} 条")
|
||||
return len(unique_items)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"批量插入 Directory 失败 - 未知错误: {e}, "
|
||||
f"记录数: {len(items)}, "
|
||||
f"错误类型: {type(e).__name__}",
|
||||
exc_info=True
|
||||
)
|
||||
logger.error(f"批量 upsert Directory 失败: {e}")
|
||||
raise
|
||||
|
||||
def get_by_website(self, website_id: int) -> List[DirectoryDTO]:
|
||||
def bulk_create_ignore_conflicts(self, items: List[DirectoryDTO]) -> int:
|
||||
"""
|
||||
获取指定站点的所有目录
|
||||
批量创建 Directory(存在即跳过)
|
||||
|
||||
与 bulk_upsert 不同,此方法不会更新已存在的记录。
|
||||
适用于批量添加场景,只提供 URL,没有其他字段数据。
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
website_id: 站点 ID
|
||||
items: Directory DTO 列表
|
||||
|
||||
Returns:
|
||||
List[DirectoryDTO]: 目录列表
|
||||
int: 处理的记录数
|
||||
"""
|
||||
if not items:
|
||||
return 0
|
||||
|
||||
try:
|
||||
directories = Directory.objects.filter(website_id=website_id)
|
||||
return [
|
||||
DirectoryDTO(
|
||||
website_id=d.website_id,
|
||||
target_id=d.target_id,
|
||||
url=d.url,
|
||||
status=d.status,
|
||||
content_length=d.content_length,
|
||||
words=d.words,
|
||||
lines=d.lines,
|
||||
content_type=d.content_type,
|
||||
duration=d.duration
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, Directory)
|
||||
|
||||
directories = [
|
||||
Directory(
|
||||
target_id=item.target_id,
|
||||
url=item.url,
|
||||
status=item.status,
|
||||
content_length=item.content_length,
|
||||
words=item.words,
|
||||
lines=item.lines,
|
||||
content_type=item.content_type or '',
|
||||
duration=item.duration
|
||||
)
|
||||
for d in directories
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取目录列表失败 - Website ID: {website_id}, 错误: {e}")
|
||||
raise
|
||||
|
||||
def count_by_website(self, website_id: int) -> int:
|
||||
"""
|
||||
统计指定站点的目录总数
|
||||
|
||||
Args:
|
||||
website_id: 站点 ID
|
||||
|
||||
Returns:
|
||||
int: 目录总数
|
||||
"""
|
||||
try:
|
||||
count = Directory.objects.filter(website_id=website_id).count()
|
||||
logger.debug(f"Website {website_id} 的目录总数: {count}")
|
||||
return count
|
||||
|
||||
with transaction.atomic():
|
||||
Directory.objects.bulk_create(
|
||||
directories,
|
||||
ignore_conflicts=True,
|
||||
batch_size=1000
|
||||
)
|
||||
|
||||
logger.debug(f"批量创建 Directory 成功(ignore_conflicts): {len(unique_items)} 条")
|
||||
return len(unique_items)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"统计目录数量失败 - Website ID: {website_id}, 错误: {e}")
|
||||
logger.error(f"批量创建 Directory 失败: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def count_by_target(self, target_id: int) -> int:
|
||||
"""统计目标下的目录总数"""
|
||||
return Directory.objects.filter(target_id=target_id).count()
|
||||
|
||||
def get_all(self):
|
||||
"""
|
||||
获取所有目录
|
||||
|
||||
Returns:
|
||||
QuerySet: 目录查询集
|
||||
"""
|
||||
return Directory.objects.all()
|
||||
|
||||
"""获取所有目录"""
|
||||
return Directory.objects.all().order_by('-created_at')
|
||||
|
||||
def get_by_target(self, target_id: int):
|
||||
return Directory.objects.filter(target_id=target_id).select_related('website').order_by('-discovered_at')
|
||||
"""获取目标下的所有目录"""
|
||||
return Directory.objects.filter(target_id=target_id).order_by('-created_at')
|
||||
|
||||
def get_urls_for_export(self, target_id: int, batch_size: int = 1000) -> Iterator[str]:
|
||||
"""流式导出目标下的所有目录 URL(只查 url 字段,避免加载多余数据)。"""
|
||||
"""流式导出目标下的所有目录 URL"""
|
||||
try:
|
||||
queryset = (
|
||||
Directory.objects
|
||||
@@ -172,78 +151,31 @@ class DjangoDirectoryRepository:
|
||||
except Exception as e:
|
||||
logger.error("流式导出目录 URL 失败 - Target ID: %s, 错误: %s", target_id, e)
|
||||
raise
|
||||
|
||||
def soft_delete_by_ids(self, directory_ids: List[int]) -> int:
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
target_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
根据 ID 列表批量软删除Directory
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
directory_ids: Directory ID 列表
|
||||
target_id: 目标 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
Returns:
|
||||
软删除的记录数
|
||||
Yields:
|
||||
包含所有目录字段的字典
|
||||
"""
|
||||
try:
|
||||
updated_count = (
|
||||
Directory.objects
|
||||
.filter(id__in=directory_ids)
|
||||
.update(deleted_at=timezone.now())
|
||||
qs = (
|
||||
Directory.objects
|
||||
.filter(target_id=target_id)
|
||||
.values(
|
||||
'url', 'status', 'content_length', 'words',
|
||||
'lines', 'content_type', 'duration', 'created_at'
|
||||
)
|
||||
logger.debug(
|
||||
"批量软删除Directory成功 - Count: %s, 更新记录: %s",
|
||||
len(directory_ids),
|
||||
updated_count
|
||||
)
|
||||
return updated_count
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"批量软删除Directory失败 - IDs: %s, 错误: %s",
|
||||
directory_ids,
|
||||
e
|
||||
)
|
||||
raise
|
||||
|
||||
def hard_delete_by_ids(self, directory_ids: List[int]) -> Tuple[int, Dict[str, int]]:
|
||||
"""
|
||||
根据 ID 列表硬删除Directory(使用数据库级 CASCADE)
|
||||
.order_by('url')
|
||||
)
|
||||
|
||||
Args:
|
||||
directory_ids: Directory ID 列表
|
||||
|
||||
Returns:
|
||||
(删除的记录数, 删除详情字典)
|
||||
"""
|
||||
try:
|
||||
batch_size = 1000
|
||||
total_deleted = 0
|
||||
|
||||
logger.debug(f"开始批量删除 {len(directory_ids)} 个Directory(数据库 CASCADE)...")
|
||||
|
||||
for i in range(0, len(directory_ids), batch_size):
|
||||
batch_ids = directory_ids[i:i + batch_size]
|
||||
count, _ = Directory.all_objects.filter(id__in=batch_ids).delete()
|
||||
total_deleted += count
|
||||
logger.debug(f"批次删除完成: {len(batch_ids)} 个Directory,删除 {count} 条记录")
|
||||
|
||||
deleted_details = {
|
||||
'directories': len(directory_ids),
|
||||
'total': total_deleted,
|
||||
'note': 'Database CASCADE - detailed stats unavailable'
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
"批量硬删除成功(CASCADE)- Directory数: %s, 总删除记录: %s",
|
||||
len(directory_ids),
|
||||
total_deleted
|
||||
)
|
||||
|
||||
return total_deleted, deleted_details
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"批量硬删除失败(CASCADE)- Directory数: %s, 错误: %s",
|
||||
len(directory_ids),
|
||||
str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
"""Endpoint Repository - Django ORM 实现"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional, Tuple, Dict, Any
|
||||
from typing import List, Iterator
|
||||
|
||||
from apps.asset.models import Endpoint
|
||||
from apps.asset.dtos.asset import EndpointDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
from django.db import transaction
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -15,25 +16,31 @@ logger = logging.getLogger(__name__)
|
||||
class DjangoEndpointRepository:
|
||||
"""端点 Repository - 负责端点表的数据访问"""
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[EndpointDTO]) -> int:
|
||||
def bulk_upsert(self, items: List[EndpointDTO]) -> int:
|
||||
"""
|
||||
批量创建端点(忽略冲突)
|
||||
批量创建或更新端点(upsert)
|
||||
|
||||
存在则更新所有字段,不存在则创建。
|
||||
使用 Django 原生 update_conflicts。
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 端点 DTO 列表
|
||||
|
||||
Returns:
|
||||
int: 创建的记录数
|
||||
int: 处理的记录数
|
||||
"""
|
||||
if not items:
|
||||
return 0
|
||||
|
||||
try:
|
||||
endpoints = []
|
||||
for item in items:
|
||||
# Endpoint 模型当前只关联 target,不再依赖 website 外键
|
||||
# 这里按照 EndpointDTO 的字段映射构造 Endpoint 实例
|
||||
endpoints.append(Endpoint(
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, Endpoint)
|
||||
|
||||
# 直接从 DTO 字段构建 Model
|
||||
endpoints = [
|
||||
Endpoint(
|
||||
target_id=item.target_id,
|
||||
url=item.url,
|
||||
host=item.host or '',
|
||||
@@ -47,62 +54,35 @@ class DjangoEndpointRepository:
|
||||
vhost=item.vhost,
|
||||
location=item.location or '',
|
||||
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
|
||||
))
|
||||
)
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
with transaction.atomic():
|
||||
created = Endpoint.objects.bulk_create(
|
||||
Endpoint.objects.bulk_create(
|
||||
endpoints,
|
||||
ignore_conflicts=True,
|
||||
update_conflicts=True,
|
||||
unique_fields=['url', 'target'],
|
||||
update_fields=[
|
||||
'host', 'title', 'status_code', 'content_length',
|
||||
'webserver', 'body_preview', 'content_type', 'tech',
|
||||
'vhost', 'location', 'matched_gf_patterns'
|
||||
],
|
||||
batch_size=1000
|
||||
)
|
||||
return len(created)
|
||||
|
||||
logger.debug(f"批量 upsert 端点成功: {len(unique_items)} 条")
|
||||
return len(unique_items)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"批量创建端点失败: {e}")
|
||||
logger.error(f"批量 upsert 端点失败: {e}")
|
||||
raise
|
||||
|
||||
def get_by_website(self, website_id: int) -> List[EndpointDTO]:
|
||||
"""
|
||||
获取网站下的所有端点
|
||||
|
||||
Args:
|
||||
website_id: 网站 ID
|
||||
|
||||
Returns:
|
||||
List[EndpointDTO]: 端点列表
|
||||
"""
|
||||
endpoints = Endpoint.objects.filter(
|
||||
website_id=website_id
|
||||
).order_by('-discovered_at')
|
||||
|
||||
result = []
|
||||
for endpoint in endpoints:
|
||||
result.append(EndpointDTO(
|
||||
website_id=endpoint.website_id,
|
||||
target_id=endpoint.target_id,
|
||||
url=endpoint.url,
|
||||
title=endpoint.title,
|
||||
status_code=endpoint.status_code,
|
||||
content_length=endpoint.content_length,
|
||||
webserver=endpoint.webserver,
|
||||
body_preview=endpoint.body_preview,
|
||||
content_type=endpoint.content_type,
|
||||
tech=endpoint.tech,
|
||||
vhost=endpoint.vhost,
|
||||
location=endpoint.location,
|
||||
matched_gf_patterns=endpoint.matched_gf_patterns
|
||||
))
|
||||
|
||||
return result
|
||||
|
||||
def get_queryset_by_target(self, target_id: int):
|
||||
return Endpoint.objects.filter(target_id=target_id).order_by('-discovered_at')
|
||||
|
||||
def get_all(self):
|
||||
"""获取所有端点(全局查询)"""
|
||||
return Endpoint.objects.all().order_by('-discovered_at')
|
||||
return Endpoint.objects.all().order_by('-created_at')
|
||||
|
||||
def get_by_target(self, target_id: int) -> List[EndpointDTO]:
|
||||
def get_by_target(self, target_id: int):
|
||||
"""
|
||||
获取目标下的所有端点
|
||||
|
||||
@@ -110,43 +90,9 @@ class DjangoEndpointRepository:
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
List[EndpointDTO]: 端点列表
|
||||
QuerySet: 端点查询集
|
||||
"""
|
||||
endpoints = Endpoint.objects.filter(
|
||||
target_id=target_id
|
||||
).order_by('-discovered_at')
|
||||
|
||||
result = []
|
||||
for endpoint in endpoints:
|
||||
result.append(EndpointDTO(
|
||||
website_id=endpoint.website_id,
|
||||
target_id=endpoint.target_id,
|
||||
url=endpoint.url,
|
||||
title=endpoint.title,
|
||||
status_code=endpoint.status_code,
|
||||
content_length=endpoint.content_length,
|
||||
webserver=endpoint.webserver,
|
||||
body_preview=endpoint.body_preview,
|
||||
content_type=endpoint.content_type,
|
||||
tech=endpoint.tech,
|
||||
vhost=endpoint.vhost,
|
||||
location=endpoint.location,
|
||||
matched_gf_patterns=endpoint.matched_gf_patterns
|
||||
))
|
||||
|
||||
return result
|
||||
|
||||
def count_by_website(self, website_id: int) -> int:
|
||||
"""
|
||||
统计网站下的端点数量
|
||||
|
||||
Args:
|
||||
website_id: 网站 ID
|
||||
|
||||
Returns:
|
||||
int: 端点数量
|
||||
"""
|
||||
return Endpoint.objects.filter(website_id=website_id).count()
|
||||
return Endpoint.objects.filter(target_id=target_id).order_by('-created_at')
|
||||
|
||||
def count_by_target(self, target_id: int) -> int:
|
||||
"""
|
||||
@@ -159,34 +105,88 @@ class DjangoEndpointRepository:
|
||||
int: 端点数量
|
||||
"""
|
||||
return Endpoint.objects.filter(target_id=target_id).count()
|
||||
|
||||
def soft_delete_by_ids(self, ids: List[int]) -> int:
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[EndpointDTO]) -> int:
|
||||
"""
|
||||
软删除端点(批量)
|
||||
批量创建端点(存在即跳过)
|
||||
|
||||
与 bulk_upsert 不同,此方法不会更新已存在的记录。
|
||||
适用于快速扫描场景,只提供 URL,没有其他字段数据。
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
ids: 端点 ID 列表
|
||||
items: 端点 DTO 列表
|
||||
|
||||
Returns:
|
||||
int: 更新的记录数
|
||||
int: 处理的记录数
|
||||
"""
|
||||
from django.utils import timezone
|
||||
return Endpoint.objects.filter(
|
||||
id__in=ids
|
||||
).update(deleted_at=timezone.now())
|
||||
|
||||
def hard_delete_by_ids(self, ids: List[int]) -> Tuple[int, Dict[str, int]]:
|
||||
if not items:
|
||||
return 0
|
||||
|
||||
try:
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, Endpoint)
|
||||
|
||||
# 直接从 DTO 字段构建 Model
|
||||
endpoints = [
|
||||
Endpoint(
|
||||
target_id=item.target_id,
|
||||
url=item.url,
|
||||
host=item.host or '',
|
||||
title=item.title or '',
|
||||
status_code=item.status_code,
|
||||
content_length=item.content_length,
|
||||
webserver=item.webserver or '',
|
||||
body_preview=item.body_preview or '',
|
||||
content_type=item.content_type or '',
|
||||
tech=item.tech if item.tech else [],
|
||||
vhost=item.vhost,
|
||||
location=item.location or '',
|
||||
matched_gf_patterns=item.matched_gf_patterns if item.matched_gf_patterns else []
|
||||
)
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
with transaction.atomic():
|
||||
Endpoint.objects.bulk_create(
|
||||
endpoints,
|
||||
ignore_conflicts=True,
|
||||
batch_size=1000
|
||||
)
|
||||
|
||||
logger.debug(f"批量创建端点成功(ignore_conflicts): {len(unique_items)} 条")
|
||||
return len(unique_items)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"批量创建端点失败: {e}")
|
||||
raise
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
target_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
硬删除端点(批量)
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
ids: 端点 ID 列表
|
||||
|
||||
Returns:
|
||||
Tuple[int, Dict[str, int]]: (删除总数, 详细信息)
|
||||
"""
|
||||
deleted_count, details = Endpoint.all_objects.filter(
|
||||
id__in=ids
|
||||
).delete()
|
||||
target_id: 目标 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
return deleted_count, details
|
||||
Yields:
|
||||
包含所有端点字段的字典
|
||||
"""
|
||||
qs = (
|
||||
Endpoint.objects
|
||||
.filter(target_id=target_id)
|
||||
.values(
|
||||
'url', 'host', 'location', 'title', 'status_code',
|
||||
'content_length', 'content_type', 'webserver', 'tech',
|
||||
'body_preview', 'vhost', 'matched_gf_patterns', 'created_at'
|
||||
)
|
||||
.order_by('url')
|
||||
)
|
||||
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
|
||||
|
||||
@@ -1,32 +1,36 @@
|
||||
"""HostPortMapping Repository - Django ORM 实现"""
|
||||
|
||||
import logging
|
||||
from typing import List, Iterator
|
||||
from typing import List, Iterator, Dict, Optional
|
||||
|
||||
from django.db.models import QuerySet, Min
|
||||
|
||||
from apps.asset.models.asset_models import HostPortMapping
|
||||
from apps.asset.dtos.asset import HostPortMappingDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@auto_ensure_db_connection
|
||||
class DjangoHostPortMappingRepository:
|
||||
"""HostPortMapping Repository - Django ORM 实现"""
|
||||
"""HostPortMapping Repository - Django ORM 实现
|
||||
|
||||
职责:纯数据访问,不包含业务逻辑
|
||||
"""
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[HostPortMappingDTO]) -> int:
|
||||
"""
|
||||
批量创建主机端口关联(忽略冲突)
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 主机端口关联 DTO 列表
|
||||
|
||||
Returns:
|
||||
int: 实际创建的记录数(注意:ignore_conflicts 时可能为 0)
|
||||
|
||||
Note:
|
||||
- 基于唯一约束 (target + host + ip + port) 自动去重
|
||||
- 忽略已存在的记录,不更新
|
||||
int: 实际创建的记录数
|
||||
"""
|
||||
try:
|
||||
logger.debug("准备批量创建主机端口关联 - 数量: %d", len(items))
|
||||
@@ -34,18 +38,20 @@ class DjangoHostPortMappingRepository:
|
||||
if not items:
|
||||
logger.debug("主机端口关联为空,跳过创建")
|
||||
return 0
|
||||
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, HostPortMapping)
|
||||
|
||||
# 构建记录对象
|
||||
records = []
|
||||
for item in items:
|
||||
records.append(HostPortMapping(
|
||||
records = [
|
||||
HostPortMapping(
|
||||
target_id=item.target_id,
|
||||
host=item.host,
|
||||
ip=item.ip,
|
||||
port=item.port
|
||||
))
|
||||
)
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
# 批量创建(忽略冲突,基于唯一约束去重)
|
||||
created = HostPortMapping.objects.bulk_create(
|
||||
records,
|
||||
ignore_conflicts=True
|
||||
@@ -89,79 +95,47 @@ class DjangoHostPortMappingRepository:
|
||||
for ip in queryset:
|
||||
yield ip
|
||||
|
||||
def get_ip_aggregation_by_target(self, target_id: int, search: str = None):
|
||||
from django.db.models import Min
|
||||
def get_queryset_by_target(self, target_id: int) -> QuerySet:
|
||||
"""获取目标下的 QuerySet"""
|
||||
return HostPortMapping.objects.filter(target_id=target_id)
|
||||
|
||||
qs = HostPortMapping.objects.filter(target_id=target_id)
|
||||
if search:
|
||||
qs = qs.filter(ip__icontains=search)
|
||||
def get_all_queryset(self) -> QuerySet:
|
||||
"""获取所有记录的 QuerySet"""
|
||||
return HostPortMapping.objects.all()
|
||||
|
||||
ip_aggregated = (
|
||||
qs
|
||||
.values('ip')
|
||||
.annotate(
|
||||
discovered_at=Min('discovered_at')
|
||||
)
|
||||
.order_by('-discovered_at')
|
||||
def get_queryset_by_ip(self, ip: str, target_id: Optional[int] = None) -> QuerySet:
|
||||
"""获取指定 IP 的 QuerySet"""
|
||||
qs = HostPortMapping.objects.filter(ip=ip)
|
||||
if target_id:
|
||||
qs = qs.filter(target_id=target_id)
|
||||
return qs
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
target_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
Yields:
|
||||
{
|
||||
'ip': '192.168.1.1',
|
||||
'host': 'example.com',
|
||||
'port': 80,
|
||||
'created_at': datetime
|
||||
}
|
||||
"""
|
||||
qs = (
|
||||
HostPortMapping.objects
|
||||
.filter(target_id=target_id)
|
||||
.values('ip', 'host', 'port', 'created_at')
|
||||
.order_by('ip', 'host', 'port')
|
||||
)
|
||||
|
||||
results = []
|
||||
for item in ip_aggregated:
|
||||
ip = item['ip']
|
||||
mappings = (
|
||||
HostPortMapping.objects
|
||||
.filter(target_id=target_id, ip=ip)
|
||||
.values('host', 'port')
|
||||
.distinct()
|
||||
)
|
||||
|
||||
hosts = sorted({m['host'] for m in mappings})
|
||||
ports = sorted({m['port'] for m in mappings})
|
||||
|
||||
results.append({
|
||||
'ip': ip,
|
||||
'hosts': hosts,
|
||||
'ports': ports,
|
||||
'discovered_at': item['discovered_at'],
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
def get_all_ip_aggregation(self, search: str = None):
|
||||
"""获取所有 IP 聚合数据(全局查询)"""
|
||||
from django.db.models import Min
|
||||
|
||||
qs = HostPortMapping.objects.all()
|
||||
if search:
|
||||
qs = qs.filter(ip__icontains=search)
|
||||
|
||||
ip_aggregated = (
|
||||
qs
|
||||
.values('ip')
|
||||
.annotate(
|
||||
discovered_at=Min('discovered_at')
|
||||
)
|
||||
.order_by('-discovered_at')
|
||||
)
|
||||
|
||||
results = []
|
||||
for item in ip_aggregated:
|
||||
ip = item['ip']
|
||||
mappings = (
|
||||
HostPortMapping.objects
|
||||
.filter(ip=ip)
|
||||
.values('host', 'port')
|
||||
.distinct()
|
||||
)
|
||||
|
||||
hosts = sorted({m['host'] for m in mappings})
|
||||
ports = sorted({m['port'] for m in mappings})
|
||||
|
||||
results.append({
|
||||
'ip': ip,
|
||||
'hosts': hosts,
|
||||
'ports': ports,
|
||||
'discovered_at': item['discovered_at'],
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
|
||||
|
||||
@@ -1,117 +1,72 @@
|
||||
"""Subdomain Repository - Django ORM 实现"""
|
||||
|
||||
import logging
|
||||
from typing import List, Iterator
|
||||
|
||||
from django.db import transaction, IntegrityError, OperationalError, DatabaseError
|
||||
from django.utils import timezone
|
||||
from typing import Tuple, Dict
|
||||
from django.db import transaction
|
||||
|
||||
from apps.asset.models.asset_models import Subdomain
|
||||
from apps.asset.dtos import SubdomainDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@auto_ensure_db_connection
|
||||
class DjangoSubdomainRepository:
|
||||
"""基于 Django ORM 的子域名仓储实现。"""
|
||||
"""基于 Django ORM 的子域名仓储实现"""
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[SubdomainDTO]) -> None:
|
||||
"""
|
||||
批量创建子域名,忽略冲突
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 子域名 DTO 列表
|
||||
|
||||
Raises:
|
||||
IntegrityError: 数据完整性错误(如唯一约束冲突)
|
||||
OperationalError: 数据库操作错误(如连接失败)
|
||||
DatabaseError: 其他数据库错误
|
||||
"""
|
||||
if not items:
|
||||
return
|
||||
|
||||
try:
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, Subdomain)
|
||||
|
||||
subdomain_objects = [
|
||||
Subdomain(
|
||||
name=item.name,
|
||||
target_id=item.target_id,
|
||||
)
|
||||
for item in items
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
with transaction.atomic():
|
||||
# 使用 ignore_conflicts 策略:
|
||||
# - 新子域名:INSERT 完整记录
|
||||
# - 已存在子域名:忽略(不更新,因为没有探测字段数据)
|
||||
# 注意:ignore_conflicts 无法返回实际创建的数量
|
||||
Subdomain.objects.bulk_create( # type: ignore[attr-defined]
|
||||
Subdomain.objects.bulk_create(
|
||||
subdomain_objects,
|
||||
ignore_conflicts=True, # 忽略重复记录
|
||||
ignore_conflicts=True,
|
||||
)
|
||||
|
||||
logger.debug(f"成功处理 {len(items)} 条子域名记录")
|
||||
|
||||
except IntegrityError as e:
|
||||
logger.error(
|
||||
f"批量插入子域名失败 - 数据完整性错误: {e}, "
|
||||
f"记录数: {len(items)}, "
|
||||
f"示例域名: {items[0].name if items else 'N/A'}"
|
||||
)
|
||||
raise
|
||||
|
||||
except OperationalError as e:
|
||||
logger.error(
|
||||
f"批量插入子域名失败 - 数据库操作错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
|
||||
except DatabaseError as e:
|
||||
logger.error(
|
||||
f"批量插入子域名失败 - 数据库错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
logger.debug(f"成功处理 {len(unique_items)} 条子域名记录")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"批量插入子域名失败 - 未知错误: {e}, "
|
||||
f"记录数: {len(items)}, "
|
||||
f"错误类型: {type(e).__name__}",
|
||||
exc_info=True
|
||||
)
|
||||
logger.error(f"批量插入子域名失败: {e}")
|
||||
raise
|
||||
|
||||
def get_or_create(self, name: str, target_id: int) -> Tuple[Subdomain, bool]:
|
||||
"""
|
||||
获取或创建子域名
|
||||
|
||||
Args:
|
||||
name: 子域名名称
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
(Subdomain对象, 是否新创建)
|
||||
"""
|
||||
return Subdomain.objects.get_or_create(
|
||||
name=name,
|
||||
target_id=target_id,
|
||||
)
|
||||
def get_all(self):
|
||||
"""获取所有子域名"""
|
||||
return Subdomain.objects.all().order_by('-created_at')
|
||||
|
||||
def get_by_target(self, target_id: int):
|
||||
"""获取目标下的所有子域名"""
|
||||
return Subdomain.objects.filter(target_id=target_id).order_by('-created_at')
|
||||
|
||||
def count_by_target(self, target_id: int) -> int:
|
||||
"""统计目标下的域名数量"""
|
||||
return Subdomain.objects.filter(target_id=target_id).count()
|
||||
|
||||
def get_domains_for_export(self, target_id: int, batch_size: int = 1000) -> Iterator[str]:
|
||||
"""
|
||||
流式导出域名(用于生成扫描工具输入文件)
|
||||
|
||||
使用 iterator() 进行流式查询,避免一次性加载所有数据到内存
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
batch_size: 每次从数据库读取的行数
|
||||
|
||||
Yields:
|
||||
str: 域名
|
||||
"""
|
||||
"""流式导出域名"""
|
||||
queryset = Subdomain.objects.filter(
|
||||
target_id=target_id
|
||||
).only('name').iterator(chunk_size=batch_size)
|
||||
@@ -119,138 +74,36 @@ class DjangoSubdomainRepository:
|
||||
for subdomain in queryset:
|
||||
yield subdomain.name
|
||||
|
||||
def get_by_target(self, target_id: int):
|
||||
return Subdomain.objects.filter(target_id=target_id).order_by('-discovered_at')
|
||||
|
||||
def count_by_target(self, target_id: int) -> int:
|
||||
"""
|
||||
统计目标下的域名数量
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
int: 域名数量
|
||||
"""
|
||||
return Subdomain.objects.filter(target_id=target_id).count()
|
||||
|
||||
def get_by_names_and_target_id(self, names: set, target_id: int) -> dict:
|
||||
"""
|
||||
根据域名列表和目标ID批量查询 Subdomain
|
||||
|
||||
Args:
|
||||
names: 域名集合
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
dict: {domain_name: Subdomain对象}
|
||||
"""
|
||||
"""根据域名列表和目标ID批量查询 Subdomain"""
|
||||
subdomains = Subdomain.objects.filter(
|
||||
name__in=names,
|
||||
target_id=target_id
|
||||
).only('id', 'name')
|
||||
|
||||
return {sd.name: sd for sd in subdomains}
|
||||
|
||||
def get_all(self):
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
target_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
获取所有子域名
|
||||
|
||||
Returns:
|
||||
QuerySet: 子域名查询集
|
||||
"""
|
||||
return Subdomain.objects.all()
|
||||
|
||||
def soft_delete_by_ids(self, subdomain_ids: List[int]) -> int:
|
||||
"""
|
||||
根据 ID 列表批量软删除子域名
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
subdomain_ids: 子域名 ID 列表
|
||||
target_id: 目标 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
Returns:
|
||||
软删除的记录数
|
||||
|
||||
Note:
|
||||
- 使用软删除:只标记为已删除,不真正删除数据库记录
|
||||
- 保留所有关联数据,可恢复
|
||||
Yields:
|
||||
{'name': 'sub.example.com', 'created_at': datetime}
|
||||
"""
|
||||
try:
|
||||
updated_count = (
|
||||
Subdomain.objects
|
||||
.filter(id__in=subdomain_ids)
|
||||
.update(deleted_at=timezone.now())
|
||||
)
|
||||
logger.debug(
|
||||
"批量软删除子域名成功 - Count: %s, 更新记录: %s",
|
||||
len(subdomain_ids),
|
||||
updated_count
|
||||
)
|
||||
return updated_count
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"批量软删除子域名失败 - IDs: %s, 错误: %s",
|
||||
subdomain_ids,
|
||||
e
|
||||
)
|
||||
raise
|
||||
|
||||
def hard_delete_by_ids(self, subdomain_ids: List[int]) -> Tuple[int, Dict[str, int]]:
|
||||
"""
|
||||
根据 ID 列表硬删除子域名(使用数据库级 CASCADE)
|
||||
qs = (
|
||||
Subdomain.objects
|
||||
.filter(target_id=target_id)
|
||||
.values('name', 'created_at')
|
||||
.order_by('name')
|
||||
)
|
||||
|
||||
Args:
|
||||
subdomain_ids: 子域名 ID 列表
|
||||
|
||||
Returns:
|
||||
(删除的记录数, 删除详情字典)
|
||||
|
||||
Strategy:
|
||||
使用数据库级 CASCADE 删除,性能最优
|
||||
|
||||
Note:
|
||||
- 硬删除:从数据库中永久删除
|
||||
- 数据库自动处理所有外键级联删除
|
||||
- 不触发 Django 信号(pre_delete/post_delete)
|
||||
"""
|
||||
try:
|
||||
batch_size = 1000 # 每批处理1000个子域名
|
||||
total_deleted = 0
|
||||
|
||||
logger.debug(f"开始批量删除 {len(subdomain_ids)} 个子域名(数据库 CASCADE)...")
|
||||
|
||||
# 分批处理子域名ID,避免单次删除过多
|
||||
for i in range(0, len(subdomain_ids), batch_size):
|
||||
batch_ids = subdomain_ids[i:i + batch_size]
|
||||
|
||||
# 直接删除子域名,数据库自动级联删除所有关联数据
|
||||
count, _ = Subdomain.all_objects.filter(id__in=batch_ids).delete()
|
||||
total_deleted += count
|
||||
|
||||
logger.debug(f"批次删除完成: {len(batch_ids)} 个子域名,删除 {count} 条记录")
|
||||
|
||||
# 由于使用数据库 CASCADE,无法获取详细统计
|
||||
deleted_details = {
|
||||
'subdomains': len(subdomain_ids),
|
||||
'total': total_deleted,
|
||||
'note': 'Database CASCADE - detailed stats unavailable'
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
"批量硬删除成功(CASCADE)- 子域名数: %s, 总删除记录: %s",
|
||||
len(subdomain_ids),
|
||||
total_deleted
|
||||
)
|
||||
|
||||
return total_deleted, deleted_details
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"批量硬删除失败(CASCADE)- 子域名数: %s, 错误: %s",
|
||||
len(subdomain_ids),
|
||||
str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
|
||||
|
||||
@@ -3,110 +3,87 @@ Django ORM 实现的 WebSite Repository
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Generator, Tuple, Dict, Optional
|
||||
from django.db import transaction, IntegrityError, OperationalError, DatabaseError
|
||||
from django.utils import timezone
|
||||
from typing import List, Generator, Optional, Iterator
|
||||
from django.db import transaction
|
||||
|
||||
from apps.asset.models.asset_models import WebSite
|
||||
from apps.asset.dtos import WebSiteDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@auto_ensure_db_connection
|
||||
class DjangoWebSiteRepository:
|
||||
"""Django ORM 实现的 WebSite Repository"""
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[WebSiteDTO]) -> None:
|
||||
def bulk_upsert(self, items: List[WebSiteDTO]) -> int:
|
||||
"""
|
||||
批量创建 WebSite,忽略冲突
|
||||
批量创建或更新 WebSite(upsert)
|
||||
|
||||
存在则更新所有字段,不存在则创建。
|
||||
使用 Django 原生 update_conflicts。
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: WebSite DTO 列表
|
||||
|
||||
Raises:
|
||||
IntegrityError: 数据完整性错误
|
||||
OperationalError: 数据库操作错误
|
||||
DatabaseError: 数据库错误
|
||||
Returns:
|
||||
int: 处理的记录数
|
||||
"""
|
||||
if not items:
|
||||
return
|
||||
|
||||
return 0
|
||||
|
||||
try:
|
||||
# 转换为 Django 模型对象
|
||||
website_objects = [
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, WebSite)
|
||||
|
||||
# 直接从 DTO 字段构建 Model
|
||||
websites = [
|
||||
WebSite(
|
||||
target_id=item.target_id,
|
||||
url=item.url,
|
||||
host=item.host,
|
||||
location=item.location,
|
||||
title=item.title,
|
||||
webserver=item.webserver,
|
||||
body_preview=item.body_preview,
|
||||
content_type=item.content_type,
|
||||
tech=item.tech,
|
||||
host=item.host or '',
|
||||
location=item.location or '',
|
||||
title=item.title or '',
|
||||
webserver=item.webserver or '',
|
||||
body_preview=item.body_preview or '',
|
||||
content_type=item.content_type or '',
|
||||
tech=item.tech if item.tech else [],
|
||||
status_code=item.status_code,
|
||||
content_length=item.content_length,
|
||||
vhost=item.vhost
|
||||
)
|
||||
for item in items
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
|
||||
with transaction.atomic():
|
||||
# 批量插入或更新
|
||||
# 如果URL和目标已存在,忽略冲突
|
||||
WebSite.objects.bulk_create(
|
||||
website_objects,
|
||||
ignore_conflicts=True
|
||||
websites,
|
||||
update_conflicts=True,
|
||||
unique_fields=['url', 'target'],
|
||||
update_fields=[
|
||||
'host', 'location', 'title', 'webserver',
|
||||
'body_preview', 'content_type', 'tech',
|
||||
'status_code', 'content_length', 'vhost'
|
||||
],
|
||||
batch_size=1000
|
||||
)
|
||||
|
||||
logger.debug(f"成功处理 {len(items)} 条 WebSite 记录")
|
||||
|
||||
except IntegrityError as e:
|
||||
logger.error(
|
||||
f"批量插入 WebSite 失败 - 数据完整性错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
|
||||
except OperationalError as e:
|
||||
logger.error(
|
||||
f"批量插入 WebSite 失败 - 数据库操作错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
|
||||
except DatabaseError as e:
|
||||
logger.error(
|
||||
f"批量插入 WebSite 失败 - 数据库错误: {e}, "
|
||||
f"记录数: {len(items)}"
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
logger.debug(f"批量 upsert WebSite 成功: {len(unique_items)} 条")
|
||||
return len(unique_items)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"批量插入 WebSite 失败 - 未知错误: {e}, "
|
||||
f"记录数: {len(items)}, "
|
||||
f"错误类型: {type(e).__name__}",
|
||||
exc_info=True
|
||||
)
|
||||
logger.error(f"批量 upsert WebSite 失败: {e}")
|
||||
raise
|
||||
|
||||
    def get_urls_for_export(self, target_id: int, batch_size: int = 1000) -> Generator[str, None, None]:
        """
        流式导出目标下的所有站点 URL

        Args:
            target_id: 目标 ID
            batch_size: 批次大小

        Yields:
            str: 站点 URL
        """
        try:
            # 查询目标下的站点,只选择 URL 字段,避免不必要的数据传输
            queryset = WebSite.objects.filter(
                target_id=target_id
            ).values_list('url', flat=True).iterator(chunk_size=batch_size)

@@ -117,144 +94,93 @@ class DjangoWebSiteRepository:
            logger.error(f"流式导出站点 URL 失败 - Target ID: {target_id}, 错误: {e}")
            raise
|
||||
def get_all(self):
|
||||
"""获取所有网站"""
|
||||
return WebSite.objects.all().order_by('-created_at')
|
||||
|
||||
def get_by_target(self, target_id: int):
|
||||
return WebSite.objects.filter(target_id=target_id).order_by('-discovered_at')
|
||||
"""获取目标下的所有网站"""
|
||||
return WebSite.objects.filter(target_id=target_id).order_by('-created_at')
|
||||
|
||||
def count_by_target(self, target_id: int) -> int:
|
||||
"""
|
||||
统计目标下的站点总数
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
int: 站点总数
|
||||
"""
|
||||
try:
|
||||
count = WebSite.objects.filter(target_id=target_id).count()
|
||||
logger.debug(f"Target {target_id} 的站点总数: {count}")
|
||||
return count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"统计站点数量失败 - Target ID: {target_id}, 错误: {e}")
|
||||
raise
|
||||
|
||||
def count_by_scan(self, scan_id: int) -> int:
|
||||
"""
|
||||
统计扫描下的站点总数
|
||||
"""
|
||||
try:
|
||||
count = WebSite.objects.filter(scan_id=scan_id).count()
|
||||
logger.debug(f"Scan {scan_id} 的站点总数: {count}")
|
||||
return count
|
||||
except Exception as e:
|
||||
logger.error(f"统计站点数量失败 - Scan ID: {scan_id}, 错误: {e}")
|
||||
raise
|
||||
"""统计目标下的站点总数"""
|
||||
return WebSite.objects.filter(target_id=target_id).count()
|
||||
|
||||
def get_by_url(self, url: str, target_id: int) -> Optional[int]:
|
||||
"""根据 URL 和 target_id 查找站点 ID"""
|
||||
website = WebSite.objects.filter(url=url, target_id=target_id).first()
|
||||
return website.id if website else None
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[WebSiteDTO]) -> int:
|
||||
"""
|
||||
根据 URL 和 target_id 查找站点 ID
|
||||
批量创建 WebSite(存在即跳过)
|
||||
|
||||
注意:自动按模型唯一约束去重,保留最后一条记录。
|
||||
"""
|
||||
if not items:
|
||||
return 0
|
||||
|
||||
try:
|
||||
# 自动按模型唯一约束去重
|
||||
unique_items = deduplicate_for_bulk(items, WebSite)
|
||||
|
||||
websites = [
|
||||
WebSite(
|
||||
target_id=item.target_id,
|
||||
url=item.url,
|
||||
host=item.host or '',
|
||||
location=item.location or '',
|
||||
title=item.title or '',
|
||||
webserver=item.webserver or '',
|
||||
body_preview=item.body_preview or '',
|
||||
content_type=item.content_type or '',
|
||||
tech=item.tech if item.tech else [],
|
||||
status_code=item.status_code,
|
||||
content_length=item.content_length,
|
||||
vhost=item.vhost
|
||||
)
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
with transaction.atomic():
|
||||
WebSite.objects.bulk_create(
|
||||
websites,
|
||||
ignore_conflicts=True,
|
||||
batch_size=1000
|
||||
)
|
||||
|
||||
logger.debug(f"批量创建 WebSite 成功(ignore_conflicts): {len(unique_items)} 条")
|
||||
return len(unique_items)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"批量创建 WebSite 失败: {e}")
|
||||
raise
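Unlike bulk_upsert, the ignore_conflicts path simply skips rows that already exist, and Django's bulk_create does not report how many rows were actually inserted in that mode. That is why the service-layer bulk_create_urls / bulk_create_subdomains methods later in this diff count before and after the insert. A minimal sketch of that counting pattern (names illustrative):

    # Count how many rows an ignore_conflicts bulk insert really created,
    # since bulk_create() gives no per-row feedback when conflicts are skipped.
    def create_and_count(model, target_id, rows):
        before = model.objects.filter(target_id=target_id).count()
        model.objects.bulk_create(rows, ignore_conflicts=True, batch_size=1000)
        after = model.objects.filter(target_id=target_id).count()
        return after - before   # duplicates silently skipped by the database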
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
target_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
url: 站点 URL
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
Optional[int]: 站点 ID,如果不存在返回 None
|
||||
|
||||
Raises:
|
||||
ValueError: 发现多个站点时
|
||||
"""
|
||||
try:
|
||||
website = WebSite.objects.filter(url=url, target_id=target_id).first()
|
||||
if website:
|
||||
return website.id
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"查询站点失败 - URL: {url}, Target ID: {target_id}, 错误: {e}")
|
||||
raise
|
||||
|
||||
def get_all(self):
|
||||
"""
|
||||
获取所有网站
|
||||
batch_size: 每批数据量
|
||||
|
||||
Returns:
|
||||
QuerySet: 网站查询集
|
||||
Yields:
|
||||
包含所有网站字段的字典
|
||||
"""
|
||||
return WebSite.objects.all()
|
||||
|
||||
def soft_delete_by_ids(self, website_ids: List[int]) -> int:
|
||||
"""
|
||||
根据 ID 列表批量软删除WebSite
|
||||
|
||||
Args:
|
||||
website_ids: WebSite ID 列表
|
||||
|
||||
Returns:
|
||||
软删除的记录数
|
||||
"""
|
||||
try:
|
||||
updated_count = (
|
||||
WebSite.objects
|
||||
.filter(id__in=website_ids)
|
||||
.update(deleted_at=timezone.now())
|
||||
qs = (
|
||||
WebSite.objects
|
||||
.filter(target_id=target_id)
|
||||
.values(
|
||||
'url', 'host', 'location', 'title', 'status_code',
|
||||
'content_length', 'content_type', 'webserver', 'tech',
|
||||
'body_preview', 'vhost', 'created_at'
|
||||
)
|
||||
logger.debug(
|
||||
"批量软删除WebSite成功 - Count: %s, 更新记录: %s",
|
||||
len(website_ids),
|
||||
updated_count
|
||||
)
|
||||
return updated_count
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"批量软删除WebSite失败 - IDs: %s, 错误: %s",
|
||||
website_ids,
|
||||
e
|
||||
)
|
||||
raise
|
||||
|
||||
def hard_delete_by_ids(self, website_ids: List[int]) -> Tuple[int, Dict[str, int]]:
|
||||
"""
|
||||
根据 ID 列表硬删除WebSite(使用数据库级 CASCADE)
|
||||
.order_by('url')
|
||||
)
|
||||
|
||||
Args:
|
||||
website_ids: WebSite ID 列表
|
||||
|
||||
Returns:
|
||||
(删除的记录数, 删除详情字典)
|
||||
"""
|
||||
try:
|
||||
batch_size = 1000
|
||||
total_deleted = 0
|
||||
|
||||
logger.debug(f"开始批量删除 {len(website_ids)} 个WebSite(数据库 CASCADE)...")
|
||||
|
||||
for i in range(0, len(website_ids), batch_size):
|
||||
batch_ids = website_ids[i:i + batch_size]
|
||||
count, _ = WebSite.all_objects.filter(id__in=batch_ids).delete()
|
||||
total_deleted += count
|
||||
logger.debug(f"批次删除完成: {len(batch_ids)} 个WebSite,删除 {count} 条记录")
|
||||
|
||||
deleted_details = {
|
||||
'websites': len(website_ids),
|
||||
'total': total_deleted,
|
||||
'note': 'Database CASCADE - detailed stats unavailable'
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
"批量硬删除成功(CASCADE)- WebSite数: %s, 总删除记录: %s",
|
||||
len(website_ids),
|
||||
total_deleted
|
||||
)
|
||||
|
||||
return total_deleted, deleted_details
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"批量硬删除失败(CASCADE)- WebSite数: %s, 错误: %s",
|
||||
len(website_ids),
|
||||
str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
|
||||
|
||||
@@ -1,12 +1,13 @@
"""Directory Snapshot Repository - 目录快照数据访问层"""

import logging
-from typing import List
+from typing import List, Iterator
from django.db import transaction

from apps.asset.models import DirectorySnapshot
from apps.asset.dtos.snapshot import DirectorySnapshotDTO
from apps.common.decorators import auto_ensure_db_connection
+from apps.common.utils import deduplicate_for_bulk

logger = logging.getLogger(__name__)

@@ -25,6 +26,8 @@ class DjangoDirectorySnapshotRepository:
|
||||
|
||||
使用 ignore_conflicts 策略,如果快照已存在(相同 scan + url)则跳过
|
||||
|
||||
注意:会自动按 (scan_id, url) 去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 目录快照 DTO 列表
|
||||
|
||||
@@ -37,6 +40,9 @@ class DjangoDirectorySnapshotRepository:
|
||||
return
|
||||
|
||||
try:
|
||||
# 根据模型唯一约束自动去重
|
||||
unique_items = deduplicate_for_bulk(items, DirectorySnapshot)
|
||||
|
||||
# 转换为 Django 模型对象
|
||||
snapshot_objects = [
|
||||
DirectorySnapshot(
|
||||
@@ -49,7 +55,7 @@ class DjangoDirectorySnapshotRepository:
|
||||
content_type=item.content_type,
|
||||
duration=item.duration
|
||||
)
|
||||
for item in items
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
with transaction.atomic():
|
||||
@@ -60,7 +66,7 @@ class DjangoDirectorySnapshotRepository:
|
||||
ignore_conflicts=True
|
||||
)
|
||||
|
||||
logger.debug("成功保存 %d 条目录快照记录", len(items))
|
||||
logger.debug("成功保存 %d 条目录快照记录", len(unique_items))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
@@ -72,7 +78,35 @@ class DjangoDirectorySnapshotRepository:
|
||||
raise
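The deduplicate_for_bulk helper imported above is not shown in this diff; its docstring contract ("按模型唯一约束去重,保留最后一条记录") suggests it keeps only the last DTO per unique-constraint key. A purely hypothetical sketch of that contract, not the real implementation from apps.common.utils:

    # Hypothetical sketch only: keep the last DTO per unique-constraint key of the
    # model. The real helper may introspect constraints differently (e.g. handle
    # ForeignKey attnames such as target_id).
    def deduplicate_for_bulk(items, model):
        unique_fields = None
        for constraint in model._meta.constraints:
            fields = getattr(constraint, 'fields', None)   # only UniqueConstraint has .fields
            if fields:
                unique_fields = fields
                break
        if not unique_fields and model._meta.unique_together:
            unique_fields = model._meta.unique_together[0]
        if not unique_fields:
            return list(items)          # no unique constraint: keep everything
        deduped = {}
        for item in items:
            key = tuple(getattr(item, field, None) for field in unique_fields)
            deduped[key] = item         # later items overwrite earlier ones
        return list(deduped.values())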
|
||||
|
||||
def get_by_scan(self, scan_id: int):
|
||||
return DirectorySnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
|
||||
return DirectorySnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')
|
||||
|
||||
def get_all(self):
|
||||
return DirectorySnapshot.objects.all().order_by('-discovered_at')
|
||||
return DirectorySnapshot.objects.all().order_by('-created_at')
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
scan_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
scan_id: 扫描 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
Yields:
|
||||
包含所有目录字段的字典
|
||||
"""
|
||||
qs = (
|
||||
DirectorySnapshot.objects
|
||||
.filter(scan_id=scan_id)
|
||||
.values(
|
||||
'url', 'status', 'content_length', 'words',
|
||||
'lines', 'content_type', 'duration', 'created_at'
|
||||
)
|
||||
.order_by('url')
|
||||
)
|
||||
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
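These iter_raw_data_for_export methods stream plain dicts from .values() through .iterator(chunk_size=...), so an exporter can turn them into CSV without loading the whole queryset. A standard-library sketch of such a consumer (the caller supplies column names matching the .values() keys):

    # Sketch of a streaming CSV consumer for the export iterators above.
    import csv
    import io

    def rows_to_csv_chunks(rows, fieldnames):
        """Yield CSV text chunks for dict rows such as iter_raw_data_for_export()."""
        buffer = io.StringIO()
        writer = csv.DictWriter(buffer, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        yield buffer.getvalue()
        for row in rows:
            buffer.seek(0)
            buffer.truncate(0)
            writer.writerow(row)
            yield buffer.getvalue()

The chunks can be handed straight to a streaming HTTP response, keeping memory flat regardless of how many snapshot rows a scan produced.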
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
"""EndpointSnapshot Repository - Django ORM 实现"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from typing import List, Iterator
|
||||
|
||||
from apps.asset.models.snapshot_models import EndpointSnapshot
|
||||
from apps.asset.dtos.snapshot import EndpointSnapshotDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -18,6 +19,8 @@ class DjangoEndpointSnapshotRepository:
|
||||
"""
|
||||
保存端点快照
|
||||
|
||||
注意:会自动按 (scan_id, url) 去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 端点快照 DTO 列表
|
||||
|
||||
@@ -31,10 +34,13 @@ class DjangoEndpointSnapshotRepository:
|
||||
if not items:
|
||||
logger.debug("端点快照为空,跳过保存")
|
||||
return
|
||||
|
||||
# 根据模型唯一约束自动去重
|
||||
unique_items = deduplicate_for_bulk(items, EndpointSnapshot)
|
||||
|
||||
# 构建快照对象
|
||||
snapshots = []
|
||||
for item in items:
|
||||
for item in unique_items:
|
||||
snapshots.append(EndpointSnapshot(
|
||||
scan_id=item.scan_id,
|
||||
url=item.url,
|
||||
@@ -68,7 +74,36 @@ class DjangoEndpointSnapshotRepository:
|
||||
raise
|
||||
|
||||
def get_by_scan(self, scan_id: int):
|
||||
return EndpointSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
|
||||
return EndpointSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')
|
||||
|
||||
def get_all(self):
|
||||
return EndpointSnapshot.objects.all().order_by('-discovered_at')
|
||||
return EndpointSnapshot.objects.all().order_by('-created_at')
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
scan_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
scan_id: 扫描 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
Yields:
|
||||
包含所有端点字段的字典
|
||||
"""
|
||||
qs = (
|
||||
EndpointSnapshot.objects
|
||||
.filter(scan_id=scan_id)
|
||||
.values(
|
||||
'url', 'host', 'location', 'title', 'status_code',
|
||||
'content_length', 'content_type', 'webserver', 'tech',
|
||||
'body_preview', 'vhost', 'matched_gf_patterns', 'created_at'
|
||||
)
|
||||
.order_by('url')
|
||||
)
|
||||
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
|
||||
|
||||
@@ -6,6 +6,7 @@ from typing import List, Iterator
|
||||
from apps.asset.models.snapshot_models import HostPortMappingSnapshot
|
||||
from apps.asset.dtos.snapshot import HostPortMappingSnapshotDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -18,6 +19,8 @@ class DjangoHostPortMappingSnapshotRepository:
|
||||
"""
|
||||
保存主机端口关联快照
|
||||
|
||||
注意:会自动按 (scan_id, host, ip, port) 去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 主机端口关联快照 DTO 列表
|
||||
|
||||
@@ -31,10 +34,13 @@ class DjangoHostPortMappingSnapshotRepository:
|
||||
if not items:
|
||||
logger.debug("主机端口关联快照为空,跳过保存")
|
||||
return
|
||||
|
||||
# 根据模型唯一约束自动去重
|
||||
unique_items = deduplicate_for_bulk(items, HostPortMappingSnapshot)
|
||||
|
||||
# 构建快照对象
|
||||
snapshots = []
|
||||
for item in items:
|
||||
for item in unique_items:
|
||||
snapshots.append(HostPortMappingSnapshot(
|
||||
scan_id=item.scan_id,
|
||||
host=item.host,
|
||||
@@ -59,20 +65,28 @@ class DjangoHostPortMappingSnapshotRepository:
|
||||
)
|
||||
raise
|
||||
|
||||
def get_ip_aggregation_by_scan(self, scan_id: int, search: str = None):
|
||||
def get_ip_aggregation_by_scan(self, scan_id: int, filter_query: str = None):
|
||||
from django.db.models import Min
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
qs = HostPortMappingSnapshot.objects.filter(scan_id=scan_id)
|
||||
if search:
|
||||
qs = qs.filter(ip__icontains=search)
|
||||
|
||||
# 应用智能过滤
|
||||
if filter_query:
|
||||
field_mapping = {
|
||||
'ip': 'ip',
|
||||
'port': 'port',
|
||||
'host': 'host',
|
||||
}
|
||||
qs = apply_filters(qs, filter_query, field_mapping)
|
||||
|
||||
ip_aggregated = (
|
||||
qs
|
||||
.values('ip')
|
||||
.annotate(
|
||||
discovered_at=Min('discovered_at')
|
||||
created_at=Min('created_at')
|
||||
)
|
||||
.order_by('-discovered_at')
|
||||
.order_by('-created_at')
|
||||
)
|
||||
|
||||
results = []
|
||||
@@ -92,24 +106,32 @@ class DjangoHostPortMappingSnapshotRepository:
|
||||
'ip': ip,
|
||||
'hosts': hosts,
|
||||
'ports': ports,
|
||||
'discovered_at': item['discovered_at'],
|
||||
'created_at': item['created_at'],
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
def get_all_ip_aggregation(self, search: str = None):
|
||||
def get_all_ip_aggregation(self, filter_query: str = None):
|
||||
"""获取所有 IP 聚合数据"""
|
||||
from django.db.models import Min
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
qs = HostPortMappingSnapshot.objects.all()
|
||||
if search:
|
||||
qs = qs.filter(ip__icontains=search)
|
||||
|
||||
# 应用智能过滤
|
||||
if filter_query:
|
||||
field_mapping = {
|
||||
'ip': 'ip',
|
||||
'port': 'port',
|
||||
'host': 'host',
|
||||
}
|
||||
qs = apply_filters(qs, filter_query, field_mapping)
|
||||
|
||||
ip_aggregated = (
|
||||
qs
|
||||
.values('ip')
|
||||
.annotate(discovered_at=Min('discovered_at'))
|
||||
.order_by('-discovered_at')
|
||||
.annotate(created_at=Min('created_at'))
|
||||
.order_by('-created_at')
|
||||
)
|
||||
|
||||
results = []
|
||||
@@ -127,7 +149,7 @@ class DjangoHostPortMappingSnapshotRepository:
|
||||
'ip': ip,
|
||||
'hosts': hosts,
|
||||
'ports': ports,
|
||||
'discovered_at': item['discovered_at'],
|
||||
'created_at': item['created_at'],
|
||||
})
|
||||
return results
|
||||
|
||||
@@ -143,3 +165,33 @@ class DjangoHostPortMappingSnapshotRepository:
|
||||
)
|
||||
for ip in queryset:
|
||||
yield ip
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
scan_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
scan_id: 扫描 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
Yields:
|
||||
{
|
||||
'ip': '192.168.1.1',
|
||||
'host': 'example.com',
|
||||
'port': 80,
|
||||
'created_at': datetime
|
||||
}
|
||||
"""
|
||||
qs = (
|
||||
HostPortMappingSnapshot.objects
|
||||
.filter(scan_id=scan_id)
|
||||
.values('ip', 'host', 'port', 'created_at')
|
||||
.order_by('ip', 'host', 'port')
|
||||
)
|
||||
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
"""Django ORM 实现的 SubdomainSnapshot Repository"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from typing import List, Iterator
|
||||
|
||||
from apps.asset.models.snapshot_models import SubdomainSnapshot
|
||||
from apps.asset.dtos import SubdomainSnapshotDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -18,6 +19,8 @@ class DjangoSubdomainSnapshotRepository:
|
||||
"""
|
||||
保存子域名快照
|
||||
|
||||
注意:会自动按 (scan_id, name) 去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 子域名快照 DTO 列表
|
||||
|
||||
@@ -31,10 +34,13 @@ class DjangoSubdomainSnapshotRepository:
|
||||
if not items:
|
||||
logger.debug("子域名快照为空,跳过保存")
|
||||
return
|
||||
|
||||
# 根据模型唯一约束自动去重
|
||||
unique_items = deduplicate_for_bulk(items, SubdomainSnapshot)
|
||||
|
||||
# 构建快照对象
|
||||
snapshots = []
|
||||
for item in items:
|
||||
for item in unique_items:
|
||||
snapshots.append(SubdomainSnapshot(
|
||||
scan_id=item.scan_id,
|
||||
name=item.name,
|
||||
@@ -55,7 +61,32 @@ class DjangoSubdomainSnapshotRepository:
|
||||
raise
|
||||
|
||||
def get_by_scan(self, scan_id: int):
|
||||
return SubdomainSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
|
||||
return SubdomainSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')
|
||||
|
||||
def get_all(self):
|
||||
return SubdomainSnapshot.objects.all().order_by('-discovered_at')
|
||||
return SubdomainSnapshot.objects.all().order_by('-created_at')
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
scan_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
scan_id: 扫描 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
Yields:
|
||||
{'name': 'sub.example.com', 'created_at': datetime}
|
||||
"""
|
||||
qs = (
|
||||
SubdomainSnapshot.objects
|
||||
.filter(scan_id=scan_id)
|
||||
.values('name', 'created_at')
|
||||
.order_by('name')
|
||||
)
|
||||
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
yield row
|
||||
|
||||
@@ -8,6 +8,7 @@ from django.db import transaction
|
||||
from apps.asset.models import VulnerabilitySnapshot
|
||||
from apps.asset.dtos.snapshot import VulnerabilitySnapshotDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -21,12 +22,17 @@ class DjangoVulnerabilitySnapshotRepository:
|
||||
|
||||
使用 ``ignore_conflicts`` 策略,如果快照已存在则跳过。
|
||||
具体唯一约束由数据库模型控制。
|
||||
|
||||
注意:会自动按唯一约束字段去重,保留最后一条记录。
|
||||
"""
|
||||
if not items:
|
||||
logger.warning("漏洞快照列表为空,跳过保存")
|
||||
return
|
||||
|
||||
try:
|
||||
# 根据模型唯一约束自动去重
|
||||
unique_items = deduplicate_for_bulk(items, VulnerabilitySnapshot)
|
||||
|
||||
snapshot_objects = [
|
||||
VulnerabilitySnapshot(
|
||||
scan_id=item.scan_id,
|
||||
@@ -38,7 +44,7 @@ class DjangoVulnerabilitySnapshotRepository:
|
||||
description=item.description,
|
||||
raw_output=item.raw_output,
|
||||
)
|
||||
for item in items
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
with transaction.atomic():
|
||||
@@ -47,7 +53,7 @@ class DjangoVulnerabilitySnapshotRepository:
|
||||
ignore_conflicts=True,
|
||||
)
|
||||
|
||||
logger.debug("成功保存 %d 条漏洞快照记录", len(items))
|
||||
logger.debug("成功保存 %d 条漏洞快照记录", len(unique_items))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
@@ -60,7 +66,7 @@ class DjangoVulnerabilitySnapshotRepository:
|
||||
|
||||
def get_by_scan(self, scan_id: int):
|
||||
"""按扫描任务获取漏洞快照 QuerySet。"""
|
||||
return VulnerabilitySnapshot.objects.filter(scan_id=scan_id).order_by("-discovered_at")
|
||||
return VulnerabilitySnapshot.objects.filter(scan_id=scan_id).order_by("-created_at")
|
||||
|
||||
def get_all(self):
|
||||
return VulnerabilitySnapshot.objects.all().order_by('-discovered_at')
|
||||
return VulnerabilitySnapshot.objects.all().order_by('-created_at')
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
"""WebsiteSnapshot Repository - Django ORM 实现"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from typing import List, Iterator
|
||||
|
||||
from apps.asset.models.snapshot_models import WebsiteSnapshot
|
||||
from apps.asset.dtos.snapshot import WebsiteSnapshotDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -18,6 +19,8 @@ class DjangoWebsiteSnapshotRepository:
|
||||
"""
|
||||
保存网站快照
|
||||
|
||||
注意:会自动按 (scan_id, url) 去重,保留最后一条记录。
|
||||
|
||||
Args:
|
||||
items: 网站快照 DTO 列表
|
||||
|
||||
@@ -31,10 +34,13 @@ class DjangoWebsiteSnapshotRepository:
|
||||
if not items:
|
||||
logger.debug("网站快照为空,跳过保存")
|
||||
return
|
||||
|
||||
# 根据模型唯一约束自动去重
|
||||
unique_items = deduplicate_for_bulk(items, WebsiteSnapshot)
|
||||
|
||||
# 构建快照对象
|
||||
snapshots = []
|
||||
for item in items:
|
||||
for item in unique_items:
|
||||
snapshots.append(WebsiteSnapshot(
|
||||
scan_id=item.scan_id,
|
||||
url=item.url,
|
||||
@@ -68,7 +74,50 @@ class DjangoWebsiteSnapshotRepository:
|
||||
raise
|
||||
|
||||
def get_by_scan(self, scan_id: int):
|
||||
return WebsiteSnapshot.objects.filter(scan_id=scan_id).order_by('-discovered_at')
|
||||
return WebsiteSnapshot.objects.filter(scan_id=scan_id).order_by('-created_at')
|
||||
|
||||
def get_all(self):
|
||||
return WebsiteSnapshot.objects.all().order_by('-discovered_at')
|
||||
return WebsiteSnapshot.objects.all().order_by('-created_at')
|
||||
|
||||
def iter_raw_data_for_export(
|
||||
self,
|
||||
scan_id: int,
|
||||
batch_size: int = 1000
|
||||
) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
scan_id: 扫描 ID
|
||||
batch_size: 每批数据量
|
||||
|
||||
Yields:
|
||||
包含所有网站字段的字典
|
||||
"""
|
||||
qs = (
|
||||
WebsiteSnapshot.objects
|
||||
.filter(scan_id=scan_id)
|
||||
.values(
|
||||
'url', 'host', 'location', 'title', 'status',
|
||||
'content_length', 'content_type', 'web_server', 'tech',
|
||||
'body_preview', 'vhost', 'created_at'
|
||||
)
|
||||
.order_by('url')
|
||||
)
|
||||
|
||||
for row in qs.iterator(chunk_size=batch_size):
|
||||
# 重命名字段以匹配 CSV 表头
|
||||
yield {
|
||||
'url': row['url'],
|
||||
'host': row['host'],
|
||||
'location': row['location'],
|
||||
'title': row['title'],
|
||||
'status_code': row['status'],
|
||||
'content_length': row['content_length'],
|
||||
'content_type': row['content_type'],
|
||||
'webserver': row['web_server'],
|
||||
'tech': row['tech'],
|
||||
'body_preview': row['body_preview'],
|
||||
'vhost': row['vhost'],
|
||||
'created_at': row['created_at'],
|
||||
}
|
||||
|
||||
@@ -26,9 +26,9 @@ class SubdomainSerializer(serializers.ModelSerializer):
    class Meta:
        model = Subdomain
        fields = [
-            'id', 'name', 'discovered_at', 'target'
+            'id', 'name', 'created_at', 'target'
        ]
-        read_only_fields = ['id', 'discovered_at']
+        read_only_fields = ['id', 'created_at']


class SubdomainListSerializer(serializers.ModelSerializer):
@@ -41,9 +41,9 @@ class SubdomainListSerializer(serializers.ModelSerializer):
    class Meta:
        model = Subdomain
        fields = [
-            'id', 'name', 'discovered_at'
+            'id', 'name', 'created_at'
        ]
-        read_only_fields = ['id', 'discovered_at']
+        read_only_fields = ['id', 'created_at']
|
||||
|
||||
|
||||
# class IPAddressListSerializer(serializers.ModelSerializer):
|
||||
@@ -87,7 +87,7 @@ class WebSiteSerializer(serializers.ModelSerializer):
|
||||
'tech',
|
||||
'vhost',
|
||||
'subdomain',
|
||||
'discovered_at',
|
||||
'created_at',
|
||||
]
|
||||
read_only_fields = fields
|
||||
|
||||
@@ -107,7 +107,7 @@ class VulnerabilitySerializer(serializers.ModelSerializer):
|
||||
'cvss_score',
|
||||
'description',
|
||||
'raw_output',
|
||||
'discovered_at',
|
||||
'created_at',
|
||||
]
|
||||
read_only_fields = fields
|
||||
|
||||
@@ -126,7 +126,7 @@ class VulnerabilitySnapshotSerializer(serializers.ModelSerializer):
|
||||
'cvss_score',
|
||||
'description',
|
||||
'raw_output',
|
||||
'discovered_at',
|
||||
'created_at',
|
||||
]
|
||||
read_only_fields = fields
|
||||
|
||||
@@ -134,8 +134,8 @@
class EndpointListSerializer(serializers.ModelSerializer):
    """端点列表序列化器(用于目标端点列表页)"""

-    # 将 GF 匹配模式映射为前端使用的 tags 字段
-    tags = serializers.ListField(
+    # GF 匹配模式(gf-patterns 工具匹配的敏感 URL 模式)
+    gfPatterns = serializers.ListField(
        child=serializers.CharField(),
        source='matched_gf_patterns',
        read_only=True,
@@ -155,8 +155,8 @@ class EndpointListSerializer(serializers.ModelSerializer):
            'body_preview',
            'tech',
            'vhost',
-            'tags',
-            'discovered_at',
+            'gfPatterns',
+            'created_at',
        ]
        read_only_fields = fields
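The rename only changes the serialized key: the model still stores matched_gf_patterns and DRF's source argument maps it onto gfPatterns. A self-contained sketch of the wiring (the Endpoint model class and the exact field list are assumptions, not taken from this diff):

    from rest_framework import serializers

    class EndpointListSerializer(serializers.ModelSerializer):
        # Expose the model field `matched_gf_patterns` under the JSON key `gfPatterns`
        gfPatterns = serializers.ListField(
            child=serializers.CharField(),
            source='matched_gf_patterns',
            read_only=True,
        )

        class Meta:
            model = Endpoint   # assumed model class from the asset app
            fields = ['id', 'url', 'gfPatterns', 'created_at']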
|
||||
|
||||
@@ -164,8 +164,7 @@ class EndpointListSerializer(serializers.ModelSerializer):
|
||||
class DirectorySerializer(serializers.ModelSerializer):
|
||||
"""目录序列化器"""
|
||||
|
||||
website_url = serializers.CharField(source='website.url', read_only=True)
|
||||
discovered_at = serializers.DateTimeField(read_only=True)
|
||||
created_at = serializers.DateTimeField(read_only=True)
|
||||
|
||||
class Meta:
|
||||
model = Directory
|
||||
@@ -178,8 +177,7 @@ class DirectorySerializer(serializers.ModelSerializer):
|
||||
'lines',
|
||||
'content_type',
|
||||
'duration',
|
||||
'website_url',
|
||||
'discovered_at',
|
||||
'created_at',
|
||||
]
|
||||
read_only_fields = fields
|
||||
|
||||
@@ -192,12 +190,12 @@ class IPAddressAggregatedSerializer(serializers.Serializer):
|
||||
- ip: IP 地址
|
||||
- hosts: 该 IP 关联的所有主机名列表
|
||||
- ports: 该 IP 关联的所有端口列表
|
||||
- discovered_at: 首次发现时间
|
||||
- created_at: 创建时间
|
||||
"""
|
||||
ip = serializers.IPAddressField(read_only=True)
|
||||
hosts = serializers.ListField(child=serializers.CharField(), read_only=True)
|
||||
ports = serializers.ListField(child=serializers.IntegerField(), read_only=True)
|
||||
discovered_at = serializers.DateTimeField(read_only=True)
|
||||
created_at = serializers.DateTimeField(read_only=True)
|
||||
|
||||
|
||||
# ==================== 快照序列化器 ====================
|
||||
@@ -207,7 +205,7 @@ class SubdomainSnapshotSerializer(serializers.ModelSerializer):
|
||||
|
||||
class Meta:
|
||||
model = SubdomainSnapshot
|
||||
fields = ['id', 'name', 'discovered_at']
|
||||
fields = ['id', 'name', 'created_at']
|
||||
read_only_fields = fields
|
||||
|
||||
|
||||
@@ -233,7 +231,7 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
|
||||
'tech',
|
||||
'vhost',
|
||||
'subdomain_name',
|
||||
'discovered_at',
|
||||
'created_at',
|
||||
]
|
||||
read_only_fields = fields
|
||||
|
||||
@@ -241,9 +239,6 @@ class WebsiteSnapshotSerializer(serializers.ModelSerializer):
|
||||
class DirectorySnapshotSerializer(serializers.ModelSerializer):
|
||||
"""目录快照序列化器(用于扫描历史)"""
|
||||
|
||||
# DirectorySnapshot 当前不再关联 Website,这里暂时将 website_url 映射为自身的 url,保证字段兼容
|
||||
website_url = serializers.CharField(source='url', read_only=True)
|
||||
|
||||
class Meta:
|
||||
model = DirectorySnapshot
|
||||
fields = [
|
||||
@@ -255,8 +250,7 @@ class DirectorySnapshotSerializer(serializers.ModelSerializer):
|
||||
'lines',
|
||||
'content_type',
|
||||
'duration',
|
||||
'website_url',
|
||||
'discovered_at',
|
||||
'created_at',
|
||||
]
|
||||
read_only_fields = fields
|
||||
|
||||
@@ -264,8 +258,8 @@ class DirectorySnapshotSerializer(serializers.ModelSerializer):
|
||||
class EndpointSnapshotSerializer(serializers.ModelSerializer):
|
||||
"""端点快照序列化器(用于扫描历史)"""
|
||||
|
||||
# 将 GF 匹配模式映射为前端使用的 tags 字段
|
||||
tags = serializers.ListField(
|
||||
# GF 匹配模式(gf-patterns 工具匹配的敏感 URL 模式)
|
||||
gfPatterns = serializers.ListField(
|
||||
child=serializers.CharField(),
|
||||
source='matched_gf_patterns',
|
||||
read_only=True,
|
||||
@@ -286,7 +280,7 @@ class EndpointSnapshotSerializer(serializers.ModelSerializer):
|
||||
'body_preview',
|
||||
'tech',
|
||||
'vhost',
|
||||
'tags',
|
||||
'discovered_at',
|
||||
'gfPatterns',
|
||||
'created_at',
|
||||
]
|
||||
read_only_fields = fields
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
"""Directory Service - 目录业务逻辑层"""
|
||||
|
||||
import logging
|
||||
from typing import List, Iterator, Optional
|
||||
|
||||
from apps.asset.models.asset_models import Directory
|
||||
from apps.asset.repositories import DjangoDirectoryRepository
|
||||
from apps.asset.dtos import DirectoryDTO
|
||||
from apps.common.validators import is_valid_url, is_url_match_target
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -10,46 +14,122 @@ logger = logging.getLogger(__name__)
|
||||
class DirectoryService:
|
||||
"""目录业务逻辑层"""
|
||||
|
||||
# 智能过滤字段映射
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'url': 'url',
|
||||
'status': 'status',
|
||||
}
|
||||
|
||||
def __init__(self, repository=None):
|
||||
"""
|
||||
初始化目录服务
|
||||
|
||||
Args:
|
||||
repository: 目录仓储实例(用于依赖注入)
|
||||
"""
|
||||
"""初始化目录服务"""
|
||||
self.repo = repository or DjangoDirectoryRepository()
|
||||
|
||||
# ==================== 创建操作 ====================
|
||||
|
||||
def bulk_create_ignore_conflicts(self, directory_dtos: list) -> None:
|
||||
def bulk_upsert(self, directory_dtos: List[DirectoryDTO]) -> int:
|
||||
"""
|
||||
批量创建目录记录,忽略冲突(用于扫描任务)
|
||||
批量创建或更新目录(upsert)
|
||||
|
||||
存在则更新所有字段,不存在则创建。
|
||||
|
||||
Args:
|
||||
directory_dtos: DirectoryDTO 列表
|
||||
"""
|
||||
return self.repo.bulk_create_ignore_conflicts(directory_dtos)
|
||||
|
||||
# ==================== 查询操作 ====================
|
||||
|
||||
def get_all(self):
|
||||
"""
|
||||
获取所有目录
|
||||
|
||||
|
||||
Returns:
|
||||
QuerySet: 目录查询集
|
||||
int: 处理的记录数
|
||||
"""
|
||||
logger.debug("获取所有目录")
|
||||
return self.repo.get_all()
|
||||
if not directory_dtos:
|
||||
return 0
|
||||
|
||||
try:
|
||||
return self.repo.bulk_upsert(directory_dtos)
|
||||
except Exception as e:
|
||||
logger.error(f"批量 upsert 目录失败: {e}")
|
||||
raise
|
||||
|
||||
def get_directories_by_target(self, target_id: int):
|
||||
logger.debug("获取目标下所有目录 - Target ID: %d", target_id)
|
||||
return self.repo.get_by_target(target_id)
|
||||
def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
|
||||
"""
|
||||
批量创建目录(仅 URL,使用 ignore_conflicts)
|
||||
|
||||
验证 URL 格式和匹配,过滤无效/不匹配 URL,去重后批量创建。
|
||||
已存在的记录会被跳过。
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
target_name: 目标名称(用于匹配验证)
|
||||
target_type: 目标类型 ('domain', 'ip', 'cidr')
|
||||
urls: URL 列表
|
||||
|
||||
Returns:
|
||||
int: 实际创建的记录数
|
||||
"""
|
||||
if not urls:
|
||||
return 0
|
||||
|
||||
# 过滤有效 URL 并去重
|
||||
valid_urls = []
|
||||
seen = set()
|
||||
|
||||
for url in urls:
|
||||
if not isinstance(url, str):
|
||||
continue
|
||||
url = url.strip()
|
||||
if not url or url in seen:
|
||||
continue
|
||||
if not is_valid_url(url):
|
||||
continue
|
||||
|
||||
# 匹配验证(前端已阻止不匹配的提交,后端作为双重保障)
|
||||
if not is_url_match_target(url, target_name, target_type):
|
||||
continue
|
||||
|
||||
seen.add(url)
|
||||
valid_urls.append(url)
|
||||
|
||||
if not valid_urls:
|
||||
return 0
|
||||
|
||||
# 获取创建前的数量
|
||||
count_before = self.repo.count_by_target(target_id)
|
||||
|
||||
# 创建 DTO 列表并批量创建
|
||||
directory_dtos = [
|
||||
DirectoryDTO(url=url, target_id=target_id)
|
||||
for url in valid_urls
|
||||
]
|
||||
self.repo.bulk_create_ignore_conflicts(directory_dtos)
|
||||
|
||||
# 获取创建后的数量
|
||||
count_after = self.repo.count_by_target(target_id)
|
||||
return count_after - count_before
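A hedged usage sketch of the new bulk_create_urls flow: invalid, mismatched, and duplicate URLs are silently dropped, and the return value is the net number of rows added for the target (the sample target and URLs below are made up):

    service = DirectoryService()
    created = service.bulk_create_urls(
        target_id=42,
        target_name='example.com',
        target_type='domain',
        urls=[
            'https://example.com/admin/',   # valid and matching -> kept
            'https://example.com/admin/',   # duplicate -> dropped before insert
            'https://other.org/login',      # does not match the target -> dropped
            'not a url',                    # fails is_valid_url -> dropped
        ],
    )
    print(created)  # at most 1; fewer if the URL already exists for this target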
|
||||
|
||||
def get_directories_by_target(self, target_id: int, filter_query: Optional[str] = None):
|
||||
"""获取目标下的所有目录"""
|
||||
queryset = self.repo.get_by_target(target_id)
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset
|
||||
|
||||
def get_all(self, filter_query: Optional[str] = None):
|
||||
"""获取所有目录"""
|
||||
queryset = self.repo.get_all()
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset
|
||||
|
||||
def iter_directory_urls_by_target(self, target_id: int, chunk_size: int = 1000) -> Iterator[str]:
|
||||
"""流式获取目标下的所有目录 URL,用于导出大批量数据。"""
|
||||
logger.debug("流式导出目标下目录 URL - Target ID: %d", target_id)
|
||||
"""流式获取目标下的所有目录 URL"""
|
||||
return self.repo.get_urls_for_export(target_id=target_id, batch_size=chunk_size)
|
||||
|
||||
def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
|
||||
Yields:
|
||||
原始数据字典
|
||||
"""
|
||||
return self.repo.iter_raw_data_for_export(target_id=target_id)
|
||||
|
||||
|
||||
__all__ = ['DirectoryService']
|
||||
|
||||
@@ -5,10 +5,12 @@ Endpoint 服务层
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional, Dict, Any, Iterator
|
||||
from typing import List, Iterator, Optional
|
||||
|
||||
from apps.asset.dtos.asset import EndpointDTO
|
||||
from apps.asset.repositories.asset import DjangoEndpointRepository
|
||||
from apps.common.validators import is_valid_url, is_url_match_target
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -20,101 +22,101 @@ class EndpointService:
|
||||
提供 Endpoint(URL/端点)相关的业务逻辑
|
||||
"""
|
||||
|
||||
# 智能过滤字段映射
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'url': 'url',
|
||||
'host': 'host',
|
||||
'title': 'title',
|
||||
'status': 'status_code',
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
"""初始化 Endpoint 服务"""
|
||||
self.repo = DjangoEndpointRepository()
|
||||
|
||||
def bulk_create_endpoints(
|
||||
self,
|
||||
endpoints: List[EndpointDTO],
|
||||
ignore_conflicts: bool = True
|
||||
) -> int:
|
||||
def bulk_upsert(self, endpoints: List[EndpointDTO]) -> int:
|
||||
"""
|
||||
批量创建端点记录
|
||||
批量创建或更新端点(upsert)
|
||||
|
||||
存在则更新所有字段,不存在则创建。
|
||||
|
||||
Args:
|
||||
endpoints: 端点数据列表
|
||||
ignore_conflicts: 是否忽略冲突(去重)
|
||||
|
||||
Returns:
|
||||
int: 创建的记录数
|
||||
int: 处理的记录数
|
||||
"""
|
||||
if not endpoints:
|
||||
return 0
|
||||
|
||||
try:
|
||||
if ignore_conflicts:
|
||||
return self.repo.bulk_create_ignore_conflicts(endpoints)
|
||||
else:
|
||||
# 如果需要非忽略冲突的版本,可以在 repository 中添加
|
||||
return self.repo.bulk_create_ignore_conflicts(endpoints)
|
||||
return self.repo.bulk_upsert(endpoints)
|
||||
except Exception as e:
|
||||
logger.error(f"批量创建端点失败: {e}")
|
||||
logger.error(f"批量 upsert 端点失败: {e}")
|
||||
raise
|
||||
|
||||
def get_endpoints_by_website(
|
||||
self,
|
||||
website_id: int,
|
||||
limit: Optional[int] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
|
||||
"""
|
||||
获取网站下的端点列表
|
||||
批量创建端点(仅 URL,使用 ignore_conflicts)
|
||||
|
||||
Args:
|
||||
website_id: 网站 ID
|
||||
limit: 返回数量限制
|
||||
|
||||
Returns:
|
||||
List[Dict]: 端点列表
|
||||
"""
|
||||
endpoints_dto = self.repo.get_by_website(website_id)
|
||||
|
||||
if limit:
|
||||
endpoints_dto = endpoints_dto[:limit]
|
||||
|
||||
endpoints = []
|
||||
for dto in endpoints_dto:
|
||||
endpoints.append({
|
||||
'url': dto.url,
|
||||
'title': dto.title,
|
||||
'status_code': dto.status_code,
|
||||
'content_length': dto.content_length,
|
||||
'webserver': dto.webserver
|
||||
})
|
||||
|
||||
return endpoints
|
||||
|
||||
def get_endpoints_by_target(
|
||||
self,
|
||||
target_id: int,
|
||||
limit: Optional[int] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
获取目标下的端点列表
|
||||
验证 URL 格式和匹配,过滤无效/不匹配 URL,去重后批量创建。
|
||||
已存在的记录会被跳过。
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
limit: 返回数量限制
|
||||
target_name: 目标名称(用于匹配验证)
|
||||
target_type: 目标类型 ('domain', 'ip', 'cidr')
|
||||
urls: URL 列表
|
||||
|
||||
Returns:
|
||||
List[Dict]: 端点列表
|
||||
int: 实际创建的记录数
|
||||
"""
|
||||
endpoints_dto = self.repo.get_by_target(target_id)
|
||||
if not urls:
|
||||
return 0
|
||||
|
||||
if limit:
|
||||
endpoints_dto = endpoints_dto[:limit]
|
||||
# 过滤有效 URL 并去重
|
||||
valid_urls = []
|
||||
seen = set()
|
||||
|
||||
endpoints = []
|
||||
for dto in endpoints_dto:
|
||||
endpoints.append({
|
||||
'url': dto.url,
|
||||
'title': dto.title,
|
||||
'status_code': dto.status_code,
|
||||
'content_length': dto.content_length,
|
||||
'webserver': dto.webserver
|
||||
})
|
||||
for url in urls:
|
||||
if not isinstance(url, str):
|
||||
continue
|
||||
url = url.strip()
|
||||
if not url or url in seen:
|
||||
continue
|
||||
if not is_valid_url(url):
|
||||
continue
|
||||
|
||||
# 匹配验证(前端已阻止不匹配的提交,后端作为双重保障)
|
||||
if not is_url_match_target(url, target_name, target_type):
|
||||
continue
|
||||
|
||||
seen.add(url)
|
||||
valid_urls.append(url)
|
||||
|
||||
return endpoints
|
||||
if not valid_urls:
|
||||
return 0
|
||||
|
||||
# 获取创建前的数量
|
||||
count_before = self.repo.count_by_target(target_id)
|
||||
|
||||
# 创建 DTO 列表并批量创建
|
||||
endpoint_dtos = [
|
||||
EndpointDTO(url=url, target_id=target_id)
|
||||
for url in valid_urls
|
||||
]
|
||||
self.repo.bulk_create_ignore_conflicts(endpoint_dtos)
|
||||
|
||||
# 获取创建后的数量
|
||||
count_after = self.repo.count_by_target(target_id)
|
||||
return count_after - count_before
|
||||
|
||||
def get_endpoints_by_target(self, target_id: int, filter_query: Optional[str] = None):
|
||||
"""获取目标下的所有端点"""
|
||||
queryset = self.repo.get_by_target(target_id)
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset
|
||||
|
||||
def count_endpoints_by_target(self, target_id: int) -> int:
|
||||
"""
|
||||
@@ -127,52 +129,28 @@ class EndpointService:
|
||||
int: 端点数量
|
||||
"""
|
||||
return self.repo.count_by_target(target_id)
|
||||
|
||||
def get_queryset_by_target(self, target_id: int):
|
||||
return self.repo.get_queryset_by_target(target_id)
|
||||
|
||||
def get_all(self):
|
||||
def get_all(self, filter_query: Optional[str] = None):
|
||||
"""获取所有端点(全局查询)"""
|
||||
return self.repo.get_all()
|
||||
queryset = self.repo.get_all()
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset
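apply_filters and the per-service FILTER_FIELD_MAPPING dicts implement the "智能过滤" (smart filter) syntax, whose grammar is not shown in this diff. The following is therefore only an illustrative sketch of one plausible shape, mapping field=value pairs through the field mapping onto icontains lookups; the real parser in apps.common.utils.filter_utils may differ:

    # Illustrative-only sketch of an apply_filters(queryset, filter_query, field_mapping)
    # helper; the production grammar is an assumption here.
    from django.db.models import Q

    def apply_filters(queryset, filter_query, field_mapping):
        condition = Q()
        for part in filter_query.split('&&'):
            if '=' not in part:
                continue
            key, value = part.split('=', 1)
            field = field_mapping.get(key.strip())
            if field:
                condition &= Q(**{f'{field}__icontains': value.strip()})
        return queryset.filter(condition)

    # e.g. EndpointService().get_all(filter_query='host=example.com&&title=admin')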
|
||||
|
||||
def iter_endpoint_urls_by_target(self, target_id: int, chunk_size: int = 1000) -> Iterator[str]:
|
||||
"""流式获取目标下的所有端点 URL,用于导出。"""
|
||||
queryset = self.repo.get_queryset_by_target(target_id)
|
||||
queryset = self.repo.get_by_target(target_id)
|
||||
for url in queryset.values_list('url', flat=True).iterator(chunk_size=chunk_size):
|
||||
yield url
|
||||
|
||||
def count_endpoints_by_website(self, website_id: int) -> int:
|
||||
|
||||
def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
|
||||
"""
|
||||
统计网站下的端点数量
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
website_id: 网站 ID
|
||||
|
||||
Returns:
|
||||
int: 端点数量
|
||||
"""
|
||||
return self.repo.count_by_website(website_id)
|
||||
|
||||
def soft_delete_endpoints(self, endpoint_ids: List[int]) -> int:
|
||||
"""
|
||||
软删除端点
|
||||
target_id: 目标 ID
|
||||
|
||||
Args:
|
||||
endpoint_ids: 端点 ID 列表
|
||||
|
||||
Returns:
|
||||
int: 更新的数量
|
||||
Yields:
|
||||
原始数据字典
|
||||
"""
|
||||
return self.repo.soft_delete_by_ids(endpoint_ids)
|
||||
|
||||
def hard_delete_endpoints(self, endpoint_ids: List[int]) -> tuple:
|
||||
"""
|
||||
硬删除端点
|
||||
|
||||
Args:
|
||||
endpoint_ids: 端点 ID 列表
|
||||
|
||||
Returns:
|
||||
tuple: (删除总数, 详细信息)
|
||||
"""
|
||||
return self.repo.hard_delete_by_ids(endpoint_ids)
|
||||
return self.repo.iter_raw_data_for_export(target_id=target_id)
|
||||
|
||||
@@ -1,16 +1,31 @@
|
||||
"""HostPortMapping Service - 业务逻辑层"""
|
||||
|
||||
import logging
|
||||
from typing import List, Iterator
|
||||
from typing import List, Iterator, Optional, Dict
|
||||
|
||||
from django.db.models import Min
|
||||
|
||||
from apps.asset.repositories.asset import DjangoHostPortMappingRepository
|
||||
from apps.asset.dtos.asset import HostPortMappingDTO
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HostPortMappingService:
|
||||
"""主机端口映射服务 - 负责主机端口映射数据的业务逻辑"""
|
||||
"""主机端口映射服务 - 负责主机端口映射数据的业务逻辑
|
||||
|
||||
职责:
|
||||
- 业务逻辑处理(过滤、聚合)
|
||||
- 调用 Repository 进行数据访问
|
||||
"""
|
||||
|
||||
# 智能过滤字段映射
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'ip': 'ip',
|
||||
'port': 'port',
|
||||
'host': 'host',
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.repo = DjangoHostPortMappingRepository()
|
||||
@@ -49,13 +64,106 @@ class HostPortMappingService:
|
||||
def iter_host_port_by_target(self, target_id: int, batch_size: int = 1000):
|
||||
return self.repo.get_for_export(target_id=target_id, batch_size=batch_size)
|
||||
|
||||
def get_ip_aggregation_by_target(self, target_id: int, search: str = None):
|
||||
return self.repo.get_ip_aggregation_by_target(target_id, search=search)
|
||||
def get_ip_aggregation_by_target(
|
||||
self,
|
||||
target_id: int,
|
||||
filter_query: Optional[str] = None
|
||||
) -> List[Dict]:
|
||||
"""获取目标下的 IP 聚合数据
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
filter_query: 智能过滤语法字符串
|
||||
|
||||
Returns:
|
||||
聚合后的 IP 数据列表
|
||||
"""
|
||||
# 从 Repository 获取基础 QuerySet
|
||||
qs = self.repo.get_queryset_by_target(target_id)
|
||||
|
||||
# Service 层应用过滤逻辑
|
||||
if filter_query:
|
||||
qs = apply_filters(qs, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
|
||||
# Service 层处理聚合逻辑
|
||||
return self._aggregate_by_ip(qs, filter_query, target_id=target_id)
|
||||
|
||||
def get_all_ip_aggregation(self, search: str = None):
|
||||
"""获取所有 IP 聚合数据(全局查询)"""
|
||||
return self.repo.get_all_ip_aggregation(search=search)
|
||||
def get_all_ip_aggregation(self, filter_query: Optional[str] = None) -> List[Dict]:
|
||||
"""获取所有 IP 聚合数据(全局查询)
|
||||
|
||||
Args:
|
||||
filter_query: 智能过滤语法字符串
|
||||
|
||||
Returns:
|
||||
聚合后的 IP 数据列表
|
||||
"""
|
||||
# 从 Repository 获取基础 QuerySet
|
||||
qs = self.repo.get_all_queryset()
|
||||
|
||||
# Service 层应用过滤逻辑
|
||||
if filter_query:
|
||||
qs = apply_filters(qs, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
|
||||
# Service 层处理聚合逻辑
|
||||
return self._aggregate_by_ip(qs, filter_query)
|
||||
|
||||
def _aggregate_by_ip(
|
||||
self,
|
||||
qs,
|
||||
filter_query: Optional[str] = None,
|
||||
target_id: Optional[int] = None
|
||||
) -> List[Dict]:
|
||||
"""按 IP 聚合数据
|
||||
|
||||
Args:
|
||||
qs: 已过滤的 QuerySet
|
||||
filter_query: 过滤条件(用于子查询)
|
||||
target_id: 目标 ID(用于子查询限定范围)
|
||||
|
||||
Returns:
|
||||
聚合后的数据列表
|
||||
"""
|
||||
ip_aggregated = (
|
||||
qs
|
||||
.values('ip')
|
||||
.annotate(created_at=Min('created_at'))
|
||||
.order_by('-created_at')
|
||||
)
|
||||
|
||||
results = []
|
||||
for item in ip_aggregated:
|
||||
ip = item['ip']
|
||||
|
||||
# 获取该 IP 的所有 host 和 port(也需要应用过滤条件)
|
||||
mappings_qs = self.repo.get_queryset_by_ip(ip, target_id=target_id)
|
||||
if filter_query:
|
||||
mappings_qs = apply_filters(mappings_qs, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
|
||||
mappings = mappings_qs.values('host', 'port').distinct()
|
||||
hosts = sorted({m['host'] for m in mappings})
|
||||
ports = sorted({m['port'] for m in mappings})
|
||||
|
||||
results.append({
|
||||
'ip': ip,
|
||||
'hosts': hosts,
|
||||
'ports': ports,
|
||||
'created_at': item['created_at'],
|
||||
})
|
||||
|
||||
return results
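_aggregate_by_ip yields one dict per IP with de-duplicated, sorted hosts and ports plus the earliest created_at of the group. A hedged example of calling the service and the shape that comes back (all values made up):

    service = HostPortMappingService()
    rows = service.get_ip_aggregation_by_target(target_id=42, filter_query='host=example.com')
    # rows ->
    # [{'ip': '203.0.113.10',
    #   'hosts': ['api.example.com', 'www.example.com'],
    #   'ports': [80, 443],
    #   'created_at': datetime.datetime(2024, 5, 1, 12, 0)}]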
|
||||
|
||||
def iter_ips_by_target(self, target_id: int, batch_size: int = 1000) -> Iterator[str]:
|
||||
"""流式获取目标下的所有唯一 IP 地址。"""
|
||||
return self.repo.get_ips_for_export(target_id=target_id, batch_size=batch_size)
|
||||
|
||||
def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
|
||||
Yields:
|
||||
原始数据字典 {ip, host, port, created_at}
|
||||
"""
|
||||
return self.repo.iter_raw_data_for_export(target_id=target_id)
|
||||
|
||||
@@ -1,15 +1,33 @@
|
||||
import logging
|
||||
from typing import Tuple, List, Dict
|
||||
from typing import List, Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from apps.asset.repositories import DjangoSubdomainRepository
|
||||
from apps.asset.dtos import SubdomainDTO
|
||||
from apps.common.validators import is_valid_domain
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class BulkCreateResult:
|
||||
"""批量创建结果"""
|
||||
created_count: int
|
||||
skipped_count: int
|
||||
invalid_count: int
|
||||
mismatched_count: int
|
||||
total_received: int
|
||||
|
||||
|
||||
class SubdomainService:
|
||||
"""子域名业务逻辑层"""
|
||||
|
||||
# 智能过滤字段映射
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'name': 'name',
|
||||
}
|
||||
|
||||
def __init__(self, repository=None):
|
||||
"""
|
||||
初始化子域名服务
|
||||
@@ -21,44 +39,50 @@ class SubdomainService:
|
||||
|
||||
# ==================== 查询操作 ====================
|
||||
|
||||
def get_all(self):
|
||||
def get_all(self, filter_query: Optional[str] = None):
|
||||
"""
|
||||
获取所有子域名
|
||||
|
||||
Args:
|
||||
filter_query: 智能过滤语法字符串
|
||||
|
||||
Returns:
|
||||
QuerySet: 子域名查询集
|
||||
"""
|
||||
logger.debug("获取所有子域名")
|
||||
return self.repo.get_all()
|
||||
queryset = self.repo.get_all()
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset
|
||||
|
||||
# ==================== 创建操作 ====================
|
||||
|
||||
def get_or_create(self, name: str, target_id: int) -> Tuple[any, bool]:
|
||||
def get_subdomains_by_target(self, target_id: int, filter_query: Optional[str] = None):
|
||||
"""
|
||||
获取或创建子域名
|
||||
获取目标下的子域名
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
filter_query: 智能过滤语法字符串
|
||||
|
||||
Returns:
|
||||
QuerySet: 子域名查询集
|
||||
"""
|
||||
queryset = self.repo.get_by_target(target_id)
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset
|
||||
|
||||
def count_subdomains_by_target(self, target_id: int) -> int:
|
||||
"""
|
||||
统计目标下的子域名数量
|
||||
|
||||
Args:
|
||||
name: 子域名名称
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
(Subdomain对象, 是否新创建)
|
||||
int: 子域名数量
|
||||
"""
|
||||
logger.debug("获取或创建子域名 - Name: %s, Target ID: %d", name, target_id)
|
||||
return self.repo.get_or_create(name, target_id)
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[SubdomainDTO]) -> None:
|
||||
"""
|
||||
批量创建子域名,忽略冲突
|
||||
|
||||
Args:
|
||||
items: 子域名 DTO 列表
|
||||
|
||||
Note:
|
||||
使用 ignore_conflicts 策略,重复记录会被跳过
|
||||
"""
|
||||
logger.debug("批量创建子域名 - 数量: %d", len(items))
|
||||
return self.repo.bulk_create_ignore_conflicts(items)
|
||||
logger.debug("统计目标下子域名数量 - Target ID: %d", target_id)
|
||||
return self.repo.count_by_target(target_id)
|
||||
|
||||
def get_by_names_and_target_id(self, names: set, target_id: int) -> dict:
|
||||
"""
|
||||
@@ -85,25 +109,8 @@ class SubdomainService:
|
||||
List[str]: 子域名名称列表
|
||||
"""
|
||||
logger.debug("获取目标下所有子域名 - Target ID: %d", target_id)
|
||||
# 通过仓储层统一访问数据库,内部已使用 iterator() 做流式查询
|
||||
return list(self.repo.get_domains_for_export(target_id=target_id))
|
||||
|
||||
def get_subdomains_by_target(self, target_id: int):
|
||||
return self.repo.get_by_target(target_id)
|
||||
|
||||
def count_subdomains_by_target(self, target_id: int) -> int:
|
||||
"""
|
||||
统计目标下的子域名数量
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
|
||||
Returns:
|
||||
int: 子域名数量
|
||||
"""
|
||||
logger.debug("统计目标下子域名数量 - Target ID: %d", target_id)
|
||||
return self.repo.count_by_target(target_id)
|
||||
|
||||
def iter_subdomain_names_by_target(self, target_id: int, chunk_size: int = 1000):
|
||||
"""
|
||||
流式获取目标下的所有子域名名称(内存优化)
|
||||
@@ -116,8 +123,123 @@ class SubdomainService:
|
||||
str: 子域名名称
|
||||
"""
|
||||
logger.debug("流式获取目标下所有子域名 - Target ID: %d, 批次大小: %d", target_id, chunk_size)
|
||||
# 通过仓储层统一访问数据库,内部已使用 iterator() 做流式查询
|
||||
return self.repo.get_domains_for_export(target_id=target_id, batch_size=chunk_size)
|
||||
|
||||
def iter_raw_data_for_csv_export(self, target_id: int):
|
||||
"""
|
||||
流式获取原始数据用于 CSV 导出
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
|
||||
Yields:
|
||||
原始数据字典 {name, created_at}
|
||||
"""
|
||||
return self.repo.iter_raw_data_for_export(target_id=target_id)
|
||||
|
||||
__all__ = ['SubdomainService']
|
||||
# ==================== 创建操作 ====================
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[SubdomainDTO]) -> None:
|
||||
"""
|
||||
批量创建子域名,忽略冲突
|
||||
|
||||
Args:
|
||||
items: 子域名 DTO 列表
|
||||
|
||||
Note:
|
||||
使用 ignore_conflicts 策略,重复记录会被跳过
|
||||
"""
|
||||
logger.debug("批量创建子域名 - 数量: %d", len(items))
|
||||
return self.repo.bulk_create_ignore_conflicts(items)
|
||||
|
||||
def bulk_create_subdomains(
|
||||
self,
|
||||
target_id: int,
|
||||
target_name: str,
|
||||
subdomains: List[str]
|
||||
) -> BulkCreateResult:
|
||||
"""
|
||||
批量创建子域名(带验证)
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
target_name: 目标域名(用于匹配验证)
|
||||
subdomains: 子域名列表
|
||||
|
||||
Returns:
|
||||
BulkCreateResult: 创建结果统计
|
||||
"""
|
||||
total_received = len(subdomains)
|
||||
target_name = target_name.lower().strip()
|
||||
|
||||
def is_subdomain_match(subdomain: str) -> bool:
|
||||
"""验证子域名是否匹配目标域名"""
|
||||
if subdomain == target_name:
|
||||
return True
|
||||
if subdomain.endswith('.' + target_name):
|
||||
return True
|
||||
return False
|
||||
|
||||
# 过滤有效的子域名
|
||||
valid_subdomains = []
|
||||
invalid_count = 0
|
||||
mismatched_count = 0
|
||||
|
||||
for subdomain in subdomains:
|
||||
if not isinstance(subdomain, str) or not subdomain.strip():
|
||||
continue
|
||||
|
||||
subdomain = subdomain.lower().strip()
|
||||
|
||||
# 验证格式
|
||||
if not is_valid_domain(subdomain):
|
||||
invalid_count += 1
|
||||
continue
|
||||
|
||||
# 验证匹配
|
||||
if not is_subdomain_match(subdomain):
|
||||
mismatched_count += 1
|
||||
continue
|
||||
|
||||
valid_subdomains.append(subdomain)
|
||||
|
||||
# 去重
|
||||
unique_subdomains = list(set(valid_subdomains))
|
||||
duplicate_count = len(valid_subdomains) - len(unique_subdomains)
|
||||
|
||||
if not unique_subdomains:
|
||||
return BulkCreateResult(
|
||||
created_count=0,
|
||||
skipped_count=duplicate_count,
|
||||
invalid_count=invalid_count,
|
||||
mismatched_count=mismatched_count,
|
||||
total_received=total_received,
|
||||
)
|
||||
|
||||
# 获取创建前的数量
|
||||
count_before = self.repo.count_by_target(target_id)
|
||||
|
||||
# 创建 DTO 列表并批量创建
|
||||
subdomain_dtos = [
|
||||
SubdomainDTO(name=name, target_id=target_id)
|
||||
for name in unique_subdomains
|
||||
]
|
||||
self.repo.bulk_create_ignore_conflicts(subdomain_dtos)
|
||||
|
||||
# 获取创建后的数量
|
||||
count_after = self.repo.count_by_target(target_id)
|
||||
created_count = count_after - count_before
|
||||
|
||||
# 计算因数据库冲突跳过的数量
|
||||
db_skipped = len(unique_subdomains) - created_count
|
||||
|
||||
return BulkCreateResult(
|
||||
created_count=created_count,
|
||||
skipped_count=duplicate_count + db_skipped,
|
||||
invalid_count=invalid_count,
|
||||
mismatched_count=mismatched_count,
|
||||
total_received=total_received,
|
||||
)
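A hedged usage sketch of bulk_create_subdomains; the counters follow the validation rules above and the sample data is made up:

    service = SubdomainService()
    result = service.bulk_create_subdomains(
        target_id=42,
        target_name='Example.COM',      # normalised to lower case internally
        subdomains=[
            'www.example.com',          # valid and matching
            'WWW.example.com',          # same after lower() -> counted as skipped duplicate
            'mail.other.org',           # wrong parent domain -> mismatched_count
            'not_a_domain',             # fails is_valid_domain -> invalid_count
        ],
    )
    print(result.created_count, result.skipped_count,
          result.invalid_count, result.mismatched_count, result.total_received)

skipped_count also absorbs rows the database rejected as duplicates of existing records, since ignore_conflicts gives no per-row feedback and the created count is derived from counting before and after.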
|
||||
|
||||
|
||||
__all__ = ['SubdomainService', 'BulkCreateResult']
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
"""Vulnerability Service - 漏洞资产业务逻辑层"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
from apps.asset.models import Vulnerability
|
||||
from apps.asset.dtos.asset import VulnerabilityDTO
|
||||
from apps.common.decorators import auto_ensure_db_connection
|
||||
from apps.common.utils import deduplicate_for_bulk
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -16,10 +18,20 @@ class VulnerabilityService:
|
||||
|
||||
当前提供基础的批量创建能力,使用 ignore_conflicts 依赖数据库唯一约束去重。
|
||||
"""
|
||||
|
||||
# 智能过滤字段映射
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'type': 'vuln_type',
|
||||
'severity': 'severity',
|
||||
'source': 'source',
|
||||
'url': 'url',
|
||||
}
|
||||
|
||||
def bulk_create_ignore_conflicts(self, items: List[VulnerabilityDTO]) -> None:
|
||||
"""批量创建漏洞资产记录,忽略冲突。
|
||||
|
||||
注意:会自动按 (target_id, url, vuln_type, source) 去重,保留最后一条记录。
|
||||
|
||||
Note:
|
||||
- 是否去重取决于模型上的唯一/部分唯一约束;
|
||||
- 当前 Vulnerability 模型未定义唯一约束,因此会保留全部记录。
|
||||
@@ -29,6 +41,9 @@ class VulnerabilityService:
|
||||
return
|
||||
|
||||
try:
|
||||
# 根据模型唯一约束自动去重(如果模型没有唯一约束则跳过)
|
||||
unique_items = deduplicate_for_bulk(items, Vulnerability)
|
||||
|
||||
vulns = [
|
||||
Vulnerability(
|
||||
target_id=item.target_id,
|
||||
@@ -40,7 +55,7 @@ class VulnerabilityService:
|
||||
description=item.description,
|
||||
raw_output=item.raw_output,
|
||||
)
|
||||
for item in items
|
||||
for item in unique_items
|
||||
]
|
||||
|
||||
Vulnerability.objects.bulk_create(vulns, ignore_conflicts=True)
|
||||
@@ -57,24 +72,34 @@ class VulnerabilityService:
|
||||
|
||||
# ==================== 查询方法 ====================
|
||||
|
||||
def get_all(self):
|
||||
def get_all(self, filter_query: Optional[str] = None):
|
||||
"""获取所有漏洞 QuerySet(用于全局漏洞列表)。
|
||||
|
||||
Returns:
|
||||
QuerySet[Vulnerability]: 所有漏洞,按发现时间倒序
|
||||
"""
|
||||
return Vulnerability.objects.filter(deleted_at__isnull=True).order_by("-discovered_at")
|
||||
Args:
|
||||
filter_query: 智能过滤语法字符串
|
||||
|
||||
def get_queryset_by_target(self, target_id: int):
|
||||
Returns:
|
||||
QuerySet[Vulnerability]: 所有漏洞,按创建时间倒序
|
||||
"""
|
||||
queryset = Vulnerability.objects.all().order_by("-created_at")
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset
|
||||
|
||||
def get_vulnerabilities_by_target(self, target_id: int, filter_query: Optional[str] = None):
|
||||
"""按目标获取漏洞 QuerySet(用于分页)。
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
filter_query: 智能过滤语法字符串
|
||||
|
||||
Returns:
|
||||
QuerySet[Vulnerability]: 目标下的所有漏洞,按发现时间倒序
|
||||
QuerySet[Vulnerability]: 目标下的所有漏洞,按创建时间倒序
|
||||
"""
|
||||
return Vulnerability.objects.filter(target_id=target_id).order_by("-discovered_at")
|
||||
queryset = Vulnerability.objects.filter(target_id=target_id).order_by("-created_at")
|
||||
if filter_query:
|
||||
queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
|
||||
return queryset
|
||||
|
||||
def count_by_target(self, target_id: int) -> int:
|
||||
"""统计目标下的漏洞数量。"""
|
||||
|
||||
@@ -1,8 +1,12 @@
"""WebSite Service - website business-logic layer"""

import logging
from typing import Tuple, List
from typing import List, Iterator, Optional

from apps.asset.repositories import DjangoWebSiteRepository
from apps.asset.dtos import WebSiteDTO
from apps.common.validators import is_valid_url, is_url_match_target
from apps.common.utils.filter_utils import apply_filters

logger = logging.getLogger(__name__)

@@ -10,82 +14,128 @@ logger = logging.getLogger(__name__)
class WebSiteService:
    """Website business-logic layer"""

    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status': 'status_code',
    }

    def __init__(self, repository=None):
        """
        Initialize the website service

        Args:
            repository: website repository instance (for dependency injection)
        """
        """Initialize the website service"""
        self.repo = repository or DjangoWebSiteRepository()

    # ==================== Create operations ====================

    def bulk_create_ignore_conflicts(self, website_dtos: List[WebSiteDTO]) -> None:
    def bulk_upsert(self, website_dtos: List[WebSiteDTO]) -> int:
        """
        Bulk-create website records, ignoring conflicts (used by scan tasks)
        Bulk-create or update websites (upsert)

        Existing records have all fields updated; missing records are created.

        Args:
            website_dtos: list of WebSiteDTO

        Note:
            Uses the ignore_conflicts strategy; duplicate records are skipped
        """
        logger.debug("Bulk-creating websites - count: %d", len(website_dtos))
        return self.repo.bulk_create_ignore_conflicts(website_dtos)

    # ==================== Query operations ====================

    def get_by_url(self, url: str, target_id: int) -> int:
        """
        Look up a website ID by URL and target_id

        Args:
            url: website URL
            target_id: target ID

        Returns:
            int: website ID, or None if it does not exist
            int: number of records processed
        """
        return self.repo.get_by_url(url=url, target_id=target_id)

    # ==================== Query operations ====================

    def get_all(self):
        """
        Get all websites
        if not website_dtos:
            return 0

        Returns:
            QuerySet: website queryset
        """
        logger.debug("Fetching all websites")
        return self.repo.get_all()
        try:
            return self.repo.bulk_upsert(website_dtos)
        except Exception as e:
            logger.error(f"Bulk upsert of websites failed: {e}")
            raise

    def get_websites_by_target(self, target_id: int):
        return self.repo.get_by_target(target_id)

    def count_websites_by_scan(self, scan_id: int) -> int:
    def bulk_create_urls(self, target_id: int, target_name: str, target_type: str, urls: List[str]) -> int:
        """
        Count websites under a scan
        Bulk-create websites (URL only, using ignore_conflicts)

        Validates URL format and target match, drops invalid/non-matching URLs,
        de-duplicates, then bulk-creates. Existing records are skipped.

        Args:
            scan_id: scan ID

            target_id: target ID
            target_name: target name (used for match validation)
            target_type: target type ('domain', 'ip', 'cidr')
            urls: list of URLs

        Returns:
            int: website count
            int: number of records actually created
        """
        logger.debug("Counting websites under scan - Scan ID: %d", scan_id)
        return self.repo.count_by_scan(scan_id)
        if not urls:
            return 0

        # Filter valid URLs and de-duplicate
        valid_urls = []
        seen = set()

        for url in urls:
            if not isinstance(url, str):
                continue
            url = url.strip()
            if not url or url in seen:
                continue
            if not is_valid_url(url):
                continue

            # Match validation (the frontend already blocks mismatched submissions; the backend double-checks)
            if not is_url_match_target(url, target_name, target_type):
                continue

            seen.add(url)
            valid_urls.append(url)

        if not valid_urls:
            return 0

        # Count before creation
        count_before = self.repo.count_by_target(target_id)

        # Build the DTO list and bulk-create
        website_dtos = [
            WebSiteDTO(url=url, target_id=target_id)
            for url in valid_urls
        ]
        self.repo.bulk_create_ignore_conflicts(website_dtos)

        # Count after creation
        count_after = self.repo.count_by_target(target_id)
        return count_after - count_before

    def get_websites_by_target(self, target_id: int, filter_query: Optional[str] = None):
        """Get all websites under a target"""
        queryset = self.repo.get_by_target(target_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self, filter_query: Optional[str] = None):
        """Get all websites"""
        queryset = self.repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_by_url(self, url: str, target_id: int) -> int:
        """Look up a website ID by URL and target_id"""
        return self.repo.get_by_url(url=url, target_id=target_id)

    def iter_website_urls_by_target(self, target_id: int, chunk_size: int = 1000):
        """Stream all site URLs under a target (memory friendly, delegated to the repository layer)"""
        logger.debug(
            "Streaming all site URLs under target - Target ID: %d, batch size: %d",
            target_id,
            chunk_size,
        )
        # Go through the repository layer so the Service does not depend on the ORM directly
        """Stream all site URLs under a target"""
        return self.repo.get_urls_for_export(target_id=target_id, batch_size=chunk_size)

    def iter_raw_data_for_csv_export(self, target_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            target_id: target ID

        Yields:
            raw data dict
        """
        return self.repo.iter_raw_data_for_export(target_id=target_id)


__all__ = ['WebSiteService']
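The validate-and-dedupe loop inside bulk_create_urls above is simple enough to sketch outside Django. The snippet below is a standalone approximation: _valid and _matches are simplified stand-ins for is_valid_url / is_url_match_target, shown only to illustrate the filtering order (strip, de-duplicate, format check, target match).

from urllib.parse import urlparse

def _valid(url: str) -> bool:
    # stand-in for is_valid_url: require an http(s) scheme and a hostname
    p = urlparse(url)
    return p.scheme in ("http", "https") and bool(p.hostname)

def _matches(url: str, target: str) -> bool:
    # stand-in for is_url_match_target with target_type='domain'
    host = (urlparse(url).hostname or "").lower()
    return host == target or host.endswith("." + target)

def clean_urls(urls: list, target: str) -> list:
    seen, valid = set(), []
    for url in urls:
        url = url.strip() if isinstance(url, str) else ""
        if not url or url in seen or not _valid(url) or not _matches(url, target):
            continue
        seen.add(url)
        valid.append(url)
    return valid

print(clean_urls(["https://a.example.com/x", "ftp://bad", "https://a.example.com/x", "https://other.com"], "example.com"))
# -> ['https://a.example.com/x']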
@@ -26,10 +26,9 @@ class DirectorySnapshotsService:
        2. Sync to the asset table (de-duplicated, without scan_id)

        Args:
            items: list of directory snapshot DTOs (must contain website_id)
            items: list of directory snapshot DTOs (must contain target_id)

        Raises:
            ValueError: if website_id is None for any item
            Exception: database operation failed
        """
        if not items:
@@ -49,14 +48,13 @@ class DirectorySnapshotsService:
            logger.debug("Step 1: save to the snapshot table")
            self.snapshot_repo.save_snapshots(items)

            # Step 2: convert to asset DTOs and save to the asset table
            # Note: de-duplication relies on the database UNIQUE constraint + ignore_conflicts
            # Step 2: convert to asset DTOs and save to the asset table (upsert)
            # - new records: inserted into the asset table
            # - existing records: skipped automatically
            logger.debug("Step 2: sync to the asset table (via the Service layer)")
            # - existing records: fields updated (created_at is kept, not overwritten)
            logger.debug("Step 2: sync to the asset table (via the Service layer, upsert)")
            asset_items = [item.to_asset_dto() for item in items]

            self.asset_service.bulk_create_ignore_conflicts(asset_items)
            self.asset_service.bulk_upsert(asset_items)

            logger.info("Directory snapshots and asset data saved - count: %d", len(items))

@@ -69,15 +67,44 @@ class DirectorySnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        return self.snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'status': 'status',
        'content_type': 'content_type',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self):
    def get_all(self, filter_query: str = None):
        """Get all directory snapshots"""
        return self.snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_directory_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all directory URLs for a scan."""
        queryset = self.snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.url

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID

        Yields:
            raw data dict
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)
@@ -50,13 +50,11 @@ class EndpointSnapshotsService:
            self.snapshot_repo.save_snapshots(items)

            # Step 2: convert to asset DTOs and save to the asset table
            # Note: de-duplication relies on the database UNIQUE constraint + ignore_conflicts
            # - new records: inserted into the asset table
            # - existing records: skipped automatically
            # Use upsert: new records are inserted, existing records are updated
            logger.debug("Step 2: sync to the asset table (via the Service layer)")
            asset_items = [item.to_asset_dto() for item in items]

            self.asset_service.bulk_create_endpoints(asset_items)
            self.asset_service.bulk_upsert(asset_items)

            logger.info("Endpoint snapshots and asset data saved - count: %d", len(items))

@@ -69,15 +67,47 @@ class EndpointSnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        return self.snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status': 'status_code',
        'webserver': 'webserver',
        'tech': 'tech',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self):
    def get_all(self, filter_query: str = None):
        """Get all endpoint snapshots"""
        return self.snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_endpoint_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all endpoint URLs for a scan."""
        queryset = self.snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.url

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID

        Yields:
            raw data dict
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)
@@ -69,13 +69,25 @@ class HostPortMappingSnapshotsService:
            )
            raise

    def get_ip_aggregation_by_scan(self, scan_id: int, search: str = None):
        return self.snapshot_repo.get_ip_aggregation_by_scan(scan_id, search=search)
    def get_ip_aggregation_by_scan(self, scan_id: int, filter_query: str = None):
        return self.snapshot_repo.get_ip_aggregation_by_scan(scan_id, filter_query=filter_query)

    def get_all_ip_aggregation(self, search: str = None):
    def get_all_ip_aggregation(self, filter_query: str = None):
        """Get all IP aggregation data"""
        return self.snapshot_repo.get_all_ip_aggregation(search=search)
        return self.snapshot_repo.get_all_ip_aggregation(filter_query=filter_query)

    def iter_ips_by_scan(self, scan_id: int, batch_size: int = 1000) -> Iterator[str]:
        """Stream all unique IP addresses for a scan."""
        return self.snapshot_repo.get_ips_for_export(scan_id=scan_id, batch_size=batch_size)

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID

        Yields:
            raw data dict {ip, host, port, created_at}
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)
@@ -66,14 +66,41 @@ class SubdomainSnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        return self.subdomain_snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'name': 'name',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.subdomain_snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self):
    def get_all(self, filter_query: str = None):
        """Get all subdomain snapshots"""
        return self.subdomain_snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.subdomain_snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_subdomain_names_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        queryset = self.subdomain_snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.name
            yield snapshot.name

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID

        Yields:
            raw data dict {name, created_at}
        """
        return self.subdomain_snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)
@@ -66,13 +66,31 @@ class VulnerabilitySnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        """Get all vulnerability snapshots for a scan."""
        return self.snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'type': 'vuln_type',
        'url': 'url',
        'severity': 'severity',
        'source': 'source',
    }

    def get_all(self):
    def get_by_scan(self, scan_id: int, filter_query: str = None):
        """Get all vulnerability snapshots for a scan."""
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self, filter_query: str = None):
        """Get all vulnerability snapshots"""
        return self.snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_vuln_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all vulnerability URLs for a scan."""
@@ -49,14 +49,13 @@ class WebsiteSnapshotsService:
            logger.debug("Step 1: save to the snapshot table")
            self.snapshot_repo.save_snapshots(items)

            # Step 2: convert to asset DTOs and save to the asset table
            # Note: de-duplication relies on the database UNIQUE constraint + ignore_conflicts
            # Step 2: convert to asset DTOs and save to the asset table (upsert)
            # - new records: inserted into the asset table
            # - existing records: skipped automatically
            logger.debug("Step 2: sync to the asset table (via the Service layer)")
            # - existing records: fields updated (created_at is kept, not overwritten)
            logger.debug("Step 2: sync to the asset table (via the Service layer, upsert)")
            asset_items = [item.to_asset_dto() for item in items]

            self.asset_service.bulk_create_ignore_conflicts(asset_items)
            self.asset_service.bulk_upsert(asset_items)

            logger.info("Website snapshots and asset data saved - count: %d", len(items))

@@ -69,15 +68,47 @@ class WebsiteSnapshotsService:
            )
            raise

    def get_by_scan(self, scan_id: int):
        return self.snapshot_repo.get_by_scan(scan_id)
    # Smart-filter field mapping
    FILTER_FIELD_MAPPING = {
        'url': 'url',
        'host': 'host',
        'title': 'title',
        'status': 'status',
        'webserver': 'web_server',
        'tech': 'tech',
    }

    def get_by_scan(self, scan_id: int, filter_query: str = None):
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_by_scan(scan_id)
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def get_all(self):
    def get_all(self, filter_query: str = None):
        """Get all website snapshots"""
        return self.snapshot_repo.get_all()
        from apps.common.utils.filter_utils import apply_filters

        queryset = self.snapshot_repo.get_all()
        if filter_query:
            queryset = apply_filters(queryset, filter_query, self.FILTER_FIELD_MAPPING)
        return queryset

    def iter_website_urls_by_scan(self, scan_id: int, chunk_size: int = 1000) -> Iterator[str]:
        """Stream all site URLs for a scan (ordered by discovery time, descending)."""
        """Stream all site URLs for a scan (ordered by creation time, descending)."""
        queryset = self.snapshot_repo.get_by_scan(scan_id)
        for snapshot in queryset.iterator(chunk_size=chunk_size):
            yield snapshot.url

    def iter_raw_data_for_csv_export(self, scan_id: int) -> Iterator[dict]:
        """
        Stream raw data for CSV export

        Args:
            scan_id: scan ID

        Yields:
            raw data dict
        """
        return self.snapshot_repo.iter_raw_data_for_export(scan_id=scan_id)
File diff suppressed because it is too large
@@ -14,6 +14,10 @@ import os
import sys
import requests
import logging
import urllib3

# Disable SSL warnings for self-signed certificates (remote Worker scenario)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

logger = logging.getLogger(__name__)

@@ -30,17 +34,27 @@ def fetch_config_and_setup_django():
        print("[ERROR] missing SERVER_URL environment variable", file=sys.stderr)
        sys.exit(1)

    config_url = f"{server_url}/api/workers/config/"
    # Declare the Worker identity (local/remote) via an environment variable
    is_local = os.environ.get("IS_LOCAL", "false").lower() == "true"
    config_url = f"{server_url}/api/workers/config/?is_local={str(is_local).lower()}"
    print(f"[CONFIG] fetching configuration from the config center: {config_url}")
    print(f"[CONFIG] IS_LOCAL={is_local}")
    try:
        resp = requests.get(config_url, timeout=10)
        # verify=False: remote Workers may access over HTTPS with a self-signed certificate
        resp = requests.get(config_url, timeout=10, verify=False)
        resp.raise_for_status()
        config = resp.json()

        # Database configuration (required)
        os.environ.setdefault("DB_HOST", config['db']['host'])
        os.environ.setdefault("DB_PORT", config['db']['port'])
        os.environ.setdefault("DB_NAME", config['db']['name'])
        os.environ.setdefault("DB_USER", config['db']['user'])
        db_host = config['db']['host']
        db_port = config['db']['port']
        db_name = config['db']['name']
        db_user = config['db']['user']

        os.environ.setdefault("DB_HOST", db_host)
        os.environ.setdefault("DB_PORT", db_port)
        os.environ.setdefault("DB_NAME", db_name)
        os.environ.setdefault("DB_USER", db_user)
        os.environ.setdefault("DB_PASSWORD", config['db']['password'])

        # Redis configuration
@@ -52,7 +66,19 @@ def fetch_config_and_setup_django():
        os.environ.setdefault("ENABLE_COMMAND_LOGGING", str(config['logging']['enableCommandLogging']).lower())
        os.environ.setdefault("DEBUG", str(config['debug']))

        print(f"[CONFIG] configuration fetched from the config center: {config_url}")
        # Git acceleration configuration (used to speed up git clone)
        git_mirror = config.get('gitMirror', '')
        if git_mirror:
            os.environ.setdefault("GIT_MIRROR", git_mirror)

        print(f"[CONFIG] ✓ configuration fetched successfully")
        print(f"[CONFIG] DB_HOST: {db_host}")
        print(f"[CONFIG] DB_PORT: {db_port}")
        print(f"[CONFIG] DB_NAME: {db_name}")
        print(f"[CONFIG] DB_USER: {db_user}")
        print(f"[CONFIG] REDIS_URL: {config['redisUrl']}")
        if git_mirror:
            print(f"[CONFIG] GIT_MIRROR: {git_mirror}")

    except Exception as e:
        print(f"[ERROR] failed to fetch configuration: {config_url} - {e}", file=sys.stderr)
@@ -16,6 +16,7 @@ def setup_django_for_prefect():
    1. Add the project root to the Python path
    2. Set the DJANGO_SETTINGS_MODULE environment variable
    3. Call django.setup() to initialize Django
    4. Close stale database connections so fresh ones are used

    Usage:
        from apps.common.prefect_django_setup import setup_django_for_prefect
@@ -36,6 +37,25 @@ def setup_django_for_prefect():
    # Initialize Django
    import django
    django.setup()

    # Close all stale database connections so Worker processes use fresh ones
    # Fixes the "server closed the connection unexpectedly" problem
    from django.db import connections
    connections.close_all()


def close_old_db_connections():
    """
    Close stale database connections

    Call this in long-running tasks to make sure a valid database connection is used.
    Suitable for:
    - before a Flow starts
    - before a Task starts
    - before resuming work after a long idle period
    """
    from django.db import connections
    connections.close_all()


# Run initialization automatically (takes effect on import)
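A minimal usage sketch for the two helpers above, assuming the xingrin backend package is importable and DJANGO_SETTINGS_MODULE is set; the task name is illustrative.

from apps.common.prefect_django_setup import setup_django_for_prefect, close_old_db_connections

setup_django_for_prefect()  # sys.path setup, django.setup(), close stale connections

def long_running_task():
    # refresh connections before ORM work after a long idle period
    close_old_db_connections()
    ...  # ORM queries go here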
@@ -3,8 +3,13 @@

Provides system-level shared services, including:
- SystemLogService: system log reading service

Note: FilterService has moved to apps.common.utils.filter_utils
Recommended usage: from apps.common.utils.filter_utils import apply_filters
"""

from .system_log_service import SystemLogService

__all__ = ['SystemLogService']
__all__ = [
    'SystemLogService',
]

@@ -21,8 +21,8 @@ class SystemLogService:
    """

    def __init__(self):
        # Log file path (in-container path, mounted to /opt/xingrin/logs on the host via a volume)
        self.log_file = "/app/backend/logs/xingrin.log"
        # Log file path (unified under /opt/xingrin/logs)
        self.log_file = "/opt/xingrin/logs/xingrin.log"
        self.default_lines = 200  # default number of lines returned
        self.max_lines = 10000  # upper limit on returned lines
        self.timeout_seconds = 3  # tail command timeout

@@ -27,3 +27,10 @@ vulnerabilities_saved = Signal()
# - worker_name: str Worker name
# - message: str failure reason
worker_delete_failed = Signal()

# Signal: all Workers under high load
# Arguments:
# - worker_name: str name of the Worker that was selected
# - cpu: float CPU usage
# - mem: float memory usage
all_workers_high_load = Signal()
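A hedged sketch of consuming the new signal; the diff does not show where these Signal objects live, so the import path below is illustrative.

from django.dispatch import receiver
from apps.common import signals  # illustrative path; import from wherever all_workers_high_load is defined

@receiver(signals.all_workers_high_load)
def log_high_load(sender, worker_name, cpu, mem, **kwargs):
    # arguments follow the comment block above: the selected worker plus CPU/memory usage
    print(f"All workers busy; picked {worker_name} (cpu={cpu}%, mem={mem}%)")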
backend/apps/common/utils/__init__.py (new file, 30 lines)
@@ -0,0 +1,30 @@
"""Common utilities"""

from .dedup import deduplicate_for_bulk, get_unique_fields
from .hash import (
    calc_file_sha256,
    calc_stream_sha256,
    safe_calc_file_sha256,
    is_file_hash_match,
)
from .csv_utils import (
    generate_csv_rows,
    format_list_field,
    format_datetime,
    UTF8_BOM,
)
from .git_proxy import get_git_proxy_url

__all__ = [
    'deduplicate_for_bulk',
    'get_unique_fields',
    'calc_file_sha256',
    'calc_stream_sha256',
    'safe_calc_file_sha256',
    'is_file_hash_match',
    'generate_csv_rows',
    'format_list_field',
    'format_datetime',
    'UTF8_BOM',
    'get_git_proxy_url',
]
backend/apps/common/utils/csv_utils.py (new file, 116 lines)
@@ -0,0 +1,116 @@
|
||||
"""CSV 导出工具模块
|
||||
|
||||
提供流式 CSV 生成功能,支持:
|
||||
- UTF-8 BOM(Excel 兼容)
|
||||
- RFC 4180 规范转义
|
||||
- 流式生成(内存友好)
|
||||
"""
|
||||
|
||||
import csv
|
||||
import io
|
||||
from datetime import datetime
|
||||
from typing import Iterator, Dict, Any, List, Callable, Optional
|
||||
|
||||
# UTF-8 BOM,确保 Excel 正确识别编码
|
||||
UTF8_BOM = '\ufeff'
|
||||
|
||||
|
||||
def generate_csv_rows(
|
||||
data_iterator: Iterator[Dict[str, Any]],
|
||||
headers: List[str],
|
||||
field_formatters: Optional[Dict[str, Callable]] = None
|
||||
) -> Iterator[str]:
|
||||
"""
|
||||
流式生成 CSV 行
|
||||
|
||||
Args:
|
||||
data_iterator: 数据迭代器,每个元素是一个字典
|
||||
headers: CSV 表头列表
|
||||
field_formatters: 字段格式化函数字典,key 为字段名,value 为格式化函数
|
||||
|
||||
Yields:
|
||||
CSV 行字符串(包含换行符)
|
||||
|
||||
Example:
|
||||
>>> data = [{'ip': '192.168.1.1', 'hosts': ['a.com', 'b.com']}]
|
||||
>>> headers = ['ip', 'hosts']
|
||||
>>> formatters = {'hosts': format_list_field}
|
||||
>>> for row in generate_csv_rows(iter(data), headers, formatters):
|
||||
... print(row, end='')
|
||||
"""
|
||||
# 输出 BOM + 表头
|
||||
output = io.StringIO()
|
||||
writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
|
||||
writer.writerow(headers)
|
||||
yield UTF8_BOM + output.getvalue()
|
||||
|
||||
# 输出数据行
|
||||
for row_data in data_iterator:
|
||||
output = io.StringIO()
|
||||
writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
|
||||
|
||||
row = []
|
||||
for header in headers:
|
||||
value = row_data.get(header, '')
|
||||
if field_formatters and header in field_formatters:
|
||||
value = field_formatters[header](value)
|
||||
row.append(value if value is not None else '')
|
||||
|
||||
writer.writerow(row)
|
||||
yield output.getvalue()
|
||||
|
||||
|
||||
def format_list_field(values: List, separator: str = ';') -> str:
|
||||
"""
|
||||
将列表字段格式化为分号分隔的字符串
|
||||
|
||||
Args:
|
||||
values: 值列表
|
||||
separator: 分隔符,默认为分号
|
||||
|
||||
Returns:
|
||||
分隔符连接的字符串
|
||||
|
||||
Example:
|
||||
>>> format_list_field(['a.com', 'b.com'])
|
||||
'a.com;b.com'
|
||||
>>> format_list_field([80, 443])
|
||||
'80;443'
|
||||
>>> format_list_field([])
|
||||
''
|
||||
>>> format_list_field(None)
|
||||
''
|
||||
"""
|
||||
if not values:
|
||||
return ''
|
||||
return separator.join(str(v) for v in values)
|
||||
|
||||
|
||||
def format_datetime(dt: Optional[datetime]) -> str:
|
||||
"""
|
||||
格式化日期时间为字符串(转换为本地时区)
|
||||
|
||||
Args:
|
||||
dt: datetime 对象或 None
|
||||
|
||||
Returns:
|
||||
格式化的日期时间字符串,格式为 YYYY-MM-DD HH:MM:SS(本地时区)
|
||||
|
||||
Example:
|
||||
>>> from datetime import datetime
|
||||
>>> format_datetime(datetime(2024, 1, 15, 10, 30, 0))
|
||||
'2024-01-15 10:30:00'
|
||||
>>> format_datetime(None)
|
||||
''
|
||||
"""
|
||||
if dt is None:
|
||||
return ''
|
||||
if isinstance(dt, str):
|
||||
return dt
|
||||
|
||||
# 转换为本地时区(从 Django settings 获取)
|
||||
from django.utils import timezone
|
||||
if timezone.is_aware(dt):
|
||||
dt = timezone.localtime(dt)
|
||||
|
||||
return dt.strftime('%Y-%m-%d %H:%M:%S')
|
||||
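A small usage sketch for the CSV helpers above (assumes apps.common.utils.csv_utils is importable); the sample rows and output path are made up.

from apps.common.utils.csv_utils import generate_csv_rows, format_list_field

rows = iter([
    {"ip": "192.0.2.1", "hosts": ["a.example.com", "b.example.com"], "ports": [80, 443]},
    {"ip": "192.0.2.2", "hosts": [], "ports": [22]},
])
headers = ["ip", "hosts", "ports"]
formatters = {"hosts": format_list_field, "ports": format_list_field}

with open("export.csv", "w", encoding="utf-8", newline="") as fh:
    for chunk in generate_csv_rows(rows, headers, formatters):
        fh.write(chunk)  # the first chunk carries the UTF-8 BOM plus the header row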
backend/apps/common/utils/dedup.py (new file, 101 lines)
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
批量数据去重工具
|
||||
|
||||
用于 bulk_create 前的批次内去重,避免 PostgreSQL ON CONFLICT 错误。
|
||||
自动从 Django 模型读取唯一约束字段,无需手动指定。
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, TypeVar, Tuple, Optional
|
||||
|
||||
from django.db import models
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
T = TypeVar('T')
|
||||
|
||||
|
||||
def get_unique_fields(model: type[models.Model]) -> Optional[Tuple[str, ...]]:
|
||||
"""
|
||||
从 Django 模型获取唯一约束字段
|
||||
|
||||
按优先级查找:
|
||||
1. Meta.constraints 中的 UniqueConstraint
|
||||
2. Meta.unique_together
|
||||
|
||||
Args:
|
||||
model: Django 模型类
|
||||
|
||||
Returns:
|
||||
唯一约束字段元组,如果没有则返回 None
|
||||
"""
|
||||
meta = model._meta
|
||||
|
||||
# 1. 优先查找 UniqueConstraint
|
||||
for constraint in getattr(meta, 'constraints', []):
|
||||
if isinstance(constraint, models.UniqueConstraint):
|
||||
# 跳过条件约束(partial unique)
|
||||
if getattr(constraint, 'condition', None) is None:
|
||||
return tuple(constraint.fields)
|
||||
|
||||
# 2. 回退到 unique_together
|
||||
unique_together = getattr(meta, 'unique_together', None)
|
||||
if unique_together:
|
||||
# unique_together 可能是 (('a', 'b'),) 或 ('a', 'b')
|
||||
if unique_together and isinstance(unique_together[0], (list, tuple)):
|
||||
return tuple(unique_together[0])
|
||||
return tuple(unique_together)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def deduplicate_for_bulk(items: List[T], model: type[models.Model]) -> List[T]:
|
||||
"""
|
||||
根据模型唯一约束对数据去重
|
||||
|
||||
自动从模型读取唯一约束字段,生成去重 key。
|
||||
保留最后一条记录(后面的数据通常是更新的)。
|
||||
|
||||
Args:
|
||||
items: 待去重的数据列表(DTO 或 Model 对象)
|
||||
model: Django 模型类(用于读取唯一约束)
|
||||
|
||||
Returns:
|
||||
去重后的数据列表
|
||||
|
||||
Example:
|
||||
# 自动从 Endpoint 模型读取唯一约束 (url, target)
|
||||
unique_items = deduplicate_for_bulk(items, Endpoint)
|
||||
"""
|
||||
if not items:
|
||||
return items
|
||||
|
||||
unique_fields = get_unique_fields(model)
|
||||
if unique_fields is None:
|
||||
# 模型没有唯一约束,无需去重
|
||||
logger.debug(f"{model.__name__} 没有唯一约束,跳过去重")
|
||||
return items
|
||||
|
||||
# 处理外键字段名(target -> target_id)
|
||||
def make_key(item: T) -> tuple:
|
||||
key_parts = []
|
||||
for field in unique_fields:
|
||||
# 尝试 field_id(外键)和 field 两种形式
|
||||
value = getattr(item, f'{field}_id', None)
|
||||
if value is None:
|
||||
value = getattr(item, field, None)
|
||||
key_parts.append(value)
|
||||
return tuple(key_parts)
|
||||
|
||||
# 使用字典去重,保留最后一条
|
||||
seen = {}
|
||||
for item in items:
|
||||
key = make_key(item)
|
||||
seen[key] = item
|
||||
|
||||
unique_items = list(seen.values())
|
||||
|
||||
if len(unique_items) < len(items):
|
||||
logger.debug(f"{model.__name__} 去重: {len(items)} -> {len(unique_items)} 条")
|
||||
|
||||
return unique_items
|
||||
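A hedged sketch of using the dedup helper before a bulk insert. The Endpoint model follows the module's own example; its import path and unique constraint on (url, target) are assumptions here, not shown in this diff.

from apps.common.utils import deduplicate_for_bulk
from apps.asset.models import Endpoint  # illustrative import path

def bulk_save_endpoints(objs):
    # keep only the last object per (url, target_id) so ON CONFLICT cannot fire twice within one batch
    unique_objs = deduplicate_for_bulk(objs, Endpoint)
    Endpoint.objects.bulk_create(unique_objs, ignore_conflicts=True)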
backend/apps/common/utils/filter_utils.py (new file, 281 lines)
@@ -0,0 +1,281 @@
|
||||
"""智能过滤工具 - 通用查询语法解析和 Django ORM 查询构建
|
||||
|
||||
支持的语法:
|
||||
- field="value" 模糊匹配(包含)
|
||||
- field=="value" 精确匹配
|
||||
- field!="value" 不等于
|
||||
|
||||
逻辑运算符:
|
||||
- AND: && 或 and 或 空格(默认)
|
||||
- OR: || 或 or
|
||||
|
||||
示例:
|
||||
type="xss" || type="sqli" # OR
|
||||
type="xss" or type="sqli" # OR(等价)
|
||||
severity="high" && source="nuclei" # AND
|
||||
severity="high" source="nuclei" # AND(空格默认为 AND)
|
||||
severity="high" and source="nuclei" # AND(等价)
|
||||
|
||||
使用示例:
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
field_mapping = {'ip': 'ip', 'port': 'port', 'host': 'host'}
|
||||
queryset = apply_filters(queryset, 'ip="192" || port="80"', field_mapping)
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Optional, Union
|
||||
from enum import Enum
|
||||
|
||||
from django.db.models import QuerySet, Q
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LogicalOp(Enum):
|
||||
"""逻辑运算符"""
|
||||
AND = 'AND'
|
||||
OR = 'OR'
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParsedFilter:
|
||||
"""解析后的过滤条件"""
|
||||
field: str # 字段名
|
||||
operator: str # 操作符: '=', '==', '!='
|
||||
value: str # 原始值
|
||||
|
||||
|
||||
@dataclass
|
||||
class FilterGroup:
|
||||
"""过滤条件组(带逻辑运算符)"""
|
||||
filter: ParsedFilter
|
||||
logical_op: LogicalOp # 与前一个条件的逻辑关系
|
||||
|
||||
|
||||
class QueryParser:
|
||||
"""查询语法解析器
|
||||
|
||||
支持 ||/or (OR) 和 &&/and/空格 (AND) 逻辑运算符
|
||||
"""
|
||||
|
||||
# 正则匹配: field="value", field=="value", field!="value"
|
||||
FILTER_PATTERN = re.compile(r'(\w+)(==|!=|=)"([^"]*)"')
|
||||
|
||||
# 逻辑运算符模式(带空格)
|
||||
OR_PATTERN = re.compile(r'\s*(\|\||(?<![a-zA-Z])or(?![a-zA-Z]))\s*', re.IGNORECASE)
|
||||
AND_PATTERN = re.compile(r'\s*(&&|(?<![a-zA-Z])and(?![a-zA-Z]))\s*', re.IGNORECASE)
|
||||
|
||||
@classmethod
|
||||
def parse(cls, query_string: str) -> List[FilterGroup]:
|
||||
"""解析查询语法字符串
|
||||
|
||||
Args:
|
||||
query_string: 查询语法字符串
|
||||
|
||||
Returns:
|
||||
解析后的过滤条件组列表
|
||||
|
||||
Examples:
|
||||
>>> QueryParser.parse('type="xss" || type="sqli"')
|
||||
[FilterGroup(filter=..., logical_op=AND), # 第一个默认 AND
|
||||
FilterGroup(filter=..., logical_op=OR)]
|
||||
"""
|
||||
if not query_string or not query_string.strip():
|
||||
return []
|
||||
|
||||
# 标准化逻辑运算符
|
||||
# 先处理 || 和 or -> __OR__
|
||||
normalized = cls.OR_PATTERN.sub(' __OR__ ', query_string)
|
||||
# 再处理 && 和 and -> __AND__
|
||||
normalized = cls.AND_PATTERN.sub(' __AND__ ', normalized)
|
||||
|
||||
# 分词:按空格分割,保留逻辑运算符标记
|
||||
tokens = normalized.split()
|
||||
|
||||
groups = []
|
||||
pending_op = LogicalOp.AND # 默认 AND
|
||||
|
||||
for token in tokens:
|
||||
if token == '__OR__':
|
||||
pending_op = LogicalOp.OR
|
||||
elif token == '__AND__':
|
||||
pending_op = LogicalOp.AND
|
||||
else:
|
||||
# 尝试解析为过滤条件
|
||||
match = cls.FILTER_PATTERN.match(token)
|
||||
if match:
|
||||
field, operator, value = match.groups()
|
||||
groups.append(FilterGroup(
|
||||
filter=ParsedFilter(
|
||||
field=field.lower(),
|
||||
operator=operator,
|
||||
value=value
|
||||
),
|
||||
logical_op=pending_op if groups else LogicalOp.AND # 第一个条件默认 AND
|
||||
))
|
||||
pending_op = LogicalOp.AND # 重置为默认 AND
|
||||
|
||||
return groups
|
||||
|
||||
|
||||
class QueryBuilder:
|
||||
"""Django ORM 查询构建器
|
||||
|
||||
将解析后的过滤条件转换为 Django ORM 查询,支持 AND/OR 逻辑
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def build_query(
|
||||
cls,
|
||||
queryset: QuerySet,
|
||||
filter_groups: List[FilterGroup],
|
||||
field_mapping: Dict[str, str],
|
||||
json_array_fields: List[str] = None
|
||||
) -> QuerySet:
|
||||
"""构建 Django ORM 查询
|
||||
|
||||
Args:
|
||||
queryset: Django QuerySet
|
||||
filter_groups: 解析后的过滤条件组列表
|
||||
field_mapping: 字段映射
|
||||
json_array_fields: JSON 数组字段列表(使用 __contains 查询)
|
||||
|
||||
Returns:
|
||||
过滤后的 QuerySet
|
||||
"""
|
||||
if not filter_groups:
|
||||
return queryset
|
||||
|
||||
json_array_fields = json_array_fields or []
|
||||
|
||||
# 构建 Q 对象
|
||||
combined_q = None
|
||||
|
||||
for group in filter_groups:
|
||||
f = group.filter
|
||||
|
||||
# 字段映射
|
||||
db_field = field_mapping.get(f.field)
|
||||
if not db_field:
|
||||
logger.debug(f"忽略未知字段: {f.field}")
|
||||
continue
|
||||
|
||||
# 判断是否为 JSON 数组字段
|
||||
is_json_array = db_field in json_array_fields
|
||||
|
||||
# 构建单个条件的 Q 对象
|
||||
q = cls._build_single_q(db_field, f.operator, f.value, is_json_array)
|
||||
if q is None:
|
||||
continue
|
||||
|
||||
# 组合 Q 对象
|
||||
if combined_q is None:
|
||||
combined_q = q
|
||||
elif group.logical_op == LogicalOp.OR:
|
||||
combined_q = combined_q | q
|
||||
else: # AND
|
||||
combined_q = combined_q & q
|
||||
|
||||
if combined_q is not None:
|
||||
return queryset.filter(combined_q)
|
||||
return queryset
|
||||
|
||||
@classmethod
|
||||
def _build_single_q(cls, field: str, operator: str, value: str, is_json_array: bool = False) -> Optional[Q]:
|
||||
"""构建单个条件的 Q 对象"""
|
||||
if is_json_array:
|
||||
# JSON 数组字段使用 __contains 查询
|
||||
return Q(**{f'{field}__contains': [value]})
|
||||
|
||||
if operator == '!=':
|
||||
return cls._build_not_equal_q(field, value)
|
||||
elif operator == '==':
|
||||
return cls._build_exact_q(field, value)
|
||||
else: # '='
|
||||
return cls._build_fuzzy_q(field, value)
|
||||
|
||||
@classmethod
|
||||
def _try_convert_to_int(cls, value: str) -> Optional[int]:
|
||||
"""尝试将值转换为整数"""
|
||||
try:
|
||||
return int(value.strip())
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _build_fuzzy_q(cls, field: str, value: str) -> Q:
|
||||
"""模糊匹配: 包含"""
|
||||
return Q(**{f'{field}__icontains': value})
|
||||
|
||||
@classmethod
|
||||
def _build_exact_q(cls, field: str, value: str) -> Q:
|
||||
"""精确匹配"""
|
||||
int_val = cls._try_convert_to_int(value)
|
||||
if int_val is not None:
|
||||
return Q(**{f'{field}__exact': int_val})
|
||||
return Q(**{f'{field}__exact': value})
|
||||
|
||||
@classmethod
|
||||
def _build_not_equal_q(cls, field: str, value: str) -> Q:
|
||||
"""不等于"""
|
||||
int_val = cls._try_convert_to_int(value)
|
||||
if int_val is not None:
|
||||
return ~Q(**{f'{field}__exact': int_val})
|
||||
return ~Q(**{f'{field}__exact': value})
|
||||
|
||||
|
||||
def apply_filters(
|
||||
queryset: QuerySet,
|
||||
query_string: str,
|
||||
field_mapping: Dict[str, str],
|
||||
json_array_fields: List[str] = None
|
||||
) -> QuerySet:
|
||||
"""应用过滤条件到 QuerySet
|
||||
|
||||
Args:
|
||||
queryset: Django QuerySet
|
||||
query_string: 查询语法字符串
|
||||
field_mapping: 字段映射
|
||||
json_array_fields: JSON 数组字段列表(使用 __contains 查询)
|
||||
|
||||
Returns:
|
||||
过滤后的 QuerySet
|
||||
|
||||
Examples:
|
||||
# OR 查询
|
||||
apply_filters(qs, 'type="xss" || type="sqli"', mapping)
|
||||
apply_filters(qs, 'type="xss" or type="sqli"', mapping)
|
||||
|
||||
# AND 查询
|
||||
apply_filters(qs, 'severity="high" && source="nuclei"', mapping)
|
||||
apply_filters(qs, 'severity="high" source="nuclei"', mapping)
|
||||
|
||||
# 混合查询
|
||||
apply_filters(qs, 'type="xss" || type="sqli" && severity="high"', mapping)
|
||||
|
||||
# JSON 数组字段查询
|
||||
apply_filters(qs, 'implies="PHP"', mapping, json_array_fields=['implies'])
|
||||
"""
|
||||
if not query_string or not query_string.strip():
|
||||
return queryset
|
||||
|
||||
try:
|
||||
filter_groups = QueryParser.parse(query_string)
|
||||
if not filter_groups:
|
||||
logger.debug(f"未解析到有效过滤条件: {query_string}")
|
||||
return queryset
|
||||
|
||||
logger.debug(f"解析过滤条件: {filter_groups}")
|
||||
return QueryBuilder.build_query(
|
||||
queryset,
|
||||
filter_groups,
|
||||
field_mapping,
|
||||
json_array_fields=json_array_fields
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"过滤解析错误: {e}, query: {query_string}")
|
||||
return queryset # 静默降级
|
||||
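A usage sketch of the smart-filter utilities above; the field mapping mirrors the vulnerability mapping used elsewhere in this changeset, and the wrapper function is illustrative (it expects a Django queryset).

from apps.common.utils.filter_utils import QueryParser, apply_filters

FIELD_MAPPING = {"type": "vuln_type", "url": "url", "severity": "severity", "source": "source"}

# Parsing only: three conditions; the last one is ORed with what precedes it
groups = QueryParser.parse('severity="high" && source="nuclei" || type="xss"')

def filter_vulnerabilities(queryset, query: str):
    # unknown fields are ignored; parse errors fall back to the unfiltered queryset
    return apply_filters(queryset, query, FIELD_MAPPING)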
backend/apps/common/utils/git_proxy.py (new file, 39 lines)
@@ -0,0 +1,39 @@
|
||||
"""Git proxy utilities for URL acceleration."""
|
||||
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
def get_git_proxy_url(original_url: str) -> str:
|
||||
"""
|
||||
Convert Git repository URL to proxy format for acceleration.
|
||||
|
||||
Supports multiple mirror services (standard format):
|
||||
- gh-proxy.org: https://gh-proxy.org/https://github.com/user/repo.git
|
||||
- ghproxy.com: https://ghproxy.com/https://github.com/user/repo.git
|
||||
- mirror.ghproxy.com: https://mirror.ghproxy.com/https://github.com/user/repo.git
|
||||
- ghps.cc: https://ghps.cc/https://github.com/user/repo.git
|
||||
|
||||
Args:
|
||||
original_url: Original repository URL, e.g., https://github.com/user/repo.git
|
||||
|
||||
Returns:
|
||||
Converted URL based on GIT_MIRROR setting.
|
||||
If GIT_MIRROR is not set, returns the original URL unchanged.
|
||||
"""
|
||||
git_mirror = os.getenv("GIT_MIRROR", "").strip()
|
||||
if not git_mirror:
|
||||
return original_url
|
||||
|
||||
# Remove trailing slash from mirror URL if present
|
||||
git_mirror = git_mirror.rstrip("/")
|
||||
|
||||
parsed = urlparse(original_url)
|
||||
host = parsed.netloc.lower()
|
||||
|
||||
# Only support GitHub for now
|
||||
if "github.com" not in host:
|
||||
return original_url
|
||||
|
||||
# Standard format: https://mirror.example.com/https://github.com/user/repo.git
|
||||
return f"{git_mirror}/{original_url}"
|
||||
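Behavior sketch for get_git_proxy_url based on the code above (the mirror host is illustrative): only github.com URLs are rewritten, and an unset GIT_MIRROR leaves URLs untouched.

import os
from apps.common.utils import get_git_proxy_url

os.environ["GIT_MIRROR"] = "https://gh-proxy.org"  # illustrative mirror
print(get_git_proxy_url("https://github.com/projectdiscovery/nuclei-templates.git"))
# -> https://gh-proxy.org/https://github.com/projectdiscovery/nuclei-templates.git
print(get_git_proxy_url("https://gitlab.com/user/repo.git"))
# -> returned unchanged: only github.com hosts are rewritten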
@@ -7,7 +7,6 @@
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, BinaryIO
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -91,11 +90,3 @@ def is_file_hash_match(file_path: str, expected_hash: str) -> bool:
|
||||
return False
|
||||
|
||||
return actual_hash.lower() == expected_hash.lower()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"calc_file_sha256",
|
||||
"calc_stream_sha256",
|
||||
"safe_calc_file_sha256",
|
||||
"is_file_hash_match",
|
||||
]
|
||||
@@ -1,6 +1,8 @@
|
||||
"""域名、IP、端口和目标验证工具函数"""
|
||||
"""域名、IP、端口、URL 和目标验证工具函数"""
|
||||
import ipaddress
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import validators
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -25,6 +27,21 @@ def validate_domain(domain: str) -> None:
|
||||
raise ValueError(f"域名格式无效: {domain}")
|
||||
|
||||
|
||||
def is_valid_domain(domain: str) -> bool:
|
||||
"""
|
||||
判断是否为有效域名(不抛异常)
|
||||
|
||||
Args:
|
||||
domain: 域名字符串
|
||||
|
||||
Returns:
|
||||
bool: 是否为有效域名
|
||||
"""
|
||||
if not domain or len(domain) > 253:
|
||||
return False
|
||||
return bool(validators.domain(domain))
|
||||
|
||||
|
||||
def validate_ip(ip: str) -> None:
|
||||
"""
|
||||
验证 IP 地址格式(支持 IPv4 和 IPv6)
|
||||
@@ -44,6 +61,25 @@ def validate_ip(ip: str) -> None:
|
||||
raise ValueError(f"IP 地址格式无效: {ip}")
|
||||
|
||||
|
||||
def is_valid_ip(ip: str) -> bool:
|
||||
"""
|
||||
判断是否为有效 IP 地址(不抛异常)
|
||||
|
||||
Args:
|
||||
ip: IP 地址字符串
|
||||
|
||||
Returns:
|
||||
bool: 是否为有效 IP 地址
|
||||
"""
|
||||
if not ip:
|
||||
return False
|
||||
try:
|
||||
ipaddress.ip_address(ip)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def validate_cidr(cidr: str) -> None:
|
||||
"""
|
||||
验证 CIDR 格式(支持 IPv4 和 IPv6)
|
||||
@@ -140,3 +176,136 @@ def validate_port(port: any) -> tuple[bool, int | None]:
|
||||
except (ValueError, TypeError):
|
||||
logger.warning("端口号格式错误,无法转换为整数: %s", port)
|
||||
return False, None
|
||||
|
||||
|
||||
# ==================== URL 验证函数 ====================
|
||||
|
||||
def validate_url(url: str) -> None:
|
||||
"""
|
||||
验证 URL 格式,必须包含 scheme(http:// 或 https://)
|
||||
|
||||
Args:
|
||||
url: URL 字符串
|
||||
|
||||
Raises:
|
||||
ValueError: URL 格式无效或缺少 scheme
|
||||
"""
|
||||
if not url:
|
||||
raise ValueError("URL 不能为空")
|
||||
|
||||
# 检查是否包含 scheme
|
||||
if not url.startswith('http://') and not url.startswith('https://'):
|
||||
raise ValueError("URL 必须包含协议(http:// 或 https://)")
|
||||
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
if not parsed.hostname:
|
||||
raise ValueError("URL 必须包含主机名")
|
||||
except Exception:
|
||||
raise ValueError(f"URL 格式无效: {url}")
|
||||
|
||||
|
||||
def is_valid_url(url: str, max_length: int = 2000) -> bool:
|
||||
"""
|
||||
判断是否为有效 URL(不抛异常)
|
||||
|
||||
Args:
|
||||
url: URL 字符串
|
||||
max_length: URL 最大长度,默认 2000
|
||||
|
||||
Returns:
|
||||
bool: 是否为有效 URL
|
||||
"""
|
||||
if not url or len(url) > max_length:
|
||||
return False
|
||||
try:
|
||||
validate_url(url)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def is_url_match_target(url: str, target_name: str, target_type: str) -> bool:
|
||||
"""
|
||||
判断 URL 是否匹配目标
|
||||
|
||||
Args:
|
||||
url: URL 字符串
|
||||
target_name: 目标名称(域名、IP 或 CIDR)
|
||||
target_type: 目标类型 ('domain', 'ip', 'cidr')
|
||||
|
||||
Returns:
|
||||
bool: 是否匹配
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
return False
|
||||
|
||||
hostname = hostname.lower()
|
||||
target_name = target_name.lower()
|
||||
|
||||
if target_type == 'domain':
|
||||
# 域名类型:hostname 等于 target_name 或以 .target_name 结尾
|
||||
return hostname == target_name or hostname.endswith('.' + target_name)
|
||||
|
||||
elif target_type == 'ip':
|
||||
# IP 类型:hostname 必须完全等于 target_name
|
||||
return hostname == target_name
|
||||
|
||||
elif target_type == 'cidr':
|
||||
# CIDR 类型:hostname 必须是 IP 且在 CIDR 范围内
|
||||
try:
|
||||
ip = ipaddress.ip_address(hostname)
|
||||
network = ipaddress.ip_network(target_name, strict=False)
|
||||
return ip in network
|
||||
except ValueError:
|
||||
# hostname 不是有效 IP
|
||||
return False
|
||||
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def detect_input_type(input_str: str) -> str:
|
||||
"""
|
||||
检测输入类型(用于快速扫描输入解析)
|
||||
|
||||
Args:
|
||||
input_str: 输入字符串(应该已经 strip)
|
||||
|
||||
Returns:
|
||||
str: 输入类型 ('url', 'domain', 'ip', 'cidr')
|
||||
"""
|
||||
if not input_str:
|
||||
raise ValueError("输入不能为空")
|
||||
|
||||
# 1. 包含 :// 一定是 URL
|
||||
if '://' in input_str:
|
||||
return 'url'
|
||||
|
||||
# 2. 包含 / 需要判断是 CIDR 还是 URL(缺少 scheme)
|
||||
if '/' in input_str:
|
||||
# CIDR 格式: IP/prefix,如 10.0.0.0/8
|
||||
parts = input_str.split('/')
|
||||
if len(parts) == 2:
|
||||
ip_part, prefix_part = parts
|
||||
# 如果斜杠后是纯数字且在 0-32 范围内,检查是否是 CIDR
|
||||
if prefix_part.isdigit() and 0 <= int(prefix_part) <= 32:
|
||||
ip_parts = ip_part.split('.')
|
||||
if len(ip_parts) == 4 and all(p.isdigit() for p in ip_parts):
|
||||
return 'cidr'
|
||||
# 不是 CIDR,视为 URL(缺少 scheme,后续验证会报错)
|
||||
return 'url'
|
||||
|
||||
# 3. 检查是否是 IP 地址
|
||||
try:
|
||||
ipaddress.ip_address(input_str)
|
||||
return 'ip'
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# 4. 默认为域名
|
||||
return 'domain'
|
||||
|
||||
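A quick behavior check for the new URL/target helpers, matching the rules shown above (apps.common.validators is the import path used by the service layer in this changeset).

from apps.common.validators import is_url_match_target, detect_input_type

assert is_url_match_target("https://api.example.com/v1", "example.com", "domain")
assert is_url_match_target("https://10.0.0.5:8080/", "10.0.0.0/8", "cidr")
assert not is_url_match_target("https://evil.com/?q=example.com", "example.com", "domain")

assert detect_input_type("https://example.com/login") == "url"
assert detect_input_type("10.0.0.0/8") == "cidr"
assert detect_input_type("203.0.113.7") == "ip"
assert detect_input_type("example.com") == "domain"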
@@ -241,8 +241,10 @@ class WorkerDeployConsumer(AsyncWebsocketConsumer):
|
||||
}))
|
||||
return
|
||||
|
||||
django_host = f"{public_host}:{server_port}" # Django / 心跳上报使用
|
||||
heartbeat_api_url = f"http://{django_host}" # 基础 URL,agent 会加 /api/...
|
||||
# 远程 Worker 通过 nginx HTTPS 访问(nginx 反代到后端 8888)
|
||||
# 使用 https://{PUBLIC_HOST}:{PUBLIC_PORT} 而不是直连 8888 端口
|
||||
public_port = getattr(settings, 'PUBLIC_PORT', '8083')
|
||||
heartbeat_api_url = f"https://{public_host}:{public_port}"
|
||||
|
||||
session_name = f'xingrin_deploy_{self.worker_id}'
|
||||
remote_script_path = '/tmp/xingrin_deploy.sh'
|
||||
|
||||
backend/apps/engine/management/commands/init_fingerprints.py (new file, 160 lines)
@@ -0,0 +1,160 @@
|
||||
"""初始化内置指纹库
|
||||
|
||||
- EHole 指纹: ehole.json -> 导入到数据库
|
||||
- Goby 指纹: goby.json -> 导入到数据库
|
||||
- Wappalyzer 指纹: wappalyzer.json -> 导入到数据库
|
||||
|
||||
可重复执行:如果数据库已有数据则跳过,只在空库时导入。
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from apps.engine.models import EholeFingerprint, GobyFingerprint, WappalyzerFingerprint
|
||||
from apps.engine.services.fingerprints import (
|
||||
EholeFingerprintService,
|
||||
GobyFingerprintService,
|
||||
WappalyzerFingerprintService,
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# 内置指纹配置
|
||||
DEFAULT_FINGERPRINTS = [
|
||||
{
|
||||
"type": "ehole",
|
||||
"filename": "ehole.json",
|
||||
"model": EholeFingerprint,
|
||||
"service": EholeFingerprintService,
|
||||
"data_key": "fingerprint", # JSON 中指纹数组的 key
|
||||
},
|
||||
{
|
||||
"type": "goby",
|
||||
"filename": "goby.json",
|
||||
"model": GobyFingerprint,
|
||||
"service": GobyFingerprintService,
|
||||
"data_key": None, # Goby 是数组格式,直接使用整个 JSON
|
||||
},
|
||||
{
|
||||
"type": "wappalyzer",
|
||||
"filename": "wappalyzer.json",
|
||||
"model": WappalyzerFingerprint,
|
||||
"service": WappalyzerFingerprintService,
|
||||
"data_key": "apps", # Wappalyzer 使用 apps 对象
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "初始化内置指纹库"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
project_base = Path(settings.BASE_DIR).parent # /app/backend -> /app
|
||||
fingerprints_dir = project_base / "backend" / "fingerprints"
|
||||
|
||||
initialized = 0
|
||||
skipped = 0
|
||||
failed = 0
|
||||
|
||||
for item in DEFAULT_FINGERPRINTS:
|
||||
fp_type = item["type"]
|
||||
filename = item["filename"]
|
||||
model = item["model"]
|
||||
service_class = item["service"]
|
||||
data_key = item["data_key"]
|
||||
|
||||
# 检查数据库是否已有数据
|
||||
existing_count = model.objects.count()
|
||||
if existing_count > 0:
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f"[{fp_type}] 数据库已有 {existing_count} 条记录,跳过初始化"
|
||||
))
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
# 查找源文件
|
||||
src_path = fingerprints_dir / filename
|
||||
if not src_path.exists():
|
||||
self.stdout.write(self.style.WARNING(
|
||||
f"[{fp_type}] 未找到内置指纹文件: {src_path},跳过"
|
||||
))
|
||||
failed += 1
|
||||
continue
|
||||
|
||||
# 读取并解析 JSON
|
||||
try:
|
||||
with open(src_path, "r", encoding="utf-8") as f:
|
||||
json_data = json.load(f)
|
||||
except (json.JSONDecodeError, OSError) as exc:
|
||||
self.stdout.write(self.style.ERROR(
|
||||
f"[{fp_type}] 读取指纹文件失败: {exc}"
|
||||
))
|
||||
failed += 1
|
||||
continue
|
||||
|
||||
# 提取指纹数据(根据不同格式处理)
|
||||
fingerprints = self._extract_fingerprints(json_data, data_key, fp_type)
|
||||
if not fingerprints:
|
||||
self.stdout.write(self.style.WARNING(
|
||||
f"[{fp_type}] 指纹文件中没有有效数据,跳过"
|
||||
))
|
||||
failed += 1
|
||||
continue
|
||||
|
||||
# 使用 Service 批量导入
|
||||
try:
|
||||
service = service_class()
|
||||
result = service.batch_create_fingerprints(fingerprints)
|
||||
created = result.get("created", 0)
|
||||
failed_count = result.get("failed", 0)
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f"[{fp_type}] 导入成功: 创建 {created} 条,失败 {failed_count} 条"
|
||||
))
|
||||
initialized += 1
|
||||
except Exception as exc:
|
||||
self.stdout.write(self.style.ERROR(
|
||||
f"[{fp_type}] 导入失败: {exc}"
|
||||
))
|
||||
failed += 1
|
||||
continue
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f"指纹初始化完成: 成功 {initialized}, 已存在跳过 {skipped}, 失败 {failed}"
|
||||
))
|
||||
|
||||
def _extract_fingerprints(self, json_data, data_key, fp_type):
|
||||
"""
|
||||
根据不同格式提取指纹数据,兼容数组和对象两种格式
|
||||
|
||||
支持的格式:
|
||||
- 数组格式: [...] 或 {"key": [...]}
|
||||
- 对象格式: {...} 或 {"key": {...}} -> 转换为 [{"name": k, ...v}]
|
||||
"""
|
||||
# 获取目标数据
|
||||
if data_key is None:
|
||||
# 直接使用整个 JSON
|
||||
target = json_data
|
||||
else:
|
||||
# 从指定 key 获取,支持多个可能的 key(如 apps/technologies)
|
||||
if data_key == "apps":
|
||||
target = json_data.get("apps") or json_data.get("technologies") or {}
|
||||
else:
|
||||
target = json_data.get(data_key, [])
|
||||
|
||||
# 根据数据类型处理
|
||||
if isinstance(target, list):
|
||||
# 已经是数组格式,直接返回
|
||||
return target
|
||||
elif isinstance(target, dict):
|
||||
# 对象格式,转换为数组 [{"name": key, ...value}]
|
||||
return [{"name": name, **data} if isinstance(data, dict) else {"name": name}
|
||||
for name, data in target.items()]
|
||||
|
||||
return []
|
||||
@@ -3,12 +3,17 @@
|
||||
项目安装后执行此命令,自动创建官方模板仓库记录。
|
||||
|
||||
使用方式:
|
||||
python manage.py init_nuclei_templates # 只创建记录
|
||||
python manage.py init_nuclei_templates # 只创建记录(检测本地已有仓库)
|
||||
python manage.py init_nuclei_templates --sync # 创建并同步(git clone)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.utils import timezone
|
||||
|
||||
from apps.engine.models import NucleiTemplateRepo
|
||||
from apps.engine.services import NucleiTemplateRepoService
|
||||
@@ -26,6 +31,20 @@ DEFAULT_REPOS = [
|
||||
]
|
||||
|
||||
|
||||
def get_local_commit_hash(local_path: Path) -> str:
|
||||
"""获取本地 Git 仓库的 commit hash"""
|
||||
if not (local_path / ".git").is_dir():
|
||||
return ""
|
||||
result = subprocess.run(
|
||||
["git", "-C", str(local_path), "rev-parse", "HEAD"],
|
||||
check=False,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
)
|
||||
return result.stdout.strip() if result.returncode == 0 else ""
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "初始化 Nuclei 模板仓库(创建官方模板仓库记录)"
|
||||
|
||||
@@ -46,6 +65,8 @@ class Command(BaseCommand):
|
||||
force = options.get("force", False)
|
||||
|
||||
service = NucleiTemplateRepoService()
|
||||
base_dir = Path(getattr(settings, "NUCLEI_TEMPLATES_REPOS_BASE_DIR", "/opt/xingrin/nuclei-repos"))
|
||||
|
||||
created = 0
|
||||
skipped = 0
|
||||
synced = 0
|
||||
@@ -87,20 +108,30 @@ class Command(BaseCommand):
|
||||
|
||||
# 创建新仓库记录
|
||||
try:
|
||||
# 检查本地是否已有仓库(由 install.sh 预下载)
|
||||
local_path = base_dir / name
|
||||
local_commit = get_local_commit_hash(local_path)
|
||||
|
||||
repo = NucleiTemplateRepo.objects.create(
|
||||
name=name,
|
||||
repo_url=repo_url,
|
||||
local_path=str(local_path) if local_commit else "",
|
||||
commit_hash=local_commit,
|
||||
last_synced_at=timezone.now() if local_commit else None,
|
||||
)
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f"[{name}] 创建成功: id={repo.id}"
|
||||
))
|
||||
|
||||
if local_commit:
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f"[{name}] 创建成功(检测到本地仓库): commit={local_commit[:8]}"
|
||||
))
|
||||
else:
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f"[{name}] 创建成功: id={repo.id}"
|
||||
))
|
||||
created += 1
|
||||
|
||||
# 初始化本地路径
|
||||
service.ensure_local_path(repo)
|
||||
|
||||
# 如果需要同步
|
||||
if do_sync:
|
||||
# 如果本地没有仓库且需要同步
|
||||
if not local_commit and do_sync:
|
||||
try:
|
||||
self.stdout.write(self.style.WARNING(
|
||||
f"[{name}] 正在同步(首次可能需要几分钟)..."
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
"""初始化所有内置字典 Wordlist 记录
|
||||
|
||||
- 目录扫描默认字典: dir_default.txt -> /app/backend/wordlist/dir_default.txt
|
||||
- 子域名爆破默认字典: subdomains-top1million-110000.txt -> /app/backend/wordlist/subdomains-top1million-110000.txt
|
||||
内置字典从镜像内 /app/backend/wordlist/ 复制到运行时目录 /opt/xingrin/wordlists/:
|
||||
- 目录扫描默认字典: dir_default.txt
|
||||
- 子域名爆破默认字典: subdomains-top1million-110000.txt
|
||||
|
||||
可重复执行:如果已存在同名记录且文件有效则跳过,只在缺失或文件丢失时创建/修复。
|
||||
"""
|
||||
@@ -13,7 +14,7 @@ from pathlib import Path
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from apps.common.hash_utils import safe_calc_file_sha256
|
||||
from apps.common.utils import safe_calc_file_sha256
|
||||
from apps.engine.models import Wordlist
|
||||
|
||||
|
||||
|
||||
backend/apps/engine/models/__init__.py (new file, 19 lines)
@@ -0,0 +1,19 @@
|
||||
"""Engine Models
|
||||
|
||||
导出所有 Engine 模块的 Models
|
||||
"""
|
||||
|
||||
from .engine import WorkerNode, ScanEngine, Wordlist, NucleiTemplateRepo
|
||||
from .fingerprints import EholeFingerprint, GobyFingerprint, WappalyzerFingerprint
|
||||
|
||||
__all__ = [
|
||||
# 核心 Models
|
||||
"WorkerNode",
|
||||
"ScanEngine",
|
||||
"Wordlist",
|
||||
"NucleiTemplateRepo",
|
||||
# 指纹 Models
|
||||
"EholeFingerprint",
|
||||
"GobyFingerprint",
|
||||
"WappalyzerFingerprint",
|
||||
]
|
||||
@@ -1,3 +1,8 @@
|
||||
"""Engine 模块核心 Models
|
||||
|
||||
包含 WorkerNode, ScanEngine, Wordlist, NucleiTemplateRepo
|
||||
"""
|
||||
|
||||
from django.db import models
|
||||
|
||||
|
||||
@@ -10,6 +15,8 @@ class WorkerNode(models.Model):
|
||||
('deploying', '部署中'),
|
||||
('online', '在线'),
|
||||
('offline', '离线'),
|
||||
('updating', '更新中'),
|
||||
('outdated', '版本过低'),
|
||||
]
|
||||
|
||||
name = models.CharField(max_length=100, help_text='节点名称')
|
||||
@@ -76,6 +83,7 @@ class ScanEngine(models.Model):
|
||||
indexes = [
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
return str(self.name or f'ScanEngine {self.id}')
|
||||
|
||||
backend/apps/engine/models/fingerprints.py (new file, 108 lines)
@@ -0,0 +1,108 @@
|
||||
"""指纹相关 Models
|
||||
|
||||
包含 EHole、Goby、Wappalyzer 等指纹格式的数据模型
|
||||
"""
|
||||
|
||||
from django.db import models
|
||||
|
||||
|
||||
class GobyFingerprint(models.Model):
|
||||
"""Goby 格式指纹规则
|
||||
|
||||
Goby 使用逻辑表达式和规则数组进行匹配:
|
||||
- logic: 逻辑表达式,如 "a||b", "(a&&b)||c"
|
||||
- rule: 规则数组,每条规则包含 label, feature, is_equal
|
||||
"""
|
||||
|
||||
name = models.CharField(max_length=300, unique=True, help_text='产品名称')
|
||||
logic = models.CharField(max_length=500, help_text='逻辑表达式')
|
||||
rule = models.JSONField(default=list, help_text='规则数组')
|
||||
created_at = models.DateTimeField(auto_now_add=True)
|
||||
|
||||
class Meta:
|
||||
db_table = 'goby_fingerprint'
|
||||
verbose_name = 'Goby 指纹'
|
||||
verbose_name_plural = 'Goby 指纹'
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['name']),
|
||||
models.Index(fields=['logic']),
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.name} ({self.logic})"
|
||||
|
||||
|
||||
class EholeFingerprint(models.Model):
|
||||
"""EHole 格式指纹规则(字段与 ehole.json 一致)"""
|
||||
|
||||
cms = models.CharField(max_length=200, help_text='产品/CMS名称')
|
||||
method = models.CharField(max_length=200, default='keyword', help_text='匹配方式')
|
||||
location = models.CharField(max_length=200, default='body', help_text='匹配位置')
|
||||
keyword = models.JSONField(default=list, help_text='关键词列表')
|
||||
is_important = models.BooleanField(default=False, help_text='是否重点资产')
|
||||
type = models.CharField(max_length=100, blank=True, default='-', help_text='分类')
|
||||
created_at = models.DateTimeField(auto_now_add=True)
|
||||
|
||||
class Meta:
|
||||
db_table = 'ehole_fingerprint'
|
||||
verbose_name = 'EHole 指纹'
|
||||
verbose_name_plural = 'EHole 指纹'
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
# 搜索过滤字段索引
|
||||
models.Index(fields=['cms']),
|
||||
models.Index(fields=['method']),
|
||||
models.Index(fields=['location']),
|
||||
models.Index(fields=['type']),
|
||||
models.Index(fields=['is_important']),
|
||||
# 排序字段索引
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
constraints = [
|
||||
# 唯一约束:cms + method + location 组合不能重复
|
||||
models.UniqueConstraint(
|
||||
fields=['cms', 'method', 'location'],
|
||||
name='unique_ehole_fingerprint'
|
||||
),
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.cms} ({self.method}@{self.location})"
|
||||
|
||||
|
||||
class WappalyzerFingerprint(models.Model):
|
||||
"""Wappalyzer 格式指纹规则
|
||||
|
||||
Wappalyzer 支持多种检测方式:cookies, headers, scriptSrc, js, meta, html 等
|
||||
"""
|
||||
|
||||
name = models.CharField(max_length=300, unique=True, help_text='应用名称')
|
||||
cats = models.JSONField(default=list, help_text='分类 ID 数组')
|
||||
cookies = models.JSONField(default=dict, blank=True, help_text='Cookie 检测规则')
|
||||
headers = models.JSONField(default=dict, blank=True, help_text='HTTP Header 检测规则')
|
||||
script_src = models.JSONField(default=list, blank=True, help_text='脚本 URL 正则数组')
|
||||
js = models.JSONField(default=list, blank=True, help_text='JavaScript 变量检测规则')
|
||||
implies = models.JSONField(default=list, blank=True, help_text='依赖关系数组')
|
||||
meta = models.JSONField(default=dict, blank=True, help_text='HTML meta 标签检测规则')
|
||||
html = models.JSONField(default=list, blank=True, help_text='HTML 内容正则数组')
|
||||
description = models.TextField(blank=True, default='', help_text='应用描述')
|
||||
website = models.URLField(max_length=500, blank=True, default='', help_text='官网链接')
|
||||
cpe = models.CharField(max_length=300, blank=True, default='', help_text='CPE 标识符')
|
||||
created_at = models.DateTimeField(auto_now_add=True)
|
||||
|
||||
class Meta:
|
||||
db_table = 'wappalyzer_fingerprint'
|
||||
verbose_name = 'Wappalyzer 指纹'
|
||||
verbose_name_plural = 'Wappalyzer 指纹'
|
||||
ordering = ['-created_at']
|
||||
indexes = [
|
||||
models.Index(fields=['name']),
|
||||
models.Index(fields=['website']),
|
||||
models.Index(fields=['cpe']),
|
||||
models.Index(fields=['-created_at']),
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.name}"
|
||||
backend/apps/engine/serializers/fingerprints/__init__.py (new file, 14 lines)
@@ -0,0 +1,14 @@
|
||||
"""指纹管理 Serializers
|
||||
|
||||
导出所有指纹相关的 Serializer 类
|
||||
"""
|
||||
|
||||
from .ehole import EholeFingerprintSerializer
|
||||
from .goby import GobyFingerprintSerializer
|
||||
from .wappalyzer import WappalyzerFingerprintSerializer
|
||||
|
||||
__all__ = [
|
||||
"EholeFingerprintSerializer",
|
||||
"GobyFingerprintSerializer",
|
||||
"WappalyzerFingerprintSerializer",
|
||||
]
|
||||
backend/apps/engine/serializers/fingerprints/ehole.py (new file, 27 lines)
@@ -0,0 +1,27 @@
"""EHole 指纹 Serializer"""

from rest_framework import serializers

from apps.engine.models import EholeFingerprint


class EholeFingerprintSerializer(serializers.ModelSerializer):
    """EHole 指纹序列化器"""

    class Meta:
        model = EholeFingerprint
        fields = ['id', 'cms', 'method', 'location', 'keyword',
                  'is_important', 'type', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_cms(self, value):
        """校验 cms 字段"""
        if not value or not value.strip():
            raise serializers.ValidationError("cms 字段不能为空")
        return value.strip()

    def validate_keyword(self, value):
        """校验 keyword 字段"""
        if not isinstance(value, list):
            raise serializers.ValidationError("keyword 必须是数组")
        return value
26
backend/apps/engine/serializers/fingerprints/goby.py
Normal file
@@ -0,0 +1,26 @@
"""Goby 指纹 Serializer"""

from rest_framework import serializers

from apps.engine.models import GobyFingerprint


class GobyFingerprintSerializer(serializers.ModelSerializer):
    """Goby 指纹序列化器"""

    class Meta:
        model = GobyFingerprint
        fields = ['id', 'name', 'logic', 'rule', 'created_at']
        read_only_fields = ['id', 'created_at']

    def validate_name(self, value):
        """校验 name 字段"""
        if not value or not value.strip():
            raise serializers.ValidationError("name 字段不能为空")
        return value.strip()

    def validate_rule(self, value):
        """校验 rule 字段"""
        if not isinstance(value, list):
            raise serializers.ValidationError("rule 必须是数组")
        return value
24
backend/apps/engine/serializers/fingerprints/wappalyzer.py
Normal file
@@ -0,0 +1,24 @@
"""Wappalyzer 指纹 Serializer"""

from rest_framework import serializers

from apps.engine.models import WappalyzerFingerprint


class WappalyzerFingerprintSerializer(serializers.ModelSerializer):
    """Wappalyzer 指纹序列化器"""

    class Meta:
        model = WappalyzerFingerprint
        fields = [
            'id', 'name', 'cats', 'cookies', 'headers', 'script_src',
            'js', 'implies', 'meta', 'html', 'description', 'website',
            'cpe', 'created_at'
        ]
        read_only_fields = ['id', 'created_at']

    def validate_name(self, value):
        """校验 name 字段"""
        if not value or not value.strip():
            raise serializers.ValidationError("name 字段不能为空")
        return value.strip()
16
backend/apps/engine/services/fingerprints/__init__.py
Normal file
@@ -0,0 +1,16 @@
"""指纹管理 Services

导出所有指纹相关的 Service 类
"""

from .base import BaseFingerprintService
from .ehole import EholeFingerprintService
from .goby import GobyFingerprintService
from .wappalyzer import WappalyzerFingerprintService

__all__ = [
    "BaseFingerprintService",
    "EholeFingerprintService",
    "GobyFingerprintService",
    "WappalyzerFingerprintService",
]
144
backend/apps/engine/services/fingerprints/base.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""指纹管理基类 Service
|
||||
|
||||
提供通用的批量操作和缓存逻辑,供 EHole/Goby/Wappalyzer 等子类继承
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseFingerprintService:
|
||||
"""指纹管理基类 Service,提供通用的批量操作和缓存逻辑"""
|
||||
|
||||
model = None # 子类必须指定
|
||||
BATCH_SIZE = 1000 # 每批处理数量
|
||||
|
||||
def validate_fingerprint(self, item: dict) -> bool:
|
||||
"""
|
||||
校验单条指纹,子类必须实现
|
||||
|
||||
Args:
|
||||
item: 单条指纹数据
|
||||
|
||||
Returns:
|
||||
bool: 是否有效
|
||||
"""
|
||||
raise NotImplementedError("子类必须实现 validate_fingerprint 方法")
|
||||
|
||||
def validate_fingerprints(self, raw_data: list) -> tuple[list, list]:
|
||||
"""
|
||||
批量校验指纹数据
|
||||
|
||||
Args:
|
||||
raw_data: 原始指纹数据列表
|
||||
|
||||
Returns:
|
||||
tuple: (valid_items, invalid_items)
|
||||
"""
|
||||
valid, invalid = [], []
|
||||
for item in raw_data:
|
||||
if self.validate_fingerprint(item):
|
||||
valid.append(item)
|
||||
else:
|
||||
invalid.append(item)
|
||||
return valid, invalid
|
||||
|
||||
def to_model_data(self, item: dict) -> dict:
|
||||
"""
|
||||
转换为 Model 字段,子类必须实现
|
||||
|
||||
Args:
|
||||
item: 原始指纹数据
|
||||
|
||||
Returns:
|
||||
dict: Model 字段数据
|
||||
"""
|
||||
raise NotImplementedError("子类必须实现 to_model_data 方法")
|
||||
|
||||
def bulk_create(self, fingerprints: list) -> int:
|
||||
"""
|
||||
批量创建指纹记录(已校验的数据)
|
||||
|
||||
Args:
|
||||
fingerprints: 已校验的指纹数据列表
|
||||
|
||||
Returns:
|
||||
int: 成功创建数量
|
||||
"""
|
||||
if not fingerprints:
|
||||
return 0
|
||||
|
||||
objects = [self.model(**self.to_model_data(item)) for item in fingerprints]
|
||||
created = self.model.objects.bulk_create(objects, ignore_conflicts=True)
|
||||
return len(created)
|
||||
|
||||
def batch_create_fingerprints(self, raw_data: list) -> dict:
|
||||
"""
|
||||
完整流程:分批校验 + 批量创建
|
||||
|
||||
Args:
|
||||
raw_data: 原始指纹数据列表
|
||||
|
||||
Returns:
|
||||
dict: {'created': int, 'failed': int}
|
||||
"""
|
||||
total_created = 0
|
||||
total_failed = 0
|
||||
|
||||
for i in range(0, len(raw_data), self.BATCH_SIZE):
|
||||
batch = raw_data[i:i + self.BATCH_SIZE]
|
||||
valid, invalid = self.validate_fingerprints(batch)
|
||||
total_created += self.bulk_create(valid)
|
||||
total_failed += len(invalid)
|
||||
|
||||
logger.info(
|
||||
"批量创建指纹完成: created=%d, failed=%d, total=%d",
|
||||
total_created, total_failed, len(raw_data)
|
||||
)
|
||||
return {'created': total_created, 'failed': total_failed}
|
||||
|
||||
def get_export_data(self) -> dict:
|
||||
"""
|
||||
获取导出数据,子类必须实现
|
||||
|
||||
Returns:
|
||||
dict: 导出的 JSON 数据
|
||||
"""
|
||||
raise NotImplementedError("子类必须实现 get_export_data 方法")
|
||||
|
||||
def export_to_file(self, output_path: str) -> int:
|
||||
"""
|
||||
导出所有指纹到 JSON 文件
|
||||
|
||||
Args:
|
||||
output_path: 输出文件路径
|
||||
|
||||
Returns:
|
||||
int: 导出的指纹数量
|
||||
"""
|
||||
data = self.get_export_data()
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, ensure_ascii=False)
|
||||
count = len(data.get('fingerprint', []))
|
||||
logger.info("导出指纹文件: %s, 数量: %d", output_path, count)
|
||||
return count
|
||||
|
||||
def get_fingerprint_version(self) -> str:
|
||||
"""
|
||||
获取指纹库版本标识(用于缓存校验)
|
||||
|
||||
Returns:
|
||||
str: 版本标识,格式 "{count}_{latest_timestamp}"
|
||||
|
||||
版本变化场景:
|
||||
- 新增记录 → count 变化
|
||||
- 删除记录 → count 变化
|
||||
- 清空全部 → count 变为 0
|
||||
"""
|
||||
count = self.model.objects.count()
|
||||
latest = self.model.objects.order_by('-created_at').first()
|
||||
latest_ts = int(latest.created_at.timestamp()) if latest else 0
|
||||
return f"{count}_{latest_ts}"
|
||||
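As a usage sketch for the base class above, a concrete subclass (such as the EHole service defined next) is typically driven like this; the input data and printed results are illustrative:

service = EholeFingerprintService()           # subclass shown below

raw = [
    {"cms": "WordPress", "keyword": ["wp-content"]},   # passes validation
    {"cms": "", "keyword": "not-a-list"},               # fails validation
]
result = service.batch_create_fingerprints(raw)
print(result)                                  # e.g. {'created': 1, 'failed': 1}

# The version string acts as a cheap cache key: it changes whenever rows
# are added or removed, so callers can skip rebuilding exported files.
print(service.get_fingerprint_version())       # e.g. "1_1703836800"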
84
backend/apps/engine/services/fingerprints/ehole.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""EHole 指纹管理 Service
|
||||
|
||||
实现 EHole 格式指纹的校验、转换和导出逻辑
|
||||
"""
|
||||
|
||||
from apps.engine.models import EholeFingerprint
|
||||
from .base import BaseFingerprintService
|
||||
|
||||
|
||||
class EholeFingerprintService(BaseFingerprintService):
|
||||
"""EHole 指纹管理服务(继承基类,实现 EHole 特定逻辑)"""
|
||||
|
||||
model = EholeFingerprint
|
||||
|
||||
def validate_fingerprint(self, item: dict) -> bool:
|
||||
"""
|
||||
校验单条 EHole 指纹
|
||||
|
||||
校验规则:
|
||||
- cms 字段必须存在且非空
|
||||
- keyword 字段必须是数组
|
||||
|
||||
Args:
|
||||
item: 单条指纹数据
|
||||
|
||||
Returns:
|
||||
bool: 是否有效
|
||||
"""
|
||||
cms = item.get('cms', '')
|
||||
keyword = item.get('keyword')
|
||||
return bool(cms and str(cms).strip()) and isinstance(keyword, list)
|
||||
|
||||
def to_model_data(self, item: dict) -> dict:
|
||||
"""
|
||||
转换 EHole JSON 格式为 Model 字段
|
||||
|
||||
字段映射:
|
||||
- isImportant (JSON) → is_important (Model)
|
||||
|
||||
Args:
|
||||
item: 原始 EHole JSON 数据
|
||||
|
||||
Returns:
|
||||
dict: Model 字段数据
|
||||
"""
|
||||
return {
|
||||
'cms': str(item.get('cms', '')).strip(),
|
||||
'method': item.get('method', 'keyword'),
|
||||
'location': item.get('location', 'body'),
|
||||
'keyword': item.get('keyword', []),
|
||||
'is_important': item.get('isImportant', False),
|
||||
'type': item.get('type', '-'),
|
||||
}
|
||||
|
||||
def get_export_data(self) -> dict:
|
||||
"""
|
||||
获取导出数据(EHole JSON 格式)
|
||||
|
||||
Returns:
|
||||
dict: EHole 格式的 JSON 数据
|
||||
{
|
||||
"fingerprint": [
|
||||
{"cms": "...", "method": "...", "location": "...",
|
||||
"keyword": [...], "isImportant": false, "type": "..."},
|
||||
...
|
||||
],
|
||||
"version": "1000_1703836800"
|
||||
}
|
||||
"""
|
||||
fingerprints = self.model.objects.all()
|
||||
data = []
|
||||
for fp in fingerprints:
|
||||
data.append({
|
||||
'cms': fp.cms,
|
||||
'method': fp.method,
|
||||
'location': fp.location,
|
||||
'keyword': fp.keyword,
|
||||
'isImportant': fp.is_important, # 转回 JSON 格式
|
||||
'type': fp.type,
|
||||
})
|
||||
return {
|
||||
'fingerprint': data,
|
||||
'version': self.get_fingerprint_version(),
|
||||
}
|
||||
70
backend/apps/engine/services/fingerprints/goby.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Goby 指纹管理 Service
|
||||
|
||||
实现 Goby 格式指纹的校验、转换和导出逻辑
|
||||
"""
|
||||
|
||||
from apps.engine.models import GobyFingerprint
|
||||
from .base import BaseFingerprintService
|
||||
|
||||
|
||||
class GobyFingerprintService(BaseFingerprintService):
|
||||
"""Goby 指纹管理服务(继承基类,实现 Goby 特定逻辑)"""
|
||||
|
||||
model = GobyFingerprint
|
||||
|
||||
def validate_fingerprint(self, item: dict) -> bool:
|
||||
"""
|
||||
校验单条 Goby 指纹
|
||||
|
||||
校验规则:
|
||||
- name 字段必须存在且非空
|
||||
- logic 字段必须存在
|
||||
- rule 字段必须是数组
|
||||
|
||||
Args:
|
||||
item: 单条指纹数据
|
||||
|
||||
Returns:
|
||||
bool: 是否有效
|
||||
"""
|
||||
name = item.get('name', '')
|
||||
logic = item.get('logic', '')
|
||||
rule = item.get('rule')
|
||||
return bool(name and str(name).strip()) and bool(logic) and isinstance(rule, list)
|
||||
|
||||
def to_model_data(self, item: dict) -> dict:
|
||||
"""
|
||||
转换 Goby JSON 格式为 Model 字段
|
||||
|
||||
Args:
|
||||
item: 原始 Goby JSON 数据
|
||||
|
||||
Returns:
|
||||
dict: Model 字段数据
|
||||
"""
|
||||
return {
|
||||
'name': str(item.get('name', '')).strip(),
|
||||
'logic': item.get('logic', ''),
|
||||
'rule': item.get('rule', []),
|
||||
}
|
||||
|
||||
def get_export_data(self) -> list:
|
||||
"""
|
||||
获取导出数据(Goby JSON 格式 - 数组)
|
||||
|
||||
Returns:
|
||||
list: Goby 格式的 JSON 数据(数组格式)
|
||||
[
|
||||
{"name": "...", "logic": "...", "rule": [...]},
|
||||
...
|
||||
]
|
||||
"""
|
||||
fingerprints = self.model.objects.all()
|
||||
return [
|
||||
{
|
||||
'name': fp.name,
|
||||
'logic': fp.logic,
|
||||
'rule': fp.rule,
|
||||
}
|
||||
for fp in fingerprints
|
||||
]
|
||||
99
backend/apps/engine/services/fingerprints/wappalyzer.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""Wappalyzer 指纹管理 Service
|
||||
|
||||
实现 Wappalyzer 格式指纹的校验、转换和导出逻辑
|
||||
"""
|
||||
|
||||
from apps.engine.models import WappalyzerFingerprint
|
||||
from .base import BaseFingerprintService
|
||||
|
||||
|
||||
class WappalyzerFingerprintService(BaseFingerprintService):
|
||||
"""Wappalyzer 指纹管理服务(继承基类,实现 Wappalyzer 特定逻辑)"""
|
||||
|
||||
model = WappalyzerFingerprint
|
||||
|
||||
def validate_fingerprint(self, item: dict) -> bool:
|
||||
"""
|
||||
校验单条 Wappalyzer 指纹
|
||||
|
||||
校验规则:
|
||||
- name 字段必须存在且非空(从 apps 对象的 key 传入)
|
||||
|
||||
Args:
|
||||
item: 单条指纹数据
|
||||
|
||||
Returns:
|
||||
bool: 是否有效
|
||||
"""
|
||||
name = item.get('name', '')
|
||||
return bool(name and str(name).strip())
|
||||
|
||||
def to_model_data(self, item: dict) -> dict:
|
||||
"""
|
||||
转换 Wappalyzer JSON 格式为 Model 字段
|
||||
|
||||
字段映射:
|
||||
- scriptSrc (JSON) → script_src (Model)
|
||||
|
||||
Args:
|
||||
item: 原始 Wappalyzer JSON 数据
|
||||
|
||||
Returns:
|
||||
dict: Model 字段数据
|
||||
"""
|
||||
return {
|
||||
'name': str(item.get('name', '')).strip(),
|
||||
'cats': item.get('cats', []),
|
||||
'cookies': item.get('cookies', {}),
|
||||
'headers': item.get('headers', {}),
|
||||
'script_src': item.get('scriptSrc', []), # JSON: scriptSrc -> Model: script_src
|
||||
'js': item.get('js', []),
|
||||
'implies': item.get('implies', []),
|
||||
'meta': item.get('meta', {}),
|
||||
'html': item.get('html', []),
|
||||
'description': item.get('description', ''),
|
||||
'website': item.get('website', ''),
|
||||
'cpe': item.get('cpe', ''),
|
||||
}
|
||||
|
||||
def get_export_data(self) -> dict:
|
||||
"""
|
||||
获取导出数据(Wappalyzer JSON 格式)
|
||||
|
||||
Returns:
|
||||
dict: Wappalyzer 格式的 JSON 数据
|
||||
{
|
||||
"apps": {
|
||||
"AppName": {"cats": [...], "cookies": {...}, ...},
|
||||
...
|
||||
}
|
||||
}
|
||||
"""
|
||||
fingerprints = self.model.objects.all()
|
||||
apps = {}
|
||||
for fp in fingerprints:
|
||||
app_data = {}
|
||||
if fp.cats:
|
||||
app_data['cats'] = fp.cats
|
||||
if fp.cookies:
|
||||
app_data['cookies'] = fp.cookies
|
||||
if fp.headers:
|
||||
app_data['headers'] = fp.headers
|
||||
if fp.script_src:
|
||||
app_data['scriptSrc'] = fp.script_src # Model: script_src -> JSON: scriptSrc
|
||||
if fp.js:
|
||||
app_data['js'] = fp.js
|
||||
if fp.implies:
|
||||
app_data['implies'] = fp.implies
|
||||
if fp.meta:
|
||||
app_data['meta'] = fp.meta
|
||||
if fp.html:
|
||||
app_data['html'] = fp.html
|
||||
if fp.description:
|
||||
app_data['description'] = fp.description
|
||||
if fp.website:
|
||||
app_data['website'] = fp.website
|
||||
if fp.cpe:
|
||||
app_data['cpe'] = fp.cpe
|
||||
apps[fp.name] = app_data
|
||||
return {'apps': apps}
|
||||
@@ -186,6 +186,7 @@ class NucleiTemplateRepoService:
|
||||
RuntimeError: Git 命令执行失败
|
||||
"""
|
||||
import subprocess
|
||||
from apps.common.utils.git_proxy import get_git_proxy_url
|
||||
|
||||
obj = self._get_repo_obj(repo_id)
|
||||
|
||||
@@ -196,17 +197,41 @@ class NucleiTemplateRepoService:
|
||||
cmd: List[str]
|
||||
action: str
|
||||
|
||||
# 获取代理后的 URL(如果启用了 Git 加速)
|
||||
proxied_url = get_git_proxy_url(obj.repo_url)
|
||||
if proxied_url != obj.repo_url:
|
||||
logger.info("使用 Git 加速: %s -> %s", obj.repo_url, proxied_url)
|
||||
|
||||
# 判断是 clone 还是 pull
|
||||
if git_dir.is_dir():
|
||||
# 已有仓库,执行 pull
|
||||
cmd = ["git", "-C", str(local_path), "pull", "--ff-only"]
|
||||
action = "pull"
|
||||
# 检查远程地址是否变化(比较原始 URL,不是代理 URL)
|
||||
current_remote = subprocess.run(
|
||||
["git", "-C", str(local_path), "remote", "get-url", "origin"],
|
||||
check=False,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
)
|
||||
current_url = current_remote.stdout.strip() if current_remote.returncode == 0 else ""
|
||||
|
||||
# 检查是否需要重新 clone(原始 URL 或代理 URL 变化都需要)
|
||||
if current_url not in [obj.repo_url, proxied_url]:
|
||||
# 远程地址变化,删除旧目录重新 clone
|
||||
logger.info("nuclei 模板仓库 %s 远程地址变化,重新 clone: %s -> %s", obj.id, current_url, obj.repo_url)
|
||||
shutil.rmtree(local_path)
|
||||
local_path.mkdir(parents=True, exist_ok=True)
|
||||
cmd = ["git", "clone", "--depth", "1", proxied_url, str(local_path)]
|
||||
action = "clone"
|
||||
else:
|
||||
# 已有仓库且地址未变,执行 pull
|
||||
cmd = ["git", "-C", str(local_path), "pull", "--ff-only"]
|
||||
action = "pull"
|
||||
else:
|
||||
# 新仓库,执行 clone
|
||||
if local_path.exists() and not local_path.is_dir():
|
||||
raise RuntimeError(f"本地路径已存在且不是目录: {local_path}")
|
||||
# --depth 1 浅克隆,只获取最新提交,节省空间和时间
|
||||
cmd = ["git", "clone", "--depth", "1", obj.repo_url, str(local_path)]
|
||||
cmd = ["git", "clone", "--depth", "1", proxied_url, str(local_path)]
|
||||
action = "clone"
|
||||
|
||||
# 执行 Git 命令
|
||||
|
||||
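The get_git_proxy_url helper imported above is not part of this diff; as an assumption, its behaviour (prefixing a mirror host when Git acceleration is enabled, for example via the GIT_MIRROR setting seen later in this diff) might be roughly:

import os

def get_git_proxy_url(repo_url: str) -> str:
    """Assumed sketch: prepend a mirror prefix when GIT_MIRROR is configured."""
    mirror = os.getenv("GIT_MIRROR", "").strip()
    if not mirror:
        return repo_url            # no acceleration configured, use the URL as-is
    return mirror.rstrip("/") + "/" + repo_url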
@@ -76,8 +76,8 @@ class TaskDistributor:
|
||||
self.docker_image = settings.TASK_EXECUTOR_IMAGE
|
||||
if not self.docker_image:
|
||||
raise ValueError("TASK_EXECUTOR_IMAGE 未配置,请确保 IMAGE_TAG 环境变量已设置")
|
||||
self.results_mount = getattr(settings, 'CONTAINER_RESULTS_MOUNT', '/app/backend/results')
|
||||
self.logs_mount = getattr(settings, 'CONTAINER_LOGS_MOUNT', '/app/backend/logs')
|
||||
# 统一使用 /opt/xingrin 下的路径
|
||||
self.logs_mount = "/opt/xingrin/logs"
|
||||
self.submit_interval = getattr(settings, 'TASK_SUBMIT_INTERVAL', 5)
|
||||
|
||||
def get_online_workers(self) -> list[WorkerNode]:
|
||||
@@ -153,11 +153,68 @@ class TaskDistributor:
|
||||
else:
|
||||
scored_workers.append((worker, score, cpu, mem))
|
||||
|
||||
# 降级策略:如果没有正常负载的,使用高负载中最低的
|
||||
# 降级策略:如果没有正常负载的,循环等待后重新检测
|
||||
if not scored_workers:
|
||||
if high_load_workers:
|
||||
logger.warning("所有 Worker 高负载,降级选择负载最低的")
|
||||
scored_workers = high_load_workers
|
||||
# 高负载等待参数(默认每 60 秒检测一次,最多 10 次)
|
||||
high_load_wait = getattr(settings, 'HIGH_LOAD_WAIT_SECONDS', 60)
|
||||
high_load_max_retries = getattr(settings, 'HIGH_LOAD_MAX_RETRIES', 10)
|
||||
|
||||
# 开始等待前发送高负载通知
|
||||
high_load_workers.sort(key=lambda x: x[1])
|
||||
_, _, first_cpu, first_mem = high_load_workers[0]
|
||||
from apps.common.signals import all_workers_high_load
|
||||
all_workers_high_load.send(
|
||||
sender=self.__class__,
|
||||
worker_name="所有节点",
|
||||
cpu=first_cpu,
|
||||
mem=first_mem
|
||||
)
|
||||
|
||||
for retry in range(high_load_max_retries):
|
||||
logger.warning(
|
||||
"所有 Worker 高负载,等待 %d 秒后重试... (%d/%d)",
|
||||
high_load_wait, retry + 1, high_load_max_retries
|
||||
)
|
||||
time.sleep(high_load_wait)
|
||||
|
||||
# 重新获取负载数据
|
||||
loads = worker_load_service.get_all_loads(worker_ids)
|
||||
|
||||
# 重新评估
|
||||
scored_workers = []
|
||||
high_load_workers = []
|
||||
|
||||
for worker in workers:
|
||||
load = loads.get(worker.id)
|
||||
if not load:
|
||||
continue
|
||||
|
||||
cpu = load.get('cpu', 0)
|
||||
mem = load.get('mem', 0)
|
||||
score = cpu * 0.7 + mem * 0.3
|
||||
|
||||
if cpu > 85 or mem > 85:
|
||||
high_load_workers.append((worker, score, cpu, mem))
|
||||
else:
|
||||
scored_workers.append((worker, score, cpu, mem))
|
||||
|
||||
# 如果有正常负载的 Worker,跳出循环
|
||||
if scored_workers:
|
||||
logger.info("检测到正常负载 Worker,结束等待")
|
||||
break
|
||||
|
||||
# 超时或仍然高负载,选择负载最低的
|
||||
if not scored_workers and high_load_workers:
|
||||
high_load_workers.sort(key=lambda x: x[1])
|
||||
best_worker, _, cpu, mem = high_load_workers[0]
|
||||
|
||||
logger.warning(
|
||||
"等待超时,强制分发到高负载 Worker: %s (CPU: %.1f%%, MEM: %.1f%%)",
|
||||
best_worker.name, cpu, mem
|
||||
)
|
||||
return best_worker
|
||||
return best_worker
|
||||
else:
|
||||
logger.warning("没有可用的 Worker")
|
||||
return None
|
||||
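To make the weighting concrete, the score used above (CPU weighted 0.7, memory 0.3, lower is better, 85% as the high-load cutoff) plays out like this for two hypothetical workers:

loads = {
    "worker-a": {"cpu": 40.0, "mem": 70.0},   # score = 40*0.7 + 70*0.3 = 49.0
    "worker-b": {"cpu": 90.0, "mem": 20.0},   # cpu > 85, so it counts as high load
}
for name, load in loads.items():
    score = load["cpu"] * 0.7 + load["mem"] * 0.3
    high = load["cpu"] > 85 or load["mem"] > 85
    print(name, round(score, 1), "high-load" if high else "ok")
# worker-a wins immediately; worker-b would only be used after the wait loop times out.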
@@ -213,26 +270,30 @@ class TaskDistributor:
|
||||
network_arg = f"--network {settings.DOCKER_NETWORK_NAME}"
|
||||
server_url = f"http://server:{settings.SERVER_PORT}"
|
||||
else:
|
||||
# 远程:无需指定网络,使用公网地址
|
||||
# 远程:通过 Nginx 反向代理访问(HTTPS,不直连 8888 端口)
|
||||
network_arg = ""
|
||||
server_url = f"http://{settings.PUBLIC_HOST}:{settings.SERVER_PORT}"
|
||||
server_url = f"https://{settings.PUBLIC_HOST}:{settings.PUBLIC_PORT}"
|
||||
|
||||
# 挂载路径(所有节点统一使用固定路径)
|
||||
host_results_dir = settings.HOST_RESULTS_DIR # /opt/xingrin/results
|
||||
host_logs_dir = settings.HOST_LOGS_DIR # /opt/xingrin/logs
|
||||
# 挂载路径(统一挂载 /opt/xingrin)
|
||||
host_xingrin_dir = "/opt/xingrin"
|
||||
|
||||
# 环境变量:只需 SERVER_URL,其他配置容器启动时从配置中心获取
|
||||
# Prefect 本地模式配置:禁用 API server 和事件系统
|
||||
# 环境变量:SERVER_URL + IS_LOCAL,其他配置容器启动时从配置中心获取
|
||||
# IS_LOCAL 用于 Worker 向配置中心声明身份,决定返回的数据库地址
|
||||
# Prefect 本地模式配置:启用 ephemeral server(本地临时服务器)
|
||||
is_local_str = "true" if worker.is_local else "false"
|
||||
env_vars = [
|
||||
f"-e SERVER_URL={shlex.quote(server_url)}",
|
||||
"-e PREFECT_API_URL=", # 禁用 API server
|
||||
"-e PREFECT_LOGGING_EXTRA_LOGGERS=", # 禁用 Prefect 的额外内部日志器
|
||||
f"-e IS_LOCAL={is_local_str}",
|
||||
"-e PREFECT_HOME=/tmp/.prefect", # 设置 Prefect 数据目录到可写位置
|
||||
"-e PREFECT_SERVER_EPHEMERAL_ENABLED=true", # 启用 ephemeral server(本地临时服务器)
|
||||
"-e PREFECT_SERVER_EPHEMERAL_STARTUP_TIMEOUT_SECONDS=120", # 增加启动超时时间
|
||||
"-e PREFECT_SERVER_DATABASE_CONNECTION_URL=sqlite+aiosqlite:////tmp/.prefect/prefect.db", # 使用 /tmp 下的 SQLite
|
||||
"-e PREFECT_LOGGING_LEVEL=WARNING", # 日志级别(减少 DEBUG 噪音)
|
||||
]
|
||||
|
||||
# 挂载卷
|
||||
# 挂载卷(统一挂载整个 /opt/xingrin 目录)
|
||||
volumes = [
|
||||
f"-v {host_results_dir}:{self.results_mount}",
|
||||
f"-v {host_logs_dir}:{self.logs_mount}",
|
||||
f"-v {host_xingrin_dir}:{host_xingrin_dir}",
|
||||
]
|
||||
|
||||
# 构建命令行参数
|
||||
@@ -407,8 +468,20 @@ class TaskDistributor:
|
||||
Note:
|
||||
engine_config 由 Flow 内部通过 scan_id 查询数据库获取
|
||||
"""
|
||||
logger.info("="*60)
|
||||
logger.info("execute_scan_flow 开始")
|
||||
logger.info(" scan_id: %s", scan_id)
|
||||
logger.info(" target_name: %s", target_name)
|
||||
logger.info(" target_id: %s", target_id)
|
||||
logger.info(" scan_workspace_dir: %s", scan_workspace_dir)
|
||||
logger.info(" engine_name: %s", engine_name)
|
||||
logger.info(" docker_image: %s", self.docker_image)
|
||||
logger.info("="*60)
|
||||
|
||||
# 1. 等待提交间隔(后台线程执行,不阻塞 API)
|
||||
logger.info("等待提交间隔...")
|
||||
self._wait_for_submit_interval()
|
||||
logger.info("提交间隔等待完成")
|
||||
|
||||
# 2. 选择最佳 Worker
|
||||
worker = self.select_best_worker()
|
||||
@@ -481,7 +554,7 @@ class TaskDistributor:
|
||||
try:
|
||||
# 构建 docker run 命令(清理过期扫描结果目录)
|
||||
script_args = {
|
||||
'results_dir': '/app/backend/results',
|
||||
'results_dir': '/opt/xingrin/results',
|
||||
'retention_days': retention_days,
|
||||
}
|
||||
|
||||
|
||||
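Put together, the fragments above (an empty network flag for remote workers, the SERVER_URL/IS_LOCAL and Prefect ephemeral settings, and the single /opt/xingrin bind mount) produce a docker run command roughly of this shape; the actual assembly happens later in the method, so image name and ordering here are only illustrative:

parts = [
    "docker run -d",
    "-e SERVER_URL=https://scan.example.com:8083",   # placeholder PUBLIC_HOST:PUBLIC_PORT
    "-e IS_LOCAL=false",
    "-e PREFECT_SERVER_EPHEMERAL_ENABLED=true",
    "-v /opt/xingrin:/opt/xingrin",
    "task-executor:v1.2.1",                          # placeholder for TASK_EXECUTOR_IMAGE
]
print(" ".join(parts))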
@@ -13,7 +13,7 @@ from django.conf import settings
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.core.files.uploadedfile import UploadedFile
|
||||
|
||||
from apps.common.hash_utils import safe_calc_file_sha256
|
||||
from apps.common.utils import safe_calc_file_sha256
|
||||
from apps.engine.models import Wordlist
|
||||
from apps.engine.repositories import DjangoWordlistRepository
|
||||
|
||||
|
||||
@@ -134,5 +134,57 @@ class WorkerService:
|
||||
logger.warning(f"[卸载] Worker {worker_id} 远程卸载异常: {e}")
|
||||
return False, f"远程卸载异常: {str(e)}"
|
||||
|
||||
def execute_remote_command(
|
||||
self,
|
||||
ip_address: str,
|
||||
ssh_port: int,
|
||||
username: str,
|
||||
password: str | None,
|
||||
command: str
|
||||
) -> tuple[bool, str]:
|
||||
"""
|
||||
在远程主机上执行命令
|
||||
|
||||
Args:
|
||||
ip_address: SSH 主机地址
|
||||
ssh_port: SSH 端口
|
||||
username: SSH 用户名
|
||||
password: SSH 密码
|
||||
command: 要执行的命令
|
||||
|
||||
Returns:
|
||||
(success, message) 元组
|
||||
"""
|
||||
if not password:
|
||||
return False, "未配置 SSH 密码"
|
||||
|
||||
try:
|
||||
import paramiko
|
||||
|
||||
ssh = paramiko.SSHClient()
|
||||
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
|
||||
ssh.connect(
|
||||
ip_address,
|
||||
port=ssh_port,
|
||||
username=username,
|
||||
password=password,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
stdin, stdout, stderr = ssh.exec_command(command, timeout=120)
|
||||
exit_status = stdout.channel.recv_exit_status()
|
||||
|
||||
ssh.close()
|
||||
|
||||
if exit_status == 0:
|
||||
return True, stdout.read().decode().strip()
|
||||
else:
|
||||
error = stderr.read().decode().strip()
|
||||
return False, error
|
||||
|
||||
except Exception as e:
|
||||
return False, str(e)
|
||||
|
||||
|
||||
__all__ = ["WorkerService"]
|
||||
|
||||
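A short usage sketch of the SSH helper above (host details are placeholders, and constructing WorkerService directly is an assumption):

ok, output = WorkerService().execute_remote_command(
    ip_address="203.0.113.10",      # placeholder host
    ssh_port=22,
    username="root",
    password="********",
    command="docker ps --format '{{.Names}}'",
)
if ok:
    print("running containers:", output)
else:
    print("remote command failed:", output)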
@@ -7,6 +7,11 @@ from .views import (
|
||||
WordlistViewSet,
|
||||
NucleiTemplateRepoViewSet,
|
||||
)
|
||||
from .views.fingerprints import (
|
||||
EholeFingerprintViewSet,
|
||||
GobyFingerprintViewSet,
|
||||
WappalyzerFingerprintViewSet,
|
||||
)
|
||||
|
||||
|
||||
# 创建路由器
|
||||
@@ -15,6 +20,10 @@ router.register(r"engines", ScanEngineViewSet, basename="engine")
|
||||
router.register(r"workers", WorkerNodeViewSet, basename="worker")
|
||||
router.register(r"wordlists", WordlistViewSet, basename="wordlist")
|
||||
router.register(r"nuclei/repos", NucleiTemplateRepoViewSet, basename="nuclei-repos")
|
||||
# 指纹管理
|
||||
router.register(r"fingerprints/ehole", EholeFingerprintViewSet, basename="ehole-fingerprint")
|
||||
router.register(r"fingerprints/goby", GobyFingerprintViewSet, basename="goby-fingerprint")
|
||||
router.register(r"fingerprints/wappalyzer", WappalyzerFingerprintViewSet, basename="wappalyzer-fingerprint")
|
||||
|
||||
urlpatterns = [
|
||||
path("", include(router.urls)),
|
||||
|
||||
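With these registrations the router exposes, for each fingerprint type, endpoints of the following shape (the /api/engine/ prefix is taken from the viewset docstrings below and depends on where this urls.py is mounted):

GET/POST  /api/engine/fingerprints/ehole/
POST      /api/engine/fingerprints/ehole/batch_create/
POST      /api/engine/fingerprints/ehole/import_file/
POST      /api/engine/fingerprints/ehole/bulk-delete/
POST      /api/engine/fingerprints/ehole/delete-all/
GET       /api/engine/fingerprints/ehole/export/
(the goby and wappalyzer prefixes follow the same pattern)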
16
backend/apps/engine/views/fingerprints/__init__.py
Normal file
@@ -0,0 +1,16 @@
"""指纹管理 ViewSets

导出所有指纹相关的 ViewSet 类
"""

from .base import BaseFingerprintViewSet
from .ehole import EholeFingerprintViewSet
from .goby import GobyFingerprintViewSet
from .wappalyzer import WappalyzerFingerprintViewSet

__all__ = [
    "BaseFingerprintViewSet",
    "EholeFingerprintViewSet",
    "GobyFingerprintViewSet",
    "WappalyzerFingerprintViewSet",
]
202
backend/apps/engine/views/fingerprints/base.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""指纹管理基类 ViewSet
|
||||
|
||||
提供通用的 CRUD 和批量操作,供 EHole/Goby/Wappalyzer 等子类继承
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from django.http import HttpResponse
|
||||
from rest_framework import viewsets, status, filters
|
||||
from rest_framework.decorators import action
|
||||
from rest_framework.response import Response
|
||||
from rest_framework.exceptions import ValidationError
|
||||
|
||||
from apps.common.pagination import BasePagination
|
||||
from apps.common.utils.filter_utils import apply_filters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseFingerprintViewSet(viewsets.ModelViewSet):
|
||||
"""指纹管理基类 ViewSet,供 EHole/Goby/Wappalyzer 等子类继承
|
||||
|
||||
提供的 API:
|
||||
|
||||
标准 CRUD(继承自 ModelViewSet):
|
||||
- GET / 列表查询(分页 + 智能过滤)
|
||||
- POST / 创建单条
|
||||
- GET /{id}/ 获取详情
|
||||
- PUT /{id}/ 更新
|
||||
- DELETE /{id}/ 删除
|
||||
|
||||
批量操作(本类实现):
|
||||
- POST /batch_create/ 批量创建(JSON body)
|
||||
- POST /import_file/ 文件导入(multipart/form-data,适合 10MB+ 大文件)
|
||||
- POST /bulk-delete/ 批量删除
|
||||
- POST /delete-all/ 删除所有
|
||||
- GET /export/ 导出下载
|
||||
|
||||
智能过滤语法(filter 参数):
|
||||
- field="value" 模糊匹配(包含)
|
||||
- field=="value" 精确匹配
|
||||
- 多条件空格分隔 AND 关系
|
||||
- || 或 or OR 关系
|
||||
|
||||
子类必须实现:
|
||||
- service_class Service 类
|
||||
- parse_import_data 解析导入数据格式
|
||||
- get_export_filename 导出文件名
|
||||
"""
|
||||
|
||||
pagination_class = BasePagination
|
||||
filter_backends = [filters.OrderingFilter]
|
||||
ordering = ['-created_at']
|
||||
|
||||
# 子类必须指定
|
||||
service_class = None # Service 类
|
||||
|
||||
# 智能过滤字段映射,子类必须覆盖
|
||||
FILTER_FIELD_MAPPING = {}
|
||||
|
||||
# JSON 数组字段列表(使用 __contains 查询),子类可覆盖
|
||||
JSON_ARRAY_FIELDS = []
|
||||
|
||||
def get_queryset(self):
|
||||
"""支持智能过滤语法"""
|
||||
queryset = super().get_queryset()
|
||||
filter_query = self.request.query_params.get('filter', None)
|
||||
if filter_query:
|
||||
queryset = apply_filters(
|
||||
queryset,
|
||||
filter_query,
|
||||
self.FILTER_FIELD_MAPPING,
|
||||
json_array_fields=getattr(self, 'JSON_ARRAY_FIELDS', [])
|
||||
)
|
||||
return queryset
|
||||
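For example, a request such as GET /?filter=cms="word" type=="CMS" is handed to apply_filters together with the mapping below; apply_filters itself is not part of this diff, but conceptually the result is close to:

# Conceptual equivalent only; the real translation is done by apply_filters
queryset = EholeFingerprint.objects.filter(
    cms__icontains="word",    # cms="word"   -> fuzzy (contains) match
    type="CMS",               # type=="CMS"  -> exact match
)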
|
||||
def get_service(self):
|
||||
"""获取 Service 实例"""
|
||||
if self.service_class is None:
|
||||
raise NotImplementedError("子类必须指定 service_class")
|
||||
return self.service_class()
|
||||
|
||||
def parse_import_data(self, json_data: dict) -> list:
|
||||
"""
|
||||
解析导入数据,子类必须实现
|
||||
|
||||
Args:
|
||||
json_data: 解析后的 JSON 数据
|
||||
|
||||
Returns:
|
||||
list: 指纹数据列表
|
||||
"""
|
||||
raise NotImplementedError("子类必须实现 parse_import_data 方法")
|
||||
|
||||
def get_export_filename(self) -> str:
|
||||
"""
|
||||
导出文件名,子类必须实现
|
||||
|
||||
Returns:
|
||||
str: 文件名
|
||||
"""
|
||||
raise NotImplementedError("子类必须实现 get_export_filename 方法")
|
||||
|
||||
@action(detail=False, methods=['post'])
|
||||
def batch_create(self, request):
|
||||
"""
|
||||
批量创建指纹规则
|
||||
POST /api/engine/fingerprints/{type}/batch_create/
|
||||
|
||||
请求格式:
|
||||
{
|
||||
"fingerprints": [
|
||||
{"cms": "WordPress", "method": "keyword", ...},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
返回:
|
||||
{
|
||||
"created": 2,
|
||||
"failed": 0
|
||||
}
|
||||
"""
|
||||
fingerprints = request.data.get('fingerprints', [])
|
||||
if not fingerprints:
|
||||
raise ValidationError('fingerprints 不能为空')
|
||||
if not isinstance(fingerprints, list):
|
||||
raise ValidationError('fingerprints 必须是数组')
|
||||
|
||||
result = self.get_service().batch_create_fingerprints(fingerprints)
|
||||
return Response(result, status=status.HTTP_201_CREATED)
|
||||
|
||||
@action(detail=False, methods=['post'])
|
||||
def import_file(self, request):
|
||||
"""
|
||||
文件导入(适合大文件,10MB+)
|
||||
POST /api/engine/fingerprints/{type}/import_file/
|
||||
|
||||
请求格式:multipart/form-data
|
||||
- file: JSON 文件
|
||||
|
||||
返回:同 batch_create
|
||||
"""
|
||||
file = request.FILES.get('file')
|
||||
if not file:
|
||||
raise ValidationError('缺少文件')
|
||||
|
||||
try:
|
||||
json_data = json.load(file)
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValidationError(f'无效的 JSON 格式: {e}')
|
||||
|
||||
fingerprints = self.parse_import_data(json_data)
|
||||
if not fingerprints:
|
||||
raise ValidationError('文件中没有有效的指纹数据')
|
||||
|
||||
result = self.get_service().batch_create_fingerprints(fingerprints)
|
||||
return Response(result, status=status.HTTP_201_CREATED)
|
||||
|
||||
@action(detail=False, methods=['post'], url_path='bulk-delete')
|
||||
def bulk_delete(self, request):
|
||||
"""
|
||||
批量删除
|
||||
POST /api/engine/fingerprints/{type}/bulk-delete/
|
||||
|
||||
请求格式:{"ids": [1, 2, 3]}
|
||||
返回:{"deleted": 3}
|
||||
"""
|
||||
ids = request.data.get('ids', [])
|
||||
if not ids:
|
||||
raise ValidationError('ids 不能为空')
|
||||
if not isinstance(ids, list):
|
||||
raise ValidationError('ids 必须是数组')
|
||||
|
||||
deleted_count = self.queryset.model.objects.filter(id__in=ids).delete()[0]
|
||||
return Response({'deleted': deleted_count})
|
||||
|
||||
@action(detail=False, methods=['post'], url_path='delete-all')
|
||||
def delete_all(self, request):
|
||||
"""
|
||||
删除所有指纹
|
||||
POST /api/engine/fingerprints/{type}/delete-all/
|
||||
|
||||
返回:{"deleted": 1000}
|
||||
"""
|
||||
deleted_count = self.queryset.model.objects.all().delete()[0]
|
||||
return Response({'deleted': deleted_count})
|
||||
|
||||
@action(detail=False, methods=['get'])
|
||||
def export(self, request):
|
||||
"""
|
||||
导出指纹(前端下载)
|
||||
GET /api/engine/fingerprints/{type}/export/
|
||||
|
||||
返回:JSON 文件下载
|
||||
"""
|
||||
data = self.get_service().get_export_data()
|
||||
content = json.dumps(data, ensure_ascii=False, indent=2)
|
||||
response = HttpResponse(content, content_type='application/json')
|
||||
response['Content-Disposition'] = f'attachment; filename="{self.get_export_filename()}"'
|
||||
return response
|
||||
67
backend/apps/engine/views/fingerprints/ehole.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""EHole 指纹管理 ViewSet"""
|
||||
|
||||
from apps.common.pagination import BasePagination
|
||||
from apps.engine.models import EholeFingerprint
|
||||
from apps.engine.serializers.fingerprints import EholeFingerprintSerializer
|
||||
from apps.engine.services.fingerprints import EholeFingerprintService
|
||||
|
||||
from .base import BaseFingerprintViewSet
|
||||
|
||||
|
||||
class EholeFingerprintViewSet(BaseFingerprintViewSet):
|
||||
"""EHole 指纹管理 ViewSet
|
||||
|
||||
继承自 BaseFingerprintViewSet,提供以下 API:
|
||||
|
||||
标准 CRUD(ModelViewSet):
|
||||
- GET / 列表查询(分页)
|
||||
- POST / 创建单条
|
||||
- GET /{id}/ 获取详情
|
||||
- PUT /{id}/ 更新
|
||||
- DELETE /{id}/ 删除
|
||||
|
||||
批量操作(继承自基类):
|
||||
- POST /batch_create/ 批量创建(JSON body)
|
||||
- POST /import_file/ 文件导入(multipart/form-data)
|
||||
- POST /bulk-delete/ 批量删除
|
||||
- POST /delete-all/ 删除所有
|
||||
- GET /export/ 导出下载
|
||||
|
||||
智能过滤语法(filter 参数):
|
||||
- cms="word" 模糊匹配 cms 字段
|
||||
- cms=="WordPress" 精确匹配
|
||||
- type="CMS" 按类型筛选
|
||||
- method="keyword" 按匹配方式筛选
|
||||
- location="body" 按匹配位置筛选
|
||||
"""
|
||||
|
||||
queryset = EholeFingerprint.objects.all()
|
||||
serializer_class = EholeFingerprintSerializer
|
||||
pagination_class = BasePagination
|
||||
service_class = EholeFingerprintService
|
||||
|
||||
# 排序配置
|
||||
ordering_fields = ['created_at', 'cms']
|
||||
ordering = ['-created_at']
|
||||
|
||||
# EHole 过滤字段映射
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'cms': 'cms',
|
||||
'method': 'method',
|
||||
'location': 'location',
|
||||
'type': 'type',
|
||||
'isImportant': 'is_important',
|
||||
}
|
||||
|
||||
def parse_import_data(self, json_data: dict) -> list:
|
||||
"""
|
||||
解析 EHole JSON 格式的导入数据
|
||||
|
||||
输入格式:{"fingerprint": [...]}
|
||||
返回:指纹列表
|
||||
"""
|
||||
return json_data.get('fingerprint', [])
|
||||
|
||||
def get_export_filename(self) -> str:
|
||||
"""导出文件名"""
|
||||
return 'ehole.json'
|
||||
65
backend/apps/engine/views/fingerprints/goby.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""Goby 指纹管理 ViewSet"""
|
||||
|
||||
from apps.common.pagination import BasePagination
|
||||
from apps.engine.models import GobyFingerprint
|
||||
from apps.engine.serializers.fingerprints import GobyFingerprintSerializer
|
||||
from apps.engine.services.fingerprints import GobyFingerprintService
|
||||
|
||||
from .base import BaseFingerprintViewSet
|
||||
|
||||
|
||||
class GobyFingerprintViewSet(BaseFingerprintViewSet):
|
||||
"""Goby 指纹管理 ViewSet
|
||||
|
||||
继承自 BaseFingerprintViewSet,提供以下 API:
|
||||
|
||||
标准 CRUD(ModelViewSet):
|
||||
- GET / 列表查询(分页)
|
||||
- POST / 创建单条
|
||||
- GET /{id}/ 获取详情
|
||||
- PUT /{id}/ 更新
|
||||
- DELETE /{id}/ 删除
|
||||
|
||||
批量操作(继承自基类):
|
||||
- POST /batch_create/ 批量创建(JSON body)
|
||||
- POST /import_file/ 文件导入(multipart/form-data)
|
||||
- POST /bulk-delete/ 批量删除
|
||||
- POST /delete-all/ 删除所有
|
||||
- GET /export/ 导出下载
|
||||
|
||||
智能过滤语法(filter 参数):
|
||||
- name="word" 模糊匹配 name 字段
|
||||
- name=="ProductName" 精确匹配
|
||||
"""
|
||||
|
||||
queryset = GobyFingerprint.objects.all()
|
||||
serializer_class = GobyFingerprintSerializer
|
||||
pagination_class = BasePagination
|
||||
service_class = GobyFingerprintService
|
||||
|
||||
# 排序配置
|
||||
ordering_fields = ['created_at', 'name']
|
||||
ordering = ['-created_at']
|
||||
|
||||
# Goby 过滤字段映射
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'name': 'name',
|
||||
'logic': 'logic',
|
||||
}
|
||||
|
||||
def parse_import_data(self, json_data) -> list:
|
||||
"""
|
||||
解析 Goby JSON 格式的导入数据
|
||||
|
||||
Goby 格式是数组格式:[{...}, {...}, ...]
|
||||
|
||||
输入格式:[{"name": "...", "logic": "...", "rule": [...]}, ...]
|
||||
返回:指纹列表
|
||||
"""
|
||||
if isinstance(json_data, list):
|
||||
return json_data
|
||||
return []
|
||||
|
||||
def get_export_filename(self) -> str:
|
||||
"""导出文件名"""
|
||||
return 'goby.json'
|
||||
75
backend/apps/engine/views/fingerprints/wappalyzer.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""Wappalyzer 指纹管理 ViewSet"""
|
||||
|
||||
from apps.common.pagination import BasePagination
|
||||
from apps.engine.models import WappalyzerFingerprint
|
||||
from apps.engine.serializers.fingerprints import WappalyzerFingerprintSerializer
|
||||
from apps.engine.services.fingerprints import WappalyzerFingerprintService
|
||||
|
||||
from .base import BaseFingerprintViewSet
|
||||
|
||||
|
||||
class WappalyzerFingerprintViewSet(BaseFingerprintViewSet):
|
||||
"""Wappalyzer 指纹管理 ViewSet
|
||||
|
||||
继承自 BaseFingerprintViewSet,提供以下 API:
|
||||
|
||||
标准 CRUD(ModelViewSet):
|
||||
- GET / 列表查询(分页)
|
||||
- POST / 创建单条
|
||||
- GET /{id}/ 获取详情
|
||||
- PUT /{id}/ 更新
|
||||
- DELETE /{id}/ 删除
|
||||
|
||||
批量操作(继承自基类):
|
||||
- POST /batch_create/ 批量创建(JSON body)
|
||||
- POST /import_file/ 文件导入(multipart/form-data)
|
||||
- POST /bulk-delete/ 批量删除
|
||||
- POST /delete-all/ 删除所有
|
||||
- GET /export/ 导出下载
|
||||
|
||||
智能过滤语法(filter 参数):
|
||||
- name="word" 模糊匹配 name 字段
|
||||
- name=="AppName" 精确匹配
|
||||
"""
|
||||
|
||||
queryset = WappalyzerFingerprint.objects.all()
|
||||
serializer_class = WappalyzerFingerprintSerializer
|
||||
pagination_class = BasePagination
|
||||
service_class = WappalyzerFingerprintService
|
||||
|
||||
# 排序配置
|
||||
ordering_fields = ['created_at', 'name']
|
||||
ordering = ['-created_at']
|
||||
|
||||
# Wappalyzer 过滤字段映射
|
||||
# 注意:implies 是 JSON 数组字段,使用 __contains 查询
|
||||
FILTER_FIELD_MAPPING = {
|
||||
'name': 'name',
|
||||
'description': 'description',
|
||||
'website': 'website',
|
||||
'cpe': 'cpe',
|
||||
'implies': 'implies', # JSON 数组字段
|
||||
}
|
||||
|
||||
# JSON 数组字段列表(使用 __contains 查询)
|
||||
JSON_ARRAY_FIELDS = ['implies']
|
||||
|
||||
def parse_import_data(self, json_data: dict) -> list:
|
||||
"""
|
||||
解析 Wappalyzer JSON 格式的导入数据
|
||||
|
||||
Wappalyzer 格式是 apps 对象格式:{"apps": {"AppName": {...}, ...}}
|
||||
|
||||
输入格式:{"apps": {"1C-Bitrix": {"cats": [...], ...}, ...}}
|
||||
返回:指纹列表(每个 app 转换为带 name 字段的 dict)
|
||||
"""
|
||||
apps = json_data.get('apps', {})
|
||||
fingerprints = []
|
||||
for name, data in apps.items():
|
||||
item = {'name': name, **data}
|
||||
fingerprints.append(item)
|
||||
return fingerprints
|
||||
|
||||
def get_export_filename(self) -> str:
|
||||
"""导出文件名"""
|
||||
return 'wappalyzer.json'
|
||||
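A tiny illustration of that flattening (the input shape follows the docstring; the rule values are invented):

json_data = {"apps": {"1C-Bitrix": {"cats": [1], "headers": {"x-powered-cms": "Bitrix"}}}}

fingerprints = [{"name": name, **data} for name, data in json_data["apps"].items()]
# -> [{"name": "1C-Bitrix", "cats": [1], "headers": {"x-powered-cms": "Bitrix"}}]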
@@ -116,7 +116,7 @@ class NucleiTemplateRepoViewSet(viewsets.ModelViewSet):
|
||||
return Response({"message": str(exc)}, status=status.HTTP_400_BAD_REQUEST)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.error("刷新 Nuclei 模板仓库失败: %s", exc, exc_info=True)
|
||||
return Response({"message": "刷新仓库失败"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||
return Response({"message": f"刷新仓库失败: {exc}"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||
|
||||
return Response({"message": "刷新成功", "result": result}, status=status.HTTP_200_OK)
|
||||
|
||||
|
||||
@@ -118,8 +118,36 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
|
||||
|
||||
@action(detail=True, methods=['post'])
|
||||
def heartbeat(self, request, pk=None):
|
||||
"""接收心跳上报(写 Redis,首次心跳更新部署状态)"""
|
||||
"""
|
||||
接收心跳上报(写 Redis,首次心跳更新部署状态,检查版本)
|
||||
|
||||
请求体:
|
||||
{
|
||||
"cpu_percent": 50.0,
|
||||
"memory_percent": 60.0,
|
||||
"version": "v1.0.9"
|
||||
}
|
||||
|
||||
返回:
|
||||
{
|
||||
"status": "ok",
|
||||
"need_update": true/false,
|
||||
"server_version": "v1.0.19"
|
||||
}
|
||||
|
||||
状态流转:
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ 场景 │ 状态变化 │
|
||||
├─────────────────────────────┼───────────────────────────────────────┤
|
||||
│ 首次心跳 │ pending/deploying → online │
|
||||
│ 远程 Worker 版本不匹配 │ online → updating → (更新成功) online │
|
||||
│ 远程 Worker 更新失败 │ updating → outdated │
|
||||
│ 本地 Worker 版本不匹配 │ online → outdated (需手动 update.sh) │
|
||||
│ 版本匹配 │ updating/outdated → online │
|
||||
└─────────────────────────────┴───────────────────────────────────────┘
|
||||
"""
|
||||
from apps.engine.services.worker_load_service import worker_load_service
|
||||
from django.conf import settings
|
||||
|
||||
worker = self.get_object()
|
||||
info = request.data if request.data else {}
|
||||
@@ -134,7 +162,122 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
|
||||
worker.status = 'online'
|
||||
worker.save(update_fields=['status'])
|
||||
|
||||
return Response({'status': 'ok'})
|
||||
# 3. 版本检查:比较 agent 版本与 server 版本
|
||||
agent_version = info.get('version', '')
|
||||
server_version = settings.IMAGE_TAG # Server 当前版本
|
||||
need_update = False
|
||||
|
||||
if agent_version and agent_version != 'unknown':
|
||||
# 版本不匹配时通知 agent 更新
|
||||
need_update = agent_version != server_version
|
||||
if need_update:
|
||||
logger.info(
|
||||
f"Worker {worker.name} 版本不匹配: agent={agent_version}, server={server_version}"
|
||||
)
|
||||
|
||||
# 远程 Worker:服务端主动通过 SSH 触发更新
|
||||
if not worker.is_local and worker.ip_address:
|
||||
self._trigger_remote_agent_update(worker, server_version)
|
||||
else:
|
||||
# 本地 Worker 版本不匹配:标记为 outdated
|
||||
# 需要用户手动执行 update.sh 更新
|
||||
if worker.status != 'outdated':
|
||||
worker.status = 'outdated'
|
||||
worker.save(update_fields=['status'])
|
||||
else:
|
||||
# 版本匹配,确保状态为 online
|
||||
if worker.status in ('updating', 'outdated'):
|
||||
worker.status = 'online'
|
||||
worker.save(update_fields=['status'])
|
||||
|
||||
return Response({
|
||||
'status': 'ok',
|
||||
'need_update': need_update,
|
||||
'server_version': server_version
|
||||
})
|
||||
|
||||
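On the agent side, the exchange described above could be driven by a sketch like the following (URL, worker id and HTTP client are placeholders, not the real agent code):

import requests  # assumed HTTP client

heartbeat_url = "https://scan.example.com:8083/api/workers/3/heartbeat/"  # placeholder
payload = {"cpu_percent": 50.0, "memory_percent": 60.0, "version": "v1.0.9"}

resp = requests.post(heartbeat_url, json=payload, timeout=10).json()
if resp.get("need_update"):
    # A local worker is updated manually via update.sh; a remote worker is
    # updated over SSH by the server, as implemented below.
    print("server version is", resp.get("server_version"), "- update pending")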
def _trigger_remote_agent_update(self, worker, target_version: str):
|
||||
"""
|
||||
通过 SSH 触发远程 agent 更新(后台执行,不阻塞心跳响应)
|
||||
|
||||
使用 Redis 锁防止重复触发(同一 worker 60秒内只触发一次)
|
||||
"""
|
||||
import redis
|
||||
from django.conf import settings as django_settings
|
||||
|
||||
redis_url = f"redis://{django_settings.REDIS_HOST}:{django_settings.REDIS_PORT}/{django_settings.REDIS_DB}"
|
||||
redis_client = redis.from_url(redis_url)
|
||||
lock_key = f"agent_update_lock:{worker.id}"
|
||||
|
||||
# 尝试获取锁(60秒过期,防止重复触发)
|
||||
if not redis_client.set(lock_key, "1", nx=True, ex=60):
|
||||
logger.debug(f"Worker {worker.name} 更新已在进行中,跳过")
|
||||
return
|
||||
|
||||
# 获取锁成功,设置状态为 updating
|
||||
self._set_worker_status(worker.id, 'updating')
|
||||
|
||||
# 提取数据避免后台线程访问 ORM
|
||||
worker_id = worker.id
|
||||
worker_name = worker.name
|
||||
ip_address = worker.ip_address
|
||||
ssh_port = worker.ssh_port
|
||||
username = worker.username
|
||||
password = worker.password
|
||||
|
||||
def _async_update():
|
||||
try:
|
||||
logger.info(f"开始远程更新 Worker {worker_name} 到 {target_version}")
|
||||
|
||||
# 构建更新命令:拉取新镜像并重启 agent
|
||||
docker_user = getattr(django_settings, 'DOCKER_USER', 'yyhuni')
|
||||
update_cmd = f'''
|
||||
docker pull {docker_user}/xingrin-agent:{target_version} && \
|
||||
docker stop xingrin-agent 2>/dev/null || true && \
|
||||
docker rm xingrin-agent 2>/dev/null || true && \
|
||||
docker run -d --pull=always \
|
||||
--name xingrin-agent \
|
||||
--restart always \
|
||||
-e HEARTBEAT_API_URL="https://{django_settings.PUBLIC_HOST}:{getattr(django_settings, 'PUBLIC_PORT', '8083')}" \
|
||||
-e WORKER_ID="{worker_id}" \
|
||||
-e IMAGE_TAG="{target_version}" \
|
||||
-v /proc:/host/proc:ro \
|
||||
{docker_user}/xingrin-agent:{target_version}
|
||||
'''
|
||||
|
||||
success, message = self.worker_service.execute_remote_command(
|
||||
ip_address=ip_address,
|
||||
ssh_port=ssh_port,
|
||||
username=username,
|
||||
password=password,
|
||||
command=update_cmd
|
||||
)
|
||||
|
||||
if success:
|
||||
logger.info(f"Worker {worker_name} 远程更新成功")
|
||||
# 更新成功后,新 agent 心跳会自动把状态改回 online
|
||||
else:
|
||||
logger.warning(f"Worker {worker_name} 远程更新失败: {message}")
|
||||
# 更新失败,标记为 outdated
|
||||
self._set_worker_status(worker_id, 'outdated')
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Worker {worker_name} 远程更新异常: {e}")
|
||||
self._set_worker_status(worker_id, 'outdated')
|
||||
finally:
|
||||
# 释放锁
|
||||
redis_client.delete(lock_key)
|
||||
|
||||
# 后台执行,不阻塞心跳响应
|
||||
threading.Thread(target=_async_update, daemon=True).start()
|
||||
|
||||
def _set_worker_status(self, worker_id: int, status: str):
|
||||
"""更新 Worker 状态(用于后台线程)"""
|
||||
try:
|
||||
from apps.engine.models import WorkerNode
|
||||
WorkerNode.objects.filter(id=worker_id).update(status=status)
|
||||
except Exception as e:
|
||||
logger.error(f"更新 Worker {worker_id} 状态失败: {e}")
|
||||
|
||||
@action(detail=False, methods=['post'])
|
||||
def register(self, request):
|
||||
@@ -177,75 +320,16 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
|
||||
'created': created
|
||||
})
|
||||
|
||||
def _get_client_ip(self, request) -> str:
|
||||
"""获取客户端真实 IP"""
|
||||
x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR')
|
||||
if x_forwarded_for:
|
||||
return x_forwarded_for.split(',')[0].strip()
|
||||
return request.META.get('REMOTE_ADDR', '')
|
||||
|
||||
def _is_local_request(self, client_ip: str) -> bool:
|
||||
"""
|
||||
判断是否为本地请求(Docker 网络内部)
|
||||
|
||||
本地请求特征:
|
||||
- 来自 Docker 网络内部(172.x.x.x)
|
||||
- 来自 localhost(127.0.0.1)
|
||||
"""
|
||||
if not client_ip:
|
||||
return True # 无法获取 IP 时默认为本地
|
||||
|
||||
# Docker 默认网络段
|
||||
if client_ip.startswith('172.') or client_ip.startswith('10.'):
|
||||
return True
|
||||
|
||||
# localhost
|
||||
if client_ip in ('127.0.0.1', '::1', 'localhost'):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@action(detail=False, methods=['get'])
|
||||
def config(self, request):
|
||||
"""
|
||||
获取任务容器配置(配置中心 API)
|
||||
|
||||
Worker 启动时调用此接口获取完整配置,实现配置中心化管理。
|
||||
Worker 只需知道 SERVER_URL,其他配置由此 API 动态返回。
|
||||
Worker 通过 IS_LOCAL 环境变量声明身份,请求时带上 ?is_local=true/false 参数。
|
||||
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ 配置分发流程 │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ Worker 启动 │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ GET /api/workers/config/ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ _get_client_ip() │ ← 获取请求来源 IP │
|
||||
│ │ (X-Forwarded-For │ (支持 Nginx 代理场景) │
|
||||
│ │ 或 REMOTE_ADDR) │ │
|
||||
│ └─────────┬───────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ _is_local_request() │ ← 判断是否为 Docker 网络内部请求 │
|
||||
│ │ 172.x.x.x / 10.x.x.x│ (Docker 默认网段) │
|
||||
│ │ 127.0.0.1 / ::1 │ (localhost) │
|
||||
│ └─────────┬───────────┘ │
|
||||
│ │ │
|
||||
│ ┌───────┴───────┐ │
|
||||
│ ▼ ▼ │
|
||||
│ 本地 Worker 远程 Worker │
|
||||
│ (Docker内) (公网访问) │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ db: postgres db: PUBLIC_HOST │
|
||||
│ redis: redis redis: PUBLIC_HOST:6379 │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
请求参数:
|
||||
is_local: true/false - Worker 是否为本地节点(Docker 网络内)
|
||||
|
||||
返回:
|
||||
{
|
||||
@@ -253,19 +337,29 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
|
||||
"redisUrl": "...",
|
||||
"paths": {"results": "...", "logs": "..."}
|
||||
}
|
||||
|
||||
配置逻辑:
|
||||
- 本地 Worker (is_local=true): db_host=postgres, redis=redis:6379
|
||||
- 远程 Worker (is_local=false): db_host=PUBLIC_HOST, redis=PUBLIC_HOST:6379
|
||||
"""
|
||||
from django.conf import settings
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 判断请求来源:本地 Worker 还是远程 Worker
|
||||
# 本地 Worker 在 Docker 网络内,可以直接访问 postgres 服务
|
||||
# 远程 Worker 需要通过公网 IP 访问
|
||||
client_ip = self._get_client_ip(request)
|
||||
is_local_worker = self._is_local_request(client_ip)
|
||||
# 从请求参数获取 Worker 身份(由 Worker 自己声明)
|
||||
# 不再依赖 IP 判断,避免不同网络环境下的兼容性问题
|
||||
is_local_param = request.query_params.get('is_local', '').lower()
|
||||
is_local_worker = is_local_param == 'true'
|
||||
|
||||
# 根据请求来源返回不同的数据库地址
|
||||
db_host = settings.DATABASES['default']['HOST']
|
||||
_is_internal_db = db_host in ('postgres', 'localhost', '127.0.0.1')
|
||||
|
||||
logger.info(
|
||||
"Worker 配置请求 - is_local_param: %s, is_local_worker: %s, db_host: %s, is_internal_db: %s",
|
||||
is_local_param, is_local_worker, db_host, _is_internal_db
|
||||
)
|
||||
|
||||
if _is_internal_db:
|
||||
# 本地数据库场景
|
||||
if is_local_worker:
|
||||
@@ -274,13 +368,18 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
|
||||
worker_redis_url = 'redis://redis:6379/0'
|
||||
else:
|
||||
# 远程 Worker:通过公网 IP 访问
|
||||
worker_db_host = settings.PUBLIC_HOST
|
||||
worker_redis_url = f'redis://{settings.PUBLIC_HOST}:6379/0'
|
||||
public_host = settings.PUBLIC_HOST
|
||||
if public_host in ('server', 'localhost', '127.0.0.1'):
|
||||
logger.warning("远程 Worker 请求配置,但 PUBLIC_HOST=%s 不是有效的公网地址", public_host)
|
||||
worker_db_host = public_host
|
||||
worker_redis_url = f'redis://{public_host}:6379/0'
|
||||
else:
|
||||
# 远程数据库场景:所有 Worker 都用 DB_HOST
|
||||
worker_db_host = db_host
|
||||
worker_redis_url = getattr(settings, 'WORKER_REDIS_URL', 'redis://redis:6379/0')
|
||||
|
||||
logger.info("返回 Worker 配置 - db_host: %s, redis_url: %s", worker_db_host, worker_redis_url)
|
||||
|
||||
return Response({
|
||||
'db': {
|
||||
'host': worker_db_host,
|
||||
@@ -291,12 +390,14 @@ class WorkerNodeViewSet(viewsets.ModelViewSet):
|
||||
},
|
||||
'redisUrl': worker_redis_url,
|
||||
'paths': {
|
||||
'results': getattr(settings, 'CONTAINER_RESULTS_MOUNT', '/app/backend/results'),
|
||||
'logs': getattr(settings, 'CONTAINER_LOGS_MOUNT', '/app/backend/logs'),
|
||||
'results': getattr(settings, 'CONTAINER_RESULTS_MOUNT', '/opt/xingrin/results'),
|
||||
'logs': getattr(settings, 'CONTAINER_LOGS_MOUNT', '/opt/xingrin/logs'),
|
||||
},
|
||||
'logging': {
|
||||
'level': os.getenv('LOG_LEVEL', 'INFO'),
|
||||
'enableCommandLogging': os.getenv('ENABLE_COMMAND_LOGGING', 'true').lower() == 'true',
|
||||
},
|
||||
'debug': settings.DEBUG
|
||||
'debug': settings.DEBUG,
|
||||
# Git 加速配置(用于 Git clone 加速,如 Nuclei 模板仓库)
|
||||
'gitMirror': os.getenv('GIT_MIRROR', ''),
|
||||
})
|
||||
|
||||
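A worker-side sketch of consuming this endpoint (the path follows the GET /api/workers/config/ mentioned in the docstring; the HTTP client and error handling are illustrative):

import os
import requests  # assumed HTTP client on the worker

server_url = os.environ["SERVER_URL"]
is_local = os.environ.get("IS_LOCAL", "false")

cfg = requests.get(
    f"{server_url}/api/workers/config/",
    params={"is_local": is_local},
    timeout=15,
).json()

db_host = cfg["db"]["host"]            # postgres for local workers, PUBLIC_HOST otherwise
redis_url = cfg["redisUrl"]
results_dir = cfg["paths"]["results"]  # /opt/xingrin/results by default
git_mirror = cfg.get("gitMirror", "")  # empty when Git acceleration is disabled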
@@ -16,7 +16,7 @@ SUBDOMAIN_DISCOVERY_COMMANDS = {
|
||||
# 默认使用所有数据源(更全面,略慢),并始终开启递归
|
||||
# -all 使用所有数据源
|
||||
# -recursive 对支持递归的源启用递归枚举(默认开启)
|
||||
'base': 'subfinder -d {domain} -all -recursive -o {output_file} -silent',
|
||||
'base': "subfinder -d {domain} -all -recursive -o '{output_file}' -silent",
|
||||
'optional': {
|
||||
'threads': '-t {threads}', # 控制并发 goroutine 数
|
||||
}
|
||||
@@ -25,31 +25,31 @@ SUBDOMAIN_DISCOVERY_COMMANDS = {
|
||||
'amass_passive': {
|
||||
# 先执行被动枚举,将结果写入 amass 内部数据库,然后从数据库中导出纯域名(names)到 output_file
|
||||
# -silent 禁用进度条和其他输出
|
||||
'base': 'amass enum -passive -silent -d {domain} && amass subs -names -d {domain} > {output_file}'
|
||||
'base': "amass enum -passive -silent -d {domain} && amass subs -names -d {domain} > '{output_file}'"
|
||||
},
|
||||
|
||||
'amass_active': {
|
||||
# 先执行主动枚举 + 爆破,将结果写入 amass 内部数据库,然后从数据库中导出纯域名(names)到 output_file
|
||||
# -silent 禁用进度条和其他输出
|
||||
'base': 'amass enum -active -silent -d {domain} -brute && amass subs -names -d {domain} > {output_file}'
|
||||
'base': "amass enum -active -silent -d {domain} -brute && amass subs -names -d {domain} > '{output_file}'"
|
||||
},
|
||||
|
||||
'sublist3r': {
|
||||
'base': 'python3 {scan_tools_base}/Sublist3r/sublist3r.py -d {domain} -o {output_file}',
|
||||
'base': "python3 '{scan_tools_base}/Sublist3r/sublist3r.py' -d {domain} -o '{output_file}'",
|
||||
'optional': {
|
||||
'threads': '-t {threads}'
|
||||
}
|
||||
},
|
||||
|
||||
'assetfinder': {
|
||||
'base': 'assetfinder --subs-only {domain} > {output_file}',
|
||||
'base': "assetfinder --subs-only {domain} > '{output_file}'",
|
||||
},
|
||||
|
||||
# === 主动字典爆破 ===
|
||||
'subdomain_bruteforce': {
|
||||
# 使用字典对目标域名进行 DNS 爆破
|
||||
# -d 目标域名,-w 字典文件,-o 输出文件
|
||||
'base': 'puredns bruteforce {wordlist} {domain} -r /app/backend/resources/resolvers.txt --write {output_file} --quiet',
|
||||
'base': "puredns bruteforce '{wordlist}' {domain} -r /app/backend/resources/resolvers.txt --write '{output_file}' --quiet",
|
||||
'optional': {},
|
||||
},
|
||||
|
||||
@@ -57,7 +57,7 @@ SUBDOMAIN_DISCOVERY_COMMANDS = {
|
||||
'subdomain_resolve': {
|
||||
# 验证子域名是否能解析(存活验证)
|
||||
# 输入文件为候选子域列表,输出为存活子域列表
|
||||
'base': 'puredns resolve {input_file} -r /app/backend/resources/resolvers.txt --write {output_file} --wildcard-tests 50 --wildcard-batch 1000000 --quiet',
|
||||
'base': "puredns resolve '{input_file}' -r /app/backend/resources/resolvers.txt --write '{output_file}' --wildcard-tests 50 --wildcard-batch 1000000 --quiet",
|
||||
'optional': {},
|
||||
},
|
||||
|
||||
@@ -65,7 +65,7 @@ SUBDOMAIN_DISCOVERY_COMMANDS = {
|
||||
'subdomain_permutation_resolve': {
|
||||
# 流式管道:dnsgen 生成变异域名 | puredns resolve 验证存活
|
||||
# 不落盘中间文件,避免内存爆炸;不做通配符过滤
|
||||
'base': 'cat {input_file} | dnsgen - | puredns resolve -r /app/backend/resources/resolvers.txt --write {output_file} --wildcard-tests 50 --wildcard-batch 1000000 --quiet',
|
||||
'base': "cat '{input_file}' | dnsgen - | puredns resolve -r /app/backend/resources/resolvers.txt --write '{output_file}' --wildcard-tests 50 --wildcard-batch 1000000 --quiet",
|
||||
'optional': {},
|
||||
},
|
||||
}
|
||||
@@ -75,7 +75,7 @@ SUBDOMAIN_DISCOVERY_COMMANDS = {
|
||||
|
||||
PORT_SCAN_COMMANDS = {
|
||||
'naabu_active': {
|
||||
'base': 'naabu -exclude-cdn -warm-up-time 5 -verify -list {domains_file} -json -silent',
|
||||
'base': "naabu -exclude-cdn -warm-up-time 5 -verify -list '{domains_file}' -json -silent",
|
||||
'optional': {
|
||||
'threads': '-c {threads}',
|
||||
'ports': '-p {ports}',
|
||||
@@ -85,7 +85,7 @@ PORT_SCAN_COMMANDS = {
|
||||
},
|
||||
|
||||
'naabu_passive': {
|
||||
'base': 'naabu -list {domains_file} -passive -json -silent'
|
||||
'base': "naabu -list '{domains_file}' -passive -json -silent"
|
||||
},
|
||||
}
|
||||
|
||||
@@ -95,7 +95,7 @@ PORT_SCAN_COMMANDS = {
|
||||
SITE_SCAN_COMMANDS = {
|
||||
'httpx': {
|
||||
'base': (
|
||||
'httpx -l {url_file} '
|
||||
"'{scan_tools_base}/httpx' -l '{url_file}' "
|
||||
'-status-code -content-type -content-length '
|
||||
'-location -title -server -body-preview '
|
||||
'-tech-detect -cdn -vhost '
|
||||
@@ -115,7 +115,7 @@ SITE_SCAN_COMMANDS = {
|
||||
|
||||
DIRECTORY_SCAN_COMMANDS = {
|
||||
'ffuf': {
|
||||
'base': 'ffuf -u {url}/FUZZ -se -ac -sf -json -w {wordlist}',
|
||||
'base': "ffuf -u '{url}FUZZ' -se -ac -sf -json -w '{wordlist}'",
|
||||
'optional': {
|
||||
'delay': '-p {delay}',
|
||||
'threads': '-t {threads}',
|
||||
@@ -131,13 +131,13 @@ DIRECTORY_SCAN_COMMANDS = {
|
||||
|
||||
URL_FETCH_COMMANDS = {
|
||||
'waymore': {
|
||||
'base': 'waymore -i {domain_name} -mode U -oU {output_file}',
|
||||
'base': "waymore -i {domain_name} -mode U -oU '{output_file}'",
|
||||
'input_type': 'domain_name'
|
||||
},
|
||||
|
||||
'katana': {
|
||||
'base': (
|
||||
'katana -list {sites_file} -o {output_file} '
|
||||
"katana -list '{sites_file}' -o '{output_file}' "
|
||||
'-jc ' # 开启 JavaScript 爬取 + 自动解析 .js 文件里的所有端点(最重要)
|
||||
'-xhr ' # 额外从 JS 中提取 XHR/Fetch 请求的 API 路径(再多挖 10-20% 隐藏接口)
|
||||
'-kf all ' # 在每个目录下自动 fuzz 所有已知敏感文件(.env、.git、backup、config、ds_store 等 5000+ 条)
|
||||
@@ -157,7 +157,7 @@ URL_FETCH_COMMANDS = {
|
||||
},
|
||||
|
||||
'uro': {
|
||||
'base': 'uro -i {input_file} -o {output_file}',
|
||||
'base': "uro -i '{input_file}' -o '{output_file}'",
|
||||
'optional': {
|
||||
'whitelist': '-w {whitelist}', # 只保留指定扩展名的 URL(空格分隔)
|
||||
'blacklist': '-b {blacklist}', # 排除指定扩展名的 URL(空格分隔)
|
||||
@@ -167,7 +167,7 @@ URL_FETCH_COMMANDS = {
|
||||
|
||||
'httpx': {
|
||||
'base': (
|
||||
'httpx -l {url_file} '
|
||||
"'{scan_tools_base}/httpx' -l '{url_file}' "
|
||||
'-status-code -content-type -content-length '
|
||||
'-location -title -server -body-preview '
|
||||
'-tech-detect -cdn -vhost '
|
||||
@@ -187,7 +187,7 @@ VULN_SCAN_COMMANDS = {
|
||||
'base': (
|
||||
'dalfox --silence --no-color --no-spinner '
|
||||
'--skip-bav '
|
||||
'file {endpoints_file} '
|
||||
"file '{endpoints_file}' "
|
||||
'--waf-evasion '
|
||||
'--format json'
|
||||
),
|
||||
@@ -205,11 +205,11 @@ VULN_SCAN_COMMANDS = {
|
||||
},
|
||||
'nuclei': {
|
||||
# nuclei 漏洞扫描
|
||||
# -j: JSON 输出
|
||||
# -j: JSON 输出(每行一条完整 JSON)
|
||||
# -silent: 静默模式
|
||||
# -l: 输入 URL 列表文件
|
||||
# -t: 模板目录路径(支持多个仓库,多次 -t 由 template_args 直接拼接)
|
||||
'base': 'nuclei -j -silent -l {endpoints_file} {template_args}',
|
||||
'base': "nuclei -j -silent -l '{endpoints_file}' {template_args}",
|
||||
'optional': {
|
||||
'concurrency': '-c {concurrency}', # 并发数(默认 25)
|
||||
'rate_limit': '-rl {rate_limit}', # 每秒请求数限制
|
||||
@@ -225,12 +225,32 @@ VULN_SCAN_COMMANDS = {
|
||||
}
|
||||
|
||||
|
||||
# ==================== 指纹识别 ====================
|
||||
|
||||
FINGERPRINT_DETECT_COMMANDS = {
|
||||
'xingfinger': {
|
||||
# 流式输出模式(不使用 -o,输出到 stdout)
|
||||
# -l: URL 列表文件输入
|
||||
# -s: 静默模式,只输出命中结果
|
||||
# --json: JSON 格式输出(每行一条)
|
||||
'base': "xingfinger -l '{urls_file}' -s --json",
|
||||
'optional': {
|
||||
# 自定义指纹库路径
|
||||
'ehole': '--ehole {ehole}',
|
||||
'goby': '--goby {goby}',
|
||||
'wappalyzer': '--wappalyzer {wappalyzer}',
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# ==================== 工具映射 ====================
|
||||
|
||||
COMMAND_TEMPLATES = {
|
||||
'subdomain_discovery': SUBDOMAIN_DISCOVERY_COMMANDS,
|
||||
'port_scan': PORT_SCAN_COMMANDS,
|
||||
'site_scan': SITE_SCAN_COMMANDS,
|
||||
'fingerprint_detect': FINGERPRINT_DETECT_COMMANDS,
|
||||
'directory_scan': DIRECTORY_SCAN_COMMANDS,
|
||||
'url_fetch': URL_FETCH_COMMANDS,
|
||||
'vuln_scan': VULN_SCAN_COMMANDS,
|
||||
@@ -242,7 +262,7 @@ COMMAND_TEMPLATES = {
|
||||
EXECUTION_STAGES = [
|
||||
{
|
||||
'mode': 'sequential',
|
||||
'flows': ['subdomain_discovery', 'port_scan', 'site_scan']
|
||||
'flows': ['subdomain_discovery', 'port_scan', 'site_scan', 'fingerprint_detect']
|
||||
},
|
||||
{
|
||||
'mode': 'parallel',
|
||||
|
||||
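EXECUTION_STAGES only declares whether each group of flows runs sequentially or in parallel; the actual dispatcher is FlowOrchestrator (apps.scan.orchestrators), whose internals are outside this diff. A hypothetical walk over the structure, purely illustrative:

    from concurrent.futures import ThreadPoolExecutor

    def run_stages(stages, run_flow):
        # run_flow(flow_name) stands in for launching one scan flow
        for stage in stages:
            flows = stage['flows']
            if stage['mode'] == 'sequential':
                for name in flows:
                    run_flow(name)          # e.g. site_scan finishes before fingerprint_detect starts
            else:  # 'parallel'
                with ThreadPoolExecutor(max_workers=len(flows)) as pool:
                    list(pool.map(run_flow, flows))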
@@ -1,224 +1,169 @@
|
||||
# 引擎配置
|
||||
#
|
||||
# ==================== 参数命名规范 ====================
|
||||
# 所有参数统一用中划线,如 rate-limit, request-timeout, wordlist-name
|
||||
# - 贴近 CLI 参数风格,用户更直观
|
||||
# - 系统会自动转换为下划线供代码使用
|
||||
#
|
||||
# ==================== 必需参数 ====================
|
||||
# - enabled: 是否启用工具(true/false)
|
||||
# - timeout: 超时时间(秒),工具执行超过此时间会被强制终止
|
||||
#
|
||||
# 使用方式:
|
||||
# - 在前端创建扫描引擎时,将此配置保存到数据库
|
||||
# - 执行扫描时,从数据库读取配置并传递给 Flow
|
||||
# - 取消注释可选参数即可启用
|
||||
# 参数命名:统一用中划线(如 rate-limit),系统自动转换为下划线
|
||||
# 必需参数:enabled(是否启用)
|
||||
# 可选参数:timeout(超时秒数,默认 auto 自动计算)
|
||||
|
||||
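The config keys below use dashes (rate-limit, wordlist-name) and are said to be converted to underscores before the flows consume them. The converter itself is not part of this diff; a minimal sketch of such a normalization, assuming a plain recursive dict walk:

    def normalize_keys(config):
        """Recursively turn dash-style YAML keys (rate-limit) into snake_case (rate_limit)."""
        if isinstance(config, dict):
            return {k.replace('-', '_'): normalize_keys(v) for k, v in config.items()}
        if isinstance(config, list):
            return [normalize_keys(item) for item in config]
        return config

    # normalize_keys({'rate-limit': 150, 'wordlist-name': 'dir_default.txt'})
    # -> {'rate_limit': 150, 'wordlist_name': 'dir_default.txt'}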
# ==================== 子域名发现 ====================
|
||||
#
|
||||
# 流程说明:
|
||||
# Stage 1: 被动收集(并行) - 必选,至少启用一个工具
|
||||
# Stage 2: 字典爆破(可选) - 使用字典暴力枚举子域名
|
||||
# Stage 3: 变异生成 + 验证(可选) - 基于已发现域名生成变异,流式验证存活
|
||||
# Stage 4: DNS 存活验证(可选) - 验证所有候选域名是否能解析
|
||||
#
|
||||
# 灵活组合:可以关闭 2/3/4 中的任意阶段,最终结果会根据实际执行的阶段动态决定
|
||||
# Stage 1: 被动收集(并行) - 必选,至少启用一个工具
|
||||
# Stage 2: 字典爆破(可选) - 使用字典暴力枚举子域名
|
||||
# Stage 3: 变异生成 + 验证(可选) - 基于已发现域名生成变异,流式验证存活
|
||||
# Stage 4: DNS 存活验证(可选) - 验证所有候选域名是否能解析
|
||||
#
|
||||
subdomain_discovery:
|
||||
# === Stage 1: 被动收集工具(并行执行)===
|
||||
passive_tools:
|
||||
subfinder:
|
||||
enabled: true
|
||||
timeout: 7200 # 2小时
|
||||
# threads: 10 # 可选,并发 goroutine 数
|
||||
timeout: 3600 # 1小时
|
||||
# threads: 10 # 并发 goroutine 数
|
||||
|
||||
amass_passive:
|
||||
enabled: true
|
||||
timeout: 7200 # 2小时
|
||||
timeout: 3600
|
||||
|
||||
amass_active:
|
||||
enabled: true # 主动枚举 + 爆破
|
||||
timeout: 7200
|
||||
timeout: 3600
|
||||
|
||||
sublist3r:
|
||||
enabled: true
|
||||
timeout: 7200
|
||||
# threads: 50 # 可选,线程数
|
||||
timeout: 3600
|
||||
# threads: 50 # 线程数
|
||||
|
||||
assetfinder:
|
||||
enabled: true
|
||||
timeout: 7200
|
||||
timeout: 3600
|
||||
|
||||
# === Stage 2: 主动字典爆破(可选)===
|
||||
bruteforce:
|
||||
enabled: false # 是否启用字典爆破
|
||||
enabled: false
|
||||
subdomain_bruteforce:
|
||||
timeout: auto # 自动根据字典行数计算(后续代码中按行数 * 3 秒实现)
|
||||
wordlist-name: subdomains-top1million-110000.txt # 字典名称,对应「字典管理」中的 Wordlist.name
|
||||
# timeout: auto # 自动根据字典行数计算
|
||||
wordlist-name: subdomains-top1million-110000.txt # 对应「字典管理」中的 Wordlist.name
|
||||
|
||||
# === Stage 3: 变异生成 + 存活验证(可选,流式管道避免 OOM)===
|
||||
# === Stage 3: 变异生成 + 存活验证(可选)===
|
||||
permutation:
|
||||
enabled: true # 是否启用变异生成
|
||||
enabled: true
|
||||
subdomain_permutation_resolve:
|
||||
timeout: 7200 # 2小时(变异量大时需要更长时间)
|
||||
timeout: 7200
|
||||
|
||||
# === Stage 4: DNS 存活验证(可选)===
|
||||
resolve:
|
||||
enabled: true # 是否启用存活验证
|
||||
enabled: true
|
||||
subdomain_resolve:
|
||||
timeout: auto # 自动根据候选子域数量计算(在 Flow 中按行数 * 3 秒实现)
|
||||
|
||||
# timeout: auto # 自动根据候选子域数量计算
|
||||
|
||||
# ==================== 端口扫描 ====================
|
||||
port_scan:
|
||||
tools:
|
||||
naabu_active:
|
||||
enabled: true
|
||||
timeout: auto # 自动计算(根据:目标数 × 端口数 × 0.5秒)
|
||||
# 例如:100个域名 × 100个端口 × 0.5 = 5000秒
|
||||
# 10个域名 × 1000个端口 × 0.5 = 5000秒
|
||||
# 超时范围:60秒 ~ 2天(172800秒)
|
||||
# 或者手动指定:timeout: 3600
|
||||
threads: 200 # 可选,并发连接数(默认 5)
|
||||
# ports: 1-65535 # 可选,扫描端口范围(默认 1-65535)
|
||||
top-ports: 100 # 可选,Scan for nmap top 100 ports(影响 timeout 计算)
|
||||
rate: 10 # 可选,扫描速率(默认 10)
|
||||
# timeout: auto # 自动计算(目标数 × 端口数 × 0.5秒),范围 60秒 ~ 2天
|
||||
threads: 200 # 并发连接数(默认 5)
|
||||
# ports: 1-65535 # 扫描端口范围(默认 1-65535)
|
||||
top-ports: 100 # 扫描 nmap top 100 端口
|
||||
rate: 10 # 扫描速率(默认 10)
|
||||
|
||||
naabu_passive:
|
||||
enabled: true
|
||||
timeout: auto # 自动计算(被动扫描通常较快,端口数默认为 100)
|
||||
# 被动扫描,使用被动数据源,无需额外配置
|
||||
# timeout: auto # 被动扫描通常较快
|
||||
|
||||
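The naabu auto timeout above is described as targets x ports x 0.5 s, clamped to 60 s - 172800 s. A sketch of that arithmetic (the helper name is illustrative, not the project's):

    def naabu_auto_timeout(target_count: int, port_count: int,
                           per_probe: float = 0.5,
                           floor: int = 60, ceiling: int = 172800) -> int:
        # 100 targets x 100 ports x 0.5s = 5000s; result is clamped into [60, 172800]
        return int(min(max(target_count * port_count * per_probe, floor), ceiling))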
# ==================== 站点扫描 ====================
|
||||
site_scan:
|
||||
tools:
|
||||
httpx:
|
||||
enabled: true
|
||||
timeout: auto # 自动计算(根据URL数量,每个URL 1秒)
|
||||
# 或者手动指定:timeout: 3600
|
||||
# threads: 50 # 可选,并发线程数(httpx 默认 50)
|
||||
# rate-limit: 150 # 可选,每秒发送的请求数量(httpx 默认 150)
|
||||
# request-timeout: 10 # 可选,单个请求的超时时间(httpx 默认 10)秒
|
||||
# retries: 2 # 可选,请求失败重试次数
|
||||
# timeout: auto # 自动计算(每个 URL 约 1 秒)
|
||||
# threads: 50 # 并发线程数(默认 50)
|
||||
# rate-limit: 150 # 每秒请求数(默认 150)
|
||||
# request-timeout: 10 # 单个请求超时秒数(默认 10)
|
||||
# retries: 2 # 请求失败重试次数
|
||||
|
||||
# ==================== 指纹识别 ====================
|
||||
# 在 site_scan 后串行执行,识别 WebSite 的技术栈
|
||||
fingerprint_detect:
|
||||
tools:
|
||||
xingfinger:
|
||||
enabled: true
|
||||
fingerprint-libs: [ehole, goby, wappalyzer] # 启用的指纹库:ehole, goby, wappalyzer, fingers, fingerprinthub
|
||||
|
||||
# ==================== 目录扫描 ====================
|
||||
directory_scan:
|
||||
tools:
|
||||
ffuf:
|
||||
enabled: true
|
||||
timeout: auto # 自动计算超时时间(根据字典行数)
|
||||
# 计算公式:字典行数 × 0.02秒/词
|
||||
# 超时范围:60秒 ~ 7200秒(2小时)
|
||||
# 也可以手动指定固定超时(如 300)
|
||||
wordlist-name: dir_default.txt # 字典名称(必需),对应「字典管理」中唯一的 Wordlist.name
|
||||
# 安装时会自动初始化名为 dir_default.txt 的默认目录字典
|
||||
# ffuf 会逐行读取字典文件,将每行作为 FUZZ 关键字的替换值
|
||||
delay: 0.1-2.0 # Seconds of delay between requests, or a range of random delay
|
||||
# For example "0.1" or "0.1-2.0"
|
||||
threads: 10 # Number of concurrent threads (default: 40)
|
||||
request-timeout: 10 # HTTP request timeout in seconds (default: 10)
|
||||
match-codes: 200,201,301,302,401,403 # Match HTTP status codes, comma separated
|
||||
# rate: 0 # Rate of requests per second (default: 0)
|
||||
# timeout: auto # 自动计算(字典行数 × 0.02秒),范围 60秒 ~ 2小时
|
||||
max-workers: 5 # 并发扫描站点数(默认 5)
|
||||
wordlist-name: dir_default.txt # 对应「字典管理」中的 Wordlist.name
|
||||
delay: 0.1-2.0 # 请求间隔,支持范围随机(如 "0.1-2.0")
|
||||
threads: 10 # 并发线程数(默认 40)
|
||||
request-timeout: 10 # HTTP 请求超时秒数(默认 10)
|
||||
match-codes: 200,201,301,302,401,403 # 匹配的 HTTP 状态码
|
||||
# rate: 0 # 每秒请求数(默认 0 不限制)
|
||||
|
||||
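The ffuf auto timeout above is wordlist lines x 0.02 s, clamped to 60 s - 7200 s. A sketch under those stated constants (helper name hypothetical):

    def ffuf_auto_timeout(wordlist_lines: int,
                          per_word: float = 0.02,
                          floor: int = 60, ceiling: int = 7200) -> int:
        # e.g. 110000 lines x 0.02s = 2200s, well inside the 60s-2h window
        return int(min(max(wordlist_lines * per_word, floor), ceiling))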
# ==================== URL 获取 ====================
|
||||
url_fetch:
|
||||
tools:
|
||||
waymore:
|
||||
enabled: true
|
||||
timeout: 3600 # 工具级别总超时:固定 3600 秒(按域名 target_name 输入)
|
||||
# 如果目标较大或希望更快/更慢,可根据需要手动调整秒数
|
||||
# 输入类型:domain_name(域名级别,自动去重同域名站点)
|
||||
timeout: 3600 # 固定 1 小时(按域名输入)
|
||||
|
||||
katana:
|
||||
enabled: true
|
||||
timeout: auto # 工具级别总超时:自动计算(根据站点数量)
|
||||
# 或手动指定:timeout: 300
|
||||
|
||||
# ========== 核心功能参数(已在命令中固定开启) ==========
|
||||
# -jc: JavaScript 爬取 + 自动解析 .js 文件里的所有端点(最重要)
|
||||
# -xhr: 从 JS 中提取 XHR/Fetch 请求的 API 路径(再多挖 10-20% 隐藏接口)
|
||||
# -kf all: 自动 fuzz 所有已知敏感文件(.env、.git、backup、config 等 5000+ 条)
|
||||
# -fs rdn: 智能过滤重复+噪声路径(分页、?id=1/2/3 全干掉,输出极干净)
|
||||
|
||||
# ========== 可选参数(推荐配置) ==========
|
||||
depth: 5 # 爬取最大深度(平衡深度与时间,默认 3,推荐 5)
|
||||
threads: 10 # 全局并发数(极低并发最像真人,推荐 10)
|
||||
rate-limit: 30 # 全局硬限速:每秒最多 30 个请求(WAF 几乎不报警)
|
||||
random-delay: 1 # 每次请求之间随机延迟 0.5~1.5 秒(再加一层人性化)
|
||||
retry: 2 # 失败请求自动重试 2 次(网络抖动不丢包)
|
||||
request-timeout: 12 # 单请求超时 12 秒(防卡死,katana 参数名是 -timeout)
|
||||
|
||||
# 输入类型:url(站点级别,每个站点单独爬取)
|
||||
# timeout: auto # 自动计算(根据站点数量)
|
||||
depth: 5 # 爬取最大深度(默认 3)
|
||||
threads: 10 # 全局并发数
|
||||
rate-limit: 30 # 每秒最多请求数
|
||||
random-delay: 1 # 请求间随机延迟秒数
|
||||
retry: 2 # 失败重试次数
|
||||
request-timeout: 12 # 单请求超时秒数
|
||||
|
||||
uro:
|
||||
enabled: true
|
||||
timeout: auto # 自动计算(根据 URL 数量,每 100 个约 1 秒)
|
||||
# 范围:30 秒 ~ 300 秒
|
||||
# 或手动指定:timeout: 60
|
||||
|
||||
# ========== 可选参数 ==========
|
||||
# whitelist: # 只保留指定扩展名的 URL(如:php,asp,jsp)
|
||||
# timeout: auto # 自动计算(每 100 个 URL 约 1 秒),范围 30 ~ 300 秒
|
||||
# whitelist: # 只保留指定扩展名
|
||||
# - php
|
||||
# - asp
|
||||
# blacklist: # 排除指定扩展名的 URL(静态资源)
|
||||
# blacklist: # 排除指定扩展名(静态资源)
|
||||
# - jpg
|
||||
# - jpeg
|
||||
# - png
|
||||
# - gif
|
||||
# - svg
|
||||
# - ico
|
||||
# - css
|
||||
# - woff
|
||||
# - woff2
|
||||
# - ttf
|
||||
# - eot
|
||||
# - mp4
|
||||
# - mp3
|
||||
# - pdf
|
||||
# filters: # 额外的过滤规则,参考 uro 文档
|
||||
# - hasparams # 只保留有参数的 URL
|
||||
# - hasext # 只保留有扩展名的 URL
|
||||
# - vuln # 只保留可能有漏洞的 URL
|
||||
|
||||
# 用途:清理合并后的 URL 列表,去除冗余和无效 URL
|
||||
# 输入类型:merged_file(合并后的 URL 文件)
|
||||
# 输出:清理后的 URL 列表
|
||||
# filters: # 额外过滤规则
|
||||
# - hasparams # 只保留有参数的 URL
|
||||
# - vuln # 只保留可能有漏洞的 URL
|
||||
|
||||
httpx:
|
||||
enabled: true
|
||||
timeout: auto # 自动计算(根据 URL 数量,每个 URL 1 秒)
|
||||
# 或手动指定:timeout: 600
|
||||
# threads: 50 # 可选,并发线程数(httpx 默认 50)
|
||||
# rate-limit: 150 # 可选,每秒发送的请求数量(httpx 默认 150)
|
||||
# request-timeout: 10 # 可选,单个请求的超时时间(httpx 默认 10)秒
|
||||
# retries: 2 # 可选,请求失败重试次数
|
||||
|
||||
# 用途:判断 URL 存活,过滤无效 URL
|
||||
# 输入类型:url_file(URL 列表文件)
|
||||
# 输出:存活的 URL 及其响应信息(status, title, server, tech 等)
|
||||
# timeout: auto # 自动计算(每个 URL 约 1 秒)
|
||||
# threads: 50 # 并发线程数(默认 50)
|
||||
# rate-limit: 150 # 每秒请求数(默认 150)
|
||||
# request-timeout: 10 # 单个请求超时秒数(默认 10)
|
||||
# retries: 2 # 请求失败重试次数
|
||||
|
||||
# ==================== 漏洞扫描 ====================
|
||||
vuln_scan:
|
||||
tools:
|
||||
dalfox_xss:
|
||||
enabled: true
|
||||
timeout: auto # 自动计算(根据 endpoints 行数 × 100 秒),或手动指定秒数如 timeout: 600
|
||||
request-timeout: 10 # Dalfox 单个请求的超时时间,对应命令行 --timeout
|
||||
# timeout: auto # 自动计算(endpoints 行数 × 100 秒)
|
||||
request-timeout: 10 # 单个请求超时秒数
|
||||
only-poc: r # 只输出 POC 结果(r: 反射型)
|
||||
ignore-return: "302,404,403" # 忽略这些返回码
|
||||
# blind-xss-server: xxx # 可选:盲打 XSS 回连服务地址,需要时再开启
|
||||
delay: 100 # Dalfox 扫描内部延迟参数
|
||||
worker: 10 # Dalfox worker 数量
|
||||
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" # 默认 UA,可根据需要修改
|
||||
ignore-return: "302,404,403" # 忽略的返回码
|
||||
delay: 50 # 请求间隔(毫秒)
|
||||
worker: 30 # worker 数量
|
||||
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
||||
# blind-xss-server: xxx # 盲打 XSS 回连服务地址
|
||||
|
||||
nuclei:
|
||||
enabled: true
|
||||
timeout: auto # 自动计算(根据 endpoints 行数),或手动指定秒数
|
||||
template-repo-names: # 模板仓库列表(必填,数组写法),对应「Nuclei 模板」中的仓库名
|
||||
- nuclei-templates # Worker 会自动同步到与 Server 一致的 commit 版本
|
||||
# - nuclei-custom # 可追加自定义仓库,按顺序依次 -t 传入
|
||||
concurrency: 25 # 并发数(默认 25)
|
||||
rate-limit: 150 # 每秒请求数限制(默认 150)
|
||||
request-timeout: 5 # 单个请求超时秒数(默认 5)
|
||||
severity: medium,high,critical # 只扫描中高危,降低噪音(逗号分隔)
|
||||
# tags: cve,rce # 可选:只使用指定标签的模板
|
||||
# timeout: auto # 自动计算(根据 endpoints 行数)
|
||||
template-repo-names: # 模板仓库列表,对应「Nuclei 模板」中的仓库名
|
||||
- nuclei-templates
|
||||
# - nuclei-custom # 可追加自定义仓库
|
||||
concurrency: 25 # 并发数(默认 25)
|
||||
rate-limit: 150 # 每秒请求数限制(默认 150)
|
||||
request-timeout: 5 # 单个请求超时秒数(默认 5)
|
||||
severity: medium,high,critical # 只扫描中高危
|
||||
# tags: cve,rce # 只使用指定标签的模板
|
||||
|
||||
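template-repo-names feeds the {template_args} slot of the nuclei command template, with one -t per repository passed in order. How the fragment is assembled is not shown in this diff; a minimal sketch, where the base directory for synced template repos is an assumption:

    def build_template_args(repo_names: list, templates_base: str = '/opt/nuclei-templates') -> str:
        # ['nuclei-templates', 'nuclei-custom']
        # -> "-t '/opt/nuclei-templates/nuclei-templates' -t '/opt/nuclei-templates/nuclei-custom'"
        return ' '.join(f"-t '{templates_base}/{name}'" for name in repo_names)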
@@ -5,8 +5,10 @@
|
||||
|
||||
from .initiate_scan_flow import initiate_scan_flow
|
||||
from .subdomain_discovery_flow import subdomain_discovery_flow
|
||||
from .fingerprint_detect_flow import fingerprint_detect_flow
|
||||
|
||||
__all__ = [
|
||||
'initiate_scan_flow',
|
||||
'subdomain_discovery_flow',
|
||||
'fingerprint_detect_flow',
|
||||
]
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
架构:
|
||||
- Flow 负责编排多个原子 Task
|
||||
- 支持串行执行扫描工具(流式处理)
|
||||
- 支持并发执行扫描工具(使用 ThreadPoolTaskRunner)
|
||||
- 每个 Task 可独立重试
|
||||
- 配置由 YAML 解析
|
||||
"""
|
||||
@@ -14,11 +14,15 @@
|
||||
from apps.common.prefect_django_setup import setup_django_for_prefect
|
||||
|
||||
from prefect import flow
|
||||
from prefect.task_runners import ThreadPoolTaskRunner
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
from apps.scan.tasks.directory_scan import (
|
||||
export_sites_task,
|
||||
@@ -33,6 +37,9 @@ from apps.scan.utils import config_parser, build_scan_command, ensure_wordlist_l
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 默认最大并发数
|
||||
DEFAULT_MAX_WORKERS = 5
|
||||
|
||||
|
||||
def calculate_directory_scan_timeout(
|
||||
tool_config: dict,
|
||||
@@ -112,36 +119,37 @@ def calculate_directory_scan_timeout(
|
||||
return min_timeout
|
||||
|
||||
|
||||
def _setup_directory_scan_directory(scan_workspace_dir: str) -> Path:
|
||||
def _get_max_workers(tool_config: dict, default: int = DEFAULT_MAX_WORKERS) -> int:
|
||||
"""
|
||||
创建并验证目录扫描工作目录
|
||||
从单个工具配置中获取 max_workers 参数
|
||||
|
||||
Args:
|
||||
scan_workspace_dir: 扫描工作空间目录
|
||||
tool_config: 单个工具的配置字典,如 {'max_workers': 10, 'threads': 5, ...}
|
||||
default: 默认值,默认为 5
|
||||
|
||||
Returns:
|
||||
Path: 目录扫描目录路径
|
||||
|
||||
Raises:
|
||||
RuntimeError: 目录创建或验证失败
|
||||
int: max_workers 值
|
||||
"""
|
||||
directory_scan_dir = Path(scan_workspace_dir) / 'directory_scan'
|
||||
directory_scan_dir.mkdir(parents=True, exist_ok=True)
|
||||
if not isinstance(tool_config, dict):
|
||||
return default
|
||||
|
||||
if not directory_scan_dir.is_dir():
|
||||
raise RuntimeError(f"目录扫描目录创建失败: {directory_scan_dir}")
|
||||
if not os.access(directory_scan_dir, os.W_OK):
|
||||
raise RuntimeError(f"目录扫描目录不可写: {directory_scan_dir}")
|
||||
|
||||
return directory_scan_dir
|
||||
# 支持 max_workers 和 max-workers(YAML 中划线会被转换)
|
||||
max_workers = tool_config.get('max_workers') or tool_config.get('max-workers')
|
||||
if max_workers is not None and isinstance(max_workers, int) and max_workers > 0:
|
||||
return max_workers
|
||||
return default
|
||||
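A quick illustration of the fallback behaviour of _get_max_workers as defined above:

    _get_max_workers({'max-workers': 10, 'threads': 5})   # -> 10 (dash-style key accepted)
    _get_max_workers({'threads': 5})                       # -> 5  (DEFAULT_MAX_WORKERS)
    _get_max_workers(None)                                  # -> 5  (non-dict input falls back to default)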
|
||||
|
||||
def _export_site_urls(target_id: int, directory_scan_dir: Path) -> tuple[str, int]:
|
||||
|
||||
|
||||
|
||||
def _export_site_urls(target_id: int, target_name: str, directory_scan_dir: Path) -> tuple[str, int]:
|
||||
"""
|
||||
导出目标下的所有站点 URL 到文件
|
||||
导出目标下的所有站点 URL 到文件(支持懒加载)
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
target_name: 目标名称(用于懒加载创建默认站点)
|
||||
directory_scan_dir: 目录扫描目录
|
||||
|
||||
Returns:
|
||||
@@ -185,7 +193,7 @@ def _run_scans_sequentially(
|
||||
target_name: str
|
||||
) -> tuple[int, int, list]:
|
||||
"""
|
||||
串行执行目录扫描任务(支持多工具)
|
||||
串行执行目录扫描任务(支持多工具)- 已废弃,保留用于兼容
|
||||
|
||||
Args:
|
||||
enabled_tools: 启用的工具配置字典
|
||||
@@ -333,6 +341,198 @@ def _run_scans_sequentially(
|
||||
return total_directories, processed_count, failed_sites
|
||||
|
||||
|
||||
def _generate_log_filename(tool_name: str, site_url: str, directory_scan_dir: Path) -> Path:
|
||||
"""
|
||||
生成唯一的日志文件名
|
||||
|
||||
使用 URL 的 hash 确保并发时不会冲突
|
||||
|
||||
Args:
|
||||
tool_name: 工具名称
|
||||
site_url: 站点 URL
|
||||
directory_scan_dir: 目录扫描目录
|
||||
|
||||
Returns:
|
||||
Path: 日志文件路径
|
||||
"""
|
||||
url_hash = hashlib.md5(site_url.encode()).hexdigest()[:8]
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
|
||||
return directory_scan_dir / f"{tool_name}_{url_hash}_{timestamp}.log"
|
||||
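For example, calling the helper for ffuf against a single site yields a per-site, collision-free path of this shape:

    # -> <directory_scan_dir>/ffuf_<8-char-md5-of-url>_<YYYYmmdd_HHMMSS_microseconds>.log
    log_file = _generate_log_filename('ffuf', 'https://example.com', Path('/tmp/scan/directory_scan'))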
|
||||
|
||||
def _run_scans_concurrently(
|
||||
enabled_tools: dict,
|
||||
sites_file: str,
|
||||
directory_scan_dir: Path,
|
||||
scan_id: int,
|
||||
target_id: int,
|
||||
site_count: int,
|
||||
target_name: str
|
||||
) -> Tuple[int, int, List[str]]:
|
||||
"""
|
||||
并发执行目录扫描任务(使用 ThreadPoolTaskRunner)
|
||||
|
||||
Args:
|
||||
enabled_tools: 启用的工具配置字典
|
||||
sites_file: 站点文件路径
|
||||
directory_scan_dir: 目录扫描目录
|
||||
scan_id: 扫描任务 ID
|
||||
target_id: 目标 ID
|
||||
site_count: 站点数量
|
||||
target_name: 目标名称(用于错误日志)
|
||||
|
||||
Returns:
|
||||
tuple: (total_directories, processed_sites, failed_sites)
|
||||
"""
|
||||
# 读取站点列表
|
||||
sites: List[str] = []
|
||||
with open(sites_file, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
site_url = line.strip()
|
||||
if site_url:
|
||||
sites.append(site_url)
|
||||
|
||||
if not sites:
|
||||
logger.warning("站点列表为空")
|
||||
return 0, 0, []
|
||||
|
||||
logger.info(
|
||||
"准备并发扫描 %d 个站点,使用工具: %s",
|
||||
len(sites), ', '.join(enabled_tools.keys())
|
||||
)
|
||||
|
||||
total_directories = 0
|
||||
processed_sites_count = 0
|
||||
failed_sites: List[str] = []
|
||||
|
||||
# 遍历每个工具
|
||||
for tool_name, tool_config in enabled_tools.items():
|
||||
# 每个工具独立获取 max_workers 配置
|
||||
max_workers = _get_max_workers(tool_config)
|
||||
|
||||
logger.info("="*60)
|
||||
logger.info("使用工具: %s (并发模式, max_workers=%d)", tool_name, max_workers)
|
||||
logger.info("="*60)
|
||||
|
||||
# 如果配置了 wordlist_name,则先确保本地存在对应的字典文件(含 hash 校验)
|
||||
wordlist_name = tool_config.get('wordlist_name')
|
||||
if wordlist_name:
|
||||
try:
|
||||
local_wordlist_path = ensure_wordlist_local(wordlist_name)
|
||||
tool_config['wordlist'] = local_wordlist_path
|
||||
except Exception as exc:
|
||||
logger.error("为工具 %s 准备字典失败: %s", tool_name, exc)
|
||||
# 当前工具无法执行,将所有站点视为失败,继续下一个工具
|
||||
failed_sites.extend(sites)
|
||||
continue
|
||||
|
||||
# 计算超时时间(所有站点共用)
|
||||
site_timeout = tool_config.get('timeout', 300)
|
||||
if site_timeout == 'auto':
|
||||
site_timeout = calculate_directory_scan_timeout(tool_config)
|
||||
logger.info(f"✓ 工具 {tool_name} 动态计算 timeout: {site_timeout}秒")
|
||||
|
||||
# 准备所有站点的扫描参数
|
||||
scan_params_list = []
|
||||
for idx, site_url in enumerate(sites, 1):
|
||||
try:
|
||||
command = build_scan_command(
|
||||
tool_name=tool_name,
|
||||
scan_type='directory_scan',
|
||||
command_params={'url': site_url},
|
||||
tool_config=tool_config
|
||||
)
|
||||
log_file = _generate_log_filename(tool_name, site_url, directory_scan_dir)
|
||||
scan_params_list.append({
|
||||
'idx': idx,
|
||||
'site_url': site_url,
|
||||
'command': command,
|
||||
'log_file': str(log_file),
|
||||
'timeout': site_timeout
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"✗ [%d/%d] 构建 %s 命令失败: %s - 站点: %s",
|
||||
idx, len(sites), tool_name, e, site_url
|
||||
)
|
||||
failed_sites.append(site_url)
|
||||
|
||||
if not scan_params_list:
|
||||
logger.warning("没有有效的扫描任务")
|
||||
continue
|
||||
|
||||
# ============================================================
|
||||
# 分批执行策略:控制实际并发的 ffuf 进程数
|
||||
# ============================================================
|
||||
total_tasks = len(scan_params_list)
|
||||
logger.info("开始分批执行 %d 个扫描任务(每批 %d 个)...", total_tasks, max_workers)
|
||||
|
||||
batch_num = 0
|
||||
for batch_start in range(0, total_tasks, max_workers):
|
||||
batch_end = min(batch_start + max_workers, total_tasks)
|
||||
batch_params = scan_params_list[batch_start:batch_end]
|
||||
batch_num += 1
|
||||
|
||||
logger.info("执行第 %d 批任务(%d-%d/%d)...", batch_num, batch_start + 1, batch_end, total_tasks)
|
||||
|
||||
# 提交当前批次的任务(非阻塞,立即返回 future)
|
||||
futures = []
|
||||
for params in batch_params:
|
||||
future = run_and_stream_save_directories_task.submit(
|
||||
cmd=params['command'],
|
||||
tool_name=tool_name,
|
||||
scan_id=scan_id,
|
||||
target_id=target_id,
|
||||
site_url=params['site_url'],
|
||||
cwd=str(directory_scan_dir),
|
||||
shell=True,
|
||||
batch_size=1000,
|
||||
timeout=params['timeout'],
|
||||
log_file=params['log_file']
|
||||
)
|
||||
futures.append((params['idx'], params['site_url'], future))
|
||||
|
||||
# 等待当前批次所有任务完成(阻塞,确保本批完成后再启动下一批)
|
||||
for idx, site_url, future in futures:
|
||||
try:
|
||||
result = future.result() # 阻塞等待单个任务完成
|
||||
directories_found = result.get('created_directories', 0)
|
||||
total_directories += directories_found
|
||||
processed_sites_count += 1
|
||||
|
||||
logger.info(
|
||||
"✓ [%d/%d] 站点扫描完成: %s - 发现 %d 个目录",
|
||||
idx, len(sites), site_url, directories_found
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
failed_sites.append(site_url)
|
||||
if 'timeout' in str(exc).lower() or isinstance(exc, subprocess.TimeoutExpired):
|
||||
logger.warning(
|
||||
"⚠️ [%d/%d] 站点扫描超时: %s - 错误: %s",
|
||||
idx, len(sites), site_url, exc
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
"✗ [%d/%d] 站点扫描失败: %s - 错误: %s",
|
||||
idx, len(sites), site_url, exc
|
||||
)
|
||||
|
||||
# 输出汇总信息
|
||||
if failed_sites:
|
||||
logger.warning(
|
||||
"部分站点扫描失败: %d/%d",
|
||||
len(failed_sites), len(sites)
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"✓ 并发目录扫描执行完成 - 成功: %d/%d, 失败: %d, 总目录数: %d",
|
||||
processed_sites_count, len(sites), len(failed_sites), total_directories
|
||||
)
|
||||
|
||||
return total_directories, processed_sites_count, failed_sites
|
||||
|
||||
|
||||
@flow(
|
||||
name="directory_scan",
|
||||
log_prints=True,
|
||||
@@ -359,7 +559,7 @@ def directory_scan_flow(
|
||||
Step 0: 创建工作目录
|
||||
Step 1: 导出站点 URL 列表到文件(供扫描工具使用)
|
||||
Step 2: 验证工具配置
|
||||
Step 3: 串行执行扫描工具并实时保存结果
|
||||
Step 3: 并发执行扫描工具并实时保存结果(使用 ThreadPoolTaskRunner)
|
||||
|
||||
ffuf 输出字段:
|
||||
- url: 发现的目录/文件 URL
|
||||
@@ -418,10 +618,11 @@ def directory_scan_flow(
|
||||
raise ValueError("enabled_tools 不能为空")
|
||||
|
||||
# Step 0: 创建工作目录
|
||||
directory_scan_dir = _setup_directory_scan_directory(scan_workspace_dir)
|
||||
from apps.scan.utils import setup_scan_directory
|
||||
directory_scan_dir = setup_scan_directory(scan_workspace_dir, 'directory_scan')
|
||||
|
||||
# Step 1: 导出站点 URL
|
||||
sites_file, site_count = _export_site_urls(target_id, directory_scan_dir)
|
||||
# Step 1: 导出站点 URL(支持懒加载)
|
||||
sites_file, site_count = _export_site_urls(target_id, target_name, directory_scan_dir)
|
||||
|
||||
if site_count == 0:
|
||||
logger.warning("目标下没有站点,跳过目录扫描")
|
||||
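The hunk above (and matching hunks later for port_scan, site_scan, subdomain_discovery and url_fetch) replaces each flow's private _setup_*_directory helper with a shared setup_scan_directory(scan_workspace_dir, subdir) from apps.scan.utils. Its body is not included in this diff; a sketch that mirrors the removed per-flow helpers (mkdir, then is_dir and writability checks) would look roughly like this, as an assumption rather than the project's actual code:

    import os
    from pathlib import Path

    def setup_scan_directory(scan_workspace_dir: str, subdir: str) -> Path:
        """Create and validate a per-stage working directory (sketch of the shared helper)."""
        scan_dir = Path(scan_workspace_dir) / subdir
        scan_dir.mkdir(parents=True, exist_ok=True)
        if not scan_dir.is_dir():
            raise RuntimeError(f"scan directory could not be created: {scan_dir}")
        if not os.access(scan_dir, os.W_OK):
            raise RuntimeError(f"scan directory is not writable: {scan_dir}")
        return scan_dir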
@@ -440,14 +641,15 @@ def directory_scan_flow(
|
||||
|
||||
# Step 2: 工具配置信息
|
||||
logger.info("Step 2: 工具配置信息")
|
||||
logger.info(
|
||||
"✓ 启用工具: %s",
|
||||
', '.join(enabled_tools.keys())
|
||||
)
|
||||
tool_info = []
|
||||
for tool_name, tool_config in enabled_tools.items():
|
||||
mw = _get_max_workers(tool_config)
|
||||
tool_info.append(f"{tool_name}(max_workers={mw})")
|
||||
logger.info("✓ 启用工具: %s", ', '.join(tool_info))
|
||||
|
||||
# Step 3: 串行执行扫描工具并实时保存结果
|
||||
logger.info("Step 3: 串行执行扫描工具并实时保存结果")
|
||||
total_directories, processed_sites, failed_sites = _run_scans_sequentially(
|
||||
# Step 3: 并发执行扫描工具并实时保存结果
|
||||
logger.info("Step 3: 并发执行扫描工具并实时保存结果")
|
||||
total_directories, processed_sites, failed_sites = _run_scans_concurrently(
|
||||
enabled_tools=enabled_tools,
|
||||
sites_file=sites_file,
|
||||
directory_scan_dir=directory_scan_dir,
|
||||
|
||||
380  backend/apps/scan/flows/fingerprint_detect_flow.py  Normal file
@@ -0,0 +1,380 @@
|
||||
"""
|
||||
指纹识别 Flow
|
||||
|
||||
负责编排指纹识别的完整流程
|
||||
|
||||
架构:
|
||||
- Flow 负责编排多个原子 Task
|
||||
- 在 site_scan 后串行执行
|
||||
- 使用 xingfinger 工具识别技术栈
|
||||
- 流式处理输出,批量更新数据库
|
||||
"""
|
||||
|
||||
# Django 环境初始化(导入即生效)
|
||||
from apps.common.prefect_django_setup import setup_django_for_prefect
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from prefect import flow
|
||||
|
||||
from apps.scan.handlers.scan_flow_handlers import (
|
||||
on_scan_flow_running,
|
||||
on_scan_flow_completed,
|
||||
on_scan_flow_failed,
|
||||
)
|
||||
from apps.scan.tasks.fingerprint_detect import (
|
||||
export_urls_for_fingerprint_task,
|
||||
run_xingfinger_and_stream_update_tech_task,
|
||||
)
|
||||
from apps.scan.utils import build_scan_command
|
||||
from apps.scan.utils.fingerprint_helpers import get_fingerprint_paths
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def calculate_fingerprint_detect_timeout(
|
||||
url_count: int,
|
||||
base_per_url: float = 3.0,
|
||||
min_timeout: int = 60
|
||||
) -> int:
|
||||
"""
|
||||
根据 URL 数量计算超时时间
|
||||
|
||||
公式:超时时间 = URL 数量 × 每 URL 基础时间
|
||||
最小值:60秒
|
||||
无上限
|
||||
|
||||
Args:
|
||||
url_count: URL 数量
|
||||
base_per_url: 每 URL 基础时间(秒),默认 3秒
|
||||
min_timeout: 最小超时时间(秒),默认 60秒
|
||||
|
||||
Returns:
|
||||
int: 计算出的超时时间(秒)
|
||||
|
||||
示例:
|
||||
100 URL × 3秒 = 300秒
|
||||
1000 URL × 3秒 = 3000秒(50分钟)
|
||||
10000 URL × 3秒 = 30000秒(8.3小时)
|
||||
"""
|
||||
timeout = int(url_count * base_per_url)
|
||||
return max(min_timeout, timeout)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def _export_urls(
|
||||
target_id: int,
|
||||
fingerprint_dir: Path,
|
||||
source: str = 'website'
|
||||
) -> tuple[str, int]:
|
||||
"""
|
||||
导出 URL 到文件
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
fingerprint_dir: 指纹识别目录
|
||||
source: 数据源类型
|
||||
|
||||
Returns:
|
||||
tuple: (urls_file, total_count)
|
||||
"""
|
||||
logger.info("Step 1: 导出 URL 列表 (source=%s)", source)
|
||||
|
||||
urls_file = str(fingerprint_dir / 'urls.txt')
|
||||
export_result = export_urls_for_fingerprint_task(
|
||||
target_id=target_id,
|
||||
output_file=urls_file,
|
||||
source=source,
|
||||
batch_size=1000
|
||||
)
|
||||
|
||||
total_count = export_result['total_count']
|
||||
|
||||
logger.info(
|
||||
"✓ URL 导出完成 - 文件: %s, 数量: %d",
|
||||
export_result['output_file'],
|
||||
total_count
|
||||
)
|
||||
|
||||
return export_result['output_file'], total_count
|
||||
|
||||
|
||||
def _run_fingerprint_detect(
|
||||
enabled_tools: dict,
|
||||
urls_file: str,
|
||||
url_count: int,
|
||||
fingerprint_dir: Path,
|
||||
scan_id: int,
|
||||
target_id: int,
|
||||
source: str
|
||||
) -> tuple[dict, list]:
|
||||
"""
|
||||
执行指纹识别任务
|
||||
|
||||
Args:
|
||||
enabled_tools: 已启用的工具配置字典
|
||||
urls_file: URL 文件路径
|
||||
url_count: URL 总数
|
||||
fingerprint_dir: 指纹识别目录
|
||||
scan_id: 扫描任务 ID
|
||||
target_id: 目标 ID
|
||||
source: 数据源类型
|
||||
|
||||
Returns:
|
||||
tuple: (tool_stats, failed_tools)
|
||||
"""
|
||||
tool_stats = {}
|
||||
failed_tools = []
|
||||
|
||||
for tool_name, tool_config in enabled_tools.items():
|
||||
# 1. 获取指纹库路径
|
||||
lib_names = tool_config.get('fingerprint_libs', ['ehole'])
|
||||
fingerprint_paths = get_fingerprint_paths(lib_names)
|
||||
|
||||
if not fingerprint_paths:
|
||||
reason = f"没有可用的指纹库: {lib_names}"
|
||||
logger.warning(reason)
|
||||
failed_tools.append({'tool': tool_name, 'reason': reason})
|
||||
continue
|
||||
|
||||
# 2. 将指纹库路径合并到 tool_config(用于命令构建)
|
||||
tool_config_with_paths = {**tool_config, **fingerprint_paths}
|
||||
|
||||
# 3. 构建命令
|
||||
try:
|
||||
command = build_scan_command(
|
||||
tool_name=tool_name,
|
||||
scan_type='fingerprint_detect',
|
||||
command_params={
|
||||
'urls_file': urls_file
|
||||
},
|
||||
tool_config=tool_config_with_paths
|
||||
)
|
||||
except Exception as e:
|
||||
reason = f"命令构建失败: {str(e)}"
|
||||
logger.error("构建 %s 命令失败: %s", tool_name, e)
|
||||
failed_tools.append({'tool': tool_name, 'reason': reason})
|
||||
continue
|
||||
|
||||
# 4. 计算超时时间
|
||||
timeout = calculate_fingerprint_detect_timeout(url_count)
|
||||
|
||||
# 5. 生成日志文件路径
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||
log_file = fingerprint_dir / f"{tool_name}_{timestamp}.log"
|
||||
|
||||
logger.info(
|
||||
"开始执行 %s 指纹识别 - URL数: %d, 超时: %ds, 指纹库: %s",
|
||||
tool_name, url_count, timeout, list(fingerprint_paths.keys())
|
||||
)
|
||||
|
||||
# 6. 执行扫描任务
|
||||
try:
|
||||
result = run_xingfinger_and_stream_update_tech_task(
|
||||
cmd=command,
|
||||
tool_name=tool_name,
|
||||
scan_id=scan_id,
|
||||
target_id=target_id,
|
||||
source=source,
|
||||
cwd=str(fingerprint_dir),
|
||||
timeout=timeout,
|
||||
log_file=str(log_file),
|
||||
batch_size=100
|
||||
)
|
||||
|
||||
tool_stats[tool_name] = {
|
||||
'command': command,
|
||||
'result': result,
|
||||
'timeout': timeout,
|
||||
'fingerprint_libs': list(fingerprint_paths.keys())
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"✓ 工具 %s 执行完成 - 处理记录: %d, 更新: %d, 未找到: %d",
|
||||
tool_name,
|
||||
result.get('processed_records', 0),
|
||||
result.get('updated_count', 0),
|
||||
result.get('not_found_count', 0)
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
failed_tools.append({'tool': tool_name, 'reason': str(exc)})
|
||||
logger.error("工具 %s 执行失败: %s", tool_name, exc, exc_info=True)
|
||||
|
||||
if failed_tools:
|
||||
logger.warning(
|
||||
"以下指纹识别工具执行失败: %s",
|
||||
', '.join([f['tool'] for f in failed_tools])
|
||||
)
|
||||
|
||||
return tool_stats, failed_tools
|
||||
|
||||
|
||||
@flow(
|
||||
name="fingerprint_detect",
|
||||
log_prints=True,
|
||||
on_running=[on_scan_flow_running],
|
||||
on_completion=[on_scan_flow_completed],
|
||||
on_failure=[on_scan_flow_failed],
|
||||
)
|
||||
def fingerprint_detect_flow(
|
||||
scan_id: int,
|
||||
target_name: str,
|
||||
target_id: int,
|
||||
scan_workspace_dir: str,
|
||||
enabled_tools: dict
|
||||
) -> dict:
|
||||
"""
|
||||
指纹识别 Flow
|
||||
|
||||
主要功能:
|
||||
1. 从数据库导出目标下所有 WebSite URL 到文件
|
||||
2. 使用 xingfinger 进行技术栈识别
|
||||
3. 解析结果并更新 WebSite.tech 字段(合并去重)
|
||||
|
||||
工作流程:
|
||||
Step 0: 创建工作目录
|
||||
Step 1: 导出 URL 列表
|
||||
Step 2: 解析配置,获取启用的工具
|
||||
Step 3: 执行 xingfinger 并解析结果
|
||||
|
||||
Args:
|
||||
scan_id: 扫描任务 ID
|
||||
target_name: 目标名称
|
||||
target_id: 目标 ID
|
||||
scan_workspace_dir: 扫描工作空间目录
|
||||
enabled_tools: 启用的工具配置(xingfinger)
|
||||
|
||||
Returns:
|
||||
dict: {
|
||||
'success': bool,
|
||||
'scan_id': int,
|
||||
'target': str,
|
||||
'scan_workspace_dir': str,
|
||||
'urls_file': str,
|
||||
'url_count': int,
|
||||
'processed_records': int,
|
||||
'updated_count': int,
|
||||
'not_found_count': int,
|
||||
'executed_tasks': list,
|
||||
'tool_stats': dict
|
||||
}
|
||||
"""
|
||||
try:
|
||||
logger.info(
|
||||
"="*60 + "\n" +
|
||||
"开始指纹识别\n" +
|
||||
f" Scan ID: {scan_id}\n" +
|
||||
f" Target: {target_name}\n" +
|
||||
f" Workspace: {scan_workspace_dir}\n" +
|
||||
"="*60
|
||||
)
|
||||
|
||||
# 参数验证
|
||||
if scan_id is None:
|
||||
raise ValueError("scan_id 不能为空")
|
||||
if not target_name:
|
||||
raise ValueError("target_name 不能为空")
|
||||
if target_id is None:
|
||||
raise ValueError("target_id 不能为空")
|
||||
if not scan_workspace_dir:
|
||||
raise ValueError("scan_workspace_dir 不能为空")
|
||||
|
||||
# 数据源类型(当前只支持 website)
|
||||
source = 'website'
|
||||
|
||||
# Step 0: 创建工作目录
|
||||
from apps.scan.utils import setup_scan_directory
|
||||
fingerprint_dir = setup_scan_directory(scan_workspace_dir, 'fingerprint_detect')
|
||||
|
||||
# Step 1: 导出 URL(支持懒加载)
|
||||
urls_file, url_count = _export_urls(target_id, fingerprint_dir, source)
|
||||
|
||||
if url_count == 0:
|
||||
logger.warning("目标下没有可用的 URL,跳过指纹识别")
|
||||
return {
|
||||
'success': True,
|
||||
'scan_id': scan_id,
|
||||
'target': target_name,
|
||||
'scan_workspace_dir': scan_workspace_dir,
|
||||
'urls_file': urls_file,
|
||||
'url_count': 0,
|
||||
'processed_records': 0,
|
||||
'updated_count': 0,
|
||||
'created_count': 0,
|
||||
'executed_tasks': ['export_urls_for_fingerprint'],
|
||||
'tool_stats': {
|
||||
'total': 0,
|
||||
'successful': 0,
|
||||
'failed': 0,
|
||||
'successful_tools': [],
|
||||
'failed_tools': [],
|
||||
'details': {}
|
||||
}
|
||||
}
|
||||
|
||||
# Step 2: 工具配置信息
|
||||
logger.info("Step 2: 工具配置信息")
|
||||
logger.info("✓ 启用工具: %s", ', '.join(enabled_tools.keys()))
|
||||
|
||||
# Step 3: 执行指纹识别
|
||||
logger.info("Step 3: 执行指纹识别")
|
||||
tool_stats, failed_tools = _run_fingerprint_detect(
|
||||
enabled_tools=enabled_tools,
|
||||
urls_file=urls_file,
|
||||
url_count=url_count,
|
||||
fingerprint_dir=fingerprint_dir,
|
||||
scan_id=scan_id,
|
||||
target_id=target_id,
|
||||
source=source
|
||||
)
|
||||
|
||||
logger.info("="*60 + "\n✓ 指纹识别完成\n" + "="*60)
|
||||
|
||||
# 动态生成已执行的任务列表
|
||||
executed_tasks = ['export_urls_for_fingerprint']
|
||||
executed_tasks.extend([f'run_xingfinger ({tool})' for tool in tool_stats.keys()])
|
||||
|
||||
# 汇总所有工具的结果
|
||||
total_processed = sum(stats['result'].get('processed_records', 0) for stats in tool_stats.values())
|
||||
total_updated = sum(stats['result'].get('updated_count', 0) for stats in tool_stats.values())
|
||||
total_created = sum(stats['result'].get('created_count', 0) for stats in tool_stats.values())
|
||||
|
||||
successful_tools = [name for name in enabled_tools.keys()
|
||||
if name not in [f['tool'] for f in failed_tools]]
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'scan_id': scan_id,
|
||||
'target': target_name,
|
||||
'scan_workspace_dir': scan_workspace_dir,
|
||||
'urls_file': urls_file,
|
||||
'url_count': url_count,
|
||||
'processed_records': total_processed,
|
||||
'updated_count': total_updated,
|
||||
'created_count': total_created,
|
||||
'executed_tasks': executed_tasks,
|
||||
'tool_stats': {
|
||||
'total': len(enabled_tools),
|
||||
'successful': len(successful_tools),
|
||||
'failed': len(failed_tools),
|
||||
'successful_tools': successful_tools,
|
||||
'failed_tools': failed_tools,
|
||||
'details': tool_stats
|
||||
}
|
||||
}
|
||||
|
||||
except ValueError as e:
|
||||
logger.error("配置错误: %s", e)
|
||||
raise
|
||||
except RuntimeError as e:
|
||||
logger.error("运行时错误: %s", e)
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception("指纹识别失败: %s", e)
|
||||
raise
|
||||
@@ -30,7 +30,7 @@ from apps.scan.handlers import (
|
||||
on_initiate_scan_flow_failed,
|
||||
)
|
||||
from prefect.futures import wait
|
||||
from apps.scan.tasks.workspace_tasks import create_scan_workspace_task
|
||||
from apps.scan.utils import setup_scan_workspace
|
||||
from apps.scan.orchestrators import FlowOrchestrator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -110,7 +110,7 @@ def initiate_scan_flow(
|
||||
)
|
||||
|
||||
# ==================== Task 1: 创建 Scan 工作空间 ====================
|
||||
scan_workspace_path = create_scan_workspace_task(scan_workspace_dir)
|
||||
scan_workspace_path = setup_scan_workspace(scan_workspace_dir)
|
||||
|
||||
# ==================== Task 2: 获取引擎配置 ====================
|
||||
from apps.scan.models import Scan
|
||||
|
||||
@@ -154,28 +154,7 @@ def _parse_port_count(tool_config: dict) -> int:
|
||||
return 100
|
||||
|
||||
|
||||
def _setup_port_scan_directory(scan_workspace_dir: str) -> Path:
|
||||
"""
|
||||
创建并验证端口扫描工作目录
|
||||
|
||||
Args:
|
||||
scan_workspace_dir: 扫描工作空间目录
|
||||
|
||||
Returns:
|
||||
Path: 端口扫描目录路径
|
||||
|
||||
Raises:
|
||||
RuntimeError: 目录创建或验证失败
|
||||
"""
|
||||
port_scan_dir = Path(scan_workspace_dir) / 'port_scan'
|
||||
port_scan_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if not port_scan_dir.is_dir():
|
||||
raise RuntimeError(f"端口扫描目录创建失败: {port_scan_dir}")
|
||||
if not os.access(port_scan_dir, os.W_OK):
|
||||
raise RuntimeError(f"端口扫描目录不可写: {port_scan_dir}")
|
||||
|
||||
return port_scan_dir
|
||||
|
||||
|
||||
|
||||
def _export_scan_targets(target_id: int, port_scan_dir: Path) -> tuple[str, int, str]:
|
||||
@@ -372,19 +351,17 @@ def port_scan_flow(
|
||||
端口扫描 Flow
|
||||
|
||||
主要功能:
|
||||
1. 扫描目标域名的开放端口(核心目标)
|
||||
2. 发现域名对应的 IP 地址(附带产物)
|
||||
3. 保存 IP 和端口的关联关系
|
||||
1. 扫描目标域名/IP 的开放端口
|
||||
2. 保存 host + ip + port 三元映射到 HostPortMapping 表
|
||||
|
||||
输出资产:
|
||||
- Port:开放的端口列表(主要资产)
|
||||
- IPAddress:域名对应的 IP 地址(附带资产)
|
||||
- HostPortMapping:主机端口映射(host + ip + port 三元组)
|
||||
|
||||
工作流程:
|
||||
Step 0: 创建工作目录
|
||||
Step 1: 导出域名列表到文件(供扫描工具使用)
|
||||
Step 2: 解析配置,获取启用的工具
|
||||
Step 3: 串行执行扫描工具,运行端口扫描工具并实时解析输出到数据库(Subdomain → IPAddress → Port)
|
||||
Step 3: 串行执行扫描工具,运行端口扫描工具并实时解析输出到数据库(→ HostPortMapping)
|
||||
|
||||
Args:
|
||||
scan_id: 扫描任务 ID
|
||||
@@ -418,10 +395,8 @@ def port_scan_flow(
|
||||
RuntimeError: 执行失败
|
||||
|
||||
Note:
|
||||
端口扫描的输出必然包含 IP 信息,因为:
|
||||
- 扫描工具需要解析域名 → IP
|
||||
- 端口属于 IP,而不是直接属于域名
|
||||
- 同一域名可能对应多个 IP(CDN、负载均衡)
|
||||
端口扫描工具(如 naabu)会解析域名获取 IP,输出 host + ip + port 三元组。
|
||||
同一 host 可能对应多个 IP(CDN、负载均衡),因此使用三元映射表存储。
|
||||
"""
|
||||
try:
|
||||
# 参数验证
|
||||
@@ -446,7 +421,8 @@ def port_scan_flow(
|
||||
)
|
||||
|
||||
# Step 0: 创建工作目录
|
||||
port_scan_dir = _setup_port_scan_directory(scan_workspace_dir)
|
||||
from apps.scan.utils import setup_scan_directory
|
||||
port_scan_dir = setup_scan_directory(scan_workspace_dir, 'port_scan')
|
||||
|
||||
# Step 1: 导出扫描目标列表到文件(根据 Target 类型自动决定内容)
|
||||
targets_file, target_count, target_type = _export_scan_targets(target_id, port_scan_dir)
|
||||
|
||||
@@ -34,7 +34,8 @@ logger = logging.getLogger(__name__)
|
||||
def calculate_timeout_by_line_count(
|
||||
tool_config: dict,
|
||||
file_path: str,
|
||||
base_per_time: int = 1
|
||||
base_per_time: int = 1,
|
||||
min_timeout: int = 60
|
||||
) -> int:
|
||||
"""
|
||||
根据文件行数计算 timeout
|
||||
@@ -45,9 +46,10 @@ def calculate_timeout_by_line_count(
|
||||
tool_config: 工具配置字典(此函数未使用,但保持接口一致性)
|
||||
file_path: 要统计行数的文件路径
|
||||
base_per_time: 每行的基础时间(秒),默认1秒
|
||||
min_timeout: 最小超时时间(秒),默认60秒
|
||||
|
||||
Returns:
|
||||
int: 计算出的超时时间(秒)
|
||||
int: 计算出的超时时间(秒),不低于 min_timeout
|
||||
|
||||
Example:
|
||||
timeout = calculate_timeout_by_line_count(
|
||||
@@ -67,53 +69,33 @@ def calculate_timeout_by_line_count(
|
||||
# wc -l 输出格式:行数 + 空格 + 文件名
|
||||
line_count = int(result.stdout.strip().split()[0])
|
||||
|
||||
# 计算 timeout:行数 × 每行基础时间
|
||||
timeout = line_count * base_per_time
|
||||
# 计算 timeout:行数 × 每行基础时间,不低于最小值
|
||||
timeout = max(line_count * base_per_time, min_timeout)
|
||||
|
||||
logger.info(
|
||||
f"timeout 自动计算: 文件={file_path}, "
|
||||
f"行数={line_count}, 每行时间={base_per_time}秒, timeout={timeout}秒"
|
||||
f"行数={line_count}, 每行时间={base_per_time}秒, 最小值={min_timeout}秒, timeout={timeout}秒"
|
||||
)
|
||||
|
||||
return timeout
|
||||
|
||||
except Exception as e:
|
||||
# 如果 wc -l 失败,使用默认值
|
||||
logger.warning(f"wc -l 计算行数失败: {e},使用默认 timeout: 600秒")
|
||||
return 600
|
||||
logger.warning(f"wc -l 计算行数失败: {e},使用默认 timeout: {min_timeout}秒")
|
||||
return min_timeout
|
||||
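Assembled from the hunks above, the line-count based timeout helper now reads roughly as follows; the exact subprocess.run arguments are not shown in the diff, so the wc -l invocation here is an assumption:

    import logging
    import subprocess

    logger = logging.getLogger(__name__)

    def calculate_timeout_by_line_count(tool_config: dict, file_path: str,
                                        base_per_time: int = 1, min_timeout: int = 60) -> int:
        """Consolidated view of the diff above; applies a min_timeout floor to lines x base_per_time."""
        try:
            result = subprocess.run(['wc', '-l', file_path],
                                    capture_output=True, text=True, check=True)
            line_count = int(result.stdout.strip().split()[0])   # "wc -l" prints "<count> <filename>"
            timeout = max(line_count * base_per_time, min_timeout)
            logger.info("auto timeout: file=%s lines=%d per-line=%ds floor=%ds timeout=%ds",
                        file_path, line_count, base_per_time, min_timeout, timeout)
            return timeout
        except Exception as e:
            logger.warning("wc -l failed: %s, falling back to %ds", e, min_timeout)
            return min_timeout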
|
||||
|
||||
def _setup_site_scan_directory(scan_workspace_dir: str) -> Path:
|
||||
"""
|
||||
创建并验证站点扫描工作目录
|
||||
|
||||
Args:
|
||||
scan_workspace_dir: 扫描工作空间目录
|
||||
|
||||
Returns:
|
||||
Path: 站点扫描目录路径
|
||||
|
||||
Raises:
|
||||
RuntimeError: 目录创建或验证失败
|
||||
"""
|
||||
site_scan_dir = Path(scan_workspace_dir) / 'site_scan'
|
||||
site_scan_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if not site_scan_dir.is_dir():
|
||||
raise RuntimeError(f"站点扫描目录创建失败: {site_scan_dir}")
|
||||
if not os.access(site_scan_dir, os.W_OK):
|
||||
raise RuntimeError(f"站点扫描目录不可写: {site_scan_dir}")
|
||||
|
||||
return site_scan_dir
|
||||
|
||||
|
||||
def _export_site_urls(target_id: int, site_scan_dir: Path) -> tuple[str, int, int]:
|
||||
|
||||
def _export_site_urls(target_id: int, site_scan_dir: Path, target_name: str = None) -> tuple[str, int, int]:
|
||||
"""
|
||||
导出站点 URL 到文件
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
site_scan_dir: 站点扫描目录
|
||||
target_name: 目标名称(用于懒加载时写入默认值)
|
||||
|
||||
Returns:
|
||||
tuple: (urls_file, total_urls, association_count)
|
||||
@@ -399,11 +381,12 @@ def site_scan_flow(
|
||||
raise ValueError("scan_workspace_dir 不能为空")
|
||||
|
||||
# Step 0: 创建工作目录
|
||||
site_scan_dir = _setup_site_scan_directory(scan_workspace_dir)
|
||||
from apps.scan.utils import setup_scan_directory
|
||||
site_scan_dir = setup_scan_directory(scan_workspace_dir, 'site_scan')
|
||||
|
||||
# Step 1: 导出站点 URL
|
||||
urls_file, total_urls, association_count = _export_site_urls(
|
||||
target_id, site_scan_dir
|
||||
target_id, site_scan_dir, target_name
|
||||
)
|
||||
|
||||
if total_urls == 0:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
子域名发现扫描 Flow(增强版)
|
||||
子域名发现扫描 Flow
|
||||
|
||||
负责编排子域名发现扫描的完整流程
|
||||
|
||||
@@ -41,28 +41,7 @@ import subprocess
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _setup_subdomain_directory(scan_workspace_dir: str) -> Path:
|
||||
"""
|
||||
创建并验证子域名扫描工作目录
|
||||
|
||||
Args:
|
||||
scan_workspace_dir: 扫描工作空间目录
|
||||
|
||||
Returns:
|
||||
Path: 子域名扫描目录路径
|
||||
|
||||
Raises:
|
||||
RuntimeError: 目录创建或验证失败
|
||||
"""
|
||||
result_dir = Path(scan_workspace_dir) / 'subdomain_discovery'
|
||||
result_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if not result_dir.is_dir():
|
||||
raise RuntimeError(f"子域名扫描目录创建失败: {result_dir}")
|
||||
if not os.access(result_dir, os.W_OK):
|
||||
raise RuntimeError(f"子域名扫描目录不可写: {result_dir}")
|
||||
|
||||
return result_dir
|
||||
|
||||
|
||||
|
||||
def _validate_and_normalize_target(target_name: str) -> str:
|
||||
@@ -119,12 +98,7 @@ def _run_scans_parallel(
|
||||
|
||||
# 生成时间戳(所有工具共用)
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||
|
||||
# TODO: 接入代理池管理系统
|
||||
# from apps.proxy.services import proxy_pool
|
||||
# proxy_stats = proxy_pool.get_stats()
|
||||
# logger.info(f"代理池状态: {proxy_stats['healthy']}/{proxy_stats['total']} 可用")
|
||||
|
||||
|
||||
failures = [] # 记录命令构建失败的工具
|
||||
futures = {}
|
||||
|
||||
@@ -343,7 +317,7 @@ def subdomain_discovery_flow(
|
||||
scan_workspace_dir: str,
|
||||
enabled_tools: dict
|
||||
) -> dict:
|
||||
"""子域名发现扫描流程(增强版)
|
||||
"""子域名发现扫描流程
|
||||
|
||||
工作流程(4 阶段):
|
||||
Stage 1: 被动收集(并行) - 必选
|
||||
@@ -352,6 +326,10 @@ def subdomain_discovery_flow(
|
||||
Stage 4: DNS 存活验证(可选) - 通用存活验证
|
||||
Final: 保存到数据库
|
||||
|
||||
注意:
|
||||
- 子域名发现只对 DOMAIN 类型目标有意义
|
||||
- IP 和 CIDR 类型目标会自动跳过
|
||||
|
||||
Args:
|
||||
scan_id: 扫描任务 ID
|
||||
target_name: 目标名称(域名)
|
||||
@@ -390,6 +368,21 @@ def subdomain_discovery_flow(
|
||||
logger.warning("未提供目标域名,跳过子域名发现扫描")
|
||||
return _empty_result(scan_id, '', scan_workspace_dir)
|
||||
|
||||
# ==================== 检查 Target 类型 ====================
|
||||
# 子域名发现只对 DOMAIN 类型有意义,IP 和 CIDR 类型跳过
|
||||
from apps.targets.services import TargetService
|
||||
from apps.targets.models import Target
|
||||
|
||||
target_service = TargetService()
|
||||
target = target_service.get_target(target_id)
|
||||
|
||||
if target and target.type != Target.TargetType.DOMAIN:
|
||||
logger.info(
|
||||
"跳过子域名发现扫描: Target 类型为 %s (ID=%d, Name=%s),子域名发现仅适用于域名类型",
|
||||
target.type, target_id, target_name
|
||||
)
|
||||
return _empty_result(scan_id, target_name, scan_workspace_dir)
|
||||
|
||||
# 导入任务函数
|
||||
from apps.scan.tasks.subdomain_discovery import (
|
||||
run_subdomain_discovery_task,
|
||||
@@ -398,7 +391,8 @@ def subdomain_discovery_flow(
|
||||
)
|
||||
|
||||
# Step 0: 准备工作
|
||||
result_dir = _setup_subdomain_directory(scan_workspace_dir)
|
||||
from apps.scan.utils import setup_scan_directory
|
||||
result_dir = setup_scan_directory(scan_workspace_dir, 'subdomain_discovery')
|
||||
|
||||
# 验证并规范化目标域名
|
||||
try:
|
||||
@@ -410,7 +404,7 @@ def subdomain_discovery_flow(
|
||||
# 验证成功后打印日志
|
||||
logger.info(
|
||||
"="*60 + "\n" +
|
||||
"开始子域名发现扫描(增强版)\n" +
|
||||
"开始子域名发现扫描\n" +
|
||||
f" Scan ID: {scan_id}\n" +
|
||||
f" Domain: {domain_name}\n" +
|
||||
f" Workspace: {scan_workspace_dir}\n" +
|
||||
|
||||
@@ -4,18 +4,15 @@ URL Fetch Flow 模块
|
||||
提供 URL 获取相关的 Flow:
|
||||
- url_fetch_flow: 主 Flow(按输入类型编排 + 统一后处理)
|
||||
- domain_name_url_fetch_flow: 基于 domain_name(来自 target_name)输入的 URL 获取子 Flow(如 waymore)
|
||||
- domains_url_fetch_flow: 基于 domains_file 输入的 URL 获取子 Flow(如 gau、waybackurls)
|
||||
- sites_url_fetch_flow: 基于 sites_file 输入的 URL 获取子 Flow(如 katana 等爬虫)
|
||||
"""
|
||||
|
||||
from .main_flow import url_fetch_flow
|
||||
from .domain_name_url_fetch_flow import domain_name_url_fetch_flow
|
||||
from .domains_url_fetch_flow import domains_url_fetch_flow
|
||||
from .sites_url_fetch_flow import sites_url_fetch_flow
|
||||
|
||||
__all__ = [
|
||||
'url_fetch_flow',
|
||||
'domain_name_url_fetch_flow',
|
||||
'domains_url_fetch_flow',
|
||||
'sites_url_fetch_flow',
|
||||
]
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
"""
|
||||
基于 domain_name(域名)的 URL 获取 Flow
|
||||
基于 Target 根域名的 URL 被动收集 Flow
|
||||
|
||||
主要用于像 waymore 这种按域名输入(input_type = 'domain_name')的工具:
|
||||
- 直接对目标域名(target_name/domain_name)执行 URL 被动收集
|
||||
- 不再依赖 domains_file(子域名列表文件)
|
||||
用于 waymore 等被动收集工具:
|
||||
- 输入:Target 的根域名(target_name,如 example.com)
|
||||
- 工具会自动从第三方源(Wayback Machine、Common Crawl 等)查询该域名及其子域名的历史 URL
|
||||
- 不需要遍历子域名列表,工具内部会处理 *.example.com
|
||||
|
||||
注意:
|
||||
- 此 Flow 只对 DOMAIN 类型 Target 有效
|
||||
- IP 和 CIDR 类型会自动跳过(被动收集工具不支持)
|
||||
"""
|
||||
|
||||
# Django 环境初始化
|
||||
@@ -34,18 +39,49 @@ def domain_name_url_fetch_flow(
|
||||
domain_name_tools: Dict[str, dict],
|
||||
) -> dict:
|
||||
"""
|
||||
基于 target_name/domain_name 域名执行 URL 获取子 Flow(当前主要用于 waymore)。
|
||||
基于 Target 根域名执行 URL 被动收集(当前主要用于 waymore)
|
||||
|
||||
执行流程:
|
||||
1. 校验 target_name 是否为域名
|
||||
2. 使用传入的 domain_name_tools 工具列表
|
||||
3. 为每个工具构建命令并并行执行
|
||||
1. 校验 Target 类型(IP/CIDR 类型跳过)
|
||||
2. 使用传入的工具列表对根域名执行被动收集
|
||||
3. 工具内部会自动查询该域名及其子域名的历史 URL
|
||||
4. 汇总结果文件列表
|
||||
|
||||
Args:
|
||||
scan_id: 扫描 ID
|
||||
target_id: 目标 ID
|
||||
target_name: Target 根域名(如 example.com),不是子域名列表
|
||||
output_dir: 输出目录
|
||||
domain_name_tools: 被动收集工具配置(如 waymore)
|
||||
|
||||
注意:
|
||||
- 此 Flow 只对 DOMAIN 类型 Target 有效
|
||||
- IP 和 CIDR 类型会自动跳过(waymore 等工具不支持)
|
||||
- 工具会自动收集 *.target_name 的所有历史 URL,无需遍历子域名
|
||||
"""
|
||||
try:
|
||||
output_path = Path(output_dir)
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 检查 Target 类型,IP/CIDR 类型跳过
|
||||
from apps.targets.services import TargetService
|
||||
from apps.targets.models import Target
|
||||
|
||||
target_service = TargetService()
|
||||
target = target_service.get_target(target_id)
|
||||
|
||||
if target and target.type != Target.TargetType.DOMAIN:
|
||||
logger.info(
|
||||
"跳过 domain_name URL 获取: Target 类型为 %s (ID=%d, Name=%s),waymore 等工具仅适用于域名类型",
|
||||
target.type, target_id, target_name
|
||||
)
|
||||
return {
|
||||
"success": True,
|
||||
"result_files": [],
|
||||
"failed_tools": [],
|
||||
"successful_tools": [],
|
||||
}
|
||||
|
||||
# 复用公共域名校验逻辑,确保 target_name 是合法域名
|
||||
validate_domain(target_name)
|
||||
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
"""
|
||||
URL 被动收集 Flow
|
||||
|
||||
从历史归档、搜索引擎等被动来源收集 URL
|
||||
工具:waymore, gau, waybackurls 等
|
||||
输入:domains_file(子域名列表)
|
||||
"""
|
||||
|
||||
# Django 环境初始化
|
||||
from apps.common.prefect_django_setup import setup_django_for_prefect
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from prefect import flow
|
||||
|
||||
from .utils import run_tools_parallel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _export_domains_file(target_id: int, scan_id: int, output_dir: Path) -> tuple[str, int]:
|
||||
"""
|
||||
导出子域名列表到文件
|
||||
|
||||
Args:
|
||||
target_id: 目标 ID
|
||||
scan_id: 扫描 ID
|
||||
output_dir: 输出目录
|
||||
|
||||
Returns:
|
||||
tuple: (file_path, count)
|
||||
"""
|
||||
from apps.scan.tasks.url_fetch import export_target_assets_task
|
||||
|
||||
output_file = str(output_dir / "domains.txt")
|
||||
result = export_target_assets_task(
|
||||
output_file=output_file,
|
||||
target_id=target_id,
|
||||
scan_id=scan_id,
|
||||
input_type="domains_file"
|
||||
)
|
||||
|
||||
count = result['asset_count']
|
||||
if count == 0:
|
||||
logger.warning("子域名列表为空,被动收集可能无法正常工作")
|
||||
else:
|
||||
logger.info("✓ 子域名列表导出完成 - 数量: %d", count)
|
||||
|
||||
return output_file, count
|
||||
|
||||
|
||||
@flow(name="domains_url_fetch_flow", log_prints=True)
|
||||
def domains_url_fetch_flow(
|
||||
scan_id: int,
|
||||
target_id: int,
|
||||
target_name: str,
|
||||
output_dir: str,
|
||||
enabled_tools: dict
|
||||
) -> dict:
|
||||
"""
|
||||
URL 被动收集子 Flow
|
||||
|
||||
执行流程:
|
||||
1. 导出子域名列表(domains_file)
|
||||
2. 并行执行被动收集工具
|
||||
3. 返回结果文件列表
|
||||
|
||||
Args:
|
||||
scan_id: 扫描 ID
|
||||
target_id: 目标 ID
|
||||
target_name: 目标名称
|
||||
output_dir: 输出目录
|
||||
enabled_tools: 启用的被动收集工具配置
|
||||
|
||||
Returns:
|
||||
dict: {
|
||||
'success': bool,
|
||||
'result_files': list,
|
||||
'failed_tools': list,
|
||||
'successful_tools': list,
|
||||
'domains_count': int
|
||||
}
|
||||
"""
|
||||
try:
|
||||
output_path = Path(output_dir)
|
||||
|
||||
logger.info(
|
||||
"开始 URL 被动收集 - Target: %s, Tools: %s",
|
||||
target_name, ', '.join(enabled_tools.keys())
|
||||
)
|
||||
|
||||
# Step 1: 导出子域名列表
|
||||
domains_file, domains_count = _export_domains_file(
|
||||
target_id=target_id,
|
||||
scan_id=scan_id,
|
||||
output_dir=output_path
|
||||
)
|
||||
|
||||
if domains_count == 0:
|
||||
logger.warning("没有可用的子域名,跳过被动收集")
|
||||
return {
|
||||
'success': True,
|
||||
'result_files': [],
|
||||
'failed_tools': [],
|
||||
'successful_tools': [],
|
||||
'domains_count': 0
|
||||
}
|
||||
|
||||
# Step 2: 并行执行被动收集工具
|
||||
result_files, failed_tools, successful_tools = run_tools_parallel(
|
||||
tools=enabled_tools,
|
||||
input_file=domains_file,
|
||||
input_type="domains_file",
|
||||
output_dir=output_path
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"✓ 被动收集完成 - 成功: %d/%d, 结果文件: %d",
|
||||
len(successful_tools), len(enabled_tools), len(result_files)
|
||||
)
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'result_files': result_files,
|
||||
'failed_tools': failed_tools,
|
||||
'successful_tools': successful_tools,
|
||||
'domains_count': domains_count
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error("URL 被动收集失败: %s", e, exc_info=True)
|
||||
return {
|
||||
'success': False,
|
||||
'result_files': [],
|
||||
'failed_tools': [{'tool': 'domains_url_fetch_flow', 'reason': str(e)}],
|
||||
'successful_tools': [],
|
||||
'domains_count': 0
|
||||
}
|
||||
@@ -1,10 +1,10 @@
|
||||
"""
|
||||
URL Fetch 主 Flow
|
||||
|
||||
负责编排不同输入类型的 URL 获取子 Flow(domain_name / domains_file / sites_file),以及统一的后处理(uro 去重、httpx 验证)
|
||||
负责编排不同输入类型的 URL 获取子 Flow(domain_name / sites_file),以及统一的后处理(uro 去重、httpx 验证)
|
||||
|
||||
架构:
|
||||
- 调用 domain_name_url_fetch_flow(domain_name 输入)、domains_url_fetch_flow(domains_file 输入)和 sites_url_fetch_flow(sites_file 输入)
|
||||
- 调用 domain_name_url_fetch_flow(domain_name 输入)和 sites_url_fetch_flow(sites_file 输入)
|
||||
- 合并多个子 Flow 的结果
|
||||
- 统一进行 uro 去重(如果启用)
|
||||
- 统一进行 httpx 验证(如果启用)
|
||||
@@ -27,7 +27,6 @@ from apps.scan.handlers.scan_flow_handlers import (
|
||||
)
|
||||
|
||||
from .domain_name_url_fetch_flow import domain_name_url_fetch_flow
|
||||
from .domains_url_fetch_flow import domains_url_fetch_flow
|
||||
from .sites_url_fetch_flow import sites_url_fetch_flow
|
||||
from .utils import calculate_timeout_by_line_count
|
||||
|
||||
@@ -37,36 +36,23 @@ logger = logging.getLogger(__name__)
|
||||
# ==================== 工具分类配置 ====================
|
||||
# 使用 target_name (domain_name) 作为输入的 URL 获取工具
|
||||
DOMAIN_NAME_TOOLS = {'waymore'}
|
||||
# 使用 domains_file 作为输入的 URL 获取工具
|
||||
DOMAINS_FILE_TOOLS = {'gau', 'waybackurls'}
|
||||
# 使用 sites_file 作为输入的 URL 获取工具
|
||||
SITES_FILE_TOOLS = {'katana', 'gospider', 'hakrawler'}
|
||||
SITES_FILE_TOOLS = {'katana'}
|
||||
# 后处理工具:不参与获取,用于清理和验证
|
||||
POST_PROCESS_TOOLS = {'uro', 'httpx'}
|
||||
|
||||
|
||||
def _setup_url_fetch_directory(scan_workspace_dir: str) -> Path:
|
||||
"""创建并验证 URL 获取工作目录"""
|
||||
url_fetch_dir = Path(scan_workspace_dir) / 'url_fetch'
|
||||
url_fetch_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if not url_fetch_dir.is_dir():
|
||||
raise RuntimeError(f"URL 获取目录创建失败: {url_fetch_dir}")
|
||||
if not os.access(url_fetch_dir, os.W_OK):
|
||||
raise RuntimeError(f"URL 获取目录不可写: {url_fetch_dir}")
|
||||
|
||||
return url_fetch_dir
|
||||
|
||||
|
||||
def _classify_tools(enabled_tools: dict) -> tuple[dict, dict, dict, dict, dict]:
|
||||
|
||||
def _classify_tools(enabled_tools: dict) -> tuple[dict, dict, dict, dict]:
|
||||
"""
|
||||
将启用的工具按输入类型分类
|
||||
|
||||
Returns:
|
||||
tuple: (domain_name_tools, domains_file_tools, sites_file_tools, uro_config, httpx_config)
|
||||
tuple: (domain_name_tools, sites_file_tools, uro_config, httpx_config)
|
||||
"""
|
||||
domain_name_tools: dict = {}
|
||||
domains_file_tools: dict = {}
|
||||
sites_file_tools: dict = {}
|
||||
uro_config = None
|
||||
httpx_config = None
|
||||
@@ -74,8 +60,6 @@ def _classify_tools(enabled_tools: dict) -> tuple[dict, dict, dict, dict, dict]:
|
||||
for tool_name, tool_config in enabled_tools.items():
|
||||
if tool_name in DOMAIN_NAME_TOOLS:
|
||||
domain_name_tools[tool_name] = tool_config
|
||||
elif tool_name in DOMAINS_FILE_TOOLS:
|
||||
domains_file_tools[tool_name] = tool_config
|
||||
elif tool_name in SITES_FILE_TOOLS:
|
||||
sites_file_tools[tool_name] = tool_config
|
||||
elif tool_name == 'uro':
|
||||
@@ -83,10 +67,9 @@ def _classify_tools(enabled_tools: dict) -> tuple[dict, dict, dict, dict, dict]:
|
||||
elif tool_name == 'httpx':
|
||||
httpx_config = tool_config
|
||||
else:
|
||||
logger.warning("未知工具类型: %s,将尝试作为 domains_file 输入的被动收集工具", tool_name)
|
||||
domains_file_tools[tool_name] = tool_config
|
||||
logger.warning("未知工具类型: %s,跳过", tool_name)
|
||||
|
||||
return domain_name_tools, domains_file_tools, sites_file_tools, uro_config, httpx_config
|
||||
return domain_name_tools, sites_file_tools, uro_config, httpx_config
|
||||
|
||||
|
||||
def _merge_and_deduplicate_urls(result_files: list, url_fetch_dir: Path) -> tuple[str, int]:
|
||||
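Both the uro step and the httpx validation step consume the file produced by _merge_and_deduplicate_urls, whose body lies outside this diff. A plausible minimal sketch (the merged file name and the in-memory dedup strategy are assumptions):

    from pathlib import Path

    def merge_and_deduplicate(result_files: list, url_fetch_dir: Path) -> tuple:
        """Concatenate per-tool URL files into one deduplicated list (illustrative only)."""
        seen = set()
        merged_file = url_fetch_dir / 'merged_urls.txt'
        with open(merged_file, 'w', encoding='utf-8') as out:
            for path in result_files:
                with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                    for line in f:
                        url = line.strip()
                        if url and url not in seen:
                            seen.add(url)
                            out.write(url + '\n')
        return str(merged_file), len(seen)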
@@ -131,9 +114,9 @@ def _clean_urls_with_uro(
|
||||
tool_config=uro_config,
|
||||
file_path=merged_file,
|
||||
base_per_time=1,
|
||||
min_timeout=60,
|
||||
)
|
||||
timeout = max(30, timeout)
|
||||
logger.info("uro 自动计算超时时间(按行数,每行 1 秒): %d 秒", timeout)
|
||||
logger.info("uro 自动计算超时时间(按行数,每行 1 秒,最小 60 秒): %d 秒", timeout)
|
||||
else:
|
||||
try:
|
||||
timeout = int(raw_timeout)
|
||||
@@ -202,11 +185,10 @@ def _validate_and_stream_save_urls(
|
||||
raw_timeout = httpx_config.get('timeout', 'auto')
|
||||
timeout = 3600
|
||||
if isinstance(raw_timeout, str) and raw_timeout == 'auto':
|
||||
# 按 URL 行数计算超时时间:每行 3 秒,不设上限
|
||||
timeout = url_count * 3
|
||||
timeout = max(600, timeout)
|
||||
# 按 URL 行数计算超时时间:每行 3 秒,最小 60 秒
|
||||
timeout = max(60, url_count * 3)
|
||||
logger.info(
|
||||
"自动计算 httpx 超时时间(按行数,每行 3 秒): url_count=%d, timeout=%d 秒",
|
||||
"自动计算 httpx 超时时间(按行数,每行 3 秒,最小 60 秒): url_count=%d, timeout=%d 秒",
|
||||
url_count,
|
||||
timeout,
|
||||
)
|
||||
@@ -282,10 +264,9 @@ def url_fetch_flow(
|
||||
|
||||
执行流程:
|
||||
1. 准备工作目录
|
||||
2. 按输入类型分类工具(domain_name / domains_file / sites_file / 后处理)
|
||||
2. 按输入类型分类工具(domain_name / sites_file / 后处理)
|
||||
3. 并行执行子 Flow
|
||||
- domain_name_url_fetch_flow: 基于 domain_name(来自 target_name)执行 URL 获取(如 waymore)
|
||||
- domains_url_fetch_flow: 基于 domains_file 执行 URL 获取(如 gau、waybackurls)
|
||||
- sites_url_fetch_flow: 基于 sites_file 执行爬虫(如 katana 等)
|
||||
4. 合并所有子 Flow 的结果并去重
|
||||
5. uro 去重(如果启用)
|
||||
@@ -313,23 +294,23 @@ def url_fetch_flow(
|
||||
|
||||
# Step 1: 准备工作目录
|
||||
logger.info("Step 1: 准备工作目录")
|
||||
url_fetch_dir = _setup_url_fetch_directory(scan_workspace_dir)
|
||||
from apps.scan.utils import setup_scan_directory
|
||||
url_fetch_dir = setup_scan_directory(scan_workspace_dir, 'url_fetch')
|
||||
|
||||
# Step 2: 分类工具(按输入类型)
|
||||
logger.info("Step 2: 分类工具")
|
||||
domain_name_tools, domains_file_tools, sites_file_tools, uro_config, httpx_config = _classify_tools(enabled_tools)
|
||||
domain_name_tools, sites_file_tools, uro_config, httpx_config = _classify_tools(enabled_tools)
|
||||
|
||||
logger.info(
|
||||
"工具分类 - domain_name: %s, domains_file: %s, sites_file: %s, uro: %s, httpx: %s",
|
||||
"工具分类 - domain_name: %s, sites_file: %s, uro: %s, httpx: %s",
|
||||
list(domain_name_tools.keys()) or '无',
|
||||
list(domains_file_tools.keys()) or '无',
|
||||
list(sites_file_tools.keys()) or '无',
|
||||
'启用' if uro_config else '未启用',
|
||||
'启用' if httpx_config else '未启用'
|
||||
)
|
||||
|
||||
# 检查是否有获取工具
|
||||
if not domain_name_tools and not domains_file_tools and not sites_file_tools:
|
||||
if not domain_name_tools and not sites_file_tools:
|
||||
raise ValueError(
|
||||
"URL Fetch 流程需要至少启用一个 URL 获取工具(如 waymore, katana)。"
|
||||
"httpx 和 uro 仅用于后处理,不能单独使用。"
|
||||
@@ -353,24 +334,10 @@ def url_fetch_flow(
|
||||
all_result_files.extend(tn_result.get('result_files', []))
|
||||
all_failed_tools.extend(tn_result.get('failed_tools', []))
|
||||
all_successful_tools.extend(tn_result.get('successful_tools', []))
|
||||
|
||||
# 3b: 基于 domains_file 的 URL 被动收集
|
||||
if domains_file_tools:
|
||||
logger.info("Step 3b: 执行基于 domains_file 的 URL 被动收集子 Flow")
|
||||
passive_result = domains_url_fetch_flow(
|
||||
scan_id=scan_id,
|
||||
target_id=target_id,
|
||||
target_name=target_name,
|
||||
output_dir=str(url_fetch_dir),
|
||||
enabled_tools=domains_file_tools,
|
||||
)
|
||||
all_result_files.extend(passive_result.get('result_files', []))
|
||||
all_failed_tools.extend(passive_result.get('failed_tools', []))
|
||||
all_successful_tools.extend(passive_result.get('successful_tools', []))
|
||||
|
||||
# 3c: 爬虫(以 sites_file 为输入)
|
||||
# 3b: 爬虫(以 sites_file 为输入)
|
||||
if sites_file_tools:
|
||||
logger.info("Step 3c: 执行爬虫子 Flow")
|
||||
logger.info("Step 3b: 执行爬虫子 Flow")
|
||||
crawl_result = sites_url_fetch_flow(
|
||||
scan_id=scan_id,
|
||||
target_id=target_id,
|
||||
@@ -443,8 +410,6 @@ def url_fetch_flow(
|
||||
executed_tasks = ['setup_directory', 'classify_tools']
|
||||
if domain_name_tools:
|
||||
executed_tasks.append('domain_name_url_fetch_flow')
|
||||
if domains_file_tools:
|
||||
executed_tasks.append('domains_url_fetch_flow')
|
||||
if sites_file_tools:
|
||||
executed_tasks.append('sites_url_fetch_flow')
|
||||
executed_tasks.append('merge_and_deduplicate')
|
||||
@@ -463,7 +428,7 @@ def url_fetch_flow(
|
||||
'total': saved_count,
|
||||
'executed_tasks': executed_tasks,
|
||||
'tool_stats': {
|
||||
'total': len(domain_name_tools) + len(domains_file_tools) + len(sites_file_tools),
|
||||
'total': len(domain_name_tools) + len(sites_file_tools),
|
||||
'successful': len(all_successful_tools),
|
||||
'failed': len(all_failed_tools),
|
||||
'successful_tools': all_successful_tools,
|
||||
|
||||
@@ -19,33 +19,35 @@ from .utils import run_tools_parallel
logger = logging.getLogger(__name__)


def _export_sites_file(target_id: int, scan_id: int, output_dir: Path) -> tuple[str, int]:
def _export_sites_file(target_id: int, scan_id: int, target_name: str, output_dir: Path) -> tuple[str, int]:
"""
导出站点 URL 列表到文件

懒加载模式:如果 WebSite 表为空,根据 Target 类型生成默认 URL

Args:
target_id: 目标 ID
scan_id: 扫描 ID
target_name: 目标名称(用于懒加载)
output_dir: 输出目录

Returns:
tuple: (file_path, count)
"""
from apps.scan.tasks.url_fetch import export_target_assets_task
from apps.scan.tasks.url_fetch import export_sites_task

output_file = str(output_dir / "sites.txt")
result = export_target_assets_task(
result = export_sites_task(
output_file=output_file,
target_id=target_id,
scan_id=scan_id,
input_type="sites_file"
scan_id=scan_id
)

count = result['asset_count']
if count == 0:
logger.warning("站点列表为空,爬虫可能无法正常工作")
else:
if count > 0:
logger.info("✓ 站点列表导出完成 - 数量: %d", count)
else:
logger.warning("站点列表为空,爬虫可能无法正常工作")

return output_file, count

@@ -94,9 +96,11 @@ def sites_url_fetch_flow(
sites_file, sites_count = _export_sites_file(
target_id=target_id,
scan_id=scan_id,
target_name=target_name,
output_dir=output_path
)

# 默认值模式下,即使原本没有站点,也会有默认 URL 作为输入
if sites_count == 0:
logger.warning("没有可用的站点,跳过爬虫")
return {

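The docstring above mentions a lazy-default mode: when no WebSite rows exist yet, a default URL derived from the target keeps the crawler supplied with input. A minimal sketch of that idea follows; the file layout and the https default are assumptions for illustration only.

from pathlib import Path

def write_sites_file(site_urls: list, target_name: str, output_dir: Path):
    """Write the exported site URLs, falling back to a default URL built from the target."""
    if not site_urls:
        # Lazy default: no sites exported yet, seed the crawler with the bare target
        site_urls = [f"https://{target_name}"]
    output_file = output_dir / "sites.txt"
    output_file.write_text("\n".join(site_urls) + "\n", encoding="utf-8")
    return str(output_file), len(site_urls)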
@@ -17,6 +17,8 @@ def calculate_timeout_by_line_count(
tool_config: dict,
file_path: str,
base_per_time: int = 1,
min_timeout: int = 60,
) -> int:
"""
根据文件行数自动计算超时时间
@@ -25,9 +26,10 @@ def calculate_timeout_by_line_count(
tool_config: 工具配置(保留参数,未来可能用于更复杂的计算)
file_path: 输入文件路径
base_per_time: 每行的基础时间(秒)
min_timeout: 最小超时时间(秒),默认60秒

Returns:
int: 计算出的超时时间(秒)
int: 计算出的超时时间(秒),不低于 min_timeout
"""
try:
result = subprocess.run(
@@ -37,18 +39,19 @@ def calculate_timeout_by_line_count(
check=True,
)
line_count = int(result.stdout.strip().split()[0])
timeout = line_count * base_per_time
timeout = max(line_count * base_per_time, min_timeout)
logger.info(
"timeout 自动计算: 文件=%s, 行数=%d, 每行时间=%d秒, timeout=%d秒",
"timeout 自动计算: 文件=%s, 行数=%d, 每行时间=%d秒, 最小值=%d秒, timeout=%d秒",
file_path,
line_count,
base_per_time,
min_timeout,
timeout,
)
return timeout
except Exception as e:
logger.warning("wc -l 计算行数失败: %s,将使用默认 timeout: 600秒", e)
return 600
logger.warning("wc -l 计算行数失败: %s,将使用默认 timeout: %d秒", e, min_timeout)
return min_timeout


def prepare_tool_execution(

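As a usage illustration of the helper shown above (the file path and the surrounding names are placeholders, not taken from the repository):

# Hypothetical call site: 1 second per line, but never less than 5 minutes
timeout = calculate_timeout_by_line_count(
    tool_config=tool_config,      # currently unused by the helper, kept for future tuning
    file_path="/tmp/urls.txt",    # file whose line count drives the timeout
    base_per_time=1,
    min_timeout=300,
)
# The computed value is then passed to whatever executes the tool, e.g.
# run_tool(command, timeout=timeout)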
@@ -25,10 +25,7 @@ from .utils import calculate_timeout_by_line_count
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _setup_vuln_scan_directory(scan_workspace_dir: str) -> Path:
|
||||
vuln_scan_dir = Path(scan_workspace_dir) / "vuln_scan"
|
||||
vuln_scan_dir.mkdir(parents=True, exist_ok=True)
|
||||
return vuln_scan_dir
|
||||
|
||||
|
||||
|
||||
@flow(
|
||||
@@ -55,7 +52,8 @@ def endpoints_vuln_scan_flow(
|
||||
if not enabled_tools:
|
||||
raise ValueError("enabled_tools 不能为空")
|
||||
|
||||
vuln_scan_dir = _setup_vuln_scan_directory(scan_workspace_dir)
|
||||
from apps.scan.utils import setup_scan_directory
|
||||
vuln_scan_dir = setup_scan_directory(scan_workspace_dir, 'vuln_scan')
|
||||
endpoints_file = vuln_scan_dir / "input_endpoints.txt"
|
||||
|
||||
# Step 1: 导出 Endpoint URL
|
||||
@@ -119,7 +117,6 @@ def endpoints_vuln_scan_flow(
|
||||
)
|
||||
|
||||
raw_timeout = tool_config.get("timeout", 600)
|
||||
timeout = 600
|
||||
|
||||
if isinstance(raw_timeout, str) and raw_timeout == "auto":
|
||||
# timeout=auto 时,根据 endpoints_file 行数自动计算超时时间
|
||||
@@ -134,7 +131,6 @@ def endpoints_vuln_scan_flow(
|
||||
try:
|
||||
timeout = int(raw_timeout)
|
||||
except (TypeError, ValueError) as e:
|
||||
# 配置错误应当直接暴露,避免默默使用默认值导致排查困难
|
||||
raise ValueError(
|
||||
f"工具 {tool_name} 的 timeout 配置无效: {raw_timeout!r}"
|
||||
) from e
|
||||
@@ -174,7 +170,7 @@ def endpoints_vuln_scan_flow(
|
||||
target_id=target_id,
|
||||
cwd=str(vuln_scan_dir),
|
||||
shell=True,
|
||||
batch_size=10,
|
||||
batch_size=1,
|
||||
timeout=timeout,
|
||||
log_file=str(log_file),
|
||||
)
|
||||
|
||||
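The error-handling change above prefers failing fast over silently substituting a default when a tool's timeout is misconfigured. A compact sketch of that validation pattern (the function name is illustrative):

def parse_timeout(tool_name: str, raw_timeout) -> int:
    """Return an integer timeout or raise; never fall back to a hidden default."""
    try:
        return int(raw_timeout)
    except (TypeError, ValueError) as e:
        raise ValueError(f"Invalid timeout for tool {tool_name}: {raw_timeout!r}") from e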
@@ -12,6 +12,7 @@ def calculate_timeout_by_line_count(
|
||||
tool_config: dict,
|
||||
file_path: str,
|
||||
base_per_time: int = 1,
|
||||
min_timeout: int = 600,
|
||||
) -> int:
|
||||
"""
|
||||
根据文件行数自动计算超时时间
|
||||
@@ -20,9 +21,10 @@ def calculate_timeout_by_line_count(
|
||||
tool_config: 工具配置(保留参数,未来可能用于更复杂的计算)
|
||||
file_path: 输入文件路径
|
||||
base_per_time: 每行的基础时间(秒)
|
||||
min_timeout: 最小超时时间(秒),默认600秒(10分钟)
|
||||
|
||||
Returns:
|
||||
int: 计算出的超时时间(秒)
|
||||
int: 计算出的超时时间(秒),不低于 min_timeout
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
@@ -32,15 +34,16 @@ def calculate_timeout_by_line_count(
|
||||
check=True,
|
||||
)
|
||||
line_count = int(result.stdout.strip().split()[0])
|
||||
timeout = line_count * base_per_time
|
||||
timeout = max(line_count * base_per_time, min_timeout)
|
||||
logger.info(
|
||||
"timeout 自动计算: 文件=%s, 行数=%d, 每行时间=%d秒, timeout=%d秒",
|
||||
"timeout 自动计算: 文件=%s, 行数=%d, 每行时间=%d秒, 最小值=%d秒, timeout=%d秒",
|
||||
file_path,
|
||||
line_count,
|
||||
base_per_time,
|
||||
min_timeout,
|
||||
timeout,
|
||||
)
|
||||
return timeout
|
||||
except Exception as e:
|
||||
logger.error("wc -l 计算行数失败: %s", e)
|
||||
raise RuntimeError(f"自动计算超时时间失败: {e}") from e
|
||||
logger.warning("wc -l 计算行数失败: %s,使用最小超时: %d秒", e, min_timeout)
|
||||
return min_timeout
|
||||
|
||||
backend/apps/scan/management/__init__.py (new, empty file)
@@ -1,567 +0,0 @@
"""
生成测试数据的管理命令

用法:
python manage.py generate_test_data --target test.com --count 100000

性能测试:
python manage.py generate_test_data --target test.com --count 10000 --batch-size 500 --benchmark
"""
|
||||
import random
|
||||
import string
|
||||
import time
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import transaction, connection
|
||||
from django.utils import timezone
|
||||
from apps.targets.models import Target
|
||||
from apps.scan.models import Scan
|
||||
from apps.asset.models.asset_models import Subdomain, IPAddress, Port, WebSite, Directory
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = '为指定目标生成大量测试数据'
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument(
|
||||
'--target',
|
||||
type=str,
|
||||
required=True,
|
||||
help='目标域名(如 test.com)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--count',
|
||||
type=int,
|
||||
default=100000,
|
||||
help='每个表生成的记录数(默认 100000)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--batch-size',
|
||||
type=int,
|
||||
default=1000,
|
||||
help='批量插入的批次大小(默认 1000)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--benchmark',
|
||||
action='store_true',
|
||||
help='启用性能基准测试模式(显示详细的性能指标)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--test-batch-sizes',
|
||||
action='store_true',
|
||||
help='测试不同批次大小的性能(100, 500, 1000, 2000, 5000)'
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
target_name = options['target']
|
||||
count = options['count']
|
||||
batch_size = options['batch_size']
|
||||
benchmark = options['benchmark']
|
||||
test_batch_sizes = options['test_batch_sizes']
|
||||
|
||||
# 如果是测试批次大小模式
|
||||
if test_batch_sizes:
|
||||
self._test_batch_sizes(target_name, count)
|
||||
return
|
||||
|
||||
self.stdout.write(f'\n{"="*60}')
|
||||
self.stdout.write(f' 开始生成测试数据')
|
||||
self.stdout.write(f'{"="*60}\n')
|
||||
self.stdout.write(f'目标: {target_name}')
|
||||
self.stdout.write(f'每表记录数: {count:,}')
|
||||
self.stdout.write(f'批次大小: {batch_size:,}')
|
||||
if benchmark:
|
||||
self.stdout.write('模式: 性能基准测试 ⚡')
|
||||
self._print_db_info()
|
||||
self.stdout.write('')
|
||||
|
||||
# 记录总开始时间
|
||||
total_start_time = time.time()
|
||||
|
||||
# 1. 获取或创建目标
|
||||
try:
|
||||
target = Target.objects.get(name=target_name)
|
||||
self.stdout.write(self.style.SUCCESS(f'✓ 找到目标: {target.name} (ID: {target.id})'))
|
||||
except Target.DoesNotExist:
|
||||
self.stdout.write(self.style.ERROR(f'✗ 目标不存在: {target_name}'))
|
||||
return
|
||||
|
||||
# 2. 创建新的测试扫描任务
|
||||
from apps.engine.models import ScanEngine
|
||||
engine = ScanEngine.objects.first()
|
||||
if not engine:
|
||||
self.stdout.write(self.style.ERROR('✗ 没有可用的扫描引擎'))
|
||||
return
|
||||
|
||||
scan = Scan.objects.create(
|
||||
target=target,
|
||||
engine=engine,
|
||||
status='completed',
|
||||
results_dir=f'/tmp/test_{target_name}_{int(time.time())}'
|
||||
)
|
||||
self.stdout.write(self.style.SUCCESS(f'✓ 创建新测试扫描任务 (ID: {scan.id})'))
|
||||
|
||||
# 3. 生成子域名
|
||||
self.stdout.write(f'\n[1/5] 生成 {count:,} 个子域名...')
|
||||
subdomains, stats1 = self._generate_subdomains(target, scan, count, batch_size, benchmark)
|
||||
|
||||
# 4. 生成 IP 地址
|
||||
self.stdout.write(f'\n[2/5] 生成 {count:,} 个 IP 地址...')
|
||||
ips, stats2 = self._generate_ips(target, scan, subdomains, count, batch_size, benchmark)
|
||||
|
||||
# 5. 生成端口
|
||||
self.stdout.write(f'\n[3/5] 生成 {count:,} 个端口...')
|
||||
stats3 = self._generate_ports(scan, ips, subdomains, count, batch_size, benchmark)
|
||||
|
||||
# 6. 生成网站
|
||||
self.stdout.write(f'\n[4/5] 生成 {count:,} 个网站...')
|
||||
websites, stats4 = self._generate_websites(target, scan, subdomains, count, batch_size, benchmark)
|
||||
|
||||
# 7. 生成目录
|
||||
self.stdout.write(f'\n[5/5] 生成 {count:,} 个目录...')
|
||||
stats5 = self._generate_directories(target, scan, websites, count, batch_size, benchmark)
|
||||
|
||||
# 计算总耗时
|
||||
total_time = time.time() - total_start_time
|
||||
|
||||
self.stdout.write(f'\n{"="*60}')
|
||||
self.stdout.write(self.style.SUCCESS(' ✓ 测试数据生成完成!'))
|
||||
self.stdout.write(f'{"="*60}')
|
||||
self.stdout.write(f'总耗时: {total_time:.2f} 秒 ({total_time/60:.2f} 分钟)\n')
|
||||
|
||||
if benchmark:
|
||||
self._print_performance_summary([stats1, stats2, stats3, stats4, stats5])
|
||||
|
||||
def _generate_subdomains(self, target, scan, count, batch_size, benchmark=False):
|
||||
"""生成子域名"""
|
||||
subdomains = []
|
||||
created_subdomains = []
|
||||
start_time = time.time()
|
||||
batch_times = []
|
||||
|
||||
for i in range(count):
|
||||
# 生成唯一的子域名
|
||||
subdomain_name = f'test-{i:07d}.{target.name}'
|
||||
|
||||
subdomains.append(Subdomain(
|
||||
target=target,
|
||||
scan=scan,
|
||||
name=subdomain_name,
|
||||
cname=[],
|
||||
is_cdn=random.choice([True, False]),
|
||||
cdn_name=random.choice(['', 'cloudflare', 'akamai', 'fastly'])
|
||||
))
|
||||
|
||||
# 批量插入
|
||||
if len(subdomains) >= batch_size:
|
||||
batch_start = time.time()
|
||||
with transaction.atomic():
|
||||
created = Subdomain.objects.bulk_create(subdomains, ignore_conflicts=True)
|
||||
created_subdomains.extend(created)
|
||||
batch_time = time.time() - batch_start
|
||||
batch_times.append(batch_time)
|
||||
|
||||
if benchmark:
|
||||
speed = len(subdomains) / batch_time
|
||||
self.stdout.write(f' 插入 {len(subdomains):,} 个 | 耗时: {batch_time:.2f}s | 速度: {speed:.0f} 条/秒')
|
||||
else:
|
||||
self.stdout.write(f' 插入 {len(subdomains):,} 个子域名... (进度: {i+1:,}/{count:,})')
|
||||
subdomains = []
|
||||
|
||||
# 插入剩余的
|
||||
if subdomains:
|
||||
with transaction.atomic():
|
||||
created = Subdomain.objects.bulk_create(subdomains, ignore_conflicts=True)
|
||||
created_subdomains.extend(created)
|
||||
self.stdout.write(f' 插入 {len(subdomains):,} 个子域名... (进度: {count:,}/{count:,})')
|
||||
|
||||
total_time = time.time() - start_time
|
||||
avg_batch_time = sum(batch_times) / len(batch_times) if batch_times else 0
|
||||
total_speed = len(created_subdomains) / total_time if total_time > 0 else 0
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f' ✓ 完成!共创建 {len(created_subdomains):,} 个 | '
|
||||
f'总耗时: {total_time:.2f}s | '
|
||||
f'平均速度: {total_speed:.0f} 条/秒'
|
||||
))
|
||||
|
||||
return created_subdomains, {
|
||||
'name': '子域名',
|
||||
'count': len(created_subdomains),
|
||||
'time': total_time,
|
||||
'speed': total_speed,
|
||||
'avg_batch_time': avg_batch_time
|
||||
}
|
||||
|
||||
def _generate_ips(self, target, scan, subdomains, count, batch_size, benchmark=False):
|
||||
"""生成 IP 地址"""
|
||||
# 重新从数据库查询 subdomain,确保有 ID
|
||||
subdomain_list = list(Subdomain.objects.filter(scan=scan).values_list('id', flat=True))
|
||||
|
||||
ips = []
|
||||
created_ips = []
|
||||
start_time = time.time()
|
||||
batch_times = []
|
||||
|
||||
for i in range(count):
|
||||
# 生成随机 IP
|
||||
ip_addr = f'192.168.{random.randint(0, 255)}.{random.randint(1, 254)}'
|
||||
subdomain_id = random.choice(subdomain_list) if subdomain_list else None
|
||||
|
||||
if subdomain_id:
|
||||
ips.append(IPAddress(
|
||||
target=target,
|
||||
scan=scan,
|
||||
subdomain_id=subdomain_id,
|
||||
ip=f'{ip_addr}-{i}', # 加后缀确保唯一
|
||||
protocol_version='IPv4',
|
||||
is_private=True
|
||||
))
|
||||
|
||||
# 批量插入
|
||||
if len(ips) >= batch_size:
|
||||
batch_start = time.time()
|
||||
with transaction.atomic():
|
||||
created = IPAddress.objects.bulk_create(ips, ignore_conflicts=True)
|
||||
created_ips.extend(created)
|
||||
batch_time = time.time() - batch_start
|
||||
batch_times.append(batch_time)
|
||||
|
||||
if benchmark:
|
||||
speed = len(ips) / batch_time
|
||||
self.stdout.write(f' 插入 {len(ips):,} 个 | 耗时: {batch_time:.2f}s | 速度: {speed:.0f} 条/秒')
|
||||
else:
|
||||
self.stdout.write(f' 插入 {len(ips):,} 个 IP 地址... (进度: {i+1:,}/{count:,})')
|
||||
ips = []
|
||||
|
||||
# 插入剩余的
|
||||
if ips:
|
||||
with transaction.atomic():
|
||||
created = IPAddress.objects.bulk_create(ips, ignore_conflicts=True)
|
||||
created_ips.extend(created)
|
||||
self.stdout.write(f' 插入 {len(ips):,} 个 IP 地址... (进度: {count:,}/{count:,})')
|
||||
|
||||
total_time = time.time() - start_time
|
||||
avg_batch_time = sum(batch_times) / len(batch_times) if batch_times else 0
|
||||
total_speed = len(created_ips) / total_time if total_time > 0 else 0
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f' ✓ 完成!共创建 {len(created_ips):,} 个 | '
|
||||
f'总耗时: {total_time:.2f}s | '
|
||||
f'平均速度: {total_speed:.0f} 条/秒'
|
||||
))
|
||||
|
||||
return created_ips, {
|
||||
'name': 'IP地址',
|
||||
'count': len(created_ips),
|
||||
'time': total_time,
|
||||
'speed': total_speed,
|
||||
'avg_batch_time': avg_batch_time
|
||||
}
|
||||
|
||||
def _generate_ports(self, scan, ips, subdomains, count, batch_size, benchmark=False):
|
||||
"""生成端口"""
|
||||
# 重新查询 IP 和 subdomain 的 ID
|
||||
ip_list = list(IPAddress.objects.filter(scan=scan).values_list('id', flat=True))
|
||||
subdomain_list = list(Subdomain.objects.filter(scan=scan).values_list('id', flat=True))
|
||||
|
||||
ports = []
|
||||
total_created = 0
|
||||
start_time = time.time()
|
||||
batch_times = []
|
||||
|
||||
for i in range(count):
|
||||
ip_id = random.choice(ip_list) if ip_list else None
|
||||
subdomain_id = random.choice(subdomain_list) if subdomain_list else None
|
||||
|
||||
if ip_id:
|
||||
ports.append(Port(
|
||||
ip_address_id=ip_id,
|
||||
subdomain_id=subdomain_id,
|
||||
number=random.randint(1, 65535),
|
||||
service_name=random.choice(['http', 'https', 'ssh', 'ftp', 'mysql']),
|
||||
is_uncommon=random.choice([True, False])
|
||||
))
|
||||
|
||||
# 批量插入
|
||||
if len(ports) >= batch_size:
|
||||
batch_start = time.time()
|
||||
with transaction.atomic():
|
||||
Port.objects.bulk_create(ports, ignore_conflicts=True)
|
||||
total_created += len(ports)
|
||||
batch_time = time.time() - batch_start
|
||||
batch_times.append(batch_time)
|
||||
|
||||
if benchmark:
|
||||
speed = len(ports) / batch_time
|
||||
self.stdout.write(f' 插入 {len(ports):,} 个 | 耗时: {batch_time:.2f}s | 速度: {speed:.0f} 条/秒')
|
||||
else:
|
||||
self.stdout.write(f' 插入 {len(ports):,} 个端口... (进度: {i+1:,}/{count:,})')
|
||||
ports = []
|
||||
|
||||
# 插入剩余的
|
||||
if ports:
|
||||
with transaction.atomic():
|
||||
Port.objects.bulk_create(ports, ignore_conflicts=True)
|
||||
total_created += len(ports)
|
||||
self.stdout.write(f' 插入 {len(ports):,} 个端口... (进度: {count:,}/{count:,})')
|
||||
|
||||
total_time = time.time() - start_time
|
||||
avg_batch_time = sum(batch_times) / len(batch_times) if batch_times else 0
|
||||
total_speed = total_created / total_time if total_time > 0 else 0
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f' ✓ 完成!共创建 {total_created:,} 个 | '
|
||||
f'总耗时: {total_time:.2f}s | '
|
||||
f'平均速度: {total_speed:.0f} 条/秒'
|
||||
))
|
||||
|
||||
return {
|
||||
'name': '端口',
|
||||
'count': total_created,
|
||||
'time': total_time,
|
||||
'speed': total_speed,
|
||||
'avg_batch_time': avg_batch_time
|
||||
}
|
||||
|
||||
def _generate_websites(self, target, scan, subdomains, count, batch_size, benchmark=False):
|
||||
"""生成网站"""
|
||||
# 重新查询 subdomain 信息
|
||||
subdomain_data = list(Subdomain.objects.filter(scan=scan).values('id', 'name'))
|
||||
|
||||
websites = []
|
||||
created_websites = []
|
||||
start_time = time.time()
|
||||
batch_times = []
|
||||
|
||||
for i in range(count):
|
||||
subdomain = random.choice(subdomain_data) if subdomain_data else None
|
||||
|
||||
if subdomain:
|
||||
protocol = random.choice(['http', 'https'])
|
||||
url = f'{protocol}://{subdomain["name"]}'
|
||||
|
||||
websites.append(WebSite(
|
||||
target=target,
|
||||
scan=scan,
|
||||
subdomain_id=subdomain['id'],
|
||||
url=f'{url}?id={i}', # 加参数确保唯一
|
||||
title=f'Test Website {i}',
|
||||
status_code=random.choice([200, 301, 302, 404, 500]),
|
||||
content_length=random.randint(1000, 100000),
|
||||
webserver=random.choice(['nginx', 'apache', 'IIS']),
|
||||
content_type='text/html',
|
||||
tech=['Python', 'Django'] if i % 2 == 0 else ['Node.js', 'React'],
|
||||
vhost=random.choice([True, False, None])
|
||||
))
|
||||
|
||||
# 批量插入
|
||||
if len(websites) >= batch_size:
|
||||
batch_start = time.time()
|
||||
with transaction.atomic():
|
||||
created = WebSite.objects.bulk_create(websites, ignore_conflicts=True)
|
||||
created_websites.extend(created)
|
||||
batch_time = time.time() - batch_start
|
||||
batch_times.append(batch_time)
|
||||
|
||||
if benchmark:
|
||||
speed = len(websites) / batch_time
|
||||
self.stdout.write(f' 插入 {len(websites):,} 个 | 耗时: {batch_time:.2f}s | 速度: {speed:.0f} 条/秒')
|
||||
else:
|
||||
self.stdout.write(f' 插入 {len(websites):,} 个网站... (进度: {i+1:,}/{count:,})')
|
||||
websites = []
|
||||
|
||||
# 插入剩余的
|
||||
if websites:
|
||||
with transaction.atomic():
|
||||
created = WebSite.objects.bulk_create(websites, ignore_conflicts=True)
|
||||
created_websites.extend(created)
|
||||
self.stdout.write(f' 插入 {len(websites):,} 个网站... (进度: {count:,}/{count:,})')
|
||||
|
||||
total_time = time.time() - start_time
|
||||
avg_batch_time = sum(batch_times) / len(batch_times) if batch_times else 0
|
||||
total_speed = len(created_websites) / total_time if total_time > 0 else 0
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f' ✓ 完成!共创建 {len(created_websites):,} 个 | '
|
||||
f'总耗时: {total_time:.2f}s | '
|
||||
f'平均速度: {total_speed:.0f} 条/秒'
|
||||
))
|
||||
|
||||
return created_websites, {
|
||||
'name': '网站',
|
||||
'count': len(created_websites),
|
||||
'time': total_time,
|
||||
'speed': total_speed,
|
||||
'avg_batch_time': avg_batch_time
|
||||
}
|
||||
|
||||
def _generate_directories(self, target, scan, websites, count, batch_size, benchmark=False):
|
||||
"""生成目录"""
|
||||
# 重新查询 website 信息
|
||||
website_data = list(WebSite.objects.filter(scan=scan).values('id', 'url'))
|
||||
|
||||
directories = []
|
||||
total_created = 0
|
||||
start_time = time.time()
|
||||
batch_times = []
|
||||
|
||||
for i in range(count):
|
||||
website = random.choice(website_data) if website_data else None
|
||||
|
||||
if website:
|
||||
path = ''.join(random.choices(string.ascii_lowercase, k=10))
|
||||
|
||||
directories.append(Directory(
|
||||
target=target,
|
||||
scan=scan,
|
||||
website_id=website['id'],
|
||||
url=f'{website["url"]}/dir/{path}/{i}', # 加后缀确保唯一
|
||||
status=random.choice([200, 301, 403, 404]),
|
||||
length=random.randint(1000, 50000),
|
||||
words=random.randint(100, 5000),
|
||||
lines=random.randint(50, 1000),
|
||||
content_type='text/html'
|
||||
))
|
||||
|
||||
# 批量插入
|
||||
if len(directories) >= batch_size:
|
||||
batch_start = time.time()
|
||||
with transaction.atomic():
|
||||
Directory.objects.bulk_create(directories, ignore_conflicts=True)
|
||||
total_created += len(directories)
|
||||
batch_time = time.time() - batch_start
|
||||
batch_times.append(batch_time)
|
||||
|
||||
if benchmark:
|
||||
speed = len(directories) / batch_time
|
||||
self.stdout.write(f' 插入 {len(directories):,} 个 | 耗时: {batch_time:.2f}s | 速度: {speed:.0f} 条/秒')
|
||||
else:
|
||||
self.stdout.write(f' 插入 {len(directories):,} 个目录... (进度: {i+1:,}/{count:,})')
|
||||
directories = []
|
||||
|
||||
# 插入剩余的
|
||||
if directories:
|
||||
with transaction.atomic():
|
||||
Directory.objects.bulk_create(directories, ignore_conflicts=True)
|
||||
total_created += len(directories)
|
||||
self.stdout.write(f' 插入 {len(directories):,} 个目录... (进度: {count:,}/{count:,})')
|
||||
|
||||
total_time = time.time() - start_time
|
||||
avg_batch_time = sum(batch_times) / len(batch_times) if batch_times else 0
|
||||
total_speed = total_created / total_time if total_time > 0 else 0
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(
|
||||
f' ✓ 完成!共创建 {total_created:,} 个 | '
|
||||
f'总耗时: {total_time:.2f}s | '
|
||||
f'平均速度: {total_speed:.0f} 条/秒'
|
||||
))
|
||||
|
||||
return {
|
||||
'name': '目录',
|
||||
'count': total_created,
|
||||
'time': total_time,
|
||||
'speed': total_speed,
|
||||
'avg_batch_time': avg_batch_time
|
||||
}
|
||||
|
||||
def _print_db_info(self):
|
||||
"""打印数据库连接信息"""
|
||||
db_settings = connection.settings_dict
|
||||
self.stdout.write(f'\n数据库信息:')
|
||||
self.stdout.write(f' 主机: {db_settings["HOST"]}')
|
||||
self.stdout.write(f' 端口: {db_settings["PORT"]}')
|
||||
self.stdout.write(f' 数据库: {db_settings["NAME"]}')
|
||||
self.stdout.write(f' 引擎: {db_settings["ENGINE"].split(".")[-1]}')
|
||||
|
||||
def _print_performance_summary(self, stats_list):
|
||||
"""打印性能总结"""
|
||||
self.stdout.write(f'\n{"="*60}')
|
||||
self.stdout.write(' 性能测试报告')
|
||||
self.stdout.write(f'{"="*60}\n')
|
||||
|
||||
total_records = sum(s['count'] for s in stats_list)
|
||||
total_time = sum(s['time'] for s in stats_list)
|
||||
overall_speed = total_records / total_time if total_time > 0 else 0
|
||||
|
||||
self.stdout.write(f'{"表名":<12} {"记录数":<12} {"耗时(秒)":<12} {"速度(条/秒)":<15} {"平均批次时间(秒)"}')
|
||||
self.stdout.write('-' * 65)
|
||||
|
||||
for stats in stats_list:
|
||||
self.stdout.write(
|
||||
f'{stats["name"]:<12} '
|
||||
f'{stats["count"]:<12,} '
|
||||
f'{stats["time"]:<12.2f} '
|
||||
f'{stats["speed"]:<15.0f} '
|
||||
f'{stats.get("avg_batch_time", 0):<.3f}'
|
||||
)
|
||||
|
||||
self.stdout.write('-' * 65)
|
||||
self.stdout.write(
|
||||
f'{"总计":<12} '
|
||||
f'{total_records:<12,} '
|
||||
f'{total_time:<12.2f} '
|
||||
f'{overall_speed:<15.0f}'
|
||||
)
|
||||
self.stdout.write('')
|
||||
|
||||
def _test_batch_sizes(self, target_name, count):
|
||||
"""测试不同批次大小的性能"""
|
||||
batch_sizes = [100, 500, 1000, 2000, 5000]
|
||||
test_count = min(count, 10000) # 限制测试数据量
|
||||
|
||||
self.stdout.write(f'\n{"="*60}')
|
||||
self.stdout.write(f' 批次大小性能测试')
|
||||
self.stdout.write(f'{"="*60}\n')
|
||||
self.stdout.write(f'测试数据量: {test_count:,} 条')
|
||||
self.stdout.write(f'测试批次: {batch_sizes}\n')
|
||||
|
||||
results = []
|
||||
|
||||
for batch_size in batch_sizes:
|
||||
self.stdout.write(f'\n测试批次大小: {batch_size}')
|
||||
self.stdout.write('-' * 40)
|
||||
|
||||
# 这里只测试子域名的插入性能
|
||||
try:
|
||||
target = Target.objects.get(name=target_name)
|
||||
except Target.DoesNotExist:
|
||||
self.stdout.write(self.style.ERROR(f'目标不存在: {target_name}'))
|
||||
return
|
||||
|
||||
scan = Scan.objects.filter(target=target).first()
|
||||
if not scan:
|
||||
from apps.engine.models import ScanEngine
|
||||
engine = ScanEngine.objects.first()
|
||||
scan = Scan.objects.create(
|
||||
target=target,
|
||||
engine=engine,
|
||||
status='completed',
|
||||
results_dir=f'/tmp/test_{target_name}'
|
||||
)
|
||||
|
||||
_, stats = self._generate_subdomains(target, scan, test_count, batch_size, benchmark=True)
|
||||
results.append((batch_size, stats))
|
||||
|
||||
# 清理测试数据
|
||||
Subdomain.objects.filter(scan=scan, name__startswith=f'test-').delete()
|
||||
|
||||
# 打印对比结果
|
||||
self.stdout.write(f'\n{"="*60}')
|
||||
self.stdout.write(' 批次大小对比结果')
|
||||
self.stdout.write(f'{"="*60}\n')
|
||||
self.stdout.write(f'{"批次大小":<12} {"总耗时(秒)":<15} {"速度(条/秒)":<15} {"平均批次时间(秒)"}')
|
||||
self.stdout.write('-' * 60)
|
||||
|
||||
for batch_size, stats in results:
|
||||
self.stdout.write(
|
||||
f'{batch_size:<12} '
|
||||
f'{stats["time"]:<15.2f} '
|
||||
f'{stats["speed"]:<15.0f} '
|
||||
f'{stats["avg_batch_time"]:<.3f}'
|
||||
)
|
||||
|
||||
# 找出最快的批次大小
|
||||
fastest = min(results, key=lambda x: x[1]['time'])
|
||||
self.stdout.write(f'\n推荐批次大小: {fastest[0]} (最快: {fastest[1]["time"]:.2f}秒)')
|
||||
self.stdout.write('')
|
||||
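The deleted management command above benchmarks batched inserts across several asset tables; the core pattern it measures is the familiar "accumulate, then bulk_create" loop, roughly as sketched below (helper name and parameters are illustrative).

from django.db import transaction

def bulk_insert_in_batches(model, rows, batch_size=1000):
    """Insert model instances in fixed-size batches, ignoring duplicate-key conflicts."""
    buffer = []
    inserted = 0
    for row in rows:
        buffer.append(row)
        if len(buffer) >= batch_size:
            with transaction.atomic():
                model.objects.bulk_create(buffer, ignore_conflicts=True)
            inserted += len(buffer)
            buffer = []
    if buffer:  # flush the remainder
        with transaction.atomic():
            model.objects.bulk_create(buffer, ignore_conflicts=True)
        inserted += len(buffer)
    return inserted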
@@ -6,7 +6,7 @@
|
||||
import logging
|
||||
from django.dispatch import receiver
|
||||
|
||||
from apps.common.signals import vulnerabilities_saved, worker_delete_failed
|
||||
from apps.common.signals import vulnerabilities_saved, worker_delete_failed, all_workers_high_load
|
||||
from apps.scan.notifications import create_notification, NotificationLevel, NotificationCategory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -80,3 +80,15 @@ def on_worker_delete_failed(sender, worker_name, message, **kwargs):
|
||||
category=NotificationCategory.SYSTEM
|
||||
)
|
||||
logger.warning("Worker 删除失败通知已发送 - worker=%s, message=%s", worker_name, message)
|
||||
|
||||
|
||||
@receiver(all_workers_high_load)
|
||||
def on_all_workers_high_load(sender, worker_name, cpu, mem, **kwargs):
|
||||
"""所有 Worker 高负载时的通知处理"""
|
||||
create_notification(
|
||||
title="系统负载较高",
|
||||
message=f"所有节点负载较高(最低负载节点 CPU: {cpu:.1f}%, 内存: {mem:.1f}%),系统将等待最多 10 分钟后分发任务,扫描速度可能受影响",
|
||||
level=NotificationLevel.MEDIUM,
|
||||
category=NotificationCategory.SYSTEM
|
||||
)
|
||||
logger.warning("高负载通知已发送 - cpu=%.1f%%, mem=%.1f%%", cpu, mem)
|
||||
|
||||
@@ -3,10 +3,14 @@
|
||||
import logging
|
||||
import time
|
||||
import requests
|
||||
import urllib3
|
||||
from .models import Notification, NotificationSettings
|
||||
from .types import NotificationLevel, NotificationCategory
|
||||
from .repositories import DjangoNotificationRepository, NotificationSettingsRepository
|
||||
|
||||
# 禁用自签名证书的 SSL 警告(远程 Worker 回调场景)
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -314,7 +318,8 @@ def _push_via_api_callback(notification: Notification, server_url: str) -> None:
|
||||
'created_at': notification.created_at.isoformat()
|
||||
}
|
||||
|
||||
resp = requests.post(callback_url, json=data, timeout=5)
|
||||
# verify=False: 远程 Worker 回调 Server 时可能使用自签名证书
|
||||
resp = requests.post(callback_url, json=data, timeout=5, verify=False)
|
||||
resp.raise_for_status()
|
||||
|
||||
logger.debug(f"通知回调推送成功 - ID: {notification.id}")
|
||||
|
||||
@@ -206,6 +206,10 @@ class FlowOrchestrator:
from apps.scan.flows.site_scan_flow import site_scan_flow
return site_scan_flow

elif scan_type == 'fingerprint_detect':
from apps.scan.flows.fingerprint_detect_flow import fingerprint_detect_flow
return fingerprint_detect_flow

elif scan_type == 'directory_scan':
from apps.scan.flows.directory_scan_flow import directory_scan_flow
return directory_scan_flow

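The branch added above extends an if/elif dispatch from scan_type to a lazily imported flow callable. An equivalent table-driven sketch is shown below; the module paths come from the hunk, while the registry and helper names are illustrative.

import importlib

_FLOW_REGISTRY = {
    'site_scan': ('apps.scan.flows.site_scan_flow', 'site_scan_flow'),
    'fingerprint_detect': ('apps.scan.flows.fingerprint_detect_flow', 'fingerprint_detect_flow'),
    'directory_scan': ('apps.scan.flows.directory_scan_flow', 'directory_scan_flow'),
}

def get_flow(scan_type: str):
    """Resolve a scan_type to its flow function, importing the module lazily."""
    try:
        module_path, attr = _FLOW_REGISTRY[scan_type]
    except KeyError:
        raise ValueError(f"Unknown scan_type: {scan_type}") from None
    return getattr(importlib.import_module(module_path), attr)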
@@ -83,7 +83,7 @@ def cleanup_results(results_dir: str, retention_days: int) -> dict:

def main():
parser = argparse.ArgumentParser(description="清理任务")
parser.add_argument("--results_dir", type=str, default="/app/backend/results", help="扫描结果目录")
parser.add_argument("--results_dir", type=str, default="/opt/xingrin/results", help="扫描结果目录")
parser.add_argument("--retention_days", type=int, default=7, help="保留天数")

args = parser.parse_args()

@@ -6,14 +6,135 @@
|
||||
必须在 Django 导入之前获取配置并设置环境变量。
|
||||
"""
|
||||
import argparse
|
||||
from apps.common.container_bootstrap import fetch_config_and_setup_django
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
|
||||
|
||||
def diagnose_prefect_environment():
|
||||
"""诊断 Prefect 运行环境,输出详细信息用于排查问题"""
|
||||
print("\n" + "="*60)
|
||||
print("Prefect 环境诊断")
|
||||
print("="*60)
|
||||
|
||||
# 1. 检查 Prefect 相关环境变量
|
||||
print("\n[诊断] Prefect 环境变量:")
|
||||
prefect_vars = [
|
||||
'PREFECT_HOME',
|
||||
'PREFECT_API_URL',
|
||||
'PREFECT_SERVER_EPHEMERAL_ENABLED',
|
||||
'PREFECT_SERVER_EPHEMERAL_STARTUP_TIMEOUT_SECONDS',
|
||||
'PREFECT_SERVER_DATABASE_CONNECTION_URL',
|
||||
'PREFECT_LOGGING_LEVEL',
|
||||
'PREFECT_DEBUG_MODE',
|
||||
]
|
||||
for var in prefect_vars:
|
||||
value = os.environ.get(var, 'NOT SET')
|
||||
print(f" {var}={value}")
|
||||
|
||||
# 2. 检查 PREFECT_HOME 目录
|
||||
prefect_home = os.environ.get('PREFECT_HOME', os.path.expanduser('~/.prefect'))
|
||||
print(f"\n[诊断] PREFECT_HOME 目录: {prefect_home}")
|
||||
if os.path.exists(prefect_home):
|
||||
print(f" ✓ 目录存在")
|
||||
print(f" 可写: {os.access(prefect_home, os.W_OK)}")
|
||||
try:
|
||||
files = os.listdir(prefect_home)
|
||||
print(f" 文件列表: {files[:10]}{'...' if len(files) > 10 else ''}")
|
||||
except Exception as e:
|
||||
print(f" ✗ 无法列出文件: {e}")
|
||||
else:
|
||||
print(f" 目录不存在,尝试创建...")
|
||||
try:
|
||||
os.makedirs(prefect_home, exist_ok=True)
|
||||
print(f" ✓ 创建成功")
|
||||
except Exception as e:
|
||||
print(f" ✗ 创建失败: {e}")
|
||||
|
||||
# 3. 检查 uvicorn 是否可用
|
||||
print(f"\n[诊断] uvicorn 可用性:")
|
||||
import shutil
|
||||
uvicorn_path = shutil.which('uvicorn')
|
||||
if uvicorn_path:
|
||||
print(f" ✓ uvicorn 路径: {uvicorn_path}")
|
||||
else:
|
||||
print(f" ✗ uvicorn 不在 PATH 中")
|
||||
print(f" PATH: {os.environ.get('PATH', 'NOT SET')}")
|
||||
|
||||
# 4. 检查 Prefect 版本
|
||||
print(f"\n[诊断] Prefect 版本:")
|
||||
try:
|
||||
import prefect
|
||||
print(f" ✓ prefect=={prefect.__version__}")
|
||||
except Exception as e:
|
||||
print(f" ✗ 无法导入 prefect: {e}")
|
||||
|
||||
# 5. 检查 SQLite 支持
|
||||
print(f"\n[诊断] SQLite 支持:")
|
||||
try:
|
||||
import sqlite3
|
||||
print(f" ✓ sqlite3 版本: {sqlite3.sqlite_version}")
|
||||
# 测试创建数据库
|
||||
test_db = os.path.join(prefect_home, 'test.db')
|
||||
conn = sqlite3.connect(test_db)
|
||||
conn.execute('CREATE TABLE IF NOT EXISTS test (id INTEGER)')
|
||||
conn.close()
|
||||
os.remove(test_db)
|
||||
print(f" ✓ SQLite 读写测试通过")
|
||||
except Exception as e:
|
||||
print(f" ✗ SQLite 测试失败: {e}")
|
||||
|
||||
# 6. 检查端口绑定能力
|
||||
print(f"\n[诊断] 端口绑定测试:")
|
||||
try:
|
||||
import socket
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
sock.bind(('127.0.0.1', 0))
|
||||
port = sock.getsockname()[1]
|
||||
sock.close()
|
||||
print(f" ✓ 可以绑定 127.0.0.1 端口 (测试端口: {port})")
|
||||
except Exception as e:
|
||||
print(f" ✗ 端口绑定失败: {e}")
|
||||
|
||||
# 7. 检查内存情况
|
||||
print(f"\n[诊断] 系统资源:")
|
||||
try:
|
||||
import psutil
|
||||
mem = psutil.virtual_memory()
|
||||
print(f" 内存总量: {mem.total / 1024 / 1024:.0f} MB")
|
||||
print(f" 可用内存: {mem.available / 1024 / 1024:.0f} MB")
|
||||
print(f" 内存使用率: {mem.percent}%")
|
||||
except ImportError:
|
||||
print(f" psutil 未安装,跳过内存检查")
|
||||
except Exception as e:
|
||||
print(f" ✗ 资源检查失败: {e}")
|
||||
|
||||
print("\n" + "="*60)
|
||||
print("诊断完成")
|
||||
print("="*60 + "\n")
|
||||
|
||||
|
||||
def main():
|
||||
print("="*60)
|
||||
print("run_initiate_scan.py 启动")
|
||||
print(f" Python: {sys.version}")
|
||||
print(f" CWD: {os.getcwd()}")
|
||||
print(f" SERVER_URL: {os.environ.get('SERVER_URL', 'NOT SET')}")
|
||||
print("="*60)
|
||||
|
||||
# 1. 从配置中心获取配置并初始化 Django(必须在 Django 导入之前)
|
||||
fetch_config_and_setup_django()
|
||||
print("[1/4] 从配置中心获取配置...")
|
||||
try:
|
||||
from apps.common.container_bootstrap import fetch_config_and_setup_django
|
||||
fetch_config_and_setup_django()
|
||||
print("[1/4] ✓ 配置获取成功")
|
||||
except Exception as e:
|
||||
print(f"[1/4] ✗ 配置获取失败: {e}")
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
# 2. 解析命令行参数
|
||||
print("[2/4] 解析命令行参数...")
|
||||
parser = argparse.ArgumentParser(description="执行扫描初始化 Flow")
|
||||
parser.add_argument("--scan_id", type=int, required=True, help="扫描任务 ID")
|
||||
parser.add_argument("--target_name", type=str, required=True, help="目标名称")
|
||||
@@ -23,21 +144,45 @@ def main():
|
||||
parser.add_argument("--scheduled_scan_name", type=str, default=None, help="定时扫描任务名称(可选)")
|
||||
|
||||
args = parser.parse_args()
|
||||
print(f"[2/4] ✓ 参数解析成功:")
|
||||
print(f" scan_id: {args.scan_id}")
|
||||
print(f" target_name: {args.target_name}")
|
||||
print(f" target_id: {args.target_id}")
|
||||
print(f" scan_workspace_dir: {args.scan_workspace_dir}")
|
||||
print(f" engine_name: {args.engine_name}")
|
||||
print(f" scheduled_scan_name: {args.scheduled_scan_name}")
|
||||
|
||||
# 2.5. 运行 Prefect 环境诊断(仅在 DEBUG 模式下)
|
||||
if os.environ.get('DEBUG', '').lower() == 'true':
|
||||
diagnose_prefect_environment()
|
||||
|
||||
# 3. 现在可以安全导入 Django 相关模块
|
||||
from apps.scan.flows.initiate_scan_flow import initiate_scan_flow
|
||||
print("[3/4] 导入 initiate_scan_flow...")
|
||||
try:
|
||||
from apps.scan.flows.initiate_scan_flow import initiate_scan_flow
|
||||
print("[3/4] ✓ 导入成功")
|
||||
except Exception as e:
|
||||
print(f"[3/4] ✗ 导入失败: {e}")
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
# 4. 执行 Flow
|
||||
result = initiate_scan_flow(
|
||||
scan_id=args.scan_id,
|
||||
target_name=args.target_name,
|
||||
target_id=args.target_id,
|
||||
scan_workspace_dir=args.scan_workspace_dir,
|
||||
engine_name=args.engine_name,
|
||||
scheduled_scan_name=args.scheduled_scan_name,
|
||||
)
|
||||
|
||||
print(f"Flow 执行完成: {result}")
|
||||
print("[4/4] 执行 initiate_scan_flow...")
|
||||
try:
|
||||
result = initiate_scan_flow(
|
||||
scan_id=args.scan_id,
|
||||
target_name=args.target_name,
|
||||
target_id=args.target_id,
|
||||
scan_workspace_dir=args.scan_workspace_dir,
|
||||
engine_name=args.engine_name,
|
||||
scheduled_scan_name=args.scheduled_scan_name,
|
||||
)
|
||||
print("[4/4] ✓ Flow 执行完成")
|
||||
print(f"结果: {result}")
|
||||
except Exception as e:
|
||||
print(f"[4/4] ✗ Flow 执行失败: {e}")
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -17,6 +17,8 @@ from .scan_state_service import ScanStateService
from .scan_control_service import ScanControlService
from .scan_stats_service import ScanStatsService
from .scheduled_scan_service import ScheduledScanService
from .blacklist_service import BlacklistService
from .target_export_service import TargetExportService

__all__ = [
'ScanService', # 主入口(向后兼容)
@@ -25,5 +27,7 @@ __all__ = [
'ScanControlService',
'ScanStatsService',
'ScheduledScanService',
'BlacklistService', # 黑名单过滤服务
'TargetExportService', # 目标导出服务
]

Some files were not shown because too many files have changed in this diff.