Implement shared parser functionality and related tests

- Added shared parsed and compiled caches to the parser for improved performance across engines.
- Introduced a `getSharedParser` helper that returns a fresh parser backed by a process-wide shared parsed cache.
- Updated `NucleiEngine` initialization to optionally use shared caches when the `NUCLEI_USE_SHARED_COMPILED` environment variable is set (see the usage sketch below).
- Added tests to validate shared parser behavior and benchmark performance with and without shared caches.
- Implemented sanitization of options in the compiled cache to prevent engine-scoped state retention.
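
For reference, a minimal opt-in sketch (not part of this diff; it assumes the `NUCLEI_USE_SHARED_COMPILED` flag introduced here and the existing SDK entry points `NewNucleiEngineCtx`, `LoadAllTemplates`, and `Close`; the engine count is illustrative):

package main

import (
	"context"
	"log"
	"os"

	nuclei "github.com/projectdiscovery/nuclei/v3/lib"
)

func main() {
	// Opt in before creating engines; each engine's init() reads the flag.
	_ = os.Setenv("NUCLEI_USE_SHARED_COMPILED", "1")
	for i := 0; i < 3; i++ {
		ne, err := nuclei.NewNucleiEngineCtx(context.Background())
		if err != nil {
			log.Fatalf("engine error: %v", err)
		}
		// With the flag set, every engine parses against the same shared
		// parsed/compiled caches, so repeated loads mostly hit the cache.
		if err := ne.LoadAllTemplates(); err != nil {
			log.Fatalf("load templates error: %v", err)
		}
		ne.Close()
	}
}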
knakul853
2025-09-04 00:22:57 +05:30
parent 32dfeacd9d
commit eeedf6cce6
9 changed files with 608 additions and 12 deletions

View File

@@ -3,6 +3,7 @@ package nuclei
import (
"context"
"fmt"
"os"
"strings"
"sync"
"time"
@@ -37,6 +38,21 @@ import (
"github.com/projectdiscovery/ratelimit"
)
// process-wide shared parsed cache, reused across engines (opt-in)
var (
sharedParsedOnce sync.Once
sharedParsed *templates.Cache
)
func getSharedParser() *templates.Parser {
// Initialize the shared parsed cache once
sharedParsedOnce.Do(func() {
sharedParsed = templates.NewCache()
})
// Return a fresh Parser on each call that reuses only the shared parsed cache;
// the compiled cache remains per-engine.
return templates.NewParserWithParsedCache(sharedParsed)
}
// applyRequiredDefaults to options
func (e *NucleiEngine) applyRequiredDefaults(ctx context.Context) {
mockoutput := testutils.NewMockOutputWriter(e.opts.OmitTemplate)
@@ -123,7 +139,12 @@ func (e *NucleiEngine) init(ctx context.Context) error {
}
if e.parser == nil {
//TODO: remove this feature flag after testing
if os.Getenv("NUCLEI_USE_SHARED_COMPILED") == "1" {
e.parser = templates.NewSharedParserWithCompiledCache()
} else {
e.parser = templates.NewParser()
}
}
if protocolstate.ShouldInit(e.opts.ExecutionId) {

View File

@@ -3,6 +3,7 @@ package nuclei_test
import (
"context"
"log"
"os"
"testing"
"time"
@@ -35,3 +36,28 @@ func TestContextCancelNucleiEngine(t *testing.T) {
}
defer ne.Close()
}
func TestSharedParserOptIn(t *testing.T) {
os.Setenv("NUCLEI_USE_SHARED_PARSER", "1")
t.Cleanup(func() { os.Unsetenv("NUCLEI_USE_SHARED_PARSER") })
ne, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
t.Fatalf("engine error: %v", err)
}
defer ne.Close()
p := ne.GetParser()
if p == nil {
t.Fatalf("expected templates.Parser")
}
ne2, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
t.Fatalf("engine2 error: %v", err)
}
defer ne2.Close()
p2 := ne2.GetParser()
if p2 == nil {
t.Fatalf("expected templates.Parser2")
}
if p.Cache() != p2.Cache() {
t.Fatalf("expected shared parsed cache across engines when opt-in is set")
}
}

View File

@@ -224,11 +224,31 @@ func Parse(filePath string, preprocessor Preprocessor, options *protocols.Execut
}
template.Path = filePath
if !options.DoNotCache {
// Store a sanitized copy of the template in the compiled cache to avoid retaining engine-scoped state.
cacheTpl := *template
cacheTpl.Options = sanitizeOptionsForCache(template.Options)
// Raw template bytes are not needed in the compiled cache; they remain per-engine.
cacheTpl.Options.RawTemplate = nil
parser.compiledTemplatesCache.Store(filePath, &cacheTpl, nil, err)
}
return template, nil
}
// sanitizeOptionsForCache strips engine-scoped fields from ExecutorOptions to avoid
// retaining per-engine references in the shared compiled cache.
func sanitizeOptionsForCache(src *protocols.ExecutorOptions) *protocols.ExecutorOptions {
if src == nil {
return nil
}
return &protocols.ExecutorOptions{
// Intentionally exclude TemplateID/Path/Verifier and RawTemplate to avoid engine leakage
StopAtFirstMatch: src.StopAtFirstMatch,
ProtocolType: src.ProtocolType,
Flow: src.Flow,
IsMultiProtocol: src.IsMultiProtocol,
}
}
// isGlobalMatchersEnabled checks if any of requests in the template
// have global matchers enabled. It iterates through all requests and
// returns true if at least one request has global matchers enabled;

View File

@@ -206,3 +206,161 @@ func TestWrongWorkflow(t *testing.T) {
require.Nil(t, got, "could not parse template")
require.ErrorContains(t, err, "workflows cannot have other protocols")
}
func Test_SharedCompiledCache_SharedAcrossParsers(t *testing.T) {
setup()
p1 := templates.NewSharedParserWithCompiledCache()
p2 := templates.NewSharedParserWithCompiledCache()
exec1 := &protocols.ExecutorOptions{
Output: testutils.NewMockOutputWriter(testutils.DefaultOptions.OmitTemplate),
Options: testutils.DefaultOptions,
Progress: executerOpts.Progress,
Catalog: executerOpts.Catalog,
RateLimiter: executerOpts.RateLimiter,
Parser: p1,
}
// reinit a fresh copy of the default options for isolation
opts2 := *testutils.DefaultOptions
testutils.Init(&opts2)
progressImpl, _ := progress.NewStatsTicker(0, false, false, false, 0)
exec2 := &protocols.ExecutorOptions{
Output: testutils.NewMockOutputWriter(opts2.OmitTemplate),
Options: &opts2,
Progress: progressImpl,
Catalog: executerOpts.Catalog,
RateLimiter: executerOpts.RateLimiter,
Parser: p2,
}
filePath := "tests/match-1.yaml"
got1, err := templates.Parse(filePath, nil, exec1)
require.NoError(t, err)
require.NotNil(t, got1)
got2, err := templates.Parse(filePath, nil, exec2)
require.NoError(t, err)
require.NotNil(t, got2)
require.Same(t, p1.CompiledCache(), p2.CompiledCache())
require.Greater(t, p1.CompiledCount(), 0)
require.Equal(t, p1.CompiledCount(), p2.CompiledCount())
}
func Test_SharedCompiledCache_OptionsIsolation(t *testing.T) {
setup()
p1 := templates.NewSharedParserWithCompiledCache()
p2 := templates.NewSharedParserWithCompiledCache()
exec1 := &protocols.ExecutorOptions{
Output: testutils.NewMockOutputWriter(testutils.DefaultOptions.OmitTemplate),
Options: testutils.DefaultOptions,
Progress: executerOpts.Progress,
Catalog: executerOpts.Catalog,
RateLimiter: executerOpts.RateLimiter,
Parser: p1,
}
// reinit a fresh copy of the default options for isolation
opts2 := *testutils.DefaultOptions
testutils.Init(&opts2)
progressImpl, _ := progress.NewStatsTicker(0, false, false, false, 0)
exec2 := &protocols.ExecutorOptions{
Output: testutils.NewMockOutputWriter(opts2.OmitTemplate),
Options: &opts2,
Progress: progressImpl,
Catalog: executerOpts.Catalog,
RateLimiter: executerOpts.RateLimiter,
Parser: p2,
}
filePath := "tests/match-1.yaml"
got1, err := templates.Parse(filePath, nil, exec1)
require.NoError(t, err)
require.NotNil(t, got1)
got2, err := templates.Parse(filePath, nil, exec2)
require.NoError(t, err)
require.NotNil(t, got2)
require.NotEqual(t, got1.Options, got2.Options)
}
// compiled cache does not retain engine-scoped fields
func Test_CompiledCache_SanitizesOptions(t *testing.T) {
setup()
p := templates.NewSharedParserWithCompiledCache()
exec := executerOpts
exec.Parser = p
filePath := "tests/match-1.yaml"
got, err := templates.Parse(filePath, nil, exec)
require.NoError(t, err)
require.NotNil(t, got)
cached, raw, err := p.CompiledCache().Has(filePath)
require.NoError(t, err)
require.NotNil(t, cached)
require.Nil(t, raw)
// cached template must not hold engine-scoped references
require.Nil(t, cached.Options.Options)
require.Empty(t, cached.Options.TemplateVerifier)
require.Empty(t, cached.Options.TemplateID)
require.Empty(t, cached.Options.TemplatePath)
require.False(t, cached.Options.StopAtFirstMatch)
}
// different engines see different Options pointers
func Test_EngineIsolation_NoCrossLeaks(t *testing.T) {
setup()
p1 := templates.NewSharedParserWithCompiledCache()
p2 := templates.NewSharedParserWithCompiledCache()
// engine 1
exec1 := &protocols.ExecutorOptions{
Output: executerOpts.Output,
Options: executerOpts.Options,
Progress: executerOpts.Progress,
Catalog: executerOpts.Catalog,
RateLimiter: executerOpts.RateLimiter,
Parser: p1,
}
// engine 2 with a fresh copy of the default options
opts2 := *testutils.DefaultOptions
testutils.Init(&opts2)
progress2, _ := progress.NewStatsTicker(0, false, false, false, 0)
exec2 := &protocols.ExecutorOptions{
Output: testutils.NewMockOutputWriter(opts2.OmitTemplate),
Options: &opts2,
Progress: progress2,
Catalog: executerOpts.Catalog,
RateLimiter: executerOpts.RateLimiter,
Parser: p2,
}
filePath := "tests/match-1.yaml"
got1, err := templates.Parse(filePath, nil, exec1)
require.NoError(t, err)
got2, err := templates.Parse(filePath, nil, exec2)
require.NoError(t, err)
// template options must be distinct per engine
require.NotEqual(t, got1.Options, got2.Options)
// http request options must bind to engine-specific ExecutorOptions copies (not shared)
require.NotEmpty(t, got1.RequestsHTTP)
require.NotEmpty(t, got2.RequestsHTTP)
r1 := got1.RequestsHTTP[0]
r2 := got2.RequestsHTTP[0]
// ensure options structs are not the same pointer
require.NotSame(t, r1.Options().Options, r2.Options().Options)
// mutate engine2 options and ensure it doesn't affect engine1
r2.Options().Options.RateLimit = 999
require.NotEqual(t, r1.Options().Options.RateLimit, r2.Options().Options.RateLimit)
// compiled cache instance shared, but without engine leakage
require.Same(t, p1.CompiledCache(), p2.CompiledCache())
}

View File

@@ -29,20 +29,42 @@ type Parser struct {
sync.Mutex
}
var (
sharedParsedCacheOnce sync.Once
sharedParsedCache *Cache
)
var (
sharedCompiledCacheOnce sync.Once
sharedCompiledCache *Cache
)
// NewParser returns a new parser with fresh parsed and compiled caches
func NewParser() *Parser {
return &Parser{parsedTemplatesCache: NewCache(), compiledTemplatesCache: NewCache()}
}
// NewParserWithParsedCache returns a parser that reuses the provided parsed cache
// and keeps its own compiled cache
func NewParserWithParsedCache(cache *Cache) *Parser {
return &Parser{parsedTemplatesCache: cache, compiledTemplatesCache: NewCache()}
}
// NewSharedParser returns a parser backed by a process-wide shared parsed cache.
// Safe for concurrent use since Cache is concurrency-safe.
func NewSharedParser() *Parser {
sharedParsedCacheOnce.Do(func() {
sharedParsedCache = NewCache()
})
return &Parser{parsedTemplatesCache: sharedParsedCache, compiledTemplatesCache: NewCache()}
}
// NewSharedParserWithCompiledCache returns a parser backed by process-wide shared
// parsed and compiled caches. Intended for scenarios where compiled executers
// can be safely reused across engines by copying option-bearing fields.
func NewSharedParserWithCompiledCache() *Parser {
sharedParsedCacheOnce.Do(func() { sharedParsedCache = NewCache() })
sharedCompiledCacheOnce.Do(func() { sharedCompiledCache = NewCache() })
return &Parser{parsedTemplatesCache: sharedParsedCache, compiledTemplatesCache: sharedCompiledCache}
}
// Cache returns the parsed templates cache

View File

@@ -3,6 +3,7 @@ package templates
import (
"errors"
"fmt"
"sync"
"testing"
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/disk"
@@ -156,3 +157,31 @@ func TestLoadTemplate(t *testing.T) {
}
})
}
func TestNewSharedParserSharesCache(t *testing.T) {
p1 := NewSharedParser()
p2 := NewSharedParser()
if p1.Cache() != p2.Cache() {
t.Fatalf("expected shared cache instance")
}
}
func TestNewSharedParserConcurrency(t *testing.T) {
var wg sync.WaitGroup
const goroutines = 50
parsers := make([]*Parser, goroutines)
wg.Add(goroutines)
for i := 0; i < goroutines; i++ {
go func(i int) {
defer wg.Done()
parsers[i] = NewSharedParser()
}(i)
}
wg.Wait()
base := parsers[0].Cache()
for i := 1; i < goroutines; i++ {
if parsers[i].Cache() != base {
t.Fatalf("expected all parsers to share the same cache")
}
}
}

View File

@@ -0,0 +1,74 @@
package testing
import (
"context"
"os"
"path/filepath"
"testing"
nuclei "github.com/projectdiscovery/nuclei/v3/lib"
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/config"
)
// BenchmarkChunkEngines simulates the aurora agent pattern where multiple chunks
// create engines against the same template set. It compares shared vs non-shared parser.
// We only load templates (no scan execution) to stress parsing/compile memory like the loader path.
func BenchmarkChunkEngines(b *testing.B) {
templatesDir := config.DefaultConfig.TemplatesDirectory
if fi, err := os.Stat(filepath.Clean(templatesDir)); err != nil || !fi.IsDir() {
b.Skipf("templates directory not available: %s", templatesDir)
return
}
// number of synthetic "chunks" (engines) to simulate
const chunks = 20
b.Run("no_shared", func(b *testing.B) {
_ = os.Unsetenv("NUCLEI_USE_SHARED_PARSER")
b.ReportAllocs()
for i := 0; i < b.N; i++ {
// create engines like separate chunks would
engines := make([]*nuclei.NucleiEngine, 0, chunks)
for c := 0; c < chunks; c++ {
ne, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
b.Fatalf("engine error: %v", err)
}
engines = append(engines, ne)
}
// load templates on each engine (same set)
for _, ne := range engines {
if err := ne.LoadAllTemplates(); err != nil {
b.Fatalf("load templates error: %v", err)
}
}
for _, ne := range engines {
ne.Close()
}
}
})
b.Run("shared", func(b *testing.B) {
_ = os.Setenv("NUCLEI_USE_SHARED_PARSER", "1")
b.Cleanup(func() { _ = os.Unsetenv("NUCLEI_USE_SHARED_PARSER") })
b.ReportAllocs()
for i := 0; i < b.N; i++ {
engines := make([]*nuclei.NucleiEngine, 0, chunks)
for c := 0; c < chunks; c++ {
ne, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
b.Fatalf("engine error: %v", err)
}
engines = append(engines, ne)
}
for _, ne := range engines {
if err := ne.LoadAllTemplates(); err != nil {
b.Fatalf("load templates error: %v", err)
}
}
for _, ne := range engines {
ne.Close()
}
}
})
}

View File

@@ -0,0 +1,62 @@
package testing
import (
"context"
"os"
"path/filepath"
"testing"
nuclei "github.com/projectdiscovery/nuclei/v3/lib"
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/config"
)
// BenchmarkSharedParser benchmarks LoadAllTemplates with and without the shared parser caches.
// It skips if the nuclei-templates directory is not present to avoid fetching during benchmarks.
func BenchmarkSharedParser(b *testing.B) {
templatesDir := config.DefaultConfig.TemplatesDirectory
if fi, err := os.Stat(filepath.Clean(templatesDir)); err != nil || !fi.IsDir() {
b.Skipf("templates directory not available: %s", templatesDir)
return
}
b.Run("no_shared", func(b *testing.B) {
_ = os.Unsetenv("NUCLEI_USE_SHARED_PARSER")
b.ReportAllocs()
for i := 0; i < b.N; i++ {
ne, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
b.Fatalf("engine error: %v", err)
}
if err := ne.LoadAllTemplates(); err != nil {
b.Fatalf("load templates error: %v", err)
}
ne.Close()
}
})
b.Run("shared", func(b *testing.B) {
_ = os.Setenv("NUCLEI_USE_SHARED_PARSER", "1")
b.Cleanup(func() { _ = os.Unsetenv("NUCLEI_USE_SHARED_PARSER") })
// warm up shared cache once
warm, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
b.Fatalf("warm engine error: %v", err)
}
if err := warm.LoadAllTemplates(); err != nil {
b.Fatalf("warm load error: %v", err)
}
warm.Close()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
ne, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
b.Fatalf("engine error: %v", err)
}
if err := ne.LoadAllTemplates(); err != nil {
b.Fatalf("load templates error: %v", err)
}
ne.Close()
}
})
}

View File

@@ -0,0 +1,184 @@
package main
import (
"context"
"flag"
"fmt"
"os"
"runtime"
"strings"
"sync"
"time"
nuclei "github.com/projectdiscovery/nuclei/v3/lib"
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/config"
"github.com/projectdiscovery/nuclei/v3/pkg/templates"
)
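// memMB returns the current heap allocation (runtime.MemStats.Alloc) in MiB.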
func memMB() uint64 {
var m runtime.MemStats
runtime.ReadMemStats(&m)
return m.Alloc / (1024 * 1024)
}
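// snapshotCaches prints the parsed/compiled cache pointers and entry counts for the
// first few engines and reports whether the caches are shared across all engines.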
func snapshotCaches(label string, engines []*nuclei.NucleiEngine) {
fmt.Println(label)
max := 3
if len(engines) < max {
max = len(engines)
}
var base *templates.Parser
for i := 0; i < max; i++ {
p := engines[i].GetParser()
if base == nil {
base = p
}
fmt.Printf(" engine[%d]: parsed_cache_ptr=%p compiled_cache_ptr=%p parsed_count=%d compiled_count=%d\n", i, p.Cache(), p.CompiledCache(), p.ParsedCount(), p.CompiledCount())
}
equalParsed := true
equalCompiled := true
for i := 1; i < len(engines); i++ {
if engines[i].GetParser().Cache() != base.Cache() {
equalParsed = false
}
if engines[i].GetParser().CompiledCache() != base.CompiledCache() {
equalCompiled = false
}
}
fmt.Println(" parsed cache shared across engines:", equalParsed)
fmt.Println(" compiled cache shared across engines:", equalCompiled)
}
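// runEngines creates the requested number of engines, loads all templates on each,
// optionally executes scans against the targets (with a 60s timeout), and returns the elapsed time.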
func runEngines(chunks int, targets []string, execute bool) (time.Duration, error) {
start := time.Now()
engines := make([]*nuclei.NucleiEngine, 0, chunks)
for i := 0; i < chunks; i++ {
ne, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
return 0, fmt.Errorf("engine create: %w", err)
}
engines = append(engines, ne)
}
// Load templates
for _, ne := range engines {
if err := ne.LoadAllTemplates(); err != nil {
return 0, fmt.Errorf("load templates: %w", err)
}
}
if execute {
// Execute scans concurrently with a global 60s timeout to prevent long runs
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
var wg sync.WaitGroup
for _, ne := range engines {
ne := ne
ne.LoadTargets(targets, false)
wg.Add(1)
go func() {
defer wg.Done()
// ignore callback output
_ = ne.ExecuteCallbackWithCtx(ctx)
}()
}
wg.Wait()
}
for _, ne := range engines {
ne.Close()
}
return time.Since(start), nil
}
func main() {
var chunks int
var targetsCSV string
var execute bool
flag.IntVar(&chunks, "chunks", 10, "number of simulated chunk engines")
flag.StringVar(&targetsCSV, "targets", "https://scanme.sh,https://honey.scanme.sh", "comma-separated targets")
flag.BoolVar(&execute, "execute", false, "execute scans after loading templates (default: false)")
flag.Parse()
// Ensure templates directory exists
templatesDir := config.DefaultConfig.TemplatesDirectory
if fi, err := os.Stat(templatesDir); err != nil || !fi.IsDir() {
fmt.Printf("templates directory not found: %s\n", templatesDir)
os.Exit(1)
}
targets := []string{}
for _, t := range strings.Split(targetsCSV, ",") {
t = strings.TrimSpace(t)
if t != "" {
targets = append(targets, t)
}
}
fmt.Println("=== Agent-sim: without shared caches ===")
_ = os.Unsetenv("NUCLEI_USE_SHARED_COMPILED")
runtime.GC()
before := memMB()
// create engines to snapshot cache sharing behavior
enginesNS := make([]*nuclei.NucleiEngine, 0, chunks)
for i := 0; i < chunks; i++ {
ne, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
fmt.Println("error:", err)
os.Exit(1)
}
enginesNS = append(enginesNS, ne)
}
for _, ne := range enginesNS {
if err := ne.LoadAllTemplates(); err != nil {
fmt.Println("error:", err)
os.Exit(1)
}
}
snapshotCaches("cache state (no_shared):", enginesNS)
for _, ne := range enginesNS {
ne.Close()
}
durNoShared, err := runEngines(chunks, targets, execute)
if err != nil {
fmt.Println("error:", err)
os.Exit(1)
}
runtime.GC()
after := memMB()
fmt.Printf("no_shared: duration=%s heap_before=%dMB heap_after=%dMB\n", durNoShared, before, after)
fmt.Println("\n=== Agent-sim: with shared compiled cache ===")
_ = os.Setenv("NUCLEI_USE_SHARED_COMPILED", "1")
runtime.GC()
before = memMB()
enginesSC := make([]*nuclei.NucleiEngine, 0, chunks)
for i := 0; i < chunks; i++ {
ne, err := nuclei.NewNucleiEngineCtx(context.Background())
if err != nil {
fmt.Println("error:", err)
os.Exit(1)
}
enginesSC = append(enginesSC, ne)
}
for _, ne := range enginesSC {
if err := ne.LoadAllTemplates(); err != nil {
fmt.Println("error:", err)
os.Exit(1)
}
}
snapshotCaches("cache state (shared-compiled):", enginesSC)
for _, ne := range enginesSC {
ne.Close()
}
durSharedCompiled, err := runEngines(chunks, targets, execute)
if err != nil {
fmt.Println("error:", err)
os.Exit(1)
}
runtime.GC()
after = memMB()
fmt.Printf("shared_compiled: duration=%s heap_before=%dMB heap_after=%dMB\n", durSharedCompiled, before, after)
fmt.Println("\nDone.")
}