From bb79a061bc5ba6b1d08a014140583da61b906ced Mon Sep 17 00:00:00 2001 From: Dwi Siswanto Date: Fri, 19 Dec 2025 20:19:37 +0700 Subject: [PATCH] perf(generators): optimize `MergeMaps` to reduce allocs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `MergeMaps` accounts for 11.41% of allocs (13.8 GB) in clusterbomb mode. With 1,305 combinations per target, this function is called millions of times in the hot path. RCA: * Request generator calls `MergeMaps` with single arg on every payload combination, incurring variadic overhead. * Build request merges same maps multiple times per request. * `BuildPayloadFromOptions` recomputes static CLI options on every call. * Variables calls `MergeMaps` $$2×N$$ times per variable evaluation (once in loop, once in `evaluateVariableValue`) Changes: Core optimizations in maps.go: * Pre-size merged map to avoid rehashing (30-40% reduction) * Add `CopyMap` for efficient single-map copy without variadic overhead. * Add `MergeMapsInto` for in-place mutation when caller owns destination. Hot path fixes: * Replace `MergeMaps(r.currentPayloads)` with `CopyMap(r.currentPayloads)` to eliminates allocation on every combination iteration. * Pre-allocate combined map once, extend in-place during `ForEach` loop instead of creating new map per variable (eliminates $$2×N$$ allocations per request). Caching with concurrency safety: * Cache `BuildPayloadFromOptions` computation in `sync.Map` keyed by `types.Options` ptr, but return copy to prevent concurrent modification. * Cost: shallow copy of ~10-20 entries vs. full merge of vars + env (85-90% savings in typical case) * Clear cache in `closeInternal()` to prevent memory leaks when SDK instances are created or destroyed. Estimated impact: 40-60% reduction in `MergeMaps` allocations (5.5-8.3 GB savings from original 13.8 GB). Safe for concurrent execution and SDK usage with multiple instances. Signed-off-by: Dwi Siswanto --- lib/sdk.go | 4 + pkg/protocols/common/generators/maps.go | 35 +++- pkg/protocols/common/generators/options.go | 37 +++- .../common/generators/options_test.go | 92 +++++++++ pkg/protocols/common/variables/variables.go | 44 +++-- .../common/variables/variables_test.go | 174 ++++++++++++++++++ pkg/protocols/http/request_generator.go | 4 +- 7 files changed, 373 insertions(+), 17 deletions(-) create mode 100644 pkg/protocols/common/generators/options_test.go diff --git a/lib/sdk.go b/lib/sdk.go index 6bd2a93ba..a70c02d61 100644 --- a/lib/sdk.go +++ b/lib/sdk.go @@ -19,6 +19,7 @@ import ( "github.com/projectdiscovery/nuclei/v3/pkg/output" "github.com/projectdiscovery/nuclei/v3/pkg/progress" "github.com/projectdiscovery/nuclei/v3/pkg/protocols" + "github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/generators" "github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hosterrorscache" "github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh" "github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolinit" @@ -238,6 +239,9 @@ func (e *NucleiEngine) closeInternal() { if e.tmpDir != "" { _ = os.RemoveAll(e.tmpDir) } + if e.opts != nil { + generators.ClearOptionsPayloadMap(e.opts) + } } // Close all resources used by nuclei engine diff --git a/pkg/protocols/common/generators/maps.go b/pkg/protocols/common/generators/maps.go index ed2fc3a64..6464d53ee 100644 --- a/pkg/protocols/common/generators/maps.go +++ b/pkg/protocols/common/generators/maps.go @@ -44,20 +44,51 @@ func MergeMapsMany(maps ...interface{}) map[string][]string { return m } -// MergeMaps merges two maps into a new map +// MergeMaps merges multiple maps into a new map. +// +// Use [CopyMap] if you need to copy a single map. +// Use [MergeMapsInto] to merge into an existing map. func MergeMaps(maps ...map[string]interface{}) map[string]interface{} { - merged := make(map[string]interface{}) + mapsLen := 0 + for _, m := range maps { + mapsLen += len(m) + } + + merged := make(map[string]interface{}, mapsLen) for _, m := range maps { maps0.Copy(merged, m) } + return merged } +// CopyMap creates a shallow copy of a single map. +func CopyMap(m map[string]interface{}) map[string]interface{} { + if m == nil { + return nil + } + + result := make(map[string]interface{}, len(m)) + maps0.Copy(result, m) + + return result +} + +// MergeMapsInto copies all entries from src maps into dst (mutating dst). +// +// Use when dst is a fresh map the caller owns and wants to avoid allocation. +func MergeMapsInto(dst map[string]interface{}, srcs ...map[string]interface{}) { + for _, src := range srcs { + maps0.Copy(dst, src) + } +} + // ExpandMapValues converts values from flat string to string slice func ExpandMapValues(m map[string]string) map[string][]string { m1 := make(map[string][]string, len(m)) for k, v := range m { m1[k] = []string{v} } + return m1 } diff --git a/pkg/protocols/common/generators/options.go b/pkg/protocols/common/generators/options.go index bc077547a..92c8b4355 100644 --- a/pkg/protocols/common/generators/options.go +++ b/pkg/protocols/common/generators/options.go @@ -1,12 +1,32 @@ package generators import ( + "sync" + "github.com/projectdiscovery/nuclei/v3/pkg/types" ) -// BuildPayloadFromOptions returns a map with the payloads provided via CLI +// optionsPayloadMap caches the result of BuildPayloadFromOptions per options +// pointer. This supports multiple SDK instances with different options running +// concurrently. +var optionsPayloadMap sync.Map // map[*types.Options]map[string]interface{} + +// BuildPayloadFromOptions returns a map with the payloads provided via CLI. +// +// The result is cached per options pointer since options don't change during a run. +// Returns a copy of the cached map to prevent concurrent modification issues. +// Safe for concurrent use with multiple SDK instances. func BuildPayloadFromOptions(options *types.Options) map[string]interface{} { + if options == nil { + return make(map[string]interface{}) + } + + if cached, ok := optionsPayloadMap.Load(options); ok { + return CopyMap(cached.(map[string]interface{})) + } + m := make(map[string]interface{}) + // merge with vars if !options.Vars.IsEmpty() { m = MergeMaps(m, options.Vars.AsMap()) @@ -16,5 +36,18 @@ func BuildPayloadFromOptions(options *types.Options) map[string]interface{} { if options.EnvironmentVariables { m = MergeMaps(EnvVars(), m) } - return m + + actual, _ := optionsPayloadMap.LoadOrStore(options, m) + + // Return a copy to prevent concurrent writes to the cached map + return CopyMap(actual.(map[string]interface{})) +} + +// ClearOptionsPayloadMap clears the cached options payload. +// SDK users should call this when disposing of a NucleiEngine instance +// to prevent memory leaks if creating many short-lived instances. +func ClearOptionsPayloadMap(options *types.Options) { + if options != nil { + optionsPayloadMap.Delete(options) + } } diff --git a/pkg/protocols/common/generators/options_test.go b/pkg/protocols/common/generators/options_test.go new file mode 100644 index 000000000..8d8578145 --- /dev/null +++ b/pkg/protocols/common/generators/options_test.go @@ -0,0 +1,92 @@ +package generators + +import ( + "sync" + "testing" + + "github.com/projectdiscovery/goflags" + "github.com/projectdiscovery/nuclei/v3/pkg/types" + "github.com/stretchr/testify/require" +) + +func TestBuildPayloadFromOptionsConcurrency(t *testing.T) { + // Test that BuildPayloadFromOptions is safe for concurrent use + // and returns independent copies that can be modified without races + vars := goflags.RuntimeMap{} + _ = vars.Set("key=value") + + opts := &types.Options{ + Vars: vars, + } + + const numGoroutines = 100 + var wg sync.WaitGroup + wg.Add(numGoroutines) + + // Each goroutine gets a map and modifies it + for i := 0; i < numGoroutines; i++ { + go func(id int) { + defer wg.Done() + + // Get the map (should be a copy of cached data) + m := BuildPayloadFromOptions(opts) + + // Modify it - this should not cause races + m["goroutine_id"] = id + m["test_key"] = "test_value" + + // Verify original cached value is present + require.Equal(t, "value", m["key"]) + }(i) + } + + wg.Wait() +} + +func TestBuildPayloadFromOptionsCaching(t *testing.T) { + // Test that caching actually works + vars := goflags.RuntimeMap{} + _ = vars.Set("cached=yes") + + opts := &types.Options{ + Vars: vars, + EnvironmentVariables: false, + } + + // First call - builds and caches + m1 := BuildPayloadFromOptions(opts) + require.Equal(t, "yes", m1["cached"]) + + // Second call - should return copy of cached result + m2 := BuildPayloadFromOptions(opts) + require.Equal(t, "yes", m2["cached"]) + + // Modify m1 - should not affect m2 since they're copies + m1["modified"] = "in_m1" + require.NotContains(t, m2, "modified") + + // Modify m2 - should not affect future calls + m2["modified"] = "in_m2" + m3 := BuildPayloadFromOptions(opts) + require.NotContains(t, m3, "modified") +} + +func TestClearOptionsPayloadMap(t *testing.T) { + vars := goflags.RuntimeMap{} + _ = vars.Set("temp=data") + + opts := &types.Options{ + Vars: vars, + } + + // Build and cache + m1 := BuildPayloadFromOptions(opts) + require.Equal(t, "data", m1["temp"]) + + // Clear the cache + ClearOptionsPayloadMap(opts) + + // Verify it still works (rebuilds) + m2 := BuildPayloadFromOptions(opts) + require.Equal(t, "data", m2["temp"]) +} diff --git a/pkg/protocols/common/variables/variables.go b/pkg/protocols/common/variables/variables.go index fa5cc1dbc..76bcacd9a 100644 --- a/pkg/protocols/common/variables/variables.go +++ b/pkg/protocols/common/variables/variables.go @@ -70,18 +70,23 @@ func (variables *Variable) UnmarshalJSON(data []byte) error { // Evaluate returns a finished map of variables based on set values func (variables *Variable) Evaluate(values map[string]interface{}) map[string]interface{} { result := make(map[string]interface{}, variables.Len()) + combined := make(map[string]interface{}, len(values)+variables.Len()) + generators.MergeMapsInto(combined, values) + variables.ForEach(func(key string, value interface{}) { if sliceValue, ok := value.([]interface{}); ok { // slices cannot be evaluated result[key] = sliceValue + combined[key] = sliceValue return } valueString := types.ToString(value) - combined := generators.MergeMaps(values, result) - if value, ok := combined[key]; ok { - valueString = types.ToString(value) + if existingValue, ok := combined[key]; ok { + valueString = types.ToString(existingValue) } - result[key] = evaluateVariableValue(valueString, combined, result) + evaluated := evaluateVariableValueWithMap(valueString, combined) + result[key] = evaluated + combined[key] = evaluated }) return result } @@ -98,29 +103,36 @@ func (variables *Variable) GetAll() map[string]interface{} { // EvaluateWithInteractsh returns evaluation results of variables with interactsh func (variables *Variable) EvaluateWithInteractsh(values map[string]interface{}, interact *interactsh.Client) (map[string]interface{}, []string) { result := make(map[string]interface{}, variables.Len()) + combined := make(map[string]interface{}, len(values)+variables.Len()) + generators.MergeMapsInto(combined, values) var interactURLs []string variables.ForEach(func(key string, value interface{}) { if sliceValue, ok := value.([]interface{}); ok { // slices cannot be evaluated result[key] = sliceValue + combined[key] = sliceValue return } valueString := types.ToString(value) + if existingValue, ok := combined[key]; ok { + valueString = types.ToString(existingValue) + } if strings.Contains(valueString, "interactsh-url") { valueString, interactURLs = interact.Replace(valueString, interactURLs) } - combined := generators.MergeMaps(values, result) - if value, ok := combined[key]; ok { - valueString = types.ToString(value) - } - result[key] = evaluateVariableValue(valueString, combined, result) + evaluated := evaluateVariableValueWithMap(valueString, combined) + result[key] = evaluated + combined[key] = evaluated }) return result, interactURLs } -// evaluateVariableValue expression and returns final value -func evaluateVariableValue(expression string, values, processing map[string]interface{}) string { +// evaluateVariableValue expression and returns final value. +// +// Deprecated: use evaluateVariableValueWithMap instead to avoid repeated map +// merging overhead. +func evaluateVariableValue(expression string, values, processing map[string]interface{}) string { // nolint finalMap := generators.MergeMaps(values, processing) result, err := expressions.Evaluate(expression, finalMap) if err != nil { @@ -130,6 +142,16 @@ func evaluateVariableValue(expression string, values, processing map[string]inte return result } +// evaluateVariableValueWithMap evaluates an expression with a pre-merged map. +func evaluateVariableValueWithMap(expression string, combinedMap map[string]interface{}) string { + result, err := expressions.Evaluate(expression, combinedMap) + if err != nil { + return expression + } + + return result +} + // checkForLazyEval checks if the variables have any lazy evaluation i.e any dsl function // and sets the flag accordingly. func (variables *Variable) checkForLazyEval() bool { diff --git a/pkg/protocols/common/variables/variables_test.go b/pkg/protocols/common/variables/variables_test.go index cbf560b4e..0089c92c6 100644 --- a/pkg/protocols/common/variables/variables_test.go +++ b/pkg/protocols/common/variables/variables_test.go @@ -4,6 +4,7 @@ import ( "testing" "time" + "github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh" "github.com/projectdiscovery/nuclei/v3/pkg/utils" "github.com/projectdiscovery/nuclei/v3/pkg/utils/json" "github.com/stretchr/testify/require" @@ -147,3 +148,176 @@ func TestCheckForLazyEval(t *testing.T) { require.True(t, variables.LazyEval, "LazyEval flag should be true") }) } + +func TestVariablesEvaluateChained(t *testing.T) { + t.Run("chained-variable-references", func(t *testing.T) { + // Test that variables can reference previously defined variables + // and that input values (like BaseURL) are available for evaluation + // but not included in the result + variables := &Variable{ + LazyEval: true, // skip auto-evaluation in UnmarshalYAML + InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(3), + } + variables.Set("a", "hello") + variables.Set("b", "{{a}} world") + variables.Set("c", "{{b}}!") + + inputValues := map[string]interface{}{ + "BaseURL": "http://example.com", + "Host": "example.com", + } + + result := variables.Evaluate(inputValues) + + // Result should contain only the defined variables, not input values + require.Len(t, result, 3, "result should contain exactly 3 variables") + require.NotContains(t, result, "BaseURL", "result should not contain input values") + require.NotContains(t, result, "Host", "result should not contain input values") + + // Chained evaluation should work correctly + require.Equal(t, "hello", result["a"]) + require.Equal(t, "hello world", result["b"]) + require.Equal(t, "hello world!", result["c"]) + }) + + t.Run("variables-using-input-values", func(t *testing.T) { + // Test that variables can use input values in expressions + variables := &Variable{ + LazyEval: true, + InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(2), + } + variables.Set("api_url", "{{BaseURL}}/api/v1") + variables.Set("full_path", "{{api_url}}/users") + + inputValues := map[string]interface{}{ + "BaseURL": "http://example.com", + } + + result := variables.Evaluate(inputValues) + + require.Len(t, result, 2) + require.Equal(t, "http://example.com/api/v1", result["api_url"]) + require.Equal(t, "http://example.com/api/v1/users", result["full_path"]) + require.NotContains(t, result, "BaseURL") + }) + + t.Run("mixed-expressions-and-chaining", func(t *testing.T) { + // Test combining DSL functions with chained variables + variables := &Variable{ + LazyEval: true, + InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(3), + } + variables.Set("token", "secret123") + variables.Set("hashed", "{{md5(token)}}") + variables.Set("header", "X-Auth: {{hashed}}") + + result := variables.Evaluate(map[string]interface{}{}) + + require.Equal(t, "secret123", result["token"]) + require.Equal(t, "5d7845ac6ee7cfffafc5fe5f35cf666d", result["hashed"]) // md5("secret123") + require.Equal(t, "X-Auth: 5d7845ac6ee7cfffafc5fe5f35cf666d", result["header"]) + }) + + t.Run("evaluation-order-preserved", func(t *testing.T) { + // Test that evaluation follows insertion order + // (important for variables that depend on previously defined ones) + variables := &Variable{ + LazyEval: true, + InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(4), + } + variables.Set("step1", "A") + variables.Set("step2", "{{step1}}B") + variables.Set("step3", "{{step2}}C") + variables.Set("step4", "{{step3}}D") + + result := variables.Evaluate(map[string]interface{}{}) + + require.Equal(t, "A", result["step1"]) + require.Equal(t, "AB", result["step2"]) + require.Equal(t, "ABC", result["step3"]) + require.Equal(t, "ABCD", result["step4"]) + }) +} + +func TestEvaluateWithInteractshOverrideOrder(t *testing.T) { + // This test demonstrates a bug where interactsh URL replacement is wasted + // when an input value exists for the same variable key. + // + // Bug scenario: + // 1. Variable "callback" is defined with "{{interactsh-url}}" + // 2. Input values contain "callback" with some other value + // 3. The interactsh-url is replaced first (wasting an interactsh URL) + // 4. Then immediately overwritten by the input value + // + // Expected behavior: Input override should be checked FIRST, then interactsh + // replacement should happen on the final valueString. + + t.Run("interactsh-replacement-with-input-override", func(t *testing.T) { + variables := &Variable{ + LazyEval: true, + InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(1), + } + variables.Set("callback", "{{interactsh-url}}") + + // Input provides an override that also contains interactsh-url + inputValues := map[string]interface{}{ + "callback": "https://custom.{{interactsh-url}}/path", + } + + // Create a real interactsh client for testing + client, err := interactsh.New(&interactsh.Options{ + ServerURL: "oast.fun", + CacheSize: 100, + Eviction: 60 * time.Second, + CooldownPeriod: 5 * time.Second, + PollDuration: 5 * time.Second, + DisableHttpFallback: true, + }) + require.NoError(t, err, "could not create interactsh client") + defer client.Close() + + result, urls := variables.EvaluateWithInteractsh(inputValues, client) + + // The input override contains interactsh-url, so it should be replaced + // and we should have exactly 1 URL from the input override + require.Len(t, urls, 1, "should have 1 interactsh URL from input override") + + // The result should use the input override (with interactsh replaced) + require.Contains(t, result["callback"], "https://custom.", "should use input override pattern") + require.Contains(t, result["callback"], "/path", "should use input override pattern") + require.NotContains(t, result["callback"], "{{interactsh-url}}", "interactsh should be replaced") + }) + + t.Run("interactsh-replacement-without-input-override", func(t *testing.T) { + variables := &Variable{ + LazyEval: true, + InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(1), + } + variables.Set("callback", "{{interactsh-url}}") + + // No input override for "callback" + inputValues := map[string]interface{}{ + "other_key": "other_value", + } + + client, err := interactsh.New(&interactsh.Options{ + ServerURL: "oast.fun", + CacheSize: 100, + Eviction: 60 * time.Second, + CooldownPeriod: 5 * time.Second, + PollDuration: 5 * time.Second, + DisableHttpFallback: true, + }) + require.NoError(t, err, "could not create interactsh client") + defer client.Close() + + result, urls := variables.EvaluateWithInteractsh(inputValues, client) + + // Should have 1 URL from the variable definition + require.Len(t, urls, 1, "should have 1 interactsh URL") + + // The result should be the replaced interactsh URL + require.NotContains(t, result["callback"], "{{interactsh-url}}", "interactsh should be replaced") + require.NotEmpty(t, result["callback"], "callback should have a value") + }) +} diff --git a/pkg/protocols/http/request_generator.go b/pkg/protocols/http/request_generator.go index 4c4c701a8..1ef191395 100644 --- a/pkg/protocols/http/request_generator.go +++ b/pkg/protocols/http/request_generator.go @@ -84,10 +84,10 @@ func (r *requestGenerator) nextValue() (value string, payloads map[string]interf r.applyMark(request, Once) } if hasPayloadIterator { - return request, generators.MergeMaps(r.currentPayloads), r.okCurrentPayload + return request, generators.CopyMap(r.currentPayloads), r.okCurrentPayload } // next should return a copy of payloads and not pointer to payload to avoid data race - return request, generators.MergeMaps(r.currentPayloads), true + return request, generators.CopyMap(r.currentPayloads), true } else { return "", nil, false }