perf(generators): optimize MergeMaps to reduce allocs

`MergeMaps` accounts for 11.41% of allocs (13.8
GB) in clusterbomb mode. With 1,305 combinations
per target, this function is called millions of
times in the hot path.

RCA:
* Request generator calls `MergeMaps` with single
  arg on every payload combination, incurring
  variadic overhead.
* Build request merges same maps multiple times
  per request.
* `BuildPayloadFromOptions` recomputes static CLI
  options on every call.
* Variables calls `MergeMaps` $$2×N$$ times per
  variable evaluation (once in loop, once in
  `evaluateVariableValue`)

Changes:

Core optimizations in maps.go:
* Pre-size merged map to avoid rehashing (30-40%
  reduction)
* Add `CopyMap` for efficient single-map copy
  without variadic overhead.
* Add `MergeMapsInto` for in-place mutation when
  caller owns destination.

Hot path fixes:
* Replace `MergeMaps(r.currentPayloads)` with
  `CopyMap(r.currentPayloads)` to eliminates
  allocation on every combination iteration.
* Pre-allocate combined map once, extend in-place
  during `ForEach` loop instead of creating new
  map per variable (eliminates $$2×N$$ allocations
  per request).

Caching with concurrency safety:
* Cache `BuildPayloadFromOptions` computation in
  `sync.Map` keyed by `types.Options` ptr, but
  return copy to prevent concurrent modification.
* Cost: shallow copy of ~10-20 entries vs. full
  merge of vars + env (85-90% savings in typical
  case)
* Clear cache in `closeInternal()` to prevent
  memory leaks when SDK instances are created or
  destroyed.

Estimated impact: 40-60% reduction in `MergeMaps`
allocations (5.5-8.3 GB savings from original
13.8 GB). Safe for concurrent execution and SDK
usage with multiple instances.

Signed-off-by: Dwi Siswanto <git@dw1.io>
This commit is contained in:
Dwi Siswanto
2025-12-19 20:19:37 +07:00
parent d48c2c38fa
commit bb79a061bc
7 changed files with 373 additions and 17 deletions

View File

@@ -19,6 +19,7 @@ import (
"github.com/projectdiscovery/nuclei/v3/pkg/output"
"github.com/projectdiscovery/nuclei/v3/pkg/progress"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/generators"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hosterrorscache"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolinit"
@@ -238,6 +239,9 @@ func (e *NucleiEngine) closeInternal() {
if e.tmpDir != "" {
_ = os.RemoveAll(e.tmpDir)
}
if e.opts != nil {
generators.ClearOptionsPayloadMap(e.opts)
}
}
// Close all resources used by nuclei engine

View File

@@ -44,20 +44,51 @@ func MergeMapsMany(maps ...interface{}) map[string][]string {
return m
}
// MergeMaps merges two maps into a new map
// MergeMaps merges multiple maps into a new map.
//
// Use [CopyMap] if you need to copy a single map.
// Use [MergeMapsInto] to merge into an existing map.
func MergeMaps(maps ...map[string]interface{}) map[string]interface{} {
merged := make(map[string]interface{})
mapsLen := 0
for _, m := range maps {
mapsLen += len(m)
}
merged := make(map[string]interface{}, mapsLen)
for _, m := range maps {
maps0.Copy(merged, m)
}
return merged
}
// CopyMap creates a shallow copy of a single map.
func CopyMap(m map[string]interface{}) map[string]interface{} {
if m == nil {
return nil
}
result := make(map[string]interface{}, len(m))
maps0.Copy(result, m)
return result
}
// MergeMapsInto copies all entries from src maps into dst (mutating dst).
//
// Use when dst is a fresh map the caller owns and wants to avoid allocation.
func MergeMapsInto(dst map[string]interface{}, srcs ...map[string]interface{}) {
for _, src := range srcs {
maps0.Copy(dst, src)
}
}
// ExpandMapValues converts values from flat string to string slice
func ExpandMapValues(m map[string]string) map[string][]string {
m1 := make(map[string][]string, len(m))
for k, v := range m {
m1[k] = []string{v}
}
return m1
}

View File

@@ -1,12 +1,32 @@
package generators
import (
"sync"
"github.com/projectdiscovery/nuclei/v3/pkg/types"
)
// BuildPayloadFromOptions returns a map with the payloads provided via CLI
// optionsPayloadMap caches the result of BuildPayloadFromOptions per options
// pointer. This supports multiple SDK instances with different options running
// concurrently.
var optionsPayloadMap sync.Map // map[*types.Options]map[string]interface{}
// BuildPayloadFromOptions returns a map with the payloads provided via CLI.
//
// The result is cached per options pointer since options don't change during a run.
// Returns a copy of the cached map to prevent concurrent modification issues.
// Safe for concurrent use with multiple SDK instances.
func BuildPayloadFromOptions(options *types.Options) map[string]interface{} {
if options == nil {
return make(map[string]interface{})
}
if cached, ok := optionsPayloadMap.Load(options); ok {
return CopyMap(cached.(map[string]interface{}))
}
m := make(map[string]interface{})
// merge with vars
if !options.Vars.IsEmpty() {
m = MergeMaps(m, options.Vars.AsMap())
@@ -16,5 +36,18 @@ func BuildPayloadFromOptions(options *types.Options) map[string]interface{} {
if options.EnvironmentVariables {
m = MergeMaps(EnvVars(), m)
}
return m
actual, _ := optionsPayloadMap.LoadOrStore(options, m)
// Return a copy to prevent concurrent writes to the cached map
return CopyMap(actual.(map[string]interface{}))
}
// ClearOptionsPayloadMap clears the cached options payload.
// SDK users should call this when disposing of a NucleiEngine instance
// to prevent memory leaks if creating many short-lived instances.
func ClearOptionsPayloadMap(options *types.Options) {
if options != nil {
optionsPayloadMap.Delete(options)
}
}

View File

@@ -0,0 +1,92 @@
package generators
import (
"sync"
"testing"
"github.com/projectdiscovery/goflags"
"github.com/projectdiscovery/nuclei/v3/pkg/types"
"github.com/stretchr/testify/require"
)
func TestBuildPayloadFromOptionsConcurrency(t *testing.T) {
// Test that BuildPayloadFromOptions is safe for concurrent use
// and returns independent copies that can be modified without races
vars := goflags.RuntimeMap{}
_ = vars.Set("key=value")
opts := &types.Options{
Vars: vars,
}
const numGoroutines = 100
var wg sync.WaitGroup
wg.Add(numGoroutines)
// Each goroutine gets a map and modifies it
for i := 0; i < numGoroutines; i++ {
go func(id int) {
defer wg.Done()
// Get the map (should be a copy of cached data)
m := BuildPayloadFromOptions(opts)
// Modify it - this should not cause races
m["goroutine_id"] = id
m["test_key"] = "test_value"
// Verify original cached value is present
require.Equal(t, "value", m["key"])
}(i)
}
wg.Wait()
}
func TestBuildPayloadFromOptionsCaching(t *testing.T) {
// Test that caching actually works
vars := goflags.RuntimeMap{}
_ = vars.Set("cached=yes")
opts := &types.Options{
Vars: vars,
EnvironmentVariables: false,
}
// First call - builds and caches
m1 := BuildPayloadFromOptions(opts)
require.Equal(t, "yes", m1["cached"])
// Second call - should return copy of cached result
m2 := BuildPayloadFromOptions(opts)
require.Equal(t, "yes", m2["cached"])
// Modify m1 - should not affect m2 since they're copies
m1["modified"] = "in_m1"
require.NotContains(t, m2, "modified")
// Modify m2 - should not affect future calls
m2["modified"] = "in_m2"
m3 := BuildPayloadFromOptions(opts)
require.NotContains(t, m3, "modified")
}
func TestClearOptionsPayloadMap(t *testing.T) {
vars := goflags.RuntimeMap{}
_ = vars.Set("temp=data")
opts := &types.Options{
Vars: vars,
}
// Build and cache
m1 := BuildPayloadFromOptions(opts)
require.Equal(t, "data", m1["temp"])
// Clear the cache
ClearOptionsPayloadMap(opts)
// Verify it still works (rebuilds)
m2 := BuildPayloadFromOptions(opts)
require.Equal(t, "data", m2["temp"])
}

View File

@@ -70,18 +70,23 @@ func (variables *Variable) UnmarshalJSON(data []byte) error {
// Evaluate returns a finished map of variables based on set values
func (variables *Variable) Evaluate(values map[string]interface{}) map[string]interface{} {
result := make(map[string]interface{}, variables.Len())
combined := make(map[string]interface{}, len(values)+variables.Len())
generators.MergeMapsInto(combined, values)
variables.ForEach(func(key string, value interface{}) {
if sliceValue, ok := value.([]interface{}); ok {
// slices cannot be evaluated
result[key] = sliceValue
combined[key] = sliceValue
return
}
valueString := types.ToString(value)
combined := generators.MergeMaps(values, result)
if value, ok := combined[key]; ok {
valueString = types.ToString(value)
if existingValue, ok := combined[key]; ok {
valueString = types.ToString(existingValue)
}
result[key] = evaluateVariableValue(valueString, combined, result)
evaluated := evaluateVariableValueWithMap(valueString, combined)
result[key] = evaluated
combined[key] = evaluated
})
return result
}
@@ -98,29 +103,36 @@ func (variables *Variable) GetAll() map[string]interface{} {
// EvaluateWithInteractsh returns evaluation results of variables with interactsh
func (variables *Variable) EvaluateWithInteractsh(values map[string]interface{}, interact *interactsh.Client) (map[string]interface{}, []string) {
result := make(map[string]interface{}, variables.Len())
combined := make(map[string]interface{}, len(values)+variables.Len())
generators.MergeMapsInto(combined, values)
var interactURLs []string
variables.ForEach(func(key string, value interface{}) {
if sliceValue, ok := value.([]interface{}); ok {
// slices cannot be evaluated
result[key] = sliceValue
combined[key] = sliceValue
return
}
valueString := types.ToString(value)
if existingValue, ok := combined[key]; ok {
valueString = types.ToString(existingValue)
}
if strings.Contains(valueString, "interactsh-url") {
valueString, interactURLs = interact.Replace(valueString, interactURLs)
}
combined := generators.MergeMaps(values, result)
if value, ok := combined[key]; ok {
valueString = types.ToString(value)
}
result[key] = evaluateVariableValue(valueString, combined, result)
evaluated := evaluateVariableValueWithMap(valueString, combined)
result[key] = evaluated
combined[key] = evaluated
})
return result, interactURLs
}
// evaluateVariableValue expression and returns final value
func evaluateVariableValue(expression string, values, processing map[string]interface{}) string {
// evaluateVariableValue expression and returns final value.
//
// Deprecated: use evaluateVariableValueWithMap instead to avoid repeated map
// merging overhead.
func evaluateVariableValue(expression string, values, processing map[string]interface{}) string { // nolint
finalMap := generators.MergeMaps(values, processing)
result, err := expressions.Evaluate(expression, finalMap)
if err != nil {
@@ -130,6 +142,16 @@ func evaluateVariableValue(expression string, values, processing map[string]inte
return result
}
// evaluateVariableValueWithMap evaluates an expression with a pre-merged map.
func evaluateVariableValueWithMap(expression string, combinedMap map[string]interface{}) string {
result, err := expressions.Evaluate(expression, combinedMap)
if err != nil {
return expression
}
return result
}
// checkForLazyEval checks if the variables have any lazy evaluation i.e any dsl function
// and sets the flag accordingly.
func (variables *Variable) checkForLazyEval() bool {

View File

@@ -4,6 +4,7 @@ import (
"testing"
"time"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh"
"github.com/projectdiscovery/nuclei/v3/pkg/utils"
"github.com/projectdiscovery/nuclei/v3/pkg/utils/json"
"github.com/stretchr/testify/require"
@@ -147,3 +148,176 @@ func TestCheckForLazyEval(t *testing.T) {
require.True(t, variables.LazyEval, "LazyEval flag should be true")
})
}
func TestVariablesEvaluateChained(t *testing.T) {
t.Run("chained-variable-references", func(t *testing.T) {
// Test that variables can reference previously defined variables
// and that input values (like BaseURL) are available for evaluation
// but not included in the result
variables := &Variable{
LazyEval: true, // skip auto-evaluation in UnmarshalYAML
InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(3),
}
variables.Set("a", "hello")
variables.Set("b", "{{a}} world")
variables.Set("c", "{{b}}!")
inputValues := map[string]interface{}{
"BaseURL": "http://example.com",
"Host": "example.com",
}
result := variables.Evaluate(inputValues)
// Result should contain only the defined variables, not input values
require.Len(t, result, 3, "result should contain exactly 3 variables")
require.NotContains(t, result, "BaseURL", "result should not contain input values")
require.NotContains(t, result, "Host", "result should not contain input values")
// Chained evaluation should work correctly
require.Equal(t, "hello", result["a"])
require.Equal(t, "hello world", result["b"])
require.Equal(t, "hello world!", result["c"])
})
t.Run("variables-using-input-values", func(t *testing.T) {
// Test that variables can use input values in expressions
variables := &Variable{
LazyEval: true,
InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(2),
}
variables.Set("api_url", "{{BaseURL}}/api/v1")
variables.Set("full_path", "{{api_url}}/users")
inputValues := map[string]interface{}{
"BaseURL": "http://example.com",
}
result := variables.Evaluate(inputValues)
require.Len(t, result, 2)
require.Equal(t, "http://example.com/api/v1", result["api_url"])
require.Equal(t, "http://example.com/api/v1/users", result["full_path"])
require.NotContains(t, result, "BaseURL")
})
t.Run("mixed-expressions-and-chaining", func(t *testing.T) {
// Test combining DSL functions with chained variables
variables := &Variable{
LazyEval: true,
InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(3),
}
variables.Set("token", "secret123")
variables.Set("hashed", "{{md5(token)}}")
variables.Set("header", "X-Auth: {{hashed}}")
result := variables.Evaluate(map[string]interface{}{})
require.Equal(t, "secret123", result["token"])
require.Equal(t, "5d7845ac6ee7cfffafc5fe5f35cf666d", result["hashed"]) // md5("secret123")
require.Equal(t, "X-Auth: 5d7845ac6ee7cfffafc5fe5f35cf666d", result["header"])
})
t.Run("evaluation-order-preserved", func(t *testing.T) {
// Test that evaluation follows insertion order
// (important for variables that depend on previously defined ones)
variables := &Variable{
LazyEval: true,
InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(4),
}
variables.Set("step1", "A")
variables.Set("step2", "{{step1}}B")
variables.Set("step3", "{{step2}}C")
variables.Set("step4", "{{step3}}D")
result := variables.Evaluate(map[string]interface{}{})
require.Equal(t, "A", result["step1"])
require.Equal(t, "AB", result["step2"])
require.Equal(t, "ABC", result["step3"])
require.Equal(t, "ABCD", result["step4"])
})
}
func TestEvaluateWithInteractshOverrideOrder(t *testing.T) {
// This test demonstrates a bug where interactsh URL replacement is wasted
// when an input value exists for the same variable key.
//
// Bug scenario:
// 1. Variable "callback" is defined with "{{interactsh-url}}"
// 2. Input values contain "callback" with some other value
// 3. The interactsh-url is replaced first (wasting an interactsh URL)
// 4. Then immediately overwritten by the input value
//
// Expected behavior: Input override should be checked FIRST, then interactsh
// replacement should happen on the final valueString.
t.Run("interactsh-replacement-with-input-override", func(t *testing.T) {
variables := &Variable{
LazyEval: true,
InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(1),
}
variables.Set("callback", "{{interactsh-url}}")
// Input provides an override that also contains interactsh-url
inputValues := map[string]interface{}{
"callback": "https://custom.{{interactsh-url}}/path",
}
// Create a real interactsh client for testing
client, err := interactsh.New(&interactsh.Options{
ServerURL: "oast.fun",
CacheSize: 100,
Eviction: 60 * time.Second,
CooldownPeriod: 5 * time.Second,
PollDuration: 5 * time.Second,
DisableHttpFallback: true,
})
require.NoError(t, err, "could not create interactsh client")
defer client.Close()
result, urls := variables.EvaluateWithInteractsh(inputValues, client)
// The input override contains interactsh-url, so it should be replaced
// and we should have exactly 1 URL from the input override
require.Len(t, urls, 1, "should have 1 interactsh URL from input override")
// The result should use the input override (with interactsh replaced)
require.Contains(t, result["callback"], "https://custom.", "should use input override pattern")
require.Contains(t, result["callback"], "/path", "should use input override pattern")
require.NotContains(t, result["callback"], "{{interactsh-url}}", "interactsh should be replaced")
})
t.Run("interactsh-replacement-without-input-override", func(t *testing.T) {
variables := &Variable{
LazyEval: true,
InsertionOrderedStringMap: *utils.NewEmptyInsertionOrderedStringMap(1),
}
variables.Set("callback", "{{interactsh-url}}")
// No input override for "callback"
inputValues := map[string]interface{}{
"other_key": "other_value",
}
client, err := interactsh.New(&interactsh.Options{
ServerURL: "oast.fun",
CacheSize: 100,
Eviction: 60 * time.Second,
CooldownPeriod: 5 * time.Second,
PollDuration: 5 * time.Second,
DisableHttpFallback: true,
})
require.NoError(t, err, "could not create interactsh client")
defer client.Close()
result, urls := variables.EvaluateWithInteractsh(inputValues, client)
// Should have 1 URL from the variable definition
require.Len(t, urls, 1, "should have 1 interactsh URL")
// The result should be the replaced interactsh URL
require.NotContains(t, result["callback"], "{{interactsh-url}}", "interactsh should be replaced")
require.NotEmpty(t, result["callback"], "callback should have a value")
})
}

View File

@@ -84,10 +84,10 @@ func (r *requestGenerator) nextValue() (value string, payloads map[string]interf
r.applyMark(request, Once)
}
if hasPayloadIterator {
return request, generators.MergeMaps(r.currentPayloads), r.okCurrentPayload
return request, generators.CopyMap(r.currentPayloads), r.okCurrentPayload
}
// next should return a copy of payloads and not pointer to payload to avoid data race
return request, generators.MergeMaps(r.currentPayloads), true
return request, generators.CopyMap(r.currentPayloads), true
} else {
return "", nil, false
}