add variable support to extractors

This commit is contained in:
Doğan Can Bakır
2025-11-25 12:38:08 +09:00
parent 29977358d7
commit 80321bce4c
10 changed files with 64 additions and 19 deletions

View File

@@ -9,6 +9,7 @@ import (
"github.com/itchyny/gojq"
"github.com/projectdiscovery/nuclei/v3/pkg/operators/cache"
"github.com/projectdiscovery/nuclei/v3/pkg/operators/common/dsl"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/expressions"
)
// CompileExtractors performs the initial setup operation on an extractor
@@ -21,6 +22,11 @@ func (e *Extractor) CompileExtractors() error {
e.extractorType = computedType
// Compile the regexes
for _, regex := range e.Regex {
if varErr := expressions.ContainsUnresolvedVariables(regex); varErr != nil {
e.regexCompiled = append(e.regexCompiled, nil)
continue
}
if cached, err := cache.Regex().GetIFPresent(regex); err == nil && cached != nil {
e.regexCompiled = append(e.regexCompiled, cached)
continue
@@ -37,6 +43,10 @@ func (e *Extractor) CompileExtractors() error {
}
for _, query := range e.JSON {
if varErr := expressions.ContainsUnresolvedVariables(query); varErr != nil {
e.jsonCompiled = append(e.jsonCompiled, nil)
continue
}
query, err := gojq.Parse(query)
if err != nil {
return fmt.Errorf("could not parse json: %s", query)

View File

@@ -2,21 +2,38 @@ package extractors
import (
"fmt"
"regexp"
"strings"
"github.com/antchfx/htmlquery"
"github.com/antchfx/xmlquery"
"github.com/itchyny/gojq"
"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/expressions"
"github.com/projectdiscovery/nuclei/v3/pkg/types"
"github.com/projectdiscovery/nuclei/v3/pkg/utils/json"
)
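// ExtractRegex extracts text from a corpus and returns it.
// Patterns that still contain unresolved variables are evaluated against data
// and compiled at extraction time; a pattern that fails to evaluate or compile
// is logged as a warning and skipped.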
func (e *Extractor) ExtractRegex(corpus string) map[string]struct{} {
func (e *Extractor) ExtractRegex(corpus string, data map[string]interface{}) map[string]struct{} {
results := make(map[string]struct{})
groupPlusOne := e.RegexGroup + 1
for _, regex := range e.regexCompiled {
for i, regex := range e.regexCompiled {
if varErr := expressions.ContainsUnresolvedVariables(e.Regex[i]); varErr != nil {
regexStr, err := expressions.Evaluate(e.Regex[i], data)
if err != nil {
gologger.Warning().Msgf("Could not evaluate expression: %s, error: %s", e.Regex[i], err.Error())
continue
}
regex, err = regexp.Compile(regexStr)
if err != nil {
gologger.Warning().Msgf("Could not compile regex: %s, error: %s", regexStr, err.Error())
continue
}
}
// skip prefix short-circuit for case-insensitive patterns
rstr := regex.String()
if !strings.Contains(rstr, "(?i") {
@@ -138,7 +155,7 @@ func (e *Extractor) ExtractXML(corpus string) map[string]struct{} {
}
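// ExtractJSON extracts text from a corpus using JQ queries and returns it.
// Queries that still contain unresolved variables are evaluated against data,
// then parsed and compiled at extraction time; a query that fails any of these
// steps is logged as a warning and skipped.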
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
func (e *Extractor) ExtractJSON(corpus string, data map[string]interface{}) map[string]struct{} {
results := make(map[string]struct{})
var jsonObj interface{}
@@ -147,7 +164,25 @@ func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
return results
}
for _, k := range e.jsonCompiled {
for i, k := range e.jsonCompiled {
if varErr := expressions.ContainsUnresolvedVariables(e.JSON[i]); varErr != nil {
jsonStr, err := expressions.Evaluate(e.JSON[i], data)
if err != nil {
gologger.Warning().Msgf("Could not evaluate expression: %s, error: %s", e.JSON[i], err.Error())
continue
}
query, err := gojq.Parse(jsonStr)
if err != nil {
gologger.Warning().Msgf("Could not parse json: %s, error: %s", jsonStr, err.Error())
continue
}
k, err = gojq.Compile(query)
if err != nil {
gologger.Warning().Msgf("Could not compile json: %s, error: %s", jsonStr, err.Error())
continue
}
}
iter := k.Run(jsonObj)
for {
v, ok := iter.Next()

View File

@@ -11,10 +11,10 @@ func TestExtractor_ExtractRegex(t *testing.T) {
err := e.CompileExtractors()
require.Nil(t, err)
got := e.ExtractRegex("RegEx")
got := e.ExtractRegex("RegEx", map[string]interface{}{})
require.Equal(t, map[string]struct{}{"RegEx": {}}, got)
got = e.ExtractRegex("regex")
got = e.ExtractRegex("regex", map[string]interface{}{})
require.Equal(t, map[string]struct{}{}, got)
}
@@ -70,10 +70,10 @@ func TestExtractor_ExtractJSON(t *testing.T) {
err := e.CompileExtractors()
require.Nil(t, err)
got := e.ExtractJSON(`[{"id": 1}]`)
got := e.ExtractJSON(`[{"id": 1}]`, map[string]interface{}{})
require.Equal(t, map[string]struct{}{"1": {}}, got)
got = e.ExtractJSON(`{"id": 1}`)
got = e.ExtractJSON(`{"id": 1}`, map[string]interface{}{})
require.Equal(t, map[string]struct{}{}, got)
}

View File

@@ -57,7 +57,7 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
switch extractor.GetType() {
case extractors.RegexExtractor:
return extractor.ExtractRegex(types.ToString(item))
return extractor.ExtractRegex(types.ToString(item), data)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.DSLExtractor:

View File

@@ -45,11 +45,11 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
switch extractor.GetType() {
case extractors.RegexExtractor:
return extractor.ExtractRegex(itemStr)
return extractor.ExtractRegex(itemStr, data)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.JSONExtractor:
return extractor.ExtractJSON(itemStr)
return extractor.ExtractJSON(itemStr, data)
case extractors.XPathExtractor:
return extractor.ExtractXPath(itemStr)
case extractors.DSLExtractor:

View File

@@ -71,7 +71,7 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
switch extractor.GetType() {
case extractors.RegexExtractor:
return extractor.ExtractRegex(itemStr)
return extractor.ExtractRegex(itemStr, data)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.DSLExtractor:
@@ -79,7 +79,7 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
case extractors.XPathExtractor:
return extractor.ExtractXPath(itemStr)
case extractors.JSONExtractor:
return extractor.ExtractJSON(itemStr)
return extractor.ExtractJSON(itemStr, data)
}
return nil
}

View File

@@ -68,13 +68,13 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
}
switch extractor.GetType() {
case extractors.RegexExtractor:
return extractor.ExtractRegex(item)
return extractor.ExtractRegex(item, data)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.XPathExtractor:
return extractor.ExtractXPath(item)
case extractors.JSONExtractor:
return extractor.ExtractJSON(item)
return extractor.ExtractJSON(item, data)
case extractors.DSLExtractor:
return extractor.ExtractDSL(data)
}

View File

@@ -46,7 +46,7 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
switch extractor.GetType() {
case extractors.RegexExtractor:
return extractor.ExtractRegex(itemStr)
return extractor.ExtractRegex(itemStr, data)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.DSLExtractor:

View File

@@ -67,7 +67,7 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
}
switch extractor.GetType() {
case extractors.RegexExtractor:
return extractor.ExtractRegex(item)
return extractor.ExtractRegex(item, data)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.DSLExtractor:

View File

@@ -388,11 +388,11 @@ func MakeDefaultExtractFunc(data map[string]interface{}, extractor *extractors.E
switch extractor.GetType() {
case extractors.RegexExtractor:
return extractor.ExtractRegex(itemStr)
return extractor.ExtractRegex(itemStr, data)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.JSONExtractor:
return extractor.ExtractJSON(itemStr)
return extractor.ExtractJSON(itemStr, data)
case extractors.XPathExtractor:
return extractor.ExtractXPath(itemStr)
case extractors.DSLExtractor: