diff --git a/pkg/protocols/headless/operators.go b/pkg/protocols/headless/operators.go
index 3ad30e9c5..0f97f438b 100644
--- a/pkg/protocols/headless/operators.go
+++ b/pkg/protocols/headless/operators.go
@@ -76,6 +76,10 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
return extractor.ExtractKval(data)
case extractors.DSLExtractor:
return extractor.ExtractDSL(data)
+ case extractors.XPathExtractor:
+ return extractor.ExtractXPath(itemStr)
+ case extractors.JSONExtractor:
+ return extractor.ExtractJSON(itemStr)
}
return nil
}
diff --git a/pkg/protocols/headless/operators_test.go b/pkg/protocols/headless/operators_test.go
index dede326b8..533a7418a 100644
--- a/pkg/protocols/headless/operators_test.go
+++ b/pkg/protocols/headless/operators_test.go
@@ -1 +1,566 @@
package headless
+
+import (
+ "testing"
+
+ "github.com/projectdiscovery/nuclei/v3/pkg/operators/extractors"
+ "github.com/projectdiscovery/nuclei/v3/pkg/operators/matchers"
+ "github.com/stretchr/testify/require"
+)
+
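+// TestRequest_ExtractXPath verifies XPath extraction of element text, attribute
+// values, and multiple nodes from rendered HTML, plus the empty result for a
+// non-matching expression.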
+func TestRequest_ExtractXPath(t *testing.T) {
+ request := &Request{}
+
+ // Test HTML content extraction
+	htmlContent := `<html>
+	<head>
+		<title>Test Page</title>
+	</head>
+	<body>
+		<div>
+			<h1>Welcome</h1>
+			<p>This is a test page</p>
+			<a href="https://example.com">Click here</a>
+			<ul>
+				<li>Item 1</li>
+				<li>Item 2</li>
+				<li>Item 3</li>
+			</ul>
+		</div>
+	</body>
+</html>`
+
+ data := map[string]interface{}{
+ "data": htmlContent,
+ }
+
+ // Test extracting text content
+ extractor := &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/div/h1"},
+ }
+ err := extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result := request.Extract(data, extractor)
+ expected := map[string]struct{}{"Welcome": {}}
+ require.Equal(t, expected, result)
+
+ // Test extracting attribute value
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/div/a"},
+ Attribute: "href",
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{"https://example.com": {}}
+ require.Equal(t, expected, result)
+
+ // Test extracting multiple items
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/div/ul/li"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "Item 1": {},
+ "Item 2": {},
+ "Item 3": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test with non-existent XPath
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/div/nonexistent"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ require.Equal(t, map[string]struct{}{}, result)
+}
+
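+// TestRequest_ExtractJSON verifies jq-style JSON extraction of array fields and
+// nested values, and that invalid JSON yields an empty result set.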
+func TestRequest_ExtractJSON(t *testing.T) {
+ request := &Request{}
+
+ // Test JSON content extraction
+ jsonContent := `{
+ "users": [
+ {"id": 1, "name": "John", "email": "john@example.com"},
+ {"id": 2, "name": "Jane", "email": "jane@example.com"},
+ {"id": 3, "name": "Bob", "email": "bob@example.com"}
+ ],
+ "metadata": {
+ "total": 3,
+ "page": 1
+ }
+ }`
+
+ data := map[string]interface{}{
+ "data": jsonContent,
+ }
+
+ // Test extracting user IDs
+ extractor := &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".users[].id"},
+ }
+ err := extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result := request.Extract(data, extractor)
+ expected := map[string]struct{}{
+ "1": {},
+ "2": {},
+ "3": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test extracting user names
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".users[].name"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "John": {},
+ "Jane": {},
+ "Bob": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test extracting nested values
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".metadata.total"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{"3": {}}
+ require.Equal(t, expected, result)
+
+ // Test extracting emails
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".users[].email"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "john@example.com": {},
+ "jane@example.com": {},
+ "bob@example.com": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test with invalid JSON
+ invalidJSON := `{"invalid": json}`
+ data = map[string]interface{}{
+ "data": invalidJSON,
+ }
+
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".invalid"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ require.Equal(t, map[string]struct{}{}, result)
+
+	// Test with a non-existent path (the corpus here is still the invalid JSON above)
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".nonexistent"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ require.Equal(t, map[string]struct{}{}, result)
+}
+
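+// TestRequest_MatchXPath verifies that the XPath matcher reports a match for an
+// existing element and no match for a missing one.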
+func TestRequest_MatchXPath(t *testing.T) {
+ request := &Request{}
+
+	htmlContent := `<html>
+	<head>
+		<title>Test Page</title>
+	</head>
+	<body>
+		<div>
+			<h1>Welcome</h1>
+		</div>
+	</body>
+</html>`
+
+ data := map[string]interface{}{
+ "data": htmlContent,
+ }
+
+ // Test XPath matcher with existing element
+ matcher := &matchers.Matcher{
+ Type: matchers.MatcherTypeHolder{MatcherType: matchers.XPathMatcher},
+ XPath: []string{"/html/body/div/h1"},
+ Condition: "and",
+ }
+ err := matcher.CompileMatchers()
+ require.Nil(t, err)
+
+ matched, snippets := request.Match(data, matcher)
+ require.True(t, matched)
+ require.Empty(t, snippets)
+
+ // Test XPath matcher with non-existent element
+ matcher = &matchers.Matcher{
+ Type: matchers.MatcherTypeHolder{MatcherType: matchers.XPathMatcher},
+ XPath: []string{"/html/body/div/nonexistent"},
+ Condition: "and",
+ }
+ err = matcher.CompileMatchers()
+ require.Nil(t, err)
+
+ matched, snippets = request.Match(data, matcher)
+ require.False(t, matched)
+ require.Empty(t, snippets)
+}
+
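+// TestRequest_getMatchPart verifies the part-name aliases ("", "body", "resp" all
+// map to "data") and the failure case for an unknown part.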
+func TestRequest_getMatchPart(t *testing.T) {
+ request := &Request{}
+
+ data := map[string]interface{}{
+ "data": "body content",
+ "header": "header content",
+ "history": "history content",
+ }
+
+ // Test default part (should map to "data")
+ part, ok := request.getMatchPart("", data)
+ require.True(t, ok)
+ require.Equal(t, "body content", part)
+
+ // Test "body" part (should map to "data")
+ part, ok = request.getMatchPart("body", data)
+ require.True(t, ok)
+ require.Equal(t, "body content", part)
+
+ // Test "resp" part (should map to "data")
+ part, ok = request.getMatchPart("resp", data)
+ require.True(t, ok)
+ require.Equal(t, "body content", part)
+
+ // Test "header" part
+ part, ok = request.getMatchPart("header", data)
+ require.True(t, ok)
+ require.Equal(t, "header content", part)
+
+ // Test "history" part
+ part, ok = request.getMatchPart("history", data)
+ require.True(t, ok)
+ require.Equal(t, "history content", part)
+
+ // Test non-existent part
+ part, ok = request.getMatchPart("nonexistent", data)
+ require.False(t, ok)
+ require.Equal(t, "", part)
+}
+
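+// TestRequest_ExtractWithDifferentParts verifies that extractors honor the Part
+// field and run against the "data", "header", and "history" parts independently.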
+func TestRequest_ExtractWithDifferentParts(t *testing.T) {
+ request := &Request{}
+
+ // Test extracting from different parts
+	htmlContent := `<html><body><div><h1>Title</h1></div></body></html>`
+ jsonContent := `{"id": 123}`
+
+ data := map[string]interface{}{
+ "data": htmlContent,
+ "header": jsonContent,
+ "history": htmlContent,
+ }
+
+ // Test XPath extractor from "data" part
+ extractor := &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/div/h1"},
+ Part: "data",
+ }
+ err := extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result := request.Extract(data, extractor)
+ expected := map[string]struct{}{"Title": {}}
+ require.Equal(t, expected, result)
+
+ // Test JSON extractor from "header" part
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".id"},
+ Part: "header",
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{"123": {}}
+ require.Equal(t, expected, result)
+
+ // Test XPath extractor from "history" part
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/div/h1"},
+ Part: "history",
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{"Title": {}}
+ require.Equal(t, expected, result)
+}
+
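+// TestRequest_ExtractWithComplexJSON verifies extraction from deeply nested JSON,
+// including values collected across nested arrays.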
+func TestRequest_ExtractWithComplexJSON(t *testing.T) {
+ request := &Request{}
+
+ // Test with complex nested JSON structure
+ jsonContent := `{
+ "api": {
+ "version": "1.0",
+ "endpoints": [
+ {
+ "path": "/users",
+ "method": "GET",
+ "responses": [
+ {"code": 200, "description": "Success"},
+ {"code": 404, "description": "Not Found"}
+ ]
+ },
+ {
+ "path": "/posts",
+ "method": "POST",
+ "responses": [
+ {"code": 201, "description": "Created"},
+ {"code": 400, "description": "Bad Request"}
+ ]
+ }
+ ]
+ }
+ }`
+
+ data := map[string]interface{}{
+ "data": jsonContent,
+ }
+
+ // Test extracting API version
+ extractor := &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".api.version"},
+ }
+ err := extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result := request.Extract(data, extractor)
+ expected := map[string]struct{}{"1.0": {}}
+ require.Equal(t, expected, result)
+
+ // Test extracting all endpoint paths
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".api.endpoints[].path"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "/users": {},
+ "/posts": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test extracting all response codes
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".api.endpoints[].responses[].code"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "200": {},
+ "404": {},
+ "201": {},
+ "400": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test extracting response descriptions
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.JSONExtractor},
+ JSON: []string{".api.endpoints[].responses[].description"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "Success": {},
+ "Not Found": {},
+ "Created": {},
+ "Bad Request": {},
+ }
+ require.Equal(t, expected, result)
+}
+
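+// TestRequest_ExtractWithComplexHTML verifies XPath extraction from a larger page:
+// link text, element text filtered by class, and data-* attribute values.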
+func TestRequest_ExtractWithComplexHTML(t *testing.T) {
+ request := &Request{}
+
+ // Test with complex HTML structure
+	htmlContent := `<html>
+	<head>
+		<title>E-commerce Site</title>
+	</head>
+	<body>
+		<header>
+			<nav>
+				<ul>
+					<li><a href="#">Home</a></li>
+					<li><a href="#">Products</a></li>
+					<li><a href="#">About</a></li>
+				</ul>
+			</nav>
+		</header>
+		<main>
+			<section>
+				<div>
+					<div class="product" data-id="1"><h3>Laptop</h3><p class="price">$999</p><span class="rating">4.5</span></div>
+					<div class="product" data-id="2"><h3>Phone</h3><p class="price">$599</p><span class="rating">4.2</span></div>
+					<div class="product" data-id="3"><h3>Tablet</h3><p class="price">$399</p><span class="rating">4.0</span></div>
+				</div>
+			</section>
+		</main>
+	</body>
+</html>`
+
+ data := map[string]interface{}{
+ "data": htmlContent,
+ }
+
+ // Test extracting navigation links
+ extractor := &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/header/nav/ul/li/a"},
+ }
+ err := extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result := request.Extract(data, extractor)
+ expected := map[string]struct{}{
+ "Home": {},
+ "Products": {},
+ "About": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test extracting product names
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/main/section/div/div/h3"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "Laptop": {},
+ "Phone": {},
+ "Tablet": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test extracting product prices
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/main/section/div/div/p[@class='price']"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "$999": {},
+ "$599": {},
+ "$399": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test extracting product ratings
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/main/section/div/div/span[@class='rating']"},
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "4.5": {},
+ "4.2": {},
+ "4.0": {},
+ }
+ require.Equal(t, expected, result)
+
+ // Test extracting data attributes
+ extractor = &extractors.Extractor{
+ Type: extractors.ExtractorTypeHolder{ExtractorType: extractors.XPathExtractor},
+ XPath: []string{"/html/body/main/section/div/div[@class='product']"},
+ Attribute: "data-id",
+ }
+ err = extractor.CompileExtractors()
+ require.Nil(t, err)
+
+ result = request.Extract(data, extractor)
+ expected = map[string]struct{}{
+ "1": {},
+ "2": {},
+ "3": {},
+ }
+ require.Equal(t, expected, result)
+}