multiple bug fixes + performance improvements (#5148)

* prototype errkit

* complete errkit implementation

* add cause to all timeouts

* fix request timeout annotation @timeout

* increase responseHeaderTimeout to 8 for stability

* rawhttp error related improvements

* feat: add port status caching

* add port status caching to http

* migrate to new utils/errkit

* remote dialinterface + error cause

* debug dir support using .gitignore debug-*

* make nuclei easy to debug

* debug dir update .gitignore

* temp change (to revert)

* Revert "temp change (to revert)"

This reverts commit d3131f7777.

* use available context instead of new one

* bump fastdialer

* fix hosterrorscache + misc improvements

* add 'address' field in error log

* fix js vague errors + pgwrap driver

* fix max host error + misc updates

* update tests as per changes

* fix request annotation context

* remove closed dialer reference

* fix sdk panic issue

* bump retryablehttp-go,utils,fastdialer

---------

Co-authored-by: Sandeep Singh <sandeep@projectdiscovery.io>
This commit is contained in:
Tarun Koyalwar
2024-05-25 00:29:04 +05:30
committed by GitHub
parent 46e4810efb
commit 23bd0336fb
38 changed files with 768 additions and 205 deletions

View File

@@ -10,17 +10,25 @@ import (
"github.com/bluele/gcache"
"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/config"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
"github.com/projectdiscovery/nuclei/v3/pkg/types/nucleierr"
"github.com/projectdiscovery/utils/errkit"
)
// CacheInterface defines the signature of the hosterrorscache so that
// users of Nuclei as embedded lib may implement their own cache
type CacheInterface interface {
SetVerbose(verbose bool) // log verbosely
Close() // close the cache
Check(value string) bool // return true if the host should be skipped
MarkFailed(value string, err error) // record a failure (and cause) for the host
SetVerbose(verbose bool) // log verbosely
Close() // close the cache
Check(ctx *contextargs.Context) bool // return true if the host should be skipped
MarkFailed(ctx *contextargs.Context, err error) // record a failure (and cause) for the host
}
var (
_ CacheInterface = (*Cache)(nil)
)
// Cache is a cache for host based errors. It allows skipping
// certain hosts based on an error threshold.
//
@@ -34,8 +42,10 @@ type Cache struct {
}
type cacheItem struct {
errors atomic.Int32
sync.Once
errors atomic.Int32
isPermanentErr bool
cause error // optional cause
}
const DefaultMaxHostsCount = 10000
@@ -55,6 +65,16 @@ func (c *Cache) SetVerbose(verbose bool) {
// Close closes the host errors cache
func (c *Cache) Close() {
if config.DefaultConfig.IsDebugArgEnabled(config.DebugArgHostErrorStats) {
items := c.failedTargets.GetALL(false)
for k, v := range items {
val, ok := v.(*cacheItem)
if !ok {
continue
}
gologger.Info().Label("MaxHostErrorStats").Msgf("Host: %s, Errors: %d", k, val.errors.Load())
}
}
c.failedTargets.Purge()
}
@@ -88,14 +108,19 @@ func (c *Cache) normalizeCacheValue(value string) string {
// - URL: https?:// type
// - Host:port type
// - host type
func (c *Cache) Check(value string) bool {
finalValue := c.normalizeCacheValue(value)
func (c *Cache) Check(ctx *contextargs.Context) bool {
finalValue := c.GetKeyFromContext(ctx, nil)
existingCacheItem, err := c.failedTargets.GetIFPresent(finalValue)
if err != nil {
return false
}
existingCacheItemValue := existingCacheItem.(*cacheItem)
if existingCacheItemValue.isPermanentErr {
// skipping permanent errors is expected so verbose instead of info
gologger.Verbose().Msgf("Skipped %s from target list as found unresponsive permanently: %s", finalValue, existingCacheItemValue.cause)
return true
}
if existingCacheItemValue.errors.Load() >= int32(c.MaxHostError) {
existingCacheItemValue.Do(func() {
@@ -107,15 +132,22 @@ func (c *Cache) Check(value string) bool {
}
// MarkFailed marks a host as failed previously
func (c *Cache) MarkFailed(value string, err error) {
func (c *Cache) MarkFailed(ctx *contextargs.Context, err error) {
if !c.checkError(err) {
return
}
finalValue := c.normalizeCacheValue(value)
finalValue := c.GetKeyFromContext(ctx, err)
existingCacheItem, err := c.failedTargets.GetIFPresent(finalValue)
if err != nil || existingCacheItem == nil {
newItem := &cacheItem{errors: atomic.Int32{}}
newItem.errors.Store(1)
if errkit.IsKind(err, errkit.ErrKindNetworkPermanent) {
// skip this address altogether
// permanent errors are always permanent hence this is created once
// and never updated so no need to synchronize
newItem.isPermanentErr = true
newItem.cause = err
}
_ = c.failedTargets.Set(finalValue, newItem)
return
}
@@ -124,19 +156,77 @@ func (c *Cache) MarkFailed(value string, err error) {
_ = c.failedTargets.Set(finalValue, existingCacheItemValue)
}
var reCheckError = regexp.MustCompile(`(no address found for host|Client\.Timeout exceeded while awaiting headers|could not resolve host|connection refused|connection reset by peer|i/o timeout|could not connect to any address found for host|timeout awaiting response headers)`)
// GetKeyFromContext returns the key for the cache from the context
func (c *Cache) GetKeyFromContext(ctx *contextargs.Context, err error) string {
// Note:
// ideally any changes made to remote addr in template like {{Hostname}}:81 etc
// should be reflected in contextargs but it is not yet reflected in some cases
// and needs refactor of ScanContext + ContextArgs to achieve that
// i.e why we use real address from error if present
address := ctx.MetaInput.Address()
// get address override from error
if err != nil {
tmp := errkit.GetAttrValue(err, "address")
if tmp.Any() != nil {
address = tmp.String()
}
}
finalValue := c.normalizeCacheValue(address)
return finalValue
}
var reCheckError = regexp.MustCompile(`(no address found for host|could not resolve host|connection refused|connection reset by peer|could not connect to any address found for host|timeout awaiting response headers)`)
// checkError checks if an error represents a type that should be
// added to the host skipping table.
// it first parses error and extracts the cause and checks for blacklisted
// or common errors that should be skipped
func (c *Cache) checkError(err error) bool {
if err == nil {
return false
}
errString := err.Error()
for _, msg := range c.TrackError {
if strings.Contains(errString, msg) {
return true
kind := errkit.GetErrorKind(err, nucleierr.ErrTemplateLogic)
switch kind {
case nucleierr.ErrTemplateLogic:
// these are errors that are not related to the target
// and are due to template logic
return false
case errkit.ErrKindNetworkTemporary:
// these should not be counted as host errors
return false
case errkit.ErrKindNetworkPermanent:
// these should be counted as host errors
return true
case errkit.ErrKindDeadline:
// these should not be counted as host errors
return false
default:
// parse error for furthur processing
errX := errkit.FromError(err)
tmp := errX.Cause()
cause := tmp.Error()
if strings.Contains(cause, "ReadStatusLine:") && strings.Contains(cause, "read: connection reset by peer") {
// this is a FP and should not be counted as a host error
// because server closes connection when it reads corrupted bytes which we send via rawhttp
return false
}
if strings.HasPrefix(cause, "ReadStatusLine:") {
// error is present in last part when using rawhttp
// this will be fixed once errkit is used everywhere
lastIndex := strings.LastIndex(cause, ":")
if lastIndex == -1 {
lastIndex = 0
}
if lastIndex >= len(cause)-1 {
lastIndex = 0
}
cause = cause[lastIndex+1:]
}
for _, msg := range c.TrackError {
if strings.Contains(cause, msg) {
return true
}
}
return reCheckError.MatchString(cause)
}
return reCheckError.MatchString(errString)
}

View File

@@ -1,11 +1,13 @@
package hosterrorscache
import (
"context"
"fmt"
"sync"
"sync/atomic"
"testing"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
"github.com/stretchr/testify/require"
)
@@ -13,8 +15,8 @@ func TestCacheCheck(t *testing.T) {
cache := New(3, DefaultMaxHostsCount, nil)
for i := 0; i < 100; i++ {
cache.MarkFailed("test", fmt.Errorf("could not resolve host"))
got := cache.Check("test")
cache.MarkFailed(newCtxArgs("test"), fmt.Errorf("could not resolve host"))
got := cache.Check(newCtxArgs("test"))
if i < 2 {
// till 3 the host is not flagged to skip
require.False(t, got)
@@ -24,7 +26,7 @@ func TestCacheCheck(t *testing.T) {
}
}
value := cache.Check("test")
value := cache.Check(newCtxArgs("test"))
require.Equal(t, true, value, "could not get checked value")
}
@@ -32,8 +34,8 @@ func TestTrackErrors(t *testing.T) {
cache := New(3, DefaultMaxHostsCount, []string{"custom error"})
for i := 0; i < 100; i++ {
cache.MarkFailed("custom", fmt.Errorf("got: nested: custom error"))
got := cache.Check("custom")
cache.MarkFailed(newCtxArgs("custom"), fmt.Errorf("got: nested: custom error"))
got := cache.Check(newCtxArgs("custom"))
if i < 2 {
// till 3 the host is not flagged to skip
require.False(t, got)
@@ -42,7 +44,7 @@ func TestTrackErrors(t *testing.T) {
require.True(t, got)
}
}
value := cache.Check("custom")
value := cache.Check(newCtxArgs("custom"))
require.Equal(t, true, value, "could not get checked value")
}
@@ -73,16 +75,18 @@ func TestCacheMarkFailed(t *testing.T) {
tests := []struct {
host string
expected int
expected int32
}{
{"http://example.com:80", 1},
{"example.com:80", 2},
{"example.com", 1},
// earlier if port is not provided then port was omitted
// but from now it will default to appropriate http scheme based port with 80 as default
{"example.com:443", 1},
}
for _, test := range tests {
normalizedCacheValue := cache.normalizeCacheValue(test.host)
cache.MarkFailed(test.host, fmt.Errorf("no address found for host"))
normalizedCacheValue := cache.GetKeyFromContext(newCtxArgs(test.host), nil)
cache.MarkFailed(newCtxArgs(test.host), fmt.Errorf("no address found for host"))
failedTarget, err := cache.failedTargets.Get(normalizedCacheValue)
require.Nil(t, err)
require.NotNil(t, failedTarget)
@@ -102,7 +106,7 @@ func TestCacheMarkFailedConcurrent(t *testing.T) {
}{
{"http://example.com:80", 200},
{"example.com:80", 200},
{"example.com", 100},
{"example.com:443", 100},
}
// the cache is not atomic during items creation, so we pre-create them with counter to zero
@@ -120,14 +124,14 @@ func TestCacheMarkFailedConcurrent(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
cache.MarkFailed(currentTest.host, fmt.Errorf("could not resolve host"))
cache.MarkFailed(newCtxArgs(currentTest.host), fmt.Errorf("could not resolve host"))
}()
}
}
wg.Wait()
for _, test := range tests {
require.True(t, cache.Check(test.host))
require.True(t, cache.Check(newCtxArgs(test.host)))
normalizedCacheValue := cache.normalizeCacheValue(test.host)
failedTarget, err := cache.failedTargets.Get(normalizedCacheValue)
@@ -139,3 +143,8 @@ func TestCacheMarkFailedConcurrent(t *testing.T) {
require.EqualValues(t, test.expected, value.errors.Load())
}
}
func newCtxArgs(value string) *contextargs.Context {
ctx := contextargs.NewWithInput(context.TODO(), value)
return ctx
}