fix(secret): improve word boundary detection for Hugging Face tokens (#10046)

This commit is contained in:
DmitriyLewen
2026-01-14 13:32:29 +06:00
committed by GitHub
parent 3c0ab97e10
commit cdb28eeeb1
4 changed files with 18 additions and 3 deletions

View File

@@ -77,7 +77,8 @@ const (
quote = `["']?`
connect = `\s*(:|=>|=)?\s*`
endSecret = `[.,]?(\s+|$)`
startWord = "([^0-9a-zA-Z]|^)"
startWord = "([^0-9a-zA-Z_]|^)"
endWord = "([^0-9a-zA-Z_]|$)"
aws = `aws_?`
)
@@ -175,7 +176,7 @@ var builtinRules = []Rule{
Category: CategoryHuggingFace,
Severity: "CRITICAL",
Title: "Hugging Face Access Token",
Regex: MustCompileWithoutWordPrefix(`?P<secret>hf_[A-Za-z0-9]{34,40}`),
Regex: MustCompileWithBoundaries(`?P<secret>hf_[A-Za-z0-9]{34,40}`),
SecretGroupName: "secret",
Keywords: []string{"hf_"},
},

View File

@@ -90,6 +90,10 @@ func MustCompileWithoutWordPrefix(str string) *Regexp {
return MustCompile(fmt.Sprintf("%s(%s)", startWord, str))
}
// MustCompileWithBoundaries compiles str wrapped in its own group, with the
// startWord pattern placed before it and the endWord pattern placed after it.
// Compilation is delegated to MustCompile.
//
// NOTE(review): startWord and endWord are ordinary groups, not zero-width
// assertions, so the overall match may include one neighbouring character on
// either side — callers presumably read the secret from a named group inside
// str; confirm at the call sites.
func MustCompileWithBoundaries(str string) *Regexp {
	pattern := startWord + "(" + str + ")" + endWord
	return MustCompile(pattern)
}
// MustCompile compiles str with regexp.MustCompile and wraps the result in a
// Regexp. Like regexp.MustCompile, it panics if str is not a valid expression.
func MustCompile(str string) *Regexp {
	compiled := regexp.MustCompile(str)
	return &Regexp{compiled}
}

View File

@@ -902,6 +902,14 @@ func TestSecretScanner(t *testing.T) {
FirstCause: true,
LastCause: true,
},
{
Number: 2,
Content: "",
Highlighted: "",
IsCause: false,
FirstCause: false,
LastCause: false,
},
},
},
Offset: 18,

View File

@@ -1 +1,3 @@
HF_example_token: hf_Testpoiqazwsxedcrfvtgbyhn12345ujmik6789
HF_example_token: hf_Testpoiqazwsxedcrfvtgbyhn12345ujmik6789
_call_hf_processorz1DeepseekOCRMultiModalProcessor._call_hf_processor