fix: use --file-patterns flag for all post analyzers (#7365)

This commit is contained in:
DmitriyLewen
2025-03-17 16:12:10 +06:00
committed by GitHub
parent e8c32dedaa
commit 8b88238f07
19 changed files with 100 additions and 43 deletions

View File

@@ -131,7 +131,7 @@ type AnalyzerGroup struct {
logger *log.Logger
analyzers []analyzer
postAnalyzers []PostAnalyzer
filePatterns map[Type][]*regexp.Regexp
filePatterns map[Type]FilePatterns
detectionPriority types.DetectionPriority
}
@@ -149,8 +149,20 @@ type AnalysisInput struct {
}
type PostAnalysisInput struct {
FS fs.FS
Options AnalysisOptions
FS fs.FS
FilePatterns FilePatterns
Options AnalysisOptions
}
type FilePatterns []*regexp.Regexp
func (f FilePatterns) Match(filePath string) bool {
for _, pattern := range f {
if pattern.MatchString(filePath) {
return true
}
}
return false
}
type AnalysisOptions struct {
@@ -333,7 +345,7 @@ func NewAnalyzerGroup(opts AnalyzerOptions) (AnalyzerGroup, error) {
group := AnalyzerGroup{
logger: log.WithPrefix("analyzer"),
filePatterns: make(map[Type][]*regexp.Regexp),
filePatterns: make(map[Type]FilePatterns),
detectionPriority: opts.DetectionPriority,
}
for _, p := range opts.FilePatterns {
@@ -349,10 +361,6 @@ func NewAnalyzerGroup(opts AnalyzerOptions) (AnalyzerGroup, error) {
return group, xerrors.Errorf("invalid file regexp (%s): %w", p, err)
}
if _, ok := group.filePatterns[Type(fileType)]; !ok {
group.filePatterns[Type(fileType)] = []*regexp.Regexp{}
}
group.filePatterns[Type(fileType)] = append(group.filePatterns[Type(fileType)], r)
}
@@ -422,7 +430,7 @@ func (ag AnalyzerGroup) AnalyzeFile(ctx context.Context, wg *sync.WaitGroup, lim
continue
}
if !ag.filePatternMatch(a.Type(), cleanPath) && !a.Required(cleanPath, info) {
if !ag.filePatterns[a.Type()].Match(cleanPath) && !a.Required(cleanPath, info) {
continue
}
rc, err := opener()
@@ -468,7 +476,7 @@ func (ag AnalyzerGroup) RequiredPostAnalyzers(filePath string, info os.FileInfo)
}
var postAnalyzerTypes []Type
for _, a := range ag.postAnalyzers {
if ag.filePatternMatch(a.Type(), filePath) || a.Required(filePath, info) {
if ag.filePatterns[a.Type()].Match(filePath) || a.Required(filePath, info) {
postAnalyzerTypes = append(postAnalyzerTypes, a.Type())
}
}
@@ -479,7 +487,8 @@ func (ag AnalyzerGroup) RequiredPostAnalyzers(filePath string, info os.FileInfo)
// and passes it to the respective post-analyzer.
// The obtained results are merged into the "result".
// This function may be called concurrently and must be thread-safe.
func (ag AnalyzerGroup) PostAnalyze(ctx context.Context, compositeFS *CompositeFS, result *AnalysisResult, opts AnalysisOptions) error {
func (ag AnalyzerGroup) PostAnalyze(ctx context.Context, compositeFS *CompositeFS, result *AnalysisResult,
opts AnalysisOptions) error {
for _, a := range ag.postAnalyzers {
fsys, ok := compositeFS.Get(a.Type())
if !ok {
@@ -510,8 +519,9 @@ func (ag AnalyzerGroup) PostAnalyze(ctx context.Context, compositeFS *CompositeF
}
res, err := a.PostAnalyze(ctx, PostAnalysisInput{
FS: filteredFS,
Options: opts,
FS: filteredFS,
FilePatterns: ag.filePatterns[a.Type()],
Options: opts,
})
if err != nil {
return xerrors.Errorf("post analysis error: %w", err)
@@ -526,15 +536,6 @@ func (ag AnalyzerGroup) PostAnalyzerFS() (*CompositeFS, error) {
return NewCompositeFS()
}
// filePatternMatch reports whether filePath matches any user-defined file
// pattern registered for the given analyzer type. If no patterns are
// registered for analyzerType, it returns false.
func (ag AnalyzerGroup) filePatternMatch(analyzerType Type, filePath string) bool {
	patterns := ag.filePatterns[analyzerType]
	for i := range patterns {
		if patterns[i].MatchString(filePath) {
			return true
		}
	}
	return false
}
// StaticPaths collects static paths from all enabled analyzers
// It returns the collected paths and a boolean indicating if all enabled analyzers implement StaticPathAnalyzer
func (ag AnalyzerGroup) StaticPaths(disabled []Type) ([]string, bool) {
@@ -546,6 +547,12 @@ func (ag AnalyzerGroup) StaticPaths(disabled []Type) ([]string, bool) {
continue
}
// We can't be sure that the file pattern uses a static path.
// So we don't need to use `StaticPath` logic if any enabled analyzer has a file pattern.
if _, ok := ag.filePatterns[a.Type()]; ok {
return nil, false
}
// If any analyzer doesn't implement StaticPathAnalyzer, return false
staticPathAnalyzer, ok := a.(StaticPathAnalyzer)
if !ok {

View File

@@ -561,6 +561,7 @@ func TestAnalyzerGroup_PostAnalyze(t *testing.T) {
name string
dir string
analyzerType analyzer.Type
filePatterns []string
want *analyzer.AnalysisResult
}{
{
@@ -584,11 +585,25 @@ func TestAnalyzerGroup_PostAnalyze(t *testing.T) {
},
},
{
name: "poetry files with invalid file",
dir: "testdata/post-apps/poetry/",
name: "poetry files with file from pattern and invalid file",
dir: "testdata/post-apps/poetry/",
filePatterns: []string{
"poetry:poetry-pattern.lock",
},
analyzerType: analyzer.TypePoetry,
want: &analyzer.AnalysisResult{
Applications: []types.Application{
{
Type: types.Poetry,
FilePath: "testdata/post-apps/poetry/happy/poetry-pattern.lock",
Packages: types.Packages{
{
ID: "certifi@2022.12.7",
Name: "certifi",
Version: "2022.12.7",
},
},
},
{
Type: types.Poetry,
FilePath: "testdata/post-apps/poetry/happy/poetry.lock",
@@ -606,7 +621,9 @@ func TestAnalyzerGroup_PostAnalyze(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
a, err := analyzer.NewAnalyzerGroup(analyzer.AnalyzerOptions{})
a, err := analyzer.NewAnalyzerGroup(analyzer.AnalyzerOptions{
FilePatterns: tt.filePatterns,
})
require.NoError(t, err)
// Create a virtual filesystem

View File

@@ -45,7 +45,7 @@ func newConanLockAnalyzer(_ analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, er
func (a conanLockAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) {
required := func(filePath string, d fs.DirEntry) bool {
// we need all file got from `a.Required` function (conan.lock files) and from file-patterns.
// Parse all required files: `conan.lock` (from a.Required func) + input.FilePatterns.Match()
return true
}

View File

@@ -55,7 +55,8 @@ func (a pubSpecLockAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostA
}
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.PubSpecLock
// Parse all required files: `pubspec.lock` (from a.Required func) + input.FilePatterns.Match()
return true
}
err = fsutils.WalkDir(input.FS, ".", required, func(path string, _ fs.DirEntry, r io.Reader) error {

View File

@@ -59,9 +59,8 @@ func (a *nugetLibraryAnalyzer) PostAnalyze(_ context.Context, input analyzer.Pos
a.logger.Debug("The nuget packages directory couldn't be found. License search disabled")
}
// We saved only config and lock files in the FS,
// so we need to parse all saved files
required := func(path string, d fs.DirEntry) bool {
// Parse all required files: `packages.lock.json`, `packages.config` (from a.Required func) + input.FilePatterns.Match()
return true
}

View File

@@ -68,7 +68,7 @@ func (a *gomodAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalys
var apps []types.Application
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.GoMod
return filepath.Base(path) == types.GoMod || input.FilePatterns.Match(path)
}
err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, _ io.Reader) error {

View File

@@ -49,7 +49,8 @@ func (a gradleLockAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAn
}
required := func(path string, d fs.DirEntry) bool {
return a.Required(path, nil)
// Parse all required files: `*gradle.lockfile` (from a.Required func) + input.FilePatterns.Match()
return true
}
var apps []types.Application

View File

@@ -54,7 +54,7 @@ func (a juliaAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysi
var apps []types.Application
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.JuliaManifest
return filepath.Base(path) == types.JuliaManifest || input.FilePatterns.Match(path)
}
err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r io.Reader) error {

View File

@@ -47,7 +47,7 @@ func newNpmLibraryAnalyzer(_ analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, e
func (a npmLibraryAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) {
// Parse package-lock.json
required := func(path string, _ fs.DirEntry) bool {
return filepath.Base(path) == types.NpmPkgLock
return filepath.Base(path) == types.NpmPkgLock || input.FilePatterns.Match(path)
}
var apps []types.Application

View File

@@ -45,7 +45,7 @@ func (a pnpmAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysis
var apps []types.Application
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.PnpmLock
return filepath.Base(path) == types.PnpmLock || input.FilePatterns.Match(path)
}
err := fsutils.WalkDir(input.FS, ".", required, func(filePath string, d fs.DirEntry, r io.Reader) error {

View File

@@ -71,7 +71,7 @@ func (a yarnAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysis
var apps []types.Application
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.YarnLock
return filepath.Base(path) == types.YarnLock || input.FilePatterns.Match(path)
}
err := fsutils.WalkDir(input.FS, ".", required, func(filePath string, d fs.DirEntry, r io.Reader) error {

View File

@@ -47,7 +47,7 @@ func (a composerAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnal
var apps []types.Application
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.ComposerLock
return filepath.Base(path) == types.ComposerLock || input.FilePatterns.Match(path)
}
err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r io.Reader) error {

View File

@@ -63,7 +63,7 @@ func (a packagingAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAna
var apps []types.Application
required := func(path string, _ fs.DirEntry) bool {
return filepath.Base(path) == "METADATA" || isEggFile(path)
return filepath.Base(path) == "METADATA" || isEggFile(path) || input.FilePatterns.Match(path)
}
err := fsutils.WalkDir(input.FS, ".", required, func(filePath string, d fs.DirEntry, r io.Reader) error {

View File

@@ -59,8 +59,8 @@ func (a pipLibraryAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAn
a.logger.Warn("Unable to find python `site-packages` directory. License detection is skipped.", log.Err(err))
}
// We only saved the `requirements.txt` files
required := func(_ string, _ fs.DirEntry) bool {
// Parse all required files: `requirements.txt` (from a.Required func) + input.FilePatterns.Match()
return true
}

View File

@@ -45,7 +45,7 @@ func (a poetryAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalys
var apps []types.Application
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.PoetryLock
return filepath.Base(path) == types.PoetryLock || input.FilePatterns.Match(path)
}
err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r io.Reader) error {

View File

@@ -37,8 +37,9 @@ func NewUvAnalyzer(_ analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error) {
func (a *uvAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) {
var apps []types.Application
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.UvLock
required := func(_ string, _ fs.DirEntry) bool {
// Parse all required files: `uv.lock` (from a.Required func) + input.FilePatterns.Match()
return true
}
err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r io.Reader) error {

View File

@@ -57,7 +57,7 @@ func (a cargoAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysi
var apps []types.Application
required := func(path string, d fs.DirEntry) bool {
return filepath.Base(path) == types.CargoLock
return filepath.Base(path) == types.CargoLock || input.FilePatterns.Match(path)
}
err := fsutils.WalkDir(input.FS, ".", required, func(filePath string, d fs.DirEntry, r io.Reader) error {

View File

@@ -0,0 +1,18 @@
# This file is automatically @generated by Poetry and should not be changed by hand.
[[package]]
name = "certifi"
version = "2022.12.7"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = true
python-versions = ">=3.6"
files = [
{file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"},
{file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"},
]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
content-hash = "0ee6cb4bc2d84091d5dcb0a0110a65f244987ed427933b2f49949195e3ef69c7"

View File

@@ -98,6 +98,7 @@ func TestArtifact_Inspect(t *testing.T) {
analyzer.TypeAlpine,
analyzer.TypeApk,
analyzer.TypePip,
analyzer.TypeNpmPkgLock,
},
},
wantBlobs: []cachetest.WantBlob{
@@ -2468,6 +2469,7 @@ func TestAnalyzerGroup_StaticPaths(t *testing.T) {
tests := []struct {
name string
disabledAnalyzers []analyzer.Type
filePatterns []string
want []string
wantAllStatic bool
}{
@@ -2480,6 +2482,15 @@ func TestAnalyzerGroup_StaticPaths(t *testing.T) {
},
wantAllStatic: true,
},
{
name: "all analyzers implement StaticPathAnalyzer, but there is file pattern",
disabledAnalyzers: append(analyzer.TypeConfigFiles, analyzer.TypePip, analyzer.TypeSecret),
filePatterns: []string{
"alpine:etc/alpine-release-custom",
},
want: []string{},
wantAllStatic: false,
},
{
name: "some analyzers don't implement StaticPathAnalyzer",
want: []string{},
@@ -2505,7 +2516,9 @@ func TestAnalyzerGroup_StaticPaths(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a new analyzer group
a, err := analyzer.NewAnalyzerGroup(analyzer.AnalyzerOptions{})
a, err := analyzer.NewAnalyzerGroup(analyzer.AnalyzerOptions{
FilePatterns: tt.filePatterns,
})
require.NoError(t, err)
// Get static paths