var (
	// dpkgSrcCaptureRegexp matches a run of non-whitespace characters (group
	// "name"), optionally followed by a space and a parenthesised string
	// (group "version"), for example "tar (1.29b-2)".
	//
	// Both groups must be named with the `(?P<name>...)` form: Go's regexp
	// syntax rejects a bare `(?P...`, which would make MustCompile panic during
	// package initialisation.
	// NOTE(review): the group names are presumably the keys looked up through
	// dpkgSrcCaptureRegexpNames - confirm against the caller.
	//
	// dpkgSrcCaptureRegexpNames holds the sub-expression names of the pattern,
	// indexed by capture-group position (unnamed groups yield "").
	dpkgSrcCaptureRegexp      = regexp.MustCompile(`(?P<name>[^\s]*)( \((?P<version>.*)\))?`)
	dpkgSrcCaptureRegexpNames = dpkgSrcCaptureRegexp.SubexpNames()

	// thirdPartyMaintainerPatterns contains patterns that indicate a package is from a third-party repository.
	// Packages with maintainers matching these patterns will NOT have their InstalledFiles tracked,
	// allowing language scanners to properly analyze files installed by those packages.
	// See https://github.com/aquasecurity/trivy/issues/9916 for more details.
	thirdPartyMaintainerPatterns = []string{
		// Container & orchestration
		"support@docker.com", // Docker

		// Cloud providers & infrastructure
		"@nvidia.com",              // NVIDIA CUDA
		"Google Cloud CLI Authors", // Google Cloud SDK
		"sapmachine@sap.com",       // SAP Machine JDK
		"@hashicorp.com",           // HashiCorp (Terraform, Vault, Consul, etc.)
		"@microsoft.com",           // Microsoft (VS Code, Azure CLI, .NET, etc.)

		// Databases
		"@mongodb.com",                 // MongoDB
		"developers@lists.mariadb.org", // MariaDB
		"dev@couchdb.apache.org",       // Apache CouchDB
		"info@elastic.co",              // Elastic (Elasticsearch, Kibana, etc.)

		// Web servers & API gateways
		"nginx-packaging@f5.com", // NGINX (from nginx.org, not Debian)
		"@konghq.com",            // Kong
		"@cloudflare.com",        // Cloudflare (cloudflared, WARP)

		// Monitoring & observability
		"support@influxdb.com", // InfluxData (InfluxDB, Telegraf)
		"support@gitlab.com",   // GitLab
		"contact@grafana.com",  // Grafana Labs
		"@datadoghq.com",       // Datadog

		// Language runtimes (third-party repos)
		"@nodesource.com", // NodeSource (Node.js)

		// Networking & VPN
		"info@tailscale.com", // Tailscale

		// Robotics
		"@openrobotics.org",  // ROS (Robot Operating System)
		"@osrfoundation.org", // ROS (Robot Operating System)
	}

	// thirdPartyMaintainerExact contains maintainer strings that require exact match.
	// These are too short or generic for substring matching.
	thirdPartyMaintainerExact = []string{
		"GitHub",    // GitHub CLI
		"HashiCorp", // HashiCorp (Terraform, Vault, Consul, etc.)
	}
)
for i, pkgInfo := range packageInfos { for j, pkg := range pkgInfo.Packages { installedFiles, found := packageFiles[pkg.Name] if !found { installedFiles = packageFiles[pkg.Name+":"+pkg.Arch] } + + // Skip InstalledFiles for third-party packages + if isThirdPartyPackage(pkg.Maintainer) { + a.logger.Debug("Third-party package detected", + log.String("package", pkg.Name), + log.String("maintainer", pkg.Maintainer)) + continue + } + packageInfos[i].Packages[j].InstalledFiles = installedFiles + systemInstalledFiles = append(systemInstalledFiles, installedFiles...) } } @@ -349,6 +411,21 @@ func (a dpkgAnalyzer) isMd5SumsFile(dir, fileName string) bool { return strings.HasSuffix(fileName, md5sumsExtension) } +// isThirdPartyPackage checks if a package is from a third-party repository +// by examining the Maintainer field against known third-party patterns. +// +// Unlike RPM which has a dedicated "Vendor" field, dpkg packages don't have a reliable +// way to identify their origin. We use a heuristic approach based on maintainer patterns. +// See https://github.com/aquasecurity/trivy/issues/9916 for more details. 
+func isThirdPartyPackage(maintainer string) bool { + if slices.Contains(thirdPartyMaintainerExact, maintainer) { + return true + } + return slices.ContainsFunc(thirdPartyMaintainerPatterns, func(pattern string) bool { + return strings.Contains(maintainer, pattern) + }) +} + func (a dpkgAnalyzer) Type() analyzer.Type { return analyzer.TypeDpkg } diff --git a/pkg/fanal/analyzer/pkg/dpkg/dpkg_test.go b/pkg/fanal/analyzer/pkg/dpkg/dpkg_test.go index 0ea7e76753..6c9ac01c0e 100644 --- a/pkg/fanal/analyzer/pkg/dpkg/dpkg_test.go +++ b/pkg/fanal/analyzer/pkg/dpkg/dpkg_test.go @@ -1420,9 +1420,45 @@ func Test_dpkgAnalyzer_Analyze(t *testing.T) { }, }, { - name: "md5sums", - testFiles: map[string]string{"./testdata/tar.md5sums": "var/lib/dpkg/info/tar.md5sums"}, + name: "md5sums", + testFiles: map[string]string{ + "./testdata/tar-status": "var/lib/dpkg/status", + "./testdata/tar.md5sums": "var/lib/dpkg/info/tar.md5sums", + }, want: &analyzer.AnalysisResult{ + PackageInfos: []types.PackageInfo{ + { + FilePath: "var/lib/dpkg/status", + Packages: types.Packages{ + { + ID: "tar@1.29b-2", + Name: "tar", + Version: "1.29b", + Release: "2", + Arch: "amd64", + SrcName: "tar", + SrcVersion: "1.29b", + SrcRelease: "2", + Maintainer: "Ubuntu Developers ", + InstalledFiles: []string{ + "/usr/bin/tar", + "/usr/lib/mime/packages/tar", + "/usr/sbin/rmt-tar", + "/usr/sbin/tarcat", + "/usr/share/doc/tar/AUTHORS", + "/usr/share/doc/tar/NEWS.gz", + "/usr/share/doc/tar/README.Debian", + "/usr/share/doc/tar/THANKS.gz", + "/usr/share/doc/tar/changelog.Debian.gz", + "/usr/share/doc/tar/copyright", + "/usr/share/man/man1/tar.1.gz", + "/usr/share/man/man1/tarcat.1.gz", + "/usr/share/man/man8/rmt-tar.8.gz", + }, + }, + }, + }, + }, SystemInstalledFiles: []string{ "/usr/bin/tar", "/usr/lib/mime/packages/tar", @@ -1470,6 +1506,23 @@ func Test_dpkgAnalyzer_Analyze(t *testing.T) { } } +func Test_isThirdPartyPackage(t *testing.T) { + tests := []struct { + name string + maintainer string + want bool + }{ + 
{"third-party (Docker)", "Docker ", true}, + {"third-party (GitHub - exact match)", "GitHub", true}, + {"official (Ubuntu)", "Ubuntu Developers ", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, isThirdPartyPackage(tt.maintainer)) + }) + } +} + func Test_dpkgAnalyzer_Required(t *testing.T) { tests := []struct { name string diff --git a/pkg/fanal/analyzer/pkg/dpkg/testdata/tar-status b/pkg/fanal/analyzer/pkg/dpkg/testdata/tar-status new file mode 100644 index 0000000000..bd9015d9e1 --- /dev/null +++ b/pkg/fanal/analyzer/pkg/dpkg/testdata/tar-status @@ -0,0 +1,17 @@ +Package: tar +Essential: yes +Status: install ok installed +Priority: required +Section: utils +Installed-Size: 864 +Maintainer: Ubuntu Developers +Architecture: amd64 +Multi-Arch: foreign +Version: 1.29b-2 +Replaces: cpio (<< 2.4.2-39) +Pre-Depends: libacl1 (>= 2.2.51-8), libc6 (>= 2.17), libselinux1 (>= 1.32) +Suggests: bzip2, ncompress, xz-utils, tar-scripts, tar-doc +Breaks: dpkg-dev (<< 1.14.26) +Conflicts: cpio (<= 2.4.2-38) +Description: GNU version of the tar archiving utility + diff --git a/pkg/fanal/artifact/image/image_test.go b/pkg/fanal/artifact/image/image_test.go index d96fe6e5db..f7ccc15055 100644 --- a/pkg/fanal/artifact/image/image_test.go +++ b/pkg/fanal/artifact/image/image_test.go @@ -36,7 +36,7 @@ import ( // Common blob IDs used across multiple test cases to reduce duplication const ( - alpineBaseLayerID = "sha256:5fa8e7300cfe1b8f70c304e3b04f9b1f022942a0dc57d3fc0d4d3f04327e6d2a" + alpineBaseLayerID = "sha256:6c42077a82b21707f581759b12a99cc9a593ce35a0d7be4c19c01eb48bd5ba33" alpineBaseLayerDiffID = "sha256:beee9f30bc1f711043e78d4a2be0668955d4b761d587d6f60c2c8dc081efb203" alpineArtifactID = "sha256:3c709d2a158be3a97051e10cd0e30f047225cb9505101feb3fadcd395c2e0408" composerImageID = "sha256:a187dde48cd289ac374ad8539930628314bc581a481cdb41409c9289419ddb72" @@ -510,7 +510,7 @@ func TestArtifact_Inspect(t *testing.T) { }, wantBlobs: 
[]cachetest.WantBlob{ { - ID: "sha256:5b61242ed7786d642c7037c5d42c97ef4eb77e79b5cee7d47c3a2476bdd37e54", + ID: "sha256:75a461ca76eecc6cea981889d69aa1c2dd78c436108be8be1bbc29295520c7d4", BlobInfo: types.BlobInfo{ SchemaVersion: types.BlobJSONSchemaVersion, Size: 3061760, @@ -598,7 +598,7 @@ func TestArtifact_Inspect(t *testing.T) { }, }, { - ID: "sha256:1a8ac8af11a039295f3fffd3e058c034dae966ac7ace649121f0559146133ee5", + ID: "sha256:81afc1747d0fdec7a606c27570313634ae331fab6f13566b23d0f6b3e498c050", BlobInfo: types.BlobInfo{ SchemaVersion: types.BlobJSONSchemaVersion, Size: 15441920, @@ -693,7 +693,7 @@ func TestArtifact_Inspect(t *testing.T) { }, }, { - ID: "sha256:a686ab4c4132800a0d67a8ddf33dd89387d750a7b3427c01b9ce7bf3219cadfb", + ID: "sha256:0778c3e388c54f736a3d6e74ed390a91fdb42c6809f8fb743d4f72acb41a5d6d", BlobInfo: types.BlobInfo{ SchemaVersion: types.BlobJSONSchemaVersion, Size: 29696, @@ -900,7 +900,7 @@ func TestArtifact_Inspect(t *testing.T) { }, }, { - ID: "sha256:789b01e58c608d3a3021ce18cf6c8bd21e701116134089d949da35a25f73d9ec", + ID: "sha256:5a3e3f25fdc97a14d69d99c63dd640cd2d38af5b987b7a95084cce3d835970fb", BlobInfo: types.BlobInfo{ SchemaVersion: types.BlobJSONSchemaVersion, Size: 6656, @@ -1763,10 +1763,10 @@ func TestArtifact_Inspect(t *testing.T) { Type: types.TypeContainerImage, ID: "sha256:0bebf0773ffd87baa7c64fbdbdf79a24ae125e3f99a8adebe52d1ccbe6bed16b", BlobIDs: []string{ - "sha256:5b61242ed7786d642c7037c5d42c97ef4eb77e79b5cee7d47c3a2476bdd37e54", - "sha256:1a8ac8af11a039295f3fffd3e058c034dae966ac7ace649121f0559146133ee5", - "sha256:a686ab4c4132800a0d67a8ddf33dd89387d750a7b3427c01b9ce7bf3219cadfb", - "sha256:789b01e58c608d3a3021ce18cf6c8bd21e701116134089d949da35a25f73d9ec", + "sha256:75a461ca76eecc6cea981889d69aa1c2dd78c436108be8be1bbc29295520c7d4", + "sha256:81afc1747d0fdec7a606c27570313634ae331fab6f13566b23d0f6b3e498c050", + "sha256:0778c3e388c54f736a3d6e74ed390a91fdb42c6809f8fb743d4f72acb41a5d6d", + 
"sha256:5a3e3f25fdc97a14d69d99c63dd640cd2d38af5b987b7a95084cce3d835970fb", }, ImageMetadata: artifact.ImageMetadata{ ID: "sha256:58701fd185bda36cab0557bb6438661831267aa4a9e0b54211c4d5317a48aff4",