diff --git a/docs/docs/target/repository.md b/docs/docs/target/repository.md index 9a8a3165d2..3146913c1a 100644 --- a/docs/docs/target/repository.md +++ b/docs/docs/target/repository.md @@ -109,6 +109,26 @@ $ trivy repo --scanners license (REPO_PATH | REPO_URL) Trivy can generate SBOM for code repositories. See [here](../supply-chain/sbom.md) for the detail. +## Git Metadata +When scanning git repositories (both local and remote), Trivy automatically extracts and includes git metadata in the scan results. +This metadata provides context about the scanned repository. + +The metadata includes information such as: + +- Repository URL +- Branch name +- Tags +- Commit details (hash, message, committer) +- Author information + +This feature works automatically for any git repository. +When using JSON format output, the git metadata will be included in the `Metadata` field. +For detailed information about the available fields, please refer to the JSON output of your scan results. + +```bash +$ trivy repo --format json (REPO_PATH | REPO_URL) +``` + ## Scan Cache When scanning git repositories, it stores analysis results in the cache, using the latest commit hash as the key. Note that the cache is not used when the repository is dirty, otherwise Trivy will miss the files that are not committed. 
diff --git a/integration/repo_test.go b/integration/repo_test.go index f77ee38bbd..984cf671d4 100644 --- a/integration/repo_test.go +++ b/integration/repo_test.go @@ -313,6 +313,10 @@ func TestRepository(t *testing.T) { input: "testdata/fixtures/repo/trivy-ci-test", }, golden: "testdata/test-repo.json.golden", + override: func(_ *testing.T, want, _ *types.Report) { + // Clear all metadata as this is a local directory scan without git info + want.Metadata = types.Metadata{} + }, }, { name: "installed.json", diff --git a/integration/testdata/test-repo.json.golden b/integration/testdata/test-repo.json.golden index 07cb12e69e..43820fa056 100644 --- a/integration/testdata/test-repo.json.golden +++ b/integration/testdata/test-repo.json.golden @@ -13,7 +13,13 @@ "diff_ids": null }, "config": {} - } + }, + "RepoURL": "https://github.com/knqyf263/trivy-ci-test", + "Branch": "master", + "Commit": "5ae342eb2802672402d9b2c26f09e2051bbd91b8", + "CommitMsg": "Use COPY instead of ADD in Dockerfile (#4)", + "Author": "gy741 ", + "Committer": "knqyf263 " }, "Results": [ { diff --git a/pkg/fanal/artifact/artifact.go b/pkg/fanal/artifact/artifact.go index 8c98dff30d..0b4525622c 100644 --- a/pkg/fanal/artifact/artifact.go +++ b/pkg/fanal/artifact/artifact.go @@ -92,6 +92,7 @@ type Reference struct { ID string BlobIDs []string ImageMetadata ImageMetadata + RepoMetadata RepoMetadata // SBOM BOM *core.BOM @@ -104,3 +105,13 @@ type ImageMetadata struct { RepoDigests []string ConfigFile v1.ConfigFile } + +type RepoMetadata struct { + RepoURL string // repository URL (from upstream/origin) + Branch string // current branch name + Tags []string // tag names pointing to HEAD + Commit string // commit hash + CommitMsg string // commit message + Author string // commit author + Committer string // commit committer +} diff --git a/pkg/fanal/artifact/local/fs.go b/pkg/fanal/artifact/local/fs.go index 5b9ba9e658..4140aa12f9 100644 --- a/pkg/fanal/artifact/local/fs.go +++ 
b/pkg/fanal/artifact/local/fs.go @@ -13,6 +13,7 @@ import ( "sync" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" "github.com/google/wire" "github.com/samber/lo" "golang.org/x/xerrors" @@ -56,7 +57,8 @@ type Artifact struct { artifactOption artifact.Option - commitHash string // only set when the git repository is clean + isClean bool // whether git repository is clean (for caching) + repoMetadata artifact.RepoMetadata // git repository metadata } func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact.Option) (artifact.Artifact, error) { @@ -86,10 +88,14 @@ func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact. art.logger.Debug("Analyzing...", log.String("root", art.rootPath), lo.Ternary(opt.Original != "", log.String("original", opt.Original), log.Nil)) - // Check if the directory is a git repository and clean - if hash, err := gitCommitHash(art.rootPath); err == nil { - art.logger.Debug("Using the latest commit hash for calculating cache key", log.String("commit_hash", hash)) - art.commitHash = hash + // Check if the directory is a git repository and extract metadata + if art.isClean, art.repoMetadata, err = extractGitInfo(art.rootPath); err == nil { + if art.isClean { + art.logger.Debug("Using the latest commit hash for calculating cache key", + log.String("commit_hash", art.repoMetadata.Commit)) + } else { + art.logger.Debug("Repository is dirty, random cache key will be used") + } } else if !errors.Is(err, git.ErrRepositoryNotExists) { // Only log if the file path is a git repository art.logger.Debug("Random cache key will be used", log.Err(err)) @@ -98,36 +104,72 @@ func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact. 
return art, nil } -// gitCommitHash returns the latest commit hash if the git repository is clean, otherwise returns an error -func gitCommitHash(dir string) (string, error) { +// extractGitInfo extracts git repository information including clean status and metadata +// Returns clean status (for caching), metadata, and error +func extractGitInfo(dir string) (bool, artifact.RepoMetadata, error) { + var metadata artifact.RepoMetadata + repo, err := git.PlainOpen(dir) if err != nil { - return "", xerrors.Errorf("failed to open git repository: %w", err) + return false, metadata, xerrors.Errorf("failed to open git repository: %w", err) } - // Get the working tree - worktree, err := repo.Worktree() - if err != nil { - return "", xerrors.Errorf("failed to get worktree: %w", err) - } - - // Get the current status - status, err := worktree.Status() - if err != nil { - return "", xerrors.Errorf("failed to get status: %w", err) - } - - if !status.IsClean() { - return "", xerrors.New("repository is dirty") - } - - // Get the HEAD commit hash + // Get HEAD commit head, err := repo.Head() if err != nil { - return "", xerrors.Errorf("failed to get HEAD: %w", err) + return false, metadata, xerrors.Errorf("failed to get HEAD: %w", err) } - return head.Hash().String(), nil + commit, err := repo.CommitObject(head.Hash()) + if err != nil { + return false, metadata, xerrors.Errorf("failed to get commit object: %w", err) + } + + // Extract basic commit metadata + metadata.Commit = head.Hash().String() + metadata.CommitMsg = strings.TrimSpace(commit.Message) + metadata.Author = commit.Author.String() + metadata.Committer = commit.Committer.String() + + // Get branch name + if head.Name().IsBranch() { + metadata.Branch = head.Name().Short() + } + + // Get all tag names that point to HEAD + if tags, err := repo.Tags(); err == nil { + var headTags []string + _ = tags.ForEach(func(tag *plumbing.Reference) error { + if tag.Hash() == head.Hash() { + headTags = append(headTags, 
tag.Name().Short()) + } + return nil + }) + metadata.Tags = headTags + } + + // Get repository URL - prefer upstream, fallback to origin + remoteConfig, err := repo.Remote("upstream") + if err != nil { + remoteConfig, err = repo.Remote("origin") + } + if err == nil && len(remoteConfig.Config().URLs) > 0 { + metadata.RepoURL = remoteConfig.Config().URLs[0] + } + + // Check if repository is clean for caching purposes + worktree, err := repo.Worktree() + if err != nil { + return false, metadata, xerrors.Errorf("failed to get worktree: %w", err) + } + + status, err := worktree.Status() + if err != nil { + return false, metadata, xerrors.Errorf("failed to get status: %w", err) + } + + // Return clean status and metadata + return status.IsClean(), metadata, nil } func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) { @@ -138,7 +180,7 @@ func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) { } // Check if the cache exists only when it's a clean git repository - if a.commitHash != "" { + if a.isClean && a.repoMetadata.Commit != "" { _, missingBlobs, err := a.cache.MissingBlobs(cacheKey, []string{cacheKey}) if err != nil { return artifact.Reference{}, xerrors.Errorf("unable to get missing blob: %w", err) @@ -231,10 +273,11 @@ func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) { } return artifact.Reference{ - Name: hostName, - Type: a.artifactOption.Type, - ID: cacheKey, // use a cache key as pseudo artifact ID - BlobIDs: []string{cacheKey}, + Name: hostName, + Type: a.artifactOption.Type, + ID: cacheKey, // use a cache key as pseudo artifact ID + BlobIDs: []string{cacheKey}, + RepoMetadata: a.repoMetadata, }, nil } @@ -295,7 +338,7 @@ func (a Artifact) analyzeWithTraversal(ctx context.Context, root, relativePath s func (a Artifact) Clean(reference artifact.Reference) error { // Don't delete cache if it's a clean git repository - if a.commitHash != "" { + if a.isClean && a.repoMetadata.Commit != "" { return 
nil } return a.cache.DeleteBlobs(reference.BlobIDs) @@ -303,8 +346,8 @@ func (a Artifact) Clean(reference artifact.Reference) error { func (a Artifact) calcCacheKey() (string, error) { // If this is a clean git repository, use the commit hash as cache key - if a.commitHash != "" { - return cache.CalcKey(a.commitHash, artifactVersion, a.analyzer.AnalyzerVersions(), a.handlerManager.Versions(), a.artifactOption) + if a.isClean && a.repoMetadata.Commit != "" { + return cache.CalcKey(a.repoMetadata.Commit, artifactVersion, a.analyzer.AnalyzerVersions(), a.handlerManager.Versions(), a.artifactOption) } // For non-git repositories or dirty git repositories, use UUID as cache key diff --git a/pkg/fanal/artifact/repo/git_test.go b/pkg/fanal/artifact/repo/git_test.go index 70c1014b1d..e1a7ef99b9 100644 --- a/pkg/fanal/artifact/repo/git_test.go +++ b/pkg/fanal/artifact/repo/git_test.go @@ -185,6 +185,15 @@ func TestArtifact_Inspect(t *testing.T) { BlobIDs: []string{ "sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", // Calculated from commit hash }, + RepoMetadata: artifact.RepoMetadata{ + RepoURL: ts.URL + "/test-repo.git", + Branch: "main", + Tags: []string{"v0.0.1"}, + Commit: "8a19b492a589955c3e70c6ad8efd1e4ec6ae0d35", + CommitMsg: "Update README.md", + Author: "Teppei Fukuda ", + Committer: "GitHub ", + }, }, wantBlobInfo: &types.BlobInfo{ SchemaVersion: types.BlobJSONSchemaVersion, @@ -200,6 +209,15 @@ func TestArtifact_Inspect(t *testing.T) { BlobIDs: []string{ "sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", // Calculated from commit hash }, + RepoMetadata: artifact.RepoMetadata{ + RepoURL: "https://github.com/aquasecurity/trivy-test-repo/", + Branch: "main", + Tags: []string{"v0.0.1"}, + Commit: "8a19b492a589955c3e70c6ad8efd1e4ec6ae0d35", + CommitMsg: "Update README.md", + Author: "Teppei Fukuda ", + Committer: "GitHub ", + }, }, wantBlobInfo: &types.BlobInfo{ SchemaVersion: types.BlobJSONSchemaVersion, @@ -221,6 
+239,15 @@ func TestArtifact_Inspect(t *testing.T) { BlobIDs: []string{ "sha256:6f4672e139d4066fd00391df614cdf42bda5f7a3f005d39e1d8600be86157098", }, + RepoMetadata: artifact.RepoMetadata{ + RepoURL: "https://github.com/aquasecurity/trivy-test-repo/", + Branch: "main", + Tags: []string{"v0.0.1"}, + Commit: "8a19b492a589955c3e70c6ad8efd1e4ec6ae0d35", + CommitMsg: "Update README.md", + Author: "Teppei Fukuda ", + Committer: "GitHub ", + }, }, wantBlobInfo: &types.BlobInfo{ SchemaVersion: types.BlobJSONSchemaVersion, diff --git a/pkg/scan/service.go b/pkg/scan/service.go index 47daf2fdf0..e2b5722298 100644 --- a/pkg/scan/service.go +++ b/pkg/scan/service.go @@ -207,6 +207,15 @@ func (s Service) ScanArtifact(ctx context.Context, options types.ScanOptions) (t ImageConfig: artifactInfo.ImageMetadata.ConfigFile, Size: scanResponse.Layers.TotalSize(), Layers: lo.Ternary(len(scanResponse.Layers) > 0, scanResponse.Layers, nil), + + // Git repository + RepoURL: artifactInfo.RepoMetadata.RepoURL, + Branch: artifactInfo.RepoMetadata.Branch, + Tags: artifactInfo.RepoMetadata.Tags, + Commit: artifactInfo.RepoMetadata.Commit, + CommitMsg: artifactInfo.RepoMetadata.CommitMsg, + Author: artifactInfo.RepoMetadata.Author, + Committer: artifactInfo.RepoMetadata.Committer, }, Results: scanResponse.Results, BOM: artifactInfo.BOM, diff --git a/pkg/types/report.go b/pkg/types/report.go index c10e1d3eb8..fb574b0557 100644 --- a/pkg/types/report.go +++ b/pkg/types/report.go @@ -34,6 +34,15 @@ type Metadata struct { RepoDigests []string `json:",omitempty"` ImageConfig v1.ConfigFile `json:",omitzero"` Layers ftypes.Layers `json:",omitzero"` + + // Git repository + RepoURL string `json:",omitzero"` + Branch string `json:",omitzero"` + Tags []string `json:",omitzero"` + Commit string `json:",omitzero"` + CommitMsg string `json:",omitzero"` + Author string `json:",omitzero"` + Committer string `json:",omitzero"` } // Results to hold list of Result