diff --git a/.gitignore b/.gitignore index 980941d3f0..0d0dea3acb 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,3 @@ vendor cmd/fanal/fanal *.tar *.gz -*.db diff --git a/analyzer/analyzer.go b/analyzer/analyzer.go index f5e342a1c3..c247624ce1 100644 --- a/analyzer/analyzer.go +++ b/analyzer/analyzer.go @@ -7,11 +7,14 @@ import ( "io" "os" - "github.com/aquasecurity/fanal/extractor" - "github.com/aquasecurity/fanal/types" "github.com/aquasecurity/fanal/utils" - godeptypes "github.com/aquasecurity/go-dep-parser/pkg/types" + + "github.com/aquasecurity/fanal/types" + "golang.org/x/xerrors" + + "github.com/aquasecurity/fanal/extractor" + godeptypes "github.com/aquasecurity/go-dep-parser/pkg/types" ) var ( diff --git a/analyzer/analyzer_test.go b/analyzer/analyzer_test.go index 74cd8c1c50..a0c48154d9 100644 --- a/analyzer/analyzer_test.go +++ b/analyzer/analyzer_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/aquasecurity/fanal/extractor" + "github.com/stretchr/testify/assert" ) @@ -92,12 +93,6 @@ func TestConfig_Analyze(t *testing.T) { }, } - // cleanup global state from other tests - commandAnalyzers = []CommandAnalyzer{} - pkgAnalyzers = []PkgAnalyzer{} - osAnalyzers = []OSAnalyzer{} - libAnalyzers = []LibraryAnalyzer{} - for _, tc := range testCases { RegisterOSAnalyzer(mockOSAnalyzer{}) diff --git a/cache/cache.go b/cache/cache.go index 59978757b5..d0ef934588 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -1,65 +1,62 @@ package cache import ( + "io" "os" "path/filepath" + "strings" - bolt "github.com/simar7/gokv/bbolt" - "github.com/simar7/gokv/encoding" - kvtypes "github.com/simar7/gokv/types" "golang.org/x/xerrors" ) +const ( + cacheDirName = "fanal" +) + +var ( + replacer = strings.NewReplacer("/", "_") +) + type Cache interface { - Get(bucket, key string, value *[]byte) (found bool, err error) - Set(bucket, key string, value []byte) (err error) + Get(key string) io.Reader + Set(key string, file io.Reader) (io.Reader, error) Clear() error } -type RealCache struct { +type FSCache struct { directory string - cache *bolt.Store } -func New(cacheDir string) (Cache, error) { - dir := filepath.Join(cacheDir, "fanal") - if err := os.MkdirAll(dir, 0700); err != nil { - return nil, xerrors.Errorf("unable to create cache dir: %w", err) - } +func Initialize(cacheDir string) Cache { + return &FSCache{directory: filepath.Join(cacheDir, cacheDirName)} +} - cacheOptions := bolt.Options{ - RootBucketName: "fanal", - Path: filepath.Join(dir, "cache.db"), - Codec: encoding.Raw, - } - - kv, err := bolt.NewStore(cacheOptions) +func (fs FSCache) Get(key string) io.Reader { + filePath := filepath.Join(fs.directory, replacer.Replace(key)) + f, err := os.Open(filePath) if err != nil { - return nil, xerrors.Errorf("error initializing cache: %w", err) + return nil + } + return f +} + +func (fs FSCache) Set(key string, file io.Reader) (io.Reader, error) { + filePath := filepath.Join(fs.directory, replacer.Replace(key)) + if err := os.MkdirAll(fs.directory, os.ModePerm); err != nil { + return nil, xerrors.Errorf("failed to mkdir all: %w", err) + } + cacheFile, err := os.Create(filePath) + if err != nil { + return file, xerrors.Errorf("failed to create cache file: %w", err) } - return &RealCache{directory: dir, cache: kv}, nil + tee := io.TeeReader(file, cacheFile) + return tee, nil } -func (rc RealCache) Get(bucket, key string, value *[]byte) (bool, error) { - return rc.cache.Get(kvtypes.GetItemInput{ - BucketName: bucket, - Key: key, - Value: value, - }) -} - -func (rc RealCache) Set(bucket, key string, value []byte) error { - return rc.cache.BatchSet(kvtypes.BatchSetItemInput{ - BucketName: bucket, - Keys: []string{key}, - Values: value, - }) -} - -func (rc RealCache) Clear() error { - if err := os.RemoveAll(rc.directory); err != nil { - return xerrors.Errorf("failed to remove cache: %w", err) +func (fs FSCache) Clear() error { + if err := os.RemoveAll(fs.directory); err != nil { + return xerrors.New("failed to remove cache") } return nil } diff --git a/cache/cache_test.go b/cache/cache_test.go index 3e8ce7ebe8..abb1f415fd 100644 --- a/cache/cache_test.go +++ b/cache/cache_test.go @@ -1,20 +1,40 @@ package cache import ( + "bytes" "io/ioutil" "os" - "path/filepath" "testing" "github.com/stretchr/testify/assert" ) -func TestRealCache_Clear(t *testing.T) { - d, _ := ioutil.TempDir("", "TestRealCache_Clear") - defer os.RemoveAll(d) - c, err := New(d) +func TestSetAndGetAndClear(t *testing.T) { + tempCacheDir, _ := ioutil.TempDir("", "TestCacheDir-*") + f, _ := ioutil.TempFile(tempCacheDir, "foo.bar.baz-*") + + c := Initialize(tempCacheDir) + + // set + expectedCacheContents := "foo bar baz" + var buf bytes.Buffer + buf.Write([]byte(expectedCacheContents)) + + r, err := c.Set(f.Name(), &buf) assert.NoError(t, err) + + b, _ := ioutil.ReadAll(r) + assert.Equal(t, expectedCacheContents, string(b)) + + // get + actualFile := c.Get(f.Name()) + actualBytes, _ := ioutil.ReadAll(actualFile) + assert.Equal(t, expectedCacheContents, string(actualBytes)) + + // clear assert.NoError(t, c.Clear()) - _, err = os.Stat(filepath.Join(d, "fanal")) + + // confirm that no cachedir remains + _, err = os.Stat(tempCacheDir + cacheDirName) assert.True(t, os.IsNotExist(err)) } diff --git a/cmd/fanal/main.go b/cmd/fanal/main.go index c0423e97f9..d226533dba 100644 --- a/cmd/fanal/main.go +++ b/cmd/fanal/main.go @@ -48,10 +48,8 @@ func run() (err error) { clearCache := flag.Bool("clear", false, "clear cache") flag.Parse() - c, err := cache.New(utils.CacheDir()) - if err != nil { - return err - } + c := cache.Initialize(utils.CacheDir()) + if *clearCache { if err = c.Clear(); err != nil { return xerrors.Errorf("%w", err) diff --git a/extractor/docker/docker.go b/extractor/docker/docker.go index 30ed7e457e..35ce6216d1 100644 --- a/extractor/docker/docker.go +++ b/extractor/docker/docker.go @@ -2,11 +2,9 @@ package docker import ( "archive/tar" - "bytes" "compress/gzip" "context" "encoding/json" - "errors" "io" "io/ioutil" "log" @@ -24,7 +22,6 @@ import ( "github.com/docker/distribution/manifest/schema2" "github.com/docker/docker/client" "github.com/genuinetools/reg/registry" - "github.com/klauspost/compress/zstd" "github.com/knqyf263/nested" "github.com/opencontainers/go-digest" "golang.org/x/xerrors" @@ -33,13 +30,6 @@ import ( const ( opq string = ".wh..wh..opq" wh string = ".wh." - - KVImageBucket string = "imagebucket" - LayerTarsBucket string = "layertars" -) - -var ( - ErrFailedCacheWrite = errors.New("failed to write to cache") ) type manifest struct { @@ -73,7 +63,7 @@ type Extractor struct { cache cache.Cache } -func NewDockerExtractor(option types.DockerOption, cache cache.Cache) (Extractor, error) { +func NewDockerExtractor(option types.DockerOption, c cache.Cache) (Extractor, error) { RegisterRegistry(&gcr.GCR{}) RegisterRegistry(&ecr.ECR{}) @@ -85,7 +75,7 @@ func NewDockerExtractor(option types.DockerOption, cache cache.Cache) (Extractor return Extractor{ Option: option, Client: cli, - cache: cache, + cache: c, }, nil } @@ -152,46 +142,21 @@ func (d Extractor) createRegistryClient(ctx context.Context, domain string) (*re } func (d Extractor) SaveLocalImage(ctx context.Context, imageName string) (io.Reader, error) { - var storedReader io.Reader - - var storedImageBytes []byte - found, err := d.cache.Get(KVImageBucket, imageName, &storedImageBytes) - - if found { - dec, _ := zstd.NewReader(nil) - storedImage, err := dec.DecodeAll(storedImageBytes, nil) - if err == nil { - return bytes.NewReader(storedImage), nil - } - - // bad cache, redownload - found = false - } - - var savedImage []byte - if err != nil || !found { - storedReader, err = d.saveLocalImage(ctx, imageName) + var err error + r := d.cache.Get(imageName) + if r == nil { + // Save the image + r, err = d.saveLocalImage(ctx, imageName) if err != nil { return nil, xerrors.Errorf("failed to save the image: %w", err) } - - savedImage, err = ioutil.ReadAll(storedReader) + r, err = d.cache.Set(imageName, r) if err != nil { - return nil, xerrors.Errorf("failed to read saved image: %w", err) - } - - e, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) - if err != nil { - return nil, err - } - - dst := e.EncodeAll(savedImage, nil) - if err := d.cache.Set(KVImageBucket, imageName, dst); err != nil { - log.Println(err) + log.Print(err) } } - return bytes.NewReader(savedImage), nil + return r, nil } func (d Extractor) saveLocalImage(ctx context.Context, imageName string) (io.ReadCloser, error) { @@ -232,10 +197,10 @@ func (d Extractor) Extract(ctx context.Context, imageName string, filenames []st }(ref.Digest) } - filesInLayers := map[string]extractor.FileMap{} - opqInLayers := map[string]extractor.OPQDirs{} + filesInLayers := make(map[string]extractor.FileMap) + opqInLayers := make(map[string]extractor.OPQDirs) for i := 0; i < len(m.Manifest.Layers); i++ { - if err := d.extractLayerFiles(ctx, layerCh, errCh, filesInLayers, opqInLayers, filenames); err != nil { + if err := d.extractLayerFiles(ctx, layerCh, errCh, filenames, filesInLayers, opqInLayers); err != nil { return nil, err } } @@ -269,7 +234,7 @@ func downloadConfigFile(ctx context.Context, r *registry.Registry, image registr return config, nil } -func (d Extractor) extractLayerFiles(ctx context.Context, layerCh chan layer, errCh chan error, filesInLayers map[string]extractor.FileMap, opqInLayers map[string]extractor.OPQDirs, filenames []string) error { +func (d Extractor) extractLayerFiles(ctx context.Context, layerCh chan layer, errCh chan error, filenames []string, filesInLayers map[string]extractor.FileMap, opqInLayers map[string]extractor.OPQDirs) error { var l layer select { case l = <-layerCh: @@ -290,68 +255,28 @@ func (d Extractor) extractLayerFiles(ctx context.Context, layerCh chan layer, er } func (d Extractor) extractLayerWorker(dig digest.Digest, r *registry.Registry, ctx context.Context, image registry.Image, errCh chan error, layerCh chan layer) { - var cacheContent []byte - - found, _ := d.cache.Get(LayerTarsBucket, string(dig), &cacheContent) - - if found { - b, err := extractTarFromTarZstd(cacheContent) - if err == nil && len(b) > 0 { - cacheBuf := bytes.NewBuffer(b) - layerCh <- layer{ID: dig, Content: ioutil.NopCloser(cacheBuf)} + var rc io.Reader + // Use cache + rc = d.cache.Get(string(dig)) + if rc == nil { + // Download the layer. + layerRC, err := r.DownloadLayer(ctx, image.Path, dig) + if err != nil { + errCh <- xerrors.Errorf("failed to download the layer(%s): %w", dig, err) return } - } - rc, err := r.DownloadLayer(ctx, image.Path, dig) - if err != nil { - errCh <- xerrors.Errorf("failed to download the layer(%s): %w", dig, err) - return + rc, err = d.cache.Set(string(dig), layerRC) + if err != nil { + log.Print(err) + } } - defer rc.Close() - - // read the incoming gzip from the layer gzipReader, err := gzip.NewReader(rc) if err != nil { - errCh <- xerrors.Errorf("could not init gzip reader: %w", err) + errCh <- xerrors.Errorf("invalid gzip: %w", err) return } - defer gzipReader.Close() - - b := bytes.NewBuffer(nil) - tr := io.TeeReader(gzipReader, b) - d.storeLayerInCache(tr, dig) - layerCh <- layer{ID: dig, Content: ioutil.NopCloser(b)} - - return -} - -func extractTarFromTarZstd(cacheContent []byte) ([]byte, error) { - var tarContent []byte - - dec, err := zstd.NewReader(nil) - if err != nil { - return nil, err - } - - tarContent, err = dec.DecodeAll(cacheContent, nil) - if err != nil { - return nil, err - } - - return tarContent, nil -} - -func (d Extractor) storeLayerInCache(r io.Reader, dig digest.Digest) { - // compress tar to zstd before storing to cache - var dst bytes.Buffer - w, _ := zstd.NewWriter(&dst, zstd.WithEncoderLevel(zstd.SpeedFastest)) - _, _ = io.Copy(w, r) - _ = w.Close() - - if err := d.cache.Set(LayerTarsBucket, string(dig), dst.Bytes()); err != nil { - log.Printf("an error occurred while caching: %s", err) - } + layerCh <- layer{ID: dig, Content: gzipReader} } func getValidManifest(ctx context.Context, r *registry.Registry, image registry.Image) (*schema2.DeserializedManifest, error) { @@ -437,11 +362,11 @@ func (d Extractor) ExtractFromFile(ctx context.Context, r io.Reader, filenames [ return fileMap, nil } -func (d Extractor) ExtractFiles(layerReader io.Reader, filenames []string) (extractor.FileMap, extractor.OPQDirs, error) { +func (d Extractor) ExtractFiles(layer io.Reader, filenames []string) (extractor.FileMap, extractor.OPQDirs, error) { data := make(map[string][]byte) opqDirs := extractor.OPQDirs{} - tr := tar.NewReader(layerReader) + tr := tar.NewReader(layer) for { hdr, err := tr.Next() if err == io.EOF { diff --git a/extractor/docker/docker_test.go b/extractor/docker/docker_test.go index 6e17f1d8ff..938eda2f99 100644 --- a/extractor/docker/docker_test.go +++ b/extractor/docker/docker_test.go @@ -13,7 +13,8 @@ import ( "testing" "time" - "github.com/klauspost/compress/zstd" + "github.com/genuinetools/reg/registry" + "github.com/opencontainers/go-digest" "github.com/aquasecurity/fanal/cache" "github.com/aquasecurity/fanal/extractor" @@ -22,21 +23,6 @@ import ( "github.com/stretchr/testify/assert" ) -// TODO: Use a memory based FS rather than actual fs -// context: https://github.com/aquasecurity/fanal/pull/51#discussion_r352337762 -func setupCache() (cache.Cache, string, error) { - dir, err := ioutil.TempDir("", "Cache_TestStore-*") - if err != nil { - return nil, "", err - } - - c, err := cache.New(dir) - if err != nil { - return nil, "", err - } - return c, dir, nil -} - func TestExtractFromFile(t *testing.T) { vectors := []struct { file string // Test input file @@ -204,70 +190,35 @@ func TestExtractFiles(t *testing.T) { } func TestDockerExtractor_SaveLocalImage(t *testing.T) { - testCases := []struct { - name string - expectedImageData string - cacheHit bool - }{ - { - name: "happy path with cache miss", - expectedImageData: "foofromdocker", - }, - { - name: "happy path with cache hit", - cacheHit: true, - expectedImageData: "foofromcache", - }, + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + httpPath := r.URL.String() + switch { + case strings.Contains(httpPath, "images/get?names=fooimage"): + _, _ = fmt.Fprint(w, "foocontent") + default: + assert.FailNow(t, "unexpected path accessed: ", r.URL.String()) + } + })) + defer ts.Close() + + c, err := client.NewClientWithOpts(client.WithHost(ts.URL)) + assert.NoError(t, err) + + // setup cache + tempCacheDir, _ := ioutil.TempDir("", "TestDockerExtractor_SaveLocalImage-*") + defer func() { + _ = os.RemoveAll(tempCacheDir) + }() + + de := Extractor{ + Option: types.DockerOption{}, + Client: c, + cache: cache.Initialize(tempCacheDir), } - for _, tc := range testCases { - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - httpPath := r.URL.String() - switch { - case strings.Contains(httpPath, "images/get?names=fooimage"): - _, _ = fmt.Fprint(w, "foofromdocker") - default: - assert.FailNow(t, "unexpected path accessed: ", r.URL.String()) - } - })) - defer ts.Close() - - c, err := client.NewClientWithOpts(client.WithHost(ts.URL)) - assert.NoError(t, err) - - // setup cache - cache, tmpDir, err := setupCache() - defer os.RemoveAll(tmpDir) - assert.NoError(t, err) - - if tc.cacheHit { - e, _ := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedDefault)) - dst := e.EncodeAll([]byte("foofromcache"), nil) - _ = cache.Set(KVImageBucket, "fooimage", dst) - } - - de := Extractor{ - Option: types.DockerOption{}, - Client: c, - cache: cache, - } - - r, err := de.SaveLocalImage(context.TODO(), "fooimage") - actualSavedTarBytes, _ := ioutil.ReadAll(r) - assert.Equal(t, []byte(tc.expectedImageData), actualSavedTarBytes[:], tc.name) - assert.NoError(t, err, tc.name) - - // check the cache for what was stored - var actualValue []byte - found, err := de.cache.Get(KVImageBucket, "fooimage", &actualValue) - - assert.NoError(t, err, tc.name) - assert.True(t, found, tc.name) - - dec, _ := zstd.NewReader(nil) - actualStoredValue, _ := dec.DecodeAll(actualValue, nil) - assert.Equal(t, tc.expectedImageData, string(actualStoredValue), tc.name) - } + r, err := de.SaveLocalImage(context.TODO(), "fooimage") + assert.NotNil(t, r) + assert.NoError(t, err) } func TestDockerExtractor_Extract(t *testing.T) { @@ -284,28 +235,22 @@ func TestDockerExtractor_Extract(t *testing.T) { { name: "happy path", manifestResp: `{ - "schemaVersion": 2, - "mediaType": "application/vnd.docker.distribution.manifest.v2+json", - "layers": [ - { - "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", - "size": 153263, - "digest": "sha256:shafortestdirslashhelloworlddottxt" - }, - { - "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", - "size": 153263, - "digest": "sha256:shafortestdirslashbadworlddottxt" - } - ] + "schemaVersion": 2, + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "layers": [ + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "size": 153263, + "digest": "sha256:62d8908bee94c202b2d35224a221aaa2058318bfa9879fa541efaecba272331b" + } + ] }`, fileName: "testdata/testdir.tar.gz", // includes helloworld.txt and badworld.txt blobData: "foo", - fileToExtract: []string{"testdir/helloworld.txt", "testdir/badworld.txt"}, + fileToExtract: []string{"testdir/helloworld.txt"}, expectedFileMap: extractor.FileMap{ "/config": []uint8{0x66, 0x6f, 0x6f}, "testdir/helloworld.txt": []uint8{0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0xa}, - "testdir/badworld.txt": []uint8{0x62, 0x61, 0x64, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0xa}, }, }, { @@ -321,20 +266,20 @@ func TestDockerExtractor_Extract(t *testing.T) { { name: "sad path: corrupt layer data invalid gzip", manifestResp: `{ - "schemaVersion": 2, - "mediaType": "application/vnd.docker.distribution.manifest.v2+json", - "layers": [ - { - "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", - "size": 153263, - "digest": "sha256:shaforinvalidgzipfile" - } - ] + "schemaVersion": 2, + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "layers": [ + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "size": 153263, + "digest": "sha256:62d8908bee94c202b2d35224a221aaa2058318bfa9879fa541efaecba272331b" + } + ] }`, fileName: "testdata/opq.tar", blobData: "foo", expectedFileMap: extractor.FileMap(nil), - expectedError: "could not init gzip reader: gzip: invalid header", + expectedError: "invalid gzip: gzip: invalid header", }, } @@ -345,14 +290,8 @@ func TestDockerExtractor_Extract(t *testing.T) { case strings.Contains(httpPath, "/v2/library/fooimage/manifests/latest"): w.Header().Set("Content-Type", "application/vnd.docker.distribution.manifest.v2+json") _, _ = fmt.Fprint(w, tc.manifestResp) - case strings.Contains(httpPath, "/v2/library/fooimage/blobs/sha256:shafortestdirslashhelloworlddottxt"): - b, _ := ioutil.ReadFile("testdata/helloworld.tar.gz") - _, _ = w.Write(b) - case strings.Contains(httpPath, "/v2/library/fooimage/blobs/sha256:shafortestdirslashbadworlddottxt"): - b, _ := ioutil.ReadFile("testdata/badworld.tar.gz") - _, _ = w.Write(b) - case strings.Contains(httpPath, "/v2/library/fooimage/blobs/sha256:shaforinvalidgzipfile"): - b, _ := ioutil.ReadFile("testdata/opq.tar") + case strings.Contains(httpPath, "/v2/library/fooimage/blobs/sha256:62d8908bee94c202b2d35224a221aaa2058318bfa9879fa541efaecba272331b"): + b, _ := ioutil.ReadFile(tc.fileName) _, _ = w.Write(b) case strings.Contains(httpPath, "/v2/library/fooimage/blobs/"): _, _ = w.Write([]byte(tc.blobData)) @@ -366,9 +305,10 @@ func TestDockerExtractor_Extract(t *testing.T) { assert.NoError(t, err) // setup cache - s, tmpDir, err := setupCache() - assert.NoError(t, err) - defer os.RemoveAll(tmpDir) + tempCacheDir, _ := ioutil.TempDir("", "TestDockerExtractor_Extract-*") + defer func() { + _ = os.RemoveAll(tempCacheDir) + }() de := Extractor{ Option: types.DockerOption{ @@ -378,7 +318,7 @@ func TestDockerExtractor_Extract(t *testing.T) { Timeout: time.Second * 1000, }, Client: c, - cache: s, + cache: cache.Initialize(tempCacheDir), } tsURL := strings.TrimPrefix(ts.URL, "http://") @@ -402,6 +342,132 @@ func TestDockerExtractor_Extract(t *testing.T) { } } +func TestDocker_ExtractLayerWorker(t *testing.T) { + goodCacheContents, _ := ioutil.ReadFile("testdata/testdir.tar.gz") + goodReturnedTarContent, _ := ioutil.ReadFile("testdata/goodTarContentAll.golden") + badTarCacheData, _ := ioutil.ReadFile("testdata/invalidgzvalidtar.tar.gz") + + testCases := []struct { + name string + cacheHit bool + garbageCache bool + requiredFiles []string + expectedCacheContents []byte + expectedReturnedTarContent []byte + expectedError string + }{ + { + name: "happy path with cache miss and write back", + cacheHit: false, + requiredFiles: []string{"testdir/helloworld.txt", "testdir/badworld.txt"}, + expectedCacheContents: goodCacheContents, + expectedReturnedTarContent: goodReturnedTarContent, + }, + { + name: "happy path with cache hit with garbage cache", + cacheHit: true, + garbageCache: true, + requiredFiles: []string{"testdir/helloworld.txt", "testdir/badworld.txt"}, + expectedCacheContents: badTarCacheData, + expectedError: "invalid gzip: gzip: invalid header", + }, + { + name: "happy path with cache hit", + cacheHit: true, + expectedCacheContents: goodCacheContents, + expectedReturnedTarContent: goodReturnedTarContent, + }, + } + + for _, tc := range testCases { + inputDigest := digest.Digest("sha256:62d8908bee94c202b2d35224a221aaa2058318bfa9879fa541efaecba272331b") + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + httpPath := r.URL.String() + switch { + case strings.Contains(httpPath, "/v2/library/fooimage/blobs/sha256:62d8908bee94c202b2d35224a221aaa2058318bfa9879fa541efaecba272331b"): + layerData, _ := ioutil.ReadFile("testdata/testdir.tar.gz") + _, _ = w.Write(layerData) + default: + assert.FailNow(t, "unexpected path accessed: ", fmt.Sprintf("%s %s", r.URL.String(), tc.name)) + } + })) + defer ts.Close() + + c, err := client.NewClientWithOpts(client.WithHost(ts.URL)) + assert.NoError(t, err) + + // setup cache + tmpDir, _ := ioutil.TempDir("", "TestDocker_ExtractLayerWorker-*") + s := cache.Initialize(tmpDir) + defer os.RemoveAll(tmpDir) + + if tc.cacheHit { + switch tc.garbageCache { + case true: + garbage, _ := os.Open("testdata/invalidgzvalidtar.tar.gz") + defer garbage.Close() + r, err := s.Set(string(inputDigest), garbage) + assert.NoError(t, err) + _, _ = ioutil.ReadAll(r) // trigger the write + default: + goodTar, _ := os.Open("testdata/testdir.tar.gz") + defer goodTar.Close() + r, err := s.Set(string(inputDigest), goodTar) + assert.NoError(t, err) + _, _ = ioutil.ReadAll(r) // trigger the write + } + } + + de := Extractor{ + Option: types.DockerOption{ + AuthURL: ts.URL, + NonSSL: true, + SkipPing: true, + Timeout: time.Second * 1000, + }, + Client: c, + cache: s, + } + + tsUrl := strings.TrimPrefix(ts.URL, "http://") + inputImage := registry.Image{ + Domain: tsUrl, + Path: "library/fooimage", + Tag: "latest", + } + + layerCh := make(chan layer) + errCh := make(chan error) + r, err := de.createRegistryClient(context.TODO(), inputImage.Domain) + go func() { + de.extractLayerWorker(inputDigest, r, context.TODO(), inputImage, errCh, layerCh) + }() + + var errRecieved error + var layerReceived layer + + select { + case errRecieved = <-errCh: + if tc.garbageCache { + assert.Equal(t, tc.expectedError, errRecieved.Error(), tc.name) + } else { + assert.FailNow(t, "unexpected error received, err: ", fmt.Sprintf("%s, %s", errRecieved, tc.name)) + } + case layerReceived = <-layerCh: + assert.Equal(t, inputDigest, layerReceived.ID, tc.name) + got, _ := ioutil.ReadAll(layerReceived.Content) + assert.Equal(t, tc.expectedReturnedTarContent, got, tc.name) + } + + // check cache contents + foundReader := s.Get(string(inputDigest)) + actualContents, err := ioutil.ReadAll(foundReader) + assert.NoError(t, err) + assert.Equal(t, tc.expectedCacheContents, actualContents, tc.name) + } +} + func TestDocker_ExtractLayerFiles(t *testing.T) { de := Extractor{} @@ -421,7 +487,7 @@ func TestDocker_ExtractLayerFiles(t *testing.T) { filesInLayers := map[string]extractor.FileMap{} opqInLayers := map[string]extractor.OPQDirs{} - err := de.extractLayerFiles(context.TODO(), layerCh, errCh, filesInLayers, opqInLayers, inputFilenames) + err := de.extractLayerFiles(context.TODO(), layerCh, errCh, inputFilenames, filesInLayers, opqInLayers) assert.NoError(t, err) assert.Equal(t, map[string]extractor.FileMap{ "sha256:62d8908bee94c202b2d35224a221aaa2058318bfa9879fa541efaecba272331b": { diff --git a/extractor/docker/testdata/badworld.tar.gz b/extractor/docker/testdata/badworld.tar.gz deleted file mode 100644 index 30b74c720e..0000000000 Binary files a/extractor/docker/testdata/badworld.tar.gz and /dev/null differ diff --git a/extractor/docker/testdata/goodTarContent.golden b/extractor/docker/testdata/goodTarContent.golden new file mode 100644 index 0000000000..48e856294d Binary files /dev/null and b/extractor/docker/testdata/goodTarContent.golden differ diff --git a/utils/testdata/testdir.tar b/extractor/docker/testdata/goodTarContentAll.golden similarity index 100% rename from utils/testdata/testdir.tar rename to extractor/docker/testdata/goodTarContentAll.golden diff --git a/extractor/docker/testdata/helloworld.tar.gz b/extractor/docker/testdata/helloworld.tar.gz deleted file mode 100644 index 6a5cfe1441..0000000000 Binary files a/extractor/docker/testdata/helloworld.tar.gz and /dev/null differ diff --git a/extractor/docker/testdata/testdir.tar.gz b/extractor/docker/testdata/testdir.tar.gz new file mode 100644 index 0000000000..d6ae289aba Binary files /dev/null and b/extractor/docker/testdata/testdir.tar.gz differ diff --git a/extractor/docker/testdata/testdir.tar.zstd b/extractor/docker/testdata/testdir.tar.zstd new file mode 100644 index 0000000000..092b4b81b3 Binary files /dev/null and b/extractor/docker/testdata/testdir.tar.zstd differ diff --git a/go.mod b/go.mod index e9513cb16d..1b662bd7a4 100644 --- a/go.mod +++ b/go.mod @@ -12,16 +12,16 @@ require ( github.com/docker/docker v0.0.0-20180924202107-a9c061deec0f github.com/docker/go-connections v0.4.0 // indirect github.com/genuinetools/reg v0.16.0 - github.com/klauspost/compress v1.9.3 github.com/knqyf263/go-deb-version v0.0.0-20190517075300-09fca494f03d github.com/knqyf263/go-rpmdb v0.0.0-20190501070121-10a1c42a10dc github.com/knqyf263/nested v0.0.1 github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2 github.com/pkg/errors v0.8.1 - github.com/simar7/gokv v0.3.3-0.20191216080237-ab4446a6841b github.com/stretchr/testify v1.4.0 golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5 + golang.org/x/net v0.0.0-20191108221443-4ba9e2ef068c // indirect + golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd // indirect golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 ) diff --git a/go.sum b/go.sum index f0b659f445..2dc5a93aa8 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,6 @@ github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWX github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alicebob/gopher-json v0.0.0-20180125190556-5a6b3ba71ee6/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc= -github.com/alicebob/miniredis/v2 v2.11.0/go.mod h1:UA48pmi7aSazcGAvcdKcBB49z521IC9VjTTRz2nIaJE= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/aquasecurity/go-dep-parser v0.0.0-20190819075924-ea223f0ef24b h1:55Ulc/gvfWm4ylhVaR7MxOwujRjA6et7KhmUbSgUFf4= github.com/aquasecurity/go-dep-parser v0.0.0-20190819075924-ea223f0ef24b/go.mod h1:BpNTD9vHfrejKsED9rx04ldM1WIbeyXGYxUrqTVwxVQ= @@ -29,9 +27,6 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24 github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc h1:TP+534wVlf61smEIq1nwLLAjQVEK2EADoW3CX9AuT+8= github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= @@ -60,8 +55,6 @@ github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 h1:UhxFibDNY/bfvqU5CAUmr9zpesgbU6SWc8/B4mflAE4= github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= -github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= -github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= @@ -86,8 +79,6 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/gomodule/redigo v1.7.1-0.20190322064113-39e2c31b7ca3/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= -github.com/gomodule/redigo v2.0.0+incompatible/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -110,8 +101,6 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1 github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.9.3 h1:hkFELABwacUEgBfiguNeQydKv3M9pawBq8o24Ypw9+M= -github.com/klauspost/compress v1.9.3/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/knqyf263/berkeleydb v0.0.0-20190501065933-fafe01fb9662 h1:UGS0RbPHwXJkq8tcba8OD0nvVUWLf2h7uUJznuHPPB0= github.com/knqyf263/berkeleydb v0.0.0-20190501065933-fafe01fb9662/go.mod h1:bu1CcN4tUtoRcI/B/RFHhxMNKFHVq/c3SV+UTyduoXg= github.com/knqyf263/go-deb-version v0.0.0-20190517075300-09fca494f03d h1:X4cedH4Kn3JPupAwwWuo4AzYp16P0OyLO9d7OnMZc/c= @@ -174,8 +163,6 @@ github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7z github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/shurcooL/httpfs v0.0.0-20181222201310-74dc9339e414/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg= -github.com/simar7/gokv v0.3.3-0.20191216080237-ab4446a6841b h1:2qPkc0Hnrd6IhlxioXb0RG/NBmIPx2b5y9Z1IKriuxQ= -github.com/simar7/gokv v0.3.3-0.20191216080237-ab4446a6841b/go.mod h1:jXjPspRkuCDCRTRBgfGsfXvW8ofOGh3Y+tjZvoFr7XU= github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.1 h1:GL2rEmy6nsikmW0r8opw9JIRScdMF5hA8cOYLH7In1k= @@ -191,9 +178,6 @@ github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJy github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/tomoyamachi/reg v0.16.1-0.20190706172545-2a2250fd7c00 h1:0e4vRd9YqnQBIAIAE39jLKDWffRfJWxloyWwcaMAQho= github.com/tomoyamachi/reg v0.16.1-0.20190706172545-2a2250fd7c00/go.mod h1:RQE7h2jyIxekQZ24/wad0c9RGP+KSq4XzHh7h83ALi8= -github.com/yuin/gopher-lua v0.0.0-20190206043414-8bfc7677f583/go.mod h1:gqRgreBUhTSL0GeU64rtZ3Uq3wtjOa/TB2YfrtkCbVQ= -go.etcd.io/bbolt v1.3.3 h1:MUGmc65QhB3pIlaQ5bB4LwqSj6GIonVJXpZiaKNyaKk= -go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -232,10 +216,10 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190204203706-41f3e6584952/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e h1:nFYrTHrdrAOpShe27kaFHjsqYSEQ0KWqdWLu3xuZJts= golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190506115046-ca7f33d4116e h1:bq5BY1tGuaK8HxuwN6pT6kWgTVLeJ5KwuyBpsl1CZL4= golang.org/x/sys v0.0.0-20190506115046-ca7f33d4116e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd h1:3x5uuvBgE6oaXJjCOvpCC1IpgJogqQ+PqGGU3ZxAgII= golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=