diff options
Diffstat (limited to 'pkg/filecollector')
-rw-r--r-- | pkg/filecollector/file_collector.go | 210 | ||||
-rw-r--r-- | pkg/filecollector/file_collector_test.go | 172 |
2 files changed, 382 insertions, 0 deletions
diff --git a/pkg/filecollector/file_collector.go b/pkg/filecollector/file_collector.go new file mode 100644 index 0000000..8547bb7 --- /dev/null +++ b/pkg/filecollector/file_collector.go @@ -0,0 +1,210 @@ +package filecollector + +import ( + "archive/tar" + "context" + "fmt" + "io" + "io/fs" + "os" + "path" + "path/filepath" + "strings" + + git "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/format/gitignore" + "github.com/go-git/go-git/v5/plumbing/format/index" +) + +type Handler interface { + WriteFile(path string, fi fs.FileInfo, linkName string, f io.Reader) error +} + +type TarCollector struct { + TarWriter *tar.Writer + UID int + GID int + DstDir string +} + +func (tc TarCollector) WriteFile(fpath string, fi fs.FileInfo, linkName string, f io.Reader) error { + // create a new dir/file header + header, err := tar.FileInfoHeader(fi, linkName) + if err != nil { + return err + } + + // update the name to correctly reflect the desired destination when untaring + header.Name = path.Join(tc.DstDir, fpath) + header.Mode = int64(fi.Mode()) + header.ModTime = fi.ModTime() + header.Uid = tc.UID + header.Gid = tc.GID + + // write the header + if err := tc.TarWriter.WriteHeader(header); err != nil { + return err + } + + // this is a symlink no reader provided + if f == nil { + return nil + } + + // copy file data into tar writer + if _, err := io.Copy(tc.TarWriter, f); err != nil { + return err + } + return nil +} + +type CopyCollector struct { + DstDir string +} + +func (cc *CopyCollector) WriteFile(fpath string, fi fs.FileInfo, linkName string, f io.Reader) error { + fdestpath := filepath.Join(cc.DstDir, fpath) + if err := os.MkdirAll(filepath.Dir(fdestpath), 0o777); err != nil { + return err + } + if f == nil { + return os.Symlink(linkName, fdestpath) + } + df, err := os.OpenFile(fdestpath, os.O_CREATE|os.O_WRONLY, fi.Mode()) + if err != nil { + return err + } + defer df.Close() + if _, err := io.Copy(df, f); err != nil { + return err + } + return nil +} + +type FileCollector struct { + Ignorer gitignore.Matcher + SrcPath string + SrcPrefix string + Fs Fs + Handler Handler +} + +type Fs interface { + Walk(root string, fn filepath.WalkFunc) error + OpenGitIndex(path string) (*index.Index, error) + Open(path string) (io.ReadCloser, error) + Readlink(path string) (string, error) +} + +type DefaultFs struct { +} + +func (*DefaultFs) Walk(root string, fn filepath.WalkFunc) error { + return filepath.Walk(root, fn) +} + +func (*DefaultFs) OpenGitIndex(path string) (*index.Index, error) { + r, err := git.PlainOpen(path) + if err != nil { + return nil, err + } + i, err := r.Storer.Index() + if err != nil { + return nil, err + } + return i, nil +} + +func (*DefaultFs) Open(path string) (io.ReadCloser, error) { + return os.Open(path) +} + +func (*DefaultFs) Readlink(path string) (string, error) { + return os.Readlink(path) +} + +//nolint:gocyclo +func (fc *FileCollector) CollectFiles(ctx context.Context, submodulePath []string) filepath.WalkFunc { + i, _ := fc.Fs.OpenGitIndex(path.Join(fc.SrcPath, path.Join(submodulePath...))) + return func(file string, fi os.FileInfo, err error) error { + if err != nil { + return err + } + if ctx != nil { + select { + case <-ctx.Done(): + return fmt.Errorf("copy cancelled") + default: + } + } + + sansPrefix := strings.TrimPrefix(file, fc.SrcPrefix) + split := strings.Split(sansPrefix, string(filepath.Separator)) + // The root folders should be skipped, submodules only have the last path component set to "." by filepath.Walk + if fi.IsDir() && len(split) > 0 && split[len(split)-1] == "." { + return nil + } + var entry *index.Entry + if i != nil { + entry, err = i.Entry(strings.Join(split[len(submodulePath):], "/")) + } else { + err = index.ErrEntryNotFound + } + if err != nil && fc.Ignorer != nil && fc.Ignorer.Match(split, fi.IsDir()) { + if fi.IsDir() { + if i != nil { + ms, err := i.Glob(strings.Join(append(split[len(submodulePath):], "**"), "/")) + if err != nil || len(ms) == 0 { + return filepath.SkipDir + } + } else { + return filepath.SkipDir + } + } else { + return nil + } + } + if err == nil && entry.Mode == filemode.Submodule { + err = fc.Fs.Walk(file, fc.CollectFiles(ctx, split)) + if err != nil { + return err + } + return filepath.SkipDir + } + path := filepath.ToSlash(sansPrefix) + + // return on non-regular files (thanks to [kumo](https://medium.com/@komuw/just-like-you-did-fbdd7df829d3) for this suggested update) + if fi.Mode()&os.ModeSymlink == os.ModeSymlink { + linkName, err := fc.Fs.Readlink(file) + if err != nil { + return fmt.Errorf("unable to readlink '%s': %w", file, err) + } + return fc.Handler.WriteFile(path, fi, linkName, nil) + } else if !fi.Mode().IsRegular() { + return nil + } + + // open file + f, err := fc.Fs.Open(file) + if err != nil { + return err + } + defer f.Close() + + if ctx != nil { + // make io.Copy cancellable by closing the file + cpctx, cpfinish := context.WithCancel(ctx) + defer cpfinish() + go func() { + select { + case <-cpctx.Done(): + case <-ctx.Done(): + f.Close() + } + }() + } + + return fc.Handler.WriteFile(path, fi, "", f) + } +} diff --git a/pkg/filecollector/file_collector_test.go b/pkg/filecollector/file_collector_test.go new file mode 100644 index 0000000..60a8d4d --- /dev/null +++ b/pkg/filecollector/file_collector_test.go @@ -0,0 +1,172 @@ +package filecollector + +import ( + "archive/tar" + "context" + "io" + "path/filepath" + "strings" + "testing" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-billy/v5/memfs" + git "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/cache" + "github.com/go-git/go-git/v5/plumbing/format/gitignore" + "github.com/go-git/go-git/v5/plumbing/format/index" + "github.com/go-git/go-git/v5/storage/filesystem" + "github.com/stretchr/testify/assert" +) + +type memoryFs struct { + billy.Filesystem +} + +func (mfs *memoryFs) walk(root string, fn filepath.WalkFunc) error { + dir, err := mfs.ReadDir(root) + if err != nil { + return err + } + for i := 0; i < len(dir); i++ { + filename := filepath.Join(root, dir[i].Name()) + err = fn(filename, dir[i], nil) + if dir[i].IsDir() { + if err == filepath.SkipDir { + err = nil + } else if err := mfs.walk(filename, fn); err != nil { + return err + } + } + if err != nil { + return err + } + } + return nil +} + +func (mfs *memoryFs) Walk(root string, fn filepath.WalkFunc) error { + stat, err := mfs.Lstat(root) + if err != nil { + return err + } + err = fn(strings.Join([]string{root, "."}, string(filepath.Separator)), stat, nil) + if err != nil { + return err + } + return mfs.walk(root, fn) +} + +func (mfs *memoryFs) OpenGitIndex(path string) (*index.Index, error) { + f, _ := mfs.Filesystem.Chroot(filepath.Join(path, ".git")) + storage := filesystem.NewStorage(f, cache.NewObjectLRUDefault()) + i, err := storage.Index() + if err != nil { + return nil, err + } + return i, nil +} + +func (mfs *memoryFs) Open(path string) (io.ReadCloser, error) { + return mfs.Filesystem.Open(path) +} + +func (mfs *memoryFs) Readlink(path string) (string, error) { + return mfs.Filesystem.Readlink(path) +} + +func TestIgnoredTrackedfile(t *testing.T) { + fs := memfs.New() + _ = fs.MkdirAll("mygitrepo/.git", 0o777) + dotgit, _ := fs.Chroot("mygitrepo/.git") + worktree, _ := fs.Chroot("mygitrepo") + repo, _ := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree) + f, _ := worktree.Create(".gitignore") + _, _ = f.Write([]byte(".*\n")) + f.Close() + // This file shouldn't be in the tar + f, _ = worktree.Create(".env") + _, _ = f.Write([]byte("test=val1\n")) + f.Close() + w, _ := repo.Worktree() + // .gitignore is in the tar after adding it to the index + _, _ = w.Add(".gitignore") + + tmpTar, _ := fs.Create("temp.tar") + tw := tar.NewWriter(tmpTar) + ps, _ := gitignore.ReadPatterns(worktree, []string{}) + ignorer := gitignore.NewMatcher(ps) + fc := &FileCollector{ + Fs: &memoryFs{Filesystem: fs}, + Ignorer: ignorer, + SrcPath: "mygitrepo", + SrcPrefix: "mygitrepo" + string(filepath.Separator), + Handler: &TarCollector{ + TarWriter: tw, + }, + } + err := fc.Fs.Walk("mygitrepo", fc.CollectFiles(context.Background(), []string{})) + assert.NoError(t, err, "successfully collect files") + tw.Close() + _, _ = tmpTar.Seek(0, io.SeekStart) + tr := tar.NewReader(tmpTar) + h, err := tr.Next() + assert.NoError(t, err, "tar must not be empty") + assert.Equal(t, ".gitignore", h.Name) + _, err = tr.Next() + assert.ErrorIs(t, err, io.EOF, "tar must only contain one element") +} + +func TestSymlinks(t *testing.T) { + fs := memfs.New() + _ = fs.MkdirAll("mygitrepo/.git", 0o777) + dotgit, _ := fs.Chroot("mygitrepo/.git") + worktree, _ := fs.Chroot("mygitrepo") + repo, _ := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree) + // This file shouldn't be in the tar + f, err := worktree.Create(".env") + assert.NoError(t, err) + _, err = f.Write([]byte("test=val1\n")) + assert.NoError(t, err) + f.Close() + err = worktree.Symlink(".env", "test.env") + assert.NoError(t, err) + + w, err := repo.Worktree() + assert.NoError(t, err) + + // .gitignore is in the tar after adding it to the index + _, err = w.Add(".env") + assert.NoError(t, err) + _, err = w.Add("test.env") + assert.NoError(t, err) + + tmpTar, _ := fs.Create("temp.tar") + tw := tar.NewWriter(tmpTar) + ps, _ := gitignore.ReadPatterns(worktree, []string{}) + ignorer := gitignore.NewMatcher(ps) + fc := &FileCollector{ + Fs: &memoryFs{Filesystem: fs}, + Ignorer: ignorer, + SrcPath: "mygitrepo", + SrcPrefix: "mygitrepo" + string(filepath.Separator), + Handler: &TarCollector{ + TarWriter: tw, + }, + } + err = fc.Fs.Walk("mygitrepo", fc.CollectFiles(context.Background(), []string{})) + assert.NoError(t, err, "successfully collect files") + tw.Close() + _, _ = tmpTar.Seek(0, io.SeekStart) + tr := tar.NewReader(tmpTar) + h, err := tr.Next() + files := map[string]tar.Header{} + for err == nil { + files[h.Name] = *h + h, err = tr.Next() + } + + assert.Equal(t, ".env", files[".env"].Name) + assert.Equal(t, "test.env", files["test.env"].Name) + assert.Equal(t, ".env", files["test.env"].Linkname) + assert.ErrorIs(t, err, io.EOF, "tar must be read cleanly to EOF") +} |