summary | refs | log | tree | commit | diff | stats
path: root/pkg/filecollector
diff options
context:
space:
mode:
author: Daniel Baumann <daniel@debian.org> 2024-10-20 23:07:42 +0200
committer: Daniel Baumann <daniel@debian.org> 2024-11-09 15:38:42 +0100
commit: 714c83b2736d7e308bc33c49057952490eb98be2 (patch)
tree: 1d9ba7035798368569cd49056f4d596efc908cd8 /pkg/filecollector
parent: Initial commit. (diff)
download: forgejo-act-714c83b2736d7e308bc33c49057952490eb98be2.tar.xz
forgejo-act-714c83b2736d7e308bc33c49057952490eb98be2.zip
Adding upstream version 1.21.4. (refs: HEAD, upstream/1.21.4, upstream, debian)
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to 'pkg/filecollector')
-rw-r--r-- pkg/filecollector/file_collector.go 210
-rw-r--r-- pkg/filecollector/file_collector_test.go 172
2 files changed, 382 insertions, 0 deletions
diff --git a/pkg/filecollector/file_collector.go b/pkg/filecollector/file_collector.go
new file mode 100644
index 0000000..8547bb7
--- /dev/null
+++ b/pkg/filecollector/file_collector.go
@@ -0,0 +1,210 @@
+package filecollector
+
+import (
+ "archive/tar"
+ "context"
+ "fmt"
+ "io"
+ "io/fs"
+ "os"
+ "path"
+ "path/filepath"
+ "strings"
+
+ git "github.com/go-git/go-git/v5"
+ "github.com/go-git/go-git/v5/plumbing/filemode"
+ "github.com/go-git/go-git/v5/plumbing/format/gitignore"
+ "github.com/go-git/go-git/v5/plumbing/format/index"
+)
+
+// Handler is the sink a FileCollector hands every collected entry to.
+//
+// WriteFile receives the entry's slash-separated path with the collector's
+// source prefix removed, together with its FileInfo. For a symbolic link,
+// linkName holds the link target and f is nil; for a regular file, linkName
+// is empty and f yields the file content.
+type Handler interface {
+	WriteFile(path string, fi fs.FileInfo, linkName string, f io.Reader) error
+}
+
+// TarCollector is a Handler that appends every entry to a tar stream.
+type TarCollector struct {
+	// TarWriter receives the headers and the file contents.
+	TarWriter *tar.Writer
+	// UID is stored as the owner id of every written header.
+	UID int
+	// GID is stored as the group id of every written header.
+	GID int
+	// DstDir is joined in front of every entry name inside the archive.
+	DstDir string
+}
+
+// WriteFile appends one entry to the tar stream.
+//
+// The header is derived from fi and linkName, renamed so that it unpacks
+// below tc.DstDir, and stamped with the configured UID and GID. When f is
+// nil (symbolic links) only the header is written; otherwise the content of
+// f follows the header.
+func (tc TarCollector) WriteFile(fpath string, fi fs.FileInfo, linkName string, f io.Reader) error {
+	hdr, err := tar.FileInfoHeader(fi, linkName)
+	if err != nil {
+		return err
+	}
+
+	// Relocate the entry to its destination and override the metadata taken from fi.
+	hdr.Name = path.Join(tc.DstDir, fpath)
+	hdr.Mode = int64(fi.Mode())
+	hdr.ModTime = fi.ModTime()
+	hdr.Uid, hdr.Gid = tc.UID, tc.GID
+
+	if err = tc.TarWriter.WriteHeader(hdr); err != nil {
+		return err
+	}
+
+	// No reader is provided for symbolic links, the header is all there is to write.
+	if f == nil {
+		return nil
+	}
+
+	// Stream the file content right behind its header.
+	_, err = io.Copy(tc.TarWriter, f)
+	return err
+}
+
+// CopyCollector is a Handler that recreates every entry below a directory
+// of the local file system.
+type CopyCollector struct {
+	// DstDir is the directory every entry path is joined onto.
+	DstDir string
+}
+
+// WriteFile recreates one entry below cc.DstDir.
+//
+// Missing parent directories are created first. When f is nil the entry is
+// created as a symbolic link pointing at linkName; otherwise the content of
+// f is copied into a file opened with the mode reported by fi.
+//
+// The error returned by closing the destination file is reported, so a
+// write that only fails while the file is being closed is not mistaken for
+// a successful copy.
+//
+// NOTE: the destination is opened without os.O_TRUNC, so an already
+// existing, longer file keeps its trailing bytes.
+func (cc *CopyCollector) WriteFile(fpath string, fi fs.FileInfo, linkName string, f io.Reader) error {
+	fdestpath := filepath.Join(cc.DstDir, fpath)
+	if err := os.MkdirAll(filepath.Dir(fdestpath), 0o777); err != nil {
+		return err
+	}
+	if f == nil {
+		return os.Symlink(linkName, fdestpath)
+	}
+	df, err := os.OpenFile(fdestpath, os.O_CREATE|os.O_WRONLY, fi.Mode())
+	if err != nil {
+		return err
+	}
+	if _, err := io.Copy(df, f); err != nil {
+		// The copy error is the one worth reporting, the close is only cleanup here.
+		_ = df.Close()
+		return err
+	}
+	return df.Close()
+}
+
+// FileCollector walks a source tree and forwards the entries worth keeping
+// to a Handler, see CollectFiles.
+type FileCollector struct {
+	// Ignorer is an optional gitignore matcher; entries it matches are
+	// dropped unless the git index still tracks them.
+	Ignorer gitignore.Matcher
+	// SrcPath is the directory in which the git index is looked up.
+	SrcPath string
+	// SrcPrefix is trimmed from every walked path before it is matched and
+	// handed to the Handler.
+	SrcPrefix string
+	// Fs provides the file system and git index access.
+	Fs Fs
+	// Handler receives every collected entry.
+	Handler Handler
+}
+
+// Fs bundles the file system and git operations a FileCollector relies on,
+// so that they can be substituted by another implementation.
+type Fs interface {
+	// Walk calls fn for the entries of the tree rooted at root.
+	Walk(root string, fn filepath.WalkFunc) error
+	// OpenGitIndex returns the git index of the repository located at path.
+	OpenGitIndex(path string) (*index.Index, error)
+	// Open opens the file at path for reading.
+	Open(path string) (io.ReadCloser, error)
+	// Readlink returns the target of the symbolic link at path.
+	Readlink(path string) (string, error)
+}
+
+// DefaultFs implements Fs using the operating system's file system and
+// go-git for reading the repository index.
+type DefaultFs struct{}
+
+// Walk delegates to filepath.Walk.
+func (*DefaultFs) Walk(root string, fn filepath.WalkFunc) error {
+	return filepath.Walk(root, fn)
+}
+
+// OpenGitIndex opens the git repository at path and returns its index.
+// A nil index is returned whenever an error is reported.
+func (*DefaultFs) OpenGitIndex(path string) (*index.Index, error) {
+	repo, err := git.PlainOpen(path)
+	if err != nil {
+		return nil, err
+	}
+	idx, err := repo.Storer.Index()
+	if err != nil {
+		return nil, err
+	}
+	return idx, nil
+}
+
+// Open delegates to os.Open.
+func (*DefaultFs) Open(path string) (io.ReadCloser, error) {
+	return os.Open(path)
+}
+
+// Readlink delegates to os.Readlink.
+func (*DefaultFs) Readlink(path string) (string, error) {
+	return os.Readlink(path)
+}
+
+// CollectFiles returns a filepath.WalkFunc that hands the walked entries to
+// fc.Handler.
+//
+// submodulePath holds the path components of the git submodule being walked
+// (empty for the top-level tree); the git index is looked up below
+// fc.SrcPath joined with these components. For every walked entry the
+// returned function
+//   - aborts with an error once ctx is done (ctx may be nil, which disables
+//     all cancellation handling),
+//   - skips entries matched by fc.Ignorer unless the git index tracks them,
+//     or, for directories, tracks something below them,
+//   - walks git submodules with a collector of their own,
+//   - forwards symbolic links with their target and without a reader,
+//   - forwards regular files with an open reader and drops every other
+//     file type.
+//
+//nolint:gocyclo
+func (fc *FileCollector) CollectFiles(ctx context.Context, submodulePath []string) filepath.WalkFunc {
+	// The error is dropped on purpose: without an index every entry is
+	// simply treated as untracked below.
+	idx, _ := fc.Fs.OpenGitIndex(path.Join(fc.SrcPath, path.Join(submodulePath...)))
+	return func(file string, fi os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if ctx != nil {
+			select {
+			case <-ctx.Done():
+				return fmt.Errorf("copy cancelled")
+			default:
+			}
+		}
+
+		sansPrefix := strings.TrimPrefix(file, fc.SrcPrefix)
+		split := strings.Split(sansPrefix, string(filepath.Separator))
+		// The root folders should be skipped, submodules only have the last path component set to "." by filepath.Walk
+		if fi.IsDir() && len(split) > 0 && split[len(split)-1] == "." {
+			return nil
+		}
+
+		// Look the entry up in the index, relative to the current (sub)module.
+		var entry *index.Entry
+		if idx != nil {
+			entry, err = idx.Entry(strings.Join(split[len(submodulePath):], "/"))
+		} else {
+			err = index.ErrEntryNotFound
+		}
+
+		// Untracked and ignored: drop files, and drop directories unless the
+		// index tracks something below them.
+		if err != nil && fc.Ignorer != nil && fc.Ignorer.Match(split, fi.IsDir()) {
+			if !fi.IsDir() {
+				return nil
+			}
+			if idx == nil {
+				return filepath.SkipDir
+			}
+			ms, globErr := idx.Glob(strings.Join(append(split[len(submodulePath):], "**"), "/"))
+			if globErr != nil || len(ms) == 0 {
+				return filepath.SkipDir
+			}
+		}
+
+		// Submodules carry their own index, walk them with a dedicated collector.
+		if err == nil && entry.Mode == filemode.Submodule {
+			if err := fc.Fs.Walk(file, fc.CollectFiles(ctx, split)); err != nil {
+				return err
+			}
+			return filepath.SkipDir
+		}
+		relPath := filepath.ToSlash(sansPrefix)
+
+		// return on non-regular files (thanks to [kumo](https://medium.com/@komuw/just-like-you-did-fbdd7df829d3) for this suggested update)
+		if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
+			linkName, err := fc.Fs.Readlink(file)
+			if err != nil {
+				return fmt.Errorf("unable to readlink '%s': %w", file, err)
+			}
+			return fc.Handler.WriteFile(relPath, fi, linkName, nil)
+		}
+		if !fi.Mode().IsRegular() {
+			return nil
+		}
+
+		// open file
+		f, err := fc.Fs.Open(file)
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+
+		if ctx != nil {
+			// Make the copy cancellable by closing the file once ctx is done.
+			// A dedicated channel signals completion instead of a context
+			// derived from ctx: a derived context is cancelled together with
+			// its parent, so both select cases would be ready on cancellation
+			// and the close could be skipped at random.
+			done := make(chan struct{})
+			defer close(done)
+			go func() {
+				select {
+				case <-done:
+				case <-ctx.Done():
+					f.Close()
+				}
+			}()
+		}
+
+		return fc.Handler.WriteFile(relPath, fi, "", f)
+	}
+}
diff --git a/pkg/filecollector/file_collector_test.go b/pkg/filecollector/file_collector_test.go
new file mode 100644
index 0000000..60a8d4d
--- /dev/null
+++ b/pkg/filecollector/file_collector_test.go
@@ -0,0 +1,172 @@
+package filecollector
+
+import (
+ "archive/tar"
+ "context"
+ "io"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/go-git/go-billy/v5"
+ "github.com/go-git/go-billy/v5/memfs"
+ git "github.com/go-git/go-git/v5"
+ "github.com/go-git/go-git/v5/plumbing/cache"
+ "github.com/go-git/go-git/v5/plumbing/format/gitignore"
+ "github.com/go-git/go-git/v5/plumbing/format/index"
+ "github.com/go-git/go-git/v5/storage/filesystem"
+ "github.com/stretchr/testify/assert"
+)
+
+// memoryFs adapts a billy.Filesystem to the Fs interface so that the
+// collector can be tested against an in-memory tree.
+type memoryFs struct {
+	billy.Filesystem
+}
+
+// walk calls fn for every entry below root, descending into directories.
+//
+// A directory for which fn returns filepath.SkipDir is not descended into.
+// Any other error returned by fn stops the walk immediately and is
+// returned; in particular a directory is no longer descended into after fn
+// failed for it.
+func (mfs *memoryFs) walk(root string, fn filepath.WalkFunc) error {
+	entries, err := mfs.ReadDir(root)
+	if err != nil {
+		return err
+	}
+	for _, entry := range entries {
+		filename := filepath.Join(root, entry.Name())
+		err := fn(filename, entry, nil)
+		if entry.IsDir() && err == filepath.SkipDir {
+			// Only this directory is skipped, its siblings are still visited.
+			continue
+		}
+		if err != nil {
+			return err
+		}
+		if entry.IsDir() {
+			if err := mfs.walk(filename, fn); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// Walk visits root and everything below it.
+//
+// The root itself is reported to fn as root followed by the path separator
+// and ".", afterwards the remaining entries are visited by walk.
+func (mfs *memoryFs) Walk(root string, fn filepath.WalkFunc) error {
+	info, err := mfs.Lstat(root)
+	if err != nil {
+		return err
+	}
+	rootEntry := root + string(filepath.Separator) + "."
+	if err := fn(rootEntry, info, nil); err != nil {
+		return err
+	}
+	return mfs.walk(root, fn)
+}
+
+// OpenGitIndex returns the git index stored in the .git directory below
+// path. A nil index is returned whenever an error is reported, including a
+// failure to enter the .git directory.
+func (mfs *memoryFs) OpenGitIndex(path string) (*index.Index, error) {
+	dotgit, err := mfs.Filesystem.Chroot(filepath.Join(path, ".git"))
+	if err != nil {
+		return nil, err
+	}
+	storage := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault())
+	idx, err := storage.Index()
+	if err != nil {
+		return nil, err
+	}
+	return idx, nil
+}
+
+// Open opens the file at path on the wrapped filesystem for reading.
+func (mfs *memoryFs) Open(path string) (io.ReadCloser, error) {
+	file, err := mfs.Filesystem.Open(path)
+	return file, err
+}
+
+// Readlink returns the target of the symbolic link at path on the wrapped
+// filesystem.
+func (mfs *memoryFs) Readlink(path string) (string, error) {
+	target, err := mfs.Filesystem.Readlink(path)
+	return target, err
+}
+
+// TestIgnoredTrackedfile checks that a file matched by .gitignore is still
+// collected when the git index tracks it, while an ignored file that is not
+// tracked is left out of the archive.
+func TestIgnoredTrackedfile(t *testing.T) {
+	// must aborts the test when a setup step fails, instead of continuing
+	// with nil values.
+	must := func(err error) {
+		t.Helper()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	fs := memfs.New()
+	must(fs.MkdirAll("mygitrepo/.git", 0o777))
+	dotgit, err := fs.Chroot("mygitrepo/.git")
+	must(err)
+	worktree, err := fs.Chroot("mygitrepo")
+	must(err)
+	repo, err := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree)
+	must(err)
+
+	// The pattern ignores every dot file, .gitignore itself included.
+	f, err := worktree.Create(".gitignore")
+	must(err)
+	_, err = f.Write([]byte(".*\n"))
+	must(err)
+	must(f.Close())
+	// This file shouldn't be in the tar
+	f, err = worktree.Create(".env")
+	must(err)
+	_, err = f.Write([]byte("test=val1\n"))
+	must(err)
+	must(f.Close())
+	w, err := repo.Worktree()
+	must(err)
+	// .gitignore is in the tar after adding it to the index
+	_, err = w.Add(".gitignore")
+	must(err)
+
+	tmpTar, err := fs.Create("temp.tar")
+	must(err)
+	tw := tar.NewWriter(tmpTar)
+	// The error is left unchecked as before; the assertions below fail if
+	// the ignore pattern was not picked up.
+	ps, _ := gitignore.ReadPatterns(worktree, []string{})
+	ignorer := gitignore.NewMatcher(ps)
+	fc := &FileCollector{
+		Fs:        &memoryFs{Filesystem: fs},
+		Ignorer:   ignorer,
+		SrcPath:   "mygitrepo",
+		SrcPrefix: "mygitrepo" + string(filepath.Separator),
+		Handler: &TarCollector{
+			TarWriter: tw,
+		},
+	}
+	err = fc.Fs.Walk("mygitrepo", fc.CollectFiles(context.Background(), []string{}))
+	assert.NoError(t, err, "successfully collect files")
+	must(tw.Close())
+	_, err = tmpTar.Seek(0, io.SeekStart)
+	must(err)
+	tr := tar.NewReader(tmpTar)
+	h, err := tr.Next()
+	// Only look at the header when there is one, a failed read leaves it nil.
+	if assert.NoError(t, err, "tar must not be empty") {
+		assert.Equal(t, ".gitignore", h.Name)
+	}
+	_, err = tr.Next()
+	assert.ErrorIs(t, err, io.EOF, "tar must only contain one element")
+}
+
+// TestSymlinks checks that a tracked symbolic link is written to the
+// archive as a link entry pointing at its target, next to the tracked
+// regular file it refers to.
+func TestSymlinks(t *testing.T) {
+	fs := memfs.New()
+	assert.NoError(t, fs.MkdirAll("mygitrepo/.git", 0o777))
+	dotgit, err := fs.Chroot("mygitrepo/.git")
+	assert.NoError(t, err)
+	worktree, err := fs.Chroot("mygitrepo")
+	assert.NoError(t, err)
+	repo, err := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree)
+	if !assert.NoError(t, err) {
+		return
+	}
+	// Regular file that the symlink below points at
+	f, err := worktree.Create(".env")
+	if !assert.NoError(t, err) {
+		return
+	}
+	_, err = f.Write([]byte("test=val1\n"))
+	assert.NoError(t, err)
+	assert.NoError(t, f.Close())
+	err = worktree.Symlink(".env", "test.env")
+	assert.NoError(t, err)
+
+	w, err := repo.Worktree()
+	if !assert.NoError(t, err) {
+		return
+	}
+
+	// Both the file and the symlink are tracked in the index
+	_, err = w.Add(".env")
+	assert.NoError(t, err)
+	_, err = w.Add("test.env")
+	assert.NoError(t, err)
+
+	tmpTar, err := fs.Create("temp.tar")
+	if !assert.NoError(t, err) {
+		return
+	}
+	tw := tar.NewWriter(tmpTar)
+	// The error is left unchecked as before; this repository has no
+	// .gitignore file to read patterns from.
+	ps, _ := gitignore.ReadPatterns(worktree, []string{})
+	ignorer := gitignore.NewMatcher(ps)
+	fc := &FileCollector{
+		Fs:        &memoryFs{Filesystem: fs},
+		Ignorer:   ignorer,
+		SrcPath:   "mygitrepo",
+		SrcPrefix: "mygitrepo" + string(filepath.Separator),
+		Handler: &TarCollector{
+			TarWriter: tw,
+		},
+	}
+	err = fc.Fs.Walk("mygitrepo", fc.CollectFiles(context.Background(), []string{}))
+	assert.NoError(t, err, "successfully collect files")
+	assert.NoError(t, tw.Close())
+	_, err = tmpTar.Seek(0, io.SeekStart)
+	assert.NoError(t, err)
+	tr := tar.NewReader(tmpTar)
+
+	// Index every header by name until the reader reports an error (io.EOF when all went well)
+	files := map[string]tar.Header{}
+	h, err := tr.Next()
+	for err == nil {
+		files[h.Name] = *h
+		h, err = tr.Next()
+	}
+
+	assert.Equal(t, ".env", files[".env"].Name)
+	assert.Equal(t, "test.env", files["test.env"].Name)
+	assert.Equal(t, ".env", files["test.env"].Linkname)
+	assert.ErrorIs(t, err, io.EOF, "tar must be read cleanly to EOF")
+}