diff options
Diffstat (limited to 'modules/git/pipeline/lfs.go')
-rw-r--r-- | modules/git/pipeline/lfs.go | 254 |
1 files changed, 254 insertions, 0 deletions
diff --git a/modules/git/pipeline/lfs.go b/modules/git/pipeline/lfs.go new file mode 100644 index 0000000..3407eb9 --- /dev/null +++ b/modules/git/pipeline/lfs.go @@ -0,0 +1,254 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package pipeline + +import ( + "bufio" + "bytes" + "fmt" + "io" + "sort" + "strings" + "sync" + "time" + + "code.gitea.io/gitea/modules/git" +) + +// LFSResult represents commits found using a provided pointer file hash +type LFSResult struct { + Name string + SHA string + Summary string + When time.Time + ParentHashes []git.ObjectID + BranchName string + FullCommitName string +} + +type lfsResultSlice []*LFSResult + +func (a lfsResultSlice) Len() int { return len(a) } +func (a lfsResultSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) } + +func lfsError(msg string, err error) error { + return fmt.Errorf("LFS error occurred, %s: err: %w", msg, err) +} + +// FindLFSFile finds commits that contain a provided pointer file hash +func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, error) { + resultsMap := map[string]*LFSResult{} + results := make([]*LFSResult, 0) + + basePath := repo.Path + + // Use rev-list to provide us with all commits in order + revListReader, revListWriter := io.Pipe() + defer func() { + _ = revListWriter.Close() + _ = revListReader.Close() + }() + + go func() { + stderr := strings.Builder{} + err := git.NewCommand(repo.Ctx, "rev-list", "--all").Run(&git.RunOpts{ + Dir: repo.Path, + Stdout: revListWriter, + Stderr: &stderr, + }) + if err != nil { + _ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String())) + } else { + _ = revListWriter.Close() + } + }() + + // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. + // so let's create a batch stdin and stdout + batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + if err != nil { + return nil, err + } + defer cancel() + + // We'll use a scanner for the revList because it's simpler than a bufio.Reader + scan := bufio.NewScanner(revListReader) + trees := [][]byte{} + paths := []string{} + + fnameBuf := make([]byte, 4096) + modeBuf := make([]byte, 40) + workingShaBuf := make([]byte, objectID.Type().FullLength()/2) + + for scan.Scan() { + // Get the next commit ID + commitID := scan.Bytes() + + // push the commit to the cat-file --batch process + _, err := batchStdinWriter.Write(commitID) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte{'\n'}) + if err != nil { + return nil, err + } + + var curCommit *git.Commit + curPath := "" + + commitReadingLoop: + for { + _, typ, size, err := git.ReadBatchLine(batchReader) + if err != nil { + return nil, err + } + + switch typ { + case "tag": + // This shouldn't happen but if it does well just get the commit and try again + id, err := git.ReadTagObjectID(batchReader, size) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte(id + "\n")) + if err != nil { + return nil, err + } + continue + case "commit": + // Read in the commit to get its tree and in case this is one of the last used commits + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size)) + if err != nil { + return nil, err + } + if _, err := batchReader.Discard(1); err != nil { + return nil, err + } + + if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { + return nil, err + } + curPath = "" + case "tree": + var n int64 + for n < size { + mode, fname, binObjectID, count, err := git.ParseTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf) + if err != nil { + return nil, err + } + n += int64(count) + if bytes.Equal(binObjectID, objectID.RawValue()) { + result := LFSResult{ + Name: curPath + string(fname), + SHA: curCommit.ID.String(), + Summary: strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0], + When: curCommit.Author.When, + ParentHashes: curCommit.Parents, + } + resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result + } else if string(mode) == git.EntryModeTree.String() { + hexObjectID := make([]byte, objectID.Type().FullLength()) + git.BinToHex(objectID.Type(), binObjectID, hexObjectID) + trees = append(trees, hexObjectID) + paths = append(paths, curPath+string(fname)+"/") + } + } + if _, err := batchReader.Discard(1); err != nil { + return nil, err + } + if len(trees) > 0 { + _, err := batchStdinWriter.Write(trees[len(trees)-1]) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte("\n")) + if err != nil { + return nil, err + } + curPath = paths[len(paths)-1] + trees = trees[:len(trees)-1] + paths = paths[:len(paths)-1] + } else { + break commitReadingLoop + } + default: + if err := git.DiscardFull(batchReader, size+1); err != nil { + return nil, err + } + } + } + } + + if err := scan.Err(); err != nil { + return nil, err + } + + for _, result := range resultsMap { + hasParent := false + for _, parentID := range result.ParentHashes { + if _, hasParent = resultsMap[parentID.String()+":"+result.Name]; hasParent { + break + } + } + if !hasParent { + results = append(results, result) + } + } + + sort.Sort(lfsResultSlice(results)) + + // Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple + shasToNameReader, shasToNameWriter := io.Pipe() + nameRevStdinReader, nameRevStdinWriter := io.Pipe() + errChan := make(chan error, 1) + wg := sync.WaitGroup{} + wg.Add(3) + + go func() { + defer wg.Done() + scanner := bufio.NewScanner(nameRevStdinReader) + i := 0 + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + result := results[i] + result.FullCommitName = line + result.BranchName = strings.Split(line, "~")[0] + i++ + } + }() + go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath) + go func() { + defer wg.Done() + defer shasToNameWriter.Close() + for _, result := range results { + _, err := shasToNameWriter.Write([]byte(result.SHA)) + if err != nil { + errChan <- err + break + } + _, err = shasToNameWriter.Write([]byte{'\n'}) + if err != nil { + errChan <- err + break + } + } + }() + + wg.Wait() + + select { + case err, has := <-errChan: + if has { + return nil, lfsError("unable to obtain name for LFS files", err) + } + default: + } + + return results, nil +} |