summaryrefslogtreecommitdiffstats
path: root/modules/git/log_name_status.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--modules/git/log_name_status.go437
1 files changed, 437 insertions, 0 deletions
diff --git a/modules/git/log_name_status.go b/modules/git/log_name_status.go
new file mode 100644
index 0000000..1fd58ab
--- /dev/null
+++ b/modules/git/log_name_status.go
@@ -0,0 +1,437 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package git
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "errors"
+ "io"
+ "path"
+ "sort"
+ "strings"
+
+ "code.gitea.io/gitea/modules/container"
+
+ "github.com/djherbis/buffer"
+ "github.com/djherbis/nio/v3"
+)
+
+// LogNameStatusRepo starts `git log --name-status` for head in the provided repo and returns a
+// buffered reader over the command's stdout together with a cancel function.
+//
+// treepath, when set, is always passed to git as a pathspec; the individual paths are added
+// (joined onto treepath when it is set) only when fewer than 70 of them are given. Calling the
+// returned function cancels the command's context and closes both ends of the output pipe.
+func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
+	// The command writes into an in-memory pipe backed by a 32KiB buffer; the caller reads from the other end.
+	pipeReader, pipeWriter := nio.Pipe(buffer.New(32 * 1024))
+
+	// A derived context guarantees the command can be stopped from the returned cancel function.
+	ctx, stop := context.WithCancel(ctx)
+
+	cmd := NewCommand(ctx)
+	cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head)
+
+	// Collect the pathspecs that limit the log.
+	var pathspecs []string
+	if treepath != "" {
+		pathspecs = append(pathspecs, treepath)
+	}
+	if len(paths) < 70 {
+		for _, p := range paths {
+			if p == "" {
+				continue
+			}
+			if treepath != "" {
+				p = path.Join(treepath, p)
+			}
+			pathspecs = append(pathspecs, p)
+		}
+	}
+	// The :(literal) pathspec magic copes with names such as ":file.txt" or "*.jpg"
+	for i := range pathspecs {
+		pathspecs[i] = ":(literal)" + pathspecs[i]
+	}
+	cmd.AddDashesAndList(pathspecs...)
+
+	// Run the command in the background; its exit status is reported to the reader through the pipe.
+	go func() {
+		var stderr strings.Builder
+		runErr := cmd.Run(&RunOpts{
+			Dir:    repository,
+			Stdout: pipeWriter,
+			Stderr: &stderr,
+		})
+		if runErr == nil {
+			_ = pipeWriter.Close()
+			return
+		}
+		_ = pipeWriter.CloseWithError(ConcatenateError(runErr, stderr.String()))
+	}()
+
+	cancel := func() {
+		stop()
+		_ = pipeReader.Close()
+		_ = pipeWriter.Close()
+	}
+
+	// Hand back a buffered reader so callers can use ReadSlice and friends on the log output.
+	return bufio.NewReaderSize(pipeReader, 32*1024), cancel
+}
+
+// LogNameStatusRepoParser parses the git log --name-status output produced by LogNameStatusRepo
+type LogNameStatusRepoParser struct {
+ treepath string // tree path the log was started with (stored by the constructor)
+ paths []string // paths the log was started with (stored by the constructor)
+ next []byte // most recent token returned by rd.ReadSlice; carried over between calls to Next
+ buffull bool // set when a ReadSlice reported bufio.ErrBufferFull, i.e. next holds only part of a token
+ rd *bufio.Reader // buffered reader over the git log output
+ cancel func() // cancel function obtained from LogNameStatusRepo; invoked by Close
+}
+
+// NewLogNameStatusRepoParser starts a git log --name-status via LogNameStatusRepo and wraps
+// its output reader and cancel function in a parser. Callers should Close the parser when done.
+func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
+	parser := &LogNameStatusRepoParser{
+		treepath: treepath,
+		paths:    paths,
+	}
+	parser.rd, parser.cancel = LogNameStatusRepo(ctx, repository, head, treepath, paths...)
+	return parser
+}
+
+// LogNameStatusCommitData holds what Next parsed for a single commit of the git log --name-status output
+type LogNameStatusCommitData struct {
+ CommitID string // first ID on the commit's ID line
+ ParentIDs []string // remaining IDs on that line, if any
+ Paths []bool // the changed slice passed to Next, set only when a tracked path matched; nil otherwise
+}
+
+// Next parses the next commit from the log stream and returns its ID, its parent IDs and, if any tracked path was touched, the changed slice as Paths; it returns (nil, nil) once the stream is exhausted. treepath is the prefix stripped from reported filenames, paths2ids maps each tracked name to its index in changed, and names longer than maxpathlen (after stripping) are skipped.
+func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
+ var err error
+ if len(g.next) == 0 { // nothing carried over from a previous call: read the first NUL-terminated token
+ g.buffull = false
+ g.next, err = g.rd.ReadSlice('\x00')
+ if err != nil {
+ if err == bufio.ErrBufferFull {
+ g.buffull = true
+ } else if err == io.EOF {
+ return nil, nil // end of stream: no more commits
+ } else {
+ return nil, err
+ }
+ }
+ }
+
+ ret := LogNameStatusCommitData{}
+ if bytes.Equal(g.next, []byte("commit\000")) { // skip the literal "commit" marker; the token after it carries the IDs
+ g.next, err = g.rd.ReadSlice('\x00')
+ if err != nil {
+ if err == bufio.ErrBufferFull {
+ g.buffull = true
+ } else if err == io.EOF {
+ return nil, nil
+ } else {
+ return nil, err
+ }
+ }
+ }
+
+ // Our "line" must look like: <commitid> SP (<parent> SP) * NUL
+ commitIDs := string(g.next)
+ if g.buffull { // the token did not fit in the reader's buffer: fetch the remainder up to NUL
+ more, err := g.rd.ReadString('\x00')
+ if err != nil {
+ return nil, err
+ }
+ commitIDs += more
+ }
+ commitIDs = commitIDs[:len(commitIDs)-1] // drop the trailing NUL
+ splitIDs := strings.Split(commitIDs, " ")
+ ret.CommitID = splitIDs[0]
+ if len(splitIDs) > 1 {
+ ret.ParentIDs = splitIDs[1:]
+ }
+
+ // now read the next "line"
+ g.buffull = false
+ g.next, err = g.rd.ReadSlice('\x00')
+ if err != nil {
+ if err == bufio.ErrBufferFull {
+ g.buffull = true
+ } else if err != io.EOF {
+ return nil, err
+ }
+ }
+
+ if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') { // no change list follows: stream ended or the token is not NL/NUL-prefixed
+ return &ret, nil
+ }
+
+ // Ok we have some changes.
+ // This line will look like: NL <fname> NUL
+ //
+ // Subsequent lines will not have the NL - so drop it here - g.buffull must also be false at this point too.
+ if g.next[0] == '\n' {
+ g.next = g.next[1:]
+ } else { // token began with NUL: read on, skipping at most one further NUL-led token
+ g.buffull = false
+ g.next, err = g.rd.ReadSlice('\x00')
+ if err != nil {
+ if err == bufio.ErrBufferFull {
+ g.buffull = true
+ } else if err != io.EOF {
+ return nil, err
+ }
+ }
+ if len(g.next) == 0 {
+ return &ret, nil
+ }
+ if g.next[0] == '\x00' {
+ g.buffull = false
+ g.next, err = g.rd.ReadSlice('\x00')
+ if err != nil {
+ if err == bufio.ErrBufferFull {
+ g.buffull = true
+ } else if err != io.EOF {
+ return nil, err
+ }
+ }
+ }
+ }
+
+ fnameBuf := make([]byte, 4096) // scratch buffer for the current filename, reused across loop iterations
+
+diffloop:
+ for {
+ if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) { // stream ended or the next commit starts: this commit is complete
+ return &ret, nil
+ }
+ g.next, err = g.rd.ReadSlice('\x00') // the previous token (presumably the status letter) is discarded; this one is the filename
+ if err != nil {
+ if err == bufio.ErrBufferFull {
+ g.buffull = true
+ } else if err == io.EOF {
+ return &ret, nil
+ } else {
+ return nil, err
+ }
+ }
+ copy(fnameBuf, g.next) // copy out of the reader's buffer: the next read invalidates g.next
+ if len(fnameBuf) < len(g.next) {
+ fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
+ } else {
+ fnameBuf = fnameBuf[:len(g.next)]
+ }
+ if err != nil { // only bufio.ErrBufferFull can reach here; other errors returned above
+ if err != bufio.ErrBufferFull {
+ return nil, err
+ }
+ more, err := g.rd.ReadBytes('\x00') // filename exceeded the reader's buffer: read the rest up to NUL
+ if err != nil {
+ return nil, err
+ }
+ fnameBuf = append(fnameBuf, more...)
+ }
+
+ // read the next line
+ g.buffull = false
+ g.next, err = g.rd.ReadSlice('\x00')
+ if err != nil {
+ if err == bufio.ErrBufferFull {
+ g.buffull = true
+ } else if err != io.EOF {
+ return nil, err
+ }
+ }
+
+ if treepath != "" {
+ if !bytes.HasPrefix(fnameBuf, []byte(treepath)) { // ignore entries that do not start with treepath
+ fnameBuf = fnameBuf[:cap(fnameBuf)] // restore the full length so the buffer can be reused
+ continue diffloop
+ }
+ }
+ fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1] // strip the treepath prefix and the trailing NUL
+ if len(fnameBuf) > maxpathlen {
+ fnameBuf = fnameBuf[:cap(fnameBuf)]
+ continue diffloop
+ }
+ if len(fnameBuf) > 0 {
+ if len(treepath) > 0 {
+ if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 { // keep only direct children of treepath
+ fnameBuf = fnameBuf[:cap(fnameBuf)]
+ continue diffloop
+ }
+ fnameBuf = fnameBuf[1:]
+ } else if bytes.IndexByte(fnameBuf, '/') >= 0 { // without a treepath keep only top-level entries
+ fnameBuf = fnameBuf[:cap(fnameBuf)]
+ continue diffloop
+ }
+ }
+
+ idx, ok := paths2ids[string(fnameBuf)]
+ if !ok { // not one of the tracked names
+ fnameBuf = fnameBuf[:cap(fnameBuf)]
+ continue diffloop
+ }
+ if ret.Paths == nil {
+ ret.Paths = changed // the result shares the caller's slice rather than copying it
+ }
+ changed[idx] = true
+ }
+}
+
+// Close closes the parser by invoking the cancel function obtained from LogNameStatusRepo
+func (g *LogNameStatusRepoParser) Close() {
+ g.cancel()
+}
+
+// WalkGitLog walks the git log --name-status output starting at head and records, for
+// treepath itself (key "") and for each requested path directly below it, the ID of the
+// first commit in the log that reports a change to it.
+//
+// When paths is empty, every entry of the tree at treepath is looked up. Each result is
+// also stored in repo.LastCommitCache as soon as it is found. The returned map has one
+// key per path (plus ""); paths that were not resolved map to "". If ctx reaches its
+// deadline the results gathered so far are returned without an error; any other
+// cancellation or failure returns a nil map and the error.
+//
+// The paths argument is never modified: sorting and de-duplication happen on a private copy.
+func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
+	headRef := head.ID.String()
+
+	tree, err := head.SubTree(treepath)
+	if err != nil {
+		return nil, err
+	}
+
+	entries, err := tree.ListEntries()
+	if err != nil {
+		return nil, err
+	}
+
+	// Build the working path list: sorted, duplicate free, with "" (standing for treepath
+	// itself) at index 0.
+	if len(paths) == 0 {
+		paths = make([]string, 0, len(entries)+1)
+		paths = append(paths, "")
+		for _, entry := range entries {
+			paths = append(paths, entry.Name())
+		}
+	} else {
+		// Work on a copy: a slice passed as paths... shares its backing array with the
+		// caller, so sorting or compacting it in place would reorder the caller's data.
+		owned := make([]string, 0, len(paths)+1)
+		owned = append(owned, "") // "" sorts before every other string, so it ends up first
+		owned = append(owned, paths...)
+		sort.Strings(owned)
+
+		// remove duplicates in a single pass (adjacent after sorting)
+		paths = owned[:1]
+		for _, pth := range owned[1:] {
+			if pth != paths[len(paths)-1] {
+				paths = append(paths, pth)
+			}
+		}
+	}
+
+	path2idx := map[string]int{}
+	maxpathlen := len(treepath)
+
+	for i := range paths {
+		path2idx[paths[i]] = i
+		pthlen := len(paths[i]) + len(treepath) + 1
+		if pthlen > maxpathlen {
+			maxpathlen = pthlen
+		}
+	}
+
+	g := NewLogNameStatusRepoParser(ctx, repo.Path, headRef, treepath, paths...)
+	// don't use defer g.Close() here as g may change its value - instead wrap in a func.
+	// This deferred call closes whichever parser is current on every return path.
+	defer func() {
+		g.Close()
+	}()
+
+	results := make([]string, len(paths))
+	remaining := len(paths)
+	nextRestart := (len(paths) * 3) / 4
+	if nextRestart > 70 {
+		nextRestart = 70
+	}
+	lastEmptyParent := headRef
+	commitSinceLastEmptyParent := uint64(0)
+	commitSinceNextRestart := uint64(0)
+	// parent IDs announced by commits already read whose own commit has not been read yet
+	parentRemaining := make(container.Set[string])
+
+	// scratch slice handed to Next; entries are reset to false after being consumed below
+	changed := make([]bool, len(paths))
+
+heaploop:
+	for {
+		select {
+		case <-ctx.Done():
+			// a deadline is treated as "return what we have"; other cancellations are errors
+			if ctx.Err() == context.DeadlineExceeded {
+				break heaploop
+			}
+			return nil, ctx.Err()
+		default:
+		}
+		current, err := g.Next(treepath, path2idx, changed, maxpathlen)
+		if err != nil {
+			if errors.Is(err, context.DeadlineExceeded) {
+				break heaploop
+			}
+			return nil, err
+		}
+		if current == nil {
+			// log exhausted
+			break heaploop
+		}
+		parentRemaining.Remove(current.CommitID)
+		for i, found := range current.Paths {
+			if !found {
+				continue
+			}
+			changed[i] = false
+			if results[i] == "" {
+				results[i] = current.CommitID
+				if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
+					return nil, err
+				}
+				// stop matching this path in later commits
+				delete(path2idx, paths[i])
+				remaining--
+				// the first commit to touch any tracked path is also recorded for treepath itself
+				if results[0] == "" {
+					results[0] = current.CommitID
+					if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil {
+						return nil, err
+					}
+					delete(path2idx, "")
+					remaining--
+				}
+			}
+		}
+
+		if remaining <= 0 {
+			break heaploop
+		}
+		commitSinceLastEmptyParent++
+		if len(parentRemaining) == 0 {
+			// no announced parent is outstanding: remember this commit as a restart point
+			lastEmptyParent = current.CommitID
+			commitSinceLastEmptyParent = 0
+		}
+		if remaining <= nextRestart {
+			commitSinceNextRestart++
+			if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
+				// Restart git log from the last restart point, limited to the paths still
+				// unresolved, and lower the threshold for the next restart.
+				g.Close()
+				remainingPaths := make([]string, 0, len(paths))
+				for i, pth := range paths {
+					if results[i] == "" {
+						remainingPaths = append(remainingPaths, pth)
+					}
+				}
+				g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
+				parentRemaining = make(container.Set[string])
+				nextRestart = (remaining * 3) / 4
+				continue heaploop
+			}
+		}
+		parentRemaining.AddMultiple(current.ParentIDs...)
+	}
+
+	resultsMap := make(map[string]string, len(paths))
+	for i, pth := range paths {
+		resultsMap[pth] = results[i]
+	}
+
+	return resultsMap, nil
+}