summaryrefslogtreecommitdiffstats
path: root/modules/git/commit_info.go
diff options
context:
space:
mode:
authorFilip Navara <filip.navara@gmail.com>2019-04-19 14:17:27 +0200
committerLunny Xiao <xiaolunwen@gmail.com>2019-04-19 14:17:27 +0200
commit2af67f6044af1cad7136ce8c123e37ab090ca9bc (patch)
tree6eaa623db6a0665498d7f05c8bb1a4b4d7b141c7 /modules/git/commit_info.go
parentAPI OTP Context (#6674) (diff)
downloadforgejo-2af67f6044af1cad7136ce8c123e37ab090ca9bc.tar.xz
forgejo-2af67f6044af1cad7136ce8c123e37ab090ca9bc.zip
Improve listing performance by using go-git (#6478)
* Use go-git for tree reading and commit info lookup. Signed-off-by: Filip Navara <navara@emclient.com> * Use TreeEntry.IsRegular() instead of ObjectType that was removed. Signed-off-by: Filip Navara <navara@emclient.com> * Use the treePath to optimize commit info search. Signed-off-by: Filip Navara <navara@emclient.com> * Extract the latest commit at treePath along with the other commits. Signed-off-by: Filip Navara <navara@emclient.com> * Fix listing commit info for a directory that was created in one commit and never modified after. Signed-off-by: Filip Navara <navara@emclient.com> * Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit). Signed-off-by: Filip Navara <navara@emclient.com> * Use go-git for reading blobs. Signed-off-by: Filip Navara <navara@emclient.com> * Make SHA1 type alias for plumbing.Hash in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Make Signature type alias for object.Signature in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Fix GetCommitsInfo for repository with only one commit. Signed-off-by: Filip Navara <navara@emclient.com> * Fix PGP signature verification. Signed-off-by: Filip Navara <navara@emclient.com> * Fix issues with walking commit graph across merges. Signed-off-by: Filip Navara <navara@emclient.com> * Fix typo in condition. Signed-off-by: Filip Navara <navara@emclient.com> * Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes). Signed-off-by: Filip Navara <navara@emclient.com> * Fix lising submodules. Signed-off-by: Filip Navara <navara@emclient.com> * Fix build Signed-off-by: Filip Navara <navara@emclient.com> * Add back commit cache because of name-rev Signed-off-by: Filip Navara <navara@emclient.com> * Fix tests Signed-off-by: Filip Navara <navara@emclient.com> * Fix code style * Fix spelling * Address PR feedback Signed-off-by: Filip Navara <navara@emclient.com> * Update vendor module list Signed-off-by: Filip Navara <navara@emclient.com> * Fix getting trees by commit id Signed-off-by: Filip Navara <navara@emclient.com> * Fix remaining unit test failures * Fix GetTreeBySHA * Avoid running `git name-rev` if not necessary Signed-off-by: Filip Navara <navara@emclient.com> * Move Branch code to git module * Clean up GPG signature verification and fix it for tagged commits * Address PR feedback (import formatting, copyright headers) * Make blob lookup by SHA working * Update tests to use public API * Allow getting content from any type of object through the blob interface * Change test to actually expect the object content that is in the GIT repository * Change one more test to actually expect the object content that is in the GIT repository * Add comments
Diffstat (limited to 'modules/git/commit_info.go')
-rw-r--r--modules/git/commit_info.go454
1 files changed, 183 insertions, 271 deletions
diff --git a/modules/git/commit_info.go b/modules/git/commit_info.go
index 971082be1f..02c6f710ad 100644
--- a/modules/git/commit_info.go
+++ b/modules/git/commit_info.go
@@ -5,325 +5,237 @@
package git
import (
- "bufio"
- "context"
- "fmt"
- "os/exec"
- "path"
- "runtime"
- "strconv"
- "strings"
- "sync"
- "time"
+ "github.com/emirpasic/gods/trees/binaryheap"
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/object"
)
-const (
- // parameters for searching for commit infos. If the untargeted search has
- // not found any entries in the past 5 commits, and 12 or fewer entries
- // remain, then we'll just let the targeted-searching threads finish off,
- // and stop the untargeted search to not interfere.
- deferToTargetedSearchColdStreak = 5
- deferToTargetedSearchNumRemainingEntries = 12
-)
+// GetCommitsInfo gets information of all commits that are corresponding to these entries
+func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCommitCache) ([][]interface{}, *Commit, error) {
+ entryPaths := make([]string, len(tes)+1)
+ // Get the commit for the treePath itself
+ entryPaths[0] = ""
+ for i, entry := range tes {
+ entryPaths[i+1] = entry.Name()
+ }
-// getCommitsInfoState shared state while getting commit info for entries
-type getCommitsInfoState struct {
- lock sync.Mutex
- /* read-only fields, can be read without the mutex */
- // entries and entryPaths are read-only after initialization, so they can
- // safely be read without the mutex
- entries []*TreeEntry
- // set of filepaths to get info for
- entryPaths map[string]struct{}
- treePath string
- headCommit *Commit
+ c, err := commit.repo.gogitRepo.CommitObject(plumbing.Hash(commit.ID))
+ if err != nil {
+ return nil, nil, err
+ }
- /* mutable fields, must hold mutex to read or write */
- // map from filepath to commit
- commits map[string]*Commit
- // set of filepaths that have been or are being searched for in a target search
- targetedPaths map[string]struct{}
-}
+ revs, err := getLastCommitForPaths(c, treePath, entryPaths)
+ if err != nil {
+ return nil, nil, err
+ }
-func (state *getCommitsInfoState) numRemainingEntries() int {
- state.lock.Lock()
- defer state.lock.Unlock()
- return len(state.entries) - len(state.commits)
-}
+ commit.repo.gogitStorage.Close()
-// getTargetEntryPath Returns the next path for a targeted-searching thread to
-// search for, or returns the empty string if nothing left to search for
-func (state *getCommitsInfoState) getTargetedEntryPath() string {
- var targetedEntryPath string
- state.lock.Lock()
- defer state.lock.Unlock()
- for _, entry := range state.entries {
- entryPath := path.Join(state.treePath, entry.Name())
- if _, ok := state.commits[entryPath]; ok {
- continue
- } else if _, ok = state.targetedPaths[entryPath]; ok {
- continue
+ commitsInfo := make([][]interface{}, len(tes))
+ for i, entry := range tes {
+ if rev, ok := revs[entry.Name()]; ok {
+ entryCommit := convertCommit(rev)
+ if entry.IsSubModule() {
+ subModuleURL := ""
+ if subModule, err := commit.GetSubModule(entry.Name()); err != nil {
+ return nil, nil, err
+ } else if subModule != nil {
+ subModuleURL = subModule.URL
+ }
+ subModuleFile := NewSubModuleFile(entryCommit, subModuleURL, entry.ID.String())
+ commitsInfo[i] = []interface{}{entry, subModuleFile}
+ } else {
+ commitsInfo[i] = []interface{}{entry, entryCommit}
+ }
+ } else {
+ commitsInfo[i] = []interface{}{entry, nil}
}
- targetedEntryPath = entryPath
- state.targetedPaths[entryPath] = struct{}{}
- break
}
- return targetedEntryPath
-}
-// repeatedly perform targeted searches for unpopulated entries
-func targetedSearch(state *getCommitsInfoState, done chan error, cache LastCommitCache) {
- for {
- entryPath := state.getTargetedEntryPath()
- if len(entryPath) == 0 {
- done <- nil
- return
- }
- if cache != nil {
- commit, err := cache.Get(state.headCommit.repo.Path, state.headCommit.ID.String(), entryPath)
- if err == nil && commit != nil {
- state.update(entryPath, commit)
- continue
- }
- }
- command := NewCommand("rev-list", "-1", state.headCommit.ID.String(), "--", entryPath)
- output, err := command.RunInDir(state.headCommit.repo.Path)
- if err != nil {
- done <- err
- return
- }
- id, err := NewIDFromString(strings.TrimSpace(output))
- if err != nil {
- done <- err
- return
- }
- commit, err := state.headCommit.repo.getCommit(id)
- if err != nil {
- done <- err
- return
- }
- state.update(entryPath, commit)
- if cache != nil {
- cache.Put(state.headCommit.repo.Path, state.headCommit.ID.String(), entryPath, commit)
- }
+ // Retrieve the commit for the treePath itself (see above). We basically
+ // get it for free during the tree traversal and it's used for listing
+ // pages to display information about newest commit for a given path.
+ var treeCommit *Commit
+ if rev, ok := revs[""]; ok {
+ treeCommit = convertCommit(rev)
}
+ return commitsInfo, treeCommit, nil
}
-func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitsInfoState {
- entryPaths := make(map[string]struct{}, len(entries))
- for _, entry := range entries {
- entryPaths[path.Join(treePath, entry.Name())] = struct{}{}
- }
- if treePath = path.Clean(treePath); treePath == "." {
- treePath = ""
- }
- return &getCommitsInfoState{
- entries: entries,
- entryPaths: entryPaths,
- commits: make(map[string]*Commit, len(entries)),
- targetedPaths: make(map[string]struct{}, len(entries)),
- treePath: treePath,
- headCommit: headCommit,
- }
+type commitAndPaths struct {
+ commit *object.Commit
+ // Paths that are still on the branch represented by commit
+ paths []string
+ // Set of hashes for the paths
+ hashes map[string]plumbing.Hash
}
-// GetCommitsInfo gets information of all commits that are corresponding to these entries
-func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCommitCache) ([][]interface{}, error) {
- state := initGetCommitInfoState(tes, commit, treePath)
- if err := getCommitsInfo(state, cache); err != nil {
+func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) {
+ tree, err := c.Tree()
+ if err != nil {
return nil, err
}
- if len(state.commits) < len(state.entryPaths) {
- return nil, fmt.Errorf("could not find commits for all entries")
- }
-
- commitsInfo := make([][]interface{}, len(tes))
- for i, entry := range tes {
- commit, ok := state.commits[path.Join(treePath, entry.Name())]
- if !ok {
- return nil, fmt.Errorf("could not find commit for %s", entry.Name())
- }
- switch entry.Type {
- case ObjectCommit:
- subModuleURL := ""
- if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
- return nil, err
- } else if subModule != nil {
- subModuleURL = subModule.URL
- }
- subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
- commitsInfo[i] = []interface{}{entry, subModuleFile}
- default:
- commitsInfo[i] = []interface{}{entry, commit}
- }
- }
- return commitsInfo, nil
-}
-func (state *getCommitsInfoState) cleanEntryPath(rawEntryPath string) (string, error) {
- if rawEntryPath[0] == '"' {
- var err error
- rawEntryPath, err = strconv.Unquote(rawEntryPath)
+ // Optimize deep traversals by focusing only on the specific tree
+ if treePath != "" {
+ tree, err = tree.Tree(treePath)
if err != nil {
- return rawEntryPath, err
+ return nil, err
}
}
- var entryNameStartIndex int
- if len(state.treePath) > 0 {
- entryNameStartIndex = len(state.treePath) + 1
- }
- if index := strings.IndexByte(rawEntryPath[entryNameStartIndex:], '/'); index >= 0 {
- return rawEntryPath[:entryNameStartIndex+index], nil
- }
- return rawEntryPath, nil
+ return tree, nil
}
-// update report that the given path was last modified by the given commit.
-// Returns whether state.commits was updated
-func (state *getCommitsInfoState) update(entryPath string, commit *Commit) bool {
- if _, ok := state.entryPaths[entryPath]; !ok {
- return false
- }
-
- var updated bool
- state.lock.Lock()
- defer state.lock.Unlock()
- if _, ok := state.commits[entryPath]; !ok {
- state.commits[entryPath] = commit
- updated = true
+func getFullPath(treePath, path string) string {
+ if treePath != "" {
+ if path != "" {
+ return treePath + "/" + path
+ }
+ return treePath
}
- return updated
+ return path
}
-const getCommitsInfoPretty = "--pretty=format:%H %ct %s"
-
-func getCommitsInfo(state *getCommitsInfoState, cache LastCommitCache) error {
- ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
- defer cancel()
-
- args := []string{"log", state.headCommit.ID.String(), getCommitsInfoPretty, "--name-status", "-c"}
- if len(state.treePath) > 0 {
- args = append(args, "--", state.treePath)
+func getFileHashes(c *object.Commit, treePath string, paths []string) (map[string]plumbing.Hash, error) {
+ tree, err := getCommitTree(c, treePath)
+ if err == object.ErrDirectoryNotFound {
+ // The whole tree didn't exist, so return empty map
+ return make(map[string]plumbing.Hash), nil
}
- cmd := exec.CommandContext(ctx, "git", args...)
- cmd.Dir = state.headCommit.repo.Path
-
- readCloser, err := cmd.StdoutPipe()
if err != nil {
- return err
+ return nil, err
}
- if err := cmd.Start(); err != nil {
- return err
+ hashes := make(map[string]plumbing.Hash)
+ for _, path := range paths {
+ if path != "" {
+ entry, err := tree.FindEntry(path)
+ if err == nil {
+ hashes[path] = entry.Hash
+ }
+ } else {
+ hashes[path] = tree.Hash
+ }
}
- // it's okay to ignore the error returned by cmd.Wait(); we expect the
- // subprocess to sometimes have a non-zero exit status, since we may
- // prematurely close stdout, resulting in a broken pipe.
- defer cmd.Wait()
- numThreads := runtime.NumCPU()
- done := make(chan error, numThreads)
- for i := 0; i < numThreads; i++ {
- go targetedSearch(state, done, cache)
- }
+ return hashes, nil
+}
- scanner := bufio.NewScanner(readCloser)
- err = state.processGitLogOutput(scanner)
+func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (map[string]*object.Commit, error) {
+ // We do a tree traversal with nodes sorted by commit time
+ seen := make(map[plumbing.Hash]bool)
+ heap := binaryheap.NewWith(func(a, b interface{}) int {
+ if a.(*commitAndPaths).commit.Committer.When.Before(b.(*commitAndPaths).commit.Committer.When) {
+ return 1
+ }
+ return -1
+ })
- // it is important that we close stdout here; if we do not close
- // stdout, the subprocess will keep running, and the deffered call
- // cmd.Wait() may block for a long time.
- if closeErr := readCloser.Close(); closeErr != nil && err == nil {
- err = closeErr
+ result := make(map[string]*object.Commit)
+ initialHashes, err := getFileHashes(c, treePath, paths)
+ if err != nil {
+ return nil, err
}
- for i := 0; i < numThreads; i++ {
- doneErr := <-done
- if doneErr != nil && err == nil {
- err = doneErr
+ // Start search from the root commit and with full set of paths
+ heap.Push(&commitAndPaths{c, paths, initialHashes})
+
+ for {
+ cIn, ok := heap.Pop()
+ if !ok {
+ break
}
- }
- return err
-}
+ current := cIn.(*commitAndPaths)
+ currentID := current.commit.ID()
-func (state *getCommitsInfoState) processGitLogOutput(scanner *bufio.Scanner) error {
- // keep a local cache of seen paths to avoid acquiring a lock for paths
- // we've already seen
- seenPaths := make(map[string]struct{}, len(state.entryPaths))
- // number of consecutive commits without any finds
- coldStreak := 0
- var commit *Commit
- var err error
- for scanner.Scan() {
- line := scanner.Text()
- if len(line) == 0 { // in-between commits
- numRemainingEntries := state.numRemainingEntries()
- if numRemainingEntries == 0 {
- break
- }
- if coldStreak >= deferToTargetedSearchColdStreak &&
- numRemainingEntries <= deferToTargetedSearchNumRemainingEntries {
- // stop this untargeted search, and let the targeted-search threads
- // finish the work
- break
- }
+ if seen[currentID] {
continue
}
- if line[0] >= 'A' && line[0] <= 'X' { // a file was changed by the current commit
- // look for the last tab, since for copies (C) and renames (R) two
- // filenames are printed: src, then dest
- tabIndex := strings.LastIndexByte(line, '\t')
- if tabIndex < 1 {
- return fmt.Errorf("misformatted line: %s", line)
+ seen[currentID] = true
+
+ // Load the parent commits for the one we are currently examining
+ numParents := current.commit.NumParents()
+ var parents []*object.Commit
+ for i := 0; i < numParents; i++ {
+ parent, err := current.commit.Parent(i)
+ if err != nil {
+ break
}
- entryPath, err := state.cleanEntryPath(line[tabIndex+1:])
+ parents = append(parents, parent)
+ }
+
+ // Examine the current commit and set of interesting paths
+ numOfParentsWithPath := make([]int, len(current.paths))
+ pathChanged := make([]bool, len(current.paths))
+ parentHashes := make([]map[string]plumbing.Hash, len(parents))
+ for j, parent := range parents {
+ parentHashes[j], err = getFileHashes(parent, treePath, current.paths)
if err != nil {
- return err
+ break
}
- if _, ok := seenPaths[entryPath]; !ok {
- if state.update(entryPath, commit) {
- coldStreak = 0
+
+ for i, path := range current.paths {
+ if parentHashes[j][path] != plumbing.ZeroHash {
+ numOfParentsWithPath[i]++
+ if parentHashes[j][path] != current.hashes[path] {
+ pathChanged[i] = true
+ }
}
- seenPaths[entryPath] = struct{}{}
}
- continue
}
- // a new commit
- commit, err = parseCommitInfo(line)
- if err != nil {
- return err
+ var remainingPaths []string
+ for i, path := range current.paths {
+ switch numOfParentsWithPath[i] {
+ case 0:
+ // The path didn't exist in any parent, so it must have been created by
+ // this commit. The results could already contain some newer change from
+ // different path, so don't override that.
+ if result[path] == nil {
+ result[path] = current.commit
+ }
+ case 1:
+ // The file is present on exactly one parent, so check if it was changed
+ // and save the revision if it did.
+ if pathChanged[i] {
+ if result[path] == nil {
+ result[path] = current.commit
+ }
+ } else {
+ remainingPaths = append(remainingPaths, path)
+ }
+ default:
+ // The file is present on more than one of the parent paths, so this is
+ // a merge. We have to examine all the parent trees to find out where
+ // the change occurred. pathChanged[i] would tell us that the file was
+ // changed during the merge, but it wouldn't tell us the relevant commit
+ // that introduced it.
+ remainingPaths = append(remainingPaths, path)
+ }
}
- coldStreak++
- }
- return scanner.Err()
-}
-// parseCommitInfo parse a commit from a line of `git log` output. Expects the
-// line to be formatted according to getCommitsInfoPretty.
-func parseCommitInfo(line string) (*Commit, error) {
- if len(line) < 43 {
- return nil, fmt.Errorf("invalid git output: %s", line)
- }
- ref, err := NewIDFromString(line[:40])
- if err != nil {
- return nil, err
- }
- spaceIndex := strings.IndexByte(line[41:], ' ')
- if spaceIndex < 0 {
- return nil, fmt.Errorf("invalid git output: %s", line)
- }
- unixSeconds, err := strconv.Atoi(line[41 : 41+spaceIndex])
- if err != nil {
- return nil, err
+ if len(remainingPaths) > 0 {
+ // Add the parent nodes along with remaining paths to the heap for further
+ // processing.
+ for j, parent := range parents {
+ if seen[parent.ID()] {
+ continue
+ }
+
+ // Combine remainingPath with paths available on the parent branch
+ // and make union of them
+ var remainingPathsForParent []string
+ for _, path := range remainingPaths {
+ if parentHashes[j][path] != plumbing.ZeroHash {
+ remainingPathsForParent = append(remainingPathsForParent, path)
+ }
+ }
+
+ heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]})
+ }
+ }
}
- message := line[spaceIndex+42:]
- return &Commit{
- ID: ref,
- CommitMessage: message,
- Committer: &Signature{
- When: time.Unix(int64(unixSeconds), 0),
- },
- }, nil
+
+ return result, nil
}