From dd136858f1ea40ad3c94191d647487fa4f31926c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 18 Oct 2024 20:33:49 +0200 Subject: Adding upstream version 9.0.0. Signed-off-by: Daniel Baumann --- modules/indexer/code/git.go | 199 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 modules/indexer/code/git.go (limited to 'modules/indexer/code/git.go') diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go new file mode 100644 index 0000000..c7ffcfd --- /dev/null +++ b/modules/indexer/code/git.go @@ -0,0 +1,199 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package code + +import ( + "context" + "strconv" + "strings" + + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/indexer/code/internal" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" +) + +func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) { + stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + if err != nil { + return "", err + } + return strings.TrimSpace(stdout), nil +} + +// getRepoChanges returns changes to repo since last indexer update +func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { + status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode) + if err != nil { + return nil, err + } + + needGenesis := len(status.CommitSha) == 0 + if !needGenesis { + hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision) + stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + needGenesis = len(stdout) == 0 + } + + if needGenesis { + return genesisChanges(ctx, repo, revision) + } + return nonGenesisChanges(ctx, repo, revision) +} + +func isIndexable(entry *git.TreeEntry) bool { + if !entry.IsRegular() && !entry.IsExecutable() { + return false + } + name := strings.ToLower(entry.Name()) + for _, g := range setting.Indexer.ExcludePatterns { + if g.Match(name) { + return false + } + } + for _, g := range setting.Indexer.IncludePatterns { + if g.Match(name) { + return true + } + } + return len(setting.Indexer.IncludePatterns) == 0 +} + +// parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command +func parseGitLsTreeOutput(stdout []byte) ([]internal.FileUpdate, error) { + entries, err := git.ParseTreeEntries(stdout) + if err != nil { + return nil, err + } + idxCount := 0 + updates := make([]internal.FileUpdate, len(entries)) + for _, entry := range entries { + if isIndexable(entry) { + updates[idxCount] = internal.FileUpdate{ + Filename: entry.Name(), + BlobSha: entry.ID.String(), + Size: entry.Size(), + Sized: true, + } + idxCount++ + } + } + return updates[:idxCount], nil +} + +// genesisChanges get changes to add repo to the indexer for the first time +func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { + var changes internal.RepoChanges + stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + if runErr != nil { + return nil, runErr + } + + var err error + changes.Updates, err = parseGitLsTreeOutput(stdout) + return &changes, err +} + +// nonGenesisChanges get changes since the previous indexer update +func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { + diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) + stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + if runErr != nil { + // previous commit sha may have been removed by a force push, so + // try rebuilding from scratch + log.Warn("git diff: %v", runErr) + if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil { + return nil, err + } + return genesisChanges(ctx, repo, revision) + } + + var changes internal.RepoChanges + var err error + updatedFilenames := make([]string, 0, 10) + + updateChanges := func() error { + cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). + AddDashesAndList(updatedFilenames...) + lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + if err != nil { + return err + } + + updates, err1 := parseGitLsTreeOutput(lsTreeStdout) + if err1 != nil { + return err1 + } + changes.Updates = append(changes.Updates, updates...) + return nil + } + lines := strings.Split(stdout, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if len(line) == 0 { + continue + } + fields := strings.Split(line, "\t") + if len(fields) < 2 { + log.Warn("Unparsable output for diff --name-status: `%s`)", line) + continue + } + filename := fields[1] + if len(filename) == 0 { + continue + } else if filename[0] == '"' { + filename, err = strconv.Unquote(filename) + if err != nil { + return nil, err + } + } + + switch status := fields[0][0]; status { + case 'M', 'A': + updatedFilenames = append(updatedFilenames, filename) + case 'D': + changes.RemovedFilenames = append(changes.RemovedFilenames, filename) + case 'R', 'C': + if len(fields) < 3 { + log.Warn("Unparsable output for diff --name-status: `%s`)", line) + continue + } + dest := fields[2] + if len(dest) == 0 { + log.Warn("Unparsable output for diff --name-status: `%s`)", line) + continue + } + if dest[0] == '"' { + dest, err = strconv.Unquote(dest) + if err != nil { + return nil, err + } + } + if status == 'R' { + changes.RemovedFilenames = append(changes.RemovedFilenames, filename) + } + updatedFilenames = append(updatedFilenames, dest) + default: + log.Warn("Unrecognized status: %c (line=%s)", status, line) + } + + // According to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information + // the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30 + if len(updatedFilenames) >= 30 { + if err := updateChanges(); err != nil { + return nil, err + } + updatedFilenames = updatedFilenames[0:0] + } + } + + if len(updatedFilenames) > 0 { + if err := updateChanges(); err != nil { + return nil, err + } + } + + return &changes, err +} -- cgit v1.2.3