summaryrefslogtreecommitdiffstats
path: root/modules/git/grep.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--modules/git/grep.go187
1 files changed, 187 insertions, 0 deletions
diff --git a/modules/git/grep.go b/modules/git/grep.go
new file mode 100644
index 0000000..5572bd9
--- /dev/null
+++ b/modules/git/grep.go
@@ -0,0 +1,187 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// Copyright 2024 The Forgejo Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package git
+
+import (
+ "bufio"
+ "bytes"
+ "cmp"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "strconv"
+ "strings"
+ "time"
+
+ "code.gitea.io/gitea/modules/setting"
+)
+
+type GrepResult struct {
+ Filename string
+ LineNumbers []int
+ LineCodes []string
+ HighlightedRanges [][3]int
+}
+
+type GrepOptions struct {
+ RefName string
+ MaxResultLimit int
+ MatchesPerFile int
+ ContextLineNumber int
+ IsFuzzy bool
+ PathSpec []setting.Glob
+}
+
+func (opts *GrepOptions) ensureDefaults() {
+ opts.RefName = cmp.Or(opts.RefName, "HEAD")
+ opts.MaxResultLimit = cmp.Or(opts.MaxResultLimit, 50)
+ opts.MatchesPerFile = cmp.Or(opts.MatchesPerFile, 20)
+}
+
+func hasPrefixFold(s, t string) bool {
+ if len(s) < len(t) {
+ return false
+ }
+ return strings.EqualFold(s[:len(t)], t)
+}
+
+func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
+ stdoutReader, stdoutWriter, err := os.Pipe()
+ if err != nil {
+ return nil, fmt.Errorf("unable to create os pipe to grep: %w", err)
+ }
+ defer func() {
+ _ = stdoutReader.Close()
+ _ = stdoutWriter.Close()
+ }()
+
+ opts.ensureDefaults()
+
+ /*
+ The output is like this ("^@" means \x00; the first number denotes the line,
+ the second number denotes the column of the first match in line):
+
+ HEAD:.air.toml
+ 6^@8^@bin = "gitea"
+
+ HEAD:.changelog.yml
+ 2^@10^@repo: go-gitea/gitea
+ */
+ var results []*GrepResult
+ // -I skips binary files
+ cmd := NewCommand(ctx, "grep",
+ "-I", "--null", "--break", "--heading", "--column",
+ "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
+ cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
+ cmd.AddOptionValues("--max-count", fmt.Sprint(opts.MatchesPerFile))
+ words := []string{search}
+ if opts.IsFuzzy {
+ words = strings.Fields(search)
+ }
+ for _, word := range words {
+ cmd.AddGitGrepExpression(word)
+ }
+
+ // pathspec
+ files := make([]string, 0,
+ len(setting.Indexer.IncludePatterns)+
+ len(setting.Indexer.ExcludePatterns)+
+ len(opts.PathSpec))
+ for _, expr := range append(setting.Indexer.IncludePatterns, opts.PathSpec...) {
+ files = append(files, ":"+expr.Pattern())
+ }
+ for _, expr := range setting.Indexer.ExcludePatterns {
+ files = append(files, ":^"+expr.Pattern())
+ }
+ cmd.AddDynamicArguments(opts.RefName).AddDashesAndList(files...)
+
+ stderr := bytes.Buffer{}
+ err = cmd.Run(&RunOpts{
+ Timeout: time.Duration(setting.Git.Timeout.Grep) * time.Second,
+
+ Dir: repo.Path,
+ Stdout: stdoutWriter,
+ Stderr: &stderr,
+ PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
+ _ = stdoutWriter.Close()
+ defer stdoutReader.Close()
+
+ isInBlock := false
+ scanner := bufio.NewReader(stdoutReader)
+ var res *GrepResult
+ for {
+ line, err := scanner.ReadString('\n')
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+ return err
+ }
+ // Remove delimiter.
+ if len(line) > 0 {
+ line = line[:len(line)-1]
+ }
+
+ if !isInBlock {
+ if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
+ isInBlock = true
+ res = &GrepResult{Filename: filename}
+ results = append(results, res)
+ }
+ continue
+ }
+ if line == "" {
+ if len(results) >= opts.MaxResultLimit {
+ cancel()
+ break
+ }
+ isInBlock = false
+ continue
+ }
+ if line == "--" {
+ continue
+ }
+ if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
+ lineNumInt, _ := strconv.Atoi(lineNum)
+ res.LineNumbers = append(res.LineNumbers, lineNumInt)
+ if lineCol, lineCode2, ok := strings.Cut(lineCode, "\x00"); ok {
+ lineColInt, _ := strconv.Atoi(lineCol)
+ start := lineColInt - 1
+ matchLen := len(lineCode2)
+ for _, word := range words {
+ if hasPrefixFold(lineCode2[start:], word) {
+ matchLen = len(word)
+ break
+ }
+ }
+ res.HighlightedRanges = append(res.HighlightedRanges, [3]int{
+ len(res.LineCodes),
+ start,
+ start + matchLen,
+ })
+ res.LineCodes = append(res.LineCodes, lineCode2)
+ continue
+ }
+ res.LineCodes = append(res.LineCodes, lineCode)
+ }
+ }
+ return nil
+ },
+ })
+ // git grep exits by cancel (killed), usually it is caused by the limit of results
+ if IsErrorExitCode(err, -1) && stderr.Len() == 0 {
+ return results, nil
+ }
+ // git grep exits with 1 if no results are found
+ if IsErrorExitCode(err, 1) && stderr.Len() == 0 {
+ return nil, nil
+ }
+ if err != nil && !errors.Is(err, context.Canceled) {
+ return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String())
+ }
+ return results, nil
+}