diff options
Diffstat (limited to '')
-rw-r--r-- | modules/git/grep.go | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/modules/git/grep.go b/modules/git/grep.go new file mode 100644 index 0000000..41466bf --- /dev/null +++ b/modules/git/grep.go @@ -0,0 +1,195 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// Copyright 2024 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "bufio" + "bytes" + "cmp" + "context" + "errors" + "fmt" + "io" + "os" + "strconv" + "strings" + "time" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" +) + +type GrepResult struct { + Filename string + LineNumbers []int + LineCodes []string + HighlightedRanges [][3]int +} + +type GrepOptions struct { + RefName string + MaxResultLimit int + MatchesPerFile int // >= git 2.38 + ContextLineNumber int + IsFuzzy bool + PathSpec []setting.Glob +} + +func (opts *GrepOptions) ensureDefaults() { + opts.RefName = cmp.Or(opts.RefName, "HEAD") + opts.MaxResultLimit = cmp.Or(opts.MaxResultLimit, 50) + opts.MatchesPerFile = cmp.Or(opts.MatchesPerFile, 20) +} + +func hasPrefixFold(s, t string) bool { + if len(s) < len(t) { + return false + } + return strings.EqualFold(s[:len(t)], t) +} + +func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) { + stdoutReader, stdoutWriter, err := os.Pipe() + if err != nil { + return nil, fmt.Errorf("unable to create os pipe to grep: %w", err) + } + defer func() { + _ = stdoutReader.Close() + _ = stdoutWriter.Close() + }() + + opts.ensureDefaults() + + /* + The output is like this ("^@" means \x00; the first number denotes the line, + the second number denotes the column of the first match in line): + + HEAD:.air.toml + 6^@8^@bin = "gitea" + + HEAD:.changelog.yml + 2^@10^@repo: go-gitea/gitea + */ + var results []*GrepResult + // -I skips binary files + cmd := NewCommand(ctx, "grep", + "-I", "--null", "--break", "--heading", "--column", + "--fixed-strings", "--line-number", "--ignore-case", "--full-name") + cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber)) + + // --max-count requires at least git 2.38 + if CheckGitVersionAtLeast("2.38.0") == nil { + cmd.AddOptionValues("--max-count", fmt.Sprint(opts.MatchesPerFile)) + } else { + log.Warn("git-grep: --max-count requires at least git 2.38") + } + + words := []string{search} + if opts.IsFuzzy { + words = strings.Fields(search) + } + for _, word := range words { + cmd.AddGitGrepExpression(word) + } + + // pathspec + files := make([]string, 0, + len(setting.Indexer.IncludePatterns)+ + len(setting.Indexer.ExcludePatterns)+ + len(opts.PathSpec)) + for _, expr := range append(setting.Indexer.IncludePatterns, opts.PathSpec...) { + files = append(files, ":"+expr.Pattern()) + } + for _, expr := range setting.Indexer.ExcludePatterns { + files = append(files, ":^"+expr.Pattern()) + } + cmd.AddDynamicArguments(opts.RefName).AddDashesAndList(files...) + + stderr := bytes.Buffer{} + err = cmd.Run(&RunOpts{ + Timeout: time.Duration(setting.Git.Timeout.Grep) * time.Second, + + Dir: repo.Path, + Stdout: stdoutWriter, + Stderr: &stderr, + PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { + _ = stdoutWriter.Close() + defer stdoutReader.Close() + + isInBlock := false + scanner := bufio.NewReader(stdoutReader) + var res *GrepResult + for { + line, err := scanner.ReadString('\n') + if err != nil { + if err == io.EOF { + return nil + } + return err + } + // Remove delimiter. + if len(line) > 0 { + line = line[:len(line)-1] + } + + if !isInBlock { + if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok { + isInBlock = true + res = &GrepResult{Filename: filename} + results = append(results, res) + } + continue + } + if line == "" { + if len(results) >= opts.MaxResultLimit { + cancel() + break + } + isInBlock = false + continue + } + if line == "--" { + continue + } + if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok { + lineNumInt, _ := strconv.Atoi(lineNum) + res.LineNumbers = append(res.LineNumbers, lineNumInt) + if lineCol, lineCode2, ok := strings.Cut(lineCode, "\x00"); ok { + lineColInt, _ := strconv.Atoi(lineCol) + start := lineColInt - 1 + matchLen := len(lineCode2) + for _, word := range words { + if hasPrefixFold(lineCode2[start:], word) { + matchLen = len(word) + break + } + } + res.HighlightedRanges = append(res.HighlightedRanges, [3]int{ + len(res.LineCodes), + start, + start + matchLen, + }) + res.LineCodes = append(res.LineCodes, lineCode2) + continue + } + res.LineCodes = append(res.LineCodes, lineCode) + } + } + return nil + }, + }) + // git grep exits by cancel (killed), usually it is caused by the limit of results + if IsErrorExitCode(err, -1) && stderr.Len() == 0 { + return results, nil + } + // git grep exits with 1 if no results are found + if IsErrorExitCode(err, 1) && stderr.Len() == 0 { + return nil, nil + } + if err != nil && !errors.Is(err, context.Canceled) { + return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String()) + } + return results, nil +} |