summaryrefslogtreecommitdiffstats
path: root/modules/references/references.go
diff options
context:
space:
mode:
author Daniel Baumann <daniel@debian.org> 2024-10-18 20:33:49 +0200
committer Daniel Baumann <daniel@debian.org> 2024-10-18 20:33:49 +0200
commit dd136858f1ea40ad3c94191d647487fa4f31926c (patch)
tree 58fec94a7b2a12510c9664b21793f1ed560c6518 /modules/references/references.go
parent Initial commit. (diff)
downloadforgejo-upstream/9.0.0.tar.xz
forgejo-upstream/9.0.0.zip
Adding upstream version 9.0.0. (HEAD, upstream/9.0.0, upstream, debian)
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to '')
-rw-r--r--modules/references/references.go594
1 files changed, 594 insertions, 0 deletions
diff --git a/modules/references/references.go b/modules/references/references.go
new file mode 100644
index 0000000..c61d06d
--- /dev/null
+++ b/modules/references/references.go
@@ -0,0 +1,594 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package references
+
+import (
+ "bytes"
+ "net/url"
+ "regexp"
+ "strconv"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/markup/mdstripper"
+ "code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/util"
+)
+
+var (
+ // validNamePattern performs only the most basic validation for user or repository names.
+ // A name may contain only lowercase alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters.
+ validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)
+
+ // NOTE: None of the regexes below perform any extra validation.
+ // Thus a link is produced even if the linked entity does not exist.
+ // While fast, this is also incorrect and leads to false positives.
+ // TODO: fix invalid linking issue
+
+ // mentionPattern matches all mentions in the form of "@user" or "@org/team"
+ mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:'|\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)
+ // issueNumericPattern matches a string that references a numeric issue or pull request, e.g. #1287 or !1287
+ issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\'|\")([#!][0-9]+)(?:\s|$|\)|\]|\'|\"|[:;,.?!]\s|[:;,.?!]$)`)
+ // issueAlphanumericPattern matches a string that references an alphanumeric issue, e.g. ABC-1234
+ issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\"|\')([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$)|\"|\')`)
+ // crossReferenceIssueNumericPattern matches a string that references a numeric issue in a different repository,
+ // e.g. org/repo#12345 or org/repo!12345
+ crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
+ // crossReferenceCommitPattern matches a string that references a commit in a different repository
+ // e.g. go-gitea/gitea@d8a994ef, go-gitea/gitea@d8a994ef243349f321568f9e36d5c3f444b99cae (7-64 lowercase hexadecimal characters)
+ crossReferenceCommitPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+)/([0-9a-zA-Z-_\.]+)@([0-9a-f]{7,64})(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
+ // spaceTrimmedPattern lets us find whitespace that follows an alphanumeric, '-' or '_' character
+ spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)
+ // timeLogPattern matches a time-tracking entry made of '@' and one or more amount+unit pairs, e.g. @1h, @1d2h or @1,5h
+ timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
+
+ issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp // built on first use by newKeywords; a nil pattern never matches
+ issueKeywordsOnce sync.Once // makes newKeywords build the two keyword patterns only once
+
+ giteaHostInit sync.Once // makes getGiteaHostName compute the two values below only once
+ giteaHost string // lowercased host part of setting.AppURL, "" if it cannot be parsed
+ giteaIssuePullPattern *regexp.Regexp // matches full issue/pull URLs under setting.AppURL, nil if it cannot be parsed
+
+ actionStrings = []string{ // indexed by XRefAction value, see XRefAction.String
+ "none",
+ "closes",
+ "reopens",
+ "neutered",
+ }
+)
+
+// XRefAction represents the kind of effect a cross reference has once it is resolved
+type XRefAction int64
+
+const (
+ // XRefActionNone means the cross-reference is simply a comment
+ XRefActionNone XRefAction = iota // 0
+ // XRefActionCloses means the cross-reference should close an issue if it is resolved
+ XRefActionCloses // 1
+ // XRefActionReopens means the cross-reference should reopen an issue if it is resolved
+ XRefActionReopens // 2
+ // XRefActionNeutered means the cross-reference will no longer affect the source
+ XRefActionNeutered // 3
+)
+
+// String returns the lowercase name of the action as listed in actionStrings
+// ("none", "closes", "reopens" or "neutered").
+//
+// A value outside the declared range does not panic; it is rendered as
+// "XRefAction(<n>)" so that unexpected values can still be logged or printed.
+func (a XRefAction) String() string {
+	if a < 0 || int64(a) >= int64(len(actionStrings)) {
+		return "XRefAction(" + strconv.FormatInt(int64(a), 10) + ")"
+	}
+	return actionStrings[a]
+}
+
+// IssueReference contains an unverified cross-reference to a local issue or pull request
+type IssueReference struct {
+ Index int64 // issue or pull request number parsed from the reference
+ Owner string // lowercased repository owner; empty when the reference names no repository
+ Name string // lowercased repository name; empty when the reference names no repository
+ Action XRefAction // effect requested by a keyword placed right before the reference, if any
+ TimeLog string // time-tracking entry attached to the reference, without the leading '@'; empty if none
+}
+
+// RenderizableReference contains an unverified cross-reference with rendering information.
+// The IsPull member means that a `!num` reference was used instead of `#num`.
+// This kind of reference is used to make pulls available when an external issue tracker
+// is used. Otherwise, `#` and `!` are completely interchangeable.
+type RenderizableReference struct {
+ Issue string // issue identifier as text: the digits of a #/! reference, or the whole alphanumeric/regexp capture
+ Owner string // repository owner, empty for references that name no repository
+ Name string // repository name, empty for references that name no repository
+ CommitSha string // commit hash; only set by FindRenderizableCommitCrossReference
+ IsPull bool // true when the reference used '!' instead of '#'
+ RefLocation *RefSpan // where the reference was found in the parsed text
+ Action XRefAction // effect requested by a keyword placed right before the reference, if any
+ ActionLocation *RefSpan // where that keyword was found; nil when there is none
+}
+
+type rawReference struct { // package-internal form of a parsed reference, before conversion to the exported types
+ index int64 // issue or pull request number
+ owner string // lowercased repository owner, empty for references that name no repository
+ name string // lowercased repository name, empty for references that name no repository
+ isPull bool // true when the reference used '!' instead of '#'
+ action XRefAction // effect requested by a keyword placed right before the reference
+ issue string // the number exactly as written in the text
+ refLocation *RefSpan // position of the reference in the text; nil for references derived from links
+ actionLocation *RefSpan // position of the action keyword; nil when there is none
+ timeLog string // time-tracking entry attached to the reference, without the leading '@'
+}
+
+// rawToIssueReferenceList converts internal raw references into the exported
+// IssueReference form, carrying over the index, owner, name, action and time
+// log of each entry. The result has one element per input element, in the
+// same order.
+func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
+	converted := make([]IssueReference, 0, len(reflist))
+	for _, raw := range reflist {
+		converted = append(converted, IssueReference{
+			Index:   raw.index,
+			Owner:   raw.owner,
+			Name:    raw.name,
+			Action:  raw.action,
+			TimeLog: raw.timeLog,
+		})
+	}
+	return converted
+}
+
+// RefSpan is the position where the reference was found within the parsed text
+type RefSpan struct {
+ Start int // byte offset of the first character of the reference
+ End int // byte offset just past the last character of the reference
+}
+
+// makeKeywordsPat compiles a case-insensitive pattern that matches any of the
+// accepted keywords when the keyword, optionally followed by a colon and then
+// by exactly one space, sits at the very end of the searched text. The keyword
+// must be preceded by whitespace, '(' or '[', or start the text.
+//
+// It returns nil when none of the given words is accepted; callers treat a
+// nil pattern as "never match".
+func makeKeywordsPat(words []string) *regexp.Regexp {
+	accepted := parseKeywords(words)
+	if len(accepted) > 0 {
+		alternatives := strings.Join(accepted, `|`)
+		return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + alternatives + `):? $`)
+	}
+	// Never match
+	return nil
+}
+
+// parseKeywords normalizes each word (surrounding whitespace trimmed, then
+// lowercased) and returns, in input order, those made up solely of Unicode
+// letters. Every rejected word is reported through log.Info.
+func parseKeywords(words []string) []string {
+	// Accept Unicode letter class runes only (a-z, á, à, ä, ...)
+	lettersOnly := regexp.MustCompile(`^[\pL]+$`)
+	accepted := make([]string, 0, 5)
+	for _, raw := range words {
+		keyword := strings.ToLower(strings.TrimSpace(raw))
+		if !lettersOnly.MatchString(keyword) {
+			log.Info("Invalid keyword: %s", keyword)
+			continue
+		}
+		accepted = append(accepted, keyword)
+	}
+	return accepted
+}
+
+// newKeywords makes sure the close/reopen keyword patterns have been built
+// from the pull request settings. The work runs at most once, on the first
+// call, which delays it until after the settings module is initialized.
+func newKeywords() {
+	buildFromSettings := func() {
+		doNewKeywords(
+			setting.Repository.PullRequest.CloseKeywords,
+			setting.Repository.PullRequest.ReopenKeywords,
+		)
+	}
+	issueKeywordsOnce.Do(buildFromSettings)
+}
+
+// doNewKeywords rebuilds both keyword patterns: issueCloseKeywordsPat from
+// close and issueReopenKeywordsPat from reopen. A pattern ends up nil when
+// its list contains no acceptable keyword.
+func doNewKeywords(close, reopen []string) {
+	// Operands are evaluated left to right, so close is processed before reopen.
+	issueCloseKeywordsPat, issueReopenKeywordsPat = makeKeywordsPat(close), makeKeywordsPat(reopen)
+}
+
+// getGiteaHostName returns the lowercased host part of setting.AppURL, without
+// the scheme. The value is taken from url.URL.Host, so an explicit port in
+// AppURL is kept.
+//
+// The first call also compiles giteaIssuePullPattern, which recognizes full
+// issue and pull request URLs located under AppURL. If AppURL cannot be
+// parsed, the host is "" and the pattern is nil.
+func getGiteaHostName() string {
+	giteaHostInit.Do(func() {
+		uapp, err := url.Parse(setting.AppURL)
+		if err != nil {
+			giteaHost = ""
+			giteaIssuePullPattern = nil
+			return
+		}
+		giteaHost = strings.ToLower(uapp.Host)
+		// Groups: 1 = preamble, 2 = owner/repo, 3 = "issues" or "pulls", 4 = number.
+		quotedAppURL := regexp.QuoteMeta(strings.TrimSpace(setting.AppURL))
+		giteaIssuePullPattern = regexp.MustCompile(
+			`(\s|^|\(|\[)` +
+				quotedAppURL +
+				`([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +
+				`((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
+	})
+	return giteaHost
+}
+
+// getGiteaIssuePullPattern returns the pattern that recognizes full issue and
+// pull request URLs under setting.AppURL, or nil if AppURL could not be parsed.
+func getGiteaIssuePullPattern() *regexp.Regexp {
+	// The pattern is compiled as a side effect of the one-time host lookup;
+	// the host name itself is not needed here.
+	_ = getGiteaHostName()
+	return giteaIssuePullPattern
+}
+
+// FindAllMentionsMarkdown strips the markdown markup from content, matches
+// mention patterns in what is left, and returns the unvalidated user names
+// found, **not including** the @ prefix.
+func FindAllMentionsMarkdown(content string) []string {
+	stripped, _ := mdstripper.StripMarkdownBytes([]byte(content))
+	spans := FindAllMentionsBytes(stripped)
+	names := make([]string, 0, len(spans))
+	for _, span := range spans {
+		// span.Start points at the '@'; skip it.
+		names = append(names, string(stripped[span.Start+1:span.End]))
+	}
+	return names
+}
+
+// FindAllMentionsBytes matches mention patterns in given content
+// and returns a list of locations for the unvalidated user names, including the @ prefix.
+func FindAllMentionsBytes(content []byte) []RefSpan {
+	// FindAllSubmatchIndex cannot be used because the pattern also consumes the
+	// delimiter on each side of a mention (\s@mention,\s). With two consecutive
+	// mentions, the delimiter between them would be "eaten" by the first match:
+	//   ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...`
+	// Instead, search repeatedly and resume right after the captured mention,
+	// so its trailing delimiter can still serve as the next mention's prefix.
+	ret := make([]RefSpan, 0, 5)
+	pos := 0
+	for {
+		match := mentionPattern.FindSubmatchIndex(content[pos:])
+		if match == nil {
+			break
+		}
+		// match[2]:match[3] is the captured mention, relative to pos.
+		start, end := match[2]+pos, match[3]+pos
+		ret = append(ret, RefSpan{Start: start, End: end})
+		// The capture group admits no whitespace, so there is never a trailing
+		// space to trim from it; the capture is non-empty, so pos always advances.
+		pos = end
+	}
+	return ret
+}
+
+// FindFirstMentionBytes matches the first mention in the given content
+// and returns the location of the unvalidated user name, including the @ prefix.
+// The boolean result reports whether a mention was found.
+func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
+	if loc := mentionPattern.FindSubmatchIndex(content); loc != nil {
+		return true, RefSpan{Start: loc[2], End: loc[3]}
+	}
+	return false, RefSpan{}
+}
+
+// FindAllIssueReferencesMarkdown strips the markdown markup from content
+// and returns the list of unvalidated references found in it.
+func FindAllIssueReferencesMarkdown(content string) []IssueReference {
+	rawRefs := findAllIssueReferencesMarkdown(content)
+	return rawToIssueReferenceList(rawRefs)
+}
+
+// findAllIssueReferencesMarkdown strips the markdown markup from content and
+// looks for references in the stripped text and in the links returned by the
+// stripper.
+func findAllIssueReferencesMarkdown(content string) []*rawReference {
+	stripped, urls := mdstripper.StripMarkdownBytes([]byte(content))
+	return findAllIssueReferencesBytes(stripped, urls)
+}
+
+// convertFullHTMLReferencesToShortRefs rewrites, inside the buffer pointed to
+// by contentBytes, every full issue or pull request URL matched by re into its
+// short cross-reference form, and shortens *contentBytes accordingly.
+//
+// re is expected to expose four capture groups, in this order: the preamble
+// that precedes the URL, "owner/repo", the word "issues" or "pulls", and the
+// number. Text such as:
+//
+//	this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
+//	https://ourgitea.com/git/owner/repo/pulls/123456789
+//
+// becomes:
+//
+//	this is a owner/repo#123456789, foo
+//	owner/repo!123456789
+func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
+	buf := *contentBytes
+	cursor := 0
+	for {
+		loc := re.FindSubmatchIndex(buf[cursor:])
+		if loc == nil {
+			break
+		}
+		// The indices are relative to cursor; make them absolute.
+		for i := range loc {
+			loc[i] += cursor
+		}
+
+		// loc[0]:loc[1] is the whole match, loc[2]:loc[3] the preamble.
+		preambleEnd := loc[3]
+		repoStart, repoEnd := loc[4], loc[5]
+		kindStart, kindEnd := loc[6], loc[7]
+		numStart, numEnd := loc[8], loc[9]
+
+		// Write "owner/repo" right after the preamble. The source lies further
+		// along the same buffer; copy handles overlapping ranges.
+		w := preambleEnd
+		repoLen := repoEnd - repoStart
+		copy(buf[w:w+repoLen], buf[repoStart:repoEnd])
+		w += repoLen
+
+		// Issue/pull marker: '#' unless the URL said "pulls".
+		buf[w] = '#'
+		if string(buf[kindStart:kindEnd]) == "pulls" {
+			buf[w] = '!'
+		}
+		w++
+
+		// Then the issue/pull number.
+		numLen := numEnd - numStart
+		copy(buf[w:w+numLen], buf[numStart:numEnd])
+		w += numLen
+
+		// Pull the remainder of the text up against the short reference and
+		// drop the bytes that are no longer used.
+		copy(buf[w:], buf[numEnd:])
+		buf = buf[:len(buf)-numEnd+w]
+		*contentBytes = buf
+
+		// Resume right after the short reference, so that the delimiter which
+		// ended this match can open the next one.
+		cursor = w
+	}
+}
+
+// FindAllIssueReferences returns a list of unvalidated references found in a string.
+// Full issue and pull request URLs that point at this instance are first
+// rewritten to their #/! short codes so that they are recognized as well.
+func FindAllIssueReferences(content string) []IssueReference {
+	buf := []byte(content)
+	pattern := getGiteaIssuePullPattern()
+	if pattern == nil {
+		log.Debug("No GiteaIssuePullPattern pattern")
+	} else {
+		convertFullHTMLReferencesToShortRefs(pattern, &buf)
+	}
+	rawRefs := findAllIssueReferencesBytes(buf, []string{})
+	return rawToIssueReferenceList(rawRefs)
+}
+
+// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
+//
+// Unless crossLinkOnly is set, a plain #num / !num reference is looked for
+// first; if none is found (or crossLinkOnly is set), an owner/repo#num style
+// reference is looked for instead. prOnly is handed to getCrossReference,
+// which then rejects references that do not use '!'. The boolean result
+// reports whether a usable reference was found.
+func FindRenderizableReferenceNumeric(content string, prOnly, crossLinkOnly bool) (bool, *RenderizableReference) {
+	var loc []int
+	if !crossLinkOnly {
+		loc = issueNumericPattern.FindStringSubmatchIndex(content)
+	}
+	if loc == nil {
+		loc = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content)
+	}
+	if loc == nil {
+		return false, nil
+	}
+
+	raw := getCrossReference(util.UnsafeStringToBytes(content), loc[2], loc[3], false, prOnly)
+	if raw == nil {
+		return false, nil
+	}
+
+	ref := &RenderizableReference{
+		Issue:          raw.issue,
+		Owner:          raw.owner,
+		Name:           raw.name,
+		IsPull:         raw.isPull,
+		RefLocation:    raw.refLocation,
+		Action:         raw.action,
+		ActionLocation: raw.actionLocation,
+	}
+	return true, ref
+}
+
+// FindRenderizableCommitCrossReference returns the first unvalidated commit cross reference
+// (owner/repo@sha) found in a string. The reported location spans from the
+// start of the owner to the end of the commit hash.
+func FindRenderizableCommitCrossReference(content string) (bool, *RenderizableReference) {
+	loc := crossReferenceCommitPattern.FindStringSubmatchIndex(content)
+	if len(loc) >= 8 {
+		ownerStart, ownerEnd := loc[2], loc[3]
+		nameStart, nameEnd := loc[4], loc[5]
+		shaStart, shaEnd := loc[6], loc[7]
+		return true, &RenderizableReference{
+			Owner:       content[ownerStart:ownerEnd],
+			Name:        content[nameStart:nameEnd],
+			CommitSha:   content[shaStart:shaEnd],
+			RefLocation: &RefSpan{Start: ownerStart, End: shaEnd},
+		}
+	}
+	return false, nil
+}
+
+// FindRenderizableReferenceRegexp returns the first unvalidated reference that the given
+// regexp finds in a string. The first capture group of pattern supplies the
+// issue identifier, while the reported location covers the whole match.
+func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) {
+	loc := pattern.FindStringSubmatchIndex(content)
+	if len(loc) < 4 {
+		return false, nil
+	}
+
+	issueStart, issueEnd := loc[2], loc[3]
+	// An action keyword, if any, sits right before the issue identifier.
+	action, actionSpan := findActionKeywords([]byte(content), issueStart)
+
+	ref := &RenderizableReference{
+		Issue:          content[issueStart:issueEnd],
+		RefLocation:    &RefSpan{Start: loc[0], End: loc[1]},
+		Action:         action,
+		ActionLocation: actionSpan,
+		IsPull:         false,
+	}
+	return true, ref
+}
+
+// FindRenderizableReferenceAlphanumeric returns the first unvalidated alphanumeric reference
+// (e.g. ABC-1234) found in a string, together with any action keyword that
+// precedes it.
+func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
+	loc := issueAlphanumericPattern.FindStringSubmatchIndex(content)
+	if loc == nil {
+		return false, nil
+	}
+
+	issueStart, issueEnd := loc[2], loc[3]
+	action, actionSpan := findActionKeywords([]byte(content), issueStart)
+
+	ref := &RenderizableReference{
+		Issue:          content[issueStart:issueEnd],
+		RefLocation:    &RefSpan{Start: issueStart, End: issueEnd},
+		Action:         action,
+		ActionLocation: actionSpan,
+		IsPull:         false,
+	}
+	return true, ref
+}
+
+// findAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
+//
+// References are collected in three passes and appear in the result in that
+// order: plain numeric references (#123, !123) found in content, cross
+// repository references (owner/repo#123) found in content, and references
+// derived from links, i.e. issue or pull request URLs whose host is empty or
+// equal to the local host name.
+//
+// Afterwards every time log entry found in content (e.g. @1h30m) is attached
+// to the located reference that ends closest before it, or to the first
+// reference of the list when there is none. A reference keeps only the first
+// time log assigned to it.
+func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
+	ret := make([]*rawReference, 0, 10)
+
+	// FindAllSubmatchIndex cannot be used because the patterns also consume the
+	// delimiter on each side of a reference (\s#ref,\s). With two consecutive
+	// references, the delimiter between them would be "eaten" by the first match:
+	//   ...\s#ref1\s#ref2\s... --> ...`\s#ref1\s`, (not) `#ref2,\s...`
+	// Instead, search repeatedly and resume right after the captured reference,
+	// so its trailing delimiter can still serve as the next reference's prefix.
+	for _, pattern := range []*regexp.Regexp{issueNumericPattern, crossReferenceIssueNumericPattern} {
+		pos := 0
+		for {
+			match := pattern.FindSubmatchIndex(content[pos:])
+			if match == nil {
+				break
+			}
+			// match[2]:match[3] is the captured reference, relative to pos.
+			start, end := match[2]+pos, match[3]+pos
+			if ref := getCrossReference(content, start, end, false, false); ref != nil {
+				ret = append(ret, ref)
+			}
+			// The capture groups admit no whitespace, so there is never a
+			// trailing space to trim; the capture is non-empty, so pos advances.
+			pos = end
+		}
+	}
+
+	localhost := getGiteaHostName()
+	for _, link := range links {
+		u, err := url.Parse(link)
+		if err != nil {
+			continue
+		}
+		// Note: we're not attempting to match the URL scheme (http/https)
+		host := strings.ToLower(u.Host)
+		if host != "" && host != localhost {
+			continue
+		}
+		parts := strings.Split(u.EscapedPath(), "/")
+		// /user/repo/issues/3
+		if len(parts) != 5 || parts[0] != "" {
+			continue
+		}
+		var sep string
+		switch parts[3] {
+		case "issues":
+			sep = "#"
+		case "pulls":
+			sep = "!"
+		default:
+			continue
+		}
+		// Note: closing/reopening keywords not supported with URLs
+		shortRef := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
+		if ref := getCrossReference(shortRef, 0, len(shortRef), true, false); ref != nil {
+			// The reference does not come from content, so it has no position in it.
+			ref.refLocation = nil
+			ret = append(ret, ref)
+		}
+	}
+
+	if len(ret) == 0 {
+		return ret
+	}
+
+	pos := 0
+	for {
+		match := timeLogPattern.FindSubmatchIndex(content[pos:])
+		if match == nil {
+			break
+		}
+
+		entryStart := match[2] + pos
+		// Drop the leading '@' of the entry.
+		timeLogEntry := string(content[entryStart+1 : match[3]+pos])
+
+		// Among the references located in content, pick the one that ends
+		// latest while still ending before this entry.
+		var f *rawReference
+		for _, ref := range ret {
+			if ref.refLocation != nil && ref.refLocation.End < entryStart && (f == nil || f.refLocation.End < ref.refLocation.End) {
+				f = ref
+			}
+		}
+
+		pos = match[1] + pos
+
+		if f == nil {
+			f = ret[0]
+		}
+
+		if len(f.timeLog) == 0 {
+			f.timeLog = timeLogEntry
+		}
+	}
+
+	return ret
+}
+
+func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference {
+ sep := bytes.IndexAny(content[start:end], "#!")
+ if sep < 0 {
+ return nil
+ }
+ isPull := content[start+sep] == '!'
+ if prOnly && !isPull {
+ return nil
+ }
+ repo := string(content[start : start+sep])
+ issue := string(content[start+sep+1 : end])
+ index, err := strconv.ParseInt(issue, 10, 64)
+ if err != nil {
+ return nil
+ }
+ if repo == "" {
+ if fromLink {
+ // Markdown links must specify owner/repo
+ return nil
+ }
+ action, location := findActionKeywords(content, start)
+ return &rawReference{
+ index: index,
+ action: action,
+ issue: issue,
+ isPull: isPull,
+ refLocation: &RefSpan{Start: start, End: end},
+ actionLocation: location,
+ }
+ }
+ parts := strings.Split(strings.ToLower(repo), "/")
+ if len(parts) != 2 {
+ return nil
+ }
+ owner, name := parts[0], parts[1]
+ if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
+ return nil
+ }
+ action, location := findActionKeywords(content, start)
+ return &rawReference{
+ index: index,
+ owner: owner,
+ name: name,
+ action: action,
+ issue: issue,
+ isPull: isPull,
+ refLocation: &RefSpan{Start: start, End: end},
+ actionLocation: location,
+ }
+}
+
+func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
+ newKeywords()
+ var m []int
+ if issueCloseKeywordsPat != nil {
+ m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
+ if m != nil {
+ return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
+ }
+ }
+ if issueReopenKeywordsPat != nil {
+ m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
+ if m != nil {
+ return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
+ }
+ }
+ return XRefActionNone, nil
+}
+
+// IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved).
+// That is the case for closing and reopening references, unless an external
+// tracker is in use.
+func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool {
+	if extTracker {
+		// External issues cannot be automatically closed
+		return false
+	}
+	switch ref.Action {
+	case XRefActionCloses, XRefActionReopens:
+		return true
+	}
+	return false
+}