diff options
author | Daniel Baumann <daniel@debian.org> | 2024-10-18 20:33:49 +0200 |
---|---|---|
committer | Daniel Baumann <daniel@debian.org> | 2024-12-12 23:57:56 +0100 |
commit | e68b9d00a6e05b3a941f63ffb696f91e554ac5ec (patch) | |
tree | 97775d6c13b0f416af55314eb6a89ef792474615 /modules/references/references.go | |
parent | Initial commit. (diff) | |
download | forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.tar.xz forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.zip |
Adding upstream version 9.0.3.
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to '')
-rw-r--r-- | modules/references/references.go | 594 |
1 files changed, 594 insertions, 0 deletions
diff --git a/modules/references/references.go b/modules/references/references.go new file mode 100644 index 0000000..c61d06d --- /dev/null +++ b/modules/references/references.go @@ -0,0 +1,594 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package references + +import ( + "bytes" + "net/url" + "regexp" + "strconv" + "strings" + "sync" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/markup/mdstripper" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" +) + +var ( + // validNamePattern performs only the most basic validation for user or repository names + // Repository name should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters. + validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`) + + // NOTE: All below regex matching do not perform any extra validation. + // Thus a link is produced even if the linked entity does not exist. + // While fast, this is also incorrect and lead to false positives. + // TODO: fix invalid linking issue + + // mentionPattern matches all mentions in the form of "@user" or "@org/team" + mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:'|\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`) + // issueNumericPattern matches string that references to a numeric issue, e.g. #1287 + issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\'|\")([#!][0-9]+)(?:\s|$|\)|\]|\'|\"|[:;,.?!]\s|[:;,.?!]$)`) + // issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 + issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\"|\')([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$)|\"|\')`) + // crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository + // e.g. org/repo#12345 + crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) + // crossReferenceCommitPattern matches a string that references a commit in a different repository + // e.g. go-gitea/gitea@d8a994ef, go-gitea/gitea@d8a994ef243349f321568f9e36d5c3f444b99cae (7-40 characters) + crossReferenceCommitPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+)/([0-9a-zA-Z-_\.]+)@([0-9a-f]{7,64})(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) + // spaceTrimmedPattern let's find the trailing space + spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`) + // timeLogPattern matches string for time tracking + timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) + + issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp + issueKeywordsOnce sync.Once + + giteaHostInit sync.Once + giteaHost string + giteaIssuePullPattern *regexp.Regexp + + actionStrings = []string{ + "none", + "closes", + "reopens", + "neutered", + } +) + +// XRefAction represents the kind of effect a cross reference has once is resolved +type XRefAction int64 + +const ( + // XRefActionNone means the cross-reference is simply a comment + XRefActionNone XRefAction = iota // 0 + // XRefActionCloses means the cross-reference should close an issue if it is resolved + XRefActionCloses // 1 + // XRefActionReopens means the cross-reference should reopen an issue if it is resolved + XRefActionReopens // 2 + // XRefActionNeutered means the cross-reference will no longer affect the source + XRefActionNeutered // 3 +) + +func (a XRefAction) String() string { + return actionStrings[a] +} + +// IssueReference contains an unverified cross-reference to a local issue or pull request +type IssueReference struct { + Index int64 + Owner string + Name string + Action XRefAction + TimeLog string +} + +// RenderizableReference contains an unverified cross-reference to with rendering information +// The IsPull member means that a `!num` reference was used instead of `#num`. +// This kind of reference is used to make pulls available when an external issue tracker +// is used. Otherwise, `#` and `!` are completely interchangeable. +type RenderizableReference struct { + Issue string + Owner string + Name string + CommitSha string + IsPull bool + RefLocation *RefSpan + Action XRefAction + ActionLocation *RefSpan +} + +type rawReference struct { + index int64 + owner string + name string + isPull bool + action XRefAction + issue string + refLocation *RefSpan + actionLocation *RefSpan + timeLog string +} + +func rawToIssueReferenceList(reflist []*rawReference) []IssueReference { + refarr := make([]IssueReference, len(reflist)) + for i, r := range reflist { + refarr[i] = IssueReference{ + Index: r.index, + Owner: r.owner, + Name: r.name, + Action: r.action, + TimeLog: r.timeLog, + } + } + return refarr +} + +// RefSpan is the position where the reference was found within the parsed text +type RefSpan struct { + Start int + End int +} + +func makeKeywordsPat(words []string) *regexp.Regexp { + acceptedWords := parseKeywords(words) + if len(acceptedWords) == 0 { + // Never match + return nil + } + return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`) +} + +func parseKeywords(words []string) []string { + acceptedWords := make([]string, 0, 5) + wordPat := regexp.MustCompile(`^[\pL]+$`) + for _, word := range words { + word = strings.ToLower(strings.TrimSpace(word)) + // Accept Unicode letter class runes (a-z, á, à, ä, ) + if wordPat.MatchString(word) { + acceptedWords = append(acceptedWords, word) + } else { + log.Info("Invalid keyword: %s", word) + } + } + return acceptedWords +} + +func newKeywords() { + issueKeywordsOnce.Do(func() { + // Delay initialization until after the settings module is initialized + doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords) + }) +} + +func doNewKeywords(close, reopen []string) { + issueCloseKeywordsPat = makeKeywordsPat(close) + issueReopenKeywordsPat = makeKeywordsPat(reopen) +} + +// getGiteaHostName returns a normalized string with the local host name, with no scheme or port information +func getGiteaHostName() string { + giteaHostInit.Do(func() { + if uapp, err := url.Parse(setting.AppURL); err == nil { + giteaHost = strings.ToLower(uapp.Host) + giteaIssuePullPattern = regexp.MustCompile( + `(\s|^|\(|\[)` + + regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) + + `([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` + + `((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) + } else { + giteaHost = "" + giteaIssuePullPattern = nil + } + }) + return giteaHost +} + +// getGiteaIssuePullPattern +func getGiteaIssuePullPattern() *regexp.Regexp { + getGiteaHostName() + return giteaIssuePullPattern +} + +// FindAllMentionsMarkdown matches mention patterns in given content and +// returns a list of found unvalidated user names **not including** the @ prefix. +func FindAllMentionsMarkdown(content string) []string { + bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content)) + locations := FindAllMentionsBytes(bcontent) + mentions := make([]string, len(locations)) + for i, val := range locations { + mentions[i] = string(bcontent[val.Start+1 : val.End]) + } + return mentions +} + +// FindAllMentionsBytes matches mention patterns in given content +// and returns a list of locations for the unvalidated user names, including the @ prefix. +func FindAllMentionsBytes(content []byte) []RefSpan { + // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and + // trailing spaces (\s@mention,\s), so if we get two consecutive references, the space + // from the second reference will be "eaten" by the first one: + // ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...` + ret := make([]RefSpan, 0, 5) + pos := 0 + for { + match := mentionPattern.FindSubmatchIndex(content[pos:]) + if match == nil { + break + } + ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos}) + notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) + if notrail == nil { + pos = match[3] + pos + } else { + pos = match[3] + pos + notrail[1] - notrail[3] + } + } + return ret +} + +// FindFirstMentionBytes matches the first mention in then given content +// and returns the location of the unvalidated user name, including the @ prefix. +func FindFirstMentionBytes(content []byte) (bool, RefSpan) { + mention := mentionPattern.FindSubmatchIndex(content) + if mention == nil { + return false, RefSpan{} + } + return true, RefSpan{Start: mention[2], End: mention[3]} +} + +// FindAllIssueReferencesMarkdown strips content from markdown markup +// and returns a list of unvalidated references found in it. +func FindAllIssueReferencesMarkdown(content string) []IssueReference { + return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content)) +} + +func findAllIssueReferencesMarkdown(content string) []*rawReference { + bcontent, links := mdstripper.StripMarkdownBytes([]byte(content)) + return findAllIssueReferencesBytes(bcontent, links) +} + +func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) { + // We will iterate through the content, rewrite and simplify full references. + // + // We want to transform something like: + // + // this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo + // https://ourgitea.com/git/owner/repo/pulls/123456789 + // + // Into something like: + // + // this is a #123456789, foo + // !123456789 + + pos := 0 + for { + // re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$) + match := re.FindSubmatchIndex((*contentBytes)[pos:]) + if match == nil { + break + } + // match is a bunch of indices into the content from pos onwards so + // to simplify things let's just add pos to all of the indices in match + for i := range match { + match[i] += pos + } + + // match[0]-match[1] is whole string + // match[2]-match[3] is preamble + + // move the position to the end of the preamble + pos = match[3] + + // match[4]-match[5] is owner/repo + // now copy the owner/repo to end of the preamble + endPos := pos + match[5] - match[4] + copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]]) + + // move the current position to the end of the newly copied owner/repo + pos = endPos + + // Now set the issue/pull marker: + // + // match[6]-match[7] == 'issues' + (*contentBytes)[pos] = '#' + if string((*contentBytes)[match[6]:match[7]]) == "pulls" { + (*contentBytes)[pos] = '!' + } + pos++ + + // Then add the issue/pull number + // + // match[8]-match[9] is the number + endPos = pos + match[9] - match[8] + copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]]) + + // Now copy what's left at the end of the string to the new end position + copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:]) + // now we reset the length + + // our new section has length endPos - match[3] + // our old section has length match[9] - match[3] + *contentBytes = (*contentBytes)[:len(*contentBytes)-match[9]+endPos] + pos = endPos + } +} + +// FindAllIssueReferences returns a list of unvalidated references found in a string. +func FindAllIssueReferences(content string) []IssueReference { + // Need to convert fully qualified html references to local system to #/! short codes + contentBytes := []byte(content) + if re := getGiteaIssuePullPattern(); re != nil { + convertFullHTMLReferencesToShortRefs(re, &contentBytes) + } else { + log.Debug("No GiteaIssuePullPattern pattern") + } + return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{})) +} + +// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string. +func FindRenderizableReferenceNumeric(content string, prOnly, crossLinkOnly bool) (bool, *RenderizableReference) { + var match []int + if !crossLinkOnly { + match = issueNumericPattern.FindStringSubmatchIndex(content) + } + if match == nil { + if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil { + return false, nil + } + } + r := getCrossReference(util.UnsafeStringToBytes(content), match[2], match[3], false, prOnly) + if r == nil { + return false, nil + } + + return true, &RenderizableReference{ + Issue: r.issue, + Owner: r.owner, + Name: r.name, + IsPull: r.isPull, + RefLocation: r.refLocation, + Action: r.action, + ActionLocation: r.actionLocation, + } +} + +// FindRenderizableCommitCrossReference returns the first unvalidated commit cross reference found in a string. +func FindRenderizableCommitCrossReference(content string) (bool, *RenderizableReference) { + m := crossReferenceCommitPattern.FindStringSubmatchIndex(content) + if len(m) < 8 { + return false, nil + } + + return true, &RenderizableReference{ + Owner: content[m[2]:m[3]], + Name: content[m[4]:m[5]], + CommitSha: content[m[6]:m[7]], + RefLocation: &RefSpan{Start: m[2], End: m[7]}, + } +} + +// FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string. +func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) { + match := pattern.FindStringSubmatchIndex(content) + if len(match) < 4 { + return false, nil + } + + action, location := findActionKeywords([]byte(content), match[2]) + + return true, &RenderizableReference{ + Issue: content[match[2]:match[3]], + RefLocation: &RefSpan{Start: match[0], End: match[1]}, + Action: action, + ActionLocation: location, + IsPull: false, + } +} + +// FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string. +func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) { + match := issueAlphanumericPattern.FindStringSubmatchIndex(content) + if match == nil { + return false, nil + } + + action, location := findActionKeywords([]byte(content), match[2]) + + return true, &RenderizableReference{ + Issue: content[match[2]:match[3]], + RefLocation: &RefSpan{Start: match[2], End: match[3]}, + Action: action, + ActionLocation: location, + IsPull: false, + } +} + +// FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice. +func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference { + ret := make([]*rawReference, 0, 10) + pos := 0 + + // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and + // trailing spaces (\s#ref,\s), so if we get two consecutive references, the space + // from the second reference will be "eaten" by the first one: + // ...\s#ref1\s#ref2\s... --> ...`\s#ref1\s`, (not) `#ref2,\s...` + for { + match := issueNumericPattern.FindSubmatchIndex(content[pos:]) + if match == nil { + break + } + if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil { + ret = append(ret, ref) + } + notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) + if notrail == nil { + pos = match[3] + pos + } else { + pos = match[3] + pos + notrail[1] - notrail[3] + } + } + + pos = 0 + + for { + match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:]) + if match == nil { + break + } + if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil { + ret = append(ret, ref) + } + notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) + if notrail == nil { + pos = match[3] + pos + } else { + pos = match[3] + pos + notrail[1] - notrail[3] + } + } + + localhost := getGiteaHostName() + for _, link := range links { + if u, err := url.Parse(link); err == nil { + // Note: we're not attempting to match the URL scheme (http/https) + host := strings.ToLower(u.Host) + if host != "" && host != localhost { + continue + } + parts := strings.Split(u.EscapedPath(), "/") + // /user/repo/issues/3 + if len(parts) != 5 || parts[0] != "" { + continue + } + var sep string + if parts[3] == "issues" { + sep = "#" + } else if parts[3] == "pulls" { + sep = "!" + } else { + continue + } + // Note: closing/reopening keywords not supported with URLs + bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4]) + if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil { + ref.refLocation = nil + ret = append(ret, ref) + } + } + } + + if len(ret) == 0 { + return ret + } + + pos = 0 + + for { + match := timeLogPattern.FindSubmatchIndex(content[pos:]) + if match == nil { + break + } + + timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos]) + + var f *rawReference + for _, ref := range ret { + if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) { + f = ref + } + } + + pos = match[1] + pos + + if f == nil { + f = ret[0] + } + + if len(f.timeLog) == 0 { + f.timeLog = timeLogEntry + } + } + + return ret +} + +func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference { + sep := bytes.IndexAny(content[start:end], "#!") + if sep < 0 { + return nil + } + isPull := content[start+sep] == '!' + if prOnly && !isPull { + return nil + } + repo := string(content[start : start+sep]) + issue := string(content[start+sep+1 : end]) + index, err := strconv.ParseInt(issue, 10, 64) + if err != nil { + return nil + } + if repo == "" { + if fromLink { + // Markdown links must specify owner/repo + return nil + } + action, location := findActionKeywords(content, start) + return &rawReference{ + index: index, + action: action, + issue: issue, + isPull: isPull, + refLocation: &RefSpan{Start: start, End: end}, + actionLocation: location, + } + } + parts := strings.Split(strings.ToLower(repo), "/") + if len(parts) != 2 { + return nil + } + owner, name := parts[0], parts[1] + if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) { + return nil + } + action, location := findActionKeywords(content, start) + return &rawReference{ + index: index, + owner: owner, + name: name, + action: action, + issue: issue, + isPull: isPull, + refLocation: &RefSpan{Start: start, End: end}, + actionLocation: location, + } +} + +func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) { + newKeywords() + var m []int + if issueCloseKeywordsPat != nil { + m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start]) + if m != nil { + return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]} + } + } + if issueReopenKeywordsPat != nil { + m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start]) + if m != nil { + return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]} + } + } + return XRefActionNone, nil +} + +// IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved) +func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool { + if extTracker { + // External issues cannot be automatically closed + return false + } + return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens +} |