diff options
author | Daniel Baumann <daniel@debian.org> | 2024-10-18 20:33:49 +0200 |
---|---|---|
committer | Daniel Baumann <daniel@debian.org> | 2024-12-12 23:57:56 +0100 |
commit | e68b9d00a6e05b3a941f63ffb696f91e554ac5ec (patch) | |
tree | 97775d6c13b0f416af55314eb6a89ef792474615 /modules/markup/common | |
parent | Initial commit. (diff) | |
download | forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.tar.xz forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.zip |
Adding upstream version 9.0.3.
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to '')
-rw-r--r-- | modules/markup/common/footnote.go | 498 | ||||
-rw-r--r-- | modules/markup/common/footnote_test.go | 62 | ||||
-rw-r--r-- | modules/markup/common/html.go | 16 | ||||
-rw-r--r-- | modules/markup/common/linkify.go | 153 |
4 files changed, 729 insertions, 0 deletions
diff --git a/modules/markup/common/footnote.go b/modules/markup/common/footnote.go new file mode 100644 index 0000000..0e75e2a --- /dev/null +++ b/modules/markup/common/footnote.go @@ -0,0 +1,498 @@ +// Copyright 2019 Yusuke Inuzuka +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +// Most of what follows is a subtly changed version of github.com/yuin/goldmark/extension/footnote.go + +package common + +import ( + "bytes" + "fmt" + "strconv" + "unicode" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +// CleanValue will clean a value to make it safe to be an id +// This function is quite different from the original goldmark function +// and more closely matches the output from the shurcooL sanitizer +// In particular Unicode letters and numbers are a lot more than a-zA-Z0-9... +func CleanValue(value []byte) []byte { + value = bytes.TrimSpace(value) + rs := bytes.Runes(value) + result := make([]rune, 0, len(rs)) + needsDash := false + for _, r := range rs { + switch { + case unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_': + if needsDash && len(result) > 0 { + result = append(result, '-') + } + needsDash = false + result = append(result, unicode.ToLower(r)) + default: + needsDash = true + } + } + return []byte(string(result)) +} + +// Most of what follows is a subtly changed version of github.com/yuin/goldmark/extension/footnote.go + +// A FootnoteLink struct represents a link to a footnote of Markdown +// (PHP Markdown Extra) text. +type FootnoteLink struct { + ast.BaseInline + Index int + Name []byte +} + +// Dump implements Node.Dump. +func (n *FootnoteLink) Dump(source []byte, level int) { + m := map[string]string{} + m["Index"] = fmt.Sprintf("%v", n.Index) + m["Name"] = fmt.Sprintf("%v", n.Name) + ast.DumpHelper(n, source, level, m, nil) +} + +// KindFootnoteLink is a NodeKind of the FootnoteLink node. +var KindFootnoteLink = ast.NewNodeKind("GiteaFootnoteLink") + +// Kind implements Node.Kind. +func (n *FootnoteLink) Kind() ast.NodeKind { + return KindFootnoteLink +} + +// NewFootnoteLink returns a new FootnoteLink node. +func NewFootnoteLink(index int, name []byte) *FootnoteLink { + return &FootnoteLink{ + Index: index, + Name: name, + } +} + +// A FootnoteBackLink struct represents a link to a footnote of Markdown +// (PHP Markdown Extra) text. +type FootnoteBackLink struct { + ast.BaseInline + Index int + Name []byte +} + +// Dump implements Node.Dump. +func (n *FootnoteBackLink) Dump(source []byte, level int) { + m := map[string]string{} + m["Index"] = fmt.Sprintf("%v", n.Index) + m["Name"] = fmt.Sprintf("%v", n.Name) + ast.DumpHelper(n, source, level, m, nil) +} + +// KindFootnoteBackLink is a NodeKind of the FootnoteBackLink node. +var KindFootnoteBackLink = ast.NewNodeKind("GiteaFootnoteBackLink") + +// Kind implements Node.Kind. +func (n *FootnoteBackLink) Kind() ast.NodeKind { + return KindFootnoteBackLink +} + +// NewFootnoteBackLink returns a new FootnoteBackLink node. +func NewFootnoteBackLink(index int, name []byte) *FootnoteBackLink { + return &FootnoteBackLink{ + Index: index, + Name: name, + } +} + +// A Footnote struct represents a footnote of Markdown +// (PHP Markdown Extra) text. +type Footnote struct { + ast.BaseBlock + Ref []byte + Index int + Name []byte +} + +// Dump implements Node.Dump. +func (n *Footnote) Dump(source []byte, level int) { + m := map[string]string{} + m["Index"] = strconv.Itoa(n.Index) + m["Ref"] = string(n.Ref) + m["Name"] = string(n.Name) + ast.DumpHelper(n, source, level, m, nil) +} + +// KindFootnote is a NodeKind of the Footnote node. +var KindFootnote = ast.NewNodeKind("GiteaFootnote") + +// Kind implements Node.Kind. +func (n *Footnote) Kind() ast.NodeKind { + return KindFootnote +} + +// NewFootnote returns a new Footnote node. +func NewFootnote(ref []byte) *Footnote { + return &Footnote{ + Ref: ref, + Index: -1, + Name: ref, + } +} + +// A FootnoteList struct represents footnotes of Markdown +// (PHP Markdown Extra) text. +type FootnoteList struct { + ast.BaseBlock + Count int +} + +// Dump implements Node.Dump. +func (n *FootnoteList) Dump(source []byte, level int) { + m := map[string]string{} + m["Count"] = fmt.Sprintf("%v", n.Count) + ast.DumpHelper(n, source, level, m, nil) +} + +// KindFootnoteList is a NodeKind of the FootnoteList node. +var KindFootnoteList = ast.NewNodeKind("GiteaFootnoteList") + +// Kind implements Node.Kind. +func (n *FootnoteList) Kind() ast.NodeKind { + return KindFootnoteList +} + +// NewFootnoteList returns a new FootnoteList node. +func NewFootnoteList() *FootnoteList { + return &FootnoteList{ + Count: 0, + } +} + +var footnoteListKey = parser.NewContextKey() + +type footnoteBlockParser struct{} + +var defaultFootnoteBlockParser = &footnoteBlockParser{} + +// NewFootnoteBlockParser returns a new parser.BlockParser that can parse +// footnotes of the Markdown(PHP Markdown Extra) text. +func NewFootnoteBlockParser() parser.BlockParser { + return defaultFootnoteBlockParser +} + +func (b *footnoteBlockParser) Trigger() []byte { + return []byte{'['} +} + +func (b *footnoteBlockParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) { + line, segment := reader.PeekLine() + pos := pc.BlockOffset() + if pos < 0 || line[pos] != '[' { + return nil, parser.NoChildren + } + pos++ + if pos > len(line)-1 || line[pos] != '^' { + return nil, parser.NoChildren + } + open := pos + 1 + closure := util.FindClosure(line[pos+1:], '[', ']', false, false) //nolint + closes := pos + 1 + closure + next := closes + 1 + if closure > -1 { + if next >= len(line) || line[next] != ':' { + return nil, parser.NoChildren + } + } else { + return nil, parser.NoChildren + } + padding := segment.Padding + label := reader.Value(text.NewSegment(segment.Start+open-padding, segment.Start+closes-padding)) + if util.IsBlank(label) { + return nil, parser.NoChildren + } + item := NewFootnote(label) + + pos = next + 1 - padding + if pos >= len(line) { + reader.Advance(pos) + return item, parser.NoChildren + } + reader.AdvanceAndSetPadding(pos, padding) + return item, parser.HasChildren +} + +func (b *footnoteBlockParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State { + line, _ := reader.PeekLine() + if util.IsBlank(line) { + return parser.Continue | parser.HasChildren + } + childpos, padding := util.IndentPosition(line, reader.LineOffset(), 4) + if childpos < 0 { + return parser.Close + } + reader.AdvanceAndSetPadding(childpos, padding) + return parser.Continue | parser.HasChildren +} + +func (b *footnoteBlockParser) Close(node ast.Node, reader text.Reader, pc parser.Context) { + var list *FootnoteList + if tlist := pc.Get(footnoteListKey); tlist != nil { + list = tlist.(*FootnoteList) + } else { + list = NewFootnoteList() + pc.Set(footnoteListKey, list) + node.Parent().InsertBefore(node.Parent(), node, list) + } + node.Parent().RemoveChild(node.Parent(), node) + list.AppendChild(list, node) +} + +func (b *footnoteBlockParser) CanInterruptParagraph() bool { + return true +} + +func (b *footnoteBlockParser) CanAcceptIndentedLine() bool { + return false +} + +type footnoteParser struct{} + +var defaultFootnoteParser = &footnoteParser{} + +// NewFootnoteParser returns a new parser.InlineParser that can parse +// footnote links of the Markdown(PHP Markdown Extra) text. +func NewFootnoteParser() parser.InlineParser { + return defaultFootnoteParser +} + +func (s *footnoteParser) Trigger() []byte { + // footnote syntax probably conflict with the image syntax. + // So we need trigger this parser with '!'. + return []byte{'!', '['} +} + +func (s *footnoteParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + line, segment := block.PeekLine() + pos := 1 + if len(line) > 0 && line[0] == '!' { + pos++ + } + if pos >= len(line) || line[pos] != '^' { + return nil + } + pos++ + if pos >= len(line) { + return nil + } + open := pos + closure := util.FindClosure(line[pos:], '[', ']', false, false) //nolint + if closure < 0 { + return nil + } + closes := pos + closure + value := block.Value(text.NewSegment(segment.Start+open, segment.Start+closes)) + block.Advance(closes + 1) + + var list *FootnoteList + if tlist := pc.Get(footnoteListKey); tlist != nil { + list = tlist.(*FootnoteList) + } + if list == nil { + return nil + } + index := 0 + name := []byte{} + for def := list.FirstChild(); def != nil; def = def.NextSibling() { + d := def.(*Footnote) + if bytes.Equal(d.Ref, value) { + if d.Index < 0 { + list.Count++ + d.Index = list.Count + val := CleanValue(d.Name) + if len(val) == 0 { + val = []byte(strconv.Itoa(d.Index)) + } + d.Name = pc.IDs().Generate(val, KindFootnote) + } + index = d.Index + name = d.Name + break + } + } + if index == 0 { + return nil + } + + return NewFootnoteLink(index, name) +} + +type footnoteASTTransformer struct{} + +var defaultFootnoteASTTransformer = &footnoteASTTransformer{} + +// NewFootnoteASTTransformer returns a new parser.ASTTransformer that +// insert a footnote list to the last of the document. +func NewFootnoteASTTransformer() parser.ASTTransformer { + return defaultFootnoteASTTransformer +} + +func (a *footnoteASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { + var list *FootnoteList + if tlist := pc.Get(footnoteListKey); tlist != nil { + list = tlist.(*FootnoteList) + } else { + return + } + pc.Set(footnoteListKey, nil) + for footnote := list.FirstChild(); footnote != nil; { + container := footnote + next := footnote.NextSibling() + if fc := container.LastChild(); fc != nil && ast.IsParagraph(fc) { + container = fc + } + footnoteNode := footnote.(*Footnote) + index := footnoteNode.Index + name := footnoteNode.Name + if index < 0 { + list.RemoveChild(list, footnote) + } else { + container.AppendChild(container, NewFootnoteBackLink(index, name)) + } + footnote = next + } + list.SortChildren(func(n1, n2 ast.Node) int { + if n1.(*Footnote).Index < n2.(*Footnote).Index { + return -1 + } + return 1 + }) + if list.Count <= 0 { + list.Parent().RemoveChild(list.Parent(), list) + return + } + + node.AppendChild(node, list) +} + +// FootnoteHTMLRenderer is a renderer.NodeRenderer implementation that +// renders FootnoteLink nodes. +type FootnoteHTMLRenderer struct { + html.Config +} + +// NewFootnoteHTMLRenderer returns a new FootnoteHTMLRenderer. +func NewFootnoteHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &FootnoteHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. +func (r *FootnoteHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(KindFootnoteLink, r.renderFootnoteLink) + reg.Register(KindFootnoteBackLink, r.renderFootnoteBackLink) + reg.Register(KindFootnote, r.renderFootnote) + reg.Register(KindFootnoteList, r.renderFootnoteList) +} + +func (r *FootnoteHTMLRenderer) renderFootnoteLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if entering { + n := node.(*FootnoteLink) + is := strconv.Itoa(n.Index) + _, _ = w.WriteString(`<sup id="fnref:`) + _, _ = w.Write(n.Name) + _, _ = w.WriteString(`"><a href="#fn:`) + _, _ = w.Write(n.Name) + _, _ = w.WriteString(`" class="footnote-ref" role="doc-noteref">`) + _, _ = w.WriteString(is) + _, _ = w.WriteString(`</a></sup>`) + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnoteBackLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if entering { + n := node.(*FootnoteBackLink) + _, _ = w.WriteString(` <a href="#fnref:`) + _, _ = w.Write(n.Name) + _, _ = w.WriteString(`" class="footnote-backref" role="doc-backlink">`) + _, _ = w.WriteString("↩︎") + _, _ = w.WriteString(`</a>`) + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnote(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*Footnote) + if entering { + _, _ = w.WriteString(`<li id="fn:`) + _, _ = w.Write(n.Name) + _, _ = w.WriteString(`" role="doc-endnote"`) + if node.Attributes() != nil { + html.RenderAttributes(w, node, html.ListItemAttributeFilter) + } + _, _ = w.WriteString(">\n") + } else { + _, _ = w.WriteString("</li>\n") + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnoteList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + tag := "div" + if entering { + _, _ = w.WriteString("<") + _, _ = w.WriteString(tag) + _, _ = w.WriteString(` class="footnotes" role="doc-endnotes"`) + if node.Attributes() != nil { + html.RenderAttributes(w, node, html.GlobalAttributeFilter) + } + _ = w.WriteByte('>') + if r.Config.XHTML { + _, _ = w.WriteString("\n<hr />\n") + } else { + _, _ = w.WriteString("\n<hr>\n") + } + _, _ = w.WriteString("<ol>\n") + } else { + _, _ = w.WriteString("</ol>\n") + _, _ = w.WriteString("</") + _, _ = w.WriteString(tag) + _, _ = w.WriteString(">\n") + } + return ast.WalkContinue, nil +} + +type footnoteExtension struct{} + +// FootnoteExtension represents the Gitea Footnote +var FootnoteExtension = &footnoteExtension{} + +// Extend extends the markdown converter with the Gitea Footnote parser +func (e *footnoteExtension) Extend(m goldmark.Markdown) { + m.Parser().AddOptions( + parser.WithBlockParsers( + util.Prioritized(NewFootnoteBlockParser(), 999), + ), + parser.WithInlineParsers( + util.Prioritized(NewFootnoteParser(), 101), + ), + parser.WithASTTransformers( + util.Prioritized(NewFootnoteASTTransformer(), 999), + ), + ) + m.Renderer().AddOptions(renderer.WithNodeRenderers( + util.Prioritized(NewFootnoteHTMLRenderer(), 500), + )) +} diff --git a/modules/markup/common/footnote_test.go b/modules/markup/common/footnote_test.go new file mode 100644 index 0000000..62763c5 --- /dev/null +++ b/modules/markup/common/footnote_test.go @@ -0,0 +1,62 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// Copyright 2023 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT +package common + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCleanValue(t *testing.T) { + tests := []struct { + param string + expect string + }{ + // Github behavior test cases + {"", ""}, + {"test.0.1", "test-0-1"}, + {"test(0)", "test-0"}, + {"test!1", "test-1"}, + {"test:2", "test-2"}, + {"test*3", "test-3"}, + {"test!4", "test-4"}, + {"test:5", "test-5"}, + {"test*6", "test-6"}, + {"test:6 a", "test-6-a"}, + {"test:6 !b", "test-6-b"}, + {"test:ad # df", "test-ad-df"}, + {"test:ad #23 df 2*/*", "test-ad-23-df-2"}, + {"test:ad 23 df 2*/*", "test-ad-23-df-2"}, + {"test:ad # 23 df 2*/*", "test-ad-23-df-2"}, + {"Anchors in Markdown", "anchors-in-markdown"}, + {"a_b_c", "a_b_c"}, + {"a-b-c", "a-b-c"}, + {"a-b-c----", "a-b-c"}, + {"test:6a", "test-6a"}, + {"test:a6", "test-a6"}, + {"tes a a a a", "tes-a-a-a-a"}, + {" tes a a a a ", "tes-a-a-a-a"}, + {"Header with \"double quotes\"", "header-with-double-quotes"}, + {"Placeholder to force scrolling on link's click", "placeholder-to-force-scrolling-on-link-s-click"}, + {"tes()", "tes"}, + {"tes(0)", "tes-0"}, + {"tes{0}", "tes-0"}, + {"tes[0]", "tes-0"}, + {"test【0】", "test-0"}, + {"tes…@a", "tes-a"}, + {"tes¥& a", "tes-a"}, + {"tes= a", "tes-a"}, + {"tes|a", "tes-a"}, + {"tes\\a", "tes-a"}, + {"tes/a", "tes-a"}, + {"a啊啊b", "a啊啊b"}, + {"c🤔️🤔️d", "c-d"}, + {"a⚡a", "a-a"}, + {"e.~f", "e-f"}, + } + for _, test := range tests { + assert.Equal(t, []byte(test.expect), CleanValue([]byte(test.param)), test.param) + } +} diff --git a/modules/markup/common/html.go b/modules/markup/common/html.go new file mode 100644 index 0000000..5658839 --- /dev/null +++ b/modules/markup/common/html.go @@ -0,0 +1,16 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package common + +import ( + "mvdan.cc/xurls/v2" +) + +// NOTE: All below regex matching do not perform any extra validation. +// Thus a link is produced even if the linked entity does not exist. +// While fast, this is also incorrect and lead to false positives. +// TODO: fix invalid linking issue + +// LinkRegex is a regexp matching a valid link +var LinkRegex, _ = xurls.StrictMatchingScheme("https?://") diff --git a/modules/markup/common/linkify.go b/modules/markup/common/linkify.go new file mode 100644 index 0000000..f846802 --- /dev/null +++ b/modules/markup/common/linkify.go @@ -0,0 +1,153 @@ +// Copyright 2019 Yusuke Inuzuka +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +// Most of this file is a subtly changed version of github.com/yuin/goldmark/extension/linkify.go + +package common + +import ( + "bytes" + "regexp" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) + +type linkifyParser struct{} + +var defaultLinkifyParser = &linkifyParser{} + +// NewLinkifyParser return a new InlineParser can parse +// text that seems like a URL. +func NewLinkifyParser() parser.InlineParser { + return defaultLinkifyParser +} + +func (s *linkifyParser) Trigger() []byte { + // ' ' indicates any white spaces and a line head + return []byte{' ', '*', '_', '~', '('} +} + +var ( + protoHTTP = []byte("http:") + protoHTTPS = []byte("https:") + protoFTP = []byte("ftp:") + domainWWW = []byte("www.") +) + +func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + if pc.IsInLinkLabel() { + return nil + } + line, segment := block.PeekLine() + consumes := 0 + start := segment.Start + c := line[0] + // advance if current position is not a line head. + if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' { + consumes++ + start++ + line = line[1:] + } + + var m []int + var protocol []byte + typ := ast.AutoLinkURL + if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { + m = LinkRegex.FindSubmatchIndex(line) + } + if m == nil && bytes.HasPrefix(line, domainWWW) { + m = wwwURLRegxp.FindSubmatchIndex(line) + protocol = []byte("http") + } + if m != nil { + lastChar := line[m[1]-1] + if lastChar == '.' { + m[1]-- + } else if lastChar == ')' { + closing := 0 + for i := m[1] - 1; i >= m[0]; i-- { + if line[i] == ')' { + closing++ + } else if line[i] == '(' { + closing-- + } + } + if closing > 0 { + m[1] -= closing + } + } else if lastChar == ';' { + i := m[1] - 2 + for ; i >= m[0]; i-- { + if util.IsAlphaNumeric(line[i]) { + continue + } + break + } + if i != m[1]-2 { + if line[i] == '&' { + m[1] -= m[1] - i + } + } + } + } + if m == nil { + if len(line) > 0 && util.IsPunct(line[0]) { + return nil + } + typ = ast.AutoLinkEmail + stop := util.FindEmailIndex(line) + if stop < 0 { + return nil + } + at := bytes.IndexByte(line, '@') + m = []int{0, stop, at, stop - 1} + if bytes.IndexByte(line[m[2]:m[3]], '.') < 0 { + return nil + } + lastChar := line[m[1]-1] + if lastChar == '.' { + m[1]-- + } + if m[1] < len(line) { + nextChar := line[m[1]] + if nextChar == '-' || nextChar == '_' { + return nil + } + } + } + + if consumes != 0 { + s := segment.WithStop(segment.Start + 1) + ast.MergeOrAppendTextSegment(parent, s) + } + consumes += m[1] + block.Advance(consumes) + n := ast.NewTextSegment(text.NewSegment(start, start+m[1])) + link := ast.NewAutoLink(typ, n) + link.Protocol = protocol + return link +} + +func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) { + // nothing to do +} + +type linkify struct{} + +// Linkify is an extension that allow you to parse text that seems like a URL. +var Linkify = &linkify{} + +func (e *linkify) Extend(m goldmark.Markdown) { + m.Parser().AddOptions( + parser.WithInlineParsers( + util.Prioritized(NewLinkifyParser(), 999), + ), + ) +} |