Diffstat (limited to 'modules/highlight')
-rw-r--r--  modules/highlight/highlight.go       224
-rw-r--r--  modules/highlight/highlight_test.go  190
2 files changed, 414 insertions(+), 0 deletions(-)
diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
new file mode 100644
index 0000000..bd6137d
--- /dev/null
+++ b/modules/highlight/highlight.go
@@ -0,0 +1,224 @@
+// Copyright 2015 The Gogs Authors. All rights reserved.
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ gohtml "html"
+ "html/template"
+ "io"
+ "path/filepath"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/analyze"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/util"
+
+ "github.com/alecthomas/chroma/v2"
+ "github.com/alecthomas/chroma/v2/formatters/html"
+ "github.com/alecthomas/chroma/v2/lexers"
+ "github.com/alecthomas/chroma/v2/styles"
+ lru "github.com/hashicorp/golang-lru/v2"
+)
+
+// don't highlight files larger than this many bytes, for performance reasons
+const sizeLimit = 1024 * 1024
+
+var (
+ // For custom user mapping
+ highlightMapping = map[string]string{}
+
+ once sync.Once
+
+ cache *lru.TwoQueueCache[string, any]
+
+ githubStyles = styles.Get("github")
+)
+
+// NewContext loads the custom highlight mapping from the local config
+func NewContext() {
+ once.Do(func() {
+ highlightMapping = setting.GetHighlightMapping()
+
+ // The size 512 is simply a conservative rule of thumb
+ c, err := lru.New2Q[string, any](512)
+ if err != nil {
+ panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
+ }
+ cache = c
+ })
+}
+
+// Code returns an HTML version of the code string with chroma syntax highlighting classes and the matched lexer name
+func Code(fileName, language, code string) (output template.HTML, lexerName string) {
+ NewContext()
+
+ // the diff view passes a newline as an empty string; change it to a literal '\n' so it can be copied
+ // a literal newline from the blame view is preserved
+ if code == "" || code == "\n" {
+ return "\n", ""
+ }
+
+ if len(code) > sizeLimit {
+ return template.HTML(template.HTMLEscapeString(code)), ""
+ }
+
+ var lexer chroma.Lexer
+
+ if len(language) > 0 {
+ lexer = lexers.Get(language)
+
+ if lexer == nil {
+ // Attempt stripping off the '?'
+ if idx := strings.IndexByte(language, '?'); idx > 0 {
+ lexer = lexers.Get(language[:idx])
+ }
+ }
+ }
+
+ if lexer == nil {
+ if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
+ // use mapped value to find lexer
+ lexer = lexers.Get(val)
+ }
+ }
+
+ if lexer == nil {
+ if l, ok := cache.Get(fileName); ok {
+ lexer = l.(chroma.Lexer)
+ }
+ }
+
+ if lexer == nil {
+ lexer = lexers.Match(strings.ToLower(fileName))
+ if lexer == nil {
+ lexer = lexers.Fallback
+ }
+ cache.Add(fileName, lexer)
+ }
+
+ return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
+}
+
+// CodeFromLexer returns an HTML version of the code string with chroma syntax highlighting classes
+func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
+ formatter := html.New(html.WithClasses(true),
+ html.WithLineNumbers(false),
+ html.PreventSurroundingPre(true),
+ )
+
+ htmlbuf := bytes.Buffer{}
+ htmlw := bufio.NewWriter(&htmlbuf)
+
+ iterator, err := lexer.Tokenise(nil, code)
+ if err != nil {
+ log.Error("Can't tokenize code: %v", err)
+ return template.HTML(template.HTMLEscapeString(code))
+ }
+ // the style is not used on the live site, but we need to pass something
+ err = formatter.Format(htmlw, githubStyles, iterator)
+ if err != nil {
+ log.Error("Can't format code: %v", err)
+ return template.HTML(template.HTMLEscapeString(code))
+ }
+
+ _ = htmlw.Flush()
+ // Chroma will add newlines for certain lexers in order to highlight them properly
+ // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
+ return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
+}
+
+// File returns a slice of chroma syntax-highlighted HTML lines of code and the matched lexer name
+func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
+ NewContext()
+
+ if len(code) > sizeLimit {
+ return PlainText(code), "", nil
+ }
+
+ formatter := html.New(html.WithClasses(true),
+ html.WithLineNumbers(false),
+ html.PreventSurroundingPre(true),
+ )
+
+ var lexer chroma.Lexer
+
+ // provided language overrides everything
+ if language != "" {
+ lexer = lexers.Get(language)
+ }
+
+ if lexer == nil {
+ if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
+ lexer = lexers.Get(val)
+ }
+ }
+
+ if lexer == nil {
+ guessLanguage := analyze.GetCodeLanguage(fileName, code)
+
+ lexer = lexers.Get(guessLanguage)
+ if lexer == nil {
+ lexer = lexers.Match(strings.ToLower(fileName))
+ if lexer == nil {
+ lexer = lexers.Fallback
+ }
+ }
+ }
+
+ lexerName := formatLexerName(lexer.Config().Name)
+
+ iterator, err := lexer.Tokenise(nil, string(code))
+ if err != nil {
+ return nil, "", fmt.Errorf("can't tokenize code: %w", err)
+ }
+
+ tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
+ htmlBuf := &bytes.Buffer{}
+
+ lines := make([]template.HTML, 0, len(tokensLines))
+ for _, tokens := range tokensLines {
+ iterator = chroma.Literator(tokens...)
+ err = formatter.Format(htmlBuf, githubStyles, iterator)
+ if err != nil {
+ return nil, "", fmt.Errorf("can't format code: %w", err)
+ }
+ lines = append(lines, template.HTML(htmlBuf.String()))
+ htmlBuf.Reset()
+ }
+
+ return lines, lexerName, nil
+}
+
+// PlainText returns non-highlighted HTML for code
+func PlainText(code []byte) []template.HTML {
+ r := bufio.NewReader(bytes.NewReader(code))
+ m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
+ for {
+ content, err := r.ReadString('\n')
+ if err != nil && err != io.EOF {
+ log.Error("failed to read string from buffer: %v", err)
+ break
+ }
+ if content == "" && err == io.EOF {
+ break
+ }
+ s := template.HTML(gohtml.EscapeString(content))
+ m = append(m, s)
+ }
+ return m
+}
+
+func formatLexerName(name string) string {
+ if name == "fallback" || name == "plaintext" {
+ return "Text"
+ }
+
+ return util.ToTitleCaseNoLower(name)
+}
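
For orientation, here is a minimal usage sketch of the API added above. It is illustrative only: the file names, language hints, and snippet contents are hypothetical, and the printed lexer names depend on chroma's detection; only Code, File, and PlainText (each of which calls NewContext lazily) come from this change.

package main

import (
	"fmt"

	"code.gitea.io/gitea/modules/highlight"
)

func main() {
	// Highlight a single snippet; an explicit language hint takes priority,
	// otherwise the custom extension mapping and file-name matching are used.
	out, lexerName := highlight.Code("main.go", "go", "package main")
	fmt.Println(lexerName) // e.g. "Go"
	fmt.Println(out)

	// Highlight a whole file into one HTML fragment per line.
	src := []byte("a = 1\nb = 2\n")
	lines, lexerName, err := highlight.File("example.py", "", src)
	if err != nil {
		// Fall back to escaped, non-highlighted lines if tokenising fails.
		lines = highlight.PlainText(src)
	}
	fmt.Println(lexerName, len(lines))
}
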
diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go
new file mode 100644
index 0000000..03db4d5
--- /dev/null
+++ b/modules/highlight/highlight_test.go
@@ -0,0 +1,190 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+ "html/template"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func lines(s string) (out []template.HTML) {
+ // "" => [], "a" => ["a"], "a\n" => ["a\n"], "a\nb" => ["a\n", "b"] (each line always includes EOL "\n" if it exists)
+ out = make([]template.HTML, 0)
+ s = strings.ReplaceAll(strings.ReplaceAll(strings.TrimSpace(s), "\n", ""), `\n`, "\n")
+ for {
+ if p := strings.IndexByte(s, '\n'); p != -1 {
+ out = append(out, template.HTML(s[:p+1]))
+ s = s[p+1:]
+ } else {
+ break
+ }
+ }
+ if s != "" {
+ out = append(out, template.HTML(s))
+ }
+ return out
+}
+
+func TestFile(t *testing.T) {
+ tests := []struct {
+ name string
+ code string
+ want []template.HTML
+ lexerName string
+ }{
+ {
+ name: "empty.py",
+ code: "",
+ want: lines(""),
+ lexerName: "Python",
+ },
+ {
+ name: "empty.js",
+ code: "",
+ want: lines(""),
+ lexerName: "JavaScript",
+ },
+ {
+ name: "empty.yaml",
+ code: "",
+ want: lines(""),
+ lexerName: "YAML",
+ },
+ {
+ name: "tags.txt",
+ code: "<>",
+ want: lines("&lt;&gt;"),
+ lexerName: "Text",
+ },
+ {
+ name: "tags.py",
+ code: "<>",
+ want: lines(`<span class="o">&lt;</span><span class="o">&gt;</span>`),
+ lexerName: "Python",
+ },
+ {
+ name: "eol-no.py",
+ code: "a=1",
+ want: lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>`),
+ lexerName: "Python",
+ },
+ {
+ name: "eol-newline1.py",
+ code: "a=1\n",
+ want: lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n`),
+ lexerName: "Python",
+ },
+ {
+ name: "eol-newline2.py",
+ code: "a=1\n\n",
+ want: lines(`
+<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
+\n
+ `,
+ ),
+ lexerName: "Python",
+ },
+ {
+ name: "empty-line-with-space.py",
+ code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+ a=1
+
+b=''
+{space}
+c=2
+ `), "{space}", " "),
+ want: lines(`
+<span class="n">def</span><span class="p">:</span>\n
+ <span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
+\n
+<span class="n">b</span><span class="o">=</span><span class="sa"></span><span class="s1">&#39;</span><span class="s1">&#39;</span>\n
+ \n
+<span class="n">c</span><span class="o">=</span><span class="mi">2</span>`,
+ ),
+ lexerName: "Python",
+ },
+ {
+ name: "DOS.PAS",
+ code: "",
+ want: lines(""),
+ lexerName: "ObjectPascal",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ out, lexerName, err := File(tt.name, "", []byte(tt.code))
+ require.NoError(t, err)
+ assert.EqualValues(t, tt.want, out)
+ assert.Equal(t, tt.lexerName, lexerName)
+ })
+ }
+}
+
+func TestPlainText(t *testing.T) {
+ tests := []struct {
+ name string
+ code string
+ want []template.HTML
+ }{
+ {
+ name: "empty.py",
+ code: "",
+ want: lines(""),
+ },
+ {
+ name: "tags.py",
+ code: "<>",
+ want: lines("&lt;&gt;"),
+ },
+ {
+ name: "eol-no.py",
+ code: "a=1",
+ want: lines(`a=1`),
+ },
+ {
+ name: "eol-newline1.py",
+ code: "a=1\n",
+ want: lines(`a=1\n`),
+ },
+ {
+ name: "eol-newline2.py",
+ code: "a=1\n\n",
+ want: lines(`
+a=1\n
+\n
+ `),
+ },
+ {
+ name: "empty-line-with-space.py",
+ code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+ a=1
+
+b=''
+{space}
+c=2
+ `), "{space}", " "),
+ want: lines(`
+def:\n
+ a=1\n
+\n
+b=&#39;&#39;\n
+ \n
+c=2`),
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ out := PlainText([]byte(tt.code))
+ assert.EqualValues(t, tt.want, out)
+ })
+ }
+}
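
To make the expectations above easier to read: the lines helper treats real newlines inside the raw strings as layout only, and the two-character sequence \n as the actual end-of-line marker. A hypothetical companion test (not part of this change) spelling out that convention might look like this:

// Hypothetical, for illustration: exercises the lines helper's `\n` convention.
func TestLinesHelper(t *testing.T) {
	// Empty input yields an empty slice.
	assert.Equal(t, []template.HTML{}, lines(""))
	// A line without a trailing EOL is kept as-is.
	assert.Equal(t, []template.HTML{"a"}, lines("a"))
	// A literal `\n` in the raw string becomes a real newline in the output.
	assert.Equal(t, []template.HTML{"a\n"}, lines(`a\n`))
	// Layout newlines are dropped; each element keeps its EOL if it has one.
	assert.Equal(t, []template.HTML{"a\n", "b"}, lines(`
a\n
b`))
}
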