diff options
Diffstat (limited to 'modules/highlight')
-rw-r--r-- | modules/highlight/highlight.go | 224 | ||||
-rw-r--r-- | modules/highlight/highlight_test.go | 190 |
2 files changed, 414 insertions, 0 deletions
diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
new file mode 100644
index 0000000..bd6137d
--- /dev/null
+++ b/modules/highlight/highlight.go
@@ -0,0 +1,224 @@
+// Copyright 2015 The Gogs Authors. All rights reserved.
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	gohtml "html"
+	"html/template"
+	"io"
+	"path/filepath"
+	"strings"
+	"sync"
+
+	"code.gitea.io/gitea/modules/analyze"
+	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/util"
+
+	"github.com/alecthomas/chroma/v2"
+	"github.com/alecthomas/chroma/v2/formatters/html"
+	"github.com/alecthomas/chroma/v2/lexers"
+	"github.com/alecthomas/chroma/v2/styles"
+	lru "github.com/hashicorp/golang-lru/v2"
+)
+
+// don't highlight files larger than this many bytes for performance purposes
+const sizeLimit = 1024 * 1024
+
+var (
+	// For custom user mapping
+	highlightMapping = map[string]string{}
+
+	once sync.Once
+
+	cache *lru.TwoQueueCache[string, any]
+
+	githubStyles = styles.Get("github")
+)
+
+// NewContext loads custom highlight map from local config
+func NewContext() {
+	once.Do(func() {
+		highlightMapping = setting.GetHighlightMapping()
+
+		// The size 512 is simply a conservative rule of thumb
+		c, err := lru.New2Q[string, any](512)
+		if err != nil {
+			panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
+		}
+		cache = c
+	})
+}
+
+// Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
+func Code(fileName, language, code string) (output template.HTML, lexerName string) {
+	NewContext()
+
+	// diff view newline will be passed as empty, change to literal '\n' so it can be copied
+	// preserve literal newline in blame view
+	if code == "" || code == "\n" {
+		return "\n", ""
+	}
+
+	if len(code) > sizeLimit {
+		return template.HTML(template.HTMLEscapeString(code)), ""
+	}
+
+	var lexer chroma.Lexer
+
+	if len(language) > 0 {
+		lexer = lexers.Get(language)
+
+		if lexer == nil {
+			// Attempt stripping off the '?'
+			if idx := strings.IndexByte(language, '?'); idx > 0 {
+				lexer = lexers.Get(language[:idx])
+			}
+		}
+	}
+
+	if lexer == nil {
+		if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
+			// use mapped value to find lexer
+			lexer = lexers.Get(val)
+		}
+	}
+
+	if lexer == nil {
+		if l, ok := cache.Get(fileName); ok {
+			lexer = l.(chroma.Lexer)
+		}
+	}
+
+	if lexer == nil {
+		lexer = lexers.Match(strings.ToLower(fileName))
+		if lexer == nil {
+			lexer = lexers.Fallback
+		}
+		cache.Add(fileName, lexer)
+	}
+
+	return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
+}
+
+// CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
+func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
+	formatter := html.New(html.WithClasses(true),
+		html.WithLineNumbers(false),
+		html.PreventSurroundingPre(true),
+	)
+
+	htmlbuf := bytes.Buffer{}
+	htmlw := bufio.NewWriter(&htmlbuf)
+
+	iterator, err := lexer.Tokenise(nil, code)
+	if err != nil {
+		log.Error("Can't tokenize code: %v", err)
+		return template.HTML(template.HTMLEscapeString(code))
+	}
+	// style not used for live site but need to pass something
+	err = formatter.Format(htmlw, githubStyles, iterator)
+	if err != nil {
+		log.Error("Can't format code: %v", err)
+		return template.HTML(template.HTMLEscapeString(code))
+	}
+
+	_ = htmlw.Flush()
+	// Chroma will add newlines for certain lexers in order to highlight them properly
+	// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
+	return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
+}
+
+// File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
+func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
+	NewContext()
+
+	if len(code) > sizeLimit {
+		return PlainText(code), "", nil
+	}
+
+	formatter := html.New(html.WithClasses(true),
+		html.WithLineNumbers(false),
+		html.PreventSurroundingPre(true),
+	)
+
+	var lexer chroma.Lexer
+
+	// provided language overrides everything
+	if language != "" {
+		lexer = lexers.Get(language)
+	}
+
+	if lexer == nil {
+		if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
+			lexer = lexers.Get(val)
+		}
+	}
+
+	if lexer == nil {
+		guessLanguage := analyze.GetCodeLanguage(fileName, code)
+
+		lexer = lexers.Get(guessLanguage)
+		if lexer == nil {
+			lexer = lexers.Match(strings.ToLower(fileName))
+			if lexer == nil {
+				lexer = lexers.Fallback
+			}
+		}
+	}
+
+	lexerName := formatLexerName(lexer.Config().Name)
+
+	iterator, err := lexer.Tokenise(nil, string(code))
+	if err != nil {
+		return nil, "", fmt.Errorf("can't tokenize code: %w", err)
+	}
+
+	tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
+	htmlBuf := &bytes.Buffer{}
+
+	lines := make([]template.HTML, 0, len(tokensLines))
+	for _, tokens := range tokensLines {
+		iterator = chroma.Literator(tokens...)
+		err = formatter.Format(htmlBuf, githubStyles, iterator)
+		if err != nil {
+			return nil, "", fmt.Errorf("can't format code: %w", err)
+		}
+		lines = append(lines, template.HTML(htmlBuf.String()))
+		htmlBuf.Reset()
+	}
+
+	return lines, lexerName, nil
+}
+
+// PlainText returns non-highlighted HTML for code
+func PlainText(code []byte) []template.HTML {
+	r := bufio.NewReader(bytes.NewReader(code))
+	m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
+	for {
+		content, err := r.ReadString('\n')
+		if err != nil && err != io.EOF {
+			log.Error("failed to read string from buffer: %v", err)
+			break
+		}
+		if content == "" && err == io.EOF {
+			break
+		}
+		s := template.HTML(gohtml.EscapeString(content))
+		m = append(m, s)
+	}
+	return m
+}
+
+func formatLexerName(name string) string {
+	if name == "fallback" || name == "plaintext" {
+		return "Text"
+	}
+
+	return util.ToTitleCaseNoLower(name)
+}
diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go
new file mode 100644
index 0000000..03db4d5
--- /dev/null
+++ b/modules/highlight/highlight_test.go
@@ -0,0 +1,190 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+	"html/template"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func lines(s string) (out []template.HTML) {
+	// "" => [], "a" => ["a"], "a\n" => ["a\n"], "a\nb" => ["a\n", "b"] (each line always includes EOL "\n" if it exists)
+	out = make([]template.HTML, 0)
+	s = strings.ReplaceAll(strings.ReplaceAll(strings.TrimSpace(s), "\n", ""), `\n`, "\n")
+	for {
+		if p := strings.IndexByte(s, '\n'); p != -1 {
+			out = append(out, template.HTML(s[:p+1]))
+			s = s[p+1:]
+		} else {
+			break
+		}
+	}
+	if s != "" {
+		out = append(out, template.HTML(s))
+	}
+	return out
+}
+
+func TestFile(t *testing.T) {
+	tests := []struct {
+		name      string
+		code      string
+		want      []template.HTML
+		lexerName string
+	}{
+		{
+			name:      "empty.py",
+			code:      "",
+			want:      lines(""),
+			lexerName: "Python",
+		},
+		{
+			name:      "empty.js",
+			code:      "",
+			want:      lines(""),
+			lexerName: "JavaScript",
+		},
+		{
+			name:      "empty.yaml",
+			code:      "",
+			want:      lines(""),
+			lexerName: "YAML",
+		},
+		{
+			name:      "tags.txt",
+			code:      "<>",
+			want:      lines("&lt;&gt;"),
+			lexerName: "Text",
+		},
+		{
+			name:      "tags.py",
+			code:      "<>",
+			want:      lines(`<span class="o">&lt;</span><span class="o">&gt;</span>`),
+			lexerName: "Python",
+		},
+		{
+			name:      "eol-no.py",
+			code:      "a=1",
+			want:      lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>`),
+			lexerName: "Python",
+		},
+		{
+			name:      "eol-newline1.py",
+			code:      "a=1\n",
+			want:      lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n`),
+			lexerName: "Python",
+		},
+		{
+			name: "eol-newline2.py",
+			code: "a=1\n\n",
+			want: lines(`
+<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
+\n
+			`,
+			),
+			lexerName: "Python",
+		},
+		{
+			name: "empty-line-with-space.py",
+			code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+ a=1
+
+b=''
+{space}
+c=2
+			`), "{space}", " "),
+			want: lines(`
+<span class="n">def</span><span class="p">:</span>\n
+ <span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
+\n
+<span class="n">b</span><span class="o">=</span><span class="sa"></span><span class="s1">&#39;</span><span class="s1">&#39;</span>\n
+ \n
+<span class="n">c</span><span class="o">=</span><span class="mi">2</span>`,
+			),
+			lexerName: "Python",
+		},
+		{
+			name:      "DOS.PAS",
+			code:      "",
+			want:      lines(""),
+			lexerName: "ObjectPascal",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out, lexerName, err := File(tt.name, "", []byte(tt.code))
+			require.NoError(t, err)
+			assert.EqualValues(t, tt.want, out)
+			assert.Equal(t, tt.lexerName, lexerName)
+		})
+	}
+}
+
+func TestPlainText(t *testing.T) {
+	tests := []struct {
+		name string
+		code string
+		want []template.HTML
+	}{
+		{
+			name: "empty.py",
+			code: "",
+			want: lines(""),
+		},
+		{
+			name: "tags.py",
+			code: "<>",
+			want: lines("&lt;&gt;"),
+		},
+		{
+			name: "eol-no.py",
+			code: "a=1",
+			want: lines(`a=1`),
+		},
+		{
+			name: "eol-newline1.py",
+			code: "a=1\n",
+			want: lines(`a=1\n`),
+		},
+		{
+			name: "eol-newline2.py",
+			code: "a=1\n\n",
+			want: lines(`
+a=1\n
+\n
+			`),
+		},
+		{
+			name: "empty-line-with-space.py",
+			code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+ a=1
+
+b=''
+{space}
+c=2
+			`), "{space}", " "),
+			want: lines(`
+def:\n
+ a=1\n
+\n
+b=&#39;&#39;\n
+ \n
+c=2`),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out := PlainText([]byte(tt.code))
+			assert.EqualValues(t, tt.want, out)
+		})
+	}
+}