summary refs log tree commit diff stats
path: root/modules/highlight/highlight.go
diff options
context:
space:
mode:
author Daniel Baumann <daniel@debian.org> 2024-10-18 20:33:49 +0200
committer Daniel Baumann <daniel@debian.org> 2024-12-12 23:57:56 +0100
commit e68b9d00a6e05b3a941f63ffb696f91e554ac5ec (patch)
tree 97775d6c13b0f416af55314eb6a89ef792474615 /modules/highlight/highlight.go
parent Initial commit. (diff)
download forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.tar.xz
forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.zip
Adding upstream version 9.0.3.
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to '')
-rw-r--r-- modules/highlight/highlight.go 233
1 files changed, 233 insertions, 0 deletions
diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
new file mode 100644
index 0000000..243aa2b
--- /dev/null
+++ b/modules/highlight/highlight.go
@@ -0,0 +1,233 @@
+// Copyright 2015 The Gogs Authors. All rights reserved.
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ gohtml "html"
+ "html/template"
+ "io"
+ "path/filepath"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/analyze"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/util"
+
+ "github.com/alecthomas/chroma/v2"
+ "github.com/alecthomas/chroma/v2/formatters/html"
+ "github.com/alecthomas/chroma/v2/lexers"
+ "github.com/alecthomas/chroma/v2/styles"
+ lru "github.com/hashicorp/golang-lru/v2"
+)
+
+// inputs larger than this many bytes are not syntax highlighted, for performance purposes; Code and File return them as escaped plain text instead
+const sizeLimit = 1024 * 1024
+
+var (
+ // custom user mapping, looked up by file extension (filepath.Ext) and yielding the lexer name passed to lexers.Get; replaced in NewContext
+ highlightMapping = map[string]string{}
+
+ once sync.Once // guards the one-time initialization done in NewContext
+
+ cache *lru.TwoQueueCache[string, any] // lexers matched by Code, keyed by file name; created in NewContext
+
+ githubStyles = styles.Get("github") // style handed to the chroma formatter in CodeFromLexer and File
+)
+
+// NewContext loads the custom highlight mapping from the settings and creates the LRU lexer cache; the work is wrapped in once.Do, so only the first call has any effect
+func NewContext() {
+ once.Do(func() {
+ highlightMapping = setting.GetHighlightMapping()
+
+ // The size 512 is simply a conservative rule of thumb
+ c, err := lru.New2Q[string, any](512)
+ if err != nil {
+ panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err)) // without the cache Code cannot run, so this is treated as fatal
+ }
+ cache = c
+ })
+}
+
+// Code returns the HTML for code with chroma syntax highlighting classes, together with the display name of the matched lexer. The lexer is looked up by language first, then by the custom mapping for fileName's extension, then in the file-name cache, then via lexers.Match, with lexers.Fallback as the last resort.
+func Code(fileName, language, code string) (output template.HTML, lexerName string) {
+ NewContext()
+
+ // the diff view passes a bare newline as an empty string; return a literal '\n' so it can be copied
+ // a literal "\n" (blame view) is preserved the same way; no lexer name is reported for either
+ if code == "" || code == "\n" {
+ return "\n", ""
+ }
+
+ if len(code) > sizeLimit { // too large to highlight: return the escaped text and no lexer name
+ return template.HTML(template.HTMLEscapeString(code)), ""
+ }
+
+ var lexer chroma.Lexer
+
+ if len(language) > 0 {
+ lexer = lexers.Get(language)
+
+ if lexer == nil {
+ // retry with the first '?' and everything after it stripped off
+ if idx := strings.IndexByte(language, '?'); idx > 0 {
+ lexer = lexers.Get(language[:idx])
+ }
+ }
+ }
+
+ if lexer == nil {
+ if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
+ // use the mapped value as the lexer name; the nil checks below cover a name chroma cannot resolve
+ lexer = lexers.Get(val)
+ }
+ }
+
+ if lexer == nil {
+ if l, ok := cache.Get(fileName); ok { // reuse the lexer stored for this file name by an earlier call
+ lexer = l.(chroma.Lexer)
+ }
+ }
+
+ if lexer == nil {
+ lexer = lexers.Match(strings.ToLower(fileName))
+ if lexer == nil {
+ lexer = lexers.Fallback
+ }
+ cache.Add(fileName, lexer) // store the result, including the fallback, under the original (not lowercased) file name
+ }
+
+ return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
+}
+
+// CodeFromLexer returns the HTML for code with chroma syntax highlighting classes, tokenised by the given lexer; if tokenising or formatting fails, the error is logged and the HTML-escaped code is returned instead
+func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
+ formatter := html.New(html.WithClasses(true),
+ html.WithLineNumbers(false),
+ html.PreventSurroundingPre(true),
+ )
+
+ htmlbuf := bytes.Buffer{}
+ htmlw := bufio.NewWriter(&htmlbuf)
+
+ iterator, err := lexer.Tokenise(nil, code)
+ if err != nil {
+ log.Error("Can't tokenize code: %v", err)
+ return template.HTML(template.HTMLEscapeString(code))
+ }
+ // the style is not used by the live site (output carries CSS classes), but the formatter needs one passed in
+ err = formatter.Format(htmlw, githubStyles, iterator)
+ if err != nil {
+ log.Error("Can't format code: %v", err)
+ return template.HTML(template.HTMLEscapeString(code))
+ }
+
+ _ = htmlw.Flush() // push the buffered output into htmlbuf before it is read; the flush error is deliberately discarded
+ // Chroma will add newlines for certain lexers in order to highlight them properly
+ // Once highlighted, strip one trailing newline here, so it doesn't cause copy/paste trouble in HTML output
+ return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
+}
+
+// Maps a language name to the name Chroma expects, for the case where Enry recognizes the language
+// but doesn't use Chroma's naming; File applies it to the result of analyze.GetCodeLanguage.
+var normalizeEnryToChroma = map[string]string{
+ "F#": "FSharp",
+}
+
+// File returns a slice of chroma syntax highlighted HTML lines of code and the display name of the matched lexer. The lexer is looked up by language first, then by the custom mapping for fileName's extension, then by the language guessed from the content, then via lexers.Match, with lexers.Fallback as the last resort. An error is returned when tokenising or formatting fails.
+func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
+ NewContext()
+
+ if len(code) > sizeLimit { // too large to highlight: return escaped plain-text lines and no lexer name
+ return PlainText(code), "", nil
+ }
+
+ formatter := html.New(html.WithClasses(true),
+ html.WithLineNumbers(false),
+ html.PreventSurroundingPre(true),
+ )
+
+ var lexer chroma.Lexer
+
+ // a provided language overrides everything, as long as chroma resolves it to a lexer
+ if language != "" {
+ lexer = lexers.Get(language)
+ }
+
+ if lexer == nil {
+ if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
+ lexer = lexers.Get(val)
+ }
+ }
+
+ if lexer == nil {
+ guessLanguage := analyze.GetCodeLanguage(fileName, code) // guess from the file name and content
+ if normalizedGuessLanguage, ok := normalizeEnryToChroma[guessLanguage]; ok {
+ guessLanguage = normalizedGuessLanguage
+ }
+
+ lexer = lexers.Get(guessLanguage)
+ if lexer == nil {
+ lexer = lexers.Match(strings.ToLower(fileName))
+ if lexer == nil {
+ lexer = lexers.Fallback
+ }
+ }
+ }
+
+ lexerName := formatLexerName(lexer.Config().Name)
+
+ iterator, err := lexer.Tokenise(nil, string(code))
+ if err != nil {
+ return nil, "", fmt.Errorf("can't tokenize code: %w", err)
+ }
+
+ tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens()) // group the token stream per source line
+ htmlBuf := &bytes.Buffer{}
+
+ lines := make([]template.HTML, 0, len(tokensLines))
+ for _, tokens := range tokensLines {
+ iterator = chroma.Literator(tokens...) // format each line separately so every element holds one line's HTML
+ err = formatter.Format(htmlBuf, githubStyles, iterator)
+ if err != nil {
+ return nil, "", fmt.Errorf("can't format code: %w", err)
+ }
+ lines = append(lines, template.HTML(htmlBuf.String()))
+ htmlBuf.Reset() // reuse the same buffer for the next line
+ }
+
+ return lines, lexerName, nil
+}
+
+// PlainText returns non-highlighted HTML for code: one HTML-escaped element per line, each read up to and including its '\n' delimiter
+func PlainText(code []byte) []template.HTML {
+ r := bufio.NewReader(bytes.NewReader(code))
+ m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1) // newline count plus one for a last line without a trailing newline
+ for {
+ content, err := r.ReadString('\n')
+ if err != nil && err != io.EOF { // any error other than end of input: log it and return the lines collected so far
+ log.Error("failed to read string from buffer: %v", err)
+ break
+ }
+ if content == "" && err == io.EOF { // nothing left to read
+ break
+ }
+ s := template.HTML(gohtml.EscapeString(content))
+ m = append(m, s)
+ }
+ return m
+}
+
+func formatLexerName(name string) string { // formatLexerName turns a chroma lexer name into the name reported by Code and File
+ if name == "fallback" || name == "plaintext" { // these two lexer names are reported as "Text"
+ return "Text"
+ }
+
+ return util.ToTitleCaseNoLower(name) // every other name is passed through util.ToTitleCaseNoLower
+}