From dd136858f1ea40ad3c94191d647487fa4f31926c Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel@debian.org>
Date: Fri, 18 Oct 2024 20:33:49 +0200
Subject: Adding upstream version 9.0.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
---
 modules/highlight/highlight.go      | 224 ++++++++++++++++++++++++++++++++++++
 modules/highlight/highlight_test.go | 190 ++++++++++++++++++++++++++++++
 2 files changed, 414 insertions(+)
 create mode 100644 modules/highlight/highlight.go
 create mode 100644 modules/highlight/highlight_test.go

(limited to 'modules/highlight')

diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
new file mode 100644
index 0000000..bd6137d
--- /dev/null
+++ b/modules/highlight/highlight.go
@@ -0,0 +1,224 @@
+// Copyright 2015 The Gogs Authors. All rights reserved.
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	gohtml "html"
+	"html/template"
+	"io"
+	"path/filepath"
+	"strings"
+	"sync"
+
+	"code.gitea.io/gitea/modules/analyze"
+	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/util"
+
+	"github.com/alecthomas/chroma/v2"
+	"github.com/alecthomas/chroma/v2/formatters/html"
+	"github.com/alecthomas/chroma/v2/lexers"
+	"github.com/alecthomas/chroma/v2/styles"
+	lru "github.com/hashicorp/golang-lru/v2"
+)
+
+// sizeLimit is the largest input, in bytes, that gets highlighted; for performance, Code and File return anything larger as escaped plain text
+const sizeLimit = 1024 * 1024
+
+var (
+	// highlightMapping is the custom user mapping, looked up by file extension; NewContext loads it from the settings
+	highlightMapping = map[string]string{}
+
+	once sync.Once // makes the initialisation in NewContext run only once
+
+	cache *lru.TwoQueueCache[string, any] // file name -> chroma.Lexer, filled and read by Code
+
+	githubStyles = styles.Get("github") // style handed to the formatter, which is set up to emit CSS classes
+)
+
+// NewContext loads custom highlight map from local config
+func NewContext() {
+	once.Do(func() {
+		highlightMapping = setting.GetHighlightMapping()
+
+		// The size 512 is simply a conservative rule of thumb
+		c, err := lru.New2Q[string, any](512)
+		if err != nil {
+			panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
+		}
+		cache = c
+	})
+}
+
+// Code highlights a code string and returns it as HTML carrying chroma CSS classes, together with the display name of the lexer used
+func Code(fileName, language, code string) (output template.HTML, lexerName string) {
+	NewContext()
+
+	switch {
+	case code == "" || code == "\n":
+		// the diff view passes a newline as an empty string: hand back a literal '\n' so it can be copied,
+		// which also preserves the literal newline in the blame view
+		return "\n", ""
+	case len(code) > sizeLimit:
+		// too large to highlight, only escape it
+		return template.HTML(template.HTMLEscapeString(code)), ""
+	}
+
+	var lexer chroma.Lexer
+
+	// 1. the explicitly requested language
+	if language != "" {
+		lexer = lexers.Get(language)
+		// not found: retry with everything from the '?' onwards stripped off
+		if cut := strings.IndexByte(language, '?'); lexer == nil && cut > 0 {
+			lexer = lexers.Get(language[:cut])
+		}
+	}
+
+	// 2. the custom user mapping, looked up by file extension
+	if lexer == nil {
+		if mapped, found := highlightMapping[filepath.Ext(fileName)]; found {
+			lexer = lexers.Get(mapped)
+		}
+	}
+
+	// 3. a lexer remembered from an earlier call for the same file name
+	if lexer == nil {
+		if cached, found := cache.Get(fileName); found {
+			lexer = cached.(chroma.Lexer)
+		}
+	}
+
+	// 4. match on the lower-cased file name, or else use lexers.Fallback; the result is cached
+	if lexer == nil {
+		lexer = lexers.Match(strings.ToLower(fileName))
+		if lexer == nil {
+			lexer = lexers.Fallback
+		}
+		cache.Add(fileName, lexer)
+	}
+
+	return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
+}
+
+// CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
+func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
+	formatter := html.New(html.WithClasses(true),
+		html.WithLineNumbers(false),
+		html.PreventSurroundingPre(true),
+	)
+
+	htmlbuf := bytes.Buffer{}
+	htmlw := bufio.NewWriter(&htmlbuf)
+
+	iterator, err := lexer.Tokenise(nil, code)
+	if err != nil {
+		log.Error("Can't tokenize code: %v", err)
+		return template.HTML(template.HTMLEscapeString(code))
+	}
+	// style not used for live site but need to pass something
+	err = formatter.Format(htmlw, githubStyles, iterator)
+	if err != nil {
+		log.Error("Can't format code: %v", err)
+		return template.HTML(template.HTMLEscapeString(code))
+	}
+
+	_ = htmlw.Flush()
+	// Chroma will add newlines for certain lexers in order to highlight them properly
+	// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
+	return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
+}
+
+// File highlights the content of a file line by line. It returns one HTML fragment per line, carrying
+// chroma CSS classes, and the display name of the lexer used. Content above sizeLimit is not highlighted.
+func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
+	NewContext()
+
+	if len(code) > sizeLimit {
+		return PlainText(code), "", nil
+	}
+
+	// a provided language overrides everything
+	var lexer chroma.Lexer
+	if language != "" {
+		lexer = lexers.Get(language)
+	}
+
+	// next, the custom user mapping, looked up by file extension
+	if lexer == nil {
+		if mapped, found := highlightMapping[filepath.Ext(fileName)]; found {
+			lexer = lexers.Get(mapped)
+		}
+	}
+
+	// next, the language guessed from the file name and its content
+	if lexer == nil {
+		lexer = lexers.Get(analyze.GetCodeLanguage(fileName, code))
+	}
+
+	// last, a match on the lower-cased file name, or else the fallback lexer
+	if lexer == nil {
+		lexer = lexers.Match(strings.ToLower(fileName))
+	}
+	if lexer == nil {
+		lexer = lexers.Fallback
+	}
+
+	lexerName := formatLexerName(lexer.Config().Name)
+
+	tokenIter, err := lexer.Tokenise(nil, string(code))
+	if err != nil {
+		return nil, "", fmt.Errorf("can't tokenize code: %w", err)
+	}
+	tokenLines := chroma.SplitTokensIntoLines(tokenIter.Tokens())
+
+	formatter := html.New(
+		html.WithClasses(true),
+		html.WithLineNumbers(false),
+		html.PreventSurroundingPre(true),
+	)
+
+	// every line is formatted on its own, through one buffer that is reset in between
+	var lineBuf bytes.Buffer
+	lines := make([]template.HTML, 0, len(tokenLines))
+	for _, lineTokens := range tokenLines {
+		if err = formatter.Format(&lineBuf, githubStyles, chroma.Literator(lineTokens...)); err != nil {
+			return nil, "", fmt.Errorf("can't format code: %w", err)
+		}
+		lines = append(lines, template.HTML(lineBuf.String()))
+		lineBuf.Reset()
+	}
+	return lines, lexerName, nil
+}
+
+// PlainText splits code into lines and returns each one HTML-escaped, without any highlighting.
+// A line keeps its trailing '\n' when it has one.
+func PlainText(code []byte) []template.HTML {
+	reader := bufio.NewReader(bytes.NewReader(code))
+	escaped := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
+	for {
+		line, err := reader.ReadString('\n')
+		switch {
+		case err != nil && err != io.EOF:
+			log.Error("failed to read string from buffer: %v", err)
+			return escaped
+		case line == "" && err == io.EOF:
+			// nothing left after the final line
+			return escaped
+		}
+		escaped = append(escaped, template.HTML(gohtml.EscapeString(line)))
+	}
+}
+
+func formatLexerName(name string) string {
+	switch name {
+	case "fallback", "plaintext": // both of these lexer names are shown as plain "Text"
+		return "Text"
+	}
+	return util.ToTitleCaseNoLower(name)
+}
diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go
new file mode 100644
index 0000000..03db4d5
--- /dev/null
+++ b/modules/highlight/highlight_test.go
@@ -0,0 +1,190 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+	"html/template"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// lines turns a readable fixture into the expected per-line output. Real line breaks in s are only
+// there for layout and get removed; every line end that is meant to be there is written as a literal `\n`.
+func lines(s string) (out []template.HTML) {
+	// "" => [], "a" => ["a"], "a\n" => ["a\n"], "a\nb" => ["a\n", "b"] (each line always includes EOL "\n" if it exists)
+	out = []template.HTML{}
+	s = strings.TrimSpace(s)
+	s = strings.ReplaceAll(s, "\n", "")
+	s = strings.ReplaceAll(s, `\n`, "\n")
+
+	// SplitAfter keeps the "\n" on each piece; only the piece after the last "\n" can be empty
+	for _, line := range strings.SplitAfter(s, "\n") {
+		if line != "" {
+			out = append(out, template.HTML(line))
+		}
+	}
+	return out
+}
+
+func TestFile(t *testing.T) { // checks the per-line HTML and the lexer name that File returns
+	tests := []struct {
+		name      string          // file name handed to File, also used as the sub-test name
+		code      string          // file content to highlight
+		want      []template.HTML // expected output of File, one entry per line, built with lines()
+		lexerName string          // expected lexer display name
+	}{
+		{
+			name:      "empty.py",
+			code:      "",
+			want:      lines(""),
+			lexerName: "Python",
+		},
+		{
+			name:      "empty.js",
+			code:      "",
+			want:      lines(""),
+			lexerName: "JavaScript",
+		},
+		{
+			name:      "empty.yaml",
+			code:      "",
+			want:      lines(""),
+			lexerName: "YAML",
+		},
+		{
+			name:      "tags.txt",
+			code:      "<>",
+			want:      lines("&lt;&gt;"),
+			lexerName: "Text",
+		},
+		{
+			name:      "tags.py",
+			code:      "<>",
+			want:      lines(`<span class="o">&lt;</span><span class="o">&gt;</span>`),
+			lexerName: "Python",
+		},
+		{
+			name:      "eol-no.py",
+			code:      "a=1",
+			want:      lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>`),
+			lexerName: "Python",
+		},
+		{
+			name:      "eol-newline1.py",
+			code:      "a=1\n",
+			want:      lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n`),
+			lexerName: "Python",
+		},
+		{
+			name: "eol-newline2.py",
+			code: "a=1\n\n",
+			want: lines(`
+<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
+\n
+			`,
+			),
+			lexerName: "Python",
+		},
+		{
+			name: "empty-line-with-space.py",
+			code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+    a=1
+
+b=''
+{space}
+c=2
+			`), "{space}", "    "), // {space} is replaced by four spaces, giving a whitespace-only line
+			want: lines(`
+<span class="n">def</span><span class="p">:</span>\n
+    <span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
+\n
+<span class="n">b</span><span class="o">=</span><span class="sa"></span><span class="s1">&#39;</span><span class="s1">&#39;</span>\n
+    \n
+<span class="n">c</span><span class="o">=</span><span class="mi">2</span>`,
+			),
+			lexerName: "Python",
+		},
+		{
+			name:      "DOS.PAS", // upper-case file name; presumably checks that lexer detection ignores case
+			code:      "",
+			want:      lines(""),
+			lexerName: "ObjectPascal",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out, lexerName, err := File(tt.name, "", []byte(tt.code)) // no language given: File has to pick the lexer itself
+			require.NoError(t, err)
+			assert.EqualValues(t, tt.want, out)
+			assert.Equal(t, tt.lexerName, lexerName)
+		})
+	}
+}
+
+func TestPlainText(t *testing.T) { // checks that PlainText only splits and escapes, adding no highlighting markup
+	tests := []struct {
+		name string          // sub-test name
+		code string          // input passed to PlainText
+		want []template.HTML // expected escaped lines, built with lines()
+	}{
+		{
+			name: "empty.py",
+			code: "",
+			want: lines(""),
+		},
+		{
+			name: "tags.py",
+			code: "<>",
+			want: lines("&lt;&gt;"),
+		},
+		{
+			name: "eol-no.py",
+			code: "a=1",
+			want: lines(`a=1`),
+		},
+		{
+			name: "eol-newline1.py",
+			code: "a=1\n",
+			want: lines(`a=1\n`),
+		},
+		{
+			name: "eol-newline2.py",
+			code: "a=1\n\n",
+			want: lines(`
+a=1\n
+\n
+			`),
+		},
+		{
+			name: "empty-line-with-space.py",
+			code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+    a=1
+
+b=''
+{space}
+c=2
+			`), "{space}", "    "), // {space} is replaced by four spaces, giving a whitespace-only line
+			want: lines(`
+def:\n
+    a=1\n
+\n
+b=&#39;&#39;\n
+    \n
+c=2`),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out := PlainText([]byte(tt.code))
+			assert.EqualValues(t, tt.want, out)
+		})
+	}
+}
-- 
cgit v1.2.3