summaryrefslogtreecommitdiffstats
path: root/build
diff options
context:
space:
mode:
authorGusted <postmaster@gusted.xyz>2024-10-30 16:59:48 +0100
committer0ko <0ko@noreply.codeberg.org>2024-10-30 16:59:48 +0100
commitdfe3ffc581eb2fb2137aa9f1530d1296bc6a2801 (patch)
tree1cf94462d3460e33a2e4eb2cbd50b45e7316df3a /build
parenti18n: update of translations from Codeberg Translate (#5681) (diff)
downloadforgejo-dfe3ffc581eb2fb2137aa9f1530d1296bc6a2801.tar.xz
forgejo-dfe3ffc581eb2fb2137aa9f1530d1296bc6a2801.zip
feat: harden localization against malicious HTML (#5703)
- Add a new script that processes the localization files and verifies that they only contain HTML according to our strictly defined rules. - This should make adding malicious HTML near-impossible. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/5703 Reviewed-by: 0ko <0ko@noreply.codeberg.org> Co-authored-by: Gusted <postmaster@gusted.xyz> Co-committed-by: Gusted <postmaster@gusted.xyz>
Diffstat (limited to 'build')
-rw-r--r--build/lint-locale.go156
-rw-r--r--build/lint-locale_test.go65
2 files changed, 221 insertions, 0 deletions
diff --git a/build/lint-locale.go b/build/lint-locale.go
new file mode 100644
index 0000000000..d403eaa70d
--- /dev/null
+++ b/build/lint-locale.go
@@ -0,0 +1,156 @@
+// Copyright 2024 The Forgejo Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+//nolint:forbidigo
+package main
+
+import (
+ "fmt"
+ "html"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "regexp"
+ "slices"
+ "strings"
+
+ "github.com/microcosm-cc/bluemonday"
+ "github.com/sergi/go-diff/diffmatchpatch"
+ "gopkg.in/ini.v1" //nolint:depguard
+)
+
+// Package-level state of the locale linter.
+var (
+	// safeURL is the stand-in URL that preprocessTranslationValue writes into
+	// placeholder-only href attributes; the sanitization policy accepts
+	// exactly this value on href.
+	safeURL = "https://TO-BE-REPLACED.COM"
+
+	// placeHolderRegex matches href="", href="#", href="%s", href="#%s",
+	// href="%[1]s" and href="#%[1]s".
+	placeHolderRegex = regexp.MustCompile(`href="#?(%s|%\[\d\]s)?"`)
+
+	// policy is assigned by initBlueMondayPolicy.
+	policy *bluemonday.Policy
+
+	// tagRemover is assigned by initRemoveTags.
+	tagRemover *strings.Replacer
+)
+
+// initBlueMondayPolicy assigns the package-level policy: the strict allow-list
+// of elements and attribute values that translation values may contain.
+// checkLocaleContent reports every value that this policy would modify.
+func initBlueMondayPolicy() {
+	policy = bluemonday.NewPolicy()
+
+	policy.RequireParseableURLs(true)
+	policy.AllowURLSchemes("https")
+
+	// Only allow safe URL on href.
+	// Only allow target="_blank".
+	// Only allow rel="noopener noreferrer", rel="noopener" and rel="noreferrer".
+	// Only allow placeholder on id and class.
+	policy.AllowAttrs("href").Matching(regexp.MustCompile("^" + regexp.QuoteMeta(safeURL) + "$")).OnElements("a")
+	policy.AllowAttrs("target").Matching(regexp.MustCompile("^_blank$")).OnElements("a")
+	policy.AllowAttrs("rel").Matching(regexp.MustCompile("^(noopener|noreferrer|noopener noreferrer)$")).OnElements("a")
+	// The alternation must be grouped: without the group the pattern reads as
+	// "starts with %s" OR "ends with %[n]s", which lets values such as
+	// "%s extra-class" or "extra-id %[1]s" through.
+	policy.AllowAttrs("id", "class").Matching(regexp.MustCompile(`^(?:%s|%\[\d\]s)$`)).OnElements("a")
+
+	// Only allow a positional placeholder as class on <strong> and as id on <code>.
+	positionalPlaceholderRe := regexp.MustCompile(`^%\[\d\]s$`)
+	policy.AllowAttrs("class").Matching(positionalPlaceholderRe).OnElements("strong")
+	policy.AllowAttrs("id").Matching(positionalPlaceholderRe).OnElements("code")
+
+	// Allowed elements with no attributes. Must be a recognized tagname.
+	policy.AllowElements("strong", "br", "b", "strike", "code", "i")
+
+	// TODO: Remove <c> in `actions.workflow.dispatch.trigger_found`.
+	policy.AllowNoAttrs().OnElements("c")
+}
+
+// initRemoveTags assigns the package-level tagRemover: a replacer that turns
+// each known angle-bracketed example word (for instance "<user>" or
+// "<email@example.com>") into the literal text "REPLACED-TAG".
+func initRemoveTags() {
+	// Words that appear between angle brackets in translations as plain
+	// text, in several languages.
+	pseudoTags := []string{
+		"email@example.com", "correu@example.com", "epasts@domens.lv", "email@exemplo.com", "eposta@ornek.com", "email@példa.hu", "email@esempio.it",
+		"user", "utente", "lietotājs", "gebruiker", "usuário", "Benutzer", "Bruker",
+		"server", "servidor", "kiszolgáló", "serveris",
+		"label", "etichetta", "etiķete", "rótulo", "Label", "utilizador",
+		"filename", "bestandsnaam", "dosyaadi", "fails", "nome do arquivo",
+	}
+
+	// strings.NewReplacer takes a flat old/new list, so each word contributes
+	// two entries.
+	pairs := make([]string, 0, 2*len(pseudoTags))
+	for _, word := range pseudoTags {
+		pairs = append(pairs, "<"+word+">", "REPLACED-TAG")
+	}
+
+	tagRemover = strings.NewReplacer(pairs...)
+}
+
+// preprocessTranslationValue normalizes a translation value before it is
+// handed to the sanitization policy and returns the normalized string.
+func preprocessTranslationValue(value string) string {
+	// The policy only accepts a parseable URL in href, so hrefs that consist
+	// solely of a placeholder are rewritten to point at the safe URL.
+	withSafeHref := placeHolderRegex.ReplaceAllString(value, `href="`+safeURL+`"`)
+
+	// Known angle-bracketed words are not real tags but would be parsed as
+	// such; they are already known to be harmless, so replace them with text.
+	return tagRemover.Replace(withSafeHref)
+}
+
+func checkLocaleContent(localeContent []byte) []string {
+ // Same configuration as Forgejo uses.
+ cfg := ini.Empty(ini.LoadOptions{
+ IgnoreContinuation: true,
+ })
+ cfg.NameMapper = ini.SnackCase
+
+ if err := cfg.Append(localeContent); err != nil {
+ panic(err)
+ }
+
+ dmp := diffmatchpatch.New()
+ errors := []string{}
+
+ for _, section := range cfg.Sections() {
+ for _, key := range section.Keys() {
+ var trKey string
+ if section.Name() == "" || section.Name() == "DEFAULT" || section.Name() == "common" {
+ trKey = key.Name()
+ } else {
+ trKey = section.Name() + "." + key.Name()
+ }
+
+ keyValue := preprocessTranslationValue(key.Value())
+
+ if html.UnescapeString(policy.Sanitize(keyValue)) != keyValue {
+ // Create a nice diff of the difference.
+ diffs := dmp.DiffMain(keyValue, html.UnescapeString(policy.Sanitize(keyValue)), false)
+ diffs = dmp.DiffCleanupSemantic(diffs)
+ diffs = dmp.DiffCleanupEfficiency(diffs)
+
+ errors = append(errors, trKey+": "+dmp.DiffPrettyText(diffs))
+ }
+ }
+ }
+ return errors
+}
+
+// main lints every "*.ini" file directly inside options/locale. For each file
+// with findings it prints the file name, the findings and a blank line. The
+// process exits with status 1 if any file had findings or if the directory
+// contains no "*.ini" file at all, and with status 0 otherwise. Failures to
+// read the directory or a file cause a panic.
+func main() {
+	initBlueMondayPolicy()
+	initRemoveTags()
+
+	localeDir := filepath.Join("options", "locale")
+	entries, err := os.ReadDir(localeDir)
+	if err != nil {
+		panic(err)
+	}
+
+	isLocaleFile := func(e fs.DirEntry) bool { return strings.HasSuffix(e.Name(), ".ini") }
+
+	if !slices.ContainsFunc(entries, isLocaleFile) {
+		fmt.Println("No locale files found")
+		os.Exit(1)
+	}
+
+	foundProblems := false
+	for _, entry := range entries {
+		if !isLocaleFile(entry) {
+			continue
+		}
+
+		content, err := os.ReadFile(filepath.Join(localeDir, entry.Name()))
+		if err != nil {
+			panic(err)
+		}
+
+		problems := checkLocaleContent(content)
+		if len(problems) == 0 {
+			continue
+		}
+
+		fmt.Println(entry.Name())
+		fmt.Println(strings.Join(problems, "\n"))
+		fmt.Println()
+		foundProblems = true
+	}
+
+	if foundProblems {
+		os.Exit(1)
+	}
+	os.Exit(0)
+}
diff --git a/build/lint-locale_test.go b/build/lint-locale_test.go
new file mode 100644
index 0000000000..b33dc9af2b
--- /dev/null
+++ b/build/lint-locale_test.go
@@ -0,0 +1,65 @@
+// Copyright 2024 The Forgejo Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+package main
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// TestLocalizationPolicy feeds single-key locale snippets to
+// checkLocaleContent. Snippets that must be accepted are checked with
+// assert.Empty; snippets that must be rejected are compared against the exact
+// message, which embeds the colored diff: in the expectations below,
+// "\x1b[31m…\x1b[0m" wraps text present in the input but absent after
+// sanitizing, and "\x1b[32m…\x1b[0m" wraps text that only exists after
+// sanitizing.
+func TestLocalizationPolicy(t *testing.T) {
+ initBlueMondayPolicy()
+ initRemoveTags()
+
+ // Known angle-bracketed words are replaced by "REPLACED-TAG" and accepted;
+ // any other tag-like text next to them is still reported.
+ t.Run("Remove tags", func(t *testing.T) {
+ assert.Empty(t, checkLocaleContent([]byte(`hidden_comment_types_description = Comment types checked here will not be shown inside issue pages. Checking "Label" for example removes all "<user> added/removed <label>" comments.`)))
+
+ assert.EqualValues(t, []string{"key: \x1b[31m<not-an-allowed-key>\x1b[0m REPLACED-TAG"}, checkLocaleContent([]byte(`key = "<not-an-allowed-key> <label>"`)))
+ assert.EqualValues(t, []string{"key: \x1b[31m<user@example.com>\x1b[0m REPLACED-TAG"}, checkLocaleContent([]byte(`key = "<user@example.com> <email@example.com>"`)))
+ assert.EqualValues(t, []string{"key: \x1b[31m<tag>\x1b[0m REPLACED-TAG \x1b[31m</tag>\x1b[0m"}, checkLocaleContent([]byte(`key = "<tag> <email@example.com> </tag>"`)))
+ })
+
+ // The <c> element, id on <code> and class on <strong> are accepted only in
+ // the narrow forms the policy lists (positional placeholder values).
+ t.Run("Specific exception", func(t *testing.T) {
+ assert.Empty(t, checkLocaleContent([]byte(`workflow.dispatch.trigger_found = This workflow has a <c>workflow_dispatch</c> event trigger.`)))
+ assert.Empty(t, checkLocaleContent([]byte(`pulls.title_desc_one = wants to merge %[1]d commit from <code>%[2]s</code> into <code id="%[4]s">%[3]s</code>`)))
+ assert.Empty(t, checkLocaleContent([]byte(`editor.commit_directly_to_this_branch = Commit directly to the <strong class="%[2]s">%[1]s</strong> branch.`)))
+
+ assert.EqualValues(t, []string{"workflow.dispatch.trigger_found: This workflow has a \x1b[31m<d>\x1b[0mworkflow_dispatch\x1b[31m</d>\x1b[0m event trigger."}, checkLocaleContent([]byte(`workflow.dispatch.trigger_found = This workflow has a <d>workflow_dispatch</d> event trigger.`)))
+ assert.EqualValues(t, []string{"key: <code\x1b[31m id=\"branch_targe\"\x1b[0m>%[3]s</code>"}, checkLocaleContent([]byte(`key = <code id="branch_targe">%[3]s</code>`)))
+ assert.EqualValues(t, []string{"key: <a\x1b[31m class=\"ui sh\"\x1b[0m href=\"https://TO-BE-REPLACED.COM\">"}, checkLocaleContent([]byte(`key = <a class="ui sh" href="%[3]s">`)))
+ assert.EqualValues(t, []string{"key: <a\x1b[31m class=\"js-click-me\"\x1b[0m href=\"https://TO-BE-REPLACED.COM\">"}, checkLocaleContent([]byte(`key = <a class="js-click-me" href="%[3]s">`)))
+ assert.EqualValues(t, []string{"key: <strong\x1b[31m class=\"branch-target\"\x1b[0m>%[1]s</strong>"}, checkLocaleContent([]byte(`key = <strong class="branch-target">%[1]s</strong>`)))
+ })
+
+ // Plain formatting elements are accepted without attributes; an attribute
+ // on them is reported.
+ t.Run("General safe tags", func(t *testing.T) {
+ assert.Empty(t, checkLocaleContent([]byte("error404 = The page you are trying to reach either <strong>does not exist</strong> or <strong>you are not authorized</strong> to view it.")))
+ assert.Empty(t, checkLocaleContent([]byte("teams.specific_repositories_helper = Members will only have access to repositories explicitly added to the team. Selecting this <strong>will not</strong> automatically remove repositories already added with <i>All repositories</i>.")))
+ assert.Empty(t, checkLocaleContent([]byte("sqlite_helper = File path for the SQLite3 database.<br>Enter an absolute path if you run Forgejo as a service.")))
+ assert.Empty(t, checkLocaleContent([]byte("hi_user_x = Hi <b>%s</b>,")))
+
+ assert.EqualValues(t, []string{"error404: The page you are trying to reach either <strong\x1b[31m title='aaa'\x1b[0m>does not exist</strong> or <strong>you are not authorized</strong> to view it."}, checkLocaleContent([]byte("error404 = The page you are trying to reach either <strong title='aaa'>does not exist</strong> or <strong>you are not authorized</strong> to view it.")))
+ })
+
+ // Links are accepted only when href is nothing but a placeholder
+ // (optionally prefixed with "#"); literal URLs, other schemes, extra
+ // attributes and placeholders combined with other URL text are reported.
+ t.Run("<a>", func(t *testing.T) {
+ assert.Empty(t, checkLocaleContent([]byte(`admin.new_user.text = Please <a href="%s">click here</a> to manage this user from the admin panel.`)))
+ assert.Empty(t, checkLocaleContent([]byte(`access_token_desc = Selected token permissions limit authorization only to the corresponding <a href="%[1]s" target="_blank">API</a> routes. Read the <a href="%[2]s" target="_blank">documentation</a> for more information.`)))
+ assert.Empty(t, checkLocaleContent([]byte(`webauthn_desc = Security keys are hardware devices containing cryptographic keys. They can be used for two-factor authentication. Security keys must support the <a rel="noreferrer" target="_blank" href="%s">WebAuthn Authenticator</a> standard.`)))
+ assert.Empty(t, checkLocaleContent([]byte("issues.closed_at = `closed this issue <a id=\"%[1]s\" href=\"#%[1]s\">%[2]s</a>`")))
+
+ assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com">`)))
+ assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"javascript:alert('1')\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="javascript:alert('1')">`)))
+ assert.EqualValues(t, []string{"key: <a href=\"https://TO-BE-REPLACED.COM\"\x1b[31m download\x1b[0m>"}, checkLocaleContent([]byte(`key = <a href="%s" download>`)))
+ assert.EqualValues(t, []string{"key: <a href=\"https://TO-BE-REPLACED.COM\"\x1b[31m target=\"_self\"\x1b[0m>"}, checkLocaleContent([]byte(`key = <a href="%s" target="_self">`)))
+ assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com/%s\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com/%s">`)))
+ assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com/?q=%s\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com/?q=%s">`)))
+ assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"%s/open-redirect\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="%s/open-redirect">`)))
+ assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"%s?q=open-redirect\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="%s?q=open-redirect">`)))
+ })
+
+ // A literal double quote is accepted, while the "&nbsp;" entity is
+ // reported because the sanitized value holds U+00A0 in its place.
+ t.Run("Escaped HTML characters", func(t *testing.T) {
+ assert.Empty(t, checkLocaleContent([]byte("activity.git_stats_push_to_branch = `إلى %s و\"`")))
+
+ assert.EqualValues(t, []string{"key: و\x1b[31m&nbsp\x1b[0m\x1b[32m\u00a0\x1b[0m"}, checkLocaleContent([]byte(`key = و&nbsp;`)))
+ })
+}