diff options
author | Gusted <postmaster@gusted.xyz> | 2024-10-30 16:59:48 +0100 |
---|---|---|
committer | 0ko <0ko@noreply.codeberg.org> | 2024-10-30 16:59:48 +0100 |
commit | dfe3ffc581eb2fb2137aa9f1530d1296bc6a2801 (patch) | |
tree | 1cf94462d3460e33a2e4eb2cbd50b45e7316df3a /build | |
parent | i18n: update of translations from Codeberg Translate (#5681) (diff) | |
download | forgejo-dfe3ffc581eb2fb2137aa9f1530d1296bc6a2801.tar.xz forgejo-dfe3ffc581eb2fb2137aa9f1530d1296bc6a2801.zip |
feat: harden localization against malicious HTML (#5703)
- Add a new script that proccess the localization files and verify that
they only contain HTML according to our strictly defined rules.
- This should make adding malicious HTML near-impossible.
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/5703
Reviewed-by: 0ko <0ko@noreply.codeberg.org>
Co-authored-by: Gusted <postmaster@gusted.xyz>
Co-committed-by: Gusted <postmaster@gusted.xyz>
Diffstat (limited to 'build')
-rw-r--r-- | build/lint-locale.go | 156 | ||||
-rw-r--r-- | build/lint-locale_test.go | 65 |
2 files changed, 221 insertions, 0 deletions
diff --git a/build/lint-locale.go b/build/lint-locale.go new file mode 100644 index 0000000000..d403eaa70d --- /dev/null +++ b/build/lint-locale.go @@ -0,0 +1,156 @@ +// Copyright 2024 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +//nolint:forbidigo +package main + +import ( + "fmt" + "html" + "io/fs" + "os" + "path/filepath" + "regexp" + "slices" + "strings" + + "github.com/microcosm-cc/bluemonday" + "github.com/sergi/go-diff/diffmatchpatch" + "gopkg.in/ini.v1" //nolint:depguard +) + +var ( + policy *bluemonday.Policy + tagRemover *strings.Replacer + safeURL = "https://TO-BE-REPLACED.COM" + + // Matches href="", href="#", href="%s", href="#%s", href="%[1]s" and href="#%[1]s". + placeHolderRegex = regexp.MustCompile(`href="#?(%s|%\[\d\]s)?"`) +) + +func initBlueMondayPolicy() { + policy = bluemonday.NewPolicy() + + policy.RequireParseableURLs(true) + policy.AllowURLSchemes("https") + + // Only allow safe URL on href. + // Only allow target="_blank". + // Only allow rel="nopener noreferrer", rel="noopener" and rel="noreferrer". + // Only allow placeholder on id and class. + policy.AllowAttrs("href").Matching(regexp.MustCompile("^" + regexp.QuoteMeta(safeURL) + "$")).OnElements("a") + policy.AllowAttrs("target").Matching(regexp.MustCompile("^_blank$")).OnElements("a") + policy.AllowAttrs("rel").Matching(regexp.MustCompile("^(noopener|noreferrer|noopener noreferrer)$")).OnElements("a") + policy.AllowAttrs("id", "class").Matching(regexp.MustCompile(`^%s|%\[\d\]s$`)).OnElements("a") + + // Only allow positional placeholder as class. + positionalPlaceholderRe := regexp.MustCompile(`^%\[\d\]s$`) + policy.AllowAttrs("class").Matching(positionalPlaceholderRe).OnElements("strong") + policy.AllowAttrs("id").Matching(positionalPlaceholderRe).OnElements("code") + + // Allowed elements with no attributes. Must be a recognized tagname. + policy.AllowElements("strong", "br", "b", "strike", "code", "i") + + // TODO: Remove <c> in `actions.workflow.dispatch.trigger_found`. + policy.AllowNoAttrs().OnElements("c") +} + +func initRemoveTags() { + oldnew := []string{} + for _, el := range []string{ + "email@example.com", "correu@example.com", "epasts@domens.lv", "email@exemplo.com", "eposta@ornek.com", "email@példa.hu", "email@esempio.it", + "user", "utente", "lietotājs", "gebruiker", "usuário", "Benutzer", "Bruker", + "server", "servidor", "kiszolgáló", "serveris", + "label", "etichetta", "etiķete", "rótulo", "Label", "utilizador", + "filename", "bestandsnaam", "dosyaadi", "fails", "nome do arquivo", + } { + oldnew = append(oldnew, "<"+el+">", "REPLACED-TAG") + } + + tagRemover = strings.NewReplacer(oldnew...) +} + +func preprocessTranslationValue(value string) string { + // href should be a parsable URL, replace placeholder strings with a safe url. + value = placeHolderRegex.ReplaceAllString(value, `href="`+safeURL+`"`) + + // Remove tags that aren't tags but will be parsed as tags. We already know they are safe and sound. + value = tagRemover.Replace(value) + + return value +} + +func checkLocaleContent(localeContent []byte) []string { + // Same configuration as Forgejo uses. + cfg := ini.Empty(ini.LoadOptions{ + IgnoreContinuation: true, + }) + cfg.NameMapper = ini.SnackCase + + if err := cfg.Append(localeContent); err != nil { + panic(err) + } + + dmp := diffmatchpatch.New() + errors := []string{} + + for _, section := range cfg.Sections() { + for _, key := range section.Keys() { + var trKey string + if section.Name() == "" || section.Name() == "DEFAULT" || section.Name() == "common" { + trKey = key.Name() + } else { + trKey = section.Name() + "." + key.Name() + } + + keyValue := preprocessTranslationValue(key.Value()) + + if html.UnescapeString(policy.Sanitize(keyValue)) != keyValue { + // Create a nice diff of the difference. + diffs := dmp.DiffMain(keyValue, html.UnescapeString(policy.Sanitize(keyValue)), false) + diffs = dmp.DiffCleanupSemantic(diffs) + diffs = dmp.DiffCleanupEfficiency(diffs) + + errors = append(errors, trKey+": "+dmp.DiffPrettyText(diffs)) + } + } + } + return errors +} + +func main() { + initBlueMondayPolicy() + initRemoveTags() + + localeDir := filepath.Join("options", "locale") + localeFiles, err := os.ReadDir(localeDir) + if err != nil { + panic(err) + } + + if !slices.ContainsFunc(localeFiles, func(e fs.DirEntry) bool { return strings.HasSuffix(e.Name(), ".ini") }) { + fmt.Println("No locale files found") + os.Exit(1) + } + + exitCode := 0 + for _, localeFile := range localeFiles { + if !strings.HasSuffix(localeFile.Name(), ".ini") { + continue + } + + localeContent, err := os.ReadFile(filepath.Join(localeDir, localeFile.Name())) + if err != nil { + panic(err) + } + + if err := checkLocaleContent(localeContent); len(err) > 0 { + fmt.Println(localeFile.Name()) + fmt.Println(strings.Join(err, "\n")) + fmt.Println() + exitCode = 1 + } + } + + os.Exit(exitCode) +} diff --git a/build/lint-locale_test.go b/build/lint-locale_test.go new file mode 100644 index 0000000000..b33dc9af2b --- /dev/null +++ b/build/lint-locale_test.go @@ -0,0 +1,65 @@ +// Copyright 2024 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestLocalizationPolicy(t *testing.T) { + initBlueMondayPolicy() + initRemoveTags() + + t.Run("Remove tags", func(t *testing.T) { + assert.Empty(t, checkLocaleContent([]byte(`hidden_comment_types_description = Comment types checked here will not be shown inside issue pages. Checking "Label" for example removes all "<user> added/removed <label>" comments.`))) + + assert.EqualValues(t, []string{"key: \x1b[31m<not-an-allowed-key>\x1b[0m REPLACED-TAG"}, checkLocaleContent([]byte(`key = "<not-an-allowed-key> <label>"`))) + assert.EqualValues(t, []string{"key: \x1b[31m<user@example.com>\x1b[0m REPLACED-TAG"}, checkLocaleContent([]byte(`key = "<user@example.com> <email@example.com>"`))) + assert.EqualValues(t, []string{"key: \x1b[31m<tag>\x1b[0m REPLACED-TAG \x1b[31m</tag>\x1b[0m"}, checkLocaleContent([]byte(`key = "<tag> <email@example.com> </tag>"`))) + }) + + t.Run("Specific exception", func(t *testing.T) { + assert.Empty(t, checkLocaleContent([]byte(`workflow.dispatch.trigger_found = This workflow has a <c>workflow_dispatch</c> event trigger.`))) + assert.Empty(t, checkLocaleContent([]byte(`pulls.title_desc_one = wants to merge %[1]d commit from <code>%[2]s</code> into <code id="%[4]s">%[3]s</code>`))) + assert.Empty(t, checkLocaleContent([]byte(`editor.commit_directly_to_this_branch = Commit directly to the <strong class="%[2]s">%[1]s</strong> branch.`))) + + assert.EqualValues(t, []string{"workflow.dispatch.trigger_found: This workflow has a \x1b[31m<d>\x1b[0mworkflow_dispatch\x1b[31m</d>\x1b[0m event trigger."}, checkLocaleContent([]byte(`workflow.dispatch.trigger_found = This workflow has a <d>workflow_dispatch</d> event trigger.`))) + assert.EqualValues(t, []string{"key: <code\x1b[31m id=\"branch_targe\"\x1b[0m>%[3]s</code>"}, checkLocaleContent([]byte(`key = <code id="branch_targe">%[3]s</code>`))) + assert.EqualValues(t, []string{"key: <a\x1b[31m class=\"ui sh\"\x1b[0m href=\"https://TO-BE-REPLACED.COM\">"}, checkLocaleContent([]byte(`key = <a class="ui sh" href="%[3]s">`))) + assert.EqualValues(t, []string{"key: <a\x1b[31m class=\"js-click-me\"\x1b[0m href=\"https://TO-BE-REPLACED.COM\">"}, checkLocaleContent([]byte(`key = <a class="js-click-me" href="%[3]s">`))) + assert.EqualValues(t, []string{"key: <strong\x1b[31m class=\"branch-target\"\x1b[0m>%[1]s</strong>"}, checkLocaleContent([]byte(`key = <strong class="branch-target">%[1]s</strong>`))) + }) + + t.Run("General safe tags", func(t *testing.T) { + assert.Empty(t, checkLocaleContent([]byte("error404 = The page you are trying to reach either <strong>does not exist</strong> or <strong>you are not authorized</strong> to view it."))) + assert.Empty(t, checkLocaleContent([]byte("teams.specific_repositories_helper = Members will only have access to repositories explicitly added to the team. Selecting this <strong>will not</strong> automatically remove repositories already added with <i>All repositories</i>."))) + assert.Empty(t, checkLocaleContent([]byte("sqlite_helper = File path for the SQLite3 database.<br>Enter an absolute path if you run Forgejo as a service."))) + assert.Empty(t, checkLocaleContent([]byte("hi_user_x = Hi <b>%s</b>,"))) + + assert.EqualValues(t, []string{"error404: The page you are trying to reach either <strong\x1b[31m title='aaa'\x1b[0m>does not exist</strong> or <strong>you are not authorized</strong> to view it."}, checkLocaleContent([]byte("error404 = The page you are trying to reach either <strong title='aaa'>does not exist</strong> or <strong>you are not authorized</strong> to view it."))) + }) + + t.Run("<a>", func(t *testing.T) { + assert.Empty(t, checkLocaleContent([]byte(`admin.new_user.text = Please <a href="%s">click here</a> to manage this user from the admin panel.`))) + assert.Empty(t, checkLocaleContent([]byte(`access_token_desc = Selected token permissions limit authorization only to the corresponding <a href="%[1]s" target="_blank">API</a> routes. Read the <a href="%[2]s" target="_blank">documentation</a> for more information.`))) + assert.Empty(t, checkLocaleContent([]byte(`webauthn_desc = Security keys are hardware devices containing cryptographic keys. They can be used for two-factor authentication. Security keys must support the <a rel="noreferrer" target="_blank" href="%s">WebAuthn Authenticator</a> standard.`))) + assert.Empty(t, checkLocaleContent([]byte("issues.closed_at = `closed this issue <a id=\"%[1]s\" href=\"#%[1]s\">%[2]s</a>`"))) + + assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com">`))) + assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"javascript:alert('1')\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="javascript:alert('1')">`))) + assert.EqualValues(t, []string{"key: <a href=\"https://TO-BE-REPLACED.COM\"\x1b[31m download\x1b[0m>"}, checkLocaleContent([]byte(`key = <a href="%s" download>`))) + assert.EqualValues(t, []string{"key: <a href=\"https://TO-BE-REPLACED.COM\"\x1b[31m target=\"_self\"\x1b[0m>"}, checkLocaleContent([]byte(`key = <a href="%s" target="_self">`))) + assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com/%s\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com/%s">`))) + assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com/?q=%s\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com/?q=%s">`))) + assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"%s/open-redirect\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="%s/open-redirect">`))) + assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"%s?q=open-redirect\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="%s?q=open-redirect">`))) + }) + + t.Run("Escaped HTML characters", func(t *testing.T) { + assert.Empty(t, checkLocaleContent([]byte("activity.git_stats_push_to_branch = `إلى %s و\"`"))) + + assert.EqualValues(t, []string{"key: و\x1b[31m \x1b[0m\x1b[32m\u00a0\x1b[0m"}, checkLocaleContent([]byte(`key = و `))) + }) +} |