summaryrefslogtreecommitdiffstats
path: root/modules/util/sanitize.go
diff options
context:
space:
mode:
authorDaniel Baumann <daniel@debian.org>2024-10-18 20:33:49 +0200
committerDaniel Baumann <daniel@debian.org>2024-12-12 23:57:56 +0100
commite68b9d00a6e05b3a941f63ffb696f91e554ac5ec (patch)
tree97775d6c13b0f416af55314eb6a89ef792474615 /modules/util/sanitize.go
parentInitial commit. (diff)
downloadforgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.tar.xz
forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.zip
Adding upstream version 9.0.3.
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to 'modules/util/sanitize.go')
-rw-r--r--modules/util/sanitize.go72
1 files changed, 72 insertions, 0 deletions
diff --git a/modules/util/sanitize.go b/modules/util/sanitize.go
new file mode 100644
index 0000000..0dd8b34
--- /dev/null
+++ b/modules/util/sanitize.go
@@ -0,0 +1,72 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package util
+
+import (
+ "bytes"
+ "unicode"
+)
+
+type sanitizedError struct {
+ err error
+}
+
+func (err sanitizedError) Error() string {
+ return SanitizeCredentialURLs(err.err.Error())
+}
+
+func (err sanitizedError) Unwrap() error {
+ return err.err
+}
+
+// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
+func SanitizeErrorCredentialURLs(err error) error {
+ return sanitizedError{err: err}
+}
+
+const userPlaceholder = "sanitized-credential"
+
+var schemeSep = []byte("://")
+
+// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
+func SanitizeCredentialURLs(s string) string {
+ bs := UnsafeStringToBytes(s)
+ schemeSepPos := bytes.Index(bs, schemeSep)
+ if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
+ return s // fast return if there is no URL scheme or no userinfo
+ }
+ out := make([]byte, 0, len(bs)+len(userPlaceholder))
+ for schemeSepPos != -1 {
+ schemeSepPos += 3 // skip the "://"
+ sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
+ sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
+ sepLoop:
+ for ; sepEndPos < len(bs); sepEndPos++ {
+ c := bs[sepEndPos]
+ if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
+ continue
+ }
+ switch c {
+ case '@':
+ sepAtPos = sepEndPos
+ case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
+ continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
+ default:
+ break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
+ }
+ }
+ // if there is '@', and the string is like "s://u@h", then hide the "u" part
+ if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
+ out = append(out, bs[:schemeSepPos]...)
+ out = append(out, userPlaceholder...)
+ out = append(out, bs[sepAtPos:sepEndPos]...)
+ } else {
+ out = append(out, bs[:sepEndPos]...)
+ }
+ bs = bs[sepEndPos:]
+ schemeSepPos = bytes.Index(bs, schemeSep)
+ }
+ out = append(out, bs...)
+ return UnsafeBytesToString(out)
+}