From dd136858f1ea40ad3c94191d647487fa4f31926c Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel@debian.org>
Date: Fri, 18 Oct 2024 20:33:49 +0200
Subject: Adding upstream version 9.0.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
---
 modules/util/sanitize.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 modules/util/sanitize.go

(limited to 'modules/util/sanitize.go')

diff --git a/modules/util/sanitize.go b/modules/util/sanitize.go
new file mode 100644
index 0000000..0dd8b34
--- /dev/null
+++ b/modules/util/sanitize.go
@@ -0,0 +1,72 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package util
+
+import (
+	"bytes"
+	"unicode"
+)
+
+type sanitizedError struct { // error wrapper whose message is passed through SanitizeCredentialURLs each time Error() is called
+	err error // the wrapped error; handed back unchanged by Unwrap
+}
+
+func (err sanitizedError) Error() string { // Error returns the wrapped error's message with credentials in URLs masked
+	return SanitizeCredentialURLs(err.err.Error()) // NOTE(review): panics if the wrapped error is nil — confirm callers never wrap nil
+}
+
+func (err sanitizedError) Unwrap() error { // Unwrap exposes the wrapped error so that errors.Is / errors.As can still match it
+	return err.err // the original error itself: its own Error() text is NOT sanitized
+}
+
+// SanitizeErrorCredentialURLs wraps err so that the message returned by Error() does not contain sensitive credentials in URLs; the original error stays reachable through Unwrap.
+func SanitizeErrorCredentialURLs(err error) error {
+	return sanitizedError{err: err} // NOTE(review): a nil err is wrapped as well, so the result is never nil — confirm callers check for nil before calling
+}
+
+const userPlaceholder = "sanitized-credential" // text written in place of the userinfo ("user:pass") part of a URL
+
+var schemeSep = []byte("://") // scheme separator; SanitizeCredentialURLs searches for it to locate candidate URLs
+
+// SanitizeCredentialURLs removes the credentials from every URL (text of the form "scheme://userinfo@...") found in the input string and returns the result, e.g. "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
+func SanitizeCredentialURLs(s string) string {
+	bs := UnsafeStringToBytes(s) // presumably a byte view of s made without copying (helper defined elsewhere — confirm); bs is only read and re-sliced below, never written
+	schemeSepPos := bytes.Index(bs, schemeSep)
+	if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
+		return s // fast return if there is no URL scheme or no userinfo
+	}
+	out := make([]byte, 0, len(bs)+len(userPlaceholder)) // the capacity is only an initial estimate; append grows the slice if more is needed
+	for schemeSepPos != -1 { // one iteration per "://" occurrence; bs always holds the part of the input not yet copied to out
+		schemeSepPos += 3         // skip the "://"
+		sepAtPos := -1            // the possible '@' position: "https://foo@[^here]host"
+		sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
+	sepLoop:
+		for ; sepEndPos < len(bs); sepEndPos++ {
+			c := bs[sepEndPos]
+			if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
+				continue
+			}
+			switch c {
+			case '@':
+				sepAtPos = sepEndPos // remember this '@'; the scan goes on, so a later '@' overrides it
+			case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
+				continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
+			default:
+				break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
+			}
+		}
+		// hide the "u" part of "s://u@h" only if an '@' was found, the byte right before "://" is a letter, and the "u" part is not empty
+		if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 { // NOTE(review): rune(byte) classifies bytes >= 0x80 as Latin-1 code points, not as UTF-8 — confirm this is acceptable
+			out = append(out, bs[:schemeSepPos]...)      // everything up to and including "://"
+			out = append(out, userPlaceholder...)        // stands in for the userinfo
+			out = append(out, bs[sepAtPos:sepEndPos]...) // the '@' and whatever was scanned after it
+		} else {
+			out = append(out, bs[:sepEndPos]...) // nothing to hide: copy the scanned text unchanged
+		}
+		bs = bs[sepEndPos:]                       // drop what has just been copied
+		schemeSepPos = bytes.Index(bs, schemeSep) // the index is relative to the re-sliced bs
+	}
+	out = append(out, bs...)        // the tail after the last scanned candidate
+	return UnsafeBytesToString(out) // presumably converts without copying (helper defined elsewhere — confirm); out is not used afterwards
+}
-- 
cgit v1.2.3