From a9e38359e356de7d6397395bdde8af61440262d0 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Thu, 3 Sep 2015 11:14:07 +0200 Subject: git-p4: add config git-p4.pathEncoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Perforce keeps the encoding of a path as given by the originating OS. Git expects paths encoded as UTF-8. Add a config option to tell git-p4 which encoding Perforce used for the paths. This encoding is used to transcode the paths to UTF-8. As an example, Perforce on Windows often uses “cp1252” to encode path names. Signed-off-by: Lars Schneider Acked-by: Luke Diamand Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index 073f87bbfd..b1ad86de7f 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2213,6 +2213,17 @@ class P4Sync(Command, P4UserMap): text = regexp.sub(r'$\1$', text) contents = [ text ] + if gitConfig("git-p4.pathEncoding"): + relPath = relPath.decode(gitConfig("git-p4.pathEncoding")).encode('utf8', 'replace') + elif self.verbose: + try: + relPath.decode('ascii') + except: + print ( + "Path with Non-ASCII characters detected and no path encoding defined. " + "Please check the encoding: %s" % relPath + ) + self.gitStream.write("M %s inline %s\n" % (git_mode, relPath)) # total length... -- cgit v1.2.3 From 00a9403a1069b19f27d690853db34459b32b3d3d Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Wed, 16 Sep 2015 14:37:04 +0200 Subject: git-p4: improve path encoding verbose output If a path with non-ASCII characters is detected, then print the encoding and the encoded string in verbose mode. 
Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index b1ad86de7f..65feb22f58 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2213,16 +2213,15 @@ class P4Sync(Command, P4UserMap): text = regexp.sub(r'$\1$', text) contents = [ text ] - if gitConfig("git-p4.pathEncoding"): - relPath = relPath.decode(gitConfig("git-p4.pathEncoding")).encode('utf8', 'replace') - elif self.verbose: - try: - relPath.decode('ascii') - except: - print ( - "Path with Non-ASCII characters detected and no path encoding defined. " - "Please check the encoding: %s" % relPath - ) + try: + relPath.decode('ascii') + except: + encoding = 'utf8' + if gitConfig('git-p4.pathEncoding'): + encoding = gitConfig('git-p4.pathEncoding') + relPath = relPath.decode(encoding).encode('utf8', 'replace') + if self.verbose: + print 'Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, relPath) self.gitStream.write("M %s inline %s\n" % (git_mode, relPath)) -- cgit v1.2.3 From 4cb870d8042bef3fdd953c633463eda24ce78f3d Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Mon, 21 Sep 2015 10:49:18 +0200 Subject: git-p4: use replacement character for non UTF-8 characters in paths If non-UTF-8 characters are detected in paths, then replace them with a placeholder instead of throwing a UnicodeDecodeError exception. This restores the original (implicit) behavior that was broken in 00a9403. 
Signed-off-by: Lars Schneider Reviewed-by: Luke Diamand Signed-off-by: Junio C Hamano --- git-p4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index 65feb22f58..603045050c 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2219,7 +2219,7 @@ class P4Sync(Command, P4UserMap): encoding = 'utf8' if gitConfig('git-p4.pathEncoding'): encoding = gitConfig('git-p4.pathEncoding') - relPath = relPath.decode(encoding).encode('utf8', 'replace') + relPath = relPath.decode(encoding, 'replace').encode('utf8', 'replace') if self.verbose: print 'Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, relPath) -- cgit v1.2.3