From a42643aa8d88a2278acad2da6bc702e426476e9b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 15 Dec 2014 18:15:20 -0500 Subject: read-cache: optionally disallow HFS+ .git variants The point of disallowing ".git" in the index is that we would never want to accidentally overwrite files in the repository directory. But this means we need to respect the filesystem's idea of when two paths are equal. The prior commit added a helper to make such a comparison for HFS+; let's use it in verify_path. We make this check optional for two reasons: 1. It restricts the set of allowable filenames, which is unnecessary for people who are not on HFS+. In practice this probably doesn't matter, though, as the restricted names are rather obscure and almost certainly would never come up in practice. 2. It has a minor performance penalty for every path we insert into the index. This patch ties the check to the core.protectHFS config option. Though this is expected to be most useful on OS X, we allow it to be set everywhere, as HFS+ may be mounted on other platforms. The variable does default to on for OS X, though. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'cache.h') diff --git a/cache.h b/cache.h index ce377e1354..b600a0c3e4 100644 --- a/cache.h +++ b/cache.h @@ -584,6 +584,7 @@ extern int fsync_object_files; extern int core_preload_index; extern int core_apply_sparse_checkout; extern int precomposed_unicode; +extern int protect_hfs; /* * The character that begins a commented line in user-editable file -- cgit v1.2.3 From 1d1d69bc52dcc7def5b2edbd165cc0a4e3911c8e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 16 Dec 2014 23:31:03 +0100 Subject: path: add is_ntfs_dotgit() helper We do not allow paths with a ".git" component to be added to the index, as that would mean repository contents could overwrite our repository files. 
However, asking "is this path the same as .git" is not as simple as strcmp() on some filesystems. On NTFS (and FAT32), there exist so-called "short names" for backwards-compatibility: 8.3 compliant names that refer to the same files as their long names. As ".git" is not an 8.3 compliant name, a short name is generated automatically, typically "git~1". Depending on the Windows version, any combination of trailing spaces and periods is ignored, too, so that both "git~1." and ".git." still refer to the Git directory. The reason is that 8.3 stores file names shorter than 8 characters with trailing spaces. So literally, it does not matter for the short name whether it is padded with spaces or whether it is shorter than 8 characters; it is considered to be exactly the same name either way. The period is the separator between file name and file extension, and again, an empty extension consists just of spaces in 8.3 format. So technically, we would only need to take care of the equivalent of this regex: (\.git {0,4}|git~1 {0,3})\. {0,3} However, there are indications that at least some Windows versions might be more lenient and accept arbitrary combinations of trailing spaces and periods and strip them out. So we're playing it real safe here. Besides, there can be little doubt about the intention behind using file names matching even the more lenient pattern specified above; therefore, we should be fine with disallowing such patterns. Extra care is taken to catch names such as '.\\.git\\booh' because the backslash is marked as a directory separator only on Windows, and we want to use this new helper function also in fsck on other platforms. A big thank you goes to Ed Thomson and an unnamed Microsoft engineer for the detailed analysis performed to come up with the corresponding fixes for libgit2. This commit adds a function to detect whether a given file name can refer to the Git directory by mistake. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- cache.h | 1 + path.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'cache.h') diff --git a/cache.h b/cache.h index b600a0c3e4..d17b1d6295 100644 --- a/cache.h +++ b/cache.h @@ -759,6 +759,7 @@ int longest_ancestor_length(const char *path, struct string_list *prefixes); char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); int offset_1st_component(const char *path); +extern int is_ntfs_dotgit(const char *name); /* object replacement */ #define READ_SHA1_FILE_REPLACE 1 diff --git a/path.c b/path.c index 24594c4112..4ef1b01e05 100644 --- a/path.c +++ b/path.c @@ -830,3 +830,36 @@ int offset_1st_component(const char *path) return 2 + is_dir_sep(path[2]); return is_dir_sep(path[0]); } + +static int only_spaces_and_periods(const char *path, size_t len, size_t skip) +{ + if (len < skip) + return 0; + len -= skip; + path += skip; + while (len-- > 0) { + char c = *(path++); + if (c != ' ' && c != '.') + return 0; + } + return 1; +} + +int is_ntfs_dotgit(const char *name) +{ + int len; + + for (len = 0; ; len++) + if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { + if (only_spaces_and_periods(name, len, 4) && + !strncasecmp(name, ".git", 4)) + return 1; + if (only_spaces_and_periods(name, len, 5) && + !strncasecmp(name, "git~1", 5)) + return 1; + if (name[len] != '\\') + return 0; + name += len + 1; + len = -1; + } +} -- cgit v1.2.3 From 2b4c6efc82119ba8f4169717473d95d1a89e4c69 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 16 Dec 2014 23:46:59 +0100 Subject: read-cache: optionally disallow NTFS .git variants The point of disallowing ".git" in the index is that we would never want to accidentally overwrite files in the repository directory. But this means we need to respect the filesystem's idea of when two paths are equal. 
The prior commit added a helper to make such a comparison for NTFS and FAT32; let's use it in verify_path(). We make this check optional for two reasons: 1. It restricts the set of allowable filenames, which is unnecessary for people who are not on NTFS nor FAT32. In practice this probably doesn't matter, though, as the restricted names are rather obscure and almost certainly would never come up in practice. 2. It has a minor performance penalty for every path we insert into the index. This patch ties the check to the core.protectNTFS config option. Though this is expected to be most useful on Windows, we allow it to be set everywhere, as NTFS may be mounted on other platforms. The variable does default to on for Windows, though. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config.txt | 6 ++++++ cache.h | 1 + config.c | 5 +++++ config.mak.uname | 2 ++ environment.c | 5 +++++ read-cache.c | 2 ++ t/t1014-read-tree-confusing.sh | 13 +++++++++++++ 7 files changed, 34 insertions(+) (limited to 'cache.h') diff --git a/Documentation/config.txt b/Documentation/config.txt index 0677bd8df5..097fdd47e1 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -239,6 +239,12 @@ core.protectHFS:: be considered equivalent to `.git` on an HFS+ filesystem. Defaults to `true` on Mac OS, and `false` elsewhere. +core.protectNTFS:: + If set to true, do not allow checkout of paths that would + cause problems with the NTFS filesystem, e.g. conflict with + 8.3 "short" names. + Defaults to `true` on Windows, and `false` elsewhere. 
+ core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time diff --git a/cache.h b/cache.h index d17b1d6295..29ed24b802 100644 --- a/cache.h +++ b/cache.h @@ -585,6 +585,7 @@ extern int core_preload_index; extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; +extern int protect_ntfs; /* * The character that begins a commented line in user-editable file diff --git a/config.c b/config.c index b519cedc01..2cd64b6e3a 100644 --- a/config.c +++ b/config.c @@ -886,6 +886,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.protectntfs")) { + protect_ntfs = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. */ return 0; } diff --git a/config.mak.uname b/config.mak.uname index 23af148837..ec7ed7ac3b 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -362,6 +362,7 @@ ifeq ($(uname_S),Windows) EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib PTHREAD_LIBS = lib = + BASIC_CFLAGS += -DPROTECT_NTFS_DEFAULT=1 ifndef DEBUG BASIC_CFLAGS += -GL -Os -MT BASIC_LDFLAGS += -LTCG @@ -506,6 +507,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) COMPAT_OBJS += compat/mingw.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/dirent.o + BASIC_CFLAGS += -DPROTECT_NTFS_DEFAULT=1 BASIC_LDFLAGS += -Wl,--large-address-aware EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/environment.c b/environment.c index 828b574a29..184748da3e 100644 --- a/environment.c +++ b/environment.c @@ -68,6 +68,11 @@ unsigned long pack_size_limit_cfg; #endif int protect_hfs = PROTECT_HFS_DEFAULT; +#ifndef PROTECT_NTFS_DEFAULT +#define PROTECT_NTFS_DEFAULT 0 +#endif +int protect_ntfs = PROTECT_NTFS_DEFAULT; + /* * The character that begins a commented line in user-editable file * that is subject to stripspace. 
diff --git a/read-cache.c b/read-cache.c index 7f48a08c15..4fa208b662 100644 --- a/read-cache.c +++ b/read-cache.c @@ -789,6 +789,8 @@ int verify_path(const char *path) inside: if (protect_hfs && is_hfs_dotgit(path)) return 0; + if (protect_ntfs && is_ntfs_dotgit(path)) + return 0; c = *path++; if ((c == '.' && !verify_dotfile(path)) || is_dir_sep(c) || c == '\0') diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index ec310d5938..2f5a25d503 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -15,8 +15,17 @@ test_expect_success 'enable core.protectHFS for rejection tests' ' git config core.protectHFS true ' +test_expect_success 'enable core.protectNTFS for rejection tests' ' + git config core.protectNTFS true +' + while read path pretty; do : ${pretty:=$path} + case "$path" in + *SPACE) + path="${path%SPACE} " + ;; + esac test_expect_success "reject $pretty at end of path" ' printf "100644 blob %s\t%s" "$blob" "$path" >tree && bogus=$(git mktree