From 525e7fba7854c23ee3530d0bf88d75f106f14c95 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 16 Sep 2019 20:44:31 +0200 Subject: path.c: document the purpose of `is_ntfs_dotgit()` Previously, this function was completely undocumented. It is worth, though, to explain what is going on, as it is not really obvious at all. Suggested-by: Garima Singh Signed-off-by: Johannes Schindelin --- path.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'path.c') diff --git a/path.c b/path.c index 9ac0531a29..22bd0b6f52 100644 --- a/path.c +++ b/path.c @@ -1302,6 +1302,34 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) return 1; } +/* + * On NTFS, we need to be careful to disallow certain synonyms of the `.git/` + * directory: + * + * - For historical reasons, file names that end in spaces or periods are + * automatically trimmed. Therefore, `.git . . ./` is a valid way to refer + * to `.git/`. + * + * - For other historical reasons, file names that do not conform to the 8.3 + * format (up to eight characters for the basename, three for the file + * extension, certain characters not allowed such as `+`, etc) are associated + * with a so-called "short name", at least on the `C:` drive by default. + * Which means that `git~1/` is a valid way to refer to `.git/`. + * + * Note: Technically, `.git/` could receive the short name `git~2` if the + * short name `git~1` were already used. In Git, however, we guarantee that + * `.git` is the first item in a directory, therefore it will be associated + * with the short name `git~1` (unless short names are disabled). + * + * When this function returns 1, it indicates that the specified file/directory + * name refers to a `.git` file or directory, or to any of these synonyms, and + * Git should therefore not track it. + * + * This function is intended to be used by `git fsck` even on platforms where + * the backslash is a regular filename character, therefore it needs to handle + * backlash characters in the provided `name` specially: they are interpreted + * as directory separators. + */ int is_ntfs_dotgit(const char *name) { size_t len; -- cgit v1.2.3 From 288a74bcd28229a00c3632f18cba92dbfdf73ee9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 23 Sep 2019 08:58:11 +0200 Subject: is_ntfs_dotgit(): only verify the leading segment The config setting `core.protectNTFS` is specifically designed to work not only on Windows, but anywhere, to allow for repositories hosted on, say, Linux servers to be protected against NTFS-specific attack vectors. As a consequence, `is_ntfs_dotgit()` manually splits backslash-separated paths (but does not do the same for paths separated by forward slashes), under the assumption that the backslash might not be a valid directory separator on the _current_ Operating System. However, the two callers, `verify_path()` and `fsck_tree()`, are supposed to feed only individual path segments to the `is_ntfs_dotgit()` function. This causes a lot of duplicate scanning (and very inefficient scanning, too, as the inner loop of `is_ntfs_dotgit()` was optimized for readability rather than for speed. Let's simplify the design of `is_ntfs_dotgit()` by putting the burden of splitting the paths by backslashes as directory separators on the callers of said function. Consequently, the `verify_path()` function, which already splits the path by directory separators, now treats backslashes as directory separators _explicitly_ when `core.protectNTFS` is turned on, even on platforms where the backslash is _not_ a directory separator. Note that we have to repeat some code in `verify_path()`: if the backslash is not a directory separator on the current Operating System, we want to allow file names like `\`, but we _do_ want to disallow paths that are clearly intended to cause harm when the repository is cloned on Windows. The `fsck_tree()` function (the other caller of `is_ntfs_dotgit()`) now needs to look for backslashes in tree entries' names specifically when `core.protectNTFS` is turned on. While it would be tempting to completely disallow backslashes in that case (much like `fsck` reports names containing forward slashes as "full paths"), this would be overzealous: when `core.protectNTFS` is turned on in a non-Windows setup, backslashes are perfectly valid characters in file names while we _still_ want to disallow tree entries that are clearly designed to exploit NTFS-specific behavior. This simplification will make subsequent changes easier to implement, such as turning `core.protectNTFS` on by default (not only on Windows) or protecting against attack vectors involving NTFS Alternate Data Streams. Incidentally, this change allows for catching malicious repositories that contain tree entries of the form `dir\.gitmodules` already on the server side rather than only on the client side (and previously only on Windows): in contrast to `is_ntfs_dotgit()`, the `is_ntfs_dotgitmodules()` function already expects the caller to split the paths by directory separators. Signed-off-by: Johannes Schindelin --- fsck.c | 11 ++++++++++- path.c | 5 +---- read-cache.c | 8 ++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'path.c') diff --git a/fsck.c b/fsck.c index b1579c7e28..d80a96f4be 100644 --- a/fsck.c +++ b/fsck.c @@ -551,7 +551,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) while (desc.size) { unsigned mode; - const char *name; + const char *name, *backslash; const struct object_id *oid; oid = tree_entry_extract(&desc, &name, &mode); @@ -565,6 +565,15 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) is_hfs_dotgit(name) || is_ntfs_dotgit(name)); has_zero_pad |= *(char *)desc.buffer == '0'; + + if ((backslash = strchr(name, '\\'))) { + while (backslash) { + backslash++; + has_dotgit |= is_ntfs_dotgit(backslash); + backslash = strchr(backslash, '\\'); + } + } + if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); break; diff --git a/path.c b/path.c index 22bd0b6f52..f62a37d5f5 100644 --- a/path.c +++ b/path.c @@ -1342,10 +1342,7 @@ int is_ntfs_dotgit(const char *name) if (only_spaces_and_periods(name, len, 5) && !strncasecmp(name, "git~1", 5)) return 1; - if (name[len] != '\\') - return 0; - name += len + 1; - len = -1; + return 0; } } diff --git a/read-cache.c b/read-cache.c index 5b57b369e8..bde1e70c51 100644 --- a/read-cache.c +++ b/read-cache.c @@ -874,7 +874,15 @@ inside: if ((c == '.' && !verify_dotfile(path, mode)) || is_dir_sep(c) || c == '\0') return 0; + } else if (c == '\\' && protect_ntfs) { + if (is_ntfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_ntfs_dotgitmodules(path)) + return 0; + } } + c = *path++; } } -- cgit v1.2.3 From 7c3745fc6185495d5765628b4dfe1bd2c25a2981 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 28 Aug 2019 12:22:17 +0200 Subject: path: safeguard `.git` against NTFS Alternate Streams Accesses Probably inspired by HFS' resource streams, NTFS supports "Alternate Data Streams": by appending `:` to the file name, information in addition to the file contents can be written and read, information that is copied together with the file (unless copied to a non-NTFS location). These Alternate Data Streams are typically used for things like marking an executable as having just been downloaded from the internet (and hence not necessarily being trustworthy). In addition to a stream name, a stream type can be appended, like so: `::`. Unless specified, the default stream type is `$DATA` for files and `$INDEX_ALLOCATION` for directories. In other words, `.git::$INDEX_ALLOCATION` is a valid way to reference the `.git` directory! In our work in Git v2.2.1 to protect Git on NTFS drives under `core.protectNTFS`, we focused exclusively on NTFS short names, unaware of the fact that NTFS Alternate Data Streams offer a similar attack vector. Let's fix this. Seeing as it is better to be safe than sorry, we simply disallow paths referring to *any* NTFS Alternate Data Stream of `.git`, not just `::$INDEX_ALLOCATION`. This also simplifies the implementation. This closes CVE-2019-1352. Further reading about NTFS Alternate Data Streams: https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/c54dec26-1551-4d3a-a0ea-4fa40f848eb3 Reported-by: Nicolas Joly Signed-off-by: Johannes Schindelin --- path.c | 12 +++++++++++- t/t1014-read-tree-confusing.sh | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'path.c') diff --git a/path.c b/path.c index f62a37d5f5..e39ecf4689 100644 --- a/path.c +++ b/path.c @@ -1321,10 +1321,19 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) * `.git` is the first item in a directory, therefore it will be associated * with the short name `git~1` (unless short names are disabled). * + * - For yet other historical reasons, NTFS supports so-called "Alternate Data + * Streams", i.e. metadata associated with a given file, referred to via + * `::`. There exists a default stream + * type for directories, allowing `.git/` to be accessed via + * `.git::$INDEX_ALLOCATION/`. + * * When this function returns 1, it indicates that the specified file/directory * name refers to a `.git` file or directory, or to any of these synonyms, and * Git should therefore not track it. * + * For performance reasons, _all_ Alternate Data Streams of `.git/` are + * forbidden, not just `::$INDEX_ALLOCATION`. + * * This function is intended to be used by `git fsck` even on platforms where * the backslash is a regular filename character, therefore it needs to handle * backlash characters in the provided `name` specially: they are interpreted @@ -1335,7 +1344,8 @@ int is_ntfs_dotgit(const char *name) size_t len; for (len = 0; ; len++) - if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { + if (!name[len] || name[len] == '\\' || is_dir_sep(name[len]) || + name[len] == ':') { if (only_spaces_and_periods(name, len, 4) && !strncasecmp(name, ".git", 4)) return 1; diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index 2f5a25d503..da3376b3bb 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -49,6 +49,7 @@ git~1 .git.SPACE .git.{space} .\\\\.GIT\\\\foobar backslashes .git\\\\foobar backslashes2 +.git...:alternate-stream EOF test_expect_success 'utf-8 paths allowed with core.protectHFS off' ' -- cgit v1.2.3 From 91bd46588e6959e6903e275f78b10bd07830d547 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 28 Aug 2019 12:22:17 +0200 Subject: path: also guard `.gitmodules` against NTFS Alternate Data Streams We just safe-guarded `.git` against NTFS Alternate Data Stream-related attack vectors, and now it is time to do the same for `.gitmodules`. Note: In the added regression test, we refrain from verifying all kinds of variations between short names and NTFS Alternate Data Streams: as the new code disallows _all_ Alternate Data Streams of `.gitmodules`, it is enough to test one in order to know that all of them are guarded against. Signed-off-by: Johannes Schindelin --- path.c | 2 +- t/t0060-path-utils.sh | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'path.c') diff --git a/path.c b/path.c index e39ecf4689..2037e2d8c1 100644 --- a/path.c +++ b/path.c @@ -1369,7 +1369,7 @@ static int is_ntfs_dot_generic(const char *name, only_spaces_and_periods: for (;;) { char c = name[i++]; - if (!c) + if (!c || c == ':') return 1; if (c != ' ' && c != '.') return 0; diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 3f3357ed9f..2b8589e921 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -408,6 +408,9 @@ test_expect_success 'match .gitmodules' ' ~1000000 \ ~9999999 \ \ + .gitmodules:\$DATA \ + "gitmod~4 . :\$DATA" \ + \ --not \ ".gitmodules x" \ ".gitmodules .x" \ @@ -432,7 +435,9 @@ test_expect_success 'match .gitmodules' ' \ GI7EB~1 \ GI7EB~01 \ - GI7EB~1X + GI7EB~1X \ + \ + .gitmodules,:\$DATA ' test_done -- cgit v1.2.3 From 3a85dc7d534fc2d410ddc0c771c963b20d1b4857 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 6 Sep 2019 21:09:35 +0200 Subject: is_ntfs_dotgit(): speed it up Previously, this function was written without focusing on speed, intending to make reviewing the code as easy as possible, to avoid any bugs in this critical code. Turns out: we can do much better on both accounts. With this patch, we make it as fast as this developer can make it go: - We avoid the call to `is_dir_sep()` and make all the character comparisons explicit. - We avoid the cost of calling `strncasecmp()` and unroll the test for `.git` and `git~1`, not even using `tolower()` because it is faster to compare against two constant values. - We look for `.git` and `.git~1` first thing, and return early if not found. - We also avoid calling a separate function for detecting chains of spaces and periods. Each of these improvements has a noticeable impact on the speed of `is_ntfs_dotgit()`. Signed-off-by: Johannes Schindelin --- path.c | 55 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 25 deletions(-) (limited to 'path.c') diff --git a/path.c b/path.c index 2037e2d8c1..43b16aabd4 100644 --- a/path.c +++ b/path.c @@ -1288,20 +1288,6 @@ int daemon_avoid_alias(const char *p) } } -static int only_spaces_and_periods(const char *path, size_t len, size_t skip) -{ - if (len < skip) - return 0; - len -= skip; - path += skip; - while (len-- > 0) { - char c = *(path++); - if (c != ' ' && c != '.') - return 0; - } - return 1; -} - /* * On NTFS, we need to be careful to disallow certain synonyms of the `.git/` * directory: @@ -1341,19 +1327,38 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) */ int is_ntfs_dotgit(const char *name) { - size_t len; + char c; - for (len = 0; ; len++) - if (!name[len] || name[len] == '\\' || is_dir_sep(name[len]) || - name[len] == ':') { - if (only_spaces_and_periods(name, len, 4) && - !strncasecmp(name, ".git", 4)) - return 1; - if (only_spaces_and_periods(name, len, 5) && - !strncasecmp(name, "git~1", 5)) - return 1; + /* + * Note that when we don't find `.git` or `git~1` we end up with `name` + * advanced partway through the string. That's okay, though, as we + * return immediately in those cases, without looking at `name` any + * further. + */ + c = *(name++); + if (c == '.') { + /* .git */ + if (((c = *(name++)) != 'g' && c != 'G') || + ((c = *(name++)) != 'i' && c != 'I') || + ((c = *(name++)) != 't' && c != 'T')) return 0; - } + } else if (c == 'g' || c == 'G') { + /* git ~1 */ + if (((c = *(name++)) != 'i' && c != 'I') || + ((c = *(name++)) != 't' && c != 'T') || + *(name++) != '~' || + *(name++) != '1') + return 0; + } else + return 0; + + for (;;) { + c = *(name++); + if (!c || c == '\\' || c == '/' || c == ':') + return 1; + if (c != '.' && c != ' ') + return 0; + } } static int is_ntfs_dot_generic(const char *name, -- cgit v1.2.3