diff options
author | Junio C Hamano <gitster@pobox.com> | 2023-05-10 01:45:44 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2023-05-10 01:45:45 +0200 |
commit | 620e92b8454d2569b9ad9a2070fd2edea99895cc (patch) | |
tree | 921324185df2fd40f2678b32172b54fd29040c45 | |
parent | The sixteenth batch (diff) | |
parent | parse_commit(): describe more date-parsing failure modes (diff) | |
download | git-620e92b8454d2569b9ad9a2070fd2edea99895cc.tar.xz git-620e92b8454d2569b9ad9a2070fd2edea99895cc.zip |
Merge branch 'jk/parse-commit-with-malformed-ident'
The commit object parser has been taught to be a bit more lenient
to parse timestamps on the author/committer line with a malformed
author/committer ident.
* jk/parse-commit-with-malformed-ident:
parse_commit(): describe more date-parsing failure modes
parse_commit(): handle broken whitespace-only timestamp
parse_commit(): parse timestamp from end of line
t4212: avoid putting git on left-hand side of pipe
-rw-r--r-- | commit.c | 57 | ||||
-rwxr-xr-x | t/t4212-log-corrupt.sh | 51 |
2 files changed, 98 insertions, 10 deletions
@@ -96,6 +96,7 @@ struct commit *lookup_commit_reference_by_name(const char *name) static timestamp_t parse_commit_date(const char *buf, const char *tail) { const char *dateptr; + const char *eol; if (buf + 6 >= tail) return 0; @@ -107,16 +108,56 @@ static timestamp_t parse_commit_date(const char *buf, const char *tail) return 0; if (memcmp(buf, "committer", 9)) return 0; - while (buf < tail && *buf++ != '>') - /* nada */; - if (buf >= tail) + + /* + * Jump to end-of-line so that we can walk backwards to find the + * end-of-email ">". This is more forgiving of malformed cases + * because unexpected characters tend to be in the name and email + * fields. + */ + eol = memchr(buf, '\n', tail - buf); + if (!eol) return 0; - dateptr = buf; - while (buf < tail && *buf++ != '\n') - /* nada */; - if (buf >= tail) + dateptr = eol; + while (dateptr > buf && dateptr[-1] != '>') + dateptr--; + if (dateptr == buf) return 0; - /* dateptr < buf && buf[-1] == '\n', so parsing will stop at buf-1 */ + + /* + * Trim leading whitespace, but make sure we have at least one + * non-whitespace character, as parse_timestamp() will otherwise walk + * right past the newline we found in "eol" when skipping whitespace + * itself. + * + * In theory it would be sufficient to allow any character not matched + * by isspace(), but there's a catch: our isspace() does not + * necessarily match the behavior of parse_timestamp(), as the latter + * is implemented by system routines which match more exotic control + * codes, or even locale-dependent sequences. + * + * Since we expect the timestamp to be a number, we can check for that. + * Anything else (e.g., a non-numeric token like "foo") would just + * cause parse_timestamp() to return 0 anyway. + */ + while (dateptr < eol && isspace(*dateptr)) + dateptr++; + if (!isdigit(*dateptr) && *dateptr != '-') + return 0; + + /* + * We know there is at least one digit (or dash), so we'll begin + * parsing there and stop at worst case at eol. + * + * Note that we may feed parse_timestamp() extra characters here if the + * commit is malformed, and it will parse as far as it can. For + * example, "123foo456" would return "123". That might be questionable + * (versus returning "0"), but it would help in a hypothetical case + * like "123456+0100", where the whitespace from the timezone is + * missing. Since such syntactic errors may be baked into history and + * hard to correct now, let's err on trying to make our best guess + * here, rather than insist on perfect syntax. + */ return parse_timestamp(dateptr, NULL, 10); } diff --git a/t/t4212-log-corrupt.sh b/t/t4212-log-corrupt.sh index e89e1f54b6..85e90acb09 100755 --- a/t/t4212-log-corrupt.sh +++ b/t/t4212-log-corrupt.sh @@ -8,8 +8,9 @@ TEST_PASSES_SANITIZE_LEAK=true test_expect_success 'setup' ' test_commit foo && - git cat-file commit HEAD | - sed "/^author /s/>/>-<>/" >broken_email.commit && + git cat-file commit HEAD >ok.commit && + sed "s/>/>-<>/" <ok.commit >broken_email.commit && + git hash-object --literally -w -t commit broken_email.commit >broken_email.hash && git update-ref refs/heads/broken_email $(cat broken_email.hash) ' @@ -43,6 +44,11 @@ test_expect_success 'git log --format with broken author email' ' test_must_be_empty actual.err ' +test_expect_success '--until handles broken email' ' + git rev-list --until=1980-01-01 broken_email >actual && + test_must_be_empty actual +' + munge_author_date () { git cat-file commit "$1" >commit.orig && sed "s/^\(author .*>\) [0-9]*/\1 $2/" <commit.orig >commit.munge && @@ -86,4 +92,45 @@ test_expect_success 'absurdly far-in-future date' ' git log -1 --format=%ad $commit ' +test_expect_success 'create commits with whitespace committer dates' ' + # It is important that this subject line is numeric, since we want to + # be sure we are not confused by skipping whitespace and accidentally + # parsing the subject as a timestamp. + # + # Do not use munge_author_date here. Besides not hitting the committer + # line, it leaves the timezone intact, and we want nothing but + # whitespace. + # + # We will make two munged commits here. The first, ws_commit, will + # be purely spaces. The second contains a vertical tab, which is + # considered a space by strtoumax(), but not by our isspace(). + test_commit 1234567890 && + git cat-file commit HEAD >commit.orig && + sed "s/>.*/> /" <commit.orig >commit.munge && + ws_commit=$(git hash-object --literally -w -t commit commit.munge) && + sed "s/>.*/> $(printf "\013")/" <commit.orig >commit.munge && + vt_commit=$(git hash-object --literally -w -t commit commit.munge) +' + +test_expect_success '--until treats whitespace date as sentinel' ' + echo $ws_commit >expect && + git rev-list --until=1980-01-01 $ws_commit >actual && + test_cmp expect actual && + + echo $vt_commit >expect && + git rev-list --until=1980-01-01 $vt_commit >actual && + test_cmp expect actual +' + +test_expect_success 'pretty-printer handles whitespace date' ' + # as with the %ad test above, we will show these as the empty string, + # not the 1970 epoch date. This is intentional; see 7d9a281941 (t4212: + # test bogus timestamps with git-log, 2014-02-24) for more discussion. + echo : >expect && + git log -1 --format="%at:%ct" $ws_commit >actual && + test_cmp expect actual && + git log -1 --format="%at:%ct" $vt_commit >actual && + test_cmp expect actual +' + test_done |