diff options
author | Patrick Steinhardt <ps@pks.im> | 2024-06-14 08:50:03 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2024-06-14 19:26:32 +0200 |
commit | d4d364b2c7442a1dd5bec8ab818791ec8769c3e9 (patch) | |
tree | 3b59ae304a6f225813e057ad8a1e990ff6b89042 /hash.h | |
parent | global: ensure that object IDs are always padded (diff) | |
download | git-d4d364b2c7442a1dd5bec8ab818791ec8769c3e9.tar.xz git-d4d364b2c7442a1dd5bec8ab818791ec8769c3e9.zip |
hash: convert `oidcmp()` and `oideq()` to compare whole hash
With the preceding commit, the hash array of object IDs is now fully
zero-padded even when the hash algorithm's output is smaller than the
array length. With that, we can now adapt both `oidcmp()` and `oideq()`
to unconditionally memcmp(3P) the whole array instead of depending on
the hash size.
While it may feel inefficient to compare unused bytes for e.g. SHA-1, in
practice the compiler should now be able to produce code that is better
optimized both because we have no branch anymore, but also because the
size to compare is now known at compile time. Goldbolt spits out the
following assembly on an x86_64 platform with GCC 14.1 for the old and
new implementations of `oidcmp()`:
oidcmp_old:
movsx rax, DWORD PTR [rdi+32]
test eax, eax
jne .L2
mov rax, QWORD PTR the_repository[rip]
cmp QWORD PTR [rax+16], 32
je .L6
.L4:
mov edx, 20
jmp memcmp
.L2:
lea rdx, [rax+rax*2]
lea rax, [rax+rdx*4]
lea rax, hash_algos[0+rax*8]
cmp QWORD PTR [rax+16], 32
jne .L4
.L6:
mov edx, 32
jmp memcmp
oidcmp_new:
mov edx, 32
jmp memcmp
The new implementation gets ridi of all the branches and effectively
only ends setting up `edx` for `memcmp()` and then calling it.
And for `oideq()`:
oideq_old:
movsx rcx, DWORD PTR [rdi+32]
mov rax, rdi
mov rdx, rsi
test ecx, ecx
jne .L2
mov rcx, QWORD PTR the_repository[rip]
cmp QWORD PTR [rcx+16], 32
mov rcx, QWORD PTR [rax]
je .L12
.L4:
mov rsi, QWORD PTR [rax+8]
xor rcx, QWORD PTR [rdx]
xor rsi, QWORD PTR [rdx+8]
or rcx, rsi
je .L13
.L8:
mov eax, 1
test eax, eax
sete al
movzx eax, al
ret
.L2:
lea rsi, [rcx+rcx*2]
lea rcx, [rcx+rsi*4]
lea rcx, hash_algos[0+rcx*8]
cmp QWORD PTR [rcx+16], 32
mov rcx, QWORD PTR [rax]
jne .L4
.L12:
mov rsi, QWORD PTR [rax+8]
xor rcx, QWORD PTR [rdx]
xor rsi, QWORD PTR [rdx+8]
or rcx, rsi
jne .L8
mov rcx, QWORD PTR [rax+16]
mov rax, QWORD PTR [rax+24]
xor rcx, QWORD PTR [rdx+16]
xor rax, QWORD PTR [rdx+24]
or rcx, rax
jne .L8
xor eax, eax
.L14:
test eax, eax
sete al
movzx eax, al
ret
.L13:
mov edi, DWORD PTR [rdx+16]
cmp DWORD PTR [rax+16], edi
jne .L8
xor eax, eax
jmp .L14
oideq_new:
mov rax, QWORD PTR [rdi]
mov rdx, QWORD PTR [rdi+8]
xor rax, QWORD PTR [rsi]
xor rdx, QWORD PTR [rsi+8]
or rax, rdx
je .L5
.L2:
mov eax, 1
xor eax, 1
ret
.L5:
mov rax, QWORD PTR [rdi+16]
mov rdx, QWORD PTR [rdi+24]
xor rax, QWORD PTR [rsi+16]
xor rdx, QWORD PTR [rsi+24]
or rax, rdx
jne .L2
xor eax, eax
xor eax, 1
ret
Interestingly, the compiler decides to split the comparisons into two so
that it first compares the lower half of the object ID for equality and
then the upper half. If the first check shows a difference, then we
wouldn't even end up comparing the second half.
In both cases, the new generated code is significantly shorter and has
way less branches. While I didn't benchmark the change, I'd be surprised
if the new code was slower.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to '')
-rw-r--r-- | hash.h | 20 |
1 files changed, 0 insertions, 20 deletions
@@ -6,26 +6,6 @@ #define the_hash_algo the_repository->hash_algo -static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2) -{ - const struct git_hash_algo *algop; - if (!oid1->algo) - algop = the_hash_algo; - else - algop = &hash_algos[oid1->algo]; - return hashcmp(oid1->hash, oid2->hash, algop); -} - -static inline int oideq(const struct object_id *oid1, const struct object_id *oid2) -{ - const struct git_hash_algo *algop; - if (!oid1->algo) - algop = the_hash_algo; - else - algop = &hash_algos[oid1->algo]; - return hasheq(oid1->hash, oid2->hash, algop); -} - static inline int is_null_oid(const struct object_id *oid) { return oideq(oid, null_oid()); |