s390/cmpxchg: make loop condition for 1,2 byte cases precise

The cmpxchg implementation for 1 and 2 bytes consists of a 4 byte cmpxchg loop. Currently, the decision to retry is imprecise, looping if bits outside the target byte(s) change instead of retrying until the target byte(s) differ from the old value. E.g. if an attempt to exchange (prev_left_0 old_bytes prev_right_0) is made and it fails because the word at the address is (prev_left_1 x prev_right_1) where both x != old_bytes and one of the prev_*_1 values differs from the respective prev_*_0 value, the cmpxchg is retried, even if by a semantic equivalent to a normal cmpxchg, the exchange would fail. Instead exit the loop if x != old_bytes and retry otherwise. Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com> Link: https://lore.kernel.org/r/20221116144711.3811011-1-scgl@linux.ibm.com Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
author: Janis Schoetterl-Glausch <scgl@linux.ibm.com> 2022-11-16 15:47:11 +0100
committer: Heiko Carstens <hca@linux.ibm.com> 2022-11-21 13:36:16 +0100
commit: 51098f0eb22e2f54055d75dd25bc84eff07d6d8a (patch)
tree: 1fe906286f11fce413e7f335b7d70e8350ab4459 /arch/s390/include/asm/cmpxchg.h
parent: s390/uaccess: add cmpxchg_user_key() (diff)
download: linux-51098f0eb22e2f54055d75dd25bc84eff07d6d8a.tar.xz
linux-51098f0eb22e2f54055d75dd25bc84eff07d6d8a.zip
1 files changed, 34 insertions, 26 deletions
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 1c5785b851ec..3f26416c2ad8 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -90,55 +90,63 @@ static __always_inline unsigned long __cmpxchg(unsigned long address,
 {
 	switch (size) {
 	case 1: {
-		unsigned int prev, tmp, shift;
+		unsigned int prev, shift, mask;
 
 		shift = (3 ^ (address & 3)) << 3;
 		address ^= address & 3;
+		old = (old & 0xff) << shift;
+		new = (new & 0xff) << shift;
+		mask = ~(0xff << shift);
 		asm volatile(
 			"	l	%[prev],%[address]\n"
-			"0:	nr	%[prev],%[mask]\n"
-			"	lr	%[tmp],%[prev]\n"
-			"	or	%[prev],%[old]\n"
-			"	or	%[tmp],%[new]\n"
-			"	cs	%[prev],%[tmp],%[address]\n"
+			"	nr	%[prev],%[mask]\n"
+			"	xilf	%[mask],0xffffffff\n"
+			"	or	%[new],%[prev]\n"
+			"	or	%[prev],%[tmp]\n"
+			"0:	lr	%[tmp],%[prev]\n"
+			"	cs	%[prev],%[new],%[address]\n"
 			"	jnl	1f\n"
 			"	xr	%[tmp],%[prev]\n"
+			"	xr	%[new],%[tmp]\n"
 			"	nr	%[tmp],%[mask]\n"
-			"	jnz	0b\n"
+			"	jz	0b\n"
 			"1:"
 			: [prev] "=&d" (prev),
-			  [tmp] "=&d" (tmp),
-			  [address] "+Q" (*(int *)address)
-			: [old] "d" ((old & 0xff) << shift),
-			  [new] "d" ((new & 0xff) << shift),
-			  [mask] "d" (~(0xff << shift))
-			: "memory", "cc");
+			  [address] "+Q" (*(int *)address),
+			  [tmp] "+&d" (old),
+			  [new] "+&d" (new),
+			  [mask] "+&d" (mask)
+			:: "memory", "cc");
 		return prev >> shift;
 	}
 	case 2: {
-		unsigned int prev, tmp, shift;
+		unsigned int prev, shift, mask;
 
 		shift = (2 ^ (address & 2)) << 3;
 		address ^= address & 2;
+		old = (old & 0xffff) << shift;
+		new = (new & 0xffff) << shift;
+		mask = ~(0xffff << shift);
 		asm volatile(
 			"	l	%[prev],%[address]\n"
-			"0:	nr	%[prev],%[mask]\n"
-			"	lr	%[tmp],%[prev]\n"
-			"	or	%[prev],%[old]\n"
-			"	or	%[tmp],%[new]\n"
-			"	cs	%[prev],%[tmp],%[address]\n"
+			"	nr	%[prev],%[mask]\n"
+			"	xilf	%[mask],0xffffffff\n"
+			"	or	%[new],%[prev]\n"
+			"	or	%[prev],%[tmp]\n"
+			"0:	lr	%[tmp],%[prev]\n"
+			"	cs	%[prev],%[new],%[address]\n"
 			"	jnl	1f\n"
 			"	xr	%[tmp],%[prev]\n"
+			"	xr	%[new],%[tmp]\n"
 			"	nr	%[tmp],%[mask]\n"
-			"	jnz	0b\n"
+			"	jz	0b\n"
 			"1:"
 			: [prev] "=&d" (prev),
-			  [tmp] "=&d" (tmp),
-			  [address] "+Q" (*(int *)address)
-			: [old] "d" ((old & 0xffff) << shift),
-			  [new] "d" ((new & 0xffff) << shift),
-			  [mask] "d" (~(0xffff << shift))
-			: "memory", "cc");
+			  [address] "+Q" (*(int *)address),
+			  [tmp] "+&d" (old),
+			  [new] "+&d" (new),
+			  [mask] "+&d" (mask)
+			:: "memory", "cc");
 		return prev >> shift;
 	}
 	case 4: {
author	Janis Schoetterl-Glausch <scgl@linux.ibm.com>	2022-11-16 15:47:11 +0100
committer	Heiko Carstens <hca@linux.ibm.com>	2022-11-21 13:36:16 +0100
commit	51098f0eb22e2f54055d75dd25bc84eff07d6d8a (patch)
tree	1fe906286f11fce413e7f335b7d70e8350ab4459 /arch/s390/include/asm/cmpxchg.h
parent	s390/uaccess: add cmpxchg_user_key() (diff)
download	linux-51098f0eb22e2f54055d75dd25bc84eff07d6d8a.tar.xz linux-51098f0eb22e2f54055d75dd25bc84eff07d6d8a.zip