From 4d74ecfa6458bf482d93ad9a98c7f0423ff0564b Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 12 Jan 2023 12:38:27 +0000
Subject: KVM: arm64: Don't arm a hrtimer for an already pending timer

When fully emulating a timer, we back it with a hrtimer that is
armed on vcpu_load(). However, we do this even if the timer is
already pending. This causes spurious interrupts to be taken,
though the guest doesn't observe them (the interrupt is already
pending).

Although this is a waste of precious cycles, it isn't the end of
the world with the current state of KVM. However, it can lead to a
situation where a guest doesn't make forward progress anymore with
NV.

Fix it by checking whether the timer is already pending before
arming a new hrtimer. Also drop the hrtimer cancelling, which is
useless by construction.

Reported-by: D Scott Phillips
Fixes: bee038a67487 ("KVM: arm/arm64: Rework the timer code to use a timer_map")
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20230112123829.458912-2-maz@kernel.org
Signed-off-by: Oliver Upton
---
 arch/arm64/kvm/arch_timer.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index bb24a76b4224..587d87aec33f 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -428,10 +428,8 @@ static void timer_emulate(struct arch_timer_context *ctx)
 	 * scheduled for the future. If the timer cannot fire at all,
 	 * then we also don't need a soft timer.
 	 */
-	if (!kvm_timer_irq_can_fire(ctx)) {
-		soft_timer_cancel(&ctx->hrtimer);
+	if (should_fire || !kvm_timer_irq_can_fire(ctx))
 		return;
-	}
 
 	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
 }
-- cgit v1.2.3
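The same idiom works outside KVM. Below is a minimal, hypothetical userspace analogue using POSIX timers (the struct and function names are invented for illustration, not taken from the patch): the point is the same early return, so that a soft timer is never armed for a condition that is already pending and could only produce spurious expirations.

    #include <stdbool.h>
    #include <time.h>

    /* Hypothetical stand-in for an emulated timer context. */
    struct emul_timer {
    	timer_t id;	/* backing soft timer, set up via timer_create() */
    	bool pending;	/* the emulated interrupt is already pending */
    	bool can_fire;	/* analogue of kvm_timer_irq_can_fire() */
    };

    static void emul_timer_update(struct emul_timer *t,
    			      const struct itimerspec *delta)
    {
    	/*
    	 * If the line is already asserted, or the timer can never
    	 * fire, (re)arming the soft timer would only generate
    	 * spurious expirations: return early instead.
    	 */
    	if (t->pending || !t->can_fire)
    		return;

    	timer_settime(t->id, 0, delta, NULL);
    }

In this sketch, pending would be set by the expiry handler and cleared when the emulated interrupt is acknowledged; link with -lrt on glibc.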
From fc6ee952cf0008fb4a60dd8e539d9f17decaabe5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 12 Jan 2023 12:38:28 +0000
Subject: KVM: arm64: Reduce overhead of trapped timer sysreg accesses

Each read/write to a trapped timer system register results in a
whole kvm_timer_vcpu_put/load() cycle, which affects all of the
timers, and a bit more on top. There is no need for such a thing:
we can limit the impact to the timer being accessed, and to that
one only.

This drastically simplifies the emulated case, limits the damage
for trapped accesses, and brings some performance back for NV.

Whilst we're at it, fix a comment that didn't quite capture why we
always set CNTVOFF_EL2 to 0 when disabling the virtual timer.

Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20230112123829.458912-3-maz@kernel.org
Signed-off-by: Oliver Upton
---
 arch/arm64/kvm/arch_timer.c | 73 +++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 587d87aec33f..1a1d7e258aba 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -434,6 +434,11 @@ static void timer_emulate(struct arch_timer_context *ctx)
 	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
 }
 
+static void set_cntvoff(u64 cntvoff)
+{
+	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
+}
+
 static void timer_save_state(struct arch_timer_context *ctx)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
@@ -457,6 +462,22 @@ static void timer_save_state(struct arch_timer_context *ctx)
 		write_sysreg_el0(0, SYS_CNTV_CTL);
 		isb();
 
+		/*
+		 * The kernel may decide to run userspace after
+		 * calling vcpu_put, so we reset cntvoff to 0 to
+		 * ensure a consistent read between user accesses to
+		 * the virtual counter and kernel access to the
+		 * physical counter of non-VHE case.
+		 *
+		 * For VHE, the virtual counter uses a fixed virtual
+		 * offset of zero, so no need to zero CNTVOFF_EL2
+		 * register, but this is actually useful when switching
+		 * between EL1/vEL2 with NV.
+		 *
+		 * Do it unconditionally, as this is either unavoidable
+		 * or dirt cheap.
+		 */
+		set_cntvoff(0);
 		break;
 	case TIMER_PTIMER:
 		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
@@ -530,6 +551,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 	switch (index) {
 	case TIMER_VTIMER:
+		set_cntvoff(timer_get_offset(ctx));
 		write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
 		isb();
 		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
@@ -550,11 +572,6 @@ out:
 	local_irq_restore(flags);
 }
 
-static void set_cntvoff(u64 cntvoff)
-{
-	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
-}
-
 static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
 {
 	int r;
@@ -629,8 +646,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 		kvm_timer_vcpu_load_nogic(vcpu);
 	}
 
-	set_cntvoff(timer_get_offset(map.direct_vtimer));
-
 	kvm_timer_unblocking(vcpu);
 
 	timer_restore_state(map.direct_vtimer);
@@ -686,15 +701,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_is_blocking(vcpu))
 		kvm_timer_blocking(vcpu);
-
-	/*
-	 * The kernel may decide to run userspace after calling vcpu_put, so
-	 * we reset cntvoff to 0 to ensure a consistent read between user
-	 * accesses to the virtual counter and kernel access to the physical
-	 * counter of non-VHE case. For VHE, the virtual counter uses a fixed
-	 * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
-	 */
-	set_cntvoff(0);
 }
 
 /*
@@ -924,14 +930,22 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
 			      enum kvm_arch_timers tmr,
 			      enum kvm_arch_timer_regs treg)
 {
+	struct arch_timer_context *timer;
+	struct timer_map map;
 	u64 val;
 
+	get_timer_map(vcpu, &map);
+	timer = vcpu_get_timer(vcpu, tmr);
+
+	if (timer == map.emul_ptimer)
+		return kvm_arm_timer_read(vcpu, timer, treg);
+
 	preempt_disable();
-	kvm_timer_vcpu_put(vcpu);
+	timer_save_state(timer);
 
-	val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg);
+	val = kvm_arm_timer_read(vcpu, timer, treg);
 
-	kvm_timer_vcpu_load(vcpu);
+	timer_restore_state(timer);
 	preempt_enable();
 
 	return val;
@@ -965,13 +979,22 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
 				enum kvm_arch_timer_regs treg,
 				u64 val)
 {
-	preempt_disable();
-	kvm_timer_vcpu_put(vcpu);
-
-	kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val);
+	struct arch_timer_context *timer;
+	struct timer_map map;
 
-	kvm_timer_vcpu_load(vcpu);
-	preempt_enable();
+	get_timer_map(vcpu, &map);
+	timer = vcpu_get_timer(vcpu, tmr);
+	if (timer == map.emul_ptimer) {
+		soft_timer_cancel(&timer->hrtimer);
+		kvm_arm_timer_write(vcpu, timer, treg, val);
+		timer_emulate(timer);
+	} else {
+		preempt_disable();
+		timer_save_state(timer);
+		kvm_arm_timer_write(vcpu, timer, treg, val);
+		timer_restore_state(timer);
+		preempt_enable();
+	}
 }
 
 static int kvm_timer_starting_cpu(unsigned int cpu)
-- cgit v1.2.3
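For context, here is what such a trapped access looks like from the guest side: a hedged bare-metal AArch64 sketch, not taken from the series, and assuming the hypervisor has programmed CNTHCTL_EL2 so that EL1 physical timer accesses trap. Each access below would then be routed through access_arch_timer() and hence through the narrowed kvm_arm_timer_read_sysreg()/kvm_arm_timer_write_sysreg() paths.

    /* Bare-metal guest code; each timer sysreg access may trap to KVM. */
    static inline unsigned long read_cntp_ctl(void)
    {
    	unsigned long ctl;

    	asm volatile("mrs %0, cntp_ctl_el0" : "=r" (ctl));
    	return ctl;
    }

    static void arm_phys_timer(unsigned long ticks)
    {
    	/* Program the downcounter: a TIMER_REG_TVAL write. */
    	asm volatile("msr cntp_tval_el0, %0" : : "r" (ticks));

    	/* ENABLE=1, IMASK=0: a TIMER_REG_CTL write. */
    	asm volatile("msr cntp_ctl_el0, %0" : : "r" (1UL));
    	asm volatile("isb");

    	/* Read the control back: a TIMER_REG_CTL read. */
    	(void)read_cntp_ctl();
    }

Before this patch, each of these traps would have paid for a full put/load of every timer; afterwards only the accessed timer's state is saved and restored, and the emulated ptimer case touches no hardware state at all.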
From ba82e06cf7f46ac97fe935a6abfe2392ae694ab0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 12 Jan 2023 12:38:29 +0000
Subject: KVM: arm64: timers: Don't BUG() on unhandled timer trap

Although not handling a trap is a pretty bad situation to be in,
panicking the kernel isn't useful and provides no valuable
information to help debug the situation.

Instead, dump the encoding of the unhandled sysreg and inject an
UNDEF into the guest. At least this gives a user an opportunity to
report the issue with some information to help debug it.

Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20230112123829.458912-4-maz@kernel.org
Signed-off-by: Oliver Upton
---
 arch/arm64/kvm/sys_regs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d5ee52d6bf73..32f4e424b9a5 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1049,7 +1049,9 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
 		treg = TIMER_REG_CVAL;
 		break;
 	default:
-		BUG();
+		print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
+		kvm_inject_undefined(vcpu);
+		return false;
 	}
 
 	if (p->is_write)
-- cgit v1.2.3
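To turn the new diagnostic into something actionable, the printed encoding can be mapped back to a register name. Below is a small self-contained sketch of such a decoder; the bit positions follow the ESR_ELx ISS layout for trapped MSR/MRS accesses as defined by the ARM ARM (mirroring the kernel's ESR_ELx_SYS64_ISS_* definitions), but the helper itself is illustrative and not part of the patch.

    #include <stdint.h>
    #include <stdio.h>

    /* Decode the ISS of an EC=0x18 (trapped MSR/MRS) exception. */
    static void decode_sys64_iss(uint32_t iss)
    {
    	unsigned int op0 = (iss >> 20) & 0x3;
    	unsigned int op2 = (iss >> 17) & 0x7;
    	unsigned int op1 = (iss >> 14) & 0x7;
    	unsigned int crn = (iss >> 10) & 0xf;
    	unsigned int rt  = (iss >> 5)  & 0x1f;
    	unsigned int crm = (iss >> 1)  & 0xf;
    	int is_read      = iss & 0x1;	/* 1: MRS (read), 0: MSR (write) */

    	printf("sys_reg %s: Op0=%u Op1=%u CRn=%u CRm=%u Op2=%u (Rt=x%u)\n",
    	       is_read ? "read" : "write", op0, op1, crn, crm, op2, rt);
    }

For instance, Op0=3 Op1=3 CRn=14 CRm=2 Op2=1 decodes to CNTP_CTL_EL0, one of the registers handled just above the default case.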