Save/restore mdcr_el2 with a vCPU's system registers

The Monitor Debug Configuration Register (MDCR_EL2) configures the debug and
performance monitor extensions at EL2, including which guest accesses to the
debug and performance monitor registers trap.  The trapping configuration will
likely vary between VMs, so save and restore it with each vCPU's lazy state.

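Make the code for saving/restoring register state when handling exceptions more
flexible.  The immediate offset of a 64-bit ldp/stp must be a multiple of 8 in
the range [-512, 504], and the pair holding vttbr_el2 and mdcr_el2 would sit at
VCPU_LAZY + 16 * 14 = 520 from the vCPU base.  Use post-index addressing instead
so that the base register advances after every access.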

Bug: 132422368

Change-Id: I5d095b19d8753f2a23b3b089f62a69041314ca08
diff --git a/src/arch/aarch64/hypervisor/exceptions.S b/src/arch/aarch64/hypervisor/exceptions.S
index 900396a..009dc83 100644
--- a/src/arch/aarch64/hypervisor/exceptions.S
+++ b/src/arch/aarch64/hypervisor/exceptions.S
@@ -253,64 +253,68 @@
 	stp x27, x28, [x1, #VCPU_REGS + 8 * 27]
 
 	/* Save lazy state. */
+	/* Use x28 as the base. */
+	add x28, x1, #VCPU_LAZY
+
 	mrs x24, vmpidr_el2
 	mrs x25, csselr_el1
-	stp x24, x25, [x1, #VCPU_LAZY + 16 * 0]
+	stp x24, x25, [x28], #16
 
 	mrs x2, sctlr_el1
 	mrs x3, actlr_el1
-	stp x2, x3, [x1, #VCPU_LAZY + 16 * 1]
+	stp x2, x3, [x28], #16
 
 	mrs x4, cpacr_el1
 	mrs x5, ttbr0_el1
-	stp x4, x5, [x1, #VCPU_LAZY + 16 * 2]
+	stp x4, x5, [x28], #16
 
 	mrs x6, ttbr1_el1
 	mrs x7, tcr_el1
-	stp x6, x7, [x1, #VCPU_LAZY + 16 * 3]
+	stp x6, x7, [x28], #16
 
 	mrs x8, esr_el1
 	mrs x9, afsr0_el1
-	stp x8, x9, [x1, #VCPU_LAZY + 16 * 4]
+	stp x8, x9, [x28], #16
 
 	mrs x10, afsr1_el1
 	mrs x11, far_el1
-	stp x10, x11, [x1, #VCPU_LAZY + 16 * 5]
+	stp x10, x11, [x28], #16
 
 	mrs x12, mair_el1
 	mrs x13, vbar_el1
-	stp x12, x13, [x1, #VCPU_LAZY + 16 * 6]
+	stp x12, x13, [x28], #16
 
 	mrs x14, contextidr_el1
 	mrs x15, tpidr_el0
-	stp x14, x15, [x1, #VCPU_LAZY + 16 * 7]
+	stp x14, x15, [x28], #16
 
 	mrs x16, tpidrro_el0
 	mrs x17, tpidr_el1
-	stp x16, x17, [x1, #VCPU_LAZY + 16 * 8]
+	stp x16, x17, [x28], #16
 
 	mrs x18, amair_el1
 	mrs x19, cntkctl_el1
-	stp x18, x19, [x1, #VCPU_LAZY + 16 * 9]
+	stp x18, x19, [x28], #16
 
 	mrs x20, sp_el0
 	mrs x21, sp_el1
-	stp x20, x21, [x1, #VCPU_LAZY + 16 * 10]
+	stp x20, x21, [x28], #16
 
 	mrs x22, elr_el1
 	mrs x23, spsr_el1
-	stp x22, x23, [x1, #VCPU_LAZY + 16 * 11]
+	stp x22, x23, [x28], #16
 
 	mrs x24, par_el1
 	mrs x25, hcr_el2
-	stp x24, x25, [x1, #VCPU_LAZY + 16 * 12]
+	stp x24, x25, [x28], #16
 
 	mrs x26, cptr_el2
 	mrs x27, cnthctl_el2
-	stp x26, x27, [x1, #VCPU_LAZY + 16 * 13]
+	stp x26, x27, [x28], #16
 
-	mrs x28, vttbr_el2
-	str x28, [x1, #VCPU_LAZY + 16 * 14]
+	mrs x4, vttbr_el2
+	mrs x5, mdcr_el2
+	stp x4, x5, [x28], #16
 
 	/* Save GIC registers. */
 #if GIC_VERSION == 3 || GIC_VERSION == 4
@@ -321,32 +325,28 @@
 	str x3, [x2, #16 * 0]
 #endif
 
-	/*
-	 * Save floating point registers.
-	 *
-	 * Offset is too large, so start from a new base.
-	 */
-	add x2, x1, #VCPU_FREGS
-	stp q0, q1, [x2, #32 * 0]
-	stp q2, q3, [x2, #32 * 1]
-	stp q4, q5, [x2, #32 * 2]
-	stp q6, q7, [x2, #32 * 3]
-	stp q8, q9, [x2, #32 * 4]
-	stp q10, q11, [x2, #32 * 5]
-	stp q12, q13, [x2, #32 * 6]
-	stp q14, q15, [x2, #32 * 7]
-	stp q16, q17, [x2, #32 * 8]
-	stp q18, q19, [x2, #32 * 9]
-	stp q20, q21, [x2, #32 * 10]
-	stp q22, q23, [x2, #32 * 11]
-	stp q24, q25, [x2, #32 * 12]
-	stp q26, q27, [x2, #32 * 13]
-	stp q28, q29, [x2, #32 * 14]
-	/* Offest becomes too large, so move the base. */
-	stp q30, q31, [x2, #32 * 15]!
+	/* Save floating point registers. */
+	/* Use x28 as the base. */
+	add x28, x1, #VCPU_FREGS
+	stp q0, q1, [x28], #32
+	stp q2, q3, [x28], #32
+	stp q4, q5, [x28], #32
+	stp q6, q7, [x28], #32
+	stp q8, q9, [x28], #32
+	stp q10, q11, [x28], #32
+	stp q12, q13, [x28], #32
+	stp q14, q15, [x28], #32
+	stp q16, q17, [x28], #32
+	stp q18, q19, [x28], #32
+	stp q20, q21, [x28], #32
+	stp q22, q23, [x28], #32
+	stp q24, q25, [x28], #32
+	stp q26, q27, [x28], #32
+	stp q28, q29, [x28], #32
+	stp q30, q31, [x28], #32
 	mrs x3, fpsr
 	mrs x4, fpcr
-	stp x3, x4, [x2, #32 * 1]
+	stp x3, x4, [x28], #32
 
 	/* Save new vcpu pointer in non-volatile register. */
 	mov x19, x0
@@ -408,64 +408,68 @@
 
 vcpu_restore_lazy_and_run:
 	/* Restore lazy registers. */
-	ldp x24, x25, [x0, #VCPU_LAZY + 16 * 0]
+	/* Use x28 as the base. */
+	add x28, x0, #VCPU_LAZY
+
+	ldp x24, x25, [x28], #16
 	msr vmpidr_el2, x24
 	msr csselr_el1, x25
 
-	ldp x2, x3, [x0, #VCPU_LAZY + 16 * 1]
+	ldp x2, x3, [x28], #16
 	msr sctlr_el1, x2
 	msr actlr_el1, x3
 
-	ldp x4, x5, [x0, #VCPU_LAZY + 16 * 2]
+	ldp x4, x5, [x28], #16
 	msr cpacr_el1, x4
 	msr ttbr0_el1, x5
 
-	ldp x6, x7, [x0, #VCPU_LAZY + 16 * 3]
+	ldp x6, x7, [x28], #16
 	msr ttbr1_el1, x6
 	msr tcr_el1, x7
 
-	ldp x8, x9, [x0, #VCPU_LAZY + 16 * 4]
+	ldp x8, x9, [x28], #16
 	msr esr_el1, x8
 	msr afsr0_el1, x9
 
-	ldp x10, x11, [x0, #VCPU_LAZY + 16 * 5]
+	ldp x10, x11, [x28], #16
 	msr afsr1_el1, x10
 	msr far_el1, x11
 
-	ldp x12, x13, [x0, #VCPU_LAZY + 16 * 6]
+	ldp x12, x13, [x28], #16
 	msr mair_el1, x12
 	msr vbar_el1, x13
 
-	ldp x14, x15, [x0, #VCPU_LAZY + 16 * 7]
+	ldp x14, x15, [x28], #16
 	msr contextidr_el1, x14
 	msr tpidr_el0, x15
 
-	ldp x16, x17, [x0, #VCPU_LAZY + 16 * 8]
+	ldp x16, x17, [x28], #16
 	msr tpidrro_el0, x16
 	msr tpidr_el1, x17
 
-	ldp x18, x19, [x0, #VCPU_LAZY + 16 * 9]
+	ldp x18, x19, [x28], #16
 	msr amair_el1, x18
 	msr cntkctl_el1, x19
 
-	ldp x20, x21, [x0, #VCPU_LAZY + 16 * 10]
+	ldp x20, x21, [x28], #16
 	msr sp_el0, x20
 	msr sp_el1, x21
 
-	ldp x22, x23, [x0, #VCPU_LAZY + 16 * 11]
+	ldp x22, x23, [x28], #16
 	msr elr_el1, x22
 	msr spsr_el1, x23
 
-	ldp x24, x25, [x0, #VCPU_LAZY + 16 * 12]
+	ldp x24, x25, [x28], #16
 	msr par_el1, x24
 	msr hcr_el2, x25
 
-	ldp x26, x27, [x0, #VCPU_LAZY + 16 * 13]
+	ldp x26, x27, [x28], #16
 	msr cptr_el2, x26
 	msr cnthctl_el2, x27
 
-	ldr x28, [x0, #VCPU_LAZY + 16 * 14]
-	msr vttbr_el2, x28
+	ldp x4, x5, [x28], #16
+	msr vttbr_el2, x4
+	msr mdcr_el2, x5
 
 	/* Restore GIC registers. */
 #if GIC_VERSION == 3 || GIC_VERSION == 4
diff --git a/src/arch/aarch64/hypervisor/offsets.h b/src/arch/aarch64/hypervisor/offsets.h
index 51724e0..9f43fb8 100644
--- a/src/arch/aarch64/hypervisor/offsets.h
+++ b/src/arch/aarch64/hypervisor/offsets.h
@@ -21,7 +21,7 @@
 #define CPU_STACK_BOTTOM 8
 #define VCPU_REGS 32
 #define VCPU_LAZY (VCPU_REGS + 264)
-#define VCPU_FREGS (VCPU_LAZY + 232)
+#define VCPU_FREGS (VCPU_LAZY + 248)
 
 #if GIC_VERSION == 3 || GIC_VERSION == 4
 #define VCPU_GIC (VCPU_FREGS + 528)
diff --git a/src/arch/aarch64/inc/hf/arch/types.h b/src/arch/aarch64/inc/hf/arch/types.h
index 6e3addc..f7a58c4 100644
--- a/src/arch/aarch64/inc/hf/arch/types.h
+++ b/src/arch/aarch64/inc/hf/arch/types.h
@@ -103,6 +103,7 @@
 		uintreg_t cptr_el2;
 		uintreg_t cnthctl_el2;
 		uintreg_t vttbr_el2;
+		uintreg_t mdcr_el2;
 	} lazy;
 
 	/* Floating point registers. */