From f8ea1e8e56284669d0cf98e84a80893d43f482be Mon Sep 17 00:00:00 2001
From: Martin Ling <martin-git@earth.li>
Date: Mon, 20 Dec 2021 13:08:15 +0000
Subject: [PATCH] Use stack pointer to hold base address of state structure.

Keeping the base address of this structure in a register allows us to
use offsets to load individual fields from it, without needing their
individual addresses.

However, on the M0 the ldr and str instructions can only use immediate
offsets relative to the low registers (r0-r7) or the stack pointer
(r13); the high registers r8-r12 cannot be used as a base.

Low registers are in short supply and are needed for other instructions
which can only use r0-r7, so we use the stack pointer here.

It's safe to do this because we do not use the stack. There are no
function calls, interrupt handlers or push/pop instructions in the M0
code.

This change saves four cycles by eliminating loads of the addresses for
the offset & tx fields, plus a further two by eliminating the need to
stash one of these addresses in r8 and restore it later.
---
 firmware/hackrf_usb/sgpio_m0.s | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s
index e0e09319..f77b81da 100644
--- a/firmware/hackrf_usb/sgpio_m0.s
+++ b/firmware/hackrf_usb/sgpio_m0.s
@@ -66,8 +66,8 @@ shadow registers.
 
 There are two key code paths, with the following worst-case timings:
 
-RX:             149 cycles
-TX:             134 cycles
+RX:             143 cycles
+TX:             128 cycles
 
 Design
 ======
@@ -101,10 +101,15 @@ registers and fixed memory addresses.
 
 // Buffer that we're funneling data to/from.
 .equ TARGET_DATA_BUFFER,                   0x20008000
-.equ TARGET_BUFFER_POSITION,               0x20007000
-.equ TARGET_BUFFER_TX,                     0x20007004
 .equ TARGET_BUFFER_MASK,                   0x7fff
 
+// Base address of the state structure.
+.equ STATE_BASE,                           0x20007000
+
+// Offsets into the state structure.
+.equ OFFSET,                               0x00
+.equ TX,                                   0x04
+
 // Our slice chain is set up as follows (ascending data age; arrows are reversed for flow):
 //     L  -> F  -> K  -> C -> J  -> E  -> I  -> A
 // Which has equivalent shadow register offsets:
@@ -120,6 +125,7 @@ registers and fixed memory addresses.
 
 /* Allocations of single-use registers */
 
+state             .req r13
 buf_base          .req r12
 buf_mask          .req r11
 sgpio_data        .req r7
@@ -140,6 +146,9 @@ main:
 	mov buf_base, r0                                                                        // 1
 	ldr r0, =TARGET_BUFFER_MASK                                                             // 2
 	mov buf_mask, r0                                                                        // 1
+	ldr r0, =STATE_BASE                                                                     // 2
+	mov state, r0                                                                           // 1
+
 loop:
 	// The worst case timing is assumed to occur when reading the interrupt
 	// status register *just* misses the flag being set - so we include the
@@ -171,15 +180,11 @@ loop:
 
 	// ... and grab the address of the buffer segment we want to write to / read from.
 	mov r0, buf_base                  // r0 = &buffer                                       // 1
-	ldr r3, =TARGET_BUFFER_POSITION   // r3 = &position_in_buffer                           // 2
-	ldr r2, [r3]                      // r2 = position_in_buffer                            // 2
+	ldr r2, [state, #OFFSET]          // r2 = position_in_buffer                            // 2
 	add buf_ptr, r0, r2               // buf_ptr = &buffer + position_in_buffer             // 1
 
-	mov r8, r3                        // Store &position_in_buffer.                         // 1
-
 	// Load direction (TX or RX)
-	ldr r0, =TARGET_BUFFER_TX                                                               // 2
-	ldr r0, [r0]                                                                            // 2
+	ldr r0, [state, #TX]                                                                    // 2
 
 	// TX?
 	lsr r0, #1                                                                              // 1
@@ -221,8 +226,7 @@ done:
 	mov r0, buf_mask                                                                        // 1
 	and r0, buf_ptr, r0    // r0 = (pos_in_buffer + size_copied) % buffer_size              // 1
 
-	// ... restore &position_in_buffer, and store the new position there...
-	mov r1, r8                                                                              // 1
-	str r0, [r1]           // pos_in_buffer = (pos_in_buffer + size_copied) % buffer_size   // 2
+	// ... and store the new position.
+	str r0, [state, #OFFSET] // pos_in_buffer = (pos_in_buffer + size_copied) % buffer_size // 2
 
 	b loop                                                                                  // 3