diff --git a/firmware/common/LPC43xx_M4_memory.ld b/firmware/common/LPC43xx_M4_memory.ld index 6d501e3c..85b9a6c4 100644 --- a/firmware/common/LPC43xx_M4_memory.ld +++ b/firmware/common/LPC43xx_M4_memory.ld @@ -34,6 +34,5 @@ MEMORY } usb_bulk_buffer = ORIGIN(ram_usb); -usb_bulk_buffer_offset = ORIGIN(ram_shared); -usb_bulk_buffer_tx = ORIGIN(ram_shared)+4; +m0_state = ORIGIN(ram_shared); PROVIDE(__ram_m0_start__ = ORIGIN(ram_m0)); diff --git a/firmware/hackrf_usb/CMakeLists.txt b/firmware/hackrf_usb/CMakeLists.txt index 651c6527..1e58db95 100644 --- a/firmware/hackrf_usb/CMakeLists.txt +++ b/firmware/hackrf_usb/CMakeLists.txt @@ -35,7 +35,6 @@ set(SRC_M4 hackrf_usb.c "${PATH_HACKRF_FIRMWARE_COMMON}/tuning.c" "${PATH_HACKRF_FIRMWARE_COMMON}/streaming.c" - usb_bulk_buffer.c "${PATH_HACKRF_FIRMWARE_COMMON}/usb.c" "${PATH_HACKRF_FIRMWARE_COMMON}/usb_request.c" "${PATH_HACKRF_FIRMWARE_COMMON}/usb_standard_request.c" diff --git a/firmware/hackrf_usb/usb_bulk_buffer.c b/firmware/hackrf_usb/m0_state.h similarity index 65% rename from firmware/hackrf_usb/usb_bulk_buffer.c rename to firmware/hackrf_usb/m0_state.h index ea3e6b77..102a3ada 100644 --- a/firmware/hackrf_usb/usb_bulk_buffer.c +++ b/firmware/hackrf_usb/m0_state.h @@ -1,6 +1,5 @@ /* - * Copyright 2012 Jared Boone - * Copyright 2013 Benjamin Vernoux + * Copyright 2022 Great Scott Gadgets * * This file is part of HackRF. * @@ -20,6 +19,18 @@ * Boston, MA 02110-1301, USA. */ -#include "usb_bulk_buffer.h" +#ifndef __M0_STATE_H__ +#define __M0_STATE_H__ -volatile uint32_t usb_bulk_buffer_offset = 0; +struct m0_state { + uint32_t offset; + uint32_t tx; +}; + +/* Address of m0_state is set in ldscripts. If you change the name of this + * variable, it won't be where it needs to be in the processor's address space, + * unless you also adjust the ldscripts. 
+ */ +extern volatile struct m0_state m0_state; + +#endif/*__M0_STATE_H__*/ diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 9bbca574..0d1ab215 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -1,108 +1,244 @@ /* - * This file is part of GreatFET + * Copyright 2019-2022 Great Scott Gadgets * - * Specialized SGPIO interrupt handler for Rhododendron. + * This file is part of HackRF. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. */ +/* + +Introduction +============ + +This file contains the code that runs on the Cortex-M0 core of the LPC43xx. + +The M0 core is used to implement all the timing-critical usage of the SGPIO +peripheral, which interfaces to the MAX5864 ADC/DAC via the CPLD. + +The M0 reads or writes 32 bytes at a time from the SGPIO registers, +transferring these bytes to or from a shared USB bulk buffer. The M4 core +handles transferring data between this buffer and the USB host. + +The SGPIO peripheral is set up and enabled by the M4 core. All the M0 needs to +do is handle the SGPIO exchange interrupt, which indicates that new data can +now be read from or written to the SGPIO shadow registers. + +Timing +====== + +This code has tight timing constraints. 
+ +We have to complete a read or write from SGPIO every 163 cycles. + +The CPU clock is 204MHz. We exchange 32 bytes at a time in the SGPIO +registers, which is 16 samples worth of IQ data. At the maximum sample rate of +20MHz, the SGPIO update rate is 20 / 16 = 1.25MHz. So we have 204 / 1.25 = +163.2 cycles available. + +Access to the SGPIO peripheral is slow, due to the asynchronous bridge that +connects it to the AHB bus matrix. Section 20.4.1 of the LPC43xx user manual +(UM10503) specifies the access latencies as: + +Read: 4 x MCLK + 4 x CLK_PERIPH_SGPIO +Write: 4 x MCLK + 2 x CLK_PERIPH_SGPIO + +In our case both these clocks are at 204MHz so reads add 8 cycles and writes +add 6. These are latencies that add to the usual M0 instruction timings, so an +ldr from SGPIO takes 10 cycles, and an str to SGPIO takes 8 cycles. + +These latencies are assumed to apply to all accesses to the SGPIO peripheral's +address space, which includes its interrupt control registers as well as the +shadow registers. + +There are two key code paths, with the following worst-case timings: + +RX: 140 cycles +TX: 125 cycles + +Design +====== + +Due to the timing constraints, this code is highly optimised. + +This is the only code that runs on the M0, so it does not need to follow +calling conventions, nor use features of the architecture in standard ways. + +The SGPIO handling does not run as an ISR. It polls the interrupt status. +This saves the cycle costs of interrupt entry and exit, and allows all +registers to be used freely. + +All possible registers, including the stack pointer and link register, can be +used to store values needed in the code, to minimise memory loads and stores. + +There are no function calls. There is no stack usage. All values are in +registers and fixed memory addresses. + +*/ // Constants that point to registers we'll need to modify in the SGPIO block. 
-.equ SGPIO_REGISTER_BLOCK_BASE, 0x40101000 .equ SGPIO_SHADOW_REGISTERS_BASE, 0x40101100 -.equ SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG, 0x40101F30 -.equ SGPIO_EXCHANGE_INTERRUPT_STATUS_REG, 0x40101F2C -.equ SGPIO_GPIO_INPUT, 0x40101210 +.equ SGPIO_EXCHANGE_INTERRUPT_BASE, 0x40101F00 +// Offsets into the interrupt control registers. +.equ INT_CLEAR, 0x30 +.equ INT_STATUS, 0x2C // Buffer that we're funneling data to/from. .equ TARGET_DATA_BUFFER, 0x20008000 -.equ TARGET_BUFFER_POSITION, 0x20007000 -.equ TARGET_BUFFER_TX, 0x20007004 .equ TARGET_BUFFER_MASK, 0x7fff +// Base address of the state structure. +.equ STATE_BASE, 0x20007000 + +// Offsets into the state structure. +.equ OFFSET, 0x00 +.equ TX, 0x04 + +// Our slice chain is set up as follows (ascending data age; arrows are reversed for flow): +// L -> F -> K -> C -> J -> E -> I -> A +// Which has equivalent shadow register offsets: +// 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0 +.equ SLICE0, 44 +.equ SLICE1, 20 +.equ SLICE2, 40 +.equ SLICE3, 8 +.equ SLICE4, 36 +.equ SLICE5, 16 +.equ SLICE6, 32 +.equ SLICE7, 0 + +/* Allocations of single-use registers */ + +state .req r13 +buf_base .req r12 +buf_mask .req r11 +sgpio_data .req r7 +sgpio_int .req r6 +buf_ptr .req r5 + +// Entry point. At this point, the libopencm3 startup code has set things up as +// normal; .data and .bss are initialised, the stack is set up, etc. However, +// we don't actually use any of that. All the code in this file would work +// fine if the M0 jumped straight to main at reset. .global main .thumb_func -main: +main: // Cycle counts: + // Initialise registers used for constant values. 
+ value .req r0 + ldr sgpio_int, =SGPIO_EXCHANGE_INTERRUPT_BASE // sgpio_int = SGPIO_INT_BASE // 2 + ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // sgpio_data = SGPIO_REG_SS // 2 + ldr value, =TARGET_DATA_BUFFER // value = TARGET_DATA_BUFFER // 2 + mov buf_base, value // buf_base = value // 1 + ldr value, =TARGET_BUFFER_MASK // value = TARGET_DATA_MASK // 2 + mov buf_mask, value // buf_mask = value // 1 + ldr value, =STATE_BASE // value = STATE_BASE // 2 + mov state, value // state = value // 1 + + // Initialise state. + zero .req r0 + mov zero, #0 // zero = 0 // 1 + str zero, [state, #OFFSET] // state.offset = zero // 2 + str zero, [state, #TX] // state.tx = zero // 2 + +loop: + // The worst case timing is assumed to occur when reading the interrupt + // status register *just* misses the flag being set - so we include the + // cycles required to check it a second time. + // + // We also assume that we can spend a full 10 cycles doing an ldr from + // SGPIO the first time (2 for ldr, plus 8 for SGPIO-AHB bus latency), + // and still miss a flag that was set at the start of those 10 cycles. + // + // This latter assumption is probably slightly pessimistic, since the + // sampling of the flag on the SGPIO side must occur some time after + // the ldr instruction begins executing on the M0. However, we avoid + // relying on any assumptions about the timing details of a read over + // the SGPIO to AHB bridge. + + int_status .req r0 + scratch .req r1 // Spin until we're ready to handle an SGPIO packet: - // Grab the exchange interrupt staus... - ldr r0, =SGPIO_EXCHANGE_INTERRUPT_STATUS_REG - ldr r0, [r0] + // Grab the exchange interrupt status... + ldr int_status, [sgpio_int, #INT_STATUS] // int_status = SGPIO_STATUS_1 // 10, twice - // ... check to see if it has any interrupt bits set... - lsr r0, #1 + // ... check to see if bit #0 (slice A) was set, by shifting it into the carry bit... + lsr scratch, int_status, #1 // scratch = int_status >> 1 // 1, twice // ...
and if not, jump back to the beginning. - bcc main + bcc loop // if !carry: goto loop // 3, then 1 - // Clear the interrupt pending bits for the SGPIO slices we're working with. - ldr r0, =SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG - ldr r1, =0xffff - str r1, [r0] - - // Grab the base address of the SGPIO shadow registers... - ldr r7, =SGPIO_SHADOW_REGISTERS_BASE + // Clear the interrupt pending bits that were set. + str int_status, [sgpio_int, #INT_CLEAR] // SGPIO_CLR_STATUS_1 = int_status // 8 // ... and grab the address of the buffer segment we want to write to / read from. - ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer - ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer - ldr r2, [r3] // r2 = position_in_buffer - add r6, r0, r2 // r6 = buffer_target = &buffer + position_in_buffer + ldr buf_ptr, [state, #OFFSET] // buf_ptr = state.offset // 2 + add buf_ptr, buf_base // buf_ptr += buf_base // 1 - mov r8, r3 // Store &position_in_buffer. - - // Our slice chain is set up as follows (ascending data age; arrows are reversed for flow): - // L -> F -> K -> C -> J -> E -> I -> A - // Which has equivalent shadow register offsets: - // 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0 + tx .req r0 // Load direction (TX or RX) - ldr r0, =TARGET_BUFFER_TX - ldr r0, [r0] + ldr tx, [state, #TX] // tx = state.tx // 2 // TX? 
- lsr r0, #1 - bcc direction_rx + lsr tx, #1 // tx >>= 1 // 1 + bcc direction_rx // if !carry: goto direction_rx // 1 thru, 3 taken direction_tx: - ldm r6!, {r0-r5} - str r0, [r7, #44] - str r1, [r7, #20] - str r2, [r7, #40] - str r3, [r7, #8 ] - str r4, [r7, #36] - str r5, [r7, #16] + ldm buf_ptr!, {r0-r3} // r0-r3 = buf_ptr[0:16]; buf_ptr += 16 // 5 + str r0, [sgpio_data, #SLICE0] // SGPIO_REG_SS[SLICE0] = r0 // 8 + str r1, [sgpio_data, #SLICE1] // SGPIO_REG_SS[SLICE1] = r1 // 8 + str r2, [sgpio_data, #SLICE2] // SGPIO_REG_SS[SLICE2] = r2 // 8 + str r3, [sgpio_data, #SLICE3] // SGPIO_REG_SS[SLICE3] = r3 // 8 - ldm r6!, {r0-r1} - str r0, [r7, #32] - str r1, [r7, #0] + ldm buf_ptr!, {r0-r3} // r0-r3 = buf_ptr[0:16]; buf_ptr += 16 // 5 + str r0, [sgpio_data, #SLICE4] // SGPIO_REG_SS[SLICE4] = r0 // 8 + str r1, [sgpio_data, #SLICE5] // SGPIO_REG_SS[SLICE5] = r1 // 8 + str r2, [sgpio_data, #SLICE6] // SGPIO_REG_SS[SLICE6] = r2 // 8 + str r3, [sgpio_data, #SLICE7] // SGPIO_REG_SS[SLICE7] = r3 // 8 - b done + b done // goto done // 3 direction_rx: - // 8 cycles - ldr r0, [r7, #44] // 2 - ldr r1, [r7, #20] // 2 - ldr r2, [r7, #40] // 2 - ldr r3, [r7, #8 ] // 2 - ldr r4, [r7, #36] // 2 - ldr r5, [r7, #16] // 2 - stm r6!, {r0-r5} // 7 + ldr r0, [sgpio_data, #SLICE0] // r0 = SGPIO_REG_SS[SLICE0] // 10 + ldr r1, [sgpio_data, #SLICE1] // r1 = SGPIO_REG_SS[SLICE1] // 10 + ldr r2, [sgpio_data, #SLICE2] // r2 = SGPIO_REG_SS[SLICE2] // 10 + ldr r3, [sgpio_data, #SLICE3] // r3 = SGPIO_REG_SS[SLICE3] // 10 + stm buf_ptr!, {r0-r3} // buf_ptr[0:16] = r0-r3; buf_ptr += 16 // 5 - // 6 cycles - ldr r0, [r7, #32] // 2 - ldr r1, [r7, #0] // 2 - stm r6!, {r0-r1} + ldr r0, [sgpio_data, #SLICE4] // r0 = SGPIO_REG_SS[SLICE4] // 10 + ldr r1, [sgpio_data, #SLICE5] // r1 = SGPIO_REG_SS[SLICE5] // 10 + ldr r2, [sgpio_data, #SLICE6] // r2 = SGPIO_REG_SS[SLICE6] // 10 + ldr r3, [sgpio_data, #SLICE7] // r3 = SGPIO_REG_SS[SLICE7] // 10 + stm buf_ptr!, {r0-r3} // buf_ptr[0:16] = r0-r3; buf_ptr += 16 
// 5 done: + offset .req r0 // Finally, update the buffer location... - ldr r0, =TARGET_BUFFER_MASK - and r0, r6, r0 // r0 = (position_in_buffer + size_copied) % buffer_size + mov offset, buf_mask // offset = buf_mask // 1 + and offset, buf_ptr // offset &= buf_ptr // 1 - // ... restore &position_in_buffer, and store the new position there... - mov r1, r8 - str r0, [r1] // position_in_buffer = (position_in_buffer + size_copied) % buffer_size + // ... and store the new position. + str offset, [state, #OFFSET] // state.offset = offset // 2 - b main + b loop // goto loop // 3 + +// The linker will put a literal pool here, so add a label for clearer objdump output: +constants: diff --git a/firmware/hackrf_usb/usb_api_sweep.c b/firmware/hackrf_usb/usb_api_sweep.c index 1a73e8dd..648a812d 100644 --- a/firmware/hackrf_usb/usb_api_sweep.c +++ b/firmware/hackrf_usb/usb_api_sweep.c @@ -25,6 +25,7 @@ #include #include "usb_api_transceiver.h" #include "usb_bulk_buffer.h" +#include "m0_state.h" #include "tuning.h" #include "usb_endpoint.h" #include "streaming.h" @@ -99,7 +100,7 @@ void sweep_mode(void) { while (TRANSCEIVER_MODE_RX_SWEEP == transceiver_mode()) { // Set up IN transfer of buffer 0. - if ( usb_bulk_buffer_offset >= 16384 && phase == 1) { + if ( m0_state.offset >= 16384 && phase == 1) { transfer = true; buffer = &usb_bulk_buffer[0x0000]; phase = 0; @@ -107,7 +108,7 @@ void sweep_mode(void) { } // Set up IN transfer of buffer 1. 
- if ( usb_bulk_buffer_offset < 16384 && phase == 0) { + if ( m0_state.offset < 16384 && phase == 0) { transfer = true; buffer = &usb_bulk_buffer[0x4000]; phase = 1; diff --git a/firmware/hackrf_usb/usb_api_transceiver.c b/firmware/hackrf_usb/usb_api_transceiver.c index 1f40c86d..a580bca7 100644 --- a/firmware/hackrf_usb/usb_api_transceiver.c +++ b/firmware/hackrf_usb/usb_api_transceiver.c @@ -27,6 +27,7 @@ #include #include "usb_bulk_buffer.h" +#include "m0_state.h" #include "usb_api_cpld.h" // Remove when CPLD update is handled elsewhere @@ -262,20 +263,20 @@ void set_transceiver_mode(const transceiver_mode_t new_transceiver_mode) { led_off(LED3); led_on(LED2); rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_RX); - usb_bulk_buffer_tx = false; + m0_state.tx = false; break; case TRANSCEIVER_MODE_TX: led_off(LED2); led_on(LED3); rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_TX); - usb_bulk_buffer_tx = true; + m0_state.tx = true; break; case TRANSCEIVER_MODE_OFF: default: led_off(LED2); led_off(LED3); rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_OFF); - usb_bulk_buffer_tx = false; + m0_state.tx = false; } @@ -284,7 +285,7 @@ void set_transceiver_mode(const transceiver_mode_t new_transceiver_mode) { hw_sync_enable(_hw_sync_mode); - usb_bulk_buffer_offset = 0; + m0_state.offset = 0; } } @@ -330,7 +331,7 @@ void rx_mode(void) { while (TRANSCEIVER_MODE_RX == _transceiver_mode) { // Set up IN transfer of buffer 0. - if (16384 <= usb_bulk_buffer_offset && 1 == phase) { + if (16384 <= m0_state.offset && 1 == phase) { usb_transfer_schedule_block( &usb_endpoint_bulk_in, &usb_bulk_buffer[0x0000], @@ -340,7 +341,7 @@ void rx_mode(void) { phase = 0; } // Set up IN transfer of buffer 1. 
- if (16384 > usb_bulk_buffer_offset && 0 == phase) { + if (16384 > m0_state.offset && 0 == phase) { usb_transfer_schedule_block( &usb_endpoint_bulk_in, &usb_bulk_buffer[0x4000], @@ -368,7 +369,7 @@ void tx_mode(void) { while (TRANSCEIVER_MODE_TX == _transceiver_mode) { // Set up OUT transfer of buffer 0. - if (16384 <= usb_bulk_buffer_offset && 1 == phase) { + if (16384 <= m0_state.offset && 1 == phase) { usb_transfer_schedule_block( &usb_endpoint_bulk_out, &usb_bulk_buffer[0x0000], @@ -378,7 +379,7 @@ void tx_mode(void) { phase = 0; } // Set up OUT transfer of buffer 1. - if (16384 > usb_bulk_buffer_offset && 0 == phase) { + if (16384 > m0_state.offset && 0 == phase) { usb_transfer_schedule_block( &usb_endpoint_bulk_out, &usb_bulk_buffer[0x4000], diff --git a/firmware/hackrf_usb/usb_bulk_buffer.h b/firmware/hackrf_usb/usb_bulk_buffer.h index 0900fd4d..6e120714 100644 --- a/firmware/hackrf_usb/usb_bulk_buffer.h +++ b/firmware/hackrf_usb/usb_bulk_buffer.h @@ -32,8 +32,4 @@ */ extern uint8_t usb_bulk_buffer[32768]; -extern volatile uint32_t usb_bulk_buffer_offset; - -extern bool usb_bulk_buffer_tx; - #endif/*__USB_BULK_BUFFER_H__*/