Merge pull request #1022 from martinling/sgpio-cleanup
M0 SGPIO code cleanup & optimisation
This commit is contained in:
@ -34,6 +34,5 @@ MEMORY
|
||||
}
|
||||
|
||||
usb_bulk_buffer = ORIGIN(ram_usb);
|
||||
usb_bulk_buffer_offset = ORIGIN(ram_shared);
|
||||
usb_bulk_buffer_tx = ORIGIN(ram_shared)+4;
|
||||
m0_state = ORIGIN(ram_shared);
|
||||
PROVIDE(__ram_m0_start__ = ORIGIN(ram_m0));
|
||||
|
@ -35,7 +35,6 @@ set(SRC_M4
|
||||
hackrf_usb.c
|
||||
"${PATH_HACKRF_FIRMWARE_COMMON}/tuning.c"
|
||||
"${PATH_HACKRF_FIRMWARE_COMMON}/streaming.c"
|
||||
usb_bulk_buffer.c
|
||||
"${PATH_HACKRF_FIRMWARE_COMMON}/usb.c"
|
||||
"${PATH_HACKRF_FIRMWARE_COMMON}/usb_request.c"
|
||||
"${PATH_HACKRF_FIRMWARE_COMMON}/usb_standard_request.c"
|
||||
|
@ -1,6 +1,5 @@
|
||||
/*
|
||||
* Copyright 2012 Jared Boone
|
||||
* Copyright 2013 Benjamin Vernoux
|
||||
* Copyright 2022 Great Scott Gadgets
|
||||
*
|
||||
* This file is part of HackRF.
|
||||
*
|
||||
@ -20,6 +19,18 @@
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "usb_bulk_buffer.h"
|
||||
#ifndef __M0_STATE_H__
|
||||
#define __M0_STATE_H__
|
||||
|
||||
volatile uint32_t usb_bulk_buffer_offset = 0;
|
||||
/* State shared between the M4 core and the M0 SGPIO code.
 * The M0 assembly accesses these fields by fixed byte offset
 * (offset at 0x00, tx at 0x04), so the field order and sizes here
 * must stay in step with the offsets defined in the M0 code.
 */
struct m0_state {
|
||||
uint32_t offset; /* Byte offset into usb_bulk_buffer of the next 32-byte chunk; the M0 advances it and wraps it with the buffer mask (0x7fff). */
|
||||
uint32_t tx; /* Direction flag written by the M4: bit 0 set selects TX (buffer to SGPIO), clear selects RX (SGPIO to buffer). */
|
||||
};
|
||||
|
||||
/* Address of m0_state is set in ldscripts. If you change the name of this
|
||||
* variable, it won't be where it needs to be in the processor's address space,
|
||||
* unless you also adjust the ldscripts.
|
||||
*/
|
||||
extern volatile struct m0_state m0_state;
|
||||
|
||||
#endif/*__M0_STATE_H__*/
|
@ -1,108 +1,244 @@
|
||||
/*
|
||||
* This file is part of GreatFET
|
||||
* Copyright 2019-2022 Great Scott Gadgets
|
||||
*
|
||||
* Specialized SGPIO interrupt handler for Rhododendron.
|
||||
* This file is part of HackRF.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This file contains the code that runs on the Cortex-M0 core of the LPC43xx.
|
||||
|
||||
The M0 core is used to implement all the timing-critical usage of the SGPIO
|
||||
peripheral, which interfaces to the MAX5864 ADC/DAC via the CPLD.
|
||||
|
||||
The M0 reads or writes 32 bytes at a time from the SGPIO registers,
|
||||
transferring these bytes to or from a shared USB bulk buffer. The M4 core
|
||||
handles transferring data between this buffer and the USB host.
|
||||
|
||||
The SGPIO peripheral is set up and enabled by the M4 core. All the M0 needs to
|
||||
do is handle the SGPIO exchange interrupt, which indicates that new data can
|
||||
now be read from or written to the SGPIO shadow registers.
|
||||
|
||||
Timing
|
||||
======
|
||||
|
||||
This code has tight timing constraints.
|
||||
|
||||
We have to complete a read or write from SGPIO every 163 cycles.
|
||||
|
||||
The CPU clock is 204MHz. We exchange 32 bytes at a time in the SGPIO
|
||||
registers, which is 16 samples worth of IQ data. At the maximum sample rate of
|
||||
20MHz, the SGPIO update rate is 20 / 16 = 1.25MHz. So we have 204 / 1.25 =
|
||||
163.2 cycles available.
|
||||
|
||||
Access to the SGPIO peripheral is slow, due to the asynchronous bridge that
|
||||
connects it to the AHB bus matrix. Section 20.4.1 of the LPC43xx user manual
|
||||
(UM10503) specifies the access latencies as:
|
||||
|
||||
Read: 4 x MCLK + 4 x CLK_PERIPH_SGPIO
|
||||
Write: 4 x MCLK + 2 x CLK_PERIPH_SGPIO
|
||||
|
||||
In our case both these clocks are at 204MHz so reads add 8 cycles and writes
|
||||
add 6. These are latencies that add to the usual M0 instruction timings, so an
|
||||
ldr from SGPIO takes 10 cycles, and an str to SGPIO takes 8 cycles.
|
||||
|
||||
These latencies are assumed to apply to all accesses to the SGPIO peripheral's
|
||||
address space, which includes its interrupt control registers as well as the
|
||||
shadow registers.
|
||||
|
||||
There are two key code paths, with the following worst-case timings:
|
||||
|
||||
RX: 140 cycles
|
||||
TX: 125 cycles
|
||||
|
||||
Design
|
||||
======
|
||||
|
||||
Due to the timing constraints, this code is highly optimised.
|
||||
|
||||
This is the only code that runs on the M0, so it does not need to follow
|
||||
calling conventions, nor use features of the architecture in standard ways.
|
||||
|
||||
The SGPIO handling does not run as an ISR. It polls the interrupt status.
|
||||
This saves the cycle costs of interrupt entry and exit, and allows all
|
||||
registers to be used freely.
|
||||
|
||||
All possible registers, including the stack pointer and link register, can be
|
||||
used to store values needed in the code, to minimise memory loads and stores.
|
||||
|
||||
There are no function calls. There is no stack usage. All values are in
|
||||
registers and fixed memory addresses.
|
||||
|
||||
*/
|
||||
|
||||
// Constants that point to registers we'll need to modify in the SGPIO block.
|
||||
.equ SGPIO_REGISTER_BLOCK_BASE, 0x40101000
|
||||
.equ SGPIO_SHADOW_REGISTERS_BASE, 0x40101100
|
||||
.equ SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG, 0x40101F30
|
||||
.equ SGPIO_EXCHANGE_INTERRUPT_STATUS_REG, 0x40101F2C
|
||||
.equ SGPIO_GPIO_INPUT, 0x40101210
|
||||
.equ SGPIO_EXCHANGE_INTERRUPT_BASE, 0x40101F00
|
||||
|
||||
// Offsets into the interrupt control registers.
|
||||
.equ INT_CLEAR, 0x30
|
||||
.equ INT_STATUS, 0x2C
|
||||
|
||||
// Buffer that we're funneling data to/from.
|
||||
.equ TARGET_DATA_BUFFER, 0x20008000
|
||||
.equ TARGET_BUFFER_POSITION, 0x20007000
|
||||
.equ TARGET_BUFFER_TX, 0x20007004
|
||||
.equ TARGET_BUFFER_MASK, 0x7fff
|
||||
|
||||
// Base address of the state structure.
|
||||
.equ STATE_BASE, 0x20007000
|
||||
|
||||
// Offsets into the state structure.
|
||||
.equ OFFSET, 0x00
|
||||
.equ TX, 0x04
|
||||
|
||||
// Our slice chain is set up as follows (ascending data age; arrows are reversed for flow):
|
||||
// L -> F -> K -> C -> J -> E -> I -> A
|
||||
// Which has equivalent shadow register offsets:
|
||||
// 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0
|
||||
.equ SLICE0, 44
|
||||
.equ SLICE1, 20
|
||||
.equ SLICE2, 40
|
||||
.equ SLICE3, 8
|
||||
.equ SLICE4, 36
|
||||
.equ SLICE5, 16
|
||||
.equ SLICE6, 32
|
||||
.equ SLICE7, 0
|
||||
|
||||
/* Allocations of single-use registers */
|
||||
|
||||
state .req r13
|
||||
buf_base .req r12
|
||||
buf_mask .req r11
|
||||
sgpio_data .req r7
|
||||
sgpio_int .req r6
|
||||
buf_ptr .req r5
|
||||
|
||||
// Entry point. At this point, the libopencm3 startup code has set things up as
|
||||
// normal; .data and .bss are initialised, the stack is set up, etc. However,
|
||||
// we don't actually use any of that. All the code in this file would work
|
||||
// fine if the M0 jumped straight to main at reset.
|
||||
.global main
|
||||
.thumb_func
|
||||
main:
|
||||
main: // Cycle counts:
|
||||
// Initialise registers used for constant values.
|
||||
value .req r0
|
||||
ldr sgpio_int, =SGPIO_EXCHANGE_INTERRUPT_BASE // sgpio_int = SGPIO_INT_BASE // 2
|
||||
ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // sgpio_data = SGPIO_REG_SS // 2
|
||||
ldr value, =TARGET_DATA_BUFFER // value = TARGET_DATA_BUFFER // 2
|
||||
mov buf_base, value // buf_base = value // 1
|
||||
ldr value, =TARGET_BUFFER_MASK // value = TARGET_DATA_MASK // 2
|
||||
mov buf_mask, value // buf_mask = value // 1
|
||||
ldr value, =STATE_BASE // value = STATE_BASE // 2
|
||||
mov state, value // state = value // 1
|
||||
|
||||
// Initialise state.
|
||||
zero .req r0
|
||||
mov zero, #0 // zero = 0 // 1
|
||||
str zero, [state, #OFFSET] // state.offset = zero // 2
|
||||
str zero, [state, #TX] // state.tx = zero // 2
|
||||
|
||||
loop:
|
||||
// The worst case timing is assumed to occur when reading the interrupt
|
||||
// status register *just* misses the flag being set - so we include the
|
||||
// cycles required to check it a second time.
|
||||
//
|
||||
// We also assume that we can spend a full 10 cycles doing an ldr from
|
||||
// SGPIO the first time (2 for ldr, plus 8 for SGPIO-AHB bus latency),
|
||||
// and still miss a flag that was set at the start of those 10 cycles.
|
||||
//
|
||||
// This latter assumption is probably slightly pessimistic, since the
|
||||
// sampling of the flag on the SGPIO side must occur some time after
|
||||
// the ldr instruction begins executing on the M0. However, we avoid
|
||||
// relying on any assumptions about the timing details of a read over
|
||||
// the SGPIO to AHB bridge.
|
||||
|
||||
int_status .req r0
|
||||
scratch .req r1
|
||||
|
||||
// Spin until we're ready to handle an SGPIO packet:
|
||||
// Grab the exchange interrupt status...
|
||||
ldr r0, =SGPIO_EXCHANGE_INTERRUPT_STATUS_REG
|
||||
ldr r0, [r0]
|
||||
// Grab the exchange interrupt status...
|
||||
ldr int_status, [sgpio_int, #INT_STATUS] // int_status = SGPIO_STATUS_1 // 10, twice
|
||||
|
||||
// ... check to see if it has any interrupt bits set...
|
||||
lsr r0, #1
|
||||
// ... check to see if bit #0 (slice A) was set, by shifting it into the carry bit...
|
||||
lsr scratch, int_status, #1 // scratch = int_status >> 1 // 1, twice
|
||||
|
||||
// ... and if not, jump back to the beginning.
|
||||
bcc main
|
||||
bcc loop // if !carry: goto loop // 3, then 1
|
||||
|
||||
// Clear the interrupt pending bits for the SGPIO slices we're working with.
|
||||
ldr r0, =SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG
|
||||
ldr r1, =0xffff
|
||||
str r1, [r0]
|
||||
|
||||
// Grab the base address of the SGPIO shadow registers...
|
||||
ldr r7, =SGPIO_SHADOW_REGISTERS_BASE
|
||||
// Clear the interrupt pending bits that were set.
|
||||
str int_status, [sgpio_int, #INT_CLEAR] // SGPIO_CLR_STATUS_1 = int_status // 8
|
||||
|
||||
// ... and grab the address of the buffer segment we want to write to / read from.
|
||||
ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer
|
||||
ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer
|
||||
ldr r2, [r3] // r2 = position_in_buffer
|
||||
add r6, r0, r2 // r6 = buffer_target = &buffer + position_in_buffer
|
||||
ldr buf_ptr, [state, #OFFSET] // buf_ptr = state.offset // 2
|
||||
add buf_ptr, buf_base // buf_ptr += buf_base // 1
|
||||
|
||||
mov r8, r3 // Store &position_in_buffer.
|
||||
|
||||
// Our slice chain is set up as follows (ascending data age; arrows are reversed for flow):
|
||||
// L -> F -> K -> C -> J -> E -> I -> A
|
||||
// Which has equivalent shadow register offsets:
|
||||
// 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0
|
||||
tx .req r0
|
||||
|
||||
// Load direction (TX or RX)
|
||||
ldr r0, =TARGET_BUFFER_TX
|
||||
ldr r0, [r0]
|
||||
ldr tx, [state, #TX] // tx = state.tx // 2
|
||||
|
||||
// TX?
|
||||
lsr r0, #1
|
||||
bcc direction_rx
|
||||
lsr tx, #1 // tx >>= 1 // 1
|
||||
bcc direction_rx // if !carry: goto direction_rx // 1 thru, 3 taken
|
||||
|
||||
direction_tx:
|
||||
|
||||
ldm r6!, {r0-r5}
|
||||
str r0, [r7, #44]
|
||||
str r1, [r7, #20]
|
||||
str r2, [r7, #40]
|
||||
str r3, [r7, #8 ]
|
||||
str r4, [r7, #36]
|
||||
str r5, [r7, #16]
|
||||
ldm buf_ptr!, {r0-r3} // r0-r3 = buf_ptr[0:16]; buf_ptr += 16 // 5
|
||||
str r0, [sgpio_data, #SLICE0] // SGPIO_REG_SS[SLICE0] = r0 // 8
|
||||
str r1, [sgpio_data, #SLICE1] // SGPIO_REG_SS[SLICE1] = r1 // 8
|
||||
str r2, [sgpio_data, #SLICE2] // SGPIO_REG_SS[SLICE2] = r2 // 8
|
||||
str r3, [sgpio_data, #SLICE3] // SGPIO_REG_SS[SLICE3] = r3 // 8
|
||||
|
||||
ldm r6!, {r0-r1}
|
||||
str r0, [r7, #32]
|
||||
str r1, [r7, #0]
|
||||
ldm buf_ptr!, {r0-r3} // r0-r3 = buf_ptr[0:16]; buf_ptr += 16 // 5
|
||||
str r0, [sgpio_data, #SLICE4] // SGPIO_REG_SS[SLICE4] = r0 // 8
|
||||
str r1, [sgpio_data, #SLICE5] // SGPIO_REG_SS[SLICE5] = r1 // 8
|
||||
str r2, [sgpio_data, #SLICE6] // SGPIO_REG_SS[SLICE6] = r2 // 8
|
||||
str r3, [sgpio_data, #SLICE7] // SGPIO_REG_SS[SLICE7] = r3 // 8
|
||||
|
||||
b done
|
||||
b done // goto done // 3
|
||||
|
||||
direction_rx:
|
||||
|
||||
// 8 cycles
|
||||
ldr r0, [r7, #44] // 2
|
||||
ldr r1, [r7, #20] // 2
|
||||
ldr r2, [r7, #40] // 2
|
||||
ldr r3, [r7, #8 ] // 2
|
||||
ldr r4, [r7, #36] // 2
|
||||
ldr r5, [r7, #16] // 2
|
||||
stm r6!, {r0-r5} // 7
|
||||
ldr r0, [sgpio_data, #SLICE0] // r0 = SGPIO_REG_SS[SLICE0] // 10
|
||||
ldr r1, [sgpio_data, #SLICE1] // r1 = SGPIO_REG_SS[SLICE1] // 10
|
||||
ldr r2, [sgpio_data, #SLICE2] // r2 = SGPIO_REG_SS[SLICE2] // 10
|
||||
ldr r3, [sgpio_data, #SLICE3] // r3 = SGPIO_REG_SS[SLICE3] // 10
|
||||
stm buf_ptr!, {r0-r3} // buf_ptr[0:16] = r0-r3; buf_ptr += 16 // 5
|
||||
|
||||
// 6 cycles
|
||||
ldr r0, [r7, #32] // 2
|
||||
ldr r1, [r7, #0] // 2
|
||||
stm r6!, {r0-r1}
|
||||
ldr r0, [sgpio_data, #SLICE4] // r0 = SGPIO_REG_SS[SLICE4] // 10
|
||||
ldr r1, [sgpio_data, #SLICE5] // r1 = SGPIO_REG_SS[SLICE5] // 10
|
||||
ldr r2, [sgpio_data, #SLICE6] // r2 = SGPIO_REG_SS[SLICE6] // 10
|
||||
ldr r3, [sgpio_data, #SLICE7] // r3 = SGPIO_REG_SS[SLICE7] // 10
|
||||
stm buf_ptr!, {r0-r3} // buf_ptr[0:16] = r0-r3; buf_ptr += 16 // 5
|
||||
|
||||
done:
|
||||
offset .req r0
|
||||
|
||||
// Finally, update the buffer location...
|
||||
ldr r0, =TARGET_BUFFER_MASK
|
||||
and r0, r6, r0 // r0 = (position_in_buffer + size_copied) % buffer_size
|
||||
mov offset, buf_mask // offset = buf_mask // 1
|
||||
and offset, buf_ptr // offset &= buf_ptr // 1
|
||||
|
||||
// ... restore &position_in_buffer, and store the new position there...
|
||||
mov r1, r8
|
||||
str r0, [r1] // position_in_buffer = (position_in_buffer + size_copied) % buffer_size
|
||||
// ... and store the new position.
|
||||
str offset, [state, #OFFSET] // state.offset = offset // 2
|
||||
|
||||
b main
|
||||
b loop // goto loop // 3
|
||||
|
||||
// The linker will put a literal pool here, so add a label for clearer objdump output:
|
||||
constants:
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <hackrf_core.h>
|
||||
#include "usb_api_transceiver.h"
|
||||
#include "usb_bulk_buffer.h"
|
||||
#include "m0_state.h"
|
||||
#include "tuning.h"
|
||||
#include "usb_endpoint.h"
|
||||
#include "streaming.h"
|
||||
@ -99,7 +100,7 @@ void sweep_mode(void) {
|
||||
|
||||
while (TRANSCEIVER_MODE_RX_SWEEP == transceiver_mode()) {
|
||||
// Set up IN transfer of buffer 0.
|
||||
if ( usb_bulk_buffer_offset >= 16384 && phase == 1) {
|
||||
if ( m0_state.offset >= 16384 && phase == 1) {
|
||||
transfer = true;
|
||||
buffer = &usb_bulk_buffer[0x0000];
|
||||
phase = 0;
|
||||
@ -107,7 +108,7 @@ void sweep_mode(void) {
|
||||
}
|
||||
|
||||
// Set up IN transfer of buffer 1.
|
||||
if ( usb_bulk_buffer_offset < 16384 && phase == 0) {
|
||||
if ( m0_state.offset < 16384 && phase == 0) {
|
||||
transfer = true;
|
||||
buffer = &usb_bulk_buffer[0x4000];
|
||||
phase = 1;
|
||||
|
@ -27,6 +27,7 @@
|
||||
|
||||
#include <libopencm3/cm3/vector.h>
|
||||
#include "usb_bulk_buffer.h"
|
||||
#include "m0_state.h"
|
||||
|
||||
#include "usb_api_cpld.h" // Remove when CPLD update is handled elsewhere
|
||||
|
||||
@ -262,20 +263,20 @@ void set_transceiver_mode(const transceiver_mode_t new_transceiver_mode) {
|
||||
led_off(LED3);
|
||||
led_on(LED2);
|
||||
rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_RX);
|
||||
usb_bulk_buffer_tx = false;
|
||||
m0_state.tx = false;
|
||||
break;
|
||||
case TRANSCEIVER_MODE_TX:
|
||||
led_off(LED2);
|
||||
led_on(LED3);
|
||||
rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_TX);
|
||||
usb_bulk_buffer_tx = true;
|
||||
m0_state.tx = true;
|
||||
break;
|
||||
case TRANSCEIVER_MODE_OFF:
|
||||
default:
|
||||
led_off(LED2);
|
||||
led_off(LED3);
|
||||
rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_OFF);
|
||||
usb_bulk_buffer_tx = false;
|
||||
m0_state.tx = false;
|
||||
}
|
||||
|
||||
|
||||
@ -284,7 +285,7 @@ void set_transceiver_mode(const transceiver_mode_t new_transceiver_mode) {
|
||||
|
||||
hw_sync_enable(_hw_sync_mode);
|
||||
|
||||
usb_bulk_buffer_offset = 0;
|
||||
m0_state.offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -330,7 +331,7 @@ void rx_mode(void) {
|
||||
|
||||
while (TRANSCEIVER_MODE_RX == _transceiver_mode) {
|
||||
// Set up IN transfer of buffer 0.
|
||||
if (16384 <= usb_bulk_buffer_offset && 1 == phase) {
|
||||
if (16384 <= m0_state.offset && 1 == phase) {
|
||||
usb_transfer_schedule_block(
|
||||
&usb_endpoint_bulk_in,
|
||||
&usb_bulk_buffer[0x0000],
|
||||
@ -340,7 +341,7 @@ void rx_mode(void) {
|
||||
phase = 0;
|
||||
}
|
||||
// Set up IN transfer of buffer 1.
|
||||
if (16384 > usb_bulk_buffer_offset && 0 == phase) {
|
||||
if (16384 > m0_state.offset && 0 == phase) {
|
||||
usb_transfer_schedule_block(
|
||||
&usb_endpoint_bulk_in,
|
||||
&usb_bulk_buffer[0x4000],
|
||||
@ -368,7 +369,7 @@ void tx_mode(void) {
|
||||
|
||||
while (TRANSCEIVER_MODE_TX == _transceiver_mode) {
|
||||
// Set up OUT transfer of buffer 0.
|
||||
if (16384 <= usb_bulk_buffer_offset && 1 == phase) {
|
||||
if (16384 <= m0_state.offset && 1 == phase) {
|
||||
usb_transfer_schedule_block(
|
||||
&usb_endpoint_bulk_out,
|
||||
&usb_bulk_buffer[0x0000],
|
||||
@ -378,7 +379,7 @@ void tx_mode(void) {
|
||||
phase = 0;
|
||||
}
|
||||
// Set up OUT transfer of buffer 1.
|
||||
if (16384 > usb_bulk_buffer_offset && 0 == phase) {
|
||||
if (16384 > m0_state.offset && 0 == phase) {
|
||||
usb_transfer_schedule_block(
|
||||
&usb_endpoint_bulk_out,
|
||||
&usb_bulk_buffer[0x4000],
|
||||
|
@ -32,8 +32,4 @@
|
||||
*/
|
||||
extern uint8_t usb_bulk_buffer[32768];
|
||||
|
||||
extern volatile uint32_t usb_bulk_buffer_offset;
|
||||
|
||||
extern bool usb_bulk_buffer_tx;
|
||||
|
||||
#endif/*__USB_BULK_BUFFER_H__*/
|
||||
|
Reference in New Issue
Block a user