Document purpose and timing of existing M0 code.
This commit does not modify the code; it only updates comments.
This commit is contained in:
@ -1,9 +1,93 @@
|
|||||||
/*
|
/*
|
||||||
* This file is part of GreatFET
|
* Copyright 2019-2022 Great Scott Gadgets
|
||||||
*
|
*
|
||||||
* Specialized SGPIO interrupt handler for Rhododendron.
|
* This file is part of HackRF.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2, or (at your option)
|
||||||
|
* any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; see the file COPYING. If not, write to
|
||||||
|
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||||
|
* Boston, MA 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
============
|
||||||
|
|
||||||
|
This file contains the code that runs on the Cortex-M0 core of the LPC43xx.
|
||||||
|
|
||||||
|
The M0 core is used to implement all the timing-critical usage of the SGPIO
|
||||||
|
peripheral, which interfaces to the MAX5864 ADC/DAC via the CPLD.
|
||||||
|
|
||||||
|
The M0 reads or writes 32 bytes at a time from the SGPIO registers,
|
||||||
|
transferring these bytes to or from a shared USB bulk buffer. The M4 core
|
||||||
|
handles transferring data between this buffer and the USB host.
|
||||||
|
|
||||||
|
The SGPIO peripheral is set up and enabled by the M4 core. All the M0 needs to
|
||||||
|
do is handle the SGPIO exchange interrupt, which indicates that new data can
|
||||||
|
now be read from or written to the SGPIO shadow registers.
|
||||||
|
|
||||||
|
Timing
|
||||||
|
======
|
||||||
|
|
||||||
|
This code has tight timing constraints.
|
||||||
|
|
||||||
|
We have to complete a read or write from SGPIO every 163 cycles.
|
||||||
|
|
||||||
|
The CPU clock is 204MHz. We exchange 32 bytes at a time in the SGPIO
|
||||||
|
registers, which is 16 samples worth of IQ data. At the maximum sample rate of
|
||||||
|
20MHz, the SGPIO update rate is 20 / 16 = 1.25MHz. So we have 204 / 1.25 =
|
||||||
|
163.2 cycles available.
|
||||||
|
|
||||||
|
Access to the SGPIO peripheral is slow, due to the asynchronous bridge that
|
||||||
|
connects it to the AHB bus matrix. Section 20.4.1 of the LPC43xx user manual
|
||||||
|
(UM10503) specifies the access latencies as:
|
||||||
|
|
||||||
|
Read: 4 x MCLK + 4 x CLK_PERIPH_SGPIO
|
||||||
|
Write: 4 x MCLK + 2 x CLK_PERIPH_SGPIO
|
||||||
|
|
||||||
|
In our case both these clocks are at 204MHz so reads add 8 cycles and writes
|
||||||
|
add 6. These are latencies that add to the usual M0 instruction timings, so an
|
||||||
|
ldr from SGPIO takes 10 cycles, and an str to SGPIO takes 8 cycles.
|
||||||
|
|
||||||
|
These latencies are assumed to apply to all accesses to the SGPIO peripheral's
|
||||||
|
address space, which includes its interrupt control registers as well as the
|
||||||
|
shadow registers.
|
||||||
|
|
||||||
|
There are two key code paths, with the following worst-case timings:
|
||||||
|
|
||||||
|
RX: 159 cycles
|
||||||
|
TX: 144 cycles
|
||||||
|
|
||||||
|
Design
|
||||||
|
======
|
||||||
|
|
||||||
|
Due to the timing constraints, this code is highly optimised.
|
||||||
|
|
||||||
|
This is the only code that runs on the M0, so it does not need to follow
|
||||||
|
calling conventions, nor use features of the architecture in standard ways.
|
||||||
|
|
||||||
|
The SGPIO handling does not run as an ISR. It polls the interrupt status.
|
||||||
|
This saves the cycle costs of interrupt entry and exit, and allows all
|
||||||
|
registers to be used freely.
|
||||||
|
|
||||||
|
All possible registers, including the stack pointer and link register, can be
|
||||||
|
used to store values needed in the code, to minimise memory loads and stores.
|
||||||
|
|
||||||
|
There are no function calls. There is no stack usage. All values are in
|
||||||
|
registers and fixed memory addresses.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
// Constants that point to registers we'll need to modify in the SGPIO block.
|
// Constants that point to registers we'll need to modify in the SGPIO block.
|
||||||
.equ SGPIO_REGISTER_BLOCK_BASE, 0x40101000
|
.equ SGPIO_REGISTER_BLOCK_BASE, 0x40101000
|
||||||
@ -19,36 +103,53 @@
|
|||||||
.equ TARGET_BUFFER_TX, 0x20007004
|
.equ TARGET_BUFFER_TX, 0x20007004
|
||||||
.equ TARGET_BUFFER_MASK, 0x7fff
|
.equ TARGET_BUFFER_MASK, 0x7fff
|
||||||
|
|
||||||
|
// Entry point. At this point, the libopencm3 startup code has set things up as
|
||||||
|
// normal; .data and .bss are initialised, the stack is set up, etc. However,
|
||||||
|
// we don't actually use any of that. All the code in this file would work
|
||||||
|
// fine if the M0 jumped straight to main at reset.
|
||||||
.global main
|
.global main
|
||||||
.thumb_func
|
.thumb_func
|
||||||
main:
|
main: // Cycle counts:
|
||||||
|
// The worst case timing is assumed to occur when reading the interrupt
|
||||||
|
// status register *just* misses the flag being set - so we include the
|
||||||
|
// cycles required to check it a second time.
|
||||||
|
//
|
||||||
|
// We also assume that we can spend a full 10 cycles doing an ldr from
|
||||||
|
// SGPIO the first time (2 for ldr, plus 8 for SGPIO-AHB bus latency),
|
||||||
|
// and still miss a flag that was set at the start of those 10 cycles.
|
||||||
|
//
|
||||||
|
// This latter asssumption is probably slightly pessimistic, since the
|
||||||
|
// sampling of the flag on the SGPIO side must occur some time after
|
||||||
|
// the ldr instruction begins executing on the M0. However, we avoid
|
||||||
|
// relying on any assumptions about the timing details of a read over
|
||||||
|
// the SGPIO to AHB bridge.
|
||||||
|
|
||||||
// Spin until we're ready to handle an SGPIO packet:
|
// Spin until we're ready to handle an SGPIO packet:
|
||||||
// Grab the exchange interrupt staus...
|
// Grab the exchange interrupt staus...
|
||||||
ldr r0, =SGPIO_EXCHANGE_INTERRUPT_STATUS_REG
|
ldr r0, =SGPIO_EXCHANGE_INTERRUPT_STATUS_REG // 2, twice
|
||||||
ldr r0, [r0]
|
ldr r0, [r0] // 10, twice
|
||||||
|
|
||||||
// ... check to see if it has any interrupt bits set...
|
// ... check to see if it has any interrupt bits set...
|
||||||
lsr r0, #1
|
lsr r0, #1 // 1, twice
|
||||||
|
|
||||||
// ... and if not, jump back to the beginning.
|
// ... and if not, jump back to the beginning.
|
||||||
bcc main
|
bcc main // 3, then 1
|
||||||
|
|
||||||
// Clear the interrupt pending bits for the SGPIO slices we're working with.
|
// Clear the interrupt pending bits for the SGPIO slices we're working with.
|
||||||
ldr r0, =SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG
|
ldr r0, =SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG // 2
|
||||||
ldr r1, =0xffff
|
ldr r1, =0xffff // 2
|
||||||
str r1, [r0]
|
str r1, [r0] // 8
|
||||||
|
|
||||||
// Grab the base address of the SGPIO shadow registers...
|
// Grab the base address of the SGPIO shadow registers...
|
||||||
ldr r7, =SGPIO_SHADOW_REGISTERS_BASE
|
ldr r7, =SGPIO_SHADOW_REGISTERS_BASE // 2
|
||||||
|
|
||||||
// ... and grab the address of the buffer segment we want to write to / read from.
|
// ... and grab the address of the buffer segment we want to write to / read from.
|
||||||
ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer
|
ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer // 2
|
||||||
ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer
|
ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer // 2
|
||||||
ldr r2, [r3] // r2 = position_in_buffer
|
ldr r2, [r3] // r2 = position_in_buffer // 2
|
||||||
add r6, r0, r2 // r6 = buffer_target = &buffer + position_in_buffer
|
add r6, r0, r2 // r6 = buffer_target = &buffer + position_in_buffer // 1
|
||||||
|
|
||||||
mov r8, r3 // Store &position_in_buffer.
|
mov r8, r3 // Store &position_in_buffer. // 1
|
||||||
|
|
||||||
// Our slice chain is set up as follows (ascending data age; arrows are reversed for flow):
|
// Our slice chain is set up as follows (ascending data age; arrows are reversed for flow):
|
||||||
// L -> F -> K -> C -> J -> E -> I -> A
|
// L -> F -> K -> C -> J -> E -> I -> A
|
||||||
@ -56,53 +157,51 @@ main:
|
|||||||
// 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0
|
// 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0
|
||||||
|
|
||||||
// Load direction (TX or RX)
|
// Load direction (TX or RX)
|
||||||
ldr r0, =TARGET_BUFFER_TX
|
ldr r0, =TARGET_BUFFER_TX // 2
|
||||||
ldr r0, [r0]
|
ldr r0, [r0] // 2
|
||||||
|
|
||||||
// TX?
|
// TX?
|
||||||
lsr r0, #1
|
lsr r0, #1 // 1
|
||||||
bcc direction_rx
|
bcc direction_rx // 1 thru, 3 taken
|
||||||
|
|
||||||
direction_tx:
|
direction_tx:
|
||||||
|
|
||||||
ldm r6!, {r0-r5}
|
ldm r6!, {r0-r5} // 7
|
||||||
str r0, [r7, #44]
|
str r0, [r7, #44] // 8
|
||||||
str r1, [r7, #20]
|
str r1, [r7, #20] // 8
|
||||||
str r2, [r7, #40]
|
str r2, [r7, #40] // 8
|
||||||
str r3, [r7, #8 ]
|
str r3, [r7, #8 ] // 8
|
||||||
str r4, [r7, #36]
|
str r4, [r7, #36] // 8
|
||||||
str r5, [r7, #16]
|
str r5, [r7, #16] // 8
|
||||||
|
|
||||||
ldm r6!, {r0-r1}
|
ldm r6!, {r0-r1} // 3
|
||||||
str r0, [r7, #32]
|
str r0, [r7, #32] // 8
|
||||||
str r1, [r7, #0]
|
str r1, [r7, #0] // 8
|
||||||
|
|
||||||
b done
|
b done // 3
|
||||||
|
|
||||||
direction_rx:
|
direction_rx:
|
||||||
|
|
||||||
// 8 cycles
|
ldr r0, [r7, #44] // 10
|
||||||
ldr r0, [r7, #44] // 2
|
ldr r1, [r7, #20] // 10
|
||||||
ldr r1, [r7, #20] // 2
|
ldr r2, [r7, #40] // 10
|
||||||
ldr r2, [r7, #40] // 2
|
ldr r3, [r7, #8 ] // 10
|
||||||
ldr r3, [r7, #8 ] // 2
|
ldr r4, [r7, #36] // 10
|
||||||
ldr r4, [r7, #36] // 2
|
ldr r5, [r7, #16] // 10
|
||||||
ldr r5, [r7, #16] // 2
|
stm r6!, {r0-r5} // 7
|
||||||
stm r6!, {r0-r5} // 7
|
|
||||||
|
|
||||||
// 6 cycles
|
ldr r0, [r7, #32] // 10
|
||||||
ldr r0, [r7, #32] // 2
|
ldr r1, [r7, #0] // 10
|
||||||
ldr r1, [r7, #0] // 2
|
stm r6!, {r0-r1} // 3
|
||||||
stm r6!, {r0-r1}
|
|
||||||
|
|
||||||
done:
|
done:
|
||||||
|
|
||||||
// Finally, update the buffer location...
|
// Finally, update the buffer location...
|
||||||
ldr r0, =TARGET_BUFFER_MASK
|
ldr r0, =TARGET_BUFFER_MASK // 2
|
||||||
and r0, r6, r0 // r0 = (position_in_buffer + size_copied) % buffer_size
|
and r0, r6, r0 // r0 = (pos_in_buffer + size_copied) % buffer_size // 1
|
||||||
|
|
||||||
// ... restore &position_in_buffer, and store the new position there...
|
// ... restore &position_in_buffer, and store the new position there...
|
||||||
mov r1, r8
|
mov r1, r8 // 1
|
||||||
str r0, [r1] // position_in_buffer = (position_in_buffer + size_copied) % buffer_size
|
str r0, [r1] // pos_in_buffer = (pos_in_buffer + size_copied) % buffer_size // 2
|
||||||
|
|
||||||
b main
|
b main // 3
|
||||||
|
Reference in New Issue
Block a user