diff --git a/embassy-rp/src/intrinsics.rs b/embassy-rp/src/intrinsics.rs index 3e75fb7fc..3b63846d4 100644 --- a/embassy-rp/src/intrinsics.rs +++ b/embassy-rp/src/intrinsics.rs @@ -274,3 +274,201 @@ macro_rules! intrinsics { intrinsics!($($rest)*); }; } + +// Credit: taken from `rp-hal` (also licensed Apache+MIT) +// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/sio.rs + +// This takes advantage of how AAPCS defines a 64-bit return on 32-bit registers +// by packing it into r0[0:31] and r1[32:63]. So all we need to do is put +// the remainder in the high order 32 bits of a 64 bit result. We can also +// alias the division operators to these for a similar reason r0 is the +// result either way and r1 a scratch register, so the caller can't assume it +// retains the argument value. +#[cfg(target_arch = "arm")] +core::arch::global_asm!( + ".macro hwdivider_head", + "ldr r2, =(0xd0000000)", // SIO_BASE + // Check the DIRTY state of the divider by shifting it into the C + // status bit. + "ldr r3, [r2, #0x078]", // DIV_CSR + "lsrs r3, #2", // DIRTY = 1, so shift 2 down + // We only need to save the state when DIRTY, otherwise we can just do the + // division directly. + "bcs 2f", + "1:", + // Do the actual division now, we're either not DIRTY, or we've saved the + // state and branched back here so it's safe now. + ".endm", + ".macro hwdivider_tail", + // 8 cycle delay to wait for the result. Each branch takes two cycles + // and fits into a 2-byte Thumb instruction, so this is smaller than + // 8 NOPs. + "b 3f", + "3: b 3f", + "3: b 3f", + "3: b 3f", + "3:", + // Read the quotient last, since that's what clears the dirty flag. + "ldr r1, [r2, #0x074]", // DIV_REMAINDER + "ldr r0, [r2, #0x070]", // DIV_QUOTIENT + // Either return to the caller or back to the state restore. + "bx lr", + "2:", + // Since we can't save the signed-ness of the calculation, we have to make + // sure that there's at least an 8 cycle delay before we read the result. + // The push takes 5 cycles, and we've already spent at least 7 checking + // the DIRTY state to get here. + "push {{r4-r6, lr}}", + // Read the quotient last, since that's what clears the dirty flag. + "ldr r3, [r2, #0x060]", // DIV_UDIVIDEND + "ldr r4, [r2, #0x064]", // DIV_UDIVISOR + "ldr r5, [r2, #0x074]", // DIV_REMAINDER + "ldr r6, [r2, #0x070]", // DIV_QUOTIENT + // If we get interrupted here (before a write sets the DIRTY flag) it's + // fine, since we have the full state, so the interruptor doesn't have to + // restore it. Once the write happens and the DIRTY flag is set, the + // interruptor becomes responsible for restoring our state. + "bl 1b", + // If we are interrupted here, then the interruptor will start an incorrect + // calculation using a wrong divisor, but we'll restore the divisor and + // result ourselves correctly. This sets DIRTY, so any interruptor will + // save the state. + "str r3, [r2, #0x060]", // DIV_UDIVIDEND + // If we are interrupted here, the the interruptor may start the + // calculation using incorrectly signed inputs, but we'll restore the + // result ourselves. This sets DIRTY, so any interruptor will save the + // state. + "str r4, [r2, #0x064]", // DIV_UDIVISOR + // If we are interrupted here, the interruptor will have restored + // everything but the quotient may be wrongly signed. If the calculation + // started by the above writes is still ongoing it is stopped, so it won't + // replace the result we're restoring. DIRTY and READY set, but only + // DIRTY matters to make the interruptor save the state. + "str r5, [r2, #0x074]", // DIV_REMAINDER + // State fully restored after the quotient write. This sets both DIRTY + // and READY, so whatever we may have interrupted can read the result. + "str r6, [r2, #0x070]", // DIV_QUOTIENT + "pop {{r4-r6, pc}}", + ".endm", +); + +macro_rules! division_function { + ( + $name:ident $($intrinsic:ident)* ( $argty:ty ) { + $($begin:literal),+ + } + ) => { + #[cfg(all(target_arch = "arm", feature = "intrinsics"))] + core::arch::global_asm!( + // Mangle the name slightly, since this is a global symbol. + concat!(".global _rphal_", stringify!($name)), + concat!(".type _rphal_", stringify!($name), ", %function"), + ".align 2", + concat!("_rphal_", stringify!($name), ":"), + $( + concat!(".global ", stringify!($intrinsic)), + concat!(".type ", stringify!($intrinsic), ", %function"), + concat!(stringify!($intrinsic), ":"), + )* + + "hwdivider_head", + $($begin),+ , + "hwdivider_tail", + ); + + #[cfg(all(target_arch = "arm", not(feature = "intrinsics")))] + core::arch::global_asm!( + // Mangle the name slightly, since this is a global symbol. + concat!(".global _rphal_", stringify!($name)), + concat!(".type _rphal_", stringify!($name), ", %function"), + ".align 2", + concat!("_rphal_", stringify!($name), ":"), + + "hwdivider_head", + $($begin),+ , + "hwdivider_tail", + ); + + #[cfg(target_arch = "arm")] + extern "aapcs" { + // Connect a local name to global symbol above through FFI. + #[link_name = concat!("_rphal_", stringify!($name)) ] + fn $name(n: $argty, d: $argty) -> u64; + } + + #[cfg(not(target_arch = "arm"))] + #[allow(unused_variables)] + unsafe fn $name(n: $argty, d: $argty) -> u64 { 0 } + }; +} + +division_function! { + unsigned_divmod __aeabi_uidivmod __aeabi_uidiv ( u32 ) { + "str r0, [r2, #0x060]", // DIV_UDIVIDEND + "str r1, [r2, #0x064]" // DIV_UDIVISOR + } +} + +division_function! { + signed_divmod __aeabi_idivmod __aeabi_idiv ( i32 ) { + "str r0, [r2, #0x068]", // DIV_SDIVIDEND + "str r1, [r2, #0x06c]" // DIV_SDIVISOR + } +} + +fn divider_unsigned(n: u32, d: u32) -> DivResult { + let packed = unsafe { unsigned_divmod(n, d) }; + DivResult { + quotient: packed as u32, + remainder: (packed >> 32) as u32, + } +} + +fn divider_signed(n: i32, d: i32) -> DivResult { + let packed = unsafe { signed_divmod(n, d) }; + // Double casts to avoid sign extension + DivResult { + quotient: packed as u32 as i32, + remainder: (packed >> 32) as u32 as i32, + } +} + +/// Result of divide/modulo operation +struct DivResult { + /// The quotient of divide/modulo operation + pub quotient: T, + /// The remainder of divide/modulo operation + pub remainder: T, +} + +intrinsics! { + extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { + divider_unsigned(n, d).quotient + } + + extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { + divider_unsigned(n, d).remainder + } + + extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { + let quo_rem = divider_unsigned(n, d); + if let Some(rem) = rem { + *rem = quo_rem.remainder; + } + quo_rem.quotient + } + + extern "C" fn __divsi3(n: i32, d: i32) -> i32 { + divider_signed(n, d).quotient + } + + extern "C" fn __modsi3(n: i32, d: i32) -> i32 { + divider_signed(n, d).remainder + } + + extern "C" fn __divmodsi4(n: i32, d: i32, rem: &mut i32) -> i32 { + let quo_rem = divider_signed(n, d); + *rem = quo_rem.remainder; + quo_rem.quotient + } +}