rp: optimize rom-func-cache for runtime
storing a full function pointer initialized to a resolver trampoline lets us avoid the runtime cost of checking whether we need to do the initialization.
This commit is contained in:
parent
41e90e22e2
commit
837cdacd16
1 changed files with 55 additions and 62 deletions
|
@ -56,50 +56,11 @@ macro_rules! declare_rom_function {
|
||||||
fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
|
fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
|
||||||
$lookup:block
|
$lookup:block
|
||||||
) => {
|
) => {
|
||||||
#[doc = r"Additional access for the `"]
|
declare_rom_function!{
|
||||||
#[doc = stringify!($name)]
|
__internal ,
|
||||||
#[doc = r"` ROM function."]
|
$(#[$outer])*
|
||||||
pub mod $name {
|
fn $name( $($argname: $ty),* ) -> $ret
|
||||||
/// Retrieve a function pointer.
|
$lookup
|
||||||
#[cfg(not(feature = "rom-func-cache"))]
|
|
||||||
pub fn ptr() -> extern "C" fn( $($argname: $ty),* ) -> $ret {
|
|
||||||
let p: *const u32 = $lookup;
|
|
||||||
unsafe {
|
|
||||||
let func : extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
|
|
||||||
func
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retrieve a function pointer.
|
|
||||||
#[cfg(feature = "rom-func-cache")]
|
|
||||||
pub fn ptr() -> extern "C" fn( $($argname: $ty),* ) -> $ret {
|
|
||||||
use core::sync::atomic::{AtomicU16, Ordering};
|
|
||||||
|
|
||||||
// All pointers in the ROM fit in 16 bits, so we don't need a
|
|
||||||
// full width word to store the cached value.
|
|
||||||
static CACHED_PTR: AtomicU16 = AtomicU16::new(0);
|
|
||||||
// This is safe because the lookup will always resolve
|
|
||||||
// to the same value. So even if an interrupt or another
|
|
||||||
// core starts at the same time, it just repeats some
|
|
||||||
// work and eventually writes back the correct value.
|
|
||||||
let p: *const u32 = match CACHED_PTR.load(Ordering::Relaxed) {
|
|
||||||
0 => {
|
|
||||||
let raw: *const u32 = $lookup;
|
|
||||||
CACHED_PTR.store(raw as u16, Ordering::Relaxed);
|
|
||||||
raw
|
|
||||||
},
|
|
||||||
val => val as *const u32,
|
|
||||||
};
|
|
||||||
unsafe {
|
|
||||||
let func : extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
|
|
||||||
func
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$(#[$outer])*
|
|
||||||
pub extern "C" fn $name( $($argname: $ty),* ) -> $ret {
|
|
||||||
$name::ptr()($($argname),*)
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -107,6 +68,21 @@ macro_rules! declare_rom_function {
|
||||||
$(#[$outer:meta])*
|
$(#[$outer:meta])*
|
||||||
unsafe fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
|
unsafe fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
|
||||||
$lookup:block
|
$lookup:block
|
||||||
|
) => {
|
||||||
|
declare_rom_function!{
|
||||||
|
__internal unsafe ,
|
||||||
|
$(#[$outer])*
|
||||||
|
fn $name( $($argname: $ty),* ) -> $ret
|
||||||
|
$lookup
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
(
|
||||||
|
__internal
|
||||||
|
$( $maybe_unsafe:ident )? ,
|
||||||
|
$(#[$outer:meta])*
|
||||||
|
fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
|
||||||
|
$lookup:block
|
||||||
) => {
|
) => {
|
||||||
#[doc = r"Additional access for the `"]
|
#[doc = r"Additional access for the `"]
|
||||||
#[doc = stringify!($name)]
|
#[doc = stringify!($name)]
|
||||||
|
@ -114,43 +90,58 @@ macro_rules! declare_rom_function {
|
||||||
pub mod $name {
|
pub mod $name {
|
||||||
/// Retrieve a function pointer.
|
/// Retrieve a function pointer.
|
||||||
#[cfg(not(feature = "rom-func-cache"))]
|
#[cfg(not(feature = "rom-func-cache"))]
|
||||||
pub fn ptr() -> unsafe extern "C" fn( $($argname: $ty),* ) -> $ret {
|
pub fn ptr() -> $( $maybe_unsafe )? extern "C" fn( $($argname: $ty),* ) -> $ret {
|
||||||
let p: *const u32 = $lookup;
|
let p: *const u32 = $lookup;
|
||||||
unsafe {
|
unsafe {
|
||||||
let func : unsafe extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
|
let func : $( $maybe_unsafe )? extern "C" fn( $($argname: $ty),* ) -> $ret
|
||||||
|
= core::mem::transmute(p);
|
||||||
func
|
func
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "rom-func-cache")]
|
||||||
|
// unlike rp2040-hal we store a full word, containing the full function pointer.
|
||||||
|
// rp2040-hal saves two bytes by storing only the rom offset, at the cost of
|
||||||
|
// having to do an indirection and an atomic operation on every rom call.
|
||||||
|
static mut CACHE: $( $maybe_unsafe )? extern "C" fn( $($argname: $ty),* ) -> $ret
|
||||||
|
= trampoline;
|
||||||
|
|
||||||
|
#[cfg(feature = "rom-func-cache")]
|
||||||
|
$( $maybe_unsafe )? extern "C" fn trampoline( $($argname: $ty),* ) -> $ret {
|
||||||
|
use core::sync::atomic::{compiler_fence, Ordering};
|
||||||
|
|
||||||
|
let p: *const u32 = $lookup;
|
||||||
|
#[allow(unused_unsafe)]
|
||||||
|
unsafe {
|
||||||
|
CACHE = core::mem::transmute(p);
|
||||||
|
compiler_fence(Ordering::Release);
|
||||||
|
CACHE($($argname),*)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Retrieve a function pointer.
|
/// Retrieve a function pointer.
|
||||||
#[cfg(feature = "rom-func-cache")]
|
#[cfg(feature = "rom-func-cache")]
|
||||||
pub fn ptr() -> unsafe extern "C" fn( $($argname: $ty),* ) -> $ret {
|
pub fn ptr() -> $( $maybe_unsafe )? extern "C" fn( $($argname: $ty),* ) -> $ret {
|
||||||
use core::sync::atomic::{AtomicU16, Ordering};
|
use core::sync::atomic::{compiler_fence, Ordering};
|
||||||
|
|
||||||
// All pointers in the ROM fit in 16 bits, so we don't need a
|
|
||||||
// full width word to store the cached value.
|
|
||||||
static CACHED_PTR: AtomicU16 = AtomicU16::new(0);
|
|
||||||
// This is safe because the lookup will always resolve
|
// This is safe because the lookup will always resolve
|
||||||
// to the same value. So even if an interrupt or another
|
// to the same value. So even if an interrupt or another
|
||||||
// core starts at the same time, it just repeats some
|
// core starts at the same time, it just repeats some
|
||||||
// work and eventually writes back the correct value.
|
// work and eventually writes back the correct value.
|
||||||
let p: *const u32 = match CACHED_PTR.load(Ordering::Relaxed) {
|
//
|
||||||
0 => {
|
// We easily get away with using only compiler fences here
|
||||||
let raw: *const u32 = $lookup;
|
// because RP2040 SRAM is not cached. If it were we'd need
|
||||||
CACHED_PTR.store(raw as u16, Ordering::Relaxed);
|
// to make sure updates propagate quickly, or just take the
|
||||||
raw
|
// hit and let each core resolve every function once.
|
||||||
},
|
compiler_fence(Ordering::Acquire);
|
||||||
val => val as *const u32,
|
|
||||||
};
|
|
||||||
unsafe {
|
unsafe {
|
||||||
let func : unsafe extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
|
CACHE
|
||||||
func
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$(#[$outer])*
|
$(#[$outer])*
|
||||||
pub unsafe extern "C" fn $name( $($argname: $ty),* ) -> $ret {
|
pub $( $maybe_unsafe )? extern "C" fn $name( $($argname: $ty),* ) -> $ret {
|
||||||
$name::ptr()($($argname),*)
|
$name::ptr()($($argname),*)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -369,6 +360,7 @@ pub fn fplib_start() -> *const u8 {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// See Table 180 in the RP2040 datasheet for the contents of this table.
|
/// See Table 180 in the RP2040 datasheet for the contents of this table.
|
||||||
|
#[cfg_attr(feature = "rom-func-cache", inline(never))]
|
||||||
pub fn soft_float_table() -> *const usize {
|
pub fn soft_float_table() -> *const usize {
|
||||||
rom_table_lookup(DATA_TABLE, *b"SF")
|
rom_table_lookup(DATA_TABLE, *b"SF")
|
||||||
}
|
}
|
||||||
|
@ -379,6 +371,7 @@ pub fn fplib_end() -> *const u8 {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This entry is only present in the V2 bootrom. See Table 182 in the RP2040 datasheet for the contents of this table.
|
/// This entry is only present in the V2 bootrom. See Table 182 in the RP2040 datasheet for the contents of this table.
|
||||||
|
#[cfg_attr(feature = "rom-func-cache", inline(never))]
|
||||||
pub fn soft_double_table() -> *const usize {
|
pub fn soft_double_table() -> *const usize {
|
||||||
if rom_version_number() < 2 {
|
if rom_version_number() < 2 {
|
||||||
panic!(
|
panic!(
|
||||||
|
|
Loading…
Reference in a new issue