diff --git a/docs/modules/ROOT/pages/embassy_in_the_wild.adoc b/docs/modules/ROOT/pages/embassy_in_the_wild.adoc
index 85ad7f4a2..4b650449f 100644
--- a/docs/modules/ROOT/pages/embassy_in_the_wild.adoc
+++ b/docs/modules/ROOT/pages/embassy_in_the_wild.adoc
@@ -9,3 +9,11 @@ Here are known examples of real-world projects which make use of Embassy. Feel f
 * The link:https://github.com/lora-rs/lora-rs[lora-rs] project includes link:https://github.com/lora-rs/lora-rs/tree/main/examples/stm32l0/src/bin[various standalone examples] for NRF52840, RP2040, STM32L0 and STM32WL
 ** link:https://github.com/matoushybl/air-force-one[Air force one: A simple air quality monitoring system]
 *** Targets nRF52 and uses nrf-softdevice
+
+* The link:https://github.com/schmettow/ylab-edge-go[YLab Edge Go] and link:https://github.com/schmettow/ylab-edge-pro[YLab Edge Pro] projects develop
+firmware (RP2040, STM32) for capturing physiological data in behavioural science research. Included so far are:
+** biopotentials (analog ports)
+** motion capture (6-axis accelerometers)
+** air quality (CO2, temperature, humidity)
+** a companion app for capturing and visualizing data (link:https://github.com/schmettow/ystudio-zero[Ystudio])
+
diff --git a/embassy-nrf/src/buffered_uarte.rs b/embassy-nrf/src/buffered_uarte.rs
index b04c96e09..385d4015e 100644
--- a/embassy-nrf/src/buffered_uarte.rs
+++ b/embassy-nrf/src/buffered_uarte.rs
@@ -20,8 +20,7 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 // Re-export SVD variants to allow user to directly set values
 pub use pac::uarte0::{baudrate::BAUDRATE_A as Baudrate, config::PARITY_A as Parity};
 
-use crate::gpio::sealed::Pin;
-use crate::gpio::{AnyPin, Pin as GpioPin, PselBits};
+use crate::gpio::{AnyPin, Pin as GpioPin, PselBits, SealedPin};
 use crate::interrupt::typelevel::Interrupt;
 use crate::ppi::{
     self, AnyConfigurableChannel, AnyGroup, Channel, ConfigurableChannel, Event, Group, Ppi, PpiGroup, Task,
@@ -30,19 +29,15 @@ use crate::timer::{Instance as TimerInstance, Timer};
 use crate::uarte::{configure, drop_tx_rx, Config, Instance as UarteInstance};
 use crate::{interrupt, pac, Peripheral};
 
-mod sealed {
-    use super::*;
+pub(crate) struct State {
+    tx_buf: RingBuffer,
+    tx_count: AtomicUsize,
 
-    pub struct State {
-        pub tx_buf: RingBuffer,
-        pub tx_count: AtomicUsize,
-
-        pub rx_buf: RingBuffer,
-        pub rx_started: AtomicBool,
-        pub rx_started_count: AtomicU8,
-        pub rx_ended_count: AtomicU8,
-        pub rx_ppi_ch: AtomicU8,
-    }
+    rx_buf: RingBuffer,
+    rx_started: AtomicBool,
+    rx_started_count: AtomicU8,
+    rx_ended_count: AtomicU8,
+    rx_ppi_ch: AtomicU8,
 }
 
 /// UART error.
@@ -53,8 +48,6 @@ pub enum Error {
     // No errors for now
 }
 
-pub(crate) use sealed::State;
-
 impl State {
     pub(crate) const fn new() -> Self {
         Self {
diff --git a/embassy-nrf/src/gpio.rs b/embassy-nrf/src/gpio.rs
index f2353f21d..7b272dca0 100644
--- a/embassy-nrf/src/gpio.rs
+++ b/embassy-nrf/src/gpio.rs
@@ -7,7 +7,6 @@ use core::hint::unreachable_unchecked;
 use cfg_if::cfg_if;
 use embassy_hal_internal::{impl_peripheral, into_ref, PeripheralRef};
 
-use self::sealed::Pin as _;
 #[cfg(feature = "nrf51")]
 use crate::pac::gpio;
 #[cfg(feature = "nrf51")]
@@ -361,59 +360,56 @@ impl<'d> Drop for Flex<'d> {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
+pub(crate) trait SealedPin {
+    fn pin_port(&self) -> u8;
 
-    pub trait Pin {
-        fn pin_port(&self) -> u8;
-
-        #[inline]
-        fn _pin(&self) -> u8 {
-            cfg_if! {
-                if #[cfg(feature = "_gpio-p1")] {
-                    self.pin_port() % 32
-                } else {
-                    self.pin_port()
-                }
+    #[inline]
+    fn _pin(&self) -> u8 {
+        cfg_if! {
+            if #[cfg(feature = "_gpio-p1")] {
+                self.pin_port() % 32
+            } else {
+                self.pin_port()
             }
         }
+    }
 
-        #[inline]
-        fn block(&self) -> &gpio::RegisterBlock {
-            unsafe {
-                match self.pin_port() / 32 {
-                    #[cfg(feature = "nrf51")]
-                    0 => &*pac::GPIO::ptr(),
-                    #[cfg(not(feature = "nrf51"))]
-                    0 => &*pac::P0::ptr(),
-                    #[cfg(feature = "_gpio-p1")]
-                    1 => &*pac::P1::ptr(),
-                    _ => unreachable_unchecked(),
-                }
+    #[inline]
+    fn block(&self) -> &gpio::RegisterBlock {
+        unsafe {
+            match self.pin_port() / 32 {
+                #[cfg(feature = "nrf51")]
+                0 => &*pac::GPIO::ptr(),
+                #[cfg(not(feature = "nrf51"))]
+                0 => &*pac::P0::ptr(),
+                #[cfg(feature = "_gpio-p1")]
+                1 => &*pac::P1::ptr(),
+                _ => unreachable_unchecked(),
             }
         }
+    }
 
-        #[inline]
-        fn conf(&self) -> &gpio::PIN_CNF {
-            &self.block().pin_cnf[self._pin() as usize]
-        }
+    #[inline]
+    fn conf(&self) -> &gpio::PIN_CNF {
+        &self.block().pin_cnf[self._pin() as usize]
+    }
 
-        /// Set the output as high.
-        #[inline]
-        fn set_high(&self) {
-            unsafe { self.block().outset.write(|w| w.bits(1u32 << self._pin())) }
-        }
+    /// Set the output as high.
+    #[inline]
+    fn set_high(&self) {
+        unsafe { self.block().outset.write(|w| w.bits(1u32 << self._pin())) }
+    }
 
-        /// Set the output as low.
-        #[inline]
-        fn set_low(&self) {
-            unsafe { self.block().outclr.write(|w| w.bits(1u32 << self._pin())) }
-        }
+    /// Set the output as low.
+    #[inline]
+    fn set_low(&self) {
+        unsafe { self.block().outclr.write(|w| w.bits(1u32 << self._pin())) }
     }
 }
 
 /// Interface for a Pin that can be configured by an [Input] or [Output] driver, or converted to an [AnyPin].
-pub trait Pin: Peripheral<P = Self> + Into<AnyPin> + sealed::Pin + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Pin: Peripheral<P = Self> + Into<AnyPin> + SealedPin + Sized + 'static {
     /// Number of the pin within the port (0..31)
     #[inline]
     fn pin(&self) -> u8 {
@@ -464,7 +460,7 @@ impl AnyPin {
 
 impl_peripheral!(AnyPin);
 impl Pin for AnyPin {}
-impl sealed::Pin for AnyPin {
+impl SealedPin for AnyPin {
     #[inline]
     fn pin_port(&self) -> u8 {
         self.pin_port
@@ -502,7 +498,7 @@ pub(crate) fn deconfigure_pin(psel_bits: u32) {
 macro_rules! impl_pin {
     ($type:ident, $port_num:expr, $pin_num:expr) => {
         impl crate::gpio::Pin for peripherals::$type {}
-        impl crate::gpio::sealed::Pin for peripherals::$type {
+        impl crate::gpio::SealedPin for peripherals::$type {
             #[inline]
             fn pin_port(&self) -> u8 {
                 $port_num * 32 + $pin_num
diff --git a/embassy-nrf/src/gpiote.rs b/embassy-nrf/src/gpiote.rs
index 4a28279a9..d7f075722 100644
--- a/embassy-nrf/src/gpiote.rs
+++ b/embassy-nrf/src/gpiote.rs
@@ -7,8 +7,7 @@ use core::task::{Context, Poll};
 use embassy_hal_internal::{impl_peripheral, into_ref, Peripheral, PeripheralRef};
 use embassy_sync::waitqueue::AtomicWaker;
 
-use crate::gpio::sealed::Pin as _;
-use crate::gpio::{AnyPin, Flex, Input, Output, Pin as GpioPin};
+use crate::gpio::{AnyPin, Flex, Input, Output, Pin as GpioPin, SealedPin as _};
 use crate::interrupt::InterruptExt;
 use crate::ppi::{Event, Task};
 use crate::{interrupt, pac, peripherals};
@@ -446,14 +445,13 @@ impl<'d> Flex<'d> {
 
 // =======================
 
-mod sealed {
-    pub trait Channel {}
-}
+trait SealedChannel {}
 
 /// GPIOTE channel trait.
 ///
 /// Implemented by all GPIOTE channels.
-pub trait Channel: sealed::Channel + Into<AnyChannel> + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Channel: SealedChannel + Into<AnyChannel> + Sized + 'static {
     /// Get the channel number.
     fn number(&self) -> usize;
 
@@ -478,7 +476,7 @@ pub struct AnyChannel {
     number: u8,
 }
 impl_peripheral!(AnyChannel);
-impl sealed::Channel for AnyChannel {}
+impl SealedChannel for AnyChannel {}
 impl Channel for AnyChannel {
     fn number(&self) -> usize {
         self.number as usize
@@ -487,7 +485,7 @@ impl Channel for AnyChannel {
 
 macro_rules! impl_channel {
     ($type:ident, $number:expr) => {
-        impl sealed::Channel for peripherals::$type {}
+        impl SealedChannel for peripherals::$type {}
         impl Channel for peripherals::$type {
             fn number(&self) -> usize {
                 $number as usize
diff --git a/embassy-nrf/src/i2s.rs b/embassy-nrf/src/i2s.rs
index 907acdf4c..966271ed9 100644
--- a/embassy-nrf/src/i2s.rs
+++ b/embassy-nrf/src/i2s.rs
@@ -6,11 +6,12 @@ use core::future::poll_fn;
 use core::marker::PhantomData;
 use core::mem::size_of;
 use core::ops::{Deref, DerefMut};
-use core::sync::atomic::{compiler_fence, Ordering};
+use core::sync::atomic::{compiler_fence, AtomicBool, Ordering};
 use core::task::Poll;
 
 use embassy_hal_internal::drop::OnDrop;
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 
 use crate::gpio::{AnyPin, Pin as GpioPin};
 use crate::interrupt::typelevel::Interrupt;
@@ -1140,50 +1141,45 @@ impl<S: Sample, const NB: usize, const NS: usize> MultiBuffering<S, NB, NS> {
     }
 }
 
-pub(crate) mod sealed {
-    use core::sync::atomic::AtomicBool;
+/// Peripheral static state
+pub(crate) struct State {
+    started: AtomicBool,
+    rx_waker: AtomicWaker,
+    tx_waker: AtomicWaker,
+    stop_waker: AtomicWaker,
+}
 
-    use embassy_sync::waitqueue::AtomicWaker;
-
-    /// Peripheral static state
-    pub struct State {
-        pub started: AtomicBool,
-        pub rx_waker: AtomicWaker,
-        pub tx_waker: AtomicWaker,
-        pub stop_waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                started: AtomicBool::new(false),
-                rx_waker: AtomicWaker::new(),
-                tx_waker: AtomicWaker::new(),
-                stop_waker: AtomicWaker::new(),
-            }
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            started: AtomicBool::new(false),
+            rx_waker: AtomicWaker::new(),
+            tx_waker: AtomicWaker::new(),
+            stop_waker: AtomicWaker::new(),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static crate::pac::i2s::RegisterBlock;
-        fn state() -> &'static State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static crate::pac::i2s::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 /// I2S peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_i2s {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::i2s::sealed::Instance for peripherals::$type {
+        impl crate::i2s::SealedInstance for peripherals::$type {
             fn regs() -> &'static crate::pac::i2s::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::i2s::sealed::State {
-                static STATE: crate::i2s::sealed::State = crate::i2s::sealed::State::new();
+            fn state() -> &'static crate::i2s::State {
+                static STATE: crate::i2s::State = crate::i2s::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/pdm.rs b/embassy-nrf/src/pdm.rs
index 754d38310..ef2662c85 100644
--- a/embassy-nrf/src/pdm.rs
+++ b/embassy-nrf/src/pdm.rs
@@ -9,11 +9,11 @@ use core::task::Poll;
 
 use embassy_hal_internal::drop::OnDrop;
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 use fixed::types::I7F1;
 
 use crate::chip::EASY_DMA_SIZE;
-use crate::gpio::sealed::Pin;
-use crate::gpio::{AnyPin, Pin as GpioPin};
+use crate::gpio::{AnyPin, Pin as GpioPin, SealedPin};
 use crate::interrupt::typelevel::Interrupt;
 use crate::pac::pdm::mode::{EDGE_A, OPERATION_A};
 pub use crate::pac::pdm::pdmclkctrl::FREQ_A as Frequency;
@@ -451,42 +451,39 @@ impl<'d, T: Instance> Drop for Pdm<'d, T> {
     }
 }
 
-pub(crate) mod sealed {
-    use embassy_sync::waitqueue::AtomicWaker;
+/// Peripheral static state
+pub(crate) struct State {
+    waker: AtomicWaker,
+}
 
-    /// Peripheral static state
-    pub struct State {
-        pub waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                waker: AtomicWaker::new(),
-            }
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static crate::pac::pdm::RegisterBlock;
-        fn state() -> &'static State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static crate::pac::pdm::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 /// PDM peripheral instance
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_pdm {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::pdm::sealed::Instance for peripherals::$type {
+        impl crate::pdm::SealedInstance for peripherals::$type {
             fn regs() -> &'static crate::pac::pdm::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::pdm::sealed::State {
-                static STATE: crate::pdm::sealed::State = crate::pdm::sealed::State::new();
+            fn state() -> &'static crate::pdm::State {
+                static STATE: crate::pdm::State = crate::pdm::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/ppi/mod.rs b/embassy-nrf/src/ppi/mod.rs
index f5764b8b7..13f7dcc83 100644
--- a/embassy-nrf/src/ppi/mod.rs
+++ b/embassy-nrf/src/ppi/mod.rs
@@ -210,13 +210,12 @@ unsafe impl Send for Event<'_> {}
 // ======================
 //       traits
 
-pub(crate) mod sealed {
-    pub trait Channel {}
-    pub trait Group {}
-}
+pub(crate) trait SealedChannel {}
+pub(crate) trait SealedGroup {}
 
 /// Interface for PPI channels.
-pub trait Channel: sealed::Channel + Peripheral<P = Self> + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Channel: SealedChannel + Peripheral<P = Self> + Sized + 'static {
     /// Returns the number of the channel
     fn number(&self) -> usize;
 }
@@ -234,7 +233,8 @@ pub trait StaticChannel: Channel + Into<AnyStaticChannel> {
 }
 
 /// Interface for a group of PPI channels.
-pub trait Group: sealed::Group + Peripheral<P = Self> + Into<AnyGroup> + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Group: SealedGroup + Peripheral<P = Self> + Into<AnyGroup> + Sized + 'static {
     /// Returns the number of the group.
     fn number(&self) -> usize;
     /// Convert into a type erased group.
@@ -254,7 +254,7 @@ pub struct AnyStaticChannel {
     pub(crate) number: u8,
 }
 impl_peripheral!(AnyStaticChannel);
-impl sealed::Channel for AnyStaticChannel {}
+impl SealedChannel for AnyStaticChannel {}
 impl Channel for AnyStaticChannel {
     fn number(&self) -> usize {
         self.number as usize
@@ -272,7 +272,7 @@ pub struct AnyConfigurableChannel {
     pub(crate) number: u8,
 }
 impl_peripheral!(AnyConfigurableChannel);
-impl sealed::Channel for AnyConfigurableChannel {}
+impl SealedChannel for AnyConfigurableChannel {}
 impl Channel for AnyConfigurableChannel {
     fn number(&self) -> usize {
         self.number as usize
@@ -287,7 +287,7 @@ impl ConfigurableChannel for AnyConfigurableChannel {
 #[cfg(not(feature = "nrf51"))]
 macro_rules! impl_ppi_channel {
     ($type:ident, $number:expr) => {
-        impl crate::ppi::sealed::Channel for peripherals::$type {}
+        impl crate::ppi::SealedChannel for peripherals::$type {}
         impl crate::ppi::Channel for peripherals::$type {
             fn number(&self) -> usize {
                 $number
@@ -338,7 +338,7 @@ pub struct AnyGroup {
     number: u8,
 }
 impl_peripheral!(AnyGroup);
-impl sealed::Group for AnyGroup {}
+impl SealedGroup for AnyGroup {}
 impl Group for AnyGroup {
     fn number(&self) -> usize {
         self.number as usize
@@ -347,7 +347,7 @@ impl Group for AnyGroup {
 
 macro_rules! impl_group {
     ($type:ident, $number:expr) => {
-        impl sealed::Group for peripherals::$type {}
+        impl SealedGroup for peripherals::$type {}
         impl Group for peripherals::$type {
             fn number(&self) -> usize {
                 $number
diff --git a/embassy-nrf/src/pwm.rs b/embassy-nrf/src/pwm.rs
index 833370d4b..1318d3f94 100644
--- a/embassy-nrf/src/pwm.rs
+++ b/embassy-nrf/src/pwm.rs
@@ -6,8 +6,7 @@ use core::sync::atomic::{compiler_fence, Ordering};
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
 
-use crate::gpio::sealed::Pin as _;
-use crate::gpio::{AnyPin, Pin as GpioPin, PselBits};
+use crate::gpio::{AnyPin, Pin as GpioPin, PselBits, SealedPin as _};
 use crate::ppi::{Event, Task};
 use crate::util::slice_in_ram_or;
 use crate::{interrupt, pac, Peripheral};
@@ -847,23 +846,20 @@ impl<'a, T: Instance> Drop for SimplePwm<'a, T> {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
-
-    pub trait Instance {
-        fn regs() -> &'static pac::pwm0::RegisterBlock;
-    }
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static pac::pwm0::RegisterBlock;
 }
 
 /// PWM peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_pwm {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::pwm::sealed::Instance for peripherals::$type {
+        impl crate::pwm::SealedInstance for peripherals::$type {
             fn regs() -> &'static pac::pwm0::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
diff --git a/embassy-nrf/src/qdec.rs b/embassy-nrf/src/qdec.rs
index 9455ec925..7409c9b1e 100644
--- a/embassy-nrf/src/qdec.rs
+++ b/embassy-nrf/src/qdec.rs
@@ -7,9 +7,9 @@ use core::marker::PhantomData;
 use core::task::Poll;
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 
-use crate::gpio::sealed::Pin as _;
-use crate::gpio::{AnyPin, Pin as GpioPin};
+use crate::gpio::{AnyPin, Pin as GpioPin, SealedPin as _};
 use crate::interrupt::typelevel::Interrupt;
 use crate::{interrupt, Peripheral};
 
@@ -245,42 +245,39 @@ pub enum LedPolarity {
     ActiveLow,
 }
 
-pub(crate) mod sealed {
-    use embassy_sync::waitqueue::AtomicWaker;
+/// Peripheral static state
+pub(crate) struct State {
+    waker: AtomicWaker,
+}
 
-    /// Peripheral static state
-    pub struct State {
-        pub waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                waker: AtomicWaker::new(),
-            }
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static crate::pac::qdec::RegisterBlock;
-        fn state() -> &'static State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static crate::pac::qdec::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 /// qdec peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_qdec {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::qdec::sealed::Instance for peripherals::$type {
+        impl crate::qdec::SealedInstance for peripherals::$type {
             fn regs() -> &'static crate::pac::qdec::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::qdec::sealed::State {
-                static STATE: crate::qdec::sealed::State = crate::qdec::sealed::State::new();
+            fn state() -> &'static crate::qdec::State {
+                static STATE: crate::qdec::State = crate::qdec::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/qspi.rs b/embassy-nrf/src/qspi.rs
index 4134a4c87..060fe72cd 100755
--- a/embassy-nrf/src/qspi.rs
+++ b/embassy-nrf/src/qspi.rs
@@ -9,6 +9,7 @@ use core::task::Poll;
 
 use embassy_hal_internal::drop::OnDrop;
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 use embedded_storage::nor_flash::{ErrorType, NorFlash, NorFlashError, NorFlashErrorKind, ReadNorFlash};
 
 use crate::gpio::{self, Pin as GpioPin};
@@ -652,42 +653,39 @@ mod _eh1 {
     impl<'d, T: Instance> embedded_storage_async::nor_flash::MultiwriteNorFlash for Qspi<'d, T> {}
 }
 
-pub(crate) mod sealed {
-    use embassy_sync::waitqueue::AtomicWaker;
+/// Peripheral static state
+pub(crate) struct State {
+    waker: AtomicWaker,
+}
 
-    /// Peripheral static state
-    pub struct State {
-        pub waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                waker: AtomicWaker::new(),
-            }
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static crate::pac::qspi::RegisterBlock;
-        fn state() -> &'static State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static crate::pac::qspi::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 /// QSPI peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_qspi {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::qspi::sealed::Instance for peripherals::$type {
+        impl crate::qspi::SealedInstance for peripherals::$type {
             fn regs() -> &'static crate::pac::qspi::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::qspi::sealed::State {
-                static STATE: crate::qspi::sealed::State = crate::qspi::sealed::State::new();
+            fn state() -> &'static crate::qspi::State {
+                static STATE: crate::qspi::State = crate::qspi::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/radio/mod.rs b/embassy-nrf/src/radio/mod.rs
index 4c0cc3280..8edca1df2 100644
--- a/embassy-nrf/src/radio/mod.rs
+++ b/embassy-nrf/src/radio/mod.rs
@@ -19,6 +19,7 @@ pub mod ieee802154;
 
 use core::marker::PhantomData;
 
+use embassy_sync::waitqueue::AtomicWaker;
 use pac::radio::state::STATE_A as RadioState;
 pub use pac::radio::txpower::TXPOWER_A as TxPower;
 
@@ -56,36 +57,32 @@ impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandl
     }
 }
 
-pub(crate) mod sealed {
-    use embassy_sync::waitqueue::AtomicWaker;
-
-    pub struct State {
-        /// end packet transmission or reception
-        pub event_waker: AtomicWaker,
-    }
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                event_waker: AtomicWaker::new(),
-            }
+pub(crate) struct State {
+    /// end packet transmission or reception
+    event_waker: AtomicWaker,
+}
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            event_waker: AtomicWaker::new(),
         }
     }
+}
 
-    pub trait Instance {
-        fn regs() -> &'static crate::pac::radio::RegisterBlock;
-        fn state() -> &'static State;
-    }
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static crate::pac::radio::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 macro_rules! impl_radio {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::radio::sealed::Instance for peripherals::$type {
+        impl crate::radio::SealedInstance for peripherals::$type {
             fn regs() -> &'static pac::radio::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
 
-            fn state() -> &'static crate::radio::sealed::State {
-                static STATE: crate::radio::sealed::State = crate::radio::sealed::State::new();
+            fn state() -> &'static crate::radio::State {
+                static STATE: crate::radio::State = crate::radio::State::new();
                 &STATE
             }
         }
@@ -96,7 +93,8 @@ macro_rules! impl_radio {
 }
 
 /// Radio peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
diff --git a/embassy-nrf/src/rng.rs b/embassy-nrf/src/rng.rs
index 1c463fb7c..ff61e08f3 100644
--- a/embassy-nrf/src/rng.rs
+++ b/embassy-nrf/src/rng.rs
@@ -2,13 +2,16 @@
 
 #![macro_use]
 
+use core::cell::{RefCell, RefMut};
 use core::future::poll_fn;
 use core::marker::PhantomData;
 use core::ptr;
 use core::task::Poll;
 
+use critical_section::{CriticalSection, Mutex};
 use embassy_hal_internal::drop::OnDrop;
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::WakerRegistration;
 
 use crate::interrupt::typelevel::Interrupt;
 use crate::{interrupt, Peripheral};
@@ -205,73 +208,61 @@ impl<'d, T: Instance> rand_core::RngCore for Rng<'d, T> {
 
 impl<'d, T: Instance> rand_core::CryptoRng for Rng<'d, T> {}
 
-pub(crate) mod sealed {
-    use core::cell::{Ref, RefCell, RefMut};
+/// Peripheral static state
+pub(crate) struct State {
+    inner: Mutex<RefCell<InnerState>>,
+}
 
-    use critical_section::{CriticalSection, Mutex};
-    use embassy_sync::waitqueue::WakerRegistration;
+struct InnerState {
+    ptr: *mut u8,
+    end: *mut u8,
+    waker: WakerRegistration,
+}
 
-    use super::*;
+unsafe impl Send for InnerState {}
 
-    /// Peripheral static state
-    pub struct State {
-        inner: Mutex<RefCell<InnerState>>,
-    }
-
-    pub struct InnerState {
-        pub ptr: *mut u8,
-        pub end: *mut u8,
-        pub waker: WakerRegistration,
-    }
-
-    unsafe impl Send for InnerState {}
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                inner: Mutex::new(RefCell::new(InnerState::new())),
-            }
-        }
-
-        pub fn borrow<'cs>(&'cs self, cs: CriticalSection<'cs>) -> Ref<'cs, InnerState> {
-            self.inner.borrow(cs).borrow()
-        }
-
-        pub fn borrow_mut<'cs>(&'cs self, cs: CriticalSection<'cs>) -> RefMut<'cs, InnerState> {
-            self.inner.borrow(cs).borrow_mut()
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            inner: Mutex::new(RefCell::new(InnerState::new())),
         }
     }
 
-    impl InnerState {
-        pub const fn new() -> Self {
-            Self {
-                ptr: ptr::null_mut(),
-                end: ptr::null_mut(),
-                waker: WakerRegistration::new(),
-            }
-        }
-    }
-
-    pub trait Instance {
-        fn regs() -> &'static crate::pac::rng::RegisterBlock;
-        fn state() -> &'static State;
+    fn borrow_mut<'cs>(&'cs self, cs: CriticalSection<'cs>) -> RefMut<'cs, InnerState> {
+        self.inner.borrow(cs).borrow_mut()
     }
 }
 
+impl InnerState {
+    const fn new() -> Self {
+        Self {
+            ptr: ptr::null_mut(),
+            end: ptr::null_mut(),
+            waker: WakerRegistration::new(),
+        }
+    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static crate::pac::rng::RegisterBlock;
+    fn state() -> &'static State;
+}
+
 /// RNG peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_rng {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::rng::sealed::Instance for peripherals::$type {
+        impl crate::rng::SealedInstance for peripherals::$type {
             fn regs() -> &'static crate::pac::rng::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::rng::sealed::State {
-                static STATE: crate::rng::sealed::State = crate::rng::sealed::State::new();
+            fn state() -> &'static crate::rng::State {
+                static STATE: crate::rng::State = crate::rng::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/saadc.rs b/embassy-nrf/src/saadc.rs
index 662b05614..17c65fa3e 100644
--- a/embassy-nrf/src/saadc.rs
+++ b/embassy-nrf/src/saadc.rs
@@ -16,7 +16,6 @@ pub(crate) use saadc::ch::pselp::PSELP_A as InputChannel;
 use saadc::oversample::OVERSAMPLE_A;
 use saadc::resolution::VAL_A;
 
-use self::sealed::Input as _;
 use crate::interrupt::InterruptExt;
 use crate::ppi::{ConfigurableChannel, Event, Ppi, Task};
 use crate::timer::{Frequency, Instance as TimerInstance, Timer};
@@ -662,16 +661,13 @@ pub enum Resolution {
     _14BIT = 3,
 }
 
-pub(crate) mod sealed {
-    use super::*;
-
-    pub trait Input {
-        fn channel(&self) -> InputChannel;
-    }
+pub(crate) trait SealedInput {
+    fn channel(&self) -> InputChannel;
 }
 
 /// An input that can be used as either or negative end of a ADC differential in the SAADC periperhal.
-pub trait Input: sealed::Input + Into<AnyInput> + Peripheral<P = Self> + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Input: SealedInput + Into<AnyInput> + Peripheral<P = Self> + Sized + 'static {
     /// Convert this SAADC input to a type-erased `AnyInput`.
     ///
     /// This allows using several inputs  in situations that might require
@@ -693,7 +689,7 @@ pub struct AnyInput {
 
 impl_peripheral!(AnyInput);
 
-impl sealed::Input for AnyInput {
+impl SealedInput for AnyInput {
     fn channel(&self) -> InputChannel {
         self.channel
     }
@@ -706,7 +702,7 @@ macro_rules! impl_saadc_input {
         impl_saadc_input!(@local, crate::peripherals::$pin, $ch);
     };
     (@local, $pin:ty, $ch:ident) => {
-        impl crate::saadc::sealed::Input for $pin {
+        impl crate::saadc::SealedInput for $pin {
             fn channel(&self) -> crate::saadc::InputChannel {
                 crate::saadc::InputChannel::$ch
             }
diff --git a/embassy-nrf/src/spim.rs b/embassy-nrf/src/spim.rs
index c45d45e68..373f22642 100644
--- a/embassy-nrf/src/spim.rs
+++ b/embassy-nrf/src/spim.rs
@@ -4,18 +4,20 @@
 
 use core::future::poll_fn;
 use core::marker::PhantomData;
+#[cfg(feature = "_nrf52832_anomaly_109")]
+use core::sync::atomic::AtomicU8;
 use core::sync::atomic::{compiler_fence, Ordering};
 use core::task::Poll;
 
 use embassy_embedded_hal::SetConfig;
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 pub use embedded_hal_02::spi::{Mode, Phase, Polarity, MODE_0, MODE_1, MODE_2, MODE_3};
 pub use pac::spim0::config::ORDER_A as BitOrder;
 pub use pac::spim0::frequency::FREQUENCY_A as Frequency;
 
 use crate::chip::{EASY_DMA_SIZE, FORCE_COPY_BUFFER_SIZE};
-use crate::gpio::sealed::Pin as _;
-use crate::gpio::{self, convert_drive, AnyPin, OutputDrive, Pin as GpioPin, PselBits};
+use crate::gpio::{self, convert_drive, AnyPin, OutputDrive, Pin as GpioPin, PselBits, SealedPin as _};
 use crate::interrupt::typelevel::Interrupt;
 use crate::util::{slice_in_ram_or, slice_ptr_len, slice_ptr_parts, slice_ptr_parts_mut};
 use crate::{interrupt, pac, Peripheral};
@@ -487,54 +489,46 @@ impl<'d, T: Instance> Drop for Spim<'d, T> {
     }
 }
 
-pub(crate) mod sealed {
+pub(crate) struct State {
+    waker: AtomicWaker,
     #[cfg(feature = "_nrf52832_anomaly_109")]
-    use core::sync::atomic::AtomicU8;
+    rx: AtomicU8,
+    #[cfg(feature = "_nrf52832_anomaly_109")]
+    tx: AtomicU8,
+}
 
-    use embassy_sync::waitqueue::AtomicWaker;
-
-    use super::*;
-
-    pub struct State {
-        pub waker: AtomicWaker,
-        #[cfg(feature = "_nrf52832_anomaly_109")]
-        pub rx: AtomicU8,
-        #[cfg(feature = "_nrf52832_anomaly_109")]
-        pub tx: AtomicU8,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                waker: AtomicWaker::new(),
-                #[cfg(feature = "_nrf52832_anomaly_109")]
-                rx: AtomicU8::new(0),
-                #[cfg(feature = "_nrf52832_anomaly_109")]
-                tx: AtomicU8::new(0),
-            }
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
+            #[cfg(feature = "_nrf52832_anomaly_109")]
+            rx: AtomicU8::new(0),
+            #[cfg(feature = "_nrf52832_anomaly_109")]
+            tx: AtomicU8::new(0),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static pac::spim0::RegisterBlock;
-        fn state() -> &'static State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static pac::spim0::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 /// SPIM peripheral instance
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_spim {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::spim::sealed::Instance for peripherals::$type {
+        impl crate::spim::SealedInstance for peripherals::$type {
             fn regs() -> &'static pac::spim0::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::spim::sealed::State {
-                static STATE: crate::spim::sealed::State = crate::spim::sealed::State::new();
+            fn state() -> &'static crate::spim::State {
+                static STATE: crate::spim::State = crate::spim::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/spis.rs b/embassy-nrf/src/spis.rs
index 772ca40cc..47bbeaf77 100644
--- a/embassy-nrf/src/spis.rs
+++ b/embassy-nrf/src/spis.rs
@@ -8,12 +8,12 @@ use core::task::Poll;
 
 use embassy_embedded_hal::SetConfig;
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 pub use embedded_hal_02::spi::{Mode, Phase, Polarity, MODE_0, MODE_1, MODE_2, MODE_3};
 pub use pac::spis0::config::ORDER_A as BitOrder;
 
 use crate::chip::{EASY_DMA_SIZE, FORCE_COPY_BUFFER_SIZE};
-use crate::gpio::sealed::Pin as _;
-use crate::gpio::{self, AnyPin, Pin as GpioPin};
+use crate::gpio::{self, AnyPin, Pin as GpioPin, SealedPin as _};
 use crate::interrupt::typelevel::Interrupt;
 use crate::util::{slice_in_ram_or, slice_ptr_parts, slice_ptr_parts_mut};
 use crate::{interrupt, pac, Peripheral};
@@ -456,43 +456,38 @@ impl<'d, T: Instance> Drop for Spis<'d, T> {
     }
 }
 
-pub(crate) mod sealed {
-    use embassy_sync::waitqueue::AtomicWaker;
+pub(crate) struct State {
+    waker: AtomicWaker,
+}
 
-    use super::*;
-
-    pub struct State {
-        pub waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                waker: AtomicWaker::new(),
-            }
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static pac::spis0::RegisterBlock;
-        fn state() -> &'static State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static pac::spis0::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 /// SPIS peripheral instance
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_spis {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::spis::sealed::Instance for peripherals::$type {
+        impl crate::spis::SealedInstance for peripherals::$type {
             fn regs() -> &'static pac::spis0::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::spis::sealed::State {
-                static STATE: crate::spis::sealed::State = crate::spis::sealed::State::new();
+            fn state() -> &'static crate::spis::State {
+                static STATE: crate::spis::State = crate::spis::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/timer.rs b/embassy-nrf/src/timer.rs
index 2970ad3f2..ac5328ded 100644
--- a/embassy-nrf/src/timer.rs
+++ b/embassy-nrf/src/timer.rs
@@ -11,30 +11,25 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 use crate::ppi::{Event, Task};
 use crate::{pac, Peripheral};
 
-pub(crate) mod sealed {
-
-    use super::*;
-
-    pub trait Instance {
-        /// The number of CC registers this instance has.
-        const CCS: usize;
-        fn regs() -> &'static pac::timer0::RegisterBlock;
-    }
-    pub trait ExtendedInstance {}
+pub(crate) trait SealedInstance {
+    /// The number of CC registers this instance has.
+    const CCS: usize;
+    fn regs() -> &'static pac::timer0::RegisterBlock;
 }
 
 /// Basic Timer instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral.
     type Interrupt: crate::interrupt::typelevel::Interrupt;
 }
 
 /// Extended timer instance.
-pub trait ExtendedInstance: Instance + sealed::ExtendedInstance {}
+pub trait ExtendedInstance: Instance {}
 
 macro_rules! impl_timer {
     ($type:ident, $pac_type:ident, $irq:ident, $ccs:literal) => {
-        impl crate::timer::sealed::Instance for peripherals::$type {
+        impl crate::timer::SealedInstance for peripherals::$type {
             const CCS: usize = $ccs;
             fn regs() -> &'static pac::timer0::RegisterBlock {
                 unsafe { &*(pac::$pac_type::ptr() as *const pac::timer0::RegisterBlock) }
@@ -49,7 +44,6 @@ macro_rules! impl_timer {
     };
     ($type:ident, $pac_type:ident, $irq:ident, extended) => {
         impl_timer!($type, $pac_type, $irq, 6);
-        impl crate::timer::sealed::ExtendedInstance for peripherals::$type {}
         impl crate::timer::ExtendedInstance for peripherals::$type {}
     };
 }
diff --git a/embassy-nrf/src/twim.rs b/embassy-nrf/src/twim.rs
index 24810a08c..c64743ecc 100644
--- a/embassy-nrf/src/twim.rs
+++ b/embassy-nrf/src/twim.rs
@@ -727,41 +727,38 @@ impl<'a, T: Instance> Drop for Twim<'a, T> {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
+pub(crate) struct State {
+    end_waker: AtomicWaker,
+}
 
-    pub struct State {
-        pub end_waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                end_waker: AtomicWaker::new(),
-            }
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            end_waker: AtomicWaker::new(),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static pac::twim0::RegisterBlock;
-        fn state() -> &'static State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static pac::twim0::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 /// TWIM peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_twim {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::twim::sealed::Instance for peripherals::$type {
+        impl crate::twim::SealedInstance for peripherals::$type {
             fn regs() -> &'static pac::twim0::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::twim::sealed::State {
-                static STATE: crate::twim::sealed::State = crate::twim::sealed::State::new();
+            fn state() -> &'static crate::twim::State {
+                static STATE: crate::twim::State = crate::twim::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/twis.rs b/embassy-nrf/src/twis.rs
index 415150447..f3eab008f 100644
--- a/embassy-nrf/src/twis.rs
+++ b/embassy-nrf/src/twis.rs
@@ -754,41 +754,38 @@ impl<'a, T: Instance> Drop for Twis<'a, T> {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
+pub(crate) struct State {
+    waker: AtomicWaker,
+}
 
-    pub struct State {
-        pub waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                waker: AtomicWaker::new(),
-            }
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static pac::twis0::RegisterBlock;
-        fn state() -> &'static State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static pac::twis0::RegisterBlock;
+    fn state() -> &'static State;
 }
 
 /// TWIS peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_twis {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::twis::sealed::Instance for peripherals::$type {
+        impl crate::twis::SealedInstance for peripherals::$type {
             fn regs() -> &'static pac::twis0::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::twis::sealed::State {
-                static STATE: crate::twis::sealed::State = crate::twis::sealed::State::new();
+            fn state() -> &'static crate::twis::State {
+                static STATE: crate::twis::State = crate::twis::State::new();
                 &STATE
             }
         }
diff --git a/embassy-nrf/src/uarte.rs b/embassy-nrf/src/uarte.rs
index cbd5dccbc..fa0a773a8 100644
--- a/embassy-nrf/src/uarte.rs
+++ b/embassy-nrf/src/uarte.rs
@@ -15,18 +15,18 @@
 
 use core::future::poll_fn;
 use core::marker::PhantomData;
-use core::sync::atomic::{compiler_fence, Ordering};
+use core::sync::atomic::{compiler_fence, AtomicU8, Ordering};
 use core::task::Poll;
 
 use embassy_hal_internal::drop::OnDrop;
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 use pac::uarte0::RegisterBlock;
 // Re-export SVD variants to allow user to directly set values.
 pub use pac::uarte0::{baudrate::BAUDRATE_A as Baudrate, config::PARITY_A as Parity};
 
 use crate::chip::{EASY_DMA_SIZE, FORCE_COPY_BUFFER_SIZE};
-use crate::gpio::sealed::Pin as _;
-use crate::gpio::{self, AnyPin, Pin as GpioPin, PselBits};
+use crate::gpio::{self, AnyPin, Pin as GpioPin, PselBits, SealedPin as _};
 use crate::interrupt::typelevel::Interrupt;
 use crate::ppi::{AnyConfigurableChannel, ConfigurableChannel, Event, Ppi, Task};
 use crate::timer::{Frequency, Instance as TimerInstance, Timer};
@@ -939,7 +939,7 @@ pub(crate) fn apply_workaround_for_enable_anomaly(r: &crate::pac::uarte0::Regist
     }
 }
 
-pub(crate) fn drop_tx_rx(r: &pac::uarte0::RegisterBlock, s: &sealed::State) {
+pub(crate) fn drop_tx_rx(r: &pac::uarte0::RegisterBlock, s: &State) {
     if s.tx_rx_refcount.fetch_sub(1, Ordering::Relaxed) == 1 {
         // Finally we can disable, and we do so for the peripheral
         // i.e. not just rx concerns.
@@ -954,49 +954,42 @@ pub(crate) fn drop_tx_rx(r: &pac::uarte0::RegisterBlock, s: &sealed::State) {
     }
 }
 
-pub(crate) mod sealed {
-    use core::sync::atomic::AtomicU8;
-
-    use embassy_sync::waitqueue::AtomicWaker;
-
-    use super::*;
-
-    pub struct State {
-        pub rx_waker: AtomicWaker,
-        pub tx_waker: AtomicWaker,
-        pub tx_rx_refcount: AtomicU8,
-    }
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                rx_waker: AtomicWaker::new(),
-                tx_waker: AtomicWaker::new(),
-                tx_rx_refcount: AtomicU8::new(0),
-            }
+pub(crate) struct State {
+    pub(crate) rx_waker: AtomicWaker,
+    pub(crate) tx_waker: AtomicWaker,
+    pub(crate) tx_rx_refcount: AtomicU8,
+}
+impl State {
+    pub(crate) const fn new() -> Self {
+        Self {
+            rx_waker: AtomicWaker::new(),
+            tx_waker: AtomicWaker::new(),
+            tx_rx_refcount: AtomicU8::new(0),
         }
     }
-
-    pub trait Instance {
-        fn regs() -> &'static pac::uarte0::RegisterBlock;
-        fn state() -> &'static State;
-        fn buffered_state() -> &'static crate::buffered_uarte::State;
-    }
+}
+
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static pac::uarte0::RegisterBlock;
+    fn state() -> &'static State;
+    fn buffered_state() -> &'static crate::buffered_uarte::State;
 }
 
 /// UARTE peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_uarte {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::uarte::sealed::Instance for peripherals::$type {
+        impl crate::uarte::SealedInstance for peripherals::$type {
             fn regs() -> &'static pac::uarte0::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
-            fn state() -> &'static crate::uarte::sealed::State {
-                static STATE: crate::uarte::sealed::State = crate::uarte::sealed::State::new();
+            fn state() -> &'static crate::uarte::State {
+                static STATE: crate::uarte::State = crate::uarte::State::new();
                 &STATE
             }
             fn buffered_state() -> &'static crate::buffered_uarte::State {
diff --git a/embassy-nrf/src/usb/mod.rs b/embassy-nrf/src/usb/mod.rs
index e26b49db3..09cf87e97 100644
--- a/embassy-nrf/src/usb/mod.rs
+++ b/embassy-nrf/src/usb/mod.rs
@@ -793,23 +793,20 @@ impl Allocator {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
-
-    pub trait Instance {
-        fn regs() -> &'static pac::usbd::RegisterBlock;
-    }
+pub(crate) trait SealedInstance {
+    fn regs() -> &'static pac::usbd::RegisterBlock;
 }
 
 /// USB peripheral instance.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + 'static + Send {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
 macro_rules! impl_usb {
     ($type:ident, $pac_type:ident, $irq:ident) => {
-        impl crate::usb::sealed::Instance for peripherals::$type {
+        impl crate::usb::SealedInstance for peripherals::$type {
             fn regs() -> &'static pac::usbd::RegisterBlock {
                 unsafe { &*pac::$pac_type::ptr() }
             }
diff --git a/embassy-rp/src/adc.rs b/embassy-rp/src/adc.rs
index 4c01fe195..101c5b71f 100644
--- a/embassy-rp/src/adc.rs
+++ b/embassy-rp/src/adc.rs
@@ -8,8 +8,7 @@ use core::task::Poll;
 use embassy_hal_internal::{into_ref, PeripheralRef};
 use embassy_sync::waitqueue::AtomicWaker;
 
-use crate::gpio::sealed::Pin as GpioPin;
-use crate::gpio::{self, AnyPin, Pull};
+use crate::gpio::{self, AnyPin, Pull, SealedPin as GpioPin};
 use crate::interrupt::typelevel::Binding;
 use crate::interrupt::InterruptExt;
 use crate::peripherals::{ADC, ADC_TEMP_SENSOR};
@@ -334,29 +333,28 @@ impl interrupt::typelevel::Handler<interrupt::typelevel::ADC_IRQ_FIFO> for Inter
     }
 }
 
-mod sealed {
-    pub trait AdcSample: crate::dma::Word {}
-
-    pub trait AdcChannel {}
-}
+trait SealedAdcSample: crate::dma::Word {}
+trait SealedAdcChannel {}
 
 /// ADC sample.
-pub trait AdcSample: sealed::AdcSample {}
+#[allow(private_bounds)]
+pub trait AdcSample: SealedAdcSample {}
 
-impl sealed::AdcSample for u16 {}
+impl SealedAdcSample for u16 {}
 impl AdcSample for u16 {}
 
-impl sealed::AdcSample for u8 {}
+impl SealedAdcSample for u8 {}
 impl AdcSample for u8 {}
 
 /// ADC channel.
-pub trait AdcChannel: sealed::AdcChannel {}
+#[allow(private_bounds)]
+pub trait AdcChannel: SealedAdcChannel {}
 /// ADC pin.
 pub trait AdcPin: AdcChannel + gpio::Pin {}
 
 macro_rules! impl_pin {
     ($pin:ident, $channel:expr) => {
-        impl sealed::AdcChannel for peripherals::$pin {}
+        impl SealedAdcChannel for peripherals::$pin {}
         impl AdcChannel for peripherals::$pin {}
         impl AdcPin for peripherals::$pin {}
     };
@@ -367,5 +365,5 @@ impl_pin!(PIN_27, 1);
 impl_pin!(PIN_28, 2);
 impl_pin!(PIN_29, 3);
 
-impl sealed::AdcChannel for peripherals::ADC_TEMP_SENSOR {}
+impl SealedAdcChannel for peripherals::ADC_TEMP_SENSOR {}
 impl AdcChannel for peripherals::ADC_TEMP_SENSOR {}
diff --git a/embassy-rp/src/clocks.rs b/embassy-rp/src/clocks.rs
index b7f6aeac9..bedb79464 100644
--- a/embassy-rp/src/clocks.rs
+++ b/embassy-rp/src/clocks.rs
@@ -6,8 +6,7 @@ use core::sync::atomic::{AtomicU16, AtomicU32, Ordering};
 use embassy_hal_internal::{into_ref, PeripheralRef};
 use pac::clocks::vals::*;
 
-use crate::gpio::sealed::Pin;
-use crate::gpio::AnyPin;
+use crate::gpio::{AnyPin, SealedPin};
 use crate::pac::common::{Reg, RW};
 use crate::{pac, reset, Peripheral};
 
@@ -788,14 +787,14 @@ impl_gpinpin!(PIN_20, 20, 0);
 impl_gpinpin!(PIN_22, 22, 1);
 
 /// General purpose clock input driver.
-pub struct Gpin<'d, T: Pin> {
+pub struct Gpin<'d, T: GpinPin> {
     gpin: PeripheralRef<'d, AnyPin>,
     _phantom: PhantomData<T>,
 }
 
-impl<'d, T: Pin> Gpin<'d, T> {
+impl<'d, T: GpinPin> Gpin<'d, T> {
     /// Create new gpin driver.
-    pub fn new<P: GpinPin>(gpin: impl Peripheral<P = P> + 'd) -> Gpin<'d, P> {
+    pub fn new(gpin: impl Peripheral<P = T> + 'd) -> Self {
         into_ref!(gpin);
 
         gpin.gpio().ctrl().write(|w| w.set_funcsel(0x08));
@@ -811,7 +810,7 @@ impl<'d, T: Pin> Gpin<'d, T> {
     // }
 }
 
-impl<'d, T: Pin> Drop for Gpin<'d, T> {
+impl<'d, T: GpinPin> Drop for Gpin<'d, T> {
     fn drop(&mut self) {
         self.gpin
             .gpio()
diff --git a/embassy-rp/src/dma.rs b/embassy-rp/src/dma.rs
index 44aabce6b..e6374a86c 100644
--- a/embassy-rp/src/dma.rs
+++ b/embassy-rp/src/dma.rs
@@ -208,14 +208,12 @@ pub(crate) const CHANNEL_COUNT: usize = 12;
 const NEW_AW: AtomicWaker = AtomicWaker::new();
 static CHANNEL_WAKERS: [AtomicWaker; CHANNEL_COUNT] = [NEW_AW; CHANNEL_COUNT];
 
-mod sealed {
-    pub trait Channel {}
-
-    pub trait Word {}
-}
+trait SealedChannel {}
+trait SealedWord {}
 
 /// DMA channel interface.
-pub trait Channel: Peripheral<P = Self> + sealed::Channel + Into<AnyChannel> + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Channel: Peripheral<P = Self> + SealedChannel + Into<AnyChannel> + Sized + 'static {
     /// Channel number.
     fn number(&self) -> u8;
 
@@ -231,26 +229,27 @@ pub trait Channel: Peripheral<P = Self> + sealed::Channel + Into<AnyChannel> + S
 }
 
 /// DMA word.
-pub trait Word: sealed::Word {
+#[allow(private_bounds)]
+pub trait Word: SealedWord {
     /// Word size.
     fn size() -> vals::DataSize;
 }
 
-impl sealed::Word for u8 {}
+impl SealedWord for u8 {}
 impl Word for u8 {
     fn size() -> vals::DataSize {
         vals::DataSize::SIZE_BYTE
     }
 }
 
-impl sealed::Word for u16 {}
+impl SealedWord for u16 {}
 impl Word for u16 {
     fn size() -> vals::DataSize {
         vals::DataSize::SIZE_HALFWORD
     }
 }
 
-impl sealed::Word for u32 {}
+impl SealedWord for u32 {}
 impl Word for u32 {
     fn size() -> vals::DataSize {
         vals::DataSize::SIZE_WORD
@@ -264,7 +263,7 @@ pub struct AnyChannel {
 
 impl_peripheral!(AnyChannel);
 
-impl sealed::Channel for AnyChannel {}
+impl SealedChannel for AnyChannel {}
 impl Channel for AnyChannel {
     fn number(&self) -> u8 {
         self.number
@@ -273,7 +272,7 @@ impl Channel for AnyChannel {
 
 macro_rules! channel {
     ($name:ident, $num:expr) => {
-        impl sealed::Channel for peripherals::$name {}
+        impl SealedChannel for peripherals::$name {}
         impl Channel for peripherals::$name {
             fn number(&self) -> u8 {
                 $num
diff --git a/embassy-rp/src/flash.rs b/embassy-rp/src/flash.rs
index 422b77400..45b385cb4 100644
--- a/embassy-rp/src/flash.rs
+++ b/embassy-rp/src/flash.rs
@@ -903,22 +903,22 @@ pub(crate) unsafe fn in_ram(operation: impl FnOnce()) -> Result<(), Error> {
     Ok(())
 }
 
-mod sealed {
-    pub trait Instance {}
-    pub trait Mode {}
-}
+trait SealedInstance {}
+trait SealedMode {}
 
 /// Flash instance.
-pub trait Instance: sealed::Instance {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance {}
 /// Flash mode.
-pub trait Mode: sealed::Mode {}
+#[allow(private_bounds)]
+pub trait Mode: SealedMode {}
 
-impl sealed::Instance for FLASH {}
+impl SealedInstance for FLASH {}
 impl Instance for FLASH {}
 
 macro_rules! impl_mode {
     ($name:ident) => {
-        impl sealed::Mode for $name {}
+        impl SealedMode for $name {}
         impl Mode for $name {}
     };
 }
diff --git a/embassy-rp/src/gpio.rs b/embassy-rp/src/gpio.rs
index a84c00a2c..ea87fd9da 100644
--- a/embassy-rp/src/gpio.rs
+++ b/embassy-rp/src/gpio.rs
@@ -8,7 +8,6 @@ use core::task::{Context, Poll};
 use embassy_hal_internal::{impl_peripheral, into_ref, PeripheralRef};
 use embassy_sync::waitqueue::AtomicWaker;
 
-use self::sealed::Pin as _;
 use crate::interrupt::InterruptExt;
 use crate::pac::common::{Reg, RW};
 use crate::pac::SIO;
@@ -802,68 +801,65 @@ impl<'w> Drop for DormantWake<'w> {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
+pub(crate) trait SealedPin: Sized {
+    fn pin_bank(&self) -> u8;
 
-    pub trait Pin: Sized {
-        fn pin_bank(&self) -> u8;
+    #[inline]
+    fn _pin(&self) -> u8 {
+        self.pin_bank() & 0x1f
+    }
 
-        #[inline]
-        fn _pin(&self) -> u8 {
-            self.pin_bank() & 0x1f
+    #[inline]
+    fn _bank(&self) -> Bank {
+        match self.pin_bank() >> 5 {
+            #[cfg(feature = "qspi-as-gpio")]
+            1 => Bank::Qspi,
+            _ => Bank::Bank0,
         }
+    }
 
-        #[inline]
-        fn _bank(&self) -> Bank {
-            match self.pin_bank() >> 5 {
-                #[cfg(feature = "qspi-as-gpio")]
-                1 => Bank::Qspi,
-                _ => Bank::Bank0,
-            }
+    fn io(&self) -> pac::io::Io {
+        match self._bank() {
+            Bank::Bank0 => crate::pac::IO_BANK0,
+            #[cfg(feature = "qspi-as-gpio")]
+            Bank::Qspi => crate::pac::IO_QSPI,
         }
+    }
 
-        fn io(&self) -> pac::io::Io {
-            match self._bank() {
-                Bank::Bank0 => crate::pac::IO_BANK0,
-                #[cfg(feature = "qspi-as-gpio")]
-                Bank::Qspi => crate::pac::IO_QSPI,
-            }
-        }
+    fn gpio(&self) -> pac::io::Gpio {
+        self.io().gpio(self._pin() as _)
+    }
 
-        fn gpio(&self) -> pac::io::Gpio {
-            self.io().gpio(self._pin() as _)
-        }
+    fn pad_ctrl(&self) -> Reg<pac::pads::regs::GpioCtrl, RW> {
+        let block = match self._bank() {
+            Bank::Bank0 => crate::pac::PADS_BANK0,
+            #[cfg(feature = "qspi-as-gpio")]
+            Bank::Qspi => crate::pac::PADS_QSPI,
+        };
+        block.gpio(self._pin() as _)
+    }
 
-        fn pad_ctrl(&self) -> Reg<pac::pads::regs::GpioCtrl, RW> {
-            let block = match self._bank() {
-                Bank::Bank0 => crate::pac::PADS_BANK0,
-                #[cfg(feature = "qspi-as-gpio")]
-                Bank::Qspi => crate::pac::PADS_QSPI,
-            };
-            block.gpio(self._pin() as _)
-        }
+    fn sio_out(&self) -> pac::sio::Gpio {
+        SIO.gpio_out(self._bank() as _)
+    }
 
-        fn sio_out(&self) -> pac::sio::Gpio {
-            SIO.gpio_out(self._bank() as _)
-        }
+    fn sio_oe(&self) -> pac::sio::Gpio {
+        SIO.gpio_oe(self._bank() as _)
+    }
 
-        fn sio_oe(&self) -> pac::sio::Gpio {
-            SIO.gpio_oe(self._bank() as _)
-        }
+    fn sio_in(&self) -> Reg<u32, RW> {
+        SIO.gpio_in(self._bank() as _)
+    }
 
-        fn sio_in(&self) -> Reg<u32, RW> {
-            SIO.gpio_in(self._bank() as _)
-        }
-
-        fn int_proc(&self) -> pac::io::Int {
-            let proc = SIO.cpuid().read();
-            self.io().int_proc(proc as _)
-        }
+    fn int_proc(&self) -> pac::io::Int {
+        let proc = SIO.cpuid().read();
+        self.io().int_proc(proc as _)
     }
 }
 
 /// Interface for a Pin that can be configured by an [Input] or [Output] driver, or converted to an [AnyPin].
-pub trait Pin: Peripheral<P = Self> + Into<AnyPin> + sealed::Pin + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Pin: Peripheral<P = Self> + Into<AnyPin> + SealedPin + Sized + 'static {
     /// Degrade to a generic pin struct
     fn degrade(self) -> AnyPin {
         AnyPin {
@@ -903,7 +899,7 @@ impl AnyPin {
 impl_peripheral!(AnyPin);
 
 impl Pin for AnyPin {}
-impl sealed::Pin for AnyPin {
+impl SealedPin for AnyPin {
     fn pin_bank(&self) -> u8 {
         self.pin_bank
     }
@@ -914,7 +910,7 @@ impl sealed::Pin for AnyPin {
 macro_rules! impl_pin {
     ($name:ident, $bank:expr, $pin_num:expr) => {
         impl Pin for peripherals::$name {}
-        impl sealed::Pin for peripherals::$name {
+        impl SealedPin for peripherals::$name {
             #[inline]
             fn pin_bank(&self) -> u8 {
                 ($bank as u8) * 32 + $pin_num
diff --git a/embassy-rp/src/i2c.rs b/embassy-rp/src/i2c.rs
index 26a819b25..256875b4a 100644
--- a/embassy-rp/src/i2c.rs
+++ b/embassy-rp/src/i2c.rs
@@ -784,34 +784,24 @@ pub fn i2c_reserved_addr(addr: u16) -> bool {
     ((addr & 0x78) == 0 || (addr & 0x78) == 0x78) && addr != 0
 }
 
-mod sealed {
-    use embassy_sync::waitqueue::AtomicWaker;
+pub(crate) trait SealedInstance {
+    const TX_DREQ: u8;
+    const RX_DREQ: u8;
 
-    use crate::interrupt;
-
-    pub trait Instance {
-        const TX_DREQ: u8;
-        const RX_DREQ: u8;
-
-        type Interrupt: interrupt::typelevel::Interrupt;
-
-        fn regs() -> crate::pac::i2c::I2c;
-        fn reset() -> crate::pac::resets::regs::Peripherals;
-        fn waker() -> &'static AtomicWaker;
-    }
-
-    pub trait Mode {}
-
-    pub trait SdaPin<T: Instance> {}
-    pub trait SclPin<T: Instance> {}
+    fn regs() -> crate::pac::i2c::I2c;
+    fn reset() -> crate::pac::resets::regs::Peripherals;
+    fn waker() -> &'static AtomicWaker;
 }
 
+trait SealedMode {}
+
 /// Driver mode.
-pub trait Mode: sealed::Mode {}
+#[allow(private_bounds)]
+pub trait Mode: SealedMode {}
 
 macro_rules! impl_mode {
     ($name:ident) => {
-        impl sealed::Mode for $name {}
+        impl SealedMode for $name {}
         impl Mode for $name {}
     };
 }
@@ -825,16 +815,18 @@ impl_mode!(Blocking);
 impl_mode!(Async);
 
 /// I2C instance.
-pub trait Instance: sealed::Instance {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance {
+    /// Interrupt for this peripheral.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
 
 macro_rules! impl_instance {
     ($type:ident, $irq:ident, $reset:ident, $tx_dreq:expr, $rx_dreq:expr) => {
-        impl sealed::Instance for peripherals::$type {
+        impl SealedInstance for peripherals::$type {
             const TX_DREQ: u8 = $tx_dreq;
             const RX_DREQ: u8 = $rx_dreq;
 
-            type Interrupt = crate::interrupt::typelevel::$irq;
-
             #[inline]
             fn regs() -> pac::i2c::I2c {
                 pac::$type
@@ -854,7 +846,9 @@ macro_rules! impl_instance {
                 &WAKER
             }
         }
-        impl Instance for peripherals::$type {}
+        impl Instance for peripherals::$type {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
     };
 }
 
@@ -862,13 +856,12 @@ impl_instance!(I2C0, I2C0_IRQ, set_i2c0, 32, 33);
 impl_instance!(I2C1, I2C1_IRQ, set_i2c1, 34, 35);
 
 /// SDA pin.
-pub trait SdaPin<T: Instance>: sealed::SdaPin<T> + crate::gpio::Pin {}
+pub trait SdaPin<T: Instance>: crate::gpio::Pin {}
 /// SCL pin.
-pub trait SclPin<T: Instance>: sealed::SclPin<T> + crate::gpio::Pin {}
+pub trait SclPin<T: Instance>: crate::gpio::Pin {}
 
 macro_rules! impl_pin {
     ($pin:ident, $instance:ident, $function:ident) => {
-        impl sealed::$function<peripherals::$instance> for peripherals::$pin {}
         impl $function<peripherals::$instance> for peripherals::$pin {}
     };
 }
diff --git a/embassy-rp/src/pio/mod.rs b/embassy-rp/src/pio/mod.rs
index 7eca700ba..2e5c57a26 100644
--- a/embassy-rp/src/pio/mod.rs
+++ b/embassy-rp/src/pio/mod.rs
@@ -15,8 +15,7 @@ use pac::pio::vals::SmExecctrlStatusSel;
 use pio::{Program, SideSet, Wrap};
 
 use crate::dma::{Channel, Transfer, Word};
-use crate::gpio::sealed::Pin as SealedPin;
-use crate::gpio::{self, AnyPin, Drive, Level, Pull, SlewRate};
+use crate::gpio::{self, AnyPin, Drive, Level, Pull, SealedPin, SlewRate};
 use crate::interrupt::typelevel::{Binding, Handler, Interrupt};
 use crate::pac::dma::vals::TreqSel;
 use crate::relocate::RelocatedProgram;
@@ -695,6 +694,12 @@ impl<'d, PIO: Instance + 'd, const SM: usize> StateMachine<'d, PIO, SM> {
         }
     }
 
+    /// Set the clock divider for this state machine.
+    pub fn set_clock_divider(&mut self, clock_divider: FixedU32<U8>) {
+        let sm = Self::this_sm();
+        sm.clkdiv().write(|w| w.0 = clock_divider.to_bits() << 8);
+    }
+
     #[inline(always)]
     fn this_sm() -> crate::pac::pio::StateMachine {
         PIO::PIO.sm(SM)
@@ -1148,49 +1153,47 @@ fn on_pio_drop<PIO: Instance>() {
     }
 }
 
-mod sealed {
-    use super::*;
+trait SealedInstance {
+    const PIO_NO: u8;
+    const PIO: &'static crate::pac::pio::Pio;
+    const FUNCSEL: crate::pac::io::vals::Gpio0ctrlFuncsel;
 
-    pub trait PioPin {}
+    #[inline]
+    fn wakers() -> &'static Wakers {
+        const NEW_AW: AtomicWaker = AtomicWaker::new();
+        static WAKERS: Wakers = Wakers([NEW_AW; 12]);
 
-    pub trait Instance {
-        const PIO_NO: u8;
-        const PIO: &'static crate::pac::pio::Pio;
-        const FUNCSEL: crate::pac::io::vals::Gpio0ctrlFuncsel;
-        type Interrupt: crate::interrupt::typelevel::Interrupt;
+        &WAKERS
+    }
 
-        #[inline]
-        fn wakers() -> &'static Wakers {
-            const NEW_AW: AtomicWaker = AtomicWaker::new();
-            static WAKERS: Wakers = Wakers([NEW_AW; 12]);
+    #[inline]
+    fn state() -> &'static State {
+        static STATE: State = State {
+            users: AtomicU8::new(0),
+            used_pins: AtomicU32::new(0),
+        };
 
-            &WAKERS
-        }
-
-        #[inline]
-        fn state() -> &'static State {
-            static STATE: State = State {
-                users: AtomicU8::new(0),
-                used_pins: AtomicU32::new(0),
-            };
-
-            &STATE
-        }
+        &STATE
     }
 }
 
 /// PIO instance.
-pub trait Instance: sealed::Instance + Sized + Unpin {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + Sized + Unpin {
+    /// Interrupt for this peripheral.
+    type Interrupt: crate::interrupt::typelevel::Interrupt;
+}
 
 macro_rules! impl_pio {
     ($name:ident, $pio:expr, $pac:ident, $funcsel:ident, $irq:ident) => {
-        impl sealed::Instance for peripherals::$name {
+        impl SealedInstance for peripherals::$name {
             const PIO_NO: u8 = $pio;
             const PIO: &'static pac::pio::Pio = &pac::$pac;
             const FUNCSEL: pac::io::vals::Gpio0ctrlFuncsel = pac::io::vals::Gpio0ctrlFuncsel::$funcsel;
+        }
+        impl Instance for peripherals::$name {
             type Interrupt = crate::interrupt::typelevel::$irq;
         }
-        impl Instance for peripherals::$name {}
     };
 }
 
@@ -1198,12 +1201,11 @@ impl_pio!(PIO0, 0, PIO0, PIO0_0, PIO0_IRQ_0);
 impl_pio!(PIO1, 1, PIO1, PIO1_0, PIO1_IRQ_0);
 
 /// PIO pin.
-pub trait PioPin: sealed::PioPin + gpio::Pin {}
+pub trait PioPin: gpio::Pin {}
 
 macro_rules! impl_pio_pin {
     ($( $pin:ident, )*) => {
         $(
-            impl sealed::PioPin for peripherals::$pin {}
             impl PioPin for peripherals::$pin {}
         )*
     };
diff --git a/embassy-rp/src/pwm.rs b/embassy-rp/src/pwm.rs
index 5aab3ff4f..a1f400cfb 100644
--- a/embassy-rp/src/pwm.rs
+++ b/embassy-rp/src/pwm.rs
@@ -6,8 +6,7 @@ use fixed::FixedU16;
 use pac::pwm::regs::{ChDiv, Intr};
 use pac::pwm::vals::Divmode;
 
-use crate::gpio::sealed::Pin as _;
-use crate::gpio::{AnyPin, Pin as GpioPin};
+use crate::gpio::{AnyPin, Pin as GpioPin, Pull, SealedPin as _};
 use crate::{pac, peripherals, RegExt};
 
 /// The configuration of a PWM slice.
@@ -93,6 +92,7 @@ impl<'d, T: Slice> Pwm<'d, T> {
         inner: impl Peripheral<P = T> + 'd,
         a: Option<PeripheralRef<'d, AnyPin>>,
         b: Option<PeripheralRef<'d, AnyPin>>,
+        b_pull: Pull,
         config: Config,
         divmode: Divmode,
     ) -> Self {
@@ -111,6 +111,10 @@ impl<'d, T: Slice> Pwm<'d, T> {
         }
         if let Some(pin) = &b {
             pin.gpio().ctrl().write(|w| w.set_funcsel(4));
+            pin.pad_ctrl().modify(|w| {
+                w.set_pue(b_pull == Pull::Up);
+                w.set_pde(b_pull == Pull::Down);
+            });
         }
         Self {
             inner,
@@ -122,7 +126,7 @@ impl<'d, T: Slice> Pwm<'d, T> {
     /// Create PWM driver without any configured pins.
     #[inline]
     pub fn new_free(inner: impl Peripheral<P = T> + 'd, config: Config) -> Self {
-        Self::new_inner(inner, None, None, config, Divmode::DIV)
+        Self::new_inner(inner, None, None, Pull::None, config, Divmode::DIV)
     }
 
     /// Create PWM driver with a single 'a' as output.
@@ -133,7 +137,7 @@ impl<'d, T: Slice> Pwm<'d, T> {
         config: Config,
     ) -> Self {
         into_ref!(a);
-        Self::new_inner(inner, Some(a.map_into()), None, config, Divmode::DIV)
+        Self::new_inner(inner, Some(a.map_into()), None, Pull::None, config, Divmode::DIV)
     }
 
     /// Create PWM driver with a single 'b' pin as output.
@@ -144,7 +148,7 @@ impl<'d, T: Slice> Pwm<'d, T> {
         config: Config,
     ) -> Self {
         into_ref!(b);
-        Self::new_inner(inner, None, Some(b.map_into()), config, Divmode::DIV)
+        Self::new_inner(inner, None, Some(b.map_into()), Pull::None, config, Divmode::DIV)
     }
 
     /// Create PWM driver with a 'a' and 'b' pins as output.
@@ -156,7 +160,14 @@ impl<'d, T: Slice> Pwm<'d, T> {
         config: Config,
     ) -> Self {
         into_ref!(a, b);
-        Self::new_inner(inner, Some(a.map_into()), Some(b.map_into()), config, Divmode::DIV)
+        Self::new_inner(
+            inner,
+            Some(a.map_into()),
+            Some(b.map_into()),
+            Pull::None,
+            config,
+            Divmode::DIV,
+        )
     }
 
     /// Create PWM driver with a single 'b' as input pin.
@@ -164,11 +175,12 @@ impl<'d, T: Slice> Pwm<'d, T> {
     pub fn new_input(
         inner: impl Peripheral<P = T> + 'd,
         b: impl Peripheral<P = impl ChannelBPin<T>> + 'd,
+        b_pull: Pull,
         mode: InputMode,
         config: Config,
     ) -> Self {
         into_ref!(b);
-        Self::new_inner(inner, None, Some(b.map_into()), config, mode.into())
+        Self::new_inner(inner, None, Some(b.map_into()), b_pull, config, mode.into())
     }
 
     /// Create PWM driver with a 'a' and 'b' pins in the desired input mode.
@@ -177,11 +189,19 @@ impl<'d, T: Slice> Pwm<'d, T> {
         inner: impl Peripheral<P = T> + 'd,
         a: impl Peripheral<P = impl ChannelAPin<T>> + 'd,
         b: impl Peripheral<P = impl ChannelBPin<T>> + 'd,
+        b_pull: Pull,
         mode: InputMode,
         config: Config,
     ) -> Self {
         into_ref!(a, b);
-        Self::new_inner(inner, Some(a.map_into()), Some(b.map_into()), config, mode.into())
+        Self::new_inner(
+            inner,
+            Some(a.map_into()),
+            Some(b.map_into()),
+            b_pull,
+            config,
+            mode.into(),
+        )
     }
 
     /// Set the PWM config.
@@ -300,12 +320,11 @@ impl<'d, T: Slice> Drop for Pwm<'d, T> {
     }
 }
 
-mod sealed {
-    pub trait Slice {}
-}
+trait SealedSlice {}
 
 /// PWM Slice.
-pub trait Slice: Peripheral<P = Self> + sealed::Slice + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Slice: Peripheral<P = Self> + SealedSlice + Sized + 'static {
     /// Slice number.
     fn number(&self) -> u8;
 
@@ -317,7 +336,7 @@ pub trait Slice: Peripheral<P = Self> + sealed::Slice + Sized + 'static {
 
 macro_rules! slice {
     ($name:ident, $num:expr) => {
-        impl sealed::Slice for peripherals::$name {}
+        impl SealedSlice for peripherals::$name {}
         impl Slice for peripherals::$name {
             fn number(&self) -> u8 {
                 $num
diff --git a/embassy-rp/src/rtc/mod.rs b/embassy-rp/src/rtc/mod.rs
index c8691bdc2..2ce7ac645 100644
--- a/embassy-rp/src/rtc/mod.rs
+++ b/embassy-rp/src/rtc/mod.rs
@@ -188,16 +188,15 @@ pub enum RtcError {
     NotRunning,
 }
 
-mod sealed {
-    pub trait Instance {
-        fn regs(&self) -> crate::pac::rtc::Rtc;
-    }
+trait SealedInstance {
+    fn regs(&self) -> crate::pac::rtc::Rtc;
 }
 
 /// RTC peripheral instance.
-pub trait Instance: sealed::Instance {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance {}
 
-impl sealed::Instance for crate::peripherals::RTC {
+impl SealedInstance for crate::peripherals::RTC {
     fn regs(&self) -> crate::pac::rtc::Rtc {
         crate::pac::RTC
     }
diff --git a/embassy-rp/src/spi.rs b/embassy-rp/src/spi.rs
index a2a22ffe5..ef4c644ae 100644
--- a/embassy-rp/src/spi.rs
+++ b/embassy-rp/src/spi.rs
@@ -7,8 +7,7 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 pub use embedded_hal_02::spi::{Phase, Polarity};
 
 use crate::dma::{AnyChannel, Channel};
-use crate::gpio::sealed::Pin as _;
-use crate::gpio::{AnyPin, Pin as GpioPin};
+use crate::gpio::{AnyPin, Pin as GpioPin, SealedPin as _};
 use crate::{pac, peripherals, Peripheral};
 
 /// SPI errors.
@@ -443,28 +442,26 @@ impl<'d, T: Instance> Spi<'d, T, Async> {
     }
 }
 
-mod sealed {
-    use super::*;
+trait SealedMode {}
 
-    pub trait Mode {}
+trait SealedInstance {
+    const TX_DREQ: u8;
+    const RX_DREQ: u8;
 
-    pub trait Instance {
-        const TX_DREQ: u8;
-        const RX_DREQ: u8;
-
-        fn regs(&self) -> pac::spi::Spi;
-    }
+    fn regs(&self) -> pac::spi::Spi;
 }
 
 /// Mode.
-pub trait Mode: sealed::Mode {}
+#[allow(private_bounds)]
+pub trait Mode: SealedMode {}
 
 /// SPI instance trait.
-pub trait Instance: sealed::Instance {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance {}
 
 macro_rules! impl_instance {
     ($type:ident, $irq:ident, $tx_dreq:expr, $rx_dreq:expr) => {
-        impl sealed::Instance for peripherals::$type {
+        impl SealedInstance for peripherals::$type {
             const TX_DREQ: u8 = $tx_dreq;
             const RX_DREQ: u8 = $rx_dreq;
 
@@ -527,7 +524,7 @@ impl_pin!(PIN_29, SPI1, CsPin);
 
 macro_rules! impl_mode {
     ($name:ident) => {
-        impl sealed::Mode for $name {}
+        impl SealedMode for $name {}
         impl Mode for $name {}
     };
 }
diff --git a/embassy-rp/src/uart/mod.rs b/embassy-rp/src/uart/mod.rs
index 65dcf4eb4..ee2dcb27d 100644
--- a/embassy-rp/src/uart/mod.rs
+++ b/embassy-rp/src/uart/mod.rs
@@ -12,8 +12,7 @@ use pac::uart::regs::Uartris;
 
 use crate::clocks::clk_peri_freq;
 use crate::dma::{AnyChannel, Channel};
-use crate::gpio::sealed::Pin;
-use crate::gpio::AnyPin;
+use crate::gpio::{AnyPin, SealedPin};
 use crate::interrupt::typelevel::{Binding, Interrupt};
 use crate::pac::io::vals::{Inover, Outover};
 use crate::{interrupt, pac, peripherals, Peripheral, RegExt};
@@ -1107,35 +1106,26 @@ impl<'d, T: Instance, M: Mode> embedded_hal_nb::serial::Write for Uart<'d, T, M>
     }
 }
 
-mod sealed {
-    use super::*;
+trait SealedMode {}
 
-    pub trait Mode {}
+trait SealedInstance {
+    const TX_DREQ: u8;
+    const RX_DREQ: u8;
 
-    pub trait Instance {
-        const TX_DREQ: u8;
-        const RX_DREQ: u8;
+    fn regs() -> pac::uart::Uart;
 
-        type Interrupt: interrupt::typelevel::Interrupt;
+    fn buffered_state() -> &'static buffered::State;
 
-        fn regs() -> pac::uart::Uart;
-
-        fn buffered_state() -> &'static buffered::State;
-
-        fn dma_state() -> &'static DmaState;
-    }
-    pub trait TxPin<T: Instance> {}
-    pub trait RxPin<T: Instance> {}
-    pub trait CtsPin<T: Instance> {}
-    pub trait RtsPin<T: Instance> {}
+    fn dma_state() -> &'static DmaState;
 }
 
 /// UART mode.
-pub trait Mode: sealed::Mode {}
+#[allow(private_bounds)]
+pub trait Mode: SealedMode {}
 
 macro_rules! impl_mode {
     ($name:ident) => {
-        impl sealed::Mode for $name {}
+        impl SealedMode for $name {}
         impl Mode for $name {}
     };
 }
@@ -1149,16 +1139,18 @@ impl_mode!(Blocking);
 impl_mode!(Async);
 
 /// UART instance.
-pub trait Instance: sealed::Instance {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance {
+    /// Interrupt for this instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
 
 macro_rules! impl_instance {
     ($inst:ident, $irq:ident, $tx_dreq:expr, $rx_dreq:expr) => {
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             const TX_DREQ: u8 = $tx_dreq;
             const RX_DREQ: u8 = $rx_dreq;
 
-            type Interrupt = crate::interrupt::typelevel::$irq;
-
             fn regs() -> pac::uart::Uart {
                 pac::$inst
             }
@@ -1176,7 +1168,9 @@ macro_rules! impl_instance {
                 &STATE
             }
         }
-        impl Instance for peripherals::$inst {}
+        impl Instance for peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
     };
 }
 
@@ -1184,17 +1178,16 @@ impl_instance!(UART0, UART0_IRQ, 20, 21);
 impl_instance!(UART1, UART1_IRQ, 22, 23);
 
 /// Trait for TX pins.
-pub trait TxPin<T: Instance>: sealed::TxPin<T> + crate::gpio::Pin {}
+pub trait TxPin<T: Instance>: crate::gpio::Pin {}
 /// Trait for RX pins.
-pub trait RxPin<T: Instance>: sealed::RxPin<T> + crate::gpio::Pin {}
+pub trait RxPin<T: Instance>: crate::gpio::Pin {}
 /// Trait for Clear To Send (CTS) pins.
-pub trait CtsPin<T: Instance>: sealed::CtsPin<T> + crate::gpio::Pin {}
+pub trait CtsPin<T: Instance>: crate::gpio::Pin {}
 /// Trait for Request To Send (RTS) pins.
-pub trait RtsPin<T: Instance>: sealed::RtsPin<T> + crate::gpio::Pin {}
+pub trait RtsPin<T: Instance>: crate::gpio::Pin {}
 
 macro_rules! impl_pin {
     ($pin:ident, $instance:ident, $function:ident) => {
-        impl sealed::$function<peripherals::$instance> for peripherals::$pin {}
         impl $function<peripherals::$instance> for peripherals::$pin {}
     };
 }
diff --git a/embassy-rp/src/usb.rs b/embassy-rp/src/usb.rs
index d68dee4a3..37d37d6d9 100644
--- a/embassy-rp/src/usb.rs
+++ b/embassy-rp/src/usb.rs
@@ -14,20 +14,19 @@ use embassy_usb_driver::{
 use crate::interrupt::typelevel::{Binding, Interrupt};
 use crate::{interrupt, pac, peripherals, Peripheral, RegExt};
 
-pub(crate) mod sealed {
-    pub trait Instance {
-        fn regs() -> crate::pac::usb::Usb;
-        fn dpram() -> crate::pac::usb_dpram::UsbDpram;
-    }
+trait SealedInstance {
+    fn regs() -> crate::pac::usb::Usb;
+    fn dpram() -> crate::pac::usb_dpram::UsbDpram;
 }
 
 /// USB peripheral instance.
-pub trait Instance: sealed::Instance + 'static {
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + 'static {
     /// Interrupt for this peripheral.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
 
-impl crate::usb::sealed::Instance for peripherals::USB {
+impl crate::usb::SealedInstance for peripherals::USB {
     fn regs() -> pac::usb::Usb {
         pac::USBCTRL_REGS
     }
diff --git a/embassy-stm32/Cargo.toml b/embassy-stm32/Cargo.toml
index 7c6312f6c..89b24f0eb 100644
--- a/embassy-stm32/Cargo.toml
+++ b/embassy-stm32/Cargo.toml
@@ -70,7 +70,8 @@ rand_core = "0.6.3"
 sdio-host = "0.5.0"
 critical-section = "1.1"
 #stm32-metapac = { version = "15" }
-stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-f84633553331c2d154ee72de779a40cbb10fd1bd" }
+stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-2b7ec569a5510c324693f0515ac8ea20b12917a9" }
+
 vcell = "0.1.3"
 nb = "1.0.0"
 stm32-fmc = "0.3.0"
@@ -93,9 +94,9 @@ critical-section = { version = "1.1", features = ["std"] }
 [build-dependencies]
 proc-macro2 = "1.0.36"
 quote = "1.0.15"
-#stm32-metapac = { version = "15", default-features = false, features = ["metadata"]}
-stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-f84633553331c2d154ee72de779a40cbb10fd1bd", default-features = false, features = ["metadata"]}
 
+#stm32-metapac = { version = "15", default-features = false, features = ["metadata"]}
+stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-2b7ec569a5510c324693f0515ac8ea20b12917a9", default-features = false, features = ["metadata"]}
 
 [features]
 default = ["rt"]
diff --git a/embassy-stm32/build.rs b/embassy-stm32/build.rs
index 15bb8ea62..38b6c480c 100644
--- a/embassy-stm32/build.rs
+++ b/embassy-stm32/build.rs
@@ -484,7 +484,7 @@ fn main() {
                 let expr = if let Some(mux) = self.chained_muxes.get(&v.name) {
                     self.gen_mux(mux)
                 } else {
-                    self.gen_clock(&v.name)
+                    self.gen_clock(v.name)
                 };
                 match_arms.extend(quote! {
                     crate::pac::rcc::vals::#enum_name::#variant_name => #expr,
@@ -1006,7 +1006,19 @@ fn main() {
         (("quadspi", "BK2_IO3"), quote!(crate::qspi::BK2D3Pin)),
         (("quadspi", "BK2_NCS"), quote!(crate::qspi::BK2NSSPin)),
         (("quadspi", "CLK"), quote!(crate::qspi::SckPin)),
-             ].into();
+        (("octospi", "IO0"), quote!(crate::ospi::D0Pin)),
+        (("octospi", "IO1"), quote!(crate::ospi::D1Pin)),
+        (("octospi", "IO2"), quote!(crate::ospi::D2Pin)),
+        (("octospi", "IO3"), quote!(crate::ospi::D3Pin)),
+        (("octospi", "IO4"), quote!(crate::ospi::D4Pin)),
+        (("octospi", "IO5"), quote!(crate::ospi::D5Pin)),
+        (("octospi", "IO6"), quote!(crate::ospi::D6Pin)),
+        (("octospi", "IO7"), quote!(crate::ospi::D7Pin)),
+        (("octospi", "DQS"), quote!(crate::ospi::DQSPin)),
+        (("octospi", "NCS"), quote!(crate::ospi::NSSPin)),
+        (("octospi", "CLK"), quote!(crate::ospi::SckPin)),
+        (("octospi", "NCLK"), quote!(crate::ospi::NckPin)),
+    ].into();
 
     for p in METADATA.peripherals {
         if let Some(regs) = &p.registers {
@@ -1129,6 +1141,7 @@ fn main() {
         // SDMMCv1 uses the same channel for both directions, so just implement for RX
         (("sdmmc", "RX"), quote!(crate::sdmmc::SdmmcDma)),
         (("quadspi", "QUADSPI"), quote!(crate::qspi::QuadDma)),
+        (("octospi", "OCTOSPI1"), quote!(crate::ospi::OctoDma)),
         (("dac", "CH1"), quote!(crate::dac::DacDma1)),
         (("dac", "CH2"), quote!(crate::dac::DacDma2)),
         (("timer", "UP"), quote!(crate::timer::UpDma)),
@@ -1139,11 +1152,18 @@ fn main() {
         (("timer", "CH2"), quote!(crate::timer::Ch2Dma)),
         (("timer", "CH3"), quote!(crate::timer::Ch3Dma)),
         (("timer", "CH4"), quote!(crate::timer::Ch4Dma)),
+        (("cordic", "WRITE"), quote!(crate::cordic::WriteDma)), // FIXME: stm32u5a crash on Cordic driver
+        (("cordic", "READ"), quote!(crate::cordic::ReadDma)),   // FIXME: stm32u5a crash on Cordic driver
     ]
     .into();
 
     for p in METADATA.peripherals {
         if let Some(regs) = &p.registers {
+            // FIXME: stm32u5a crash on Cordic driver
+            if chip_name.starts_with("stm32u5a") && regs.kind == "cordic" {
+                continue;
+            }
+
             let mut dupe = HashSet::new();
             for ch in p.dma_channels {
                 // Some chips have multiple request numbers for the same (peri, signal, channel) combos.
diff --git a/embassy-stm32/src/adc/v3.rs b/embassy-stm32/src/adc/v3.rs
index 8c9b47197..e25630be2 100644
--- a/embassy-stm32/src/adc/v3.rs
+++ b/embassy-stm32/src/adc/v3.rs
@@ -222,6 +222,13 @@ impl<'d, T: Instance> Adc<'d, T> {
             // spin
         }
 
+        // RM0492, RM0481, etc.
+        // "This option bit must be set to 1 when ADCx_INP0 or ADCx_INN1 channel is selected."
+        #[cfg(adc_h5)]
+        if pin.channel() == 0 {
+            T::regs().or().modify(|reg| reg.set_op0(true));
+        }
+
         // Configure channel
         Self::set_channel_sample_time(pin.channel(), self.sample_time);
 
@@ -244,6 +251,13 @@ impl<'d, T: Instance> Adc<'d, T> {
 
         T::regs().cr().modify(|reg| reg.set_addis(true));
 
+        // RM0492, RM0481, etc.
+        // "This option bit must be set to 1 when ADCx_INP0 or ADCx_INN1 channel is selected."
+        #[cfg(adc_h5)]
+        if pin.channel() == 0 {
+            T::regs().or().modify(|reg| reg.set_op0(false));
+        }
+
         val
     }
 
diff --git a/embassy-stm32/src/cordic/enums.rs b/embassy-stm32/src/cordic/enums.rs
new file mode 100644
index 000000000..e8695fac7
--- /dev/null
+++ b/embassy-stm32/src/cordic/enums.rs
@@ -0,0 +1,71 @@
+/// CORDIC function to compute; the discriminant (`Cos` = 0 through `Sqrt` = 9) is the raw value passed to `set_func` on the CSR register
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Function {
+    Cos = 0,
+    Sin,
+    Phase,
+    Modulus,
+    Arctan,
+    Cosh,
+    Sinh,
+    Arctanh,
+    Ln,
+    Sqrt,
+}
+
+/// CORDIC precision as an iteration count; discriminants start at 1 (`Iters4`) and increase by one per variant, up to 15 (`Iters60`)
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy, Default)]
+pub enum Precision {
+    Iters4 = 1,
+    Iters8,
+    Iters12,
+    Iters16,
+    Iters20,
+    #[default]
+    Iters24, // default: this value is recommended by the Reference Manual
+    Iters28,
+    Iters32,
+    Iters36,
+    Iters40,
+    Iters44,
+    Iters48,
+    Iters52,
+    Iters56,
+    Iters60,
+}
+
+/// CORDIC scale; raw values run from 0 (`Arg1Res1`, the default) to 7 (`Arg1o128Res128`) and are validated per function by `Config::new`
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy, Default, PartialEq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Scale {
+    #[default]
+    Arg1Res1 = 0,
+    Arg1o2Res2,
+    Arg1o4Res4,
+    Arg1o8Res8,
+    Arg1o16Res16,
+    Arg1o32Res32,
+    Arg1o64Res64,
+    Arg1o128Res128,
+}
+
+/// CORDIC argument/result register access count (one or two, default one), used for the NARGS and NRES settings
+#[allow(missing_docs)]
+#[derive(Clone, Copy, Default)]
+pub enum AccessCount {
+    #[default]
+    One,
+    Two,
+}
+
+/// CORDIC argument/result data width (32 or 16 bits), used for the ARGSIZE and RESSIZE settings
+#[allow(missing_docs)]
+#[derive(Clone, Copy)]
+pub enum Width {
+    Bits32,
+    Bits16,
+}
diff --git a/embassy-stm32/src/cordic/errors.rs b/embassy-stm32/src/cordic/errors.rs
new file mode 100644
index 000000000..3c70fc9e7
--- /dev/null
+++ b/embassy-stm32/src/cordic/errors.rs
@@ -0,0 +1,144 @@
+use super::{Function, Scale};
+
+/// Errors reported by the [Cordic](super::Cordic) driver and by [Config](super::Config) validation
+#[derive(Debug)]
+pub enum CordicError {
+    /// The configured scale is not valid for the selected function
+    ConfigError(ConfigError),
+    /// Argument length is incorrect
+    ArgumentLengthIncorrect,
+    /// Result buffer is not long enough
+    ResultLengthNotEnough,
+    /// Input value is out of range for Q1.x format
+    NumberOutOfRange(NumberOutOfRange),
+    /// An argument is outside the range accepted for the selected function
+    ArgError(ArgError),
+}
+
+impl From<ConfigError> for CordicError {
+    fn from(value: ConfigError) -> Self {
+        Self::ConfigError(value)
+    }
+}
+
+impl From<NumberOutOfRange> for CordicError {
+    fn from(value: NumberOutOfRange) -> Self {
+        Self::NumberOutOfRange(value)
+    }
+}
+
+impl From<ArgError> for CordicError {
+    fn from(value: ArgError) -> Self {
+        Self::ArgError(value)
+    }
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for CordicError {
+    fn format(&self, fmt: defmt::Formatter) {
+        use CordicError::*;
+
+        match self {
+            ConfigError(e) => defmt::write!(fmt, "{}", e),
+            ResultLengthNotEnough => defmt::write!(fmt, "Output buffer length is not long enough"),
+            ArgumentLengthIncorrect => defmt::write!(fmt, "Argument length incorrect"),
+            NumberOutOfRange(e) => defmt::write!(fmt, "{}", e),
+            ArgError(e) => defmt::write!(fmt, "{}", e),
+        }
+    }
+}
+
+/// Error from validating a [Config](super::Config): the scale is outside `scale_range` (inclusive `[min, max]` raw values) allowed for `func`
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct ConfigError {
+    pub(super) func: Function,
+    pub(super) scale_range: [u8; 2],
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for ConfigError {
+    fn format(&self, fmt: defmt::Formatter) {
+        defmt::write!(fmt, "For FUNCTION: {},", self.func);
+
+        if self.scale_range[0] == self.scale_range[1] {
+            defmt::write!(fmt, " SCALE value should be {}", self.scale_range[0])
+        } else {
+            defmt::write!(
+                fmt,
+                " SCALE value should be {} <= SCALE <= {}",
+                self.scale_range[0],
+                self.scale_range[1]
+            )
+        }
+    }
+}
+
+/// Input value is out of range for Q1.x format
+#[allow(missing_docs)]
+#[derive(Debug)]
+pub enum NumberOutOfRange {
+    BelowLowerBound,
+    AboveUpperBound,
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for NumberOutOfRange {
+    fn format(&self, fmt: defmt::Formatter) {
+        use NumberOutOfRange::*;
+
+        match self {
+            BelowLowerBound => defmt::write!(fmt, "input value should be equal or greater than -1"),
+            AboveUpperBound => defmt::write!(fmt, "input value should be equal or less than 1"),
+        }
+    }
+}
+
+/// Error on checking input arguments: the argument named by `arg_type` is outside `arg_range` for `func` (and `scale`, when present)
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct ArgError {
+    pub(super) func: Function,
+    pub(super) scale: Option<Scale>,
+    pub(super) arg_range: [f32; 2], // [lower, upper] bounds; only for debug display, f32 is ok
+    pub(super) inclusive_upper_bound: bool,
+    pub(super) arg_type: ArgType,
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for ArgError {
+    fn format(&self, fmt: defmt::Formatter) {
+        defmt::write!(fmt, "For FUNCTION: {},", self.func);
+
+        if let Some(scale) = self.scale {
+            defmt::write!(fmt, " when SCALE is {},", scale);
+        }
+
+        defmt::write!(fmt, " {} should be", self.arg_type);
+
+        if self.inclusive_upper_bound {
+            defmt::write!(
+                fmt,
+                " {} <= {} <= {}",
+                self.arg_range[0],
+                self.arg_type,
+                self.arg_range[1]
+            )
+        } else {
+            defmt::write!(
+                fmt,
+                " {} <= {} < {}",
+                self.arg_range[0],
+                self.arg_type,
+                self.arg_range[1]
+            )
+        };
+    }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub(super) enum ArgType {
+    Arg1,
+    Arg2,
+}
diff --git a/embassy-stm32/src/cordic/mod.rs b/embassy-stm32/src/cordic/mod.rs
new file mode 100644
index 000000000..9ac10e714
--- /dev/null
+++ b/embassy-stm32/src/cordic/mod.rs
@@ -0,0 +1,729 @@
+//! coordinate rotation digital computer (CORDIC)
+
+use embassy_hal_internal::drop::OnDrop;
+use embassy_hal_internal::{into_ref, Peripheral, PeripheralRef};
+
+use crate::pac::cordic::vals;
+use crate::{dma, peripherals};
+
+mod enums;
+pub use enums::*;
+
+mod errors;
+pub use errors::*;
+
+pub mod utils;
+
+/// CORDIC driver; holds the peripheral reference and the [Config] that `reconfigure` writes to the registers
+pub struct Cordic<'d, T: Instance> {
+    peri: PeripheralRef<'d, T>,
+    config: Config,
+}
+
+/// Cordic instance
+trait SealedInstance {
+    /// Get access to CORDIC registers
+    fn regs() -> crate::pac::cordic::Cordic;
+
+    /// Set Function value
+    fn set_func(&self, func: Function) {
+        Self::regs()
+            .csr()
+            .modify(|v| v.set_func(vals::Func::from_bits(func as u8)));
+    }
+
+    /// Set Precision value
+    fn set_precision(&self, precision: Precision) {
+        Self::regs()
+            .csr()
+            .modify(|v| v.set_precision(vals::Precision::from_bits(precision as u8)))
+    }
+
+    /// Set Scale value
+    fn set_scale(&self, scale: Scale) {
+        Self::regs()
+            .csr()
+            .modify(|v| v.set_scale(vals::Scale::from_bits(scale as u8)))
+    }
+
+    /// Enable global interrupt
+    fn enable_irq(&self) {
+        Self::regs().csr().modify(|v| v.set_ien(true))
+    }
+
+    /// Disable global interrupt
+    fn disable_irq(&self) {
+        Self::regs().csr().modify(|v| v.set_ien(false))
+    }
+
+    /// Enable Read DMA
+    fn enable_read_dma(&self) {
+        Self::regs().csr().modify(|v| {
+            v.set_dmaren(true);
+        })
+    }
+
+    /// Disable Read DMA
+    fn disable_read_dma(&self) {
+        Self::regs().csr().modify(|v| {
+            v.set_dmaren(false);
+        })
+    }
+
+    /// Enable Write DMA
+    fn enable_write_dma(&self) {
+        Self::regs().csr().modify(|v| {
+            v.set_dmawen(true);
+        })
+    }
+
+    /// Disable Write DMA
+    fn disable_write_dma(&self) {
+        Self::regs().csr().modify(|v| {
+            v.set_dmawen(false);
+        })
+    }
+
+    /// Set NARGS value
+    fn set_argument_count(&self, n: AccessCount) {
+        Self::regs().csr().modify(|v| {
+            v.set_nargs(match n {
+                AccessCount::One => vals::Num::NUM1,
+                AccessCount::Two => vals::Num::NUM2,
+            })
+        })
+    }
+
+    /// Set NRES value
+    fn set_result_count(&self, n: AccessCount) {
+        Self::regs().csr().modify(|v| {
+            v.set_nres(match n {
+                AccessCount::One => vals::Num::NUM1,
+                AccessCount::Two => vals::Num::NUM2,
+            });
+        })
+    }
+
+    /// Set ARGSIZE and RESSIZE value
+    fn set_data_width(&self, arg: Width, res: Width) {
+        Self::regs().csr().modify(|v| {
+            v.set_argsize(match arg {
+                Width::Bits32 => vals::Size::BITS32,
+                Width::Bits16 => vals::Size::BITS16,
+            });
+            v.set_ressize(match res {
+                Width::Bits32 => vals::Size::BITS32,
+                Width::Bits16 => vals::Size::BITS16,
+            })
+        })
+    }
+
+    /// Read RRDY flag
+    fn ready_to_read(&self) -> bool {
+        Self::regs().csr().read().rrdy()
+    }
+
+    /// Write value to WDATA
+    fn write_argument(&self, arg: u32) {
+        Self::regs().wdata().write_value(arg)
+    }
+
+    /// Read value from RDATA
+    fn read_result(&self) -> u32 {
+        Self::regs().rdata().read()
+    }
+}
+
+/// CORDIC instance trait; its `SealedInstance` supertrait is private to this module, hence the `private_bounds` allowance
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + Peripheral<P = Self> + crate::rcc::RccPeripheral {}
+
+/// CORDIC configuration (function, precision, scale); fields are private, so build it with [Config::new], which validates the scale
+#[derive(Debug)]
+pub struct Config {
+    function: Function,
+    precision: Precision,
+    scale: Scale,
+}
+
+impl Config {
+    /// Create a config for Cordic driver; returns `CordicError::ConfigError` when `scale` is outside the range allowed for `function`
+    pub fn new(function: Function, precision: Precision, scale: Scale) -> Result<Self, CordicError> {
+        let config = Self {
+            function,
+            precision,
+            scale,
+        };
+
+        config.check_scale()?;
+
+        Ok(config)
+    }
+
+    fn check_scale(&self) -> Result<(), ConfigError> {
+        use Function::*;
+
+        let scale_raw = self.scale as u8;
+
+        let err_range = match self.function {
+            Cos | Sin | Phase | Modulus if !(0..=0).contains(&scale_raw) => Some([0, 0]),
+
+            Arctan if !(0..=7).contains(&scale_raw) => Some([0, 7]),
+
+            Cosh | Sinh | Arctanh if !(1..=1).contains(&scale_raw) => Some([1, 1]),
+
+            Ln if !(1..=4).contains(&scale_raw) => Some([1, 4]),
+
+            Sqrt if !(0..=2).contains(&scale_raw) => Some([0, 2]),
+
+            Cos | Sin | Phase | Modulus | Arctan | Cosh | Sinh | Arctanh | Ln | Sqrt => None,
+        };
+
+        if let Some(range) = err_range {
+            Err(ConfigError {
+                func: self.function,
+                scale_range: range,
+            })
+        } else {
+            Ok(())
+        }
+    }
+}
+
+// common methods
+impl<'d, T: Instance> Cordic<'d, T> {
+    /// Create a Cordic driver instance
+    ///
+    /// Note:  
+    /// If you need a peripheral -> CORDIC -> peripheral mode,  
+    /// you may want to set Cordic into [Mode::ZeroOverhead] mode, and add extra arguments with [Self::extra_config]
+    pub fn new(peri: impl Peripheral<P = T> + 'd, config: Config) -> Self {
+        T::enable_and_reset();
+
+        into_ref!(peri);
+
+        let mut instance = Self { peri, config };
+
+        instance.reconfigure();
+
+        instance
+    }
+
+    /// Store a new config for the Cordic driver and apply it via [Self::reconfigure]
+    pub fn set_config(&mut self, config: Config) {
+        self.config = config;
+        self.reconfigure();
+    }
+
+    /// Set extra config for argument count and argument/result data width.
+    pub fn extra_config(&mut self, arg_cnt: AccessCount, arg_width: Width, res_width: Width) {
+        self.peri.set_argument_count(arg_cnt);
+        self.peri.set_data_width(arg_width, res_width);
+    }
+
+    fn clean_rrdy_flag(&mut self) {
+        while self.peri.ready_to_read() {
+            self.peri.read_result();
+        }
+    }
+
+    /// Disable IRQ and DMA, clean RRDY, set ARG2 to +1 (0x7FFFFFFF), then apply the stored function, precision and scale
+    pub fn reconfigure(&mut self) {
+        // reset ARG2 to +1 by writing two arguments (0, then 0x7FFFFFFF) in a 32-bit Cos setup and discarding any result
+        {
+            self.peri.disable_irq();
+            self.peri.disable_read_dma();
+            self.peri.disable_write_dma();
+            self.clean_rrdy_flag();
+
+            self.peri.set_func(Function::Cos);
+            self.peri.set_precision(Precision::Iters4);
+            self.peri.set_scale(Scale::Arg1Res1);
+            self.peri.set_argument_count(AccessCount::Two);
+            self.peri.set_data_width(Width::Bits32, Width::Bits32);
+            self.peri.write_argument(0x0u32);
+            self.peri.write_argument(0x7FFFFFFFu32);
+
+            self.clean_rrdy_flag();
+        }
+
+        self.peri.set_func(self.config.function);
+        self.peri.set_precision(self.config.precision);
+        self.peri.set_scale(self.config.scale);
+
+        // we don't set NRES here; instead, NRES must be set each time the user calls a "calc"-ish function,
+        // since each "calc"-ish function can have a different ARGSIZE and RESSIZE, so NRES should change accordingly.
+    }
+}
+
+impl<'d, T: Instance> Drop for Cordic<'d, T> {
+    fn drop(&mut self) {
+        T::disable(); // NOTE(review): presumably the counterpart of `T::enable_and_reset()` in `new` — confirm
+    }
+}
+
+// q1.31 related
+impl<'d, T: Instance> Cordic<'d, T> {
+    /// Run a blocking CORDIC calculation in q1.31 format; on success returns the number of result words written to `res`.
+    ///
+    /// Notice:  
+    /// If you set `arg1_only` to `true`, make sure ARG2 has already been set to the desired value.  
+    /// This function does not set ARG2 to +1 before or after each round of calculation.  
+    /// To make sure ARG2 is set to +1, consider running [.reconfigure()](Self::reconfigure) first.
+    pub fn blocking_calc_32bit(
+        &mut self,
+        arg: &[u32],
+        res: &mut [u32],
+        arg1_only: bool,
+        res1_only: bool,
+    ) -> Result<usize, CordicError> {
+        if arg.is_empty() {
+            return Ok(0);
+        }
+
+        let res_cnt = Self::check_arg_res_length_32bit(arg.len(), res.len(), arg1_only, res1_only)?;
+
+        self.peri
+            .set_argument_count(if arg1_only { AccessCount::One } else { AccessCount::Two });
+
+        self.peri
+            .set_result_count(if res1_only { AccessCount::One } else { AccessCount::Two });
+
+        self.peri.set_data_width(Width::Bits32, Width::Bits32);
+
+        let mut cnt = 0;
+
+        match arg1_only {
+            true => {
+                // To use the CORDIC preload function, the first value is special:
+                // it is written to the CORDIC WDATA register outside of the loop.
+                let first_value = arg[0];
+
+                // preload the 1st value into CORDIC, which starts the first calculation
+                self.peri.write_argument(first_value);
+
+                for &arg1 in &arg[1..] {
+                    // preload arg1 (for the next calculation)
+                    self.peri.write_argument(arg1);
+
+                    // then read the current result out
+                    res[cnt] = self.peri.read_result();
+                    cnt += 1;
+                    if !res1_only {
+                        res[cnt] = self.peri.read_result();
+                        cnt += 1;
+                    }
+                }
+
+                // read the last result (nothing is left to preload)
+                res[cnt] = self.peri.read_result();
+                cnt += 1;
+                if !res1_only {
+                    res[cnt] = self.peri.read_result();
+                    // no final `cnt += 1` here: `cnt` is not read again after this point
+                }
+            }
+            false => {
+                // To use the CORDIC preload function, the first and the last value are special:
+                // they are written to the CORDIC WDATA register outside of the loop.
+                let first_value = arg[0];
+                let last_value = arg[arg.len() - 1];
+
+                let paired_args = &arg[1..arg.len() - 1];
+
+                // preload the 1st value (ARG1 of the first calculation) into CORDIC
+                self.peri.write_argument(first_value);
+
+                for args in paired_args.chunks(2) {
+                    let arg2 = args[0];
+                    let arg1 = args[1];
+
+                    // write arg2 (for the current calculation) first, which starts the calculation
+                    self.peri.write_argument(arg2);
+
+                    // preload arg1 (for the next calculation)
+                    self.peri.write_argument(arg1);
+
+                    // then read the current result out
+                    res[cnt] = self.peri.read_result();
+                    cnt += 1;
+                    if !res1_only {
+                        res[cnt] = self.peri.read_result();
+                        cnt += 1;
+                    }
+                }
+
+                // write the last value (ARG2 of the last calculation) to CORDIC, and finish the calculation
+                self.peri.write_argument(last_value);
+                res[cnt] = self.peri.read_result();
+                cnt += 1;
+                if !res1_only {
+                    res[cnt] = self.peri.read_result();
+                    // no final `cnt += 1` here: `cnt` is not read again after this point
+                }
+            }
+        }
+
+        // at this point `res[..res_cnt]` has been filled (`cnt` itself equals `res_cnt` only when `res1_only` is true)
+
+        Ok(res_cnt)
+    }
+
+    /// Run an async CORDIC calculation in q1.31 format, feeding `arg` in and reading `res` out through DMA transfers
+    ///
+    /// Notice:  
+    /// If you set `arg1_only` to `true`, make sure ARG2 has already been set to the desired value.  
+    /// This function does not set ARG2 to +1 before or after each round of calculation.  
+    /// To make sure ARG2 is set to +1, consider running [.reconfigure()](Self::reconfigure) first.
+    pub async fn async_calc_32bit(
+        &mut self,
+        write_dma: impl Peripheral<P = impl WriteDma<T>>,
+        read_dma: impl Peripheral<P = impl ReadDma<T>>,
+        arg: &[u32],
+        res: &mut [u32],
+        arg1_only: bool,
+        res1_only: bool,
+    ) -> Result<usize, CordicError> {
+        if arg.is_empty() {
+            return Ok(0);
+        }
+
+        let res_cnt = Self::check_arg_res_length_32bit(arg.len(), res.len(), arg1_only, res1_only)?;
+
+        let active_res_buf = &mut res[..res_cnt];
+
+        into_ref!(write_dma, read_dma);
+
+        self.peri
+            .set_argument_count(if arg1_only { AccessCount::One } else { AccessCount::Two });
+
+        self.peri
+            .set_result_count(if res1_only { AccessCount::One } else { AccessCount::Two });
+
+        self.peri.set_data_width(Width::Bits32, Width::Bits32);
+
+        let write_req = write_dma.request();
+        let read_req = read_dma.request();
+
+        self.peri.enable_write_dma();
+        self.peri.enable_read_dma();
+
+        let _on_drop = OnDrop::new(|| {
+            self.peri.disable_write_dma();
+            self.peri.disable_read_dma();
+        });
+
+        unsafe {
+            let write_transfer = dma::Transfer::new_write(
+                &mut write_dma,
+                write_req,
+                arg,
+                T::regs().wdata().as_ptr() as *mut _,
+                Default::default(),
+            );
+
+            let read_transfer = dma::Transfer::new_read(
+                &mut read_dma,
+                read_req,
+                T::regs().rdata().as_ptr() as *mut _,
+                active_res_buf,
+                Default::default(),
+            );
+
+            embassy_futures::join::join(write_transfer, read_transfer).await;
+        }
+
+        Ok(res_cnt)
+    }
+
+    fn check_arg_res_length_32bit(
+        arg_len: usize,
+        res_len: usize,
+        arg1_only: bool,
+        res1_only: bool,
+    ) -> Result<usize, CordicError> {
+        if !arg1_only && arg_len % 2 != 0 { // two-argument mode consumes `arg` in pairs, so the length must be even
+            return Err(CordicError::ArgumentLengthIncorrect);
+        }
+
+        let mut minimal_res_length = arg_len;
+
+        if !res1_only {
+            minimal_res_length *= 2; // two result words are read per calculation
+        }
+
+        if !arg1_only {
+            minimal_res_length /= 2 // two argument words are written per calculation
+        }
+
+        if minimal_res_length > res_len {
+            return Err(CordicError::ResultLengthNotEnough);
+        }
+
+        Ok(minimal_res_length)
+    }
+}
+
+// q1.15 related
+impl<'d, T: Instance> Cordic<'d, T> {
+    /// Run a blocking CORDIC calculation in q1.15 format; on success returns the number of result words written to `res`.
+    ///
+    /// Notice:  
+    /// The caller is responsible for merging two u16 arguments into each u32 word, and/or splitting each u32 word into two u16 results.
+    pub fn blocking_calc_16bit(&mut self, arg: &[u32], res: &mut [u32]) -> Result<usize, CordicError> {
+        if arg.is_empty() {
+            return Ok(0);
+        }
+
+        if arg.len() > res.len() {
+            return Err(CordicError::ResultLengthNotEnough);
+        }
+
+        let res_cnt = arg.len();
+
+        // In q1.15 mode, a single write/read accesses 2 arguments/results
+        self.peri.set_argument_count(AccessCount::One);
+        self.peri.set_result_count(AccessCount::One);
+
+        self.peri.set_data_width(Width::Bits16, Width::Bits16);
+
+        // To use the CORDIC preload function, the first value is special:
+        // it is written to the CORDIC WDATA register outside of the loop.
+        let first_value = arg[0];
+
+        // preload the 1st value into CORDIC, which starts the first calculation
+        self.peri.write_argument(first_value);
+
+        let mut cnt = 0;
+
+        for &arg_val in &arg[1..] {
+            // preload arg_val (for the next calculation)
+            self.peri.write_argument(arg_val);
+
+            // then read the current result out
+            res[cnt] = self.peri.read_result();
+            cnt += 1;
+        }
+
+        // read the last result out
+        res[cnt] = self.peri.read_result();
+        // no final `cnt += 1` here: `cnt` is not read again after this point
+
+        Ok(res_cnt)
+    }
+
+    /// Run an async CORDIC calculation in q1.15 format, feeding `arg` in and reading `res` out through DMA transfers
+    ///
+    /// Notice:  
+    /// The caller is responsible for merging two u16 arguments into each u32 word, and/or splitting each u32 word into two u16 results.
+    pub async fn async_calc_16bit(
+        &mut self,
+        write_dma: impl Peripheral<P = impl WriteDma<T>>,
+        read_dma: impl Peripheral<P = impl ReadDma<T>>,
+        arg: &[u32],
+        res: &mut [u32],
+    ) -> Result<usize, CordicError> {
+        if arg.is_empty() {
+            return Ok(0);
+        }
+
+        if arg.len() > res.len() {
+            return Err(CordicError::ResultLengthNotEnough);
+        }
+
+        let res_cnt = arg.len();
+
+        let active_res_buf = &mut res[..res_cnt];
+
+        into_ref!(write_dma, read_dma);
+
+        // In q1.15 mode, a single write/read accesses 2 arguments/results
+        self.peri.set_argument_count(AccessCount::One);
+        self.peri.set_result_count(AccessCount::One);
+
+        self.peri.set_data_width(Width::Bits16, Width::Bits16);
+
+        let write_req = write_dma.request();
+        let read_req = read_dma.request();
+
+        self.peri.enable_write_dma();
+        self.peri.enable_read_dma();
+
+        let _on_drop = OnDrop::new(|| {
+            self.peri.disable_write_dma();
+            self.peri.disable_read_dma();
+        });
+
+        unsafe {
+            let write_transfer = dma::Transfer::new_write(
+                &mut write_dma,
+                write_req,
+                arg,
+                T::regs().wdata().as_ptr() as *mut _,
+                Default::default(),
+            );
+
+            let read_transfer = dma::Transfer::new_read(
+                &mut read_dma,
+                read_req,
+                T::regs().rdata().as_ptr() as *mut _,
+                active_res_buf,
+                Default::default(),
+            );
+
+            embassy_futures::join::join(write_transfer, read_transfer).await;
+        }
+
+        Ok(res_cnt)
+    }
+}
+
+macro_rules! check_arg_value {
+    ($func_arg1_name:ident, $func_arg2_name:ident, $float_type:ty) => {
+        impl<'d, T: Instance> Cordic<'d, T> {
+            /// Check that the ARG1 input value lies in the range allowed by the configured FUNCTION and SCALE
+            pub fn $func_arg1_name(&self, arg: $float_type) -> Result<(), ArgError> {
+                let config = &self.config;
+
+                use Function::*;
+
+                struct Arg1ErrInfo {
+                    scale: Option<Scale>,
+                    range: [f32; 2], // f32 is ok, it is only used in error display
+                    inclusive_upper_bound: bool,
+                }
+
+                let err_info = match config.function {
+                    Cos | Sin | Phase | Modulus | Arctan if !(-1.0..=1.0).contains(arg) => Some(Arg1ErrInfo {
+                        scale: None,
+                        range: [-1.0, 1.0],
+                        inclusive_upper_bound: true,
+                    }),
+
+                    Cosh | Sinh if !(-0.559..=0.559).contains(arg) => Some(Arg1ErrInfo {
+                        scale: None,
+                        range: [-0.559, 0.559],
+                        inclusive_upper_bound: true,
+                    }),
+
+                    Arctanh if !(-0.403..=0.403).contains(arg) => Some(Arg1ErrInfo {
+                        scale: None,
+                        range: [-0.403, 0.403],
+                        inclusive_upper_bound: true,
+                    }),
+
+                    Ln => match config.scale {
+                        Scale::Arg1o2Res2 if !(0.0535..0.5).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o2Res2),
+                            range: [0.0535, 0.5],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o4Res4 if !(0.25..0.75).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o4Res4),
+                            range: [0.25, 0.75],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o8Res8 if !(0.375..0.875).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o8Res8),
+                            range: [0.375, 0.875],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o16Res16 if !(0.4375..0.584).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o16Res16),
+                            range: [0.4375, 0.584],
+                            inclusive_upper_bound: false,
+                        }),
+
+                        Scale::Arg1o2Res2 | Scale::Arg1o4Res4 | Scale::Arg1o8Res8 | Scale::Arg1o16Res16 => None,
+
+                        _ => unreachable!(), // NOTE(review): assumes any other scale was already rejected for Ln when the config was checked — confirm
+                    },
+
+                    Sqrt => match config.scale {
+                        Scale::Arg1Res1 if !(0.027..0.75).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1Res1),
+                            range: [0.027, 0.75],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o2Res2 if !(0.375..0.875).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o2Res2),
+                            range: [0.375, 0.875],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o4Res4 if !(0.4375..0.584).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o4Res4),
+                            range: [0.4375, 0.584],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1Res1 | Scale::Arg1o2Res2 | Scale::Arg1o4Res4 => None,
+                        _ => unreachable!(), // NOTE(review): assumes any other scale was already rejected for Sqrt when the config was checked — confirm
+                    },
+
+                    Cos | Sin | Phase | Modulus | Arctan | Cosh | Sinh | Arctanh => None,
+                };
+
+                if let Some(err) = err_info {
+                    return Err(ArgError {
+                        func: config.function,
+                        scale: err.scale,
+                        arg_range: err.range,
+                        inclusive_upper_bound: err.inclusive_upper_bound,
+                        arg_type: ArgType::Arg1,
+                    });
+                }
+
+                Ok(())
+            }
+
+            /// Check that the ARG2 input value lies in the range allowed by the configured FUNCTION
+            pub fn $func_arg2_name(&self, arg: $float_type) -> Result<(), ArgError> {
+                let config = &self.config;
+
+                use Function::*;
+
+                struct Arg2ErrInfo {
+                    range: [f32; 2], // f32 is ok, it is only used in error display
+                }
+
+                let err_info = match config.function {
+                    Cos | Sin if !(0.0..=1.0).contains(arg) => Some(Arg2ErrInfo { range: [0.0, 1.0] }),
+
+                    Phase | Modulus if !(-1.0..=1.0).contains(arg) => Some(Arg2ErrInfo { range: [-1.0, 1.0] }),
+
+                    Cos | Sin | Phase | Modulus | Arctan | Cosh | Sinh | Arctanh | Ln | Sqrt => None,
+                };
+
+                if let Some(err) = err_info {
+                    return Err(ArgError {
+                        func: config.function,
+                        scale: None,
+                        arg_range: err.range,
+                        inclusive_upper_bound: true,
+                        arg_type: ArgType::Arg2,
+                    });
+                }
+
+                Ok(())
+            }
+        }
+    };
+}
+
+check_arg_value!(check_f64_arg1, check_f64_arg2, &f64);
+check_arg_value!(check_f32_arg1, check_f32_arg2, &f32);
+
+foreach_interrupt!(
+    ($inst:ident, cordic, $block:ident, GLOBAL, $irq:ident) => {
+        impl Instance for peripherals::$inst {
+        }
+
+        impl SealedInstance for peripherals::$inst {
+            fn regs() -> crate::pac::cordic::Cordic {
+                crate::pac::$inst
+            }
+        }
+    };
+);
+
+dma_trait!(WriteDma, Instance);
+dma_trait!(ReadDma, Instance);
diff --git a/embassy-stm32/src/cordic/utils.rs b/embassy-stm32/src/cordic/utils.rs
new file mode 100644
index 000000000..008f50270
--- /dev/null
+++ b/embassy-stm32/src/cordic/utils.rs
@@ -0,0 +1,62 @@
+//! Common math utils
+use super::errors::NumberOutOfRange;
+
+macro_rules! floating_fixed_convert {
+    ($f_to_q:ident, $q_to_f:ident, $unsigned_bin_typ:ty, $signed_bin_typ:ty, $float_ty:ty, $offset:literal, $min_positive:literal) => {
+        /// Convert a floating point value in [-1.0, 1.0] to fixed point format; values outside that range are rejected
+        pub fn $f_to_q(value: $float_ty) -> Result<$unsigned_bin_typ, NumberOutOfRange> {
+            const MIN_POSITIVE: $float_ty = unsafe { core::mem::transmute($min_positive) };
+
+            if value < -1.0 {
+                return Err(NumberOutOfRange::BelowLowerBound)
+            }
+
+            if value > 1.0 {
+                return Err(NumberOutOfRange::AboveUpperBound)
+            }
+
+
+            let value = if 1.0 - MIN_POSITIVE < value && value <= 1.0 {
+                // make an exception for floating point values in (1.0 - 2^{-x}, 1.0]:
+                // convert them to the max representable value of the q1.x format
+                (1.0 as $float_ty) - MIN_POSITIVE
+            } else {
+                value
+            };
+
+            // It's necessary to cast the float value to a signed integer before converting it to an unsigned value.
+            // Since the register value is actually a "signed value", an "as" cast keeps the original binary format but marks it as an unsigned value for register writing.
+            // see https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast
+            Ok((value * ((1 as $unsigned_bin_typ << $offset) as $float_ty)) as $signed_bin_typ as $unsigned_bin_typ)
+        }
+
+        #[inline(always)]
+        /// Convert a fixed point value to floating point format
+        pub fn $q_to_f(value: $unsigned_bin_typ) -> $float_ty {
+            // It's necessary to cast the unsigned integer to a signed integer before converting it to a float value.
+            // Since the register value is actually a "signed value", an "as" cast keeps the original binary format but marks it as a signed value.
+            // see https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast
+            (value as $signed_bin_typ as $float_ty) / ((1 as $unsigned_bin_typ << $offset) as $float_ty)
+        }
+    };
+}
+
+floating_fixed_convert!(
+    f64_to_q1_31,
+    q1_31_to_f64,
+    u32,
+    i32,
+    f64,
+    31,
+    0x3E00_0000_0000_0000u64 // IEEE 754 bit pattern of 2^(-31) as f64 (biased exponent 0x3E0 = 1023 - 31, zero mantissa)
+);
+
+floating_fixed_convert!(
+    f32_to_q1_15,
+    q1_15_to_f32,
+    u16,
+    i16,
+    f32,
+    15,
+    0x3800_0000u32 // IEEE 754 bit pattern of 2^(-15) as f32 (biased exponent 0x70 = 127 - 15, zero mantissa)
+);
diff --git a/embassy-stm32/src/i2c/mod.rs b/embassy-stm32/src/i2c/mod.rs
index f1b11cc44..a46061d54 100644
--- a/embassy-stm32/src/i2c/mod.rs
+++ b/embassy-stm32/src/i2c/mod.rs
@@ -6,6 +6,7 @@
 mod _version;
 
 use core::future::Future;
+use core::iter;
 use core::marker::PhantomData;
 
 use embassy_hal_internal::{into_ref, Peripheral, PeripheralRef};
@@ -332,8 +333,142 @@ impl<'d, T: Instance, TXDMA: TxDma<T>, RXDMA: RxDma<T>> embedded_hal_async::i2c:
         address: u8,
         operations: &mut [embedded_hal_1::i2c::Operation<'_>],
     ) -> Result<(), Self::Error> {
-        let _ = address;
-        let _ = operations;
-        todo!()
+        self.transaction(address, operations).await
     }
 }
+
+/// Frame type in I2C transaction.
+///
+/// This tells each method what kind of framing to use, to generate a (repeated) start condition (ST
+/// or SR), and/or a stop condition (SP). For read operations, this also controls whether to send an
+/// ACK or NACK after the last byte received.
+///
+/// For write operations, the following options are identical because they differ only in the (N)ACK
+/// treatment relevant for read operations:
+///
+/// - `FirstFrame` and `FirstAndNextFrame`
+/// - `NextFrame` and `LastFrameNoStop`
+///
+/// Abbreviations used below:
+///
+/// - `ST` = start condition
+/// - `SR` = repeated start condition
+/// - `SP` = stop condition
+/// - `ACK`/`NACK` = acknowledge / not-acknowledge sent after the last byte received in a read operation
+#[derive(Copy, Clone)]
+#[allow(dead_code)] // NOTE(review): presumably not every I2C peripheral version uses all of this — confirm
+enum FrameOptions {
+    /// `[ST/SR]+[NACK]+[SP]` First frame (of this type) in transaction and also last frame overall.
+    FirstAndLastFrame,
+    /// `[ST/SR]+[NACK]` First frame of this type in transaction, last frame in a read operation but
+    /// not the last frame overall.
+    FirstFrame,
+    /// `[ST/SR]+[ACK]` First frame of this type in transaction, neither last frame overall nor last
+    /// frame in a read operation.
+    FirstAndNextFrame,
+    /// `[ACK]` Middle frame in a read operation (neither first nor last).
+    NextFrame,
+    /// `[NACK]+[SP]` Last frame overall in this transaction but not the first frame.
+    LastFrame,
+    /// `[NACK]` Last frame in a read operation but not last frame overall in this transaction.
+    LastFrameNoStop,
+}
+
+#[allow(dead_code)] // NOTE(review): presumably not every I2C peripheral version calls all of these — confirm
+impl FrameOptions {
+    /// Returns `true` if a start or repeated start condition is sent before the transfer.
+    fn send_start(self) -> bool {
+        match self {
+            Self::FirstAndLastFrame | Self::FirstFrame | Self::FirstAndNextFrame => true,
+            Self::NextFrame | Self::LastFrame | Self::LastFrameNoStop => false,
+        }
+    }
+
+    /// Returns `true` if a stop condition is sent after the transfer.
+    fn send_stop(self) -> bool {
+        match self {
+            Self::FirstAndLastFrame | Self::LastFrame => true,
+            Self::FirstFrame | Self::FirstAndNextFrame | Self::NextFrame | Self::LastFrameNoStop => false,
+        }
+    }
+
+    /// Returns `true` if a NACK is sent after the last byte received, indicating end of read operation.
+    fn send_nack(self) -> bool {
+        match self {
+            Self::FirstAndLastFrame | Self::FirstFrame | Self::LastFrame | Self::LastFrameNoStop => true,
+            Self::FirstAndNextFrame | Self::NextFrame => false,
+        }
+    }
+}
+
+/// Iterates over operations in transaction.
+///
+/// Returns necessary frame options for each operation to uphold the [transaction contract] and have
+/// the right start/stop/(N)ACK conditions on the wire.
+///
+/// [transaction contract]: embedded_hal_1::i2c::I2c::transaction
+#[allow(dead_code)]
+fn operation_frames<'a, 'b: 'a>(
+    operations: &'a mut [embedded_hal_1::i2c::Operation<'b>],
+) -> Result<impl IntoIterator<Item = (&'a mut embedded_hal_1::i2c::Operation<'b>, FrameOptions)>, Error> {
+    use embedded_hal_1::i2c::Operation::{Read, Write};
+
+    // Reject empty read buffers (with `Error::Overrun`) before starting the transaction. Otherwise,
+    // we would risk halting with an error in the middle of the transaction.
+    //
+    // In principle, we could allow empty read frames within consecutive read operations, as long as
+    // at least one byte remains in the final (merged) read operation, but that makes the logic more
+    // complicated and error-prone.
+    if operations.iter().any(|op| match op {
+        Read(read) => read.is_empty(),
+        Write(_) => false,
+    }) {
+        return Err(Error::Overrun);
+    }
+
+    let mut operations = operations.iter_mut().peekable();
+
+    let mut next_first_frame = true;
+
+    Ok(iter::from_fn(move || {
+        let Some(op) = operations.next() else {
+            return None;
+        };
+
+        // Is `op` first frame of its type?
+        let first_frame = next_first_frame;
+        let next_op = operations.peek();
+
+        // Get appropriate frame options as combination of the following properties:
+        //
+        // - For each first operation of its type, generate a (repeated) start condition.
+        // - For the last operation overall in the entire transaction, generate a stop condition.
+        // - For read operations, check the next operation: if it is also a read operation, we merge
+        //   these and send ACK for all bytes in the current operation; send NACK only for the final
+        //   read operation's last byte (before write or end of entire transaction) to indicate last
+        //   byte read and release the bus for transmission of the bus master's next byte (or stop).
+        //
+        // We check the third property unconditionally, i.e. even for write operations. This is okay
+        // because the resulting frame options are identical for write operations.
+        let frame = match (first_frame, next_op) {
+            (true, None) => FrameOptions::FirstAndLastFrame,
+            (true, Some(Read(_))) => FrameOptions::FirstAndNextFrame,
+            (true, Some(Write(_))) => FrameOptions::FirstFrame,
+            //
+            (false, None) => FrameOptions::LastFrame,
+            (false, Some(Read(_))) => FrameOptions::NextFrame,
+            (false, Some(Write(_))) => FrameOptions::LastFrameNoStop,
+        };
+
+        // Pre-calculate if `next_op` is the first operation of its type. We do this here and not at
+        // the beginning of the next call because we hand out `op` as iterator value and cannot access
+        // it anymore in the next iteration.
+        next_first_frame = match (&op, next_op) {
+            (_, None) => false,
+            (Read(_), Some(Write(_))) | (Write(_), Some(Read(_))) => true,
+            (Read(_), Some(Read(_))) | (Write(_), Some(Write(_))) => false,
+        };
+
+        Some((op, frame))
+    }))
+}
diff --git a/embassy-stm32/src/i2c/v1.rs b/embassy-stm32/src/i2c/v1.rs
index 9f29ed5e0..d45c48b24 100644
--- a/embassy-stm32/src/i2c/v1.rs
+++ b/embassy-stm32/src/i2c/v1.rs
@@ -41,68 +41,6 @@ pub unsafe fn on_interrupt<T: Instance>() {
     });
 }
 
-/// Frame type in I2C transaction.
-///
-/// This tells each method what kind of framing to use, to generate a (repeated) start condition (ST
-/// or SR), and/or a stop condition (SP). For read operations, this also controls whether to send an
-/// ACK or NACK after the last byte received.
-///
-/// For write operations, the following options are identical because they differ only in the (N)ACK
-/// treatment relevant for read operations:
-///
-/// - `FirstFrame` and `FirstAndNextFrame`
-/// - `NextFrame` and `LastFrameNoStop`
-///
-/// Abbreviations used below:
-///
-/// - `ST` = start condition
-/// - `SR` = repeated start condition
-/// - `SP` = stop condition
-#[derive(Copy, Clone)]
-enum FrameOptions {
-    /// `[ST/SR]+[NACK]+[SP]` First frame (of this type) in operation and last frame overall in this
-    /// transaction.
-    FirstAndLastFrame,
-    /// `[ST/SR]+[NACK]` First frame of this type in transaction, last frame in a read operation but
-    /// not the last frame overall.
-    FirstFrame,
-    /// `[ST/SR]+[ACK]` First frame of this type in transaction, neither last frame overall nor last
-    /// frame in a read operation.
-    FirstAndNextFrame,
-    /// `[ACK]` Middle frame in a read operation (neither first nor last).
-    NextFrame,
-    /// `[NACK]+[SP]` Last frame overall in this transaction but not the first frame.
-    LastFrame,
-    /// `[NACK]` Last frame in a read operation but not last frame overall in this transaction.
-    LastFrameNoStop,
-}
-
-impl FrameOptions {
-    /// Sends start or repeated start condition before transfer.
-    fn send_start(self) -> bool {
-        match self {
-            Self::FirstAndLastFrame | Self::FirstFrame | Self::FirstAndNextFrame => true,
-            Self::NextFrame | Self::LastFrame | Self::LastFrameNoStop => false,
-        }
-    }
-
-    /// Sends stop condition after transfer.
-    fn send_stop(self) -> bool {
-        match self {
-            Self::FirstAndLastFrame | Self::LastFrame => true,
-            Self::FirstFrame | Self::FirstAndNextFrame | Self::NextFrame | Self::LastFrameNoStop => false,
-        }
-    }
-
-    /// Sends NACK after last byte received, indicating end of read operation.
-    fn send_nack(self) -> bool {
-        match self {
-            Self::FirstAndLastFrame | Self::FirstFrame | Self::LastFrame | Self::LastFrameNoStop => true,
-            Self::FirstAndNextFrame | Self::NextFrame => false,
-        }
-    }
-}
-
 impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     pub(crate) fn init(&mut self, freq: Hertz, _config: Config) {
         T::regs().cr1().modify(|reg| {
@@ -199,17 +137,12 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
                 timeout.check()?;
             }
 
-            // Also wait until signalled we're master and everything is waiting for us
-            while {
-                Self::check_and_clear_error_flags()?;
-
-                let sr2 = T::regs().sr2().read();
-                !sr2.msl() && !sr2.busy()
-            } {
-                timeout.check()?;
+            // Check if we were the ones to generate START
+            if T::regs().cr1().read().start() || !T::regs().sr2().read().msl() {
+                return Err(Error::Arbitration);
             }
 
-            // Set up current address, we're trying to talk to
+            // Set up current address we're trying to talk to
             T::regs().dr().write(|reg| reg.set_dr(addr << 1));
 
             // Wait until address was sent
@@ -231,10 +164,6 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         if frame.send_stop() {
             // Send a STOP condition
             T::regs().cr1().modify(|reg| reg.set_stop(true));
-            // Wait for STOP condition to transmit.
-            while T::regs().cr1().read().stop() {
-                timeout.check()?;
-            }
         }
 
         // Fallthrough is success
@@ -301,15 +230,12 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
                 timeout.check()?;
             }
 
-            // Also wait until signalled we're master and everything is waiting for us
-            while {
-                let sr2 = T::regs().sr2().read();
-                !sr2.msl() && !sr2.busy()
-            } {
-                timeout.check()?;
+            // Check if we were the ones to generate START
+            if T::regs().cr1().read().start() || !T::regs().sr2().read().msl() {
+                return Err(Error::Arbitration);
             }
 
-            // Set up current address, we're trying to talk to
+            // Set up current address we're trying to talk to
             T::regs().dr().write(|reg| reg.set_dr((addr << 1) + 1));
 
             // Wait until address was sent
@@ -340,13 +266,6 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         // Receive last byte
         *last = self.recv_byte(timeout)?;
 
-        if frame.send_stop() {
-            // Wait for the STOP to be sent.
-            while T::regs().cr1().read().stop() {
-                timeout.check()?;
-            }
-        }
-
         // Fallthrough is success
         Ok(())
     }
@@ -386,64 +305,13 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     ///
     /// [transaction contract]: embedded_hal_1::i2c::I2c::transaction
     pub fn blocking_transaction(&mut self, addr: u8, operations: &mut [Operation<'_>]) -> Result<(), Error> {
-        // Check empty read buffer before starting transaction. Otherwise, we would not generate the
-        // stop condition below.
-        if operations.iter().any(|op| match op {
-            Operation::Read(read) => read.is_empty(),
-            Operation::Write(_) => false,
-        }) {
-            return Err(Error::Overrun);
-        }
-
         let timeout = self.timeout();
 
-        let mut operations = operations.iter_mut();
-
-        let mut prev_op: Option<&mut Operation<'_>> = None;
-        let mut next_op = operations.next();
-
-        while let Some(op) = next_op {
-            next_op = operations.next();
-
-            // Check if this is the first frame of this type. This is the case for the first overall
-            // frame in the transaction and whenever the type of operation changes.
-            let first_frame =
-                match (prev_op.as_ref(), &op) {
-                    (None, _) => true,
-                    (Some(Operation::Read(_)), Operation::Write(_))
-                    | (Some(Operation::Write(_)), Operation::Read(_)) => true,
-                    (Some(Operation::Read(_)), Operation::Read(_))
-                    | (Some(Operation::Write(_)), Operation::Write(_)) => false,
-                };
-
-            let frame = match (first_frame, next_op.as_ref()) {
-                // If this is the first frame of this type, we generate a (repeated) start condition
-                // but have to consider the next operation: if it is the last, we generate the final
-                // stop condition. Otherwise, we branch on the operation: with read operations, only
-                // the last byte overall (before a write operation or the end of the transaction) is
-                // to be NACK'd, i.e. if another read operation follows, we must ACK this last byte.
-                (true, None) => FrameOptions::FirstAndLastFrame,
-                // Make sure to keep sending ACK for last byte in read operation when it is followed
-                // by another consecutive read operation. If the current operation is write, this is
-                // identical to `FirstFrame`.
-                (true, Some(Operation::Read(_))) => FrameOptions::FirstAndNextFrame,
-                // Otherwise, send NACK for last byte (in read operation). (For write, this does not
-                // matter and could also be `FirstAndNextFrame`.)
-                (true, Some(Operation::Write(_))) => FrameOptions::FirstFrame,
-
-                // If this is not the first frame of its type, we do not generate a (repeated) start
-                // condition. Otherwise, we branch the same way as above.
-                (false, None) => FrameOptions::LastFrame,
-                (false, Some(Operation::Read(_))) => FrameOptions::NextFrame,
-                (false, Some(Operation::Write(_))) => FrameOptions::LastFrameNoStop,
-            };
-
+        for (op, frame) in operation_frames(operations)? {
             match op {
                 Operation::Read(read) => self.blocking_read_timeout(addr, read, timeout, frame)?,
                 Operation::Write(write) => self.write_bytes(addr, write, timeout, frame)?,
             }
-
-            prev_op = Some(op);
         }
 
         Ok(())
@@ -459,111 +327,110 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         });
     }
 
-    async fn write_with_stop(&mut self, address: u8, write: &[u8], send_stop: bool) -> Result<(), Error>
+    async fn write_frame(&mut self, address: u8, write: &[u8], frame: FrameOptions) -> Result<(), Error>
     where
         TXDMA: crate::i2c::TxDma<T>,
     {
-        let dma_transfer = unsafe {
-            let regs = T::regs();
-            regs.cr2().modify(|w| {
-                // DMA mode can be enabled for transmission by setting the DMAEN bit in the I2C_CR2 register.
-                w.set_dmaen(true);
-                w.set_itbufen(false);
-            });
-            // Set the I2C_DR register address in the DMA_SxPAR register. The data will be moved to this address from the memory after each TxE event.
-            let dst = regs.dr().as_ptr() as *mut u8;
-
-            let ch = &mut self.tx_dma;
-            let request = ch.request();
-            Transfer::new_write(ch, request, write, dst, Default::default())
-        };
+        T::regs().cr2().modify(|w| {
+            // Note: Do not enable the ITBUFEN bit in the I2C_CR2 register if DMA is used for
+            // transmission.
+            w.set_itbufen(false);
+            // DMA mode can be enabled for transmission by setting the DMAEN bit in the I2C_CR2
+            // register.
+            w.set_dmaen(true);
+            // Sending NACK is not necessary (nor possible) for write transfer.
+            w.set_last(false);
+        });
 
+        // Sentinel to disable transfer when an error occurs or future is canceled.
+        // TODO: Generate STOP condition on cancel?
         let on_drop = OnDrop::new(|| {
-            let regs = T::regs();
-            regs.cr2().modify(|w| {
+            T::regs().cr2().modify(|w| {
                 w.set_dmaen(false);
                 w.set_iterren(false);
                 w.set_itevten(false);
             })
         });
 
-        Self::enable_interrupts();
-
-        // Send a START condition
-        T::regs().cr1().modify(|reg| {
-            reg.set_start(true);
-        });
-
         let state = T::state();
 
-        // Wait until START condition was generated
-        poll_fn(|cx| {
-            state.waker.register(cx.waker());
+        if frame.send_start() {
+            // Send a START condition
+            T::regs().cr1().modify(|reg| {
+                reg.set_start(true);
+            });
 
-            match Self::check_and_clear_error_flags() {
-                Err(e) => Poll::Ready(Err(e)),
-                Ok(sr1) => {
-                    if sr1.start() {
-                        Poll::Ready(Ok(()))
-                    } else {
-                        Poll::Pending
+            // Wait until START condition was generated
+            poll_fn(|cx| {
+                state.waker.register(cx.waker());
+
+                match Self::check_and_clear_error_flags() {
+                    Err(e) => Poll::Ready(Err(e)),
+                    Ok(sr1) => {
+                        if sr1.start() {
+                            Poll::Ready(Ok(()))
+                        } else {
+                            // When pending, (re-)enable interrupts to wake us up.
+                            Self::enable_interrupts();
+                            Poll::Pending
+                        }
                     }
                 }
+            })
+            .await?;
+
+            // Check if we were the ones to generate START
+            if T::regs().cr1().read().start() || !T::regs().sr2().read().msl() {
+                return Err(Error::Arbitration);
             }
-        })
-        .await?;
 
-        // Also wait until signalled we're master and everything is waiting for us
-        Self::enable_interrupts();
-        poll_fn(|cx| {
-            state.waker.register(cx.waker());
+            // Set up current address we're trying to talk to
+            T::regs().dr().write(|reg| reg.set_dr(address << 1));
 
-            match Self::check_and_clear_error_flags() {
-                Err(e) => Poll::Ready(Err(e)),
-                Ok(_) => {
-                    let sr2 = T::regs().sr2().read();
-                    if !sr2.msl() && !sr2.busy() {
-                        Poll::Pending
-                    } else {
-                        Poll::Ready(Ok(()))
+            // Wait for the address to be acknowledged
+            poll_fn(|cx| {
+                state.waker.register(cx.waker());
+
+                match Self::check_and_clear_error_flags() {
+                    Err(e) => Poll::Ready(Err(e)),
+                    Ok(sr1) => {
+                        if sr1.addr() {
+                            Poll::Ready(Ok(()))
+                        } else {
+                            // When pending, (re-)enable interrupts to wake us up.
+                            Self::enable_interrupts();
+                            Poll::Pending
+                        }
                     }
                 }
-            }
-        })
-        .await?;
+            })
+            .await?;
 
-        // Set up current address, we're trying to talk to
-        Self::enable_interrupts();
-        T::regs().dr().write(|reg| reg.set_dr(address << 1));
+            // Clear the ADDR condition by reading SR2
+            T::regs().sr2().read();
+        }
 
-        poll_fn(|cx| {
-            state.waker.register(cx.waker());
-            match Self::check_and_clear_error_flags() {
-                Err(e) => Poll::Ready(Err(e)),
-                Ok(sr1) => {
-                    if sr1.addr() {
-                        // Clear the ADDR condition by reading SR2.
-                        T::regs().sr2().read();
-                        Poll::Ready(Ok(()))
-                    } else {
-                        // If we need to go around, then re-enable the interrupts, otherwise nothing
-                        // can wake us up and we'll hang.
-                        Self::enable_interrupts();
-                        Poll::Pending
-                    }
-                }
-            }
-        })
-        .await?;
-        Self::enable_interrupts();
+        let dma_transfer = unsafe {
+            // Set the I2C_DR register address in the DMA_SxPAR register. The data will be moved to
+            // this address from the memory after each TxE event.
+            let dst = T::regs().dr().as_ptr() as *mut u8;
+
+            let ch = &mut self.tx_dma;
+            let request = ch.request();
+            Transfer::new_write(ch, request, write, dst, Default::default())
+        };
+
+        // Wait for bytes to be sent, or an error to occur.
         let poll_error = poll_fn(|cx| {
             state.waker.register(cx.waker());
 
             match Self::check_and_clear_error_flags() {
-                // Unclear why the Err turbofish is necessary here? The compiler didn’t require it in the other
-                // identical poll_fn check_and_clear matches.
-                Err(e) => Poll::Ready(Err::<T, Error>(e)),
-                Ok(_) => Poll::Pending,
+                Err(e) => Poll::Ready(Err::<(), Error>(e)),
+                Ok(_) => {
+                    // When pending, (re-)enable interrupts to wake us up.
+                    Self::enable_interrupts();
+                    Poll::Pending
+                }
             }
         });
 
@@ -573,38 +440,37 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
             _ => Ok(()),
         }?;
 
-        // The I2C transfer itself will take longer than the DMA transfer, so wait for that to finish too.
-
-        // 18.3.8 “Master transmitter: In the interrupt routine after the EOT interrupt, disable DMA
-        // requests then wait for a BTF event before programming the Stop condition.”
-
-        // TODO: If this has to be done “in the interrupt routine after the EOT interrupt”, where to put it?
         T::regs().cr2().modify(|w| {
             w.set_dmaen(false);
         });
 
-        Self::enable_interrupts();
-        poll_fn(|cx| {
-            state.waker.register(cx.waker());
+        if frame.send_stop() {
+            // The I2C transfer itself will take longer than the DMA transfer, so wait for that to finish too.
 
-            match Self::check_and_clear_error_flags() {
-                Err(e) => Poll::Ready(Err(e)),
-                Ok(sr1) => {
-                    if sr1.btf() {
-                        if send_stop {
-                            T::regs().cr1().modify(|w| {
-                                w.set_stop(true);
-                            });
+            // 18.3.8 “Master transmitter: In the interrupt routine after the EOT interrupt, disable DMA
+            // requests then wait for a BTF event before programming the Stop condition.”
+            poll_fn(|cx| {
+                state.waker.register(cx.waker());
+
+                match Self::check_and_clear_error_flags() {
+                    Err(e) => Poll::Ready(Err(e)),
+                    Ok(sr1) => {
+                        if sr1.btf() {
+                            Poll::Ready(Ok(()))
+                        } else {
+                            // When pending, (re-)enable interrupts to wake us up.
+                            Self::enable_interrupts();
+                            Poll::Pending
                         }
-
-                        Poll::Ready(Ok(()))
-                    } else {
-                        Poll::Pending
                     }
                 }
-            }
-        })
-        .await?;
+            })
+            .await?;
+
+            T::regs().cr1().modify(|w| {
+                w.set_stop(true);
+            });
+        }
 
         drop(on_drop);
 
@@ -617,20 +483,8 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     where
         TXDMA: crate::i2c::TxDma<T>,
     {
-        self.write_with_stop(address, write, true).await?;
-
-        // Wait for STOP condition to transmit.
-        Self::enable_interrupts();
-        poll_fn(|cx| {
-            T::state().waker.register(cx.waker());
-            // TODO: error interrupts are enabled here, should we additional check for and return errors?
-            if T::regs().cr1().read().stop() {
-                Poll::Pending
-            } else {
-                Poll::Ready(Ok(()))
-            }
-        })
-        .await?;
+        self.write_frame(address, write, FrameOptions::FirstAndLastFrame)
+            .await?;
 
         Ok(())
     }
@@ -640,135 +494,151 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     where
         RXDMA: crate::i2c::RxDma<T>,
     {
-        let state = T::state();
-        let buffer_len = buffer.len();
+        self.read_frame(address, buffer, FrameOptions::FirstAndLastFrame)
+            .await?;
 
-        let dma_transfer = unsafe {
-            let regs = T::regs();
-            regs.cr2().modify(|w| {
-                // DMA mode can be enabled for transmission by setting the DMAEN bit in the I2C_CR2 register.
-                w.set_itbufen(false);
-                w.set_dmaen(true);
-            });
-            // Set the I2C_DR register address in the DMA_SxPAR register. The data will be moved to this address from the memory after each TxE event.
-            let src = regs.dr().as_ptr() as *mut u8;
+        Ok(())
+    }
 
-            let ch = &mut self.rx_dma;
-            let request = ch.request();
-            Transfer::new_read(ch, request, src, buffer, Default::default())
-        };
+    async fn read_frame(&mut self, address: u8, buffer: &mut [u8], frame: FrameOptions) -> Result<(), Error>
+    where
+        RXDMA: crate::i2c::RxDma<T>,
+    {
+        if buffer.is_empty() {
+            return Err(Error::Overrun);
+        }
 
+        // Some branches below depend on whether the buffer contains only a single byte.
+        let single_byte = buffer.len() == 1;
+
+        T::regs().cr2().modify(|w| {
+            // Note: Do not enable the ITBUFEN bit in the I2C_CR2 register if DMA is used for
+            // reception.
+            w.set_itbufen(false);
+            // DMA mode can be enabled for reception by setting the DMAEN bit in the I2C_CR2
+            // register.
+            w.set_dmaen(true);
+            // If, in the I2C_CR2 register, the LAST bit is set, I2C automatically sends a NACK
+            // after the next byte following EOT_1. The user can generate a Stop condition in
+            // the DMA Transfer Complete interrupt routine if enabled.
+            w.set_last(frame.send_nack() && !single_byte);
+        });
+
+        // Sentinel to disable transfer when an error occurs or future is canceled.
+        // TODO: Generate STOP condition on cancel?
         let on_drop = OnDrop::new(|| {
-            let regs = T::regs();
-            regs.cr2().modify(|w| {
+            T::regs().cr2().modify(|w| {
                 w.set_dmaen(false);
                 w.set_iterren(false);
                 w.set_itevten(false);
             })
         });
 
-        Self::enable_interrupts();
+        let state = T::state();
 
-        // Send a START condition and set ACK bit
-        T::regs().cr1().modify(|reg| {
-            reg.set_start(true);
-            reg.set_ack(true);
-        });
+        if frame.send_start() {
+            // Send a START condition and set ACK bit
+            T::regs().cr1().modify(|reg| {
+                reg.set_start(true);
+                reg.set_ack(true);
+            });
 
-        // Wait until START condition was generated
-        poll_fn(|cx| {
-            state.waker.register(cx.waker());
+            // Wait until START condition was generated
+            poll_fn(|cx| {
+                state.waker.register(cx.waker());
 
-            match Self::check_and_clear_error_flags() {
-                Err(e) => Poll::Ready(Err(e)),
-                Ok(sr1) => {
-                    if sr1.start() {
-                        Poll::Ready(Ok(()))
-                    } else {
-                        Poll::Pending
-                    }
-                }
-            }
-        })
-        .await?;
-
-        // Also wait until signalled we're master and everything is waiting for us
-        Self::enable_interrupts();
-        poll_fn(|cx| {
-            state.waker.register(cx.waker());
-
-            // blocking read didn’t have a check_and_clear call here, but blocking write did so
-            // I’m adding it here in case that was an oversight.
-            match Self::check_and_clear_error_flags() {
-                Err(e) => Poll::Ready(Err(e)),
-                Ok(_) => {
-                    let sr2 = T::regs().sr2().read();
-                    if !sr2.msl() && !sr2.busy() {
-                        Poll::Pending
-                    } else {
-                        Poll::Ready(Ok(()))
-                    }
-                }
-            }
-        })
-        .await?;
-
-        // Set up current address, we're trying to talk to
-        T::regs().dr().write(|reg| reg.set_dr((address << 1) + 1));
-
-        // Wait for the address to be acknowledged
-
-        Self::enable_interrupts();
-        poll_fn(|cx| {
-            state.waker.register(cx.waker());
-
-            match Self::check_and_clear_error_flags() {
-                Err(e) => Poll::Ready(Err(e)),
-                Ok(sr1) => {
-                    if sr1.addr() {
-                        // 18.3.8: When a single byte must be received: the NACK must be programmed during EV6
-                        // event, i.e. program ACK=0 when ADDR=1, before clearing ADDR flag.
-                        if buffer_len == 1 {
-                            T::regs().cr1().modify(|w| {
-                                w.set_ack(false);
-                            });
+                match Self::check_and_clear_error_flags() {
+                    Err(e) => Poll::Ready(Err(e)),
+                    Ok(sr1) => {
+                        if sr1.start() {
+                            Poll::Ready(Ok(()))
+                        } else {
+                            // When pending, (re-)enable interrupts to wake us up.
+                            Self::enable_interrupts();
+                            Poll::Pending
                         }
-                        Poll::Ready(Ok(()))
-                    } else {
-                        Poll::Pending
                     }
                 }
+            })
+            .await?;
+
+            // Check if we were the ones to generate START
+            if T::regs().cr1().read().start() || !T::regs().sr2().read().msl() {
+                return Err(Error::Arbitration);
             }
-        })
-        .await?;
 
-        // Clear ADDR condition by reading SR2
-        T::regs().sr2().read();
+            // Set up current address we're trying to talk to
+            T::regs().dr().write(|reg| reg.set_dr((address << 1) + 1));
 
-        // 18.3.8: When a single byte must be received: [snip] Then the
-        // user can program the STOP condition either after clearing ADDR flag, or in the
-        // DMA Transfer Complete interrupt routine.
-        if buffer_len == 1 {
+            // Wait for the address to be acknowledged
+            poll_fn(|cx| {
+                state.waker.register(cx.waker());
+
+                match Self::check_and_clear_error_flags() {
+                    Err(e) => Poll::Ready(Err(e)),
+                    Ok(sr1) => {
+                        if sr1.addr() {
+                            Poll::Ready(Ok(()))
+                        } else {
+                            // When pending, (re-)enable interrupts to wake us up.
+                            Self::enable_interrupts();
+                            Poll::Pending
+                        }
+                    }
+                }
+            })
+            .await?;
+
+            // 18.3.8: When a single byte must be received: the NACK must be programmed during EV6
+            // event, i.e. program ACK=0 when ADDR=1, before clearing ADDR flag.
+            if frame.send_nack() && single_byte {
+                T::regs().cr1().modify(|w| {
+                    w.set_ack(false);
+                });
+            }
+
+            // Clear the ADDR condition by reading SR2
+            T::regs().sr2().read();
+        } else {
+            // Before starting reception of single byte (but without START condition, i.e. in case
+            // of continued frame), program NACK to emit at end of this byte.
+            if frame.send_nack() && single_byte {
+                T::regs().cr1().modify(|w| {
+                    w.set_ack(false);
+                });
+            }
+        }
+
+        // 18.3.8: When a single byte must be received: [snip] Then the user can program the STOP
+        // condition either after clearing ADDR flag, or in the DMA Transfer Complete interrupt
+        // routine.
+        if frame.send_stop() && single_byte {
             T::regs().cr1().modify(|w| {
                 w.set_stop(true);
             });
-        } else {
-            // If, in the I2C_CR2 register, the LAST bit is set, I2C
-            // automatically sends a NACK after the next byte following EOT_1. The user can
-            // generate a Stop condition in the DMA Transfer Complete interrupt routine if enabled.
-            T::regs().cr2().modify(|w| {
-                w.set_last(true);
-            })
         }
 
+        let dma_transfer = unsafe {
+            // Set the I2C_DR register address in the DMA_SxPAR register. The data will be moved
+            // from this address to the memory after each RxNE event.
+            let src = T::regs().dr().as_ptr() as *mut u8;
+
+            let ch = &mut self.rx_dma;
+            let request = ch.request();
+            Transfer::new_read(ch, request, src, buffer, Default::default())
+        };
+
         // Wait for bytes to be received, or an error to occur.
-        Self::enable_interrupts();
         let poll_error = poll_fn(|cx| {
             state.waker.register(cx.waker());
 
             match Self::check_and_clear_error_flags() {
-                Err(e) => Poll::Ready(Err::<T, Error>(e)),
-                _ => Poll::Pending,
+                Err(e) => Poll::Ready(Err::<(), Error>(e)),
+                _ => {
+                    // When pending, (re-)enable interrupts to wake us up.
+                    Self::enable_interrupts();
+                    Poll::Pending
+                }
             }
         });
 
@@ -777,18 +647,16 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
             _ => Ok(()),
         }?;
 
-        // Wait for the STOP to be sent (STOP bit cleared).
-        Self::enable_interrupts();
-        poll_fn(|cx| {
-            state.waker.register(cx.waker());
-            // TODO: error interrupts are enabled here, should we additional check for and return errors?
-            if T::regs().cr1().read().stop() {
-                Poll::Pending
-            } else {
-                Poll::Ready(Ok(()))
-            }
-        })
-        .await?;
+        T::regs().cr2().modify(|w| {
+            w.set_dmaen(false);
+        });
+
+        if frame.send_stop() && !single_byte {
+            T::regs().cr1().modify(|w| {
+                w.set_stop(true);
+            });
+        }
+
         drop(on_drop);
 
         // Fallthrough is success
@@ -801,8 +669,34 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         RXDMA: crate::i2c::RxDma<T>,
         TXDMA: crate::i2c::TxDma<T>,
     {
-        self.write_with_stop(address, write, false).await?;
-        self.read(address, read).await
+        // Check empty read buffer before starting transaction. Otherwise, we would not generate the
+        // stop condition below.
+        if read.is_empty() {
+            return Err(Error::Overrun);
+        }
+
+        self.write_frame(address, write, FrameOptions::FirstFrame).await?;
+        self.read_frame(address, read, FrameOptions::FirstAndLastFrame).await
+    }
+
+    /// Transaction with operations.
+    ///
+    /// Consecutive operations of same type are merged. See [transaction contract] for details.
+    ///
+    /// [transaction contract]: embedded_hal_1::i2c::I2c::transaction
+    pub async fn transaction(&mut self, addr: u8, operations: &mut [Operation<'_>]) -> Result<(), Error>
+    where
+        RXDMA: crate::i2c::RxDma<T>,
+        TXDMA: crate::i2c::TxDma<T>,
+    {
+        for (op, frame) in operation_frames(operations)? {
+            match op {
+                Operation::Read(read) => self.read_frame(addr, read, frame).await?,
+                Operation::Write(write) => self.write_frame(addr, write, frame).await?,
+            }
+        }
+
+        Ok(())
     }
 }
 
diff --git a/embassy-stm32/src/i2c/v2.rs b/embassy-stm32/src/i2c/v2.rs
index 8baf2849d..da3b0ee30 100644
--- a/embassy-stm32/src/i2c/v2.rs
+++ b/embassy-stm32/src/i2c/v2.rs
@@ -557,6 +557,21 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         Ok(())
     }
 
+    /// Transaction with operations (not yet implemented here: currently panics via `todo!()`).
+    ///
+    /// Consecutive operations of same type are merged. See [transaction contract] for details.
+    ///
+    /// [transaction contract]: embedded_hal_1::i2c::I2c::transaction
+    pub async fn transaction(&mut self, addr: u8, operations: &mut [Operation<'_>]) -> Result<(), Error>
+    where
+        RXDMA: crate::i2c::RxDma<T>,
+        TXDMA: crate::i2c::TxDma<T>,
+    {
+        let _ = addr;
+        let _ = operations;
+        todo!()
+    }
+
     // =========================
     //  Blocking public API
 
diff --git a/embassy-stm32/src/lib.rs b/embassy-stm32/src/lib.rs
index 8b826e5ac..ea17f8477 100644
--- a/embassy-stm32/src/lib.rs
+++ b/embassy-stm32/src/lib.rs
@@ -32,6 +32,9 @@ pub mod timer;
 pub mod adc;
 #[cfg(can)]
 pub mod can;
+// FIXME: The CORDIC driver causes stm32u5a5zj to crash
+#[cfg(all(cordic, not(any(stm32u5a5, stm32u5a9))))]
+pub mod cordic;
 #[cfg(crc)]
 pub mod crc;
 #[cfg(cryp)]
@@ -61,6 +64,8 @@ pub mod ipcc;
 pub mod low_power;
 #[cfg(opamp)]
 pub mod opamp;
+#[cfg(octospi)]
+pub mod ospi;
 #[cfg(quadspi)]
 pub mod qspi;
 #[cfg(rng)]
@@ -244,7 +249,7 @@ pub fn init(config: Config) -> Peripherals {
 
         #[cfg(dbgmcu)]
         crate::pac::DBGMCU.cr().modify(|cr| {
-            #[cfg(any(dbgmcu_h5))]
+            #[cfg(dbgmcu_h5)]
             {
                 cr.set_stop(config.enable_debug_during_sleep);
                 cr.set_standby(config.enable_debug_during_sleep);
diff --git a/embassy-stm32/src/ospi/enums.rs b/embassy-stm32/src/ospi/enums.rs
new file mode 100644
index 000000000..4021f7ce3
--- /dev/null
+++ b/embassy-stm32/src/ospi/enums.rs
@@ -0,0 +1,386 @@
+//! Enums used in Ospi configuration.
+
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+pub(crate) enum OspiMode {
+    IndirectWrite,
+    IndirectRead,
+    AutoPolling,
+    MemoryMapped,
+}
+
+impl Into<u8> for OspiMode {
+    fn into(self) -> u8 {
+        match self {
+            OspiMode::IndirectWrite => 0b00,
+            OspiMode::IndirectRead => 0b01,
+            OspiMode::AutoPolling => 0b10,
+            OspiMode::MemoryMapped => 0b11,
+        }
+    }
+}
+
+/// Ospi lane width
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+pub enum OspiWidth {
+    /// None
+    NONE,
+    /// Single lane
+    SING,
+    /// Dual lanes
+    DUAL,
+    /// Quad lanes
+    QUAD,
+    /// Eight lanes
+    OCTO,
+}
+
+impl Into<u8> for OspiWidth {
+    fn into(self) -> u8 {
+        match self {
+            OspiWidth::NONE => 0b00,
+            OspiWidth::SING => 0b01,
+            OspiWidth::DUAL => 0b10,
+            OspiWidth::QUAD => 0b11,
+            OspiWidth::OCTO => 0b100,
+        }
+    }
+}
+
+/// Flash bank selection
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+pub enum FlashSelection {
+    /// Bank 1
+    Flash1,
+    /// Bank 2
+    Flash2,
+}
+
+impl Into<bool> for FlashSelection {
+    fn into(self) -> bool {
+        match self {
+            FlashSelection::Flash1 => false,
+            FlashSelection::Flash2 => true,
+        }
+    }
+}
+
+/// Wrap Size
+#[allow(dead_code)]
+#[allow(missing_docs)]
+#[derive(Copy, Clone)]
+pub enum WrapSize {
+    None,
+    _16Bytes,
+    _32Bytes,
+    _64Bytes,
+    _128Bytes,
+}
+
+impl Into<u8> for WrapSize {
+    fn into(self) -> u8 {
+        match self {
+            WrapSize::None => 0x00,
+            WrapSize::_16Bytes => 0x02,
+            WrapSize::_32Bytes => 0x03,
+            WrapSize::_64Bytes => 0x04,
+            WrapSize::_128Bytes => 0x05,
+        }
+    }
+}
+
+/// Memory Type
+#[allow(missing_docs)]
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+pub enum MemoryType {
+    Micron,
+    Macronix,
+    Standard,
+    MacronixRam,
+    HyperBusMemory,
+    HyperBusRegister,
+}
+
+impl Into<u8> for MemoryType {
+    fn into(self) -> u8 { // raw memory-type bits; every variant must map to a distinct value
+        match self {
+            MemoryType::Micron => 0x00,
+            MemoryType::Macronix => 0x01,
+            MemoryType::Standard => 0x02,
+            MemoryType::MacronixRam => 0x03,
+            MemoryType::HyperBusMemory => 0x04,
+            MemoryType::HyperBusRegister => 0x05, // was 0x04, which aliased HyperBusMemory
+        }
+    }
+}
+
+/// Ospi memory size.
+#[allow(missing_docs)]
+#[derive(Copy, Clone)]
+pub enum MemorySize {
+    _1KiB,
+    _2KiB,
+    _4KiB,
+    _8KiB,
+    _16KiB,
+    _32KiB,
+    _64KiB,
+    _128KiB,
+    _256KiB,
+    _512KiB,
+    _1MiB,
+    _2MiB,
+    _4MiB,
+    _8MiB,
+    _16MiB,
+    _32MiB,
+    _64MiB,
+    _128MiB,
+    _256MiB,
+    _512MiB,
+    _1GiB,
+    _2GiB,
+    _4GiB,
+    Other(u8),
+}
+
+impl Into<u8> for MemorySize {
+    fn into(self) -> u8 {
+        match self {
+            MemorySize::_1KiB => 9,
+            MemorySize::_2KiB => 10,
+            MemorySize::_4KiB => 11,
+            MemorySize::_8KiB => 12,
+            MemorySize::_16KiB => 13,
+            MemorySize::_32KiB => 14,
+            MemorySize::_64KiB => 15,
+            MemorySize::_128KiB => 16,
+            MemorySize::_256KiB => 17,
+            MemorySize::_512KiB => 18,
+            MemorySize::_1MiB => 19,
+            MemorySize::_2MiB => 20,
+            MemorySize::_4MiB => 21,
+            MemorySize::_8MiB => 22,
+            MemorySize::_16MiB => 23,
+            MemorySize::_32MiB => 24,
+            MemorySize::_64MiB => 25,
+            MemorySize::_128MiB => 26,
+            MemorySize::_256MiB => 27,
+            MemorySize::_512MiB => 28,
+            MemorySize::_1GiB => 29,
+            MemorySize::_2GiB => 30,
+            MemorySize::_4GiB => 31,
+            MemorySize::Other(val) => val,
+        }
+    }
+}
+
+/// Ospi Address size
+#[derive(Copy, Clone)]
+pub enum AddressSize {
+    /// 8-bit address
+    _8Bit,
+    /// 16-bit address
+    _16Bit,
+    /// 24-bit address
+    _24bit,
+    /// 32-bit address
+    _32bit,
+}
+
+impl Into<u8> for AddressSize {
+    fn into(self) -> u8 {
+        match self {
+            AddressSize::_8Bit => 0b00,
+            AddressSize::_16Bit => 0b01,
+            AddressSize::_24bit => 0b10,
+            AddressSize::_32bit => 0b11,
+        }
+    }
+}
+
+/// Time the Chip Select line stays high.
+#[allow(missing_docs)]
+#[derive(Copy, Clone)]
+pub enum ChipSelectHighTime {
+    _1Cycle,
+    _2Cycle,
+    _3Cycle,
+    _4Cycle,
+    _5Cycle,
+    _6Cycle,
+    _7Cycle,
+    _8Cycle,
+}
+
+impl Into<u8> for ChipSelectHighTime {
+    fn into(self) -> u8 {
+        match self {
+            ChipSelectHighTime::_1Cycle => 0,
+            ChipSelectHighTime::_2Cycle => 1,
+            ChipSelectHighTime::_3Cycle => 2,
+            ChipSelectHighTime::_4Cycle => 3,
+            ChipSelectHighTime::_5Cycle => 4,
+            ChipSelectHighTime::_6Cycle => 5,
+            ChipSelectHighTime::_7Cycle => 6,
+            ChipSelectHighTime::_8Cycle => 7,
+        }
+    }
+}
+
+/// FIFO threshold.
+#[allow(missing_docs)]
+#[derive(Copy, Clone)]
+pub enum FIFOThresholdLevel {
+    _1Bytes,
+    _2Bytes,
+    _3Bytes,
+    _4Bytes,
+    _5Bytes,
+    _6Bytes,
+    _7Bytes,
+    _8Bytes,
+    _9Bytes,
+    _10Bytes,
+    _11Bytes,
+    _12Bytes,
+    _13Bytes,
+    _14Bytes,
+    _15Bytes,
+    _16Bytes,
+    _17Bytes,
+    _18Bytes,
+    _19Bytes,
+    _20Bytes,
+    _21Bytes,
+    _22Bytes,
+    _23Bytes,
+    _24Bytes,
+    _25Bytes,
+    _26Bytes,
+    _27Bytes,
+    _28Bytes,
+    _29Bytes,
+    _30Bytes,
+    _31Bytes,
+    _32Bytes,
+}
+
+impl Into<u8> for FIFOThresholdLevel {
+    fn into(self) -> u8 {
+        match self {
+            FIFOThresholdLevel::_1Bytes => 0,
+            FIFOThresholdLevel::_2Bytes => 1,
+            FIFOThresholdLevel::_3Bytes => 2,
+            FIFOThresholdLevel::_4Bytes => 3,
+            FIFOThresholdLevel::_5Bytes => 4,
+            FIFOThresholdLevel::_6Bytes => 5,
+            FIFOThresholdLevel::_7Bytes => 6,
+            FIFOThresholdLevel::_8Bytes => 7,
+            FIFOThresholdLevel::_9Bytes => 8,
+            FIFOThresholdLevel::_10Bytes => 9,
+            FIFOThresholdLevel::_11Bytes => 10,
+            FIFOThresholdLevel::_12Bytes => 11,
+            FIFOThresholdLevel::_13Bytes => 12,
+            FIFOThresholdLevel::_14Bytes => 13,
+            FIFOThresholdLevel::_15Bytes => 14,
+            FIFOThresholdLevel::_16Bytes => 15,
+            FIFOThresholdLevel::_17Bytes => 16,
+            FIFOThresholdLevel::_18Bytes => 17,
+            FIFOThresholdLevel::_19Bytes => 18,
+            FIFOThresholdLevel::_20Bytes => 19,
+            FIFOThresholdLevel::_21Bytes => 20,
+            FIFOThresholdLevel::_22Bytes => 21,
+            FIFOThresholdLevel::_23Bytes => 22,
+            FIFOThresholdLevel::_24Bytes => 23,
+            FIFOThresholdLevel::_25Bytes => 24,
+            FIFOThresholdLevel::_26Bytes => 25,
+            FIFOThresholdLevel::_27Bytes => 26,
+            FIFOThresholdLevel::_28Bytes => 27,
+            FIFOThresholdLevel::_29Bytes => 28,
+            FIFOThresholdLevel::_30Bytes => 29,
+            FIFOThresholdLevel::_31Bytes => 30,
+            FIFOThresholdLevel::_32Bytes => 31,
+        }
+    }
+}
+
+/// Dummy cycle count
+#[allow(missing_docs)]
+#[derive(Copy, Clone)]
+pub enum DummyCycles {
+    _0,
+    _1,
+    _2,
+    _3,
+    _4,
+    _5,
+    _6,
+    _7,
+    _8,
+    _9,
+    _10,
+    _11,
+    _12,
+    _13,
+    _14,
+    _15,
+    _16,
+    _17,
+    _18,
+    _19,
+    _20,
+    _21,
+    _22,
+    _23,
+    _24,
+    _25,
+    _26,
+    _27,
+    _28,
+    _29,
+    _30,
+    _31,
+}
+
+impl Into<u8> for DummyCycles {
+    fn into(self) -> u8 {
+        match self {
+            DummyCycles::_0 => 0,
+            DummyCycles::_1 => 1,
+            DummyCycles::_2 => 2,
+            DummyCycles::_3 => 3,
+            DummyCycles::_4 => 4,
+            DummyCycles::_5 => 5,
+            DummyCycles::_6 => 6,
+            DummyCycles::_7 => 7,
+            DummyCycles::_8 => 8,
+            DummyCycles::_9 => 9,
+            DummyCycles::_10 => 10,
+            DummyCycles::_11 => 11,
+            DummyCycles::_12 => 12,
+            DummyCycles::_13 => 13,
+            DummyCycles::_14 => 14,
+            DummyCycles::_15 => 15,
+            DummyCycles::_16 => 16,
+            DummyCycles::_17 => 17,
+            DummyCycles::_18 => 18,
+            DummyCycles::_19 => 19,
+            DummyCycles::_20 => 20,
+            DummyCycles::_21 => 21,
+            DummyCycles::_22 => 22,
+            DummyCycles::_23 => 23,
+            DummyCycles::_24 => 24,
+            DummyCycles::_25 => 25,
+            DummyCycles::_26 => 26,
+            DummyCycles::_27 => 27,
+            DummyCycles::_28 => 28,
+            DummyCycles::_29 => 29,
+            DummyCycles::_30 => 30,
+            DummyCycles::_31 => 31,
+        }
+    }
+}
diff --git a/embassy-stm32/src/ospi/mod.rs b/embassy-stm32/src/ospi/mod.rs
new file mode 100644
index 000000000..398c3298f
--- /dev/null
+++ b/embassy-stm32/src/ospi/mod.rs
@@ -0,0 +1,1050 @@
+//! OCTOSPI Serial Peripheral Interface
+//!
+
+#![macro_use]
+
+pub mod enums;
+
+use embassy_embedded_hal::{GetConfig, SetConfig};
+use embassy_hal_internal::{into_ref, PeripheralRef};
+pub use enums::*;
+use stm32_metapac::octospi::vals::{PhaseMode, SizeInBits};
+
+use crate::dma::{word, Transfer};
+use crate::gpio::{AFType, AnyPin, Pull, SealedPin as _};
+use crate::pac::octospi::{vals, Octospi as Regs};
+use crate::rcc::RccPeripheral;
+use crate::{peripherals, Peripheral};
+
+/// OSPI driver config.
+#[derive(Clone, Copy)]
+pub struct Config {
+    /// FIFO threshold used by the peripheral to generate the interrupt indicating data
+    /// or space is available in the FIFO
+    pub fifo_threshold: FIFOThresholdLevel,
+    /// Indicates the type of external device connected
+    pub memory_type: MemoryType, // Need to add an additional enum to provide this public interface
+    /// Defines the size of the external device connected to the OSPI corresponding
+    /// to the number of address bits required to access the device
+    pub device_size: MemorySize,
+    /// Sets the minimum number of clock cycles that the chip select signal must be held high
+    /// between commands
+    pub chip_select_high_time: ChipSelectHighTime,
+    /// Enables the free running clock
+    pub free_running_clock: bool,
+    /// Sets the clock level when the device is not selected
+    pub clock_mode: bool,
+    /// Indicates the wrap size corresponding to the external device configuration
+    pub wrap_size: WrapSize,
+    /// Specifies the prescaler factor used for generating the external clock based
+    /// on the AHB clock
+    pub clock_prescaler: u8,
+    /// Delays the data sampling by 1/2 cycle to account for external
+    /// signal delays
+    pub sample_shifting: bool,
+    /// Allows holding the data for 1/4 cycle
+    pub delay_hold_quarter_cycle: bool,
+    /// Enables the transaction boundary feature and defines the boundary to release
+    /// the chip select
+    pub chip_select_boundary: u8,
+    /// Enables the delay block bypass so the sampling is not affected by the delay block
+    pub delay_block_bypass: bool,
+    /// Enables communication regulation feature (only applied under `cfg(octospi_v1)`). Chip select is
+    /// released when the other OctoSpi requests access to the bus
+    pub max_transfer: u8,
+    /// Enables the refresh feature, chip select is released every refresh + 1 clock cycles
+    pub refresh: u32,
+}
+
+impl Default for Config {
+    fn default() -> Self {
+        Self {
+            fifo_threshold: FIFOThresholdLevel::_16Bytes, // 32 bytes FIFO, half capacity
+            memory_type: MemoryType::Micron,
+            device_size: MemorySize::Other(0),
+            chip_select_high_time: ChipSelectHighTime::_5Cycle,
+            free_running_clock: false,
+            clock_mode: false,
+            wrap_size: WrapSize::None,
+            clock_prescaler: 0,
+            sample_shifting: false,
+            delay_hold_quarter_cycle: false,
+            chip_select_boundary: 0, // Acceptable range 0 to 31
+            delay_block_bypass: true,
+            max_transfer: 0,
+            refresh: 0,
+        }
+    }
+}
+
+/// OSPI transfer configuration.
+pub struct TransferConfig {
+    /// Instruction width (IMODE)
+    pub iwidth: OspiWidth,
+    /// Instruction Id (`None`: no instruction is written for this transfer)
+    pub instruction: Option<u32>,
+    /// Number of Instruction Bytes
+    pub isize: AddressSize,
+    /// Instruction Double Transfer rate enable
+    pub idtr: bool,
+
+    /// Address width (ADMODE)
+    pub adwidth: OspiWidth,
+    /// Device memory address (`None`: no address is written for this transfer)
+    pub address: Option<u32>,
+    /// Number of Address Bytes
+    pub adsize: AddressSize,
+    /// Address Double Transfer rate enable
+    pub addtr: bool,
+
+    /// Alternate bytes width (ABMODE)
+    pub abwidth: OspiWidth,
+    /// Alternate Bytes (`None`: the alternate-bytes phase is not configured)
+    pub alternate_bytes: Option<u32>,
+    /// Number of Alternate Bytes
+    pub absize: AddressSize,
+    /// Alternate Bytes Double Transfer rate enable
+    pub abdtr: bool,
+
+    /// Data width (DMODE)
+    pub dwidth: OspiWidth,
+    /// Data Double Transfer rate enable
+    pub ddtr: bool,
+
+    /// Number of dummy cycles (DCYC)
+    pub dummy: DummyCycles,
+}
+
+impl Default for TransferConfig {
+    fn default() -> Self {
+        Self {
+            iwidth: OspiWidth::NONE,
+            instruction: None,
+            isize: AddressSize::_8Bit,
+            idtr: false,
+
+            adwidth: OspiWidth::NONE,
+            address: None,
+            adsize: AddressSize::_8Bit,
+            addtr: false,
+
+            abwidth: OspiWidth::NONE,
+            alternate_bytes: None,
+            absize: AddressSize::_8Bit,
+            abdtr: false,
+
+            dwidth: OspiWidth::NONE,
+            ddtr: false,
+
+            dummy: DummyCycles::_0,
+        }
+    }
+}
+
+/// Error used for Octospi implementation
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum OspiError {
+    /// Peripheral configuration is invalid
+    InvalidConfiguration,
+    /// Operation configuration is invalid
+    InvalidCommand,
+    /// Size zero buffer passed to instruction
+    EmptyBuffer,
+}
+
+/// OSPI driver.
+pub struct Ospi<'d, T: Instance, Dma> {
+    _peri: PeripheralRef<'d, T>,
+    sck: Option<PeripheralRef<'d, AnyPin>>,
+    d0: Option<PeripheralRef<'d, AnyPin>>,
+    d1: Option<PeripheralRef<'d, AnyPin>>,
+    d2: Option<PeripheralRef<'d, AnyPin>>,
+    d3: Option<PeripheralRef<'d, AnyPin>>,
+    d4: Option<PeripheralRef<'d, AnyPin>>,
+    d5: Option<PeripheralRef<'d, AnyPin>>,
+    d6: Option<PeripheralRef<'d, AnyPin>>,
+    d7: Option<PeripheralRef<'d, AnyPin>>,
+    nss: Option<PeripheralRef<'d, AnyPin>>,
+    dqs: Option<PeripheralRef<'d, AnyPin>>,
+    dma: PeripheralRef<'d, Dma>,
+    config: Config,
+    width: OspiWidth,
+}
+
+impl<'d, T: Instance, Dma> Ospi<'d, T, Dma> {
+    /// Create new OSPI driver for a single spi external chip
+    pub fn new_singlespi(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        d0: impl Peripheral<P = impl D0Pin<T>> + 'd,
+        d1: impl Peripheral<P = impl D1Pin<T>> + 'd,
+        nss: impl Peripheral<P = impl NSSPin<T>> + 'd,
+        dma: impl Peripheral<P = Dma> + 'd,
+        config: Config,
+    ) -> Self {
+        into_ref!(peri, sck, d0, d1, nss);
+
+        sck.set_as_af_pull(sck.af_num(), AFType::OutputPushPull, Pull::None);
+        sck.set_speed(crate::gpio::Speed::VeryHigh);
+        nss.set_as_af_pull(nss.af_num(), AFType::OutputPushPull, Pull::Up);
+        nss.set_speed(crate::gpio::Speed::VeryHigh);
+        d0.set_as_af_pull(d0.af_num(), AFType::OutputPushPull, Pull::None);
+        d0.set_speed(crate::gpio::Speed::VeryHigh);
+        d1.set_as_af_pull(d1.af_num(), AFType::Input, Pull::None);
+        d1.set_speed(crate::gpio::Speed::VeryHigh);
+
+        Self::new_inner(
+            peri,
+            Some(d0.map_into()),
+            Some(d1.map_into()),
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            Some(sck.map_into()),
+            Some(nss.map_into()),
+            None,
+            dma,
+            config,
+            OspiWidth::SING,
+            false,
+        )
+    }
+
+    /// Create new OSPI driver for a dualspi external chip
+    pub fn new_dualspi(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        d0: impl Peripheral<P = impl D0Pin<T>> + 'd,
+        d1: impl Peripheral<P = impl D1Pin<T>> + 'd,
+        nss: impl Peripheral<P = impl NSSPin<T>> + 'd,
+        dma: impl Peripheral<P = Dma> + 'd,
+        config: Config,
+    ) -> Self {
+        into_ref!(peri, sck, d0, d1, nss);
+
+        sck.set_as_af_pull(sck.af_num(), AFType::OutputPushPull, Pull::None);
+        sck.set_speed(crate::gpio::Speed::VeryHigh);
+        nss.set_as_af_pull(nss.af_num(), AFType::OutputPushPull, Pull::Up);
+        nss.set_speed(crate::gpio::Speed::VeryHigh);
+        d0.set_as_af_pull(d0.af_num(), AFType::OutputPushPull, Pull::None);
+        d0.set_speed(crate::gpio::Speed::VeryHigh);
+        d1.set_as_af_pull(d1.af_num(), AFType::OutputPushPull, Pull::None);
+        d1.set_speed(crate::gpio::Speed::VeryHigh);
+
+        Self::new_inner(
+            peri,
+            Some(d0.map_into()),
+            Some(d1.map_into()),
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            Some(sck.map_into()),
+            Some(nss.map_into()),
+            None,
+            dma,
+            config,
+            OspiWidth::DUAL,
+            false,
+        )
+    }
+
+    /// Create new OSPI driver for a quadspi external chip
+    pub fn new_quadspi(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        d0: impl Peripheral<P = impl D0Pin<T>> + 'd,
+        d1: impl Peripheral<P = impl D1Pin<T>> + 'd,
+        d2: impl Peripheral<P = impl D2Pin<T>> + 'd,
+        d3: impl Peripheral<P = impl D3Pin<T>> + 'd,
+        nss: impl Peripheral<P = impl NSSPin<T>> + 'd,
+        dma: impl Peripheral<P = Dma> + 'd,
+        config: Config,
+    ) -> Self {
+        into_ref!(peri, sck, d0, d1, d2, d3, nss);
+
+        sck.set_as_af_pull(sck.af_num(), AFType::OutputPushPull, Pull::None);
+        sck.set_speed(crate::gpio::Speed::VeryHigh);
+        nss.set_as_af_pull(nss.af_num(), AFType::OutputPushPull, Pull::Up);
+        nss.set_speed(crate::gpio::Speed::VeryHigh);
+        d0.set_as_af_pull(d0.af_num(), AFType::OutputPushPull, Pull::None);
+        d0.set_speed(crate::gpio::Speed::VeryHigh);
+        d1.set_as_af_pull(d1.af_num(), AFType::OutputPushPull, Pull::None);
+        d1.set_speed(crate::gpio::Speed::VeryHigh);
+        d2.set_as_af_pull(d2.af_num(), AFType::OutputPushPull, Pull::None);
+        d2.set_speed(crate::gpio::Speed::VeryHigh);
+        d3.set_as_af_pull(d3.af_num(), AFType::OutputPushPull, Pull::None);
+        d3.set_speed(crate::gpio::Speed::VeryHigh);
+
+        Self::new_inner(
+            peri,
+            Some(d0.map_into()),
+            Some(d1.map_into()),
+            Some(d2.map_into()),
+            Some(d3.map_into()),
+            None,
+            None,
+            None,
+            None,
+            Some(sck.map_into()),
+            Some(nss.map_into()),
+            None,
+            dma,
+            config,
+            OspiWidth::QUAD,
+            false,
+        )
+    }
+
+    /// Create new OSPI driver for two quadspi external chips
+    pub fn new_dualquadspi(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        d0: impl Peripheral<P = impl D0Pin<T>> + 'd,
+        d1: impl Peripheral<P = impl D1Pin<T>> + 'd,
+        d2: impl Peripheral<P = impl D2Pin<T>> + 'd,
+        d3: impl Peripheral<P = impl D3Pin<T>> + 'd,
+        d4: impl Peripheral<P = impl D4Pin<T>> + 'd,
+        d5: impl Peripheral<P = impl D5Pin<T>> + 'd,
+        d6: impl Peripheral<P = impl D6Pin<T>> + 'd,
+        d7: impl Peripheral<P = impl D7Pin<T>> + 'd,
+        nss: impl Peripheral<P = impl NSSPin<T>> + 'd,
+        dma: impl Peripheral<P = Dma> + 'd,
+        config: Config,
+    ) -> Self {
+        into_ref!(peri, sck, d0, d1, d2, d3, d4, d5, d6, d7, nss);
+
+        sck.set_as_af_pull(sck.af_num(), AFType::OutputPushPull, Pull::None);
+        sck.set_speed(crate::gpio::Speed::VeryHigh);
+        nss.set_as_af_pull(nss.af_num(), AFType::OutputPushPull, Pull::Up);
+        nss.set_speed(crate::gpio::Speed::VeryHigh);
+        d0.set_as_af_pull(d0.af_num(), AFType::OutputPushPull, Pull::None);
+        d0.set_speed(crate::gpio::Speed::VeryHigh);
+        d1.set_as_af_pull(d1.af_num(), AFType::OutputPushPull, Pull::None);
+        d1.set_speed(crate::gpio::Speed::VeryHigh);
+        d2.set_as_af_pull(d2.af_num(), AFType::OutputPushPull, Pull::None);
+        d2.set_speed(crate::gpio::Speed::VeryHigh);
+        d3.set_as_af_pull(d3.af_num(), AFType::OutputPushPull, Pull::None);
+        d3.set_speed(crate::gpio::Speed::VeryHigh);
+        d4.set_as_af_pull(d4.af_num(), AFType::OutputPushPull, Pull::None);
+        d4.set_speed(crate::gpio::Speed::VeryHigh);
+        d5.set_as_af_pull(d5.af_num(), AFType::OutputPushPull, Pull::None);
+        d5.set_speed(crate::gpio::Speed::VeryHigh);
+        d6.set_as_af_pull(d6.af_num(), AFType::OutputPushPull, Pull::None);
+        d6.set_speed(crate::gpio::Speed::VeryHigh);
+        d7.set_as_af_pull(d7.af_num(), AFType::OutputPushPull, Pull::None);
+        d7.set_speed(crate::gpio::Speed::VeryHigh);
+
+        Self::new_inner(
+            peri,
+            Some(d0.map_into()),
+            Some(d1.map_into()),
+            Some(d2.map_into()),
+            Some(d3.map_into()),
+            Some(d4.map_into()),
+            Some(d5.map_into()),
+            Some(d6.map_into()),
+            Some(d7.map_into()),
+            Some(sck.map_into()),
+            Some(nss.map_into()),
+            None,
+            dma,
+            config,
+            OspiWidth::QUAD,
+            true,
+        )
+    }
+
+    /// Create new OSPI driver for octospi external chips
+    pub fn new_octospi(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        d0: impl Peripheral<P = impl D0Pin<T>> + 'd,
+        d1: impl Peripheral<P = impl D1Pin<T>> + 'd,
+        d2: impl Peripheral<P = impl D2Pin<T>> + 'd,
+        d3: impl Peripheral<P = impl D3Pin<T>> + 'd,
+        d4: impl Peripheral<P = impl D4Pin<T>> + 'd,
+        d5: impl Peripheral<P = impl D5Pin<T>> + 'd,
+        d6: impl Peripheral<P = impl D6Pin<T>> + 'd,
+        d7: impl Peripheral<P = impl D7Pin<T>> + 'd,
+        nss: impl Peripheral<P = impl NSSPin<T>> + 'd,
+        dma: impl Peripheral<P = Dma> + 'd,
+        config: Config,
+    ) -> Self {
+        into_ref!(peri, sck, d0, d1, d2, d3, d4, d5, d6, d7, nss);
+
+        sck.set_as_af_pull(sck.af_num(), AFType::OutputPushPull, Pull::None);
+        sck.set_speed(crate::gpio::Speed::VeryHigh);
+        nss.set_as_af_pull(nss.af_num(), AFType::OutputPushPull, Pull::Up);
+        nss.set_speed(crate::gpio::Speed::VeryHigh);
+        d0.set_as_af_pull(d0.af_num(), AFType::OutputPushPull, Pull::None);
+        d0.set_speed(crate::gpio::Speed::VeryHigh);
+        d1.set_as_af_pull(d1.af_num(), AFType::OutputPushPull, Pull::None);
+        d1.set_speed(crate::gpio::Speed::VeryHigh);
+        d2.set_as_af_pull(d2.af_num(), AFType::OutputPushPull, Pull::None);
+        d2.set_speed(crate::gpio::Speed::VeryHigh);
+        d3.set_as_af_pull(d3.af_num(), AFType::OutputPushPull, Pull::None);
+        d3.set_speed(crate::gpio::Speed::VeryHigh);
+        d4.set_as_af_pull(d4.af_num(), AFType::OutputPushPull, Pull::None);
+        d4.set_speed(crate::gpio::Speed::VeryHigh);
+        d5.set_as_af_pull(d5.af_num(), AFType::OutputPushPull, Pull::None);
+        d5.set_speed(crate::gpio::Speed::VeryHigh);
+        d6.set_as_af_pull(d6.af_num(), AFType::OutputPushPull, Pull::None);
+        d6.set_speed(crate::gpio::Speed::VeryHigh);
+        d7.set_as_af_pull(d7.af_num(), AFType::OutputPushPull, Pull::None);
+        d7.set_speed(crate::gpio::Speed::VeryHigh);
+
+        Self::new_inner(
+            peri,
+            Some(d0.map_into()),
+            Some(d1.map_into()),
+            Some(d2.map_into()),
+            Some(d3.map_into()),
+            Some(d4.map_into()),
+            Some(d5.map_into()),
+            Some(d6.map_into()),
+            Some(d7.map_into()),
+            Some(sck.map_into()),
+            Some(nss.map_into()),
+            None,
+            dma,
+            config,
+            OspiWidth::OCTO,
+            false,
+        )
+    }
+
+    fn new_inner(
+        peri: impl Peripheral<P = T> + 'd,
+        d0: Option<PeripheralRef<'d, AnyPin>>,
+        d1: Option<PeripheralRef<'d, AnyPin>>,
+        d2: Option<PeripheralRef<'d, AnyPin>>,
+        d3: Option<PeripheralRef<'d, AnyPin>>,
+        d4: Option<PeripheralRef<'d, AnyPin>>,
+        d5: Option<PeripheralRef<'d, AnyPin>>,
+        d6: Option<PeripheralRef<'d, AnyPin>>,
+        d7: Option<PeripheralRef<'d, AnyPin>>,
+        sck: Option<PeripheralRef<'d, AnyPin>>,
+        nss: Option<PeripheralRef<'d, AnyPin>>,
+        dqs: Option<PeripheralRef<'d, AnyPin>>,
+        dma: impl Peripheral<P = Dma> + 'd,
+        config: Config,
+        width: OspiWidth,
+        dual_quad: bool,
+    ) -> Self {
+        into_ref!(peri, dma);
+
+        // System configuration
+        T::enable_and_reset();
+        while T::REGS.sr().read().busy() {}
+
+        // Device configuration
+        T::REGS.dcr1().modify(|w| {
+            w.set_devsize(config.device_size.into());
+            w.set_mtyp(vals::MemType::from_bits(config.memory_type.into()));
+            w.set_csht(config.chip_select_high_time.into());
+            w.set_dlybyp(config.delay_block_bypass);
+            w.set_frck(false);
+            w.set_ckmode(config.clock_mode);
+        });
+
+        T::REGS.dcr2().modify(|w| {
+            w.set_wrapsize(config.wrap_size.into());
+        });
+
+        T::REGS.dcr3().modify(|w| {
+            w.set_csbound(config.chip_select_boundary);
+            #[cfg(octospi_v1)]
+            {
+                w.set_maxtran(config.max_transfer);
+            }
+        });
+
+        T::REGS.dcr4().modify(|w| {
+            w.set_refresh(config.refresh);
+        });
+
+        T::REGS.cr().modify(|w| {
+            w.set_fthres(vals::Threshold(config.fifo_threshold.into()));
+        });
+
+        // Wait for busy flag to clear
+        while T::REGS.sr().read().busy() {}
+
+        T::REGS.dcr2().modify(|w| {
+            w.set_prescaler(config.clock_prescaler);
+        });
+
+        T::REGS.cr().modify(|w| {
+            w.set_dmm(dual_quad);
+        });
+
+        T::REGS.tcr().modify(|w| {
+            w.set_sshift(match config.sample_shifting {
+                true => vals::SampleShift::HALFCYCLE,
+                false => vals::SampleShift::NONE,
+            });
+            w.set_dhqc(config.delay_hold_quarter_cycle);
+        });
+
+        // Enable peripheral
+        T::REGS.cr().modify(|w| {
+            w.set_en(true);
+        });
+
+        // Free running clock needs to be set after peripheral enable
+        if config.free_running_clock {
+            T::REGS.dcr1().modify(|w| {
+                w.set_frck(config.free_running_clock);
+            });
+        }
+
+        Self {
+            _peri: peri,
+            sck,
+            d0,
+            d1,
+            d2,
+            d3,
+            d4,
+            d5,
+            d6,
+            d7,
+            nss,
+            dqs,
+            dma,
+            config,
+            width,
+        }
+    }
+
+    // Programs the peripheral registers for `command`; `data_len` is the byte count of the data phase, if any
+    fn configure_command(&mut self, command: &TransferConfig, data_len: Option<usize>) -> Result<(), OspiError> {
+        // Reject any phase that is wider than the lane width this driver instance was created with
+        if <enums::OspiWidth as Into<u8>>::into(command.iwidth) > <enums::OspiWidth as Into<u8>>::into(self.width)
+            || <enums::OspiWidth as Into<u8>>::into(command.adwidth) > <enums::OspiWidth as Into<u8>>::into(self.width)
+            || <enums::OspiWidth as Into<u8>>::into(command.abwidth) > <enums::OspiWidth as Into<u8>>::into(self.width)
+            || <enums::OspiWidth as Into<u8>>::into(command.dwidth) > <enums::OspiWidth as Into<u8>>::into(self.width)
+        {
+            return Err(OspiError::InvalidCommand);
+        }
+
+        T::REGS.cr().modify(|w| {
+            w.set_fmode(0.into()); // functional-mode bits = 0 (the `OspiMode::IndirectWrite` encoding)
+        });
+
+        // Configure alternate bytes (only touched when the command provides them)
+        if let Some(ab) = command.alternate_bytes {
+            T::REGS.abr().write(|v| v.set_alternate(ab));
+            T::REGS.ccr().modify(|w| {
+                w.set_abmode(PhaseMode::from_bits(command.abwidth.into()));
+                w.set_abdtr(command.abdtr);
+                w.set_absize(SizeInBits::from_bits(command.absize.into()));
+            })
+        }
+
+        // Configure dummy cycles
+        T::REGS.tcr().modify(|w| {
+            w.set_dcyc(command.dummy.into());
+        });
+
+        // Configure data length
+        if let Some(data_length) = data_len {
+            T::REGS.dlr().write(|v| {
+                v.set_dl((data_length - 1) as u32); // register holds length - 1; callers must pass a non-zero length
+            })
+        } else {
+            T::REGS.dlr().write(|v| {
+                v.set_dl((0) as u32);
+            })
+        }
+
+        // Configure instruction/address/data modes
+        T::REGS.ccr().modify(|w| {
+            w.set_imode(PhaseMode::from_bits(command.iwidth.into()));
+            w.set_idtr(command.idtr);
+            w.set_isize(SizeInBits::from_bits(command.isize.into()));
+
+            w.set_admode(PhaseMode::from_bits(command.adwidth.into()));
+            w.set_addtr(command.addtr); // address phase has its own DTR flag (previously copied `idtr` by mistake)
+            w.set_adsize(SizeInBits::from_bits(command.adsize.into()));
+
+            w.set_dmode(PhaseMode::from_bits(command.dwidth.into()));
+            w.set_ddtr(command.ddtr);
+        });
+
+        // Set information required to initiate transaction
+        if let Some(instruction) = command.instruction {
+            if let Some(address) = command.address {
+                T::REGS.ir().write(|v| {
+                    v.set_instruction(instruction);
+                });
+
+                T::REGS.ar().write(|v| {
+                    v.set_address(address);
+                });
+            } else {
+                // Double check requirements for delay hold and sample shifting
+                // if let None = command.data_len {
+                //     if self.config.delay_hold_quarter_cycle && command.idtr {
+                //         T::REGS.ccr().modify(|w| {
+                //             w.set_ddtr(true);
+                //         });
+                //     }
+                // }
+
+                T::REGS.ir().write(|v| {
+                    v.set_instruction(instruction);
+                });
+            }
+        } else {
+            if let Some(address) = command.address {
+                T::REGS.ar().write(|v| {
+                    v.set_address(address);
+                });
+            } else {
+                // The only single phase transaction supported is instruction only
+                return Err(OspiError::InvalidCommand);
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Sends `command` to control or configure the target device without a data phase
+    pub async fn command(&mut self, command: &TransferConfig) -> Result<(), OspiError> {
+        // Spin (no await point) until the peripheral reports it is no longer busy
+        while T::REGS.sr().read().busy() {}
+
+        // TODO: additionally validate that the command configuration has no data phase set
+        self.configure_command(command, None)?;
+
+        // configure_command wrote the final register that starts the transaction; spin until TCF, then clear it
+        while !T::REGS.sr().read().tcf() {}
+        T::REGS.fcr().write(|w| {
+            w.set_ctcf(true);
+        });
+
+        Ok(())
+    }
+
+    /// Blocking read that polls the status flags and copies one word per iteration from the data register
+    pub fn blocking_read<W: Word>(&mut self, buf: &mut [W], transaction: TransferConfig) -> Result<(), OspiError> {
+        if buf.is_empty() {
+            return Err(OspiError::EmptyBuffer);
+        }
+
+        // Wait for peripheral to be free
+        while T::REGS.sr().read().busy() {}
+
+        // Ensure DMA is not enabled for this transaction
+        T::REGS.cr().modify(|w| {
+            w.set_dmaen(false);
+        });
+
+        self.configure_command(&transaction, Some(buf.len()))?;
+
+        let current_address = T::REGS.ar().read().address();
+        let current_instruction = T::REGS.ir().read().instruction();
+
+        // An indirect read begins when IR/AR is written, so re-write the saved value after selecting indirect-read mode
+        T::REGS.cr().modify(|v| v.set_fmode(vals::FunctionalMode::INDIRECTREAD));
+        if T::REGS.ccr().read().admode() == vals::PhaseMode::NONE {
+            T::REGS.ir().write(|v| v.set_instruction(current_instruction));
+        } else {
+            T::REGS.ar().write(|v| v.set_address(current_address));
+        }
+
+        for word in buf.iter_mut() {
+            while !T::REGS.sr().read().tcf() && !T::REGS.sr().read().ftf() {}
+            *word = unsafe { (T::REGS.dr().as_ptr() as *mut W).read_volatile() };
+        }
+
+        while !T::REGS.sr().read().tcf() {}
+        T::REGS.fcr().write(|v| v.set_ctcf(true));
+
+        Ok(())
+    }
+
+    /// Blocking write that waits for the FIFO threshold flag and pushes one word per iteration to the data register
+    pub fn blocking_write<W: Word>(&mut self, buf: &[W], transaction: TransferConfig) -> Result<(), OspiError> {
+        if buf.is_empty() {
+            return Err(OspiError::EmptyBuffer);
+        }
+
+        // Wait for peripheral to be free
+        while T::REGS.sr().read().busy() {}
+
+        T::REGS.cr().modify(|w| {
+            w.set_dmaen(false);
+        });
+
+        self.configure_command(&transaction, Some(buf.len()))?;
+
+        T::REGS
+            .cr()
+            .modify(|v| v.set_fmode(vals::FunctionalMode::INDIRECTWRITE));
+
+        for &word in buf {
+            while !T::REGS.sr().read().ftf() {}
+            unsafe { (T::REGS.dr().as_ptr() as *mut W).write_volatile(word) };
+        }
+
+        while !T::REGS.sr().read().tcf() {}
+        T::REGS.fcr().write(|v| v.set_ctcf(true));
+
+        Ok(())
+    }
+
+    /// Blocking read with DMA transfer: starts an indirect read, then blocks until the DMA transfer finishes
+    pub fn blocking_read_dma<W: Word>(&mut self, buf: &mut [W], transaction: TransferConfig) -> Result<(), OspiError>
+    where
+        Dma: OctoDma<T>,
+    {
+        if buf.is_empty() {
+            return Err(OspiError::EmptyBuffer);
+        }
+
+        // Wait for peripheral to be free
+        while T::REGS.sr().read().busy() {}
+
+        self.configure_command(&transaction, Some(buf.len()))?;
+
+        let current_address = T::REGS.ar().read().address();
+        let current_instruction = T::REGS.ir().read().instruction();
+
+        // An indirect read begins when IR/AR is written, so re-write the saved value after selecting indirect-read mode
+        T::REGS.cr().modify(|v| v.set_fmode(vals::FunctionalMode::INDIRECTREAD));
+        if T::REGS.ccr().read().admode() == vals::PhaseMode::NONE {
+            T::REGS.ir().write(|v| v.set_instruction(current_instruction));
+        } else {
+            T::REGS.ar().write(|v| v.set_address(current_address));
+        }
+
+        let request = self.dma.request();
+        let transfer = unsafe {
+            Transfer::new_read(
+                &mut self.dma,
+                request,
+                T::REGS.dr().as_ptr() as *mut W,
+                buf,
+                Default::default(),
+            )
+        };
+
+        T::REGS.cr().modify(|w| w.set_dmaen(true)); // enable DMA requests once the transfer is set up
+
+        transfer.blocking_wait();
+
+        finish_dma(T::REGS); // wait for TCF, clear it, and disable DMA requests
+
+        Ok(())
+    }
+
+    /// Blocking write with DMA transfer: selects indirect-write mode, then blocks until the DMA transfer finishes
+    pub fn blocking_write_dma<W: Word>(&mut self, buf: &[W], transaction: TransferConfig) -> Result<(), OspiError>
+    where
+        Dma: OctoDma<T>,
+    {
+        if buf.is_empty() {
+            return Err(OspiError::EmptyBuffer);
+        }
+
+        // Wait for peripheral to be free
+        while T::REGS.sr().read().busy() {}
+
+        self.configure_command(&transaction, Some(buf.len()))?;
+        T::REGS
+            .cr()
+            .modify(|v| v.set_fmode(vals::FunctionalMode::INDIRECTWRITE));
+
+        let request = self.dma.request();
+        let transfer = unsafe {
+            Transfer::new_write(
+                &mut self.dma,
+                request,
+                buf,
+                T::REGS.dr().as_ptr() as *mut W,
+                Default::default(),
+            )
+        };
+
+        T::REGS.cr().modify(|w| w.set_dmaen(true)); // enable DMA requests once the transfer is set up
+
+        transfer.blocking_wait();
+
+        finish_dma(T::REGS); // wait for TCF, clear it, and disable DMA requests
+
+        Ok(())
+    }
+
+    /// Asynchronous read from external device: starts an indirect read and awaits the DMA transfer into `buf`
+    pub async fn read<W: Word>(&mut self, buf: &mut [W], transaction: TransferConfig) -> Result<(), OspiError>
+    where
+        Dma: OctoDma<T>,
+    {
+        if buf.is_empty() {
+            return Err(OspiError::EmptyBuffer);
+        }
+
+        // Wait for peripheral to be free (note: this spins on the BUSY flag rather than yielding)
+        while T::REGS.sr().read().busy() {}
+
+        self.configure_command(&transaction, Some(buf.len()))?;
+
+        let current_address = T::REGS.ar().read().address();
+        let current_instruction = T::REGS.ir().read().instruction();
+
+        // An indirect read begins when IR/AR is written, so re-write the saved value after selecting indirect-read mode
+        T::REGS.cr().modify(|v| v.set_fmode(vals::FunctionalMode::INDIRECTREAD));
+        if T::REGS.ccr().read().admode() == vals::PhaseMode::NONE {
+            T::REGS.ir().write(|v| v.set_instruction(current_instruction));
+        } else {
+            T::REGS.ar().write(|v| v.set_address(current_address));
+        }
+
+        let request = self.dma.request();
+        let transfer = unsafe {
+            Transfer::new_read(
+                &mut self.dma,
+                request,
+                T::REGS.dr().as_ptr() as *mut W,
+                buf,
+                Default::default(),
+            )
+        };
+
+        T::REGS.cr().modify(|w| w.set_dmaen(true)); // enable DMA requests once the transfer is set up
+
+        transfer.await;
+
+        finish_dma(T::REGS); // wait for TCF, clear it, and disable DMA requests
+
+        Ok(())
+    }
+
+    /// Asynchronous write to external device: selects indirect-write mode and awaits the DMA transfer from `buf`
+    pub async fn write<W: Word>(&mut self, buf: &[W], transaction: TransferConfig) -> Result<(), OspiError>
+    where
+        Dma: OctoDma<T>,
+    {
+        if buf.is_empty() {
+            return Err(OspiError::EmptyBuffer);
+        }
+
+        // Wait for peripheral to be free (note: this spins on the BUSY flag rather than yielding)
+        while T::REGS.sr().read().busy() {}
+
+        self.configure_command(&transaction, Some(buf.len()))?;
+        T::REGS
+            .cr()
+            .modify(|v| v.set_fmode(vals::FunctionalMode::INDIRECTWRITE));
+
+        let request = self.dma.request();
+        let transfer = unsafe {
+            Transfer::new_write(
+                &mut self.dma,
+                request,
+                buf,
+                T::REGS.dr().as_ptr() as *mut W,
+                Default::default(),
+            )
+        };
+
+        T::REGS.cr().modify(|w| w.set_dmaen(true)); // enable DMA requests once the transfer is set up
+
+        transfer.await;
+
+        finish_dma(T::REGS); // wait for TCF, clear it, and disable DMA requests
+
+        Ok(())
+    }
+
+    /// Set new bus configuration: programs the registers, enables the peripheral and stores a copy of `config`
+    pub fn set_config(&mut self, config: &Config) {
+        // Wait for busy flag to clear
+        while T::REGS.sr().read().busy() {}
+
+        // Disable DMA requests (DMAEN) while configuring the peripheral
+        T::REGS.cr().modify(|w| {
+            w.set_dmaen(false);
+        });
+
+        // Device configuration
+        T::REGS.dcr1().modify(|w| {
+            w.set_devsize(config.device_size.into());
+            w.set_mtyp(vals::MemType::from_bits(config.memory_type.into()));
+            w.set_csht(config.chip_select_high_time.into());
+            w.set_dlybyp(config.delay_block_bypass);
+            w.set_frck(false); // kept off here; set at the end (after EN) when requested
+            w.set_ckmode(config.clock_mode);
+        });
+
+        T::REGS.dcr2().modify(|w| {
+            w.set_wrapsize(config.wrap_size.into());
+        });
+
+        T::REGS.dcr3().modify(|w| {
+            w.set_csbound(config.chip_select_boundary);
+            #[cfg(octospi_v1)]
+            {
+                w.set_maxtran(config.max_transfer);
+            }
+        });
+
+        T::REGS.dcr4().modify(|w| {
+            w.set_refresh(config.refresh);
+        });
+
+        T::REGS.cr().modify(|w| {
+            w.set_fthres(vals::Threshold(config.fifo_threshold.into()));
+        });
+
+        // Wait for busy flag to clear again before changing the clock prescaler
+        while T::REGS.sr().read().busy() {}
+
+        T::REGS.dcr2().modify(|w| {
+            w.set_prescaler(config.clock_prescaler);
+        });
+
+        T::REGS.tcr().modify(|w| {
+            w.set_sshift(match config.sample_shifting {
+                true => vals::SampleShift::HALFCYCLE,
+                false => vals::SampleShift::NONE,
+            });
+            w.set_dhqc(config.delay_hold_quarter_cycle);
+        });
+
+        // Enable peripheral
+        T::REGS.cr().modify(|w| {
+            w.set_en(true);
+        });
+
+        // Free running clock needs to be set after peripheral enable
+        if config.free_running_clock {
+            T::REGS.dcr1().modify(|w| {
+                w.set_frck(config.free_running_clock);
+            });
+        }
+
+        self.config = *config; // remember the applied configuration for get_config()
+    }
+
+    /// Get a copy of the configuration currently stored in `self.config`
+    pub fn get_config(&self) -> Config {
+        self.config
+    }
+}
+
+impl<'d, T: Instance, Dma> Drop for Ospi<'d, T, Dma> {
+    fn drop(&mut self) {
+        self.sck.as_ref().map(|x| x.set_as_disconnected()); // only pins that are present get disconnected
+        self.d0.as_ref().map(|x| x.set_as_disconnected());
+        self.d1.as_ref().map(|x| x.set_as_disconnected());
+        self.d2.as_ref().map(|x| x.set_as_disconnected());
+        self.d3.as_ref().map(|x| x.set_as_disconnected());
+        self.d4.as_ref().map(|x| x.set_as_disconnected());
+        self.d5.as_ref().map(|x| x.set_as_disconnected());
+        self.d6.as_ref().map(|x| x.set_as_disconnected());
+        self.d7.as_ref().map(|x| x.set_as_disconnected());
+        self.nss.as_ref().map(|x| x.set_as_disconnected());
+        self.dqs.as_ref().map(|x| x.set_as_disconnected());
+
+        T::disable(); // NOTE(review): presumably turns the peripheral off via RCC — confirm
+    }
+}
+
+fn finish_dma(regs: Regs) {
+    while !regs.sr().read().tcf() {} // spin until the transfer-complete flag (TCF) is set
+    regs.fcr().write(|v| v.set_ctcf(true)); // clear TCF
+
+    regs.cr().modify(|w| {
+        w.set_dmaen(false); // turn DMA requests back off
+    });
+}
+
+trait RegsExt {
+    fn dr_ptr<W>(&self) -> *mut W; // data register address as a raw pointer to word type `W`
+}
+
+impl RegsExt for Regs {
+    fn dr_ptr<W>(&self) -> *mut W {
+        let dr = self.dr();
+        dr.as_ptr() as *mut W // same cast the transfer functions in this file spell out inline
+    }
+}
+
+pub(crate) trait SealedInstance {
+    const REGS: Regs; // register block of this instance (set to `crate::pac::$inst` by the impls in this file)
+}
+
+trait SealedWord {
+    const CONFIG: u8; // 8, 16 or 32 for u8/u16/u32 (see the impl_word! invocations in this file)
+}
+
+/// OSPI instance trait, sealed through the crate-private `SealedInstance` supertrait.
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + RccPeripheral {}
+
+pin_trait!(SckPin, Instance);
+pin_trait!(NckPin, Instance);
+pin_trait!(D0Pin, Instance);
+pin_trait!(D1Pin, Instance);
+pin_trait!(D2Pin, Instance);
+pin_trait!(D3Pin, Instance);
+pin_trait!(D4Pin, Instance);
+pin_trait!(D5Pin, Instance);
+pin_trait!(D6Pin, Instance);
+pin_trait!(D7Pin, Instance);
+pin_trait!(DQSPin, Instance);
+pin_trait!(NSSPin, Instance);
+dma_trait!(OctoDma, Instance);
+
+foreach_peripheral!(
+    (octospi, $inst:ident) => { // NOTE(review): presumably expanded once per `octospi` peripheral — confirm
+        impl SealedInstance for peripherals::$inst {
+            const REGS: Regs = crate::pac::$inst;
+        }
+
+        impl Instance for peripherals::$inst {}
+    };
+);
+
+impl<'d, T: Instance, Dma> SetConfig for Ospi<'d, T, Dma> {
+    type Config = Config;
+    type ConfigError = ();
+    fn set_config(&mut self, config: &Self::Config) -> Result<(), ()> {
+        self.set_config(config); // resolves to the inherent `Ospi::set_config`, which cannot fail
+        Ok(())
+    }
+}
+
+impl<'d, T: Instance, Dma> GetConfig for Ospi<'d, T, Dma> {
+    type Config = Config;
+    fn get_config(&self) -> Self::Config {
+        self.get_config() // resolves to the inherent `Ospi::get_config`
+    }
+}
+
+/// Word sizes usable for OSPI (implemented in this file for `u8`, `u16` and `u32`).
+#[allow(private_bounds)]
+pub trait Word: word::Word + SealedWord {}
+
+macro_rules! impl_word {
+    ($T:ty, $config:expr) => {
+        impl SealedWord for $T {
+            const CONFIG: u8 = $config; // value supplied by the invocation (8, 16 or 32 in this file)
+        }
+        impl Word for $T {}
+    };
+}
+
+impl_word!(u8, 8);
+impl_word!(u16, 16);
+impl_word!(u32, 32);
diff --git a/embassy-stm32/src/qspi/mod.rs b/embassy-stm32/src/qspi/mod.rs
index 3c054e666..0a4b4f074 100644
--- a/embassy-stm32/src/qspi/mod.rs
+++ b/embassy-stm32/src/qspi/mod.rs
@@ -27,8 +27,6 @@ pub struct TransferConfig {
     pub address: Option<u32>,
     /// Number of dummy cycles (DCYC)
     pub dummy: DummyCycles,
-    /// Length of data
-    pub data_len: Option<usize>,
 }
 
 impl Default for TransferConfig {
@@ -40,7 +38,6 @@ impl Default for TransferConfig {
             instruction: 0,
             address: None,
             dummy: DummyCycles::_0,
-            data_len: None,
         }
     }
 }
@@ -231,7 +228,7 @@ impl<'d, T: Instance, Dma> Qspi<'d, T, Dma> {
     pub fn command(&mut self, transaction: TransferConfig) {
         #[cfg(not(stm32h7))]
         T::REGS.cr().modify(|v| v.set_dmaen(false));
-        self.setup_transaction(QspiMode::IndirectWrite, &transaction);
+        self.setup_transaction(QspiMode::IndirectWrite, &transaction, None);
 
         while !T::REGS.sr().read().tcf() {}
         T::REGS.fcr().modify(|v| v.set_ctcf(true));
@@ -241,21 +238,19 @@ impl<'d, T: Instance, Dma> Qspi<'d, T, Dma> {
     pub fn blocking_read(&mut self, buf: &mut [u8], transaction: TransferConfig) {
         #[cfg(not(stm32h7))]
         T::REGS.cr().modify(|v| v.set_dmaen(false));
-        self.setup_transaction(QspiMode::IndirectWrite, &transaction);
+        self.setup_transaction(QspiMode::IndirectWrite, &transaction, Some(buf.len()));
 
-        if let Some(len) = transaction.data_len {
-            let current_ar = T::REGS.ar().read().address();
-            T::REGS.ccr().modify(|v| {
-                v.set_fmode(QspiMode::IndirectRead.into());
-            });
-            T::REGS.ar().write(|v| {
-                v.set_address(current_ar);
-            });
+        let current_ar = T::REGS.ar().read().address();
+        T::REGS.ccr().modify(|v| {
+            v.set_fmode(QspiMode::IndirectRead.into());
+        });
+        T::REGS.ar().write(|v| {
+            v.set_address(current_ar);
+        });
 
-            for idx in 0..len {
-                while !T::REGS.sr().read().tcf() && !T::REGS.sr().read().ftf() {}
-                buf[idx] = unsafe { (T::REGS.dr().as_ptr() as *mut u8).read_volatile() };
-            }
+        for b in buf {
+            while !T::REGS.sr().read().tcf() && !T::REGS.sr().read().ftf() {}
+            *b = unsafe { (T::REGS.dr().as_ptr() as *mut u8).read_volatile() };
         }
 
         while !T::REGS.sr().read().tcf() {}
@@ -268,17 +263,15 @@ impl<'d, T: Instance, Dma> Qspi<'d, T, Dma> {
         #[cfg(not(stm32h7))]
         T::REGS.cr().modify(|v| v.set_dmaen(false));
 
-        self.setup_transaction(QspiMode::IndirectWrite, &transaction);
+        self.setup_transaction(QspiMode::IndirectWrite, &transaction, Some(buf.len()));
 
-        if let Some(len) = transaction.data_len {
-            T::REGS.ccr().modify(|v| {
-                v.set_fmode(QspiMode::IndirectWrite.into());
-            });
+        T::REGS.ccr().modify(|v| {
+            v.set_fmode(QspiMode::IndirectWrite.into());
+        });
 
-            for idx in 0..len {
-                while !T::REGS.sr().read().ftf() {}
-                unsafe { (T::REGS.dr().as_ptr() as *mut u8).write_volatile(buf[idx]) };
-            }
+        for &b in buf {
+            while !T::REGS.sr().read().ftf() {}
+            unsafe { (T::REGS.dr().as_ptr() as *mut u8).write_volatile(b) };
         }
 
         while !T::REGS.sr().read().tcf() {}
@@ -290,7 +283,7 @@ impl<'d, T: Instance, Dma> Qspi<'d, T, Dma> {
     where
         Dma: QuadDma<T>,
     {
-        self.setup_transaction(QspiMode::IndirectWrite, &transaction);
+        self.setup_transaction(QspiMode::IndirectWrite, &transaction, Some(buf.len()));
 
         T::REGS.ccr().modify(|v| {
             v.set_fmode(QspiMode::IndirectRead.into());
@@ -323,7 +316,7 @@ impl<'d, T: Instance, Dma> Qspi<'d, T, Dma> {
     where
         Dma: QuadDma<T>,
     {
-        self.setup_transaction(QspiMode::IndirectWrite, &transaction);
+        self.setup_transaction(QspiMode::IndirectWrite, &transaction, Some(buf.len()));
 
         T::REGS.ccr().modify(|v| {
             v.set_fmode(QspiMode::IndirectWrite.into());
@@ -347,7 +340,7 @@ impl<'d, T: Instance, Dma> Qspi<'d, T, Dma> {
         transfer.blocking_wait();
     }
 
-    fn setup_transaction(&mut self, fmode: QspiMode, transaction: &TransferConfig) {
+    fn setup_transaction(&mut self, fmode: QspiMode, transaction: &TransferConfig, data_len: Option<usize>) {
         T::REGS.fcr().modify(|v| {
             v.set_csmf(true);
             v.set_ctcf(true);
@@ -357,7 +350,7 @@ impl<'d, T: Instance, Dma> Qspi<'d, T, Dma> {
 
         while T::REGS.sr().read().busy() {}
 
-        if let Some(len) = transaction.data_len {
+        if let Some(len) = data_len {
             T::REGS.dlr().write(|v| v.set_dl(len as u32 - 1));
         }
 
diff --git a/embassy-stm32/src/rcc/mod.rs b/embassy-stm32/src/rcc/mod.rs
index d53d02203..c328344aa 100644
--- a/embassy-stm32/src/rcc/mod.rs
+++ b/embassy-stm32/src/rcc/mod.rs
@@ -111,7 +111,7 @@ mod util {
     }
 }
 
-/// Get the kernel clocok frequency of the peripheral `T`.
+/// Get the kernel clock frequency of the peripheral `T`.
 ///
 /// # Panics
 ///
diff --git a/embassy-stm32/src/time_driver.rs b/embassy-stm32/src/time_driver.rs
index cc8161276..e592fbf7d 100644
--- a/embassy-stm32/src/time_driver.rs
+++ b/embassy-stm32/src/time_driver.rs
@@ -15,9 +15,7 @@ use crate::pac::timer::vals;
 use crate::rcc::SealedRccPeripheral;
 #[cfg(feature = "low-power")]
 use crate::rtc::Rtc;
-#[cfg(any(time_driver_tim1, time_driver_tim8, time_driver_tim20))]
-use crate::timer::AdvancedInstance1Channel;
-use crate::timer::CoreInstance;
+use crate::timer::{CoreInstance, GeneralInstance1Channel};
 use crate::{interrupt, peripherals};
 
 // NOTE regarding ALARM_COUNT:
@@ -69,7 +67,7 @@ type T = peripherals::TIM23;
 type T = peripherals::TIM24;
 
 foreach_interrupt! {
-    (TIM1, timer, $block:ident, UP, $irq:ident) => {
+    (TIM1, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim1)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -85,7 +83,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM2, timer, $block:ident, UP, $irq:ident) => {
+    (TIM2, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim2)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -93,7 +91,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM3, timer, $block:ident, UP, $irq:ident) => {
+    (TIM3, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim3)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -101,7 +99,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM4, timer, $block:ident, UP, $irq:ident) => {
+    (TIM4, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim4)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -109,7 +107,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM5, timer, $block:ident, UP, $irq:ident) => {
+    (TIM5, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim5)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -117,7 +115,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM8, timer, $block:ident, UP, $irq:ident) => {
+    (TIM8, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim8)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -133,7 +131,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM9, timer, $block:ident, UP, $irq:ident) => {
+    (TIM9, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim9)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -141,7 +139,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM12, timer, $block:ident, UP, $irq:ident) => {
+    (TIM12, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim12)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -149,7 +147,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM15, timer, $block:ident, UP, $irq:ident) => {
+    (TIM15, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim15)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -157,7 +155,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM20, timer, $block:ident, UP, $irq:ident) => {
+    (TIM20, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim20)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -173,7 +171,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM21, timer, $block:ident, UP, $irq:ident) => {
+    (TIM21, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim21)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -181,7 +179,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM22, timer, $block:ident, UP, $irq:ident) => {
+    (TIM22, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim22)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -189,7 +187,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM23, timer, $block:ident, UP, $irq:ident) => {
+    (TIM23, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim23)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -197,7 +195,7 @@ foreach_interrupt! {
             DRIVER.on_interrupt()
         }
     };
-    (TIM24, timer, $block:ident, UP, $irq:ident) => {
+    (TIM24, timer, $block:ident, CC, $irq:ident) => {
         #[cfg(time_driver_tim24)]
         #[cfg(feature = "rt")]
         #[interrupt]
@@ -263,6 +261,7 @@ pub(crate) struct RtcDriver {
     rtc: Mutex<CriticalSectionRawMutex, Cell<Option<&'static Rtc>>>,
 }
 
+#[allow(clippy::declare_interior_mutable_const)]
 const ALARM_STATE_NEW: AlarmState = AlarmState::new();
 
 embassy_time_driver::time_driver_impl!(static DRIVER: RtcDriver = RtcDriver {
@@ -307,16 +306,8 @@ impl RtcDriver {
             w.set_ccie(0, true);
         });
 
-        <T as CoreInstance>::Interrupt::unpend();
-        unsafe { <T as CoreInstance>::Interrupt::enable() };
-
-        #[cfg(any(time_driver_tim1, time_driver_tim8, time_driver_tim20))]
-        {
-            <T as AdvancedInstance1Channel>::CaptureCompareInterrupt::unpend();
-            unsafe {
-                <T as AdvancedInstance1Channel>::CaptureCompareInterrupt::enable();
-            }
-        }
+        <T as GeneralInstance1Channel>::CaptureCompareInterrupt::unpend();
+        unsafe { <T as GeneralInstance1Channel>::CaptureCompareInterrupt::enable() };
 
         r.cr1().modify(|w| w.set_cen(true));
     }
diff --git a/embassy-stm32/src/timer/mod.rs b/embassy-stm32/src/timer/mod.rs
index 2ba6b3f11..346127005 100644
--- a/embassy-stm32/src/timer/mod.rs
+++ b/embassy-stm32/src/timer/mod.rs
@@ -47,8 +47,8 @@ pub enum TimerBits {
 
 /// Core timer instance.
 pub trait CoreInstance: RccPeripheral + 'static {
-    /// Interrupt for this timer.
-    type Interrupt: interrupt::typelevel::Interrupt;
+    /// Update Interrupt for this timer.
+    type UpdateInterrupt: interrupt::typelevel::Interrupt;
 
     /// Amount of bits this timer has.
     const BITS: TimerBits;
@@ -64,29 +64,46 @@ pub trait BasicNoCr2Instance: CoreInstance {}
 pub trait BasicInstance: BasicNoCr2Instance {}
 
 /// General-purpose 16-bit timer with 1 channel instance.
-pub trait GeneralInstance1Channel: CoreInstance {}
+pub trait GeneralInstance1Channel: CoreInstance {
+    /// Capture compare interrupt for this timer.
+    type CaptureCompareInterrupt: interrupt::typelevel::Interrupt;
+}
 
 /// General-purpose 16-bit timer with 2 channels instance.
-pub trait GeneralInstance2Channel: GeneralInstance1Channel {}
+pub trait GeneralInstance2Channel: GeneralInstance1Channel {
+    /// Trigger event interrupt for this timer.
+    type TriggerInterrupt: interrupt::typelevel::Interrupt;
+}
 
-/// General-purpose 16-bit timer with 4 channels instance.
-pub trait GeneralInstance4Channel: BasicInstance + GeneralInstance2Channel {
+// This trait adds *extra* methods to GeneralInstance4Channel
+// that GeneralInstance4Channel doesn't use, but the "AdvancedInstance"s need.
+// It's a private trait, so its contents won't leak to the outer namespace.
+//
+// If you want to add a new method to it, please leave a detailed comment to explain it.
+trait General4ChBlankSealed {
     // SimplePwm<'d, T> is implemented for T: GeneralInstance4Channel
     // Advanced timers implement this trait, but the output needs to be
     // enabled explicitly.
     // To support general-purpose and advanced timers, this function is added
     // here defaulting to noop and overwritten for advanced timers.
-    /// Enable timer outputs.
+    //
+    // Enable timer outputs.
     fn enable_outputs(&self) {}
 }
 
+/// General-purpose 16-bit timer with 4 channels instance.
+#[allow(private_bounds)]
+pub trait GeneralInstance4Channel: BasicInstance + GeneralInstance2Channel + General4ChBlankSealed {}
+
 /// General-purpose 32-bit timer with 4 channels instance.
 pub trait GeneralInstance32bit4Channel: GeneralInstance4Channel {}
 
 /// Advanced 16-bit timer with 1 channel instance.
 pub trait AdvancedInstance1Channel: BasicNoCr2Instance + GeneralInstance1Channel {
-    /// Capture compare interrupt for this timer.
-    type CaptureCompareInterrupt: interrupt::typelevel::Interrupt;
+    /// Communication interrupt for this timer.
+    type CommunicationInterrupt: interrupt::typelevel::Interrupt;
+    /// Break input interrupt for this timer.
+    type BreakInputInterrupt: interrupt::typelevel::Interrupt;
 }
 /// Advanced 16-bit timer with 2 channels instance.
 
@@ -127,7 +144,7 @@ dma_trait!(Ch4Dma, GeneralInstance4Channel);
 macro_rules! impl_core_timer {
     ($inst:ident, $bits:expr) => {
         impl CoreInstance for crate::peripherals::$inst {
-            type Interrupt = crate::_generated::peripheral_interrupts::$inst::UP;
+            type UpdateInterrupt = crate::_generated::peripheral_interrupts::$inst::UP;
 
             const BITS: TimerBits = $bits;
 
@@ -138,6 +155,49 @@ macro_rules! impl_core_timer {
     };
 }
 
+#[allow(unused)] // NOTE(review): presumably not every chip expands this macro — confirm
+macro_rules! impl_general_1ch {
+    ($inst:ident) => {
+        impl GeneralInstance1Channel for crate::peripherals::$inst {
+            type CaptureCompareInterrupt = crate::_generated::peripheral_interrupts::$inst::CC;
+        }
+    };
+}
+
+#[allow(unused)] // NOTE(review): presumably not every chip expands this macro — confirm
+macro_rules! impl_general_2ch {
+    ($inst:ident) => {
+        impl GeneralInstance2Channel for crate::peripherals::$inst {
+            type TriggerInterrupt = crate::_generated::peripheral_interrupts::$inst::TRG;
+        }
+    };
+}
+
+#[allow(unused)] // NOTE(review): presumably not every chip expands this macro — confirm
+macro_rules! impl_advanced_1ch {
+    ($inst:ident) => {
+        impl AdvancedInstance1Channel for crate::peripherals::$inst {
+            type CommunicationInterrupt = crate::_generated::peripheral_interrupts::$inst::COM;
+            type BreakInputInterrupt = crate::_generated::peripheral_interrupts::$inst::BRK;
+        }
+    };
+}
+
+// This macro only applies to "AdvancedInstance(s)",
+// not "GeneralInstance4Channel" itself.
+#[allow(unused)] // NOTE(review): presumably not every chip expands this macro — confirm
+macro_rules! impl_general_4ch_blank_sealed {
+    ($inst:ident) => {
+        impl General4ChBlankSealed for crate::peripherals::$inst {
+            fn enable_outputs(&self) {
+                unsafe { crate::pac::timer::Tim1chCmp::from_ptr(Self::regs()) }
+                    .bdtr()
+                    .modify(|w| w.set_moe(true)); // overrides the no-op default by setting MOE in BDTR
+            }
+        }
+    };
+}
+
 foreach_interrupt! {
     ($inst:ident, timer, TIM_BASIC, UP, $irq:ident) => {
         impl_core_timer!($inst, TimerBits::Bits16);
@@ -149,47 +209,52 @@ foreach_interrupt! {
         impl_core_timer!($inst, TimerBits::Bits16);
         impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl GeneralInstance1Channel for crate::peripherals::$inst {}
-        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl_general_1ch!($inst);
+        impl_general_2ch!($inst);
         impl GeneralInstance4Channel for crate::peripherals::$inst {}
+        impl General4ChBlankSealed for crate::peripherals::$inst {}
     };
 
     ($inst:ident, timer, TIM_2CH, UP, $irq:ident) => {
         impl_core_timer!($inst, TimerBits::Bits16);
         impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl GeneralInstance1Channel for crate::peripherals::$inst {}
-        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl_general_1ch!($inst);
+        impl_general_2ch!($inst);
         impl GeneralInstance4Channel for crate::peripherals::$inst {}
+        impl General4ChBlankSealed for crate::peripherals::$inst {}
     };
 
     ($inst:ident, timer, TIM_GP16, UP, $irq:ident) => {
         impl_core_timer!($inst, TimerBits::Bits16);
         impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl GeneralInstance1Channel for crate::peripherals::$inst {}
-        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl_general_1ch!($inst);
+        impl_general_2ch!($inst);
         impl GeneralInstance4Channel for crate::peripherals::$inst {}
+        impl General4ChBlankSealed for crate::peripherals::$inst {}
     };
 
     ($inst:ident, timer, TIM_GP32, UP, $irq:ident) => {
         impl_core_timer!($inst, TimerBits::Bits32);
         impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl GeneralInstance1Channel for crate::peripherals::$inst {}
-        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl_general_1ch!($inst);
+        impl_general_2ch!($inst);
         impl GeneralInstance4Channel for crate::peripherals::$inst {}
         impl GeneralInstance32bit4Channel for crate::peripherals::$inst {}
+        impl General4ChBlankSealed for crate::peripherals::$inst {}
     };
 
     ($inst:ident, timer, TIM_1CH_CMP, UP, $irq:ident) => {
         impl_core_timer!($inst, TimerBits::Bits16);
         impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl GeneralInstance1Channel for crate::peripherals::$inst {}
-        impl GeneralInstance2Channel for crate::peripherals::$inst {}
-        impl GeneralInstance4Channel for crate::peripherals::$inst { fn enable_outputs(&self) { set_moe::<Self>() }}
-        impl AdvancedInstance1Channel for crate::peripherals::$inst { type CaptureCompareInterrupt = crate::_generated::peripheral_interrupts::$inst::CC; }
+        impl_general_1ch!($inst);
+        impl_general_2ch!($inst);
+        impl GeneralInstance4Channel for crate::peripherals::$inst {}
+        impl_general_4ch_blank_sealed!($inst);
+        impl_advanced_1ch!($inst);
         impl AdvancedInstance2Channel for crate::peripherals::$inst {}
         impl AdvancedInstance4Channel for crate::peripherals::$inst {}
     };
@@ -198,10 +263,11 @@ foreach_interrupt! {
         impl_core_timer!($inst, TimerBits::Bits16);
         impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl GeneralInstance1Channel for crate::peripherals::$inst {}
-        impl GeneralInstance2Channel for crate::peripherals::$inst {}
-        impl GeneralInstance4Channel for crate::peripherals::$inst { fn enable_outputs(&self) { set_moe::<Self>() }}
-        impl AdvancedInstance1Channel for crate::peripherals::$inst { type CaptureCompareInterrupt = crate::_generated::peripheral_interrupts::$inst::CC; }
+        impl_general_1ch!($inst);
+        impl_general_2ch!($inst);
+        impl GeneralInstance4Channel for crate::peripherals::$inst {}
+        impl_general_4ch_blank_sealed!($inst);
+        impl_advanced_1ch!($inst);
         impl AdvancedInstance2Channel for crate::peripherals::$inst {}
         impl AdvancedInstance4Channel for crate::peripherals::$inst {}
     };
@@ -210,19 +276,12 @@ foreach_interrupt! {
         impl_core_timer!($inst, TimerBits::Bits16);
         impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl GeneralInstance1Channel for crate::peripherals::$inst {}
-        impl GeneralInstance2Channel for crate::peripherals::$inst {}
-        impl GeneralInstance4Channel for crate::peripherals::$inst { fn enable_outputs(&self) { set_moe::<Self>() }}
-        impl AdvancedInstance1Channel for crate::peripherals::$inst { type CaptureCompareInterrupt = crate::_generated::peripheral_interrupts::$inst::CC; }
+        impl_general_1ch!($inst);
+        impl_general_2ch!($inst);
+        impl GeneralInstance4Channel for crate::peripherals::$inst {}
+        impl_general_4ch_blank_sealed!($inst);
+        impl_advanced_1ch!($inst);
         impl AdvancedInstance2Channel for crate::peripherals::$inst {}
         impl AdvancedInstance4Channel for crate::peripherals::$inst {}
     };
 }
-
-#[cfg(not(stm32l0))]
-#[allow(unused)]
-fn set_moe<T: GeneralInstance4Channel>() {
-    unsafe { crate::pac::timer::Tim1chCmp::from_ptr(T::regs()) }
-        .bdtr()
-        .modify(|w| w.set_moe(true));
-}
diff --git a/embassy-stm32/src/usart/buffered.rs b/embassy-stm32/src/usart/buffered.rs
index 51862e185..949ac1b13 100644
--- a/embassy-stm32/src/usart/buffered.rs
+++ b/embassy-stm32/src/usart/buffered.rs
@@ -105,27 +105,23 @@ impl<T: BasicInstance> interrupt::typelevel::Handler<T::Interrupt> for Interrupt
     }
 }
 
-pub(crate) use sealed::State;
-pub(crate) mod sealed {
-    use super::*;
-    pub struct State {
-        pub(crate) rx_waker: AtomicWaker,
-        pub(crate) rx_buf: RingBuffer,
-        pub(crate) tx_waker: AtomicWaker,
-        pub(crate) tx_buf: RingBuffer,
-        pub(crate) tx_done: AtomicBool,
-    }
+pub(crate) struct State {
+    pub(crate) rx_waker: AtomicWaker,
+    pub(crate) rx_buf: RingBuffer,
+    pub(crate) tx_waker: AtomicWaker,
+    pub(crate) tx_buf: RingBuffer,
+    pub(crate) tx_done: AtomicBool,
+}
 
-    impl State {
-        /// Create new state
-        pub const fn new() -> Self {
-            Self {
-                rx_buf: RingBuffer::new(),
-                tx_buf: RingBuffer::new(),
-                rx_waker: AtomicWaker::new(),
-                tx_waker: AtomicWaker::new(),
-                tx_done: AtomicBool::new(true),
-            }
+impl State {
+    /// Create new state
+    pub(crate) const fn new() -> Self {
+        Self {
+            rx_buf: RingBuffer::new(),
+            tx_buf: RingBuffer::new(),
+            rx_waker: AtomicWaker::new(),
+            tx_waker: AtomicWaker::new(),
+            tx_done: AtomicBool::new(true),
         }
     }
 }
diff --git a/embassy-sync/CHANGELOG.md b/embassy-sync/CHANGELOG.md
index e7db97ef7..3f6b39d8b 100644
--- a/embassy-sync/CHANGELOG.md
+++ b/embassy-sync/CHANGELOG.md
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Unreleased
+
+- Add `len`, `is_empty` and `is_full` functions to `Channel`.
+
 ## 0.5.0 - 2023-12-04
 
 - Add a PriorityChannel.
@@ -35,7 +39,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Remove unnecessary uses of `atomic-polyfill`
 - Add `#[must_use]` to all futures.
 
-
 ## 0.1.0 - 2022-08-26
 
 - First release
diff --git a/embassy-sync/src/channel.rs b/embassy-sync/src/channel.rs
index 48f4dafd6..18be462cb 100644
--- a/embassy-sync/src/channel.rs
+++ b/embassy-sync/src/channel.rs
@@ -449,6 +449,18 @@ impl<T, const N: usize> ChannelState<T, N> {
             Poll::Pending
         }
     }
+
+    fn len(&self) -> usize {
+        self.queue.len()
+    }
+
+    fn is_empty(&self) -> bool {
+        self.queue.is_empty()
+    }
+
+    fn is_full(&self) -> bool {
+        self.queue.is_full()
+    }
 }
 
 /// A bounded channel for communicating between asynchronous tasks
@@ -572,6 +584,21 @@ where
     pub fn try_receive(&self) -> Result<T, TryReceiveError> {
         self.lock(|c| c.try_receive())
     }
+
+    /// Returns the number of elements currently in the channel.
+    pub fn len(&self) -> usize {
+        self.lock(|c| c.len())
+    }
+
+    /// Returns whether the channel is empty.
+    pub fn is_empty(&self) -> bool {
+        self.lock(|c| c.is_empty())
+    }
+
+    /// Returns whether the channel is full.
+    pub fn is_full(&self) -> bool {
+        self.lock(|c| c.is_full())
+    }
 }
 
 /// Implements the DynamicChannel to allow creating types that are unaware of the queue size with the
diff --git a/embassy-usb/src/builder.rs b/embassy-usb/src/builder.rs
index c06107396..387b780de 100644
--- a/embassy-usb/src/builder.rs
+++ b/embassy-usb/src/builder.rs
@@ -38,11 +38,12 @@ pub struct Config<'a> {
 
     /// Maximum packet size in bytes for the control endpoint 0.
     ///
-    /// Valid values are 8, 16, 32 and 64. There's generally no need to change this from the default
-    /// value of 8 bytes unless a class uses control transfers for sending large amounts of data, in
-    /// which case using a larger packet size may be more efficient.
+    /// Valid values depend on the speed at which the bus is enumerated.
+    /// - low speed: 8
+    /// - full speed: 8, 16, 32, or 64
+    /// - high speed: 64
     ///
-    /// Default: 8 bytes
+    /// Default: 64 bytes
     pub max_packet_size_0: u8,
 
     /// Manufacturer name string descriptor.
diff --git a/examples/rp/src/bin/pio_stepper.rs b/examples/rp/src/bin/pio_stepper.rs
index ab9ecf623..4952f4fbd 100644
--- a/examples/rp/src/bin/pio_stepper.rs
+++ b/examples/rp/src/bin/pio_stepper.rs
@@ -69,7 +69,7 @@ impl<'d, T: Instance, const SM: usize> PioStepper<'d, T, SM> {
         let clock_divider: FixedU32<U8> = (125_000_000 / (freq * 136)).to_fixed();
         assert!(clock_divider <= 65536, "clkdiv must be <= 65536");
         assert!(clock_divider >= 1, "clkdiv must be >= 1");
-        T::PIO.sm(SM).clkdiv().write(|w| w.0 = clock_divider.to_bits() << 8);
+        self.sm.set_clock_divider(clock_divider);
         self.sm.clkdiv_restart();
     }
 
diff --git a/examples/rp/src/bin/pwm_input.rs b/examples/rp/src/bin/pwm_input.rs
index 0652dc42b..bf454a936 100644
--- a/examples/rp/src/bin/pwm_input.rs
+++ b/examples/rp/src/bin/pwm_input.rs
@@ -5,6 +5,7 @@
 
 use defmt::*;
 use embassy_executor::Spawner;
+use embassy_rp::gpio::Pull;
 use embassy_rp::pwm::{Config, InputMode, Pwm};
 use embassy_time::{Duration, Ticker};
 use {defmt_rtt as _, panic_probe as _};
@@ -14,7 +15,7 @@ async fn main(_spawner: Spawner) {
     let p = embassy_rp::init(Default::default());
 
     let cfg: Config = Default::default();
-    let pwm = Pwm::new_input(p.PWM_SLICE2, p.PIN_5, InputMode::RisingEdge, cfg);
+    let pwm = Pwm::new_input(p.PWM_SLICE2, p.PIN_5, Pull::None, InputMode::RisingEdge, cfg);
 
     let mut ticker = Ticker::every(Duration::from_secs(1));
     loop {
diff --git a/examples/stm32f0/src/bin/multiprio.rs b/examples/stm32f0/src/bin/multiprio.rs
index e49951726..1c3f3991a 100644
--- a/examples/stm32f0/src/bin/multiprio.rs
+++ b/examples/stm32f0/src/bin/multiprio.rs
@@ -126,6 +126,11 @@ fn main() -> ! {
     // Initialize and create handle for devicer peripherals
     let _p = embassy_stm32::init(Default::default());
 
+    // STM32s don’t have any interrupts exclusively for software use, but they can all be triggered by software as well as
+    // by the peripheral, so we can just use any free interrupt vectors which aren’t used by the rest of your application.
+    // In this case we’re using USART1 and USART2, but there’s nothing special about them. Any otherwise unused
+    // interrupt vector would work exactly the same.
+
     // High-priority executor: USART1, priority level 6
     interrupt::USART1.set_priority(Priority::P6);
     let spawner = EXECUTOR_HIGH.start(interrupt::USART1);
diff --git a/examples/stm32f3/src/bin/multiprio.rs b/examples/stm32f3/src/bin/multiprio.rs
index 328447210..87830b416 100644
--- a/examples/stm32f3/src/bin/multiprio.rs
+++ b/examples/stm32f3/src/bin/multiprio.rs
@@ -127,6 +127,11 @@ fn main() -> ! {
 
     let _p = embassy_stm32::init(Default::default());
 
+    // STM32s don’t have any interrupts exclusively for software use, but they can all be triggered by software as well as
+    // by the peripheral, so we can just use any free interrupt vectors which aren’t used by the rest of your application.
+    // In this case we’re using UART4 and UART5, but there’s nothing special about them. Any otherwise unused interrupt
+    // vector would work exactly the same.
+
     // High-priority executor: UART4, priority level 6
     interrupt::UART4.set_priority(Priority::P6);
     let spawner = EXECUTOR_HIGH.start(interrupt::UART4);
diff --git a/examples/stm32f4/src/bin/multiprio.rs b/examples/stm32f4/src/bin/multiprio.rs
index 328447210..87830b416 100644
--- a/examples/stm32f4/src/bin/multiprio.rs
+++ b/examples/stm32f4/src/bin/multiprio.rs
@@ -127,6 +127,11 @@ fn main() -> ! {
 
     let _p = embassy_stm32::init(Default::default());
 
+    // STM32s don’t have any interrupts exclusively for software use, but they can all be triggered by software as well as
+    // by the peripheral, so we can just use any free interrupt vectors which aren’t used by the rest of your application.
+    // In this case we’re using UART4 and UART5, but there’s nothing special about them. Any otherwise unused interrupt
+    // vector would work exactly the same.
+
     // High-priority executor: UART4, priority level 6
     interrupt::UART4.set_priority(Priority::P6);
     let spawner = EXECUTOR_HIGH.start(interrupt::UART4);
diff --git a/examples/stm32f4/src/bin/usb_hid_keyboard.rs b/examples/stm32f4/src/bin/usb_hid_keyboard.rs
index a799b4e72..d6e0be5ea 100644
--- a/examples/stm32f4/src/bin/usb_hid_keyboard.rs
+++ b/examples/stm32f4/src/bin/usb_hid_keyboard.rs
@@ -49,6 +49,7 @@ async fn main(_spawner: Spawner) {
     // Create the driver, from the HAL.
     let mut ep_out_buffer = [0u8; 256];
     let mut config = embassy_stm32::usb::Config::default();
+    // If the board you’re using doesn’t have the VBUS pin wired up correctly for detecting the USB bus voltage (e.g. on the f4 blackpill board), set this to false
     config.vbus_detection = true;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
 
diff --git a/examples/stm32h5/src/bin/cordic.rs b/examples/stm32h5/src/bin/cordic.rs
new file mode 100644
index 000000000..73e873574
--- /dev/null
+++ b/examples/stm32h5/src/bin/cordic.rs
@@ -0,0 +1,78 @@
+#![no_std]
+#![no_main]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::cordic::{self, utils};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let mut dp = embassy_stm32::init(Default::default());
+
+    let mut cordic = cordic::Cordic::new(
+        &mut dp.CORDIC,
+        unwrap!(cordic::Config::new(
+            cordic::Function::Sin,
+            Default::default(),
+            Default::default(),
+        )),
+    );
+
+    // The output buffer length is not strict: anything at least as large as the minimum required is fine.
+    let mut output_f64 = [0f64; 19];
+    let mut output_u32 = [0u32; 21];
+
+    // Tip:
+    // The CORDIC peripheral places strict limits on input values; you can also use the ".check_argX_fXX()" methods
+    // to make sure your input values are compatible with the current CORDIC setup.
+    let arg1 = [-1.0, -0.5, 0.0, 0.5, 1.0]; // for trigonometric functions, the ARG1 value [-pi, pi] should be mapped to [-1, 1]
+    let arg2 = [0.5]; // and for Sin function, ARG2 should be in [0, 1]
+
+    let mut input_buf = [0u32; 9];
+
+    // convert input from floating point to fixed point
+    input_buf[0] = unwrap!(utils::f64_to_q1_31(arg1[0]));
+    input_buf[1] = unwrap!(utils::f64_to_q1_31(arg2[0]));
+
+    // If input length is small, blocking mode can be used to minimize overhead.
+    let cnt0 = unwrap!(cordic.blocking_calc_32bit(
+        &input_buf[..2], // the input length must be exact, since the driver uses it to determine the calculation count
+        &mut output_u32,
+        false,
+        false
+    ));
+
+    // convert result from fixed point into floating point
+    for (&u32_val, f64_val) in output_u32[..cnt0].iter().zip(output_f64.iter_mut()) {
+        *f64_val = utils::q1_31_to_f64(u32_val);
+    }
+
+    // convert input from floating point to fixed point
+    //
+    // the first value of arg1 has already been used above, so continue from arg1[1..]
+    for (&f64_val, u32_val) in arg1[1..].iter().zip(input_buf.iter_mut()) {
+        *u32_val = unwrap!(utils::f64_to_q1_31(f64_val));
+    }
+
+    // If the calculation is a little longer, async mode can make use of DMA and let the core do other work meanwhile.
+    let cnt1 = unwrap!(
+        cordic
+            .async_calc_32bit(
+                &mut dp.GPDMA1_CH0,
+                &mut dp.GPDMA1_CH1,
+                &input_buf[..arg1.len() - 1], // limit input buf to its actual length
+                &mut output_u32,
+                true,
+                false
+            )
+            .await
+    );
+
+    // convert result from fixed point into floating point
+    for (&u32_val, f64_val) in output_u32[..cnt1].iter().zip(output_f64[cnt0..cnt0 + cnt1].iter_mut()) {
+        *f64_val = utils::q1_31_to_f64(u32_val);
+    }
+
+    println!("result: {}", output_f64[..cnt0 + cnt1]);
+}
diff --git a/examples/stm32h7/src/bin/multiprio.rs b/examples/stm32h7/src/bin/multiprio.rs
index 73f8dd092..fcbb6c653 100644
--- a/examples/stm32h7/src/bin/multiprio.rs
+++ b/examples/stm32h7/src/bin/multiprio.rs
@@ -127,6 +127,11 @@ fn main() -> ! {
 
     let _p = embassy_stm32::init(Default::default());
 
+    // STM32s don’t have any interrupts exclusively for software use, but they can all be triggered by software as well as
+    // by the peripheral, so we can just use any free interrupt vectors which aren’t used by the rest of your application.
+    // In this case we’re using UART4 and UART5, but there’s nothing special about them. Any otherwise unused interrupt
+    // vector would work exactly the same.
+
     // High-priority executor: UART4, priority level 6
     interrupt::UART4.set_priority(Priority::P6);
     let spawner = EXECUTOR_HIGH.start(interrupt::UART4);
diff --git a/tests/rp/src/bin/pwm.rs b/tests/rp/src/bin/pwm.rs
index 4b02e5bab..c05197000 100644
--- a/tests/rp/src/bin/pwm.rs
+++ b/tests/rp/src/bin/pwm.rs
@@ -94,7 +94,7 @@ async fn main(_spawner: Spawner) {
     // Test level-gated
     {
         let mut pin2 = Output::new(&mut p11, Level::Low);
-        let pwm = Pwm::new_input(&mut p.PWM_SLICE3, &mut p7, InputMode::Level, cfg.clone());
+        let pwm = Pwm::new_input(&mut p.PWM_SLICE3, &mut p7, Pull::None, InputMode::Level, cfg.clone());
         assert_eq!(pwm.counter(), 0);
         Timer::after_millis(5).await;
         assert_eq!(pwm.counter(), 0);
@@ -110,7 +110,13 @@ async fn main(_spawner: Spawner) {
     // Test rising-gated
     {
         let mut pin2 = Output::new(&mut p11, Level::Low);
-        let pwm = Pwm::new_input(&mut p.PWM_SLICE3, &mut p7, InputMode::RisingEdge, cfg.clone());
+        let pwm = Pwm::new_input(
+            &mut p.PWM_SLICE3,
+            &mut p7,
+            Pull::None,
+            InputMode::RisingEdge,
+            cfg.clone(),
+        );
         assert_eq!(pwm.counter(), 0);
         Timer::after_millis(5).await;
         assert_eq!(pwm.counter(), 0);
@@ -125,7 +131,13 @@ async fn main(_spawner: Spawner) {
     // Test falling-gated
     {
         let mut pin2 = Output::new(&mut p11, Level::High);
-        let pwm = Pwm::new_input(&mut p.PWM_SLICE3, &mut p7, InputMode::FallingEdge, cfg.clone());
+        let pwm = Pwm::new_input(
+            &mut p.PWM_SLICE3,
+            &mut p7,
+            Pull::None,
+            InputMode::FallingEdge,
+            cfg.clone(),
+        );
         assert_eq!(pwm.counter(), 0);
         Timer::after_millis(5).await;
         assert_eq!(pwm.counter(), 0);
@@ -137,6 +149,34 @@ async fn main(_spawner: Spawner) {
         assert_eq!(pwm.counter(), 1);
     }
 
+    // pull-down
+    {
+        let pin2 = Input::new(&mut p11, Pull::None);
+        Pwm::new_input(
+            &mut p.PWM_SLICE3,
+            &mut p7,
+            Pull::Down,
+            InputMode::FallingEdge,
+            cfg.clone(),
+        );
+        Timer::after_millis(1).await;
+        assert!(pin2.is_low());
+    }
+
+    // pull-up
+    {
+        let pin2 = Input::new(&mut p11, Pull::None);
+        Pwm::new_input(
+            &mut p.PWM_SLICE3,
+            &mut p7,
+            Pull::Up,
+            InputMode::FallingEdge,
+            cfg.clone(),
+        );
+        Timer::after_millis(1).await;
+        assert!(pin2.is_high());
+    }
+
     info!("Test OK");
     cortex_m::asm::bkpt();
 }
diff --git a/tests/stm32/Cargo.toml b/tests/stm32/Cargo.toml
index e42470004..e09083111 100644
--- a/tests/stm32/Cargo.toml
+++ b/tests/stm32/Cargo.toml
@@ -14,8 +14,8 @@ stm32f429zi = ["embassy-stm32/stm32f429zi", "chrono", "eth", "stop", "can", "not
 stm32f446re = ["embassy-stm32/stm32f446re", "chrono", "stop", "can", "not-gpdma", "dac", "sdmmc"]
 stm32f767zi = ["embassy-stm32/stm32f767zi", "chrono", "not-gpdma", "eth", "rng"]
 stm32g071rb = ["embassy-stm32/stm32g071rb", "cm0", "not-gpdma", "dac", "ucpd"]
-stm32g491re = ["embassy-stm32/stm32g491re", "chrono", "stop", "not-gpdma", "rng", "fdcan"]
-stm32h563zi = ["embassy-stm32/stm32h563zi", "chrono", "eth", "rng", "hash"]
+stm32g491re = ["embassy-stm32/stm32g491re", "chrono", "stop", "not-gpdma", "rng", "fdcan", "cordic"]
+stm32h563zi = ["embassy-stm32/stm32h563zi", "chrono", "eth", "rng", "hash", "cordic"]
 stm32h753zi = ["embassy-stm32/stm32h753zi", "chrono", "not-gpdma", "eth", "rng", "fdcan", "hash", "cryp"]
 stm32h755zi = ["embassy-stm32/stm32h755zi-cm7", "chrono", "not-gpdma", "eth", "dac", "rng", "fdcan", "hash", "cryp"]
 stm32h7a3zi = ["embassy-stm32/stm32h7a3zi", "not-gpdma", "rng", "fdcan"]
@@ -25,8 +25,8 @@ stm32l496zg = ["embassy-stm32/stm32l496zg", "not-gpdma", "rng"]
 stm32l4a6zg = ["embassy-stm32/stm32l4a6zg", "chrono", "not-gpdma", "rng", "hash"]
 stm32l4r5zi = ["embassy-stm32/stm32l4r5zi", "chrono", "not-gpdma", "rng"]
 stm32l552ze = ["embassy-stm32/stm32l552ze", "not-gpdma", "rng", "hash"]
-stm32u585ai = ["embassy-stm32/stm32u585ai", "chrono", "rng", "hash"]
-stm32u5a5zj = ["embassy-stm32/stm32u5a5zj", "chrono", "rng", "hash"]
+stm32u585ai = ["embassy-stm32/stm32u585ai", "chrono", "rng", "hash", "cordic"]
+stm32u5a5zj = ["embassy-stm32/stm32u5a5zj", "chrono", "rng", "hash"] # FIXME: the cordic test causes it to crash
 stm32wb55rg = ["embassy-stm32/stm32wb55rg", "chrono", "not-gpdma", "ble", "mac" , "rng"]
 stm32wba52cg = ["embassy-stm32/stm32wba52cg", "chrono", "rng", "hash"]
 stm32wl55jc = ["embassy-stm32/stm32wl55jc-cm4", "not-gpdma", "rng", "chrono"]
@@ -48,6 +48,7 @@ embassy-stm32-wpan = []
 not-gpdma = []
 dac = []
 ucpd = []
+cordic = ["dep:num-traits"]
 
 cm0 = ["portable-atomic/unsafe-assume-single-core"]
 
@@ -83,6 +84,7 @@ chrono = { version = "^0.4", default-features = false, optional = true}
 sha2 = { version = "0.10.8", default-features = false }
 hmac = "0.12.1"
 aes-gcm = {version = "0.10.3", default-features = false, features = ["aes", "heapless"] }
+num-traits = {version="0.2", default-features = false,features = ["libm"], optional = true}
 
 # BEGIN TESTS
 # Generated by gen_test.py. DO NOT EDIT.
@@ -91,6 +93,11 @@ name = "can"
 path = "src/bin/can.rs"
 required-features = [ "can",]
 
+[[bin]]
+name = "cordic"
+path = "src/bin/cordic.rs"
+required-features = [ "rng", "cordic",]
+
 [[bin]]
 name = "cryp"
 path = "src/bin/cryp.rs"
diff --git a/tests/stm32/gen_test.py b/tests/stm32/gen_test.py
index 8ff156c0e..daf714376 100644
--- a/tests/stm32/gen_test.py
+++ b/tests/stm32/gen_test.py
@@ -14,7 +14,7 @@ for f in sorted(glob('./src/bin/*.rs')):
     with open(f, 'r') as f:
         for line in f:
             if line.startswith('// required-features:'):
-                features = line.split(':', 2)[1].strip().split(',')
+                features = [feature.strip() for feature in line.split(':', 2)[1].strip().split(',')]
 
     tests[name] = features
 
diff --git a/tests/stm32/src/bin/cordic.rs b/tests/stm32/src/bin/cordic.rs
new file mode 100644
index 000000000..400e10207
--- /dev/null
+++ b/tests/stm32/src/bin/cordic.rs
@@ -0,0 +1,135 @@
+// required-features: rng, cordic
+
+// Test Cordic driver, with Q1.31 format, Sin function, at 24 iterations (aka PRECISION = 6), using DMA transfer
+
+#![no_std]
+#![no_main]
+
+#[path = "../common.rs"]
+mod common;
+use common::*;
+use embassy_executor::Spawner;
+use embassy_stm32::cordic::utils;
+use embassy_stm32::{bind_interrupts, cordic, peripherals, rng};
+use num_traits::Float;
+use {defmt_rtt as _, panic_probe as _};
+
+bind_interrupts!(struct Irqs {
+   RNG => rng::InterruptHandler<peripherals::RNG>;
+});
+
+/* input value control, can be changed */
+
+const INPUT_U32_COUNT: usize = 9;
+const INPUT_U8_COUNT: usize = 4 * INPUT_U32_COUNT;
+
+// Assume the first calculation needs 2 arguments and the rest need 1 argument each,
+// and that every calculation generates 2 results.
+const OUTPUT_LENGTH: usize = (INPUT_U32_COUNT - 1) * 2;
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let dp = embassy_stm32::init(config());
+
+    //
+    // use the RNG to generate random Q1.31 values
+    //
+    // we don't generate floating-point values, since not every bit pattern is a valid floating-point value,
+    // and Q1.31 only accepts a fixed range of values.
+
+    let mut rng = rng::Rng::new(dp.RNG, Irqs);
+
+    let mut input_buf_u8 = [0u8; INPUT_U8_COUNT];
+    defmt::unwrap!(rng.async_fill_bytes(&mut input_buf_u8).await);
+
+    // convert every [u8; 4] to a u32, for a Q1.31 value
+    let mut input_q1_31 = unsafe { core::mem::transmute::<[u8; INPUT_U8_COUNT], [u32; INPUT_U32_COUNT]>(input_buf_u8) };
+
+    // ARG2 for the Sin function must be inside [0, 1]; clearing the MSB of a Q1.31 value ensures it is not negative.
+    input_q1_31[1] &= !(1u32 << 31);
+
+    //
+    // CORDIC calculation
+    //
+
+    let mut output_q1_31 = [0u32; OUTPUT_LENGTH];
+
+    // setup Cordic driver
+    let mut cordic = cordic::Cordic::new(
+        dp.CORDIC,
+        defmt::unwrap!(cordic::Config::new(
+            cordic::Function::Sin,
+            Default::default(),
+            Default::default(),
+        )),
+    );
+
+    #[cfg(feature = "stm32g491re")]
+    let (mut write_dma, mut read_dma) = (dp.DMA1_CH4, dp.DMA1_CH5);
+
+    #[cfg(any(feature = "stm32h563zi", feature = "stm32u585ai", feature = "stm32u5a5zj"))]
+    let (mut write_dma, mut read_dma) = (dp.GPDMA1_CH0, dp.GPDMA1_CH1);
+
+    // calculate first result using blocking mode
+    let cnt0 = defmt::unwrap!(cordic.blocking_calc_32bit(&input_q1_31[..2], &mut output_q1_31, false, false));
+
+    // calculate the remaining results using async mode
+    let cnt1 = defmt::unwrap!(
+        cordic
+            .async_calc_32bit(
+                &mut write_dma,
+                &mut read_dma,
+                &input_q1_31[2..],
+                &mut output_q1_31[cnt0..],
+                true,
+                false,
+            )
+            .await
+    );
+
+    // the total number of output values should equal our output buffer size
+    defmt::assert_eq!(cnt0 + cnt1, output_q1_31.len());
+
+    let mut cordic_result_f64 = [0.0f64; OUTPUT_LENGTH];
+
+    for (f64_val, u32_val) in cordic_result_f64.iter_mut().zip(output_q1_31) {
+        *f64_val = utils::q1_31_to_f64(u32_val);
+    }
+
+    //
+    // software calculation
+    //
+
+    let mut software_result_f64 = [0.0f64; OUTPUT_LENGTH];
+
+    let arg2 = utils::q1_31_to_f64(input_q1_31[1]);
+
+    for (&arg1, res) in input_q1_31
+        .iter()
+        .enumerate()
+        .filter_map(|(idx, val)| if idx != 1 { Some(val) } else { None })
+        .zip(software_result_f64.chunks_mut(2))
+    {
+        let arg1 = utils::q1_31_to_f64(arg1);
+
+        let (raw_res1, raw_res2) = (arg1 * core::f64::consts::PI).sin_cos();
+        (res[0], res[1]) = (raw_res1 * arg2, raw_res2 * arg2);
+    }
+
+    //
+    // check that the results agree within tolerance
+    //
+
+    for (cordic_res, software_res) in cordic_result_f64[..cnt0 + cnt1]
+        .chunks(2)
+        .zip(software_result_f64.chunks(2))
+    {
+        for (cord_res, soft_res) in cordic_res.iter().zip(software_res.iter()) {
+            // 2.0.powi(-19) is the max residual error for Sin function, in q1.31 format, with 24 iterations (aka PRECISION = 6)
+            defmt::assert!((cord_res - soft_res).abs() <= 2.0.powi(-19));
+        }
+    }
+
+    info!("Test OK");
+    cortex_m::asm::bkpt();
+}