diff --git a/.vscode/settings.json b/.vscode/settings.json
index 46d26562c..0c195a13b 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,4 @@
 {
-  "editor.formatOnSave": true,
   "[toml]": {
     "editor.formatOnSave": false
   },
diff --git a/ci.sh b/ci.sh
index cd82af2f1..d17f4e13e 100755
--- a/ci.sh
+++ b/ci.sh
@@ -124,6 +124,7 @@ cargo batch \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32h755zi-cm7,defmt,exti,time-driver-any,time \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32h725re,defmt,exti,time-driver-any,time \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32h7b3ai,defmt,exti,time-driver-any,time \
+    --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32h7b3ai,defmt,exti,time-driver-tim1,time \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32l431cb,defmt,exti,time-driver-any,time \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32l476vg,defmt,exti,time-driver-any,time \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32l422cb,defmt,exti,time-driver-any,time \
diff --git a/cyw43/src/fmt.rs b/cyw43/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/cyw43/src/fmt.rs
+++ b/cyw43/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-boot-nrf/src/fmt.rs b/embassy-boot-nrf/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-boot-nrf/src/fmt.rs
+++ b/embassy-boot-nrf/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-boot-rp/src/fmt.rs b/embassy-boot-rp/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-boot-rp/src/fmt.rs
+++ b/embassy-boot-rp/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-boot-stm32/src/fmt.rs b/embassy-boot-stm32/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-boot-stm32/src/fmt.rs
+++ b/embassy-boot-stm32/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-boot/src/boot_loader.rs b/embassy-boot/src/boot_loader.rs
index ca1a1b10c..a38558056 100644
--- a/embassy-boot/src/boot_loader.rs
+++ b/embassy-boot/src/boot_loader.rs
@@ -183,29 +183,29 @@ impl<ACTIVE: NorFlash, DFU: NorFlash, STATE: NorFlash> BootLoader<ACTIVE, DFU, S
     /// | Partition | Swap Index | Page 0 | Page 1 | Page 3 | Page 4 |
     /// |-----------|------------|--------|--------|--------|--------|
     /// |    Active |          0 |      1 |      2 |      3 |      - |
-    /// |       DFU |          0 |      3 |      2 |      1 |      X |
+    /// |       DFU |          0 |      4 |      5 |      6 |      X |
     ///
     /// The algorithm starts by copying 'backwards', and after the first step, the layout is
     /// as follows:
     ///
     /// | Partition | Swap Index | Page 0 | Page 1 | Page 3 | Page 4 |
     /// |-----------|------------|--------|--------|--------|--------|
-    /// |    Active |          1 |      1 |      2 |      1 |      - |
-    /// |       DFU |          1 |      3 |      2 |      1 |      3 |
+    /// |    Active |          1 |      1 |      2 |      6 |      - |
+    /// |       DFU |          1 |      4 |      5 |      6 |      3 |
     ///
     /// The next iteration performs the same steps
     ///
     /// | Partition | Swap Index | Page 0 | Page 1 | Page 3 | Page 4 |
     /// |-----------|------------|--------|--------|--------|--------|
-    /// |    Active |          2 |      1 |      2 |      1 |      - |
-    /// |       DFU |          2 |      3 |      2 |      2 |      3 |
+    /// |    Active |          2 |      1 |      5 |      6 |      - |
+    /// |       DFU |          2 |      4 |      5 |      2 |      3 |
     ///
     /// And again until we're done
     ///
     /// | Partition | Swap Index | Page 0 | Page 1 | Page 3 | Page 4 |
     /// |-----------|------------|--------|--------|--------|--------|
-    /// |    Active |          3 |      3 |      2 |      1 |      - |
-    /// |       DFU |          3 |      3 |      1 |      2 |      3 |
+    /// |    Active |          3 |      4 |      5 |      6 |      - |
+    /// |       DFU |          3 |      4 |      1 |      2 |      3 |
     ///
     /// ## REVERTING
     ///
@@ -220,19 +220,19 @@ impl<ACTIVE: NorFlash, DFU: NorFlash, STATE: NorFlash> BootLoader<ACTIVE, DFU, S
     ///
     /// | Partition | Revert Index | Page 0 | Page 1 | Page 3 | Page 4 |
     /// |-----------|--------------|--------|--------|--------|--------|
-    /// |    Active |            3 |      1 |      2 |      1 |      - |
-    /// |       DFU |            3 |      3 |      1 |      2 |      3 |
+    /// |    Active |            3 |      1 |      5 |      6 |      - |
+    /// |       DFU |            3 |      4 |      1 |      2 |      3 |
     ///
     ///
     /// | Partition | Revert Index | Page 0 | Page 1 | Page 3 | Page 4 |
     /// |-----------|--------------|--------|--------|--------|--------|
-    /// |    Active |            3 |      1 |      2 |      1 |      - |
-    /// |       DFU |            3 |      3 |      2 |      2 |      3 |
+    /// |    Active |            3 |      1 |      2 |      6 |      - |
+    /// |       DFU |            3 |      4 |      5 |      2 |      3 |
     ///
     /// | Partition | Revert Index | Page 0 | Page 1 | Page 3 | Page 4 |
     /// |-----------|--------------|--------|--------|--------|--------|
     /// |    Active |            3 |      1 |      2 |      3 |      - |
-    /// |       DFU |            3 |      3 |      2 |      1 |      3 |
+    /// |       DFU |            3 |      4 |      5 |      6 |      3 |
     ///
     pub fn prepare_boot(&mut self, aligned_buf: &mut [u8]) -> Result<State, BootError> {
         // Ensure we have enough progress pages to store copy progress
diff --git a/embassy-boot/src/fmt.rs b/embassy-boot/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-boot/src/fmt.rs
+++ b/embassy-boot/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-embedded-hal/src/adapter/blocking_async.rs b/embassy-embedded-hal/src/adapter/blocking_async.rs
index ae0d0a7f9..bafc31583 100644
--- a/embassy-embedded-hal/src/adapter/blocking_async.rs
+++ b/embassy-embedded-hal/src/adapter/blocking_async.rs
@@ -104,8 +104,10 @@ where
 }
 
 /// NOR flash wrapper
-use embedded_storage::nor_flash::{ErrorType, NorFlash, ReadNorFlash};
-use embedded_storage_async::nor_flash::{NorFlash as AsyncNorFlash, ReadNorFlash as AsyncReadNorFlash};
+use embedded_storage::nor_flash::{ErrorType, MultiwriteNorFlash, NorFlash, ReadNorFlash};
+use embedded_storage_async::nor_flash::{
+    MultiwriteNorFlash as AsyncMultiwriteNorFlash, NorFlash as AsyncNorFlash, ReadNorFlash as AsyncReadNorFlash,
+};
 
 impl<T> ErrorType for BlockingAsync<T>
 where
@@ -143,3 +145,5 @@ where
         self.wrapped.capacity()
     }
 }
+
+impl<T> AsyncMultiwriteNorFlash for BlockingAsync<T> where T: MultiwriteNorFlash {}
diff --git a/embassy-embedded-hal/src/shared_bus/asynch/i2c.rs b/embassy-embedded-hal/src/shared_bus/asynch/i2c.rs
index 779c04263..71ce09def 100644
--- a/embassy-embedded-hal/src/shared_bus/asynch/i2c.rs
+++ b/embassy-embedded-hal/src/shared_bus/asynch/i2c.rs
@@ -106,6 +106,11 @@ impl<'a, M: RawMutex, BUS: SetConfig> I2cDeviceWithConfig<'a, M, BUS> {
     pub fn new(bus: &'a Mutex<M, BUS>, config: BUS::Config) -> Self {
         Self { bus, config }
     }
+
+    /// Change the device's config at runtime
+    pub fn set_config(&mut self, config: BUS::Config) {
+        self.config = config;
+    }
 }
 
 impl<'a, M, BUS> i2c::ErrorType for I2cDeviceWithConfig<'a, M, BUS>
diff --git a/embassy-embedded-hal/src/shared_bus/asynch/spi.rs b/embassy-embedded-hal/src/shared_bus/asynch/spi.rs
index 62b2c92a0..9890f218d 100644
--- a/embassy-embedded-hal/src/shared_bus/asynch/spi.rs
+++ b/embassy-embedded-hal/src/shared_bus/asynch/spi.rs
@@ -122,6 +122,11 @@ impl<'a, M: RawMutex, BUS: SetConfig, CS> SpiDeviceWithConfig<'a, M, BUS, CS> {
     pub fn new(bus: &'a Mutex<M, BUS>, cs: CS, config: BUS::Config) -> Self {
         Self { bus, cs, config }
     }
+
+    /// Change the device's config at runtime
+    pub fn set_config(&mut self, config: BUS::Config) {
+        self.config = config;
+    }
 }
 
 impl<'a, M, BUS, CS> spi::ErrorType for SpiDeviceWithConfig<'a, M, BUS, CS>
diff --git a/embassy-embedded-hal/src/shared_bus/blocking/i2c.rs b/embassy-embedded-hal/src/shared_bus/blocking/i2c.rs
index 233c9e1fd..627767c8a 100644
--- a/embassy-embedded-hal/src/shared_bus/blocking/i2c.rs
+++ b/embassy-embedded-hal/src/shared_bus/blocking/i2c.rs
@@ -67,9 +67,11 @@ where
     }
 
     fn transaction<'a>(&mut self, address: u8, operations: &mut [Operation<'a>]) -> Result<(), Self::Error> {
-        let _ = address;
-        let _ = operations;
-        todo!()
+        self.bus.lock(|bus| {
+            bus.borrow_mut()
+                .transaction(address, operations)
+                .map_err(I2cDeviceError::I2c)
+        })
     }
 }
 
@@ -130,6 +132,11 @@ impl<'a, M: RawMutex, BUS: SetConfig> I2cDeviceWithConfig<'a, M, BUS> {
     pub fn new(bus: &'a Mutex<M, RefCell<BUS>>, config: BUS::Config) -> Self {
         Self { bus, config }
     }
+
+    /// Change the device's config at runtime
+    pub fn set_config(&mut self, config: BUS::Config) {
+        self.config = config;
+    }
 }
 
 impl<'a, M, BUS> ErrorType for I2cDeviceWithConfig<'a, M, BUS>
@@ -171,8 +178,10 @@ where
     }
 
     fn transaction<'a>(&mut self, address: u8, operations: &mut [Operation<'a>]) -> Result<(), Self::Error> {
-        let _ = address;
-        let _ = operations;
-        todo!()
+        self.bus.lock(|bus| {
+            let mut bus = bus.borrow_mut();
+            bus.set_config(&self.config).map_err(|_| I2cDeviceError::Config)?;
+            bus.transaction(address, operations).map_err(I2cDeviceError::I2c)
+        })
     }
 }
diff --git a/embassy-embedded-hal/src/shared_bus/blocking/spi.rs b/embassy-embedded-hal/src/shared_bus/blocking/spi.rs
index 59b65bfbd..801899f9f 100644
--- a/embassy-embedded-hal/src/shared_bus/blocking/spi.rs
+++ b/embassy-embedded-hal/src/shared_bus/blocking/spi.rs
@@ -147,6 +147,11 @@ impl<'a, M: RawMutex, BUS: SetConfig, CS> SpiDeviceWithConfig<'a, M, BUS, CS> {
     pub fn new(bus: &'a Mutex<M, RefCell<BUS>>, cs: CS, config: BUS::Config) -> Self {
         Self { bus, cs, config }
     }
+
+    /// Change the device's config at runtime
+    pub fn set_config(&mut self, config: BUS::Config) {
+        self.config = config;
+    }
 }
 
 impl<'a, M, BUS, CS> spi::ErrorType for SpiDeviceWithConfig<'a, M, BUS, CS>
diff --git a/embassy-executor-macros/src/macros/task.rs b/embassy-executor-macros/src/macros/task.rs
index 1efb2788b..96c6267b2 100644
--- a/embassy-executor-macros/src/macros/task.rs
+++ b/embassy-executor-macros/src/macros/task.rs
@@ -93,10 +93,21 @@ pub fn run(args: &[NestedMeta], f: syn::ItemFn) -> Result<TokenStream, TokenStre
     #[cfg(feature = "nightly")]
     let mut task_outer: ItemFn = parse_quote! {
         #visibility fn #task_ident(#fargs) -> ::embassy_executor::SpawnToken<impl Sized> {
-            type Fut = impl ::core::future::Future + 'static;
+            trait _EmbassyInternalTaskTrait {
+                type Fut: ::core::future::Future + 'static;
+                fn construct(#fargs) -> Self::Fut;
+            }
+
+            impl _EmbassyInternalTaskTrait for () {
+                type Fut = impl core::future::Future + 'static;
+                fn construct(#fargs) -> Self::Fut {
+                    #task_inner_ident(#(#full_args,)*)
+                }
+            }
+
             const POOL_SIZE: usize = #pool_size;
-            static POOL: ::embassy_executor::raw::TaskPool<Fut, POOL_SIZE> = ::embassy_executor::raw::TaskPool::new();
-            unsafe { POOL._spawn_async_fn(move || #task_inner_ident(#(#full_args,)*)) }
+            static POOL: ::embassy_executor::raw::TaskPool<<() as _EmbassyInternalTaskTrait>::Fut, POOL_SIZE> = ::embassy_executor::raw::TaskPool::new();
+            unsafe { POOL._spawn_async_fn(move || <() as _EmbassyInternalTaskTrait>::construct(#(#full_args,)*)) }
         }
     };
     #[cfg(not(feature = "nightly"))]
diff --git a/embassy-executor-macros/src/util/ctxt.rs b/embassy-executor-macros/src/util/ctxt.rs
index 74c872c3c..9c78cda01 100644
--- a/embassy-executor-macros/src/util/ctxt.rs
+++ b/embassy-executor-macros/src/util/ctxt.rs
@@ -7,7 +7,6 @@ use std::thread;
 
 use proc_macro2::TokenStream;
 use quote::{quote, ToTokens};
-use syn;
 
 /// A type to collect errors together and format them.
 ///
diff --git a/embassy-executor/src/fmt.rs b/embassy-executor/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-executor/src/fmt.rs
+++ b/embassy-executor/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-executor/src/raw/mod.rs b/embassy-executor/src/raw/mod.rs
index 3d5e3ab9f..d9ea5c005 100644
--- a/embassy-executor/src/raw/mod.rs
+++ b/embassy-executor/src/raw/mod.rs
@@ -30,7 +30,7 @@ use core::ptr::NonNull;
 use core::task::{Context, Poll};
 
 #[cfg(feature = "integrated-timers")]
-use embassy_time_driver::{self, AlarmHandle};
+use embassy_time_driver::AlarmHandle;
 #[cfg(feature = "rtos-trace")]
 use rtos_trace::trace;
 
diff --git a/embassy-executor/tests/test.rs b/embassy-executor/tests/test.rs
index 2c2441dd5..348cc7dc4 100644
--- a/embassy-executor/tests/test.rs
+++ b/embassy-executor/tests/test.rs
@@ -1,4 +1,4 @@
-#![cfg_attr(feature = "nightly", feature(type_alias_impl_trait))]
+#![cfg_attr(feature = "nightly", feature(impl_trait_in_assoc_type))]
 
 use std::boxed::Box;
 use std::future::poll_fn;
diff --git a/embassy-futures/src/fmt.rs b/embassy-futures/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-futures/src/fmt.rs
+++ b/embassy-futures/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-hal-internal/src/fmt.rs b/embassy-hal-internal/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-hal-internal/src/fmt.rs
+++ b/embassy-hal-internal/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-hal-internal/src/interrupt.rs b/embassy-hal-internal/src/interrupt.rs
index 19dabcf6f..5e64dce9d 100644
--- a/embassy-hal-internal/src/interrupt.rs
+++ b/embassy-hal-internal/src/interrupt.rs
@@ -30,14 +30,12 @@ macro_rules! interrupt_mod {
             pub mod typelevel {
                 use super::InterruptExt;
 
-                mod sealed {
-                    pub trait Interrupt {}
-                }
+                trait SealedInterrupt {}
 
                 /// Type-level interrupt.
                 ///
                 /// This trait is implemented for all typelevel interrupt types in this module.
-                pub trait Interrupt: sealed::Interrupt {
+                pub trait Interrupt: SealedInterrupt {
 
                     /// Interrupt enum variant.
                     ///
@@ -105,7 +103,7 @@ macro_rules! interrupt_mod {
                     #[doc=stringify!($irqs)]
                     #[doc=" typelevel interrupt."]
                     pub enum $irqs {}
-                    impl sealed::Interrupt for $irqs{}
+                    impl SealedInterrupt for $irqs{}
                     impl Interrupt for $irqs {
                         const IRQ: super::Interrupt = super::Interrupt::$irqs;
                     }
diff --git a/embassy-net-adin1110/src/fmt.rs b/embassy-net-adin1110/src/fmt.rs
index 12737c690..2ac42c557 100644
--- a/embassy-net-adin1110/src/fmt.rs
+++ b/embassy-net-adin1110/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -83,14 +83,17 @@ macro_rules! todo {
     };
 }
 
+#[cfg(not(feature = "defmt"))]
 macro_rules! unreachable {
     ($($x:tt)*) => {
-        {
-            #[cfg(not(feature = "defmt"))]
-            ::core::unreachable!($($x)*);
-            #[cfg(feature = "defmt")]
-            ::defmt::unreachable!($($x)*);
-        }
+        ::core::unreachable!($($x)*)
+    };
+}
+
+#[cfg(feature = "defmt")]
+macro_rules! unreachable {
+    ($($x:tt)*) => {
+        ::defmt::unreachable!($($x)*)
     };
 }
 
@@ -113,7 +116,7 @@ macro_rules! trace {
             #[cfg(feature = "defmt")]
             ::defmt::trace!($s $(, $x)*);
             #[cfg(not(any(feature = "log", feature="defmt")))]
-            let _ignored = ($( & $x ),*);
+            let _ = ($( & $x ),*);
         }
     };
 }
@@ -126,7 +129,7 @@ macro_rules! debug {
             #[cfg(feature = "defmt")]
             ::defmt::debug!($s $(, $x)*);
             #[cfg(not(any(feature = "log", feature="defmt")))]
-            let _ignored = ($( & $x ),*);
+            let _ = ($( & $x ),*);
         }
     };
 }
@@ -139,7 +142,7 @@ macro_rules! info {
             #[cfg(feature = "defmt")]
             ::defmt::info!($s $(, $x)*);
             #[cfg(not(any(feature = "log", feature="defmt")))]
-            let _ignored = ($( & $x ),*);
+            let _ = ($( & $x ),*);
         }
     };
 }
@@ -152,7 +155,7 @@ macro_rules! warn {
             #[cfg(feature = "defmt")]
             ::defmt::warn!($s $(, $x)*);
             #[cfg(not(any(feature = "log", feature="defmt")))]
-            let _ignored = ($( & $x ),*);
+            let _ = ($( & $x ),*);
         }
     };
 }
@@ -165,7 +168,7 @@ macro_rules! error {
             #[cfg(feature = "defmt")]
             ::defmt::error!($s $(, $x)*);
             #[cfg(not(any(feature = "log", feature="defmt")))]
-            let _ignored = ($( & $x ),*);
+            let _ = ($( & $x ),*);
         }
     };
 }
@@ -226,7 +229,7 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-pub struct Bytes<'a>(pub &'a [u8]);
+pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
diff --git a/embassy-net-driver-channel/src/fmt.rs b/embassy-net-driver-channel/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-net-driver-channel/src/fmt.rs
+++ b/embassy-net-driver-channel/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-net-enc28j60/src/fmt.rs b/embassy-net-enc28j60/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-net-enc28j60/src/fmt.rs
+++ b/embassy-net-enc28j60/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-net-enc28j60/src/lib.rs b/embassy-net-enc28j60/src/lib.rs
index f18134927..dda35f498 100644
--- a/embassy-net-enc28j60/src/lib.rs
+++ b/embassy-net-enc28j60/src/lib.rs
@@ -17,7 +17,6 @@ mod phy;
 mod traits;
 
 use core::cmp;
-use core::convert::TryInto;
 
 use embassy_net_driver::{Capabilities, HardwareAddress, LinkState};
 use embassy_time::Duration;
@@ -645,8 +644,8 @@ where
         Self: 'a;
 
     fn receive(&mut self, cx: &mut core::task::Context) -> Option<(Self::RxToken<'_>, Self::TxToken<'_>)> {
-        let rx_buf = unsafe { &mut RX_BUF };
-        let tx_buf = unsafe { &mut TX_BUF };
+        let rx_buf = unsafe { &mut *core::ptr::addr_of_mut!(RX_BUF) };
+        let tx_buf = unsafe { &mut *core::ptr::addr_of_mut!(TX_BUF) };
         if let Some(n) = self.receive(rx_buf) {
             Some((RxToken { buf: &mut rx_buf[..n] }, TxToken { buf: tx_buf, eth: self }))
         } else {
@@ -656,7 +655,7 @@ where
     }
 
     fn transmit(&mut self, _cx: &mut core::task::Context) -> Option<Self::TxToken<'_>> {
-        let tx_buf = unsafe { &mut TX_BUF };
+        let tx_buf = unsafe { &mut *core::ptr::addr_of_mut!(TX_BUF) };
         Some(TxToken { buf: tx_buf, eth: self })
     }
 
diff --git a/embassy-net-esp-hosted/src/fmt.rs b/embassy-net-esp-hosted/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-net-esp-hosted/src/fmt.rs
+++ b/embassy-net-esp-hosted/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-net-ppp/src/fmt.rs b/embassy-net-ppp/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-net-ppp/src/fmt.rs
+++ b/embassy-net-ppp/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-net-tuntap/src/lib.rs b/embassy-net-tuntap/src/lib.rs
index de30934eb..56f55fba1 100644
--- a/embassy-net-tuntap/src/lib.rs
+++ b/embassy-net-tuntap/src/lib.rs
@@ -6,7 +6,7 @@ use std::os::unix::io::{AsRawFd, RawFd};
 use std::task::Context;
 
 use async_io::Async;
-use embassy_net_driver::{self, Capabilities, Driver, HardwareAddress, LinkState};
+use embassy_net_driver::{Capabilities, Driver, HardwareAddress, LinkState};
 use log::*;
 
 /// Get the MTU of the given interface.
diff --git a/embassy-net-wiznet/src/chip/mod.rs b/embassy-net-wiznet/src/chip/mod.rs
index b987c2b36..e1f963d95 100644
--- a/embassy-net-wiznet/src/chip/mod.rs
+++ b/embassy-net-wiznet/src/chip/mod.rs
@@ -2,49 +2,40 @@
 mod w5500;
 pub use w5500::W5500;
 mod w5100s;
+use embedded_hal_async::spi::SpiDevice;
 pub use w5100s::W5100S;
 
-pub(crate) mod sealed {
-    use embedded_hal_async::spi::SpiDevice;
+pub(crate) trait SealedChip {
+    type Address;
 
-    pub trait Chip {
-        type Address;
+    const COMMON_MODE: Self::Address;
+    const COMMON_MAC: Self::Address;
+    const COMMON_SOCKET_INTR: Self::Address;
+    const COMMON_PHY_CFG: Self::Address;
+    const SOCKET_MODE: Self::Address;
+    const SOCKET_COMMAND: Self::Address;
+    const SOCKET_RXBUF_SIZE: Self::Address;
+    const SOCKET_TXBUF_SIZE: Self::Address;
+    const SOCKET_TX_FREE_SIZE: Self::Address;
+    const SOCKET_TX_DATA_WRITE_PTR: Self::Address;
+    const SOCKET_RECVD_SIZE: Self::Address;
+    const SOCKET_RX_DATA_READ_PTR: Self::Address;
+    const SOCKET_INTR_MASK: Self::Address;
+    const SOCKET_INTR: Self::Address;
 
-        const COMMON_MODE: Self::Address;
-        const COMMON_MAC: Self::Address;
-        const COMMON_SOCKET_INTR: Self::Address;
-        const COMMON_PHY_CFG: Self::Address;
-        const SOCKET_MODE: Self::Address;
-        const SOCKET_COMMAND: Self::Address;
-        const SOCKET_RXBUF_SIZE: Self::Address;
-        const SOCKET_TXBUF_SIZE: Self::Address;
-        const SOCKET_TX_FREE_SIZE: Self::Address;
-        const SOCKET_TX_DATA_WRITE_PTR: Self::Address;
-        const SOCKET_RECVD_SIZE: Self::Address;
-        const SOCKET_RX_DATA_READ_PTR: Self::Address;
-        const SOCKET_INTR_MASK: Self::Address;
-        const SOCKET_INTR: Self::Address;
+    const SOCKET_MODE_VALUE: u8;
 
-        const SOCKET_MODE_VALUE: u8;
+    const BUF_SIZE: u16;
+    const AUTO_WRAP: bool;
 
-        const BUF_SIZE: u16;
-        const AUTO_WRAP: bool;
+    fn rx_addr(addr: u16) -> Self::Address;
+    fn tx_addr(addr: u16) -> Self::Address;
 
-        fn rx_addr(addr: u16) -> Self::Address;
-        fn tx_addr(addr: u16) -> Self::Address;
-
-        async fn bus_read<SPI: SpiDevice>(
-            spi: &mut SPI,
-            address: Self::Address,
-            data: &mut [u8],
-        ) -> Result<(), SPI::Error>;
-        async fn bus_write<SPI: SpiDevice>(
-            spi: &mut SPI,
-            address: Self::Address,
-            data: &[u8],
-        ) -> Result<(), SPI::Error>;
-    }
+    async fn bus_read<SPI: SpiDevice>(spi: &mut SPI, address: Self::Address, data: &mut [u8])
+        -> Result<(), SPI::Error>;
+    async fn bus_write<SPI: SpiDevice>(spi: &mut SPI, address: Self::Address, data: &[u8]) -> Result<(), SPI::Error>;
 }
 
 /// Trait for Wiznet chips.
-pub trait Chip: sealed::Chip {}
+#[allow(private_bounds)]
+pub trait Chip: SealedChip {}
diff --git a/embassy-net-wiznet/src/chip/w5100s.rs b/embassy-net-wiznet/src/chip/w5100s.rs
index 7d328bce5..23ce3ed83 100644
--- a/embassy-net-wiznet/src/chip/w5100s.rs
+++ b/embassy-net-wiznet/src/chip/w5100s.rs
@@ -8,7 +8,7 @@ const RX_BASE: u16 = 0x6000;
 pub enum W5100S {}
 
 impl super::Chip for W5100S {}
-impl super::sealed::Chip for W5100S {
+impl super::SealedChip for W5100S {
     type Address = u16;
 
     const COMMON_MODE: Self::Address = 0x00;
diff --git a/embassy-net-wiznet/src/chip/w5500.rs b/embassy-net-wiznet/src/chip/w5500.rs
index 16236126d..12e610ea2 100644
--- a/embassy-net-wiznet/src/chip/w5500.rs
+++ b/embassy-net-wiznet/src/chip/w5500.rs
@@ -12,7 +12,7 @@ pub enum RegisterBlock {
 pub enum W5500 {}
 
 impl super::Chip for W5500 {}
-impl super::sealed::Chip for W5500 {
+impl super::SealedChip for W5500 {
     type Address = (RegisterBlock, u16);
 
     const COMMON_MODE: Self::Address = (RegisterBlock::Common, 0x00);
diff --git a/embassy-net/src/fmt.rs b/embassy-net/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-net/src/fmt.rs
+++ b/embassy-net/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-nrf/src/fmt.rs b/embassy-nrf/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-nrf/src/fmt.rs
+++ b/embassy-nrf/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-nrf/src/gpio.rs b/embassy-nrf/src/gpio.rs
index 3649ea61a..f2353f21d 100644
--- a/embassy-nrf/src/gpio.rs
+++ b/embassy-nrf/src/gpio.rs
@@ -473,10 +473,12 @@ impl sealed::Pin for AnyPin {
 
 // ====================
 
+#[cfg(not(feature = "_nrf51"))]
 pub(crate) trait PselBits {
     fn psel_bits(&self) -> u32;
 }
 
+#[cfg(not(feature = "_nrf51"))]
 impl<'a, P: Pin> PselBits for Option<PeripheralRef<'a, P>> {
     #[inline]
     fn psel_bits(&self) -> u32 {
diff --git a/embassy-nrf/src/gpiote.rs b/embassy-nrf/src/gpiote.rs
index 12f4ed0a0..4a28279a9 100644
--- a/embassy-nrf/src/gpiote.rs
+++ b/embassy-nrf/src/gpiote.rs
@@ -167,8 +167,10 @@ unsafe fn handle_gpiote_interrupt() {
     }
 }
 
+#[cfg(not(feature = "_nrf51"))]
 struct BitIter(u32);
 
+#[cfg(not(feature = "_nrf51"))]
 impl Iterator for BitIter {
     type Item = u32;
 
diff --git a/embassy-nrf/src/lib.rs b/embassy-nrf/src/lib.rs
index 718f229a3..3457dd933 100644
--- a/embassy-nrf/src/lib.rs
+++ b/embassy-nrf/src/lib.rs
@@ -225,10 +225,31 @@ pub mod config {
         /// Config for the first stage DCDC (VDDH -> VDD), if disabled LDO will be used.
         #[cfg(feature = "nrf52840")]
         pub reg0: bool,
+        /// Configure the voltage of the first stage DCDC. It is stored in non-volatile memory (UICR.REGOUT0 register); pass None to not touch it.
+        #[cfg(feature = "nrf52840")]
+        pub reg0_voltage: Option<Reg0Voltage>,
         /// Config for the second stage DCDC (VDD -> DEC4), if disabled LDO will be used.
         pub reg1: bool,
     }
 
+    ///  Output voltage setting for REG0 regulator stage.
+    #[cfg(feature = "nrf52840")]
+    pub enum Reg0Voltage {
+        /// 1.8 V
+        _1V8 = 0,
+        /// 2.1 V
+        _2V1 = 1,
+        /// 2.4 V
+        _2V4 = 2,
+        /// 2.7 V
+        _2V7 = 3,
+        /// 3.0 V
+        _3V0 = 4,
+        /// 3.3 V
+        _3v3 = 5,
+        //ERASED = 7, means 1.8V
+    }
+
     /// Settings for enabling the built in DCDC converters.
     #[cfg(feature = "_nrf5340-app")]
     pub struct DcdcConfig {
@@ -279,6 +300,8 @@ pub mod config {
                 dcdc: DcdcConfig {
                     #[cfg(feature = "nrf52840")]
                     reg0: false,
+                    #[cfg(feature = "nrf52840")]
+                    reg0_voltage: None,
                     reg1: false,
                 },
                 #[cfg(feature = "_nrf5340-app")]
@@ -337,6 +360,7 @@ mod consts {
     pub const UICR_PSELRESET2: *mut u32 = 0x10001204 as *mut u32;
     pub const UICR_NFCPINS: *mut u32 = 0x1000120C as *mut u32;
     pub const UICR_APPROTECT: *mut u32 = 0x10001208 as *mut u32;
+    pub const UICR_REGOUT0: *mut u32 = 0x10001304 as *mut u32;
     pub const APPROTECT_ENABLED: u32 = 0x0000_0000;
     pub const APPROTECT_DISABLED: u32 = 0x0000_005a;
 }
@@ -493,6 +517,21 @@ pub fn init(config: config::Config) -> Peripherals {
         }
     }
 
+    #[cfg(feature = "nrf52840")]
+    unsafe {
+        if let Some(value) = config.dcdc.reg0_voltage {
+            let value = value as u32;
+            let res = uicr_write_masked(consts::UICR_REGOUT0, value, 0b00000000_00000000_00000000_00000111);
+            needs_reset |= res == WriteResult::Written;
+            if res == WriteResult::Failed {
+                warn!(
+                    "Failed to set regulator voltage, as UICR is already programmed to some other setting, and can't be changed without erasing it.\n\
+                    To fix this, erase UICR manually, for example using `probe-rs erase` or `nrfjprog --eraseuicr`."
+                );
+            }
+        }
+    }
+
     if needs_reset {
         cortex_m::peripheral::SCB::sys_reset();
     }
diff --git a/embassy-nrf/src/timer.rs b/embassy-nrf/src/timer.rs
index 3c35baee5..2970ad3f2 100644
--- a/embassy-nrf/src/timer.rs
+++ b/embassy-nrf/src/timer.rs
@@ -21,8 +21,6 @@ pub(crate) mod sealed {
         fn regs() -> &'static pac::timer0::RegisterBlock;
     }
     pub trait ExtendedInstance {}
-
-    pub trait TimerType {}
 }
 
 /// Basic Timer instance.
diff --git a/embassy-rp/src/adc.rs b/embassy-rp/src/adc.rs
index 21360bf66..4c01fe195 100644
--- a/embassy-rp/src/adc.rs
+++ b/embassy-rp/src/adc.rs
@@ -19,14 +19,9 @@ static WAKER: AtomicWaker = AtomicWaker::new();
 
 /// ADC config.
 #[non_exhaustive]
+#[derive(Default)]
 pub struct Config {}
 
-impl Default for Config {
-    fn default() -> Self {
-        Self {}
-    }
-}
-
 enum Source<'p> {
     Pin(PeripheralRef<'p, AnyPin>),
     TempSensor(PeripheralRef<'p, ADC_TEMP_SENSOR>),
@@ -175,7 +170,7 @@ impl<'d, M: Mode> Adc<'d, M> {
         while !r.cs().read().ready() {}
         match r.cs().read().err() {
             true => Err(Error::ConversionFailed),
-            false => Ok(r.result().read().result().into()),
+            false => Ok(r.result().read().result()),
         }
     }
 }
@@ -221,7 +216,7 @@ impl<'d> Adc<'d, Async> {
         Self::wait_for_ready().await;
         match r.cs().read().err() {
             true => Err(Error::ConversionFailed),
-            false => Ok(r.result().read().result().into()),
+            false => Ok(r.result().read().result()),
         }
     }
 
diff --git a/embassy-rp/src/clocks.rs b/embassy-rp/src/clocks.rs
index 19232b801..b7f6aeac9 100644
--- a/embassy-rp/src/clocks.rs
+++ b/embassy-rp/src/clocks.rs
@@ -737,7 +737,7 @@ fn configure_pll(p: pac::pll::Pll, input_freq: u32, config: PllConfig) -> u32 {
     assert!(config.refdiv >= 1 && config.refdiv <= 63);
     assert!(ref_freq >= 5_000_000 && ref_freq <= 800_000_000);
     let vco_freq = ref_freq.saturating_mul(config.fbdiv as u32);
-    assert!(vco_freq >= 750_000_000 && vco_freq <= 1800_000_000);
+    assert!(vco_freq >= 750_000_000 && vco_freq <= 1_800_000_000);
 
     // Load VCO-related dividers before starting VCO
     p.cs().write(|w| w.set_refdiv(config.refdiv as _));
diff --git a/embassy-rp/src/dma.rs b/embassy-rp/src/dma.rs
index 088a842a1..44aabce6b 100644
--- a/embassy-rp/src/dma.rs
+++ b/embassy-rp/src/dma.rs
@@ -96,7 +96,7 @@ pub unsafe fn write_repeated<'a, C: Channel, W: Word>(
 ) -> Transfer<'a, C> {
     copy_inner(
         ch,
-        &mut DUMMY as *const u32,
+        core::ptr::addr_of_mut!(DUMMY) as *const u32,
         to as *mut u32,
         len,
         W::size(),
diff --git a/embassy-rp/src/flash.rs b/embassy-rp/src/flash.rs
index 2d673cf6c..422b77400 100644
--- a/embassy-rp/src/flash.rs
+++ b/embassy-rp/src/flash.rs
@@ -326,9 +326,9 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, Async, FLASH_SIZE> {
         // If the destination address is already aligned, then we can just DMA directly
         if (bytes.as_ptr() as u32) % 4 == 0 {
             // Safety: alignment and size have been checked for compatibility
-            let mut buf: &mut [u32] =
+            let buf: &mut [u32] =
                 unsafe { core::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u32, bytes.len() / 4) };
-            self.background_read(offset, &mut buf)?.await;
+            self.background_read(offset, buf)?.await;
             return Ok(());
         }
 
@@ -420,8 +420,6 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> embedded_storage_async::nor_flash
 
 #[allow(dead_code)]
 mod ram_helpers {
-    use core::marker::PhantomData;
-
     use super::*;
     use crate::rom_data;
 
diff --git a/embassy-rp/src/float/mod.rs b/embassy-rp/src/float/mod.rs
index 945afff90..3ad6f1c50 100644
--- a/embassy-rp/src/float/mod.rs
+++ b/embassy-rp/src/float/mod.rs
@@ -89,6 +89,7 @@ pub(crate) trait Float:
     }
 
     /// Returns true if `self` is infinity
+    #[allow(unused)]
     fn is_infinity(self) -> bool {
         (self.repr() & (Self::EXPONENT_MASK | Self::SIGNIFICAND_MASK)) == Self::EXPONENT_MASK
     }
diff --git a/embassy-rp/src/fmt.rs b/embassy-rp/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-rp/src/fmt.rs
+++ b/embassy-rp/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-rp/src/gpio.rs b/embassy-rp/src/gpio.rs
index 62eeb4cf6..a84c00a2c 100644
--- a/embassy-rp/src/gpio.rs
+++ b/embassy-rp/src/gpio.rs
@@ -225,8 +225,8 @@ fn irq_handler<const N: usize>(bank: pac::io::Io, wakers: &[AtomicWaker; N]) {
         // The status register is divided into groups of four, one group for
         // each pin. Each group consists of four trigger levels LEVEL_LOW,
         // LEVEL_HIGH, EDGE_LOW, and EDGE_HIGH for each pin.
-        let pin_group = (pin % 8) as usize;
-        let event = (intsx.read().0 >> pin_group * 4) & 0xf as u32;
+        let pin_group = pin % 8;
+        let event = (intsx.read().0 >> (pin_group * 4)) & 0xf;
 
         // no more than one event can be awaited per pin at any given time, so
         // we can just clear all interrupt enables for that pin without having
@@ -238,7 +238,7 @@ fn irq_handler<const N: usize>(bank: pac::io::Io, wakers: &[AtomicWaker; N]) {
                 w.set_level_high(pin_group, true);
                 w.set_level_low(pin_group, true);
             });
-            wakers[pin as usize].wake();
+            wakers[pin].wake();
         }
     }
 }
@@ -976,8 +976,6 @@ impl_pin!(PIN_QSPI_SD3, Bank::Qspi, 5);
 // ====================
 
 mod eh02 {
-    use core::convert::Infallible;
-
     use super::*;
 
     impl<'d> embedded_hal_02::digital::v2::InputPin for Input<'d> {
diff --git a/embassy-rp/src/i2c.rs b/embassy-rp/src/i2c.rs
index ac0eac96d..26a819b25 100644
--- a/embassy-rp/src/i2c.rs
+++ b/embassy-rp/src/i2c.rs
@@ -352,7 +352,7 @@ impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandl
     }
 }
 
-pub(crate) fn set_up_i2c_pin<'d, P, T>(pin: &P)
+pub(crate) fn set_up_i2c_pin<P, T>(pin: &P)
 where
     P: core::ops::Deref<Target = T>,
     T: crate::gpio::Pin,
@@ -749,7 +749,7 @@ where
 
         let addr: u16 = address.into();
 
-        if operations.len() > 0 {
+        if !operations.is_empty() {
             Self::setup(addr)?;
         }
         let mut iterator = operations.iter_mut();
@@ -762,7 +762,7 @@ where
                     self.read_async_internal(buffer, false, last).await?;
                 }
                 Operation::Write(buffer) => {
-                    self.write_async_internal(buffer.into_iter().cloned(), last).await?;
+                    self.write_async_internal(buffer.iter().cloned(), last).await?;
                 }
             }
         }
diff --git a/embassy-rp/src/i2c_slave.rs b/embassy-rp/src/i2c_slave.rs
index 97ca17295..e2d4fbac0 100644
--- a/embassy-rp/src/i2c_slave.rs
+++ b/embassy-rp/src/i2c_slave.rs
@@ -289,7 +289,7 @@ impl<'d, T: Instance> I2cSlave<'d, T> {
     pub async fn respond_to_read(&mut self, buffer: &[u8]) -> Result<ReadStatus, Error> {
         let p = T::regs();
 
-        if buffer.len() == 0 {
+        if buffer.is_empty() {
             return Err(Error::InvalidResponseBufferLength);
         }
 
@@ -318,15 +318,13 @@ impl<'d, T: Instance> I2cSlave<'d, T> {
                     }
 
                     Poll::Pending
+                } else if stat.rx_done() {
+                    p.ic_clr_rx_done().read();
+                    Poll::Ready(Ok(ReadStatus::Done))
+                } else if stat.rd_req() && stat.tx_empty() {
+                    Poll::Ready(Ok(ReadStatus::NeedMoreBytes))
                 } else {
-                    if stat.rx_done() {
-                        p.ic_clr_rx_done().read();
-                        Poll::Ready(Ok(ReadStatus::Done))
-                    } else if stat.rd_req() && stat.tx_empty() {
-                        Poll::Ready(Ok(ReadStatus::NeedMoreBytes))
-                    } else {
-                        Poll::Pending
-                    }
+                    Poll::Pending
                 }
             },
             |_me| {
diff --git a/embassy-rp/src/lib.rs b/embassy-rp/src/lib.rs
index 46973fdc8..1c83e306d 100644
--- a/embassy-rp/src/lib.rs
+++ b/embassy-rp/src/lib.rs
@@ -183,14 +183,14 @@ embassy_hal_internal::peripherals! {
     DMA_CH10,
     DMA_CH11,
 
-    PWM_CH0,
-    PWM_CH1,
-    PWM_CH2,
-    PWM_CH3,
-    PWM_CH4,
-    PWM_CH5,
-    PWM_CH6,
-    PWM_CH7,
+    PWM_SLICE0,
+    PWM_SLICE1,
+    PWM_SLICE2,
+    PWM_SLICE3,
+    PWM_SLICE4,
+    PWM_SLICE5,
+    PWM_SLICE6,
+    PWM_SLICE7,
 
     USB,
 
@@ -238,8 +238,8 @@ select_bootloader! {
 }
 
 /// Installs a stack guard for the CORE0 stack in MPU region 0.
-/// Will fail if the MPU is already confgigured. This function requires
-/// a `_stack_end` symbol to be defined by the linker script, and expexcts
+/// Will fail if the MPU is already configured. This function requires
+/// a `_stack_end` symbol to be defined by the linker script, and expects
 /// `_stack_end` to be located at the lowest address (largest depth) of
 /// the stack.
 ///
@@ -274,7 +274,7 @@ pub fn install_core0_stack_guard() -> Result<(), ()> {
     extern "C" {
         static mut _stack_end: usize;
     }
-    unsafe { install_stack_guard(&mut _stack_end as *mut usize) }
+    unsafe { install_stack_guard(core::ptr::addr_of_mut!(_stack_end)) }
 }
 
 #[inline(always)]
@@ -354,6 +354,7 @@ pub fn init(config: config::Config) -> Peripherals {
 
 /// Extension trait for PAC regs, adding atomic xor/bitset/bitclear writes.
 trait RegExt<T: Copy> {
+    #[allow(unused)]
     fn write_xor<R>(&self, f: impl FnOnce(&mut T) -> R) -> R;
     fn write_set<R>(&self, f: impl FnOnce(&mut T) -> R) -> R;
     fn write_clear<R>(&self, f: impl FnOnce(&mut T) -> R) -> R;
diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
index 252f30dc1..d9d65694a 100644
--- a/embassy-rp/src/multicore.rs
+++ b/embassy-rp/src/multicore.rs
@@ -59,7 +59,7 @@ static IS_CORE1_INIT: AtomicBool = AtomicBool::new(false);
 
 #[inline(always)]
 fn core1_setup(stack_bottom: *mut usize) {
-    if let Err(_) = install_stack_guard(stack_bottom) {
+    if install_stack_guard(stack_bottom).is_err() {
         // currently only happens if the MPU was already set up, which
         // would indicate that the core is already in use from outside
         // embassy, somehow. trap if so since we can't deal with that.
diff --git a/embassy-rp/src/pio/mod.rs b/embassy-rp/src/pio/mod.rs
index ca9795024..7eca700ba 100644
--- a/embassy-rp/src/pio/mod.rs
+++ b/embassy-rp/src/pio/mod.rs
@@ -268,7 +268,7 @@ impl<'l, PIO: Instance> Pin<'l, PIO> {
     }
 
     /// Set the pin's input sync bypass.
-    pub fn set_input_sync_bypass<'a>(&mut self, bypass: bool) {
+    pub fn set_input_sync_bypass(&mut self, bypass: bool) {
         let mask = 1 << self.pin();
         if bypass {
             PIO::PIO.input_sync_bypass().write_set(|w| *w = mask);
@@ -463,7 +463,7 @@ impl<'d, PIO: Instance, const SM: usize> Drop for StateMachine<'d, PIO, SM> {
     }
 }
 
-fn assert_consecutive<'d, PIO: Instance>(pins: &[&Pin<'d, PIO>]) {
+fn assert_consecutive<PIO: Instance>(pins: &[&Pin<PIO>]) {
     for (p1, p2) in pins.iter().zip(pins.iter().skip(1)) {
         // purposely does not allow wrap-around because we can't claim pins 30 and 31.
         assert!(p1.pin() + 1 == p2.pin(), "pins must be consecutive");
@@ -764,7 +764,7 @@ impl<'d, PIO: Instance + 'd, const SM: usize> StateMachine<'d, PIO, SM> {
                     w.set_set_count(1);
                 });
                 // SET PINS, (dir)
-                unsafe { sm.exec_instr(0b111_00000_000_00000 | level as u16) };
+                unsafe { sm.exec_instr(0b11100_000_000_00000 | level as u16) };
             }
         });
     }
@@ -867,9 +867,7 @@ impl<'d, PIO: Instance> Common<'d, PIO> {
         prog: &Program<SIZE>,
     ) -> Result<LoadedProgram<'d, PIO>, LoadError> {
         match prog.origin {
-            Some(origin) => self
-                .try_load_program_at(prog, origin)
-                .map_err(|a| LoadError::AddressInUse(a)),
+            Some(origin) => self.try_load_program_at(prog, origin).map_err(LoadError::AddressInUse),
             None => {
                 // naively search for free space, allowing wraparound since
                 // PIO does support that. with only 32 instruction slots it
diff --git a/embassy-rp/src/pwm.rs b/embassy-rp/src/pwm.rs
index 784a05f92..5aab3ff4f 100644
--- a/embassy-rp/src/pwm.rs
+++ b/embassy-rp/src/pwm.rs
@@ -82,13 +82,13 @@ impl From<InputMode> for Divmode {
 }
 
 /// PWM driver.
-pub struct Pwm<'d, T: Channel> {
+pub struct Pwm<'d, T: Slice> {
     inner: PeripheralRef<'d, T>,
     pin_a: Option<PeripheralRef<'d, AnyPin>>,
     pin_b: Option<PeripheralRef<'d, AnyPin>>,
 }
 
-impl<'d, T: Channel> Pwm<'d, T> {
+impl<'d, T: Slice> Pwm<'d, T> {
     fn new_inner(
         inner: impl Peripheral<P = T> + 'd,
         a: Option<PeripheralRef<'d, AnyPin>>,
@@ -114,8 +114,8 @@ impl<'d, T: Channel> Pwm<'d, T> {
         }
         Self {
             inner,
-            pin_a: a.into(),
-            pin_b: b.into(),
+            pin_a: a,
+            pin_b: b,
         }
     }
 
@@ -129,7 +129,7 @@ impl<'d, T: Channel> Pwm<'d, T> {
     #[inline]
     pub fn new_output_a(
         inner: impl Peripheral<P = T> + 'd,
-        a: impl Peripheral<P = impl PwmPinA<T>> + 'd,
+        a: impl Peripheral<P = impl ChannelAPin<T>> + 'd,
         config: Config,
     ) -> Self {
         into_ref!(a);
@@ -140,7 +140,7 @@ impl<'d, T: Channel> Pwm<'d, T> {
     #[inline]
     pub fn new_output_b(
         inner: impl Peripheral<P = T> + 'd,
-        b: impl Peripheral<P = impl PwmPinB<T>> + 'd,
+        b: impl Peripheral<P = impl ChannelBPin<T>> + 'd,
         config: Config,
     ) -> Self {
         into_ref!(b);
@@ -151,8 +151,8 @@ impl<'d, T: Channel> Pwm<'d, T> {
     #[inline]
     pub fn new_output_ab(
         inner: impl Peripheral<P = T> + 'd,
-        a: impl Peripheral<P = impl PwmPinA<T>> + 'd,
-        b: impl Peripheral<P = impl PwmPinB<T>> + 'd,
+        a: impl Peripheral<P = impl ChannelAPin<T>> + 'd,
+        b: impl Peripheral<P = impl ChannelBPin<T>> + 'd,
         config: Config,
     ) -> Self {
         into_ref!(a, b);
@@ -163,7 +163,7 @@ impl<'d, T: Channel> Pwm<'d, T> {
     #[inline]
     pub fn new_input(
         inner: impl Peripheral<P = T> + 'd,
-        b: impl Peripheral<P = impl PwmPinB<T>> + 'd,
+        b: impl Peripheral<P = impl ChannelBPin<T>> + 'd,
         mode: InputMode,
         config: Config,
     ) -> Self {
@@ -175,8 +175,8 @@ impl<'d, T: Channel> Pwm<'d, T> {
     #[inline]
     pub fn new_output_input(
         inner: impl Peripheral<P = T> + 'd,
-        a: impl Peripheral<P = impl PwmPinA<T>> + 'd,
-        b: impl Peripheral<P = impl PwmPinB<T>> + 'd,
+        a: impl Peripheral<P = impl ChannelAPin<T>> + 'd,
+        b: impl Peripheral<P = impl ChannelBPin<T>> + 'd,
         mode: InputMode,
         config: Config,
     ) -> Self {
@@ -190,7 +190,7 @@ impl<'d, T: Channel> Pwm<'d, T> {
     }
 
     fn configure(p: pac::pwm::Channel, config: &Config) {
-        if config.divider > FixedU16::<fixed::types::extra::U4>::from_bits(0xFF_F) {
+        if config.divider > FixedU16::<fixed::types::extra::U4>::from_bits(0xFFF) {
             panic!("Requested divider is too large");
         }
 
@@ -265,18 +265,18 @@ impl<'d, T: Channel> Pwm<'d, T> {
     }
 }
 
-/// Batch representation of PWM channels.
+/// Batch representation of PWM slices.
 pub struct PwmBatch(u32);
 
 impl PwmBatch {
     #[inline]
-    /// Enable a PWM channel in this batch.
-    pub fn enable(&mut self, pwm: &Pwm<'_, impl Channel>) {
+    /// Enable a PWM slice in this batch.
+    pub fn enable(&mut self, pwm: &Pwm<'_, impl Slice>) {
         self.0 |= pwm.bit();
     }
 
     #[inline]
-    /// Enable channels in this batch in a PWM.
+    /// Enable slices in this batch in a PWM.
     pub fn set_enabled(enabled: bool, batch: impl FnOnce(&mut PwmBatch)) {
         let mut en = PwmBatch(0);
         batch(&mut en);
@@ -288,7 +288,7 @@ impl PwmBatch {
     }
 }
 
-impl<'d, T: Channel> Drop for Pwm<'d, T> {
+impl<'d, T: Slice> Drop for Pwm<'d, T> {
     fn drop(&mut self) {
         self.inner.regs().csr().write_clear(|w| w.set_en(false));
         if let Some(pin) = &self.pin_a {
@@ -301,24 +301,24 @@ impl<'d, T: Channel> Drop for Pwm<'d, T> {
 }
 
 mod sealed {
-    pub trait Channel {}
+    pub trait Slice {}
 }
 
-/// PWM Channel.
-pub trait Channel: Peripheral<P = Self> + sealed::Channel + Sized + 'static {
-    /// Channel number.
+/// PWM Slice.
+pub trait Slice: Peripheral<P = Self> + sealed::Slice + Sized + 'static {
+    /// Slice number.
     fn number(&self) -> u8;
 
-    /// Channel register block.
+    /// Slice register block.
     fn regs(&self) -> pac::pwm::Channel {
         pac::PWM.ch(self.number() as _)
     }
 }
 
-macro_rules! channel {
+macro_rules! slice {
     ($name:ident, $num:expr) => {
-        impl sealed::Channel for peripherals::$name {}
-        impl Channel for peripherals::$name {
+        impl sealed::Slice for peripherals::$name {}
+        impl Slice for peripherals::$name {
             fn number(&self) -> u8 {
                 $num
             }
@@ -326,19 +326,19 @@ macro_rules! channel {
     };
 }
 
-channel!(PWM_CH0, 0);
-channel!(PWM_CH1, 1);
-channel!(PWM_CH2, 2);
-channel!(PWM_CH3, 3);
-channel!(PWM_CH4, 4);
-channel!(PWM_CH5, 5);
-channel!(PWM_CH6, 6);
-channel!(PWM_CH7, 7);
+slice!(PWM_SLICE0, 0);
+slice!(PWM_SLICE1, 1);
+slice!(PWM_SLICE2, 2);
+slice!(PWM_SLICE3, 3);
+slice!(PWM_SLICE4, 4);
+slice!(PWM_SLICE5, 5);
+slice!(PWM_SLICE6, 6);
+slice!(PWM_SLICE7, 7);
 
-/// PWM Pin A.
-pub trait PwmPinA<T: Channel>: GpioPin {}
-/// PWM Pin B.
-pub trait PwmPinB<T: Channel>: GpioPin {}
+/// PWM Channel A.
+pub trait ChannelAPin<T: Slice>: GpioPin {}
+/// PWM Channel B.
+pub trait ChannelBPin<T: Slice>: GpioPin {}
 
 macro_rules! impl_pin {
     ($pin:ident, $channel:ident, $kind:ident) => {
@@ -346,33 +346,33 @@ macro_rules! impl_pin {
     };
 }
 
-impl_pin!(PIN_0, PWM_CH0, PwmPinA);
-impl_pin!(PIN_1, PWM_CH0, PwmPinB);
-impl_pin!(PIN_2, PWM_CH1, PwmPinA);
-impl_pin!(PIN_3, PWM_CH1, PwmPinB);
-impl_pin!(PIN_4, PWM_CH2, PwmPinA);
-impl_pin!(PIN_5, PWM_CH2, PwmPinB);
-impl_pin!(PIN_6, PWM_CH3, PwmPinA);
-impl_pin!(PIN_7, PWM_CH3, PwmPinB);
-impl_pin!(PIN_8, PWM_CH4, PwmPinA);
-impl_pin!(PIN_9, PWM_CH4, PwmPinB);
-impl_pin!(PIN_10, PWM_CH5, PwmPinA);
-impl_pin!(PIN_11, PWM_CH5, PwmPinB);
-impl_pin!(PIN_12, PWM_CH6, PwmPinA);
-impl_pin!(PIN_13, PWM_CH6, PwmPinB);
-impl_pin!(PIN_14, PWM_CH7, PwmPinA);
-impl_pin!(PIN_15, PWM_CH7, PwmPinB);
-impl_pin!(PIN_16, PWM_CH0, PwmPinA);
-impl_pin!(PIN_17, PWM_CH0, PwmPinB);
-impl_pin!(PIN_18, PWM_CH1, PwmPinA);
-impl_pin!(PIN_19, PWM_CH1, PwmPinB);
-impl_pin!(PIN_20, PWM_CH2, PwmPinA);
-impl_pin!(PIN_21, PWM_CH2, PwmPinB);
-impl_pin!(PIN_22, PWM_CH3, PwmPinA);
-impl_pin!(PIN_23, PWM_CH3, PwmPinB);
-impl_pin!(PIN_24, PWM_CH4, PwmPinA);
-impl_pin!(PIN_25, PWM_CH4, PwmPinB);
-impl_pin!(PIN_26, PWM_CH5, PwmPinA);
-impl_pin!(PIN_27, PWM_CH5, PwmPinB);
-impl_pin!(PIN_28, PWM_CH6, PwmPinA);
-impl_pin!(PIN_29, PWM_CH6, PwmPinB);
+impl_pin!(PIN_0, PWM_SLICE0, ChannelAPin);
+impl_pin!(PIN_1, PWM_SLICE0, ChannelBPin);
+impl_pin!(PIN_2, PWM_SLICE1, ChannelAPin);
+impl_pin!(PIN_3, PWM_SLICE1, ChannelBPin);
+impl_pin!(PIN_4, PWM_SLICE2, ChannelAPin);
+impl_pin!(PIN_5, PWM_SLICE2, ChannelBPin);
+impl_pin!(PIN_6, PWM_SLICE3, ChannelAPin);
+impl_pin!(PIN_7, PWM_SLICE3, ChannelBPin);
+impl_pin!(PIN_8, PWM_SLICE4, ChannelAPin);
+impl_pin!(PIN_9, PWM_SLICE4, ChannelBPin);
+impl_pin!(PIN_10, PWM_SLICE5, ChannelAPin);
+impl_pin!(PIN_11, PWM_SLICE5, ChannelBPin);
+impl_pin!(PIN_12, PWM_SLICE6, ChannelAPin);
+impl_pin!(PIN_13, PWM_SLICE6, ChannelBPin);
+impl_pin!(PIN_14, PWM_SLICE7, ChannelAPin);
+impl_pin!(PIN_15, PWM_SLICE7, ChannelBPin);
+impl_pin!(PIN_16, PWM_SLICE0, ChannelAPin);
+impl_pin!(PIN_17, PWM_SLICE0, ChannelBPin);
+impl_pin!(PIN_18, PWM_SLICE1, ChannelAPin);
+impl_pin!(PIN_19, PWM_SLICE1, ChannelBPin);
+impl_pin!(PIN_20, PWM_SLICE2, ChannelAPin);
+impl_pin!(PIN_21, PWM_SLICE2, ChannelBPin);
+impl_pin!(PIN_22, PWM_SLICE3, ChannelAPin);
+impl_pin!(PIN_23, PWM_SLICE3, ChannelBPin);
+impl_pin!(PIN_24, PWM_SLICE4, ChannelAPin);
+impl_pin!(PIN_25, PWM_SLICE4, ChannelBPin);
+impl_pin!(PIN_26, PWM_SLICE5, ChannelAPin);
+impl_pin!(PIN_27, PWM_SLICE5, ChannelBPin);
+impl_pin!(PIN_28, PWM_SLICE6, ChannelAPin);
+impl_pin!(PIN_29, PWM_SLICE6, ChannelBPin);
diff --git a/embassy-rp/src/relocate.rs b/embassy-rp/src/relocate.rs
index b35b4ed72..34487819f 100644
--- a/embassy-rp/src/relocate.rs
+++ b/embassy-rp/src/relocate.rs
@@ -1,5 +1,3 @@
-use core::iter::Iterator;
-
 use pio::{Program, SideSet, Wrap};
 
 pub struct CodeIterator<'a, I>
@@ -22,15 +20,15 @@ where
 {
     type Item = u16;
     fn next(&mut self) -> Option<Self::Item> {
-        self.iter.next().and_then(|&instr| {
-            Some(if instr & 0b1110_0000_0000_0000 == 0 {
+        self.iter.next().map(|&instr| {
+            if instr & 0b1110_0000_0000_0000 == 0 {
                 // this is a JMP instruction -> add offset to address
                 let address = (instr & 0b1_1111) as u8;
                 let address = address.wrapping_add(self.offset) % 32;
                 instr & (!0b11111) | address as u16
             } else {
                 instr
-            })
+            }
         })
     }
 }
diff --git a/embassy-rp/src/rtc/mod.rs b/embassy-rp/src/rtc/mod.rs
index b696989f5..c8691bdc2 100644
--- a/embassy-rp/src/rtc/mod.rs
+++ b/embassy-rp/src/rtc/mod.rs
@@ -29,8 +29,7 @@ impl<'d, T: Instance> Rtc<'d, T> {
         // Set the RTC divider
         inner.regs().clkdiv_m1().write(|w| w.set_clkdiv_m1(clk_rtc_freq() - 1));
 
-        let result = Self { inner };
-        result
+        Self { inner }
     }
 
     /// Enable or disable the leap year check. The rp2040 chip will always add a Feb 29th on every year that is divisable by 4, but this may be incorrect (e.g. on century years). This function allows you to disable this check.
diff --git a/embassy-rp/src/uart/buffered.rs b/embassy-rp/src/uart/buffered.rs
index 99c958129..da1157984 100644
--- a/embassy-rp/src/uart/buffered.rs
+++ b/embassy-rp/src/uart/buffered.rs
@@ -1,17 +1,11 @@
 //! Buffered UART driver.
-use core::future::{poll_fn, Future};
+use core::future::Future;
 use core::slice;
-use core::task::Poll;
 
-use atomic_polyfill::{AtomicU8, Ordering};
+use atomic_polyfill::AtomicU8;
 use embassy_hal_internal::atomic_ring_buffer::RingBuffer;
-use embassy_sync::waitqueue::AtomicWaker;
-use embassy_time::Timer;
 
 use super::*;
-use crate::clocks::clk_peri_freq;
-use crate::interrupt::typelevel::{Binding, Interrupt};
-use crate::{interrupt, RegExt};
 
 pub struct State {
     tx_waker: AtomicWaker,
@@ -467,7 +461,7 @@ impl<'d, T: Instance> Drop for BufferedUartRx<'d, T> {
 
         // TX is inactive if the the buffer is not available.
         // We can now unregister the interrupt handler
-        if state.tx_buf.len() == 0 {
+        if state.tx_buf.is_empty() {
             T::Interrupt::disable();
         }
     }
@@ -480,7 +474,7 @@ impl<'d, T: Instance> Drop for BufferedUartTx<'d, T> {
 
         // RX is inactive if the the buffer is not available.
         // We can now unregister the interrupt handler
-        if state.rx_buf.len() == 0 {
+        if state.rx_buf.is_empty() {
             T::Interrupt::disable();
         }
     }
diff --git a/embassy-rp/src/uart/mod.rs b/embassy-rp/src/uart/mod.rs
index f372cb640..65dcf4eb4 100644
--- a/embassy-rp/src/uart/mod.rs
+++ b/embassy-rp/src/uart/mod.rs
@@ -322,7 +322,7 @@ impl<'d, T: Instance, M: Mode> UartRx<'d, T, M> {
 
 impl<'d, T: Instance, M: Mode> Drop for UartRx<'d, T, M> {
     fn drop(&mut self) {
-        if let Some(_) = self.rx_dma {
+        if self.rx_dma.is_some() {
             T::Interrupt::disable();
             // clear dma flags. irq handlers use these to disambiguate among themselves.
             T::regs().uartdmacr().write_clear(|reg| {
diff --git a/embassy-rp/src/usb.rs b/embassy-rp/src/usb.rs
index 905661d64..d68dee4a3 100644
--- a/embassy-rp/src/usb.rs
+++ b/embassy-rp/src/usb.rs
@@ -465,7 +465,6 @@ impl<'d, T: Instance> driver::Bus for Bus<'d, T> {
 
 trait Dir {
     fn dir() -> Direction;
-    fn waker(i: usize) -> &'static AtomicWaker;
 }
 
 /// Type for In direction.
@@ -474,11 +473,6 @@ impl Dir for In {
     fn dir() -> Direction {
         Direction::In
     }
-
-    #[inline]
-    fn waker(i: usize) -> &'static AtomicWaker {
-        &EP_IN_WAKERS[i]
-    }
 }
 
 /// Type for Out direction.
@@ -487,11 +481,6 @@ impl Dir for Out {
     fn dir() -> Direction {
         Direction::Out
     }
-
-    #[inline]
-    fn waker(i: usize) -> &'static AtomicWaker {
-        &EP_OUT_WAKERS[i]
-    }
 }
 
 /// Endpoint for RP USB driver.
diff --git a/embassy-stm32-wpan/src/consts.rs b/embassy-stm32-wpan/src/consts.rs
index bd70851ea..6aaef1d35 100644
--- a/embassy-stm32-wpan/src/consts.rs
+++ b/embassy-stm32-wpan/src/consts.rs
@@ -1,5 +1,3 @@
-use core::convert::TryFrom;
-
 use crate::evt::CsEvt;
 use crate::PacketHeader;
 
diff --git a/embassy-stm32-wpan/src/fmt.rs b/embassy-stm32-wpan/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-stm32-wpan/src/fmt.rs
+++ b/embassy-stm32-wpan/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-stm32/Cargo.toml b/embassy-stm32/Cargo.toml
index 158c630b9..d00e7aa55 100644
--- a/embassy-stm32/Cargo.toml
+++ b/embassy-stm32/Cargo.toml
@@ -71,8 +71,8 @@ sdio-host = "0.5.0"
 critical-section = "1.1"
 #stm32-metapac = { version = "15" }
 stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-ac187e40aa97da86f7d3cf22abad918f42f01739" }
+
 vcell = "0.1.3"
-bxcan = "0.7.0"
 nb = "1.0.0"
 stm32-fmc = "0.3.0"
 cfg-if = "1.0.0"
@@ -84,6 +84,7 @@ document-features = "0.2.7"
 
 static_assertions = { version = "1.1" }
 volatile-register = { version = "0.2.1" }
+bitflags = "2.4.2"
 
 
 
@@ -97,7 +98,6 @@ quote = "1.0.15"
 #stm32-metapac = { version = "15", default-features = false, features = ["metadata"]}
 stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-ac187e40aa97da86f7d3cf22abad918f42f01739", default-features = false, features = ["metadata"]}
 
-
 [features]
 default = ["rt"]
 
@@ -105,7 +105,7 @@ default = ["rt"]
 rt = ["stm32-metapac/rt"]
 
 ## Use [`defmt`](https://docs.rs/defmt/latest/defmt/) for logging
-defmt = ["dep:defmt", "bxcan/unstable-defmt", "embassy-sync/defmt", "embassy-embedded-hal/defmt", "embassy-hal-internal/defmt", "embedded-io-async/defmt-03", "embassy-usb-driver/defmt", "embassy-net-driver/defmt", "embassy-time?/defmt"]
+defmt = ["dep:defmt", "embassy-sync/defmt", "embassy-embedded-hal/defmt", "embassy-hal-internal/defmt", "embedded-io-async/defmt-03", "embassy-usb-driver/defmt", "embassy-net-driver/defmt", "embassy-time?/defmt"]
 
 exti = []
 low-power = [ "dep:embassy-executor", "embassy-executor?/arch-cortex-m", "time" ]
diff --git a/embassy-stm32/build.rs b/embassy-stm32/build.rs
index 6217a3309..129c5df76 100644
--- a/embassy-stm32/build.rs
+++ b/embassy-stm32/build.rs
@@ -509,25 +509,20 @@ fn main() {
         if let Some(rcc) = &p.rcc {
             let en = rcc.enable.as_ref().unwrap();
 
-            let rst = match &rcc.reset {
+            let (start_rst, end_rst) = match &rcc.reset {
                 Some(rst) => {
                     let rst_reg = format_ident!("{}", rst.register.to_ascii_lowercase());
                     let set_rst_field = format_ident!("set_{}", rst.field.to_ascii_lowercase());
-                    quote! {
-                        crate::pac::RCC.#rst_reg().modify(|w| w.#set_rst_field(true));
-                        crate::pac::RCC.#rst_reg().modify(|w| w.#set_rst_field(false));
-                    }
+                    (
+                        quote! {
+                            crate::pac::RCC.#rst_reg().modify(|w| w.#set_rst_field(true));
+                        },
+                        quote! {
+                            crate::pac::RCC.#rst_reg().modify(|w| w.#set_rst_field(false));
+                        },
+                    )
                 }
-                None => TokenStream::new(),
-            };
-
-            let after_enable = if chip_name.starts_with("stm32f2") {
-                // Errata: ES0005 - 2.1.11 Delay after an RCC peripheral clock enabling
-                quote! {
-                    cortex_m::asm::dsb();
-                }
-            } else {
-                TokenStream::new()
+                None => (TokenStream::new(), TokenStream::new()),
             };
 
             let ptype = if let Some(reg) = &p.registers { reg.kind } else { "" };
@@ -589,16 +584,29 @@ fn main() {
             };
 
             g.extend(quote! {
-                impl crate::rcc::sealed::RccPeripheral for peripherals::#pname {
+                impl crate::rcc::SealedRccPeripheral for peripherals::#pname {
                     fn frequency() -> crate::time::Hertz {
                         #clock_frequency
                     }
                     fn enable_and_reset_with_cs(_cs: critical_section::CriticalSection) {
                         #before_enable
                         #incr_stop_refcount
+
+                        #start_rst
+
                         crate::pac::RCC.#en_reg().modify(|w| w.#set_en_field(true));
-                        #after_enable
-                        #rst
+
+                        // we must wait two peripheral clock cycles before the clock is active
+                        // this seems to work, but might be incorrect
+                        // see http://efton.sk/STM32/gotcha/g183.html
+
+                        // dummy read (like in the ST HALs)
+                        let _ = crate::pac::RCC.#en_reg().read();
+
+                        // DSB for good measure
+                        cortex_m::asm::dsb();
+
+                        #end_rst
                     }
                     fn disable_with_cs(_cs: critical_section::CriticalSection) {
                         #before_disable
@@ -764,6 +772,8 @@ fn main() {
     #[rustfmt::skip]
     let signals: HashMap<_, _> = [
                 // (kind, signal) => trait
+        (("ucpd", "CC1"), quote!(crate::ucpd::Cc1Pin)),
+        (("ucpd", "CC2"), quote!(crate::ucpd::Cc2Pin)),
         (("usart", "TX"), quote!(crate::usart::TxPin)),
         (("usart", "RX"), quote!(crate::usart::RxPin)),
         (("usart", "CTS"), quote!(crate::usart::CtsPin)),
@@ -816,20 +826,20 @@ fn main() {
         (("dcmi", "PIXCLK"), quote!(crate::dcmi::PixClkPin)),
         (("usb", "DP"), quote!(crate::usb::DpPin)),
         (("usb", "DM"), quote!(crate::usb::DmPin)),
-        (("otg", "DP"), quote!(crate::usb_otg::DpPin)),
-        (("otg", "DM"), quote!(crate::usb_otg::DmPin)),
-        (("otg", "ULPI_CK"), quote!(crate::usb_otg::UlpiClkPin)),
-        (("otg", "ULPI_DIR"), quote!(crate::usb_otg::UlpiDirPin)),
-        (("otg", "ULPI_NXT"), quote!(crate::usb_otg::UlpiNxtPin)),
-        (("otg", "ULPI_STP"), quote!(crate::usb_otg::UlpiStpPin)),
-        (("otg", "ULPI_D0"), quote!(crate::usb_otg::UlpiD0Pin)),
-        (("otg", "ULPI_D1"), quote!(crate::usb_otg::UlpiD1Pin)),
-        (("otg", "ULPI_D2"), quote!(crate::usb_otg::UlpiD2Pin)),
-        (("otg", "ULPI_D3"), quote!(crate::usb_otg::UlpiD3Pin)),
-        (("otg", "ULPI_D4"), quote!(crate::usb_otg::UlpiD4Pin)),
-        (("otg", "ULPI_D5"), quote!(crate::usb_otg::UlpiD5Pin)),
-        (("otg", "ULPI_D6"), quote!(crate::usb_otg::UlpiD6Pin)),
-        (("otg", "ULPI_D7"), quote!(crate::usb_otg::UlpiD7Pin)),
+        (("otg", "DP"), quote!(crate::usb::DpPin)),
+        (("otg", "DM"), quote!(crate::usb::DmPin)),
+        (("otg", "ULPI_CK"), quote!(crate::usb::UlpiClkPin)),
+        (("otg", "ULPI_DIR"), quote!(crate::usb::UlpiDirPin)),
+        (("otg", "ULPI_NXT"), quote!(crate::usb::UlpiNxtPin)),
+        (("otg", "ULPI_STP"), quote!(crate::usb::UlpiStpPin)),
+        (("otg", "ULPI_D0"), quote!(crate::usb::UlpiD0Pin)),
+        (("otg", "ULPI_D1"), quote!(crate::usb::UlpiD1Pin)),
+        (("otg", "ULPI_D2"), quote!(crate::usb::UlpiD2Pin)),
+        (("otg", "ULPI_D3"), quote!(crate::usb::UlpiD3Pin)),
+        (("otg", "ULPI_D4"), quote!(crate::usb::UlpiD4Pin)),
+        (("otg", "ULPI_D5"), quote!(crate::usb::UlpiD5Pin)),
+        (("otg", "ULPI_D6"), quote!(crate::usb::UlpiD6Pin)),
+        (("otg", "ULPI_D7"), quote!(crate::usb::UlpiD7Pin)),
         (("can", "TX"), quote!(crate::can::TxPin)),
         (("can", "RX"), quote!(crate::can::RxPin)),
         (("eth", "REF_CLK"), quote!(crate::eth::RefClkPin)),
@@ -1114,6 +1124,8 @@ fn main() {
 
     let signals: HashMap<_, _> = [
         // (kind, signal) => trait
+        (("ucpd", "RX"), quote!(crate::ucpd::RxDma)),
+        (("ucpd", "TX"), quote!(crate::ucpd::TxDma)),
         (("usart", "RX"), quote!(crate::usart::RxDma)),
         (("usart", "TX"), quote!(crate::usart::TxDma)),
         (("lpuart", "RX"), quote!(crate::usart::RxDma)),
@@ -1134,6 +1146,8 @@ fn main() {
         (("dac", "CH2"), quote!(crate::dac::DacDma2)),
         (("timer", "UP"), quote!(crate::timer::UpDma)),
         (("hash", "IN"), quote!(crate::hash::Dma)),
+        (("cryp", "IN"), quote!(crate::cryp::DmaIn)),
+        (("cryp", "OUT"), quote!(crate::cryp::DmaOut)),
         (("timer", "CH1"), quote!(crate::timer::Ch1Dma)),
         (("timer", "CH2"), quote!(crate::timer::Ch2Dma)),
         (("timer", "CH3"), quote!(crate::timer::Ch3Dma)),
@@ -1485,7 +1499,7 @@ fn main() {
                 #[crate::interrupt]
                 unsafe fn #irq () {
                     #(
-                        <crate::peripherals::#channels as crate::dma::sealed::ChannelInterrupt>::on_irq();
+                        <crate::peripherals::#channels as crate::dma::ChannelInterrupt>::on_irq();
                     )*
                 }
             }
diff --git a/embassy-stm32/src/adc/f1.rs b/embassy-stm32/src/adc/f1.rs
index b27b99827..cecf67947 100644
--- a/embassy-stm32/src/adc/f1.rs
+++ b/embassy-stm32/src/adc/f1.rs
@@ -33,7 +33,7 @@ impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandl
 
 pub struct Vref;
 impl<T: Instance> AdcPin<T> for Vref {}
-impl<T: Instance> super::sealed::AdcPin<T> for Vref {
+impl<T: Instance> super::SealedAdcPin<T> for Vref {
     fn channel(&self) -> u8 {
         17
     }
@@ -41,7 +41,7 @@ impl<T: Instance> super::sealed::AdcPin<T> for Vref {
 
 pub struct Temperature;
 impl<T: Instance> AdcPin<T> for Temperature {}
-impl<T: Instance> super::sealed::AdcPin<T> for Temperature {
+impl<T: Instance> super::SealedAdcPin<T> for Temperature {
     fn channel(&self) -> u8 {
         16
     }
diff --git a/embassy-stm32/src/adc/f3.rs b/embassy-stm32/src/adc/f3.rs
index efade1f64..c5581dba1 100644
--- a/embassy-stm32/src/adc/f3.rs
+++ b/embassy-stm32/src/adc/f3.rs
@@ -33,7 +33,7 @@ impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandl
 
 pub struct Vref;
 impl<T: Instance> AdcPin<T> for Vref {}
-impl<T: Instance> super::sealed::AdcPin<T> for Vref {
+impl<T: Instance> super::SealedAdcPin<T> for Vref {
     fn channel(&self) -> u8 {
         18
     }
@@ -48,7 +48,7 @@ impl Vref {
 
 pub struct Temperature;
 impl<T: Instance> AdcPin<T> for Temperature {}
-impl<T: Instance> super::sealed::AdcPin<T> for Temperature {
+impl<T: Instance> super::SealedAdcPin<T> for Temperature {
     fn channel(&self) -> u8 {
         16
     }
@@ -102,7 +102,7 @@ impl<'d, T: Instance> Adc<'d, T> {
     }
 
     fn freq() -> Hertz {
-        <T as crate::rcc::sealed::RccPeripheral>::frequency()
+        <T as crate::rcc::SealedRccPeripheral>::frequency()
     }
 
     pub fn sample_time_for_us(&self, us: u32) -> SampleTime {
diff --git a/embassy-stm32/src/adc/f3_v1_1.rs b/embassy-stm32/src/adc/f3_v1_1.rs
index f842893fa..672ace04f 100644
--- a/embassy-stm32/src/adc/f3_v1_1.rs
+++ b/embassy-stm32/src/adc/f3_v1_1.rs
@@ -65,7 +65,7 @@ fn update_vref<T: Instance>(op: i8) {
 
 pub struct Vref<T: Instance>(core::marker::PhantomData<T>);
 impl<T: Instance> AdcPin<T> for Vref<T> {}
-impl<T: Instance> super::sealed::AdcPin<T> for Vref<T> {
+impl<T: Instance> super::SealedAdcPin<T> for Vref<T> {
     fn channel(&self) -> u8 {
         17
     }
@@ -124,7 +124,7 @@ impl<T: Instance> Drop for Vref<T> {
 
 pub struct Temperature<T: Instance>(core::marker::PhantomData<T>);
 impl<T: Instance> AdcPin<T> for Temperature<T> {}
-impl<T: Instance> super::sealed::AdcPin<T> for Temperature<T> {
+impl<T: Instance> super::SealedAdcPin<T> for Temperature<T> {
     fn channel(&self) -> u8 {
         16
     }
diff --git a/embassy-stm32/src/adc/mod.rs b/embassy-stm32/src/adc/mod.rs
index 0d0d40549..ead2357ce 100644
--- a/embassy-stm32/src/adc/mod.rs
+++ b/embassy-stm32/src/adc/mod.rs
@@ -17,6 +17,8 @@ mod _version;
 #[allow(unused)]
 #[cfg(not(adc_f3_v2))]
 pub use _version::*;
+#[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
+use embassy_sync::waitqueue::AtomicWaker;
 
 #[cfg(not(any(adc_f1, adc_f3_v2)))]
 pub use crate::pac::adc::vals::Res as Resolution;
@@ -31,63 +33,65 @@ pub struct Adc<'d, T: Instance> {
     sample_time: SampleTime,
 }
 
-pub(crate) mod sealed {
-    #[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
-    use embassy_sync::waitqueue::AtomicWaker;
+#[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
+pub struct State {
+    pub waker: AtomicWaker,
+}
 
-    #[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
-    pub struct State {
-        pub waker: AtomicWaker,
-    }
-
-    #[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                waker: AtomicWaker::new(),
-            }
+#[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
+impl State {
+    pub const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
         }
     }
+}
 
-    pub trait InterruptableInstance {
-        type Interrupt: crate::interrupt::typelevel::Interrupt;
-    }
+trait SealedInstance {
+    #[allow(unused)]
+    fn regs() -> crate::pac::adc::Adc;
+    #[cfg(not(any(adc_f1, adc_v1, adc_l0, adc_f3_v2, adc_f3_v1_1, adc_g0)))]
+    fn common_regs() -> crate::pac::adccommon::AdcCommon;
+    #[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
+    fn state() -> &'static State;
+}
 
-    pub trait Instance: InterruptableInstance {
-        fn regs() -> crate::pac::adc::Adc;
-        #[cfg(not(any(adc_f1, adc_v1, adc_l0, adc_f3_v2, adc_f3_v1_1, adc_g0)))]
-        fn common_regs() -> crate::pac::adccommon::AdcCommon;
-        #[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
-        fn state() -> &'static State;
-    }
+pub(crate) trait SealedAdcPin<T: Instance> {
+    #[cfg(any(adc_v1, adc_l0, adc_v2))]
+    fn set_as_analog(&mut self) {}
 
-    pub trait AdcPin<T: Instance> {
-        #[cfg(any(adc_v1, adc_l0, adc_v2))]
-        fn set_as_analog(&mut self) {}
+    #[allow(unused)]
+    fn channel(&self) -> u8;
+}
 
-        fn channel(&self) -> u8;
-    }
-
-    pub trait InternalChannel<T> {
-        fn channel(&self) -> u8;
-    }
+trait SealedInternalChannel<T> {
+    #[allow(unused)]
+    fn channel(&self) -> u8;
 }
 
 /// ADC instance.
 #[cfg(not(any(adc_f1, adc_v1, adc_l0, adc_v2, adc_v3, adc_v4, adc_f3, adc_f3_v1_1, adc_g0, adc_h5)))]
-pub trait Instance: sealed::Instance + crate::Peripheral<P = Self> {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + crate::Peripheral<P = Self> {
+    type Interrupt: crate::interrupt::typelevel::Interrupt;
+}
 /// ADC instance.
 #[cfg(any(adc_f1, adc_v1, adc_l0, adc_v2, adc_v3, adc_v4, adc_f3, adc_f3_v1_1, adc_g0, adc_h5))]
-pub trait Instance: sealed::Instance + crate::Peripheral<P = Self> + crate::rcc::RccPeripheral {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + crate::Peripheral<P = Self> + crate::rcc::RccPeripheral {
+    type Interrupt: crate::interrupt::typelevel::Interrupt;
+}
 
 /// ADC pin.
-pub trait AdcPin<T: Instance>: sealed::AdcPin<T> {}
+#[allow(private_bounds)]
+pub trait AdcPin<T: Instance>: SealedAdcPin<T> {}
 /// ADC internal channel.
-pub trait InternalChannel<T>: sealed::InternalChannel<T> {}
+#[allow(private_bounds)]
+pub trait InternalChannel<T>: SealedInternalChannel<T> {}
 
 foreach_adc!(
     ($inst:ident, $common_inst:ident, $clock:ident) => {
-        impl crate::adc::sealed::Instance for peripherals::$inst {
+        impl crate::adc::SealedInstance for peripherals::$inst {
             fn regs() -> crate::pac::adc::Adc {
                 crate::pac::$inst
             }
@@ -98,21 +102,15 @@ foreach_adc!(
             }
 
             #[cfg(any(adc_f1, adc_f3, adc_v1, adc_l0, adc_f3_v1_1))]
-            fn state() -> &'static sealed::State {
-                static STATE: sealed::State = sealed::State::new();
+            fn state() -> &'static State {
+                static STATE: State = State::new();
                 &STATE
             }
         }
 
-        foreach_interrupt!(
-            ($inst,adc,ADC,GLOBAL,$irq:ident) => {
-                impl sealed::InterruptableInstance for peripherals::$inst {
-                    type Interrupt = crate::interrupt::typelevel::$irq;
-                }
-            };
-        );
-
-        impl crate::adc::Instance for peripherals::$inst {}
+        impl crate::adc::Instance for peripherals::$inst {
+            type Interrupt = crate::_generated::peripheral_interrupts::$inst::GLOBAL;
+        }
     };
 );
 
@@ -120,10 +118,10 @@ macro_rules! impl_adc_pin {
     ($inst:ident, $pin:ident, $ch:expr) => {
         impl crate::adc::AdcPin<peripherals::$inst> for crate::peripherals::$pin {}
 
-        impl crate::adc::sealed::AdcPin<peripherals::$inst> for crate::peripherals::$pin {
+        impl crate::adc::SealedAdcPin<peripherals::$inst> for crate::peripherals::$pin {
             #[cfg(any(adc_v1, adc_l0, adc_v2))]
             fn set_as_analog(&mut self) {
-                <Self as crate::gpio::sealed::Pin>::set_as_analog(self);
+                <Self as crate::gpio::SealedPin>::set_as_analog(self);
             }
 
             fn channel(&self) -> u8 {
diff --git a/embassy-stm32/src/adc/v1.rs b/embassy-stm32/src/adc/v1.rs
index a8dc6ce98..e9b46be80 100644
--- a/embassy-stm32/src/adc/v1.rs
+++ b/embassy-stm32/src/adc/v1.rs
@@ -39,7 +39,7 @@ pub struct Vbat;
 impl AdcPin<ADC> for Vbat {}
 
 #[cfg(not(adc_l0))]
-impl super::sealed::AdcPin<ADC> for Vbat {
+impl super::SealedAdcPin<ADC> for Vbat {
     fn channel(&self) -> u8 {
         18
     }
@@ -47,7 +47,7 @@ impl super::sealed::AdcPin<ADC> for Vbat {
 
 pub struct Vref;
 impl AdcPin<ADC> for Vref {}
-impl super::sealed::AdcPin<ADC> for Vref {
+impl super::SealedAdcPin<ADC> for Vref {
     fn channel(&self) -> u8 {
         17
     }
@@ -55,7 +55,7 @@ impl super::sealed::AdcPin<ADC> for Vref {
 
 pub struct Temperature;
 impl AdcPin<ADC> for Temperature {}
-impl super::sealed::AdcPin<ADC> for Temperature {
+impl super::SealedAdcPin<ADC> for Temperature {
     fn channel(&self) -> u8 {
         16
     }
diff --git a/embassy-stm32/src/adc/v2.rs b/embassy-stm32/src/adc/v2.rs
index f6f7dbfcc..a43eb72db 100644
--- a/embassy-stm32/src/adc/v2.rs
+++ b/embassy-stm32/src/adc/v2.rs
@@ -16,7 +16,7 @@ pub const ADC_POWERUP_TIME_US: u32 = 3;
 
 pub struct VrefInt;
 impl AdcPin<ADC1> for VrefInt {}
-impl super::sealed::AdcPin<ADC1> for VrefInt {
+impl super::SealedAdcPin<ADC1> for VrefInt {
     fn channel(&self) -> u8 {
         17
     }
@@ -31,7 +31,7 @@ impl VrefInt {
 
 pub struct Temperature;
 impl AdcPin<ADC1> for Temperature {}
-impl super::sealed::AdcPin<ADC1> for Temperature {
+impl super::SealedAdcPin<ADC1> for Temperature {
     fn channel(&self) -> u8 {
         cfg_if::cfg_if! {
             if #[cfg(any(stm32f2, stm32f40, stm32f41))] {
@@ -52,7 +52,7 @@ impl Temperature {
 
 pub struct Vbat;
 impl AdcPin<ADC1> for Vbat {}
-impl super::sealed::AdcPin<ADC1> for Vbat {
+impl super::SealedAdcPin<ADC1> for Vbat {
     fn channel(&self) -> u8 {
         18
     }
diff --git a/embassy-stm32/src/adc/v3.rs b/embassy-stm32/src/adc/v3.rs
index 5f3512cad..8c9b47197 100644
--- a/embassy-stm32/src/adc/v3.rs
+++ b/embassy-stm32/src/adc/v3.rs
@@ -12,7 +12,7 @@ pub const VREF_CALIB_MV: u32 = 3000;
 
 pub struct VrefInt;
 impl<T: Instance> AdcPin<T> for VrefInt {}
-impl<T: Instance> super::sealed::AdcPin<T> for VrefInt {
+impl<T: Instance> super::SealedAdcPin<T> for VrefInt {
     fn channel(&self) -> u8 {
         cfg_if! {
             if #[cfg(adc_g0)] {
@@ -29,7 +29,7 @@ impl<T: Instance> super::sealed::AdcPin<T> for VrefInt {
 
 pub struct Temperature;
 impl<T: Instance> AdcPin<T> for Temperature {}
-impl<T: Instance> super::sealed::AdcPin<T> for Temperature {
+impl<T: Instance> super::SealedAdcPin<T> for Temperature {
     fn channel(&self) -> u8 {
         cfg_if! {
             if #[cfg(adc_g0)] {
@@ -46,7 +46,7 @@ impl<T: Instance> super::sealed::AdcPin<T> for Temperature {
 
 pub struct Vbat;
 impl<T: Instance> AdcPin<T> for Vbat {}
-impl<T: Instance> super::sealed::AdcPin<T> for Vbat {
+impl<T: Instance> super::SealedAdcPin<T> for Vbat {
     fn channel(&self) -> u8 {
         cfg_if! {
             if #[cfg(adc_g0)] {
@@ -65,7 +65,7 @@ cfg_if! {
     if #[cfg(adc_h5)] {
         pub struct VddCore;
         impl<T: Instance> AdcPin<T> for VddCore {}
-        impl<T: Instance> super::sealed::AdcPin<T> for VddCore {
+        impl<T: Instance> super::SealedAdcPin<T> for VddCore {
             fn channel(&self) -> u8 {
                 6
             }
diff --git a/embassy-stm32/src/adc/v4.rs b/embassy-stm32/src/adc/v4.rs
index 3fd047375..1ae25bea2 100644
--- a/embassy-stm32/src/adc/v4.rs
+++ b/embassy-stm32/src/adc/v4.rs
@@ -35,7 +35,7 @@ const VBAT_CHANNEL: u8 = 17;
 /// Internal voltage reference channel.
 pub struct VrefInt;
 impl<T: Instance> InternalChannel<T> for VrefInt {}
-impl<T: Instance> super::sealed::InternalChannel<T> for VrefInt {
+impl<T: Instance> super::SealedInternalChannel<T> for VrefInt {
     fn channel(&self) -> u8 {
         VREF_CHANNEL
     }
@@ -44,7 +44,7 @@ impl<T: Instance> super::sealed::InternalChannel<T> for VrefInt {
 /// Internal temperature channel.
 pub struct Temperature;
 impl<T: Instance> InternalChannel<T> for Temperature {}
-impl<T: Instance> super::sealed::InternalChannel<T> for Temperature {
+impl<T: Instance> super::SealedInternalChannel<T> for Temperature {
     fn channel(&self) -> u8 {
         TEMP_CHANNEL
     }
@@ -53,7 +53,7 @@ impl<T: Instance> super::sealed::InternalChannel<T> for Temperature {
 /// Internal battery voltage channel.
 pub struct Vbat;
 impl<T: Instance> InternalChannel<T> for Vbat {}
-impl<T: Instance> super::sealed::InternalChannel<T> for Vbat {
+impl<T: Instance> super::SealedInternalChannel<T> for Vbat {
     fn channel(&self) -> u8 {
         VBAT_CHANNEL
     }
@@ -276,7 +276,7 @@ impl<'d, T: Instance> Adc<'d, T> {
     pub fn read<P>(&mut self, pin: &mut P) -> u16
     where
         P: AdcPin<T>,
-        P: crate::gpio::sealed::Pin,
+        P: crate::gpio::Pin,
     {
         pin.set_as_analog();
 
diff --git a/embassy-stm32/src/can/bxcan.rs b/embassy-stm32/src/can/bxcan.rs
deleted file mode 100644
index 7e00eca6f..000000000
--- a/embassy-stm32/src/can/bxcan.rs
+++ /dev/null
@@ -1,637 +0,0 @@
-use core::convert::AsMut;
-use core::future::poll_fn;
-use core::marker::PhantomData;
-use core::ops::{Deref, DerefMut};
-use core::task::Poll;
-
-pub use bxcan;
-use bxcan::{Data, ExtendedId, Frame, Id, StandardId};
-use embassy_hal_internal::{into_ref, PeripheralRef};
-use futures::FutureExt;
-
-use crate::gpio::sealed::AFType;
-use crate::interrupt::typelevel::Interrupt;
-use crate::pac::can::vals::{Ide, Lec};
-use crate::rcc::RccPeripheral;
-use crate::{interrupt, peripherals, Peripheral};
-
-pub mod enums;
-use enums::*;
-pub mod util;
-
-/// Contains CAN frame and additional metadata.
-///
-/// Timestamp is available if `time` feature is enabled.
-#[derive(Debug, Clone, PartialEq, Eq)]
-#[cfg_attr(feature = "defmt", derive(defmt::Format))]
-pub struct Envelope {
-    /// Reception time.
-    #[cfg(feature = "time")]
-    pub ts: embassy_time::Instant,
-    /// The actual CAN frame.
-    pub frame: bxcan::Frame,
-}
-
-/// Interrupt handler.
-pub struct TxInterruptHandler<T: Instance> {
-    _phantom: PhantomData<T>,
-}
-
-impl<T: Instance> interrupt::typelevel::Handler<T::TXInterrupt> for TxInterruptHandler<T> {
-    unsafe fn on_interrupt() {
-        T::regs().tsr().write(|v| {
-            v.set_rqcp(0, true);
-            v.set_rqcp(1, true);
-            v.set_rqcp(2, true);
-        });
-
-        T::state().tx_waker.wake();
-    }
-}
-
-/// RX0 interrupt handler.
-pub struct Rx0InterruptHandler<T: Instance> {
-    _phantom: PhantomData<T>,
-}
-
-impl<T: Instance> interrupt::typelevel::Handler<T::RX0Interrupt> for Rx0InterruptHandler<T> {
-    unsafe fn on_interrupt() {
-        // info!("rx0 irq");
-        Can::<T>::receive_fifo(RxFifo::Fifo0);
-    }
-}
-
-/// RX1 interrupt handler.
-pub struct Rx1InterruptHandler<T: Instance> {
-    _phantom: PhantomData<T>,
-}
-
-impl<T: Instance> interrupt::typelevel::Handler<T::RX1Interrupt> for Rx1InterruptHandler<T> {
-    unsafe fn on_interrupt() {
-        // info!("rx1 irq");
-        Can::<T>::receive_fifo(RxFifo::Fifo1);
-    }
-}
-
-/// SCE interrupt handler.
-pub struct SceInterruptHandler<T: Instance> {
-    _phantom: PhantomData<T>,
-}
-
-impl<T: Instance> interrupt::typelevel::Handler<T::SCEInterrupt> for SceInterruptHandler<T> {
-    unsafe fn on_interrupt() {
-        // info!("sce irq");
-        let msr = T::regs().msr();
-        let msr_val = msr.read();
-
-        if msr_val.erri() {
-            msr.modify(|v| v.set_erri(true));
-            T::state().err_waker.wake();
-        }
-    }
-}
-
-/// CAN driver
-pub struct Can<'d, T: Instance> {
-    can: bxcan::Can<BxcanInstance<'d, T>>,
-}
-
-/// Error returned by `try_read`
-#[derive(Debug)]
-#[cfg_attr(feature = "defmt", derive(defmt::Format))]
-pub enum TryReadError {
-    /// Bus error
-    BusError(BusError),
-    /// Receive buffer is empty
-    Empty,
-}
-
-/// Error returned by `try_write`
-#[derive(Debug)]
-#[cfg_attr(feature = "defmt", derive(defmt::Format))]
-pub enum TryWriteError {
-    /// All transmit mailboxes are full
-    Full,
-}
-
-impl<'d, T: Instance> Can<'d, T> {
-    /// Creates a new Bxcan instance, keeping the peripheral in sleep mode.
-    /// You must call [Can::enable_non_blocking] to use the peripheral.
-    pub fn new(
-        peri: impl Peripheral<P = T> + 'd,
-        rx: impl Peripheral<P = impl RxPin<T>> + 'd,
-        tx: impl Peripheral<P = impl TxPin<T>> + 'd,
-        _irqs: impl interrupt::typelevel::Binding<T::TXInterrupt, TxInterruptHandler<T>>
-            + interrupt::typelevel::Binding<T::RX0Interrupt, Rx0InterruptHandler<T>>
-            + interrupt::typelevel::Binding<T::RX1Interrupt, Rx1InterruptHandler<T>>
-            + interrupt::typelevel::Binding<T::SCEInterrupt, SceInterruptHandler<T>>
-            + 'd,
-    ) -> Self {
-        into_ref!(peri, rx, tx);
-
-        rx.set_as_af(rx.af_num(), AFType::Input);
-        tx.set_as_af(tx.af_num(), AFType::OutputPushPull);
-
-        T::enable_and_reset();
-
-        {
-            T::regs().ier().write(|w| {
-                w.set_errie(true);
-                w.set_fmpie(0, true);
-                w.set_fmpie(1, true);
-                w.set_tmeie(true);
-            });
-
-            T::regs().mcr().write(|w| {
-                // Enable timestamps on rx messages
-
-                w.set_ttcm(true);
-            });
-        }
-
-        unsafe {
-            T::TXInterrupt::unpend();
-            T::TXInterrupt::enable();
-
-            T::RX0Interrupt::unpend();
-            T::RX0Interrupt::enable();
-
-            T::RX1Interrupt::unpend();
-            T::RX1Interrupt::enable();
-
-            T::SCEInterrupt::unpend();
-            T::SCEInterrupt::enable();
-        }
-
-        rx.set_as_af(rx.af_num(), AFType::Input);
-        tx.set_as_af(tx.af_num(), AFType::OutputPushPull);
-
-        let can = bxcan::Can::builder(BxcanInstance(peri)).leave_disabled();
-        Self { can }
-    }
-
-    /// Set CAN bit rate.
-    pub fn set_bitrate(&mut self, bitrate: u32) {
-        let bit_timing = util::calc_can_timings(T::frequency(), bitrate).unwrap();
-        let sjw = u8::from(bit_timing.sync_jump_width) as u32;
-        let seg1 = u8::from(bit_timing.seg1) as u32;
-        let seg2 = u8::from(bit_timing.seg2) as u32;
-        let prescaler = u16::from(bit_timing.prescaler) as u32;
-        self.can
-            .modify_config()
-            .set_bit_timing((sjw - 1) << 24 | (seg1 - 1) << 16 | (seg2 - 1) << 20 | (prescaler - 1))
-            .leave_disabled();
-    }
-
-    /// Enables the peripheral and synchronizes with the bus.
-    ///
-    /// This will wait for 11 consecutive recessive bits (bus idle state).
-    /// Contrary to enable method from bxcan library, this will not freeze the executor while waiting.
-    pub async fn enable(&mut self) {
-        while self.enable_non_blocking().is_err() {
-            // SCE interrupt is only generated for entering sleep mode, but not leaving.
-            // Yield to allow other tasks to execute while can bus is initializing.
-            embassy_futures::yield_now().await;
-        }
-    }
-
-    /// Queues the message to be sent.
-    ///
-    /// If the TX queue is full, this will wait until there is space, therefore exerting backpressure.
-    pub async fn write(&mut self, frame: &Frame) -> bxcan::TransmitStatus {
-        self.split().0.write(frame).await
-    }
-
-    /// Attempts to transmit a frame without blocking.
-    ///
-    /// Returns [Err(TryWriteError::Full)] if all transmit mailboxes are full.
-    pub fn try_write(&mut self, frame: &Frame) -> Result<bxcan::TransmitStatus, TryWriteError> {
-        self.split().0.try_write(frame)
-    }
-
-    /// Waits for a specific transmit mailbox to become empty
-    pub async fn flush(&self, mb: bxcan::Mailbox) {
-        CanTx::<T>::flush_inner(mb).await
-    }
-
-    /// Waits until any of the transmit mailboxes become empty
-    pub async fn flush_any(&self) {
-        CanTx::<T>::flush_any_inner().await
-    }
-
-    /// Waits until all of the transmit mailboxes become empty
-    pub async fn flush_all(&self) {
-        CanTx::<T>::flush_all_inner().await
-    }
-
-    /// Read a CAN frame.
-    ///
-    /// If no CAN frame is in the RX buffer, this will wait until there is one.
-    ///
-    /// Returns a tuple of the time the message was received and the message frame
-    pub async fn read(&mut self) -> Result<Envelope, BusError> {
-        self.split().1.read().await
-    }
-
-    /// Attempts to read a CAN frame without blocking.
-    ///
-    /// Returns [Err(TryReadError::Empty)] if there are no frames in the rx queue.
-    pub fn try_read(&mut self) -> Result<Envelope, TryReadError> {
-        self.split().1.try_read()
-    }
-
-    /// Waits while receive queue is empty.
-    pub async fn wait_not_empty(&mut self) {
-        self.split().1.wait_not_empty().await
-    }
-
-    unsafe fn receive_fifo(fifo: RxFifo) {
-        // Generate timestamp as early as possible
-        #[cfg(feature = "time")]
-        let ts = embassy_time::Instant::now();
-
-        let state = T::state();
-        let regs = T::regs();
-        let fifo_idx = match fifo {
-            RxFifo::Fifo0 => 0usize,
-            RxFifo::Fifo1 => 1usize,
-        };
-        let rfr = regs.rfr(fifo_idx);
-        let fifo = regs.rx(fifo_idx);
-
-        loop {
-            // If there are no pending messages, there is nothing to do
-            if rfr.read().fmp() == 0 {
-                return;
-            }
-
-            let rir = fifo.rir().read();
-            let id = if rir.ide() == Ide::STANDARD {
-                Id::from(StandardId::new_unchecked(rir.stid()))
-            } else {
-                let stid = (rir.stid() & 0x7FF) as u32;
-                let exid = rir.exid() & 0x3FFFF;
-                let id = (stid << 18) | (exid);
-                Id::from(ExtendedId::new_unchecked(id))
-            };
-            let data_len = fifo.rdtr().read().dlc() as usize;
-            let mut data: [u8; 8] = [0; 8];
-            data[0..4].copy_from_slice(&fifo.rdlr().read().0.to_ne_bytes());
-            data[4..8].copy_from_slice(&fifo.rdhr().read().0.to_ne_bytes());
-
-            let frame = Frame::new_data(id, Data::new(&data[0..data_len]).unwrap());
-            let envelope = Envelope {
-                #[cfg(feature = "time")]
-                ts,
-                frame,
-            };
-
-            rfr.modify(|v| v.set_rfom(true));
-
-            /*
-                NOTE: consensus was reached that if rx_queue is full, packets should be dropped
-            */
-            let _ = state.rx_queue.try_send(envelope);
-        }
-    }
-
-    /// Split the CAN driver into transmit and receive halves.
-    ///
-    /// Useful for doing separate transmit/receive tasks.
-    pub fn split<'c>(&'c mut self) -> (CanTx<'c, 'd, T>, CanRx<'c, 'd, T>) {
-        let (tx, rx0, rx1) = self.can.split_by_ref();
-        (CanTx { tx }, CanRx { rx0, rx1 })
-    }
-}
-
-impl<'d, T: Instance> AsMut<bxcan::Can<BxcanInstance<'d, T>>> for Can<'d, T> {
-    /// Get mutable access to the lower-level driver from the `bxcan` crate.
-    fn as_mut(&mut self) -> &mut bxcan::Can<BxcanInstance<'d, T>> {
-        &mut self.can
-    }
-}
-
-/// CAN driver, transmit half.
-pub struct CanTx<'c, 'd, T: Instance> {
-    tx: &'c mut bxcan::Tx<BxcanInstance<'d, T>>,
-}
-
-impl<'c, 'd, T: Instance> CanTx<'c, 'd, T> {
-    /// Queues the message to be sent.
-    ///
-    /// If the TX queue is full, this will wait until there is space, therefore exerting backpressure.
-    pub async fn write(&mut self, frame: &Frame) -> bxcan::TransmitStatus {
-        poll_fn(|cx| {
-            T::state().tx_waker.register(cx.waker());
-            if let Ok(status) = self.tx.transmit(frame) {
-                return Poll::Ready(status);
-            }
-
-            Poll::Pending
-        })
-        .await
-    }
-
-    /// Attempts to transmit a frame without blocking.
-    ///
-    /// Returns [Err(TryWriteError::Full)] if all transmit mailboxes are full.
-    pub fn try_write(&mut self, frame: &Frame) -> Result<bxcan::TransmitStatus, TryWriteError> {
-        self.tx.transmit(frame).map_err(|_| TryWriteError::Full)
-    }
-
-    async fn flush_inner(mb: bxcan::Mailbox) {
-        poll_fn(|cx| {
-            T::state().tx_waker.register(cx.waker());
-            if T::regs().tsr().read().tme(mb.index()) {
-                return Poll::Ready(());
-            }
-
-            Poll::Pending
-        })
-        .await;
-    }
-
-    /// Waits for a specific transmit mailbox to become empty
-    pub async fn flush(&self, mb: bxcan::Mailbox) {
-        Self::flush_inner(mb).await
-    }
-
-    async fn flush_any_inner() {
-        poll_fn(|cx| {
-            T::state().tx_waker.register(cx.waker());
-
-            let tsr = T::regs().tsr().read();
-            if tsr.tme(bxcan::Mailbox::Mailbox0.index())
-                || tsr.tme(bxcan::Mailbox::Mailbox1.index())
-                || tsr.tme(bxcan::Mailbox::Mailbox2.index())
-            {
-                return Poll::Ready(());
-            }
-
-            Poll::Pending
-        })
-        .await;
-    }
-
-    /// Waits until any of the transmit mailboxes become empty
-    pub async fn flush_any(&self) {
-        Self::flush_any_inner().await
-    }
-
-    async fn flush_all_inner() {
-        poll_fn(|cx| {
-            T::state().tx_waker.register(cx.waker());
-
-            let tsr = T::regs().tsr().read();
-            if tsr.tme(bxcan::Mailbox::Mailbox0.index())
-                && tsr.tme(bxcan::Mailbox::Mailbox1.index())
-                && tsr.tme(bxcan::Mailbox::Mailbox2.index())
-            {
-                return Poll::Ready(());
-            }
-
-            Poll::Pending
-        })
-        .await;
-    }
-
-    /// Waits until all of the transmit mailboxes become empty
-    pub async fn flush_all(&self) {
-        Self::flush_all_inner().await
-    }
-}
-
-/// CAN driver, receive half.
-#[allow(dead_code)]
-pub struct CanRx<'c, 'd, T: Instance> {
-    rx0: &'c mut bxcan::Rx0<BxcanInstance<'d, T>>,
-    rx1: &'c mut bxcan::Rx1<BxcanInstance<'d, T>>,
-}
-
-impl<'c, 'd, T: Instance> CanRx<'c, 'd, T> {
-    /// Read a CAN frame.
-    ///
-    /// If no CAN frame is in the RX buffer, this will wait until there is one.
-    ///
-    /// Returns a tuple of the time the message was received and the message frame
-    pub async fn read(&mut self) -> Result<Envelope, BusError> {
-        poll_fn(|cx| {
-            T::state().err_waker.register(cx.waker());
-            if let Poll::Ready(envelope) = T::state().rx_queue.receive().poll_unpin(cx) {
-                return Poll::Ready(Ok(envelope));
-            } else if let Some(err) = self.curr_error() {
-                return Poll::Ready(Err(err));
-            }
-
-            Poll::Pending
-        })
-        .await
-    }
-
-    /// Attempts to read a CAN frame without blocking.
-    ///
-    /// Returns [Err(TryReadError::Empty)] if there are no frames in the rx queue.
-    pub fn try_read(&mut self) -> Result<Envelope, TryReadError> {
-        if let Ok(envelope) = T::state().rx_queue.try_receive() {
-            return Ok(envelope);
-        }
-
-        if let Some(err) = self.curr_error() {
-            return Err(TryReadError::BusError(err));
-        }
-
-        Err(TryReadError::Empty)
-    }
-
-    /// Waits while receive queue is empty.
-    pub async fn wait_not_empty(&mut self) {
-        poll_fn(|cx| T::state().rx_queue.poll_ready_to_receive(cx)).await
-    }
-
-    fn curr_error(&self) -> Option<BusError> {
-        let err = { T::regs().esr().read() };
-        if err.boff() {
-            return Some(BusError::BusOff);
-        } else if err.epvf() {
-            return Some(BusError::BusPassive);
-        } else if err.ewgf() {
-            return Some(BusError::BusWarning);
-        } else if let Some(err) = err.lec().into_bus_err() {
-            return Some(err);
-        }
-        None
-    }
-}
-
-enum RxFifo {
-    Fifo0,
-    Fifo1,
-}
-
-impl<'d, T: Instance> Drop for Can<'d, T> {
-    fn drop(&mut self) {
-        // Cannot call `free()` because it moves the instance.
-        // Manually reset the peripheral.
-        T::regs().mcr().write(|w| w.set_reset(true));
-        T::disable();
-    }
-}
-
-impl<'d, T: Instance> Deref for Can<'d, T> {
-    type Target = bxcan::Can<BxcanInstance<'d, T>>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.can
-    }
-}
-
-impl<'d, T: Instance> DerefMut for Can<'d, T> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.can
-    }
-}
-
-pub(crate) mod sealed {
-    use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
-    use embassy_sync::channel::Channel;
-    use embassy_sync::waitqueue::AtomicWaker;
-
-    use super::Envelope;
-
-    pub struct State {
-        pub tx_waker: AtomicWaker,
-        pub err_waker: AtomicWaker,
-        pub rx_queue: Channel<CriticalSectionRawMutex, Envelope, 32>,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                tx_waker: AtomicWaker::new(),
-                err_waker: AtomicWaker::new(),
-                rx_queue: Channel::new(),
-            }
-        }
-    }
-
-    pub trait Instance {
-        const REGISTERS: *mut bxcan::RegisterBlock;
-
-        fn regs() -> crate::pac::can::Can;
-        fn state() -> &'static State;
-    }
-}
-
-/// CAN instance trait.
-pub trait Instance: sealed::Instance + RccPeripheral + 'static {
-    /// TX interrupt for this instance.
-    type TXInterrupt: crate::interrupt::typelevel::Interrupt;
-    /// RX0 interrupt for this instance.
-    type RX0Interrupt: crate::interrupt::typelevel::Interrupt;
-    /// RX1 interrupt for this instance.
-    type RX1Interrupt: crate::interrupt::typelevel::Interrupt;
-    /// SCE interrupt for this instance.
-    type SCEInterrupt: crate::interrupt::typelevel::Interrupt;
-}
-
-/// BXCAN instance newtype.
-pub struct BxcanInstance<'a, T>(PeripheralRef<'a, T>);
-
-unsafe impl<'d, T: Instance> bxcan::Instance for BxcanInstance<'d, T> {
-    const REGISTERS: *mut bxcan::RegisterBlock = T::REGISTERS;
-}
-
-foreach_peripheral!(
-    (can, $inst:ident) => {
-        impl sealed::Instance for peripherals::$inst {
-            const REGISTERS: *mut bxcan::RegisterBlock = crate::pac::$inst.as_ptr() as *mut _;
-
-            fn regs() -> crate::pac::can::Can {
-                crate::pac::$inst
-            }
-
-            fn state() -> &'static sealed::State {
-                static STATE: sealed::State = sealed::State::new();
-                &STATE
-            }
-        }
-
-        impl Instance for peripherals::$inst {
-            type TXInterrupt = crate::_generated::peripheral_interrupts::$inst::TX;
-            type RX0Interrupt = crate::_generated::peripheral_interrupts::$inst::RX0;
-            type RX1Interrupt = crate::_generated::peripheral_interrupts::$inst::RX1;
-            type SCEInterrupt = crate::_generated::peripheral_interrupts::$inst::SCE;
-        }
-    };
-);
-
-foreach_peripheral!(
-    (can, CAN) => {
-        unsafe impl<'d> bxcan::FilterOwner for BxcanInstance<'d, peripherals::CAN> {
-            const NUM_FILTER_BANKS: u8 = 14;
-        }
-    };
-    // CAN1 and CAN2 is a combination of master and slave instance.
-    // CAN1 owns the filter bank and needs to be enabled in order
-    // for CAN2 to receive messages.
-    (can, CAN1) => {
-        cfg_if::cfg_if! {
-            if #[cfg(all(
-                any(stm32l4, stm32f72, stm32f73),
-                not(any(stm32l49, stm32l4a))
-            ))] {
-                // Most L4 devices and some F7 devices use the name "CAN1"
-                // even if there is no "CAN2" peripheral.
-                unsafe impl<'d> bxcan::FilterOwner for BxcanInstance<'d, peripherals::CAN1> {
-                    const NUM_FILTER_BANKS: u8 = 14;
-                }
-            } else {
-                unsafe impl<'d> bxcan::FilterOwner for BxcanInstance<'d, peripherals::CAN1> {
-                    const NUM_FILTER_BANKS: u8 = 28;
-                }
-                unsafe impl<'d> bxcan::MasterInstance for BxcanInstance<'d, peripherals::CAN1> {}
-            }
-        }
-    };
-    (can, CAN3) => {
-        unsafe impl<'d> bxcan::FilterOwner for BxcanInstance<'d, peripherals::CAN3> {
-            const NUM_FILTER_BANKS: u8 = 14;
-        }
-    };
-);
-
-pin_trait!(RxPin, Instance);
-pin_trait!(TxPin, Instance);
-
-trait Index {
-    fn index(&self) -> usize;
-}
-
-impl Index for bxcan::Mailbox {
-    fn index(&self) -> usize {
-        match self {
-            bxcan::Mailbox::Mailbox0 => 0,
-            bxcan::Mailbox::Mailbox1 => 1,
-            bxcan::Mailbox::Mailbox2 => 2,
-        }
-    }
-}
-
-trait IntoBusError {
-    fn into_bus_err(self) -> Option<BusError>;
-}
-
-impl IntoBusError for Lec {
-    fn into_bus_err(self) -> Option<BusError> {
-        match self {
-            Lec::STUFF => Some(BusError::Stuff),
-            Lec::FORM => Some(BusError::Form),
-            Lec::ACK => Some(BusError::Acknowledge),
-            Lec::BITRECESSIVE => Some(BusError::BitRecessive),
-            Lec::BITDOMINANT => Some(BusError::BitDominant),
-            Lec::CRC => Some(BusError::Crc),
-            Lec::CUSTOM => Some(BusError::Software),
-            _ => None,
-        }
-    }
-}
diff --git a/embassy-stm32/src/can/bxcan/filter.rs b/embassy-stm32/src/can/bxcan/filter.rs
new file mode 100644
index 000000000..9940c7f50
--- /dev/null
+++ b/embassy-stm32/src/can/bxcan/filter.rs
@@ -0,0 +1,475 @@
+//! Filter bank API.
+
+use core::marker::PhantomData;
+
+use super::{ExtendedId, Fifo, FilterOwner, Id, Instance, MasterInstance, StandardId};
+
+const F32_RTR: u32 = 0b010; // set the RTR bit to match remote frames
+const F32_IDE: u32 = 0b100; // set the IDE bit to match extended identifiers
+const F16_RTR: u16 = 0b10000;
+const F16_IDE: u16 = 0b01000;
+
+/// A 16-bit filter list entry.
+///
+/// This can match data and remote frames using standard IDs.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct ListEntry16(u16);
+
+/// A 32-bit filter list entry.
+///
+/// This can match data and remote frames using extended or standard IDs.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct ListEntry32(u32);
+
+/// A 16-bit identifier mask.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct Mask16 {
+    id: u16,
+    mask: u16,
+}
+
+/// A 32-bit identifier mask.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct Mask32 {
+    id: u32,
+    mask: u32,
+}
+
+impl ListEntry16 {
+    /// Creates a filter list entry that accepts data frames with the given standard ID.
+    ///
+    /// This entry will *not* accept remote frames with the same ID.
+    pub fn data_frames_with_id(id: StandardId) -> Self {
+        Self(id.as_raw() << 5)
+    }
+
+    /// Creates a filter list entry that accepts remote frames with the given standard ID.
+    pub fn remote_frames_with_id(id: StandardId) -> Self {
+        Self(id.as_raw() << 5 | F16_RTR)
+    }
+}
+
+impl ListEntry32 {
+    /// Creates a filter list entry that accepts data frames with the given ID.
+    ///
+    /// This entry will *not* accept remote frames with the same ID.
+    ///
+    /// The filter will only accept *either* standard *or* extended frames, depending on `id`.
+    pub fn data_frames_with_id(id: impl Into<Id>) -> Self {
+        match id.into() {
+            Id::Standard(id) => Self(u32::from(id.as_raw()) << 21),
+            Id::Extended(id) => Self(id.as_raw() << 3 | F32_IDE),
+        }
+    }
+
+    /// Creates a filter list entry that accepts remote frames with the given ID.
+    pub fn remote_frames_with_id(id: impl Into<Id>) -> Self {
+        match id.into() {
+            Id::Standard(id) => Self(u32::from(id.as_raw()) << 21 | F32_RTR),
+            Id::Extended(id) => Self(id.as_raw() << 3 | F32_IDE | F32_RTR),
+        }
+    }
+}
+
+impl Mask16 {
+    /// Creates a 16-bit identifier mask that accepts all frames.
+    ///
+    /// This will accept both standard and extended data and remote frames with any ID.
+    pub fn accept_all() -> Self {
+        Self { id: 0, mask: 0 }
+    }
+
+    /// Creates a 16-bit identifier mask that accepts all frames with the given standard
+    /// ID and mask combination.
+    ///
+    /// Filter logic: `frame_accepted = (incoming_id & mask) == (id & mask)`
+    ///
+    /// A mask of all all ones (`0x7FF`) matches an exact ID, a mask of 0 matches all IDs.
+    ///
+    /// Both data and remote frames with `id` will be accepted. Any extended frames will be
+    /// rejected.
+    pub fn frames_with_std_id(id: StandardId, mask: StandardId) -> Self {
+        Self {
+            id: id.as_raw() << 5,
+            mask: mask.as_raw() << 5 | F16_IDE, // also require IDE = 0
+        }
+    }
+
+    /// Make the filter accept data frames only.
+    pub fn data_frames_only(&mut self) -> &mut Self {
+        self.id &= !F16_RTR; // RTR = 0
+        self.mask |= F16_RTR;
+        self
+    }
+
+    /// Make the filter accept remote frames only.
+    pub fn remote_frames_only(&mut self) -> &mut Self {
+        self.id |= F16_RTR; // RTR = 1
+        self.mask |= F16_RTR;
+        self
+    }
+}
+
+impl Mask32 {
+    /// Creates a 32-bit identifier mask that accepts all frames.
+    ///
+    /// This will accept both standard and extended data and remote frames with any ID.
+    pub fn accept_all() -> Self {
+        Self { id: 0, mask: 0 }
+    }
+
+    /// Creates a 32-bit identifier mask that accepts all frames with the given extended
+    /// ID and mask combination.
+    ///
+    /// Filter logic: `frame_accepted = (incoming_id & mask) == (id & mask)`
+    ///
+    /// A mask of all all ones (`0x1FFF_FFFF`) matches an exact ID, a mask of 0 matches all IDs.
+    ///
+    /// Both data and remote frames with `id` will be accepted. Standard frames will be rejected.
+    pub fn frames_with_ext_id(id: ExtendedId, mask: ExtendedId) -> Self {
+        Self {
+            id: id.as_raw() << 3 | F32_IDE,
+            mask: mask.as_raw() << 3 | F32_IDE, // also require IDE = 1
+        }
+    }
+
+    /// Creates a 32-bit identifier mask that accepts all frames with the given standard
+    /// ID and mask combination.
+    ///
+    /// Filter logic: `frame_accepted = (incoming_id & mask) == (id & mask)`
+    ///
+    /// A mask of all all ones (`0x7FF`) matches the exact ID, a mask of 0 matches all IDs.
+    ///
+    /// Both data and remote frames with `id` will be accepted. Extended frames will be rejected.
+    pub fn frames_with_std_id(id: StandardId, mask: StandardId) -> Self {
+        Self {
+            id: u32::from(id.as_raw()) << 21,
+            mask: u32::from(mask.as_raw()) << 21 | F32_IDE, // also require IDE = 0
+        }
+    }
+
+    /// Make the filter accept data frames only.
+    pub fn data_frames_only(&mut self) -> &mut Self {
+        self.id &= !F32_RTR; // RTR = 0
+        self.mask |= F32_RTR;
+        self
+    }
+
+    /// Make the filter accept remote frames only.
+    pub fn remote_frames_only(&mut self) -> &mut Self {
+        self.id |= F32_RTR; // RTR = 1
+        self.mask |= F32_RTR;
+        self
+    }
+}
+
+/// The configuration of a filter bank.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum BankConfig {
+    /// Specify up to 4 exact standard CAN ID's.
+    List16([ListEntry16; 4]),
+    /// Specify up to 2 exact standard or extended CAN ID's.
+    List32([ListEntry32; 2]),
+    /// Specify up to 2 standard ID's with masks.
+    Mask16([Mask16; 2]),
+    /// Specify a single extended ID with mask.
+    Mask32(Mask32),
+}
+
+impl From<[ListEntry16; 4]> for BankConfig {
+    #[inline]
+    fn from(entries: [ListEntry16; 4]) -> Self {
+        Self::List16(entries)
+    }
+}
+
+impl From<[ListEntry32; 2]> for BankConfig {
+    #[inline]
+    fn from(entries: [ListEntry32; 2]) -> Self {
+        Self::List32(entries)
+    }
+}
+
+impl From<[Mask16; 2]> for BankConfig {
+    #[inline]
+    fn from(entries: [Mask16; 2]) -> Self {
+        Self::Mask16(entries)
+    }
+}
+
+impl From<Mask32> for BankConfig {
+    #[inline]
+    fn from(filter: Mask32) -> Self {
+        Self::Mask32(filter)
+    }
+}
+
+/// Interface to the filter banks of a CAN peripheral.
+pub struct MasterFilters<'a, I: FilterOwner> {
+    /// Number of assigned filter banks.
+    ///
+    /// On chips with splittable filter banks, this value can be dynamic.
+    bank_count: u8,
+    _can: PhantomData<&'a mut I>,
+    canregs: crate::pac::can::Can,
+}
+
+// NOTE: This type mutably borrows the CAN instance and has unique access to the registers while it
+// exists.
+impl<I: FilterOwner> MasterFilters<'_, I> {
+    pub(crate) unsafe fn new(canregs: crate::pac::can::Can) -> Self {
+        // Enable initialization mode.
+        canregs.fmr().modify(|reg| reg.set_finit(true));
+
+        // Read the filter split value.
+        let bank_count = canregs.fmr().read().can2sb();
+
+        // (Reset value of CAN2SB is 0x0E, 14, which, in devices with 14 filter banks, assigns all
+        // of them to the master peripheral, and in devices with 28, assigns them 50/50 to
+        // master/slave instances)
+
+        Self {
+            bank_count,
+            _can: PhantomData,
+            canregs,
+        }
+    }
+
+    fn banks_imm(&self) -> FilterBanks {
+        FilterBanks {
+            start_idx: 0,
+            bank_count: self.bank_count,
+            canregs: self.canregs,
+        }
+    }
+
+    /// Returns the number of filter banks currently assigned to this instance.
+    ///
+    /// Chips with splittable filter banks may start out with some banks assigned to the master
+    /// instance and some assigned to the slave instance.
+    pub fn num_banks(&self) -> u8 {
+        self.bank_count
+    }
+
+    /// Disables all enabled filter banks.
+    ///
+    /// This causes all incoming frames to be disposed.
+    pub fn clear(&mut self) -> &mut Self {
+        self.banks_imm().clear();
+        self
+    }
+
+    /// Disables a filter bank.
+    ///
+    /// If `index` is out of bounds, this will panic.
+    pub fn disable_bank(&mut self, index: u8) -> &mut Self {
+        self.banks_imm().disable(index);
+        self
+    }
+
+    /// Configures a filter bank according to `config` and enables it.
+    ///
+    /// Each filter bank is associated with one of the two RX FIFOs, configured by the [`Fifo`]
+    /// passed to this function. In the event that both FIFOs are configured to accept an incoming
+    /// frame, the accepting filter bank with the lowest index wins. The FIFO state is ignored, so
+    /// if the FIFO is full, it will overflow, even if the other FIFO is also configured to accept
+    /// the frame.
+    ///
+    /// # Parameters
+    ///
+    /// - `index`: the filter index.
+    /// - `fifo`: the receive FIFO the filter should pass accepted messages to.
+    /// - `config`: the filter configuration.
+    pub fn enable_bank(&mut self, index: u8, fifo: Fifo, config: impl Into<BankConfig>) -> &mut Self {
+        self.banks_imm().enable(index, fifo, config.into());
+        self
+    }
+}
+
+impl<I: MasterInstance> MasterFilters<'_, I> {
+    /// Sets the index at which the filter banks owned by the slave peripheral start.
+    pub fn set_split(&mut self, split_index: u8) -> &mut Self {
+        assert!(split_index <= I::NUM_FILTER_BANKS);
+        self.canregs.fmr().modify(|reg| reg.set_can2sb(split_index));
+        self.bank_count = split_index;
+        self
+    }
+
+    /// Accesses the filters assigned to the slave peripheral.
+    pub fn slave_filters(&mut self) -> SlaveFilters<'_, I> {
+        // NB: This mutably borrows `self`, so it has full access to the filter bank registers.
+        SlaveFilters {
+            start_idx: self.bank_count,
+            bank_count: I::NUM_FILTER_BANKS - self.bank_count,
+            _can: PhantomData,
+            canregs: self.canregs,
+        }
+    }
+}
+
+impl<I: FilterOwner> Drop for MasterFilters<'_, I> {
+    #[inline]
+    fn drop(&mut self) {
+        // Leave initialization mode.
+        self.canregs.fmr().modify(|regs| regs.set_finit(false));
+    }
+}
+
+/// Interface to the filter banks assigned to a slave peripheral.
+pub struct SlaveFilters<'a, I: Instance> {
+    start_idx: u8,
+    bank_count: u8,
+    _can: PhantomData<&'a mut I>,
+    canregs: crate::pac::can::Can,
+}
+
+impl<I: Instance> SlaveFilters<'_, I> {
+    fn banks_imm(&self) -> FilterBanks {
+        FilterBanks {
+            start_idx: self.start_idx,
+            bank_count: self.bank_count,
+            canregs: self.canregs,
+        }
+    }
+
+    /// Returns the number of filter banks currently assigned to this instance.
+    ///
+    /// Chips with splittable filter banks may start out with some banks assigned to the master
+    /// instance and some assigned to the slave instance.
+    pub fn num_banks(&self) -> u8 {
+        self.bank_count
+    }
+
+    /// Disables all enabled filter banks.
+    ///
+    /// This causes all incoming frames to be disposed.
+    pub fn clear(&mut self) -> &mut Self {
+        self.banks_imm().clear();
+        self
+    }
+
+    /// Disables a filter bank.
+    ///
+    /// If `index` is out of bounds, this will panic.
+    pub fn disable_bank(&mut self, index: u8) -> &mut Self {
+        self.banks_imm().disable(index);
+        self
+    }
+
+    /// Configures a filter bank according to `config` and enables it.
+    ///
+    /// # Parameters
+    ///
+    /// - `index`: the filter index.
+    /// - `fifo`: the receive FIFO the filter should pass accepted messages to.
+    /// - `config`: the filter configuration.
+    pub fn enable_bank(&mut self, index: u8, fifo: Fifo, config: impl Into<BankConfig>) -> &mut Self {
+        self.banks_imm().enable(index, fifo, config.into());
+        self
+    }
+}
+
+struct FilterBanks {
+    start_idx: u8,
+    bank_count: u8,
+    canregs: crate::pac::can::Can,
+}
+
+impl FilterBanks {
+    fn clear(&mut self) {
+        let mask = filter_bitmask(self.start_idx, self.bank_count);
+
+        self.canregs.fa1r().modify(|reg| {
+            for i in 0..28usize {
+                if (0x01u32 << i) & mask != 0 {
+                    reg.set_fact(i, false);
+                }
+            }
+        });
+    }
+
+    fn assert_bank_index(&self, index: u8) {
+        assert!((self.start_idx..self.start_idx + self.bank_count).contains(&index));
+    }
+
+    fn disable(&mut self, index: u8) {
+        self.assert_bank_index(index);
+        self.canregs.fa1r().modify(|reg| reg.set_fact(index as usize, false))
+    }
+
+    fn enable(&mut self, index: u8, fifo: Fifo, config: BankConfig) {
+        self.assert_bank_index(index);
+
+        // Configure mode.
+        let mode = matches!(config, BankConfig::List16(_) | BankConfig::List32(_));
+        self.canregs.fm1r().modify(|reg| reg.set_fbm(index as usize, mode));
+
+        // Configure scale.
+        let scale = matches!(config, BankConfig::List32(_) | BankConfig::Mask32(_));
+        self.canregs.fs1r().modify(|reg| reg.set_fsc(index as usize, scale));
+
+        // Configure filter register.
+        let (fxr1, fxr2);
+        match config {
+            BankConfig::List16([a, b, c, d]) => {
+                fxr1 = (u32::from(b.0) << 16) | u32::from(a.0);
+                fxr2 = (u32::from(d.0) << 16) | u32::from(c.0);
+            }
+            BankConfig::List32([a, b]) => {
+                fxr1 = a.0;
+                fxr2 = b.0;
+            }
+            BankConfig::Mask16([a, b]) => {
+                fxr1 = (u32::from(a.mask) << 16) | u32::from(a.id);
+                fxr2 = (u32::from(b.mask) << 16) | u32::from(b.id);
+            }
+            BankConfig::Mask32(a) => {
+                fxr1 = a.id;
+                fxr2 = a.mask;
+            }
+        };
+        let bank = self.canregs.fb(index as usize);
+        bank.fr1().write(|w| w.0 = fxr1);
+        bank.fr2().write(|w| w.0 = fxr2);
+
+        // Assign to the right FIFO
+        self.canregs.ffa1r().modify(|reg| {
+            reg.set_ffa(
+                index as usize,
+                match fifo {
+                    Fifo::Fifo0 => false,
+                    Fifo::Fifo1 => true,
+                },
+            )
+        });
+
+        // Set active.
+        self.canregs.fa1r().modify(|reg| reg.set_fact(index as usize, true))
+    }
+}
+
+/// Computes a bitmask for per-filter-bank registers that only includes filters in the given range.
+fn filter_bitmask(start_idx: u8, bank_count: u8) -> u32 {
+    let count_mask = (1 << bank_count) - 1; // `bank_count` 1-bits
+    count_mask << start_idx
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_filter_bitmask() {
+        assert_eq!(filter_bitmask(0, 1), 0x1);
+        assert_eq!(filter_bitmask(1, 1), 0b10);
+        assert_eq!(filter_bitmask(0, 4), 0xf);
+        assert_eq!(filter_bitmask(1, 3), 0xe);
+        assert_eq!(filter_bitmask(8, 1), 0x100);
+        assert_eq!(filter_bitmask(8, 4), 0xf00);
+    }
+}
diff --git a/embassy-stm32/src/can/bxcan/mod.rs b/embassy-stm32/src/can/bxcan/mod.rs
new file mode 100644
index 000000000..65fd0e9c2
--- /dev/null
+++ b/embassy-stm32/src/can/bxcan/mod.rs
@@ -0,0 +1,989 @@
+pub mod filter;
+mod registers;
+
+use core::future::poll_fn;
+use core::marker::PhantomData;
+use core::task::Poll;
+
+use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
+use embassy_sync::channel::Channel;
+use embassy_sync::waitqueue::AtomicWaker;
+pub use embedded_can::{ExtendedId, Id, StandardId};
+
+use self::filter::MasterFilters;
+use self::registers::{Registers, RxFifo};
+pub use super::common::{BufferedCanReceiver, BufferedCanSender};
+use super::frame::{Envelope, Frame};
+use super::util;
+use crate::can::enums::{BusError, TryReadError};
+use crate::gpio::AFType;
+use crate::interrupt::typelevel::Interrupt;
+use crate::rcc::RccPeripheral;
+use crate::{interrupt, peripherals, Peripheral};
+
+/// Interrupt handler.
+pub struct TxInterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::TXInterrupt> for TxInterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        T::regs().tsr().write(|v| {
+            v.set_rqcp(0, true);
+            v.set_rqcp(1, true);
+            v.set_rqcp(2, true);
+        });
+        T::state().tx_mode.on_interrupt::<T>();
+    }
+}
+
+/// RX0 interrupt handler.
+pub struct Rx0InterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::RX0Interrupt> for Rx0InterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        T::state().rx_mode.on_interrupt::<T>(RxFifo::Fifo0);
+    }
+}
+
+/// RX1 interrupt handler.
+pub struct Rx1InterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::RX1Interrupt> for Rx1InterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        T::state().rx_mode.on_interrupt::<T>(RxFifo::Fifo1);
+    }
+}
+
+/// SCE interrupt handler.
+pub struct SceInterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::SCEInterrupt> for SceInterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        // info!("sce irq");
+        let msr = T::regs().msr();
+        let msr_val = msr.read();
+
+        if msr_val.erri() {
+            msr.modify(|v| v.set_erri(true));
+            T::state().err_waker.wake();
+        }
+    }
+}
+
+/// Configuration proxy returned by [`Can::modify_config`].
+pub struct CanConfig<'a, T: Instance> {
+    can: PhantomData<&'a mut T>,
+}
+
+impl<T: Instance> CanConfig<'_, T> {
+    /// Configures the bit timings.
+    ///
+    /// You can use <http://www.bittiming.can-wiki.info/> to calculate the `btr` parameter. Enter
+    /// parameters as follows:
+    ///
+    /// - *Clock Rate*: The input clock speed to the CAN peripheral (*not* the CPU clock speed).
+    ///   This is the clock rate of the peripheral bus the CAN peripheral is attached to (eg. APB1).
+    /// - *Sample Point*: Should normally be left at the default value of 87.5%.
+    /// - *SJW*: Should normally be left at the default value of 1.
+    ///
+    /// Then copy the `CAN_BUS_TIME` register value from the table and pass it as the `btr`
+    /// parameter to this method.
+    pub fn set_bit_timing(self, bt: crate::can::util::NominalBitTiming) -> Self {
+        Registers(T::regs()).set_bit_timing(bt);
+        self
+    }
+
+    /// Configure the CAN bit rate.
+    ///
+    /// This is a helper that internally calls `set_bit_timing()`[Self::set_bit_timing].
+    pub fn set_bitrate(self, bitrate: u32) -> Self {
+        let bit_timing = util::calc_can_timings(T::frequency(), bitrate).unwrap();
+        self.set_bit_timing(bit_timing)
+    }
+
+    /// Enables or disables loopback mode: Internally connects the TX and RX
+    /// signals together.
+    pub fn set_loopback(self, enabled: bool) -> Self {
+        Registers(T::regs()).set_loopback(enabled);
+        self
+    }
+
+    /// Enables or disables silent mode: Disconnects the TX signal from the pin.
+    pub fn set_silent(self, enabled: bool) -> Self {
+        Registers(T::regs()).set_silent(enabled);
+        self
+    }
+
+    /// Enables or disables automatic retransmission of messages.
+    ///
+    /// If this is enabled, the CAN peripheral will automatically try to retransmit each frame
+    /// until it can be sent. Otherwise, it will try only once to send each frame.
+    ///
+    /// Automatic retransmission is enabled by default.
+    pub fn set_automatic_retransmit(self, enabled: bool) -> Self {
+        Registers(T::regs()).set_automatic_retransmit(enabled);
+        self
+    }
+}
+
+impl<T: Instance> Drop for CanConfig<'_, T> {
+    #[inline]
+    fn drop(&mut self) {
+        Registers(T::regs()).leave_init_mode();
+    }
+}
+
+/// CAN driver
+pub struct Can<'d, T: Instance> {
+    peri: PeripheralRef<'d, T>,
+}
+
+/// Error returned by `try_write`
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum TryWriteError {
+    /// All transmit mailboxes are full
+    Full,
+}
+
+impl<'d, T: Instance> Can<'d, T> {
+    /// Creates a new Bxcan instance, keeping the peripheral in sleep mode.
+    /// You must call [Can::enable_non_blocking] to use the peripheral.
+    pub fn new(
+        peri: impl Peripheral<P = T> + 'd,
+        rx: impl Peripheral<P = impl RxPin<T>> + 'd,
+        tx: impl Peripheral<P = impl TxPin<T>> + 'd,
+        _irqs: impl interrupt::typelevel::Binding<T::TXInterrupt, TxInterruptHandler<T>>
+            + interrupt::typelevel::Binding<T::RX0Interrupt, Rx0InterruptHandler<T>>
+            + interrupt::typelevel::Binding<T::RX1Interrupt, Rx1InterruptHandler<T>>
+            + interrupt::typelevel::Binding<T::SCEInterrupt, SceInterruptHandler<T>>
+            + 'd,
+    ) -> Self {
+        into_ref!(peri, rx, tx);
+
+        rx.set_as_af(rx.af_num(), AFType::Input);
+        tx.set_as_af(tx.af_num(), AFType::OutputPushPull);
+
+        T::enable_and_reset();
+
+        {
+            T::regs().ier().write(|w| {
+                w.set_errie(true);
+                w.set_fmpie(0, true);
+                w.set_fmpie(1, true);
+                w.set_tmeie(true);
+            });
+
+            T::regs().mcr().write(|w| {
+                // Enable timestamps on rx messages
+
+                w.set_ttcm(true);
+            });
+        }
+
+        unsafe {
+            T::TXInterrupt::unpend();
+            T::TXInterrupt::enable();
+
+            T::RX0Interrupt::unpend();
+            T::RX0Interrupt::enable();
+
+            T::RX1Interrupt::unpend();
+            T::RX1Interrupt::enable();
+
+            T::SCEInterrupt::unpend();
+            T::SCEInterrupt::enable();
+        }
+
+        rx.set_as_af(rx.af_num(), AFType::Input);
+        tx.set_as_af(tx.af_num(), AFType::OutputPushPull);
+
+        Registers(T::regs()).leave_init_mode();
+
+        Self { peri }
+    }
+
+    /// Set CAN bit rate.
+    pub fn set_bitrate(&mut self, bitrate: u32) {
+        let bit_timing = util::calc_can_timings(T::frequency(), bitrate).unwrap();
+        self.modify_config().set_bit_timing(bit_timing);
+    }
+
+    /// Configure bit timings and silent/loop-back mode.
+    ///
+    /// Calling this method will enter initialization mode. You must enable the peripheral
+    /// again afterwards with [`enable`](Self::enable).
+    pub fn modify_config(&mut self) -> CanConfig<'_, T> {
+        Registers(T::regs()).enter_init_mode();
+
+        CanConfig { can: PhantomData }
+    }
+
+    /// Enables the peripheral and synchronizes with the bus.
+    ///
+    /// This will wait for 11 consecutive recessive bits (bus idle state).
+    /// Contrary to enable method from bxcan library, this will not freeze the executor while waiting.
+    pub async fn enable(&mut self) {
+        while Registers(T::regs()).enable_non_blocking().is_err() {
+            // SCE interrupt is only generated for entering sleep mode, but not leaving.
+            // Yield to allow other tasks to execute while can bus is initializing.
+            embassy_futures::yield_now().await;
+        }
+    }
+
+    /// Queues the message to be sent.
+    ///
+    /// If the TX queue is full, this will wait until there is space, therefore exerting backpressure.
+    pub async fn write(&mut self, frame: &Frame) -> TransmitStatus {
+        self.split().0.write(frame).await
+    }
+
+    /// Attempts to transmit a frame without blocking.
+    ///
+    /// Returns [Err(TryWriteError::Full)] if all transmit mailboxes are full.
+    pub fn try_write(&mut self, frame: &Frame) -> Result<TransmitStatus, TryWriteError> {
+        self.split().0.try_write(frame)
+    }
+
+    /// Waits for a specific transmit mailbox to become empty
+    pub async fn flush(&self, mb: Mailbox) {
+        CanTx::<T>::flush_inner(mb).await
+    }
+
+    /// Waits until any of the transmit mailboxes become empty
+    pub async fn flush_any(&self) {
+        CanTx::<T>::flush_any_inner().await
+    }
+
+    /// Waits until all of the transmit mailboxes become empty
+    pub async fn flush_all(&self) {
+        CanTx::<T>::flush_all_inner().await
+    }
+
+    /// Attempts to abort the sending of a frame that is pending in a mailbox.
+    ///
+    /// If there is no frame in the provided mailbox, or its transmission succeeds before it can be
+    /// aborted, this function has no effect and returns `false`.
+    ///
+    /// If there is a frame in the provided mailbox, and it is canceled successfully, this function
+    /// returns `true`.
+    pub fn abort(&mut self, mailbox: Mailbox) -> bool {
+        Registers(T::regs()).abort(mailbox)
+    }
+
+    /// Returns `true` if no frame is pending for transmission.
+    pub fn is_transmitter_idle(&self) -> bool {
+        Registers(T::regs()).is_idle()
+    }
+
+    /// Read a CAN frame.
+    ///
+    /// If no CAN frame is in the RX buffer, this will wait until there is one.
+    ///
+    /// Returns a tuple of the time the message was received and the message frame
+    pub async fn read(&mut self) -> Result<Envelope, BusError> {
+        T::state().rx_mode.read::<T>().await
+    }
+
+    /// Attempts to read a CAN frame without blocking.
+    ///
+    /// Returns [Err(TryReadError::Empty)] if there are no frames in the rx queue.
+    pub fn try_read(&mut self) -> Result<Envelope, TryReadError> {
+        T::state().rx_mode.try_read::<T>()
+    }
+
+    /// Waits while receive queue is empty.
+    pub async fn wait_not_empty(&mut self) {
+        T::state().rx_mode.wait_not_empty::<T>().await
+    }
+
+    /// Split the CAN driver into transmit and receive halves.
+    ///
+    /// Useful for doing separate transmit/receive tasks.
+    pub fn split<'c>(&'c mut self) -> (CanTx<'d, T>, CanRx<'d, T>) {
+        (
+            CanTx {
+                _peri: unsafe { self.peri.clone_unchecked() },
+            },
+            CanRx {
+                peri: unsafe { self.peri.clone_unchecked() },
+            },
+        )
+    }
+
+    /// Return a buffered instance of driver. User must supply Buffers
+    pub fn buffered<'c, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize>(
+        &'c mut self,
+        txb: &'static mut TxBuf<TX_BUF_SIZE>,
+        rxb: &'static mut RxBuf<RX_BUF_SIZE>,
+    ) -> BufferedCan<'d, T, TX_BUF_SIZE, RX_BUF_SIZE> {
+        let (tx, rx) = self.split();
+        BufferedCan {
+            tx: tx.buffered(txb),
+            rx: rx.buffered(rxb),
+        }
+    }
+}
+
+impl<'d, T: FilterOwner> Can<'d, T> {
+    /// Accesses the filter banks owned by this CAN peripheral.
+    ///
+    /// To modify filters of a slave peripheral, `modify_filters` has to be called on the master
+    /// peripheral instead.
+    pub fn modify_filters(&mut self) -> MasterFilters<'_, T> {
+        unsafe { MasterFilters::new(T::regs()) }
+    }
+}
+
+/// Buffered CAN driver.
+pub struct BufferedCan<'d, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize> {
+    tx: BufferedCanTx<'d, T, TX_BUF_SIZE>,
+    rx: BufferedCanRx<'d, T, RX_BUF_SIZE>,
+}
+
+impl<'d, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize> BufferedCan<'d, T, TX_BUF_SIZE, RX_BUF_SIZE> {
+    /// Async write frame to TX buffer.
+    pub async fn write(&mut self, frame: &Frame) {
+        self.tx.write(frame).await
+    }
+
+    /// Returns a sender that can be used for sending CAN frames.
+    pub fn writer(&self) -> BufferedCanSender {
+        self.tx.writer()
+    }
+
+    /// Async read frame from RX buffer.
+    pub async fn read(&mut self) -> Result<Envelope, BusError> {
+        self.rx.read().await
+    }
+
+    /// Attempts to read a CAN frame without blocking.
+    ///
+    /// Returns [Err(TryReadError::Empty)] if there are no frames in the rx queue.
+    pub fn try_read(&mut self) -> Result<Envelope, TryReadError> {
+        self.rx.try_read()
+    }
+
+    /// Waits while receive queue is empty.
+    pub async fn wait_not_empty(&mut self) {
+        self.rx.wait_not_empty().await
+    }
+
+    /// Returns a receiver that can be used for receiving CAN frames. Note, each CAN frame will only be received by one receiver.
+    pub fn reader(&self) -> BufferedCanReceiver {
+        self.rx.reader()
+    }
+}
+
+/// CAN driver, transmit half.
+pub struct CanTx<'d, T: Instance> {
+    _peri: PeripheralRef<'d, T>,
+}
+
+impl<'d, T: Instance> CanTx<'d, T> {
+    /// Queues the message to be sent.
+    ///
+    /// If the TX queue is full, this will wait until there is space, therefore exerting backpressure.
+    pub async fn write(&mut self, frame: &Frame) -> TransmitStatus {
+        poll_fn(|cx| {
+            T::state().tx_mode.register(cx.waker());
+            if let Ok(status) = Registers(T::regs()).transmit(frame) {
+                return Poll::Ready(status);
+            }
+
+            Poll::Pending
+        })
+        .await
+    }
+
+    /// Attempts to transmit a frame without blocking.
+    ///
+    /// Returns [Err(TryWriteError::Full)] if all transmit mailboxes are full.
+    pub fn try_write(&mut self, frame: &Frame) -> Result<TransmitStatus, TryWriteError> {
+        Registers(T::regs()).transmit(frame).map_err(|_| TryWriteError::Full)
+    }
+
+    async fn flush_inner(mb: Mailbox) {
+        poll_fn(|cx| {
+            T::state().tx_mode.register(cx.waker());
+            if T::regs().tsr().read().tme(mb.index()) {
+                return Poll::Ready(());
+            }
+
+            Poll::Pending
+        })
+        .await;
+    }
+
+    /// Waits for a specific transmit mailbox to become empty
+    pub async fn flush(&self, mb: Mailbox) {
+        Self::flush_inner(mb).await
+    }
+
+    async fn flush_any_inner() {
+        poll_fn(|cx| {
+            T::state().tx_mode.register(cx.waker());
+
+            let tsr = T::regs().tsr().read();
+            if tsr.tme(Mailbox::Mailbox0.index())
+                || tsr.tme(Mailbox::Mailbox1.index())
+                || tsr.tme(Mailbox::Mailbox2.index())
+            {
+                return Poll::Ready(());
+            }
+
+            Poll::Pending
+        })
+        .await;
+    }
+
+    /// Waits until any of the transmit mailboxes become empty
+    pub async fn flush_any(&self) {
+        Self::flush_any_inner().await
+    }
+
+    async fn flush_all_inner() {
+        poll_fn(|cx| {
+            T::state().tx_mode.register(cx.waker());
+
+            let tsr = T::regs().tsr().read();
+            if tsr.tme(Mailbox::Mailbox0.index())
+                && tsr.tme(Mailbox::Mailbox1.index())
+                && tsr.tme(Mailbox::Mailbox2.index())
+            {
+                return Poll::Ready(());
+            }
+
+            Poll::Pending
+        })
+        .await;
+    }
+
+    /// Waits until all of the transmit mailboxes become empty
+    pub async fn flush_all(&self) {
+        Self::flush_all_inner().await
+    }
+
+    /// Attempts to abort the sending of a frame that is pending in a mailbox.
+    ///
+    /// If there is no frame in the provided mailbox, or its transmission succeeds before it can be
+    /// aborted, this function has no effect and returns `false`.
+    ///
+    /// If there is a frame in the provided mailbox, and it is canceled successfully, this function
+    /// returns `true`.
+    pub fn abort(&mut self, mailbox: Mailbox) -> bool {
+        Registers(T::regs()).abort(mailbox)
+    }
+
+    /// Returns `true` if no frame is pending for transmission.
+    pub fn is_idle(&self) -> bool {
+        Registers(T::regs()).is_idle()
+    }
+
+    /// Return a buffered instance of driver. User must supply Buffers
+    pub fn buffered<const TX_BUF_SIZE: usize>(
+        self,
+        txb: &'static mut TxBuf<TX_BUF_SIZE>,
+    ) -> BufferedCanTx<'d, T, TX_BUF_SIZE> {
+        BufferedCanTx::new(self, txb)
+    }
+}
+
+/// User supplied buffer for TX buffering
+pub type TxBuf<const BUF_SIZE: usize> = Channel<CriticalSectionRawMutex, Frame, BUF_SIZE>;
+
+/// Buffered CAN driver, transmit half.
+pub struct BufferedCanTx<'d, T: Instance, const TX_BUF_SIZE: usize> {
+    _tx: CanTx<'d, T>,
+    tx_buf: &'static TxBuf<TX_BUF_SIZE>,
+}
+
+impl<'d, T: Instance, const TX_BUF_SIZE: usize> BufferedCanTx<'d, T, TX_BUF_SIZE> {
+    fn new(_tx: CanTx<'d, T>, tx_buf: &'static TxBuf<TX_BUF_SIZE>) -> Self {
+        Self { _tx, tx_buf }.setup()
+    }
+
+    fn setup(self) -> Self {
+        // We don't want interrupts being processed while we change modes.
+        critical_section::with(|_| unsafe {
+            let tx_inner = super::common::ClassicBufferedTxInner {
+                tx_receiver: self.tx_buf.receiver().into(),
+            };
+            T::mut_state().tx_mode = TxMode::Buffered(tx_inner);
+        });
+        self
+    }
+
+    /// Async write frame to TX buffer.
+    pub async fn write(&mut self, frame: &Frame) {
+        self.tx_buf.send(*frame).await;
+        T::TXInterrupt::pend(); // Wake for Tx
+    }
+
+    /// Returns a sender that can be used for sending CAN frames.
+    pub fn writer(&self) -> BufferedCanSender {
+        BufferedCanSender {
+            tx_buf: self.tx_buf.sender().into(),
+            waker: T::TXInterrupt::pend,
+        }
+    }
+}
+
+impl<'d, T: Instance, const TX_BUF_SIZE: usize> Drop for BufferedCanTx<'d, T, TX_BUF_SIZE> {
+    fn drop(&mut self) {
+        critical_section::with(|_| unsafe {
+            T::mut_state().tx_mode = TxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
+        });
+    }
+}
+
+/// CAN driver, receive half.
+#[allow(dead_code)]
+pub struct CanRx<'d, T: Instance> {
+    peri: PeripheralRef<'d, T>,
+}
+
+impl<'d, T: Instance> CanRx<'d, T> {
+    /// Read a CAN frame.
+    ///
+    /// If no CAN frame is in the RX buffer, this will wait until there is one.
+    ///
+    /// Returns a tuple of the time the message was received and the message frame
+    pub async fn read(&mut self) -> Result<Envelope, BusError> {
+        T::state().rx_mode.read::<T>().await
+    }
+
+    /// Attempts to read a CAN frame without blocking.
+    ///
+    /// Returns [Err(TryReadError::Empty)] if there are no frames in the rx queue.
+    pub fn try_read(&mut self) -> Result<Envelope, TryReadError> {
+        T::state().rx_mode.try_read::<T>()
+    }
+
+    /// Waits while receive queue is empty.
+    pub async fn wait_not_empty(&mut self) {
+        T::state().rx_mode.wait_not_empty::<T>().await
+    }
+
+    /// Return a buffered instance of driver. User must supply Buffers
+    pub fn buffered<const RX_BUF_SIZE: usize>(
+        self,
+        rxb: &'static mut RxBuf<RX_BUF_SIZE>,
+    ) -> BufferedCanRx<'d, T, RX_BUF_SIZE> {
+        BufferedCanRx::new(self, rxb)
+    }
+}
+
+/// User supplied buffer for RX Buffering
+pub type RxBuf<const BUF_SIZE: usize> = Channel<CriticalSectionRawMutex, Result<Envelope, BusError>, BUF_SIZE>;
+
+/// CAN driver, receive half in Buffered mode.
+pub struct BufferedCanRx<'d, T: Instance, const RX_BUF_SIZE: usize> {
+    _rx: CanRx<'d, T>,
+    rx_buf: &'static RxBuf<RX_BUF_SIZE>,
+}
+
+impl<'d, T: Instance, const RX_BUF_SIZE: usize> BufferedCanRx<'d, T, RX_BUF_SIZE> {
+    fn new(_rx: CanRx<'d, T>, rx_buf: &'static RxBuf<RX_BUF_SIZE>) -> Self {
+        BufferedCanRx { _rx, rx_buf }.setup()
+    }
+
+    fn setup(self) -> Self {
+        // We don't want interrupts being processed while we change modes.
+        critical_section::with(|_| unsafe {
+            let rx_inner = super::common::ClassicBufferedRxInner {
+                rx_sender: self.rx_buf.sender().into(),
+            };
+            T::mut_state().rx_mode = RxMode::Buffered(rx_inner);
+        });
+        self
+    }
+
+    /// Async read frame from RX buffer.
+    pub async fn read(&mut self) -> Result<Envelope, BusError> {
+        self.rx_buf.receive().await
+    }
+
+    /// Attempts to read a CAN frame without blocking.
+    ///
+    /// Returns [Err(TryReadError::Empty)] if there are no frames in the rx queue.
+    pub fn try_read(&mut self) -> Result<Envelope, TryReadError> {
+        match &T::state().rx_mode {
+            RxMode::Buffered(_) => {
+                if let Ok(result) = self.rx_buf.try_receive() {
+                    match result {
+                        Ok(envelope) => Ok(envelope),
+                        Err(e) => Err(TryReadError::BusError(e)),
+                    }
+                } else {
+                    if let Some(err) = Registers(T::regs()).curr_error() {
+                        return Err(TryReadError::BusError(err));
+                    } else {
+                        Err(TryReadError::Empty)
+                    }
+                }
+            }
+            _ => {
+                panic!("Bad Mode")
+            }
+        }
+    }
+
+    /// Waits while receive queue is empty.
+    pub async fn wait_not_empty(&mut self) {
+        poll_fn(|cx| self.rx_buf.poll_ready_to_receive(cx)).await
+    }
+
+    /// Returns a receiver that can be used for receiving CAN frames. Note, each CAN frame will only be received by one receiver.
+    pub fn reader(&self) -> BufferedCanReceiver {
+        self.rx_buf.receiver().into()
+    }
+}
+
+impl<'d, T: Instance, const RX_BUF_SIZE: usize> Drop for BufferedCanRx<'d, T, RX_BUF_SIZE> {
+    fn drop(&mut self) {
+        critical_section::with(|_| unsafe {
+            T::mut_state().rx_mode = RxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
+        });
+    }
+}
+
+impl<'d, T: Instance> Drop for Can<'d, T> {
+    fn drop(&mut self) {
+        // Cannot call `free()` because it moves the instance.
+        // Manually reset the peripheral.
+        T::regs().mcr().write(|w| w.set_reset(true));
+        T::disable();
+    }
+}
+
+/// Identifies one of the two receive FIFOs.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Fifo {
+    /// First receive FIFO
+    Fifo0 = 0,
+    /// Second receive FIFO
+    Fifo1 = 1,
+}
+
+/// Identifies one of the three transmit mailboxes.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Mailbox {
+    /// Transmit mailbox 0
+    Mailbox0 = 0,
+    /// Transmit mailbox 1
+    Mailbox1 = 1,
+    /// Transmit mailbox 2
+    Mailbox2 = 2,
+}
+
+/// Contains information about a frame enqueued for transmission via [`Can::transmit`] or
+/// [`Tx::transmit`].
+pub struct TransmitStatus {
+    dequeued_frame: Option<Frame>,
+    mailbox: Mailbox,
+}
+
+impl TransmitStatus {
+    /// Returns the lower-priority frame that was dequeued to make space for the new frame.
+    #[inline]
+    pub fn dequeued_frame(&self) -> Option<&Frame> {
+        self.dequeued_frame.as_ref()
+    }
+
+    /// Returns the [`Mailbox`] the frame was enqueued in.
+    #[inline]
+    pub fn mailbox(&self) -> Mailbox {
+        self.mailbox
+    }
+}
+
+pub(crate) enum RxMode {
+    NonBuffered(AtomicWaker),
+    Buffered(super::common::ClassicBufferedRxInner),
+}
+
+impl RxMode {
+    pub fn on_interrupt<T: Instance>(&self, fifo: RxFifo) {
+        match self {
+            Self::NonBuffered(waker) => {
+                // Disable interrupts until read
+                let fifo_idx = match fifo {
+                    RxFifo::Fifo0 => 0usize,
+                    RxFifo::Fifo1 => 1usize,
+                };
+                T::regs().ier().write(|w| {
+                    w.set_fmpie(fifo_idx, false);
+                });
+                waker.wake();
+            }
+            Self::Buffered(buf) => {
+                loop {
+                    match Registers(T::regs()).receive_fifo(fifo) {
+                        Some(envelope) => {
+                            // NOTE: consensus was reached that if rx_queue is full, packets should be dropped
+                            let _ = buf.rx_sender.try_send(Ok(envelope));
+                        }
+                        None => return,
+                    };
+                }
+            }
+        }
+    }
+
+    pub async fn read<T: Instance>(&self) -> Result<Envelope, BusError> {
+        match self {
+            Self::NonBuffered(waker) => {
+                poll_fn(|cx| {
+                    T::state().err_waker.register(cx.waker());
+                    waker.register(cx.waker());
+                    match self.try_read::<T>() {
+                        Ok(result) => Poll::Ready(Ok(result)),
+                        Err(TryReadError::Empty) => Poll::Pending,
+                        Err(TryReadError::BusError(be)) => Poll::Ready(Err(be)),
+                    }
+                })
+                .await
+            }
+            _ => {
+                panic!("Bad Mode")
+            }
+        }
+    }
+    pub fn try_read<T: Instance>(&self) -> Result<Envelope, TryReadError> {
+        match self {
+            Self::NonBuffered(_) => {
+                let registers = Registers(T::regs());
+                if let Some(msg) = registers.receive_fifo(RxFifo::Fifo0) {
+                    T::regs().ier().write(|w| {
+                        w.set_fmpie(0, true);
+                    });
+                    Ok(msg)
+                } else if let Some(msg) = registers.receive_fifo(RxFifo::Fifo1) {
+                    T::regs().ier().write(|w| {
+                        w.set_fmpie(1, true);
+                    });
+                    Ok(msg)
+                } else if let Some(err) = registers.curr_error() {
+                    Err(TryReadError::BusError(err))
+                } else {
+                    Err(TryReadError::Empty)
+                }
+            }
+            _ => {
+                panic!("Bad Mode")
+            }
+        }
+    }
+    pub async fn wait_not_empty<T: Instance>(&self) {
+        match &T::state().rx_mode {
+            Self::NonBuffered(waker) => {
+                poll_fn(|cx| {
+                    waker.register(cx.waker());
+                    if Registers(T::regs()).receive_frame_available() {
+                        Poll::Ready(())
+                    } else {
+                        Poll::Pending
+                    }
+                })
+                .await
+            }
+            _ => {
+                panic!("Bad Mode")
+            }
+        }
+    }
+}
+
+enum TxMode {
+    NonBuffered(AtomicWaker),
+    Buffered(super::common::ClassicBufferedTxInner),
+}
+
+impl TxMode {
+    pub fn buffer_free<T: Instance>(&self) -> bool {
+        let tsr = T::regs().tsr().read();
+        tsr.tme(Mailbox::Mailbox0.index()) || tsr.tme(Mailbox::Mailbox1.index()) || tsr.tme(Mailbox::Mailbox2.index())
+    }
+    pub fn on_interrupt<T: Instance>(&self) {
+        match &T::state().tx_mode {
+            TxMode::NonBuffered(waker) => waker.wake(),
+            TxMode::Buffered(buf) => {
+                while self.buffer_free::<T>() {
+                    match buf.tx_receiver.try_receive() {
+                        Ok(frame) => {
+                            _ = Registers(T::regs()).transmit(&frame);
+                        }
+                        Err(_) => {
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    fn register(&self, arg: &core::task::Waker) {
+        match self {
+            TxMode::NonBuffered(waker) => {
+                waker.register(arg);
+            }
+            _ => {
+                panic!("Bad mode");
+            }
+        }
+    }
+}
+
+struct State {
+    pub(crate) rx_mode: RxMode,
+    pub(crate) tx_mode: TxMode,
+    pub err_waker: AtomicWaker,
+}
+
+impl State {
+    pub const fn new() -> Self {
+        Self {
+            rx_mode: RxMode::NonBuffered(AtomicWaker::new()),
+            tx_mode: TxMode::NonBuffered(AtomicWaker::new()),
+            err_waker: AtomicWaker::new(),
+        }
+    }
+}
+
+trait SealedInstance {
+    fn regs() -> crate::pac::can::Can;
+    fn state() -> &'static State;
+    unsafe fn mut_state() -> &'static mut State;
+}
+
+/// CAN instance trait.
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + RccPeripheral + 'static {
+    /// TX interrupt for this instance.
+    type TXInterrupt: crate::interrupt::typelevel::Interrupt;
+    /// RX0 interrupt for this instance.
+    type RX0Interrupt: crate::interrupt::typelevel::Interrupt;
+    /// RX1 interrupt for this instance.
+    type RX1Interrupt: crate::interrupt::typelevel::Interrupt;
+    /// SCE interrupt for this instance.
+    type SCEInterrupt: crate::interrupt::typelevel::Interrupt;
+}
+
+/// A bxCAN instance that owns filter banks.
+///
+/// In master-slave-instance setups, only the master instance owns the filter banks, and needs to
+/// split some of them off for use by the slave instance. In that case, the master instance should
+/// implement [`FilterOwner`] and [`MasterInstance`], while the slave instance should only implement
+/// [`Instance`].
+///
+/// In single-instance configurations, the instance owns all filter banks and they can not be split
+/// off. In that case, the instance should implement [`Instance`] and [`FilterOwner`].
+///
+/// # Safety
+///
+/// This trait must only be implemented if the instance does, in fact, own its associated filter
+/// banks, and `NUM_FILTER_BANKS` must be correct.
+pub unsafe trait FilterOwner: Instance {
+    /// The total number of filter banks available to the instance.
+    ///
+    /// This is usually either 14 or 28, and should be specified in the chip's reference manual or datasheet.
+    const NUM_FILTER_BANKS: u8;
+}
+
+/// A bxCAN master instance that shares filter banks with a slave instance.
+///
+/// In master-slave-instance setups, this trait should be implemented for the master instance.
+///
+/// # Safety
+///
+/// This trait must only be implemented when there is actually an associated slave instance.
+pub unsafe trait MasterInstance: FilterOwner {}
+
+foreach_peripheral!(
+    (can, $inst:ident) => {
+        impl SealedInstance for peripherals::$inst {
+
+            fn regs() -> crate::pac::can::Can {
+                crate::pac::$inst
+            }
+
+            unsafe fn mut_state() -> & 'static mut State {
+                static mut STATE: State = State::new();
+                &mut *core::ptr::addr_of_mut!(STATE)
+            }
+            fn state() -> &'static State {
+                unsafe { peripherals::$inst::mut_state() }
+            }
+        }
+
+        impl Instance for peripherals::$inst {
+            type TXInterrupt = crate::_generated::peripheral_interrupts::$inst::TX;
+            type RX0Interrupt = crate::_generated::peripheral_interrupts::$inst::RX0;
+            type RX1Interrupt = crate::_generated::peripheral_interrupts::$inst::RX1;
+            type SCEInterrupt = crate::_generated::peripheral_interrupts::$inst::SCE;
+        }
+    };
+);
+
+foreach_peripheral!(
+    (can, CAN) => {
+        unsafe impl FilterOwner for peripherals::CAN {
+            const NUM_FILTER_BANKS: u8 = 14;
+        }
+    };
+    // CAN1 and CAN2 is a combination of master and slave instance.
+    // CAN1 owns the filter bank and needs to be enabled in order
+    // for CAN2 to receive messages.
+    (can, CAN1) => {
+        cfg_if::cfg_if! {
+            if #[cfg(all(
+                any(stm32l4, stm32f72, stm32f73),
+                not(any(stm32l49, stm32l4a))
+            ))] {
+                // Most L4 devices and some F7 devices use the name "CAN1"
+                // even if there is no "CAN2" peripheral.
+                unsafe impl FilterOwner for peripherals::CAN1 {
+                    const NUM_FILTER_BANKS: u8 = 14;
+                }
+            } else {
+                unsafe impl FilterOwner for peripherals::CAN1 {
+                    const NUM_FILTER_BANKS: u8 = 28;
+                }
+                unsafe impl MasterInstance for peripherals::CAN1 {}
+            }
+        }
+    };
+    (can, CAN3) => {
+        unsafe impl FilterOwner for peripherals::CAN3 {
+            const NUM_FILTER_BANKS: u8 = 14;
+        }
+    };
+);
+
+pin_trait!(RxPin, Instance);
+pin_trait!(TxPin, Instance);
+
+trait Index {
+    fn index(&self) -> usize;
+}
+
+impl Index for Mailbox {
+    fn index(&self) -> usize {
+        match self {
+            Mailbox::Mailbox0 => 0,
+            Mailbox::Mailbox1 => 1,
+            Mailbox::Mailbox2 => 2,
+        }
+    }
+}
diff --git a/embassy-stm32/src/can/bxcan/registers.rs b/embassy-stm32/src/can/bxcan/registers.rs
new file mode 100644
index 000000000..732567797
--- /dev/null
+++ b/embassy-stm32/src/can/bxcan/registers.rs
@@ -0,0 +1,510 @@
+use core::cmp::Ordering;
+use core::convert::Infallible;
+
+pub use embedded_can::{ExtendedId, Id, StandardId};
+use stm32_metapac::can::vals::Lec;
+
+use super::{Mailbox, TransmitStatus};
+use crate::can::enums::BusError;
+use crate::can::frame::{Envelope, Frame, Header};
+
+pub(crate) struct Registers(pub crate::pac::can::Can);
+
+impl Registers {
+    pub fn enter_init_mode(&mut self) {
+        self.0.mcr().modify(|reg| {
+            reg.set_sleep(false);
+            reg.set_inrq(true);
+        });
+        loop {
+            let msr = self.0.msr().read();
+            if !msr.slak() && msr.inak() {
+                break;
+            }
+        }
+    }
+
+    // Leaves initialization mode, enters sleep mode.
+    pub fn leave_init_mode(&mut self) {
+        self.0.mcr().modify(|reg| {
+            reg.set_sleep(true);
+            reg.set_inrq(false);
+        });
+        loop {
+            let msr = self.0.msr().read();
+            if msr.slak() && !msr.inak() {
+                break;
+            }
+        }
+    }
+
+    pub fn set_bit_timing(&mut self, bt: crate::can::util::NominalBitTiming) {
+        let prescaler = u16::from(bt.prescaler) & 0x1FF;
+        let seg1 = u8::from(bt.seg1);
+        let seg2 = u8::from(bt.seg2) & 0x7F;
+        let sync_jump_width = u8::from(bt.sync_jump_width) & 0x7F;
+        self.0.btr().modify(|reg| {
+            reg.set_brp(prescaler - 1);
+            reg.set_ts(0, seg1 - 1);
+            reg.set_ts(1, seg2 - 1);
+            reg.set_sjw(sync_jump_width - 1);
+        });
+    }
+
+    /// Enables or disables silent mode: Disconnects the TX signal from the pin.
+    pub fn set_silent(&self, enabled: bool) {
+        let mode = match enabled {
+            false => stm32_metapac::can::vals::Silm::NORMAL,
+            true => stm32_metapac::can::vals::Silm::SILENT,
+        };
+        self.0.btr().modify(|reg| reg.set_silm(mode));
+    }
+
+    /// Enables or disables automatic retransmission of messages.
+    ///
+    /// If this is enabled, the CAN peripheral will automatically try to retransmit each frame
+    /// until it can be sent. Otherwise, it will try only once to send each frame.
+    ///
+    /// Automatic retransmission is enabled by default.
+    pub fn set_automatic_retransmit(&self, enabled: bool) {
+        self.0.mcr().modify(|reg| reg.set_nart(enabled));
+    }
+
+    /// Enables or disables loopback mode: Internally connects the TX and RX
+    /// signals together.
+    pub fn set_loopback(&self, enabled: bool) {
+        self.0.btr().modify(|reg| reg.set_lbkm(enabled));
+    }
+
+    /// Configures the automatic wake-up feature.
+    ///
+    /// This is turned off by default.
+    ///
+    /// When turned on, an incoming frame will cause the peripheral to wake up from sleep and
+    /// receive the frame. If enabled, [`Interrupt::Wakeup`] will also be triggered by the incoming
+    /// frame.
+    #[allow(dead_code)]
+    pub fn set_automatic_wakeup(&mut self, enabled: bool) {
+        self.0.mcr().modify(|reg| reg.set_awum(enabled));
+    }
+
+    /// Leaves initialization mode and enables the peripheral (non-blocking version).
+    ///
+    /// Usually, it is recommended to call [`CanConfig::enable`] instead. This method is only needed
+    /// if you want non-blocking initialization.
+    ///
+    /// If this returns [`WouldBlock`][nb::Error::WouldBlock], the peripheral will enable itself
+    /// in the background. The peripheral is enabled and ready to use when this method returns
+    /// successfully.
+    pub fn enable_non_blocking(&mut self) -> nb::Result<(), Infallible> {
+        let msr = self.0.msr().read();
+        if msr.slak() {
+            self.0.mcr().modify(|reg| {
+                reg.set_abom(true);
+                reg.set_sleep(false);
+            });
+            Err(nb::Error::WouldBlock)
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Puts the peripheral in a sleep mode to save power.
+    ///
+    /// While in sleep mode, an incoming CAN frame will trigger [`Interrupt::Wakeup`] if enabled.
+    #[allow(dead_code)]
+    pub fn sleep(&mut self) {
+        self.0.mcr().modify(|reg| {
+            reg.set_sleep(true);
+            reg.set_inrq(false);
+        });
+        loop {
+            let msr = self.0.msr().read();
+            if msr.slak() && !msr.inak() {
+                break;
+            }
+        }
+    }
+
+    /// Wakes up from sleep mode.
+    ///
+    /// Note that this will not trigger [`Interrupt::Wakeup`], only reception of an incoming CAN
+    /// frame will cause that interrupt.
+    #[allow(dead_code)]
+    pub fn wakeup(&mut self) {
+        self.0.mcr().modify(|reg| {
+            reg.set_sleep(false);
+            reg.set_inrq(false);
+        });
+        loop {
+            let msr = self.0.msr().read();
+            if !msr.slak() && !msr.inak() {
+                break;
+            }
+        }
+    }
+
+    pub fn curr_error(&self) -> Option<BusError> {
+        let err = { self.0.esr().read() };
+        if err.boff() {
+            return Some(BusError::BusOff);
+        } else if err.epvf() {
+            return Some(BusError::BusPassive);
+        } else if err.ewgf() {
+            return Some(BusError::BusWarning);
+        } else if err.lec() != Lec::NOERROR {
+            return Some(match err.lec() {
+                Lec::STUFF => BusError::Stuff,
+                Lec::FORM => BusError::Form,
+                Lec::ACK => BusError::Acknowledge,
+                Lec::BITRECESSIVE => BusError::BitRecessive,
+                Lec::BITDOMINANT => BusError::BitDominant,
+                Lec::CRC => BusError::Crc,
+                Lec::CUSTOM => BusError::Software,
+                Lec::NOERROR => unreachable!(),
+            });
+        }
+        None
+    }
+
+    /// Puts a CAN frame in a transmit mailbox for transmission on the bus.
+    ///
+    /// Frames are transmitted to the bus based on their priority (see [`FramePriority`]).
+    /// Transmit order is preserved for frames with identical priority.
+    ///
+    /// If all transmit mailboxes are full, and `frame` has a higher priority than the
+    /// lowest-priority message in the transmit mailboxes, transmission of the enqueued frame is
+    /// cancelled and `frame` is enqueued instead. The frame that was replaced is returned as
+    /// [`TransmitStatus::dequeued_frame`].
+    pub fn transmit(&mut self, frame: &Frame) -> nb::Result<TransmitStatus, Infallible> {
+        // Get the index of the next free mailbox or the one with the lowest priority.
+        let tsr = self.0.tsr().read();
+        let idx = tsr.code() as usize;
+
+        let frame_is_pending = !tsr.tme(0) || !tsr.tme(1) || !tsr.tme(2);
+        let pending_frame = if frame_is_pending {
+            // High priority frames are transmitted first by the mailbox system.
+            // Frames with identical identifier shall be transmitted in FIFO order.
+            // The controller schedules pending frames of same priority based on the
+            // mailbox index instead. As a workaround check all pending mailboxes
+            // and only accept higher priority frames.
+            self.check_priority(0, frame.id().into())?;
+            self.check_priority(1, frame.id().into())?;
+            self.check_priority(2, frame.id().into())?;
+
+            let all_frames_are_pending = !tsr.tme(0) && !tsr.tme(1) && !tsr.tme(2);
+            if all_frames_are_pending {
+                // No free mailbox is available. This can only happen when three frames with
+                // ascending priority (descending IDs) were requested for transmission and all
+                // of them are blocked by bus traffic with even higher priority.
+                // To prevent a priority inversion abort and replace the lowest priority frame.
+                self.read_pending_mailbox(idx)
+            } else {
+                // There was a free mailbox.
+                None
+            }
+        } else {
+            // All mailboxes are available: Send frame without performing any checks.
+            None
+        };
+
+        self.write_mailbox(idx, frame);
+
+        let mailbox = match idx {
+            0 => Mailbox::Mailbox0,
+            1 => Mailbox::Mailbox1,
+            2 => Mailbox::Mailbox2,
+            _ => unreachable!(),
+        };
+        Ok(TransmitStatus {
+            dequeued_frame: pending_frame,
+            mailbox,
+        })
+    }
+
+    /// Returns `Ok` when the mailbox is free or if it contains pending frame with a
+    /// lower priority (higher ID) than the identifier `id`.
+    fn check_priority(&self, idx: usize, id: IdReg) -> nb::Result<(), Infallible> {
+        // Read the pending frame's id to check its priority.
+        assert!(idx < 3);
+        let tir = &self.0.tx(idx).tir().read();
+        //let tir = &can.tx[idx].tir.read();
+
+        // Check the priority by comparing the identifiers. But first make sure the
+        // frame has not finished the transmission (`TXRQ` == 0) in the meantime.
+        if tir.txrq() && id <= IdReg::from_register(tir.0) {
+            // There's a mailbox whose priority is higher or equal
+            // the priority of the new frame.
+            return Err(nb::Error::WouldBlock);
+        }
+
+        Ok(())
+    }
+
+    fn write_mailbox(&mut self, idx: usize, frame: &Frame) {
+        debug_assert!(idx < 3);
+
+        let mb = self.0.tx(idx);
+        mb.tdtr().write(|w| w.set_dlc(frame.header().len() as u8));
+
+        mb.tdlr()
+            .write(|w| w.0 = u32::from_ne_bytes(frame.data()[0..4].try_into().unwrap()));
+        mb.tdhr()
+            .write(|w| w.0 = u32::from_ne_bytes(frame.data()[4..8].try_into().unwrap()));
+        let id: IdReg = frame.id().into();
+        mb.tir().write(|w| {
+            w.0 = id.0;
+            w.set_txrq(true);
+        });
+    }
+
+    fn read_pending_mailbox(&mut self, idx: usize) -> Option<Frame> {
+        if self.abort_by_index(idx) {
+            debug_assert!(idx < 3);
+
+            let mb = self.0.tx(idx);
+
+            let id = IdReg(mb.tir().read().0);
+            let mut data = [0xff; 8];
+            data[0..4].copy_from_slice(&mb.tdlr().read().0.to_ne_bytes());
+            data[4..8].copy_from_slice(&mb.tdhr().read().0.to_ne_bytes());
+            let len = mb.tdtr().read().dlc();
+
+            Some(Frame::new(Header::new(id.id(), len, id.rtr()), &data).unwrap())
+        } else {
+            // Abort request failed because the frame was already sent (or being sent) on
+            // the bus. All mailboxes are now free. This can happen for small prescaler
+            // values (e.g. 1MBit/s bit timing with a source clock of 8MHz) or when an ISR
+            // has preempted the execution.
+            None
+        }
+    }
+
+    /// Tries to abort a pending frame. Returns `true` when aborted.
+    fn abort_by_index(&mut self, idx: usize) -> bool {
+        self.0.tsr().write(|reg| reg.set_abrq(idx, true));
+
+        // Wait for the abort request to be finished.
+        loop {
+            let tsr = self.0.tsr().read();
+            if false == tsr.abrq(idx) {
+                break tsr.txok(idx) == false;
+            }
+        }
+    }
+
+    /// Attempts to abort the sending of a frame that is pending in a mailbox.
+    ///
+    /// If there is no frame in the provided mailbox, or its transmission succeeds before it can be
+    /// aborted, this function has no effect and returns `false`.
+    ///
+    /// If there is a frame in the provided mailbox, and it is canceled successfully, this function
+    /// returns `true`.
+    pub fn abort(&mut self, mailbox: Mailbox) -> bool {
+        // If the mailbox is empty, the value of TXOKx depends on what happened with the previous
+        // frame in that mailbox. Only call abort_by_index() if the mailbox is not empty.
+        let tsr = self.0.tsr().read();
+        let mailbox_empty = match mailbox {
+            Mailbox::Mailbox0 => tsr.tme(0),
+            Mailbox::Mailbox1 => tsr.tme(1),
+            Mailbox::Mailbox2 => tsr.tme(2),
+        };
+        if mailbox_empty {
+            false
+        } else {
+            self.abort_by_index(mailbox as usize)
+        }
+    }
+
+    /// Returns `true` if no frame is pending for transmission.
+    pub fn is_idle(&self) -> bool {
+        let tsr = self.0.tsr().read();
+        tsr.tme(0) && tsr.tme(1) && tsr.tme(2)
+    }
+
+    pub fn receive_frame_available(&self) -> bool {
+        if self.0.rfr(0).read().fmp() != 0 {
+            true
+        } else if self.0.rfr(1).read().fmp() != 0 {
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn receive_fifo(&self, fifo: RxFifo) -> Option<Envelope> {
+        // Generate timestamp as early as possible
+        #[cfg(feature = "time")]
+        let ts = embassy_time::Instant::now();
+
+        use crate::pac::can::vals::Ide;
+
+        let fifo_idx = match fifo {
+            RxFifo::Fifo0 => 0usize,
+            RxFifo::Fifo1 => 1usize,
+        };
+        let rfr = self.0.rfr(fifo_idx);
+        let fifo = self.0.rx(fifo_idx);
+
+        // If there are no pending messages, there is nothing to do
+        if rfr.read().fmp() == 0 {
+            return None;
+        }
+
+        let rir = fifo.rir().read();
+        let id: embedded_can::Id = if rir.ide() == Ide::STANDARD {
+            embedded_can::StandardId::new(rir.stid()).unwrap().into()
+        } else {
+            let stid = (rir.stid() & 0x7FF) as u32;
+            let exid = rir.exid() & 0x3FFFF;
+            let id = (stid << 18) | (exid);
+            embedded_can::ExtendedId::new(id).unwrap().into()
+        };
+        let rdtr = fifo.rdtr().read();
+        let data_len = rdtr.dlc();
+        let rtr = rir.rtr() == stm32_metapac::can::vals::Rtr::REMOTE;
+
+        #[cfg(not(feature = "time"))]
+        let ts = rdtr.time();
+
+        let mut data: [u8; 8] = [0; 8];
+        data[0..4].copy_from_slice(&fifo.rdlr().read().0.to_ne_bytes());
+        data[4..8].copy_from_slice(&fifo.rdhr().read().0.to_ne_bytes());
+
+        let frame = Frame::new(Header::new(id, data_len, rtr), &data).unwrap();
+        let envelope = Envelope { ts, frame };
+
+        rfr.modify(|v| v.set_rfom(true));
+
+        Some(envelope)
+    }
+}
+
+/// Identifier of a CAN message.
+///
+/// Can be either a standard identifier (11bit, Range: 0..0x3FF) or a
+/// extendended identifier (29bit , Range: 0..0x1FFFFFFF).
+///
+/// The `Ord` trait can be used to determine the frame’s priority this ID
+/// belongs to.
+/// Lower identifier values have a higher priority. Additionally standard frames
+/// have a higher priority than extended frames and data frames have a higher
+/// priority than remote frames.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub(crate) struct IdReg(u32);
+
+impl IdReg {
+    const STANDARD_SHIFT: u32 = 21;
+
+    const EXTENDED_SHIFT: u32 = 3;
+
+    const IDE_MASK: u32 = 0x0000_0004;
+
+    const RTR_MASK: u32 = 0x0000_0002;
+
+    /// Creates a new standard identifier (11bit, Range: 0..0x7FF)
+    ///
+    /// Panics for IDs outside the allowed range.
+    fn new_standard(id: StandardId) -> Self {
+        Self(u32::from(id.as_raw()) << Self::STANDARD_SHIFT)
+    }
+
+    /// Creates a new extendended identifier (29bit , Range: 0..0x1FFFFFFF).
+    ///
+    /// Panics for IDs outside the allowed range.
+    fn new_extended(id: ExtendedId) -> IdReg {
+        Self(id.as_raw() << Self::EXTENDED_SHIFT | Self::IDE_MASK)
+    }
+
+    fn from_register(reg: u32) -> IdReg {
+        Self(reg & 0xFFFF_FFFE)
+    }
+
+    /// Returns the identifier.
+    fn to_id(self) -> Id {
+        if self.is_extended() {
+            Id::Extended(unsafe { ExtendedId::new_unchecked(self.0 >> Self::EXTENDED_SHIFT) })
+        } else {
+            Id::Standard(unsafe { StandardId::new_unchecked((self.0 >> Self::STANDARD_SHIFT) as u16) })
+        }
+    }
+
+    /// Returns the identifier.
+    fn id(self) -> embedded_can::Id {
+        if self.is_extended() {
+            embedded_can::ExtendedId::new(self.0 >> Self::EXTENDED_SHIFT)
+                .unwrap()
+                .into()
+        } else {
+            embedded_can::StandardId::new((self.0 >> Self::STANDARD_SHIFT) as u16)
+                .unwrap()
+                .into()
+        }
+    }
+
+    /// Returns `true` if the identifier is an extended identifier.
+    fn is_extended(self) -> bool {
+        self.0 & Self::IDE_MASK != 0
+    }
+
+    /// Returns `true` if the identifer is part of a remote frame (RTR bit set).
+    fn rtr(self) -> bool {
+        self.0 & Self::RTR_MASK != 0
+    }
+}
+
+impl From<&embedded_can::Id> for IdReg {
+    fn from(eid: &embedded_can::Id) -> Self {
+        match eid {
+            embedded_can::Id::Standard(id) => IdReg::new_standard(StandardId::new(id.as_raw()).unwrap()),
+            embedded_can::Id::Extended(id) => IdReg::new_extended(ExtendedId::new(id.as_raw()).unwrap()),
+        }
+    }
+}
+
+impl From<IdReg> for embedded_can::Id {
+    fn from(idr: IdReg) -> Self {
+        idr.id()
+    }
+}
+
+/// `IdReg` is ordered by priority.
+impl Ord for IdReg {
+    fn cmp(&self, other: &Self) -> Ordering {
+        // When the IDs match, data frames have priority over remote frames.
+        let rtr = self.rtr().cmp(&other.rtr()).reverse();
+
+        let id_a = self.to_id();
+        let id_b = other.to_id();
+        match (id_a, id_b) {
+            (Id::Standard(a), Id::Standard(b)) => {
+                // Lower IDs have priority over higher IDs.
+                a.as_raw().cmp(&b.as_raw()).reverse().then(rtr)
+            }
+            (Id::Extended(a), Id::Extended(b)) => a.as_raw().cmp(&b.as_raw()).reverse().then(rtr),
+            (Id::Standard(a), Id::Extended(b)) => {
+                // Standard frames have priority over extended frames if their Base IDs match.
+                a.as_raw()
+                    .cmp(&b.standard_id().as_raw())
+                    .reverse()
+                    .then(Ordering::Greater)
+            }
+            (Id::Extended(a), Id::Standard(b)) => {
+                a.standard_id().as_raw().cmp(&b.as_raw()).reverse().then(Ordering::Less)
+            }
+        }
+    }
+}
+
+impl PartialOrd for IdReg {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub(crate) enum RxFifo {
+    Fifo0,
+    Fifo1,
+}
diff --git a/embassy-stm32/src/can/common.rs b/embassy-stm32/src/can/common.rs
new file mode 100644
index 000000000..a54b54f6e
--- /dev/null
+++ b/embassy-stm32/src/can/common.rs
@@ -0,0 +1,52 @@
+use embassy_sync::channel::{DynamicReceiver, DynamicSender};
+
+use super::enums::*;
+use super::frame::*;
+
+pub(crate) struct ClassicBufferedRxInner {
+    pub rx_sender: DynamicSender<'static, Result<Envelope, BusError>>,
+}
+pub(crate) struct ClassicBufferedTxInner {
+    pub tx_receiver: DynamicReceiver<'static, Frame>,
+}
+
+#[cfg(any(can_fdcan_v1, can_fdcan_h7))]
+
+pub(crate) struct FdBufferedRxInner {
+    pub rx_sender: DynamicSender<'static, Result<FdEnvelope, BusError>>,
+}
+
+#[cfg(any(can_fdcan_v1, can_fdcan_h7))]
+pub(crate) struct FdBufferedTxInner {
+    pub tx_receiver: DynamicReceiver<'static, FdFrame>,
+}
+
+/// Sender that can be used for sending CAN frames.
+#[derive(Copy, Clone)]
+pub struct BufferedCanSender {
+    pub(crate) tx_buf: embassy_sync::channel::DynamicSender<'static, Frame>,
+    pub(crate) waker: fn(),
+}
+
+impl BufferedCanSender {
+    /// Async write frame to TX buffer.
+    pub fn try_write(&mut self, frame: Frame) -> Result<(), embassy_sync::channel::TrySendError<Frame>> {
+        self.tx_buf.try_send(frame)?;
+        (self.waker)();
+        Ok(())
+    }
+
+    /// Async write frame to TX buffer.
+    pub async fn write(&mut self, frame: Frame) {
+        self.tx_buf.send(frame).await;
+        (self.waker)();
+    }
+
+    /// Allows a poll_fn to poll until the channel is ready to write
+    pub fn poll_ready_to_send(&self, cx: &mut core::task::Context<'_>) -> core::task::Poll<()> {
+        self.tx_buf.poll_ready_to_send(cx)
+    }
+}
+
+/// Receiver that can be used for receiving CAN frames. Note, each CAN frame will only be received by one receiver.
+pub type BufferedCanReceiver = embassy_sync::channel::DynamicReceiver<'static, Result<Envelope, BusError>>;
diff --git a/embassy-stm32/src/can/enums.rs b/embassy-stm32/src/can/enums.rs
index 36139a45c..4d89c84d1 100644
--- a/embassy-stm32/src/can/enums.rs
+++ b/embassy-stm32/src/can/enums.rs
@@ -28,3 +28,25 @@ pub enum BusError {
     ///  At least one of error counter has reached the Error_Warning limit of 96.
     BusWarning,
 }
+
+/// Frame Create Errors
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum FrameCreateError {
+    /// Data in header does not match supplied.
+    NotEnoughData,
+    /// Invalid data length not 0-8 for Classic packet or valid for FD.
+    InvalidDataLength,
+    /// Invalid ID.
+    InvalidCanId,
+}
+
+/// Error returned by `try_read`
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum TryReadError {
+    /// Bus error
+    BusError(BusError),
+    /// Receive buffer is empty
+    Empty,
+}
diff --git a/embassy-stm32/src/can/fd/message_ram/mod.rs b/embassy-stm32/src/can/fd/message_ram/mod.rs
index 830edf3bb..040a999b4 100644
--- a/embassy-stm32/src/can/fd/message_ram/mod.rs
+++ b/embassy-stm32/src/can/fd/message_ram/mod.rs
@@ -140,26 +140,6 @@ pub(crate) struct _TxBufferElement;
 impl generic::Readable for TxBufferElementHeader {}
 impl generic::Writable for TxBufferElementHeader {}
 
-/// FdCan Message RAM instance.
-///
-/// # Safety
-///
-/// It is only safe to implement this trait, when:
-///
-/// * The implementing type has ownership of the Message RAM, preventing any
-///   other accesses to the register block.
-/// * `MSG_RAM` is a pointer to the Message RAM block and can be safely accessed
-/// for as long as ownership or a borrow of the implementing type is present.
-pub unsafe trait Instance {
-    const MSG_RAM: *mut RegisterBlock;
-    fn msg_ram(&self) -> &RegisterBlock {
-        unsafe { &*Self::MSG_RAM }
-    }
-    fn msg_ram_mut(&mut self) -> &mut RegisterBlock {
-        unsafe { &mut *Self::MSG_RAM }
-    }
-}
-
 // Ensure the RegisterBlock is the same size as on pg 1957 of RM0440.
 static_assertions::assert_eq_size!(Filters, [u32; 28 + 16]);
 static_assertions::assert_eq_size!(Receive, [u32; 54]);
diff --git a/embassy-stm32/src/can/fd/peripheral.rs b/embassy-stm32/src/can/fd/peripheral.rs
index 8ec09ac12..e32f19d91 100644
--- a/embassy-stm32/src/can/fd/peripheral.rs
+++ b/embassy-stm32/src/can/fd/peripheral.rs
@@ -30,7 +30,12 @@ impl Registers {
         &mut self.msg_ram_mut().transmit.tbsa[bufidx]
     }
     pub fn msg_ram_mut(&self) -> &mut RegisterBlock {
+        #[cfg(stm32h7)]
+        let ptr = self.msgram.ram(self.msg_ram_offset / 4).as_ptr() as *mut RegisterBlock;
+
+        #[cfg(not(stm32h7))]
         let ptr = self.msgram.as_ptr() as *mut RegisterBlock;
+
         unsafe { &mut (*ptr) }
     }
 
@@ -56,7 +61,10 @@ impl Registers {
         match maybe_header {
             Some((header, ts)) => {
                 let data = &buffer[0..header.len() as usize];
-                Some((F::from_header(header, data)?, ts))
+                match F::from_header(header, data) {
+                    Ok(frame) => Some((frame, ts)),
+                    Err(_) => None,
+                }
             }
             None => None,
         }
@@ -182,7 +190,7 @@ impl Registers {
                 DataLength::Fdcan(len) => len,
                 DataLength::Classic(len) => len,
             };
-            if len as usize > ClassicFrame::MAX_DATA_LEN {
+            if len as usize > ClassicData::MAX_DATA_LEN {
                 return None;
             }
 
@@ -317,17 +325,6 @@ impl Registers {
         */
     }
 
-    /// Disables the CAN interface and returns back the raw peripheral it was created from.
-    #[inline]
-    pub fn free(mut self) {
-        //self.disable_interrupts(Interrupts::all());
-
-        //TODO check this!
-        self.enter_init_mode();
-        self.set_power_down_mode(true);
-        //self.control.instance
-    }
-
     /// Applies the settings of a new FdCanConfig See [`FdCanConfig`]
     #[inline]
     pub fn apply_config(&mut self, config: FdCanConfig) {
@@ -400,66 +397,17 @@ impl Registers {
 
     /// Moves out of ConfigMode and into specified mode
     #[inline]
-    pub fn into_mode(mut self, config: FdCanConfig, mode: crate::can::_version::FdcanOperatingMode) {
+    pub fn into_mode(mut self, config: FdCanConfig, mode: crate::can::_version::OperatingMode) {
         match mode {
-            crate::can::FdcanOperatingMode::InternalLoopbackMode => self.set_loopback_mode(LoopbackMode::Internal),
-            crate::can::FdcanOperatingMode::ExternalLoopbackMode => self.set_loopback_mode(LoopbackMode::External),
-            crate::can::FdcanOperatingMode::NormalOperationMode => self.set_normal_operations(true),
-            crate::can::FdcanOperatingMode::RestrictedOperationMode => self.set_restricted_operations(true),
-            crate::can::FdcanOperatingMode::BusMonitoringMode => self.set_bus_monitoring_mode(true),
+            crate::can::OperatingMode::InternalLoopbackMode => self.set_loopback_mode(LoopbackMode::Internal),
+            crate::can::OperatingMode::ExternalLoopbackMode => self.set_loopback_mode(LoopbackMode::External),
+            crate::can::OperatingMode::NormalOperationMode => self.set_normal_operations(true),
+            crate::can::OperatingMode::RestrictedOperationMode => self.set_restricted_operations(true),
+            crate::can::OperatingMode::BusMonitoringMode => self.set_bus_monitoring_mode(true),
         }
         self.leave_init_mode(config);
     }
 
-    /// Moves out of ConfigMode and into InternalLoopbackMode
-    #[inline]
-    pub fn into_internal_loopback(mut self, config: FdCanConfig) {
-        self.set_loopback_mode(LoopbackMode::Internal);
-        self.leave_init_mode(config);
-    }
-
-    /// Moves out of ConfigMode and into ExternalLoopbackMode
-    #[inline]
-    pub fn into_external_loopback(mut self, config: FdCanConfig) {
-        self.set_loopback_mode(LoopbackMode::External);
-        self.leave_init_mode(config);
-    }
-
-    /// Moves out of ConfigMode and into RestrictedOperationMode
-    #[inline]
-    pub fn into_restricted(mut self, config: FdCanConfig) {
-        self.set_restricted_operations(true);
-        self.leave_init_mode(config);
-    }
-
-    /// Moves out of ConfigMode and into NormalOperationMode
-    #[inline]
-    pub fn into_normal(mut self, config: FdCanConfig) {
-        self.set_normal_operations(true);
-        self.leave_init_mode(config);
-    }
-
-    /// Moves out of ConfigMode and into BusMonitoringMode
-    #[inline]
-    pub fn into_bus_monitoring(mut self, config: FdCanConfig) {
-        self.set_bus_monitoring_mode(true);
-        self.leave_init_mode(config);
-    }
-
-    /// Moves out of ConfigMode and into Testmode
-    #[inline]
-    pub fn into_test_mode(mut self, config: FdCanConfig) {
-        self.set_test_mode(true);
-        self.leave_init_mode(config);
-    }
-
-    /// Moves out of ConfigMode and into PoweredDownmode
-    #[inline]
-    pub fn into_powered_down(mut self, config: FdCanConfig) {
-        self.set_power_down_mode(true);
-        self.leave_init_mode(config);
-    }
-
     /// Configures the bit timings.
     ///
     /// You can use <http://www.bittiming.can-wiki.info/> to calculate the `btr` parameter. Enter
@@ -557,6 +505,7 @@ impl Registers {
 
     /// Configures and resets the timestamp counter
     #[inline]
+    #[allow(unused)]
     pub fn set_timestamp_counter_source(&mut self, select: TimestampSource) {
         #[cfg(stm32h7)]
         let (tcp, tss) = match select {
@@ -634,7 +583,7 @@ impl Registers {
 
         use crate::can::fd::message_ram::*;
         //use fdcan::message_ram::*;
-        let mut offset_words = self.msg_ram_offset as u16;
+        let mut offset_words = (self.msg_ram_offset / 4) as u16;
 
         // 11-bit filter
         r.sidfc().modify(|w| w.set_flssa(offset_words));
diff --git a/embassy-stm32/src/can/fdcan.rs b/embassy-stm32/src/can/fdcan.rs
index fe8969a5a..e31821ca2 100644
--- a/embassy-stm32/src/can/fdcan.rs
+++ b/embassy-stm32/src/can/fdcan.rs
@@ -5,24 +5,24 @@ use core::task::Poll;
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
 use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
-use embassy_sync::channel::Channel;
+use embassy_sync::channel::{Channel, DynamicReceiver, DynamicSender};
+use embassy_sync::waitqueue::AtomicWaker;
 
 use crate::can::fd::peripheral::Registers;
-use crate::gpio::sealed::AFType;
+use crate::gpio::AFType;
 use crate::interrupt::typelevel::Interrupt;
 use crate::rcc::RccPeripheral;
 use crate::{interrupt, peripherals, Peripheral};
 
-pub mod enums;
 pub(crate) mod fd;
-pub mod frame;
-mod util;
 
-use enums::*;
-use fd::config::*;
-use fd::filter::*;
-pub use fd::{config, filter};
-use frame::*;
+use self::fd::config::*;
+use self::fd::filter::*;
+pub use self::fd::{config, filter};
+pub use super::common::{BufferedCanReceiver, BufferedCanSender};
+use super::enums::*;
+use super::frame::*;
+use super::util;
 
 /// Timestamp for incoming packets. Use Embassy time when enabled.
 #[cfg(feature = "time")]
@@ -53,8 +53,8 @@ impl<T: Instance> interrupt::typelevel::Handler<T::IT0Interrupt> for IT0Interrup
             }
 
             match &T::state().tx_mode {
-                sealed::TxMode::NonBuffered(waker) => waker.wake(),
-                sealed::TxMode::ClassicBuffered(buf) => {
+                TxMode::NonBuffered(waker) => waker.wake(),
+                TxMode::ClassicBuffered(buf) => {
                     if !T::registers().tx_queue_is_full() {
                         match buf.tx_receiver.try_receive() {
                             Ok(frame) => {
@@ -64,7 +64,7 @@ impl<T: Instance> interrupt::typelevel::Handler<T::IT0Interrupt> for IT0Interrup
                         }
                     }
                 }
-                sealed::TxMode::FdBuffered(buf) => {
+                TxMode::FdBuffered(buf) => {
                     if !T::registers().tx_queue_is_full() {
                         match buf.tx_receiver.try_receive() {
                             Ok(frame) => {
@@ -106,7 +106,7 @@ impl<T: Instance> interrupt::typelevel::Handler<T::IT1Interrupt> for IT1Interrup
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 #[cfg_attr(feature = "defmt", derive(defmt::Format))]
 /// Different operating modes
-pub enum FdcanOperatingMode {
+pub enum OperatingMode {
     //PoweredDownMode,
     //ConfigMode,
     /// This mode can be used for a “Hot Selftest”, meaning the FDCAN can be tested without
@@ -144,7 +144,7 @@ pub enum FdcanOperatingMode {
 
 /// FDCAN Configuration instance instance
 /// Create instance of this first
-pub struct FdcanConfigurator<'d, T: Instance> {
+pub struct CanConfigurator<'d, T: Instance> {
     config: crate::can::fd::config::FdCanConfig,
     /// Reference to internals.
     instance: FdcanInstance<'d, T>,
@@ -165,7 +165,7 @@ fn calc_ns_per_timer_tick<T: Instance>(mode: crate::can::fd::config::FrameTransm
     }
 }
 
-impl<'d, T: Instance> FdcanConfigurator<'d, T> {
+impl<'d, T: Instance> CanConfigurator<'d, T> {
     /// Creates a new Fdcan instance, keeping the peripheral in sleep mode.
     /// You must call [Fdcan::enable_non_blocking] to use the peripheral.
     pub fn new(
@@ -175,7 +175,7 @@ impl<'d, T: Instance> FdcanConfigurator<'d, T> {
         _irqs: impl interrupt::typelevel::Binding<T::IT0Interrupt, IT0InterruptHandler<T>>
             + interrupt::typelevel::Binding<T::IT1Interrupt, IT1InterruptHandler<T>>
             + 'd,
-    ) -> FdcanConfigurator<'d, T> {
+    ) -> CanConfigurator<'d, T> {
         into_ref!(peri, rx, tx);
 
         rx.set_as_af(rx.af_num(), AFType::Input);
@@ -269,13 +269,13 @@ impl<'d, T: Instance> FdcanConfigurator<'d, T> {
     }
 
     /// Start in mode.
-    pub fn start(self, mode: FdcanOperatingMode) -> Fdcan<'d, T> {
+    pub fn start(self, mode: OperatingMode) -> Can<'d, T> {
         let ns_per_timer_tick = calc_ns_per_timer_tick::<T>(self.config.frame_transmit);
         critical_section::with(|_| unsafe {
             T::mut_state().ns_per_timer_tick = ns_per_timer_tick;
         });
         T::registers().into_mode(self.config, mode);
-        let ret = Fdcan {
+        let ret = Can {
             config: self.config,
             instance: self.instance,
             _mode: mode,
@@ -284,30 +284,30 @@ impl<'d, T: Instance> FdcanConfigurator<'d, T> {
     }
 
     /// Start, entering mode. Does same as start(mode)
-    pub fn into_normal_mode(self) -> Fdcan<'d, T> {
-        self.start(FdcanOperatingMode::NormalOperationMode)
+    pub fn into_normal_mode(self) -> Can<'d, T> {
+        self.start(OperatingMode::NormalOperationMode)
     }
 
     /// Start, entering mode. Does same as start(mode)
-    pub fn into_internal_loopback_mode(self) -> Fdcan<'d, T> {
-        self.start(FdcanOperatingMode::InternalLoopbackMode)
+    pub fn into_internal_loopback_mode(self) -> Can<'d, T> {
+        self.start(OperatingMode::InternalLoopbackMode)
     }
 
     /// Start, entering mode. Does same as start(mode)
-    pub fn into_external_loopback_mode(self) -> Fdcan<'d, T> {
-        self.start(FdcanOperatingMode::ExternalLoopbackMode)
+    pub fn into_external_loopback_mode(self) -> Can<'d, T> {
+        self.start(OperatingMode::ExternalLoopbackMode)
     }
 }
 
 /// FDCAN Instance
-pub struct Fdcan<'d, T: Instance> {
+pub struct Can<'d, T: Instance> {
     config: crate::can::fd::config::FdCanConfig,
     /// Reference to internals.
     instance: FdcanInstance<'d, T>,
-    _mode: FdcanOperatingMode,
+    _mode: OperatingMode,
 }
 
-impl<'d, T: Instance> Fdcan<'d, T> {
+impl<'d, T: Instance> Can<'d, T> {
     /// Flush one of the TX mailboxes.
     pub async fn flush(&self, idx: usize) {
         poll_fn(|cx| {
@@ -330,12 +330,12 @@ impl<'d, T: Instance> Fdcan<'d, T> {
     /// frame is dropped from the mailbox, it is returned.  If no lower-priority frames
     /// can be replaced, this call asynchronously waits for a frame to be successfully
     /// transmitted, then tries again.
-    pub async fn write(&mut self, frame: &ClassicFrame) -> Option<ClassicFrame> {
+    pub async fn write(&mut self, frame: &Frame) -> Option<Frame> {
         T::state().tx_mode.write::<T>(frame).await
     }
 
     /// Returns the next received message frame
-    pub async fn read(&mut self) -> Result<(ClassicFrame, Timestamp), BusError> {
+    pub async fn read(&mut self) -> Result<Envelope, BusError> {
         T::state().rx_mode.read_classic::<T>().await
     }
 
@@ -348,19 +348,19 @@ impl<'d, T: Instance> Fdcan<'d, T> {
     }
 
     /// Returns the next received message frame
-    pub async fn read_fd(&mut self) -> Result<(FdFrame, Timestamp), BusError> {
+    pub async fn read_fd(&mut self) -> Result<FdEnvelope, BusError> {
         T::state().rx_mode.read_fd::<T>().await
     }
 
     /// Split instance into separate Tx(write) and Rx(read) portions
-    pub fn split(self) -> (FdcanTx<'d, T>, FdcanRx<'d, T>) {
+    pub fn split(self) -> (CanTx<'d, T>, CanRx<'d, T>) {
         (
-            FdcanTx {
+            CanTx {
                 config: self.config,
                 _instance: self.instance,
                 _mode: self._mode,
             },
-            FdcanRx {
+            CanRx {
                 _instance1: PhantomData::<T>,
                 _instance2: T::regs(),
                 _mode: self._mode,
@@ -369,8 +369,8 @@ impl<'d, T: Instance> Fdcan<'d, T> {
     }
 
     /// Join split rx and tx portions back together
-    pub fn join(tx: FdcanTx<'d, T>, rx: FdcanRx<'d, T>) -> Self {
-        Fdcan {
+    pub fn join(tx: CanTx<'d, T>, rx: CanRx<'d, T>) -> Self {
+        Can {
             config: tx.config,
             //_instance2: T::regs(),
             instance: tx._instance,
@@ -398,59 +398,27 @@ impl<'d, T: Instance> Fdcan<'d, T> {
 }
 
 /// User supplied buffer for RX Buffering
-pub type RxBuf<const BUF_SIZE: usize> =
-    Channel<CriticalSectionRawMutex, Result<(ClassicFrame, Timestamp), BusError>, BUF_SIZE>;
+pub type RxBuf<const BUF_SIZE: usize> = Channel<CriticalSectionRawMutex, Result<Envelope, BusError>, BUF_SIZE>;
 
 /// User supplied buffer for TX buffering
-pub type TxBuf<const BUF_SIZE: usize> = Channel<CriticalSectionRawMutex, ClassicFrame, BUF_SIZE>;
+pub type TxBuf<const BUF_SIZE: usize> = Channel<CriticalSectionRawMutex, Frame, BUF_SIZE>;
 
 /// Buffered FDCAN Instance
 pub struct BufferedCan<'d, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize> {
     _instance1: PhantomData<T>,
     _instance2: &'d crate::pac::can::Fdcan,
-    _mode: FdcanOperatingMode,
+    _mode: OperatingMode,
     tx_buf: &'static TxBuf<TX_BUF_SIZE>,
     rx_buf: &'static RxBuf<RX_BUF_SIZE>,
 }
 
-/// Sender that can be used for sending CAN frames.
-#[derive(Copy, Clone)]
-pub struct BufferedCanSender {
-    tx_buf: embassy_sync::channel::DynamicSender<'static, ClassicFrame>,
-    waker: fn(),
-}
-
-impl BufferedCanSender {
-    /// Async write frame to TX buffer.
-    pub fn try_write(&mut self, frame: ClassicFrame) -> Result<(), embassy_sync::channel::TrySendError<ClassicFrame>> {
-        self.tx_buf.try_send(frame)?;
-        (self.waker)();
-        Ok(())
-    }
-
-    /// Async write frame to TX buffer.
-    pub async fn write(&mut self, frame: ClassicFrame) {
-        self.tx_buf.send(frame).await;
-        (self.waker)();
-    }
-
-    /// Allows a poll_fn to poll until the channel is ready to write
-    pub fn poll_ready_to_send(&self, cx: &mut core::task::Context<'_>) -> core::task::Poll<()> {
-        self.tx_buf.poll_ready_to_send(cx)
-    }
-}
-
-/// Receiver that can be used for receiving CAN frames. Note, each CAN frame will only be received by one receiver.
-pub type BufferedCanReceiver =
-    embassy_sync::channel::DynamicReceiver<'static, Result<(ClassicFrame, Timestamp), BusError>>;
-
 impl<'c, 'd, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize>
     BufferedCan<'d, T, TX_BUF_SIZE, RX_BUF_SIZE>
 {
     fn new(
         _instance1: PhantomData<T>,
         _instance2: &'d crate::pac::can::Fdcan,
-        _mode: FdcanOperatingMode,
+        _mode: OperatingMode,
         tx_buf: &'static TxBuf<TX_BUF_SIZE>,
         rx_buf: &'static RxBuf<RX_BUF_SIZE>,
     ) -> Self {
@@ -467,26 +435,26 @@ impl<'c, 'd, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize>
     fn setup(self) -> Self {
         // We don't want interrupts being processed while we change modes.
         critical_section::with(|_| unsafe {
-            let rx_inner = sealed::ClassicBufferedRxInner {
+            let rx_inner = super::common::ClassicBufferedRxInner {
                 rx_sender: self.rx_buf.sender().into(),
             };
-            let tx_inner = sealed::ClassicBufferedTxInner {
+            let tx_inner = super::common::ClassicBufferedTxInner {
                 tx_receiver: self.tx_buf.receiver().into(),
             };
-            T::mut_state().rx_mode = sealed::RxMode::ClassicBuffered(rx_inner);
-            T::mut_state().tx_mode = sealed::TxMode::ClassicBuffered(tx_inner);
+            T::mut_state().rx_mode = RxMode::ClassicBuffered(rx_inner);
+            T::mut_state().tx_mode = TxMode::ClassicBuffered(tx_inner);
         });
         self
     }
 
     /// Async write frame to TX buffer.
-    pub async fn write(&mut self, frame: ClassicFrame) {
+    pub async fn write(&mut self, frame: Frame) {
         self.tx_buf.send(frame).await;
         T::IT0Interrupt::pend(); // Wake for Tx
     }
 
     /// Async read frame from RX buffer.
-    pub async fn read(&mut self) -> Result<(ClassicFrame, Timestamp), BusError> {
+    pub async fn read(&mut self) -> Result<Envelope, BusError> {
         self.rx_buf.receive().await
     }
 
@@ -509,15 +477,14 @@ impl<'c, 'd, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize> Dr
 {
     fn drop(&mut self) {
         critical_section::with(|_| unsafe {
-            T::mut_state().rx_mode = sealed::RxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
-            T::mut_state().tx_mode = sealed::TxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
+            T::mut_state().rx_mode = RxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
+            T::mut_state().tx_mode = TxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
         });
     }
 }
 
 /// User supplied buffer for RX Buffering
-pub type RxFdBuf<const BUF_SIZE: usize> =
-    Channel<CriticalSectionRawMutex, Result<(FdFrame, Timestamp), BusError>, BUF_SIZE>;
+pub type RxFdBuf<const BUF_SIZE: usize> = Channel<CriticalSectionRawMutex, Result<FdEnvelope, BusError>, BUF_SIZE>;
 
 /// User supplied buffer for TX buffering
 pub type TxFdBuf<const BUF_SIZE: usize> = Channel<CriticalSectionRawMutex, FdFrame, BUF_SIZE>;
@@ -526,7 +493,7 @@ pub type TxFdBuf<const BUF_SIZE: usize> = Channel<CriticalSectionRawMutex, FdFra
 pub struct BufferedCanFd<'d, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize> {
     _instance1: PhantomData<T>,
     _instance2: &'d crate::pac::can::Fdcan,
-    _mode: FdcanOperatingMode,
+    _mode: OperatingMode,
     tx_buf: &'static TxFdBuf<TX_BUF_SIZE>,
     rx_buf: &'static RxFdBuf<RX_BUF_SIZE>,
 }
@@ -534,7 +501,7 @@ pub struct BufferedCanFd<'d, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF
 /// Sender that can be used for sending CAN frames.
 #[derive(Copy, Clone)]
 pub struct BufferedFdCanSender {
-    tx_buf: embassy_sync::channel::DynamicSender<'static, FdFrame>,
+    tx_buf: DynamicSender<'static, FdFrame>,
     waker: fn(),
 }
 
@@ -559,8 +526,7 @@ impl BufferedFdCanSender {
 }
 
 /// Receiver that can be used for receiving CAN frames. Note, each CAN frame will only be received by one receiver.
-pub type BufferedFdCanReceiver =
-    embassy_sync::channel::DynamicReceiver<'static, Result<(FdFrame, Timestamp), BusError>>;
+pub type BufferedFdCanReceiver = DynamicReceiver<'static, Result<FdEnvelope, BusError>>;
 
 impl<'c, 'd, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize>
     BufferedCanFd<'d, T, TX_BUF_SIZE, RX_BUF_SIZE>
@@ -568,7 +534,7 @@ impl<'c, 'd, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize>
     fn new(
         _instance1: PhantomData<T>,
         _instance2: &'d crate::pac::can::Fdcan,
-        _mode: FdcanOperatingMode,
+        _mode: OperatingMode,
         tx_buf: &'static TxFdBuf<TX_BUF_SIZE>,
         rx_buf: &'static RxFdBuf<RX_BUF_SIZE>,
     ) -> Self {
@@ -585,14 +551,14 @@ impl<'c, 'd, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize>
     fn setup(self) -> Self {
         // We don't want interrupts being processed while we change modes.
         critical_section::with(|_| unsafe {
-            let rx_inner = sealed::FdBufferedRxInner {
+            let rx_inner = super::common::FdBufferedRxInner {
                 rx_sender: self.rx_buf.sender().into(),
             };
-            let tx_inner = sealed::FdBufferedTxInner {
+            let tx_inner = super::common::FdBufferedTxInner {
                 tx_receiver: self.tx_buf.receiver().into(),
             };
-            T::mut_state().rx_mode = sealed::RxMode::FdBuffered(rx_inner);
-            T::mut_state().tx_mode = sealed::TxMode::FdBuffered(tx_inner);
+            T::mut_state().rx_mode = RxMode::FdBuffered(rx_inner);
+            T::mut_state().tx_mode = TxMode::FdBuffered(tx_inner);
         });
         self
     }
@@ -604,7 +570,7 @@ impl<'c, 'd, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize>
     }
 
     /// Async read frame from RX buffer.
-    pub async fn read(&mut self) -> Result<(FdFrame, Timestamp), BusError> {
+    pub async fn read(&mut self) -> Result<FdEnvelope, BusError> {
         self.rx_buf.receive().await
     }
 
@@ -627,32 +593,32 @@ impl<'c, 'd, T: Instance, const TX_BUF_SIZE: usize, const RX_BUF_SIZE: usize> Dr
 {
     fn drop(&mut self) {
         critical_section::with(|_| unsafe {
-            T::mut_state().rx_mode = sealed::RxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
-            T::mut_state().tx_mode = sealed::TxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
+            T::mut_state().rx_mode = RxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
+            T::mut_state().tx_mode = TxMode::NonBuffered(embassy_sync::waitqueue::AtomicWaker::new());
         });
     }
 }
 
 /// FDCAN Rx only Instance
-pub struct FdcanRx<'d, T: Instance> {
+pub struct CanRx<'d, T: Instance> {
     _instance1: PhantomData<T>,
     _instance2: &'d crate::pac::can::Fdcan,
-    _mode: FdcanOperatingMode,
+    _mode: OperatingMode,
 }
 
 /// FDCAN Tx only Instance
-pub struct FdcanTx<'d, T: Instance> {
+pub struct CanTx<'d, T: Instance> {
     config: crate::can::fd::config::FdCanConfig,
     _instance: FdcanInstance<'d, T>, //(PeripheralRef<'a, T>);
-    _mode: FdcanOperatingMode,
+    _mode: OperatingMode,
 }
 
-impl<'c, 'd, T: Instance> FdcanTx<'d, T> {
+impl<'c, 'd, T: Instance> CanTx<'d, T> {
     /// Queues the message to be sent but exerts backpressure.  If a lower-priority
     /// frame is dropped from the mailbox, it is returned.  If no lower-priority frames
     /// can be replaced, this call asynchronously waits for a frame to be successfully
     /// transmitted, then tries again.
-    pub async fn write(&mut self, frame: &ClassicFrame) -> Option<ClassicFrame> {
+    pub async fn write(&mut self, frame: &Frame) -> Option<Frame> {
         T::state().tx_mode.write::<T>(frame).await
     }
 
@@ -665,204 +631,216 @@ impl<'c, 'd, T: Instance> FdcanTx<'d, T> {
     }
 }
 
-impl<'c, 'd, T: Instance> FdcanRx<'d, T> {
+impl<'c, 'd, T: Instance> CanRx<'d, T> {
     /// Returns the next received message frame
-    pub async fn read(&mut self) -> Result<(ClassicFrame, Timestamp), BusError> {
+    pub async fn read(&mut self) -> Result<Envelope, BusError> {
         T::state().rx_mode.read_classic::<T>().await
     }
 
     /// Returns the next received message frame
-    pub async fn read_fd(&mut self) -> Result<(FdFrame, Timestamp), BusError> {
+    pub async fn read_fd(&mut self) -> Result<FdEnvelope, BusError> {
         T::state().rx_mode.read_fd::<T>().await
     }
 }
 
-pub(crate) mod sealed {
-    use core::future::poll_fn;
-    use core::task::Poll;
+enum RxMode {
+    NonBuffered(AtomicWaker),
+    ClassicBuffered(super::common::ClassicBufferedRxInner),
+    FdBuffered(super::common::FdBufferedRxInner),
+}
 
-    use embassy_sync::channel::{DynamicReceiver, DynamicSender};
-    use embassy_sync::waitqueue::AtomicWaker;
-
-    use super::CanHeader;
-    use crate::can::_version::{BusError, Timestamp};
-    use crate::can::frame::{ClassicFrame, FdFrame};
-
-    pub struct ClassicBufferedRxInner {
-        pub rx_sender: DynamicSender<'static, Result<(ClassicFrame, Timestamp), BusError>>,
-    }
-    pub struct ClassicBufferedTxInner {
-        pub tx_receiver: DynamicReceiver<'static, ClassicFrame>,
-    }
-
-    pub struct FdBufferedRxInner {
-        pub rx_sender: DynamicSender<'static, Result<(FdFrame, Timestamp), BusError>>,
-    }
-    pub struct FdBufferedTxInner {
-        pub tx_receiver: DynamicReceiver<'static, FdFrame>,
-    }
-
-    pub enum RxMode {
-        NonBuffered(AtomicWaker),
-        ClassicBuffered(ClassicBufferedRxInner),
-        FdBuffered(FdBufferedRxInner),
-    }
-
-    impl RxMode {
-        pub fn register(&self, arg: &core::task::Waker) {
-            match self {
-                RxMode::NonBuffered(waker) => waker.register(arg),
-                _ => {
-                    panic!("Bad Mode")
-                }
-            }
-        }
-
-        pub fn on_interrupt<T: Instance>(&self, fifonr: usize) {
-            T::regs().ir().write(|w| w.set_rfn(fifonr, true));
-            match self {
-                RxMode::NonBuffered(waker) => {
-                    waker.wake();
-                }
-                RxMode::ClassicBuffered(buf) => {
-                    if let Some(result) = self.read::<T, _>() {
-                        let _ = buf.rx_sender.try_send(result);
-                    }
-                }
-                RxMode::FdBuffered(buf) => {
-                    if let Some(result) = self.read::<T, _>() {
-                        let _ = buf.rx_sender.try_send(result);
-                    }
-                }
-            }
-        }
-
-        fn read<T: Instance, F: CanHeader>(&self) -> Option<Result<(F, Timestamp), BusError>> {
-            if let Some((msg, ts)) = T::registers().read(0) {
-                let ts = T::calc_timestamp(T::state().ns_per_timer_tick, ts);
-                Some(Ok((msg, ts)))
-            } else if let Some((msg, ts)) = T::registers().read(1) {
-                let ts = T::calc_timestamp(T::state().ns_per_timer_tick, ts);
-                Some(Ok((msg, ts)))
-            } else if let Some(err) = T::registers().curr_error() {
-                // TODO: this is probably wrong
-                Some(Err(err))
-            } else {
-                None
-            }
-        }
-
-        async fn read_async<T: Instance, F: CanHeader>(&self) -> Result<(F, Timestamp), BusError> {
-            poll_fn(|cx| {
-                T::state().err_waker.register(cx.waker());
-                self.register(cx.waker());
-                match self.read::<T, _>() {
-                    Some(result) => Poll::Ready(result),
-                    None => Poll::Pending,
-                }
-            })
-            .await
-        }
-
-        pub async fn read_classic<T: Instance>(&self) -> Result<(ClassicFrame, Timestamp), BusError> {
-            self.read_async::<T, _>().await
-        }
-
-        pub async fn read_fd<T: Instance>(&self) -> Result<(FdFrame, Timestamp), BusError> {
-            self.read_async::<T, _>().await
-        }
-    }
-
-    pub enum TxMode {
-        NonBuffered(AtomicWaker),
-        ClassicBuffered(ClassicBufferedTxInner),
-        FdBuffered(FdBufferedTxInner),
-    }
-
-    impl TxMode {
-        pub fn register(&self, arg: &core::task::Waker) {
-            match self {
-                TxMode::NonBuffered(waker) => {
-                    waker.register(arg);
-                }
-                _ => {
-                    panic!("Bad mode");
-                }
-            }
-        }
-
-        /// Queues the message to be sent but exerts backpressure.  If a lower-priority
-        /// frame is dropped from the mailbox, it is returned.  If no lower-priority frames
-        /// can be replaced, this call asynchronously waits for a frame to be successfully
-        /// transmitted, then tries again.
-        async fn write_generic<T: Instance, F: embedded_can::Frame + CanHeader>(&self, frame: &F) -> Option<F> {
-            poll_fn(|cx| {
-                self.register(cx.waker());
-
-                if let Ok(dropped) = T::registers().write(frame) {
-                    return Poll::Ready(dropped);
-                }
-
-                // Couldn't replace any lower priority frames.  Need to wait for some mailboxes
-                // to clear.
-                Poll::Pending
-            })
-            .await
-        }
-
-        /// Queues the message to be sent but exerts backpressure.  If a lower-priority
-        /// frame is dropped from the mailbox, it is returned.  If no lower-priority frames
-        /// can be replaced, this call asynchronously waits for a frame to be successfully
-        /// transmitted, then tries again.
-        pub async fn write<T: Instance>(&self, frame: &ClassicFrame) -> Option<ClassicFrame> {
-            self.write_generic::<T, _>(frame).await
-        }
-
-        /// Queues the message to be sent but exerts backpressure.  If a lower-priority
-        /// frame is dropped from the mailbox, it is returned.  If no lower-priority frames
-        /// can be replaced, this call asynchronously waits for a frame to be successfully
-        /// transmitted, then tries again.
-        pub async fn write_fd<T: Instance>(&self, frame: &FdFrame) -> Option<FdFrame> {
-            self.write_generic::<T, _>(frame).await
-        }
-    }
-
-    pub struct State {
-        pub rx_mode: RxMode,
-        pub tx_mode: TxMode,
-        pub ns_per_timer_tick: u64,
-
-        pub err_waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                rx_mode: RxMode::NonBuffered(AtomicWaker::new()),
-                tx_mode: TxMode::NonBuffered(AtomicWaker::new()),
-                ns_per_timer_tick: 0,
-                err_waker: AtomicWaker::new(),
+impl RxMode {
+    fn register(&self, arg: &core::task::Waker) {
+        match self {
+            RxMode::NonBuffered(waker) => waker.register(arg),
+            _ => {
+                panic!("Bad Mode")
             }
         }
     }
 
-    pub trait Instance {
-        const MSG_RAM_OFFSET: usize;
+    fn on_interrupt<T: Instance>(&self, fifonr: usize) {
+        T::regs().ir().write(|w| w.set_rfn(fifonr, true));
+        match self {
+            RxMode::NonBuffered(waker) => {
+                waker.wake();
+            }
+            RxMode::ClassicBuffered(buf) => {
+                if let Some(result) = self.try_read::<T>() {
+                    let _ = buf.rx_sender.try_send(result);
+                }
+            }
+            RxMode::FdBuffered(buf) => {
+                if let Some(result) = self.try_read_fd::<T>() {
+                    let _ = buf.rx_sender.try_send(result);
+                }
+            }
+        }
+    }
 
-        fn regs() -> &'static crate::pac::can::Fdcan;
-        fn registers() -> crate::can::fd::peripheral::Registers;
-        fn ram() -> &'static crate::pac::fdcanram::Fdcanram;
-        fn state() -> &'static State;
-        unsafe fn mut_state() -> &'static mut State;
-        fn calc_timestamp(ns_per_timer_tick: u64, ts_val: u16) -> Timestamp;
+    //async fn read_classic<T: Instance>(&self) -> Result<Envelope, BusError> {
+    fn try_read<T: Instance>(&self) -> Option<Result<Envelope, BusError>> {
+        if let Some((frame, ts)) = T::registers().read(0) {
+            let ts = T::calc_timestamp(T::state().ns_per_timer_tick, ts);
+            Some(Ok(Envelope { ts, frame }))
+        } else if let Some((frame, ts)) = T::registers().read(1) {
+            let ts = T::calc_timestamp(T::state().ns_per_timer_tick, ts);
+            Some(Ok(Envelope { ts, frame }))
+        } else if let Some(err) = T::registers().curr_error() {
+            // TODO: this is probably wrong
+            Some(Err(err))
+        } else {
+            None
+        }
+    }
+
+    //async fn read_classic<T: Instance>(&self) -> Result<Envelope, BusError> {
+    fn try_read_fd<T: Instance>(&self) -> Option<Result<FdEnvelope, BusError>> {
+        if let Some((frame, ts)) = T::registers().read(0) {
+            let ts = T::calc_timestamp(T::state().ns_per_timer_tick, ts);
+            Some(Ok(FdEnvelope { ts, frame }))
+        } else if let Some((frame, ts)) = T::registers().read(1) {
+            let ts = T::calc_timestamp(T::state().ns_per_timer_tick, ts);
+            Some(Ok(FdEnvelope { ts, frame }))
+        } else if let Some(err) = T::registers().curr_error() {
+            // TODO: this is probably wrong
+            Some(Err(err))
+        } else {
+            None
+        }
+    }
+
+    fn read<T: Instance, F: CanHeader>(&self) -> Option<Result<(F, Timestamp), BusError>> {
+        if let Some((msg, ts)) = T::registers().read(0) {
+            let ts = T::calc_timestamp(T::state().ns_per_timer_tick, ts);
+            Some(Ok((msg, ts)))
+        } else if let Some((msg, ts)) = T::registers().read(1) {
+            let ts = T::calc_timestamp(T::state().ns_per_timer_tick, ts);
+            Some(Ok((msg, ts)))
+        } else if let Some(err) = T::registers().curr_error() {
+            // TODO: this is probably wrong
+            Some(Err(err))
+        } else {
+            None
+        }
+    }
+
+    async fn read_async<T: Instance, F: CanHeader>(&self) -> Result<(F, Timestamp), BusError> {
+        poll_fn(|cx| {
+            T::state().err_waker.register(cx.waker());
+            self.register(cx.waker());
+            match self.read::<T, _>() {
+                Some(result) => Poll::Ready(result),
+                None => Poll::Pending,
+            }
+        })
+        .await
+    }
+
+    async fn read_classic<T: Instance>(&self) -> Result<Envelope, BusError> {
+        match self.read_async::<T, _>().await {
+            Ok((frame, ts)) => Ok(Envelope { ts, frame }),
+            Err(e) => Err(e),
+        }
+    }
+
+    async fn read_fd<T: Instance>(&self) -> Result<FdEnvelope, BusError> {
+        match self.read_async::<T, _>().await {
+            Ok((frame, ts)) => Ok(FdEnvelope { ts, frame }),
+            Err(e) => Err(e),
+        }
     }
 }
 
+enum TxMode {
+    NonBuffered(AtomicWaker),
+    ClassicBuffered(super::common::ClassicBufferedTxInner),
+    FdBuffered(super::common::FdBufferedTxInner),
+}
+
+impl TxMode {
+    fn register(&self, arg: &core::task::Waker) {
+        match self {
+            TxMode::NonBuffered(waker) => {
+                waker.register(arg);
+            }
+            _ => {
+                panic!("Bad mode");
+            }
+        }
+    }
+
+    /// Queues the message to be sent but exerts backpressure.  If a lower-priority
+    /// frame is dropped from the mailbox, it is returned.  If no lower-priority frames
+    /// can be replaced, this call asynchronously waits for a frame to be successfully
+    /// transmitted, then tries again.
+    async fn write_generic<T: Instance, F: embedded_can::Frame + CanHeader>(&self, frame: &F) -> Option<F> {
+        poll_fn(|cx| {
+            self.register(cx.waker());
+
+            if let Ok(dropped) = T::registers().write(frame) {
+                return Poll::Ready(dropped);
+            }
+
+            // Couldn't replace any lower priority frames.  Need to wait for some mailboxes
+            // to clear.
+            Poll::Pending
+        })
+        .await
+    }
+
+    /// Queues the message to be sent but exerts backpressure.  If a lower-priority
+    /// frame is dropped from the mailbox, it is returned.  If no lower-priority frames
+    /// can be replaced, this call asynchronously waits for a frame to be successfully
+    /// transmitted, then tries again.
+    async fn write<T: Instance>(&self, frame: &Frame) -> Option<Frame> {
+        self.write_generic::<T, _>(frame).await
+    }
+
+    /// Queues the message to be sent but exerts backpressure.  If a lower-priority
+    /// frame is dropped from the mailbox, it is returned.  If no lower-priority frames
+    /// can be replaced, this call asynchronously waits for a frame to be successfully
+    /// transmitted, then tries again.
+    async fn write_fd<T: Instance>(&self, frame: &FdFrame) -> Option<FdFrame> {
+        self.write_generic::<T, _>(frame).await
+    }
+}
+
+struct State {
+    pub rx_mode: RxMode,
+    pub tx_mode: TxMode,
+    pub ns_per_timer_tick: u64,
+
+    pub err_waker: AtomicWaker,
+}
+
+impl State {
+    const fn new() -> Self {
+        Self {
+            rx_mode: RxMode::NonBuffered(AtomicWaker::new()),
+            tx_mode: TxMode::NonBuffered(AtomicWaker::new()),
+            ns_per_timer_tick: 0,
+            err_waker: AtomicWaker::new(),
+        }
+    }
+}
+
+trait SealedInstance {
+    const MSG_RAM_OFFSET: usize;
+
+    fn regs() -> &'static crate::pac::can::Fdcan;
+    fn registers() -> crate::can::fd::peripheral::Registers;
+    fn state() -> &'static State;
+    unsafe fn mut_state() -> &'static mut State;
+    fn calc_timestamp(ns_per_timer_tick: u64, ts_val: u16) -> Timestamp;
+}
+
 /// Instance trait
-pub trait Instance: sealed::Instance + RccPeripheral + 'static {
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + RccPeripheral + 'static {
     /// Interrupt 0
     type IT0Interrupt: crate::interrupt::typelevel::Interrupt;
-    /// Interrupt 0
+    /// Interrupt 1
     type IT1Interrupt: crate::interrupt::typelevel::Interrupt;
 }
 
@@ -871,7 +849,7 @@ pub struct FdcanInstance<'a, T>(PeripheralRef<'a, T>);
 
 macro_rules! impl_fdcan {
     ($inst:ident, $msg_ram_inst:ident, $msg_ram_offset:literal) => {
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             const MSG_RAM_OFFSET: usize = $msg_ram_offset;
 
             fn regs() -> &'static crate::pac::can::Fdcan {
@@ -880,14 +858,11 @@ macro_rules! impl_fdcan {
             fn registers() -> Registers {
                 Registers{regs: &crate::pac::$inst, msgram: &crate::pac::$msg_ram_inst, msg_ram_offset: Self::MSG_RAM_OFFSET}
             }
-            fn ram() -> &'static crate::pac::fdcanram::Fdcanram {
-                &crate::pac::$msg_ram_inst
+            unsafe fn mut_state() -> &'static mut State {
+                static mut STATE: State = State::new();
+                &mut *core::ptr::addr_of_mut!(STATE)
             }
-            unsafe fn mut_state() -> & 'static mut sealed::State {
-                static mut STATE: sealed::State = sealed::State::new();
-                & mut STATE
-            }
-            fn state() -> &'static sealed::State {
+            fn state() -> &'static State {
                 unsafe { peripherals::$inst::mut_state() }
             }
 
diff --git a/embassy-stm32/src/can/frame.rs b/embassy-stm32/src/can/frame.rs
index 9c293035d..d2d1f7aa6 100644
--- a/embassy-stm32/src/can/frame.rs
+++ b/embassy-stm32/src/can/frame.rs
@@ -1,6 +1,16 @@
 //! Definition for CAN Frames
 use bit_field::BitField;
 
+use crate::can::enums::FrameCreateError;
+
+/// Calculate proper timestamp when available.
+#[cfg(feature = "time")]
+pub type Timestamp = embassy_time::Instant;
+
+/// Raw register timestamp
+#[cfg(not(feature = "time"))]
+pub type Timestamp = u16;
+
 /// CAN Header, without meta data
 #[derive(Debug, Copy, Clone)]
 pub struct Header {
@@ -9,6 +19,20 @@ pub struct Header {
     flags: u8,
 }
 
+#[cfg(feature = "defmt")]
+impl defmt::Format for Header {
+    fn format(&self, fmt: defmt::Formatter<'_>) {
+        match self.id() {
+            embedded_can::Id::Standard(id) => {
+                defmt::write!(fmt, "Can Standard ID={:x} len={}", id.as_raw(), self.len,)
+            }
+            embedded_can::Id::Extended(id) => {
+                defmt::write!(fmt, "Can Extended ID={:x} len={}", id.as_raw(), self.len,)
+            }
+        }
+    }
+}
+
 impl Header {
     const FLAG_RTR: usize = 0; // Remote
     const FLAG_FDCAN: usize = 1; // FDCan vs Classic CAN
@@ -54,13 +78,21 @@ impl Header {
     pub fn bit_rate_switching(&self) -> bool {
         self.flags.get_bit(Self::FLAG_BRS)
     }
+
+    /// Get priority of frame
+    pub(crate) fn priority(&self) -> u32 {
+        match self.id() {
+            embedded_can::Id::Standard(id) => (id.as_raw() as u32) << 18,
+            embedded_can::Id::Extended(id) => id.as_raw(),
+        }
+    }
 }
 
 /// Trait for FDCAN frame types, providing ability to construct from a Header
 /// and to retrieve the Header from a frame
 pub trait CanHeader: Sized {
     /// Construct frame from header and payload
-    fn from_header(header: Header, data: &[u8]) -> Option<Self>;
+    fn from_header(header: Header, data: &[u8]) -> Result<Self, FrameCreateError>;
 
     /// Get this frame's header struct
     fn header(&self) -> &Header;
@@ -70,24 +102,26 @@ pub trait CanHeader: Sized {
 ///
 /// Contains 0 to 8 Bytes of data.
 #[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
 pub struct ClassicData {
-    pub(crate) bytes: [u8; 8],
+    pub(crate) bytes: [u8; Self::MAX_DATA_LEN],
 }
 
 impl ClassicData {
+    pub(crate) const MAX_DATA_LEN: usize = 8;
     /// Creates a data payload from a raw byte slice.
     ///
     /// Returns `None` if `data` is more than 64 bytes (which is the maximum) or
     /// cannot be represented with an FDCAN DLC.
-    pub fn new(data: &[u8]) -> Option<Self> {
-        if !FdData::is_valid_len(data.len()) {
-            return None;
+    pub fn new(data: &[u8]) -> Result<Self, FrameCreateError> {
+        if data.len() > 8 {
+            return Err(FrameCreateError::InvalidDataLength);
         }
 
         let mut bytes = [0; 8];
         bytes[..data.len()].copy_from_slice(data);
 
-        Some(Self { bytes })
+        Ok(Self { bytes })
     }
 
     /// Raw read access to data.
@@ -110,60 +144,53 @@ impl ClassicData {
     }
 }
 
-/// Frame with up to 8 bytes of data payload as per Classic CAN
+/// Frame with up to 8 bytes of data payload as per Classic(non-FD) CAN
+/// For CAN-FD support use FdFrame
 #[derive(Debug, Copy, Clone)]
-pub struct ClassicFrame {
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct Frame {
     can_header: Header,
     data: ClassicData,
 }
 
-impl ClassicFrame {
-    pub(crate) const MAX_DATA_LEN: usize = 8;
-
+impl Frame {
     /// Create a new CAN classic Frame
-    pub fn new(can_header: Header, data: ClassicData) -> ClassicFrame {
-        ClassicFrame { can_header, data }
+    pub fn new(can_header: Header, raw_data: &[u8]) -> Result<Self, FrameCreateError> {
+        let data = ClassicData::new(raw_data)?;
+        Ok(Frame { can_header, data: data })
+    }
+
+    /// Creates a new data frame.
+    pub fn new_data(id: impl Into<embedded_can::Id>, data: &[u8]) -> Result<Self, FrameCreateError> {
+        let eid: embedded_can::Id = id.into();
+        let header = Header::new(eid, data.len() as u8, false);
+        Self::new(header, data)
     }
 
     /// Create new extended frame
-    pub fn new_extended(raw_id: u32, raw_data: &[u8]) -> Option<Self> {
+    pub fn new_extended(raw_id: u32, raw_data: &[u8]) -> Result<Self, FrameCreateError> {
         if let Some(id) = embedded_can::ExtendedId::new(raw_id) {
-            match ClassicData::new(raw_data) {
-                Some(data) => Some(ClassicFrame::new(
-                    Header::new(id.into(), raw_data.len() as u8, false),
-                    data,
-                )),
-                None => None,
-            }
+            Self::new(Header::new(id.into(), raw_data.len() as u8, false), raw_data)
         } else {
-            None
+            Err(FrameCreateError::InvalidCanId)
         }
     }
 
     /// Create new standard frame
-    pub fn new_standard(raw_id: u16, raw_data: &[u8]) -> Option<Self> {
+    pub fn new_standard(raw_id: u16, raw_data: &[u8]) -> Result<Self, FrameCreateError> {
         if let Some(id) = embedded_can::StandardId::new(raw_id) {
-            match ClassicData::new(raw_data) {
-                Some(data) => Some(ClassicFrame::new(
-                    Header::new(id.into(), raw_data.len() as u8, false),
-                    data,
-                )),
-                None => None,
-            }
+            Self::new(Header::new(id.into(), raw_data.len() as u8, false), raw_data)
         } else {
-            None
+            Err(FrameCreateError::InvalidCanId)
         }
     }
 
     /// Create new remote frame
-    pub fn new_remote(id: impl Into<embedded_can::Id>, len: usize) -> Option<Self> {
+    pub fn new_remote(id: impl Into<embedded_can::Id>, len: usize) -> Result<Self, FrameCreateError> {
         if len <= 8usize {
-            Some(ClassicFrame::new(
-                Header::new(id.into(), len as u8, true),
-                ClassicData::empty(),
-            ))
+            Self::new(Header::new(id.into(), len as u8, true), &[0; 8])
         } else {
-            None
+            Err(FrameCreateError::InvalidDataLength)
         }
     }
 
@@ -181,24 +208,28 @@ impl ClassicFrame {
     pub fn data(&self) -> &[u8] {
         &self.data.raw()
     }
+
+    /// Get priority of frame
+    pub fn priority(&self) -> u32 {
+        self.header().priority()
+    }
 }
 
-impl embedded_can::Frame for ClassicFrame {
+impl embedded_can::Frame for Frame {
     fn new(id: impl Into<embedded_can::Id>, raw_data: &[u8]) -> Option<Self> {
-        match ClassicData::new(raw_data) {
-            Some(data) => Some(ClassicFrame::new(
-                Header::new(id.into(), raw_data.len() as u8, false),
-                data,
-            )),
-            None => None,
+        let frameopt = Frame::new(Header::new(id.into(), raw_data.len() as u8, false), raw_data);
+        match frameopt {
+            Ok(frame) => Some(frame),
+            Err(_) => None,
         }
     }
     fn new_remote(id: impl Into<embedded_can::Id>, len: usize) -> Option<Self> {
         if len <= 8 {
-            Some(ClassicFrame::new(
-                Header::new(id.into(), len as u8, true),
-                ClassicData::empty(),
-            ))
+            let frameopt = Frame::new(Header::new(id.into(), len as u8, true), &[0; 8]);
+            match frameopt {
+                Ok(frame) => Some(frame),
+                Err(_) => None,
+            }
         } else {
             None
         }
@@ -223,9 +254,9 @@ impl embedded_can::Frame for ClassicFrame {
     }
 }
 
-impl CanHeader for ClassicFrame {
-    fn from_header(header: Header, data: &[u8]) -> Option<Self> {
-        Some(Self::new(header, ClassicData::new(data)?))
+impl CanHeader for Frame {
+    fn from_header(header: Header, data: &[u8]) -> Result<Self, FrameCreateError> {
+        Self::new(header, data)
     }
 
     fn header(&self) -> &Header {
@@ -233,10 +264,31 @@ impl CanHeader for ClassicFrame {
     }
 }
 
+/// Contains CAN frame and additional metadata.
+///
+/// Timestamp is available if `time` feature is enabled.
+/// For CAN-FD support use FdEnvelope
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct Envelope {
+    /// Reception time.
+    pub ts: Timestamp,
+    /// The actual CAN frame.
+    pub frame: Frame,
+}
+
+impl Envelope {
+    /// Convert into a tuple
+    pub fn parts(self) -> (Frame, Timestamp) {
+        (self.frame, self.ts)
+    }
+}
+
 /// Payload of a (FD)CAN data frame.
 ///
 /// Contains 0 to 64 Bytes of data.
 #[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
 pub struct FdData {
     pub(crate) bytes: [u8; 64],
 }
@@ -246,15 +298,15 @@ impl FdData {
     ///
     /// Returns `None` if `data` is more than 64 bytes (which is the maximum) or
     /// cannot be represented with an FDCAN DLC.
-    pub fn new(data: &[u8]) -> Option<Self> {
+    pub fn new(data: &[u8]) -> Result<Self, FrameCreateError> {
         if !FdData::is_valid_len(data.len()) {
-            return None;
+            return Err(FrameCreateError::InvalidDataLength);
         }
 
         let mut bytes = [0; 64];
         bytes[..data.len()].copy_from_slice(data);
 
-        Some(Self { bytes })
+        Ok(Self { bytes })
     }
 
     /// Raw read access to data.
@@ -286,6 +338,7 @@ impl FdData {
 
 /// Frame with up to 8 bytes of data payload as per Fd CAN
 #[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
 pub struct FdFrame {
     can_header: Header,
     data: FdData,
@@ -293,40 +346,35 @@ pub struct FdFrame {
 
 impl FdFrame {
     /// Create a new CAN classic Frame
-    pub fn new(can_header: Header, data: FdData) -> FdFrame {
-        FdFrame { can_header, data }
+    pub fn new(can_header: Header, raw_data: &[u8]) -> Result<Self, FrameCreateError> {
+        let data = FdData::new(raw_data)?;
+        Ok(FdFrame { can_header, data })
     }
 
     /// Create new extended frame
-    pub fn new_extended(raw_id: u32, raw_data: &[u8]) -> Option<Self> {
+    pub fn new_extended(raw_id: u32, raw_data: &[u8]) -> Result<Self, FrameCreateError> {
         if let Some(id) = embedded_can::ExtendedId::new(raw_id) {
-            match FdData::new(raw_data) {
-                Some(data) => Some(FdFrame::new(Header::new(id.into(), raw_data.len() as u8, false), data)),
-                None => None,
-            }
+            Self::new(Header::new(id.into(), raw_data.len() as u8, false), raw_data)
         } else {
-            None
+            Err(FrameCreateError::InvalidCanId)
         }
     }
 
     /// Create new standard frame
-    pub fn new_standard(raw_id: u16, raw_data: &[u8]) -> Option<Self> {
+    pub fn new_standard(raw_id: u16, raw_data: &[u8]) -> Result<Self, FrameCreateError> {
         if let Some(id) = embedded_can::StandardId::new(raw_id) {
-            match FdData::new(raw_data) {
-                Some(data) => Some(FdFrame::new(Header::new(id.into(), raw_data.len() as u8, false), data)),
-                None => None,
-            }
+            Self::new(Header::new(id.into(), raw_data.len() as u8, false), raw_data)
         } else {
-            None
+            Err(FrameCreateError::InvalidCanId)
         }
     }
 
     /// Create new remote frame
-    pub fn new_remote(id: impl Into<embedded_can::Id>, len: usize) -> Option<Self> {
+    pub fn new_remote(id: impl Into<embedded_can::Id>, len: usize) -> Result<Self, FrameCreateError> {
         if len <= 8 {
-            Some(FdFrame::new(Header::new(id.into(), len as u8, true), FdData::empty()))
+            Self::new(Header::new(id.into(), len as u8, true), &[0; 8])
         } else {
-            None
+            Err(FrameCreateError::InvalidDataLength)
         }
     }
 
@@ -348,20 +396,17 @@ impl FdFrame {
 
 impl embedded_can::Frame for FdFrame {
     fn new(id: impl Into<embedded_can::Id>, raw_data: &[u8]) -> Option<Self> {
-        match FdData::new(raw_data) {
-            Some(data) => Some(FdFrame::new(
-                Header::new_fd(id.into(), raw_data.len() as u8, false, true),
-                data,
-            )),
-            None => None,
+        match FdFrame::new(Header::new_fd(id.into(), raw_data.len() as u8, false, true), raw_data) {
+            Ok(frame) => Some(frame),
+            Err(_) => None,
         }
     }
     fn new_remote(id: impl Into<embedded_can::Id>, len: usize) -> Option<Self> {
         if len <= 8 {
-            Some(FdFrame::new(
-                Header::new_fd(id.into(), len as u8, true, true),
-                FdData::empty(),
-            ))
+            match FdFrame::new(Header::new_fd(id.into(), len as u8, true, true), &[0; 64]) {
+                Ok(frame) => Some(frame),
+                Err(_) => None,
+            }
         } else {
             None
         }
@@ -388,11 +433,31 @@ impl embedded_can::Frame for FdFrame {
 }
 
 impl CanHeader for FdFrame {
-    fn from_header(header: Header, data: &[u8]) -> Option<Self> {
-        Some(Self::new(header, FdData::new(data)?))
+    fn from_header(header: Header, data: &[u8]) -> Result<Self, FrameCreateError> {
+        Self::new(header, data)
     }
 
     fn header(&self) -> &Header {
         self.header()
     }
 }
+
+/// Contains CAN FD frame and additional metadata.
+///
+/// Timestamp is available if `time` feature is enabled.
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct FdEnvelope {
+    /// Reception time.
+    pub ts: Timestamp,
+
+    /// The actual CAN frame.
+    pub frame: FdFrame,
+}
+
+impl FdEnvelope {
+    /// Convert into a tuple
+    pub fn parts(self) -> (FdFrame, Timestamp) {
+        (self.frame, self.ts)
+    }
+}
diff --git a/embassy-stm32/src/can/mod.rs b/embassy-stm32/src/can/mod.rs
index 915edb3a6..410a6bfcb 100644
--- a/embassy-stm32/src/can/mod.rs
+++ b/embassy-stm32/src/can/mod.rs
@@ -1,7 +1,14 @@
 //! Controller Area Network (CAN)
 #![macro_use]
 
-#[cfg_attr(can_bxcan, path = "bxcan.rs")]
+#[cfg_attr(can_bxcan, path = "bxcan/mod.rs")]
 #[cfg_attr(any(can_fdcan_v1, can_fdcan_h7), path = "fdcan.rs")]
 mod _version;
 pub use _version::*;
+
+mod common;
+pub mod enums;
+pub mod frame;
+pub mod util;
+
+pub use frame::Frame;
diff --git a/embassy-stm32/src/crc/v1.rs b/embassy-stm32/src/crc/v1.rs
index 0166ab819..f8909d438 100644
--- a/embassy-stm32/src/crc/v1.rs
+++ b/embassy-stm32/src/crc/v1.rs
@@ -2,7 +2,7 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 
 use crate::pac::CRC as PAC_CRC;
 use crate::peripherals::CRC;
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 use crate::Peripheral;
 
 /// CRC driver.
diff --git a/embassy-stm32/src/crc/v2v3.rs b/embassy-stm32/src/crc/v2v3.rs
index 0c4ae55ce..46f5ea1be 100644
--- a/embassy-stm32/src/crc/v2v3.rs
+++ b/embassy-stm32/src/crc/v2v3.rs
@@ -3,7 +3,7 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 use crate::pac::crc::vals;
 use crate::pac::CRC as PAC_CRC;
 use crate::peripherals::CRC;
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 use crate::Peripheral;
 
 /// CRC driver.
diff --git a/embassy-stm32/src/cryp/mod.rs b/embassy-stm32/src/cryp/mod.rs
index 8f259520a..18b5ec918 100644
--- a/embassy-stm32/src/cryp/mod.rs
+++ b/embassy-stm32/src/cryp/mod.rs
@@ -2,14 +2,39 @@
 #[cfg(any(cryp_v2, cryp_v3))]
 use core::cmp::min;
 use core::marker::PhantomData;
+use core::ptr;
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 
+use crate::dma::{NoDma, Priority, Transfer, TransferOptions};
+use crate::interrupt::typelevel::Interrupt;
 use crate::{interrupt, pac, peripherals, Peripheral};
 
 const DES_BLOCK_SIZE: usize = 8; // 64 bits
 const AES_BLOCK_SIZE: usize = 16; // 128 bits
 
+static CRYP_WAKER: AtomicWaker = AtomicWaker::new();
+
+/// CRYP interrupt handler.
+pub struct InterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        let bits = T::regs().misr().read();
+        if bits.inmis() {
+            T::regs().imscr().modify(|w| w.set_inim(false));
+            CRYP_WAKER.wake();
+        }
+        if bits.outmis() {
+            T::regs().imscr().modify(|w| w.set_outim(false));
+            CRYP_WAKER.wake();
+        }
+    }
+}
+
 /// This trait encapsulates all cipher-specific behavior/
 pub trait Cipher<'c> {
     /// Processing block size. Determined by the processor and the algorithm.
@@ -32,17 +57,26 @@ pub trait Cipher<'c> {
     fn prepare_key(&self, _p: &pac::cryp::Cryp) {}
 
     /// Performs any cipher-specific initialization.
-    fn init_phase(&self, _p: &pac::cryp::Cryp) {}
+    fn init_phase_blocking<T: Instance, DmaIn, DmaOut>(&self, _p: &pac::cryp::Cryp, _cryp: &Cryp<T, DmaIn, DmaOut>) {}
+
+    /// Performs any cipher-specific initialization.
+    async fn init_phase<T: Instance, DmaIn, DmaOut>(&self, _p: &pac::cryp::Cryp, _cryp: &mut Cryp<'_, T, DmaIn, DmaOut>)
+    where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+    }
 
     /// Called prior to processing the last data block for cipher-specific operations.
-    fn pre_final_block(&self, _p: &pac::cryp::Cryp, _dir: Direction, _padding_len: usize) -> [u32; 4] {
+    fn pre_final(&self, _p: &pac::cryp::Cryp, _dir: Direction, _padding_len: usize) -> [u32; 4] {
         return [0; 4];
     }
 
     /// Called after processing the last data block for cipher-specific operations.
-    fn post_final_block(
+    fn post_final_blocking<T: Instance, DmaIn, DmaOut>(
         &self,
         _p: &pac::cryp::Cryp,
+        _cryp: &Cryp<T, DmaIn, DmaOut>,
         _dir: Direction,
         _int_data: &mut [u8; AES_BLOCK_SIZE],
         _temp1: [u32; 4],
@@ -50,7 +84,22 @@ pub trait Cipher<'c> {
     ) {
     }
 
-    /// Called prior to processing the first associated data block for cipher-specific operations.
+    /// Called after processing the last data block for cipher-specific operations.
+    async fn post_final<T: Instance, DmaIn, DmaOut>(
+        &self,
+        _p: &pac::cryp::Cryp,
+        _cryp: &mut Cryp<'_, T, DmaIn, DmaOut>,
+        _dir: Direction,
+        _int_data: &mut [u8; AES_BLOCK_SIZE],
+        _temp1: [u32; 4],
+        _padding_mask: [u8; 16],
+    ) where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+    }
+
+    /// Returns the AAD header block as required by the cipher.
     fn get_header_block(&self) -> &[u8] {
         return [0; 0].as_slice();
     }
@@ -425,14 +474,24 @@ impl<'c, const KEY_SIZE: usize> Cipher<'c> for AesGcm<'c, KEY_SIZE> {
         p.cr().modify(|w| w.set_algomode3(true));
     }
 
-    fn init_phase(&self, p: &pac::cryp::Cryp) {
+    fn init_phase_blocking<T: Instance, DmaIn, DmaOut>(&self, p: &pac::cryp::Cryp, _cryp: &Cryp<T, DmaIn, DmaOut>) {
+        p.cr().modify(|w| w.set_gcm_ccmph(0));
+        p.cr().modify(|w| w.set_crypen(true));
+        while p.cr().read().crypen() {}
+    }
+
+    async fn init_phase<T: Instance, DmaIn, DmaOut>(
+        &self,
+        p: &pac::cryp::Cryp,
+        _cryp: &mut Cryp<'_, T, DmaIn, DmaOut>,
+    ) {
         p.cr().modify(|w| w.set_gcm_ccmph(0));
         p.cr().modify(|w| w.set_crypen(true));
         while p.cr().read().crypen() {}
     }
 
     #[cfg(cryp_v2)]
-    fn pre_final_block(&self, p: &pac::cryp::Cryp, dir: Direction, _padding_len: usize) -> [u32; 4] {
+    fn pre_final(&self, p: &pac::cryp::Cryp, dir: Direction, _padding_len: usize) -> [u32; 4] {
         //Handle special GCM partial block process.
         if dir == Direction::Encrypt {
             p.cr().modify(|w| w.set_crypen(false));
@@ -446,16 +505,17 @@ impl<'c, const KEY_SIZE: usize> Cipher<'c> for AesGcm<'c, KEY_SIZE> {
     }
 
     #[cfg(cryp_v3)]
-    fn pre_final_block(&self, p: &pac::cryp::Cryp, _dir: Direction, padding_len: usize) -> [u32; 4] {
+    fn pre_final(&self, p: &pac::cryp::Cryp, _dir: Direction, padding_len: usize) -> [u32; 4] {
         //Handle special GCM partial block process.
         p.cr().modify(|w| w.set_npblb(padding_len as u8));
         [0; 4]
     }
 
     #[cfg(cryp_v2)]
-    fn post_final_block(
+    fn post_final_blocking<T: Instance, DmaIn, DmaOut>(
         &self,
         p: &pac::cryp::Cryp,
+        cryp: &Cryp<T, DmaIn, DmaOut>,
         dir: Direction,
         int_data: &mut [u8; AES_BLOCK_SIZE],
         _temp1: [u32; 4],
@@ -471,17 +531,44 @@ impl<'c, const KEY_SIZE: usize> Cipher<'c> for AesGcm<'c, KEY_SIZE> {
             }
             p.cr().modify(|w| w.set_crypen(true));
             p.cr().modify(|w| w.set_gcm_ccmph(3));
-            let mut index = 0;
-            let end_index = Self::BLOCK_SIZE;
-            while index < end_index {
-                let mut in_word: [u8; 4] = [0; 4];
-                in_word.copy_from_slice(&int_data[index..index + 4]);
-                p.din().write_value(u32::from_ne_bytes(in_word));
-                index += 4;
-            }
-            for _ in 0..4 {
-                p.dout().read();
+
+            cryp.write_bytes_blocking(Self::BLOCK_SIZE, int_data);
+            cryp.read_bytes_blocking(Self::BLOCK_SIZE, int_data);
+        }
+    }
+
+    #[cfg(cryp_v2)]
+    async fn post_final<T: Instance, DmaIn, DmaOut>(
+        &self,
+        p: &pac::cryp::Cryp,
+        cryp: &mut Cryp<'_, T, DmaIn, DmaOut>,
+        dir: Direction,
+        int_data: &mut [u8; AES_BLOCK_SIZE],
+        _temp1: [u32; 4],
+        padding_mask: [u8; AES_BLOCK_SIZE],
+    ) where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        if dir == Direction::Encrypt {
+            // Handle special GCM partial block process.
+            p.cr().modify(|w| w.set_crypen(false));
+            p.cr().modify(|w| w.set_algomode3(true));
+            p.cr().modify(|w| w.set_algomode0(0));
+            for i in 0..AES_BLOCK_SIZE {
+                int_data[i] = int_data[i] & padding_mask[i];
             }
+            p.cr().modify(|w| w.set_crypen(true));
+            p.cr().modify(|w| w.set_gcm_ccmph(3));
+
+            let mut out_data: [u8; AES_BLOCK_SIZE] = [0; AES_BLOCK_SIZE];
+
+            let read = Cryp::<T, DmaIn, DmaOut>::read_bytes(&mut cryp.outdma, Self::BLOCK_SIZE, &mut out_data);
+            let write = Cryp::<T, DmaIn, DmaOut>::write_bytes(&mut cryp.indma, Self::BLOCK_SIZE, int_data);
+
+            embassy_futures::join::join(read, write).await;
+
+            int_data.copy_from_slice(&out_data);
         }
     }
 }
@@ -532,14 +619,24 @@ impl<'c, const KEY_SIZE: usize> Cipher<'c> for AesGmac<'c, KEY_SIZE> {
         p.cr().modify(|w| w.set_algomode3(true));
     }
 
-    fn init_phase(&self, p: &pac::cryp::Cryp) {
+    fn init_phase_blocking<T: Instance, DmaIn, DmaOut>(&self, p: &pac::cryp::Cryp, _cryp: &Cryp<T, DmaIn, DmaOut>) {
+        p.cr().modify(|w| w.set_gcm_ccmph(0));
+        p.cr().modify(|w| w.set_crypen(true));
+        while p.cr().read().crypen() {}
+    }
+
+    async fn init_phase<T: Instance, DmaIn, DmaOut>(
+        &self,
+        p: &pac::cryp::Cryp,
+        _cryp: &mut Cryp<'_, T, DmaIn, DmaOut>,
+    ) {
         p.cr().modify(|w| w.set_gcm_ccmph(0));
         p.cr().modify(|w| w.set_crypen(true));
         while p.cr().read().crypen() {}
     }
 
     #[cfg(cryp_v2)]
-    fn pre_final_block(&self, p: &pac::cryp::Cryp, dir: Direction, _padding_len: usize) -> [u32; 4] {
+    fn pre_final(&self, p: &pac::cryp::Cryp, dir: Direction, _padding_len: usize) -> [u32; 4] {
         //Handle special GCM partial block process.
         if dir == Direction::Encrypt {
             p.cr().modify(|w| w.set_crypen(false));
@@ -553,16 +650,17 @@ impl<'c, const KEY_SIZE: usize> Cipher<'c> for AesGmac<'c, KEY_SIZE> {
     }
 
     #[cfg(cryp_v3)]
-    fn pre_final_block(&self, p: &pac::cryp::Cryp, _dir: Direction, padding_len: usize) -> [u32; 4] {
+    fn pre_final(&self, p: &pac::cryp::Cryp, _dir: Direction, padding_len: usize) -> [u32; 4] {
         //Handle special GCM partial block process.
         p.cr().modify(|w| w.set_npblb(padding_len as u8));
         [0; 4]
     }
 
     #[cfg(cryp_v2)]
-    fn post_final_block(
+    fn post_final_blocking<T: Instance, DmaIn, DmaOut>(
         &self,
         p: &pac::cryp::Cryp,
+        cryp: &Cryp<T, DmaIn, DmaOut>,
         dir: Direction,
         int_data: &mut [u8; AES_BLOCK_SIZE],
         _temp1: [u32; 4],
@@ -578,17 +676,42 @@ impl<'c, const KEY_SIZE: usize> Cipher<'c> for AesGmac<'c, KEY_SIZE> {
             }
             p.cr().modify(|w| w.set_crypen(true));
             p.cr().modify(|w| w.set_gcm_ccmph(3));
-            let mut index = 0;
-            let end_index = Self::BLOCK_SIZE;
-            while index < end_index {
-                let mut in_word: [u8; 4] = [0; 4];
-                in_word.copy_from_slice(&int_data[index..index + 4]);
-                p.din().write_value(u32::from_ne_bytes(in_word));
-                index += 4;
-            }
-            for _ in 0..4 {
-                p.dout().read();
+
+            cryp.write_bytes_blocking(Self::BLOCK_SIZE, int_data);
+            cryp.read_bytes_blocking(Self::BLOCK_SIZE, int_data);
+        }
+    }
+
+    #[cfg(cryp_v2)]
+    async fn post_final<T: Instance, DmaIn, DmaOut>(
+        &self,
+        p: &pac::cryp::Cryp,
+        cryp: &mut Cryp<'_, T, DmaIn, DmaOut>,
+        dir: Direction,
+        int_data: &mut [u8; AES_BLOCK_SIZE],
+        _temp1: [u32; 4],
+        padding_mask: [u8; AES_BLOCK_SIZE],
+    ) where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        if dir == Direction::Encrypt {
+            // Handle special GCM partial block process.
+            p.cr().modify(|w| w.set_crypen(false));
+            p.cr().modify(|w| w.set_algomode3(true));
+            p.cr().modify(|w| w.set_algomode0(0));
+            for i in 0..AES_BLOCK_SIZE {
+                int_data[i] = int_data[i] & padding_mask[i];
             }
+            p.cr().modify(|w| w.set_crypen(true));
+            p.cr().modify(|w| w.set_gcm_ccmph(3));
+
+            let mut out_data: [u8; AES_BLOCK_SIZE] = [0; AES_BLOCK_SIZE];
+
+            let read = Cryp::<T, DmaIn, DmaOut>::read_bytes(&mut cryp.outdma, Self::BLOCK_SIZE, &mut out_data);
+            let write = Cryp::<T, DmaIn, DmaOut>::write_bytes(&mut cryp.indma, Self::BLOCK_SIZE, int_data);
+
+            embassy_futures::join::join(read, write).await;
         }
     }
 }
@@ -697,18 +820,24 @@ impl<'c, const KEY_SIZE: usize, const TAG_SIZE: usize, const IV_SIZE: usize> Cip
         p.cr().modify(|w| w.set_algomode3(true));
     }
 
-    fn init_phase(&self, p: &pac::cryp::Cryp) {
+    fn init_phase_blocking<T: Instance, DmaIn, DmaOut>(&self, p: &pac::cryp::Cryp, cryp: &Cryp<T, DmaIn, DmaOut>) {
         p.cr().modify(|w| w.set_gcm_ccmph(0));
 
-        let mut index = 0;
-        let end_index = index + Self::BLOCK_SIZE;
-        // Write block in
-        while index < end_index {
-            let mut in_word: [u8; 4] = [0; 4];
-            in_word.copy_from_slice(&self.block0[index..index + 4]);
-            p.din().write_value(u32::from_ne_bytes(in_word));
-            index += 4;
-        }
+        cryp.write_bytes_blocking(Self::BLOCK_SIZE, &self.block0);
+
+        p.cr().modify(|w| w.set_crypen(true));
+        while p.cr().read().crypen() {}
+    }
+
+    async fn init_phase<T: Instance, DmaIn, DmaOut>(&self, p: &pac::cryp::Cryp, cryp: &mut Cryp<'_, T, DmaIn, DmaOut>)
+    where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        p.cr().modify(|w| w.set_gcm_ccmph(0));
+
+        Cryp::<T, DmaIn, DmaOut>::write_bytes(&mut cryp.indma, Self::BLOCK_SIZE, &self.block0).await;
+
         p.cr().modify(|w| w.set_crypen(true));
         while p.cr().read().crypen() {}
     }
@@ -718,7 +847,7 @@ impl<'c, const KEY_SIZE: usize, const TAG_SIZE: usize, const IV_SIZE: usize> Cip
     }
 
     #[cfg(cryp_v2)]
-    fn pre_final_block(&self, p: &pac::cryp::Cryp, dir: Direction, _padding_len: usize) -> [u32; 4] {
+    fn pre_final(&self, p: &pac::cryp::Cryp, dir: Direction, _padding_len: usize) -> [u32; 4] {
         //Handle special CCM partial block process.
         let mut temp1 = [0; 4];
         if dir == Direction::Decrypt {
@@ -737,16 +866,17 @@ impl<'c, const KEY_SIZE: usize, const TAG_SIZE: usize, const IV_SIZE: usize> Cip
     }
 
     #[cfg(cryp_v3)]
-    fn pre_final_block(&self, p: &pac::cryp::Cryp, _dir: Direction, padding_len: usize) -> [u32; 4] {
+    fn pre_final(&self, p: &pac::cryp::Cryp, _dir: Direction, padding_len: usize) -> [u32; 4] {
         //Handle special GCM partial block process.
         p.cr().modify(|w| w.set_npblb(padding_len as u8));
         [0; 4]
     }
 
     #[cfg(cryp_v2)]
-    fn post_final_block(
+    fn post_final_blocking<T: Instance, DmaIn, DmaOut>(
         &self,
         p: &pac::cryp::Cryp,
+        cryp: &Cryp<T, DmaIn, DmaOut>,
         dir: Direction,
         int_data: &mut [u8; AES_BLOCK_SIZE],
         temp1: [u32; 4],
@@ -774,8 +904,48 @@ impl<'c, const KEY_SIZE: usize, const TAG_SIZE: usize, const IV_SIZE: usize> Cip
                 let int_word = u32::from_le_bytes(int_bytes);
                 in_data[i] = int_word;
                 in_data[i] = in_data[i] ^ temp1[i] ^ temp2[i];
-                p.din().write_value(in_data[i]);
             }
+            cryp.write_words_blocking(Self::BLOCK_SIZE, &in_data);
+        }
+    }
+
+    #[cfg(cryp_v2)]
+    async fn post_final<T: Instance, DmaIn, DmaOut>(
+        &self,
+        p: &pac::cryp::Cryp,
+        cryp: &mut Cryp<'_, T, DmaIn, DmaOut>,
+        dir: Direction,
+        int_data: &mut [u8; AES_BLOCK_SIZE],
+        temp1: [u32; 4],
+        padding_mask: [u8; 16],
+    ) where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        if dir == Direction::Decrypt {
+            //Handle special CCM partial block process.
+            let mut temp2 = [0; 4];
+            temp2[0] = p.csgcmccmr(0).read().swap_bytes();
+            temp2[1] = p.csgcmccmr(1).read().swap_bytes();
+            temp2[2] = p.csgcmccmr(2).read().swap_bytes();
+            temp2[3] = p.csgcmccmr(3).read().swap_bytes();
+            p.cr().modify(|w| w.set_algomode3(true));
+            p.cr().modify(|w| w.set_algomode0(1));
+            p.cr().modify(|w| w.set_gcm_ccmph(3));
+            // Header phase
+            p.cr().modify(|w| w.set_gcm_ccmph(1));
+            for i in 0..AES_BLOCK_SIZE {
+                int_data[i] = int_data[i] & padding_mask[i];
+            }
+            let mut in_data: [u32; 4] = [0; 4];
+            for i in 0..in_data.len() {
+                let mut int_bytes: [u8; 4] = [0; 4];
+                int_bytes.copy_from_slice(&int_data[(i * 4)..(i * 4) + 4]);
+                let int_word = u32::from_le_bytes(int_bytes);
+                in_data[i] = int_word;
+                in_data[i] = in_data[i] ^ temp1[i] ^ temp2[i];
+            }
+            Cryp::<T, DmaIn, DmaOut>::write_words(&mut cryp.indma, Self::BLOCK_SIZE, &in_data).await;
         }
     }
 }
@@ -845,24 +1015,40 @@ pub enum Direction {
 }
 
 /// Crypto Accelerator Driver
-pub struct Cryp<'d, T: Instance> {
+pub struct Cryp<'d, T: Instance, DmaIn = NoDma, DmaOut = NoDma> {
     _peripheral: PeripheralRef<'d, T>,
+    indma: PeripheralRef<'d, DmaIn>,
+    outdma: PeripheralRef<'d, DmaOut>,
 }
 
-impl<'d, T: Instance> Cryp<'d, T> {
+impl<'d, T: Instance, DmaIn, DmaOut> Cryp<'d, T, DmaIn, DmaOut> {
     /// Create a new CRYP driver.
-    pub fn new(peri: impl Peripheral<P = T> + 'd) -> Self {
+    pub fn new(
+        peri: impl Peripheral<P = T> + 'd,
+        indma: impl Peripheral<P = DmaIn> + 'd,
+        outdma: impl Peripheral<P = DmaOut> + 'd,
+        _irq: impl interrupt::typelevel::Binding<T::Interrupt, InterruptHandler<T>> + 'd,
+    ) -> Self {
         T::enable_and_reset();
-        into_ref!(peri);
-        let instance = Self { _peripheral: peri };
+        into_ref!(peri, indma, outdma);
+        let instance = Self {
+            _peripheral: peri,
+            indma: indma,
+            outdma: outdma,
+        };
+
+        T::Interrupt::unpend();
+        unsafe { T::Interrupt::enable() };
+
         instance
     }
 
-    /// Start a new cipher operation.
-    /// Key size must be 128, 192, or 256 bits.
-    /// Initialization vector must only be supplied if necessary.
-    /// Panics if there is any mismatch in parameters, such as an incorrect IV length or invalid mode.
-    pub fn start<'c, C: Cipher<'c> + CipherSized + IVSized>(&self, cipher: &'c C, dir: Direction) -> Context<'c, C> {
+    /// Start a new encrypt or decrypt operation for the given cipher.
+    pub fn start_blocking<'c, C: Cipher<'c> + CipherSized + IVSized>(
+        &self,
+        cipher: &'c C,
+        dir: Direction,
+    ) -> Context<'c, C> {
         let mut ctx: Context<'c, C> = Context {
             dir,
             last_block_processed: false,
@@ -929,7 +1115,90 @@ impl<'d, T: Instance> Cryp<'d, T> {
         // Flush in/out FIFOs
         T::regs().cr().modify(|w| w.fflush());
 
-        ctx.cipher.init_phase(&T::regs());
+        ctx.cipher.init_phase_blocking(&T::regs(), self);
+
+        self.store_context(&mut ctx);
+
+        ctx
+    }
+
+    /// Start a new encrypt or decrypt operation for the given cipher.
+    pub async fn start<'c, C: Cipher<'c> + CipherSized + IVSized>(
+        &mut self,
+        cipher: &'c C,
+        dir: Direction,
+    ) -> Context<'c, C>
+    where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        let mut ctx: Context<'c, C> = Context {
+            dir,
+            last_block_processed: false,
+            cr: 0,
+            iv: [0; 4],
+            csgcmccm: [0; 8],
+            csgcm: [0; 8],
+            aad_complete: false,
+            header_len: 0,
+            payload_len: 0,
+            cipher: cipher,
+            phantom_data: PhantomData,
+            header_processed: false,
+            aad_buffer: [0; 16],
+            aad_buffer_len: 0,
+        };
+
+        T::regs().cr().modify(|w| w.set_crypen(false));
+
+        let key = ctx.cipher.key();
+
+        if key.len() == (128 / 8) {
+            T::regs().cr().modify(|w| w.set_keysize(0));
+        } else if key.len() == (192 / 8) {
+            T::regs().cr().modify(|w| w.set_keysize(1));
+        } else if key.len() == (256 / 8) {
+            T::regs().cr().modify(|w| w.set_keysize(2));
+        }
+
+        self.load_key(key);
+
+        // Set data type to 8-bit. This will match software implementations.
+        T::regs().cr().modify(|w| w.set_datatype(2));
+
+        ctx.cipher.prepare_key(&T::regs());
+
+        ctx.cipher.set_algomode(&T::regs());
+
+        // Set encrypt/decrypt
+        if dir == Direction::Encrypt {
+            T::regs().cr().modify(|w| w.set_algodir(false));
+        } else {
+            T::regs().cr().modify(|w| w.set_algodir(true));
+        }
+
+        // Load the IV into the registers.
+        let iv = ctx.cipher.iv();
+        let mut full_iv: [u8; 16] = [0; 16];
+        full_iv[0..iv.len()].copy_from_slice(iv);
+        let mut iv_idx = 0;
+        let mut iv_word: [u8; 4] = [0; 4];
+        iv_word.copy_from_slice(&full_iv[iv_idx..iv_idx + 4]);
+        iv_idx += 4;
+        T::regs().init(0).ivlr().write_value(u32::from_be_bytes(iv_word));
+        iv_word.copy_from_slice(&full_iv[iv_idx..iv_idx + 4]);
+        iv_idx += 4;
+        T::regs().init(0).ivrr().write_value(u32::from_be_bytes(iv_word));
+        iv_word.copy_from_slice(&full_iv[iv_idx..iv_idx + 4]);
+        iv_idx += 4;
+        T::regs().init(1).ivlr().write_value(u32::from_be_bytes(iv_word));
+        iv_word.copy_from_slice(&full_iv[iv_idx..iv_idx + 4]);
+        T::regs().init(1).ivrr().write_value(u32::from_be_bytes(iv_word));
+
+        // Flush in/out FIFOs
+        T::regs().cr().modify(|w| w.fflush());
+
+        ctx.cipher.init_phase(&T::regs(), self).await;
 
         self.store_context(&mut ctx);
 
@@ -938,10 +1207,9 @@ impl<'d, T: Instance> Cryp<'d, T> {
 
     #[cfg(any(cryp_v2, cryp_v3))]
     /// Controls the header phase of cipher processing.
-    /// This function is only valid for GCM, CCM, and GMAC modes.
-    /// It only needs to be called if using one of these modes and there is associated data.
-    /// All AAD must be supplied to this function prior to starting the payload phase with `payload_blocking`.
-    /// The AAD must be supplied in multiples of the block size (128 bits), except when supplying the last block.
+    /// This function is only valid for authenticated ciphers including GCM, CCM, and GMAC.
+    /// All additional associated data (AAD) must be supplied to this function prior to starting the payload phase with `payload_blocking`.
+    /// The AAD must be supplied in multiples of the block size (128-bits for AES, 64-bits for DES), except when supplying the last block.
     /// When supplying the last block of AAD, `last_aad_block` must be `true`.
     pub fn aad_blocking<
         'c,
@@ -985,15 +1253,7 @@ impl<'d, T: Instance> Cryp<'d, T> {
         if ctx.aad_buffer_len < C::BLOCK_SIZE {
             // The buffer isn't full and this is the last buffer, so process it as is (already padded).
             if last_aad_block {
-                let mut index = 0;
-                let end_index = C::BLOCK_SIZE;
-                // Write block in
-                while index < end_index {
-                    let mut in_word: [u8; 4] = [0; 4];
-                    in_word.copy_from_slice(&ctx.aad_buffer[index..index + 4]);
-                    T::regs().din().write_value(u32::from_ne_bytes(in_word));
-                    index += 4;
-                }
+                self.write_bytes_blocking(C::BLOCK_SIZE, &ctx.aad_buffer);
                 // Block until input FIFO is empty.
                 while !T::regs().sr().read().ifem() {}
 
@@ -1008,15 +1268,7 @@ impl<'d, T: Instance> Cryp<'d, T> {
             }
         } else {
             // Load the full block from the buffer.
-            let mut index = 0;
-            let end_index = C::BLOCK_SIZE;
-            // Write block in
-            while index < end_index {
-                let mut in_word: [u8; 4] = [0; 4];
-                in_word.copy_from_slice(&ctx.aad_buffer[index..index + 4]);
-                T::regs().din().write_value(u32::from_ne_bytes(in_word));
-                index += 4;
-            }
+            self.write_bytes_blocking(C::BLOCK_SIZE, &ctx.aad_buffer);
             // Block until input FIFO is empty.
             while !T::regs().sr().read().ifem() {}
         }
@@ -1032,33 +1284,108 @@ impl<'d, T: Instance> Cryp<'d, T> {
 
         // Load full data blocks into core.
         let num_full_blocks = aad_len_remaining / C::BLOCK_SIZE;
-        for block in 0..num_full_blocks {
-            let mut index = len_to_copy + (block * C::BLOCK_SIZE);
-            let end_index = index + C::BLOCK_SIZE;
-            // Write block in
-            while index < end_index {
-                let mut in_word: [u8; 4] = [0; 4];
-                in_word.copy_from_slice(&aad[index..index + 4]);
-                T::regs().din().write_value(u32::from_ne_bytes(in_word));
-                index += 4;
-            }
-            // Block until input FIFO is empty.
-            while !T::regs().sr().read().ifem() {}
-        }
+        let start_index = len_to_copy;
+        let end_index = start_index + (C::BLOCK_SIZE * num_full_blocks);
+        self.write_bytes_blocking(C::BLOCK_SIZE, &aad[start_index..end_index]);
 
         if last_aad_block {
             if leftovers > 0 {
-                let mut index = 0;
-                let end_index = C::BLOCK_SIZE;
-                // Write block in
-                while index < end_index {
-                    let mut in_word: [u8; 4] = [0; 4];
-                    in_word.copy_from_slice(&ctx.aad_buffer[index..index + 4]);
-                    T::regs().din().write_value(u32::from_ne_bytes(in_word));
-                    index += 4;
-                }
-                // Block until input FIFO is empty.
-                while !T::regs().sr().read().ifem() {}
+                self.write_bytes_blocking(C::BLOCK_SIZE, &ctx.aad_buffer);
+            }
+            // Switch to payload phase.
+            ctx.aad_complete = true;
+            T::regs().cr().modify(|w| w.set_crypen(false));
+            T::regs().cr().modify(|w| w.set_gcm_ccmph(2));
+            T::regs().cr().modify(|w| w.fflush());
+        }
+
+        self.store_context(ctx);
+    }
+
+    #[cfg(any(cryp_v2, cryp_v3))]
+    /// Controls the header phase of cipher processing.
+    /// This function is only valid for authenticated ciphers including GCM, CCM, and GMAC.
+    /// All additional associated data (AAD) must be supplied to this function prior to starting the payload phase with `payload`.
+    /// The AAD must be supplied in multiples of the block size (128-bits for AES, 64-bits for DES), except when supplying the last block.
+    /// When supplying the last block of AAD, `last_aad_block` must be `true`.
+    pub async fn aad<'c, const TAG_SIZE: usize, C: Cipher<'c> + CipherSized + IVSized + CipherAuthenticated<TAG_SIZE>>(
+        &mut self,
+        ctx: &mut Context<'c, C>,
+        aad: &[u8],
+        last_aad_block: bool,
+    ) where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        self.load_context(ctx);
+
+        // Perform checks for correctness.
+        if ctx.aad_complete {
+            panic!("Cannot update AAD after starting payload!")
+        }
+
+        ctx.header_len += aad.len() as u64;
+
+        // Header phase
+        T::regs().cr().modify(|w| w.set_crypen(false));
+        T::regs().cr().modify(|w| w.set_gcm_ccmph(1));
+        T::regs().cr().modify(|w| w.set_crypen(true));
+
+        // First write the header B1 block if not yet written.
+        if !ctx.header_processed {
+            ctx.header_processed = true;
+            let header = ctx.cipher.get_header_block();
+            ctx.aad_buffer[0..header.len()].copy_from_slice(header);
+            ctx.aad_buffer_len += header.len();
+        }
+
+        // Fill the header block to make a full block.
+        let len_to_copy = min(aad.len(), C::BLOCK_SIZE - ctx.aad_buffer_len);
+        ctx.aad_buffer[ctx.aad_buffer_len..ctx.aad_buffer_len + len_to_copy].copy_from_slice(&aad[..len_to_copy]);
+        ctx.aad_buffer_len += len_to_copy;
+        ctx.aad_buffer[ctx.aad_buffer_len..].fill(0);
+        let mut aad_len_remaining = aad.len() - len_to_copy;
+
+        if ctx.aad_buffer_len < C::BLOCK_SIZE {
+            // The buffer isn't full and this is the last buffer, so process it as is (already padded).
+            if last_aad_block {
+                Self::write_bytes(&mut self.indma, C::BLOCK_SIZE, &ctx.aad_buffer).await;
+                assert_eq!(T::regs().sr().read().ifem(), true);
+
+                // Switch to payload phase.
+                ctx.aad_complete = true;
+                T::regs().cr().modify(|w| w.set_crypen(false));
+                T::regs().cr().modify(|w| w.set_gcm_ccmph(2));
+                T::regs().cr().modify(|w| w.fflush());
+            } else {
+                // Just return because we don't yet have a full block to process.
+                return;
+            }
+        } else {
+            // Load the full block from the buffer.
+            Self::write_bytes(&mut self.indma, C::BLOCK_SIZE, &ctx.aad_buffer).await;
+            assert_eq!(T::regs().sr().read().ifem(), true);
+        }
+
+        // Handle a partial block that is passed in.
+        ctx.aad_buffer_len = 0;
+        let leftovers = aad_len_remaining % C::BLOCK_SIZE;
+        ctx.aad_buffer[..leftovers].copy_from_slice(&aad[aad.len() - leftovers..aad.len()]);
+        ctx.aad_buffer_len += leftovers;
+        ctx.aad_buffer[ctx.aad_buffer_len..].fill(0);
+        aad_len_remaining -= leftovers;
+        assert_eq!(aad_len_remaining % C::BLOCK_SIZE, 0);
+
+        // Load full data blocks into core.
+        let num_full_blocks = aad_len_remaining / C::BLOCK_SIZE;
+        let start_index = len_to_copy;
+        let end_index = start_index + (C::BLOCK_SIZE * num_full_blocks);
+        Self::write_bytes(&mut self.indma, C::BLOCK_SIZE, &aad[start_index..end_index]).await;
+
+        if last_aad_block {
+            if leftovers > 0 {
+                Self::write_bytes(&mut self.indma, C::BLOCK_SIZE, &ctx.aad_buffer).await;
+                assert_eq!(T::regs().sr().read().ifem(), true);
             }
             // Switch to payload phase.
             ctx.aad_complete = true;
@@ -1074,7 +1401,7 @@ impl<'d, T: Instance> Cryp<'d, T> {
     /// The context determines algorithm, mode, and state of the crypto accelerator.
     /// When the last piece of data is supplied, `last_block` should be `true`.
     /// This function panics under various mismatches of parameters.
-    /// Input and output buffer lengths must match.
+    /// Output buffer must be at least as long as the input buffer.
     /// Data must be a multiple of block size (128-bits for AES, 64-bits for DES) for CBC and ECB modes.
     /// Padding or ciphertext stealing must be managed by the application for these modes.
     /// Data must also be a multiple of block size unless `last_block` is `true`.
@@ -1125,54 +1452,23 @@ impl<'d, T: Instance> Cryp<'d, T> {
         // Load data into core, block by block.
         let num_full_blocks = input.len() / C::BLOCK_SIZE;
         for block in 0..num_full_blocks {
-            let mut index = block * C::BLOCK_SIZE;
-            let end_index = index + C::BLOCK_SIZE;
+            let index = block * C::BLOCK_SIZE;
             // Write block in
-            while index < end_index {
-                let mut in_word: [u8; 4] = [0; 4];
-                in_word.copy_from_slice(&input[index..index + 4]);
-                T::regs().din().write_value(u32::from_ne_bytes(in_word));
-                index += 4;
-            }
-            let mut index = block * C::BLOCK_SIZE;
-            let end_index = index + C::BLOCK_SIZE;
-            // Block until there is output to read.
-            while !T::regs().sr().read().ofne() {}
+            self.write_bytes_blocking(C::BLOCK_SIZE, &input[index..index + C::BLOCK_SIZE]);
             // Read block out
-            while index < end_index {
-                let out_word: u32 = T::regs().dout().read();
-                output[index..index + 4].copy_from_slice(u32::to_ne_bytes(out_word).as_slice());
-                index += 4;
-            }
+            self.read_bytes_blocking(C::BLOCK_SIZE, &mut output[index..index + C::BLOCK_SIZE]);
         }
 
         // Handle the final block, which is incomplete.
         if last_block_remainder > 0 {
             let padding_len = C::BLOCK_SIZE - last_block_remainder;
-            let temp1 = ctx.cipher.pre_final_block(&T::regs(), ctx.dir, padding_len);
+            let temp1 = ctx.cipher.pre_final(&T::regs(), ctx.dir, padding_len);
 
             let mut intermediate_data: [u8; AES_BLOCK_SIZE] = [0; AES_BLOCK_SIZE];
             let mut last_block: [u8; AES_BLOCK_SIZE] = [0; AES_BLOCK_SIZE];
             last_block[..last_block_remainder].copy_from_slice(&input[input.len() - last_block_remainder..input.len()]);
-            let mut index = 0;
-            let end_index = C::BLOCK_SIZE;
-            // Write block in
-            while index < end_index {
-                let mut in_word: [u8; 4] = [0; 4];
-                in_word.copy_from_slice(&last_block[index..index + 4]);
-                T::regs().din().write_value(u32::from_ne_bytes(in_word));
-                index += 4;
-            }
-            let mut index = 0;
-            let end_index = C::BLOCK_SIZE;
-            // Block until there is output to read.
-            while !T::regs().sr().read().ofne() {}
-            // Read block out
-            while index < end_index {
-                let out_word: u32 = T::regs().dout().read();
-                intermediate_data[index..index + 4].copy_from_slice(u32::to_ne_bytes(out_word).as_slice());
-                index += 4;
-            }
+            self.write_bytes_blocking(C::BLOCK_SIZE, &last_block);
+            self.read_bytes_blocking(C::BLOCK_SIZE, &mut intermediate_data);
 
             // Handle the last block depending on mode.
             let output_len = output.len();
@@ -1182,7 +1478,106 @@ impl<'d, T: Instance> Cryp<'d, T> {
             let mut mask: [u8; 16] = [0; 16];
             mask[..last_block_remainder].fill(0xFF);
             ctx.cipher
-                .post_final_block(&T::regs(), ctx.dir, &mut intermediate_data, temp1, mask);
+                .post_final_blocking(&T::regs(), self, ctx.dir, &mut intermediate_data, temp1, mask);
+        }
+
+        ctx.payload_len += input.len() as u64;
+
+        self.store_context(ctx);
+    }
+
+    /// Performs encryption/decryption on the provided context.
+    /// The context determines algorithm, mode, and state of the crypto accelerator.
+    /// When the last piece of data is supplied, `last_block` should be `true`.
+    /// This function panics under various mismatches of parameters.
+    /// Output buffer must be at least as long as the input buffer.
+    /// Data must be a multiple of block size (128-bits for AES, 64-bits for DES) for CBC and ECB modes.
+    /// Padding or ciphertext stealing must be managed by the application for these modes.
+    /// Data must also be a multiple of block size unless `last_block` is `true`.
+    pub async fn payload<'c, C: Cipher<'c> + CipherSized + IVSized>(
+        &mut self,
+        ctx: &mut Context<'c, C>,
+        input: &[u8],
+        output: &mut [u8],
+        last_block: bool,
+    ) where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        self.load_context(ctx);
+
+        let last_block_remainder = input.len() % C::BLOCK_SIZE;
+
+        // Perform checks for correctness.
+        if !ctx.aad_complete && ctx.header_len > 0 {
+            panic!("Additional associated data must be processed first!");
+        } else if !ctx.aad_complete {
+            #[cfg(any(cryp_v2, cryp_v3))]
+            {
+                ctx.aad_complete = true;
+                T::regs().cr().modify(|w| w.set_crypen(false));
+                T::regs().cr().modify(|w| w.set_gcm_ccmph(2));
+                T::regs().cr().modify(|w| w.fflush());
+                T::regs().cr().modify(|w| w.set_crypen(true));
+            }
+        }
+        if ctx.last_block_processed {
+            panic!("The last block has already been processed!");
+        }
+        if input.len() > output.len() {
+            panic!("Output buffer length must match input length.");
+        }
+        if !last_block {
+            if last_block_remainder != 0 {
+                panic!("Input length must be a multiple of {} bytes.", C::BLOCK_SIZE);
+            }
+        }
+        if C::REQUIRES_PADDING {
+            if last_block_remainder != 0 {
+                panic!("Input must be a multiple of {} bytes in ECB and CBC modes. Consider padding or ciphertext stealing.", C::BLOCK_SIZE);
+            }
+        }
+        if last_block {
+            ctx.last_block_processed = true;
+        }
+
+        // Load data into core, block by block.
+        let num_full_blocks = input.len() / C::BLOCK_SIZE;
+        for block in 0..num_full_blocks {
+            let index = block * C::BLOCK_SIZE;
+            // Read block out
+            let read = Self::read_bytes(
+                &mut self.outdma,
+                C::BLOCK_SIZE,
+                &mut output[index..index + C::BLOCK_SIZE],
+            );
+            // Write block in
+            let write = Self::write_bytes(&mut self.indma, C::BLOCK_SIZE, &input[index..index + C::BLOCK_SIZE]);
+            embassy_futures::join::join(read, write).await;
+        }
+
+        // Handle the final block, which is incomplete.
+        if last_block_remainder > 0 {
+            let padding_len = C::BLOCK_SIZE - last_block_remainder;
+            let temp1 = ctx.cipher.pre_final(&T::regs(), ctx.dir, padding_len);
+
+            let mut intermediate_data: [u8; AES_BLOCK_SIZE] = [0; AES_BLOCK_SIZE];
+            let mut last_block: [u8; AES_BLOCK_SIZE] = [0; AES_BLOCK_SIZE];
+            last_block[..last_block_remainder].copy_from_slice(&input[input.len() - last_block_remainder..input.len()]);
+            let read = Self::read_bytes(&mut self.outdma, C::BLOCK_SIZE, &mut intermediate_data);
+            let write = Self::write_bytes(&mut self.indma, C::BLOCK_SIZE, &last_block);
+            embassy_futures::join::join(read, write).await;
+
+            // Handle the last block depending on mode.
+            let output_len = output.len();
+            output[output_len - last_block_remainder..output_len]
+                .copy_from_slice(&intermediate_data[0..last_block_remainder]);
+
+            let mut mask: [u8; 16] = [0; 16];
+            mask[..last_block_remainder].fill(0xFF);
+            ctx.cipher
+                .post_final(&T::regs(), self, ctx.dir, &mut intermediate_data, temp1, mask)
+                .await;
         }
 
         ctx.payload_len += input.len() as u64;
@@ -1191,8 +1586,8 @@ impl<'d, T: Instance> Cryp<'d, T> {
     }
 
     #[cfg(any(cryp_v2, cryp_v3))]
-    /// This function only needs to be called for GCM, CCM, and GMAC modes to
-    /// generate an authentication tag.
+    /// Generates an authentication tag for authenticated ciphers including GCM, CCM, and GMAC.
+    /// Called after the all data has been encrypted/decrypted by `payload`.
     pub fn finish_blocking<
         'c,
         const TAG_SIZE: usize,
@@ -1213,28 +1608,72 @@ impl<'d, T: Instance> Cryp<'d, T> {
         let payloadlen2: u32 = (ctx.payload_len * 8) as u32;
 
         #[cfg(cryp_v2)]
-        {
-            T::regs().din().write_value(headerlen1.swap_bytes());
-            T::regs().din().write_value(headerlen2.swap_bytes());
-            T::regs().din().write_value(payloadlen1.swap_bytes());
-            T::regs().din().write_value(payloadlen2.swap_bytes());
-        }
-
+        let footer: [u32; 4] = [
+            headerlen1.swap_bytes(),
+            headerlen2.swap_bytes(),
+            payloadlen1.swap_bytes(),
+            payloadlen2.swap_bytes(),
+        ];
         #[cfg(cryp_v3)]
-        {
-            T::regs().din().write_value(headerlen1);
-            T::regs().din().write_value(headerlen2);
-            T::regs().din().write_value(payloadlen1);
-            T::regs().din().write_value(payloadlen2);
-        }
+        let footer: [u32; 4] = [headerlen1, headerlen2, payloadlen1, payloadlen2];
+
+        self.write_words_blocking(C::BLOCK_SIZE, &footer);
 
         while !T::regs().sr().read().ofne() {}
 
         let mut full_tag: [u8; 16] = [0; 16];
-        full_tag[0..4].copy_from_slice(T::regs().dout().read().to_ne_bytes().as_slice());
-        full_tag[4..8].copy_from_slice(T::regs().dout().read().to_ne_bytes().as_slice());
-        full_tag[8..12].copy_from_slice(T::regs().dout().read().to_ne_bytes().as_slice());
-        full_tag[12..16].copy_from_slice(T::regs().dout().read().to_ne_bytes().as_slice());
+        self.read_bytes_blocking(C::BLOCK_SIZE, &mut full_tag);
+        let mut tag: [u8; TAG_SIZE] = [0; TAG_SIZE];
+        tag.copy_from_slice(&full_tag[0..TAG_SIZE]);
+
+        T::regs().cr().modify(|w| w.set_crypen(false));
+
+        tag
+    }
+
+    #[cfg(any(cryp_v2, cryp_v3))]
+    // Generates an authentication tag for authenticated ciphers including GCM, CCM, and GMAC.
+    /// Called after the all data has been encrypted/decrypted by `payload`.
+    pub async fn finish<
+        'c,
+        const TAG_SIZE: usize,
+        C: Cipher<'c> + CipherSized + IVSized + CipherAuthenticated<TAG_SIZE>,
+    >(
+        &mut self,
+        mut ctx: Context<'c, C>,
+    ) -> [u8; TAG_SIZE]
+    where
+        DmaIn: crate::cryp::DmaIn<T>,
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        self.load_context(&mut ctx);
+
+        T::regs().cr().modify(|w| w.set_crypen(false));
+        T::regs().cr().modify(|w| w.set_gcm_ccmph(3));
+        T::regs().cr().modify(|w| w.set_crypen(true));
+
+        let headerlen1: u32 = ((ctx.header_len * 8) >> 32) as u32;
+        let headerlen2: u32 = (ctx.header_len * 8) as u32;
+        let payloadlen1: u32 = ((ctx.payload_len * 8) >> 32) as u32;
+        let payloadlen2: u32 = (ctx.payload_len * 8) as u32;
+
+        #[cfg(cryp_v2)]
+        let footer: [u32; 4] = [
+            headerlen1.swap_bytes(),
+            headerlen2.swap_bytes(),
+            payloadlen1.swap_bytes(),
+            payloadlen2.swap_bytes(),
+        ];
+        #[cfg(cryp_v3)]
+        let footer: [u32; 4] = [headerlen1, headerlen2, payloadlen1, payloadlen2];
+
+        let write = Self::write_words(&mut self.indma, C::BLOCK_SIZE, &footer);
+
+        let mut full_tag: [u8; 16] = [0; 16];
+        let read = Self::read_bytes(&mut self.outdma, C::BLOCK_SIZE, &mut full_tag);
+
+        embassy_futures::join::join(read, write).await;
+
         let mut tag: [u8; TAG_SIZE] = [0; TAG_SIZE];
         tag.copy_from_slice(&full_tag[0..TAG_SIZE]);
 
@@ -1325,18 +1764,134 @@ impl<'d, T: Instance> Cryp<'d, T> {
         // Enable crypto processor.
         T::regs().cr().modify(|w| w.set_crypen(true));
     }
-}
 
-pub(crate) mod sealed {
-    use super::*;
+    fn write_bytes_blocking(&self, block_size: usize, blocks: &[u8]) {
+        // Ensure input is a multiple of block size.
+        assert_eq!(blocks.len() % block_size, 0);
+        let mut index = 0;
+        let end_index = blocks.len();
+        while index < end_index {
+            let mut in_word: [u8; 4] = [0; 4];
+            in_word.copy_from_slice(&blocks[index..index + 4]);
+            T::regs().din().write_value(u32::from_ne_bytes(in_word));
+            index += 4;
+            if index % block_size == 0 {
+                // Block until input FIFO is empty.
+                while !T::regs().sr().read().ifem() {}
+            }
+        }
+    }
 
-    pub trait Instance {
-        fn regs() -> pac::cryp::Cryp;
+    async fn write_bytes(dma: &mut PeripheralRef<'_, DmaIn>, block_size: usize, blocks: &[u8])
+    where
+        DmaIn: crate::cryp::DmaIn<T>,
+    {
+        if blocks.len() == 0 {
+            return;
+        }
+        // Ensure input is a multiple of block size.
+        assert_eq!(blocks.len() % block_size, 0);
+        // Configure DMA to transfer input to crypto core.
+        let dma_request = dma.request();
+        let dst_ptr = T::regs().din().as_ptr();
+        let num_words = blocks.len() / 4;
+        let src_ptr = ptr::slice_from_raw_parts(blocks.as_ptr().cast(), num_words);
+        let options = TransferOptions {
+            priority: Priority::High,
+            ..Default::default()
+        };
+        let dma_transfer = unsafe { Transfer::new_write_raw(dma, dma_request, src_ptr, dst_ptr, options) };
+        T::regs().dmacr().modify(|w| w.set_dien(true));
+        // Wait for the transfer to complete.
+        dma_transfer.await;
+    }
+
+    #[cfg(any(cryp_v2, cryp_v3))]
+    fn write_words_blocking(&self, block_size: usize, blocks: &[u32]) {
+        assert_eq!((blocks.len() * 4) % block_size, 0);
+        let mut byte_counter: usize = 0;
+        for word in blocks {
+            T::regs().din().write_value(*word);
+            byte_counter += 4;
+            if byte_counter % block_size == 0 {
+                // Block until input FIFO is empty.
+                while !T::regs().sr().read().ifem() {}
+            }
+        }
+    }
+
+    #[cfg(any(cryp_v2, cryp_v3))]
+    async fn write_words(dma: &mut PeripheralRef<'_, DmaIn>, block_size: usize, blocks: &[u32])
+    where
+        DmaIn: crate::cryp::DmaIn<T>,
+    {
+        if blocks.len() == 0 {
+            return;
+        }
+        // Ensure input is a multiple of block size.
+        assert_eq!((blocks.len() * 4) % block_size, 0);
+        // Configure DMA to transfer input to crypto core.
+        let dma_request = dma.request();
+        let dst_ptr = T::regs().din().as_ptr();
+        let num_words = blocks.len();
+        let src_ptr = ptr::slice_from_raw_parts(blocks.as_ptr().cast(), num_words);
+        let options = TransferOptions {
+            priority: Priority::High,
+            ..Default::default()
+        };
+        let dma_transfer = unsafe { Transfer::new_write_raw(dma, dma_request, src_ptr, dst_ptr, options) };
+        T::regs().dmacr().modify(|w| w.set_dien(true));
+        // Wait for the transfer to complete.
+        dma_transfer.await;
+    }
+
+    fn read_bytes_blocking(&self, block_size: usize, blocks: &mut [u8]) {
+        // Block until there is output to read.
+        while !T::regs().sr().read().ofne() {}
+        // Ensure input is a multiple of block size.
+        assert_eq!(blocks.len() % block_size, 0);
+        // Read block out
+        let mut index = 0;
+        let end_index = blocks.len();
+        while index < end_index {
+            let out_word: u32 = T::regs().dout().read();
+            blocks[index..index + 4].copy_from_slice(u32::to_ne_bytes(out_word).as_slice());
+            index += 4;
+        }
+    }
+
+    async fn read_bytes(dma: &mut PeripheralRef<'_, DmaOut>, block_size: usize, blocks: &mut [u8])
+    where
+        DmaOut: crate::cryp::DmaOut<T>,
+    {
+        if blocks.len() == 0 {
+            return;
+        }
+        // Ensure input is a multiple of block size.
+        assert_eq!(blocks.len() % block_size, 0);
+        // Configure DMA to get output from crypto core.
+        let dma_request = dma.request();
+        let src_ptr = T::regs().dout().as_ptr();
+        let num_words = blocks.len() / 4;
+        let dst_ptr = ptr::slice_from_raw_parts_mut(blocks.as_mut_ptr().cast(), num_words);
+        let options = TransferOptions {
+            priority: Priority::VeryHigh,
+            ..Default::default()
+        };
+        let dma_transfer = unsafe { Transfer::new_read_raw(dma, dma_request, src_ptr, dst_ptr, options) };
+        T::regs().dmacr().modify(|w| w.set_doen(true));
+        // Wait for the transfer to complete.
+        dma_transfer.await;
     }
 }
 
+trait SealedInstance {
+    fn regs() -> pac::cryp::Cryp;
+}
+
 /// CRYP instance trait.
-pub trait Instance: sealed::Instance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
     /// Interrupt for this CRYP instance.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
@@ -1347,10 +1902,13 @@ foreach_interrupt!(
             type Interrupt = crate::interrupt::typelevel::$irq;
         }
 
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             fn regs() -> crate::pac::cryp::Cryp {
                 crate::pac::$inst
             }
         }
     };
 );
+
+dma_trait!(DmaIn, Instance);
+dma_trait!(DmaOut, Instance);
diff --git a/embassy-stm32/src/dac/mod.rs b/embassy-stm32/src/dac/mod.rs
index 60f9404c2..acfed8356 100644
--- a/embassy-stm32/src/dac/mod.rs
+++ b/embassy-stm32/src/dac/mod.rs
@@ -127,7 +127,7 @@ impl<'d, T: Instance, const N: u8, DMA> DacChannel<'d, T, N, DMA> {
     pub fn new(
         _peri: impl Peripheral<P = T> + 'd,
         dma: impl Peripheral<P = DMA> + 'd,
-        pin: impl Peripheral<P = impl DacPin<T, N> + crate::gpio::sealed::Pin> + 'd,
+        pin: impl Peripheral<P = impl DacPin<T, N> + crate::gpio::Pin> + 'd,
     ) -> Self {
         into_ref!(dma, pin);
         pin.set_as_analog();
@@ -392,8 +392,8 @@ impl<'d, T: Instance, DMACh1, DMACh2> Dac<'d, T, DMACh1, DMACh2> {
         _peri: impl Peripheral<P = T> + 'd,
         dma_ch1: impl Peripheral<P = DMACh1> + 'd,
         dma_ch2: impl Peripheral<P = DMACh2> + 'd,
-        pin_ch1: impl Peripheral<P = impl DacPin<T, 1> + crate::gpio::sealed::Pin> + 'd,
-        pin_ch2: impl Peripheral<P = impl DacPin<T, 2> + crate::gpio::sealed::Pin> + 'd,
+        pin_ch1: impl Peripheral<P = impl DacPin<T, 1> + crate::gpio::Pin> + 'd,
+        pin_ch2: impl Peripheral<P = impl DacPin<T, 2> + crate::gpio::Pin> + 'd,
     ) -> Self {
         into_ref!(dma_ch1, dma_ch2, pin_ch1, pin_ch2);
         pin_ch1.set_as_analog();
@@ -488,14 +488,13 @@ impl<'d, T: Instance, DMACh1, DMACh2> Dac<'d, T, DMACh1, DMACh2> {
     }
 }
 
-pub(crate) mod sealed {
-    pub trait Instance {
-        fn regs() -> &'static crate::pac::dac::Dac;
-    }
+trait SealedInstance {
+    fn regs() -> &'static crate::pac::dac::Dac;
 }
 
 /// DAC instance.
-pub trait Instance: sealed::Instance + RccPeripheral + 'static {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + RccPeripheral + 'static {}
 dma_trait!(DacDma1, Instance);
 dma_trait!(DacDma2, Instance);
 
@@ -504,7 +503,7 @@ pub trait DacPin<T: Instance, const C: u8>: crate::gpio::Pin + 'static {}
 
 foreach_peripheral!(
     (dac, $inst:ident) => {
-        impl crate::dac::sealed::Instance for peripherals::$inst {
+        impl crate::dac::SealedInstance for peripherals::$inst {
             fn regs() -> &'static crate::pac::dac::Dac {
                 &crate::pac::$inst
             }
diff --git a/embassy-stm32/src/dcmi.rs b/embassy-stm32/src/dcmi.rs
index 826b04a4b..646ee2ce2 100644
--- a/embassy-stm32/src/dcmi.rs
+++ b/embassy-stm32/src/dcmi.rs
@@ -7,8 +7,7 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 use embassy_sync::waitqueue::AtomicWaker;
 
 use crate::dma::Transfer;
-use crate::gpio::sealed::AFType;
-use crate::gpio::Speed;
+use crate::gpio::{AFType, Speed};
 use crate::interrupt::typelevel::Interrupt;
 use crate::{interrupt, Peripheral};
 
@@ -431,14 +430,13 @@ where
     }
 }
 
-mod sealed {
-    pub trait Instance: crate::rcc::RccPeripheral {
-        fn regs(&self) -> crate::pac::dcmi::Dcmi;
-    }
+trait SealedInstance: crate::rcc::RccPeripheral {
+    fn regs(&self) -> crate::pac::dcmi::Dcmi;
 }
 
 /// DCMI instance.
-pub trait Instance: sealed::Instance + 'static {
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + 'static {
     /// Interrupt for this instance.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
@@ -465,7 +463,7 @@ pin_trait!(PixClkPin, Instance);
 #[allow(unused)]
 macro_rules! impl_peripheral {
     ($inst:ident, $irq:ident) => {
-        impl sealed::Instance for crate::peripherals::$inst {
+        impl SealedInstance for crate::peripherals::$inst {
             fn regs(&self) -> crate::pac::dcmi::Dcmi {
                 crate::pac::$inst
             }
diff --git a/embassy-stm32/src/dma/dma_bdma.rs b/embassy-stm32/src/dma/dma_bdma.rs
index 08aba2795..a6344cf06 100644
--- a/embassy-stm32/src/dma/dma_bdma.rs
+++ b/embassy-stm32/src/dma/dma_bdma.rs
@@ -1,4 +1,4 @@
-use core::future::Future;
+use core::future::{poll_fn, Future};
 use core::pin::Pin;
 use core::sync::atomic::{fence, AtomicUsize, Ordering};
 use core::task::{Context, Poll, Waker};
@@ -10,8 +10,7 @@ use super::ringbuffer::{DmaCtrl, OverrunError, ReadableDmaRingBuffer, WritableDm
 use super::word::{Word, WordSize};
 use super::{AnyChannel, Channel, Dir, Request, STATE};
 use crate::interrupt::typelevel::Interrupt;
-use crate::interrupt::Priority;
-use crate::pac;
+use crate::{interrupt, pac};
 
 pub(crate) struct ChannelInfo {
     pub(crate) dma: DmaInfo,
@@ -45,6 +44,8 @@ pub struct TransferOptions {
     /// FIFO threshold for DMA FIFO mode. If none, direct mode is used.
     #[cfg(dma)]
     pub fifo_threshold: Option<FifoThreshold>,
+    /// Request priority level
+    pub priority: Priority,
     /// Enable circular DMA
     ///
     /// Note:
@@ -68,6 +69,7 @@ impl Default for TransferOptions {
             flow_ctrl: FlowControl::Dma,
             #[cfg(dma)]
             fifo_threshold: None,
+            priority: Priority::VeryHigh,
             circular: false,
             half_transfer_ir: false,
             complete_transfer_ir: true,
@@ -75,6 +77,44 @@ impl Default for TransferOptions {
     }
 }
 
+/// DMA request priority
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Priority {
+    /// Low Priority
+    Low,
+    /// Medium Priority
+    Medium,
+    /// High Priority
+    High,
+    /// Very High Priority
+    VeryHigh,
+}
+
+#[cfg(dma)]
+impl From<Priority> for pac::dma::vals::Pl {
+    fn from(value: Priority) -> Self {
+        match value {
+            Priority::Low => pac::dma::vals::Pl::LOW,
+            Priority::Medium => pac::dma::vals::Pl::MEDIUM,
+            Priority::High => pac::dma::vals::Pl::HIGH,
+            Priority::VeryHigh => pac::dma::vals::Pl::VERYHIGH,
+        }
+    }
+}
+
+#[cfg(bdma)]
+impl From<Priority> for pac::bdma::vals::Pl {
+    fn from(value: Priority) -> Self {
+        match value {
+            Priority::Low => pac::bdma::vals::Pl::LOW,
+            Priority::Medium => pac::bdma::vals::Pl::MEDIUM,
+            Priority::High => pac::bdma::vals::Pl::HIGH,
+            Priority::VeryHigh => pac::bdma::vals::Pl::VERYHIGH,
+        }
+    }
+}
+
 #[cfg(dma)]
 pub use dma_only::*;
 #[cfg(dma)]
@@ -213,8 +253,8 @@ impl ChannelState {
 /// safety: must be called only once
 pub(crate) unsafe fn init(
     cs: critical_section::CriticalSection,
-    #[cfg(dma)] dma_priority: Priority,
-    #[cfg(bdma)] bdma_priority: Priority,
+    #[cfg(dma)] dma_priority: interrupt::Priority,
+    #[cfg(bdma)] bdma_priority: interrupt::Priority,
 ) {
     foreach_interrupt! {
         ($peri:ident, dma, $block:ident, $signal_name:ident, $irq:ident) => {
@@ -334,7 +374,7 @@ impl AnyChannel {
                     w.set_dir(dir.into());
                     w.set_msize(data_size.into());
                     w.set_psize(data_size.into());
-                    w.set_pl(pac::dma::vals::Pl::VERYHIGH);
+                    w.set_pl(options.priority.into());
                     w.set_minc(incr_mem);
                     w.set_pinc(false);
                     w.set_teie(true);
@@ -374,7 +414,7 @@ impl AnyChannel {
                     w.set_tcie(options.complete_transfer_ir);
                     w.set_htie(options.half_transfer_ir);
                     w.set_circ(options.circular);
-                    w.set_pl(pac::bdma::vals::Pl::VERYHIGH);
+                    w.set_pl(options.priority.into());
                     w.set_en(false); // don't start yet
                 });
             }
@@ -470,6 +510,31 @@ impl AnyChannel {
             DmaInfo::Bdma(r) => r.ch(info.num).ndtr().read().ndt(),
         }
     }
+
+    fn disable_circular_mode(&self) {
+        let info = self.info();
+        match self.info().dma {
+            #[cfg(dma)]
+            DmaInfo::Dma(regs) => regs.st(info.num).cr().modify(|w| {
+                w.set_circ(false);
+            }),
+            #[cfg(bdma)]
+            DmaInfo::Bdma(regs) => regs.ch(info.num).cr().modify(|w| {
+                w.set_circ(false);
+            }),
+        }
+    }
+
+    fn poll_stop(&self) -> Poll<()> {
+        use core::sync::atomic::compiler_fence;
+        compiler_fence(Ordering::SeqCst);
+
+        if !self.is_running() {
+            Poll::Ready(())
+        } else {
+            Poll::Pending
+        }
+    }
 }
 
 /// DMA transfer.
@@ -789,6 +854,25 @@ impl<'a, W: Word> ReadableRingBuffer<'a, W> {
     pub fn is_running(&mut self) -> bool {
         self.channel.is_running()
     }
+
+    /// Stop the DMA transfer and await until the buffer is full.
+    ///
+    /// This disables the DMA transfer's circular mode so that the transfer
+    /// stops when the buffer is full.
+    ///
+    /// This is designed to be used with streaming input data such as the
+    /// I2S/SAI or ADC.
+    ///
+    /// When using the UART, you probably want `request_stop()`.
+    pub async fn stop(&mut self) {
+        self.channel.disable_circular_mode();
+        //wait until cr.susp reads as true
+        poll_fn(|cx| {
+            self.set_waker(cx.waker());
+            self.channel.poll_stop()
+        })
+        .await
+    }
 }
 
 impl<'a, W: Word> Drop for ReadableRingBuffer<'a, W> {
@@ -900,6 +984,23 @@ impl<'a, W: Word> WritableRingBuffer<'a, W> {
     pub fn is_running(&mut self) -> bool {
         self.channel.is_running()
     }
+
+    /// Stop the DMA transfer and await until the buffer is empty.
+    ///
+    /// This disables the DMA transfer's circular mode so that the transfer
+    /// stops when all available data has been written.
+    ///
+    /// This is designed to be used with streaming output data such as the
+    /// I2S/SAI or DAC.
+    pub async fn stop(&mut self) {
+        self.channel.disable_circular_mode();
+        //wait until cr.susp reads as true
+        poll_fn(|cx| {
+            self.set_waker(cx.waker());
+            self.channel.poll_stop()
+        })
+        .await
+    }
 }
 
 impl<'a, W: Word> Drop for WritableRingBuffer<'a, W> {
diff --git a/embassy-stm32/src/dma/dmamux.rs b/embassy-stm32/src/dma/dmamux.rs
index 1e9ab5944..dc7cd3a66 100644
--- a/embassy-stm32/src/dma/dmamux.rs
+++ b/embassy-stm32/src/dma/dmamux.rs
@@ -19,9 +19,7 @@ pub(crate) fn configure_dmamux(info: &DmamuxInfo, request: u8) {
     });
 }
 
-pub(crate) mod dmamux_sealed {
-    pub trait MuxChannel {}
-}
+pub(crate) trait SealedMuxChannel {}
 
 /// DMAMUX1 instance.
 pub struct DMAMUX1;
@@ -30,14 +28,15 @@ pub struct DMAMUX1;
 pub struct DMAMUX2;
 
 /// DMAMUX channel trait.
-pub trait MuxChannel: dmamux_sealed::MuxChannel {
+#[allow(private_bounds)]
+pub trait MuxChannel: SealedMuxChannel {
     /// DMAMUX instance this channel is on.
     type Mux;
 }
 
 macro_rules! dmamux_channel_impl {
     ($channel_peri:ident, $dmamux:ident) => {
-        impl crate::dma::dmamux_sealed::MuxChannel for crate::peripherals::$channel_peri {}
+        impl crate::dma::SealedMuxChannel for crate::peripherals::$channel_peri {}
         impl crate::dma::MuxChannel for crate::peripherals::$channel_peri {
             type Mux = crate::dma::$dmamux;
         }
diff --git a/embassy-stm32/src/dma/mod.rs b/embassy-stm32/src/dma/mod.rs
index 960483f34..7e3681469 100644
--- a/embassy-stm32/src/dma/mod.rs
+++ b/embassy-stm32/src/dma/mod.rs
@@ -23,7 +23,7 @@ use core::mem;
 
 use embassy_hal_internal::{impl_peripheral, Peripheral};
 
-use crate::interrupt::Priority;
+use crate::interrupt;
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 #[cfg_attr(feature = "defmt", derive(defmt::Format))]
@@ -39,17 +39,18 @@ pub type Request = u8;
 #[cfg(not(any(dma_v2, bdma_v2, gpdma, dmamux)))]
 pub type Request = ();
 
-pub(crate) mod sealed {
-    pub trait Channel {
-        fn id(&self) -> u8;
-    }
-    pub trait ChannelInterrupt {
-        unsafe fn on_irq();
-    }
+pub(crate) trait SealedChannel {
+    fn id(&self) -> u8;
+}
+
+pub(crate) trait ChannelInterrupt {
+    #[cfg_attr(not(feature = "rt"), allow(unused))]
+    unsafe fn on_irq();
 }
 
 /// DMA channel.
-pub trait Channel: sealed::Channel + Peripheral<P = Self> + Into<AnyChannel> + 'static {
+#[allow(private_bounds)]
+pub trait Channel: SealedChannel + Peripheral<P = Self> + Into<AnyChannel> + 'static {
     /// Type-erase (degrade) this pin into an `AnyChannel`.
     ///
     /// This converts DMA channel singletons (`DMA1_CH3`, `DMA2_CH1`, ...), which
@@ -63,12 +64,12 @@ pub trait Channel: sealed::Channel + Peripheral<P = Self> + Into<AnyChannel> + '
 
 macro_rules! dma_channel_impl {
     ($channel_peri:ident, $index:expr) => {
-        impl crate::dma::sealed::Channel for crate::peripherals::$channel_peri {
+        impl crate::dma::SealedChannel for crate::peripherals::$channel_peri {
             fn id(&self) -> u8 {
                 $index
             }
         }
-        impl crate::dma::sealed::ChannelInterrupt for crate::peripherals::$channel_peri {
+        impl crate::dma::ChannelInterrupt for crate::peripherals::$channel_peri {
             unsafe fn on_irq() {
                 crate::dma::AnyChannel { id: $index }.on_irq();
             }
@@ -96,7 +97,7 @@ impl AnyChannel {
     }
 }
 
-impl sealed::Channel for AnyChannel {
+impl SealedChannel for AnyChannel {
     fn id(&self) -> u8 {
         self.id
     }
@@ -131,9 +132,9 @@ pub(crate) fn slice_ptr_parts_mut<T>(slice: *mut [T]) -> (usize, usize) {
 // safety: must be called only once at startup
 pub(crate) unsafe fn init(
     cs: critical_section::CriticalSection,
-    #[cfg(bdma)] bdma_priority: Priority,
-    #[cfg(dma)] dma_priority: Priority,
-    #[cfg(gpdma)] gpdma_priority: Priority,
+    #[cfg(bdma)] bdma_priority: interrupt::Priority,
+    #[cfg(dma)] dma_priority: interrupt::Priority,
+    #[cfg(gpdma)] gpdma_priority: interrupt::Priority,
 ) {
     #[cfg(any(dma, bdma))]
     dma_bdma::init(
diff --git a/embassy-stm32/src/dma/word.rs b/embassy-stm32/src/dma/word.rs
index a72c4b7d9..fb1bde860 100644
--- a/embassy-stm32/src/dma/word.rs
+++ b/embassy-stm32/src/dma/word.rs
@@ -20,14 +20,13 @@ impl WordSize {
     }
 }
 
-mod sealed {
-    pub trait Word {}
-}
+trait SealedWord {}
 
 /// DMA word trait.
 ///
 /// This is implemented for u8, u16, u32, etc.
-pub trait Word: sealed::Word + Default + Copy + 'static {
+#[allow(private_bounds)]
+pub trait Word: SealedWord + Default + Copy + 'static {
     /// Word size
     fn size() -> WordSize;
     /// Amount of bits of this word size.
@@ -36,7 +35,7 @@ pub trait Word: sealed::Word + Default + Copy + 'static {
 
 macro_rules! impl_word {
     (_, $T:ident, $bits:literal, $size:ident) => {
-        impl sealed::Word for $T {}
+        impl SealedWord for $T {}
         impl Word for $T {
             fn bits() -> usize {
                 $bits
diff --git a/embassy-stm32/src/eth/mod.rs b/embassy-stm32/src/eth/mod.rs
index 71fe09c3f..bfe8a60d6 100644
--- a/embassy-stm32/src/eth/mod.rs
+++ b/embassy-stm32/src/eth/mod.rs
@@ -177,16 +177,15 @@ pub unsafe trait PHY {
     fn poll_link<S: StationManagement>(&mut self, sm: &mut S, cx: &mut Context) -> bool;
 }
 
-pub(crate) mod sealed {
-    pub trait Instance {
-        fn regs() -> crate::pac::eth::Eth;
-    }
+trait SealedInstance {
+    fn regs() -> crate::pac::eth::Eth;
 }
 
 /// Ethernet instance.
-pub trait Instance: sealed::Instance + RccPeripheral + Send + 'static {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + RccPeripheral + Send + 'static {}
 
-impl sealed::Instance for crate::peripherals::ETH {
+impl SealedInstance for crate::peripherals::ETH {
     fn regs() -> crate::pac::eth::Eth {
         crate::pac::ETH
     }
diff --git a/embassy-stm32/src/eth/v1/mod.rs b/embassy-stm32/src/eth/v1/mod.rs
index e5b7b0452..6f0174def 100644
--- a/embassy-stm32/src/eth/v1/mod.rs
+++ b/embassy-stm32/src/eth/v1/mod.rs
@@ -12,15 +12,14 @@ use stm32_metapac::eth::vals::{Apcs, Cr, Dm, DmaomrSr, Fes, Ftf, Ifg, MbProgress
 pub(crate) use self::rx_desc::{RDes, RDesRing};
 pub(crate) use self::tx_desc::{TDes, TDesRing};
 use super::*;
-use crate::gpio::sealed::{AFType, Pin as __GpioPin};
-use crate::gpio::AnyPin;
+use crate::gpio::{AFType, AnyPin, SealedPin};
 use crate::interrupt::InterruptExt;
 #[cfg(eth_v1a)]
 use crate::pac::AFIO;
 #[cfg(any(eth_v1b, eth_v1c))]
 use crate::pac::SYSCFG;
 use crate::pac::{ETH, RCC};
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 use crate::{interrupt, Peripheral};
 
 /// Interrupt handler.
@@ -149,8 +148,8 @@ impl<'d, T: Instance, P: PHY> Ethernet<'d, T, P> {
         #[cfg(any(eth_v1b, eth_v1c))]
         config_pins!(ref_clk, mdio, mdc, crs, rx_d0, rx_d1, tx_d0, tx_d1, tx_en);
 
-        let dma = ETH.ethernet_dma();
-        let mac = ETH.ethernet_mac();
+        let dma = T::regs().ethernet_dma();
+        let mac = T::regs().ethernet_mac();
 
         // Reset and wait
         dma.dmabmr().modify(|w| w.set_sr(true));
@@ -192,7 +191,7 @@ impl<'d, T: Instance, P: PHY> Ethernet<'d, T, P> {
 
         // TODO MTU size setting not found for v1 ethernet, check if correct
 
-        let hclk = <T as RccPeripheral>::frequency();
+        let hclk = <T as SealedRccPeripheral>::frequency();
         let hclk_mhz = hclk.0 / 1_000_000;
 
         // Set the MDC clock frequency in the range 1MHz - 2.5MHz
@@ -235,8 +234,8 @@ impl<'d, T: Instance, P: PHY> Ethernet<'d, T, P> {
 
         fence(Ordering::SeqCst);
 
-        let mac = ETH.ethernet_mac();
-        let dma = ETH.ethernet_dma();
+        let mac = T::regs().ethernet_mac();
+        let dma = T::regs().ethernet_dma();
 
         mac.maccr().modify(|w| {
             w.set_re(true);
@@ -275,7 +274,7 @@ pub struct EthernetStationManagement<T: Instance> {
 
 unsafe impl<T: Instance> StationManagement for EthernetStationManagement<T> {
     fn smi_read(&mut self, phy_addr: u8, reg: u8) -> u16 {
-        let mac = ETH.ethernet_mac();
+        let mac = T::regs().ethernet_mac();
 
         mac.macmiiar().modify(|w| {
             w.set_pa(phy_addr);
@@ -289,7 +288,7 @@ unsafe impl<T: Instance> StationManagement for EthernetStationManagement<T> {
     }
 
     fn smi_write(&mut self, phy_addr: u8, reg: u8, val: u16) {
-        let mac = ETH.ethernet_mac();
+        let mac = T::regs().ethernet_mac();
 
         mac.macmiidr().write(|w| w.set_md(val));
         mac.macmiiar().modify(|w| {
@@ -305,8 +304,8 @@ unsafe impl<T: Instance> StationManagement for EthernetStationManagement<T> {
 
 impl<'d, T: Instance, P: PHY> Drop for Ethernet<'d, T, P> {
     fn drop(&mut self) {
-        let dma = ETH.ethernet_dma();
-        let mac = ETH.ethernet_mac();
+        let dma = T::regs().ethernet_dma();
+        let mac = T::regs().ethernet_mac();
 
         // Disable the TX DMA and wait for any previous transmissions to be completed
         dma.dmaomr().modify(|w| w.set_st(St::STOPPED));
diff --git a/embassy-stm32/src/eth/v2/mod.rs b/embassy-stm32/src/eth/v2/mod.rs
index 8d69561d4..c6e015022 100644
--- a/embassy-stm32/src/eth/v2/mod.rs
+++ b/embassy-stm32/src/eth/v2/mod.rs
@@ -7,11 +7,10 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 
 pub(crate) use self::descriptors::{RDes, RDesRing, TDes, TDesRing};
 use super::*;
-use crate::gpio::sealed::{AFType, Pin as _};
-use crate::gpio::{AnyPin, Speed};
+use crate::gpio::{AFType, AnyPin, SealedPin as _, Speed};
 use crate::interrupt::InterruptExt;
 use crate::pac::ETH;
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 use crate::{interrupt, Peripheral};
 
 /// Interrupt handler.
@@ -207,9 +206,9 @@ impl<'d, T: Instance, P: PHY> Ethernet<'d, T, P> {
         phy: P,
         mac_addr: [u8; 6],
     ) -> Self {
-        let dma = ETH.ethernet_dma();
-        let mac = ETH.ethernet_mac();
-        let mtl = ETH.ethernet_mtl();
+        let dma = T::regs().ethernet_dma();
+        let mac = T::regs().ethernet_mac();
+        let mtl = T::regs().ethernet_mtl();
 
         // Reset and wait
         dma.dmamr().modify(|w| w.set_swr(true));
@@ -265,7 +264,7 @@ impl<'d, T: Instance, P: PHY> Ethernet<'d, T, P> {
             w.set_rbsz(RX_BUFFER_SIZE as u16);
         });
 
-        let hclk = <T as RccPeripheral>::frequency();
+        let hclk = <T as SealedRccPeripheral>::frequency();
         let hclk_mhz = hclk.0 / 1_000_000;
 
         // Set the MDC clock frequency in the range 1MHz - 2.5MHz
@@ -296,9 +295,9 @@ impl<'d, T: Instance, P: PHY> Ethernet<'d, T, P> {
 
         fence(Ordering::SeqCst);
 
-        let mac = ETH.ethernet_mac();
-        let mtl = ETH.ethernet_mtl();
-        let dma = ETH.ethernet_dma();
+        let mac = T::regs().ethernet_mac();
+        let mtl = T::regs().ethernet_mtl();
+        let dma = T::regs().ethernet_dma();
 
         mac.maccr().modify(|w| {
             w.set_re(true);
@@ -334,7 +333,7 @@ pub struct EthernetStationManagement<T: Instance> {
 
 unsafe impl<T: Instance> StationManagement for EthernetStationManagement<T> {
     fn smi_read(&mut self, phy_addr: u8, reg: u8) -> u16 {
-        let mac = ETH.ethernet_mac();
+        let mac = T::regs().ethernet_mac();
 
         mac.macmdioar().modify(|w| {
             w.set_pa(phy_addr);
@@ -348,7 +347,7 @@ unsafe impl<T: Instance> StationManagement for EthernetStationManagement<T> {
     }
 
     fn smi_write(&mut self, phy_addr: u8, reg: u8, val: u16) {
-        let mac = ETH.ethernet_mac();
+        let mac = T::regs().ethernet_mac();
 
         mac.macmdiodr().write(|w| w.set_md(val));
         mac.macmdioar().modify(|w| {
@@ -364,9 +363,9 @@ unsafe impl<T: Instance> StationManagement for EthernetStationManagement<T> {
 
 impl<'d, T: Instance, P: PHY> Drop for Ethernet<'d, T, P> {
     fn drop(&mut self) {
-        let dma = ETH.ethernet_dma();
-        let mac = ETH.ethernet_mac();
-        let mtl = ETH.ethernet_mtl();
+        let dma = T::regs().ethernet_dma();
+        let mac = T::regs().ethernet_mac();
+        let mtl = T::regs().ethernet_mtl();
 
         // Disable the TX DMA and wait for any previous transmissions to be completed
         dma.dmactx_cr().modify(|w| w.set_st(false));
diff --git a/embassy-stm32/src/exti.rs b/embassy-stm32/src/exti.rs
index bd10ba158..8d5dae436 100644
--- a/embassy-stm32/src/exti.rs
+++ b/embassy-stm32/src/exti.rs
@@ -330,12 +330,11 @@ macro_rules! impl_irq {
 
 foreach_exti_irq!(impl_irq);
 
-pub(crate) mod sealed {
-    pub trait Channel {}
-}
+trait SealedChannel {}
 
 /// EXTI channel trait.
-pub trait Channel: sealed::Channel + Sized {
+#[allow(private_bounds)]
+pub trait Channel: SealedChannel + Sized {
     /// Get the EXTI channel number.
     fn number(&self) -> u8;
 
@@ -359,7 +358,7 @@ pub struct AnyChannel {
 }
 
 impl_peripheral!(AnyChannel);
-impl sealed::Channel for AnyChannel {}
+impl SealedChannel for AnyChannel {}
 impl Channel for AnyChannel {
     fn number(&self) -> u8 {
         self.number
@@ -368,7 +367,7 @@ impl Channel for AnyChannel {
 
 macro_rules! impl_exti {
     ($type:ident, $number:expr) => {
-        impl sealed::Channel for peripherals::$type {}
+        impl SealedChannel for peripherals::$type {}
         impl Channel for peripherals::$type {
             fn number(&self) -> u8 {
                 $number
diff --git a/embassy-stm32/src/flash/f0.rs b/embassy-stm32/src/flash/f0.rs
index e0c76e6b2..e2f135208 100644
--- a/embassy-stm32/src/flash/f0.rs
+++ b/embassy-stm32/src/flash/f0.rs
@@ -1,4 +1,3 @@
-use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, Ordering};
 
diff --git a/embassy-stm32/src/flash/f1f3.rs b/embassy-stm32/src/flash/f1f3.rs
index e7790369a..b16354a74 100644
--- a/embassy-stm32/src/flash/f1f3.rs
+++ b/embassy-stm32/src/flash/f1f3.rs
@@ -1,4 +1,3 @@
-use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, Ordering};
 
diff --git a/embassy-stm32/src/flash/f4.rs b/embassy-stm32/src/flash/f4.rs
index 57447bea5..00e61f2d2 100644
--- a/embassy-stm32/src/flash/f4.rs
+++ b/embassy-stm32/src/flash/f4.rs
@@ -1,4 +1,3 @@
-use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, AtomicBool, Ordering};
 
diff --git a/embassy-stm32/src/flash/f7.rs b/embassy-stm32/src/flash/f7.rs
index 0f512bbc4..72de0b445 100644
--- a/embassy-stm32/src/flash/f7.rs
+++ b/embassy-stm32/src/flash/f7.rs
@@ -1,4 +1,3 @@
-use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, Ordering};
 
diff --git a/embassy-stm32/src/flash/g.rs b/embassy-stm32/src/flash/g.rs
index b69c4343b..6a5adc941 100644
--- a/embassy-stm32/src/flash/g.rs
+++ b/embassy-stm32/src/flash/g.rs
@@ -1,4 +1,3 @@
-use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, Ordering};
 
diff --git a/embassy-stm32/src/flash/h7.rs b/embassy-stm32/src/flash/h7.rs
index 743925e17..e32a82eef 100644
--- a/embassy-stm32/src/flash/h7.rs
+++ b/embassy-stm32/src/flash/h7.rs
@@ -1,4 +1,3 @@
-use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, Ordering};
 
diff --git a/embassy-stm32/src/flash/u5.rs b/embassy-stm32/src/flash/u5.rs
index 3787082f9..580c490da 100644
--- a/embassy-stm32/src/flash/u5.rs
+++ b/embassy-stm32/src/flash/u5.rs
@@ -1,4 +1,3 @@
-use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, Ordering};
 
diff --git a/embassy-stm32/src/fmc.rs b/embassy-stm32/src/fmc.rs
index 9d731a512..aced69878 100644
--- a/embassy-stm32/src/fmc.rs
+++ b/embassy-stm32/src/fmc.rs
@@ -3,8 +3,7 @@ use core::marker::PhantomData;
 
 use embassy_hal_internal::into_ref;
 
-use crate::gpio::sealed::AFType;
-use crate::gpio::{Pull, Speed};
+use crate::gpio::{AFType, Pull, Speed};
 use crate::Peripheral;
 
 /// FMC driver
@@ -44,7 +43,7 @@ where
 
     /// Get the kernel clock currently in use for this FMC instance.
     pub fn source_clock_hz(&self) -> u32 {
-        <T as crate::rcc::sealed::RccPeripheral>::frequency().0
+        <T as crate::rcc::SealedRccPeripheral>::frequency().0
     }
 }
 
@@ -69,7 +68,7 @@ where
     }
 
     fn source_clock_hz(&self) -> u32 {
-        <T as crate::rcc::sealed::RccPeripheral>::frequency().0
+        <T as crate::rcc::SealedRccPeripheral>::frequency().0
     }
 }
 
@@ -201,18 +200,17 @@ impl<'d, T: Instance> Fmc<'d, T> {
     ));
 }
 
-pub(crate) mod sealed {
-    pub trait Instance: crate::rcc::sealed::RccPeripheral {
-        const REGS: crate::pac::fmc::Fmc;
-    }
+trait SealedInstance: crate::rcc::SealedRccPeripheral {
+    const REGS: crate::pac::fmc::Fmc;
 }
 
 /// FMC instance trait.
-pub trait Instance: sealed::Instance + 'static {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + 'static {}
 
 foreach_peripheral!(
     (fmc, $inst:ident) => {
-        impl crate::fmc::sealed::Instance for crate::peripherals::$inst {
+        impl crate::fmc::SealedInstance for crate::peripherals::$inst {
             const REGS: crate::pac::fmc::Fmc = crate::pac::$inst;
         }
         impl crate::fmc::Instance for crate::peripherals::$inst {}
diff --git a/embassy-stm32/src/fmt.rs b/embassy-stm32/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-stm32/src/fmt.rs
+++ b/embassy-stm32/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-stm32/src/gpio.rs b/embassy-stm32/src/gpio.rs
index 00e3e1727..214813a42 100644
--- a/embassy-stm32/src/gpio.rs
+++ b/embassy-stm32/src/gpio.rs
@@ -6,7 +6,6 @@ use core::convert::Infallible;
 use critical_section::CriticalSection;
 use embassy_hal_internal::{impl_peripheral, into_ref, PeripheralRef};
 
-use self::sealed::Pin as _;
 use crate::pac::gpio::{self, vals};
 use crate::{pac, peripherals, Peripheral};
 
@@ -129,6 +128,18 @@ impl<'d> Flex<'d> {
         });
     }
 
+    /// Put the pin into AF mode, unchecked.
+    ///
+    /// This puts the pin into the AF mode, with the requested number, pull and speed. This is
+    /// completely unchecked, it can attach the pin to literally any peripheral, so use with care.
+    #[inline]
+    pub fn set_as_af_unchecked(&mut self, af_num: u8, af_type: AFType, pull: Pull, speed: Speed) {
+        critical_section::with(|_| {
+            self.pin.set_as_af_pull(af_num, af_type, pull);
+            self.pin.set_speed(speed);
+        });
+    }
+
     /// Get whether the pin input level is high.
     #[inline]
     pub fn is_high(&self) -> bool {
@@ -508,172 +519,168 @@ pub enum OutputType {
     OpenDrain,
 }
 
-impl From<OutputType> for sealed::AFType {
+impl From<OutputType> for AFType {
     fn from(value: OutputType) -> Self {
         match value {
-            OutputType::OpenDrain => sealed::AFType::OutputOpenDrain,
-            OutputType::PushPull => sealed::AFType::OutputPushPull,
+            OutputType::OpenDrain => AFType::OutputOpenDrain,
+            OutputType::PushPull => AFType::OutputPushPull,
         }
     }
 }
 
-#[allow(missing_docs)]
-pub(crate) mod sealed {
-    use super::*;
+/// Alternate function type settings
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum AFType {
+    /// Input
+    Input,
+    /// Output, drive the pin both high or low.
+    OutputPushPull,
+    /// Output, drive the pin low, or don't drive it at all if the output level is high.
+    OutputOpenDrain,
+}
 
-    /// Alternate function type settings
-    #[derive(Debug, Copy, Clone)]
-    #[cfg_attr(feature = "defmt", derive(defmt::Format))]
-    pub enum AFType {
-        /// Input
-        Input,
-        /// Output, drive the pin both high or low.
-        OutputPushPull,
-        /// Output, drive the pin low, or don't drive it at all if the output level is high.
-        OutputOpenDrain,
+pub(crate) trait SealedPin {
+    fn pin_port(&self) -> u8;
+
+    #[inline]
+    fn _pin(&self) -> u8 {
+        self.pin_port() % 16
+    }
+    #[inline]
+    fn _port(&self) -> u8 {
+        self.pin_port() / 16
     }
 
-    pub trait Pin {
-        fn pin_port(&self) -> u8;
+    #[inline]
+    fn block(&self) -> gpio::Gpio {
+        pac::GPIO(self._port() as _)
+    }
 
-        #[inline]
-        fn _pin(&self) -> u8 {
-            self.pin_port() % 16
-        }
-        #[inline]
-        fn _port(&self) -> u8 {
-            self.pin_port() / 16
-        }
+    /// Set the output as high.
+    #[inline]
+    fn set_high(&self) {
+        let n = self._pin() as _;
+        self.block().bsrr().write(|w| w.set_bs(n, true));
+    }
 
-        #[inline]
-        fn block(&self) -> gpio::Gpio {
-            pac::GPIO(self._port() as _)
-        }
+    /// Set the output as low.
+    #[inline]
+    fn set_low(&self) {
+        let n = self._pin() as _;
+        self.block().bsrr().write(|w| w.set_br(n, true));
+    }
 
-        /// Set the output as high.
-        #[inline]
-        fn set_high(&self) {
-            let n = self._pin() as _;
-            self.block().bsrr().write(|w| w.set_bs(n, true));
-        }
+    #[inline]
+    fn set_as_af(&self, af_num: u8, af_type: AFType) {
+        self.set_as_af_pull(af_num, af_type, Pull::None);
+    }
 
-        /// Set the output as low.
-        #[inline]
-        fn set_low(&self) {
-            let n = self._pin() as _;
-            self.block().bsrr().write(|w| w.set_br(n, true));
-        }
+    #[cfg(gpio_v1)]
+    #[inline]
+    fn set_as_af_pull(&self, _af_num: u8, af_type: AFType, pull: Pull) {
+        // F1 uses the AFIO register for remapping.
+        // For now, this is not implemented, so af_num is ignored
+        // _af_num should be zero here, since it is not set by stm32-data
+        let r = self.block();
+        let n = self._pin() as usize;
+        let crlh = if n < 8 { 0 } else { 1 };
+        match af_type {
+            AFType::Input => {
+                let cnf = match pull {
+                    Pull::Up => {
+                        r.bsrr().write(|w| w.set_bs(n, true));
+                        vals::CnfIn::PULL
+                    }
+                    Pull::Down => {
+                        r.bsrr().write(|w| w.set_br(n, true));
+                        vals::CnfIn::PULL
+                    }
+                    Pull::None => vals::CnfIn::FLOATING,
+                };
 
-        #[inline]
-        fn set_as_af(&self, af_num: u8, af_type: AFType) {
-            self.set_as_af_pull(af_num, af_type, Pull::None);
+                r.cr(crlh).modify(|w| {
+                    w.set_mode(n % 8, vals::Mode::INPUT);
+                    w.set_cnf_in(n % 8, cnf);
+                });
+            }
+            AFType::OutputPushPull => {
+                r.cr(crlh).modify(|w| {
+                    w.set_mode(n % 8, vals::Mode::OUTPUT50MHZ);
+                    w.set_cnf_out(n % 8, vals::CnfOut::ALTPUSHPULL);
+                });
+            }
+            AFType::OutputOpenDrain => {
+                r.cr(crlh).modify(|w| {
+                    w.set_mode(n % 8, vals::Mode::OUTPUT50MHZ);
+                    w.set_cnf_out(n % 8, vals::CnfOut::ALTOPENDRAIN);
+                });
+            }
         }
+    }
+
+    #[cfg(gpio_v2)]
+    #[inline]
+    fn set_as_af_pull(&self, af_num: u8, af_type: AFType, pull: Pull) {
+        let pin = self._pin() as usize;
+        let block = self.block();
+        block.afr(pin / 8).modify(|w| w.set_afr(pin % 8, af_num));
+        match af_type {
+            AFType::Input => {}
+            AFType::OutputPushPull => block.otyper().modify(|w| w.set_ot(pin, vals::Ot::PUSHPULL)),
+            AFType::OutputOpenDrain => block.otyper().modify(|w| w.set_ot(pin, vals::Ot::OPENDRAIN)),
+        }
+        block.pupdr().modify(|w| w.set_pupdr(pin, pull.into()));
+
+        block.moder().modify(|w| w.set_moder(pin, vals::Moder::ALTERNATE));
+    }
+
+    #[inline]
+    fn set_as_analog(&self) {
+        let pin = self._pin() as usize;
+        let block = self.block();
+        #[cfg(gpio_v1)]
+        {
+            let crlh = if pin < 8 { 0 } else { 1 };
+            block.cr(crlh).modify(|w| {
+                w.set_mode(pin % 8, vals::Mode::INPUT);
+                w.set_cnf_in(pin % 8, vals::CnfIn::ANALOG);
+            });
+        }
+        #[cfg(gpio_v2)]
+        block.moder().modify(|w| w.set_moder(pin, vals::Moder::ANALOG));
+    }
+
+    /// Set the pin as "disconnected", ie doing nothing and consuming the lowest
+    /// amount of power possible.
+    ///
+    /// This is currently the same as set_as_analog but is semantically different really.
+    /// Drivers should set_as_disconnected pins when dropped.
+    #[inline]
+    fn set_as_disconnected(&self) {
+        self.set_as_analog();
+    }
+
+    #[inline]
+    fn set_speed(&self, speed: Speed) {
+        let pin = self._pin() as usize;
 
         #[cfg(gpio_v1)]
-        #[inline]
-        fn set_as_af_pull(&self, _af_num: u8, af_type: AFType, pull: Pull) {
-            // F1 uses the AFIO register for remapping.
-            // For now, this is not implemented, so af_num is ignored
-            // _af_num should be zero here, since it is not set by stm32-data
-            let r = self.block();
-            let n = self._pin() as usize;
-            let crlh = if n < 8 { 0 } else { 1 };
-            match af_type {
-                AFType::Input => {
-                    let cnf = match pull {
-                        Pull::Up => {
-                            r.bsrr().write(|w| w.set_bs(n, true));
-                            vals::CnfIn::PULL
-                        }
-                        Pull::Down => {
-                            r.bsrr().write(|w| w.set_br(n, true));
-                            vals::CnfIn::PULL
-                        }
-                        Pull::None => vals::CnfIn::FLOATING,
-                    };
-
-                    r.cr(crlh).modify(|w| {
-                        w.set_mode(n % 8, vals::Mode::INPUT);
-                        w.set_cnf_in(n % 8, cnf);
-                    });
-                }
-                AFType::OutputPushPull => {
-                    r.cr(crlh).modify(|w| {
-                        w.set_mode(n % 8, vals::Mode::OUTPUT50MHZ);
-                        w.set_cnf_out(n % 8, vals::CnfOut::ALTPUSHPULL);
-                    });
-                }
-                AFType::OutputOpenDrain => {
-                    r.cr(crlh).modify(|w| {
-                        w.set_mode(n % 8, vals::Mode::OUTPUT50MHZ);
-                        w.set_cnf_out(n % 8, vals::CnfOut::ALTOPENDRAIN);
-                    });
-                }
-            }
+        {
+            let crlh = if pin < 8 { 0 } else { 1 };
+            self.block().cr(crlh).modify(|w| {
+                w.set_mode(pin % 8, speed.into());
+            });
         }
 
         #[cfg(gpio_v2)]
-        #[inline]
-        fn set_as_af_pull(&self, af_num: u8, af_type: AFType, pull: Pull) {
-            let pin = self._pin() as usize;
-            let block = self.block();
-            block.afr(pin / 8).modify(|w| w.set_afr(pin % 8, af_num));
-            match af_type {
-                AFType::Input => {}
-                AFType::OutputPushPull => block.otyper().modify(|w| w.set_ot(pin, vals::Ot::PUSHPULL)),
-                AFType::OutputOpenDrain => block.otyper().modify(|w| w.set_ot(pin, vals::Ot::OPENDRAIN)),
-            }
-            block.pupdr().modify(|w| w.set_pupdr(pin, pull.into()));
-
-            block.moder().modify(|w| w.set_moder(pin, vals::Moder::ALTERNATE));
-        }
-
-        #[inline]
-        fn set_as_analog(&self) {
-            let pin = self._pin() as usize;
-            let block = self.block();
-            #[cfg(gpio_v1)]
-            {
-                let crlh = if pin < 8 { 0 } else { 1 };
-                block.cr(crlh).modify(|w| {
-                    w.set_mode(pin % 8, vals::Mode::INPUT);
-                    w.set_cnf_in(pin % 8, vals::CnfIn::ANALOG);
-                });
-            }
-            #[cfg(gpio_v2)]
-            block.moder().modify(|w| w.set_moder(pin, vals::Moder::ANALOG));
-        }
-
-        /// Set the pin as "disconnected", ie doing nothing and consuming the lowest
-        /// amount of power possible.
-        ///
-        /// This is currently the same as set_as_analog but is semantically different really.
-        /// Drivers should set_as_disconnected pins when dropped.
-        #[inline]
-        fn set_as_disconnected(&self) {
-            self.set_as_analog();
-        }
-
-        #[inline]
-        fn set_speed(&self, speed: Speed) {
-            let pin = self._pin() as usize;
-
-            #[cfg(gpio_v1)]
-            {
-                let crlh = if pin < 8 { 0 } else { 1 };
-                self.block().cr(crlh).modify(|w| {
-                    w.set_mode(pin % 8, speed.into());
-                });
-            }
-
-            #[cfg(gpio_v2)]
-            self.block().ospeedr().modify(|w| w.set_ospeedr(pin, speed.into()));
-        }
+        self.block().ospeedr().modify(|w| w.set_ospeedr(pin, speed.into()));
     }
 }
 
 /// GPIO pin trait.
-pub trait Pin: Peripheral<P = Self> + Into<AnyPin> + sealed::Pin + Sized + 'static {
+#[allow(private_bounds)]
+pub trait Pin: Peripheral<P = Self> + Into<AnyPin> + SealedPin + Sized + 'static {
     /// EXTI channel assigned to this pin.
     ///
     /// For example, PC4 uses EXTI4.
@@ -737,7 +744,7 @@ impl Pin for AnyPin {
     #[cfg(feature = "exti")]
     type ExtiChannel = crate::exti::AnyChannel;
 }
-impl sealed::Pin for AnyPin {
+impl SealedPin for AnyPin {
     #[inline]
     fn pin_port(&self) -> u8 {
         self.pin_port
@@ -752,7 +759,7 @@ foreach_pin!(
             #[cfg(feature = "exti")]
             type ExtiChannel = peripherals::$exti_ch;
         }
-        impl sealed::Pin for peripherals::$pin_name {
+        impl SealedPin for peripherals::$pin_name {
             #[inline]
             fn pin_port(&self) -> u8 {
                 $port_num * 16 + $pin_num
@@ -769,16 +776,9 @@ foreach_pin!(
 
 pub(crate) unsafe fn init(_cs: CriticalSection) {
     #[cfg(afio)]
-    <crate::peripherals::AFIO as crate::rcc::sealed::RccPeripheral>::enable_and_reset_with_cs(_cs);
+    <crate::peripherals::AFIO as crate::rcc::SealedRccPeripheral>::enable_and_reset_with_cs(_cs);
 
     crate::_generated::init_gpio();
-
-    // Setting this bit is mandatory to use PG[15:2].
-    #[cfg(stm32u5)]
-    crate::pac::PWR.svmcr().modify(|w| {
-        w.set_io2sv(true);
-        w.set_io2vmen(true);
-    });
 }
 
 impl<'d> embedded_hal_02::digital::v2::InputPin for Input<'d> {
@@ -833,6 +833,18 @@ impl<'d> embedded_hal_02::digital::v2::ToggleableOutputPin for Output<'d> {
     }
 }
 
+impl<'d> embedded_hal_02::digital::v2::InputPin for OutputOpenDrain<'d> {
+    type Error = Infallible;
+
+    fn is_high(&self) -> Result<bool, Self::Error> {
+        Ok(self.is_high())
+    }
+
+    fn is_low(&self) -> Result<bool, Self::Error> {
+        Ok(self.is_low())
+    }
+}
+
 impl<'d> embedded_hal_02::digital::v2::OutputPin for OutputOpenDrain<'d> {
     type Error = Infallible;
 
@@ -1049,9 +1061,3 @@ impl<'d> embedded_hal_1::digital::StatefulOutputPin for Flex<'d> {
         Ok((*self).is_set_low())
     }
 }
-
-/// Low-level GPIO manipulation.
-#[cfg(feature = "unstable-pac")]
-pub mod low_level {
-    pub use super::sealed::*;
-}
diff --git a/embassy-stm32/src/hash/mod.rs b/embassy-stm32/src/hash/mod.rs
index b47814f8b..787d5b1c9 100644
--- a/embassy-stm32/src/hash/mod.rs
+++ b/embassy-stm32/src/hash/mod.rs
@@ -17,7 +17,7 @@ use crate::dma::NoDma;
 use crate::dma::Transfer;
 use crate::interrupt::typelevel::Interrupt;
 use crate::peripherals::HASH;
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 use crate::{interrupt, pac, peripherals, Peripheral};
 
 #[cfg(hash_v1)]
@@ -561,16 +561,13 @@ impl<'d, T: Instance, D> Hash<'d, T, D> {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
-
-    pub trait Instance {
-        fn regs() -> pac::hash::Hash;
-    }
+trait SealedInstance {
+    fn regs() -> pac::hash::Hash;
 }
 
 /// HASH instance trait.
-pub trait Instance: sealed::Instance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
     /// Interrupt for this HASH instance.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
@@ -581,7 +578,7 @@ foreach_interrupt!(
             type Interrupt = crate::interrupt::typelevel::$irq;
         }
 
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             fn regs() -> crate::pac::hash::Hash {
                 crate::pac::$inst
             }
diff --git a/embassy-stm32/src/hrtim/mod.rs b/embassy-stm32/src/hrtim/mod.rs
index 3ec646fc3..02e45819c 100644
--- a/embassy-stm32/src/hrtim/mod.rs
+++ b/embassy-stm32/src/hrtim/mod.rs
@@ -7,9 +7,7 @@ use core::marker::PhantomData;
 use embassy_hal_internal::{into_ref, PeripheralRef};
 pub use traits::Instance;
 
-#[allow(unused_imports)]
-use crate::gpio::sealed::{AFType, Pin};
-use crate::gpio::AnyPin;
+use crate::gpio::{AFType, AnyPin};
 use crate::time::Hertz;
 use crate::Peripheral;
 
@@ -54,16 +52,13 @@ pub struct ChF<T: Instance> {
     phantom: PhantomData<T>,
 }
 
-mod sealed {
-    use super::Instance;
-
-    pub trait AdvancedChannel<T: Instance> {
-        fn raw() -> usize;
-    }
+trait SealedAdvancedChannel<T: Instance> {
+    fn raw() -> usize;
 }
 
 /// Advanced channel instance trait.
-pub trait AdvancedChannel<T: Instance>: sealed::AdvancedChannel<T> {}
+#[allow(private_bounds)]
+pub trait AdvancedChannel<T: Instance>: SealedAdvancedChannel<T> {}
 
 /// HRTIM PWM pin.
 pub struct PwmPin<'d, T, C> {
@@ -113,7 +108,7 @@ macro_rules! advanced_channel_impl {
             }
         }
 
-        impl<T: Instance> sealed::AdvancedChannel<T> for $channel<T> {
+        impl<T: Instance> SealedAdvancedChannel<T> for $channel<T> {
             fn raw() -> usize {
                 $ch_num
             }
diff --git a/embassy-stm32/src/hrtim/traits.rs b/embassy-stm32/src/hrtim/traits.rs
index dcc2b9ef4..75f9971e2 100644
--- a/embassy-stm32/src/hrtim/traits.rs
+++ b/embassy-stm32/src/hrtim/traits.rs
@@ -1,4 +1,4 @@
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::RccPeripheral;
 use crate::time::Hertz;
 
 #[repr(u8)]
@@ -72,94 +72,92 @@ impl Prescaler {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
+pub(crate) trait SealedInstance: RccPeripheral {
+    fn regs() -> crate::pac::hrtim::Hrtim;
 
-    pub trait Instance: RccPeripheral {
-        fn regs() -> crate::pac::hrtim::Hrtim;
+    #[allow(unused)]
+    fn set_master_frequency(frequency: Hertz) {
+        let f = frequency.0;
 
-        fn set_master_frequency(frequency: Hertz) {
-            let f = frequency.0;
+        // TODO: wire up HRTIM to the RCC mux infra.
+        //#[cfg(stm32f334)]
+        //let timer_f = unsafe { crate::rcc::get_freqs() }.hrtim.unwrap_or(Self::frequency()).0;
+        //#[cfg(not(stm32f334))]
+        let timer_f = Self::frequency().0;
 
-            // TODO: wire up HRTIM to the RCC mux infra.
-            //#[cfg(stm32f334)]
-            //let timer_f = unsafe { crate::rcc::get_freqs() }.hrtim.unwrap_or(Self::frequency()).0;
-            //#[cfg(not(stm32f334))]
-            let timer_f = Self::frequency().0;
+        let psc_min = (timer_f / f) / (u16::MAX as u32 / 32);
+        let psc = if Self::regs().isr().read().dllrdy() {
+            Prescaler::compute_min_high_res(psc_min)
+        } else {
+            Prescaler::compute_min_low_res(psc_min)
+        };
 
-            let psc_min = (timer_f / f) / (u16::MAX as u32 / 32);
-            let psc = if Self::regs().isr().read().dllrdy() {
-                Prescaler::compute_min_high_res(psc_min)
-            } else {
-                Prescaler::compute_min_low_res(psc_min)
-            };
+        let timer_f = 32 * (timer_f / psc as u32);
+        let per: u16 = (timer_f / f) as u16;
 
-            let timer_f = 32 * (timer_f / psc as u32);
-            let per: u16 = (timer_f / f) as u16;
+        let regs = Self::regs();
 
-            let regs = Self::regs();
+        regs.mcr().modify(|w| w.set_ckpsc(psc.into()));
+        regs.mper().modify(|w| w.set_mper(per));
+    }
 
-            regs.mcr().modify(|w| w.set_ckpsc(psc.into()));
-            regs.mper().modify(|w| w.set_mper(per));
-        }
+    fn set_channel_frequency(channel: usize, frequency: Hertz) {
+        let f = frequency.0;
 
-        fn set_channel_frequency(channel: usize, frequency: Hertz) {
-            let f = frequency.0;
+        // TODO: wire up HRTIM to the RCC mux infra.
+        //#[cfg(stm32f334)]
+        //let timer_f = unsafe { crate::rcc::get_freqs() }.hrtim.unwrap_or(Self::frequency()).0;
+        //#[cfg(not(stm32f334))]
+        let timer_f = Self::frequency().0;
 
-            // TODO: wire up HRTIM to the RCC mux infra.
-            //#[cfg(stm32f334)]
-            //let timer_f = unsafe { crate::rcc::get_freqs() }.hrtim.unwrap_or(Self::frequency()).0;
-            //#[cfg(not(stm32f334))]
-            let timer_f = Self::frequency().0;
+        let psc_min = (timer_f / f) / (u16::MAX as u32 / 32);
+        let psc = if Self::regs().isr().read().dllrdy() {
+            Prescaler::compute_min_high_res(psc_min)
+        } else {
+            Prescaler::compute_min_low_res(psc_min)
+        };
 
-            let psc_min = (timer_f / f) / (u16::MAX as u32 / 32);
-            let psc = if Self::regs().isr().read().dllrdy() {
-                Prescaler::compute_min_high_res(psc_min)
-            } else {
-                Prescaler::compute_min_low_res(psc_min)
-            };
+        let timer_f = 32 * (timer_f / psc as u32);
+        let per: u16 = (timer_f / f) as u16;
 
-            let timer_f = 32 * (timer_f / psc as u32);
-            let per: u16 = (timer_f / f) as u16;
+        let regs = Self::regs();
 
-            let regs = Self::regs();
+        regs.tim(channel).cr().modify(|w| w.set_ckpsc(psc.into()));
+        regs.tim(channel).per().modify(|w| w.set_per(per));
+    }
 
-            regs.tim(channel).cr().modify(|w| w.set_ckpsc(psc.into()));
-            regs.tim(channel).per().modify(|w| w.set_per(per));
-        }
+    /// Set the dead time as a proportion of max_duty
+    fn set_channel_dead_time(channel: usize, dead_time: u16) {
+        let regs = Self::regs();
 
-        /// Set the dead time as a proportion of max_duty
-        fn set_channel_dead_time(channel: usize, dead_time: u16) {
-            let regs = Self::regs();
+        let channel_psc: Prescaler = regs.tim(channel).cr().read().ckpsc().into();
 
-            let channel_psc: Prescaler = regs.tim(channel).cr().read().ckpsc().into();
+        // The dead-time base clock runs 4 times slower than the hrtim base clock
+        // u9::MAX = 511
+        let psc_min = (channel_psc as u32 * dead_time as u32) / (4 * 511);
+        let psc = if Self::regs().isr().read().dllrdy() {
+            Prescaler::compute_min_high_res(psc_min)
+        } else {
+            Prescaler::compute_min_low_res(psc_min)
+        };
 
-            // The dead-time base clock runs 4 times slower than the hrtim base clock
-            // u9::MAX = 511
-            let psc_min = (channel_psc as u32 * dead_time as u32) / (4 * 511);
-            let psc = if Self::regs().isr().read().dllrdy() {
-                Prescaler::compute_min_high_res(psc_min)
-            } else {
-                Prescaler::compute_min_low_res(psc_min)
-            };
+        let dt_val = (psc as u32 * dead_time as u32) / (4 * channel_psc as u32);
 
-            let dt_val = (psc as u32 * dead_time as u32) / (4 * channel_psc as u32);
-
-            regs.tim(channel).dt().modify(|w| {
-                w.set_dtprsc(psc.into());
-                w.set_dtf(dt_val as u16);
-                w.set_dtr(dt_val as u16);
-            });
-        }
+        regs.tim(channel).dt().modify(|w| {
+            w.set_dtprsc(psc.into());
+            w.set_dtf(dt_val as u16);
+            w.set_dtr(dt_val as u16);
+        });
     }
 }
 
 /// HRTIM instance trait.
-pub trait Instance: sealed::Instance + 'static {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + 'static {}
 
 foreach_interrupt! {
     ($inst:ident, hrtim, HRTIM, MASTER, $irq:ident) => {
-        impl sealed::Instance for crate::peripherals::$inst {
+        impl SealedInstance for crate::peripherals::$inst {
             fn regs() -> crate::pac::hrtim::Hrtim {
                 crate::pac::$inst
             }
diff --git a/embassy-stm32/src/i2c/mod.rs b/embassy-stm32/src/i2c/mod.rs
index 2416005b5..f1b11cc44 100644
--- a/embassy-stm32/src/i2c/mod.rs
+++ b/embassy-stm32/src/i2c/mod.rs
@@ -14,8 +14,7 @@ use embassy_sync::waitqueue::AtomicWaker;
 use embassy_time::{Duration, Instant};
 
 use crate::dma::NoDma;
-use crate::gpio::sealed::AFType;
-use crate::gpio::Pull;
+use crate::gpio::{AFType, Pull};
 use crate::interrupt::typelevel::Interrupt;
 use crate::time::Hertz;
 use crate::{interrupt, peripherals};
@@ -175,30 +174,27 @@ impl Timeout {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
+struct State {
+    #[allow(unused)]
+    waker: AtomicWaker,
+}
 
-    pub struct State {
-        #[allow(unused)]
-        pub waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                waker: AtomicWaker::new(),
-            }
+impl State {
+    const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
         }
     }
-
-    pub trait Instance: crate::rcc::RccPeripheral {
-        fn regs() -> crate::pac::i2c::I2c;
-        fn state() -> &'static State;
-    }
+}
+
+trait SealedInstance: crate::rcc::RccPeripheral {
+    fn regs() -> crate::pac::i2c::I2c;
+    fn state() -> &'static State;
 }
 
 /// I2C peripheral instance
-pub trait Instance: sealed::Instance + 'static {
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + 'static {
     /// Event interrupt for this instance
     type EventInterrupt: interrupt::typelevel::Interrupt;
     /// Error interrupt for this instance
@@ -234,13 +230,13 @@ impl<T: Instance> interrupt::typelevel::Handler<T::ErrorInterrupt> for ErrorInte
 
 foreach_peripheral!(
     (i2c, $inst:ident) => {
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             fn regs() -> crate::pac::i2c::I2c {
                 crate::pac::$inst
             }
 
-            fn state() -> &'static sealed::State {
-                static STATE: sealed::State = sealed::State::new();
+            fn state() -> &'static State {
+                static STATE: State = State::new();
                 &STATE
             }
         }
@@ -311,10 +307,10 @@ impl<'d, T: Instance> embedded_hal_1::i2c::I2c for I2c<'d, T, NoDma, NoDma> {
 
     fn transaction(
         &mut self,
-        _address: u8,
-        _operations: &mut [embedded_hal_1::i2c::Operation<'_>],
+        address: u8,
+        operations: &mut [embedded_hal_1::i2c::Operation<'_>],
     ) -> Result<(), Self::Error> {
-        todo!();
+        self.blocking_transaction(address, operations)
     }
 }
 
diff --git a/embassy-stm32/src/i2c/v1.rs b/embassy-stm32/src/i2c/v1.rs
index cbbc201de..9f29ed5e0 100644
--- a/embassy-stm32/src/i2c/v1.rs
+++ b/embassy-stm32/src/i2c/v1.rs
@@ -10,11 +10,11 @@ use core::task::Poll;
 use embassy_embedded_hal::SetConfig;
 use embassy_futures::select::{select, Either};
 use embassy_hal_internal::drop::OnDrop;
+use embedded_hal_1::i2c::Operation;
 
 use super::*;
 use crate::dma::Transfer;
 use crate::pac::i2c;
-use crate::time::Hertz;
 
 // /!\                      /!\
 // /!\ Implementation note! /!\
@@ -41,6 +41,68 @@ pub unsafe fn on_interrupt<T: Instance>() {
     });
 }
 
+/// Frame type in I2C transaction.
+///
+/// This tells each method what kind of framing to use, to generate a (repeated) start condition (ST
+/// or SR), and/or a stop condition (SP). For read operations, this also controls whether to send an
+/// ACK or NACK after the last byte received.
+///
+/// For write operations, the following options are identical because they differ only in the (N)ACK
+/// treatment relevant for read operations:
+///
+/// - `FirstFrame` and `FirstAndNextFrame`
+/// - `NextFrame` and `LastFrameNoStop`
+///
+/// Abbreviations used below:
+///
+/// - `ST` = start condition
+/// - `SR` = repeated start condition
+/// - `SP` = stop condition
+#[derive(Copy, Clone)]
+enum FrameOptions {
+    /// `[ST/SR]+[NACK]+[SP]` First frame (of this type) in operation and last frame overall in this
+    /// transaction.
+    FirstAndLastFrame,
+    /// `[ST/SR]+[NACK]` First frame of this type in transaction, last frame in a read operation but
+    /// not the last frame overall.
+    FirstFrame,
+    /// `[ST/SR]+[ACK]` First frame of this type in transaction, neither last frame overall nor last
+    /// frame in a read operation.
+    FirstAndNextFrame,
+    /// `[ACK]` Middle frame in a read operation (neither first nor last).
+    NextFrame,
+    /// `[NACK]+[SP]` Last frame overall in this transaction but not the first frame.
+    LastFrame,
+    /// `[NACK]` Last frame in a read operation but not last frame overall in this transaction.
+    LastFrameNoStop,
+}
+
+impl FrameOptions {
+    /// Sends start or repeated start condition before transfer.
+    fn send_start(self) -> bool {
+        match self {
+            Self::FirstAndLastFrame | Self::FirstFrame | Self::FirstAndNextFrame => true,
+            Self::NextFrame | Self::LastFrame | Self::LastFrameNoStop => false,
+        }
+    }
+
+    /// Sends stop condition after transfer.
+    fn send_stop(self) -> bool {
+        match self {
+            Self::FirstAndLastFrame | Self::LastFrame => true,
+            Self::FirstFrame | Self::FirstAndNextFrame | Self::NextFrame | Self::LastFrameNoStop => false,
+        }
+    }
+
+    /// Sends NACK after last byte received, indicating end of read operation.
+    fn send_nack(self) -> bool {
+        match self {
+            Self::FirstAndLastFrame | Self::FirstFrame | Self::LastFrame | Self::LastFrameNoStop => true,
+            Self::FirstAndNextFrame | Self::NextFrame => false,
+        }
+    }
+}
+
 impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     pub(crate) fn init(&mut self, freq: Hertz, _config: Config) {
         T::regs().cr1().modify(|reg| {
@@ -124,46 +186,57 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         Ok(sr1)
     }
 
-    fn write_bytes(&mut self, addr: u8, bytes: &[u8], timeout: Timeout) -> Result<(), Error> {
-        // Send a START condition
+    fn write_bytes(&mut self, addr: u8, bytes: &[u8], timeout: Timeout, frame: FrameOptions) -> Result<(), Error> {
+        if frame.send_start() {
+            // Send a START condition
 
-        T::regs().cr1().modify(|reg| {
-            reg.set_start(true);
-        });
+            T::regs().cr1().modify(|reg| {
+                reg.set_start(true);
+            });
 
-        // Wait until START condition was generated
-        while !Self::check_and_clear_error_flags()?.start() {
-            timeout.check()?;
+            // Wait until START condition was generated
+            while !Self::check_and_clear_error_flags()?.start() {
+                timeout.check()?;
+            }
+
+            // Also wait until signalled we're master and everything is waiting for us
+            while {
+                Self::check_and_clear_error_flags()?;
+
+                let sr2 = T::regs().sr2().read();
+                !sr2.msl() && !sr2.busy()
+            } {
+                timeout.check()?;
+            }
+
+            // Set up current address, we're trying to talk to
+            T::regs().dr().write(|reg| reg.set_dr(addr << 1));
+
+            // Wait until address was sent
+            // Wait for the address to be acknowledged
+            // Check for any I2C errors. If a NACK occurs, the ADDR bit will never be set.
+            while !Self::check_and_clear_error_flags()?.addr() {
+                timeout.check()?;
+            }
+
+            // Clear condition by reading SR2
+            let _ = T::regs().sr2().read();
         }
 
-        // Also wait until signalled we're master and everything is waiting for us
-        while {
-            Self::check_and_clear_error_flags()?;
-
-            let sr2 = T::regs().sr2().read();
-            !sr2.msl() && !sr2.busy()
-        } {
-            timeout.check()?;
-        }
-
-        // Set up current address, we're trying to talk to
-        T::regs().dr().write(|reg| reg.set_dr(addr << 1));
-
-        // Wait until address was sent
-        // Wait for the address to be acknowledged
-        // Check for any I2C errors. If a NACK occurs, the ADDR bit will never be set.
-        while !Self::check_and_clear_error_flags()?.addr() {
-            timeout.check()?;
-        }
-
-        // Clear condition by reading SR2
-        let _ = T::regs().sr2().read();
-
         // Send bytes
         for c in bytes {
             self.send_byte(*c, timeout)?;
         }
 
+        if frame.send_stop() {
+            // Send a STOP condition
+            T::regs().cr1().modify(|reg| reg.set_stop(true));
+            // Wait for STOP condition to transmit.
+            while T::regs().cr1().read().stop() {
+                timeout.check()?;
+            }
+        }
+
         // Fallthrough is success
         Ok(())
     }
@@ -205,8 +278,18 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         Ok(value)
     }
 
-    fn blocking_read_timeout(&mut self, addr: u8, buffer: &mut [u8], timeout: Timeout) -> Result<(), Error> {
-        if let Some((last, buffer)) = buffer.split_last_mut() {
+    fn blocking_read_timeout(
+        &mut self,
+        addr: u8,
+        buffer: &mut [u8],
+        timeout: Timeout,
+        frame: FrameOptions,
+    ) -> Result<(), Error> {
+        let Some((last, buffer)) = buffer.split_last_mut() else {
+            return Err(Error::Overrun);
+        };
+
+        if frame.send_start() {
             // Send a START condition and set ACK bit
             T::regs().cr1().modify(|reg| {
                 reg.set_start(true);
@@ -237,49 +320,45 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
 
             // Clear condition by reading SR2
             let _ = T::regs().sr2().read();
+        }
 
-            // Receive bytes into buffer
-            for c in buffer {
-                *c = self.recv_byte(timeout)?;
-            }
+        // Receive bytes into buffer
+        for c in buffer {
+            *c = self.recv_byte(timeout)?;
+        }
 
-            // Prepare to send NACK then STOP after next byte
-            T::regs().cr1().modify(|reg| {
+        // Prepare to send NACK then STOP after next byte
+        T::regs().cr1().modify(|reg| {
+            if frame.send_nack() {
                 reg.set_ack(false);
+            }
+            if frame.send_stop() {
                 reg.set_stop(true);
-            });
+            }
+        });
 
-            // Receive last byte
-            *last = self.recv_byte(timeout)?;
+        // Receive last byte
+        *last = self.recv_byte(timeout)?;
 
+        if frame.send_stop() {
             // Wait for the STOP to be sent.
             while T::regs().cr1().read().stop() {
                 timeout.check()?;
             }
-
-            // Fallthrough is success
-            Ok(())
-        } else {
-            Err(Error::Overrun)
         }
+
+        // Fallthrough is success
+        Ok(())
     }
 
     /// Blocking read.
     pub fn blocking_read(&mut self, addr: u8, read: &mut [u8]) -> Result<(), Error> {
-        self.blocking_read_timeout(addr, read, self.timeout())
+        self.blocking_read_timeout(addr, read, self.timeout(), FrameOptions::FirstAndLastFrame)
     }
 
     /// Blocking write.
     pub fn blocking_write(&mut self, addr: u8, write: &[u8]) -> Result<(), Error> {
-        let timeout = self.timeout();
-
-        self.write_bytes(addr, write, timeout)?;
-        // Send a STOP condition
-        T::regs().cr1().modify(|reg| reg.set_stop(true));
-        // Wait for STOP condition to transmit.
-        while T::regs().cr1().read().stop() {
-            timeout.check()?;
-        }
+        self.write_bytes(addr, write, self.timeout(), FrameOptions::FirstAndLastFrame)?;
 
         // Fallthrough is success
         Ok(())
@@ -287,10 +366,85 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
 
     /// Blocking write, restart, read.
     pub fn blocking_write_read(&mut self, addr: u8, write: &[u8], read: &mut [u8]) -> Result<(), Error> {
+        // Check empty read buffer before starting transaction. Otherwise, we would not generate the
+        // stop condition below.
+        if read.is_empty() {
+            return Err(Error::Overrun);
+        }
+
         let timeout = self.timeout();
 
-        self.write_bytes(addr, write, timeout)?;
-        self.blocking_read_timeout(addr, read, timeout)?;
+        self.write_bytes(addr, write, timeout, FrameOptions::FirstFrame)?;
+        self.blocking_read_timeout(addr, read, timeout, FrameOptions::FirstAndLastFrame)?;
+
+        Ok(())
+    }
+
+    /// Blocking transaction with operations.
+    ///
+    /// Consecutive operations of same type are merged. See [transaction contract] for details.
+    ///
+    /// [transaction contract]: embedded_hal_1::i2c::I2c::transaction
+    pub fn blocking_transaction(&mut self, addr: u8, operations: &mut [Operation<'_>]) -> Result<(), Error> {
+        // Check empty read buffer before starting transaction. Otherwise, we would not generate the
+        // stop condition below.
+        if operations.iter().any(|op| match op {
+            Operation::Read(read) => read.is_empty(),
+            Operation::Write(_) => false,
+        }) {
+            return Err(Error::Overrun);
+        }
+
+        let timeout = self.timeout();
+
+        let mut operations = operations.iter_mut();
+
+        let mut prev_op: Option<&mut Operation<'_>> = None;
+        let mut next_op = operations.next();
+
+        while let Some(op) = next_op {
+            next_op = operations.next();
+
+            // Check if this is the first frame of this type. This is the case for the first overall
+            // frame in the transaction and whenever the type of operation changes.
+            let first_frame =
+                match (prev_op.as_ref(), &op) {
+                    (None, _) => true,
+                    (Some(Operation::Read(_)), Operation::Write(_))
+                    | (Some(Operation::Write(_)), Operation::Read(_)) => true,
+                    (Some(Operation::Read(_)), Operation::Read(_))
+                    | (Some(Operation::Write(_)), Operation::Write(_)) => false,
+                };
+
+            let frame = match (first_frame, next_op.as_ref()) {
+                // If this is the first frame of this type, we generate a (repeated) start condition
+                // but have to consider the next operation: if it is the last, we generate the final
+                // stop condition. Otherwise, we branch on the operation: with read operations, only
+                // the last byte overall (before a write operation or the end of the transaction) is
+                // to be NACK'd, i.e. if another read operation follows, we must ACK this last byte.
+                (true, None) => FrameOptions::FirstAndLastFrame,
+                // Make sure to keep sending ACK for last byte in read operation when it is followed
+                // by another consecutive read operation. If the current operation is write, this is
+                // identical to `FirstFrame`.
+                (true, Some(Operation::Read(_))) => FrameOptions::FirstAndNextFrame,
+                // Otherwise, send NACK for last byte (in read operation). (For write, this does not
+                // matter and could also be `FirstAndNextFrame`.)
+                (true, Some(Operation::Write(_))) => FrameOptions::FirstFrame,
+
+                // If this is not the first frame of its type, we do not generate a (repeated) start
+                // condition. Otherwise, we branch the same way as above.
+                (false, None) => FrameOptions::LastFrame,
+                (false, Some(Operation::Read(_))) => FrameOptions::NextFrame,
+                (false, Some(Operation::Write(_))) => FrameOptions::LastFrameNoStop,
+            };
+
+            match op {
+                Operation::Read(read) => self.blocking_read_timeout(addr, read, timeout, frame)?,
+                Operation::Write(write) => self.write_bytes(addr, write, timeout, frame)?,
+            }
+
+            prev_op = Some(op);
+        }
 
         Ok(())
     }
diff --git a/embassy-stm32/src/i2c/v2.rs b/embassy-stm32/src/i2c/v2.rs
index bd3abaac1..8baf2849d 100644
--- a/embassy-stm32/src/i2c/v2.rs
+++ b/embassy-stm32/src/i2c/v2.rs
@@ -4,11 +4,11 @@ use core::task::Poll;
 
 use embassy_embedded_hal::SetConfig;
 use embassy_hal_internal::drop::OnDrop;
+use embedded_hal_1::i2c::Operation;
 
 use super::*;
 use crate::dma::Transfer;
 use crate::pac::i2c;
-use crate::time::Hertz;
 
 pub(crate) unsafe fn on_interrupt<T: Instance>() {
     let regs = T::regs();
@@ -579,6 +579,17 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         // Automatic Stop
     }
 
+    /// Blocking transaction with operations.
+    ///
+    /// Consecutive operations of same type are merged. See [transaction contract] for details.
+    ///
+    /// [transaction contract]: embedded_hal_1::i2c::I2c::transaction
+    pub fn blocking_transaction(&mut self, addr: u8, operations: &mut [Operation<'_>]) -> Result<(), Error> {
+        let _ = addr;
+        let _ = operations;
+        todo!()
+    }
+
     /// Blocking write multiple buffers.
     ///
     /// The buffers are concatenated in a single write transaction.
diff --git a/embassy-stm32/src/i2s.rs b/embassy-stm32/src/i2s.rs
index fa9ec0532..c5a606b21 100644
--- a/embassy-stm32/src/i2s.rs
+++ b/embassy-stm32/src/i2s.rs
@@ -1,8 +1,7 @@
 //! Inter-IC Sound (I2S)
 use embassy_hal_internal::into_ref;
 
-use crate::gpio::sealed::{AFType, Pin as _};
-use crate::gpio::AnyPin;
+use crate::gpio::{AFType, AnyPin, SealedPin};
 use crate::pac::spi::vals;
 use crate::spi::{Config as SpiConfig, *};
 use crate::time::Hertz;
diff --git a/embassy-stm32/src/ipcc.rs b/embassy-stm32/src/ipcc.rs
index 523719bb9..4d535cce2 100644
--- a/embassy-stm32/src/ipcc.rs
+++ b/embassy-stm32/src/ipcc.rs
@@ -4,11 +4,12 @@ use core::future::poll_fn;
 use core::sync::atomic::{compiler_fence, Ordering};
 use core::task::Poll;
 
-use self::sealed::Instance;
+use embassy_sync::waitqueue::AtomicWaker;
+
 use crate::interrupt;
 use crate::interrupt::typelevel::Interrupt;
 use crate::peripherals::IPCC;
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 
 /// Interrupt handler.
 pub struct ReceiveInterruptHandler {}
@@ -207,7 +208,7 @@ impl Ipcc {
     }
 }
 
-impl sealed::Instance for crate::peripherals::IPCC {
+impl SealedInstance for crate::peripherals::IPCC {
     fn regs() -> crate::pac::ipcc::Ipcc {
         crate::pac::IPCC
     }
@@ -216,58 +217,52 @@ impl sealed::Instance for crate::peripherals::IPCC {
         crate::pac::PWR.cr4().modify(|w| w.set_c2boot(enabled));
     }
 
-    fn state() -> &'static self::sealed::State {
-        static STATE: self::sealed::State = self::sealed::State::new();
+    fn state() -> &'static State {
+        static STATE: State = State::new();
         &STATE
     }
 }
 
-pub(crate) mod sealed {
-    use embassy_sync::waitqueue::AtomicWaker;
+struct State {
+    rx_wakers: [AtomicWaker; 6],
+    tx_wakers: [AtomicWaker; 6],
+}
 
-    use super::*;
+impl State {
+    const fn new() -> Self {
+        const WAKER: AtomicWaker = AtomicWaker::new();
 
-    pub struct State {
-        rx_wakers: [AtomicWaker; 6],
-        tx_wakers: [AtomicWaker; 6],
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            const WAKER: AtomicWaker = AtomicWaker::new();
-
-            Self {
-                rx_wakers: [WAKER; 6],
-                tx_wakers: [WAKER; 6],
-            }
-        }
-
-        pub const fn rx_waker_for(&self, channel: IpccChannel) -> &AtomicWaker {
-            match channel {
-                IpccChannel::Channel1 => &self.rx_wakers[0],
-                IpccChannel::Channel2 => &self.rx_wakers[1],
-                IpccChannel::Channel3 => &self.rx_wakers[2],
-                IpccChannel::Channel4 => &self.rx_wakers[3],
-                IpccChannel::Channel5 => &self.rx_wakers[4],
-                IpccChannel::Channel6 => &self.rx_wakers[5],
-            }
-        }
-
-        pub const fn tx_waker_for(&self, channel: IpccChannel) -> &AtomicWaker {
-            match channel {
-                IpccChannel::Channel1 => &self.tx_wakers[0],
-                IpccChannel::Channel2 => &self.tx_wakers[1],
-                IpccChannel::Channel3 => &self.tx_wakers[2],
-                IpccChannel::Channel4 => &self.tx_wakers[3],
-                IpccChannel::Channel5 => &self.tx_wakers[4],
-                IpccChannel::Channel6 => &self.tx_wakers[5],
-            }
+        Self {
+            rx_wakers: [WAKER; 6],
+            tx_wakers: [WAKER; 6],
         }
     }
 
-    pub trait Instance: crate::rcc::RccPeripheral {
-        fn regs() -> crate::pac::ipcc::Ipcc;
-        fn set_cpu2(enabled: bool);
-        fn state() -> &'static State;
+    const fn rx_waker_for(&self, channel: IpccChannel) -> &AtomicWaker {
+        match channel {
+            IpccChannel::Channel1 => &self.rx_wakers[0],
+            IpccChannel::Channel2 => &self.rx_wakers[1],
+            IpccChannel::Channel3 => &self.rx_wakers[2],
+            IpccChannel::Channel4 => &self.rx_wakers[3],
+            IpccChannel::Channel5 => &self.rx_wakers[4],
+            IpccChannel::Channel6 => &self.rx_wakers[5],
+        }
+    }
+
+    const fn tx_waker_for(&self, channel: IpccChannel) -> &AtomicWaker {
+        match channel {
+            IpccChannel::Channel1 => &self.tx_wakers[0],
+            IpccChannel::Channel2 => &self.tx_wakers[1],
+            IpccChannel::Channel3 => &self.tx_wakers[2],
+            IpccChannel::Channel4 => &self.tx_wakers[3],
+            IpccChannel::Channel5 => &self.tx_wakers[4],
+            IpccChannel::Channel6 => &self.tx_wakers[5],
+        }
     }
 }
+
+trait SealedInstance: crate::rcc::RccPeripheral {
+    fn regs() -> crate::pac::ipcc::Ipcc;
+    fn set_cpu2(enabled: bool);
+    fn state() -> &'static State;
+}
diff --git a/embassy-stm32/src/lib.rs b/embassy-stm32/src/lib.rs
index aba53ad80..8f510047f 100644
--- a/embassy-stm32/src/lib.rs
+++ b/embassy-stm32/src/lib.rs
@@ -75,14 +75,14 @@ pub mod sai;
 pub mod sdmmc;
 #[cfg(spi)]
 pub mod spi;
+#[cfg(ucpd)]
+pub mod ucpd;
 #[cfg(uid)]
 pub mod uid;
 #[cfg(usart)]
 pub mod usart;
-#[cfg(usb)]
+#[cfg(any(usb, otg))]
 pub mod usb;
-#[cfg(otg)]
-pub mod usb_otg;
 #[cfg(iwdg)]
 pub mod wdg;
 
@@ -107,10 +107,10 @@ pub use crate::_generated::interrupt;
 /// Example of how to bind one interrupt:
 ///
 /// ```rust,ignore
-/// use embassy_stm32::{bind_interrupts, usb_otg, peripherals};
+/// use embassy_stm32::{bind_interrupts, usb, peripherals};
 ///
 /// bind_interrupts!(struct Irqs {
-///     OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+///     OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
 /// });
 /// ```
 ///
@@ -160,7 +160,7 @@ pub(crate) use stm32_metapac as pac;
 use crate::interrupt::Priority;
 #[cfg(feature = "rt")]
 pub use crate::pac::NVIC_PRIO_BITS;
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 
 /// `embassy-stm32` global configuration.
 #[non_exhaustive]
@@ -174,6 +174,14 @@ pub struct Config {
     #[cfg(dbgmcu)]
     pub enable_debug_during_sleep: bool,
 
+    /// On low-power boards (eg. `stm32l4`, `stm32l5` and `stm32u5`),
+    /// some GPIO pins are powered by an auxiliary, independent power supply (`VDDIO2`),
+    /// which needs to be enabled before these pins can be used.
+    ///
+    /// May increase power consumption. Defaults to true.
+    #[cfg(any(stm32l4, stm32l5, stm32u5))]
+    pub enable_independent_io_supply: bool,
+
     /// BDMA interrupt priority.
     ///
     /// Defaults to P0 (highest).
@@ -191,6 +199,18 @@ pub struct Config {
     /// Defaults to P0 (highest).
     #[cfg(gpdma)]
     pub gpdma_interrupt_priority: Priority,
+
+    /// Enables UCPD1 dead battery functionality.
+    ///
+    /// Defaults to false (disabled).
+    #[cfg(peri_ucpd1)]
+    pub enable_ucpd1_dead_battery: bool,
+
+    /// Enables UCPD2 dead battery functionality.
+    ///
+    /// Defaults to false (disabled).
+    #[cfg(peri_ucpd2)]
+    pub enable_ucpd2_dead_battery: bool,
 }
 
 impl Default for Config {
@@ -199,12 +219,18 @@ impl Default for Config {
             rcc: Default::default(),
             #[cfg(dbgmcu)]
             enable_debug_during_sleep: true,
+            #[cfg(any(stm32l4, stm32l5, stm32u5))]
+            enable_independent_io_supply: true,
             #[cfg(bdma)]
             bdma_interrupt_priority: Priority::P0,
             #[cfg(dma)]
             dma_interrupt_priority: Priority::P0,
             #[cfg(gpdma)]
             gpdma_interrupt_priority: Priority::P0,
+            #[cfg(peri_ucpd1)]
+            enable_ucpd1_dead_battery: false,
+            #[cfg(peri_ucpd2)]
+            enable_ucpd2_dead_battery: false,
         }
     }
 }
@@ -256,7 +282,44 @@ pub fn init(config: Config) -> Peripherals {
         #[cfg(not(any(stm32f2, stm32f4, stm32f7, stm32l0, stm32h5, stm32h7)))]
         peripherals::FLASH::enable_and_reset_with_cs(cs);
 
+        // Enable the VDDIO2 power supply on chips that have it.
+        // Note that this requires the PWR peripheral to be enabled first.
+        #[cfg(any(stm32l4, stm32l5))]
+        {
+            crate::pac::PWR.cr2().modify(|w| {
+                // The official documentation states that we should ideally enable VDDIO2
+                // through the PVME2 bit, but it looks like this isn't required,
+                // and CubeMX itself skips this step.
+                w.set_iosv(config.enable_independent_io_supply);
+            });
+        }
+        #[cfg(stm32u5)]
+        {
+            crate::pac::PWR.svmcr().modify(|w| {
+                w.set_io2sv(config.enable_independent_io_supply);
+            });
+        }
+
+        // dead battery functionality is still present on these
+        // chips despite them not having UCPD- disable it
+        #[cfg(any(stm32g070, stm32g0b0))]
+        {
+            crate::pac::SYSCFG.cfgr1().modify(|w| {
+                w.set_ucpd1_strobe(true);
+                w.set_ucpd2_strobe(true);
+            });
+        }
+
         unsafe {
+            #[cfg(ucpd)]
+            ucpd::init(
+                cs,
+                #[cfg(peri_ucpd1)]
+                config.enable_ucpd1_dead_battery,
+                #[cfg(peri_ucpd2)]
+                config.enable_ucpd2_dead_battery,
+            );
+
             #[cfg(feature = "_split-pins-enabled")]
             crate::pac::SYSCFG.pmcr().modify(|pmcr| {
                 #[cfg(feature = "split-pa0")]
diff --git a/embassy-stm32/src/opamp.rs b/embassy-stm32/src/opamp.rs
index cf531e266..a3b4352c0 100644
--- a/embassy-stm32/src/opamp.rs
+++ b/embassy-stm32/src/opamp.rs
@@ -81,8 +81,8 @@ impl<'d, T: Instance> OpAmp<'d, T> {
     /// [`OpAmpOutput`] is dropped.
     pub fn buffer_ext(
         &'d mut self,
-        in_pin: impl Peripheral<P = impl NonInvertingPin<T> + crate::gpio::sealed::Pin>,
-        out_pin: impl Peripheral<P = impl OutputPin<T> + crate::gpio::sealed::Pin> + 'd,
+        in_pin: impl Peripheral<P = impl NonInvertingPin<T> + crate::gpio::Pin>,
+        out_pin: impl Peripheral<P = impl OutputPin<T> + crate::gpio::Pin> + 'd,
         gain: OpAmpGain,
     ) -> OpAmpOutput<'d, T> {
         into_ref!(in_pin);
@@ -122,7 +122,7 @@ impl<'d, T: Instance> OpAmp<'d, T> {
     #[cfg(opamp_g4)]
     pub fn buffer_int(
         &'d mut self,
-        pin: impl Peripheral<P = impl NonInvertingPin<T> + crate::gpio::sealed::Pin>,
+        pin: impl Peripheral<P = impl NonInvertingPin<T> + crate::gpio::Pin>,
         gain: OpAmpGain,
     ) -> OpAmpInternalOutput<'d, T> {
         into_ref!(pin);
@@ -166,37 +166,39 @@ impl<'d, T: Instance> Drop for OpAmpInternalOutput<'d, T> {
     }
 }
 
-/// Opamp instance trait.
-pub trait Instance: sealed::Instance + 'static {}
-
-pub(crate) mod sealed {
-    pub trait Instance {
-        fn regs() -> crate::pac::opamp::Opamp;
-    }
-
-    pub trait NonInvertingPin<T: Instance> {
-        fn channel(&self) -> u8;
-    }
-
-    pub trait InvertingPin<T: Instance> {
-        fn channel(&self) -> u8;
-    }
-
-    pub trait OutputPin<T: Instance> {}
+pub(crate) trait SealedInstance {
+    fn regs() -> crate::pac::opamp::Opamp;
 }
 
+pub(crate) trait SealedNonInvertingPin<T: Instance> {
+    fn channel(&self) -> u8;
+}
+
+pub(crate) trait SealedInvertingPin<T: Instance> {
+    #[allow(unused)]
+    fn channel(&self) -> u8;
+}
+
+pub(crate) trait SealedOutputPin<T: Instance> {}
+
+/// Opamp instance trait.
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + 'static {}
 /// Non-inverting pin trait.
-pub trait NonInvertingPin<T: Instance>: sealed::NonInvertingPin<T> {}
+#[allow(private_bounds)]
+pub trait NonInvertingPin<T: Instance>: SealedNonInvertingPin<T> {}
 /// Inverting pin trait.
-pub trait InvertingPin<T: Instance>: sealed::InvertingPin<T> {}
+#[allow(private_bounds)]
+pub trait InvertingPin<T: Instance>: SealedInvertingPin<T> {}
 /// Output pin trait.
-pub trait OutputPin<T: Instance>: sealed::OutputPin<T> {}
+#[allow(private_bounds)]
+pub trait OutputPin<T: Instance>: SealedOutputPin<T> {}
 
 macro_rules! impl_opamp_external_output {
     ($inst:ident, $adc:ident, $ch:expr) => {
         foreach_adc!(
             ($adc, $common_inst:ident, $adc_clock:ident) => {
-                impl<'d> crate::adc::sealed::AdcPin<crate::peripherals::$adc>
+                impl<'d> crate::adc::SealedAdcPin<crate::peripherals::$adc>
                     for OpAmpOutput<'d, crate::peripherals::$inst>
                 {
                     fn channel(&self) -> u8 {
@@ -242,7 +244,7 @@ macro_rules! impl_opamp_internal_output {
     ($inst:ident, $adc:ident, $ch:expr) => {
         foreach_adc!(
             ($adc, $common_inst:ident, $adc_clock:ident) => {
-                impl<'d> crate::adc::sealed::AdcPin<crate::peripherals::$adc>
+                impl<'d> crate::adc::SealedAdcPin<crate::peripherals::$adc>
                     for OpAmpInternalOutput<'d, crate::peripherals::$inst>
                 {
                     fn channel(&self) -> u8 {
@@ -291,7 +293,7 @@ foreach_peripheral!(
 
 foreach_peripheral! {
     (opamp, $inst:ident) => {
-        impl sealed::Instance for crate::peripherals::$inst {
+        impl SealedInstance for crate::peripherals::$inst {
             fn regs() -> crate::pac::opamp::Opamp {
                 crate::pac::$inst
             }
@@ -306,7 +308,7 @@ foreach_peripheral! {
 macro_rules! impl_opamp_vp_pin {
     ($inst:ident, $pin:ident, $ch:expr) => {
         impl crate::opamp::NonInvertingPin<peripherals::$inst> for crate::peripherals::$pin {}
-        impl crate::opamp::sealed::NonInvertingPin<peripherals::$inst> for crate::peripherals::$pin {
+        impl crate::opamp::SealedNonInvertingPin<peripherals::$inst> for crate::peripherals::$pin {
             fn channel(&self) -> u8 {
                 $ch
             }
@@ -318,6 +320,6 @@ macro_rules! impl_opamp_vp_pin {
 macro_rules! impl_opamp_vout_pin {
     ($inst:ident, $pin:ident) => {
         impl crate::opamp::OutputPin<peripherals::$inst> for crate::peripherals::$pin {}
-        impl crate::opamp::sealed::OutputPin<peripherals::$inst> for crate::peripherals::$pin {}
+        impl crate::opamp::SealedOutputPin<peripherals::$inst> for crate::peripherals::$pin {}
     };
 }
diff --git a/embassy-stm32/src/qspi/mod.rs b/embassy-stm32/src/qspi/mod.rs
index 8a709a89e..3c054e666 100644
--- a/embassy-stm32/src/qspi/mod.rs
+++ b/embassy-stm32/src/qspi/mod.rs
@@ -8,8 +8,7 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 use enums::*;
 
 use crate::dma::Transfer;
-use crate::gpio::sealed::AFType;
-use crate::gpio::{AnyPin, Pull};
+use crate::gpio::{AFType, AnyPin, Pull};
 use crate::pac::quadspi::Quadspi as Regs;
 use crate::rcc::RccPeripheral;
 use crate::{peripherals, Peripheral};
@@ -381,16 +380,13 @@ impl<'d, T: Instance, Dma> Qspi<'d, T, Dma> {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
-
-    pub trait Instance {
-        const REGS: Regs;
-    }
+trait SealedInstance {
+    const REGS: Regs;
 }
 
 /// QSPI instance trait.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + RccPeripheral {}
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + RccPeripheral {}
 
 pin_trait!(SckPin, Instance);
 pin_trait!(BK1D0Pin, Instance);
@@ -409,7 +405,7 @@ dma_trait!(QuadDma, Instance);
 
 foreach_peripheral!(
     (quadspi, $inst:ident) => {
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             const REGS: Regs = crate::pac::$inst;
         }
 
diff --git a/embassy-stm32/src/rcc/bd.rs b/embassy-stm32/src/rcc/bd.rs
index 39407b28c..54d3c662b 100644
--- a/embassy-stm32/src/rcc/bd.rs
+++ b/embassy-stm32/src/rcc/bd.rs
@@ -24,6 +24,7 @@ pub struct LseConfig {
 #[allow(dead_code)]
 #[derive(Default, Clone, Copy)]
 pub enum LseDrive {
+    #[cfg(not(stm32h5))] // ES0565: LSE Low drive mode is not functional
     Low = 0,
     MediumLow = 0x01,
     #[default]
@@ -38,6 +39,7 @@ impl From<LseDrive> for crate::pac::rcc::vals::Lsedrv {
         use crate::pac::rcc::vals::Lsedrv;
 
         match value {
+            #[cfg(not(stm32h5))] // ES0565: LSE Low drive mode is not functional
             LseDrive::Low => Lsedrv::LOW,
             LseDrive::MediumLow => Lsedrv::MEDIUMLOW,
             LseDrive::MediumHigh => Lsedrv::MEDIUMHIGH,
diff --git a/embassy-stm32/src/rcc/h.rs b/embassy-stm32/src/rcc/h.rs
index bab8bb19e..1949fc891 100644
--- a/embassy-stm32/src/rcc/h.rs
+++ b/embassy-stm32/src/rcc/h.rs
@@ -455,7 +455,14 @@ pub(crate) unsafe fn init(config: Config) {
     };
     #[cfg(pwr_h7rm0468)]
     let (d1cpre_clk_max, hclk_max, pclk_max) = match config.voltage_scale {
-        VoltageScale::Scale0 => (Hertz(520_000_000), Hertz(275_000_000), Hertz(137_500_000)),
+        VoltageScale::Scale0 => {
+            let d1cpre_clk_max = if pac::SYSCFG.ur18().read().cpu_freq_boost() {
+                550_000_000
+            } else {
+                520_000_000
+            };
+            (Hertz(d1cpre_clk_max), Hertz(275_000_000), Hertz(137_500_000))
+        }
         VoltageScale::Scale1 => (Hertz(400_000_000), Hertz(200_000_000), Hertz(100_000_000)),
         VoltageScale::Scale2 => (Hertz(300_000_000), Hertz(150_000_000), Hertz(75_000_000)),
         VoltageScale::Scale3 => (Hertz(170_000_000), Hertz(85_000_000), Hertz(42_500_000)),
diff --git a/embassy-stm32/src/rcc/hsi48.rs b/embassy-stm32/src/rcc/hsi48.rs
index 19a8c8cb9..6f0d7b379 100644
--- a/embassy-stm32/src/rcc/hsi48.rs
+++ b/embassy-stm32/src/rcc/hsi48.rs
@@ -2,7 +2,7 @@
 
 use crate::pac::crs::vals::Syncsrc;
 use crate::pac::{CRS, RCC};
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 use crate::time::Hertz;
 
 /// HSI48 speed
diff --git a/embassy-stm32/src/rcc/mco.rs b/embassy-stm32/src/rcc/mco.rs
index 654943bc1..d8604e07e 100644
--- a/embassy-stm32/src/rcc/mco.rs
+++ b/embassy-stm32/src/rcc/mco.rs
@@ -2,8 +2,7 @@ use core::marker::PhantomData;
 
 use embassy_hal_internal::into_ref;
 
-use crate::gpio::sealed::AFType;
-use crate::gpio::Speed;
+use crate::gpio::{AFType, Speed};
 #[cfg(not(any(stm32f1, rcc_f0v1, rcc_f3v1, rcc_f37)))]
 pub use crate::pac::rcc::vals::Mcopre as McoPrescaler;
 #[cfg(not(any(rcc_f2, rcc_f410, rcc_f4, rcc_f7, rcc_h50, rcc_h5, rcc_h7ab, rcc_h7rm0433, rcc_h7)))]
@@ -19,23 +18,25 @@ pub enum McoPrescaler {
     DIV1,
 }
 
-pub(crate) mod sealed {
-    pub trait McoInstance {
-        type Source;
-        unsafe fn apply_clock_settings(source: Self::Source, prescaler: super::McoPrescaler);
-    }
-}
+pub(crate) trait SealedMcoInstance {}
 
-pub trait McoInstance: sealed::McoInstance + 'static {}
+#[allow(private_bounds)]
+pub trait McoInstance: SealedMcoInstance + 'static {
+    type Source;
+
+    #[doc(hidden)]
+    unsafe fn _apply_clock_settings(source: Self::Source, prescaler: super::McoPrescaler);
+}
 
 pin_trait!(McoPin, McoInstance);
 
 macro_rules! impl_peri {
     ($peri:ident, $source:ident, $set_source:ident, $set_prescaler:ident) => {
-        impl sealed::McoInstance for peripherals::$peri {
+        impl SealedMcoInstance for peripherals::$peri {}
+        impl McoInstance for peripherals::$peri {
             type Source = $source;
 
-            unsafe fn apply_clock_settings(source: Self::Source, _prescaler: McoPrescaler) {
+            unsafe fn _apply_clock_settings(source: Self::Source, _prescaler: McoPrescaler) {
                 #[cfg(not(any(stm32u5, stm32wba)))]
                 let r = RCC.cfgr();
                 #[cfg(any(stm32u5, stm32wba))]
@@ -48,8 +49,6 @@ macro_rules! impl_peri {
                 });
             }
         }
-
-        impl McoInstance for peripherals::$peri {}
     };
 }
 
@@ -79,7 +78,7 @@ impl<'d, T: McoInstance> Mco<'d, T> {
         into_ref!(pin);
 
         critical_section::with(|_| unsafe {
-            T::apply_clock_settings(source, prescaler);
+            T::_apply_clock_settings(source, prescaler);
             pin.set_as_af(pin.af_num(), AFType::OutputPushPull);
             pin.set_speed(Speed::VeryHigh);
         });
diff --git a/embassy-stm32/src/rcc/mod.rs b/embassy-stm32/src/rcc/mod.rs
index 910ebe205..d53d02203 100644
--- a/embassy-stm32/src/rcc/mod.rs
+++ b/embassy-stm32/src/rcc/mod.rs
@@ -10,6 +10,7 @@ pub use bd::*;
 
 #[cfg(any(mco, mco1, mco2))]
 mod mco;
+use critical_section::CriticalSection;
 #[cfg(any(mco, mco1, mco2))]
 pub use mco::*;
 
@@ -32,6 +33,7 @@ mod _version;
 pub use _version::*;
 
 pub use crate::_generated::{mux, Clocks};
+use crate::time::Hertz;
 
 #[cfg(feature = "low-power")]
 /// Must be written within a critical section
@@ -63,29 +65,21 @@ pub(crate) unsafe fn get_freqs() -> &'static Clocks {
     CLOCK_FREQS.assume_init_ref()
 }
 
-#[cfg(feature = "unstable-pac")]
-pub mod low_level {
-    pub use super::sealed::*;
-}
+pub(crate) trait SealedRccPeripheral {
+    fn frequency() -> crate::time::Hertz;
+    fn enable_and_reset_with_cs(cs: CriticalSection);
+    fn disable_with_cs(cs: CriticalSection);
 
-pub(crate) mod sealed {
-    use critical_section::CriticalSection;
-
-    pub trait RccPeripheral {
-        fn frequency() -> crate::time::Hertz;
-        fn enable_and_reset_with_cs(cs: CriticalSection);
-        fn disable_with_cs(cs: CriticalSection);
-
-        fn enable_and_reset() {
-            critical_section::with(|cs| Self::enable_and_reset_with_cs(cs))
-        }
-        fn disable() {
-            critical_section::with(|cs| Self::disable_with_cs(cs))
-        }
+    fn enable_and_reset() {
+        critical_section::with(|cs| Self::enable_and_reset_with_cs(cs))
+    }
+    fn disable() {
+        critical_section::with(|cs| Self::disable_with_cs(cs))
     }
 }
 
-pub trait RccPeripheral: sealed::RccPeripheral + 'static {}
+#[allow(private_bounds)]
+pub trait RccPeripheral: SealedRccPeripheral + 'static {}
 
 #[allow(unused)]
 mod util {
@@ -116,3 +110,12 @@ mod util {
         Ok(Some(x))
     }
 }
+
+/// Get the kernel clocok frequency of the peripheral `T`.
+///
+/// # Panics
+///
+/// Panics if the clock is not active.
+pub fn frequency<T: RccPeripheral>() -> Hertz {
+    T::frequency()
+}
diff --git a/embassy-stm32/src/rng.rs b/embassy-stm32/src/rng.rs
index ca641f352..7a228e4a4 100644
--- a/embassy-stm32/src/rng.rs
+++ b/embassy-stm32/src/rng.rs
@@ -222,16 +222,13 @@ impl<'d, T: Instance> RngCore for Rng<'d, T> {
 
 impl<'d, T: Instance> CryptoRng for Rng<'d, T> {}
 
-pub(crate) mod sealed {
-    use super::*;
-
-    pub trait Instance {
-        fn regs() -> pac::rng::Rng;
-    }
+trait SealedInstance {
+    fn regs() -> pac::rng::Rng;
 }
 
 /// RNG instance trait.
-pub trait Instance: sealed::Instance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
     /// Interrupt for this RNG instance.
     type Interrupt: interrupt::typelevel::Interrupt;
 }
@@ -242,7 +239,7 @@ foreach_interrupt!(
             type Interrupt = crate::interrupt::typelevel::$irq;
         }
 
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             fn regs() -> crate::pac::rng::Rng {
                 crate::pac::$inst
             }
diff --git a/embassy-stm32/src/rtc/datetime.rs b/embassy-stm32/src/rtc/datetime.rs
index ef92fa4bb..bab8cf4a3 100644
--- a/embassy-stm32/src/rtc/datetime.rs
+++ b/embassy-stm32/src/rtc/datetime.rs
@@ -1,13 +1,10 @@
 #[cfg(feature = "chrono")]
-use core::convert::From;
-
-#[cfg(feature = "chrono")]
-use chrono::{self, Datelike, NaiveDate, Timelike, Weekday};
+use chrono::{Datelike, NaiveDate, Timelike, Weekday};
 
 #[cfg(any(feature = "defmt", feature = "time"))]
 use crate::peripherals::RTC;
 #[cfg(any(feature = "defmt", feature = "time"))]
-use crate::rtc::sealed::Instance;
+use crate::rtc::SealedInstance;
 
 /// Represents an instant in time that can be substracted to compute a duration
 pub struct RtcInstant {
diff --git a/embassy-stm32/src/rtc/mod.rs b/embassy-stm32/src/rtc/mod.rs
index 169505501..00abe9356 100644
--- a/embassy-stm32/src/rtc/mod.rs
+++ b/embassy-stm32/src/rtc/mod.rs
@@ -31,7 +31,6 @@ pub use _version::*;
 use embassy_hal_internal::Peripheral;
 
 use crate::peripherals::RTC;
-use crate::rtc::sealed::Instance;
 
 #[allow(dead_code)]
 #[repr(u8)]
@@ -212,7 +211,7 @@ impl Rtc {
     /// Create a new RTC instance.
     pub fn new(_rtc: impl Peripheral<P = RTC>, rtc_config: RtcConfig) -> Self {
         #[cfg(not(any(stm32l0, stm32f3, stm32l1, stm32f0, stm32f2)))]
-        <RTC as crate::rcc::sealed::RccPeripheral>::enable_and_reset();
+        <RTC as crate::rcc::SealedRccPeripheral>::enable_and_reset();
 
         let mut this = Self {
             #[cfg(feature = "low-power")]
@@ -437,7 +436,7 @@ impl Rtc {
                     .fpr(0)
                     .modify(|w| w.set_line(RTC::EXTI_WAKEUP_LINE, true));
 
-                <RTC as crate::rtc::sealed::Instance>::WakeupInterrupt::unpend();
+                <RTC as crate::rtc::SealedInstance>::WakeupInterrupt::unpend();
             });
         }
 
@@ -449,8 +448,8 @@ impl Rtc {
         use crate::interrupt::typelevel::Interrupt;
         use crate::pac::EXTI;
 
-        <RTC as crate::rtc::sealed::Instance>::WakeupInterrupt::unpend();
-        unsafe { <RTC as crate::rtc::sealed::Instance>::WakeupInterrupt::enable() };
+        <RTC as crate::rtc::SealedInstance>::WakeupInterrupt::unpend();
+        unsafe { <RTC as crate::rtc::SealedInstance>::WakeupInterrupt::enable() };
 
         EXTI.rtsr(0).modify(|w| w.set_line(RTC::EXTI_WAKEUP_LINE, true));
         EXTI.imr(0).modify(|w| w.set_line(RTC::EXTI_WAKEUP_LINE, true));
@@ -477,34 +476,30 @@ pub(crate) fn bcd2_to_byte(bcd: (u8, u8)) -> u8 {
     tmp + (value & 0x0F)
 }
 
-pub(crate) mod sealed {
-    use crate::pac::rtc::Rtc;
+trait SealedInstance {
+    const BACKUP_REGISTER_COUNT: usize;
 
-    pub trait Instance {
-        const BACKUP_REGISTER_COUNT: usize;
+    #[cfg(feature = "low-power")]
+    const EXTI_WAKEUP_LINE: usize;
 
-        #[cfg(feature = "low-power")]
-        const EXTI_WAKEUP_LINE: usize;
+    #[cfg(feature = "low-power")]
+    type WakeupInterrupt: crate::interrupt::typelevel::Interrupt;
 
-        #[cfg(feature = "low-power")]
-        type WakeupInterrupt: crate::interrupt::typelevel::Interrupt;
-
-        fn regs() -> Rtc {
-            crate::pac::RTC
-        }
-
-        /// Read content of the backup register.
-        ///
-        /// The registers retain their values during wakes from standby mode or system resets. They also
-        /// retain their value when Vdd is switched off as long as V_BAT is powered.
-        fn read_backup_register(rtc: &Rtc, register: usize) -> Option<u32>;
-
-        /// Set content of the backup register.
-        ///
-        /// The registers retain their values during wakes from standby mode or system resets. They also
-        /// retain their value when Vdd is switched off as long as V_BAT is powered.
-        fn write_backup_register(rtc: &Rtc, register: usize, value: u32);
-
-        // fn apply_config(&mut self, rtc_config: RtcConfig);
+    fn regs() -> crate::pac::rtc::Rtc {
+        crate::pac::RTC
     }
+
+    /// Read content of the backup register.
+    ///
+    /// The registers retain their values during wakes from standby mode or system resets. They also
+    /// retain their value when Vdd is switched off as long as V_BAT is powered.
+    fn read_backup_register(rtc: &crate::pac::rtc::Rtc, register: usize) -> Option<u32>;
+
+    /// Set content of the backup register.
+    ///
+    /// The registers retain their values during wakes from standby mode or system resets. They also
+    /// retain their value when Vdd is switched off as long as V_BAT is powered.
+    fn write_backup_register(rtc: &crate::pac::rtc::Rtc, register: usize, value: u32);
+
+    // fn apply_config(&mut self, rtc_config: RtcConfig);
 }
diff --git a/embassy-stm32/src/rtc/v2.rs b/embassy-stm32/src/rtc/v2.rs
index 1eda097a7..92f9de846 100644
--- a/embassy-stm32/src/rtc/v2.rs
+++ b/embassy-stm32/src/rtc/v2.rs
@@ -1,9 +1,8 @@
 use stm32_metapac::rtc::vals::{Osel, Pol};
 
-use super::sealed;
+use super::SealedInstance;
 use crate::pac::rtc::Rtc;
 use crate::peripherals::RTC;
-use crate::rtc::sealed::Instance;
 
 #[allow(dead_code)]
 impl super::Rtc {
@@ -126,7 +125,7 @@ impl super::Rtc {
     }
 }
 
-impl sealed::Instance for crate::peripherals::RTC {
+impl SealedInstance for crate::peripherals::RTC {
     const BACKUP_REGISTER_COUNT: usize = 20;
 
     #[cfg(all(feature = "low-power", stm32f4))]
diff --git a/embassy-stm32/src/rtc/v3.rs b/embassy-stm32/src/rtc/v3.rs
index 3d44a52ff..8a78d16e1 100644
--- a/embassy-stm32/src/rtc/v3.rs
+++ b/embassy-stm32/src/rtc/v3.rs
@@ -1,9 +1,9 @@
 use stm32_metapac::rtc::vals::{Calp, Calw16, Calw8, Fmt, Key, Osel, Pol, TampalrmType};
 
-use super::{sealed, RtcCalibrationCyclePeriod};
+use super::RtcCalibrationCyclePeriod;
 use crate::pac::rtc::Rtc;
 use crate::peripherals::RTC;
-use crate::rtc::sealed::Instance;
+use crate::rtc::SealedInstance;
 
 impl super::Rtc {
     /// Applies the RTC config
@@ -126,7 +126,7 @@ impl super::Rtc {
     }
 }
 
-impl sealed::Instance for crate::peripherals::RTC {
+impl SealedInstance for crate::peripherals::RTC {
     const BACKUP_REGISTER_COUNT: usize = 32;
 
     #[cfg(all(feature = "low-power", stm32g4))]
diff --git a/embassy-stm32/src/sai/mod.rs b/embassy-stm32/src/sai/mod.rs
index 02f96f8a9..54dd81524 100644
--- a/embassy-stm32/src/sai/mod.rs
+++ b/embassy-stm32/src/sai/mod.rs
@@ -6,12 +6,10 @@ use core::marker::PhantomData;
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
 
-use self::sealed::WhichSubBlock;
 pub use crate::dma::word;
 #[cfg(not(gpdma))]
 use crate::dma::{ringbuffer, Channel, ReadableRingBuffer, Request, TransferOptions, WritableRingBuffer};
-use crate::gpio::sealed::{AFType, Pin as _};
-use crate::gpio::AnyPin;
+use crate::gpio::{AFType, AnyPin, SealedPin as _};
 use crate::pac::sai::{vals, Sai as Regs};
 use crate::rcc::RccPeripheral;
 use crate::{peripherals, Peripheral};
@@ -386,6 +384,7 @@ impl OutputDrive {
 /// Master clock divider.
 #[derive(Copy, Clone, PartialEq)]
 #[allow(missing_docs)]
+#[cfg(any(sai_v1, sai_v2))]
 pub enum MasterClockDivider {
     MasterClockDisabled,
     Div1,
@@ -406,8 +405,79 @@ pub enum MasterClockDivider {
     Div30,
 }
 
+/// Master clock divider.
+#[derive(Copy, Clone, PartialEq)]
+#[allow(missing_docs)]
+#[cfg(any(sai_v3_2pdm, sai_v3_4pdm, sai_v4_2pdm, sai_v4_4pdm))]
+pub enum MasterClockDivider {
+    MasterClockDisabled,
+    Div1,
+    Div2,
+    Div3,
+    Div4,
+    Div5,
+    Div6,
+    Div7,
+    Div8,
+    Div9,
+    Div10,
+    Div11,
+    Div12,
+    Div13,
+    Div14,
+    Div15,
+    Div16,
+    Div17,
+    Div18,
+    Div19,
+    Div20,
+    Div21,
+    Div22,
+    Div23,
+    Div24,
+    Div25,
+    Div26,
+    Div27,
+    Div28,
+    Div29,
+    Div30,
+    Div31,
+    Div32,
+    Div33,
+    Div34,
+    Div35,
+    Div36,
+    Div37,
+    Div38,
+    Div39,
+    Div40,
+    Div41,
+    Div42,
+    Div43,
+    Div44,
+    Div45,
+    Div46,
+    Div47,
+    Div48,
+    Div49,
+    Div50,
+    Div51,
+    Div52,
+    Div53,
+    Div54,
+    Div55,
+    Div56,
+    Div57,
+    Div58,
+    Div59,
+    Div60,
+    Div61,
+    Div62,
+    Div63,
+}
+
 impl MasterClockDivider {
-    #[cfg(any(sai_v1, sai_v2, sai_v3_2pdm, sai_v3_4pdm, sai_v4_2pdm, sai_v4_4pdm))]
+    #[cfg(any(sai_v1, sai_v2))]
     const fn mckdiv(&self) -> u8 {
         match self {
             MasterClockDivider::MasterClockDisabled => 0,
@@ -429,6 +499,76 @@ impl MasterClockDivider {
             MasterClockDivider::Div30 => 15,
         }
     }
+
+    #[cfg(any(sai_v3_2pdm, sai_v3_4pdm, sai_v4_2pdm, sai_v4_4pdm))]
+    const fn mckdiv(&self) -> u8 {
+        match self {
+            MasterClockDivider::MasterClockDisabled => 0,
+            MasterClockDivider::Div1 => 1,
+            MasterClockDivider::Div2 => 2,
+            MasterClockDivider::Div3 => 3,
+            MasterClockDivider::Div4 => 4,
+            MasterClockDivider::Div5 => 5,
+            MasterClockDivider::Div6 => 6,
+            MasterClockDivider::Div7 => 7,
+            MasterClockDivider::Div8 => 8,
+            MasterClockDivider::Div9 => 9,
+            MasterClockDivider::Div10 => 10,
+            MasterClockDivider::Div11 => 11,
+            MasterClockDivider::Div12 => 12,
+            MasterClockDivider::Div13 => 13,
+            MasterClockDivider::Div14 => 14,
+            MasterClockDivider::Div15 => 15,
+            MasterClockDivider::Div16 => 16,
+            MasterClockDivider::Div17 => 17,
+            MasterClockDivider::Div18 => 18,
+            MasterClockDivider::Div19 => 19,
+            MasterClockDivider::Div20 => 20,
+            MasterClockDivider::Div21 => 21,
+            MasterClockDivider::Div22 => 22,
+            MasterClockDivider::Div23 => 23,
+            MasterClockDivider::Div24 => 24,
+            MasterClockDivider::Div25 => 25,
+            MasterClockDivider::Div26 => 26,
+            MasterClockDivider::Div27 => 27,
+            MasterClockDivider::Div28 => 28,
+            MasterClockDivider::Div29 => 29,
+            MasterClockDivider::Div30 => 30,
+            MasterClockDivider::Div31 => 31,
+            MasterClockDivider::Div32 => 32,
+            MasterClockDivider::Div33 => 33,
+            MasterClockDivider::Div34 => 34,
+            MasterClockDivider::Div35 => 35,
+            MasterClockDivider::Div36 => 36,
+            MasterClockDivider::Div37 => 37,
+            MasterClockDivider::Div38 => 38,
+            MasterClockDivider::Div39 => 39,
+            MasterClockDivider::Div40 => 40,
+            MasterClockDivider::Div41 => 41,
+            MasterClockDivider::Div42 => 42,
+            MasterClockDivider::Div43 => 43,
+            MasterClockDivider::Div44 => 44,
+            MasterClockDivider::Div45 => 45,
+            MasterClockDivider::Div46 => 46,
+            MasterClockDivider::Div47 => 47,
+            MasterClockDivider::Div48 => 48,
+            MasterClockDivider::Div49 => 49,
+            MasterClockDivider::Div50 => 50,
+            MasterClockDivider::Div51 => 51,
+            MasterClockDivider::Div52 => 52,
+            MasterClockDivider::Div53 => 53,
+            MasterClockDivider::Div54 => 54,
+            MasterClockDivider::Div55 => 55,
+            MasterClockDivider::Div56 => 56,
+            MasterClockDivider::Div57 => 57,
+            MasterClockDivider::Div58 => 58,
+            MasterClockDivider::Div59 => 59,
+            MasterClockDivider::Div60 => 60,
+            MasterClockDivider::Div61 => 61,
+            MasterClockDivider::Div62 => 62,
+            MasterClockDivider::Div63 => 63,
+        }
+    }
 }
 
 /// [`SAI`] configuration.
@@ -899,43 +1039,42 @@ impl<'d, T: Instance, W: word::Word> Drop for Sai<'d, T, W> {
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
+trait SealedInstance {
+    const REGS: Regs;
+}
 
-    pub trait Instance {
-        const REGS: Regs;
-    }
+#[derive(Copy, Clone)]
+enum WhichSubBlock {
+    A = 0,
+    B = 1,
+}
 
-    #[derive(Copy, Clone)]
-    pub enum WhichSubBlock {
-        A = 0,
-        B = 1,
-    }
-
-    pub trait SubBlock {
-        const WHICH: WhichSubBlock;
-    }
+trait SealedSubBlock {
+    const WHICH: WhichSubBlock;
 }
 
 /// Sub-block instance trait.
-pub trait SubBlockInstance: sealed::SubBlock {}
+#[allow(private_bounds)]
+pub trait SubBlockInstance: SealedSubBlock {}
 
 /// Sub-block A.
 pub enum A {}
-impl sealed::SubBlock for A {
+impl SealedSubBlock for A {
     const WHICH: WhichSubBlock = WhichSubBlock::A;
 }
 impl SubBlockInstance for A {}
 
 /// Sub-block B.
 pub enum B {}
-impl sealed::SubBlock for B {
+impl SealedSubBlock for B {
     const WHICH: WhichSubBlock = WhichSubBlock::B;
 }
 impl SubBlockInstance for B {}
 
 /// SAI instance trait.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + RccPeripheral {}
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + RccPeripheral {}
+
 pin_trait!(SckPin, Instance, SubBlockInstance);
 pin_trait!(FsPin, Instance, SubBlockInstance);
 pin_trait!(SdPin, Instance, SubBlockInstance);
@@ -945,7 +1084,7 @@ dma_trait!(Dma, Instance, SubBlockInstance);
 
 foreach_peripheral!(
     (sai, $inst:ident) => {
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             const REGS: Regs = crate::pac::$inst;
         }
 
diff --git a/embassy-stm32/src/sdmmc/mod.rs b/embassy-stm32/src/sdmmc/mod.rs
index bf1d2ca9b..f79a11606 100644
--- a/embassy-stm32/src/sdmmc/mod.rs
+++ b/embassy-stm32/src/sdmmc/mod.rs
@@ -13,8 +13,7 @@ use embassy_sync::waitqueue::AtomicWaker;
 use sdio_host::{BusWidth, CardCapacity, CardStatus, CurrentState, SDStatus, CID, CSD, OCR, SCR};
 
 use crate::dma::NoDma;
-use crate::gpio::sealed::{AFType, Pin};
-use crate::gpio::{AnyPin, Pull, Speed};
+use crate::gpio::{AFType, AnyPin, Pull, SealedPin, Speed};
 use crate::interrupt::typelevel::Interrupt;
 use crate::pac::sdmmc::Sdmmc as RegBlock;
 use crate::rcc::RccPeripheral;
@@ -240,12 +239,14 @@ const DMA_TRANSFER_OPTIONS: crate::dma::TransferOptions = crate::dma::TransferOp
     mburst: crate::dma::Burst::Incr4,
     flow_ctrl: crate::dma::FlowControl::Peripheral,
     fifo_threshold: Some(crate::dma::FifoThreshold::Full),
+    priority: crate::dma::Priority::VeryHigh,
     circular: false,
     half_transfer_ir: false,
     complete_transfer_ir: true,
 };
 #[cfg(all(sdmmc_v1, not(dma)))]
 const DMA_TRANSFER_OPTIONS: crate::dma::TransferOptions = crate::dma::TransferOptions {
+    priority: crate::dma::Priority::VeryHigh,
     circular: false,
     half_transfer_ir: false,
     complete_transfer_ir: true,
@@ -1416,21 +1417,17 @@ impl Cmd {
 
 //////////////////////////////////////////////////////
 
-pub(crate) mod sealed {
-    use super::*;
-
-    pub trait Instance {
-        type Interrupt: interrupt::typelevel::Interrupt;
-
-        fn regs() -> RegBlock;
-        fn state() -> &'static AtomicWaker;
-    }
-
-    pub trait Pins<T: Instance> {}
+trait SealedInstance {
+    fn regs() -> RegBlock;
+    fn state() -> &'static AtomicWaker;
 }
 
 /// SDMMC instance trait.
-pub trait Instance: sealed::Instance + RccPeripheral + 'static {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + RccPeripheral + 'static {
+    /// Interrupt for this instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
 
 pin_trait!(CkPin, Instance);
 pin_trait!(CmdPin, Instance);
@@ -1457,9 +1454,7 @@ impl<T: Instance> SdmmcDma<T> for NoDma {}
 
 foreach_peripheral!(
     (sdmmc, $inst:ident) => {
-        impl sealed::Instance for peripherals::$inst {
-            type Interrupt = crate::interrupt::typelevel::$inst;
-
+        impl SealedInstance for peripherals::$inst {
             fn regs() -> RegBlock {
                 crate::pac::$inst
             }
@@ -1470,6 +1465,8 @@ foreach_peripheral!(
             }
         }
 
-        impl Instance for peripherals::$inst {}
+        impl Instance for peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$inst;
+        }
     };
 );
diff --git a/embassy-stm32/src/spi/mod.rs b/embassy-stm32/src/spi/mod.rs
index 172bc8112..0b38c4288 100644
--- a/embassy-stm32/src/spi/mod.rs
+++ b/embassy-stm32/src/spi/mod.rs
@@ -9,8 +9,7 @@ use embassy_hal_internal::{into_ref, PeripheralRef};
 pub use embedded_hal_02::spi::{Mode, Phase, Polarity, MODE_0, MODE_1, MODE_2, MODE_3};
 
 use crate::dma::{slice_ptr_parts, word, Transfer};
-use crate::gpio::sealed::{AFType, Pin as _};
-use crate::gpio::{AnyPin, Pull};
+use crate::gpio::{AFType, AnyPin, Pull, SealedPin as _};
 use crate::pac::spi::{regs, vals, Spi as Regs};
 use crate::rcc::RccPeripheral;
 use crate::time::Hertz;
@@ -210,7 +209,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
         // see RM0453 rev 1 section 7.2.13 page 291
         // The SUBGHZSPI_SCK frequency is obtained by PCLK3 divided by two.
         // The SUBGHZSPI_SCK clock maximum speed must not exceed 16 MHz.
-        let pclk3_freq = <peripherals::SUBGHZSPI as crate::rcc::sealed::RccPeripheral>::frequency().0;
+        let pclk3_freq = <peripherals::SUBGHZSPI as crate::rcc::SealedRccPeripheral>::frequency().0;
         let freq = Hertz(core::cmp::min(pclk3_freq / 2, 16_000_000));
         let mut config = Config::default();
         config.mode = MODE_0;
@@ -271,13 +270,13 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
                 if mosi.is_none() {
                     w.set_rxonly(vals::Rxonly::OUTPUTDISABLED);
                 }
-                w.set_dff(<u8 as sealed::Word>::CONFIG)
+                w.set_dff(<u8 as SealedWord>::CONFIG)
             });
         }
         #[cfg(spi_v2)]
         {
             T::REGS.cr2().modify(|w| {
-                let (ds, frxth) = <u8 as sealed::Word>::CONFIG;
+                let (ds, frxth) = <u8 as SealedWord>::CONFIG;
                 w.set_frxth(frxth);
                 w.set_ds(ds);
                 w.set_ssoe(false);
@@ -317,7 +316,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
             T::REGS.cfg1().modify(|w| {
                 w.set_crcen(false);
                 w.set_mbr(br);
-                w.set_dsize(<u8 as sealed::Word>::CONFIG);
+                w.set_dsize(<u8 as SealedWord>::CONFIG);
                 w.set_fthlv(vals::Fthlv::ONEFRAME);
             });
             T::REGS.cr2().modify(|w| {
@@ -336,7 +335,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
             miso,
             txdma,
             rxdma,
-            current_word_size: <u8 as sealed::Word>::CONFIG,
+            current_word_size: <u8 as SealedWord>::CONFIG,
         }
     }
 
@@ -700,7 +699,7 @@ use vals::Mbr as Br;
 
 fn compute_baud_rate(clocks: Hertz, freq: Hertz) -> Br {
     let val = match clocks.0 / freq.0 {
-        0 => unreachable!(),
+        0 => panic!("You are trying to reach a frequency higher than the clock"),
         1..=2 => 0b000,
         3..=5 => 0b001,
         6..=11 => 0b010,
@@ -975,24 +974,21 @@ impl<'d, T: Instance, Tx: TxDma<T>, Rx: RxDma<T>, W: Word> embedded_hal_async::s
     }
 }
 
-pub(crate) mod sealed {
-    use super::*;
+pub(crate) trait SealedInstance {
+    const REGS: Regs;
+}
 
-    pub trait Instance {
-        const REGS: Regs;
-    }
-
-    pub trait Word {
-        const CONFIG: word_impl::Config;
-    }
+trait SealedWord {
+    const CONFIG: word_impl::Config;
 }
 
 /// Word sizes usable for SPI.
-pub trait Word: word::Word + sealed::Word {}
+#[allow(private_bounds)]
+pub trait Word: word::Word + SealedWord {}
 
 macro_rules! impl_word {
     ($T:ty, $config:expr) => {
-        impl sealed::Word for $T {
+        impl SealedWord for $T {
             const CONFIG: Config = $config;
         }
         impl Word for $T {}
@@ -1068,7 +1064,8 @@ mod word_impl {
 }
 
 /// SPI instance trait.
-pub trait Instance: Peripheral<P = Self> + sealed::Instance + RccPeripheral {}
+#[allow(private_bounds)]
+pub trait Instance: Peripheral<P = Self> + SealedInstance + RccPeripheral {}
 
 pin_trait!(SckPin, Instance);
 pin_trait!(MosiPin, Instance);
@@ -1082,7 +1079,7 @@ dma_trait!(TxDma, Instance);
 
 foreach_peripheral!(
     (spi, $inst:ident) => {
-        impl sealed::Instance for peripherals::$inst {
+        impl SealedInstance for peripherals::$inst {
             const REGS: Regs = crate::pac::$inst;
         }
 
diff --git a/embassy-stm32/src/time_driver.rs b/embassy-stm32/src/time_driver.rs
index 37b2e7526..cc8161276 100644
--- a/embassy-stm32/src/time_driver.rs
+++ b/embassy-stm32/src/time_driver.rs
@@ -1,7 +1,6 @@
 #![allow(non_snake_case)]
 
 use core::cell::Cell;
-use core::convert::TryInto;
 use core::sync::atomic::{compiler_fence, AtomicU32, AtomicU8, Ordering};
 use core::{mem, ptr};
 
@@ -9,16 +8,16 @@ use critical_section::CriticalSection;
 use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
 use embassy_sync::blocking_mutex::Mutex;
 use embassy_time_driver::{AlarmHandle, Driver, TICK_HZ};
-use stm32_metapac::timer::regs;
+use stm32_metapac::timer::{regs, TimGp16};
 
 use crate::interrupt::typelevel::Interrupt;
 use crate::pac::timer::vals;
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::SealedRccPeripheral;
 #[cfg(feature = "low-power")]
 use crate::rtc::Rtc;
 #[cfg(any(time_driver_tim1, time_driver_tim8, time_driver_tim20))]
-use crate::timer::sealed::AdvancedControlInstance;
-use crate::timer::sealed::{CoreInstance, GeneralPurpose16bitInstance as Instance};
+use crate::timer::AdvancedInstance1Channel;
+use crate::timer::CoreInstance;
 use crate::{interrupt, peripherals};
 
 // NOTE regarding ALARM_COUNT:
@@ -208,6 +207,10 @@ foreach_interrupt! {
     };
 }
 
+fn regs_gp16() -> TimGp16 {
+    unsafe { TimGp16::from_ptr(T::regs()) }
+}
+
 // Clock timekeeping works with something we call "periods", which are time intervals
 // of 2^15 ticks. The Clock counter value is 16 bits, so one "overflow cycle" is 2 periods.
 //
@@ -272,9 +275,9 @@ embassy_time_driver::time_driver_impl!(static DRIVER: RtcDriver = RtcDriver {
 
 impl RtcDriver {
     fn init(&'static self, cs: critical_section::CriticalSection) {
-        let r = T::regs_gp16();
+        let r = regs_gp16();
 
-        <T as RccPeripheral>::enable_and_reset_with_cs(cs);
+        <T as SealedRccPeripheral>::enable_and_reset_with_cs(cs);
 
         let timer_freq = T::frequency();
 
@@ -287,7 +290,7 @@ impl RtcDriver {
             Ok(n) => n,
         };
 
-        r.psc().write(|w| w.set_psc(psc));
+        r.psc().write_value(psc);
         r.arr().write(|w| w.set_arr(u16::MAX));
 
         // Set URS, generate update and clear URS
@@ -309,9 +312,9 @@ impl RtcDriver {
 
         #[cfg(any(time_driver_tim1, time_driver_tim8, time_driver_tim20))]
         {
-            <T as AdvancedControlInstance>::CaptureCompareInterrupt::unpend();
+            <T as AdvancedInstance1Channel>::CaptureCompareInterrupt::unpend();
             unsafe {
-                <T as AdvancedControlInstance>::CaptureCompareInterrupt::enable();
+                <T as AdvancedInstance1Channel>::CaptureCompareInterrupt::enable();
             }
         }
 
@@ -319,7 +322,7 @@ impl RtcDriver {
     }
 
     fn on_interrupt(&self) {
-        let r = T::regs_gp16();
+        let r = regs_gp16();
 
         // XXX: reduce the size of this critical section ?
         critical_section::with(|cs| {
@@ -350,7 +353,7 @@ impl RtcDriver {
     }
 
     fn next_period(&self) {
-        let r = T::regs_gp16();
+        let r = regs_gp16();
 
         // We only modify the period from the timer interrupt, so we know this can't race.
         let period = self.period.load(Ordering::Relaxed) + 1;
@@ -414,7 +417,7 @@ impl RtcDriver {
     /// Add the given offset to the current time
     fn add_time(&self, offset: embassy_time::Duration, cs: CriticalSection) {
         let offset = offset.as_ticks();
-        let cnt = T::regs_gp16().cnt().read().cnt() as u32;
+        let cnt = regs_gp16().cnt().read().cnt() as u32;
         let period = self.period.load(Ordering::SeqCst);
 
         // Correct the race, if it exists
@@ -440,7 +443,7 @@ impl RtcDriver {
         let period = if cnt > u16::MAX as u32 / 2 { period + 1 } else { period };
 
         self.period.store(period, Ordering::SeqCst);
-        T::regs_gp16().cnt().write(|w| w.set_cnt(cnt as u16));
+        regs_gp16().cnt().write(|w| w.set_cnt(cnt as u16));
 
         // Now, recompute all alarms
         for i in 0..ALARM_COUNT {
@@ -497,7 +500,7 @@ impl RtcDriver {
                     .unwrap()
                     .start_wakeup_alarm(time_until_next_alarm, cs);
 
-                T::regs_gp16().cr1().modify(|w| w.set_cen(false));
+                regs_gp16().cr1().modify(|w| w.set_cen(false));
 
                 Ok(())
             }
@@ -507,7 +510,7 @@ impl RtcDriver {
     #[cfg(feature = "low-power")]
     /// Resume the timer with the given offset
     pub(crate) fn resume_time(&self) {
-        if T::regs_gp16().cr1().read().cen() {
+        if regs_gp16().cr1().read().cen() {
             // Time isn't currently stopped
 
             return;
@@ -516,14 +519,14 @@ impl RtcDriver {
         critical_section::with(|cs| {
             self.stop_wakeup_alarm(cs);
 
-            T::regs_gp16().cr1().modify(|w| w.set_cen(true));
+            regs_gp16().cr1().modify(|w| w.set_cen(true));
         })
     }
 }
 
 impl Driver for RtcDriver {
     fn now(&self) -> u64 {
-        let r = T::regs_gp16();
+        let r = regs_gp16();
 
         let period = self.period.load(Ordering::Relaxed);
         compiler_fence(Ordering::Acquire);
@@ -554,7 +557,7 @@ impl Driver for RtcDriver {
 
     fn set_alarm(&self, alarm: AlarmHandle, timestamp: u64) -> bool {
         critical_section::with(|cs| {
-            let r = T::regs_gp16();
+            let r = regs_gp16();
 
             let n = alarm.id() as usize;
             let alarm = self.get_alarm(cs, alarm);
diff --git a/embassy-stm32/src/timer/complementary_pwm.rs b/embassy-stm32/src/timer/complementary_pwm.rs
index 72f1ec864..a892646cf 100644
--- a/embassy-stm32/src/timer/complementary_pwm.rs
+++ b/embassy-stm32/src/timer/complementary_pwm.rs
@@ -5,12 +5,15 @@ use core::marker::PhantomData;
 use embassy_hal_internal::{into_ref, PeripheralRef};
 use stm32_metapac::timer::vals::Ckd;
 
-use super::simple_pwm::*;
-use super::*;
-#[allow(unused_imports)]
-use crate::gpio::sealed::{AFType, Pin};
+use super::low_level::{CountingMode, OutputPolarity, Timer};
+use super::simple_pwm::{Ch1, Ch2, Ch3, Ch4, PwmPin};
+use super::{
+    AdvancedInstance4Channel, Channel, Channel1ComplementaryPin, Channel2ComplementaryPin, Channel3ComplementaryPin,
+    Channel4ComplementaryPin,
+};
 use crate::gpio::{AnyPin, OutputType};
 use crate::time::Hertz;
+use crate::timer::low_level::OutputCompareMode;
 use crate::Peripheral;
 
 /// Complementary PWM pin wrapper.
@@ -23,7 +26,7 @@ pub struct ComplementaryPwmPin<'d, T, C> {
 
 macro_rules! complementary_channel_impl {
     ($new_chx:ident, $channel:ident, $pin_trait:ident) => {
-        impl<'d, T: ComplementaryCaptureCompare16bitInstance> ComplementaryPwmPin<'d, T, $channel> {
+        impl<'d, T: AdvancedInstance4Channel> ComplementaryPwmPin<'d, T, $channel> {
             #[doc = concat!("Create a new ", stringify!($channel), " complementary PWM pin instance.")]
             pub fn $new_chx(pin: impl Peripheral<P = impl $pin_trait<T>> + 'd, output_type: OutputType) -> Self {
                 into_ref!(pin);
@@ -48,11 +51,11 @@ complementary_channel_impl!(new_ch3, Ch3, Channel3ComplementaryPin);
 complementary_channel_impl!(new_ch4, Ch4, Channel4ComplementaryPin);
 
 /// PWM driver with support for standard and complementary outputs.
-pub struct ComplementaryPwm<'d, T> {
-    inner: PeripheralRef<'d, T>,
+pub struct ComplementaryPwm<'d, T: AdvancedInstance4Channel> {
+    inner: Timer<'d, T>,
 }
 
-impl<'d, T: ComplementaryCaptureCompare16bitInstance> ComplementaryPwm<'d, T> {
+impl<'d, T: AdvancedInstance4Channel> ComplementaryPwm<'d, T> {
     /// Create a new complementary PWM driver.
     #[allow(clippy::too_many_arguments)]
     pub fn new(
@@ -72,11 +75,7 @@ impl<'d, T: ComplementaryCaptureCompare16bitInstance> ComplementaryPwm<'d, T> {
     }
 
     fn new_inner(tim: impl Peripheral<P = T> + 'd, freq: Hertz, counting_mode: CountingMode) -> Self {
-        into_ref!(tim);
-
-        T::enable_and_reset();
-
-        let mut this = Self { inner: tim };
+        let mut this = Self { inner: Timer::new(tim) };
 
         this.inner.set_counting_mode(counting_mode);
         this.set_frequency(freq);
@@ -123,7 +122,7 @@ impl<'d, T: ComplementaryCaptureCompare16bitInstance> ComplementaryPwm<'d, T> {
     ///
     /// This value depends on the configured frequency and the timer's clock rate from RCC.
     pub fn get_max_duty(&self) -> u16 {
-        self.inner.get_max_compare_value() + 1
+        self.inner.get_max_compare_value() as u16 + 1
     }
 
     /// Set the duty for a given channel.
@@ -131,7 +130,7 @@ impl<'d, T: ComplementaryCaptureCompare16bitInstance> ComplementaryPwm<'d, T> {
     /// The value ranges from 0 for 0% duty, to [`get_max_duty`](Self::get_max_duty) for 100% duty, both included.
     pub fn set_duty(&mut self, channel: Channel, duty: u16) {
         assert!(duty <= self.get_max_duty());
-        self.inner.set_compare_value(channel, duty)
+        self.inner.set_compare_value(channel, duty as _)
     }
 
     /// Set the output polarity for a given channel.
@@ -149,7 +148,7 @@ impl<'d, T: ComplementaryCaptureCompare16bitInstance> ComplementaryPwm<'d, T> {
     }
 }
 
-impl<'d, T: ComplementaryCaptureCompare16bitInstance> embedded_hal_02::Pwm for ComplementaryPwm<'d, T> {
+impl<'d, T: AdvancedInstance4Channel> embedded_hal_02::Pwm for ComplementaryPwm<'d, T> {
     type Channel = Channel;
     type Time = Hertz;
     type Duty = u16;
@@ -169,16 +168,16 @@ impl<'d, T: ComplementaryCaptureCompare16bitInstance> embedded_hal_02::Pwm for C
     }
 
     fn get_duty(&self, channel: Self::Channel) -> Self::Duty {
-        self.inner.get_compare_value(channel)
+        self.inner.get_compare_value(channel) as u16
     }
 
     fn get_max_duty(&self) -> Self::Duty {
-        self.inner.get_max_compare_value() + 1
+        self.inner.get_max_compare_value() as u16 + 1
     }
 
     fn set_duty(&mut self, channel: Self::Channel, duty: Self::Duty) {
         assert!(duty <= self.get_max_duty());
-        self.inner.set_compare_value(channel, duty)
+        self.inner.set_compare_value(channel, duty as u32)
     }
 
     fn set_period<P>(&mut self, period: P)
diff --git a/embassy-stm32/src/timer/low_level.rs b/embassy-stm32/src/timer/low_level.rs
new file mode 100644
index 000000000..a5d942314
--- /dev/null
+++ b/embassy-stm32/src/timer/low_level.rs
@@ -0,0 +1,638 @@
+//! Low-level timer driver.
+//!
+//! This is an unopinionated, very low-level driver for all STM32 timers. It allows direct register
+//! manipulation with the `regs_*()` methods, and has utility functions that are thin wrappers
+//! over the registers.
+//!
+//! The available functionality depends on the timer type.
+
+use embassy_hal_internal::{into_ref, Peripheral, PeripheralRef};
+
+use super::*;
+use crate::pac::timer::vals;
+use crate::time::Hertz;
+
+/// Input capture mode.
+#[derive(Clone, Copy)]
+pub enum InputCaptureMode {
+    /// Rising edge only.
+    Rising,
+    /// Falling edge only.
+    Falling,
+    /// Both rising or falling edges.
+    BothEdges,
+}
+
+/// Input TI selection.
+#[derive(Clone, Copy)]
+pub enum InputTISelection {
+    /// Normal
+    Normal,
+    /// Alternate
+    Alternate,
+    /// TRC
+    TRC,
+}
+
+impl From<InputTISelection> for stm32_metapac::timer::vals::CcmrInputCcs {
+    fn from(tisel: InputTISelection) -> Self {
+        match tisel {
+            InputTISelection::Normal => stm32_metapac::timer::vals::CcmrInputCcs::TI4,
+            InputTISelection::Alternate => stm32_metapac::timer::vals::CcmrInputCcs::TI3,
+            InputTISelection::TRC => stm32_metapac::timer::vals::CcmrInputCcs::TRC,
+        }
+    }
+}
+
+/// Timer counting mode.
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum CountingMode {
+    #[default]
+    /// The timer counts up to the reload value and then resets back to 0.
+    EdgeAlignedUp,
+    /// The timer counts down to 0 and then resets back to the reload value.
+    EdgeAlignedDown,
+    /// The timer counts up to the reload value and then counts back to 0.
+    ///
+    /// The output compare interrupt flags of channels configured in output are
+    /// set when the counter is counting down.
+    CenterAlignedDownInterrupts,
+    /// The timer counts up to the reload value and then counts back to 0.
+    ///
+    /// The output compare interrupt flags of channels configured in output are
+    /// set when the counter is counting up.
+    CenterAlignedUpInterrupts,
+    /// The timer counts up to the reload value and then counts back to 0.
+    ///
+    /// The output compare interrupt flags of channels configured in output are
+    /// set when the counter is counting both up or down.
+    CenterAlignedBothInterrupts,
+}
+
+impl CountingMode {
+    /// Return whether this mode is edge-aligned (up or down).
+    pub fn is_edge_aligned(&self) -> bool {
+        matches!(self, CountingMode::EdgeAlignedUp | CountingMode::EdgeAlignedDown)
+    }
+
+    /// Return whether this mode is center-aligned.
+    pub fn is_center_aligned(&self) -> bool {
+        matches!(
+            self,
+            CountingMode::CenterAlignedDownInterrupts
+                | CountingMode::CenterAlignedUpInterrupts
+                | CountingMode::CenterAlignedBothInterrupts
+        )
+    }
+}
+
+impl From<CountingMode> for (vals::Cms, vals::Dir) {
+    fn from(value: CountingMode) -> Self {
+        match value {
+            CountingMode::EdgeAlignedUp => (vals::Cms::EDGEALIGNED, vals::Dir::UP),
+            CountingMode::EdgeAlignedDown => (vals::Cms::EDGEALIGNED, vals::Dir::DOWN),
+            CountingMode::CenterAlignedDownInterrupts => (vals::Cms::CENTERALIGNED1, vals::Dir::UP),
+            CountingMode::CenterAlignedUpInterrupts => (vals::Cms::CENTERALIGNED2, vals::Dir::UP),
+            CountingMode::CenterAlignedBothInterrupts => (vals::Cms::CENTERALIGNED3, vals::Dir::UP),
+        }
+    }
+}
+
+impl From<(vals::Cms, vals::Dir)> for CountingMode {
+    fn from(value: (vals::Cms, vals::Dir)) -> Self {
+        match value {
+            (vals::Cms::EDGEALIGNED, vals::Dir::UP) => CountingMode::EdgeAlignedUp,
+            (vals::Cms::EDGEALIGNED, vals::Dir::DOWN) => CountingMode::EdgeAlignedDown,
+            (vals::Cms::CENTERALIGNED1, _) => CountingMode::CenterAlignedDownInterrupts,
+            (vals::Cms::CENTERALIGNED2, _) => CountingMode::CenterAlignedUpInterrupts,
+            (vals::Cms::CENTERALIGNED3, _) => CountingMode::CenterAlignedBothInterrupts,
+        }
+    }
+}
+
+/// Output compare mode.
+#[derive(Clone, Copy)]
+pub enum OutputCompareMode {
+    /// The comparison between the output compare register TIMx_CCRx and
+    /// the counter TIMx_CNT has no effect on the outputs.
+    /// (this mode is used to generate a timing base).
+    Frozen,
+    /// Set channel to active level on match. OCxREF signal is forced high when the
+    /// counter TIMx_CNT matches the capture/compare register x (TIMx_CCRx).
+    ActiveOnMatch,
+    /// Set channel to inactive level on match. OCxREF signal is forced low when the
+    /// counter TIMx_CNT matches the capture/compare register x (TIMx_CCRx).
+    InactiveOnMatch,
+    /// Toggle - OCxREF toggles when TIMx_CNT=TIMx_CCRx.
+    Toggle,
+    /// Force inactive level - OCxREF is forced low.
+    ForceInactive,
+    /// Force active level - OCxREF is forced high.
+    ForceActive,
+    /// PWM mode 1 - In upcounting, channel is active as long as TIMx_CNT<TIMx_CCRx
+    /// else inactive. In downcounting, channel is inactive (OCxREF=0) as long as
+    /// TIMx_CNT>TIMx_CCRx else active (OCxREF=1).
+    PwmMode1,
+    /// PWM mode 2 - In upcounting, channel is inactive as long as
+    /// TIMx_CNT<TIMx_CCRx else active. In downcounting, channel is active as long as
+    /// TIMx_CNT>TIMx_CCRx else inactive.
+    PwmMode2,
+    // TODO: there's more modes here depending on the chip family.
+}
+
+impl From<OutputCompareMode> for stm32_metapac::timer::vals::Ocm {
+    fn from(mode: OutputCompareMode) -> Self {
+        match mode {
+            OutputCompareMode::Frozen => stm32_metapac::timer::vals::Ocm::FROZEN,
+            OutputCompareMode::ActiveOnMatch => stm32_metapac::timer::vals::Ocm::ACTIVEONMATCH,
+            OutputCompareMode::InactiveOnMatch => stm32_metapac::timer::vals::Ocm::INACTIVEONMATCH,
+            OutputCompareMode::Toggle => stm32_metapac::timer::vals::Ocm::TOGGLE,
+            OutputCompareMode::ForceInactive => stm32_metapac::timer::vals::Ocm::FORCEINACTIVE,
+            OutputCompareMode::ForceActive => stm32_metapac::timer::vals::Ocm::FORCEACTIVE,
+            OutputCompareMode::PwmMode1 => stm32_metapac::timer::vals::Ocm::PWMMODE1,
+            OutputCompareMode::PwmMode2 => stm32_metapac::timer::vals::Ocm::PWMMODE2,
+        }
+    }
+}
+
+/// Timer output pin polarity.
+#[derive(Clone, Copy)]
+pub enum OutputPolarity {
+    /// Active high (higher duty value makes the pin spend more time high).
+    ActiveHigh,
+    /// Active low (higher duty value makes the pin spend more time low).
+    ActiveLow,
+}
+
+impl From<OutputPolarity> for bool {
+    fn from(mode: OutputPolarity) -> Self {
+        match mode {
+            OutputPolarity::ActiveHigh => false,
+            OutputPolarity::ActiveLow => true,
+        }
+    }
+}
+
+/// Low-level timer driver.
+pub struct Timer<'d, T: CoreInstance> {
+    tim: PeripheralRef<'d, T>,
+}
+
+impl<'d, T: CoreInstance> Drop for Timer<'d, T> {
+    fn drop(&mut self) {
+        T::disable()
+    }
+}
+
+impl<'d, T: CoreInstance> Timer<'d, T> {
+    /// Create a new timer driver.
+    pub fn new(tim: impl Peripheral<P = T> + 'd) -> Self {
+        into_ref!(tim);
+
+        T::enable_and_reset();
+
+        Self { tim }
+    }
+
+    /// Get access to the virutal core 16bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_core(&self) -> crate::pac::timer::TimCore {
+        unsafe { crate::pac::timer::TimCore::from_ptr(T::regs()) }
+    }
+
+    #[cfg(not(stm32l0))]
+    fn regs_gp32_unchecked(&self) -> crate::pac::timer::TimGp32 {
+        unsafe { crate::pac::timer::TimGp32::from_ptr(T::regs()) }
+    }
+
+    /// Start the timer.
+    pub fn start(&self) {
+        self.regs_core().cr1().modify(|r| r.set_cen(true));
+    }
+
+    /// Stop the timer.
+    pub fn stop(&self) {
+        self.regs_core().cr1().modify(|r| r.set_cen(false));
+    }
+
+    /// Reset the counter value to 0
+    pub fn reset(&self) {
+        self.regs_core().cnt().write(|r| r.set_cnt(0));
+    }
+
+    /// Set the frequency of how many times per second the timer counts up to the max value or down to 0.
+    ///
+    /// This means that in the default edge-aligned mode,
+    /// the timer counter will wrap around at the same frequency as is being set.
+    /// In center-aligned mode (which not all timers support), the wrap-around frequency is effectively halved
+    /// because it needs to count up and down.
+    pub fn set_frequency(&self, frequency: Hertz) {
+        let f = frequency.0;
+        assert!(f > 0);
+        let timer_f = T::frequency().0;
+
+        match T::BITS {
+            TimerBits::Bits16 => {
+                let pclk_ticks_per_timer_period = timer_f / f;
+                let psc: u16 = unwrap!(((pclk_ticks_per_timer_period - 1) / (1 << 16)).try_into());
+                let divide_by = pclk_ticks_per_timer_period / (u32::from(psc) + 1);
+
+                // the timer counts `0..=arr`, we want it to count `0..divide_by`
+                let arr = unwrap!(u16::try_from(divide_by - 1));
+
+                let regs = self.regs_core();
+                regs.psc().write_value(psc);
+                regs.arr().write(|r| r.set_arr(arr));
+
+                regs.cr1().modify(|r| r.set_urs(vals::Urs::COUNTERONLY));
+                regs.egr().write(|r| r.set_ug(true));
+                regs.cr1().modify(|r| r.set_urs(vals::Urs::ANYEVENT));
+            }
+            #[cfg(not(stm32l0))]
+            TimerBits::Bits32 => {
+                let pclk_ticks_per_timer_period = (timer_f / f) as u64;
+                let psc: u16 = unwrap!(((pclk_ticks_per_timer_period - 1) / (1 << 32)).try_into());
+                let arr: u32 = unwrap!((pclk_ticks_per_timer_period / (psc as u64 + 1)).try_into());
+
+                let regs = self.regs_gp32_unchecked();
+                regs.psc().write_value(psc);
+                regs.arr().write_value(arr);
+
+                regs.cr1().modify(|r| r.set_urs(vals::Urs::COUNTERONLY));
+                regs.egr().write(|r| r.set_ug(true));
+                regs.cr1().modify(|r| r.set_urs(vals::Urs::ANYEVENT));
+            }
+        }
+    }
+
+    /// Clear update interrupt.
+    ///
+    /// Returns whether the update interrupt flag was set.
+    pub fn clear_update_interrupt(&self) -> bool {
+        let regs = self.regs_core();
+        let sr = regs.sr().read();
+        if sr.uif() {
+            regs.sr().modify(|r| {
+                r.set_uif(false);
+            });
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Enable/disable the update interrupt.
+    pub fn enable_update_interrupt(&self, enable: bool) {
+        self.regs_core().dier().modify(|r| r.set_uie(enable));
+    }
+
+    /// Enable/disable autoreload preload.
+    pub fn set_autoreload_preload(&self, enable: bool) {
+        self.regs_core().cr1().modify(|r| r.set_arpe(enable));
+    }
+
+    /// Get the timer frequency.
+    pub fn get_frequency(&self) -> Hertz {
+        let timer_f = T::frequency();
+
+        match T::BITS {
+            TimerBits::Bits16 => {
+                let regs = self.regs_core();
+                let arr = regs.arr().read().arr();
+                let psc = regs.psc().read();
+
+                timer_f / arr / (psc + 1)
+            }
+            #[cfg(not(stm32l0))]
+            TimerBits::Bits32 => {
+                let regs = self.regs_gp32_unchecked();
+                let arr = regs.arr().read();
+                let psc = regs.psc().read();
+
+                timer_f / arr / (psc + 1)
+            }
+        }
+    }
+}
+
+impl<'d, T: BasicNoCr2Instance> Timer<'d, T> {
+    /// Get access to the Baisc 16bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_basic_no_cr2(&self) -> crate::pac::timer::TimBasicNoCr2 {
+        unsafe { crate::pac::timer::TimBasicNoCr2::from_ptr(T::regs()) }
+    }
+
+    /// Enable/disable the update dma.
+    pub fn enable_update_dma(&self, enable: bool) {
+        self.regs_basic_no_cr2().dier().modify(|r| r.set_ude(enable));
+    }
+
+    /// Get the update dma enable/disable state.
+    pub fn get_update_dma_state(&self) -> bool {
+        self.regs_basic_no_cr2().dier().read().ude()
+    }
+}
+
+impl<'d, T: BasicInstance> Timer<'d, T> {
+    /// Get access to the Baisc 16bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_basic(&self) -> crate::pac::timer::TimBasic {
+        unsafe { crate::pac::timer::TimBasic::from_ptr(T::regs()) }
+    }
+}
+
+impl<'d, T: GeneralInstance1Channel> Timer<'d, T> {
+    /// Get access to the general purpose 1 channel 16bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_1ch(&self) -> crate::pac::timer::Tim1ch {
+        unsafe { crate::pac::timer::Tim1ch::from_ptr(T::regs()) }
+    }
+
+    /// Set clock divider.
+    pub fn set_clock_division(&self, ckd: vals::Ckd) {
+        self.regs_1ch().cr1().modify(|r| r.set_ckd(ckd));
+    }
+
+    /// Get max compare value. This depends on the timer frequency and the clock frequency from RCC.
+    pub fn get_max_compare_value(&self) -> u32 {
+        match T::BITS {
+            TimerBits::Bits16 => self.regs_1ch().arr().read().arr() as u32,
+            #[cfg(not(stm32l0))]
+            TimerBits::Bits32 => self.regs_gp32_unchecked().arr().read(),
+        }
+    }
+}
+
+impl<'d, T: GeneralInstance2Channel> Timer<'d, T> {
+    /// Get access to the general purpose 2 channel 16bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_2ch(&self) -> crate::pac::timer::Tim2ch {
+        unsafe { crate::pac::timer::Tim2ch::from_ptr(T::regs()) }
+    }
+}
+
+impl<'d, T: GeneralInstance4Channel> Timer<'d, T> {
+    /// Get access to the general purpose 16bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_gp16(&self) -> crate::pac::timer::TimGp16 {
+        unsafe { crate::pac::timer::TimGp16::from_ptr(T::regs()) }
+    }
+
+    /// Enable timer outputs.
+    pub fn enable_outputs(&self) {
+        self.tim.enable_outputs()
+    }
+
+    /// Set counting mode.
+    pub fn set_counting_mode(&self, mode: CountingMode) {
+        let (cms, dir) = mode.into();
+
+        let timer_enabled = self.regs_core().cr1().read().cen();
+        // Changing from edge aligned to center aligned (and vice versa) is not allowed while the timer is running.
+        // Changing direction is discouraged while the timer is running.
+        assert!(!timer_enabled);
+
+        self.regs_gp16().cr1().modify(|r| r.set_dir(dir));
+        self.regs_gp16().cr1().modify(|r| r.set_cms(cms))
+    }
+
+    /// Get counting mode.
+    pub fn get_counting_mode(&self) -> CountingMode {
+        let cr1 = self.regs_gp16().cr1().read();
+        (cr1.cms(), cr1.dir()).into()
+    }
+
+    /// Set input capture filter.
+    pub fn set_input_capture_filter(&self, channel: Channel, icf: vals::FilterValue) {
+        let raw_channel = channel.index();
+        self.regs_gp16()
+            .ccmr_input(raw_channel / 2)
+            .modify(|r| r.set_icf(raw_channel % 2, icf));
+    }
+
+    /// Clear input interrupt.
+    pub fn clear_input_interrupt(&self, channel: Channel) {
+        self.regs_gp16().sr().modify(|r| r.set_ccif(channel.index(), false));
+    }
+
+    /// Enable input interrupt.
+    pub fn enable_input_interrupt(&self, channel: Channel, enable: bool) {
+        self.regs_gp16().dier().modify(|r| r.set_ccie(channel.index(), enable));
+    }
+
+    /// Set input capture prescaler.
+    pub fn set_input_capture_prescaler(&self, channel: Channel, factor: u8) {
+        let raw_channel = channel.index();
+        self.regs_gp16()
+            .ccmr_input(raw_channel / 2)
+            .modify(|r| r.set_icpsc(raw_channel % 2, factor));
+    }
+
+    /// Set input TI selection.
+    pub fn set_input_ti_selection(&self, channel: Channel, tisel: InputTISelection) {
+        let raw_channel = channel.index();
+        self.regs_gp16()
+            .ccmr_input(raw_channel / 2)
+            .modify(|r| r.set_ccs(raw_channel % 2, tisel.into()));
+    }
+
+    /// Set input capture mode.
+    pub fn set_input_capture_mode(&self, channel: Channel, mode: InputCaptureMode) {
+        self.regs_gp16().ccer().modify(|r| match mode {
+            InputCaptureMode::Rising => {
+                r.set_ccnp(channel.index(), false);
+                r.set_ccp(channel.index(), false);
+            }
+            InputCaptureMode::Falling => {
+                r.set_ccnp(channel.index(), false);
+                r.set_ccp(channel.index(), true);
+            }
+            InputCaptureMode::BothEdges => {
+                r.set_ccnp(channel.index(), true);
+                r.set_ccp(channel.index(), true);
+            }
+        });
+    }
+
+    /// Set output compare mode.
+    pub fn set_output_compare_mode(&self, channel: Channel, mode: OutputCompareMode) {
+        let raw_channel: usize = channel.index();
+        self.regs_gp16()
+            .ccmr_output(raw_channel / 2)
+            .modify(|w| w.set_ocm(raw_channel % 2, mode.into()));
+    }
+
+    /// Set output polarity.
+    pub fn set_output_polarity(&self, channel: Channel, polarity: OutputPolarity) {
+        self.regs_gp16()
+            .ccer()
+            .modify(|w| w.set_ccp(channel.index(), polarity.into()));
+    }
+
+    /// Enable/disable a channel.
+    pub fn enable_channel(&self, channel: Channel, enable: bool) {
+        self.regs_gp16().ccer().modify(|w| w.set_cce(channel.index(), enable));
+    }
+
+    /// Get enable/disable state of a channel
+    pub fn get_channel_enable_state(&self, channel: Channel) -> bool {
+        self.regs_gp16().ccer().read().cce(channel.index())
+    }
+
+    /// Set compare value for a channel.
+    pub fn set_compare_value(&self, channel: Channel, value: u32) {
+        match T::BITS {
+            TimerBits::Bits16 => {
+                let value = unwrap!(u16::try_from(value));
+                self.regs_gp16().ccr(channel.index()).modify(|w| w.set_ccr(value));
+            }
+            #[cfg(not(stm32l0))]
+            TimerBits::Bits32 => {
+                self.regs_gp32_unchecked().ccr(channel.index()).write_value(value);
+            }
+        }
+    }
+
+    /// Get compare value for a channel.
+    pub fn get_compare_value(&self, channel: Channel) -> u32 {
+        match T::BITS {
+            TimerBits::Bits16 => self.regs_gp16().ccr(channel.index()).read().ccr() as u32,
+            #[cfg(not(stm32l0))]
+            TimerBits::Bits32 => self.regs_gp32_unchecked().ccr(channel.index()).read(),
+        }
+    }
+
+    /// Get capture value for a channel.
+    pub fn get_capture_value(&self, channel: Channel) -> u32 {
+        self.get_compare_value(channel)
+    }
+
+    /// Set output compare preload.
+    pub fn set_output_compare_preload(&self, channel: Channel, preload: bool) {
+        let channel_index = channel.index();
+        self.regs_gp16()
+            .ccmr_output(channel_index / 2)
+            .modify(|w| w.set_ocpe(channel_index % 2, preload));
+    }
+
+    /// Get capture compare DMA selection
+    pub fn get_cc_dma_selection(&self) -> vals::Ccds {
+        self.regs_gp16().cr2().read().ccds()
+    }
+
+    /// Set capture compare DMA selection
+    pub fn set_cc_dma_selection(&self, ccds: vals::Ccds) {
+        self.regs_gp16().cr2().modify(|w| w.set_ccds(ccds))
+    }
+
+    /// Get capture compare DMA enable state
+    pub fn get_cc_dma_enable_state(&self, channel: Channel) -> bool {
+        self.regs_gp16().dier().read().ccde(channel.index())
+    }
+
+    /// Set capture compare DMA enable state
+    pub fn set_cc_dma_enable_state(&self, channel: Channel, ccde: bool) {
+        self.regs_gp16().dier().modify(|w| w.set_ccde(channel.index(), ccde))
+    }
+}
+
+#[cfg(not(stm32l0))]
+impl<'d, T: GeneralInstance32bit4Channel> Timer<'d, T> {
+    /// Get access to the general purpose 32bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_gp32(&self) -> crate::pac::timer::TimGp32 {
+        unsafe { crate::pac::timer::TimGp32::from_ptr(T::regs()) }
+    }
+}
+
+#[cfg(not(stm32l0))]
+impl<'d, T: AdvancedInstance1Channel> Timer<'d, T> {
+    /// Get access to the general purpose 1 channel with one complementary 16bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_1ch_cmp(&self) -> crate::pac::timer::Tim1chCmp {
+        unsafe { crate::pac::timer::Tim1chCmp::from_ptr(T::regs()) }
+    }
+
+    /// Set clock divider for the dead time.
+    pub fn set_dead_time_clock_division(&self, value: vals::Ckd) {
+        self.regs_1ch_cmp().cr1().modify(|w| w.set_ckd(value));
+    }
+
+    /// Set dead time, as a fraction of the max duty value.
+    pub fn set_dead_time_value(&self, value: u8) {
+        self.regs_1ch_cmp().bdtr().modify(|w| w.set_dtg(value));
+    }
+
+    /// Set state of MOE-bit in BDTR register to en-/disable output
+    pub fn set_moe(&self, enable: bool) {
+        self.regs_1ch_cmp().bdtr().modify(|w| w.set_moe(enable));
+    }
+}
+
+#[cfg(not(stm32l0))]
+impl<'d, T: AdvancedInstance2Channel> Timer<'d, T> {
+    /// Get access to the general purpose 2 channel with one complementary 16bit timer registers.
+    ///
+    /// Note: This works even if the timer is more capable, because registers
+    /// for the less capable timers are a subset. This allows writing a driver
+    /// for a given set of capabilities, and having it transparently work with
+    /// more capable timers.
+    pub fn regs_2ch_cmp(&self) -> crate::pac::timer::Tim2chCmp {
+        unsafe { crate::pac::timer::Tim2chCmp::from_ptr(T::regs()) }
+    }
+}
+
+#[cfg(not(stm32l0))]
+impl<'d, T: AdvancedInstance4Channel> Timer<'d, T> {
+    /// Get access to the advanced timer registers.
+    pub fn regs_advanced(&self) -> crate::pac::timer::TimAdv {
+        unsafe { crate::pac::timer::TimAdv::from_ptr(T::regs()) }
+    }
+
+    /// Set complementary output polarity.
+    pub fn set_complementary_output_polarity(&self, channel: Channel, polarity: OutputPolarity) {
+        self.regs_advanced()
+            .ccer()
+            .modify(|w| w.set_ccnp(channel.index(), polarity.into()));
+    }
+
+    /// Enable/disable a complementary channel.
+    pub fn enable_complementary_channel(&self, channel: Channel, enable: bool) {
+        self.regs_advanced()
+            .ccer()
+            .modify(|w| w.set_ccne(channel.index(), enable));
+    }
+}
diff --git a/embassy-stm32/src/timer/mod.rs b/embassy-stm32/src/timer/mod.rs
index 8530c5229..2ba6b3f11 100644
--- a/embassy-stm32/src/timer/mod.rs
+++ b/embassy-stm32/src/timer/mod.rs
@@ -1,490 +1,13 @@
 //! Timers, PWM, quadrature decoder.
-//!
-
-//! Timer inheritance
-//!
-
-// sealed:
-//
-// Core -------------------------> 1CH -------------------------> 1CH_CMP
-//   |                              |                              ^   |
-//   +--> Basic_NoCr2 --> Basic     +--> 2CH --> GP16 --> GP32     |   +--> 2CH_CMP --> ADV
-//            |             |             |      ^  |              |           ^         ^
-//            |             |             +------|--|--------------|-----------+         |
-//            |             +--------------------+  +--------------|-----------|---------+
-//            |             |                                      |           |
-//            |             +--------------------------------------|-----------+
-//            +----------------------------------------------------+
-
-//! ```text
-//! BasicInstance --> CaptureCompare16bitInstance --+--> ComplementaryCaptureCompare16bitInstance
-//!                                                 |
-//!                                                 +--> CaptureCompare32bitInstance
-//! ```
-//!
-//! Mapping:
-//!
-//! |                   trait                    | timer                                                                                             |
-//! | :----------------------------------------: | ------------------------------------------------------------------------------------------------- |
-//! |              [BasicInstance]               | Basic Timer                                                                                       |
-//! |       [CaptureCompare16bitInstance]        | 1-channel Timer, 2-channel Timer, General Purpose 16-bit Timer                                    |
-//! |       [CaptureCompare32bitInstance]        | General Purpose 32-bit Timer                                                                      |
-//! | [ComplementaryCaptureCompare16bitInstance] | 1-channel with one complentary Timer, 2-channel with one complentary Timer, Advance Control Timer |
 
 #[cfg(not(stm32l0))]
 pub mod complementary_pwm;
+pub mod low_level;
 pub mod qei;
 pub mod simple_pwm;
 
-use stm32_metapac::timer::vals;
-
 use crate::interrupt;
 use crate::rcc::RccPeripheral;
-use crate::time::Hertz;
-
-/// Low-level timer access.
-#[cfg(feature = "unstable-pac")]
-pub mod low_level {
-    pub use super::sealed::*;
-}
-
-pub(crate) mod sealed {
-    use super::*;
-
-    /// Virtual Core 16-bit timer instance.  
-    pub trait CoreInstance: RccPeripheral {
-        /// Interrupt for this timer.
-        type Interrupt: interrupt::typelevel::Interrupt;
-
-        /// Get access to the virutal core 16bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_core() -> crate::pac::timer::TimCore;
-
-        /// Start the timer.
-        fn start(&self) {
-            Self::regs_core().cr1().modify(|r| r.set_cen(true));
-        }
-
-        /// Stop the timer.
-        fn stop(&self) {
-            Self::regs_core().cr1().modify(|r| r.set_cen(false));
-        }
-
-        /// Reset the counter value to 0
-        fn reset(&self) {
-            Self::regs_core().cnt().write(|r| r.set_cnt(0));
-        }
-
-        /// Set the frequency of how many times per second the timer counts up to the max value or down to 0.
-        ///
-        /// This means that in the default edge-aligned mode,
-        /// the timer counter will wrap around at the same frequency as is being set.
-        /// In center-aligned mode (which not all timers support), the wrap-around frequency is effectively halved
-        /// because it needs to count up and down.
-        fn set_frequency(&self, frequency: Hertz) {
-            let f = frequency.0;
-            let timer_f = Self::frequency().0;
-            assert!(f > 0);
-            let pclk_ticks_per_timer_period = timer_f / f;
-            let psc: u16 = unwrap!(((pclk_ticks_per_timer_period - 1) / (1 << 16)).try_into());
-            let divide_by = pclk_ticks_per_timer_period / (u32::from(psc) + 1);
-
-            // the timer counts `0..=arr`, we want it to count `0..divide_by`
-            let arr = unwrap!(u16::try_from(divide_by - 1));
-
-            let regs = Self::regs_core();
-            regs.psc().write(|r| r.set_psc(psc));
-            regs.arr().write(|r| r.set_arr(arr));
-
-            regs.cr1().modify(|r| r.set_urs(vals::Urs::COUNTERONLY));
-            regs.egr().write(|r| r.set_ug(true));
-            regs.cr1().modify(|r| r.set_urs(vals::Urs::ANYEVENT));
-        }
-
-        /// Clear update interrupt.
-        ///
-        /// Returns whether the update interrupt flag was set.
-        fn clear_update_interrupt(&self) -> bool {
-            let regs = Self::regs_core();
-            let sr = regs.sr().read();
-            if sr.uif() {
-                regs.sr().modify(|r| {
-                    r.set_uif(false);
-                });
-                true
-            } else {
-                false
-            }
-        }
-
-        /// Enable/disable the update interrupt.
-        fn enable_update_interrupt(&self, enable: bool) {
-            Self::regs_core().dier().modify(|r| r.set_uie(enable));
-        }
-
-        /// Enable/disable autoreload preload.
-        fn set_autoreload_preload(&self, enable: bool) {
-            Self::regs_core().cr1().modify(|r| r.set_arpe(enable));
-        }
-
-        /// Get the timer frequency.
-        fn get_frequency(&self) -> Hertz {
-            let timer_f = Self::frequency();
-
-            let regs = Self::regs_core();
-            let arr = regs.arr().read().arr();
-            let psc = regs.psc().read().psc();
-
-            timer_f / arr / (psc + 1)
-        }
-    }
-
-    /// Virtual Basic without CR2 16-bit timer instance.
-    pub trait BasicNoCr2Instance: CoreInstance {
-        /// Get access to the Baisc 16bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_basic_no_cr2() -> crate::pac::timer::TimBasicNoCr2;
-
-        /// Enable/disable the update dma.
-        fn enable_update_dma(&self, enable: bool) {
-            Self::regs_basic_no_cr2().dier().modify(|r| r.set_ude(enable));
-        }
-
-        /// Get the update dma enable/disable state.
-        fn get_update_dma_state(&self) -> bool {
-            Self::regs_basic_no_cr2().dier().read().ude()
-        }
-    }
-
-    /// Basic 16-bit timer instance.
-    pub trait BasicInstance: BasicNoCr2Instance {
-        /// Get access to the Baisc 16bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_basic() -> crate::pac::timer::TimBasic;
-    }
-
-    /// Gneral-purpose 1 channel 16-bit timer instance.
-    pub trait GeneralPurpose1ChannelInstance: CoreInstance {
-        /// Get access to the general purpose 1 channel 16bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_1ch() -> crate::pac::timer::Tim1ch;
-
-        /// Set clock divider.
-        fn set_clock_division(&self, ckd: vals::Ckd) {
-            Self::regs_1ch().cr1().modify(|r| r.set_ckd(ckd));
-        }
-
-        /// Get max compare value. This depends on the timer frequency and the clock frequency from RCC.
-        fn get_max_compare_value(&self) -> u16 {
-            Self::regs_1ch().arr().read().arr()
-        }
-    }
-
-    /// Gneral-purpose 1 channel 16-bit  timer instance.
-    pub trait GeneralPurpose2ChannelInstance: GeneralPurpose1ChannelInstance {
-        /// Get access to the general purpose 2 channel 16bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_2ch() -> crate::pac::timer::Tim2ch;
-    }
-
-    /// Gneral-purpose 16-bit timer instance.
-    pub trait GeneralPurpose16bitInstance: BasicInstance + GeneralPurpose2ChannelInstance {
-        /// Get access to the general purpose 16bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_gp16() -> crate::pac::timer::TimGp16;
-
-        /// Set counting mode.
-        fn set_counting_mode(&self, mode: CountingMode) {
-            let (cms, dir) = mode.into();
-
-            let timer_enabled = Self::regs_core().cr1().read().cen();
-            // Changing from edge aligned to center aligned (and vice versa) is not allowed while the timer is running.
-            // Changing direction is discouraged while the timer is running.
-            assert!(!timer_enabled);
-
-            Self::regs_gp16().cr1().modify(|r| r.set_dir(dir));
-            Self::regs_gp16().cr1().modify(|r| r.set_cms(cms))
-        }
-
-        /// Get counting mode.
-        fn get_counting_mode(&self) -> CountingMode {
-            let cr1 = Self::regs_gp16().cr1().read();
-            (cr1.cms(), cr1.dir()).into()
-        }
-
-        /// Set input capture filter.
-        fn set_input_capture_filter(&self, channel: Channel, icf: vals::FilterValue) {
-            let raw_channel = channel.index();
-            Self::regs_gp16()
-                .ccmr_input(raw_channel / 2)
-                .modify(|r| r.set_icf(raw_channel % 2, icf));
-        }
-
-        /// Clear input interrupt.
-        fn clear_input_interrupt(&self, channel: Channel) {
-            Self::regs_gp16().sr().modify(|r| r.set_ccif(channel.index(), false));
-        }
-
-        /// Enable input interrupt.
-        fn enable_input_interrupt(&self, channel: Channel, enable: bool) {
-            Self::regs_gp16().dier().modify(|r| r.set_ccie(channel.index(), enable));
-        }
-
-        /// Set input capture prescaler.
-        fn set_input_capture_prescaler(&self, channel: Channel, factor: u8) {
-            let raw_channel = channel.index();
-            Self::regs_gp16()
-                .ccmr_input(raw_channel / 2)
-                .modify(|r| r.set_icpsc(raw_channel % 2, factor));
-        }
-
-        /// Set input TI selection.
-        fn set_input_ti_selection(&self, channel: Channel, tisel: InputTISelection) {
-            let raw_channel = channel.index();
-            Self::regs_gp16()
-                .ccmr_input(raw_channel / 2)
-                .modify(|r| r.set_ccs(raw_channel % 2, tisel.into()));
-        }
-
-        /// Set input capture mode.
-        fn set_input_capture_mode(&self, channel: Channel, mode: InputCaptureMode) {
-            Self::regs_gp16().ccer().modify(|r| match mode {
-                InputCaptureMode::Rising => {
-                    r.set_ccnp(channel.index(), false);
-                    r.set_ccp(channel.index(), false);
-                }
-                InputCaptureMode::Falling => {
-                    r.set_ccnp(channel.index(), false);
-                    r.set_ccp(channel.index(), true);
-                }
-                InputCaptureMode::BothEdges => {
-                    r.set_ccnp(channel.index(), true);
-                    r.set_ccp(channel.index(), true);
-                }
-            });
-        }
-
-        /// Set output compare mode.
-        fn set_output_compare_mode(&self, channel: Channel, mode: OutputCompareMode) {
-            let raw_channel: usize = channel.index();
-            Self::regs_gp16()
-                .ccmr_output(raw_channel / 2)
-                .modify(|w| w.set_ocm(raw_channel % 2, mode.into()));
-        }
-
-        /// Set output polarity.
-        fn set_output_polarity(&self, channel: Channel, polarity: OutputPolarity) {
-            Self::regs_gp16()
-                .ccer()
-                .modify(|w| w.set_ccp(channel.index(), polarity.into()));
-        }
-
-        /// Enable/disable a channel.
-        fn enable_channel(&self, channel: Channel, enable: bool) {
-            Self::regs_gp16().ccer().modify(|w| w.set_cce(channel.index(), enable));
-        }
-
-        /// Get enable/disable state of a channel
-        fn get_channel_enable_state(&self, channel: Channel) -> bool {
-            Self::regs_gp16().ccer().read().cce(channel.index())
-        }
-
-        /// Set compare value for a channel.
-        fn set_compare_value(&self, channel: Channel, value: u16) {
-            Self::regs_gp16().ccr(channel.index()).modify(|w| w.set_ccr(value));
-        }
-
-        /// Get capture value for a channel.
-        fn get_capture_value(&self, channel: Channel) -> u16 {
-            Self::regs_gp16().ccr(channel.index()).read().ccr()
-        }
-
-        /// Get compare value for a channel.
-        fn get_compare_value(&self, channel: Channel) -> u16 {
-            Self::regs_gp16().ccr(channel.index()).read().ccr()
-        }
-
-        /// Set output compare preload.
-        fn set_output_compare_preload(&self, channel: Channel, preload: bool) {
-            let channel_index = channel.index();
-            Self::regs_gp16()
-                .ccmr_output(channel_index / 2)
-                .modify(|w| w.set_ocpe(channel_index % 2, preload));
-        }
-
-        /// Get capture compare DMA selection
-        fn get_cc_dma_selection(&self) -> super::vals::Ccds {
-            Self::regs_gp16().cr2().read().ccds()
-        }
-
-        /// Set capture compare DMA selection
-        fn set_cc_dma_selection(&self, ccds: super::vals::Ccds) {
-            Self::regs_gp16().cr2().modify(|w| w.set_ccds(ccds))
-        }
-
-        /// Get capture compare DMA enable state
-        fn get_cc_dma_enable_state(&self, channel: Channel) -> bool {
-            Self::regs_gp16().dier().read().ccde(channel.index())
-        }
-
-        /// Set capture compare DMA enable state
-        fn set_cc_dma_enable_state(&self, channel: Channel, ccde: bool) {
-            Self::regs_gp16().dier().modify(|w| w.set_ccde(channel.index(), ccde))
-        }
-    }
-
-    #[cfg(not(stm32l0))]
-    /// Gneral-purpose 32-bit timer instance.
-    pub trait GeneralPurpose32bitInstance: GeneralPurpose16bitInstance {
-        /// Get access to the general purpose 32bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_gp32() -> crate::pac::timer::TimGp32;
-
-        /// Set timer frequency.
-        fn set_frequency(&self, frequency: Hertz) {
-            let f = frequency.0;
-            assert!(f > 0);
-            let timer_f = Self::frequency().0;
-            let pclk_ticks_per_timer_period = (timer_f / f) as u64;
-            let psc: u16 = unwrap!(((pclk_ticks_per_timer_period - 1) / (1 << 32)).try_into());
-            let arr: u32 = unwrap!((pclk_ticks_per_timer_period / (psc as u64 + 1)).try_into());
-
-            let regs = Self::regs_gp32();
-            regs.psc().write(|r| r.set_psc(psc));
-            regs.arr().write_value(arr);
-
-            regs.cr1().modify(|r| r.set_urs(vals::Urs::COUNTERONLY));
-            regs.egr().write(|r| r.set_ug(true));
-            regs.cr1().modify(|r| r.set_urs(vals::Urs::ANYEVENT));
-        }
-
-        /// Get timer frequency.
-        fn get_frequency(&self) -> Hertz {
-            let timer_f = Self::frequency();
-
-            let regs = Self::regs_gp32();
-            let arr = regs.arr().read();
-            let psc = regs.psc().read().psc();
-
-            timer_f / arr / (psc + 1)
-        }
-
-        /// Set comapre value for a channel.
-        fn set_compare_value(&self, channel: Channel, value: u32) {
-            Self::regs_gp32().ccr(channel.index()).write_value(value);
-        }
-
-        /// Get capture value for a channel.
-        fn get_capture_value(&self, channel: Channel) -> u32 {
-            Self::regs_gp32().ccr(channel.index()).read()
-        }
-
-        /// Get max compare value. This depends on the timer frequency and the clock frequency from RCC.
-        fn get_max_compare_value(&self) -> u32 {
-            Self::regs_gp32().arr().read()
-        }
-
-        /// Get compare value for a channel.
-        fn get_compare_value(&self, channel: Channel) -> u32 {
-            Self::regs_gp32().ccr(channel.index()).read()
-        }
-    }
-
-    #[cfg(not(stm32l0))]
-    /// Gneral-purpose 1 channel with one complementary 16-bit timer instance.
-    pub trait GeneralPurpose1ChannelComplementaryInstance: BasicNoCr2Instance + GeneralPurpose1ChannelInstance {
-        /// Get access to the general purpose 1 channel with one complementary 16bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_1ch_cmp() -> crate::pac::timer::Tim1chCmp;
-
-        /// Set clock divider for the dead time.
-        fn set_dead_time_clock_division(&self, value: vals::Ckd) {
-            Self::regs_1ch_cmp().cr1().modify(|w| w.set_ckd(value));
-        }
-
-        /// Set dead time, as a fraction of the max duty value.
-        fn set_dead_time_value(&self, value: u8) {
-            Self::regs_1ch_cmp().bdtr().modify(|w| w.set_dtg(value));
-        }
-
-        /// Enable timer outputs.
-        fn enable_outputs(&self) {
-            Self::regs_1ch_cmp().bdtr().modify(|w| w.set_moe(true));
-        }
-    }
-
-    #[cfg(not(stm32l0))]
-    /// Gneral-purpose 2 channel with one complementary 16-bit timer instance.
-    pub trait GeneralPurpose2ChannelComplementaryInstance:
-        BasicInstance + GeneralPurpose2ChannelInstance + GeneralPurpose1ChannelComplementaryInstance
-    {
-        /// Get access to the general purpose 2 channel with one complementary 16bit timer registers.
-        ///
-        /// Note: This works even if the timer is more capable, because registers
-        /// for the less capable timers are a subset. This allows writing a driver
-        /// for a given set of capabilities, and having it transparently work with
-        /// more capable timers.
-        fn regs_2ch_cmp() -> crate::pac::timer::Tim2chCmp;
-    }
-
-    #[cfg(not(stm32l0))]
-    /// Advanced control timer instance.
-    pub trait AdvancedControlInstance:
-        GeneralPurpose2ChannelComplementaryInstance + GeneralPurpose16bitInstance
-    {
-        /// Capture compare interrupt for this timer.
-        type CaptureCompareInterrupt: interrupt::typelevel::Interrupt;
-
-        /// Get access to the advanced timer registers.
-        fn regs_advanced() -> crate::pac::timer::TimAdv;
-
-        /// Set complementary output polarity.
-        fn set_complementary_output_polarity(&self, channel: Channel, polarity: OutputPolarity) {
-            Self::regs_advanced()
-                .ccer()
-                .modify(|w| w.set_ccnp(channel.index(), polarity.into()));
-        }
-
-        /// Enable/disable a complementary channel.
-        fn enable_complementary_channel(&self, channel: Channel, enable: bool) {
-            Self::regs_advanced()
-                .ccer()
-                .modify(|w| w.set_ccne(channel.index(), enable));
-        }
-    }
-}
 
 /// Timer channel.
 #[derive(Clone, Copy)]
@@ -511,454 +34,195 @@ impl Channel {
     }
 }
 
-/// Input capture mode.
-#[derive(Clone, Copy)]
-pub enum InputCaptureMode {
-    /// Rising edge only.
-    Rising,
-    /// Falling edge only.
-    Falling,
-    /// Both rising or falling edges.
-    BothEdges,
+/// Amount of bits of a timer.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum TimerBits {
+    /// 16 bits.
+    Bits16,
+    /// 32 bits.
+    #[cfg(not(stm32l0))]
+    Bits32,
 }
 
-/// Input TI selection.
-#[derive(Clone, Copy)]
-pub enum InputTISelection {
-    /// Normal
-    Normal,
-    /// Alternate
-    Alternate,
-    /// TRC
-    TRC,
-}
+/// Core timer instance.
+pub trait CoreInstance: RccPeripheral + 'static {
+    /// Interrupt for this timer.
+    type Interrupt: interrupt::typelevel::Interrupt;
 
-impl From<InputTISelection> for stm32_metapac::timer::vals::CcmrInputCcs {
-    fn from(tisel: InputTISelection) -> Self {
-        match tisel {
-            InputTISelection::Normal => stm32_metapac::timer::vals::CcmrInputCcs::TI4,
-            InputTISelection::Alternate => stm32_metapac::timer::vals::CcmrInputCcs::TI3,
-            InputTISelection::TRC => stm32_metapac::timer::vals::CcmrInputCcs::TRC,
-        }
-    }
-}
+    /// Amount of bits this timer has.
+    const BITS: TimerBits;
 
-/// Timer counting mode.
-#[repr(u8)]
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
-pub enum CountingMode {
-    #[default]
-    /// The timer counts up to the reload value and then resets back to 0.
-    EdgeAlignedUp,
-    /// The timer counts down to 0 and then resets back to the reload value.
-    EdgeAlignedDown,
-    /// The timer counts up to the reload value and then counts back to 0.
+    /// Registers for this timer.
     ///
-    /// The output compare interrupt flags of channels configured in output are
-    /// set when the counter is counting down.
-    CenterAlignedDownInterrupts,
-    /// The timer counts up to the reload value and then counts back to 0.
-    ///
-    /// The output compare interrupt flags of channels configured in output are
-    /// set when the counter is counting up.
-    CenterAlignedUpInterrupts,
-    /// The timer counts up to the reload value and then counts back to 0.
-    ///
-    /// The output compare interrupt flags of channels configured in output are
-    /// set when the counter is counting both up or down.
-    CenterAlignedBothInterrupts,
+    /// This is a raw pointer to the register block. The actual register block layout varies depending on the timer type.
+    fn regs() -> *mut ();
+}
+/// Cut-down basic timer instance.
+pub trait BasicNoCr2Instance: CoreInstance {}
+/// Basic timer instance.
+pub trait BasicInstance: BasicNoCr2Instance {}
+
+/// General-purpose 16-bit timer with 1 channel instance.
+pub trait GeneralInstance1Channel: CoreInstance {}
+
+/// General-purpose 16-bit timer with 2 channels instance.
+pub trait GeneralInstance2Channel: GeneralInstance1Channel {}
+
+/// General-purpose 16-bit timer with 4 channels instance.
+pub trait GeneralInstance4Channel: BasicInstance + GeneralInstance2Channel {
+    // SimplePwm<'d, T> is implemented for T: GeneralInstance4Channel
+    // Advanced timers implement this trait, but the output needs to be
+    // enabled explicitly.
+    // To support general-purpose and advanced timers, this function is added
+    // here defaulting to noop and overwritten for advanced timers.
+    /// Enable timer outputs.
+    fn enable_outputs(&self) {}
 }
 
-impl CountingMode {
-    /// Return whether this mode is edge-aligned (up or down).
-    pub fn is_edge_aligned(&self) -> bool {
-        matches!(self, CountingMode::EdgeAlignedUp | CountingMode::EdgeAlignedDown)
-    }
+/// General-purpose 32-bit timer with 4 channels instance.
+pub trait GeneralInstance32bit4Channel: GeneralInstance4Channel {}
 
-    /// Return whether this mode is center-aligned.
-    pub fn is_center_aligned(&self) -> bool {
-        matches!(
-            self,
-            CountingMode::CenterAlignedDownInterrupts
-                | CountingMode::CenterAlignedUpInterrupts
-                | CountingMode::CenterAlignedBothInterrupts
-        )
-    }
+/// Advanced 16-bit timer with 1 channel instance.
+pub trait AdvancedInstance1Channel: BasicNoCr2Instance + GeneralInstance1Channel {
+    /// Capture compare interrupt for this timer.
+    type CaptureCompareInterrupt: interrupt::typelevel::Interrupt;
 }
+/// Advanced 16-bit timer with 2 channels instance.
 
-impl From<CountingMode> for (vals::Cms, vals::Dir) {
-    fn from(value: CountingMode) -> Self {
-        match value {
-            CountingMode::EdgeAlignedUp => (vals::Cms::EDGEALIGNED, vals::Dir::UP),
-            CountingMode::EdgeAlignedDown => (vals::Cms::EDGEALIGNED, vals::Dir::DOWN),
-            CountingMode::CenterAlignedDownInterrupts => (vals::Cms::CENTERALIGNED1, vals::Dir::UP),
-            CountingMode::CenterAlignedUpInterrupts => (vals::Cms::CENTERALIGNED2, vals::Dir::UP),
-            CountingMode::CenterAlignedBothInterrupts => (vals::Cms::CENTERALIGNED3, vals::Dir::UP),
-        }
-    }
-}
+pub trait AdvancedInstance2Channel: BasicInstance + GeneralInstance2Channel + AdvancedInstance1Channel {}
 
-impl From<(vals::Cms, vals::Dir)> for CountingMode {
-    fn from(value: (vals::Cms, vals::Dir)) -> Self {
-        match value {
-            (vals::Cms::EDGEALIGNED, vals::Dir::UP) => CountingMode::EdgeAlignedUp,
-            (vals::Cms::EDGEALIGNED, vals::Dir::DOWN) => CountingMode::EdgeAlignedDown,
-            (vals::Cms::CENTERALIGNED1, _) => CountingMode::CenterAlignedDownInterrupts,
-            (vals::Cms::CENTERALIGNED2, _) => CountingMode::CenterAlignedUpInterrupts,
-            (vals::Cms::CENTERALIGNED3, _) => CountingMode::CenterAlignedBothInterrupts,
-        }
-    }
-}
+/// Advanced 16-bit timer with 4 channels instance.
+pub trait AdvancedInstance4Channel: AdvancedInstance2Channel + GeneralInstance4Channel {}
 
-/// Output compare mode.
-#[derive(Clone, Copy)]
-pub enum OutputCompareMode {
-    /// The comparison between the output compare register TIMx_CCRx and
-    /// the counter TIMx_CNT has no effect on the outputs.
-    /// (this mode is used to generate a timing base).
-    Frozen,
-    /// Set channel to active level on match. OCxREF signal is forced high when the
-    /// counter TIMx_CNT matches the capture/compare register x (TIMx_CCRx).
-    ActiveOnMatch,
-    /// Set channel to inactive level on match. OCxREF signal is forced low when the
-    /// counter TIMx_CNT matches the capture/compare register x (TIMx_CCRx).
-    InactiveOnMatch,
-    /// Toggle - OCxREF toggles when TIMx_CNT=TIMx_CCRx.
-    Toggle,
-    /// Force inactive level - OCxREF is forced low.
-    ForceInactive,
-    /// Force active level - OCxREF is forced high.
-    ForceActive,
-    /// PWM mode 1 - In upcounting, channel is active as long as TIMx_CNT<TIMx_CCRx
-    /// else inactive. In downcounting, channel is inactive (OCxREF=0) as long as
-    /// TIMx_CNT>TIMx_CCRx else active (OCxREF=1).
-    PwmMode1,
-    /// PWM mode 2 - In upcounting, channel is inactive as long as
-    /// TIMx_CNT<TIMx_CCRx else active. In downcounting, channel is active as long as
-    /// TIMx_CNT>TIMx_CCRx else inactive.
-    PwmMode2,
-    // TODO: there's more modes here depending on the chip family.
-}
+pin_trait!(Channel1Pin, GeneralInstance4Channel);
+pin_trait!(Channel2Pin, GeneralInstance4Channel);
+pin_trait!(Channel3Pin, GeneralInstance4Channel);
+pin_trait!(Channel4Pin, GeneralInstance4Channel);
+pin_trait!(ExternalTriggerPin, GeneralInstance4Channel);
 
-impl From<OutputCompareMode> for stm32_metapac::timer::vals::Ocm {
-    fn from(mode: OutputCompareMode) -> Self {
-        match mode {
-            OutputCompareMode::Frozen => stm32_metapac::timer::vals::Ocm::FROZEN,
-            OutputCompareMode::ActiveOnMatch => stm32_metapac::timer::vals::Ocm::ACTIVEONMATCH,
-            OutputCompareMode::InactiveOnMatch => stm32_metapac::timer::vals::Ocm::INACTIVEONMATCH,
-            OutputCompareMode::Toggle => stm32_metapac::timer::vals::Ocm::TOGGLE,
-            OutputCompareMode::ForceInactive => stm32_metapac::timer::vals::Ocm::FORCEINACTIVE,
-            OutputCompareMode::ForceActive => stm32_metapac::timer::vals::Ocm::FORCEACTIVE,
-            OutputCompareMode::PwmMode1 => stm32_metapac::timer::vals::Ocm::PWMMODE1,
-            OutputCompareMode::PwmMode2 => stm32_metapac::timer::vals::Ocm::PWMMODE2,
-        }
-    }
-}
+pin_trait!(Channel1ComplementaryPin, AdvancedInstance4Channel);
+pin_trait!(Channel2ComplementaryPin, AdvancedInstance4Channel);
+pin_trait!(Channel3ComplementaryPin, AdvancedInstance4Channel);
+pin_trait!(Channel4ComplementaryPin, AdvancedInstance4Channel);
 
-/// Timer output pin polarity.
-#[derive(Clone, Copy)]
-pub enum OutputPolarity {
-    /// Active high (higher duty value makes the pin spend more time high).
-    ActiveHigh,
-    /// Active low (higher duty value makes the pin spend more time low).
-    ActiveLow,
-}
+pin_trait!(BreakInputPin, AdvancedInstance4Channel);
+pin_trait!(BreakInput2Pin, AdvancedInstance4Channel);
 
-impl From<OutputPolarity> for bool {
-    fn from(mode: OutputPolarity) -> Self {
-        match mode {
-            OutputPolarity::ActiveHigh => false,
-            OutputPolarity::ActiveLow => true,
-        }
-    }
-}
+pin_trait!(BreakInputComparator1Pin, AdvancedInstance4Channel);
+pin_trait!(BreakInputComparator2Pin, AdvancedInstance4Channel);
 
-/// Basic 16-bit timer instance.
-pub trait BasicInstance: sealed::BasicInstance + sealed::BasicNoCr2Instance + sealed::CoreInstance + 'static {}
+pin_trait!(BreakInput2Comparator1Pin, AdvancedInstance4Channel);
+pin_trait!(BreakInput2Comparator2Pin, AdvancedInstance4Channel);
 
-// It's just a General-purpose 16-bit timer instance.
-/// Capture Compare timer instance.
-pub trait CaptureCompare16bitInstance:
-    BasicInstance
-    + sealed::GeneralPurpose2ChannelInstance
-    + sealed::GeneralPurpose1ChannelInstance
-    + sealed::GeneralPurpose16bitInstance
-    + 'static
-{
-}
+// Update Event trigger DMA for every timer
+dma_trait!(UpDma, BasicInstance);
 
-#[cfg(not(stm32l0))]
-// It's just a General-purpose 32-bit timer instance.
-/// Capture Compare 32-bit timer instance.
-pub trait CaptureCompare32bitInstance:
-    CaptureCompare16bitInstance + sealed::GeneralPurpose32bitInstance + 'static
-{
-}
-
-#[cfg(not(stm32l0))]
-// It's just a Advanced Control timer instance.
-/// Complementary Capture Compare 32-bit timer instance.
-pub trait ComplementaryCaptureCompare16bitInstance:
-    CaptureCompare16bitInstance
-    + sealed::GeneralPurpose1ChannelComplementaryInstance
-    + sealed::GeneralPurpose2ChannelComplementaryInstance
-    + sealed::AdvancedControlInstance
-    + 'static
-{
-}
-
-pin_trait!(Channel1Pin, CaptureCompare16bitInstance);
-pin_trait!(Channel2Pin, CaptureCompare16bitInstance);
-pin_trait!(Channel3Pin, CaptureCompare16bitInstance);
-pin_trait!(Channel4Pin, CaptureCompare16bitInstance);
-pin_trait!(ExternalTriggerPin, CaptureCompare16bitInstance);
-
-cfg_if::cfg_if! {
-    if #[cfg(not(stm32l0))] {
-        pin_trait!(Channel1ComplementaryPin, ComplementaryCaptureCompare16bitInstance);
-        pin_trait!(Channel2ComplementaryPin, ComplementaryCaptureCompare16bitInstance);
-        pin_trait!(Channel3ComplementaryPin, ComplementaryCaptureCompare16bitInstance);
-        pin_trait!(Channel4ComplementaryPin, ComplementaryCaptureCompare16bitInstance);
-
-        pin_trait!(BreakInputPin, ComplementaryCaptureCompare16bitInstance);
-        pin_trait!(BreakInput2Pin, ComplementaryCaptureCompare16bitInstance);
-
-        pin_trait!(BreakInputComparator1Pin, ComplementaryCaptureCompare16bitInstance);
-        pin_trait!(BreakInputComparator2Pin, ComplementaryCaptureCompare16bitInstance);
-
-        pin_trait!(BreakInput2Comparator1Pin, ComplementaryCaptureCompare16bitInstance);
-        pin_trait!(BreakInput2Comparator2Pin, ComplementaryCaptureCompare16bitInstance);
-    }
-}
+dma_trait!(Ch1Dma, GeneralInstance4Channel);
+dma_trait!(Ch2Dma, GeneralInstance4Channel);
+dma_trait!(Ch3Dma, GeneralInstance4Channel);
+dma_trait!(Ch4Dma, GeneralInstance4Channel);
 
 #[allow(unused)]
 macro_rules! impl_core_timer {
-    ($inst:ident, $irq:ident) => {
-        impl sealed::CoreInstance for crate::peripherals::$inst {
-            type Interrupt = crate::interrupt::typelevel::$irq;
+    ($inst:ident, $bits:expr) => {
+        impl CoreInstance for crate::peripherals::$inst {
+            type Interrupt = crate::_generated::peripheral_interrupts::$inst::UP;
 
-            fn regs_core() -> crate::pac::timer::TimCore {
-                unsafe { crate::pac::timer::TimCore::from_ptr(crate::pac::$inst.as_ptr()) }
-            }
-        }
-    };
-}
+            const BITS: TimerBits = $bits;
 
-#[allow(unused)]
-macro_rules! impl_basic_no_cr2_timer {
-    ($inst:ident) => {
-        impl sealed::BasicNoCr2Instance for crate::peripherals::$inst {
-            fn regs_basic_no_cr2() -> crate::pac::timer::TimBasicNoCr2 {
-                unsafe { crate::pac::timer::TimBasicNoCr2::from_ptr(crate::pac::$inst.as_ptr()) }
-            }
-        }
-    };
-}
-
-#[allow(unused)]
-macro_rules! impl_basic_timer {
-    ($inst:ident) => {
-        impl sealed::BasicInstance for crate::peripherals::$inst {
-            fn regs_basic() -> crate::pac::timer::TimBasic {
-                unsafe { crate::pac::timer::TimBasic::from_ptr(crate::pac::$inst.as_ptr()) }
-            }
-        }
-    };
-}
-
-#[allow(unused)]
-macro_rules! impl_1ch_timer {
-    ($inst:ident) => {
-        impl sealed::GeneralPurpose1ChannelInstance for crate::peripherals::$inst {
-            fn regs_1ch() -> crate::pac::timer::Tim1ch {
-                unsafe { crate::pac::timer::Tim1ch::from_ptr(crate::pac::$inst.as_ptr()) }
-            }
-        }
-    };
-}
-
-#[allow(unused)]
-macro_rules! impl_2ch_timer {
-    ($inst:ident) => {
-        impl sealed::GeneralPurpose2ChannelInstance for crate::peripherals::$inst {
-            fn regs_2ch() -> crate::pac::timer::Tim2ch {
-                unsafe { crate::pac::timer::Tim2ch::from_ptr(crate::pac::$inst.as_ptr()) }
-            }
-        }
-    };
-}
-
-#[allow(unused)]
-macro_rules! impl_gp16_timer {
-    ($inst:ident) => {
-        impl sealed::GeneralPurpose16bitInstance for crate::peripherals::$inst {
-            fn regs_gp16() -> crate::pac::timer::TimGp16 {
-                unsafe { crate::pac::timer::TimGp16::from_ptr(crate::pac::$inst.as_ptr()) }
-            }
-        }
-    };
-}
-
-#[allow(unused)]
-macro_rules! impl_gp32_timer {
-    ($inst:ident) => {
-        impl sealed::GeneralPurpose32bitInstance for crate::peripherals::$inst {
-            fn regs_gp32() -> crate::pac::timer::TimGp32 {
-                crate::pac::$inst
-            }
-        }
-    };
-}
-
-#[allow(unused)]
-macro_rules! impl_1ch_cmp_timer {
-    ($inst:ident) => {
-        impl sealed::GeneralPurpose1ChannelComplementaryInstance for crate::peripherals::$inst {
-            fn regs_1ch_cmp() -> crate::pac::timer::Tim1chCmp {
-                unsafe { crate::pac::timer::Tim1chCmp::from_ptr(crate::pac::$inst.as_ptr()) }
-            }
-        }
-    };
-}
-
-#[allow(unused)]
-macro_rules! impl_2ch_cmp_timer {
-    ($inst:ident) => {
-        impl sealed::GeneralPurpose2ChannelComplementaryInstance for crate::peripherals::$inst {
-            fn regs_2ch_cmp() -> crate::pac::timer::Tim2chCmp {
-                unsafe { crate::pac::timer::Tim2chCmp::from_ptr(crate::pac::$inst.as_ptr()) }
-            }
-        }
-    };
-}
-
-#[allow(unused)]
-macro_rules! impl_adv_timer {
-    ($inst:ident, $irq:ident) => {
-        impl sealed::AdvancedControlInstance for crate::peripherals::$inst {
-            type CaptureCompareInterrupt = crate::interrupt::typelevel::$irq;
-
-            fn regs_advanced() -> crate::pac::timer::TimAdv {
-                unsafe { crate::pac::timer::TimAdv::from_ptr(crate::pac::$inst.as_ptr()) }
+            fn regs() -> *mut () {
+                crate::pac::$inst.as_ptr()
             }
         }
     };
 }
 
 foreach_interrupt! {
-
     ($inst:ident, timer, TIM_BASIC, UP, $irq:ident) => {
-        impl_core_timer!($inst, $irq);
-        impl_basic_no_cr2_timer!($inst);
-        impl_basic_timer!($inst);
+        impl_core_timer!($inst, TimerBits::Bits16);
+        impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
     };
 
     ($inst:ident, timer, TIM_1CH, UP, $irq:ident) => {
-        impl_core_timer!($inst, $irq);
-        impl_basic_no_cr2_timer!($inst);
-        impl_basic_timer!($inst);
-        impl_1ch_timer!($inst);
-        impl_2ch_timer!($inst);
-        impl_gp16_timer!($inst);
+        impl_core_timer!($inst, TimerBits::Bits16);
+        impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl CaptureCompare16bitInstance for crate::peripherals::$inst {}
+        impl GeneralInstance1Channel for crate::peripherals::$inst {}
+        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl GeneralInstance4Channel for crate::peripherals::$inst {}
     };
 
-
     ($inst:ident, timer, TIM_2CH, UP, $irq:ident) => {
-        impl_core_timer!($inst, $irq);
-        impl_basic_no_cr2_timer!($inst);
-        impl_basic_timer!($inst);
-        impl_1ch_timer!($inst);
-        impl_2ch_timer!($inst);
-        impl_gp16_timer!($inst);
+        impl_core_timer!($inst, TimerBits::Bits16);
+        impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl CaptureCompare16bitInstance for crate::peripherals::$inst {}
+        impl GeneralInstance1Channel for crate::peripherals::$inst {}
+        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl GeneralInstance4Channel for crate::peripherals::$inst {}
     };
 
     ($inst:ident, timer, TIM_GP16, UP, $irq:ident) => {
-        impl_core_timer!($inst, $irq);
-        impl_basic_no_cr2_timer!($inst);
-        impl_basic_timer!($inst);
-        impl_1ch_timer!($inst);
-        impl_2ch_timer!($inst);
-        impl_gp16_timer!($inst);
+        impl_core_timer!($inst, TimerBits::Bits16);
+        impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl CaptureCompare16bitInstance for crate::peripherals::$inst {}
+        impl GeneralInstance1Channel for crate::peripherals::$inst {}
+        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl GeneralInstance4Channel for crate::peripherals::$inst {}
     };
 
     ($inst:ident, timer, TIM_GP32, UP, $irq:ident) => {
-        impl_core_timer!($inst, $irq);
-        impl_basic_no_cr2_timer!($inst);
-        impl_basic_timer!($inst);
-        impl_1ch_timer!($inst);
-        impl_2ch_timer!($inst);
-        impl_gp16_timer!($inst);
-        impl_gp32_timer!($inst);
+        impl_core_timer!($inst, TimerBits::Bits32);
+        impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl CaptureCompare16bitInstance for crate::peripherals::$inst {}
-        impl CaptureCompare32bitInstance for crate::peripherals::$inst {}
+        impl GeneralInstance1Channel for crate::peripherals::$inst {}
+        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl GeneralInstance4Channel for crate::peripherals::$inst {}
+        impl GeneralInstance32bit4Channel for crate::peripherals::$inst {}
     };
 
     ($inst:ident, timer, TIM_1CH_CMP, UP, $irq:ident) => {
-        impl_core_timer!($inst, $irq);
-        impl_basic_no_cr2_timer!($inst);
-        impl_basic_timer!($inst);
-        impl_1ch_timer!($inst);
-        impl_2ch_timer!($inst);
-        impl_gp16_timer!($inst);
-        impl_1ch_cmp_timer!($inst);
-        impl_2ch_cmp_timer!($inst);
+        impl_core_timer!($inst, TimerBits::Bits16);
+        impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl CaptureCompare16bitInstance for crate::peripherals::$inst {}
-        impl ComplementaryCaptureCompare16bitInstance for crate::peripherals::$inst {}
+        impl GeneralInstance1Channel for crate::peripherals::$inst {}
+        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl GeneralInstance4Channel for crate::peripherals::$inst { fn enable_outputs(&self) { set_moe::<Self>() }}
+        impl AdvancedInstance1Channel for crate::peripherals::$inst { type CaptureCompareInterrupt = crate::_generated::peripheral_interrupts::$inst::CC; }
+        impl AdvancedInstance2Channel for crate::peripherals::$inst {}
+        impl AdvancedInstance4Channel for crate::peripherals::$inst {}
     };
-    ($inst:ident, timer, TIM_1CH_CMP, CC, $irq:ident) => {
-        impl_adv_timer!($inst, $irq);
-    };
-
 
     ($inst:ident, timer, TIM_2CH_CMP, UP, $irq:ident) => {
-        impl_core_timer!($inst, $irq);
-        impl_basic_no_cr2_timer!($inst);
-        impl_basic_timer!($inst);
-        impl_1ch_timer!($inst);
-        impl_2ch_timer!($inst);
-        impl_gp16_timer!($inst);
-        impl_1ch_cmp_timer!($inst);
-        impl_2ch_cmp_timer!($inst);
+        impl_core_timer!($inst, TimerBits::Bits16);
+        impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl CaptureCompare16bitInstance for crate::peripherals::$inst {}
-        impl ComplementaryCaptureCompare16bitInstance for crate::peripherals::$inst {}
+        impl GeneralInstance1Channel for crate::peripherals::$inst {}
+        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl GeneralInstance4Channel for crate::peripherals::$inst { fn enable_outputs(&self) { set_moe::<Self>() }}
+        impl AdvancedInstance1Channel for crate::peripherals::$inst { type CaptureCompareInterrupt = crate::_generated::peripheral_interrupts::$inst::CC; }
+        impl AdvancedInstance2Channel for crate::peripherals::$inst {}
+        impl AdvancedInstance4Channel for crate::peripherals::$inst {}
     };
-    ($inst:ident, timer, TIM_2CH_CMP, CC, $irq:ident) => {
-        impl_adv_timer!($inst, $irq);
-    };
-
 
     ($inst:ident, timer, TIM_ADV, UP, $irq:ident) => {
-        impl_core_timer!($inst, $irq);
-        impl_basic_no_cr2_timer!($inst);
-        impl_basic_timer!($inst);
-        impl_1ch_timer!($inst);
-        impl_2ch_timer!($inst);
-        impl_gp16_timer!($inst);
-        impl_1ch_cmp_timer!($inst);
-        impl_2ch_cmp_timer!($inst);
+        impl_core_timer!($inst, TimerBits::Bits16);
+        impl BasicNoCr2Instance for crate::peripherals::$inst {}
         impl BasicInstance for crate::peripherals::$inst {}
-        impl CaptureCompare16bitInstance for crate::peripherals::$inst {}
-        impl ComplementaryCaptureCompare16bitInstance for crate::peripherals::$inst {}
-    };
-    ($inst:ident, timer, TIM_ADV, CC, $irq:ident) => {
-        impl_adv_timer!($inst, $irq);
+        impl GeneralInstance1Channel for crate::peripherals::$inst {}
+        impl GeneralInstance2Channel for crate::peripherals::$inst {}
+        impl GeneralInstance4Channel for crate::peripherals::$inst { fn enable_outputs(&self) { set_moe::<Self>() }}
+        impl AdvancedInstance1Channel for crate::peripherals::$inst { type CaptureCompareInterrupt = crate::_generated::peripheral_interrupts::$inst::CC; }
+        impl AdvancedInstance2Channel for crate::peripherals::$inst {}
+        impl AdvancedInstance4Channel for crate::peripherals::$inst {}
     };
 }
 
-// Update Event trigger DMA for every timer
-dma_trait!(UpDma, BasicInstance);
-
-dma_trait!(Ch1Dma, CaptureCompare16bitInstance);
-dma_trait!(Ch2Dma, CaptureCompare16bitInstance);
-dma_trait!(Ch3Dma, CaptureCompare16bitInstance);
-dma_trait!(Ch4Dma, CaptureCompare16bitInstance);
+#[cfg(not(stm32l0))]
+#[allow(unused)]
+fn set_moe<T: GeneralInstance4Channel>() {
+    unsafe { crate::pac::timer::Tim1chCmp::from_ptr(T::regs()) }
+        .bdtr()
+        .modify(|w| w.set_moe(true));
+}
diff --git a/embassy-stm32/src/timer/qei.rs b/embassy-stm32/src/timer/qei.rs
index 59efb72ba..ab9879be6 100644
--- a/embassy-stm32/src/timer/qei.rs
+++ b/embassy-stm32/src/timer/qei.rs
@@ -3,10 +3,11 @@
 use core::marker::PhantomData;
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use stm32_metapac::timer::vals;
 
-use super::*;
-use crate::gpio::sealed::AFType;
-use crate::gpio::AnyPin;
+use super::low_level::Timer;
+use super::{Channel1Pin, Channel2Pin, GeneralInstance4Channel};
+use crate::gpio::{AFType, AnyPin};
 use crate::Peripheral;
 
 /// Counting direction
@@ -30,7 +31,7 @@ pub struct QeiPin<'d, T, Channel> {
 
 macro_rules! channel_impl {
     ($new_chx:ident, $channel:ident, $pin_trait:ident) => {
-        impl<'d, T: CaptureCompare16bitInstance> QeiPin<'d, T, $channel> {
+        impl<'d, T: GeneralInstance4Channel> QeiPin<'d, T, $channel> {
             #[doc = concat!("Create a new ", stringify!($channel), " QEI pin instance.")]
             pub fn $new_chx(pin: impl Peripheral<P = impl $pin_trait<T>> + 'd) -> Self {
                 into_ref!(pin);
@@ -53,29 +54,28 @@ channel_impl!(new_ch1, Ch1, Channel1Pin);
 channel_impl!(new_ch2, Ch2, Channel2Pin);
 
 /// Quadrature decoder driver.
-pub struct Qei<'d, T> {
-    _inner: PeripheralRef<'d, T>,
+pub struct Qei<'d, T: GeneralInstance4Channel> {
+    inner: Timer<'d, T>,
 }
 
-impl<'d, T: CaptureCompare16bitInstance> Qei<'d, T> {
+impl<'d, T: GeneralInstance4Channel> Qei<'d, T> {
     /// Create a new quadrature decoder driver.
     pub fn new(tim: impl Peripheral<P = T> + 'd, _ch1: QeiPin<'d, T, Ch1>, _ch2: QeiPin<'d, T, Ch2>) -> Self {
         Self::new_inner(tim)
     }
 
     fn new_inner(tim: impl Peripheral<P = T> + 'd) -> Self {
-        into_ref!(tim);
-
-        T::enable_and_reset();
+        let inner = Timer::new(tim);
+        let r = inner.regs_gp16();
 
         // Configure TxC1 and TxC2 as captures
-        T::regs_gp16().ccmr_input(0).modify(|w| {
+        r.ccmr_input(0).modify(|w| {
             w.set_ccs(0, vals::CcmrInputCcs::TI4);
             w.set_ccs(1, vals::CcmrInputCcs::TI4);
         });
 
         // enable and configure to capture on rising edge
-        T::regs_gp16().ccer().modify(|w| {
+        r.ccer().modify(|w| {
             w.set_cce(0, true);
             w.set_cce(1, true);
 
@@ -83,19 +83,19 @@ impl<'d, T: CaptureCompare16bitInstance> Qei<'d, T> {
             w.set_ccp(1, false);
         });
 
-        T::regs_gp16().smcr().modify(|w| {
+        r.smcr().modify(|w| {
             w.set_sms(vals::Sms::ENCODER_MODE_3);
         });
 
-        T::regs_gp16().arr().modify(|w| w.set_arr(u16::MAX));
-        T::regs_gp16().cr1().modify(|w| w.set_cen(true));
+        r.arr().modify(|w| w.set_arr(u16::MAX));
+        r.cr1().modify(|w| w.set_cen(true));
 
-        Self { _inner: tim }
+        Self { inner }
     }
 
     /// Get direction.
     pub fn read_direction(&self) -> Direction {
-        match T::regs_gp16().cr1().read().dir() {
+        match self.inner.regs_gp16().cr1().read().dir() {
             vals::Dir::DOWN => Direction::Downcounting,
             vals::Dir::UP => Direction::Upcounting,
         }
@@ -103,6 +103,6 @@ impl<'d, T: CaptureCompare16bitInstance> Qei<'d, T> {
 
     /// Get count.
     pub fn count(&self) -> u16 {
-        T::regs_gp16().cnt().read().cnt()
+        self.inner.regs_gp16().cnt().read().cnt()
     }
 }
diff --git a/embassy-stm32/src/timer/simple_pwm.rs b/embassy-stm32/src/timer/simple_pwm.rs
index 1acba504e..b54e9a0d6 100644
--- a/embassy-stm32/src/timer/simple_pwm.rs
+++ b/embassy-stm32/src/timer/simple_pwm.rs
@@ -4,9 +4,8 @@ use core::marker::PhantomData;
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
 
-use super::*;
-#[allow(unused_imports)]
-use crate::gpio::sealed::{AFType, Pin};
+use super::low_level::{CountingMode, OutputCompareMode, OutputPolarity, Timer};
+use super::{Channel, Channel1Pin, Channel2Pin, Channel3Pin, Channel4Pin, GeneralInstance4Channel};
 use crate::gpio::{AnyPin, OutputType};
 use crate::time::Hertz;
 use crate::Peripheral;
@@ -30,7 +29,7 @@ pub struct PwmPin<'d, T, C> {
 
 macro_rules! channel_impl {
     ($new_chx:ident, $channel:ident, $pin_trait:ident) => {
-        impl<'d, T: CaptureCompare16bitInstance> PwmPin<'d, T, $channel> {
+        impl<'d, T: GeneralInstance4Channel> PwmPin<'d, T, $channel> {
             #[doc = concat!("Create a new ", stringify!($channel), " PWM pin instance.")]
             pub fn $new_chx(pin: impl Peripheral<P = impl $pin_trait<T>> + 'd, output_type: OutputType) -> Self {
                 into_ref!(pin);
@@ -55,11 +54,11 @@ channel_impl!(new_ch3, Ch3, Channel3Pin);
 channel_impl!(new_ch4, Ch4, Channel4Pin);
 
 /// Simple PWM driver.
-pub struct SimplePwm<'d, T> {
-    inner: PeripheralRef<'d, T>,
+pub struct SimplePwm<'d, T: GeneralInstance4Channel> {
+    inner: Timer<'d, T>,
 }
 
-impl<'d, T: CaptureCompare16bitInstance> SimplePwm<'d, T> {
+impl<'d, T: GeneralInstance4Channel> SimplePwm<'d, T> {
     /// Create a new simple PWM driver.
     pub fn new(
         tim: impl Peripheral<P = T> + 'd,
@@ -74,14 +73,11 @@ impl<'d, T: CaptureCompare16bitInstance> SimplePwm<'d, T> {
     }
 
     fn new_inner(tim: impl Peripheral<P = T> + 'd, freq: Hertz, counting_mode: CountingMode) -> Self {
-        into_ref!(tim);
-
-        T::enable_and_reset();
-
-        let mut this = Self { inner: tim };
+        let mut this = Self { inner: Timer::new(tim) };
 
         this.inner.set_counting_mode(counting_mode);
         this.set_frequency(freq);
+        this.inner.enable_outputs(); // Required for advanced timers, see GeneralInstance4Channel for details
         this.inner.start();
 
         [Channel::Ch1, Channel::Ch2, Channel::Ch3, Channel::Ch4]
@@ -126,14 +122,14 @@ impl<'d, T: CaptureCompare16bitInstance> SimplePwm<'d, T> {
     /// Get max duty value.
     ///
     /// This value depends on the configured frequency and the timer's clock rate from RCC.
-    pub fn get_max_duty(&self) -> u16 {
+    pub fn get_max_duty(&self) -> u32 {
         self.inner.get_max_compare_value() + 1
     }
 
     /// Set the duty for a given channel.
     ///
     /// The value ranges from 0 for 0% duty, to [`get_max_duty`](Self::get_max_duty) for 100% duty, both included.
-    pub fn set_duty(&mut self, channel: Channel, duty: u16) {
+    pub fn set_duty(&mut self, channel: Channel, duty: u32) {
         assert!(duty <= self.get_max_duty());
         self.inner.set_compare_value(channel, duty)
     }
@@ -141,7 +137,7 @@ impl<'d, T: CaptureCompare16bitInstance> SimplePwm<'d, T> {
     /// Get the duty for a given channel.
     ///
     /// The value ranges from 0 for 0% duty, to [`get_max_duty`](Self::get_max_duty) for 100% duty, both included.
-    pub fn get_duty(&self, channel: Channel) -> u16 {
+    pub fn get_duty(&self, channel: Channel) -> u32 {
         self.inner.get_compare_value(channel)
     }
 
@@ -165,8 +161,6 @@ impl<'d, T: CaptureCompare16bitInstance> SimplePwm<'d, T> {
         channel: Channel,
         duty: &[u16],
     ) {
-        assert!(duty.iter().all(|v| *v <= self.get_max_duty()));
-
         into_ref!(dma);
 
         #[allow(clippy::let_unit_value)] // eg. stm32f334
@@ -201,7 +195,7 @@ impl<'d, T: CaptureCompare16bitInstance> SimplePwm<'d, T> {
                 &mut dma,
                 req,
                 duty,
-                T::regs_1ch().ccr(channel.index()).as_ptr() as *mut _,
+                self.inner.regs_1ch().ccr(channel.index()).as_ptr() as *mut _,
                 dma_transfer_option,
             )
             .await
@@ -227,22 +221,20 @@ impl<'d, T: CaptureCompare16bitInstance> SimplePwm<'d, T> {
 
 macro_rules! impl_waveform_chx {
     ($fn_name:ident, $dma_ch:ident, $cc_ch:ident) => {
-        impl<'d, T: CaptureCompare16bitInstance> SimplePwm<'d, T> {
+        impl<'d, T: GeneralInstance4Channel> SimplePwm<'d, T> {
             /// Generate a sequence of PWM waveform
             ///
             /// Note:
             /// you will need to provide corresponding TIMx_CHy DMA channel to use this method.
             pub async fn $fn_name(&mut self, dma: impl Peripheral<P = impl super::$dma_ch<T>>, duty: &[u16]) {
-                use super::vals::Ccds;
-
-                assert!(duty.iter().all(|v| *v <= self.get_max_duty()));
+                use crate::pac::timer::vals::Ccds;
 
                 into_ref!(dma);
 
                 #[allow(clippy::let_unit_value)] // eg. stm32f334
                 let req = dma.request();
 
-                let cc_channel = super::Channel::$cc_ch;
+                let cc_channel = Channel::$cc_ch;
 
                 let original_duty_state = self.get_duty(cc_channel);
                 let original_enable_state = self.is_enabled(cc_channel);
@@ -279,7 +271,7 @@ macro_rules! impl_waveform_chx {
                         &mut dma,
                         req,
                         duty,
-                        T::regs_gp16().ccr(cc_channel.index()).as_ptr() as *mut _,
+                        self.inner.regs_gp16().ccr(cc_channel.index()).as_ptr() as *mut _,
                         dma_transfer_option,
                     )
                     .await
@@ -314,10 +306,10 @@ impl_waveform_chx!(waveform_ch2, Ch2Dma, Ch2);
 impl_waveform_chx!(waveform_ch3, Ch3Dma, Ch3);
 impl_waveform_chx!(waveform_ch4, Ch4Dma, Ch4);
 
-impl<'d, T: CaptureCompare16bitInstance> embedded_hal_02::Pwm for SimplePwm<'d, T> {
+impl<'d, T: GeneralInstance4Channel> embedded_hal_02::Pwm for SimplePwm<'d, T> {
     type Channel = Channel;
     type Time = Hertz;
-    type Duty = u16;
+    type Duty = u32;
 
     fn disable(&mut self, channel: Self::Channel) {
         self.inner.enable_channel(channel, false);
diff --git a/embassy-stm32/src/ucpd.rs b/embassy-stm32/src/ucpd.rs
new file mode 100644
index 000000000..fe614b811
--- /dev/null
+++ b/embassy-stm32/src/ucpd.rs
@@ -0,0 +1,607 @@
+//! USB Type-C/USB Power Delivery Interface (UCPD)
+
+// Implementation Notes
+//
+// As of Feb. 2024 the UCPD peripheral is availalbe on: G0, G4, H5, L5, U5
+//
+// Cube HAL LL Driver (g0):
+// https://github.com/STMicroelectronics/stm32g0xx_hal_driver/blob/v1.4.6/Inc/stm32g0xx_ll_ucpd.h
+// https://github.com/STMicroelectronics/stm32g0xx_hal_driver/blob/v1.4.6/Src/stm32g0xx_ll_ucpd.c
+// Except for a the `LL_UCPD_RxAnalogFilterEnable/Disable()` functions the Cube HAL implementation of
+// all families is the same.
+//
+// Dead battery pull-down resistors functionality is enabled by default on startup and must
+// be disabled by setting a bit in PWR/SYSCFG registers. The exact name and location for that
+// bit is different for each familily.
+
+use core::future::poll_fn;
+use core::marker::PhantomData;
+use core::sync::atomic::{AtomicBool, Ordering};
+use core::task::Poll;
+
+use embassy_hal_internal::drop::OnDrop;
+use embassy_hal_internal::{into_ref, Peripheral, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
+
+use crate::dma::{AnyChannel, Request, Transfer, TransferOptions};
+use crate::interrupt;
+use crate::interrupt::typelevel::Interrupt;
+use crate::pac::ucpd::vals::{Anamode, Ccenable, PscUsbpdclk, Txmode};
+pub use crate::pac::ucpd::vals::{Phyccsel as CcSel, TypecVstateCc as CcVState};
+use crate::rcc::RccPeripheral;
+
+pub(crate) fn init(
+    _cs: critical_section::CriticalSection,
+    #[cfg(peri_ucpd1)] ucpd1_db_enable: bool,
+    #[cfg(peri_ucpd2)] ucpd2_db_enable: bool,
+) {
+    #[cfg(stm32g0x1)]
+    {
+        // according to RM0444 (STM32G0x1) section 8.1.1:
+        // when UCPD is disabled setting the strobe will disable dead battery
+        // (which is enabled after reset) but if UCPD is enabled, setting the
+        // strobe will apply the CC pin configuration from the control register
+        // (which is why we need to be careful about when we call this)
+        crate::pac::SYSCFG.cfgr1().modify(|w| {
+            w.set_ucpd1_strobe(!ucpd1_db_enable);
+            w.set_ucpd2_strobe(!ucpd2_db_enable);
+        });
+    }
+
+    #[cfg(any(stm32g4, stm32l5))]
+    {
+        crate::pac::PWR.cr3().modify(|w| {
+            #[cfg(stm32g4)]
+            w.set_ucpd1_dbdis(!ucpd1_db_enable);
+            #[cfg(stm32l5)]
+            w.set_ucpd_dbdis(!ucpd1_db_enable);
+        })
+    }
+
+    #[cfg(any(stm32h5, stm32u5))]
+    {
+        crate::pac::PWR.ucpdr().modify(|w| {
+            w.set_ucpd_dbdis(!ucpd1_db_enable);
+        })
+    }
+}
+
+/// Pull-up or Pull-down resistor state of both CC lines.
+#[derive(Debug, Clone, Copy, PartialEq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum CcPull {
+    /// Analog PHY for CC pin disabled.
+    Disabled,
+
+    /// Rd=5.1k pull-down resistor.
+    Sink,
+
+    /// Rp=56k pull-up resistor to indicate default USB power.
+    SourceDefaultUsb,
+
+    /// Rp=22k pull-up resistor to indicate support for up to 1.5A.
+    Source1_5A,
+
+    /// Rp=10k pull-up resistor to indicate support for up to 3.0A.
+    Source3_0A,
+}
+
+/// UCPD driver.
+pub struct Ucpd<'d, T: Instance> {
+    cc_phy: CcPhy<'d, T>,
+}
+
+impl<'d, T: Instance> Ucpd<'d, T> {
+    /// Creates a new UCPD driver instance.
+    pub fn new(
+        _peri: impl Peripheral<P = T> + 'd,
+        _irq: impl interrupt::typelevel::Binding<T::Interrupt, InterruptHandler<T>> + 'd,
+        cc1: impl Peripheral<P = impl Cc1Pin<T>> + 'd,
+        cc2: impl Peripheral<P = impl Cc2Pin<T>> + 'd,
+    ) -> Self {
+        into_ref!(cc1, cc2);
+        cc1.set_as_analog();
+        cc2.set_as_analog();
+
+        T::enable_and_reset();
+        T::Interrupt::unpend();
+        unsafe { T::Interrupt::enable() };
+
+        let r = T::REGS;
+        r.cfgr1().write(|w| {
+            // "The receiver is designed to work in the clock frequency range from 6 to 18 MHz.
+            // However, the optimum performance is ensured in the range from 6 to 12 MHz"
+            // UCPD is driven by HSI16 (16MHz internal oscillator), which we need to divide by 2.
+            w.set_psc_usbpdclk(PscUsbpdclk::DIV2);
+
+            // Prescaler to produce a target half-bit frequency of 600kHz which is required
+            // to produce transmit with a nominal nominal bit rate of 300Kbps+-10% using
+            // biphase mark coding (BMC, aka differential manchester coding).
+            // A divider of 13 gives the target frequency closest to spec (~615kHz, 1.625us).
+            w.set_hbitclkdiv(13 - 1);
+
+            // Time window for detecting non-idle (12-20us).
+            // 1.75us * 8 = 14us.
+            w.set_transwin(8 - 1);
+
+            // Time from the end of last bit of a Frame until the start of the first bit of the
+            // next Preamble (min 25us).
+            // 1.75us * 17 = ~30us
+            w.set_ifrgap(17 - 1);
+
+            // TODO: Currently only hard reset and SOP messages can be received.
+            // UNDOCUMENTED: This register can only be written while UCPDEN=0 (found by testing).
+            w.set_rxordseten(0b1001);
+
+            // Enable DMA
+            w.set_txdmaen(true);
+            w.set_rxdmaen(true);
+
+            w.set_ucpden(true);
+        });
+
+        Self {
+            cc_phy: CcPhy { _lifetime: PhantomData },
+        }
+    }
+
+    /// Returns the TypeC CC PHY.
+    pub fn cc_phy(&mut self) -> &mut CcPhy<'d, T> {
+        &mut self.cc_phy
+    }
+
+    /// Splits the UCPD driver into a TypeC PHY to control and monitor CC voltage
+    /// and a Power Delivery (PD) PHY with receiver and transmitter.
+    pub fn split_pd_phy(
+        self,
+        rx_dma: impl Peripheral<P = impl RxDma<T>> + 'd,
+        tx_dma: impl Peripheral<P = impl TxDma<T>> + 'd,
+        cc_sel: CcSel,
+    ) -> (CcPhy<'d, T>, PdPhy<'d, T>) {
+        let r = T::REGS;
+
+        // TODO: Currently only SOP messages are supported.
+        r.tx_ordsetr().write(|w| w.set_txordset(0b10001_11000_11000_11000));
+
+        // Enable the receiver on one of the two CC lines.
+        r.cr().modify(|w| w.set_phyccsel(cc_sel));
+
+        // Enable hard reset receive interrupt.
+        r.imr().modify(|w| w.set_rxhrstdetie(true));
+
+        // Both parts must be dropped before the peripheral can be disabled.
+        T::state().drop_not_ready.store(true, Ordering::Relaxed);
+
+        into_ref!(rx_dma, tx_dma);
+        let rx_dma_req = rx_dma.request();
+        let tx_dma_req = tx_dma.request();
+        (
+            self.cc_phy,
+            PdPhy {
+                _lifetime: PhantomData,
+                rx_dma_ch: rx_dma.map_into(),
+                rx_dma_req,
+                tx_dma_ch: tx_dma.map_into(),
+                tx_dma_req,
+            },
+        )
+    }
+}
+
+/// Control and monitoring of TypeC CC pin functionailty.
+pub struct CcPhy<'d, T: Instance> {
+    _lifetime: PhantomData<&'d mut T>,
+}
+
+impl<'d, T: Instance> Drop for CcPhy<'d, T> {
+    fn drop(&mut self) {
+        let r = T::REGS;
+        r.cr().modify(|w| {
+            w.set_cc1tcdis(true);
+            w.set_cc2tcdis(true);
+            w.set_ccenable(Ccenable::DISABLED);
+        });
+
+        // Check if the PdPhy part was dropped already.
+        let drop_not_ready = &T::state().drop_not_ready;
+        if drop_not_ready.load(Ordering::Relaxed) {
+            drop_not_ready.store(true, Ordering::Relaxed);
+        } else {
+            r.cfgr1().write(|w| w.set_ucpden(false));
+            T::disable();
+            T::Interrupt::disable();
+        }
+    }
+}
+
+impl<'d, T: Instance> CcPhy<'d, T> {
+    /// Sets the pull-up/pull-down resistor values exposed on the CC pins.
+    pub fn set_pull(&mut self, cc_pull: CcPull) {
+        T::REGS.cr().modify(|w| {
+            w.set_anamode(if cc_pull == CcPull::Sink {
+                Anamode::SINK
+            } else {
+                Anamode::SOURCE
+            });
+            w.set_anasubmode(match cc_pull {
+                CcPull::SourceDefaultUsb => 1,
+                CcPull::Source1_5A => 2,
+                CcPull::Source3_0A => 3,
+                _ => 0,
+            });
+            w.set_ccenable(if cc_pull == CcPull::Disabled {
+                Ccenable::DISABLED
+            } else {
+                Ccenable::BOTH
+            });
+        });
+
+        // Disable dead-battery pull-down resistors which are enabled by default on boot.
+        critical_section::with(|cs| {
+            init(
+                cs,
+                false,
+                #[cfg(peri_ucpd2)]
+                false,
+            );
+        });
+    }
+
+    /// Returns the current voltage level of CC1 and CC2 pin as tuple.
+    ///
+    /// Interpretation of the voltage levels depends on the configured CC line
+    /// pull-up/pull-down resistance.
+    pub fn vstate(&self) -> (CcVState, CcVState) {
+        let sr = T::REGS.sr().read();
+        (sr.typec_vstate_cc1(), sr.typec_vstate_cc2())
+    }
+
+    /// Waits for a change in voltage state on either CC line.
+    pub async fn wait_for_vstate_change(&self) -> (CcVState, CcVState) {
+        let _on_drop = OnDrop::new(|| self.enable_cc_interrupts(false));
+        let prev_vstate = self.vstate();
+        poll_fn(|cx| {
+            let vstate = self.vstate();
+            if vstate != prev_vstate {
+                Poll::Ready(vstate)
+            } else {
+                T::state().waker.register(cx.waker());
+                self.enable_cc_interrupts(true);
+                Poll::Pending
+            }
+        })
+        .await
+    }
+
+    fn enable_cc_interrupts(&self, enable: bool) {
+        T::REGS.imr().modify(|w| {
+            w.set_typecevt1ie(enable);
+            w.set_typecevt2ie(enable);
+        });
+    }
+}
+
+/// Receive Error.
+#[derive(Debug, Clone, Copy)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum RxError {
+    /// Incorrect CRC or truncated message (a line becoming static before EOP is met).
+    Crc,
+
+    /// Provided buffer was too small for the received message.
+    Overrun,
+
+    /// Hard Reset received before or during reception.
+    HardReset,
+}
+
+/// Transmit Error.
+#[derive(Debug, Clone, Copy)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum TxError {
+    /// Concurrent receive in progress or excessive noise on the line.
+    Discarded,
+
+    /// Hard Reset received before or during transmission.
+    HardReset,
+}
+
+/// Power Delivery (PD) PHY.
+pub struct PdPhy<'d, T: Instance> {
+    _lifetime: PhantomData<&'d mut T>,
+    rx_dma_ch: PeripheralRef<'d, AnyChannel>,
+    rx_dma_req: Request,
+    tx_dma_ch: PeripheralRef<'d, AnyChannel>,
+    tx_dma_req: Request,
+}
+
+impl<'d, T: Instance> Drop for PdPhy<'d, T> {
+    fn drop(&mut self) {
+        // Check if the Type-C part was dropped already.
+        let drop_not_ready = &T::state().drop_not_ready;
+        if drop_not_ready.load(Ordering::Relaxed) {
+            drop_not_ready.store(true, Ordering::Relaxed);
+        } else {
+            T::REGS.cfgr1().write(|w| w.set_ucpden(false));
+            T::disable();
+            T::Interrupt::disable();
+        }
+    }
+}
+
+impl<'d, T: Instance> PdPhy<'d, T> {
+    /// Receives a PD message into the provided buffer.
+    ///
+    /// Returns the number of received bytes or an error.
+    pub async fn receive(&mut self, buf: &mut [u8]) -> Result<usize, RxError> {
+        let r = T::REGS;
+
+        let dma = unsafe {
+            Transfer::new_read(
+                &self.rx_dma_ch,
+                self.rx_dma_req,
+                r.rxdr().as_ptr() as *mut u8,
+                buf,
+                TransferOptions::default(),
+            )
+        };
+
+        // Clear interrupt flags (possibly set from last receive).
+        r.icr().write(|w| {
+            w.set_rxorddetcf(true);
+            w.set_rxovrcf(true);
+            w.set_rxmsgendcf(true);
+        });
+
+        r.cr().modify(|w| w.set_phyrxen(true));
+        let _on_drop = OnDrop::new(|| {
+            r.cr().modify(|w| w.set_phyrxen(false));
+            self.enable_rx_interrupt(false);
+        });
+
+        poll_fn(|cx| {
+            let sr = r.sr().read();
+            if sr.rxhrstdet() {
+                // Clean and re-enable hard reset receive interrupt.
+                r.icr().write(|w| w.set_rxhrstdetcf(true));
+                r.imr().modify(|w| w.set_rxhrstdetie(true));
+                Poll::Ready(Err(RxError::HardReset))
+            } else if sr.rxmsgend() {
+                let ret = if sr.rxovr() {
+                    Err(RxError::Overrun)
+                } else if sr.rxerr() {
+                    Err(RxError::Crc)
+                } else {
+                    Ok(())
+                };
+                Poll::Ready(ret)
+            } else {
+                T::state().waker.register(cx.waker());
+                self.enable_rx_interrupt(true);
+                Poll::Pending
+            }
+        })
+        .await?;
+
+        // Make sure that the last byte was fetched by DMA.
+        while r.sr().read().rxne() {
+            if dma.get_remaining_transfers() == 0 {
+                return Err(RxError::Overrun);
+            }
+        }
+
+        Ok(r.rx_payszr().read().rxpaysz().into())
+    }
+
+    fn enable_rx_interrupt(&self, enable: bool) {
+        T::REGS.imr().modify(|w| w.set_rxmsgendie(enable));
+    }
+
+    /// Transmits a PD message.
+    pub async fn transmit(&mut self, buf: &[u8]) -> Result<(), TxError> {
+        let r = T::REGS;
+
+        // When a previous transmission was dropped before it had finished it
+        // might still be running because there is no way to abort an ongoing
+        // message transmission. Wait for it to finish but ignore errors.
+        if r.cr().read().txsend() {
+            if let Err(TxError::HardReset) = self.wait_tx_done().await {
+                return Err(TxError::HardReset);
+            }
+        }
+
+        // Clear the TX interrupt flags.
+        T::REGS.icr().write(|w| {
+            w.set_txmsgdisccf(true);
+            w.set_txmsgsentcf(true);
+        });
+
+        // Start the DMA and let it do its thing in the background.
+        let _dma = unsafe {
+            Transfer::new_write(
+                &self.tx_dma_ch,
+                self.tx_dma_req,
+                buf,
+                r.txdr().as_ptr() as *mut u8,
+                TransferOptions::default(),
+            )
+        };
+
+        // Configure and start the transmission.
+        r.tx_payszr().write(|w| w.set_txpaysz(buf.len() as _));
+        r.cr().modify(|w| {
+            w.set_txmode(Txmode::PACKET);
+            w.set_txsend(true);
+        });
+
+        self.wait_tx_done().await
+    }
+
+    async fn wait_tx_done(&self) -> Result<(), TxError> {
+        let _on_drop = OnDrop::new(|| self.enable_tx_interrupts(false));
+        poll_fn(|cx| {
+            let r = T::REGS;
+            let sr = r.sr().read();
+            if sr.rxhrstdet() {
+                // Clean and re-enable hard reset receive interrupt.
+                r.icr().write(|w| w.set_rxhrstdetcf(true));
+                r.imr().modify(|w| w.set_rxhrstdetie(true));
+                Poll::Ready(Err(TxError::HardReset))
+            } else if sr.txmsgdisc() {
+                Poll::Ready(Err(TxError::Discarded))
+            } else if sr.txmsgsent() {
+                Poll::Ready(Ok(()))
+            } else {
+                T::state().waker.register(cx.waker());
+                self.enable_tx_interrupts(true);
+                Poll::Pending
+            }
+        })
+        .await
+    }
+
+    fn enable_tx_interrupts(&self, enable: bool) {
+        T::REGS.imr().modify(|w| {
+            w.set_txmsgdiscie(enable);
+            w.set_txmsgsentie(enable);
+        });
+    }
+
+    /// Transmit a hard reset.
+    pub async fn transmit_hardreset(&mut self) -> Result<(), TxError> {
+        let r = T::REGS;
+
+        // Clear the hardreset interrupt flags.
+        T::REGS.icr().write(|w| {
+            w.set_hrstdisccf(true);
+            w.set_hrstsentcf(true);
+        });
+
+        // Trigger hard reset transmission.
+        r.cr().modify(|w| {
+            w.set_txhrst(true);
+        });
+
+        let _on_drop = OnDrop::new(|| self.enable_hardreset_interrupts(false));
+        poll_fn(|cx| {
+            let r = T::REGS;
+            let sr = r.sr().read();
+            if sr.rxhrstdet() {
+                // Clean and re-enable hard reset receive interrupt.
+                r.icr().write(|w| w.set_rxhrstdetcf(true));
+                r.imr().modify(|w| w.set_rxhrstdetie(true));
+                Poll::Ready(Err(TxError::HardReset))
+            } else if sr.hrstdisc() {
+                Poll::Ready(Err(TxError::Discarded))
+            } else if sr.hrstsent() {
+                Poll::Ready(Ok(()))
+            } else {
+                T::state().waker.register(cx.waker());
+                self.enable_hardreset_interrupts(true);
+                Poll::Pending
+            }
+        })
+        .await
+    }
+
+    fn enable_hardreset_interrupts(&self, enable: bool) {
+        T::REGS.imr().modify(|w| {
+            w.set_hrstdiscie(enable);
+            w.set_hrstsentie(enable);
+        });
+    }
+}
+
+/// Interrupt handler.
+pub struct InterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        let r = T::REGS;
+        let sr = r.sr().read();
+
+        if sr.typecevt1() || sr.typecevt2() {
+            r.icr().write(|w| {
+                w.set_typecevt1cf(true);
+                w.set_typecevt2cf(true);
+            });
+        }
+
+        if sr.rxhrstdet() {
+            r.imr().modify(|w| w.set_rxhrstdetie(false));
+        }
+
+        if sr.rxmsgend() {
+            r.imr().modify(|w| w.set_rxmsgendie(false));
+        }
+
+        if sr.txmsgdisc() || sr.txmsgsent() {
+            r.imr().modify(|w| {
+                w.set_txmsgdiscie(false);
+                w.set_txmsgsentie(false);
+            });
+        }
+
+        if sr.hrstdisc() || sr.hrstsent() {
+            r.imr().modify(|w| {
+                w.set_hrstdiscie(false);
+                w.set_hrstsentie(false);
+            });
+        }
+
+        // Wake the task to clear and re-enabled interrupts.
+        T::state().waker.wake();
+    }
+}
+
+struct State {
+    waker: AtomicWaker,
+    // Inverted logic for a default state of 0 so that the data goes into the .bss section.
+    drop_not_ready: AtomicBool,
+}
+
+impl State {
+    pub const fn new() -> Self {
+        Self {
+            waker: AtomicWaker::new(),
+            drop_not_ready: AtomicBool::new(false),
+        }
+    }
+}
+
+trait SealedInstance {
+    const REGS: crate::pac::ucpd::Ucpd;
+    fn state() -> &'static State;
+}
+
+/// UCPD instance trait.
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + RccPeripheral {
+    /// Interrupt for this instance.
+    type Interrupt: crate::interrupt::typelevel::Interrupt;
+}
+
+foreach_interrupt!(
+    ($inst:ident, ucpd, UCPD, GLOBAL, $irq:ident) => {
+        impl SealedInstance for crate::peripherals::$inst {
+            const REGS: crate::pac::ucpd::Ucpd = crate::pac::$inst;
+
+            fn state() -> &'static State {
+                static STATE: State = State::new();
+                &STATE
+            }
+        }
+
+        impl Instance for crate::peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
+    };
+);
+
+pin_trait!(Cc1Pin, Instance);
+pin_trait!(Cc2Pin, Instance);
+
+dma_trait!(TxDma, Instance);
+dma_trait!(RxDma, Instance);
diff --git a/embassy-stm32/src/uid.rs b/embassy-stm32/src/uid.rs
index aa13586f8..5e38532bd 100644
--- a/embassy-stm32/src/uid.rs
+++ b/embassy-stm32/src/uid.rs
@@ -27,5 +27,5 @@ pub fn uid_hex_bytes() -> &'static [u8; 24] {
             LOADED = true;
         }
     });
-    unsafe { &UID_HEX }
+    unsafe { &*core::ptr::addr_of!(UID_HEX) }
 }
diff --git a/embassy-stm32/src/usart/buffered.rs b/embassy-stm32/src/usart/buffered.rs
index c11e3382f..51862e185 100644
--- a/embassy-stm32/src/usart/buffered.rs
+++ b/embassy-stm32/src/usart/buffered.rs
@@ -1,13 +1,10 @@
-use core::future::poll_fn;
 use core::slice;
-use core::sync::atomic::{AtomicBool, Ordering};
-use core::task::Poll;
+use core::sync::atomic::AtomicBool;
 
 use embassy_hal_internal::atomic_ring_buffer::RingBuffer;
 use embassy_sync::waitqueue::AtomicWaker;
 
 use super::*;
-use crate::interrupt::typelevel::Interrupt;
 
 /// Interrupt handler.
 pub struct InterruptHandler<T: BasicInstance> {
@@ -55,7 +52,7 @@ impl<T: BasicInstance> interrupt::typelevel::Handler<T::Interrupt> for Interrupt
                 // FIXME: Should we disable any further RX interrupts when the buffer becomes full.
             }
 
-            if state.rx_buf.is_full() {
+            if !state.rx_buf.is_empty() {
                 state.rx_waker.wake();
             }
         }
diff --git a/embassy-stm32/src/usart/mod.rs b/embassy-stm32/src/usart/mod.rs
index ea727b010..7c0523a25 100644
--- a/embassy-stm32/src/usart/mod.rs
+++ b/embassy-stm32/src/usart/mod.rs
@@ -10,10 +10,11 @@ use core::task::Poll;
 use embassy_embedded_hal::SetConfig;
 use embassy_hal_internal::drop::OnDrop;
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
 use futures::future::{select, Either};
 
 use crate::dma::{NoDma, Transfer};
-use crate::gpio::sealed::AFType;
+use crate::gpio::AFType;
 use crate::interrupt::typelevel::Interrupt;
 #[allow(unused_imports)]
 #[cfg(not(any(usart_v1, usart_v2)))]
@@ -208,7 +209,14 @@ enum ReadCompletionEvent {
     Idle(usize),
 }
 
-/// Bidirectional UART Driver
+/// Bidirectional UART Driver, which acts as a combination of [`UartTx`] and [`UartRx`].
+///
+/// ### Notes on [`embedded_io::Read`]
+///
+/// `embedded_io::Read` requires guarantees that the base [`UartRx`] cannot provide.
+///
+/// See [`UartRx`] for more details, and see [`BufferedUart`] and [`RingBufferedUartRx`]
+/// as alternatives that do provide the necessary guarantees for `embedded_io::Read`.
 pub struct Uart<'d, T: BasicInstance, TxDma = NoDma, RxDma = NoDma> {
     tx: UartTx<'d, T, TxDma>,
     rx: UartRx<'d, T, RxDma>,
@@ -224,7 +232,10 @@ impl<'d, T: BasicInstance, TxDma, RxDma> SetConfig for Uart<'d, T, TxDma, RxDma>
     }
 }
 
-/// Tx-only UART Driver
+/// Tx-only UART Driver.
+///
+/// Can be obtained from [`Uart::split`], or can be constructed independently,
+/// if you do not need the receiving half of the driver.
 pub struct UartTx<'d, T: BasicInstance, TxDma = NoDma> {
     phantom: PhantomData<&'d mut T>,
     tx_dma: PeripheralRef<'d, TxDma>,
@@ -239,7 +250,35 @@ impl<'d, T: BasicInstance, TxDma> SetConfig for UartTx<'d, T, TxDma> {
     }
 }
 
-/// Rx-only UART Driver
+/// Rx-only UART Driver.
+///
+/// Can be obtained from [`Uart::split`], or can be constructed independently,
+/// if you do not need the transmitting half of the driver.
+///
+/// ### Notes on [`embedded_io::Read`]
+///
+/// `embedded_io::Read` requires guarantees that this struct cannot provide:
+///
+/// - Any data received between calls to [`UartRx::read`] or [`UartRx::blocking_read`]
+/// will be thrown away, as `UartRx` is unbuffered.
+/// Users of `embedded_io::Read` are likely to not expect this behavior
+/// (for instance if they read multiple small chunks in a row).
+/// - [`UartRx::read`] and [`UartRx::blocking_read`] only return once the entire buffer has been
+/// filled, whereas `embedded_io::Read` requires us to fill the buffer with what we already
+/// received, and only block/wait until the first byte arrived.
+/// <br />
+/// While [`UartRx::read_until_idle`] does return early, it will still eagerly wait for data until
+/// the buffer is full or no data has been transmitted in a while,
+/// which may not be what users of `embedded_io::Read` expect.
+///
+/// [`UartRx::into_ring_buffered`] can be called to equip `UartRx` with a buffer,
+/// that it can then use to store data received between calls to `read`,
+/// provided you are using DMA already.
+///
+/// Alternatively, you can use [`BufferedUartRx`], which is interrupt-based and which can also
+/// store data received between calls.
+///
+/// Also see [this github comment](https://github.com/embassy-rs/embassy/pull/2185#issuecomment-1810047043).
 pub struct UartRx<'d, T: BasicInstance, RxDma = NoDma> {
     _peri: PeripheralRef<'d, T>,
     rx_dma: PeripheralRef<'d, RxDma>,
@@ -1259,7 +1298,6 @@ where
 impl<T, TxDma, RxDma> embedded_io::Write for Uart<'_, T, TxDma, RxDma>
 where
     T: BasicInstance,
-    TxDma: crate::usart::TxDma<T>,
 {
     fn write(&mut self, buf: &[u8]) -> Result<usize, Self::Error> {
         self.blocking_write(buf)?;
@@ -1274,7 +1312,6 @@ where
 impl<T, TxDma> embedded_io::Write for UartTx<'_, T, TxDma>
 where
     T: BasicInstance,
-    TxDma: crate::usart::TxDma<T>,
 {
     fn write(&mut self, buf: &[u8]) -> Result<usize, Self::Error> {
         self.blocking_write(buf)?;
@@ -1326,8 +1363,6 @@ mod ringbuffered;
 #[cfg(not(gpdma))]
 pub use ringbuffered::RingBufferedUartRx;
 
-use self::sealed::Kind;
-
 #[cfg(any(usart_v1, usart_v2))]
 fn tdr(r: crate::pac::usart::Usart) -> *mut u8 {
     r.dr().as_ptr() as _
@@ -1370,52 +1405,50 @@ fn clear_interrupt_flags(r: Regs, sr: regs::Isr) {
     r.icr().write(|w| *w = regs::Icr(sr.0));
 }
 
-pub(crate) mod sealed {
-    use embassy_sync::waitqueue::AtomicWaker;
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum Kind {
+    Uart,
+    #[cfg(any(usart_v3, usart_v4))]
+    #[allow(unused)]
+    Lpuart,
+}
 
-    use super::*;
+struct State {
+    rx_waker: AtomicWaker,
+}
 
-    #[derive(Clone, Copy, PartialEq, Eq)]
-    pub enum Kind {
-        Uart,
-        #[cfg(any(usart_v3, usart_v4))]
-        Lpuart,
-    }
-
-    pub struct State {
-        pub rx_waker: AtomicWaker,
-        pub tx_waker: AtomicWaker,
-    }
-
-    impl State {
-        pub const fn new() -> Self {
-            Self {
-                rx_waker: AtomicWaker::new(),
-                tx_waker: AtomicWaker::new(),
-            }
+impl State {
+    const fn new() -> Self {
+        Self {
+            rx_waker: AtomicWaker::new(),
         }
     }
-
-    pub trait BasicInstance: crate::rcc::RccPeripheral {
-        const KIND: Kind;
-        type Interrupt: interrupt::typelevel::Interrupt;
-
-        fn regs() -> Regs;
-        fn state() -> &'static State;
-
-        fn buffered_state() -> &'static buffered::State;
-    }
-
-    pub trait FullInstance: BasicInstance {
-        fn regs_uart() -> crate::pac::usart::Usart;
-    }
+}
+
+trait SealedBasicInstance: crate::rcc::RccPeripheral {
+    const KIND: Kind;
+
+    fn regs() -> Regs;
+    fn state() -> &'static State;
+
+    fn buffered_state() -> &'static buffered::State;
+}
+
+trait SealedFullInstance: SealedBasicInstance {
+    #[allow(unused)]
+    fn regs_uart() -> crate::pac::usart::Usart;
 }
 
 /// Basic UART driver instance
-pub trait BasicInstance: Peripheral<P = Self> + sealed::BasicInstance + 'static + Send {}
+#[allow(private_bounds)]
+pub trait BasicInstance: Peripheral<P = Self> + SealedBasicInstance + 'static + Send {
+    /// Interrupt for this instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
 
 /// Full UART driver instance
-pub trait FullInstance: sealed::FullInstance {}
+#[allow(private_bounds)]
+pub trait FullInstance: SealedFullInstance {}
 
 pin_trait!(RxPin, BasicInstance);
 pin_trait!(TxPin, BasicInstance);
@@ -1429,16 +1462,15 @@ dma_trait!(RxDma, BasicInstance);
 
 macro_rules! impl_usart {
     ($inst:ident, $irq:ident, $kind:expr) => {
-        impl sealed::BasicInstance for crate::peripherals::$inst {
+        impl SealedBasicInstance for crate::peripherals::$inst {
             const KIND: Kind = $kind;
-            type Interrupt = crate::interrupt::typelevel::$irq;
 
             fn regs() -> Regs {
                 unsafe { Regs::from_ptr(crate::pac::$inst.as_ptr()) }
             }
 
-            fn state() -> &'static crate::usart::sealed::State {
-                static STATE: crate::usart::sealed::State = crate::usart::sealed::State::new();
+            fn state() -> &'static crate::usart::State {
+                static STATE: crate::usart::State = crate::usart::State::new();
                 &STATE
             }
 
@@ -1448,7 +1480,9 @@ macro_rules! impl_usart {
             }
         }
 
-        impl BasicInstance for peripherals::$inst {}
+        impl BasicInstance for peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
     };
 }
 
@@ -1460,7 +1494,7 @@ foreach_interrupt!(
     ($inst:ident, usart, $block:ident, $signal_name:ident, $irq:ident) => {
         impl_usart!($inst, $irq, Kind::Uart);
 
-        impl sealed::FullInstance for peripherals::$inst {
+        impl SealedFullInstance for peripherals::$inst {
             fn regs_uart() -> crate::pac::usart::Usart {
                 crate::pac::$inst
             }
diff --git a/embassy-stm32/src/usb/mod.rs b/embassy-stm32/src/usb/mod.rs
index 4debd4e54..1e3c44167 100644
--- a/embassy-stm32/src/usb/mod.rs
+++ b/embassy-stm32/src/usb/mod.rs
@@ -1,37 +1,69 @@
 //! Universal Serial Bus (USB)
 
-use crate::interrupt;
-use crate::rcc::RccPeripheral;
+#[cfg_attr(usb, path = "usb.rs")]
+#[cfg_attr(otg, path = "otg.rs")]
+mod _version;
+pub use _version::*;
 
-mod usb;
-pub use usb::*;
+use crate::interrupt::typelevel::Interrupt;
+use crate::rcc::SealedRccPeripheral;
 
-pub(crate) mod sealed {
-    pub trait Instance {
-        fn regs() -> crate::pac::usb::Usb;
+/// clock, power initialization stuff that's common for USB and OTG.
+fn common_init<T: Instance>() {
+    // Check the USB clock is enabled and running at exactly 48 MHz.
+    // frequency() will panic if not enabled
+    let freq = T::frequency();
+    // Check frequency is within the 0.25% tolerance allowed by the spec.
+    // Clock might not be exact 48Mhz due to rounding errors in PLL calculation, or if the user
+    // has tight clock restrictions due to something else (like audio).
+    if freq.0.abs_diff(48_000_000) > 120_000 {
+        panic!(
+            "USB clock should be 48Mhz but is {} Hz. Please double-check your RCC settings.",
+            freq.0
+        )
     }
+
+    #[cfg(any(stm32l4, stm32l5, stm32wb))]
+    critical_section::with(|_| crate::pac::PWR.cr2().modify(|w| w.set_usv(true)));
+
+    #[cfg(pwr_h5)]
+    critical_section::with(|_| crate::pac::PWR.usbscr().modify(|w| w.set_usb33sv(true)));
+
+    #[cfg(stm32h7)]
+    {
+        // If true, VDD33USB is generated by internal regulator from VDD50USB
+        // If false, VDD33USB and VDD50USB must be suplied directly with 3.3V (default on nucleo)
+        // TODO: unhardcode
+        let internal_regulator = false;
+
+        // Enable USB power
+        critical_section::with(|_| {
+            crate::pac::PWR.cr3().modify(|w| {
+                w.set_usb33den(true);
+                w.set_usbregen(internal_regulator);
+            })
+        });
+
+        // Wait for USB power to stabilize
+        while !crate::pac::PWR.cr3().read().usb33rdy() {}
+    }
+
+    #[cfg(stm32u5)]
+    {
+        // Enable USB power
+        critical_section::with(|_| {
+            crate::pac::PWR.svmcr().modify(|w| {
+                w.set_usv(true);
+                w.set_uvmen(true);
+            })
+        });
+
+        // Wait for USB power to stabilize
+        while !crate::pac::PWR.svmsr().read().vddusbrdy() {}
+    }
+
+    T::Interrupt::unpend();
+    unsafe { T::Interrupt::enable() };
+
+    <T as SealedRccPeripheral>::enable_and_reset();
 }
-
-/// USB instance trait.
-pub trait Instance: sealed::Instance + RccPeripheral + 'static {
-    /// Interrupt for this USB instance.
-    type Interrupt: interrupt::typelevel::Interrupt;
-}
-
-// Internal PHY pins
-pin_trait!(DpPin, Instance);
-pin_trait!(DmPin, Instance);
-
-foreach_interrupt!(
-    ($inst:ident, usb, $block:ident, LP, $irq:ident) => {
-        impl sealed::Instance for crate::peripherals::$inst {
-            fn regs() -> crate::pac::usb::Usb {
-                crate::pac::$inst
-            }
-        }
-
-        impl Instance for crate::peripherals::$inst {
-            type Interrupt = crate::interrupt::typelevel::$irq;
-        }
-    };
-);
diff --git a/embassy-stm32/src/usb_otg/usb.rs b/embassy-stm32/src/usb/otg.rs
similarity index 89%
rename from embassy-stm32/src/usb_otg/usb.rs
rename to embassy-stm32/src/usb/otg.rs
index 373697ec8..b0e7067bd 100644
--- a/embassy-stm32/src/usb_otg/usb.rs
+++ b/embassy-stm32/src/usb/otg.rs
@@ -6,17 +6,16 @@ use core::task::Poll;
 use embassy_hal_internal::{into_ref, Peripheral};
 use embassy_sync::waitqueue::AtomicWaker;
 use embassy_usb_driver::{
-    self, Bus as _, Direction, EndpointAddress, EndpointAllocError, EndpointError, EndpointIn, EndpointInfo,
-    EndpointOut, EndpointType, Event, Unsupported,
+    Bus as _, Direction, EndpointAddress, EndpointAllocError, EndpointError, EndpointIn, EndpointInfo, EndpointOut,
+    EndpointType, Event, Unsupported,
 };
 use futures::future::poll_fn;
 
-use super::*;
-use crate::gpio::sealed::AFType;
+use crate::gpio::AFType;
 use crate::interrupt;
 use crate::interrupt::typelevel::Interrupt;
 use crate::pac::otg::{regs, vals};
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::{RccPeripheral, SealedRccPeripheral};
 use crate::time::Hertz;
 
 /// Interrupt handler.
@@ -561,8 +560,7 @@ impl<'d, T: Instance> Bus<'d, T> {
 
 impl<'d, T: Instance> Bus<'d, T> {
     fn init(&mut self) {
-        #[cfg(stm32l4)]
-        critical_section::with(|_| crate::pac::PWR.cr2().modify(|w| w.set_usv(true)));
+        super::common_init::<T>();
 
         #[cfg(stm32f7)]
         {
@@ -590,22 +588,6 @@ impl<'d, T: Instance> Bus<'d, T> {
 
         #[cfg(stm32h7)]
         {
-            // If true, VDD33USB is generated by internal regulator from VDD50USB
-            // If false, VDD33USB and VDD50USB must be suplied directly with 3.3V (default on nucleo)
-            // TODO: unhardcode
-            let internal_regulator = false;
-
-            // Enable USB power
-            critical_section::with(|_| {
-                crate::pac::PWR.cr3().modify(|w| {
-                    w.set_usb33den(true);
-                    w.set_usbregen(internal_regulator);
-                })
-            });
-
-            // Wait for USB power to stabilize
-            while !crate::pac::PWR.cr3().read().usb33rdy() {}
-
             // Enable ULPI clock if external PHY is used
             let ulpien = !self.phy_type.internal();
             critical_section::with(|_| {
@@ -626,25 +608,6 @@ impl<'d, T: Instance> Bus<'d, T> {
             });
         }
 
-        #[cfg(stm32u5)]
-        {
-            // Enable USB power
-            critical_section::with(|_| {
-                crate::pac::PWR.svmcr().modify(|w| {
-                    w.set_usv(true);
-                    w.set_uvmen(true);
-                })
-            });
-
-            // Wait for USB power to stabilize
-            while !crate::pac::PWR.svmsr().read().vddusbrdy() {}
-        }
-
-        <T as RccPeripheral>::enable_and_reset();
-
-        T::Interrupt::unpend();
-        unsafe { T::Interrupt::enable() };
-
         let r = T::regs();
         let core_id = r.cid().read().0;
         trace!("Core id {:08x}", core_id);
@@ -846,7 +809,7 @@ impl<'d, T: Instance> Bus<'d, T> {
     fn disable(&mut self) {
         T::Interrupt::disable();
 
-        <T as RccPeripheral>::disable();
+        <T as SealedRccPeripheral>::disable();
 
         #[cfg(stm32l4)]
         crate::pac::PWR.cr2().modify(|w| w.set_usv(false));
@@ -1469,3 +1432,158 @@ fn calculate_trdt(speed: vals::Dspd, ahb_freq: Hertz) -> u8 {
 fn quirk_setup_late_cnak(r: crate::pac::otg::Otg) -> bool {
     r.cid().read().0 & 0xf000 == 0x1000
 }
+
+// Using Instance::ENDPOINT_COUNT requires feature(const_generic_expr) so just define maximum eps
+const MAX_EP_COUNT: usize = 9;
+
+trait SealedInstance {
+    const HIGH_SPEED: bool;
+    const FIFO_DEPTH_WORDS: u16;
+    const ENDPOINT_COUNT: usize;
+
+    fn regs() -> crate::pac::otg::Otg;
+    fn state() -> &'static super::State<{ MAX_EP_COUNT }>;
+}
+
+/// USB instance trait.
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + RccPeripheral + 'static {
+    /// Interrupt for this USB instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
+
+// Internal PHY pins
+pin_trait!(DpPin, Instance);
+pin_trait!(DmPin, Instance);
+
+// External PHY pins
+pin_trait!(UlpiClkPin, Instance);
+pin_trait!(UlpiDirPin, Instance);
+pin_trait!(UlpiNxtPin, Instance);
+pin_trait!(UlpiStpPin, Instance);
+pin_trait!(UlpiD0Pin, Instance);
+pin_trait!(UlpiD1Pin, Instance);
+pin_trait!(UlpiD2Pin, Instance);
+pin_trait!(UlpiD3Pin, Instance);
+pin_trait!(UlpiD4Pin, Instance);
+pin_trait!(UlpiD5Pin, Instance);
+pin_trait!(UlpiD6Pin, Instance);
+pin_trait!(UlpiD7Pin, Instance);
+
+foreach_interrupt!(
+    (USB_OTG_FS, otg, $block:ident, GLOBAL, $irq:ident) => {
+        impl SealedInstance for crate::peripherals::USB_OTG_FS {
+            const HIGH_SPEED: bool = false;
+
+            cfg_if::cfg_if! {
+                if #[cfg(stm32f1)] {
+                    const FIFO_DEPTH_WORDS: u16 = 128;
+                    const ENDPOINT_COUNT: usize = 8;
+                } else if #[cfg(any(
+                    stm32f2,
+                    stm32f401,
+                    stm32f405,
+                    stm32f407,
+                    stm32f411,
+                    stm32f415,
+                    stm32f417,
+                    stm32f427,
+                    stm32f429,
+                    stm32f437,
+                    stm32f439,
+                ))] {
+                    const FIFO_DEPTH_WORDS: u16 = 320;
+                    const ENDPOINT_COUNT: usize = 4;
+                } else if #[cfg(any(
+                    stm32f412,
+                    stm32f413,
+                    stm32f423,
+                    stm32f446,
+                    stm32f469,
+                    stm32f479,
+                    stm32f7,
+                    stm32l4,
+                    stm32u5,
+                ))] {
+                    const FIFO_DEPTH_WORDS: u16 = 320;
+                    const ENDPOINT_COUNT: usize = 6;
+                } else if #[cfg(stm32g0x1)] {
+                    const FIFO_DEPTH_WORDS: u16 = 512;
+                    const ENDPOINT_COUNT: usize = 8;
+                } else if #[cfg(stm32h7)] {
+                    const FIFO_DEPTH_WORDS: u16 = 1024;
+                    const ENDPOINT_COUNT: usize = 9;
+                } else if #[cfg(stm32u5)] {
+                    const FIFO_DEPTH_WORDS: u16 = 320;
+                    const ENDPOINT_COUNT: usize = 6;
+                } else {
+                    compile_error!("USB_OTG_FS peripheral is not supported by this chip.");
+                }
+            }
+
+            fn regs() -> crate::pac::otg::Otg {
+                crate::pac::USB_OTG_FS
+            }
+
+            fn state() -> &'static State<MAX_EP_COUNT> {
+                static STATE: State<MAX_EP_COUNT> = State::new();
+                &STATE
+            }
+        }
+
+        impl Instance for crate::peripherals::USB_OTG_FS {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
+    };
+
+    (USB_OTG_HS, otg, $block:ident, GLOBAL, $irq:ident) => {
+        impl SealedInstance for crate::peripherals::USB_OTG_HS {
+            const HIGH_SPEED: bool = true;
+
+            cfg_if::cfg_if! {
+                if #[cfg(any(
+                    stm32f2,
+                    stm32f405,
+                    stm32f407,
+                    stm32f415,
+                    stm32f417,
+                    stm32f427,
+                    stm32f429,
+                    stm32f437,
+                    stm32f439,
+                ))] {
+                    const FIFO_DEPTH_WORDS: u16 = 1024;
+                    const ENDPOINT_COUNT: usize = 6;
+                } else if #[cfg(any(
+                    stm32f446,
+                    stm32f469,
+                    stm32f479,
+                    stm32f7,
+                    stm32h7,
+                ))] {
+                    const FIFO_DEPTH_WORDS: u16 = 1024;
+                    const ENDPOINT_COUNT: usize = 9;
+                } else if #[cfg(stm32u5)] {
+                    const FIFO_DEPTH_WORDS: u16 = 1024;
+                    const ENDPOINT_COUNT: usize = 9;
+                } else {
+                    compile_error!("USB_OTG_HS peripheral is not supported by this chip.");
+                }
+            }
+
+            fn regs() -> crate::pac::otg::Otg {
+                // OTG HS registers are a superset of FS registers
+                unsafe { crate::pac::otg::Otg::from_ptr(crate::pac::USB_OTG_HS.as_ptr()) }
+            }
+
+            fn state() -> &'static State<MAX_EP_COUNT> {
+                static STATE: State<MAX_EP_COUNT> = State::new();
+                &STATE
+            }
+        }
+
+        impl Instance for crate::peripherals::USB_OTG_HS {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
+    };
+);
diff --git a/embassy-stm32/src/usb/usb.rs b/embassy-stm32/src/usb/usb.rs
index be321a19b..f48808cb3 100644
--- a/embassy-stm32/src/usb/usb.rs
+++ b/embassy-stm32/src/usb/usb.rs
@@ -12,12 +12,10 @@ use embassy_usb_driver::{
     Direction, EndpointAddress, EndpointAllocError, EndpointError, EndpointInfo, EndpointType, Event, Unsupported,
 };
 
-use super::{DmPin, DpPin, Instance};
-use crate::interrupt::typelevel::Interrupt;
 use crate::pac::usb::regs;
 use crate::pac::usb::vals::{EpType, Stat};
 use crate::pac::USBRAM;
-use crate::rcc::sealed::RccPeripheral;
+use crate::rcc::RccPeripheral;
 use crate::{interrupt, Peripheral};
 
 /// Interrupt handler.
@@ -259,19 +257,11 @@ impl<'d, T: Instance> Driver<'d, T> {
         dm: impl Peripheral<P = impl DmPin<T>> + 'd,
     ) -> Self {
         into_ref!(dp, dm);
-        T::Interrupt::unpend();
-        unsafe { T::Interrupt::enable() };
+
+        super::common_init::<T>();
 
         let regs = T::regs();
 
-        #[cfg(any(stm32l4, stm32l5, stm32wb))]
-        crate::pac::PWR.cr2().modify(|w| w.set_usv(true));
-
-        #[cfg(pwr_h5)]
-        crate::pac::PWR.usbscr().modify(|w| w.set_usb33sv(true));
-
-        <T as RccPeripheral>::enable_and_reset();
-
         regs.cntr().write(|w| {
             w.set_pdwn(false);
             w.set_fres(true);
@@ -287,8 +277,8 @@ impl<'d, T: Instance> Driver<'d, T> {
 
         #[cfg(not(stm32l1))]
         {
-            dp.set_as_af(dp.af_num(), crate::gpio::sealed::AFType::OutputPushPull);
-            dm.set_as_af(dm.af_num(), crate::gpio::sealed::AFType::OutputPushPull);
+            dp.set_as_af(dp.af_num(), crate::gpio::AFType::OutputPushPull);
+            dm.set_as_af(dm.af_num(), crate::gpio::AFType::OutputPushPull);
         }
         #[cfg(stm32l1)]
         let _ = (dp, dm); // suppress "unused" warnings.
@@ -647,7 +637,6 @@ impl<'d, T: Instance> driver::Bus for Bus<'d, T> {
 
 trait Dir {
     fn dir() -> Direction;
-    fn waker(i: usize) -> &'static AtomicWaker;
 }
 
 /// Marker type for the "IN" direction.
@@ -656,11 +645,6 @@ impl Dir for In {
     fn dir() -> Direction {
         Direction::In
     }
-
-    #[inline]
-    fn waker(i: usize) -> &'static AtomicWaker {
-        &EP_IN_WAKERS[i]
-    }
 }
 
 /// Marker type for the "OUT" direction.
@@ -669,11 +653,6 @@ impl Dir for Out {
     fn dir() -> Direction {
         Direction::Out
     }
-
-    #[inline]
-    fn waker(i: usize) -> &'static AtomicWaker {
-        &EP_OUT_WAKERS[i]
-    }
 }
 
 /// USB endpoint.
@@ -1057,3 +1036,32 @@ impl<'d, T: Instance> driver::ControlPipe for ControlPipe<'d, T> {
         });
     }
 }
+
+trait SealedInstance {
+    fn regs() -> crate::pac::usb::Usb;
+}
+
+/// USB instance trait.
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + RccPeripheral + 'static {
+    /// Interrupt for this USB instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
+
+// Internal PHY pins
+pin_trait!(DpPin, Instance);
+pin_trait!(DmPin, Instance);
+
+foreach_interrupt!(
+    ($inst:ident, usb, $block:ident, LP, $irq:ident) => {
+        impl SealedInstance for crate::peripherals::$inst {
+            fn regs() -> crate::pac::usb::Usb {
+                crate::pac::$inst
+            }
+        }
+
+        impl Instance for crate::peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
+    };
+);
diff --git a/embassy-stm32/src/usb_otg/mod.rs b/embassy-stm32/src/usb_otg/mod.rs
deleted file mode 100644
index 0649e684b..000000000
--- a/embassy-stm32/src/usb_otg/mod.rs
+++ /dev/null
@@ -1,163 +0,0 @@
-//! USB On The Go (OTG)
-
-use crate::rcc::RccPeripheral;
-use crate::{interrupt, peripherals};
-
-mod usb;
-pub use usb::*;
-
-// Using Instance::ENDPOINT_COUNT requires feature(const_generic_expr) so just define maximum eps
-const MAX_EP_COUNT: usize = 9;
-
-pub(crate) mod sealed {
-    pub trait Instance {
-        const HIGH_SPEED: bool;
-        const FIFO_DEPTH_WORDS: u16;
-        const ENDPOINT_COUNT: usize;
-
-        fn regs() -> crate::pac::otg::Otg;
-        fn state() -> &'static super::State<{ super::MAX_EP_COUNT }>;
-    }
-}
-
-/// USB OTG instance.
-pub trait Instance: sealed::Instance + RccPeripheral {
-    /// Interrupt for this USB OTG instance.
-    type Interrupt: interrupt::typelevel::Interrupt;
-}
-
-// Internal PHY pins
-pin_trait!(DpPin, Instance);
-pin_trait!(DmPin, Instance);
-
-// External PHY pins
-pin_trait!(UlpiClkPin, Instance);
-pin_trait!(UlpiDirPin, Instance);
-pin_trait!(UlpiNxtPin, Instance);
-pin_trait!(UlpiStpPin, Instance);
-pin_trait!(UlpiD0Pin, Instance);
-pin_trait!(UlpiD1Pin, Instance);
-pin_trait!(UlpiD2Pin, Instance);
-pin_trait!(UlpiD3Pin, Instance);
-pin_trait!(UlpiD4Pin, Instance);
-pin_trait!(UlpiD5Pin, Instance);
-pin_trait!(UlpiD6Pin, Instance);
-pin_trait!(UlpiD7Pin, Instance);
-
-foreach_interrupt!(
-    (USB_OTG_FS, otg, $block:ident, GLOBAL, $irq:ident) => {
-        impl sealed::Instance for peripherals::USB_OTG_FS {
-            const HIGH_SPEED: bool = false;
-
-            cfg_if::cfg_if! {
-                if #[cfg(stm32f1)] {
-                    const FIFO_DEPTH_WORDS: u16 = 128;
-                    const ENDPOINT_COUNT: usize = 8;
-                } else if #[cfg(any(
-                    stm32f2,
-                    stm32f401,
-                    stm32f405,
-                    stm32f407,
-                    stm32f411,
-                    stm32f415,
-                    stm32f417,
-                    stm32f427,
-                    stm32f429,
-                    stm32f437,
-                    stm32f439,
-                ))] {
-                    const FIFO_DEPTH_WORDS: u16 = 320;
-                    const ENDPOINT_COUNT: usize = 4;
-                } else if #[cfg(any(
-                    stm32f412,
-                    stm32f413,
-                    stm32f423,
-                    stm32f446,
-                    stm32f469,
-                    stm32f479,
-                    stm32f7,
-                    stm32l4,
-                    stm32u5,
-                ))] {
-                    const FIFO_DEPTH_WORDS: u16 = 320;
-                    const ENDPOINT_COUNT: usize = 6;
-                } else if #[cfg(stm32g0x1)] {
-                    const FIFO_DEPTH_WORDS: u16 = 512;
-                    const ENDPOINT_COUNT: usize = 8;
-                } else if #[cfg(stm32h7)] {
-                    const FIFO_DEPTH_WORDS: u16 = 1024;
-                    const ENDPOINT_COUNT: usize = 9;
-                } else if #[cfg(stm32u5)] {
-                    const FIFO_DEPTH_WORDS: u16 = 320;
-                    const ENDPOINT_COUNT: usize = 6;
-                } else {
-                    compile_error!("USB_OTG_FS peripheral is not supported by this chip.");
-                }
-            }
-
-            fn regs() -> crate::pac::otg::Otg {
-                crate::pac::USB_OTG_FS
-            }
-
-                        fn state() -> &'static State<MAX_EP_COUNT> {
-                static STATE: State<MAX_EP_COUNT> = State::new();
-                &STATE
-            }
-        }
-
-        impl Instance for peripherals::USB_OTG_FS {
-            type Interrupt = crate::interrupt::typelevel::$irq;
-        }
-    };
-
-    (USB_OTG_HS, otg, $block:ident, GLOBAL, $irq:ident) => {
-        impl sealed::Instance for peripherals::USB_OTG_HS {
-            const HIGH_SPEED: bool = true;
-
-            cfg_if::cfg_if! {
-                if #[cfg(any(
-                    stm32f2,
-                    stm32f405,
-                    stm32f407,
-                    stm32f415,
-                    stm32f417,
-                    stm32f427,
-                    stm32f429,
-                    stm32f437,
-                    stm32f439,
-                ))] {
-                    const FIFO_DEPTH_WORDS: u16 = 1024;
-                    const ENDPOINT_COUNT: usize = 6;
-                } else if #[cfg(any(
-                    stm32f446,
-                    stm32f469,
-                    stm32f479,
-                    stm32f7,
-                    stm32h7,
-                ))] {
-                    const FIFO_DEPTH_WORDS: u16 = 1024;
-                    const ENDPOINT_COUNT: usize = 9;
-                } else if #[cfg(stm32u5)] {
-                    const FIFO_DEPTH_WORDS: u16 = 1024;
-                    const ENDPOINT_COUNT: usize = 9;
-                } else {
-                    compile_error!("USB_OTG_HS peripheral is not supported by this chip.");
-                }
-            }
-
-            fn regs() -> crate::pac::otg::Otg {
-                // OTG HS registers are a superset of FS registers
-                unsafe { crate::pac::otg::Otg::from_ptr(crate::pac::USB_OTG_HS.as_ptr()) }
-            }
-
-                        fn state() -> &'static State<MAX_EP_COUNT> {
-                static STATE: State<MAX_EP_COUNT> = State::new();
-                &STATE
-            }
-        }
-
-        impl Instance for peripherals::USB_OTG_HS {
-            type Interrupt = crate::interrupt::typelevel::$irq;
-        }
-    };
-);
diff --git a/embassy-stm32/src/wdg/mod.rs b/embassy-stm32/src/wdg/mod.rs
index 2ff0db09e..ab21c4b6b 100644
--- a/embassy-stm32/src/wdg/mod.rs
+++ b/embassy-stm32/src/wdg/mod.rs
@@ -80,18 +80,17 @@ impl<'d, T: Instance> IndependentWatchdog<'d, T> {
     }
 }
 
-mod sealed {
-    pub trait Instance {
-        fn regs() -> crate::pac::iwdg::Iwdg;
-    }
+trait SealedInstance {
+    fn regs() -> crate::pac::iwdg::Iwdg;
 }
 
 /// IWDG instance trait.
-pub trait Instance: sealed::Instance {}
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance {}
 
 foreach_peripheral!(
     (iwdg, $inst:ident) => {
-        impl sealed::Instance for crate::peripherals::$inst {
+        impl SealedInstance for crate::peripherals::$inst {
             fn regs() -> crate::pac::iwdg::Iwdg {
                 crate::pac::$inst
             }
diff --git a/embassy-sync/Cargo.toml b/embassy-sync/Cargo.toml
index 85673026c..aaf6fab1d 100644
--- a/embassy-sync/Cargo.toml
+++ b/embassy-sync/Cargo.toml
@@ -20,7 +20,7 @@ src_base_git = "https://github.com/embassy-rs/embassy/blob/$COMMIT/embassy-sync/
 target = "thumbv7em-none-eabi"
 
 [features]
-std = []
+std = ["critical-section/std"]
 turbowakers = []
 
 [dependencies]
diff --git a/embassy-sync/README.md b/embassy-sync/README.md
index c2e13799e..2c1c0cf68 100644
--- a/embassy-sync/README.md
+++ b/embassy-sync/README.md
@@ -5,7 +5,7 @@ An [Embassy](https://embassy.dev) project.
 Synchronization primitives and data structures with async support:
 
 - [`Channel`](channel::Channel) - A Multiple Producer Multiple Consumer (MPMC) channel. Each message is only received by a single consumer.
-- [`PriorityChannel`](channel::priority::PriorityChannel) - A Multiple Producer Multiple Consumer (MPMC) channel. Each message is only received by a single consumer. Higher priority items are sifted to the front of the channel.
+- [`PriorityChannel`](channel::priority::PriorityChannel) - A Multiple Producer Multiple Consumer (MPMC) channel. Each message is only received by a single consumer. Higher priority items are shifted to the front of the channel.
 - [`PubSubChannel`](pubsub::PubSubChannel) - A broadcast channel (publish-subscribe) channel. Each message is received by all consumers.
 - [`Signal`](signal::Signal) - Signalling latest value to a single consumer.
 - [`Mutex`](mutex::Mutex) - Mutex for synchronizing state between asynchronous tasks.
diff --git a/embassy-sync/src/channel.rs b/embassy-sync/src/channel.rs
index 01db0d09a..48f4dafd6 100644
--- a/embassy-sync/src/channel.rs
+++ b/embassy-sync/src/channel.rs
@@ -263,6 +263,12 @@ impl<'ch, T> Future for DynamicReceiveFuture<'ch, T> {
     }
 }
 
+impl<'ch, M: RawMutex, T, const N: usize> From<ReceiveFuture<'ch, M, T, N>> for DynamicReceiveFuture<'ch, T> {
+    fn from(value: ReceiveFuture<'ch, M, T, N>) -> Self {
+        Self { channel: value.channel }
+    }
+}
+
 /// Future returned by [`Channel::send`] and  [`Sender::send`].
 #[must_use = "futures do nothing unless you `.await` or poll them"]
 pub struct SendFuture<'ch, M, T, const N: usize>
@@ -321,6 +327,15 @@ impl<'ch, T> Future for DynamicSendFuture<'ch, T> {
 
 impl<'ch, T> Unpin for DynamicSendFuture<'ch, T> {}
 
+impl<'ch, M: RawMutex, T, const N: usize> From<SendFuture<'ch, M, T, N>> for DynamicSendFuture<'ch, T> {
+    fn from(value: SendFuture<'ch, M, T, N>) -> Self {
+        Self {
+            channel: value.channel,
+            message: value.message,
+        }
+    }
+}
+
 pub(crate) trait DynamicChannel<T> {
     fn try_send_with_context(&self, message: T, cx: Option<&mut Context<'_>>) -> Result<(), TrySendError<T>>;
 
diff --git a/embassy-sync/src/fmt.rs b/embassy-sync/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-sync/src/fmt.rs
+++ b/embassy-sync/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-sync/src/lib.rs b/embassy-sync/src/lib.rs
index d88c76db5..1873483f9 100644
--- a/embassy-sync/src/lib.rs
+++ b/embassy-sync/src/lib.rs
@@ -13,9 +13,11 @@ mod ring_buffer;
 pub mod blocking_mutex;
 pub mod channel;
 pub mod mutex;
+pub mod once_lock;
 pub mod pipe;
 pub mod priority_channel;
 pub mod pubsub;
+pub mod semaphore;
 pub mod signal;
 pub mod waitqueue;
 pub mod zerocopy_channel;
diff --git a/embassy-sync/src/once_lock.rs b/embassy-sync/src/once_lock.rs
new file mode 100644
index 000000000..31cc99711
--- /dev/null
+++ b/embassy-sync/src/once_lock.rs
@@ -0,0 +1,236 @@
+//! Syncronization primitive for initializing a value once, allowing others to await a reference to the value.
+
+use core::cell::Cell;
+use core::future::poll_fn;
+use core::mem::MaybeUninit;
+use core::sync::atomic::{AtomicBool, Ordering};
+use core::task::Poll;
+
+/// The `OnceLock` is a synchronization primitive that allows for
+/// initializing a value once, and allowing others to `.await` a
+/// reference to the value. This is useful for lazy initialization of
+/// a static value.
+///
+/// **Note**: this implementation uses a busy loop to poll the value,
+/// which is not as efficient as registering a dedicated `Waker`.
+/// However, the if the usecase for is to initialize a static variable
+/// relatively early in the program life cycle, it should be fine.
+///
+/// # Example
+/// ```
+/// use futures_executor::block_on;
+/// use embassy_sync::once_lock::OnceLock;
+///
+/// // Define a static value that will be lazily initialized
+/// static VALUE: OnceLock<u32> = OnceLock::new();
+///
+/// let f = async {
+///
+/// // Initialize the value
+/// let reference = VALUE.get_or_init(|| 20);
+/// assert_eq!(reference, &20);
+///
+/// // Wait for the value to be initialized
+/// // and get a static reference it
+/// assert_eq!(VALUE.get().await, &20);
+///
+/// };
+/// block_on(f)
+/// ```
+pub struct OnceLock<T> {
+    init: AtomicBool,
+    data: Cell<MaybeUninit<T>>,
+}
+
+unsafe impl<T> Sync for OnceLock<T> {}
+
+impl<T> OnceLock<T> {
+    /// Create a new uninitialized `OnceLock`.
+    pub const fn new() -> Self {
+        Self {
+            init: AtomicBool::new(false),
+            data: Cell::new(MaybeUninit::zeroed()),
+        }
+    }
+
+    /// Get a reference to the underlying value, waiting for it to be set.
+    /// If the value is already set, this will return immediately.
+    pub async fn get(&self) -> &T {
+        poll_fn(|cx| match self.try_get() {
+            Some(data) => Poll::Ready(data),
+            None => {
+                cx.waker().wake_by_ref();
+                Poll::Pending
+            }
+        })
+        .await
+    }
+
+    /// Try to get a reference to the underlying value if it exists.
+    pub fn try_get(&self) -> Option<&T> {
+        if self.init.load(Ordering::Relaxed) {
+            Some(unsafe { self.get_ref_unchecked() })
+        } else {
+            None
+        }
+    }
+
+    /// Set the underlying value. If the value is already set, this will return an error with the given value.
+    pub fn init(&self, value: T) -> Result<(), T> {
+        // Critical section is required to ensure that the value is
+        // not simultaniously initialized elsewhere at the same time.
+        critical_section::with(|_| {
+            // If the value is not set, set it and return Ok.
+            if !self.init.load(Ordering::Relaxed) {
+                self.data.set(MaybeUninit::new(value));
+                self.init.store(true, Ordering::Relaxed);
+                Ok(())
+
+            // Otherwise return an error with the given value.
+            } else {
+                Err(value)
+            }
+        })
+    }
+
+    /// Get a reference to the underlying value, initializing it if it does not exist.
+    pub fn get_or_init<F>(&self, f: F) -> &T
+    where
+        F: FnOnce() -> T,
+    {
+        // Critical section is required to ensure that the value is
+        // not simultaniously initialized elsewhere at the same time.
+        critical_section::with(|_| {
+            // If the value is not set, set it.
+            if !self.init.load(Ordering::Relaxed) {
+                self.data.set(MaybeUninit::new(f()));
+                self.init.store(true, Ordering::Relaxed);
+            }
+        });
+
+        // Return a reference to the value.
+        unsafe { self.get_ref_unchecked() }
+    }
+
+    /// Consume the `OnceLock`, returning the underlying value if it was initialized.
+    pub fn into_inner(self) -> Option<T> {
+        if self.init.load(Ordering::Relaxed) {
+            Some(unsafe { self.data.into_inner().assume_init() })
+        } else {
+            None
+        }
+    }
+
+    /// Take the underlying value if it was initialized, uninitializing the `OnceLock` in the process.
+    pub fn take(&mut self) -> Option<T> {
+        // If the value is set, uninitialize the lock and return the value.
+        critical_section::with(|_| {
+            if self.init.load(Ordering::Relaxed) {
+                let val = unsafe { self.data.replace(MaybeUninit::zeroed()).assume_init() };
+                self.init.store(false, Ordering::Relaxed);
+                Some(val)
+
+            // Otherwise return None.
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Check if the value has been set.
+    pub fn is_set(&self) -> bool {
+        self.init.load(Ordering::Relaxed)
+    }
+
+    /// Get a reference to the underlying value.
+    /// # Safety
+    /// Must only be used if a value has been set.
+    unsafe fn get_ref_unchecked(&self) -> &T {
+        (*self.data.as_ptr()).assume_init_ref()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn once_lock() {
+        let lock = OnceLock::new();
+        assert_eq!(lock.try_get(), None);
+        assert_eq!(lock.is_set(), false);
+
+        let v = 42;
+        assert_eq!(lock.init(v), Ok(()));
+        assert_eq!(lock.is_set(), true);
+        assert_eq!(lock.try_get(), Some(&v));
+        assert_eq!(lock.try_get(), Some(&v));
+
+        let v = 43;
+        assert_eq!(lock.init(v), Err(v));
+        assert_eq!(lock.is_set(), true);
+        assert_eq!(lock.try_get(), Some(&42));
+    }
+
+    #[test]
+    fn once_lock_get_or_init() {
+        let lock = OnceLock::new();
+        assert_eq!(lock.try_get(), None);
+        assert_eq!(lock.is_set(), false);
+
+        let v = lock.get_or_init(|| 42);
+        assert_eq!(v, &42);
+        assert_eq!(lock.is_set(), true);
+        assert_eq!(lock.try_get(), Some(&42));
+
+        let v = lock.get_or_init(|| 43);
+        assert_eq!(v, &42);
+        assert_eq!(lock.is_set(), true);
+        assert_eq!(lock.try_get(), Some(&42));
+    }
+
+    #[test]
+    fn once_lock_static() {
+        static LOCK: OnceLock<i32> = OnceLock::new();
+
+        let v: &'static i32 = LOCK.get_or_init(|| 42);
+        assert_eq!(v, &42);
+
+        let v: &'static i32 = LOCK.get_or_init(|| 43);
+        assert_eq!(v, &42);
+    }
+
+    #[futures_test::test]
+    async fn once_lock_async() {
+        static LOCK: OnceLock<i32> = OnceLock::new();
+
+        assert!(LOCK.init(42).is_ok());
+
+        let v: &'static i32 = LOCK.get().await;
+        assert_eq!(v, &42);
+    }
+
+    #[test]
+    fn once_lock_into_inner() {
+        let lock: OnceLock<i32> = OnceLock::new();
+
+        let v = lock.get_or_init(|| 42);
+        assert_eq!(v, &42);
+
+        assert_eq!(lock.into_inner(), Some(42));
+    }
+
+    #[test]
+    fn once_lock_take_init() {
+        let mut lock: OnceLock<i32> = OnceLock::new();
+
+        assert_eq!(lock.get_or_init(|| 42), &42);
+        assert_eq!(lock.is_set(), true);
+
+        assert_eq!(lock.take(), Some(42));
+        assert_eq!(lock.is_set(), false);
+
+        assert_eq!(lock.get_or_init(|| 43), &43);
+        assert_eq!(lock.is_set(), true);
+    }
+}
diff --git a/embassy-sync/src/semaphore.rs b/embassy-sync/src/semaphore.rs
new file mode 100644
index 000000000..52c468b4a
--- /dev/null
+++ b/embassy-sync/src/semaphore.rs
@@ -0,0 +1,704 @@
+//! A synchronization primitive for controlling access to a pool of resources.
+use core::cell::{Cell, RefCell};
+use core::convert::Infallible;
+use core::future::poll_fn;
+use core::mem::MaybeUninit;
+use core::task::{Poll, Waker};
+
+use heapless::Deque;
+
+use crate::blocking_mutex::raw::RawMutex;
+use crate::blocking_mutex::Mutex;
+use crate::waitqueue::WakerRegistration;
+
+/// An asynchronous semaphore.
+///
+/// A semaphore tracks a number of permits, typically representing a pool of shared resources.
+/// Users can acquire permits to synchronize access to those resources. The semaphore does not
+/// contain the resources themselves, only the count of available permits.
+pub trait Semaphore: Sized {
+    /// The error returned when the semaphore is unable to acquire the requested permits.
+    type Error;
+
+    /// Asynchronously acquire one or more permits from the semaphore.
+    async fn acquire(&self, permits: usize) -> Result<SemaphoreReleaser<'_, Self>, Self::Error>;
+
+    /// Try to immediately acquire one or more permits from the semaphore.
+    fn try_acquire(&self, permits: usize) -> Option<SemaphoreReleaser<'_, Self>>;
+
+    /// Asynchronously acquire all permits controlled by the semaphore.
+    ///
+    /// This method will wait until at least `min` permits are available, then acquire all available permits
+    /// from the semaphore. Note that other tasks may have already acquired some permits which could be released
+    /// back to the semaphore at any time. The number of permits actually acquired may be determined by calling
+    /// [`SemaphoreReleaser::permits`].
+    async fn acquire_all(&self, min: usize) -> Result<SemaphoreReleaser<'_, Self>, Self::Error>;
+
+    /// Try to immediately acquire all available permits from the semaphore, if at least `min` permits are available.
+    fn try_acquire_all(&self, min: usize) -> Option<SemaphoreReleaser<'_, Self>>;
+
+    /// Release `permits` back to the semaphore, making them available to be acquired.
+    fn release(&self, permits: usize);
+
+    /// Reset the number of available permints in the semaphore to `permits`.
+    fn set(&self, permits: usize);
+}
+
+/// A representation of a number of acquired permits.
+///
+/// The acquired permits will be released back to the [`Semaphore`] when this is dropped.
+pub struct SemaphoreReleaser<'a, S: Semaphore> {
+    semaphore: &'a S,
+    permits: usize,
+}
+
+impl<'a, S: Semaphore> Drop for SemaphoreReleaser<'a, S> {
+    fn drop(&mut self) {
+        self.semaphore.release(self.permits);
+    }
+}
+
+impl<'a, S: Semaphore> SemaphoreReleaser<'a, S> {
+    /// The number of acquired permits.
+    pub fn permits(&self) -> usize {
+        self.permits
+    }
+
+    /// Prevent the acquired permits from being released on drop.
+    ///
+    /// Returns the number of acquired permits.
+    pub fn disarm(self) -> usize {
+        let permits = self.permits;
+        core::mem::forget(self);
+        permits
+    }
+}
+
+/// A greedy [`Semaphore`] implementation.
+///
+/// Tasks can acquire permits as soon as they become available, even if another task
+/// is waiting on a larger number of permits.
+pub struct GreedySemaphore<M: RawMutex> {
+    state: Mutex<M, Cell<SemaphoreState>>,
+}
+
+impl<M: RawMutex> Default for GreedySemaphore<M> {
+    fn default() -> Self {
+        Self::new(0)
+    }
+}
+
+impl<M: RawMutex> GreedySemaphore<M> {
+    /// Create a new `Semaphore`.
+    pub const fn new(permits: usize) -> Self {
+        Self {
+            state: Mutex::new(Cell::new(SemaphoreState {
+                permits,
+                waker: WakerRegistration::new(),
+            })),
+        }
+    }
+
+    #[cfg(test)]
+    fn permits(&self) -> usize {
+        self.state.lock(|cell| {
+            let state = cell.replace(SemaphoreState::EMPTY);
+            let permits = state.permits;
+            cell.replace(state);
+            permits
+        })
+    }
+
+    fn poll_acquire(
+        &self,
+        permits: usize,
+        acquire_all: bool,
+        waker: Option<&Waker>,
+    ) -> Poll<Result<SemaphoreReleaser<'_, Self>, Infallible>> {
+        self.state.lock(|cell| {
+            let mut state = cell.replace(SemaphoreState::EMPTY);
+            if let Some(permits) = state.take(permits, acquire_all) {
+                cell.set(state);
+                Poll::Ready(Ok(SemaphoreReleaser {
+                    semaphore: self,
+                    permits,
+                }))
+            } else {
+                if let Some(waker) = waker {
+                    state.register(waker);
+                }
+                cell.set(state);
+                Poll::Pending
+            }
+        })
+    }
+}
+
+impl<M: RawMutex> Semaphore for GreedySemaphore<M> {
+    type Error = Infallible;
+
+    async fn acquire(&self, permits: usize) -> Result<SemaphoreReleaser<'_, Self>, Self::Error> {
+        poll_fn(|cx| self.poll_acquire(permits, false, Some(cx.waker()))).await
+    }
+
+    fn try_acquire(&self, permits: usize) -> Option<SemaphoreReleaser<'_, Self>> {
+        match self.poll_acquire(permits, false, None) {
+            Poll::Ready(Ok(n)) => Some(n),
+            _ => None,
+        }
+    }
+
+    async fn acquire_all(&self, min: usize) -> Result<SemaphoreReleaser<'_, Self>, Self::Error> {
+        poll_fn(|cx| self.poll_acquire(min, true, Some(cx.waker()))).await
+    }
+
+    fn try_acquire_all(&self, min: usize) -> Option<SemaphoreReleaser<'_, Self>> {
+        match self.poll_acquire(min, true, None) {
+            Poll::Ready(Ok(n)) => Some(n),
+            _ => None,
+        }
+    }
+
+    fn release(&self, permits: usize) {
+        if permits > 0 {
+            self.state.lock(|cell| {
+                let mut state = cell.replace(SemaphoreState::EMPTY);
+                state.permits += permits;
+                state.wake();
+                cell.set(state);
+            });
+        }
+    }
+
+    fn set(&self, permits: usize) {
+        self.state.lock(|cell| {
+            let mut state = cell.replace(SemaphoreState::EMPTY);
+            if permits > state.permits {
+                state.wake();
+            }
+            state.permits = permits;
+            cell.set(state);
+        });
+    }
+}
+
+struct SemaphoreState {
+    permits: usize,
+    waker: WakerRegistration,
+}
+
+impl SemaphoreState {
+    const EMPTY: SemaphoreState = SemaphoreState {
+        permits: 0,
+        waker: WakerRegistration::new(),
+    };
+
+    fn register(&mut self, w: &Waker) {
+        self.waker.register(w);
+    }
+
+    fn take(&mut self, mut permits: usize, acquire_all: bool) -> Option<usize> {
+        if self.permits < permits {
+            None
+        } else {
+            if acquire_all {
+                permits = self.permits;
+            }
+            self.permits -= permits;
+            Some(permits)
+        }
+    }
+
+    fn wake(&mut self) {
+        self.waker.wake();
+    }
+}
+
+/// A fair [`Semaphore`] implementation.
+///
+/// Tasks are allowed to acquire permits in FIFO order. A task waiting to acquire
+/// a large number of permits will prevent other tasks from acquiring any permits
+/// until its request is satisfied.
+///
+/// Up to `N` tasks may attempt to acquire permits concurrently. If additional
+/// tasks attempt to acquire a permit, a [`WaitQueueFull`] error will be returned.
+pub struct FairSemaphore<M, const N: usize>
+where
+    M: RawMutex,
+{
+    state: Mutex<M, RefCell<FairSemaphoreState<N>>>,
+}
+
+impl<M, const N: usize> Default for FairSemaphore<M, N>
+where
+    M: RawMutex,
+{
+    fn default() -> Self {
+        Self::new(0)
+    }
+}
+
+impl<M, const N: usize> FairSemaphore<M, N>
+where
+    M: RawMutex,
+{
+    /// Create a new `FairSemaphore`.
+    pub const fn new(permits: usize) -> Self {
+        Self {
+            state: Mutex::new(RefCell::new(FairSemaphoreState::new(permits))),
+        }
+    }
+
+    #[cfg(test)]
+    fn permits(&self) -> usize {
+        self.state.lock(|cell| cell.borrow().permits)
+    }
+
+    fn poll_acquire(
+        &self,
+        permits: usize,
+        acquire_all: bool,
+        cx: Option<(&Cell<Option<usize>>, &Waker)>,
+    ) -> Poll<Result<SemaphoreReleaser<'_, Self>, WaitQueueFull>> {
+        let ticket = cx.as_ref().map(|(cell, _)| cell.get()).unwrap_or(None);
+        self.state.lock(|cell| {
+            let mut state = cell.borrow_mut();
+            if let Some(permits) = state.take(ticket, permits, acquire_all) {
+                Poll::Ready(Ok(SemaphoreReleaser {
+                    semaphore: self,
+                    permits,
+                }))
+            } else if let Some((cell, waker)) = cx {
+                match state.register(ticket, waker) {
+                    Ok(ticket) => {
+                        cell.set(Some(ticket));
+                        Poll::Pending
+                    }
+                    Err(err) => Poll::Ready(Err(err)),
+                }
+            } else {
+                Poll::Pending
+            }
+        })
+    }
+}
+
+/// An error indicating the [`FairSemaphore`]'s wait queue is full.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct WaitQueueFull;
+
+impl<M: RawMutex, const N: usize> Semaphore for FairSemaphore<M, N> {
+    type Error = WaitQueueFull;
+
+    async fn acquire(&self, permits: usize) -> Result<SemaphoreReleaser<'_, Self>, Self::Error> {
+        let ticket = Cell::new(None);
+        let _guard = OnDrop::new(|| self.state.lock(|cell| cell.borrow_mut().cancel(ticket.get())));
+        poll_fn(|cx| self.poll_acquire(permits, false, Some((&ticket, cx.waker())))).await
+    }
+
+    fn try_acquire(&self, permits: usize) -> Option<SemaphoreReleaser<'_, Self>> {
+        match self.poll_acquire(permits, false, None) {
+            Poll::Ready(Ok(x)) => Some(x),
+            _ => None,
+        }
+    }
+
+    async fn acquire_all(&self, min: usize) -> Result<SemaphoreReleaser<'_, Self>, Self::Error> {
+        let ticket = Cell::new(None);
+        let _guard = OnDrop::new(|| self.state.lock(|cell| cell.borrow_mut().cancel(ticket.get())));
+        poll_fn(|cx| self.poll_acquire(min, true, Some((&ticket, cx.waker())))).await
+    }
+
+    fn try_acquire_all(&self, min: usize) -> Option<SemaphoreReleaser<'_, Self>> {
+        match self.poll_acquire(min, true, None) {
+            Poll::Ready(Ok(x)) => Some(x),
+            _ => None,
+        }
+    }
+
+    fn release(&self, permits: usize) {
+        if permits > 0 {
+            self.state.lock(|cell| {
+                let mut state = cell.borrow_mut();
+                state.permits += permits;
+                state.wake();
+            });
+        }
+    }
+
+    fn set(&self, permits: usize) {
+        self.state.lock(|cell| {
+            let mut state = cell.borrow_mut();
+            if permits > state.permits {
+                state.wake();
+            }
+            state.permits = permits;
+        });
+    }
+}
+
+struct FairSemaphoreState<const N: usize> {
+    permits: usize,
+    next_ticket: usize,
+    wakers: Deque<Option<Waker>, N>,
+}
+
+impl<const N: usize> FairSemaphoreState<N> {
+    /// Create a new empty instance
+    const fn new(permits: usize) -> Self {
+        Self {
+            permits,
+            next_ticket: 0,
+            wakers: Deque::new(),
+        }
+    }
+
+    /// Register a waker. If the queue is full the function returns an error
+    fn register(&mut self, ticket: Option<usize>, w: &Waker) -> Result<usize, WaitQueueFull> {
+        self.pop_canceled();
+
+        match ticket {
+            None => {
+                let ticket = self.next_ticket.wrapping_add(self.wakers.len());
+                self.wakers.push_back(Some(w.clone())).or(Err(WaitQueueFull))?;
+                Ok(ticket)
+            }
+            Some(ticket) => {
+                self.set_waker(ticket, Some(w.clone()));
+                Ok(ticket)
+            }
+        }
+    }
+
+    fn cancel(&mut self, ticket: Option<usize>) {
+        if let Some(ticket) = ticket {
+            self.set_waker(ticket, None);
+        }
+    }
+
+    fn set_waker(&mut self, ticket: usize, waker: Option<Waker>) {
+        let i = ticket.wrapping_sub(self.next_ticket);
+        if i < self.wakers.len() {
+            let (a, b) = self.wakers.as_mut_slices();
+            let x = if i < a.len() { &mut a[i] } else { &mut b[i - a.len()] };
+            *x = waker;
+        }
+    }
+
+    fn take(&mut self, ticket: Option<usize>, mut permits: usize, acquire_all: bool) -> Option<usize> {
+        self.pop_canceled();
+
+        if permits > self.permits {
+            return None;
+        }
+
+        match ticket {
+            Some(n) if n != self.next_ticket => return None,
+            None if !self.wakers.is_empty() => return None,
+            _ => (),
+        }
+
+        if acquire_all {
+            permits = self.permits;
+        }
+        self.permits -= permits;
+
+        if ticket.is_some() {
+            self.pop();
+        }
+
+        Some(permits)
+    }
+
+    fn pop_canceled(&mut self) {
+        while let Some(None) = self.wakers.front() {
+            self.pop();
+        }
+    }
+
+    /// Panics if `self.wakers` is empty
+    fn pop(&mut self) {
+        self.wakers.pop_front().unwrap();
+        self.next_ticket = self.next_ticket.wrapping_add(1);
+    }
+
+    fn wake(&mut self) {
+        self.pop_canceled();
+
+        if let Some(Some(waker)) = self.wakers.front() {
+            waker.wake_by_ref();
+        }
+    }
+}
+
+/// A type to delay the drop handler invocation.
+#[must_use = "to delay the drop handler invocation to the end of the scope"]
+struct OnDrop<F: FnOnce()> {
+    f: MaybeUninit<F>,
+}
+
+impl<F: FnOnce()> OnDrop<F> {
+    /// Create a new instance.
+    pub fn new(f: F) -> Self {
+        Self { f: MaybeUninit::new(f) }
+    }
+}
+
+impl<F: FnOnce()> Drop for OnDrop<F> {
+    fn drop(&mut self) {
+        unsafe { self.f.as_ptr().read()() }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    mod greedy {
+        use core::pin::pin;
+
+        use futures_util::poll;
+
+        use super::super::*;
+        use crate::blocking_mutex::raw::NoopRawMutex;
+
+        #[test]
+        fn try_acquire() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+
+            let a = semaphore.try_acquire(1).unwrap();
+            assert_eq!(a.permits(), 1);
+            assert_eq!(semaphore.permits(), 2);
+
+            core::mem::drop(a);
+            assert_eq!(semaphore.permits(), 3);
+        }
+
+        #[test]
+        fn disarm() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+
+            let a = semaphore.try_acquire(1).unwrap();
+            assert_eq!(a.disarm(), 1);
+            assert_eq!(semaphore.permits(), 2);
+        }
+
+        #[futures_test::test]
+        async fn acquire() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+
+            let a = semaphore.acquire(1).await.unwrap();
+            assert_eq!(a.permits(), 1);
+            assert_eq!(semaphore.permits(), 2);
+
+            core::mem::drop(a);
+            assert_eq!(semaphore.permits(), 3);
+        }
+
+        #[test]
+        fn try_acquire_all() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+
+            let a = semaphore.try_acquire_all(1).unwrap();
+            assert_eq!(a.permits(), 3);
+            assert_eq!(semaphore.permits(), 0);
+        }
+
+        #[futures_test::test]
+        async fn acquire_all() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+
+            let a = semaphore.acquire_all(1).await.unwrap();
+            assert_eq!(a.permits(), 3);
+            assert_eq!(semaphore.permits(), 0);
+        }
+
+        #[test]
+        fn release() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+            assert_eq!(semaphore.permits(), 3);
+            semaphore.release(2);
+            assert_eq!(semaphore.permits(), 5);
+        }
+
+        #[test]
+        fn set() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+            assert_eq!(semaphore.permits(), 3);
+            semaphore.set(2);
+            assert_eq!(semaphore.permits(), 2);
+        }
+
+        #[test]
+        fn contested() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+
+            let a = semaphore.try_acquire(1).unwrap();
+            let b = semaphore.try_acquire(3);
+            assert!(b.is_none());
+
+            core::mem::drop(a);
+
+            let b = semaphore.try_acquire(3);
+            assert!(b.is_some());
+        }
+
+        #[futures_test::test]
+        async fn greedy() {
+            let semaphore = GreedySemaphore::<NoopRawMutex>::new(3);
+
+            let a = semaphore.try_acquire(1).unwrap();
+
+            let b_fut = semaphore.acquire(3);
+            let mut b_fut = pin!(b_fut);
+            let b = poll!(b_fut.as_mut());
+            assert!(b.is_pending());
+
+            // Succeed even through `b` is waiting
+            let c = semaphore.try_acquire(1);
+            assert!(c.is_some());
+
+            let b = poll!(b_fut.as_mut());
+            assert!(b.is_pending());
+
+            core::mem::drop(a);
+
+            let b = poll!(b_fut.as_mut());
+            assert!(b.is_pending());
+
+            core::mem::drop(c);
+
+            let b = poll!(b_fut.as_mut());
+            assert!(b.is_ready());
+        }
+    }
+
+    mod fair {
+        use core::pin::pin;
+
+        use futures_util::poll;
+
+        use super::super::*;
+        use crate::blocking_mutex::raw::NoopRawMutex;
+
+        #[test]
+        fn try_acquire() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+
+            let a = semaphore.try_acquire(1).unwrap();
+            assert_eq!(a.permits(), 1);
+            assert_eq!(semaphore.permits(), 2);
+
+            core::mem::drop(a);
+            assert_eq!(semaphore.permits(), 3);
+        }
+
+        #[test]
+        fn disarm() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+
+            let a = semaphore.try_acquire(1).unwrap();
+            assert_eq!(a.disarm(), 1);
+            assert_eq!(semaphore.permits(), 2);
+        }
+
+        #[futures_test::test]
+        async fn acquire() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+
+            let a = semaphore.acquire(1).await.unwrap();
+            assert_eq!(a.permits(), 1);
+            assert_eq!(semaphore.permits(), 2);
+
+            core::mem::drop(a);
+            assert_eq!(semaphore.permits(), 3);
+        }
+
+        #[test]
+        fn try_acquire_all() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+
+            let a = semaphore.try_acquire_all(1).unwrap();
+            assert_eq!(a.permits(), 3);
+            assert_eq!(semaphore.permits(), 0);
+        }
+
+        #[futures_test::test]
+        async fn acquire_all() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+
+            let a = semaphore.acquire_all(1).await.unwrap();
+            assert_eq!(a.permits(), 3);
+            assert_eq!(semaphore.permits(), 0);
+        }
+
+        #[test]
+        fn release() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+            assert_eq!(semaphore.permits(), 3);
+            semaphore.release(2);
+            assert_eq!(semaphore.permits(), 5);
+        }
+
+        #[test]
+        fn set() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+            assert_eq!(semaphore.permits(), 3);
+            semaphore.set(2);
+            assert_eq!(semaphore.permits(), 2);
+        }
+
+        #[test]
+        fn contested() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+
+            let a = semaphore.try_acquire(1).unwrap();
+            let b = semaphore.try_acquire(3);
+            assert!(b.is_none());
+
+            core::mem::drop(a);
+
+            let b = semaphore.try_acquire(3);
+            assert!(b.is_some());
+        }
+
+        #[futures_test::test]
+        async fn fairness() {
+            let semaphore = FairSemaphore::<NoopRawMutex, 2>::new(3);
+
+            let a = semaphore.try_acquire(1);
+            assert!(a.is_some());
+
+            let b_fut = semaphore.acquire(3);
+            let mut b_fut = pin!(b_fut);
+            let b = poll!(b_fut.as_mut()); // Poll `b_fut` once so it is registered
+            assert!(b.is_pending());
+
+            let c = semaphore.try_acquire(1);
+            assert!(c.is_none());
+
+            let c_fut = semaphore.acquire(1);
+            let mut c_fut = pin!(c_fut);
+            let c = poll!(c_fut.as_mut()); // Poll `c_fut` once so it is registered
+            assert!(c.is_pending()); // `c` is blocked behind `b`
+
+            let d = semaphore.acquire(1).await;
+            assert!(matches!(d, Err(WaitQueueFull)));
+
+            core::mem::drop(a);
+
+            let c = poll!(c_fut.as_mut());
+            assert!(c.is_pending()); // `c` is still blocked behind `b`
+
+            let b = poll!(b_fut.as_mut());
+            assert!(b.is_ready());
+
+            let c = poll!(c_fut.as_mut());
+            assert!(c.is_pending()); // `c` is still blocked behind `b`
+
+            core::mem::drop(b);
+
+            let c = poll!(c_fut.as_mut());
+            assert!(c.is_ready());
+        }
+    }
+}
diff --git a/embassy-sync/src/signal.rs b/embassy-sync/src/signal.rs
index d75750ce7..520f1a896 100644
--- a/embassy-sync/src/signal.rs
+++ b/embassy-sync/src/signal.rs
@@ -125,7 +125,7 @@ where
         })
     }
 
-    /// non-blocking method to check whether this signal has been signaled.
+    /// non-blocking method to check whether this signal has been signaled. This does not clear the signal.  
     pub fn signaled(&self) -> bool {
         self.state.lock(|cell| {
             let state = cell.replace(State::None);
diff --git a/embassy-time/src/fmt.rs b/embassy-time/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-time/src/fmt.rs
+++ b/embassy-time/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-time/src/timer.rs b/embassy-time/src/timer.rs
index daa4c1699..757c3ff00 100644
--- a/embassy-time/src/timer.rs
+++ b/embassy-time/src/timer.rs
@@ -190,8 +190,20 @@ impl Ticker {
         self.expires_at = Instant::now() + self.duration;
     }
 
+    /// Reset the ticker at the deadline.
+    /// If the deadline is in the past, the ticker will fire instantly.
+    pub fn reset_at(&mut self, deadline: Instant) {
+        self.expires_at = deadline + self.duration;
+    }
+
+    /// Resets the ticker, after the specified duration has passed.
+    /// If the specified duration is zero, the next tick will be after the duration of the ticker.
+    pub fn reset_after(&mut self, after: Duration) {
+        self.expires_at = Instant::now() + after + self.duration;
+    }
+
     /// Waits for the next tick.
-    pub fn next(&mut self) -> impl Future<Output = ()> + '_ {
+    pub fn next(&mut self) -> impl Future<Output = ()> + Send + Sync + '_ {
         poll_fn(|cx| {
             if self.expires_at <= Instant::now() {
                 let dur = self.duration;
diff --git a/embassy-usb-dfu/src/fmt.rs b/embassy-usb-dfu/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-usb-dfu/src/fmt.rs
+++ b/embassy-usb-dfu/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-usb-logger/src/lib.rs b/embassy-usb-logger/src/lib.rs
index da5ff0f36..34d1ca663 100644
--- a/embassy-usb-logger/src/lib.rs
+++ b/embassy-usb-logger/src/lib.rs
@@ -16,7 +16,6 @@ type CS = embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
 /// The logger state containing buffers that must live as long as the USB peripheral.
 pub struct LoggerState<'d> {
     state: State<'d>,
-    device_descriptor: [u8; 32],
     config_descriptor: [u8; 128],
     bos_descriptor: [u8; 16],
     msos_descriptor: [u8; 256],
@@ -28,7 +27,6 @@ impl<'d> LoggerState<'d> {
     pub fn new() -> Self {
         Self {
             state: State::new(),
-            device_descriptor: [0; 32],
             config_descriptor: [0; 128],
             bos_descriptor: [0; 16],
             msos_descriptor: [0; 256],
@@ -74,7 +72,6 @@ impl<const N: usize> UsbLogger<N> {
         let mut builder = Builder::new(
             driver,
             config,
-            &mut state.device_descriptor,
             &mut state.config_descriptor,
             &mut state.bos_descriptor,
             &mut state.msos_descriptor,
@@ -151,7 +148,17 @@ struct Writer<'d, const N: usize>(&'d Pipe<CS, N>);
 
 impl<'d, const N: usize> core::fmt::Write for Writer<'d, N> {
     fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        let _ = self.0.try_write(s.as_bytes());
+        // The Pipe is implemented in such way that we cannot
+        // write across the wraparound discontinuity.
+        let b = s.as_bytes();
+        if let Ok(n) = self.0.try_write(b) {
+            if n < b.len() {
+                // We wrote some data but not all, attempt again
+                // as the reason might be a wraparound in the
+                // ring buffer, which resolves on second attempt.
+                let _ = self.0.try_write(&b[n..]);
+            }
+        }
         Ok(())
     }
 }
diff --git a/embassy-usb/src/builder.rs b/embassy-usb/src/builder.rs
index c4705d041..c06107396 100644
--- a/embassy-usb/src/builder.rs
+++ b/embassy-usb/src/builder.rs
@@ -128,7 +128,6 @@ pub struct Builder<'d, D: Driver<'d>> {
     driver: D,
     next_string_index: u8,
 
-    device_descriptor: DescriptorWriter<'d>,
     config_descriptor: DescriptorWriter<'d>,
     bos_descriptor: BosWriter<'d>,
 
@@ -144,7 +143,6 @@ impl<'d, D: Driver<'d>> Builder<'d, D> {
     pub fn new(
         driver: D,
         config: Config<'d>,
-        device_descriptor_buf: &'d mut [u8],
         config_descriptor_buf: &'d mut [u8],
         bos_descriptor_buf: &'d mut [u8],
         msos_descriptor_buf: &'d mut [u8],
@@ -167,11 +165,9 @@ impl<'d, D: Driver<'d>> Builder<'d, D> {
             _ => panic!("invalid max_packet_size_0, the allowed values are 8, 16, 32 or 64"),
         }
 
-        let mut device_descriptor = DescriptorWriter::new(device_descriptor_buf);
         let mut config_descriptor = DescriptorWriter::new(config_descriptor_buf);
         let mut bos_descriptor = BosWriter::new(DescriptorWriter::new(bos_descriptor_buf));
 
-        device_descriptor.device(&config);
         config_descriptor.configuration(&config);
         bos_descriptor.bos();
 
@@ -183,7 +179,6 @@ impl<'d, D: Driver<'d>> Builder<'d, D> {
             control_buf,
             next_string_index: STRING_INDEX_CUSTOM_START,
 
-            device_descriptor,
             config_descriptor,
             bos_descriptor,
 
@@ -199,7 +194,6 @@ impl<'d, D: Driver<'d>> Builder<'d, D> {
         self.bos_descriptor.end_bos();
 
         // Log the number of allocator bytes actually used in descriptor buffers
-        info!("USB: device_descriptor used: {}", self.device_descriptor.position());
         info!("USB: config_descriptor used: {}", self.config_descriptor.position());
         info!("USB: bos_descriptor used: {}", self.bos_descriptor.writer.position());
         info!("USB: msos_descriptor used: {}", msos_descriptor.len());
@@ -209,7 +203,6 @@ impl<'d, D: Driver<'d>> Builder<'d, D> {
             self.driver,
             self.config,
             self.handlers,
-            self.device_descriptor.into_buf(),
             self.config_descriptor.into_buf(),
             self.bos_descriptor.writer.into_buf(),
             msos_descriptor,
diff --git a/embassy-usb/src/descriptor.rs b/embassy-usb/src/descriptor.rs
index fa83ef583..eb3d1f53a 100644
--- a/embassy-usb/src/descriptor.rs
+++ b/embassy-usb/src/descriptor.rs
@@ -82,30 +82,6 @@ impl<'a> DescriptorWriter<'a> {
         self.position = start + length;
     }
 
-    pub(crate) fn device(&mut self, config: &Config) {
-        self.write(
-            descriptor_type::DEVICE,
-            &[
-                0x10,
-                0x02,                     // bcdUSB 2.1
-                config.device_class,      // bDeviceClass
-                config.device_sub_class,  // bDeviceSubClass
-                config.device_protocol,   // bDeviceProtocol
-                config.max_packet_size_0, // bMaxPacketSize0
-                config.vendor_id as u8,
-                (config.vendor_id >> 8) as u8, // idVendor
-                config.product_id as u8,
-                (config.product_id >> 8) as u8, // idProduct
-                config.device_release as u8,
-                (config.device_release >> 8) as u8,    // bcdDevice
-                config.manufacturer.map_or(0, |_| 1),  // iManufacturer
-                config.product.map_or(0, |_| 2),       // iProduct
-                config.serial_number.map_or(0, |_| 3), // iSerialNumber
-                1,                                     // bNumConfigurations
-            ],
-        );
-    }
-
     pub(crate) fn configuration(&mut self, config: &Config) {
         self.num_interfaces_mark = Some(self.position + 4);
 
@@ -269,6 +245,33 @@ impl<'a> DescriptorWriter<'a> {
     }
 }
 
+/// Create a new Device Descriptor array.
+///
+/// All device descriptors are always 18 bytes, so there's no need for
+/// a variable-length buffer or DescriptorWriter.
+pub(crate) fn device_descriptor(config: &Config) -> [u8; 18] {
+    [
+        18,   // bLength
+        0x01, // bDescriptorType
+        0x10,
+        0x02,                     // bcdUSB 2.1
+        config.device_class,      // bDeviceClass
+        config.device_sub_class,  // bDeviceSubClass
+        config.device_protocol,   // bDeviceProtocol
+        config.max_packet_size_0, // bMaxPacketSize0
+        config.vendor_id as u8,
+        (config.vendor_id >> 8) as u8, // idVendor
+        config.product_id as u8,
+        (config.product_id >> 8) as u8, // idProduct
+        config.device_release as u8,
+        (config.device_release >> 8) as u8,    // bcdDevice
+        config.manufacturer.map_or(0, |_| 1),  // iManufacturer
+        config.product.map_or(0, |_| 2),       // iProduct
+        config.serial_number.map_or(0, |_| 3), // iSerialNumber
+        1,                                     // bNumConfigurations
+    ]
+}
+
 /// A writer for Binary Object Store descriptor.
 pub struct BosWriter<'a> {
     pub(crate) writer: DescriptorWriter<'a>,
diff --git a/embassy-usb/src/fmt.rs b/embassy-usb/src/fmt.rs
index 78e583c1c..2ac42c557 100644
--- a/embassy-usb/src/fmt.rs
+++ b/embassy-usb/src/fmt.rs
@@ -1,5 +1,5 @@
 #![macro_use]
-#![allow(unused_macros)]
+#![allow(unused)]
 
 use core::fmt::{Debug, Display, LowerHex};
 
@@ -229,7 +229,6 @@ impl<T, E> Try for Result<T, E> {
     }
 }
 
-#[allow(unused)]
 pub(crate) struct Bytes<'a>(pub &'a [u8]);
 
 impl<'a> Debug for Bytes<'a> {
diff --git a/embassy-usb/src/lib.rs b/embassy-usb/src/lib.rs
index 241e33a78..d58950838 100644
--- a/embassy-usb/src/lib.rs
+++ b/embassy-usb/src/lib.rs
@@ -168,8 +168,6 @@ struct Interface {
 #[derive(PartialEq, Eq, Copy, Clone, Debug)]
 #[cfg_attr(feature = "defmt", derive(defmt::Format))]
 pub struct UsbBufferReport {
-    /// Number of device descriptor bytes used
-    pub device_descriptor_used: usize,
     /// Number of config descriptor bytes used
     pub config_descriptor_used: usize,
     /// Number of bos descriptor bytes used
@@ -191,7 +189,7 @@ struct Inner<'d, D: Driver<'d>> {
     bus: D::Bus,
 
     config: Config<'d>,
-    device_descriptor: &'d [u8],
+    device_descriptor: [u8; 18],
     config_descriptor: &'d [u8],
     bos_descriptor: &'d [u8],
     msos_descriptor: crate::msos::MsOsDescriptorSet<'d>,
@@ -217,7 +215,6 @@ impl<'d, D: Driver<'d>> UsbDevice<'d, D> {
         driver: D,
         config: Config<'d>,
         handlers: Vec<&'d mut dyn Handler, MAX_HANDLER_COUNT>,
-        device_descriptor: &'d [u8],
         config_descriptor: &'d [u8],
         bos_descriptor: &'d [u8],
         msos_descriptor: crate::msos::MsOsDescriptorSet<'d>,
@@ -227,6 +224,7 @@ impl<'d, D: Driver<'d>> UsbDevice<'d, D> {
         // Start the USB bus.
         // This prevent further allocation by consuming the driver.
         let (bus, control) = driver.start(config.max_packet_size_0 as u16);
+        let device_descriptor = descriptor::device_descriptor(&config);
 
         Self {
             control_buf,
@@ -256,7 +254,6 @@ impl<'d, D: Driver<'d>> UsbDevice<'d, D> {
     /// Useful for tuning buffer sizes for actual usage
     pub fn buffer_usage(&self) -> UsbBufferReport {
         UsbBufferReport {
-            device_descriptor_used: self.inner.device_descriptor.len(),
             config_descriptor_used: self.inner.config_descriptor.len(),
             bos_descriptor_used: self.inner.bos_descriptor.len(),
             msos_descriptor_used: self.inner.msos_descriptor.len(),
@@ -720,7 +717,7 @@ impl<'d, D: Driver<'d>> Inner<'d, D> {
 
         match dtype {
             descriptor_type::BOS => InResponse::Accepted(self.bos_descriptor),
-            descriptor_type::DEVICE => InResponse::Accepted(self.device_descriptor),
+            descriptor_type::DEVICE => InResponse::Accepted(&self.device_descriptor),
             descriptor_type::CONFIGURATION => InResponse::Accepted(self.config_descriptor),
             descriptor_type::STRING => {
                 if index == 0 {
diff --git a/embassy-usb/src/msos.rs b/embassy-usb/src/msos.rs
index 3858c0f51..25936d084 100644
--- a/embassy-usb/src/msos.rs
+++ b/embassy-usb/src/msos.rs
@@ -226,27 +226,21 @@ pub mod windows_version {
     pub const WIN10: u32 = 0x0A000000;
 }
 
-mod sealed {
-    use core::mem::size_of;
+/// A trait for descriptors
+trait Descriptor: Sized {
+    const TYPE: DescriptorType;
 
-    /// A trait for descriptors
-    pub trait Descriptor: Sized {
-        const TYPE: super::DescriptorType;
-
-        /// The size of the descriptor's header.
-        fn size(&self) -> usize {
-            size_of::<Self>()
-        }
-
-        fn write_to(&self, buf: &mut [u8]);
+    /// The size of the descriptor's header.
+    fn size(&self) -> usize {
+        size_of::<Self>()
     }
 
-    pub trait DescriptorSet: Descriptor {
-        const LENGTH_OFFSET: usize;
-    }
+    fn write_to(&self, buf: &mut [u8]);
 }
 
-use sealed::*;
+trait DescriptorSet: Descriptor {
+    const LENGTH_OFFSET: usize;
+}
 
 /// Copies the data of `t` into `buf`.
 ///
@@ -255,7 +249,7 @@ use sealed::*;
 unsafe fn transmute_write_to<T: Sized>(t: &T, buf: &mut [u8]) {
     let bytes = core::slice::from_raw_parts((t as *const T) as *const u8, size_of::<T>());
     assert!(buf.len() >= bytes.len(), "MS OS descriptor buffer full");
-    (&mut buf[..bytes.len()]).copy_from_slice(bytes);
+    buf[..bytes.len()].copy_from_slice(bytes);
 }
 
 /// Table 9. Microsoft OS 2.0 descriptor wDescriptorType values.
@@ -412,9 +406,11 @@ impl DescriptorSet for FunctionSubsetHeader {
 // Feature Descriptors
 
 /// A marker trait for feature descriptors that are valid at the device level.
+#[allow(private_bounds)]
 pub trait DeviceLevelDescriptor: Descriptor {}
 
 /// A marker trait for feature descriptors that are valid at the function level.
+#[allow(private_bounds)]
 pub trait FunctionLevelDescriptor: Descriptor {}
 
 /// Table 13. Microsoft OS 2.0 compatible ID descriptor.
@@ -444,9 +440,9 @@ impl CompatibleIdFeatureDescriptor {
     pub fn new(compatible_id: &str, sub_compatible_id: &str) -> Self {
         assert!(compatible_id.len() <= 8 && sub_compatible_id.len() <= 8);
         let mut cid = [0u8; 8];
-        (&mut cid[..compatible_id.len()]).copy_from_slice(compatible_id.as_bytes());
+        cid[..compatible_id.len()].copy_from_slice(compatible_id.as_bytes());
         let mut scid = [0u8; 8];
-        (&mut scid[..sub_compatible_id.len()]).copy_from_slice(sub_compatible_id.as_bytes());
+        scid[..sub_compatible_id.len()].copy_from_slice(sub_compatible_id.as_bytes());
         Self::new_raw(cid, scid)
     }
 
diff --git a/examples/boot/application/stm32wb-dfu/src/main.rs b/examples/boot/application/stm32wb-dfu/src/main.rs
index 37c3d7d90..929d6802c 100644
--- a/examples/boot/application/stm32wb-dfu/src/main.rs
+++ b/examples/boot/application/stm32wb-dfu/src/main.rs
@@ -41,7 +41,6 @@ async fn main(_spawner: Spawner) {
     config.product = Some("USB-DFU Runtime example");
     config.serial_number = Some("1235678");
 
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -49,7 +48,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [],
diff --git a/examples/boot/bootloader/stm32wb-dfu/src/main.rs b/examples/boot/bootloader/stm32wb-dfu/src/main.rs
index d989fbfdf..093b39f9d 100644
--- a/examples/boot/bootloader/stm32wb-dfu/src/main.rs
+++ b/examples/boot/bootloader/stm32wb-dfu/src/main.rs
@@ -49,7 +49,6 @@ fn main() -> ! {
         let mut buffer = AlignedBuffer([0; WRITE_SIZE]);
         let updater = BlockingFirmwareUpdater::new(fw_config, &mut buffer.0[..]);
 
-        let mut device_descriptor = [0; 256];
         let mut config_descriptor = [0; 256];
         let mut bos_descriptor = [0; 256];
         let mut control_buf = [0; 4096];
@@ -57,7 +56,6 @@ fn main() -> ! {
         let mut builder = Builder::new(
             driver,
             config,
-            &mut device_descriptor,
             &mut config_descriptor,
             &mut bos_descriptor,
             &mut [],
diff --git a/examples/nrf52840/src/bin/usb_ethernet.rs b/examples/nrf52840/src/bin/usb_ethernet.rs
index 3469c6e5f..a7e5c2668 100644
--- a/examples/nrf52840/src/bin/usb_ethernet.rs
+++ b/examples/nrf52840/src/bin/usb_ethernet.rs
@@ -70,7 +70,6 @@ async fn main(spawner: Spawner) {
     config.device_protocol = 0x01;
 
     // Create embassy-usb DeviceBuilder using the driver and config.
-    static DEVICE_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static CONFIG_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static BOS_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static MSOS_DESC: StaticCell<[u8; 128]> = StaticCell::new();
@@ -78,7 +77,6 @@ async fn main(spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut DEVICE_DESC.init([0; 256])[..],
         &mut CONFIG_DESC.init([0; 256])[..],
         &mut BOS_DESC.init([0; 256])[..],
         &mut MSOS_DESC.init([0; 128])[..],
diff --git a/examples/nrf52840/src/bin/usb_hid_keyboard.rs b/examples/nrf52840/src/bin/usb_hid_keyboard.rs
index 3e86590c4..52f081487 100644
--- a/examples/nrf52840/src/bin/usb_hid_keyboard.rs
+++ b/examples/nrf52840/src/bin/usb_hid_keyboard.rs
@@ -50,7 +50,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut msos_descriptor = [0; 256];
@@ -63,7 +62,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/nrf52840/src/bin/usb_hid_mouse.rs b/examples/nrf52840/src/bin/usb_hid_mouse.rs
index 04ad841b7..5d2837793 100644
--- a/examples/nrf52840/src/bin/usb_hid_mouse.rs
+++ b/examples/nrf52840/src/bin/usb_hid_mouse.rs
@@ -43,7 +43,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut msos_descriptor = [0; 256];
@@ -55,7 +54,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/nrf52840/src/bin/usb_serial.rs b/examples/nrf52840/src/bin/usb_serial.rs
index aff539b1b..02048e692 100644
--- a/examples/nrf52840/src/bin/usb_serial.rs
+++ b/examples/nrf52840/src/bin/usb_serial.rs
@@ -48,7 +48,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut msos_descriptor = [0; 256];
@@ -59,7 +58,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/nrf52840/src/bin/usb_serial_multitask.rs b/examples/nrf52840/src/bin/usb_serial_multitask.rs
index 4e8118fb8..895cca8b9 100644
--- a/examples/nrf52840/src/bin/usb_serial_multitask.rs
+++ b/examples/nrf52840/src/bin/usb_serial_multitask.rs
@@ -67,7 +67,6 @@ async fn main(spawner: Spawner) {
     let state = STATE.init(State::new());
 
     // Create embassy-usb DeviceBuilder using the driver and config.
-    static DEVICE_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static CONFIG_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static BOS_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static MSOS_DESC: StaticCell<[u8; 128]> = StaticCell::new();
@@ -75,7 +74,6 @@ async fn main(spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut DEVICE_DESC.init([0; 256])[..],
         &mut CONFIG_DESC.init([0; 256])[..],
         &mut BOS_DESC.init([0; 256])[..],
         &mut MSOS_DESC.init([0; 128])[..],
diff --git a/examples/nrf52840/src/bin/usb_serial_winusb.rs b/examples/nrf52840/src/bin/usb_serial_winusb.rs
index 060f9ba94..c6675a3d3 100644
--- a/examples/nrf52840/src/bin/usb_serial_winusb.rs
+++ b/examples/nrf52840/src/bin/usb_serial_winusb.rs
@@ -53,7 +53,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut msos_descriptor = [0; 256];
@@ -64,7 +63,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/rp/src/bin/multicore.rs b/examples/rp/src/bin/multicore.rs
index c7b087476..7cb546c91 100644
--- a/examples/rp/src/bin/multicore.rs
+++ b/examples/rp/src/bin/multicore.rs
@@ -30,10 +30,14 @@ fn main() -> ! {
     let p = embassy_rp::init(Default::default());
     let led = Output::new(p.PIN_25, Level::Low);
 
-    spawn_core1(p.CORE1, unsafe { &mut CORE1_STACK }, move || {
-        let executor1 = EXECUTOR1.init(Executor::new());
-        executor1.run(|spawner| unwrap!(spawner.spawn(core1_task(led))));
-    });
+    spawn_core1(
+        p.CORE1,
+        unsafe { &mut *core::ptr::addr_of_mut!(CORE1_STACK) },
+        move || {
+            let executor1 = EXECUTOR1.init(Executor::new());
+            executor1.run(|spawner| unwrap!(spawner.spawn(core1_task(led))));
+        },
+    );
 
     let executor0 = EXECUTOR0.init(Executor::new());
     executor0.run(|spawner| unwrap!(spawner.spawn(core0_task())));
diff --git a/examples/rp/src/bin/pio_hd44780.rs b/examples/rp/src/bin/pio_hd44780.rs
index 3fab7b5f2..6c02630e0 100644
--- a/examples/rp/src/bin/pio_hd44780.rs
+++ b/examples/rp/src/bin/pio_hd44780.rs
@@ -35,7 +35,7 @@ async fn main(_spawner: Spawner) {
     // allowing direct connection of the display to the RP2040 without level shifters.
     let p = embassy_rp::init(Default::default());
 
-    let _pwm = Pwm::new_output_b(p.PWM_CH7, p.PIN_15, {
+    let _pwm = Pwm::new_output_b(p.PWM_SLICE7, p.PIN_15, {
         let mut c = pwm::Config::default();
         c.divider = 125.into();
         c.top = 100;
diff --git a/examples/rp/src/bin/pio_uart.rs b/examples/rp/src/bin/pio_uart.rs
index a07f1c180..53b696309 100644
--- a/examples/rp/src/bin/pio_uart.rs
+++ b/examples/rp/src/bin/pio_uart.rs
@@ -60,7 +60,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -70,7 +69,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/rp/src/bin/pwm.rs b/examples/rp/src/bin/pwm.rs
index 4fb62546d..26e233260 100644
--- a/examples/rp/src/bin/pwm.rs
+++ b/examples/rp/src/bin/pwm.rs
@@ -18,7 +18,7 @@ async fn main(_spawner: Spawner) {
     let mut c: Config = Default::default();
     c.top = 0x8000;
     c.compare_b = 8;
-    let mut pwm = Pwm::new_output_b(p.PWM_CH4, p.PIN_25, c.clone());
+    let mut pwm = Pwm::new_output_b(p.PWM_SLICE4, p.PIN_25, c.clone());
 
     loop {
         info!("current LED duty cycle: {}/32768", c.compare_b);
diff --git a/examples/rp/src/bin/pwm_input.rs b/examples/rp/src/bin/pwm_input.rs
index e7bcbfbd4..0652dc42b 100644
--- a/examples/rp/src/bin/pwm_input.rs
+++ b/examples/rp/src/bin/pwm_input.rs
@@ -14,7 +14,7 @@ async fn main(_spawner: Spawner) {
     let p = embassy_rp::init(Default::default());
 
     let cfg: Config = Default::default();
-    let pwm = Pwm::new_input(p.PWM_CH2, p.PIN_5, InputMode::RisingEdge, cfg);
+    let pwm = Pwm::new_input(p.PWM_SLICE2, p.PIN_5, InputMode::RisingEdge, cfg);
 
     let mut ticker = Ticker::every(Duration::from_secs(1));
     loop {
diff --git a/examples/rp/src/bin/usb_ethernet.rs b/examples/rp/src/bin/usb_ethernet.rs
index 01f0d5967..f1b124efa 100644
--- a/examples/rp/src/bin/usb_ethernet.rs
+++ b/examples/rp/src/bin/usb_ethernet.rs
@@ -64,14 +64,12 @@ async fn main(spawner: Spawner) {
     config.device_protocol = 0x01;
 
     // Create embassy-usb DeviceBuilder using the driver and config.
-    static DEVICE_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static CONFIG_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static BOS_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static CONTROL_BUF: StaticCell<[u8; 128]> = StaticCell::new();
     let mut builder = Builder::new(
         driver,
         config,
-        &mut DEVICE_DESC.init([0; 256])[..],
         &mut CONFIG_DESC.init([0; 256])[..],
         &mut BOS_DESC.init([0; 256])[..],
         &mut [], // no msos descriptors
diff --git a/examples/rp/src/bin/usb_hid_keyboard.rs b/examples/rp/src/bin/usb_hid_keyboard.rs
index b5ac16245..710be8d13 100644
--- a/examples/rp/src/bin/usb_hid_keyboard.rs
+++ b/examples/rp/src/bin/usb_hid_keyboard.rs
@@ -36,7 +36,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     // You can also add a Microsoft OS descriptor.
@@ -50,7 +49,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/rp/src/bin/usb_hid_mouse.rs b/examples/rp/src/bin/usb_hid_mouse.rs
index afebd8813..e8b399cb1 100644
--- a/examples/rp/src/bin/usb_hid_mouse.rs
+++ b/examples/rp/src/bin/usb_hid_mouse.rs
@@ -39,7 +39,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     // You can also add a Microsoft OS descriptor.
@@ -53,7 +52,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/rp/src/bin/usb_midi.rs b/examples/rp/src/bin/usb_midi.rs
index 95306a35c..11db1b2e1 100644
--- a/examples/rp/src/bin/usb_midi.rs
+++ b/examples/rp/src/bin/usb_midi.rs
@@ -46,7 +46,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -54,7 +53,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/rp/src/bin/usb_raw.rs b/examples/rp/src/bin/usb_raw.rs
index a6c8a5b2e..97e7e0244 100644
--- a/examples/rp/src/bin/usb_raw.rs
+++ b/examples/rp/src/bin/usb_raw.rs
@@ -93,7 +93,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut msos_descriptor = [0; 256];
@@ -106,7 +105,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/rp/src/bin/usb_raw_bulk.rs b/examples/rp/src/bin/usb_raw_bulk.rs
index 0dc8e9f72..331c3da4c 100644
--- a/examples/rp/src/bin/usb_raw_bulk.rs
+++ b/examples/rp/src/bin/usb_raw_bulk.rs
@@ -71,7 +71,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut msos_descriptor = [0; 256];
@@ -80,7 +79,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/rp/src/bin/usb_serial.rs b/examples/rp/src/bin/usb_serial.rs
index ab24a994c..3c9bc96dd 100644
--- a/examples/rp/src/bin/usb_serial.rs
+++ b/examples/rp/src/bin/usb_serial.rs
@@ -46,7 +46,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -56,7 +55,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/rp/src/bin/usb_serial_with_logger.rs b/examples/rp/src/bin/usb_serial_with_logger.rs
index 4ba4fc25c..f9cfdef94 100644
--- a/examples/rp/src/bin/usb_serial_with_logger.rs
+++ b/examples/rp/src/bin/usb_serial_with_logger.rs
@@ -46,7 +46,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -57,7 +56,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/std/src/bin/net.rs b/examples/std/src/bin/net.rs
index dad93d0a1..59813d8cb 100644
--- a/examples/std/src/bin/net.rs
+++ b/examples/std/src/bin/net.rs
@@ -1,5 +1,3 @@
-use std::default::Default;
-
 use clap::Parser;
 use embassy_executor::{Executor, Spawner};
 use embassy_net::tcp::TcpSocket;
diff --git a/examples/std/src/bin/net_dns.rs b/examples/std/src/bin/net_dns.rs
index fca1e076e..3b6a3de37 100644
--- a/examples/std/src/bin/net_dns.rs
+++ b/examples/std/src/bin/net_dns.rs
@@ -1,5 +1,3 @@
-use std::default::Default;
-
 use clap::Parser;
 use embassy_executor::{Executor, Spawner};
 use embassy_net::dns::DnsQueryType;
diff --git a/examples/std/src/bin/tcp_accept.rs b/examples/std/src/bin/tcp_accept.rs
index 00ccd83a7..e8b6eaa6c 100644
--- a/examples/std/src/bin/tcp_accept.rs
+++ b/examples/std/src/bin/tcp_accept.rs
@@ -1,5 +1,4 @@
 use core::fmt::Write as _;
-use std::default::Default;
 
 use clap::Parser;
 use embassy_executor::{Executor, Spawner};
diff --git a/examples/stm32f1/Cargo.toml b/examples/stm32f1/Cargo.toml
index df5d32f70..4f282f326 100644
--- a/examples/stm32f1/Cargo.toml
+++ b/examples/stm32f1/Cargo.toml
@@ -23,6 +23,7 @@ panic-probe = { version = "0.3", features = ["print-defmt"] }
 futures = { version = "0.3.17", default-features = false, features = ["async-await"] }
 heapless = { version = "0.8", default-features = false }
 nb = "1.0.0"
+static_cell = "2.0.0"
 
 [profile.dev]
 opt-level = "s"
diff --git a/examples/stm32f1/src/bin/can.rs b/examples/stm32f1/src/bin/can.rs
index c1c4f8359..1c13d623d 100644
--- a/examples/stm32f1/src/bin/can.rs
+++ b/examples/stm32f1/src/bin/can.rs
@@ -3,11 +3,14 @@
 
 use defmt::*;
 use embassy_executor::Spawner;
-use embassy_stm32::can::bxcan::filter::Mask32;
-use embassy_stm32::can::bxcan::{Fifo, Frame, Id, StandardId};
-use embassy_stm32::can::{Can, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, TxInterruptHandler};
+use embassy_stm32::can::frame::Envelope;
+use embassy_stm32::can::{
+    filter, Can, Fifo, Frame, Id, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, StandardId,
+    TxInterruptHandler,
+};
 use embassy_stm32::peripherals::CAN;
 use embassy_stm32::{bind_interrupts, Config};
+use static_cell::StaticCell;
 use {defmt_rtt as _, panic_probe as _};
 
 bind_interrupts!(struct Irqs {
@@ -20,6 +23,27 @@ bind_interrupts!(struct Irqs {
 // This example is configured to work with real CAN transceivers on B8/B9.
 // See other examples for loopback.
 
+fn handle_frame(env: Envelope, read_mode: &str) {
+    match env.frame.id() {
+        Id::Extended(id) => {
+            defmt::println!(
+                "{} Extended Frame id={:x} {:02x}",
+                read_mode,
+                id.as_raw(),
+                env.frame.data()
+            );
+        }
+        Id::Standard(id) => {
+            defmt::println!(
+                "{} Standard Frame id={:x} {:02x}",
+                read_mode,
+                id.as_raw(),
+                env.frame.data()
+            );
+        }
+    }
+}
+
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
     let p = embassy_stm32::init(Config::default());
@@ -27,36 +51,86 @@ async fn main(_spawner: Spawner) {
     // Set alternate pin mapping to B8/B9
     embassy_stm32::pac::AFIO.mapr().modify(|w| w.set_can1_remap(2));
 
+    static RX_BUF: StaticCell<embassy_stm32::can::RxBuf<10>> = StaticCell::new();
+    static TX_BUF: StaticCell<embassy_stm32::can::TxBuf<10>> = StaticCell::new();
+
     let mut can = Can::new(p.CAN, p.PB8, p.PB9, Irqs);
 
-    can.as_mut()
-        .modify_filters()
-        .enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
+    can.modify_filters()
+        .enable_bank(0, Fifo::Fifo0, filter::Mask32::accept_all());
 
-    can.as_mut()
-        .modify_config()
+    can.modify_config()
         .set_loopback(false)
         .set_silent(false)
-        .leave_disabled();
-
-    can.set_bitrate(250_000);
+        .set_bitrate(250_000);
 
     can.enable().await;
-
     let mut i: u8 = 0;
-    loop {
-        let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), [i]);
-        can.write(&tx_frame).await;
 
-        match can.read().await {
-            Ok(env) => match env.frame.id() {
-                Id::Extended(id) => {
-                    defmt::println!("Extended Frame id={:x}", id.as_raw());
-                }
-                Id::Standard(id) => {
-                    defmt::println!("Standard Frame id={:x}", id.as_raw());
-                }
-            },
+    /*
+       // Example for using buffered Tx and Rx without needing to
+       // split first as is done below.
+       let mut can = can.buffered(
+           TX_BUF.init(embassy_stm32::can::TxBuf::<10>::new()),
+           RX_BUF.init(embassy_stm32::can::RxBuf::<10>::new()));
+       loop {
+           let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), &[i, 0, 1, 2, 3, 4, 5, 6]).unwrap();
+           can.write(&tx_frame).await;
+
+           match can.read().await {
+               Ok((frame, ts)) => {
+                   handle_frame(Envelope { ts, frame }, "Buf");
+               }
+               Err(err) => {
+                   defmt::println!("Error {}", err);
+               }
+           }
+           i += 1;
+       }
+
+    */
+    let (mut tx, mut rx) = can.split();
+
+    // This example shows using the wait_not_empty API before try read
+    while i < 3 {
+        let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), &[i, 0, 1, 2, 3, 4, 5, 6]).unwrap();
+        tx.write(&tx_frame).await;
+
+        rx.wait_not_empty().await;
+        let env = rx.try_read().unwrap();
+        handle_frame(env, "Wait");
+        i += 1;
+    }
+
+    // This example shows using the full async non-buffered API
+    while i < 6 {
+        let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), &[i, 0, 1, 2, 3, 4, 5, 6]).unwrap();
+        tx.write(&tx_frame).await;
+
+        match rx.read().await {
+            Ok(env) => {
+                handle_frame(env, "NoBuf");
+            }
+            Err(err) => {
+                defmt::println!("Error {}", err);
+            }
+        }
+        i += 1;
+    }
+
+    // This example shows using buffered RX and TX. User passes in desired buffer (size)
+    // It's possible this way to have just RX or TX buffered.
+    let mut rx = rx.buffered(RX_BUF.init(embassy_stm32::can::RxBuf::<10>::new()));
+    let mut tx = tx.buffered(TX_BUF.init(embassy_stm32::can::TxBuf::<10>::new()));
+
+    loop {
+        let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), &[i, 0, 1, 2, 3, 4, 5, 6]).unwrap();
+        tx.write(&tx_frame).await;
+
+        match rx.read().await {
+            Ok(envelope) => {
+                handle_frame(envelope, "Buf");
+            }
             Err(err) => {
                 defmt::println!("Error {}", err);
             }
diff --git a/examples/stm32f1/src/bin/usb_serial.rs b/examples/stm32f1/src/bin/usb_serial.rs
index 1ae6c1dee..ee99acf41 100644
--- a/examples/stm32f1/src/bin/usb_serial.rs
+++ b/examples/stm32f1/src/bin/usb_serial.rs
@@ -60,7 +60,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 7];
@@ -70,7 +69,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32f2/src/bin/pll.rs b/examples/stm32f2/src/bin/pll.rs
index e32f283d1..e39e2daec 100644
--- a/examples/stm32f2/src/bin/pll.rs
+++ b/examples/stm32f2/src/bin/pll.rs
@@ -1,8 +1,6 @@
 #![no_std]
 #![no_main]
 
-use core::convert::TryFrom;
-
 use defmt::*;
 use embassy_executor::Spawner;
 use embassy_stm32::time::Hertz;
diff --git a/examples/stm32f3/src/bin/usb_serial.rs b/examples/stm32f3/src/bin/usb_serial.rs
index ee1c43afd..5760f2c1c 100644
--- a/examples/stm32f3/src/bin/usb_serial.rs
+++ b/examples/stm32f3/src/bin/usb_serial.rs
@@ -54,7 +54,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 7];
@@ -64,7 +63,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32f4/src/bin/can.rs b/examples/stm32f4/src/bin/can.rs
index d074b4265..cedc057a7 100644
--- a/examples/stm32f4/src/bin/can.rs
+++ b/examples/stm32f4/src/bin/can.rs
@@ -4,9 +4,10 @@
 use defmt::*;
 use embassy_executor::Spawner;
 use embassy_stm32::bind_interrupts;
-use embassy_stm32::can::bxcan::filter::Mask32;
-use embassy_stm32::can::bxcan::{Fifo, Frame, StandardId};
-use embassy_stm32::can::{Can, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, TxInterruptHandler};
+use embassy_stm32::can::filter::Mask32;
+use embassy_stm32::can::{
+    Can, Fifo, Frame, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, StandardId, TxInterruptHandler,
+};
 use embassy_stm32::gpio::{Input, Pull};
 use embassy_stm32::peripherals::CAN1;
 use embassy_time::Instant;
@@ -34,23 +35,18 @@ async fn main(_spawner: Spawner) {
 
     let mut can = Can::new(p.CAN1, p.PA11, p.PA12, Irqs);
 
-    can.as_mut()
-        .modify_filters()
-        .enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
+    can.modify_filters().enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
 
-    can.as_mut()
-        .modify_config()
+    can.modify_config()
         .set_loopback(true) // Receive own frames
         .set_silent(true)
-        .leave_disabled();
-
-    can.set_bitrate(1_000_000);
+        .set_bitrate(1_000_000);
 
     can.enable().await;
 
     let mut i: u8 = 0;
     loop {
-        let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), [i]);
+        let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), &[i]).unwrap();
         let tx_ts = Instant::now();
         can.write(&tx_frame).await;
 
@@ -64,7 +60,7 @@ async fn main(_spawner: Spawner) {
 
         info!(
             "loopback frame {=u8}, latency: {} us",
-            unwrap!(envelope.frame.data())[0],
+            envelope.frame.data()[0],
             latency.as_micros()
         );
         i += 1;
diff --git a/examples/stm32f4/src/bin/usb_ethernet.rs b/examples/stm32f4/src/bin/usb_ethernet.rs
index a196259a8..d2cbeea1b 100644
--- a/examples/stm32f4/src/bin/usb_ethernet.rs
+++ b/examples/stm32f4/src/bin/usb_ethernet.rs
@@ -7,8 +7,8 @@ use embassy_net::tcp::TcpSocket;
 use embassy_net::{Stack, StackResources};
 use embassy_stm32::rng::{self, Rng};
 use embassy_stm32::time::Hertz;
-use embassy_stm32::usb_otg::Driver;
-use embassy_stm32::{bind_interrupts, peripherals, usb_otg, Config};
+use embassy_stm32::usb::Driver;
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_usb::class::cdc_ncm::embassy_net::{Device, Runner, State as NetState};
 use embassy_usb::class::cdc_ncm::{CdcNcmClass, State};
 use embassy_usb::{Builder, UsbDevice};
@@ -36,7 +36,7 @@ async fn net_task(stack: &'static Stack<Device<'static, MTU>>) -> ! {
 }
 
 bind_interrupts!(struct Irqs {
-    OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
     HASH_RNG => rng::InterruptHandler<peripherals::RNG>;
 });
 
@@ -63,13 +63,14 @@ async fn main(spawner: Spawner) {
         config.rcc.apb1_pre = APBPrescaler::DIV4;
         config.rcc.apb2_pre = APBPrescaler::DIV2;
         config.rcc.sys = Sysclk::PLL1_P;
+        config.rcc.mux.clk48sel = mux::Clk48sel::PLL1_Q;
     }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
     static OUTPUT_BUFFER: StaticCell<[u8; 256]> = StaticCell::new();
     let ep_out_buffer = &mut OUTPUT_BUFFER.init([0; 256])[..];
-    let mut config = embassy_stm32::usb_otg::Config::default();
+    let mut config = embassy_stm32::usb::Config::default();
     config.vbus_detection = true;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, ep_out_buffer, config);
 
@@ -88,14 +89,12 @@ async fn main(spawner: Spawner) {
     config.device_protocol = 0x01;
 
     // Create embassy-usb DeviceBuilder using the driver and config.
-    static DEVICE_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static CONFIG_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static BOS_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static CONTROL_BUF: StaticCell<[u8; 128]> = StaticCell::new();
     let mut builder = Builder::new(
         driver,
         config,
-        &mut DEVICE_DESC.init([0; 256])[..],
         &mut CONFIG_DESC.init([0; 256])[..],
         &mut BOS_DESC.init([0; 256])[..],
         &mut [], // no msos descriptors
diff --git a/examples/stm32f4/src/bin/usb_hid_keyboard.rs b/examples/stm32f4/src/bin/usb_hid_keyboard.rs
new file mode 100644
index 000000000..a799b4e72
--- /dev/null
+++ b/examples/stm32f4/src/bin/usb_hid_keyboard.rs
@@ -0,0 +1,221 @@
+#![no_std]
+#![no_main]
+
+use core::sync::atomic::{AtomicBool, Ordering};
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::exti::ExtiInput;
+use embassy_stm32::gpio::Pull;
+use embassy_stm32::time::Hertz;
+use embassy_stm32::usb::Driver;
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
+use embassy_usb::class::hid::{HidReaderWriter, ReportId, RequestHandler, State};
+use embassy_usb::control::OutResponse;
+use embassy_usb::{Builder, Handler};
+use futures::future::join;
+use usbd_hid::descriptor::{KeyboardReport, SerializedDescriptor};
+use {defmt_rtt as _, panic_probe as _};
+
+bind_interrupts!(struct Irqs {
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
+});
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let mut config = Config::default();
+    {
+        use embassy_stm32::rcc::*;
+        config.rcc.hse = Some(Hse {
+            freq: Hertz(8_000_000),
+            mode: HseMode::Bypass,
+        });
+        config.rcc.pll_src = PllSource::HSE;
+        config.rcc.pll = Some(Pll {
+            prediv: PllPreDiv::DIV4,
+            mul: PllMul::MUL168,
+            divp: Some(PllPDiv::DIV2), // 8mhz / 4 * 168 / 2 = 168Mhz.
+            divq: Some(PllQDiv::DIV7), // 8mhz / 4 * 168 / 7 = 48Mhz.
+            divr: None,
+        });
+        config.rcc.ahb_pre = AHBPrescaler::DIV1;
+        config.rcc.apb1_pre = APBPrescaler::DIV4;
+        config.rcc.apb2_pre = APBPrescaler::DIV2;
+        config.rcc.sys = Sysclk::PLL1_P;
+        config.rcc.mux.clk48sel = mux::Clk48sel::PLL1_Q;
+    }
+    let p = embassy_stm32::init(config);
+
+    // Create the driver, from the HAL.
+    let mut ep_out_buffer = [0u8; 256];
+    let mut config = embassy_stm32::usb::Config::default();
+    config.vbus_detection = true;
+    let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
+
+    // Create embassy-usb Config
+    let mut config = embassy_usb::Config::new(0xc0de, 0xcafe);
+    config.manufacturer = Some("Embassy");
+    config.product = Some("HID keyboard example");
+    config.serial_number = Some("12345678");
+    config.max_power = 100;
+    config.max_packet_size_0 = 64;
+
+    // Required for windows compatibility.
+    // https://developer.nordicsemi.com/nRF_Connect_SDK/doc/1.9.1/kconfig/CONFIG_CDC_ACM_IAD.html#help
+    config.device_class = 0xEF;
+    config.device_sub_class = 0x02;
+    config.device_protocol = 0x01;
+    config.composite_with_iads = true;
+
+    // Create embassy-usb DeviceBuilder using the driver and config.
+    // It needs some buffers for building the descriptors.
+    let mut config_descriptor = [0; 256];
+    let mut bos_descriptor = [0; 256];
+    // You can also add a Microsoft OS descriptor.
+    let mut msos_descriptor = [0; 256];
+    let mut control_buf = [0; 64];
+
+    let request_handler = MyRequestHandler {};
+    let mut device_handler = MyDeviceHandler::new();
+
+    let mut state = State::new();
+
+    let mut builder = Builder::new(
+        driver,
+        config,
+        &mut config_descriptor,
+        &mut bos_descriptor,
+        &mut msos_descriptor,
+        &mut control_buf,
+    );
+
+    builder.handler(&mut device_handler);
+
+    // Create classes on the builder.
+    let config = embassy_usb::class::hid::Config {
+        report_descriptor: KeyboardReport::desc(),
+        request_handler: Some(&request_handler),
+        poll_ms: 60,
+        max_packet_size: 8,
+    };
+
+    let hid = HidReaderWriter::<_, 1, 8>::new(&mut builder, &mut state, config);
+
+    // Build the builder.
+    let mut usb = builder.build();
+
+    // Run the USB device.
+    let usb_fut = usb.run();
+
+    let (reader, mut writer) = hid.split();
+
+    let mut button = ExtiInput::new(p.PC13, p.EXTI13, Pull::Down);
+
+    // Do stuff with the class!
+    let in_fut = async {
+        loop {
+            button.wait_for_rising_edge().await;
+            // signal_pin.wait_for_high().await;
+            info!("Button pressed!");
+            // Create a report with the A key pressed. (no shift modifier)
+            let report = KeyboardReport {
+                keycodes: [4, 0, 0, 0, 0, 0],
+                leds: 0,
+                modifier: 0,
+                reserved: 0,
+            };
+            // Send the report.
+            match writer.write_serialize(&report).await {
+                Ok(()) => {}
+                Err(e) => warn!("Failed to send report: {:?}", e),
+            };
+
+            button.wait_for_falling_edge().await;
+            // signal_pin.wait_for_low().await;
+            info!("Button released!");
+            let report = KeyboardReport {
+                keycodes: [0, 0, 0, 0, 0, 0],
+                leds: 0,
+                modifier: 0,
+                reserved: 0,
+            };
+            match writer.write_serialize(&report).await {
+                Ok(()) => {}
+                Err(e) => warn!("Failed to send report: {:?}", e),
+            };
+        }
+    };
+
+    let out_fut = async {
+        reader.run(false, &request_handler).await;
+    };
+
+    // Run everything concurrently.
+    // If we had made everything `'static` above instead, we could do this using separate tasks instead.
+    join(usb_fut, join(in_fut, out_fut)).await;
+}
+
+struct MyRequestHandler {}
+
+impl RequestHandler for MyRequestHandler {
+    fn get_report(&self, id: ReportId, _buf: &mut [u8]) -> Option<usize> {
+        info!("Get report for {:?}", id);
+        None
+    }
+
+    fn set_report(&self, id: ReportId, data: &[u8]) -> OutResponse {
+        info!("Set report for {:?}: {=[u8]}", id, data);
+        OutResponse::Accepted
+    }
+
+    fn set_idle_ms(&self, id: Option<ReportId>, dur: u32) {
+        info!("Set idle rate for {:?} to {:?}", id, dur);
+    }
+
+    fn get_idle_ms(&self, id: Option<ReportId>) -> Option<u32> {
+        info!("Get idle rate for {:?}", id);
+        None
+    }
+}
+
+struct MyDeviceHandler {
+    configured: AtomicBool,
+}
+
+impl MyDeviceHandler {
+    fn new() -> Self {
+        MyDeviceHandler {
+            configured: AtomicBool::new(false),
+        }
+    }
+}
+
+impl Handler for MyDeviceHandler {
+    fn enabled(&mut self, enabled: bool) {
+        self.configured.store(false, Ordering::Relaxed);
+        if enabled {
+            info!("Device enabled");
+        } else {
+            info!("Device disabled");
+        }
+    }
+
+    fn reset(&mut self) {
+        self.configured.store(false, Ordering::Relaxed);
+        info!("Bus reset, the Vbus current limit is 100mA");
+    }
+
+    fn addressed(&mut self, addr: u8) {
+        self.configured.store(false, Ordering::Relaxed);
+        info!("USB address set to: {}", addr);
+    }
+
+    fn configured(&mut self, configured: bool) {
+        self.configured.store(configured, Ordering::Relaxed);
+        if configured {
+            info!("Device configured, it may now draw up to the configured current limit from Vbus.")
+        } else {
+            info!("Device is no longer configured, the Vbus current limit is 100mA.");
+        }
+    }
+}
diff --git a/examples/stm32f4/src/bin/usb_hid_mouse.rs b/examples/stm32f4/src/bin/usb_hid_mouse.rs
index c98792880..0bc236119 100644
--- a/examples/stm32f4/src/bin/usb_hid_mouse.rs
+++ b/examples/stm32f4/src/bin/usb_hid_mouse.rs
@@ -4,8 +4,8 @@
 use defmt::*;
 use embassy_executor::Spawner;
 use embassy_stm32::time::Hertz;
-use embassy_stm32::usb_otg::Driver;
-use embassy_stm32::{bind_interrupts, peripherals, usb_otg, Config};
+use embassy_stm32::usb::Driver;
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_time::Timer;
 use embassy_usb::class::hid::{HidWriter, ReportId, RequestHandler, State};
 use embassy_usb::control::OutResponse;
@@ -15,7 +15,7 @@ use usbd_hid::descriptor::{MouseReport, SerializedDescriptor};
 use {defmt_rtt as _, panic_probe as _};
 
 bind_interrupts!(struct Irqs {
-    OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
 });
 
 #[embassy_executor::main]
@@ -39,12 +39,13 @@ async fn main(_spawner: Spawner) {
         config.rcc.apb1_pre = APBPrescaler::DIV4;
         config.rcc.apb2_pre = APBPrescaler::DIV2;
         config.rcc.sys = Sysclk::PLL1_P;
+        config.rcc.mux.clk48sel = mux::Clk48sel::PLL1_Q;
     }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
     let mut ep_out_buffer = [0u8; 256];
-    let mut config = embassy_stm32::usb_otg::Config::default();
+    let mut config = embassy_stm32::usb::Config::default();
     config.vbus_detection = true;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
 
@@ -63,7 +64,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -75,7 +75,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32f4/src/bin/usb_raw.rs b/examples/stm32f4/src/bin/usb_raw.rs
index afff55187..4e583aeb8 100644
--- a/examples/stm32f4/src/bin/usb_raw.rs
+++ b/examples/stm32f4/src/bin/usb_raw.rs
@@ -52,8 +52,8 @@
 use defmt::*;
 use embassy_executor::Spawner;
 use embassy_stm32::time::Hertz;
-use embassy_stm32::usb_otg::Driver;
-use embassy_stm32::{bind_interrupts, peripherals, usb_otg, Config};
+use embassy_stm32::usb::Driver;
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_usb::control::{InResponse, OutResponse, Recipient, Request, RequestType};
 use embassy_usb::msos::{self, windows_version};
 use embassy_usb::types::InterfaceNumber;
@@ -66,7 +66,7 @@ use {defmt_rtt as _, panic_probe as _};
 const DEVICE_INTERFACE_GUIDS: &[&str] = &["{DAC2087C-63FA-458D-A55D-827C0762DEC7}"];
 
 bind_interrupts!(struct Irqs {
-    OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
 });
 
 #[embassy_executor::main]
@@ -92,12 +92,13 @@ async fn main(_spawner: Spawner) {
         config.rcc.apb1_pre = APBPrescaler::DIV4;
         config.rcc.apb2_pre = APBPrescaler::DIV2;
         config.rcc.sys = Sysclk::PLL1_P;
+        config.rcc.mux.clk48sel = mux::Clk48sel::PLL1_Q;
     }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
     let mut ep_out_buffer = [0u8; 256];
-    let mut config = embassy_stm32::usb_otg::Config::default();
+    let mut config = embassy_stm32::usb::Config::default();
     config.vbus_detection = true;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
 
@@ -116,7 +117,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut msos_descriptor = [0; 256];
@@ -129,7 +129,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut msos_descriptor,
diff --git a/examples/stm32f4/src/bin/usb_serial.rs b/examples/stm32f4/src/bin/usb_serial.rs
index 58d994a61..f3a375d31 100644
--- a/examples/stm32f4/src/bin/usb_serial.rs
+++ b/examples/stm32f4/src/bin/usb_serial.rs
@@ -4,8 +4,8 @@
 use defmt::{panic, *};
 use embassy_executor::Spawner;
 use embassy_stm32::time::Hertz;
-use embassy_stm32::usb_otg::{Driver, Instance};
-use embassy_stm32::{bind_interrupts, peripherals, usb_otg, Config};
+use embassy_stm32::usb::{Driver, Instance};
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_usb::class::cdc_acm::{CdcAcmClass, State};
 use embassy_usb::driver::EndpointError;
 use embassy_usb::Builder;
@@ -13,7 +13,7 @@ use futures::future::join;
 use {defmt_rtt as _, panic_probe as _};
 
 bind_interrupts!(struct Irqs {
-    OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
 });
 
 #[embassy_executor::main]
@@ -39,12 +39,13 @@ async fn main(_spawner: Spawner) {
         config.rcc.apb1_pre = APBPrescaler::DIV4;
         config.rcc.apb2_pre = APBPrescaler::DIV2;
         config.rcc.sys = Sysclk::PLL1_P;
+        config.rcc.mux.clk48sel = mux::Clk48sel::PLL1_Q;
     }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
     let mut ep_out_buffer = [0u8; 256];
-    let mut config = embassy_stm32::usb_otg::Config::default();
+    let mut config = embassy_stm32::usb::Config::default();
     config.vbus_detection = true;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
 
@@ -63,7 +64,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -73,7 +73,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32f4/src/bin/ws2812_pwm.rs b/examples/stm32f4/src/bin/ws2812_pwm.rs
index 6122cea2d..cbaff75fc 100644
--- a/examples/stm32f4/src/bin/ws2812_pwm.rs
+++ b/examples/stm32f4/src/bin/ws2812_pwm.rs
@@ -15,8 +15,9 @@
 use embassy_executor::Spawner;
 use embassy_stm32::gpio::OutputType;
 use embassy_stm32::time::khz;
+use embassy_stm32::timer::low_level::CountingMode;
 use embassy_stm32::timer::simple_pwm::{PwmPin, SimplePwm};
-use embassy_stm32::timer::{Channel, CountingMode};
+use embassy_stm32::timer::Channel;
 use embassy_time::{Duration, Ticker, Timer};
 use {defmt_rtt as _, panic_probe as _};
 
@@ -60,7 +61,7 @@ async fn main(_spawner: Spawner) {
     // construct ws2812 non-return-to-zero (NRZ) code bit by bit
     // ws2812 only need 24 bits for each LED, but we add one bit more to keep PWM output low
 
-    let max_duty = ws2812_pwm.get_max_duty();
+    let max_duty = ws2812_pwm.get_max_duty() as u16;
     let n0 = 8 * max_duty / 25; // ws2812 Bit 0 high level timing
     let n1 = 2 * n0; // ws2812 Bit 1 high level timing
 
diff --git a/examples/stm32f7/src/bin/can.rs b/examples/stm32f7/src/bin/can.rs
index bcfdb67a8..e32b4d3df 100644
--- a/examples/stm32f7/src/bin/can.rs
+++ b/examples/stm32f7/src/bin/can.rs
@@ -1,16 +1,18 @@
 #![no_std]
 #![no_main]
 
+use core::num::{NonZeroU16, NonZeroU8};
+
 use defmt::*;
 use embassy_executor::Spawner;
-use embassy_stm32::bind_interrupts;
-use embassy_stm32::can::bxcan::filter::Mask32;
-use embassy_stm32::can::bxcan::{Fifo, Frame, StandardId};
+use embassy_stm32::can::filter::Mask32;
 use embassy_stm32::can::{
-    Can, CanTx, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, TxInterruptHandler,
+    Can, CanTx, Fifo, Frame, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, StandardId,
+    TxInterruptHandler,
 };
 use embassy_stm32::gpio::{Input, Pull};
 use embassy_stm32::peripherals::CAN3;
+use embassy_stm32::{bind_interrupts, can};
 use static_cell::StaticCell;
 use {defmt_rtt as _, panic_probe as _};
 
@@ -22,9 +24,9 @@ bind_interrupts!(struct Irqs {
 });
 
 #[embassy_executor::task]
-pub async fn send_can_message(tx: &'static mut CanTx<'static, 'static, CAN3>) {
+pub async fn send_can_message(tx: &'static mut CanTx<'static, CAN3>) {
     loop {
-        let frame = Frame::new_data(unwrap!(StandardId::new(0 as _)), [0]);
+        let frame = Frame::new_data(unwrap!(StandardId::new(0 as _)), &[0]).unwrap();
         tx.write(&frame).await;
         embassy_time::Timer::after_secs(1).await;
     }
@@ -45,19 +47,22 @@ async fn main(spawner: Spawner) {
 
     static CAN: StaticCell<Can<'static, CAN3>> = StaticCell::new();
     let can = CAN.init(Can::new(p.CAN3, p.PA8, p.PA15, Irqs));
-    can.as_mut()
-        .modify_filters()
-        .enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
+    can.modify_filters().enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
 
-    can.as_mut()
-        .modify_config()
-        .set_bit_timing(0x001c0001) // http://www.bittiming.can-wiki.info/
-        .set_loopback(true)
-        .enable();
+    can.modify_config()
+        .set_bit_timing(can::util::NominalBitTiming {
+            prescaler: NonZeroU16::new(2).unwrap(),
+            seg1: NonZeroU8::new(13).unwrap(),
+            seg2: NonZeroU8::new(2).unwrap(),
+            sync_jump_width: NonZeroU8::new(1).unwrap(),
+        }) // http://www.bittiming.can-wiki.info/
+        .set_loopback(true);
+
+    can.enable().await;
 
     let (tx, mut rx) = can.split();
 
-    static CAN_TX: StaticCell<CanTx<'static, 'static, CAN3>> = StaticCell::new();
+    static CAN_TX: StaticCell<CanTx<'static, CAN3>> = StaticCell::new();
     let tx = CAN_TX.init(tx);
     spawner.spawn(send_can_message(tx)).unwrap();
 
diff --git a/examples/stm32f7/src/bin/cryp.rs b/examples/stm32f7/src/bin/cryp.rs
index 04927841a..235853cb9 100644
--- a/examples/stm32f7/src/bin/cryp.rs
+++ b/examples/stm32f7/src/bin/cryp.rs
@@ -6,11 +6,15 @@ use aes_gcm::aead::{AeadInPlace, KeyInit};
 use aes_gcm::Aes128Gcm;
 use defmt::info;
 use embassy_executor::Spawner;
-use embassy_stm32::cryp::*;
-use embassy_stm32::Config;
+use embassy_stm32::cryp::{self, *};
+use embassy_stm32::{bind_interrupts, peripherals, Config};
 use embassy_time::Instant;
 use {defmt_rtt as _, panic_probe as _};
 
+bind_interrupts!(struct Irqs {
+    CRYP => cryp::InterruptHandler<peripherals::CRYP>;
+});
+
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) -> ! {
     let config = Config::default();
@@ -19,7 +23,7 @@ async fn main(_spawner: Spawner) -> ! {
     let payload: &[u8] = b"hello world";
     let aad: &[u8] = b"additional data";
 
-    let hw_cryp = Cryp::new(p.CRYP);
+    let mut hw_cryp = Cryp::new(p.CRYP, p.DMA2_CH6, p.DMA2_CH5, Irqs);
     let key: [u8; 16] = [0; 16];
     let mut ciphertext: [u8; 11] = [0; 11];
     let mut plaintext: [u8; 11] = [0; 11];
@@ -29,16 +33,18 @@ async fn main(_spawner: Spawner) -> ! {
 
     // Encrypt in hardware using AES-GCM 128-bit
     let aes_gcm = AesGcm::new(&key, &iv);
-    let mut gcm_encrypt = hw_cryp.start(&aes_gcm, Direction::Encrypt);
-    hw_cryp.aad_blocking(&mut gcm_encrypt, aad, true);
-    hw_cryp.payload_blocking(&mut gcm_encrypt, payload, &mut ciphertext, true);
-    let encrypt_tag = hw_cryp.finish_blocking(gcm_encrypt);
+    let mut gcm_encrypt = hw_cryp.start(&aes_gcm, Direction::Encrypt).await;
+    hw_cryp.aad(&mut gcm_encrypt, aad, true).await;
+    hw_cryp.payload(&mut gcm_encrypt, payload, &mut ciphertext, true).await;
+    let encrypt_tag = hw_cryp.finish(gcm_encrypt).await;
 
     // Decrypt in hardware using AES-GCM 128-bit
-    let mut gcm_decrypt = hw_cryp.start(&aes_gcm, Direction::Decrypt);
-    hw_cryp.aad_blocking(&mut gcm_decrypt, aad, true);
-    hw_cryp.payload_blocking(&mut gcm_decrypt, &ciphertext, &mut plaintext, true);
-    let decrypt_tag = hw_cryp.finish_blocking(gcm_decrypt);
+    let mut gcm_decrypt = hw_cryp.start(&aes_gcm, Direction::Decrypt).await;
+    hw_cryp.aad(&mut gcm_decrypt, aad, true).await;
+    hw_cryp
+        .payload(&mut gcm_decrypt, &ciphertext, &mut plaintext, true)
+        .await;
+    let decrypt_tag = hw_cryp.finish(gcm_decrypt).await;
 
     let hw_end_time = Instant::now();
     let hw_execution_time = hw_end_time - hw_start_time;
diff --git a/examples/stm32f7/src/bin/usb_serial.rs b/examples/stm32f7/src/bin/usb_serial.rs
index 97daf6bd1..39a5512f4 100644
--- a/examples/stm32f7/src/bin/usb_serial.rs
+++ b/examples/stm32f7/src/bin/usb_serial.rs
@@ -4,8 +4,8 @@
 use defmt::{panic, *};
 use embassy_executor::Spawner;
 use embassy_stm32::time::Hertz;
-use embassy_stm32::usb_otg::{Driver, Instance};
-use embassy_stm32::{bind_interrupts, peripherals, usb_otg, Config};
+use embassy_stm32::usb::{Driver, Instance};
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_usb::class::cdc_acm::{CdcAcmClass, State};
 use embassy_usb::driver::EndpointError;
 use embassy_usb::Builder;
@@ -13,7 +13,7 @@ use futures::future::join;
 use {defmt_rtt as _, panic_probe as _};
 
 bind_interrupts!(struct Irqs {
-    OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
 });
 
 #[embassy_executor::main]
@@ -39,12 +39,13 @@ async fn main(_spawner: Spawner) {
         config.rcc.apb1_pre = APBPrescaler::DIV4;
         config.rcc.apb2_pre = APBPrescaler::DIV2;
         config.rcc.sys = Sysclk::PLL1_P;
+        config.rcc.mux.clk48sel = mux::Clk48sel::PLL1_Q;
     }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
     let mut ep_out_buffer = [0u8; 256];
-    let mut config = embassy_stm32::usb_otg::Config::default();
+    let mut config = embassy_stm32::usb::Config::default();
     config.vbus_detection = true;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
 
@@ -63,7 +64,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -73,7 +73,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32g0/src/bin/usb_serial.rs b/examples/stm32g0/src/bin/usb_serial.rs
index 8b9915626..162dfd86b 100644
--- a/examples/stm32g0/src/bin/usb_serial.rs
+++ b/examples/stm32g0/src/bin/usb_serial.rs
@@ -36,7 +36,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 7];
@@ -46,7 +45,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32g4/src/bin/can.rs b/examples/stm32g4/src/bin/can.rs
index 4373a89a8..2ed632a93 100644
--- a/examples/stm32g4/src/bin/can.rs
+++ b/examples/stm32g4/src/bin/can.rs
@@ -36,7 +36,7 @@ async fn main(_spawner: Spawner) {
     }
     let peripherals = embassy_stm32::init(config);
 
-    let mut can = can::FdcanConfigurator::new(peripherals.FDCAN1, peripherals.PA11, peripherals.PA12, Irqs);
+    let mut can = can::CanConfigurator::new(peripherals.FDCAN1, peripherals.PA11, peripherals.PA12, Irqs);
 
     can.set_extended_filter(
         can::filter::ExtendedFilterSlot::_0,
@@ -56,21 +56,22 @@ async fn main(_spawner: Spawner) {
     info!("Configured");
 
     let mut can = can.start(match use_fd {
-        true => can::FdcanOperatingMode::InternalLoopbackMode,
-        false => can::FdcanOperatingMode::NormalOperationMode,
+        true => can::OperatingMode::InternalLoopbackMode,
+        false => can::OperatingMode::NormalOperationMode,
     });
 
     let mut i = 0;
     let mut last_read_ts = embassy_time::Instant::now();
 
     loop {
-        let frame = can::frame::ClassicFrame::new_extended(0x123456F, &[i; 8]).unwrap();
+        let frame = can::frame::Frame::new_extended(0x123456F, &[i; 8]).unwrap();
         info!("Writing frame");
 
         _ = can.write(&frame).await;
 
         match can.read().await {
-            Ok((rx_frame, ts)) => {
+            Ok(envelope) => {
+                let (ts, rx_frame) = (envelope.ts, envelope.frame);
                 let delta = (ts - last_read_ts).as_millis();
                 last_read_ts = ts;
                 info!(
@@ -105,7 +106,8 @@ async fn main(_spawner: Spawner) {
         }
 
         match can.read_fd().await {
-            Ok((rx_frame, ts)) => {
+            Ok(envelope) => {
+                let (ts, rx_frame) = (envelope.ts, envelope.frame);
                 let delta = (ts - last_read_ts).as_millis();
                 last_read_ts = ts;
                 info!(
@@ -129,12 +131,13 @@ async fn main(_spawner: Spawner) {
     let (mut tx, mut rx) = can.split();
     // With split
     loop {
-        let frame = can::frame::ClassicFrame::new_extended(0x123456F, &[i; 8]).unwrap();
+        let frame = can::frame::Frame::new_extended(0x123456F, &[i; 8]).unwrap();
         info!("Writing frame");
         _ = tx.write(&frame).await;
 
         match rx.read().await {
-            Ok((rx_frame, ts)) => {
+            Ok(envelope) => {
+                let (ts, rx_frame) = (envelope.ts, envelope.frame);
                 let delta = (ts - last_read_ts).as_millis();
                 last_read_ts = ts;
                 info!(
@@ -156,7 +159,7 @@ async fn main(_spawner: Spawner) {
         }
     }
 
-    let can = can::Fdcan::join(tx, rx);
+    let can = can::Can::join(tx, rx);
 
     info!("\n\n\nBuffered\n");
     if use_fd {
@@ -173,7 +176,8 @@ async fn main(_spawner: Spawner) {
             _ = can.write(frame).await;
 
             match can.read().await {
-                Ok((rx_frame, ts)) => {
+                Ok(envelope) => {
+                    let (ts, rx_frame) = (envelope.ts, envelope.frame);
                     let delta = (ts - last_read_ts).as_millis();
                     last_read_ts = ts;
                     info!(
@@ -198,7 +202,7 @@ async fn main(_spawner: Spawner) {
             RX_BUF.init(can::RxBuf::<10>::new()),
         );
         loop {
-            let frame = can::frame::ClassicFrame::new_extended(0x123456F, &[i; 8]).unwrap();
+            let frame = can::frame::Frame::new_extended(0x123456F, &[i; 8]).unwrap();
             info!("Writing frame");
 
             // You can use any of these approaches to send. The writer makes it
@@ -208,7 +212,8 @@ async fn main(_spawner: Spawner) {
             can.writer().write(frame).await;
 
             match can.read().await {
-                Ok((rx_frame, ts)) => {
+                Ok(envelope) => {
+                    let (ts, rx_frame) = (envelope.ts, envelope.frame);
                     let delta = (ts - last_read_ts).as_millis();
                     last_read_ts = ts;
                     info!(
diff --git a/examples/stm32g4/src/bin/usb_c_pd.rs b/examples/stm32g4/src/bin/usb_c_pd.rs
new file mode 100644
index 000000000..7caea634f
--- /dev/null
+++ b/examples/stm32g4/src/bin/usb_c_pd.rs
@@ -0,0 +1,86 @@
+#![no_std]
+#![no_main]
+
+use defmt::{error, info, Format};
+use embassy_executor::Spawner;
+use embassy_stm32::ucpd::{self, CcPhy, CcPull, CcSel, CcVState, Ucpd};
+use embassy_stm32::{bind_interrupts, peripherals, Config};
+use embassy_time::{with_timeout, Duration};
+use {defmt_rtt as _, panic_probe as _};
+
+bind_interrupts!(struct Irqs {
+    UCPD1 => ucpd::InterruptHandler<peripherals::UCPD1>;
+});
+
+#[derive(Debug, Format)]
+enum CableOrientation {
+    Normal,
+    Flipped,
+    DebugAccessoryMode,
+}
+
+// Returns true when the cable
+async fn wait_attached<T: ucpd::Instance>(cc_phy: &mut CcPhy<'_, T>) -> CableOrientation {
+    loop {
+        let (cc1, cc2) = cc_phy.vstate();
+        if cc1 == CcVState::LOWEST && cc2 == CcVState::LOWEST {
+            // Detached, wait until attached by monitoring the CC lines.
+            cc_phy.wait_for_vstate_change().await;
+            continue;
+        }
+
+        // Attached, wait for CC lines to be stable for tCCDebounce (100..200ms).
+        if with_timeout(Duration::from_millis(100), cc_phy.wait_for_vstate_change())
+            .await
+            .is_ok()
+        {
+            // State has changed, restart detection procedure.
+            continue;
+        };
+
+        // State was stable for the complete debounce period, check orientation.
+        return match (cc1, cc2) {
+            (_, CcVState::LOWEST) => CableOrientation::Normal,  // CC1 connected
+            (CcVState::LOWEST, _) => CableOrientation::Flipped, // CC2 connected
+            _ => CableOrientation::DebugAccessoryMode,          // Both connected (special cable)
+        };
+    }
+}
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let mut config = Config::default();
+    config.enable_ucpd1_dead_battery = true;
+    let p = embassy_stm32::init(config);
+
+    info!("Hello World!");
+
+    let mut ucpd = Ucpd::new(p.UCPD1, Irqs {}, p.PB6, p.PB4);
+    ucpd.cc_phy().set_pull(CcPull::Sink);
+
+    info!("Waiting for USB connection...");
+    let cable_orientation = wait_attached(ucpd.cc_phy()).await;
+    info!("USB cable connected, orientation: {}", cable_orientation);
+
+    let cc_sel = match cable_orientation {
+        CableOrientation::Normal => {
+            info!("Starting PD communication on CC1 pin");
+            CcSel::CC1
+        }
+        CableOrientation::Flipped => {
+            info!("Starting PD communication on CC2 pin");
+            CcSel::CC2
+        }
+        CableOrientation::DebugAccessoryMode => panic!("No PD communication in DAM"),
+    };
+    let (_cc_phy, mut pd_phy) = ucpd.split_pd_phy(p.DMA1_CH1, p.DMA1_CH2, cc_sel);
+
+    loop {
+        // Enough space for the longest non-extended data message.
+        let mut buf = [0_u8; 30];
+        match pd_phy.receive(buf.as_mut()).await {
+            Ok(n) => info!("USB PD RX: {=[u8]:?}", &buf[..n]),
+            Err(e) => error!("USB PD RX: {}", e),
+        }
+    }
+}
diff --git a/examples/stm32g4/src/bin/usb_serial.rs b/examples/stm32g4/src/bin/usb_serial.rs
index dc95aa6e5..dbe8f27c1 100644
--- a/examples/stm32g4/src/bin/usb_serial.rs
+++ b/examples/stm32g4/src/bin/usb_serial.rs
@@ -56,7 +56,6 @@ async fn main(_spawner: Spawner) {
     config.device_protocol = 0x01;
     config.composite_with_iads = true;
 
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -66,7 +65,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32h5/src/bin/can.rs b/examples/stm32h5/src/bin/can.rs
index 643df27f9..dd625c90a 100644
--- a/examples/stm32h5/src/bin/can.rs
+++ b/examples/stm32h5/src/bin/can.rs
@@ -24,7 +24,7 @@ async fn main(_spawner: Spawner) {
 
     let peripherals = embassy_stm32::init(config);
 
-    let mut can = can::FdcanConfigurator::new(peripherals.FDCAN1, peripherals.PA11, peripherals.PA12, Irqs);
+    let mut can = can::CanConfigurator::new(peripherals.FDCAN1, peripherals.PA11, peripherals.PA12, Irqs);
 
     // 250k bps
     can.set_bitrate(250_000);
@@ -38,12 +38,13 @@ async fn main(_spawner: Spawner) {
     let mut last_read_ts = embassy_time::Instant::now();
 
     loop {
-        let frame = can::frame::ClassicFrame::new_extended(0x123456F, &[i; 8]).unwrap();
+        let frame = can::frame::Frame::new_extended(0x123456F, &[i; 8]).unwrap();
         info!("Writing frame");
         _ = can.write(&frame).await;
 
         match can.read().await {
-            Ok((rx_frame, ts)) => {
+            Ok(envelope) => {
+                let (rx_frame, ts) = envelope.parts();
                 let delta = (ts - last_read_ts).as_millis();
                 last_read_ts = ts;
                 info!(
@@ -69,12 +70,13 @@ async fn main(_spawner: Spawner) {
     let (mut tx, mut rx) = can.split();
     // With split
     loop {
-        let frame = can::frame::ClassicFrame::new_extended(0x123456F, &[i; 8]).unwrap();
+        let frame = can::frame::Frame::new_extended(0x123456F, &[i; 8]).unwrap();
         info!("Writing frame");
         _ = tx.write(&frame).await;
 
         match rx.read().await {
-            Ok((rx_frame, ts)) => {
+            Ok(envelope) => {
+                let (rx_frame, ts) = envelope.parts();
                 let delta = (ts - last_read_ts).as_millis();
                 last_read_ts = ts;
                 info!(
diff --git a/examples/stm32h5/src/bin/usb_serial.rs b/examples/stm32h5/src/bin/usb_serial.rs
index 83477c8fa..4f86bb342 100644
--- a/examples/stm32h5/src/bin/usb_serial.rs
+++ b/examples/stm32h5/src/bin/usb_serial.rs
@@ -65,7 +65,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -75,7 +74,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32h7/src/bin/camera.rs b/examples/stm32h7/src/bin/camera.rs
index e5a104baf..170a5aa28 100644
--- a/examples/stm32h7/src/bin/camera.rs
+++ b/examples/stm32h7/src/bin/camera.rs
@@ -78,9 +78,9 @@ async fn main(_spawner: Spawner) {
     );
 
     defmt::info!("attempting capture");
-    defmt::unwrap!(dcmi.capture(unsafe { &mut FRAME }).await);
+    defmt::unwrap!(dcmi.capture(unsafe { &mut *core::ptr::addr_of_mut!(FRAME) }).await);
 
-    defmt::info!("captured frame: {:x}", unsafe { &FRAME });
+    defmt::info!("captured frame: {:x}", unsafe { &*core::ptr::addr_of!(FRAME) });
 
     defmt::info!("main loop running");
     loop {
diff --git a/examples/stm32h7/src/bin/can.rs b/examples/stm32h7/src/bin/can.rs
index 13a6a5051..22cb27481 100644
--- a/examples/stm32h7/src/bin/can.rs
+++ b/examples/stm32h7/src/bin/can.rs
@@ -24,7 +24,7 @@ async fn main(_spawner: Spawner) {
 
     let peripherals = embassy_stm32::init(config);
 
-    let mut can = can::FdcanConfigurator::new(peripherals.FDCAN1, peripherals.PA11, peripherals.PA12, Irqs);
+    let mut can = can::CanConfigurator::new(peripherals.FDCAN1, peripherals.PA11, peripherals.PA12, Irqs);
 
     // 250k bps
     can.set_bitrate(250_000);
@@ -38,12 +38,13 @@ async fn main(_spawner: Spawner) {
     let mut last_read_ts = embassy_time::Instant::now();
 
     loop {
-        let frame = can::frame::ClassicFrame::new_extended(0x123456F, &[i; 8]).unwrap();
+        let frame = can::frame::Frame::new_extended(0x123456F, &[i; 8]).unwrap();
         info!("Writing frame");
         _ = can.write(&frame).await;
 
         match can.read().await {
-            Ok((rx_frame, ts)) => {
+            Ok(envelope) => {
+                let (rx_frame, ts) = envelope.parts();
                 let delta = (ts - last_read_ts).as_millis();
                 last_read_ts = ts;
                 info!(
@@ -69,12 +70,13 @@ async fn main(_spawner: Spawner) {
     let (mut tx, mut rx) = can.split();
     // With split
     loop {
-        let frame = can::frame::ClassicFrame::new_extended(0x123456F, &[i; 8]).unwrap();
+        let frame = can::frame::Frame::new_extended(0x123456F, &[i; 8]).unwrap();
         info!("Writing frame");
         _ = tx.write(&frame).await;
 
         match rx.read().await {
-            Ok((rx_frame, ts)) => {
+            Ok(envelope) => {
+                let (rx_frame, ts) = envelope.parts();
                 let delta = (ts - last_read_ts).as_millis();
                 last_read_ts = ts;
                 info!(
diff --git a/examples/stm32h7/src/bin/dac_dma.rs b/examples/stm32h7/src/bin/dac_dma.rs
index feec28993..3a9887e3c 100644
--- a/examples/stm32h7/src/bin/dac_dma.rs
+++ b/examples/stm32h7/src/bin/dac_dma.rs
@@ -6,9 +6,9 @@ use embassy_executor::Spawner;
 use embassy_stm32::dac::{DacCh1, DacCh2, ValueArray};
 use embassy_stm32::pac::timer::vals::Mms;
 use embassy_stm32::peripherals::{DAC1, DMA1_CH3, DMA1_CH4, TIM6, TIM7};
-use embassy_stm32::rcc::low_level::RccPeripheral;
+use embassy_stm32::rcc::frequency;
 use embassy_stm32::time::Hertz;
-use embassy_stm32::timer::low_level::BasicInstance;
+use embassy_stm32::timer::low_level::Timer;
 use micromath::F32Ext;
 use {defmt_rtt as _, panic_probe as _};
 
@@ -51,19 +51,19 @@ async fn main(spawner: Spawner) {
     // Obtain two independent channels (p.DAC1 can only be consumed once, though!)
     let (dac_ch1, dac_ch2) = embassy_stm32::dac::Dac::new(p.DAC1, p.DMA1_CH3, p.DMA1_CH4, p.PA4, p.PA5).split();
 
-    spawner.spawn(dac_task1(dac_ch1)).ok();
-    spawner.spawn(dac_task2(dac_ch2)).ok();
+    spawner.spawn(dac_task1(p.TIM6, dac_ch1)).ok();
+    spawner.spawn(dac_task2(p.TIM7, dac_ch2)).ok();
 }
 
 #[embassy_executor::task]
-async fn dac_task1(mut dac: DacCh1<'static, DAC1, DMA1_CH3>) {
+async fn dac_task1(tim: TIM6, mut dac: DacCh1<'static, DAC1, DMA1_CH3>) {
     let data: &[u8; 256] = &calculate_array::<256>();
 
-    info!("TIM6 frequency is {}", TIM6::frequency());
+    info!("TIM6 frequency is {}", frequency::<TIM6>());
     const FREQUENCY: Hertz = Hertz::hz(200);
 
     // Compute the reload value such that we obtain the FREQUENCY for the sine
-    let reload: u32 = (TIM6::frequency().0 / FREQUENCY.0) / data.len() as u32;
+    let reload: u32 = (frequency::<TIM6>().0 / FREQUENCY.0) / data.len() as u32;
 
     // Depends on your clock and on the specific chip used, you may need higher or lower values here
     if reload < 10 {
@@ -74,17 +74,17 @@ async fn dac_task1(mut dac: DacCh1<'static, DAC1, DMA1_CH3>) {
     dac.set_triggering(true);
     dac.enable();
 
-    TIM6::enable_and_reset();
-    TIM6::regs_basic().arr().modify(|w| w.set_arr(reload as u16 - 1));
-    TIM6::regs_basic().cr2().modify(|w| w.set_mms(Mms::UPDATE));
-    TIM6::regs_basic().cr1().modify(|w| {
+    let tim = Timer::new(tim);
+    tim.regs_basic().arr().modify(|w| w.set_arr(reload as u16 - 1));
+    tim.regs_basic().cr2().modify(|w| w.set_mms(Mms::UPDATE));
+    tim.regs_basic().cr1().modify(|w| {
         w.set_opm(false);
         w.set_cen(true);
     });
 
     debug!(
         "TIM6 Frequency {}, Target Frequency {}, Reload {}, Reload as u16 {}, Samples {}",
-        TIM6::frequency(),
+        frequency::<TIM6>(),
         FREQUENCY,
         reload,
         reload as u16,
@@ -99,22 +99,22 @@ async fn dac_task1(mut dac: DacCh1<'static, DAC1, DMA1_CH3>) {
 }
 
 #[embassy_executor::task]
-async fn dac_task2(mut dac: DacCh2<'static, DAC1, DMA1_CH4>) {
+async fn dac_task2(tim: TIM7, mut dac: DacCh2<'static, DAC1, DMA1_CH4>) {
     let data: &[u8; 256] = &calculate_array::<256>();
 
-    info!("TIM7 frequency is {}", TIM7::frequency());
+    info!("TIM7 frequency is {}", frequency::<TIM6>());
 
     const FREQUENCY: Hertz = Hertz::hz(600);
-    let reload: u32 = (TIM7::frequency().0 / FREQUENCY.0) / data.len() as u32;
+    let reload: u32 = (frequency::<TIM7>().0 / FREQUENCY.0) / data.len() as u32;
 
     if reload < 10 {
         error!("Reload value {} below threshold!", reload);
     }
 
-    TIM7::enable_and_reset();
-    TIM7::regs_basic().arr().modify(|w| w.set_arr(reload as u16 - 1));
-    TIM7::regs_basic().cr2().modify(|w| w.set_mms(Mms::UPDATE));
-    TIM7::regs_basic().cr1().modify(|w| {
+    let tim = Timer::new(tim);
+    tim.regs_basic().arr().modify(|w| w.set_arr(reload as u16 - 1));
+    tim.regs_basic().cr2().modify(|w| w.set_mms(Mms::UPDATE));
+    tim.regs_basic().cr1().modify(|w| {
         w.set_opm(false);
         w.set_cen(true);
     });
@@ -125,7 +125,7 @@ async fn dac_task2(mut dac: DacCh2<'static, DAC1, DMA1_CH4>) {
 
     debug!(
         "TIM7 Frequency {}, Target Frequency {}, Reload {}, Reload as u16 {}, Samples {}",
-        TIM7::frequency(),
+        frequency::<TIM7>(),
         FREQUENCY,
         reload,
         reload as u16,
diff --git a/examples/stm32h7/src/bin/eth.rs b/examples/stm32h7/src/bin/eth.rs
index cd9a27fcd..7c7964ecd 100644
--- a/examples/stm32h7/src/bin/eth.rs
+++ b/examples/stm32h7/src/bin/eth.rs
@@ -64,19 +64,21 @@ async fn main(spawner: Spawner) -> ! {
     let mac_addr = [0x00, 0x00, 0xDE, 0xAD, 0xBE, 0xEF];
 
     static PACKETS: StaticCell<PacketQueue<4, 4>> = StaticCell::new();
+    // warning: Not all STM32H7 devices have the exact same pins here
+    // for STM32H747XIH, replace p.PB13 for PG12
     let device = Ethernet::new(
         PACKETS.init(PacketQueue::<4, 4>::new()),
         p.ETH,
         Irqs,
-        p.PA1,
-        p.PA2,
-        p.PC1,
-        p.PA7,
-        p.PC4,
-        p.PC5,
-        p.PG13,
-        p.PB13,
-        p.PG11,
+        p.PA1,  // ref_clk
+        p.PA2,  // mdio
+        p.PC1,  // eth_mdc
+        p.PA7,  // CRS_DV: Carrier Sense
+        p.PC4,  // RX_D0: Received Bit 0
+        p.PC5,  // RX_D1: Received Bit 1
+        p.PG13, // TX_D0: Transmit Bit 0
+        p.PB13, // TX_D1: Transmit Bit 1
+        p.PG11, // TX_EN: Transmit Enable
         GenericSMI::new(0),
         mac_addr,
     );
diff --git a/examples/stm32h7/src/bin/low_level_timer_api.rs b/examples/stm32h7/src/bin/low_level_timer_api.rs
index 049d9967d..a95b44b74 100644
--- a/examples/stm32h7/src/bin/low_level_timer_api.rs
+++ b/examples/stm32h7/src/bin/low_level_timer_api.rs
@@ -3,11 +3,11 @@
 
 use defmt::*;
 use embassy_executor::Spawner;
-use embassy_stm32::gpio::low_level::AFType;
-use embassy_stm32::gpio::Speed;
+use embassy_stm32::gpio::{AFType, Flex, Pull, Speed};
 use embassy_stm32::time::{khz, Hertz};
-use embassy_stm32::timer::*;
-use embassy_stm32::{into_ref, Config, Peripheral, PeripheralRef};
+use embassy_stm32::timer::low_level::{OutputCompareMode, Timer as LLTimer};
+use embassy_stm32::timer::{Channel, Channel1Pin, Channel2Pin, Channel3Pin, Channel4Pin, GeneralInstance32bit4Channel};
+use embassy_stm32::{into_ref, Config, Peripheral};
 use embassy_time::Timer;
 use {defmt_rtt as _, panic_probe as _};
 
@@ -56,11 +56,15 @@ async fn main(_spawner: Spawner) {
         Timer::after_millis(300).await;
     }
 }
-pub struct SimplePwm32<'d, T: CaptureCompare32bitInstance> {
-    inner: PeripheralRef<'d, T>,
+pub struct SimplePwm32<'d, T: GeneralInstance32bit4Channel> {
+    tim: LLTimer<'d, T>,
+    _ch1: Flex<'d>,
+    _ch2: Flex<'d>,
+    _ch3: Flex<'d>,
+    _ch4: Flex<'d>,
 }
 
-impl<'d, T: CaptureCompare32bitInstance> SimplePwm32<'d, T> {
+impl<'d, T: GeneralInstance32bit4Channel> SimplePwm32<'d, T> {
     pub fn new(
         tim: impl Peripheral<P = T> + 'd,
         ch1: impl Peripheral<P = impl Channel1Pin<T>> + 'd,
@@ -69,25 +73,33 @@ impl<'d, T: CaptureCompare32bitInstance> SimplePwm32<'d, T> {
         ch4: impl Peripheral<P = impl Channel4Pin<T>> + 'd,
         freq: Hertz,
     ) -> Self {
-        into_ref!(tim, ch1, ch2, ch3, ch4);
+        into_ref!(ch1, ch2, ch3, ch4);
 
-        T::enable_and_reset();
+        let af1 = ch1.af_num();
+        let af2 = ch2.af_num();
+        let af3 = ch3.af_num();
+        let af4 = ch4.af_num();
+        let mut ch1 = Flex::new(ch1);
+        let mut ch2 = Flex::new(ch2);
+        let mut ch3 = Flex::new(ch3);
+        let mut ch4 = Flex::new(ch4);
+        ch1.set_as_af_unchecked(af1, AFType::OutputPushPull, Pull::None, Speed::VeryHigh);
+        ch2.set_as_af_unchecked(af2, AFType::OutputPushPull, Pull::None, Speed::VeryHigh);
+        ch3.set_as_af_unchecked(af3, AFType::OutputPushPull, Pull::None, Speed::VeryHigh);
+        ch4.set_as_af_unchecked(af4, AFType::OutputPushPull, Pull::None, Speed::VeryHigh);
 
-        ch1.set_speed(Speed::VeryHigh);
-        ch1.set_as_af(ch1.af_num(), AFType::OutputPushPull);
-        ch2.set_speed(Speed::VeryHigh);
-        ch2.set_as_af(ch1.af_num(), AFType::OutputPushPull);
-        ch3.set_speed(Speed::VeryHigh);
-        ch3.set_as_af(ch1.af_num(), AFType::OutputPushPull);
-        ch4.set_speed(Speed::VeryHigh);
-        ch4.set_as_af(ch1.af_num(), AFType::OutputPushPull);
-
-        let mut this = Self { inner: tim };
+        let mut this = Self {
+            tim: LLTimer::new(tim),
+            _ch1: ch1,
+            _ch2: ch2,
+            _ch3: ch3,
+            _ch4: ch4,
+        };
 
         this.set_frequency(freq);
-        this.inner.start();
+        this.tim.start();
 
-        let r = T::regs_gp32();
+        let r = this.tim.regs_gp32();
         r.ccmr_output(0)
             .modify(|w| w.set_ocm(0, OutputCompareMode::PwmMode1.into()));
         r.ccmr_output(0)
@@ -101,23 +113,26 @@ impl<'d, T: CaptureCompare32bitInstance> SimplePwm32<'d, T> {
     }
 
     pub fn enable(&mut self, channel: Channel) {
-        T::regs_gp32().ccer().modify(|w| w.set_cce(channel.index(), true));
+        self.tim.regs_gp32().ccer().modify(|w| w.set_cce(channel.index(), true));
     }
 
     pub fn disable(&mut self, channel: Channel) {
-        T::regs_gp32().ccer().modify(|w| w.set_cce(channel.index(), false));
+        self.tim
+            .regs_gp32()
+            .ccer()
+            .modify(|w| w.set_cce(channel.index(), false));
     }
 
     pub fn set_frequency(&mut self, freq: Hertz) {
-        <T as embassy_stm32::timer::low_level::GeneralPurpose32bitInstance>::set_frequency(&mut self.inner, freq);
+        self.tim.set_frequency(freq);
     }
 
     pub fn get_max_duty(&self) -> u32 {
-        T::regs_gp32().arr().read()
+        self.tim.regs_gp32().arr().read()
     }
 
     pub fn set_duty(&mut self, channel: Channel, duty: u32) {
         defmt::assert!(duty < self.get_max_duty());
-        T::regs_gp32().ccr(channel.index()).write_value(duty)
+        self.tim.regs_gp32().ccr(channel.index()).write_value(duty)
     }
 }
diff --git a/examples/stm32h7/src/bin/usb_serial.rs b/examples/stm32h7/src/bin/usb_serial.rs
index d81efb541..576506ad3 100644
--- a/examples/stm32h7/src/bin/usb_serial.rs
+++ b/examples/stm32h7/src/bin/usb_serial.rs
@@ -3,8 +3,8 @@
 
 use defmt::{panic, *};
 use embassy_executor::Spawner;
-use embassy_stm32::usb_otg::{Driver, Instance};
-use embassy_stm32::{bind_interrupts, peripherals, usb_otg, Config};
+use embassy_stm32::usb::{Driver, Instance};
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_usb::class::cdc_acm::{CdcAcmClass, State};
 use embassy_usb::driver::EndpointError;
 use embassy_usb::Builder;
@@ -12,7 +12,7 @@ use futures::future::join;
 use {defmt_rtt as _, panic_probe as _};
 
 bind_interrupts!(struct Irqs {
-    OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
 });
 
 #[embassy_executor::main]
@@ -40,12 +40,13 @@ async fn main(_spawner: Spawner) {
         config.rcc.apb3_pre = APBPrescaler::DIV2; // 100 Mhz
         config.rcc.apb4_pre = APBPrescaler::DIV2; // 100 Mhz
         config.rcc.voltage_scale = VoltageScale::Scale1;
+        config.rcc.mux.usbsel = mux::Usbsel::HSI48;
     }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
     let mut ep_out_buffer = [0u8; 256];
-    let mut config = embassy_stm32::usb_otg::Config::default();
+    let mut config = embassy_stm32::usb::Config::default();
     config.vbus_detection = true;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
 
@@ -64,7 +65,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -74,7 +74,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32l1/src/bin/usb_serial.rs b/examples/stm32l1/src/bin/usb_serial.rs
index f738ea358..653bbd6d2 100644
--- a/examples/stm32l1/src/bin/usb_serial.rs
+++ b/examples/stm32l1/src/bin/usb_serial.rs
@@ -46,7 +46,6 @@ async fn main(_spawner: Spawner) {
     config.device_protocol = 0x01;
     config.composite_with_iads = true;
 
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -56,7 +55,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32l4/src/bin/dac_dma.rs b/examples/stm32l4/src/bin/dac_dma.rs
index f227812cd..d01b016c0 100644
--- a/examples/stm32l4/src/bin/dac_dma.rs
+++ b/examples/stm32l4/src/bin/dac_dma.rs
@@ -6,9 +6,9 @@ use embassy_executor::Spawner;
 use embassy_stm32::dac::{DacCh1, DacCh2, ValueArray};
 use embassy_stm32::pac::timer::vals::Mms;
 use embassy_stm32::peripherals::{DAC1, DMA1_CH3, DMA1_CH4, TIM6, TIM7};
-use embassy_stm32::rcc::low_level::RccPeripheral;
+use embassy_stm32::rcc::frequency;
 use embassy_stm32::time::Hertz;
-use embassy_stm32::timer::low_level::BasicInstance;
+use embassy_stm32::timer::low_level::Timer;
 use micromath::F32Ext;
 use {defmt_rtt as _, panic_probe as _};
 
@@ -22,19 +22,19 @@ async fn main(spawner: Spawner) {
     // Obtain two independent channels (p.DAC1 can only be consumed once, though!)
     let (dac_ch1, dac_ch2) = embassy_stm32::dac::Dac::new(p.DAC1, p.DMA1_CH3, p.DMA1_CH4, p.PA4, p.PA5).split();
 
-    spawner.spawn(dac_task1(dac_ch1)).ok();
-    spawner.spawn(dac_task2(dac_ch2)).ok();
+    spawner.spawn(dac_task1(p.TIM6, dac_ch1)).ok();
+    spawner.spawn(dac_task2(p.TIM7, dac_ch2)).ok();
 }
 
 #[embassy_executor::task]
-async fn dac_task1(mut dac: DacCh1<'static, DAC1, DMA1_CH3>) {
+async fn dac_task1(tim: TIM6, mut dac: DacCh1<'static, DAC1, DMA1_CH3>) {
     let data: &[u8; 256] = &calculate_array::<256>();
 
-    info!("TIM6 frequency is {}", TIM6::frequency());
+    info!("TIM6 frequency is {}", frequency::<TIM6>());
     const FREQUENCY: Hertz = Hertz::hz(200);
 
     // Compute the reload value such that we obtain the FREQUENCY for the sine
-    let reload: u32 = (TIM6::frequency().0 / FREQUENCY.0) / data.len() as u32;
+    let reload: u32 = (frequency::<TIM6>().0 / FREQUENCY.0) / data.len() as u32;
 
     // Depends on your clock and on the specific chip used, you may need higher or lower values here
     if reload < 10 {
@@ -45,17 +45,17 @@ async fn dac_task1(mut dac: DacCh1<'static, DAC1, DMA1_CH3>) {
     dac.set_triggering(true);
     dac.enable();
 
-    TIM6::enable_and_reset();
-    TIM6::regs_basic().arr().modify(|w| w.set_arr(reload as u16 - 1));
-    TIM6::regs_basic().cr2().modify(|w| w.set_mms(Mms::UPDATE));
-    TIM6::regs_basic().cr1().modify(|w| {
+    let tim = Timer::new(tim);
+    tim.regs_basic().arr().modify(|w| w.set_arr(reload as u16 - 1));
+    tim.regs_basic().cr2().modify(|w| w.set_mms(Mms::UPDATE));
+    tim.regs_basic().cr1().modify(|w| {
         w.set_opm(false);
         w.set_cen(true);
     });
 
     debug!(
         "TIM6 Frequency {}, Target Frequency {}, Reload {}, Reload as u16 {}, Samples {}",
-        TIM6::frequency(),
+        frequency::<TIM6>(),
         FREQUENCY,
         reload,
         reload as u16,
@@ -70,22 +70,22 @@ async fn dac_task1(mut dac: DacCh1<'static, DAC1, DMA1_CH3>) {
 }
 
 #[embassy_executor::task]
-async fn dac_task2(mut dac: DacCh2<'static, DAC1, DMA1_CH4>) {
+async fn dac_task2(tim: TIM7, mut dac: DacCh2<'static, DAC1, DMA1_CH4>) {
     let data: &[u8; 256] = &calculate_array::<256>();
 
-    info!("TIM7 frequency is {}", TIM7::frequency());
+    info!("TIM7 frequency is {}", frequency::<TIM7>());
 
     const FREQUENCY: Hertz = Hertz::hz(600);
-    let reload: u32 = (TIM7::frequency().0 / FREQUENCY.0) / data.len() as u32;
+    let reload: u32 = (frequency::<TIM7>().0 / FREQUENCY.0) / data.len() as u32;
 
     if reload < 10 {
         error!("Reload value {} below threshold!", reload);
     }
 
-    TIM7::enable_and_reset();
-    TIM7::regs_basic().arr().modify(|w| w.set_arr(reload as u16 - 1));
-    TIM7::regs_basic().cr2().modify(|w| w.set_mms(Mms::UPDATE));
-    TIM7::regs_basic().cr1().modify(|w| {
+    let tim = Timer::new(tim);
+    tim.regs_basic().arr().modify(|w| w.set_arr(reload as u16 - 1));
+    tim.regs_basic().cr2().modify(|w| w.set_mms(Mms::UPDATE));
+    tim.regs_basic().cr1().modify(|w| {
         w.set_opm(false);
         w.set_cen(true);
     });
@@ -96,7 +96,7 @@ async fn dac_task2(mut dac: DacCh2<'static, DAC1, DMA1_CH4>) {
 
     debug!(
         "TIM7 Frequency {}, Target Frequency {}, Reload {}, Reload as u16 {}, Samples {}",
-        TIM7::frequency(),
+        frequency::<TIM7>(),
         FREQUENCY,
         reload,
         reload as u16,
diff --git a/examples/stm32l4/src/bin/spe_adin1110_http_server.rs b/examples/stm32l4/src/bin/spe_adin1110_http_server.rs
index 32bfab6eb..77aa929ab 100644
--- a/examples/stm32l4/src/bin/spe_adin1110_http_server.rs
+++ b/examples/stm32l4/src/bin/spe_adin1110_http_server.rs
@@ -42,7 +42,7 @@ bind_interrupts!(struct Irqs {
     RNG => rng::InterruptHandler<peripherals::RNG>;
 });
 
-use embassy_net_adin1110::{self, Device, Runner, ADIN1110};
+use embassy_net_adin1110::{Device, Runner, ADIN1110};
 use embedded_hal_bus::spi::ExclusiveDevice;
 use hal::gpio::Pull;
 use hal::i2c::Config as I2C_Config;
@@ -93,12 +93,6 @@ async fn main(spawner: Spawner) {
 
     let dp = embassy_stm32::init(config);
 
-    // RM0432rev9, 5.1.2: Independent I/O supply rail
-    // After reset, the I/Os supplied by VDDIO2 are logically and electrically isolated and
-    // therefore are not available. The isolation must be removed before using any I/O from
-    // PG[15:2], by setting the IOSV bit in the PWR_CR2 register, once the VDDIO2 supply is present
-    pac::PWR.cr2().modify(|w| w.set_iosv(true));
-
     let reset_status = pac::RCC.bdcr().read().0;
     defmt::println!("bdcr before: 0x{:X}", reset_status);
 
diff --git a/examples/stm32l4/src/bin/usb_serial.rs b/examples/stm32l4/src/bin/usb_serial.rs
index 9247e56a1..198504b59 100644
--- a/examples/stm32l4/src/bin/usb_serial.rs
+++ b/examples/stm32l4/src/bin/usb_serial.rs
@@ -4,9 +4,8 @@
 use defmt::{panic, *};
 use defmt_rtt as _; // global logger
 use embassy_executor::Spawner;
-use embassy_stm32::rcc::*;
-use embassy_stm32::usb_otg::{Driver, Instance};
-use embassy_stm32::{bind_interrupts, peripherals, usb_otg, Config};
+use embassy_stm32::usb::{Driver, Instance};
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_usb::class::cdc_acm::{CdcAcmClass, State};
 use embassy_usb::driver::EndpointError;
 use embassy_usb::Builder;
@@ -14,7 +13,7 @@ use futures::future::join;
 use panic_probe as _;
 
 bind_interrupts!(struct Irqs {
-    OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
 });
 
 #[embassy_executor::main]
@@ -22,23 +21,26 @@ async fn main(_spawner: Spawner) {
     info!("Hello World!");
 
     let mut config = Config::default();
-    config.rcc.hsi48 = Some(Hsi48Config { sync_from_usb: true }); // needed for USB
-    config.rcc.sys = Sysclk::PLL1_R;
-    config.rcc.hsi = true;
-    config.rcc.pll = Some(Pll {
-        source: PllSource::HSI,
-        prediv: PllPreDiv::DIV1,
-        mul: PllMul::MUL10,
-        divp: None,
-        divq: None,
-        divr: Some(PllRDiv::DIV2), // sysclk 80Mhz (16 / 1 * 10 / 2)
-    });
-
+    {
+        use embassy_stm32::rcc::*;
+        config.rcc.hsi48 = Some(Hsi48Config { sync_from_usb: true }); // needed for USB
+        config.rcc.sys = Sysclk::PLL1_R;
+        config.rcc.hsi = true;
+        config.rcc.pll = Some(Pll {
+            source: PllSource::HSI,
+            prediv: PllPreDiv::DIV1,
+            mul: PllMul::MUL10,
+            divp: None,
+            divq: None,
+            divr: Some(PllRDiv::DIV2), // sysclk 80Mhz (16 / 1 * 10 / 2)
+        });
+        config.rcc.mux.clk48sel = mux::Clk48sel::HSI48;
+    }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
     let mut ep_out_buffer = [0u8; 256];
-    let mut config = embassy_stm32::usb_otg::Config::default();
+    let mut config = embassy_stm32::usb::Config::default();
     config.vbus_detection = true;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
 
@@ -58,7 +60,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -68,7 +69,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32l5/src/bin/usb_ethernet.rs b/examples/stm32l5/src/bin/usb_ethernet.rs
index f6d8b16d0..7f73fd677 100644
--- a/examples/stm32l5/src/bin/usb_ethernet.rs
+++ b/examples/stm32l5/src/bin/usb_ethernet.rs
@@ -5,7 +5,6 @@ use defmt::*;
 use embassy_executor::Spawner;
 use embassy_net::tcp::TcpSocket;
 use embassy_net::{Stack, StackResources};
-use embassy_stm32::rcc::*;
 use embassy_stm32::rng::Rng;
 use embassy_stm32::usb::Driver;
 use embassy_stm32::{bind_interrupts, peripherals, rng, usb, Config};
@@ -44,17 +43,22 @@ async fn net_task(stack: &'static Stack<Device<'static, MTU>>) -> ! {
 #[embassy_executor::main]
 async fn main(spawner: Spawner) {
     let mut config = Config::default();
-    config.rcc.hsi = true;
-    config.rcc.sys = Sysclk::PLL1_R;
-    config.rcc.pll = Some(Pll {
-        // 80Mhz clock (16 / 1 * 10 / 2)
-        source: PllSource::HSI,
-        prediv: PllPreDiv::DIV1,
-        mul: PllMul::MUL10,
-        divp: None,
-        divq: None,
-        divr: Some(PllRDiv::DIV2),
-    });
+    {
+        use embassy_stm32::rcc::*;
+        config.rcc.hsi = true;
+        config.rcc.sys = Sysclk::PLL1_R;
+        config.rcc.pll = Some(Pll {
+            // 80Mhz clock (16 / 1 * 10 / 2)
+            source: PllSource::HSI,
+            prediv: PllPreDiv::DIV1,
+            mul: PllMul::MUL10,
+            divp: None,
+            divq: None,
+            divr: Some(PllRDiv::DIV2),
+        });
+        config.rcc.hsi48 = Some(Hsi48Config { sync_from_usb: true }); // needed for USB
+        config.rcc.mux.clk48sel = mux::Clk48sel::HSI48;
+    }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
@@ -75,14 +79,12 @@ async fn main(spawner: Spawner) {
     config.device_protocol = 0x01;
 
     // Create embassy-usb DeviceBuilder using the driver and config.
-    static DEVICE_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static CONFIG_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static BOS_DESC: StaticCell<[u8; 256]> = StaticCell::new();
     static CONTROL_BUF: StaticCell<[u8; 128]> = StaticCell::new();
     let mut builder = Builder::new(
         driver,
         config,
-        &mut DEVICE_DESC.init([0; 256])[..],
         &mut CONFIG_DESC.init([0; 256])[..],
         &mut BOS_DESC.init([0; 256])[..],
         &mut [], // no msos descriptors
diff --git a/examples/stm32l5/src/bin/usb_hid_mouse.rs b/examples/stm32l5/src/bin/usb_hid_mouse.rs
index c51ed96e0..9d30205bb 100644
--- a/examples/stm32l5/src/bin/usb_hid_mouse.rs
+++ b/examples/stm32l5/src/bin/usb_hid_mouse.rs
@@ -4,7 +4,6 @@
 use defmt::*;
 use embassy_executor::Spawner;
 use embassy_futures::join::join;
-use embassy_stm32::rcc::*;
 use embassy_stm32::usb::Driver;
 use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_time::Timer;
@@ -21,17 +20,22 @@ bind_interrupts!(struct Irqs {
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
     let mut config = Config::default();
-    config.rcc.hsi = true;
-    config.rcc.sys = Sysclk::PLL1_R;
-    config.rcc.pll = Some(Pll {
-        // 80Mhz clock (16 / 1 * 10 / 2)
-        source: PllSource::HSI,
-        prediv: PllPreDiv::DIV1,
-        mul: PllMul::MUL10,
-        divp: None,
-        divq: None,
-        divr: Some(PllRDiv::DIV2),
-    });
+    {
+        use embassy_stm32::rcc::*;
+        config.rcc.hsi = true;
+        config.rcc.sys = Sysclk::PLL1_R;
+        config.rcc.pll = Some(Pll {
+            // 80Mhz clock (16 / 1 * 10 / 2)
+            source: PllSource::HSI,
+            prediv: PllPreDiv::DIV1,
+            mul: PllMul::MUL10,
+            divp: None,
+            divq: None,
+            divr: Some(PllRDiv::DIV2),
+        });
+        config.rcc.hsi48 = Some(Hsi48Config { sync_from_usb: true }); // needed for USB
+        config.rcc.mux.clk48sel = mux::Clk48sel::HSI48;
+    }
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
@@ -47,7 +51,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -58,7 +61,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32l5/src/bin/usb_serial.rs b/examples/stm32l5/src/bin/usb_serial.rs
index 87987f2ce..a64bda31b 100644
--- a/examples/stm32l5/src/bin/usb_serial.rs
+++ b/examples/stm32l5/src/bin/usb_serial.rs
@@ -4,7 +4,6 @@
 use defmt::{panic, *};
 use embassy_executor::Spawner;
 use embassy_futures::join::join;
-use embassy_stm32::rcc::*;
 use embassy_stm32::usb::{Driver, Instance};
 use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_usb::class::cdc_acm::{CdcAcmClass, State};
@@ -19,17 +18,22 @@ bind_interrupts!(struct Irqs {
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
     let mut config = Config::default();
-    config.rcc.hsi = true;
-    config.rcc.sys = Sysclk::PLL1_R;
-    config.rcc.pll = Some(Pll {
-        // 80Mhz clock (16 / 1 * 10 / 2)
-        source: PllSource::HSI,
-        prediv: PllPreDiv::DIV1,
-        mul: PllMul::MUL10,
-        divp: None,
-        divq: None,
-        divr: Some(PllRDiv::DIV2),
-    });
+    {
+        use embassy_stm32::rcc::*;
+        config.rcc.hsi = true;
+        config.rcc.sys = Sysclk::PLL1_R;
+        config.rcc.pll = Some(Pll {
+            // 80Mhz clock (16 / 1 * 10 / 2)
+            source: PllSource::HSI,
+            prediv: PllPreDiv::DIV1,
+            mul: PllMul::MUL10,
+            divp: None,
+            divq: None,
+            divr: Some(PllRDiv::DIV2),
+        });
+        config.rcc.hsi48 = Some(Hsi48Config { sync_from_usb: true }); // needed for USB
+        config.rcc.mux.clk48sel = mux::Clk48sel::HSI48;
+    }
     let p = embassy_stm32::init(config);
 
     info!("Hello World!");
@@ -43,7 +47,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 7];
@@ -53,7 +56,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/examples/stm32u5/src/bin/usb_serial.rs b/examples/stm32u5/src/bin/usb_serial.rs
index 61851e5a2..6a313efb0 100644
--- a/examples/stm32u5/src/bin/usb_serial.rs
+++ b/examples/stm32u5/src/bin/usb_serial.rs
@@ -4,8 +4,8 @@
 use defmt::{panic, *};
 use defmt_rtt as _; // global logger
 use embassy_executor::Spawner;
-use embassy_stm32::usb_otg::{Driver, Instance};
-use embassy_stm32::{bind_interrupts, peripherals, usb_otg, Config};
+use embassy_stm32::usb::{Driver, Instance};
+use embassy_stm32::{bind_interrupts, peripherals, usb, Config};
 use embassy_usb::class::cdc_acm::{CdcAcmClass, State};
 use embassy_usb::driver::EndpointError;
 use embassy_usb::Builder;
@@ -13,7 +13,7 @@ use futures::future::join;
 use panic_probe as _;
 
 bind_interrupts!(struct Irqs {
-    OTG_FS => usb_otg::InterruptHandler<peripherals::USB_OTG_FS>;
+    OTG_FS => usb::InterruptHandler<peripherals::USB_OTG_FS>;
 });
 
 #[embassy_executor::main]
@@ -35,13 +35,14 @@ async fn main(_spawner: Spawner) {
         config.rcc.sys = Sysclk::PLL1_R;
         config.rcc.voltage_range = VoltageScale::RANGE1;
         config.rcc.hsi48 = Some(Hsi48Config { sync_from_usb: true }); // needed for USB
+        config.rcc.mux.iclksel = mux::Iclksel::HSI48; // USB uses ICLK
     }
 
     let p = embassy_stm32::init(config);
 
     // Create the driver, from the HAL.
     let mut ep_out_buffer = [0u8; 256];
-    let mut config = embassy_stm32::usb_otg::Config::default();
+    let mut config = embassy_stm32::usb::Config::default();
     config.vbus_detection = false;
     let driver = Driver::new_fs(p.USB_OTG_FS, Irqs, p.PA12, p.PA11, &mut ep_out_buffer, config);
 
@@ -60,7 +61,6 @@ async fn main(_spawner: Spawner) {
 
     // Create embassy-usb DeviceBuilder using the driver and config.
     // It needs some buffers for building the descriptors.
-    let mut device_descriptor = [0; 256];
     let mut config_descriptor = [0; 256];
     let mut bos_descriptor = [0; 256];
     let mut control_buf = [0; 64];
@@ -70,7 +70,6 @@ async fn main(_spawner: Spawner) {
     let mut builder = Builder::new(
         driver,
         config,
-        &mut device_descriptor,
         &mut config_descriptor,
         &mut bos_descriptor,
         &mut [], // no msos descriptors
diff --git a/rust-toolchain-nightly.toml b/rust-toolchain-nightly.toml
index b8a7db353..98696fd2b 100644
--- a/rust-toolchain-nightly.toml
+++ b/rust-toolchain-nightly.toml
@@ -1,5 +1,5 @@
 [toolchain]
-channel = "nightly-2023-12-20"
+channel = "nightly-2024-03-20"
 components = [ "rust-src", "rustfmt", "llvm-tools", "miri" ]
 targets = [
     "thumbv7em-none-eabi",
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index a6fe52ee2..2f5d17069 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,5 +1,5 @@
 [toolchain]
-channel = "1.75"
+channel = "1.77"
 components = [ "rust-src", "rustfmt", "llvm-tools" ]
 targets = [
     "thumbv7em-none-eabi",
diff --git a/tests/rp/src/bin/gpio_multicore.rs b/tests/rp/src/bin/gpio_multicore.rs
index 8aed9b80c..e9c6f3122 100644
--- a/tests/rp/src/bin/gpio_multicore.rs
+++ b/tests/rp/src/bin/gpio_multicore.rs
@@ -21,10 +21,14 @@ static CHANNEL1: Channel<CriticalSectionRawMutex, (), 1> = Channel::new();
 #[cortex_m_rt::entry]
 fn main() -> ! {
     let p = embassy_rp::init(Default::default());
-    spawn_core1(p.CORE1, unsafe { &mut CORE1_STACK }, move || {
-        let executor1 = EXECUTOR1.init(Executor::new());
-        executor1.run(|spawner| unwrap!(spawner.spawn(core1_task(p.PIN_1))));
-    });
+    spawn_core1(
+        p.CORE1,
+        unsafe { &mut *core::ptr::addr_of_mut!(CORE1_STACK) },
+        move || {
+            let executor1 = EXECUTOR1.init(Executor::new());
+            executor1.run(|spawner| unwrap!(spawner.spawn(core1_task(p.PIN_1))));
+        },
+    );
     let executor0 = EXECUTOR0.init(Executor::new());
     executor0.run(|spawner| unwrap!(spawner.spawn(core0_task(p.PIN_0))));
 }
diff --git a/tests/rp/src/bin/i2c.rs b/tests/rp/src/bin/i2c.rs
index 153b37999..9615007bd 100644
--- a/tests/rp/src/bin/i2c.rs
+++ b/tests/rp/src/bin/i2c.rs
@@ -210,10 +210,14 @@ async fn controller_task(con: &mut i2c::I2c<'static, I2C0, i2c::Async>) {
         config.addr = DEV_ADDR as u16;
         let device = i2c_slave::I2cSlave::new(p.I2C1, d_sda, d_scl, Irqs, config);
 
-        spawn_core1(p.CORE1, unsafe { &mut CORE1_STACK }, move || {
-            let executor1 = EXECUTOR1.init(Executor::new());
-            executor1.run(|spawner| unwrap!(spawner.spawn(device_task(device))));
-        });
+        spawn_core1(
+            p.CORE1,
+            unsafe { &mut *core::ptr::addr_of_mut!(CORE1_STACK) },
+            move || {
+                let executor1 = EXECUTOR1.init(Executor::new());
+                executor1.run(|spawner| unwrap!(spawner.spawn(device_task(device))));
+            },
+        );
 
         let c_sda = p.PIN_21;
         let c_scl = p.PIN_20;
diff --git a/tests/rp/src/bin/multicore.rs b/tests/rp/src/bin/multicore.rs
index 60d9f85ec..783ea0f27 100644
--- a/tests/rp/src/bin/multicore.rs
+++ b/tests/rp/src/bin/multicore.rs
@@ -19,10 +19,14 @@ static CHANNEL1: Channel<CriticalSectionRawMutex, bool, 1> = Channel::new();
 #[cortex_m_rt::entry]
 fn main() -> ! {
     let p = embassy_rp::init(Default::default());
-    spawn_core1(p.CORE1, unsafe { &mut CORE1_STACK }, move || {
-        let executor1 = EXECUTOR1.init(Executor::new());
-        executor1.run(|spawner| unwrap!(spawner.spawn(core1_task())));
-    });
+    spawn_core1(
+        p.CORE1,
+        unsafe { &mut *core::ptr::addr_of_mut!(CORE1_STACK) },
+        move || {
+            let executor1 = EXECUTOR1.init(Executor::new());
+            executor1.run(|spawner| unwrap!(spawner.spawn(core1_task())));
+        },
+    );
     let executor0 = EXECUTOR0.init(Executor::new());
     executor0.run(|spawner| unwrap!(spawner.spawn(core0_task())));
 }
diff --git a/tests/rp/src/bin/pwm.rs b/tests/rp/src/bin/pwm.rs
index e71d9e610..4b02e5bab 100644
--- a/tests/rp/src/bin/pwm.rs
+++ b/tests/rp/src/bin/pwm.rs
@@ -28,7 +28,7 @@ async fn main(_spawner: Spawner) {
 
     // Test free-running clock
     {
-        let pwm = Pwm::new_free(&mut p.PWM_CH3, cfg.clone());
+        let pwm = Pwm::new_free(&mut p.PWM_SLICE3, cfg.clone());
         cortex_m::asm::delay(125);
         let ctr = pwm.counter();
         assert!(ctr > 0);
@@ -46,7 +46,7 @@ async fn main(_spawner: Spawner) {
         // Test output from A
         {
             let pin1 = Input::new(&mut p9, Pull::None);
-            let _pwm = Pwm::new_output_a(&mut p.PWM_CH3, &mut p6, cfg.clone());
+            let _pwm = Pwm::new_output_a(&mut p.PWM_SLICE3, &mut p6, cfg.clone());
             Timer::after_millis(1).await;
             assert_eq!(pin1.is_low(), invert_a);
             Timer::after_millis(5).await;
@@ -60,7 +60,7 @@ async fn main(_spawner: Spawner) {
         // Test output from B
         {
             let pin2 = Input::new(&mut p11, Pull::None);
-            let _pwm = Pwm::new_output_b(&mut p.PWM_CH3, &mut p7, cfg.clone());
+            let _pwm = Pwm::new_output_b(&mut p.PWM_SLICE3, &mut p7, cfg.clone());
             Timer::after_millis(1).await;
             assert_ne!(pin2.is_low(), invert_a);
             Timer::after_millis(5).await;
@@ -75,7 +75,7 @@ async fn main(_spawner: Spawner) {
         {
             let pin1 = Input::new(&mut p9, Pull::None);
             let pin2 = Input::new(&mut p11, Pull::None);
-            let _pwm = Pwm::new_output_ab(&mut p.PWM_CH3, &mut p6, &mut p7, cfg.clone());
+            let _pwm = Pwm::new_output_ab(&mut p.PWM_SLICE3, &mut p6, &mut p7, cfg.clone());
             Timer::after_millis(1).await;
             assert_eq!(pin1.is_low(), invert_a);
             assert_ne!(pin2.is_low(), invert_a);
@@ -94,7 +94,7 @@ async fn main(_spawner: Spawner) {
     // Test level-gated
     {
         let mut pin2 = Output::new(&mut p11, Level::Low);
-        let pwm = Pwm::new_input(&mut p.PWM_CH3, &mut p7, InputMode::Level, cfg.clone());
+        let pwm = Pwm::new_input(&mut p.PWM_SLICE3, &mut p7, InputMode::Level, cfg.clone());
         assert_eq!(pwm.counter(), 0);
         Timer::after_millis(5).await;
         assert_eq!(pwm.counter(), 0);
@@ -110,7 +110,7 @@ async fn main(_spawner: Spawner) {
     // Test rising-gated
     {
         let mut pin2 = Output::new(&mut p11, Level::Low);
-        let pwm = Pwm::new_input(&mut p.PWM_CH3, &mut p7, InputMode::RisingEdge, cfg.clone());
+        let pwm = Pwm::new_input(&mut p.PWM_SLICE3, &mut p7, InputMode::RisingEdge, cfg.clone());
         assert_eq!(pwm.counter(), 0);
         Timer::after_millis(5).await;
         assert_eq!(pwm.counter(), 0);
@@ -125,7 +125,7 @@ async fn main(_spawner: Spawner) {
     // Test falling-gated
     {
         let mut pin2 = Output::new(&mut p11, Level::High);
-        let pwm = Pwm::new_input(&mut p.PWM_CH3, &mut p7, InputMode::FallingEdge, cfg.clone());
+        let pwm = Pwm::new_input(&mut p.PWM_SLICE3, &mut p7, InputMode::FallingEdge, cfg.clone());
         assert_eq!(pwm.counter(), 0);
         Timer::after_millis(5).await;
         assert_eq!(pwm.counter(), 0);
diff --git a/tests/stm32/Cargo.toml b/tests/stm32/Cargo.toml
index bfe003a11..e42470004 100644
--- a/tests/stm32/Cargo.toml
+++ b/tests/stm32/Cargo.toml
@@ -13,7 +13,7 @@ stm32f303ze = ["embassy-stm32/stm32f303ze", "chrono", "not-gpdma"]
 stm32f429zi = ["embassy-stm32/stm32f429zi", "chrono", "eth", "stop", "can", "not-gpdma", "dac", "rng"]
 stm32f446re = ["embassy-stm32/stm32f446re", "chrono", "stop", "can", "not-gpdma", "dac", "sdmmc"]
 stm32f767zi = ["embassy-stm32/stm32f767zi", "chrono", "not-gpdma", "eth", "rng"]
-stm32g071rb = ["embassy-stm32/stm32g071rb", "cm0", "not-gpdma", "dac"]
+stm32g071rb = ["embassy-stm32/stm32g071rb", "cm0", "not-gpdma", "dac", "ucpd"]
 stm32g491re = ["embassy-stm32/stm32g491re", "chrono", "stop", "not-gpdma", "rng", "fdcan"]
 stm32h563zi = ["embassy-stm32/stm32h563zi", "chrono", "eth", "rng", "hash"]
 stm32h753zi = ["embassy-stm32/stm32h753zi", "chrono", "not-gpdma", "eth", "rng", "fdcan", "hash", "cryp"]
@@ -47,6 +47,7 @@ mac = ["dep:embassy-stm32-wpan", "embassy-stm32-wpan/mac"]
 embassy-stm32-wpan = []
 not-gpdma = []
 dac = []
+ucpd = []
 
 cm0 = ["portable-atomic/unsafe-assume-single-core"]
 
@@ -160,6 +161,11 @@ name = "timer"
 path = "src/bin/timer.rs"
 required-features = []
 
+[[bin]]
+name = "ucpd"
+path = "src/bin/ucpd.rs"
+required-features = [ "ucpd",]
+
 [[bin]]
 name = "usart"
 path = "src/bin/usart.rs"
diff --git a/tests/stm32/src/bin/can.rs b/tests/stm32/src/bin/can.rs
index f4effa244..551764458 100644
--- a/tests/stm32/src/bin/can.rs
+++ b/tests/stm32/src/bin/can.rs
@@ -6,17 +6,20 @@
 #[path = "../common.rs"]
 mod common;
 use common::*;
-use defmt::assert;
 use embassy_executor::Spawner;
 use embassy_stm32::bind_interrupts;
-use embassy_stm32::can::bxcan::filter::Mask32;
-use embassy_stm32::can::bxcan::{Fifo, Frame, StandardId};
-use embassy_stm32::can::{Can, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, TxInterruptHandler};
+use embassy_stm32::can::filter::Mask32;
+use embassy_stm32::can::{
+    Can, Fifo, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, TxInterruptHandler,
+};
 use embassy_stm32::gpio::{Input, Pull};
 use embassy_stm32::peripherals::CAN1;
-use embassy_time::{Duration, Instant};
+use embassy_time::Duration;
 use {defmt_rtt as _, panic_probe as _};
 
+mod can_common;
+use can_common::*;
+
 bind_interrupts!(struct Irqs {
     CAN1_RX0 => Rx0InterruptHandler<CAN1>;
     CAN1_RX1 => Rx1InterruptHandler<CAN1>;
@@ -29,6 +32,11 @@ async fn main(_spawner: Spawner) {
     let p = embassy_stm32::init(config());
     info!("Hello World!");
 
+    let options = TestOptions {
+        max_latency: Duration::from_micros(1200),
+        max_buffered: 2,
+    };
+
     let can = peri!(p, CAN);
     let tx = peri!(p, CAN_TX);
     let mut rx = peri!(p, CAN_RX);
@@ -44,54 +52,25 @@ async fn main(_spawner: Spawner) {
 
     info!("Configuring can...");
 
-    can.as_mut()
-        .modify_filters()
-        .enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
+    can.modify_filters().enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
 
-    can.set_bitrate(1_000_000);
-    can.as_mut()
-        .modify_config()
+    can.modify_config()
         .set_loopback(true) // Receive own frames
         .set_silent(true)
         // .set_bit_timing(0x001c0003)
-        .enable();
+        .set_bitrate(1_000_000);
+
+    can.enable().await;
 
     info!("Can configured");
 
-    let mut i: u8 = 0;
-    loop {
-        let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), [i]);
+    run_can_tests(&mut can, &options).await;
 
-        info!("Transmitting frame...");
-        let tx_ts = Instant::now();
-        can.write(&tx_frame).await;
-
-        let envelope = can.read().await.unwrap();
-        info!("Frame received!");
-
-        info!("loopback time {}", envelope.ts);
-        info!("loopback frame {=u8}", envelope.frame.data().unwrap()[0]);
-
-        let latency = envelope.ts.saturating_duration_since(tx_ts);
-        info!("loopback latency {} us", latency.as_micros());
-
-        // Theoretical minimum latency is 55us, actual is usually ~80us
-        const MIN_LATENCY: Duration = Duration::from_micros(50);
-        const MAX_LATENCY: Duration = Duration::from_micros(150);
-        assert!(
-            MIN_LATENCY <= latency && latency <= MAX_LATENCY,
-            "{} <= {} <= {}",
-            MIN_LATENCY,
-            latency,
-            MAX_LATENCY
-        );
-
-        i += 1;
-        if i > 10 {
-            break;
-        }
-    }
+    // Test again with a split
+    let (mut tx, mut rx) = can.split();
+    run_split_can_tests(&mut tx, &mut rx, &options).await;
 
     info!("Test OK");
+
     cortex_m::asm::bkpt();
 }
diff --git a/tests/stm32/src/bin/can_common.rs b/tests/stm32/src/bin/can_common.rs
new file mode 100644
index 000000000..4b39269cc
--- /dev/null
+++ b/tests/stm32/src/bin/can_common.rs
@@ -0,0 +1,112 @@
+use defmt::{assert, *};
+use embassy_stm32::can;
+use embassy_time::{Duration, Instant};
+
+#[derive(Clone, Copy, Debug)]
+pub struct TestOptions {
+    pub max_latency: Duration,
+    pub max_buffered: u8,
+}
+
+pub async fn run_can_tests<'d, T: can::Instance>(can: &mut can::Can<'d, T>, options: &TestOptions) {
+    let mut i: u8 = 0;
+    loop {
+        //let tx_frame = can::frame::Frame::new_standard(0x123, &[i, 0x12 as u8, 0x34 as u8, 0x56 as u8, 0x78 as u8, 0x9A as u8, 0xBC as u8 ]).unwrap();
+        let tx_frame = can::frame::Frame::new_standard(0x123, &[i; 1]).unwrap();
+
+        //info!("Transmitting frame...");
+        let tx_ts = Instant::now();
+        can.write(&tx_frame).await;
+
+        let (frame, timestamp) = can.read().await.unwrap().parts();
+        //info!("Frame received!");
+
+        // Check data.
+        assert!(i == frame.data()[0], "{} == {}", i, frame.data()[0]);
+
+        //info!("loopback time {}", timestamp);
+        //info!("loopback frame {=u8}", frame.data()[0]);
+        let latency = timestamp.saturating_duration_since(tx_ts);
+        info!("loopback latency {} us", latency.as_micros());
+
+        // Theoretical minimum latency is 55us, actual is usually ~80us
+        const MIN_LATENCY: Duration = Duration::from_micros(50);
+        // Was failing at 150 but we are not getting a real time stamp. I'm not
+        // sure if there are other delays
+        assert!(
+            MIN_LATENCY <= latency && latency <= options.max_latency,
+            "{} <= {} <= {}",
+            MIN_LATENCY,
+            latency,
+            options.max_latency
+        );
+
+        i += 1;
+        if i > 5 {
+            break;
+        }
+    }
+
+    // Below here, check that we can receive from both FIFO0 and FIFO1
+    // Above we configured FIFO1 for extended ID packets. There are only 3 slots
+    // in each FIFO so make sure we write enough to fill them both up before reading.
+    for i in 0..options.max_buffered {
+        // Try filling up the RX FIFO0 buffers
+        //let tx_frame = if 0 != (i & 0x01) {
+        let tx_frame = if i < options.max_buffered / 2 {
+            info!("Transmitting standard frame {}", i);
+            can::frame::Frame::new_standard(0x123, &[i; 1]).unwrap()
+        } else {
+            info!("Transmitting extended frame {}", i);
+            can::frame::Frame::new_extended(0x1232344, &[i; 1]).unwrap()
+        };
+        can.write(&tx_frame).await;
+    }
+
+    // Try and receive all 6 packets
+    for _i in 0..options.max_buffered {
+        let (frame, _ts) = can.read().await.unwrap().parts();
+        match frame.id() {
+            embedded_can::Id::Extended(_id) => {
+                info!("Extended received! {}", frame.data()[0]);
+                //info!("Extended received! {:x} {} {}", id.as_raw(), frame.data()[0], i);
+            }
+            embedded_can::Id::Standard(_id) => {
+                info!("Standard received! {}", frame.data()[0]);
+                //info!("Standard received! {:x} {} {}", id.as_raw(), frame.data()[0], i);
+            }
+        }
+    }
+}
+
+pub async fn run_split_can_tests<'d, T: can::Instance>(
+    tx: &mut can::CanTx<'d, T>,
+    rx: &mut can::CanRx<'d, T>,
+    options: &TestOptions,
+) {
+    for i in 0..options.max_buffered {
+        // Try filling up the RX FIFO0 buffers
+        //let tx_frame = if 0 != (i & 0x01) {
+        let tx_frame = if i < options.max_buffered / 2 {
+            info!("Transmitting standard frame {}", i);
+            can::frame::Frame::new_standard(0x123, &[i; 1]).unwrap()
+        } else {
+            info!("Transmitting extended frame {}", i);
+            can::frame::Frame::new_extended(0x1232344, &[i; 1]).unwrap()
+        };
+        tx.write(&tx_frame).await;
+    }
+
+    // Try and receive all 6 packets
+    for _i in 0..options.max_buffered {
+        let (frame, _ts) = rx.read().await.unwrap().parts();
+        match frame.id() {
+            embedded_can::Id::Extended(_id) => {
+                info!("Extended received! {}", frame.data()[0]);
+            }
+            embedded_can::Id::Standard(_id) => {
+                info!("Standard received! {}", frame.data()[0]);
+            }
+        }
+    }
+}
diff --git a/tests/stm32/src/bin/cryp.rs b/tests/stm32/src/bin/cryp.rs
index f105abf26..60778bdaa 100644
--- a/tests/stm32/src/bin/cryp.rs
+++ b/tests/stm32/src/bin/cryp.rs
@@ -10,9 +10,14 @@ use aes_gcm::aead::{AeadInPlace, KeyInit};
 use aes_gcm::Aes128Gcm;
 use common::*;
 use embassy_executor::Spawner;
-use embassy_stm32::cryp::*;
+use embassy_stm32::cryp::{self, *};
+use embassy_stm32::{bind_interrupts, peripherals};
 use {defmt_rtt as _, panic_probe as _};
 
+bind_interrupts!(struct Irqs {
+    CRYP => cryp::InterruptHandler<peripherals::CRYP>;
+});
+
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
     let p: embassy_stm32::Peripherals = embassy_stm32::init(config());
@@ -22,27 +27,32 @@ async fn main(_spawner: Spawner) {
     const AAD1: &[u8] = b"additional data 1 stdargadrhaethaethjatjatjaetjartjstrjsfkk;'jopofyuisrteytweTASTUIKFUKIXTRDTEREharhaeryhaterjartjarthaethjrtjarthaetrhartjatejatrjsrtjartjyt1";
     const AAD2: &[u8] = b"additional data 2 stdhthsthsthsrthsrthsrtjdykjdukdyuldadfhsdghsdghsdghsadghjk'hioethjrtjarthaetrhartjatecfgjhzdfhgzdfhzdfghzdfhzdfhzfhjatrjsrtjartjytjfytjfyg";
 
-    let hw_cryp = Cryp::new(p.CRYP);
+    let in_dma = peri!(p, CRYP_IN_DMA);
+    let out_dma = peri!(p, CRYP_OUT_DMA);
+
+    let mut hw_cryp = Cryp::new(p.CRYP, in_dma, out_dma, Irqs);
     let key: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     let mut ciphertext: [u8; PAYLOAD1.len() + PAYLOAD2.len()] = [0; PAYLOAD1.len() + PAYLOAD2.len()];
     let mut plaintext: [u8; PAYLOAD1.len() + PAYLOAD2.len()] = [0; PAYLOAD1.len() + PAYLOAD2.len()];
     let iv: [u8; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
 
-    // Encrypt in hardware using AES-GCM 128-bit
+    // Encrypt in hardware using AES-GCM 128-bit in blocking mode.
     let aes_gcm = AesGcm::new(&key, &iv);
-    let mut gcm_encrypt = hw_cryp.start(&aes_gcm, Direction::Encrypt);
+    let mut gcm_encrypt = hw_cryp.start_blocking(&aes_gcm, Direction::Encrypt);
     hw_cryp.aad_blocking(&mut gcm_encrypt, AAD1, false);
     hw_cryp.aad_blocking(&mut gcm_encrypt, AAD2, true);
     hw_cryp.payload_blocking(&mut gcm_encrypt, PAYLOAD1, &mut ciphertext[..PAYLOAD1.len()], false);
     hw_cryp.payload_blocking(&mut gcm_encrypt, PAYLOAD2, &mut ciphertext[PAYLOAD1.len()..], true);
     let encrypt_tag = hw_cryp.finish_blocking(gcm_encrypt);
 
-    // Decrypt in hardware using AES-GCM 128-bit
-    let mut gcm_decrypt = hw_cryp.start(&aes_gcm, Direction::Decrypt);
-    hw_cryp.aad_blocking(&mut gcm_decrypt, AAD1, false);
-    hw_cryp.aad_blocking(&mut gcm_decrypt, AAD2, true);
-    hw_cryp.payload_blocking(&mut gcm_decrypt, &ciphertext, &mut plaintext, true);
-    let decrypt_tag = hw_cryp.finish_blocking(gcm_decrypt);
+    // Decrypt in hardware using AES-GCM 128-bit in async (DMA) mode.
+    let mut gcm_decrypt = hw_cryp.start(&aes_gcm, Direction::Decrypt).await;
+    hw_cryp.aad(&mut gcm_decrypt, AAD1, false).await;
+    hw_cryp.aad(&mut gcm_decrypt, AAD2, true).await;
+    hw_cryp
+        .payload(&mut gcm_decrypt, &ciphertext, &mut plaintext, true)
+        .await;
+    let decrypt_tag = hw_cryp.finish(gcm_decrypt).await;
 
     info!("AES-GCM Ciphertext: {:?}", ciphertext);
     info!("AES-GCM Plaintext: {:?}", plaintext);
diff --git a/tests/stm32/src/bin/fdcan.rs b/tests/stm32/src/bin/fdcan.rs
index dd78d7fb3..27bdd038a 100644
--- a/tests/stm32/src/bin/fdcan.rs
+++ b/tests/stm32/src/bin/fdcan.rs
@@ -6,26 +6,26 @@
 #[path = "../common.rs"]
 mod common;
 use common::*;
-use defmt::assert;
 use embassy_executor::Spawner;
 use embassy_stm32::peripherals::*;
 use embassy_stm32::{bind_interrupts, can, Config};
-use embassy_time::{Duration, Instant};
+use embassy_time::Duration;
 use {defmt_rtt as _, panic_probe as _};
 
-bind_interrupts!(struct Irqs {
+mod can_common;
+use can_common::*;
+
+bind_interrupts!(struct Irqs2 {
+    FDCAN2_IT0 => can::IT0InterruptHandler<FDCAN2>;
+    FDCAN2_IT1 => can::IT1InterruptHandler<FDCAN2>;
+});
+bind_interrupts!(struct Irqs1 {
     FDCAN1_IT0 => can::IT0InterruptHandler<FDCAN1>;
     FDCAN1_IT1 => can::IT1InterruptHandler<FDCAN1>;
 });
 
-struct TestOptions {
-    config: Config,
-    max_latency: Duration,
-    second_fifo_working: bool,
-}
-
 #[cfg(any(feature = "stm32h755zi", feature = "stm32h753zi", feature = "stm32h563zi"))]
-fn options() -> TestOptions {
+fn options() -> (Config, TestOptions) {
     use embassy_stm32::rcc;
     info!("H75 config");
     let mut c = config();
@@ -34,15 +34,17 @@ fn options() -> TestOptions {
         mode: rcc::HseMode::Oscillator,
     });
     c.rcc.mux.fdcansel = rcc::mux::Fdcansel::HSE;
-    TestOptions {
-        config: c,
-        max_latency: Duration::from_micros(1200),
-        second_fifo_working: false,
-    }
+    (
+        c,
+        TestOptions {
+            max_latency: Duration::from_micros(1200),
+            max_buffered: 3,
+        },
+    )
 }
 
 #[cfg(any(feature = "stm32h7a3zi"))]
-fn options() -> TestOptions {
+fn options() -> (Config, TestOptions) {
     use embassy_stm32::rcc;
     info!("H7a config");
     let mut c = config();
@@ -51,139 +53,63 @@ fn options() -> TestOptions {
         mode: rcc::HseMode::Oscillator,
     });
     c.rcc.mux.fdcansel = rcc::mux::Fdcansel::HSE;
-    TestOptions {
-        config: c,
-        max_latency: Duration::from_micros(1200),
-        second_fifo_working: false,
-    }
+    (
+        c,
+        TestOptions {
+            max_latency: Duration::from_micros(1200),
+            max_buffered: 3,
+        },
+    )
 }
 
 #[cfg(any(feature = "stm32g491re", feature = "stm32g431cb"))]
-fn options() -> TestOptions {
+fn options() -> (Config, TestOptions) {
     info!("G4 config");
-    TestOptions {
-        config: config(),
-        max_latency: Duration::from_micros(500),
-        second_fifo_working: true,
-    }
+    (
+        config(),
+        TestOptions {
+            max_latency: Duration::from_micros(500),
+            max_buffered: 6,
+        },
+    )
 }
 
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
     //let peripherals = embassy_stm32::init(config());
 
-    let options = options();
-    let peripherals = embassy_stm32::init(options.config);
+    let (config, options) = options();
+    let peripherals = embassy_stm32::init(config);
 
-    let mut can = can::FdcanConfigurator::new(peripherals.FDCAN1, peripherals.PB8, peripherals.PB9, Irqs);
+    let mut can = can::CanConfigurator::new(peripherals.FDCAN1, peripherals.PB8, peripherals.PB9, Irqs1);
+    let mut can2 = can::CanConfigurator::new(peripherals.FDCAN2, peripherals.PB12, peripherals.PB13, Irqs2);
 
     // 250k bps
     can.set_bitrate(250_000);
+    can2.set_bitrate(250_000);
 
     can.set_extended_filter(
         can::filter::ExtendedFilterSlot::_0,
         can::filter::ExtendedFilter::accept_all_into_fifo1(),
     );
+    can2.set_extended_filter(
+        can::filter::ExtendedFilterSlot::_0,
+        can::filter::ExtendedFilter::accept_all_into_fifo1(),
+    );
 
     let mut can = can.into_internal_loopback_mode();
+    let mut can2 = can2.into_internal_loopback_mode();
+
+    run_can_tests(&mut can, &options).await;
+    run_can_tests(&mut can2, &options).await;
 
     info!("CAN Configured");
 
-    let mut i: u8 = 0;
-    loop {
-        let tx_frame = can::frame::ClassicFrame::new_standard(0x123, &[i; 1]).unwrap();
-
-        info!("Transmitting frame...");
-        let tx_ts = Instant::now();
-        can.write(&tx_frame).await;
-
-        let (frame, timestamp) = can.read().await.unwrap();
-        info!("Frame received!");
-
-        // Check data.
-        assert!(i == frame.data()[0], "{} == {}", i, frame.data()[0]);
-
-        info!("loopback time {}", timestamp);
-        info!("loopback frame {=u8}", frame.data()[0]);
-        let latency = timestamp.saturating_duration_since(tx_ts);
-        info!("loopback latency {} us", latency.as_micros());
-
-        // Theoretical minimum latency is 55us, actual is usually ~80us
-        const MIN_LATENCY: Duration = Duration::from_micros(50);
-        // Was failing at 150 but we are not getting a real time stamp. I'm not
-        // sure if there are other delays
-        assert!(
-            MIN_LATENCY <= latency && latency <= options.max_latency,
-            "{} <= {} <= {}",
-            MIN_LATENCY,
-            latency,
-            options.max_latency
-        );
-
-        i += 1;
-        if i > 10 {
-            break;
-        }
-    }
-
-    let max_buffered = if options.second_fifo_working { 6 } else { 3 };
-
-    // Below here, check that we can receive from both FIFO0 and FIFO0
-    // Above we configured FIFO1 for extended ID packets. There are only 3 slots
-    // in each FIFO so make sure we write enough to fill them both up before reading.
-    for i in 0..3 {
-        // Try filling up the RX FIFO0 buffers with standard packets
-        let tx_frame = can::frame::ClassicFrame::new_standard(0x123, &[i; 1]).unwrap();
-        info!("Transmitting frame {}", i);
-        can.write(&tx_frame).await;
-    }
-    for i in 3..max_buffered {
-        // Try filling up the RX FIFO0 buffers with extended packets
-        let tx_frame = can::frame::ClassicFrame::new_extended(0x1232344, &[i; 1]).unwrap();
-        info!("Transmitting frame {}", i);
-        can.write(&tx_frame).await;
-    }
-
-    // Try and receive all 6 packets
-    for i in 0..max_buffered {
-        let (frame, _ts) = can.read().await.unwrap();
-        match frame.id() {
-            embedded_can::Id::Extended(id) => {
-                info!("Extended received! {:x} {} {}", id.as_raw(), frame.data()[0], i);
-            }
-            embedded_can::Id::Standard(id) => {
-                info!("Standard received! {:x} {} {}", id.as_raw(), frame.data()[0], i);
-            }
-        }
-    }
-
     // Test again with a split
     let (mut tx, mut rx) = can.split();
-    for i in 0..3 {
-        // Try filling up the RX FIFO0 buffers with standard packets
-        let tx_frame = can::frame::ClassicFrame::new_standard(0x123, &[i; 1]).unwrap();
-        info!("Transmitting frame {}", i);
-        tx.write(&tx_frame).await;
-    }
-    for i in 3..max_buffered {
-        // Try filling up the RX FIFO0 buffers with extended packets
-        let tx_frame = can::frame::ClassicFrame::new_extended(0x1232344, &[i; 1]).unwrap();
-        info!("Transmitting frame {}", i);
-        tx.write(&tx_frame).await;
-    }
-
-    // Try and receive all 6 packets
-    for i in 0..max_buffered {
-        let (frame, _ts) = rx.read().await.unwrap();
-        match frame.id() {
-            embedded_can::Id::Extended(id) => {
-                info!("Extended received! {:x} {} {}", id.as_raw(), frame.data()[0], i);
-            }
-            embedded_can::Id::Standard(id) => {
-                info!("Standard received! {:x} {} {}", id.as_raw(), frame.data()[0], i);
-            }
-        }
-    }
+    let (mut tx2, mut rx2) = can2.split();
+    run_split_can_tests(&mut tx, &mut rx, &options).await;
+    run_split_can_tests(&mut tx2, &mut rx2, &options).await;
 
     info!("Test OK");
     cortex_m::asm::bkpt();
diff --git a/tests/stm32/src/bin/ucpd.rs b/tests/stm32/src/bin/ucpd.rs
new file mode 100644
index 000000000..c09334ec8
--- /dev/null
+++ b/tests/stm32/src/bin/ucpd.rs
@@ -0,0 +1,120 @@
+// required-features: ucpd
+#![no_std]
+#![no_main]
+#[path = "../common.rs"]
+mod common;
+
+use common::*;
+use defmt::{assert, assert_eq};
+use embassy_executor::Spawner;
+use embassy_futures::join::join;
+use embassy_stm32::ucpd::{self, CcPhy, CcPull, CcSel, CcVState, RxError, Ucpd};
+use embassy_stm32::{bind_interrupts, peripherals};
+use embassy_time::Timer;
+
+bind_interrupts!(struct Irqs {
+    UCPD1_2 => ucpd::InterruptHandler<peripherals::UCPD1>, ucpd::InterruptHandler<peripherals::UCPD2>;
+});
+
+static SRC_TO_SNK: [u8; 6] = [0, 1, 2, 3, 4, 5];
+static SNK_TO_SRC: [u8; 4] = [9, 8, 7, 6];
+
+async fn wait_for_vstate<T: ucpd::Instance>(cc_phy: &mut CcPhy<'_, T>, vstate: CcVState) {
+    let (mut cc1, mut _cc2) = cc_phy.vstate();
+    while cc1 != vstate {
+        (cc1, _cc2) = cc_phy.wait_for_vstate_change().await;
+    }
+}
+
+async fn source(
+    mut ucpd: Ucpd<'static, peripherals::UCPD1>,
+    rx_dma: peripherals::DMA1_CH1,
+    tx_dma: peripherals::DMA1_CH2,
+) {
+    debug!("source: setting default current pull-up");
+    ucpd.cc_phy().set_pull(CcPull::SourceDefaultUsb);
+
+    // Wait for default sink.
+    debug!("source: wait for sink");
+    wait_for_vstate(ucpd.cc_phy(), CcVState::LOW).await;
+
+    // Advertise a higher current by changing the pull-up resistor.
+    debug!("source: sink detected, setting 3.0A current pull-up");
+    ucpd.cc_phy().set_pull(CcPull::Source3_0A);
+
+    let (_, mut pd_phy) = ucpd.split_pd_phy(rx_dma, tx_dma, CcSel::CC1);
+
+    // Listen for an incoming message
+    debug!("source: wait for message from sink");
+    let mut snk_to_src_buf = [0_u8; 30];
+    let n = unwrap!(pd_phy.receive(snk_to_src_buf.as_mut()).await);
+    assert_eq!(n, SNK_TO_SRC.len());
+    assert_eq!(&snk_to_src_buf[..n], SNK_TO_SRC.as_slice());
+
+    // Send message
+    debug!("source: message received, sending message");
+    unwrap!(pd_phy.transmit(SRC_TO_SNK.as_slice()).await);
+
+    // Wait for hard-reset
+    debug!("source: message sent, waiting for hard-reset");
+    assert!(matches!(
+        pd_phy.receive(snk_to_src_buf.as_mut()).await,
+        Err(RxError::HardReset)
+    ));
+}
+
+async fn sink(
+    mut ucpd: Ucpd<'static, peripherals::UCPD2>,
+    rx_dma: peripherals::DMA1_CH3,
+    tx_dma: peripherals::DMA1_CH4,
+) {
+    debug!("sink: setting pull down");
+    ucpd.cc_phy().set_pull(CcPull::Sink);
+
+    // Wait for default source.
+    debug!("sink: waiting for default vstate");
+    wait_for_vstate(ucpd.cc_phy(), CcVState::LOW).await;
+
+    // Wait higher current pull-up.
+    //debug!("sink: source default vstate detected, waiting for 3.0A vstate");
+    //wait_for_vstate(ucpd.cc_phy(), CcVState::HIGHEST).await;
+    //debug!("sink: source 3.0A vstate detected");
+    // TODO: not working yet, why? no idea, replace with timer for now
+    Timer::after_millis(100).await;
+
+    let (_, mut pd_phy) = ucpd.split_pd_phy(rx_dma, tx_dma, CcSel::CC1);
+
+    // Send message
+    debug!("sink: sending message");
+    unwrap!(pd_phy.transmit(SNK_TO_SRC.as_slice()).await);
+
+    // Listen for an incoming message
+    debug!("sink: message sent, waiting for message from source");
+    let mut src_to_snk_buf = [0_u8; 30];
+    let n = unwrap!(pd_phy.receive(src_to_snk_buf.as_mut()).await);
+    assert_eq!(n, SRC_TO_SNK.len());
+    assert_eq!(&src_to_snk_buf[..n], SRC_TO_SNK.as_slice());
+
+    // Send hard reset
+    debug!("sink: message received, sending hard-reset");
+    unwrap!(pd_phy.transmit_hardreset().await);
+}
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let p = embassy_stm32::init(config());
+    info!("Hello World!");
+
+    // Wire between PD0 and PA8
+    let ucpd1 = Ucpd::new(p.UCPD1, Irqs {}, p.PA8, p.PB15);
+    let ucpd2 = Ucpd::new(p.UCPD2, Irqs {}, p.PD0, p.PD2);
+
+    join(
+        source(ucpd1, p.DMA1_CH1, p.DMA1_CH2),
+        sink(ucpd2, p.DMA1_CH3, p.DMA1_CH4),
+    )
+    .await;
+
+    info!("Test OK");
+    cortex_m::asm::bkpt();
+}
diff --git a/tests/stm32/src/common.rs b/tests/stm32/src/common.rs
index 3297ea7e2..0e555efc8 100644
--- a/tests/stm32/src/common.rs
+++ b/tests/stm32/src/common.rs
@@ -140,6 +140,7 @@ define_peris!(
 );
 #[cfg(any(feature = "stm32h755zi", feature = "stm32h753zi"))]
 define_peris!(
+    CRYP_IN_DMA = DMA1_CH0, CRYP_OUT_DMA = DMA1_CH1,
     UART = USART1, UART_TX = PB6, UART_RX = PB7, UART_TX_DMA = DMA1_CH0, UART_RX_DMA = DMA1_CH1,
     SPI = SPI1, SPI_SCK = PA5, SPI_MOSI = PB5, SPI_MISO = PA6, SPI_TX_DMA = DMA1_CH0, SPI_RX_DMA = DMA1_CH1,
     ADC = ADC1, DAC = DAC1, DAC_PIN = PA4,
@@ -250,13 +251,6 @@ define_peris!(
 );
 
 pub fn config() -> Config {
-    // Setting this bit is mandatory to use PG[15:2].
-    #[cfg(feature = "stm32u5a5zj")]
-    embassy_stm32::pac::PWR.svmcr().modify(|w| {
-        w.set_io2sv(true);
-        w.set_io2vmen(true);
-    });
-
     #[allow(unused_mut)]
     let mut config = Config::default();