diff --git a/ci.sh b/ci.sh
index d86c93520..47bf5d660 100755
--- a/ci.sh
+++ b/ci.sh
@@ -49,6 +49,7 @@ cargo batch  \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features nightly,stm32f411ce,defmt,exti,time-driver-any,unstable-traits \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features nightly,stm32f413vh,defmt,exti,time-driver-any,unstable-traits \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features nightly,stm32f429zi,log,exti,time-driver-any,unstable-traits,embedded-sdmmc \
+    --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features nightly,stm32f730i8,defmt,exti,time-driver-any,unstable-traits \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features nightly,stm32h755zi-cm7,defmt,exti,time-driver-any,unstable-traits \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features nightly,stm32h7b3ai,defmt,exti,time-driver-any,unstable-traits \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7em-none-eabi --features nightly,stm32l476vg,defmt,exti,time-driver-any,unstable-traits \
@@ -65,6 +66,8 @@ cargo batch  \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7m-none-eabi --features nightly,stm32f107vc,defmt,exti,time-driver-any,unstable-traits \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7m-none-eabi --features nightly,stm32f103re,defmt,exti,time-driver-any,unstable-traits \
     --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7m-none-eabi --features nightly,stm32f100c4,defmt,exti,time-driver-any,unstable-traits \
+    --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7m-none-eabi --features nightly,stm32h503rb,defmt,exti,time-driver-any,unstable-traits \
+    --- build --release --manifest-path embassy-stm32/Cargo.toml --target thumbv7m-none-eabi --features nightly,stm32h562ag,defmt,exti,time-driver-any,unstable-traits \
     --- build --release --manifest-path embassy-boot/nrf/Cargo.toml --target thumbv7em-none-eabi --features embassy-nrf/nrf52840 \
     --- build --release --manifest-path embassy-boot/nrf/Cargo.toml --target thumbv8m.main-none-eabihf --features embassy-nrf/nrf9160-ns \
     --- build --release --manifest-path embassy-boot/rp/Cargo.toml --target thumbv6m-none-eabi \
@@ -86,6 +89,7 @@ cargo batch  \
     --- build --release --manifest-path examples/stm32c0/Cargo.toml --target thumbv6m-none-eabi --out-dir out/examples/stm32c0 \
     --- build --release --manifest-path examples/stm32g0/Cargo.toml --target thumbv6m-none-eabi --out-dir out/examples/stm32g0 \
     --- build --release --manifest-path examples/stm32g4/Cargo.toml --target thumbv7em-none-eabi --out-dir out/examples/stm32g4 \
+    --- build --release --manifest-path examples/stm32h5/Cargo.toml --target thumbv7em-none-eabi --out-dir out/examples/stm32h5 \
     --- build --release --manifest-path examples/stm32h7/Cargo.toml --target thumbv7em-none-eabi --out-dir out/examples/stm32h7 \
     --- build --release --manifest-path examples/stm32l0/Cargo.toml --target thumbv6m-none-eabi --out-dir out/examples/stm32l0 \
     --- build --release --manifest-path examples/stm32l1/Cargo.toml --target thumbv7m-none-eabi --out-dir out/examples/stm32l1 \
@@ -115,6 +119,7 @@ cargo batch  \
     --- build --release --manifest-path tests/stm32/Cargo.toml --target thumbv6m-none-eabi --features stm32g071rb --out-dir out/tests/nucleo-stm32g071rb \
     --- build --release --manifest-path tests/stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32h755zi --out-dir out/tests/nucleo-stm32h755zi \
     --- build --release --manifest-path tests/stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32wb55rg --out-dir out/tests/nucleo-stm32wb55rg \
+    --- build --release --manifest-path tests/stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32h563zi --out-dir out/tests/nucleo-stm32h563zi \
     --- build --release --manifest-path tests/stm32/Cargo.toml --target thumbv7em-none-eabi --features stm32u585ai --out-dir out/tests/iot-stm32u585ai \
     --- build --release --manifest-path tests/rp/Cargo.toml --target thumbv6m-none-eabi --out-dir out/tests/rpi-pico \
     --- build --release --manifest-path tests/nrf/Cargo.toml --target thumbv7em-none-eabi --out-dir out/tests/nrf52840-dk \
diff --git a/docs/modules/ROOT/examples/basic/Cargo.toml b/docs/modules/ROOT/examples/basic/Cargo.toml
index d9f8a285a..e3e446e63 100644
--- a/docs/modules/ROOT/examples/basic/Cargo.toml
+++ b/docs/modules/ROOT/examples/basic/Cargo.toml
@@ -6,7 +6,7 @@ version = "0.1.0"
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-embassy-executor = { version = "0.1.0", path = "../../../../../embassy-executor", features = ["defmt", "nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../../embassy-executor", features = ["defmt", "nightly", "integrated-timers", "arch-cortex-m", "executor-thread"] }
 embassy-time = { version = "0.1.0", path = "../../../../../embassy-time", features = ["defmt", "nightly"] }
 embassy-nrf = { version = "0.1.0", path = "../../../../../embassy-nrf", features = ["defmt", "nrf52840", "time-driver-rtc1", "gpiote", "nightly"] }
 
diff --git a/docs/modules/ROOT/examples/layer-by-layer/blinky-async/Cargo.toml b/docs/modules/ROOT/examples/layer-by-layer/blinky-async/Cargo.toml
index c9a963d4d..a11a7e0ba 100644
--- a/docs/modules/ROOT/examples/layer-by-layer/blinky-async/Cargo.toml
+++ b/docs/modules/ROOT/examples/layer-by-layer/blinky-async/Cargo.toml
@@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0"
 cortex-m = "0.7"
 cortex-m-rt = "0.7"
 embassy-stm32 = { version = "0.1.0", features = ["stm32l475vg", "memory-x", "exti"], default-features = false  }
-embassy-executor = { version = "0.1.0", default-features = false, features = ["nightly"] }
+embassy-executor = { version = "0.1.0", default-features = false, features = ["nightly", "arch-cortex-m", "executor-thread"] }
 
 defmt = "0.3.0"
 defmt-rtt = "0.3.0"
diff --git a/embassy-boot/boot/Cargo.toml b/embassy-boot/boot/Cargo.toml
index 04409cdc7..39f501570 100644
--- a/embassy-boot/boot/Cargo.toml
+++ b/embassy-boot/boot/Cargo.toml
@@ -24,6 +24,7 @@ features = ["defmt"]
 
 [dependencies]
 defmt = { version = "0.3", optional = true }
+digest = "0.10"
 log = { version = "0.4", optional = true  }
 ed25519-dalek = { version = "1.0.1", default_features = false, features = ["u32_backend"], optional = true }
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync" }
@@ -37,6 +38,7 @@ log = "0.4"
 env_logger = "0.9"
 rand = "0.7" # ed25519-dalek v1.0.1 depends on this exact version
 futures = { version = "0.3", features = ["executor"] }
+sha1 = "0.10.5"
 
 [dev-dependencies.ed25519-dalek]
 default_features = false
@@ -50,4 +52,4 @@ ed25519-salty = ["dep:salty", "_verify"]
 nightly = ["dep:embedded-storage-async"]
 
 #Internal features
-_verify = []
\ No newline at end of file
+_verify = []
diff --git a/embassy-boot/boot/src/boot_loader.rs b/embassy-boot/boot/src/boot_loader.rs
new file mode 100644
index 000000000..b959de2c4
--- /dev/null
+++ b/embassy-boot/boot/src/boot_loader.rs
@@ -0,0 +1,533 @@
+use embedded_storage::nor_flash::{ErrorType, NorFlash, NorFlashError, NorFlashErrorKind, ReadNorFlash};
+
+use crate::{Partition, State, BOOT_MAGIC, SWAP_MAGIC};
+
+/// Errors returned by the bootloader.
+#[derive(PartialEq, Eq, Debug)]
+pub enum BootError {
+    /// A flash operation failed; carries the `NorFlashErrorKind` reported by the driver error.
+    Flash(NorFlashErrorKind),
+    /// Invalid bootloader magic.
+    BadMagic,
+}
+
+#[cfg(feature = "defmt")] // only compiled when the `defmt` feature is enabled
+impl defmt::Format for BootError {
+    fn format(&self, fmt: defmt::Formatter) {
+        match self {
+            BootError::Flash(_) => defmt::write!(fmt, "BootError::Flash(_)"), // the inner kind is not printed
+            BootError::BadMagic => defmt::write!(fmt, "BootError::BadMagic"),
+        }
+    }
+}
+
+// Any flash driver error collapses to its kind, which keeps `BootError` free of generics.
+impl<E: NorFlashError> From<E> for BootError {
+    /// Wrap the `NorFlashErrorKind` reported by `error`.
+    fn from(error: E) -> Self {
+        let kind = error.kind();
+        Self::Flash(kind)
+    }
+}
+
+/// Trait defining the flash handles used for the active, DFU and state partitions.
+pub trait FlashConfig {
+    /// The erase value of the state flash. Typically the default of 0xFF is used, but some flashes use a different value.
+    const STATE_ERASE_VALUE: u8 = 0xFF;
+    /// Flash type used for the state partition.
+    type STATE: NorFlash;
+    /// Flash type used for the active partition.
+    type ACTIVE: NorFlash;
+    /// Flash type used for the DFU partition.
+    type DFU: NorFlash;
+
+    /// Return the flash instance used to read from and write to the active partition.
+    fn active(&mut self) -> &mut Self::ACTIVE;
+    /// Return the flash instance used to read from and write to the DFU partition.
+    fn dfu(&mut self) -> &mut Self::DFU;
+    /// Return the flash instance used to read from and write to the bootloader state partition.
+    fn state(&mut self) -> &mut Self::STATE;
+}
+
+trait FlashConfigEx {
+    fn page_size() -> u32; // provided for every `FlashConfig` by the blanket impl below
+}
+
+impl<T: FlashConfig> FlashConfigEx for T {
+    /// The bootloader's "unit of operation": the larger of the active and DFU erase sizes.
+    fn page_size() -> u32 {
+        T::ACTIVE::ERASE_SIZE.max(T::DFU::ERASE_SIZE) as u32
+    }
+}
+
+/// BootLoader works with any flash implementing embedded_storage.
+pub struct BootLoader {
+    // Partition holding the current state of the bootloader. It has the following format:
+    // All ranges are in multiples of WRITE_SIZE bytes.
+    // | Range    | Description                                                                      |
+    // | 0..1     | Magic indicating bootloader state. BOOT_MAGIC means boot, SWAP_MAGIC means swap. |
+    // | 1..2     | Progress validity. ERASE_VALUE means valid, !ERASE_VALUE means invalid.          |
+    // | 2..2 + N | Progress index used while swapping or reverting                                  |
+    state: Partition,
+    // Location of the partition which will be booted from.
+    active: Partition,
+    // Location of the partition which will be swapped in when requested.
+    dfu: Partition,
+}
+
+impl BootLoader {
+    /// Create a new instance of a bootloader with the given partitions.
+    ///
+    /// - The active and dfu partition sizes must be multiples of the page size (the larger of the two erase sizes).
+    /// - The dfu partition must be at least one page bigger than the active partition.
+    pub fn new(active: Partition, dfu: Partition, state: Partition) -> Self {
+        Self { active, dfu, state }
+    }
+
+    /// Return the start offset (`from`) of the active partition within the active flash.
+    pub fn boot_address(&self) -> usize {
+        self.active.from as usize
+    }
+
+    /// Perform necessary boot preparations like swapping images.
+    ///
+    /// The DFU partition is assumed to be 1 page bigger than the active partition for the swap
+    /// algorithm to work correctly.
+    ///
+    /// The provided aligned_buf argument must satisfy any alignment requirements
+    /// given by the partition flashes. All flash operations will use this buffer.
+    ///
+    /// SWAPPING
+    ///
+    /// Assume a flash size of 3 pages for the active partition, and 4 pages for the DFU partition.
+    /// The swap index contains the copy progress, as to allow continuation of the copy process on
+    /// power failure. The index counter is represented within 1 or more pages (depending on total
+    /// flash size), where a page X is considered swapped if index at location (X + WRITE_SIZE)
+    /// contains a zero value. This ensures that index updates can be performed atomically and
+    /// avoid a situation where the wrong index value is set (page write size is "atomic").
+    ///
+    /// +-----------+------------+--------+--------+--------+--------+
+    /// | Partition | Swap Index | Page 0 | Page 1 | Page 2 | Page 3 |
+    /// +-----------+------------+--------+--------+--------+--------+
+    /// |    Active |          0 |      1 |      2 |      3 |      - |
+    /// |       DFU |          0 |      3 |      2 |      1 |      X |
+    /// +-----------+------------+--------+--------+--------+--------+
+    ///
+    /// The algorithm starts by copying 'backwards', and after the first step, the layout is
+    /// as follows:
+    ///
+    /// +-----------+------------+--------+--------+--------+--------+
+    /// | Partition | Swap Index | Page 0 | Page 1 | Page 2 | Page 3 |
+    /// +-----------+------------+--------+--------+--------+--------+
+    /// |    Active |          1 |      1 |      2 |      1 |      - |
+    /// |       DFU |          1 |      3 |      2 |      1 |      3 |
+    /// +-----------+------------+--------+--------+--------+--------+
+    ///
+    /// The next iteration performs the same steps
+    ///
+    /// +-----------+------------+--------+--------+--------+--------+
+    /// | Partition | Swap Index | Page 0 | Page 1 | Page 2 | Page 3 |
+    /// +-----------+------------+--------+--------+--------+--------+
+    /// |    Active |          2 |      1 |      2 |      1 |      - |
+    /// |       DFU |          2 |      3 |      2 |      2 |      3 |
+    /// +-----------+------------+--------+--------+--------+--------+
+    ///
+    /// And again until we're done
+    ///
+    /// +-----------+------------+--------+--------+--------+--------+
+    /// | Partition | Swap Index | Page 0 | Page 1 | Page 2 | Page 3 |
+    /// +-----------+------------+--------+--------+--------+--------+
+    /// |    Active |          3 |      3 |      2 |      1 |      - |
+    /// |       DFU |          3 |      3 |      1 |      2 |      3 |
+    /// +-----------+------------+--------+--------+--------+--------+
+    ///
+    /// REVERTING
+    ///
+    /// The reverting algorithm uses the swap index to discover that images were swapped, but that
+    /// the application failed to mark the boot successful. In this case, the revert algorithm will
+    /// run.
+    ///
+    /// The revert index is located separately from the swap index, to ensure that revert can continue
+    /// on power failure.
+    ///
+    /// The revert algorithm works forwards, by starting copying into the 'unused' DFU page at the start.
+    ///
+    /// +-----------+--------------+--------+--------+--------+--------+
+    /// | Partition | Revert Index | Page 0 | Page 1 | Page 2 | Page 3 |
+    /// +-----------+--------------+--------+--------+--------+--------+
+    /// |    Active |            3 |      1 |      2 |      1 |      - |
+    /// |       DFU |            3 |      3 |      1 |      2 |      3 |
+    /// +-----------+--------------+--------+--------+--------+--------+
+    ///
+    ///
+    /// +-----------+--------------+--------+--------+--------+--------+
+    /// | Partition | Revert Index | Page 0 | Page 1 | Page 2 | Page 3 |
+    /// +-----------+--------------+--------+--------+--------+--------+
+    /// |    Active |            3 |      1 |      2 |      1 |      - |
+    /// |       DFU |            3 |      3 |      2 |      2 |      3 |
+    /// +-----------+--------------+--------+--------+--------+--------+
+    ///
+    /// +-----------+--------------+--------+--------+--------+--------+
+    /// | Partition | Revert Index | Page 0 | Page 1 | Page 2 | Page 3 |
+    /// +-----------+--------------+--------+--------+--------+--------+
+    /// |    Active |            3 |      1 |      2 |      3 |      - |
+    /// |       DFU |            3 |      3 |      2 |      1 |      3 |
+    /// +-----------+--------------+--------+--------+--------+--------+
+    ///
+    /// Panics if `aligned_buf` or the flash and partition sizes violate the requirements asserted at the start.
+    pub fn prepare_boot<P: FlashConfig>(&mut self, p: &mut P, aligned_buf: &mut [u8]) -> Result<State, BootError> {
+        // Ensure we have enough progress pages to store copy progress
+        assert_eq!(0, P::page_size() % aligned_buf.len() as u32);
+        assert_eq!(0, P::page_size() % P::ACTIVE::WRITE_SIZE as u32);
+        assert_eq!(0, P::page_size() % P::ACTIVE::ERASE_SIZE as u32);
+        assert_eq!(0, P::page_size() % P::DFU::WRITE_SIZE as u32);
+        assert_eq!(0, P::page_size() % P::DFU::ERASE_SIZE as u32);
+        assert!(aligned_buf.len() >= P::STATE::WRITE_SIZE);
+        assert_eq!(0, aligned_buf.len() % P::ACTIVE::WRITE_SIZE);
+        assert_eq!(0, aligned_buf.len() % P::DFU::WRITE_SIZE);
+        assert_partitions(self.active, self.dfu, self.state, P::page_size(), P::STATE::WRITE_SIZE);
+
+        // Copy contents from partition N to active
+        let state = self.read_state(p, aligned_buf)?;
+        if state == State::Swap {
+            //
+            // Check if we already swapped. If we're in the swap state, this means we should revert
+            // since the app has failed to mark boot as successful
+            //
+            if !self.is_swapped(p, aligned_buf)? {
+                trace!("Swapping");
+                self.swap(p, aligned_buf)?;
+                trace!("Swapping done");
+            } else {
+                trace!("Reverting");
+                self.revert(p, aligned_buf)?;
+
+                let state_flash = p.state();
+                let state_word = &mut aligned_buf[..P::STATE::WRITE_SIZE];
+
+                // Invalidate progress
+                state_word.fill(!P::STATE_ERASE_VALUE);
+                self.state
+                    .write_blocking(state_flash, P::STATE::WRITE_SIZE as u32, state_word)?;
+
+                // Clear magic and progress
+                self.state.wipe_blocking(state_flash)?;
+
+                // Set magic
+                state_word.fill(BOOT_MAGIC);
+                self.state.write_blocking(state_flash, 0, state_word)?;
+            }
+        }
+        Ok(state)
+    }
+
+    fn is_swapped<P: FlashConfig>(&mut self, p: &mut P, aligned_buf: &mut [u8]) -> Result<bool, BootError> {
+        // A full swap performs two copy steps per active page, hence the `* 2` threshold.
+        let swap_steps = (self.active.size() / P::page_size()) as usize * 2;
+        let done_steps = self.current_progress(p, aligned_buf)?;
+        Ok(done_steps >= swap_steps)
+    }
+
+    fn current_progress<P: FlashConfig>(&mut self, config: &mut P, aligned_buf: &mut [u8]) -> Result<usize, BootError> {
+        let word_len = P::STATE::WRITE_SIZE as u32;
+        let max_index = (((self.state.size() - word_len) / word_len) - 2) as usize;
+        let flash = config.state();
+        let word = &mut aligned_buf[..word_len as usize];
+
+        // Word 1 is the validity marker: unless it is fully erased, the progress is invalid.
+        self.state.read_blocking(flash, word_len, word)?;
+        if !word.iter().all(|&b| b == P::STATE_ERASE_VALUE) {
+            return Ok(max_index);
+        }
+        // Progress words start at word 2; the first one still holding an erased byte is the current step.
+        let mut index = 0;
+        while index < max_index {
+            self.state.read_blocking(flash, (2 + index) as u32 * word_len, word)?;
+            if word.contains(&P::STATE_ERASE_VALUE) {
+                return Ok(index);
+            }
+            index += 1;
+        }
+        Ok(max_index)
+    }
+
+    fn update_progress<P: FlashConfig>(
+        &mut self,
+        progress_index: usize,
+        p: &mut P,
+        aligned_buf: &mut [u8],
+    ) -> Result<(), BootError> {
+        // A step is recorded by programming its progress word to the inverse of the erase value.
+        let word_len = P::STATE::WRITE_SIZE;
+        let marker = &mut aligned_buf[..word_len];
+        marker.fill(!P::STATE_ERASE_VALUE);
+        // Progress words follow the magic and validity words, hence the offset of two words.
+        let offset = (2 + progress_index) as u32 * word_len as u32;
+        self.state.write_blocking(p.state(), offset, marker)?;
+        Ok(())
+    }
+
+    fn copy_page_once_to_active<P: FlashConfig>(
+        &mut self,
+        progress_index: usize,
+        from_offset: u32,
+        to_offset: u32,
+        p: &mut P,
+        aligned_buf: &mut [u8],
+    ) -> Result<(), BootError> {
+        // Nothing to do when the persisted progress is already past this step.
+        if self.current_progress(p, aligned_buf)? > progress_index {
+            return Ok(());
+        }
+        let page_size = P::page_size();
+        let page_end = to_offset + page_size;
+        self.active.erase_blocking(p.active(), to_offset, page_end)?;
+
+        // Copy the page from DFU to active, one buffer-sized chunk at a time.
+        for chunk_start in (0..page_size).step_by(aligned_buf.len()) {
+            self.dfu.read_blocking(p.dfu(), from_offset + chunk_start, aligned_buf)?;
+            self.active.write_blocking(p.active(), to_offset + chunk_start, aligned_buf)?;
+        }
+
+        // The step is recorded only after the whole page has been written.
+        self.update_progress(progress_index, p, aligned_buf)
+    }
+
+    fn copy_page_once_to_dfu<P: FlashConfig>(
+        &mut self,
+        progress_index: usize,
+        from_offset: u32,
+        to_offset: u32,
+        p: &mut P,
+        aligned_buf: &mut [u8],
+    ) -> Result<(), BootError> {
+        // Nothing to do when the persisted progress is already past this step.
+        if self.current_progress(p, aligned_buf)? > progress_index {
+            return Ok(());
+        }
+        let page_size = P::page_size();
+        let page_end = to_offset + page_size;
+        self.dfu.erase_blocking(p.dfu(), to_offset, page_end)?;
+
+        // Copy the page from active to DFU, one buffer-sized chunk at a time.
+        for chunk_start in (0..page_size).step_by(aligned_buf.len()) {
+            self.active.read_blocking(p.active(), from_offset + chunk_start, aligned_buf)?;
+            self.dfu.write_blocking(p.dfu(), to_offset + chunk_start, aligned_buf)?;
+        }
+
+        // The step is recorded only after the whole page has been written.
+        self.update_progress(progress_index, p, aligned_buf)
+    }
+
+    fn swap<P: FlashConfig>(&mut self, p: &mut P, aligned_buf: &mut [u8]) -> Result<(), BootError> {
+        let page_size = P::page_size();
+        let page_count = self.active.size() / page_size;
+        // The active partition is walked from its last page down to its first.
+        for page_num in 0..page_count {
+            // Two progress steps per page: one for each copy below.
+            let step = (page_num * 2) as usize;
+            let page = page_count - 1 - page_num;
+            let page_offset = page * page_size;
+
+            // Step 1: save the active page into the DFU page one slot further up.
+            let dfu_to_offset = (page + 1) * page_size;
+            self.copy_page_once_to_dfu(step, page_offset, dfu_to_offset, p, aligned_buf)?;
+
+            // Step 2: copy the DFU page at the same index into the active page.
+            // Source and destination share one offset, each within its own partition.
+            self.copy_page_once_to_active(step + 1, page_offset, page_offset, p, aligned_buf)?;
+        }
+
+        Ok(())
+    }
+
+    fn revert<P: FlashConfig>(&mut self, p: &mut P, aligned_buf: &mut [u8]) -> Result<(), BootError> {
+        let page_size = P::page_size();
+        let page_count = self.active.size() / page_size;
+        // Unlike the swap, the revert walks the pages front to back.
+        for page_num in 0..page_count {
+            // Revert steps are numbered after the `page_count * 2` steps used by the swap.
+            let step = (page_count * 2 + page_num * 2) as usize;
+            let page_offset = page_num * page_size;
+
+            // Step 1: copy the bad active page to the DFU page with the same index.
+            self.copy_page_once_to_dfu(step, page_offset, page_offset, p, aligned_buf)?;
+
+            // Step 2: restore the active page from the DFU page one slot further up.
+            let dfu_from_offset = (page_num + 1) * page_size;
+            self.copy_page_once_to_active(step + 1, dfu_from_offset, page_offset, p, aligned_buf)?;
+        }
+
+        Ok(())
+    }
+
+    fn read_state<P: FlashConfig>(&mut self, config: &mut P, aligned_buf: &mut [u8]) -> Result<State, BootError> {
+        // The magic is the first word of the state partition; every byte of it must be SWAP_MAGIC.
+        let magic = &mut aligned_buf[..P::STATE::WRITE_SIZE];
+        self.state.read_blocking(config.state(), 0, magic)?;
+        if magic.iter().all(|&b| b == SWAP_MAGIC) {
+            Ok(State::Swap)
+        } else {
+            Ok(State::Boot)
+        }
+    }
+}
+
+fn assert_partitions(active: Partition, dfu: Partition, state: Partition, page_size: u32, state_write_size: usize) {
+    assert_eq!(active.size() % page_size, 0); // the active partition must consist of whole pages
+    assert_eq!(dfu.size() % page_size, 0); // the dfu partition must consist of whole pages
+    assert!(dfu.size() >= active.size() && dfu.size() - active.size() >= page_size); // compare first: the subtraction wraps without overflow checks
+    assert!(2 + 2 * (active.size() / page_size) <= state.size() / state_write_size as u32); // magic + validity + two progress words per page
+}
+
+/// A flash wrapper implementing the Flash and embedded_storage traits.
+///
+/// Sizes and operations are all taken from, or forwarded to, the wrapped flash `F`.
+pub struct BootFlash<F>
+where
+    F: NorFlash,
+{
+    // The wrapped flash driver.
+    flash: F,
+}
+
+impl<F: NorFlash> BootFlash<F> {
+    /// Create a new instance of a bootable flash
+    pub fn new(flash: F) -> Self {
+        Self { flash }
+    }
+}
+
+// Errors are exactly those of the wrapped flash.
+impl<F: NorFlash> ErrorType for BootFlash<F> {
+    type Error = F::Error;
+}
+
+// Erase and write are forwarded unchanged.
+impl<F: NorFlash> NorFlash for BootFlash<F> {
+    // Write and erase granularity are inherited from the wrapped flash.
+    const WRITE_SIZE: usize = F::WRITE_SIZE;
+    const ERASE_SIZE: usize = F::ERASE_SIZE;
+
+    /// Erase on the wrapped flash, passing `from` and `to` through unchanged.
+    fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
+        self.flash.erase(from, to)
+    }
+
+    /// Write `bytes` at `offset` on the wrapped flash.
+    fn write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Self::Error> {
+        self.flash.write(offset, bytes)
+    }
+}
+
+// Reads and capacity queries are forwarded unchanged.
+impl<F: NorFlash> ReadNorFlash for BootFlash<F> {
+    // Read granularity is inherited from the wrapped flash.
+    const READ_SIZE: usize = F::READ_SIZE;
+
+    /// Read into `bytes` starting at `offset` on the wrapped flash.
+    fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> {
+        self.flash.read(offset, bytes)
+    }
+
+    /// Report the capacity of the wrapped flash.
+    fn capacity(&self) -> usize {
+        self.flash.capacity()
+    }
+}
+
+/// Convenience provider that uses a single flash for all partitions.
+///
+/// The flash is only borrowed, for the lifetime `'a` of the provider.
+pub struct SingleFlashConfig<'a, F: NorFlash> {
+    // The one flash handed out for the active, DFU and state partitions alike.
+    flash: &'a mut F,
+}
+
+impl<'a, F: NorFlash> SingleFlashConfig<'a, F> {
+    /// Create a provider for a single flash.
+    ///
+    /// `flash` stays mutably borrowed for as long as the provider is alive, so the
+    /// caller cannot use it directly in the meantime.
+    pub fn new(flash: &'a mut F) -> Self {
+        Self { flash }
+    }
+}
+
+// All three partitions live on the same flash, so every accessor hands out the one handle.
+impl<'a, F: NorFlash> FlashConfig for SingleFlashConfig<'a, F> {
+    type STATE = F;
+    type ACTIVE = F;
+    type DFU = F;
+
+    // Each accessor names the associated type the trait declares for it. All three are `F`,
+    // so a mismatch would still compile; keeping them aligned avoids misleading readers.
+    fn active(&mut self) -> &mut Self::ACTIVE {
+        self.flash
+    }
+    fn dfu(&mut self) -> &mut Self::DFU {
+        self.flash
+    }
+    fn state(&mut self) -> &mut Self::STATE {
+        self.flash
+    }
+}
+
+/// Convenience flash provider that uses separate flash instances for each partition.
+///
+/// Every flash is only borrowed, for the lifetime `'a` of the provider.
+pub struct MultiFlashConfig<'a, ACTIVE: NorFlash, STATE: NorFlash, DFU: NorFlash> {
+    // Flash holding the active partition.
+    active: &'a mut ACTIVE,
+    // Flash holding the bootloader state partition.
+    state: &'a mut STATE,
+    // Flash holding the DFU partition.
+    dfu: &'a mut DFU,
+}
+
+impl<'a, ACTIVE: NorFlash, STATE: NorFlash, DFU: NorFlash> MultiFlashConfig<'a, ACTIVE, STATE, DFU> {
+    /// Create a new flash provider with separate configuration for all three partitions.
+    ///
+    /// Mind the argument order: `active`, then `state`, then `dfu`.
+    ///
+    /// All three flashes stay mutably borrowed for as long as the provider is alive,
+    /// so the caller cannot use them directly in the meantime.
+    pub fn new(active: &'a mut ACTIVE, state: &'a mut STATE, dfu: &'a mut DFU) -> Self {
+        Self { active, state, dfu }
+    }
+}
+
+// Each partition is served by the flash that was supplied for it.
+impl<'a, ACTIVE: NorFlash, STATE: NorFlash, DFU: NorFlash> FlashConfig for MultiFlashConfig<'a, ACTIVE, STATE, DFU> {
+    type STATE = STATE;
+    type ACTIVE = ACTIVE;
+    type DFU = DFU;
+
+    // The accessors only reborrow the stored handles.
+    /// Flash supplied for the active partition.
+    fn active(&mut self) -> &mut Self::ACTIVE {
+        self.active
+    }
+    /// Flash supplied for the DFU partition.
+    fn dfu(&mut self) -> &mut Self::DFU {
+        self.dfu
+    }
+    /// Flash supplied for the bootloader state partition.
+    fn state(&mut self) -> &mut Self::STATE {
+        self.state
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[should_panic] // presumably trips the state-size check; confirm against `Partition::size`
+    fn test_range_asserts() {
+        let active = Partition::new(4096, 4194304);
+        let dfu = Partition::new(4194304, 2 * 4194304);
+        let state = Partition::new(0, 4096);
+        assert_partitions(active, dfu, state, 4096, 4);
+    }
+}
diff --git a/embassy-boot/boot/src/digest_adapters/ed25519_dalek.rs b/embassy-boot/boot/src/digest_adapters/ed25519_dalek.rs
new file mode 100644
index 000000000..a184d1c51
--- /dev/null
+++ b/embassy-boot/boot/src/digest_adapters/ed25519_dalek.rs
@@ -0,0 +1,30 @@
+use digest::typenum::U64;
+use digest::{FixedOutput, HashMarker, OutputSizeUser, Update};
+use ed25519_dalek::Digest as _;
+
+pub struct Sha512(ed25519_dalek::Sha512); // newtype over `ed25519_dalek::Sha512` carrying the `digest` trait impls below
+
+impl Default for Sha512 {
+    fn default() -> Self {
+        Self(ed25519_dalek::Sha512::new()) // start from a fresh hasher
+    }
+}
+
+impl Update for Sha512 {
+    fn update(&mut self, data: &[u8]) {
+        self.0.update(data) // feed the bytes to the wrapped hasher
+    }
+}
+
+impl FixedOutput for Sha512 {
+    fn finalize_into(self, out: &mut digest::Output<Self>) {
+        let result = self.0.finalize();
+        out.as_mut_slice().copy_from_slice(result.as_slice()) // copy_from_slice panics unless both lengths match
+    }
+}
+
+impl OutputSizeUser for Sha512 {
+    type OutputSize = U64; // output length as a type-level number: 64
+}
+
+impl HashMarker for Sha512 {}
diff --git a/embassy-boot/boot/src/digest_adapters/mod.rs b/embassy-boot/boot/src/digest_adapters/mod.rs
new file mode 100644
index 000000000..9b4b4b60c
--- /dev/null
+++ b/embassy-boot/boot/src/digest_adapters/mod.rs
@@ -0,0 +1,5 @@
+#[cfg(feature = "ed25519-dalek")]
+pub(crate) mod ed25519_dalek;
+
+#[cfg(feature = "ed25519-salty")]
+pub(crate) mod salty;
diff --git a/embassy-boot/boot/src/digest_adapters/salty.rs b/embassy-boot/boot/src/digest_adapters/salty.rs
new file mode 100644
index 000000000..2b5dcf3af
--- /dev/null
+++ b/embassy-boot/boot/src/digest_adapters/salty.rs
@@ -0,0 +1,29 @@
+use digest::typenum::U64;
+use digest::{FixedOutput, HashMarker, OutputSizeUser, Update};
+
+pub struct Sha512(salty::Sha512); // newtype over `salty::Sha512` carrying the `digest` trait impls below
+
+impl Default for Sha512 {
+    fn default() -> Self {
+        Self(salty::Sha512::new()) // start from a fresh hasher
+    }
+}
+
+impl Update for Sha512 {
+    fn update(&mut self, data: &[u8]) {
+        self.0.update(data) // feed the bytes to the wrapped hasher
+    }
+}
+
+impl FixedOutput for Sha512 {
+    fn finalize_into(self, out: &mut digest::Output<Self>) {
+        let result = self.0.finalize();
+        out.as_mut_slice().copy_from_slice(result.as_slice()) // copy_from_slice panics unless both lengths match
+    }
+}
+
+impl OutputSizeUser for Sha512 {
+    type OutputSize = U64; // output length as a type-level number: 64
+}
+
+impl HashMarker for Sha512 {}
diff --git a/embassy-boot/boot/src/firmware_updater.rs b/embassy-boot/boot/src/firmware_updater.rs
new file mode 100644
index 000000000..a2f822f4a
--- /dev/null
+++ b/embassy-boot/boot/src/firmware_updater.rs
@@ -0,0 +1,534 @@
+use digest::Digest;
+use embedded_storage::nor_flash::{NorFlash, NorFlashError, NorFlashErrorKind};
+use embedded_storage_async::nor_flash::NorFlash as AsyncNorFlash;
+
+use crate::{Partition, State, BOOT_MAGIC, SWAP_MAGIC};
+
+/// Errors returned by [`FirmwareUpdater`] operations.
+#[derive(Debug)]
+pub enum FirmwareUpdaterError {
+    /// A flash operation failed; carries the error kind reported by the flash driver.
+    Flash(NorFlashErrorKind),
+    /// A key or signature could not be parsed, or signature verification failed.
+    Signature(signature::Error),
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for FirmwareUpdaterError {
+    fn format(&self, fmt: defmt::Formatter) {
+        // Only the variant name is logged; the wrapped error is not formatted.
+        match self {
+            Self::Flash(_) => defmt::write!(fmt, "FirmwareUpdaterError::Flash(_)"),
+            Self::Signature(_) => defmt::write!(fmt, "FirmwareUpdaterError::Signature(_)"),
+        }
+    }
+}
+
+/// Lets `?` turn any flash driver error into [`FirmwareUpdaterError::Flash`], keeping only its
+/// [`NorFlashErrorKind`].
+impl<E: NorFlashError> From<E> for FirmwareUpdaterError {
+    fn from(error: E) -> Self {
+        Self::Flash(error.kind())
+    }
+}
+
+/// Application-side API for writing a firmware update and updating the bootloader state, without
+/// the ability to 'mess up' the internal bootloader state.
+pub struct FirmwareUpdater {
+    state: Partition, // holds the boot/swap magic read and written by `set_magic`
+    dfu: Partition, // receives the firmware image written by `write_firmware`
+}
+
+impl Default for FirmwareUpdater {
+    /// Build an updater from the `__bootloader_dfu_start`/`__bootloader_dfu_end` and
+    /// `__bootloader_state_start`/`__bootloader_state_end` symbols, which must be resolvable at
+    /// link time.
+    fn default() -> Self {
+        // Only the addresses of these symbols are meaningful; they delimit the two partitions.
+        extern "C" {
+            static __bootloader_state_start: u32;
+            static __bootloader_state_end: u32;
+            static __bootloader_dfu_start: u32;
+            static __bootloader_dfu_end: u32;
+        }
+
+        // SAFETY: the symbols are only used for their addresses; their values are never read.
+        let (dfu, state) = unsafe {
+            let dfu_start = &__bootloader_dfu_start as *const u32 as u32;
+            let dfu_end = &__bootloader_dfu_end as *const u32 as u32;
+            let state_start = &__bootloader_state_start as *const u32 as u32;
+            let state_end = &__bootloader_state_end as *const u32 as u32;
+            (Partition::new(dfu_start, dfu_end), Partition::new(state_start, state_end))
+        };
+
+        trace!("DFU: 0x{:x} - 0x{:x}", dfu.from, dfu.to);
+        trace!("STATE: 0x{:x} - 0x{:x}", state.from, state.to);
+        FirmwareUpdater::new(dfu, state)
+    }
+}
+
+impl FirmwareUpdater {
+    /// Create a firmware updater from the DFU (update) partition and the state partition, in that order.
+    pub const fn new(dfu: Partition, state: Partition) -> Self {
+        FirmwareUpdater { state, dfu }
+    }
+
+    /// Read the bootloader state from the start of the state partition.
+    ///
+    /// Reports [`State::Swap`] only when every byte read into `aligned` equals the swap magic,
+    /// and [`State::Boot`] otherwise. This is useful to check if the bootloader has just done a
+    /// swap, in order to self-test the new image before calling `mark_booted`.
+    pub async fn get_state<F: AsyncNorFlash>(
+        &mut self,
+        state_flash: &mut F,
+        aligned: &mut [u8],
+    ) -> Result<State, FirmwareUpdaterError> {
+        self.state.read(state_flash, 0, aligned).await?;
+
+        // Swap is reported only if the whole buffer holds the swap magic.
+        let is_swap = aligned.iter().all(|&b| b == SWAP_MAGIC);
+        let state = if is_swap { State::Swap } else { State::Boot };
+
+        Ok(state)
+    }
+
+    /// Verify the DFU given a public key. If there is an error then DO NOT
+    /// proceed with updating the firmware as it must be signed with a
+    /// corresponding private key (otherwise it could be malicious firmware).
+    ///
+    /// Mark to trigger firmware swap on next boot if verify succeeds.
+    ///
+    /// If the "ed25519-salty" feature is set (or another similar feature) then the signature is expected to have
+    /// been generated from a SHA-512 digest of the firmware bytes.
+    ///
+    /// This method is only compiled when the `_verify` feature is enabled; each verification step is
+    /// additionally gated on its own feature (`ed25519-dalek` or `ed25519-salty`).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `_aligned.len()` is not `F::WRITE_SIZE` or if `_update_len` exceeds the size of the DFU
+    /// partition. `_aligned` must also follow the alignment rules for the flash being read from and written to.
+    #[cfg(feature = "_verify")]
+    pub async fn verify_and_mark_updated<F: AsyncNorFlash>(
+        &mut self,
+        _state_and_dfu_flash: &mut F,
+        _public_key: &[u8],
+        _signature: &[u8],
+        _update_len: u32,
+        _aligned: &mut [u8],
+    ) -> Result<(), FirmwareUpdaterError> {
+        assert_eq!(_aligned.len(), F::WRITE_SIZE);
+        assert!(_update_len <= self.dfu.size());
+
+        #[cfg(feature = "ed25519-dalek")]
+        {
+            use ed25519_dalek::{PublicKey, Signature, SignatureError, Verifier};
+
+            use crate::digest_adapters::ed25519_dalek::Sha512;
+
+            let into_signature_error = |e: SignatureError| FirmwareUpdaterError::Signature(e.into());
+
+            let public_key = PublicKey::from_bytes(_public_key).map_err(into_signature_error)?;
+            let signature = Signature::from_bytes(_signature).map_err(into_signature_error)?;
+            // The verified message is the digest of the first `_update_len` bytes of the DFU partition.
+            let mut message = [0; 64];
+            self.hash::<_, Sha512>(_state_and_dfu_flash, _update_len, _aligned, &mut message)
+                .await?;
+
+            public_key.verify(&message, &signature).map_err(into_signature_error)?
+        }
+        #[cfg(feature = "ed25519-salty")]
+        {
+            use salty::constants::{PUBLICKEY_SERIALIZED_LENGTH, SIGNATURE_SERIALIZED_LENGTH};
+            use salty::{PublicKey, Signature};
+
+            use crate::digest_adapters::salty::Sha512;
+
+            fn into_signature_error<E>(_: E) -> FirmwareUpdaterError {
+                FirmwareUpdaterError::Signature(signature::Error::default())
+            }
+
+            let public_key: [u8; PUBLICKEY_SERIALIZED_LENGTH] = _public_key.try_into().map_err(into_signature_error)?;
+            let public_key = PublicKey::try_from(&public_key).map_err(into_signature_error)?;
+            let signature: [u8; SIGNATURE_SERIALIZED_LENGTH] = _signature.try_into().map_err(into_signature_error)?;
+            let signature = Signature::try_from(&signature).map_err(into_signature_error)?;
+            // The verified message is the digest of the first `_update_len` bytes of the DFU partition.
+            let mut message = [0; 64];
+            self.hash::<_, Sha512>(_state_and_dfu_flash, _update_len, _aligned, &mut message)
+                .await?;
+
+            let r = public_key.verify(&message, &signature);
+            trace!(
+                "Verifying with public key {}, signature {} and message {} yields ok: {}",
+                public_key.to_bytes(),
+                signature.to_bytes(),
+                message,
+                r.is_ok()
+            );
+            r.map_err(into_signature_error)?
+        }
+        // Reached only if no enabled verification block returned an error: request the swap.
+        self.set_magic(_aligned, SWAP_MAGIC, _state_and_dfu_flash).await
+    }
+
+    /// Hash the first `update_len` bytes of the DFU partition with digest `D`, writing the result to `output`.
+    pub async fn hash<F: AsyncNorFlash, D: Digest>(
+        &mut self,
+        dfu_flash: &mut F,
+        update_len: u32,
+        chunk_buf: &mut [u8],
+        output: &mut [u8],
+    ) -> Result<(), FirmwareUpdaterError> {
+        let mut hasher = D::new();
+        for offset in (0..update_len).step_by(chunk_buf.len()) {
+            self.dfu.read(dfu_flash, offset, chunk_buf).await?;
+            // A full chunk is always read; only the bytes inside the update are hashed.
+            hasher.update(&chunk_buf[..chunk_buf.len().min((update_len - offset) as usize)]);
+        }
+        output.copy_from_slice(hasher.finalize().as_slice());
+        Ok(())
+    }
+
+    /// Mark to trigger firmware swap on next boot (only available without the `_verify` feature).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `aligned.len()` is not `F::WRITE_SIZE`. The buffer must also follow the alignment rules for the flash being written to.
+    #[cfg(not(feature = "_verify"))]
+    pub async fn mark_updated<F: AsyncNorFlash>(
+        &mut self,
+        state_flash: &mut F,
+        aligned: &mut [u8],
+    ) -> Result<(), FirmwareUpdaterError> {
+        assert_eq!(aligned.len(), F::WRITE_SIZE);
+        self.set_magic(aligned, SWAP_MAGIC, state_flash).await
+    }
+
+    /// Mark firmware boot successful and stop rollback on reset.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `aligned.len()` is not `F::WRITE_SIZE`. The buffer must also follow the alignment rules for the flash being written to.
+    pub async fn mark_booted<F: AsyncNorFlash>(
+        &mut self,
+        state_flash: &mut F,
+        aligned: &mut [u8],
+    ) -> Result<(), FirmwareUpdaterError> {
+        assert_eq!(aligned.len(), F::WRITE_SIZE);
+        self.set_magic(aligned, BOOT_MAGIC, state_flash).await
+    }
+
+    /// Store `magic` at the start of the state partition unless it is already there. A rewrite
+    /// invalidates the progress marker, wipes the state partition and then writes the magic.
+    async fn set_magic<F: AsyncNorFlash>(
+        &mut self,
+        aligned: &mut [u8],
+        magic: u8,
+        state_flash: &mut F,
+    ) -> Result<(), FirmwareUpdaterError> {
+        // FIXME: Do not make this assumption.
+        const STATE_ERASE_VALUE: u8 = 0xFF;
+
+        self.state.read(state_flash, 0, aligned).await?;
+        if aligned.iter().all(|&b| b == magic) {
+            return Ok(());
+        }
+
+        // Read progress validity; anything but a fully erased marker is already invalid.
+        self.state.read(state_flash, F::WRITE_SIZE as u32, aligned).await?;
+        if aligned.iter().all(|&b| b == STATE_ERASE_VALUE) {
+            // Invalidate progress
+            aligned.fill(!STATE_ERASE_VALUE);
+            self.state.write(state_flash, F::WRITE_SIZE as u32, aligned).await?;
+        }
+
+        // Clear magic and progress
+        self.state.wipe(state_flash).await?;
+
+        // Set magic
+        aligned.fill(magic);
+        self.state.write(state_flash, 0, aligned).await?;
+        Ok(())
+    }
+
+    /// Erase the DFU range `offset..offset + data.len()` and write `data` at `offset`.
+    ///
+    /// The buffer must follow the alignment requirements of the target flash and be a multiple of the page size.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data` is shorter than `F::ERASE_SIZE`; other alignment or size violations may also panic.
+    pub async fn write_firmware<F: AsyncNorFlash>(
+        &mut self,
+        offset: usize,
+        data: &[u8],
+        dfu_flash: &mut F,
+    ) -> Result<(), FirmwareUpdaterError> {
+        assert!(data.len() >= F::ERASE_SIZE);
+
+        // The erased range is exactly the range that is written afterwards.
+        let start = offset as u32;
+        let end = (offset + data.len()) as u32;
+        self.dfu.erase(dfu_flash, start, end).await?;
+        self.dfu.write(dfu_flash, start, data).await?;
+
+        Ok(())
+    }
+
+    /// Prepare for an incoming DFU update by wiping the entire DFU partition and
+    /// returning a copy of its [`Partition`].
+    ///
+    /// Using this instead of `write_firmware` allows for an optimized API in
+    /// exchange for added complexity.
+    pub async fn prepare_update<F: AsyncNorFlash>(
+        &mut self,
+        dfu_flash: &mut F,
+    ) -> Result<Partition, FirmwareUpdaterError> {
+        self.dfu.wipe(dfu_flash).await?;
+        // The partition is handed back by value; the updater keeps its own copy.
+        Ok(self.dfu)
+    }
+
+    //
+    // Blocking API
+    //
+
+    /// Blocking variant of `get_state`: read the bootloader state from the start of the state partition.
+    ///
+    /// Reports [`State::Swap`] only when every byte read into `aligned` equals the swap magic,
+    /// and [`State::Boot`] otherwise. This is useful to check if the bootloader has just done a
+    /// swap, in order to self-test the new image before calling `mark_booted`.
+    pub fn get_state_blocking<F: NorFlash>(
+        &mut self,
+        state_flash: &mut F,
+        aligned: &mut [u8],
+    ) -> Result<State, FirmwareUpdaterError> {
+        self.state.read_blocking(state_flash, 0, aligned)?;
+
+        // Swap is reported only if the whole buffer holds the swap magic.
+        let is_swap = aligned.iter().all(|&b| b == SWAP_MAGIC);
+        let state = if is_swap { State::Swap } else { State::Boot };
+
+        Ok(state)
+    }
+
+    /// Verify the DFU given a public key. If there is an error then DO NOT
+    /// proceed with updating the firmware as it must be signed with a
+    /// corresponding private key (otherwise it could be malicious firmware).
+    ///
+    /// Mark to trigger firmware swap on next boot if verify succeeds.
+    ///
+    /// If the "ed25519-salty" feature is set (or another similar feature) then the signature is expected to have
+    /// been generated from a SHA-512 digest of the firmware bytes.
+    ///
+    /// This method is only compiled when the `_verify` feature is enabled; each verification step is
+    /// additionally gated on its own feature (`ed25519-dalek` or `ed25519-salty`).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `_aligned.len()` is not `F::WRITE_SIZE` or if `_update_len` exceeds the size of the DFU
+    /// partition. `_aligned` must also follow the alignment rules for the flash being read from and written to.
+    #[cfg(feature = "_verify")]
+    pub fn verify_and_mark_updated_blocking<F: NorFlash>(
+        &mut self,
+        _state_and_dfu_flash: &mut F,
+        _public_key: &[u8],
+        _signature: &[u8],
+        _update_len: u32,
+        _aligned: &mut [u8],
+    ) -> Result<(), FirmwareUpdaterError> {
+        assert_eq!(_aligned.len(), F::WRITE_SIZE);
+        assert!(_update_len <= self.dfu.size());
+
+        #[cfg(feature = "ed25519-dalek")]
+        {
+            use ed25519_dalek::{PublicKey, Signature, SignatureError, Verifier};
+
+            use crate::digest_adapters::ed25519_dalek::Sha512;
+
+            let into_signature_error = |e: SignatureError| FirmwareUpdaterError::Signature(e.into());
+
+            let public_key = PublicKey::from_bytes(_public_key).map_err(into_signature_error)?;
+            let signature = Signature::from_bytes(_signature).map_err(into_signature_error)?;
+            // The verified message is the digest of the first `_update_len` bytes of the DFU partition.
+            let mut message = [0; 64];
+            self.hash_blocking::<_, Sha512>(_state_and_dfu_flash, _update_len, _aligned, &mut message)?;
+
+            public_key.verify(&message, &signature).map_err(into_signature_error)?
+        }
+        #[cfg(feature = "ed25519-salty")]
+        {
+            use salty::constants::{PUBLICKEY_SERIALIZED_LENGTH, SIGNATURE_SERIALIZED_LENGTH};
+            use salty::{PublicKey, Signature};
+
+            use crate::digest_adapters::salty::Sha512;
+
+            fn into_signature_error<E>(_: E) -> FirmwareUpdaterError {
+                FirmwareUpdaterError::Signature(signature::Error::default())
+            }
+
+            let public_key: [u8; PUBLICKEY_SERIALIZED_LENGTH] = _public_key.try_into().map_err(into_signature_error)?;
+            let public_key = PublicKey::try_from(&public_key).map_err(into_signature_error)?;
+            let signature: [u8; SIGNATURE_SERIALIZED_LENGTH] = _signature.try_into().map_err(into_signature_error)?;
+            let signature = Signature::try_from(&signature).map_err(into_signature_error)?;
+            // The verified message is the digest of the first `_update_len` bytes of the DFU partition.
+            let mut message = [0; 64];
+            self.hash_blocking::<_, Sha512>(_state_and_dfu_flash, _update_len, _aligned, &mut message)?;
+
+            let r = public_key.verify(&message, &signature);
+            trace!(
+                "Verifying with public key {}, signature {} and message {} yields ok: {}",
+                public_key.to_bytes(),
+                signature.to_bytes(),
+                message,
+                r.is_ok()
+            );
+            r.map_err(into_signature_error)?
+        }
+        // Reached only if no enabled verification block returned an error: request the swap.
+        self.set_magic_blocking(_aligned, SWAP_MAGIC, _state_and_dfu_flash)
+    }
+
+    /// Hash the first `update_len` bytes of the DFU partition with digest `D`, writing the result to `output`.
+    pub fn hash_blocking<F: NorFlash, D: Digest>(
+        &mut self,
+        dfu_flash: &mut F,
+        update_len: u32,
+        chunk_buf: &mut [u8],
+        output: &mut [u8],
+    ) -> Result<(), FirmwareUpdaterError> {
+        let mut hasher = D::new();
+        for offset in (0..update_len).step_by(chunk_buf.len()) {
+            self.dfu.read_blocking(dfu_flash, offset, chunk_buf)?;
+            // A full chunk is always read; only the bytes inside the update are hashed.
+            hasher.update(&chunk_buf[..chunk_buf.len().min((update_len - offset) as usize)]);
+        }
+        output.copy_from_slice(hasher.finalize().as_slice());
+        Ok(())
+    }
+
+    /// Mark to trigger firmware swap on next boot (only available without the `_verify` feature).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `aligned.len()` is not `F::WRITE_SIZE`. The buffer must also follow the alignment rules for the flash being written to.
+    #[cfg(not(feature = "_verify"))]
+    pub fn mark_updated_blocking<F: NorFlash>(
+        &mut self,
+        state_flash: &mut F,
+        aligned: &mut [u8],
+    ) -> Result<(), FirmwareUpdaterError> {
+        assert_eq!(aligned.len(), F::WRITE_SIZE);
+        self.set_magic_blocking(aligned, SWAP_MAGIC, state_flash)
+    }
+
+    /// Mark firmware boot successful and stop rollback on reset.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `aligned.len()` is not `F::WRITE_SIZE`. The buffer must also follow the alignment rules for the flash being written to.
+    pub fn mark_booted_blocking<F: NorFlash>(
+        &mut self,
+        state_flash: &mut F,
+        aligned: &mut [u8],
+    ) -> Result<(), FirmwareUpdaterError> {
+        assert_eq!(aligned.len(), F::WRITE_SIZE);
+        self.set_magic_blocking(aligned, BOOT_MAGIC, state_flash)
+    }
+
+    /// Store `magic` at the start of the state partition unless it is already there. A rewrite
+    /// invalidates the progress marker, wipes the state partition and then writes the magic.
+    fn set_magic_blocking<F: NorFlash>(
+        &mut self,
+        aligned: &mut [u8],
+        magic: u8,
+        state_flash: &mut F,
+    ) -> Result<(), FirmwareUpdaterError> {
+        // FIXME: Do not make this assumption.
+        const STATE_ERASE_VALUE: u8 = 0xFF;
+
+        self.state.read_blocking(state_flash, 0, aligned)?;
+        if aligned.iter().all(|&b| b == magic) {
+            return Ok(());
+        }
+
+        // Read progress validity; anything but a fully erased marker is already invalid.
+        self.state.read_blocking(state_flash, F::WRITE_SIZE as u32, aligned)?;
+        if aligned.iter().all(|&b| b == STATE_ERASE_VALUE) {
+            // Invalidate progress
+            aligned.fill(!STATE_ERASE_VALUE);
+            self.state.write_blocking(state_flash, F::WRITE_SIZE as u32, aligned)?;
+        }
+
+        // Clear magic and progress
+        self.state.wipe_blocking(state_flash)?;
+
+        // Set magic
+        aligned.fill(magic);
+        self.state.write_blocking(state_flash, 0, aligned)?;
+        Ok(())
+    }
+
+    /// Erase the DFU range `offset..offset + data.len()` and write `data` at `offset`.
+    ///
+    /// The buffer must follow the alignment requirements of the target flash and be a multiple of the page size.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data` is shorter than `F::ERASE_SIZE`; other alignment or size violations may also panic.
+    pub fn write_firmware_blocking<F: NorFlash>(
+        &mut self,
+        offset: usize,
+        data: &[u8],
+        dfu_flash: &mut F,
+    ) -> Result<(), FirmwareUpdaterError> {
+        assert!(data.len() >= F::ERASE_SIZE);
+
+        let start = offset as u32;
+        let end = (offset + data.len()) as u32;
+        self.dfu.erase_blocking(dfu_flash, start, end)?;
+        self.dfu.write_blocking(dfu_flash, start, data)?;
+
+        Ok(())
+    }
+
+    /// Prepare for an incoming DFU update by wiping the entire DFU partition and
+    /// returning a copy of its [`Partition`].
+    ///
+    /// Using this instead of `write_firmware_blocking` allows for an optimized
+    /// API in exchange for added complexity.
+    pub fn prepare_update_blocking<F: NorFlash>(&mut self, flash: &mut F) -> Result<Partition, FirmwareUpdaterError> {
+        self.dfu.wipe_blocking(flash)?;
+        // The partition is handed back by value; the updater keeps its own copy.
+        Ok(self.dfu)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use futures::executor::block_on;
+    use sha1::{Digest, Sha1};
+
+    use super::*;
+    use crate::mem_flash::MemFlash;
+
+    #[test]
+    fn can_verify_sha1() {
+        const STATE: Partition = Partition::new(0, 4096);
+        const DFU: Partition = Partition::new(65536, 131072);
+
+        let mut flash = MemFlash::<131072, 4096, 8>::default();
+        // Only the first 7 bytes are the update; the rest of the 4096-byte buffer is zero padding.
+        let update = [0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66];
+        let mut to_write = [0; 4096];
+        to_write[..7].copy_from_slice(update.as_slice());
+
+        let mut updater = FirmwareUpdater::new(DFU, STATE);
+        block_on(updater.write_firmware(0, to_write.as_slice(), &mut flash)).unwrap();
+        let mut chunk_buf = [0; 2]; // 2 does not divide 7, so the last chunk is only partially hashed
+        let mut hash = [0; 20];
+        block_on(updater.hash::<_, Sha1>(&mut flash, update.len() as u32, &mut chunk_buf, &mut hash)).unwrap();
+        // Hashing through the updater must match hashing the 7 update bytes directly.
+        assert_eq!(Sha1::digest(update).as_slice(), hash);
+    }
+}
diff --git a/embassy-boot/boot/src/lib.rs b/embassy-boot/boot/src/lib.rs
index 7ce0c664a..e268d8883 100644
--- a/embassy-boot/boot/src/lib.rs
+++ b/embassy-boot/boot/src/lib.rs
@@ -5,36 +5,18 @@
 #![doc = include_str!("../README.md")]
 mod fmt;
 
-use embedded_storage::nor_flash::{ErrorType, NorFlash, NorFlashError, NorFlashErrorKind, ReadNorFlash};
+mod boot_loader;
+mod digest_adapters;
+mod firmware_updater;
+mod mem_flash;
+mod partition;
 
-#[cfg(feature = "nightly")]
-use embedded_storage_async::nor_flash::NorFlash as AsyncNorFlash;
+pub use boot_loader::{BootError, BootFlash, BootLoader, FlashConfig, MultiFlashConfig, SingleFlashConfig};
+pub use firmware_updater::{FirmwareUpdater, FirmwareUpdaterError};
+pub use partition::Partition;
 
-const BOOT_MAGIC: u8 = 0xD0;
-const SWAP_MAGIC: u8 = 0xF0;
-
-/// A region in flash used by the bootloader.
-#[derive(Copy, Clone, Debug)]
-#[cfg_attr(feature = "defmt", derive(defmt::Format))]
-pub struct Partition {
-    /// Start of the flash region.
-    pub from: usize,
-    /// End of the flash region.
-    pub to: usize,
-}
-
-impl Partition {
-    /// Create a new partition with the provided range
-    pub const fn new(from: usize, to: usize) -> Self {
-        Self { from, to }
-    }
-
-    /// Return the length of the partition
-    #[allow(clippy::len_without_is_empty)]
-    pub const fn len(&self) -> usize {
-        self.to - self.from
-    }
-}
+pub(crate) const BOOT_MAGIC: u8 = 0xD0;
+pub(crate) const SWAP_MAGIC: u8 = 0xF0;
 
 /// The state of the bootloader after running prepare.
 #[derive(PartialEq, Eq, Debug)]
@@ -46,34 +28,6 @@ pub enum State {
     Swap,
 }
 
-/// Errors returned by bootloader
-#[derive(PartialEq, Eq, Debug)]
-pub enum BootError {
-    /// Error from flash.
-    Flash(NorFlashErrorKind),
-    /// Invalid bootloader magic
-    BadMagic,
-}
-
-#[cfg(feature = "defmt")]
-impl defmt::Format for BootError {
-    fn format(&self, fmt: defmt::Formatter) {
-        match self {
-            BootError::Flash(_) => defmt::write!(fmt, "BootError::Flash(_)"),
-            BootError::BadMagic => defmt::write!(fmt, "BootError::BadMagic"),
-        }
-    }
-}
-
-impl<E> From<E> for BootError
-where
-    E: NorFlashError,
-{
-    fn from(error: E) -> Self {
-        BootError::Flash(error.kind())
-    }
-}
-
 /// Buffer aligned to 32 byte boundary, largest known alignment requirement for embassy-boot.
 #[repr(align(32))]
 pub struct AlignedBuffer<const N: usize>(pub [u8; N]);
@@ -90,1128 +44,12 @@ impl<const N: usize> AsMut<[u8]> for AlignedBuffer<N> {
     }
 }
 
-/// Extension of the embedded-storage flash type information with block size and erase value.
-pub trait Flash: NorFlash + ReadNorFlash {
-    /// The block size that should be used when writing to flash. For most builtin flashes, this is the same as the erase
-    /// size of the flash, but for external QSPI flash modules, this can be lower.
-    const BLOCK_SIZE: usize;
-    /// The erase value of the flash. Typically the default of 0xFF is used, but some flashes use a different value.
-    const ERASE_VALUE: u8 = 0xFF;
-}
-
-/// Trait defining the flash handles used for active and DFU partition
-pub trait FlashConfig {
-    /// Flash type used for the state partition.
-    type STATE: Flash;
-    /// Flash type used for the active partition.
-    type ACTIVE: Flash;
-    /// Flash type used for the dfu partition.
-    type DFU: Flash;
-
-    /// Return flash instance used to write/read to/from active partition.
-    fn active(&mut self) -> &mut Self::ACTIVE;
-    /// Return flash instance used to write/read to/from dfu partition.
-    fn dfu(&mut self) -> &mut Self::DFU;
-    /// Return flash instance used to write/read to/from bootloader state.
-    fn state(&mut self) -> &mut Self::STATE;
-}
-
-/// BootLoader works with any flash implementing embedded_storage and can also work with
-/// different page sizes and flash write sizes.
-pub struct BootLoader {
-    // Page with current state of bootloader. The state partition has the following format:
-    // | Range          | Description                                                                      |
-    // | 0 - WRITE_SIZE | Magic indicating bootloader state. BOOT_MAGIC means boot, SWAP_MAGIC means swap. |
-    // | WRITE_SIZE - N | Progress index used while swapping or reverting                                  |
-    state: Partition,
-    // Location of the partition which will be booted from
-    active: Partition,
-    // Location of the partition which will be swapped in when requested
-    dfu: Partition,
-}
-
-impl BootLoader {
-    /// Create a new instance of a bootloader with the given partitions.
-    ///
-    /// - All partitions must be aligned with the PAGE_SIZE const generic parameter.
-    /// - The dfu partition must be at least PAGE_SIZE bigger than the active partition.
-    pub fn new(active: Partition, dfu: Partition, state: Partition) -> Self {
-        Self { active, dfu, state }
-    }
-
-    /// Return the boot address for the active partition.
-    pub fn boot_address(&self) -> usize {
-        self.active.from
-    }
-
-    /// Perform necessary boot preparations like swapping images.
-    ///
-    /// The DFU partition is assumed to be 1 page bigger than the active partition for the swap
-    /// algorithm to work correctly.
-    ///
-    /// SWAPPING
-    ///
-    /// Assume a flash size of 3 pages for the active partition, and 4 pages for the DFU partition.
-    /// The swap index contains the copy progress, as to allow continuation of the copy process on
-    /// power failure. The index counter is represented within 1 or more pages (depending on total
-    /// flash size), where a page X is considered swapped if index at location (X + WRITE_SIZE)
-    /// contains a zero value. This ensures that index updates can be performed atomically and
-    /// avoid a situation where the wrong index value is set (page write size is "atomic").
-    ///
-    /// +-----------+------------+--------+--------+--------+--------+
-    /// | Partition | Swap Index | Page 0 | Page 1 | Page 3 | Page 4 |
-    /// +-----------+------------+--------+--------+--------+--------+
-    /// |    Active |          0 |      1 |      2 |      3 |      - |
-    /// |       DFU |          0 |      3 |      2 |      1 |      X |
-    /// +-----------+------------+--------+--------+--------+--------+
-    ///
-    /// The algorithm starts by copying 'backwards', and after the first step, the layout is
-    /// as follows:
-    ///
-    /// +-----------+------------+--------+--------+--------+--------+
-    /// | Partition | Swap Index | Page 0 | Page 1 | Page 3 | Page 4 |
-    /// +-----------+------------+--------+--------+--------+--------+
-    /// |    Active |          1 |      1 |      2 |      1 |      - |
-    /// |       DFU |          1 |      3 |      2 |      1 |      3 |
-    /// +-----------+------------+--------+--------+--------+--------+
-    ///
-    /// The next iteration performs the same steps
-    ///
-    /// +-----------+------------+--------+--------+--------+--------+
-    /// | Partition | Swap Index | Page 0 | Page 1 | Page 3 | Page 4 |
-    /// +-----------+------------+--------+--------+--------+--------+
-    /// |    Active |          2 |      1 |      2 |      1 |      - |
-    /// |       DFU |          2 |      3 |      2 |      2 |      3 |
-    /// +-----------+------------+--------+--------+--------+--------+
-    ///
-    /// And again until we're done
-    ///
-    /// +-----------+------------+--------+--------+--------+--------+
-    /// | Partition | Swap Index | Page 0 | Page 1 | Page 3 | Page 4 |
-    /// +-----------+------------+--------+--------+--------+--------+
-    /// |    Active |          3 |      3 |      2 |      1 |      - |
-    /// |       DFU |          3 |      3 |      1 |      2 |      3 |
-    /// +-----------+------------+--------+--------+--------+--------+
-    ///
-    /// REVERTING
-    ///
-    /// The reverting algorithm uses the swap index to discover that images were swapped, but that
-    /// the application failed to mark the boot successful. In this case, the revert algorithm will
-    /// run.
-    ///
-    /// The revert index is located separately from the swap index, to ensure that revert can continue
-    /// on power failure.
-    ///
-    /// The revert algorithm works forwards, by starting copying into the 'unused' DFU page at the start.
-    ///
-    /// +-----------+--------------+--------+--------+--------+--------+
-    /// | Partition | Revert Index | Page 0 | Page 1 | Page 3 | Page 4 |
-    //*/
-    /// +-----------+--------------+--------+--------+--------+--------+
-    /// |    Active |            3 |      1 |      2 |      1 |      - |
-    /// |       DFU |            3 |      3 |      1 |      2 |      3 |
-    /// +-----------+--------------+--------+--------+--------+--------+
-    ///
-    ///
-    /// +-----------+--------------+--------+--------+--------+--------+
-    /// | Partition | Revert Index | Page 0 | Page 1 | Page 3 | Page 4 |
-    /// +-----------+--------------+--------+--------+--------+--------+
-    /// |    Active |            3 |      1 |      2 |      1 |      - |
-    /// |       DFU |            3 |      3 |      2 |      2 |      3 |
-    /// +-----------+--------------+--------+--------+--------+--------+
-    ///
-    /// +-----------+--------------+--------+--------+--------+--------+
-    /// | Partition | Revert Index | Page 0 | Page 1 | Page 3 | Page 4 |
-    /// +-----------+--------------+--------+--------+--------+--------+
-    /// |    Active |            3 |      1 |      2 |      3 |      - |
-    /// |       DFU |            3 |      3 |      2 |      1 |      3 |
-    /// +-----------+--------------+--------+--------+--------+--------+
-    ///
-    pub fn prepare_boot<P: FlashConfig>(
-        &mut self,
-        p: &mut P,
-        magic: &mut [u8],
-        page: &mut [u8],
-    ) -> Result<State, BootError> {
-        // Ensure we have enough progress pages to store copy progress
-        assert_partitions(self.active, self.dfu, self.state, page.len(), P::STATE::WRITE_SIZE);
-        assert_eq!(magic.len(), P::STATE::WRITE_SIZE);
-
-        // Copy contents from partition N to active
-        let state = self.read_state(p, magic)?;
-        if state == State::Swap {
-            //
-            // Check if we already swapped. If we're in the swap state, this means we should revert
-            // since the app has failed to mark boot as successful
-            //
-            if !self.is_swapped(p, magic, page)? {
-                trace!("Swapping");
-                self.swap(p, magic, page)?;
-                trace!("Swapping done");
-            } else {
-                trace!("Reverting");
-                self.revert(p, magic, page)?;
-
-                // Overwrite magic and reset progress
-                let fstate = p.state();
-                magic.fill(!P::STATE::ERASE_VALUE);
-                fstate.write(self.state.from as u32, magic)?;
-                fstate.erase(self.state.from as u32, self.state.to as u32)?;
-
-                magic.fill(BOOT_MAGIC);
-                fstate.write(self.state.from as u32, magic)?;
-            }
-        }
-        Ok(state)
-    }
-
-    fn is_swapped<P: FlashConfig>(&mut self, p: &mut P, magic: &mut [u8], page: &mut [u8]) -> Result<bool, BootError> {
-        let page_size = page.len();
-        let page_count = self.active.len() / page_size;
-        let progress = self.current_progress(p, magic)?;
-
-        Ok(progress >= page_count * 2)
-    }
-
-    fn current_progress<P: FlashConfig>(&mut self, config: &mut P, aligned: &mut [u8]) -> Result<usize, BootError> {
-        let write_size = aligned.len();
-        let max_index = ((self.state.len() - write_size) / write_size) - 1;
-        aligned.fill(!P::STATE::ERASE_VALUE);
-
-        let flash = config.state();
-        for i in 0..max_index {
-            flash.read((self.state.from + write_size + i * write_size) as u32, aligned)?;
-
-            if aligned.iter().any(|&b| b == P::STATE::ERASE_VALUE) {
-                return Ok(i);
-            }
-        }
-        Ok(max_index)
-    }
-
-    fn update_progress<P: FlashConfig>(&mut self, idx: usize, p: &mut P, magic: &mut [u8]) -> Result<(), BootError> {
-        let flash = p.state();
-        let write_size = magic.len();
-        let w = self.state.from + write_size + idx * write_size;
-
-        let aligned = magic;
-        aligned.fill(!P::STATE::ERASE_VALUE);
-        flash.write(w as u32, aligned)?;
-        Ok(())
-    }
-
-    fn active_addr(&self, n: usize, page_size: usize) -> usize {
-        self.active.from + n * page_size
-    }
-
-    fn dfu_addr(&self, n: usize, page_size: usize) -> usize {
-        self.dfu.from + n * page_size
-    }
-
-    fn copy_page_once_to_active<P: FlashConfig>(
-        &mut self,
-        idx: usize,
-        from_page: usize,
-        to_page: usize,
-        p: &mut P,
-        magic: &mut [u8],
-        page: &mut [u8],
-    ) -> Result<(), BootError> {
-        let buf = page;
-        if self.current_progress(p, magic)? <= idx {
-            let mut offset = from_page;
-            for chunk in buf.chunks_mut(P::DFU::BLOCK_SIZE) {
-                p.dfu().read(offset as u32, chunk)?;
-                offset += chunk.len();
-            }
-
-            p.active().erase(to_page as u32, (to_page + buf.len()) as u32)?;
-
-            let mut offset = to_page;
-            for chunk in buf.chunks(P::ACTIVE::BLOCK_SIZE) {
-                p.active().write(offset as u32, chunk)?;
-                offset += chunk.len();
-            }
-            self.update_progress(idx, p, magic)?;
-        }
-        Ok(())
-    }
-
-    fn copy_page_once_to_dfu<P: FlashConfig>(
-        &mut self,
-        idx: usize,
-        from_page: usize,
-        to_page: usize,
-        p: &mut P,
-        magic: &mut [u8],
-        page: &mut [u8],
-    ) -> Result<(), BootError> {
-        let buf = page;
-        if self.current_progress(p, magic)? <= idx {
-            let mut offset = from_page;
-            for chunk in buf.chunks_mut(P::ACTIVE::BLOCK_SIZE) {
-                p.active().read(offset as u32, chunk)?;
-                offset += chunk.len();
-            }
-
-            p.dfu().erase(to_page as u32, (to_page + buf.len()) as u32)?;
-
-            let mut offset = to_page;
-            for chunk in buf.chunks(P::DFU::BLOCK_SIZE) {
-                p.dfu().write(offset as u32, chunk)?;
-                offset += chunk.len();
-            }
-            self.update_progress(idx, p, magic)?;
-        }
-        Ok(())
-    }
-
-    fn swap<P: FlashConfig>(&mut self, p: &mut P, magic: &mut [u8], page: &mut [u8]) -> Result<(), BootError> {
-        let page_size = page.len();
-        let page_count = self.active.len() / page_size;
-        trace!("Page count: {}", page_count);
-        for page_num in 0..page_count {
-            trace!("COPY PAGE {}", page_num);
-            // Copy active page to the 'next' DFU page.
-            let active_page = self.active_addr(page_count - 1 - page_num, page_size);
-            let dfu_page = self.dfu_addr(page_count - page_num, page_size);
-            //trace!("Copy active {} to dfu {}", active_page, dfu_page);
-            self.copy_page_once_to_dfu(page_num * 2, active_page, dfu_page, p, magic, page)?;
-
-            // Copy DFU page to the active page
-            let active_page = self.active_addr(page_count - 1 - page_num, page_size);
-            let dfu_page = self.dfu_addr(page_count - 1 - page_num, page_size);
-            //trace!("Copy dfy {} to active {}", dfu_page, active_page);
-            self.copy_page_once_to_active(page_num * 2 + 1, dfu_page, active_page, p, magic, page)?;
-        }
-
-        Ok(())
-    }
-
-    fn revert<P: FlashConfig>(&mut self, p: &mut P, magic: &mut [u8], page: &mut [u8]) -> Result<(), BootError> {
-        let page_size = page.len();
-        let page_count = self.active.len() / page_size;
-        for page_num in 0..page_count {
-            // Copy the bad active page to the DFU page
-            let active_page = self.active_addr(page_num, page_size);
-            let dfu_page = self.dfu_addr(page_num, page_size);
-            self.copy_page_once_to_dfu(page_count * 2 + page_num * 2, active_page, dfu_page, p, magic, page)?;
-
-            // Copy the DFU page back to the active page
-            let active_page = self.active_addr(page_num, page_size);
-            let dfu_page = self.dfu_addr(page_num + 1, page_size);
-            self.copy_page_once_to_active(page_count * 2 + page_num * 2 + 1, dfu_page, active_page, p, magic, page)?;
-        }
-
-        Ok(())
-    }
-
-    fn read_state<P: FlashConfig>(&mut self, config: &mut P, magic: &mut [u8]) -> Result<State, BootError> {
-        let flash = config.state();
-        flash.read(self.state.from as u32, magic)?;
-
-        if !magic.iter().any(|&b| b != SWAP_MAGIC) {
-            Ok(State::Swap)
-        } else {
-            Ok(State::Boot)
-        }
-    }
-}
-
-fn assert_partitions(active: Partition, dfu: Partition, state: Partition, page_size: usize, write_size: usize) {
-    assert_eq!(active.len() % page_size, 0);
-    assert_eq!(dfu.len() % page_size, 0);
-    assert!(dfu.len() - active.len() >= page_size);
-    assert!(2 * (active.len() / page_size) <= (state.len() - write_size) / write_size);
-}
-
-/// Convenience provider that uses a single flash for all partitions.
-pub struct SingleFlashConfig<'a, F>
-where
-    F: Flash,
-{
-    flash: &'a mut F,
-}
-
-impl<'a, F> SingleFlashConfig<'a, F>
-where
-    F: Flash,
-{
-    /// Create a provider for a single flash.
-    pub fn new(flash: &'a mut F) -> Self {
-        Self { flash }
-    }
-}
-
-impl<'a, F> FlashConfig for SingleFlashConfig<'a, F>
-where
-    F: Flash,
-{
-    type STATE = F;
-    type ACTIVE = F;
-    type DFU = F;
-
-    fn active(&mut self) -> &mut Self::STATE {
-        self.flash
-    }
-    fn dfu(&mut self) -> &mut Self::ACTIVE {
-        self.flash
-    }
-    fn state(&mut self) -> &mut Self::DFU {
-        self.flash
-    }
-}
-
-/// A flash wrapper implementing the Flash and embedded_storage traits.
-pub struct BootFlash<F, const BLOCK_SIZE: usize, const ERASE_VALUE: u8 = 0xFF>
-where
-    F: NorFlash + ReadNorFlash,
-{
-    flash: F,
-}
-
-impl<F, const BLOCK_SIZE: usize, const ERASE_VALUE: u8> BootFlash<F, BLOCK_SIZE, ERASE_VALUE>
-where
-    F: NorFlash + ReadNorFlash,
-{
-    /// Create a new instance of a bootable flash
-    pub fn new(flash: F) -> Self {
-        Self { flash }
-    }
-}
-
-impl<F, const BLOCK_SIZE: usize, const ERASE_VALUE: u8> Flash for BootFlash<F, BLOCK_SIZE, ERASE_VALUE>
-where
-    F: NorFlash + ReadNorFlash,
-{
-    const BLOCK_SIZE: usize = BLOCK_SIZE;
-    const ERASE_VALUE: u8 = ERASE_VALUE;
-}
-
-impl<F, const BLOCK_SIZE: usize, const ERASE_VALUE: u8> ErrorType for BootFlash<F, BLOCK_SIZE, ERASE_VALUE>
-where
-    F: ReadNorFlash + NorFlash,
-{
-    type Error = F::Error;
-}
-
-impl<F, const BLOCK_SIZE: usize, const ERASE_VALUE: u8> NorFlash for BootFlash<F, BLOCK_SIZE, ERASE_VALUE>
-where
-    F: ReadNorFlash + NorFlash,
-{
-    const WRITE_SIZE: usize = F::WRITE_SIZE;
-    const ERASE_SIZE: usize = F::ERASE_SIZE;
-
-    fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
-        F::erase(&mut self.flash, from, to)
-    }
-
-    fn write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Self::Error> {
-        F::write(&mut self.flash, offset, bytes)
-    }
-}
-
-impl<F, const BLOCK_SIZE: usize, const ERASE_VALUE: u8> ReadNorFlash for BootFlash<F, BLOCK_SIZE, ERASE_VALUE>
-where
-    F: ReadNorFlash + NorFlash,
-{
-    const READ_SIZE: usize = F::READ_SIZE;
-
-    fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> {
-        F::read(&mut self.flash, offset, bytes)
-    }
-
-    fn capacity(&self) -> usize {
-        F::capacity(&self.flash)
-    }
-}
-
-/// Convenience flash provider that uses separate flash instances for each partition.
-pub struct MultiFlashConfig<'a, ACTIVE, STATE, DFU>
-where
-    ACTIVE: Flash,
-    STATE: Flash,
-    DFU: Flash,
-{
-    active: &'a mut ACTIVE,
-    state: &'a mut STATE,
-    dfu: &'a mut DFU,
-}
-
-impl<'a, ACTIVE, STATE, DFU> MultiFlashConfig<'a, ACTIVE, STATE, DFU>
-where
-    ACTIVE: Flash,
-    STATE: Flash,
-    DFU: Flash,
-{
-    /// Create a new flash provider with separate configuration for all three partitions.
-    pub fn new(active: &'a mut ACTIVE, state: &'a mut STATE, dfu: &'a mut DFU) -> Self {
-        Self { active, state, dfu }
-    }
-}
-
-impl<'a, ACTIVE, STATE, DFU> FlashConfig for MultiFlashConfig<'a, ACTIVE, STATE, DFU>
-where
-    ACTIVE: Flash,
-    STATE: Flash,
-    DFU: Flash,
-{
-    type STATE = STATE;
-    type ACTIVE = ACTIVE;
-    type DFU = DFU;
-
-    fn active(&mut self) -> &mut Self::ACTIVE {
-        self.active
-    }
-    fn dfu(&mut self) -> &mut Self::DFU {
-        self.dfu
-    }
-    fn state(&mut self) -> &mut Self::STATE {
-        self.state
-    }
-}
-/// Errors returned by FirmwareUpdater
-#[derive(Debug)]
-pub enum FirmwareUpdaterError {
-    /// Error from flash.
-    Flash(NorFlashErrorKind),
-    /// Signature errors.
-    Signature(signature::Error),
-}
-
-#[cfg(feature = "defmt")]
-impl defmt::Format for FirmwareUpdaterError {
-    fn format(&self, fmt: defmt::Formatter) {
-        match self {
-            FirmwareUpdaterError::Flash(_) => defmt::write!(fmt, "FirmwareUpdaterError::Flash(_)"),
-            FirmwareUpdaterError::Signature(_) => defmt::write!(fmt, "FirmwareUpdaterError::Signature(_)"),
-        }
-    }
-}
-
-impl<E> From<E> for FirmwareUpdaterError
-where
-    E: NorFlashError,
-{
-    fn from(error: E) -> Self {
-        FirmwareUpdaterError::Flash(error.kind())
-    }
-}
-
-/// FirmwareUpdater is an application API for interacting with the BootLoader without the ability to
-/// 'mess up' the internal bootloader state
-pub struct FirmwareUpdater {
-    state: Partition,
-    dfu: Partition,
-}
-
-impl Default for FirmwareUpdater {
-    fn default() -> Self {
-        extern "C" {
-            static __bootloader_state_start: u32;
-            static __bootloader_state_end: u32;
-            static __bootloader_dfu_start: u32;
-            static __bootloader_dfu_end: u32;
-        }
-
-        let dfu = unsafe {
-            Partition::new(
-                &__bootloader_dfu_start as *const u32 as usize,
-                &__bootloader_dfu_end as *const u32 as usize,
-            )
-        };
-        let state = unsafe {
-            Partition::new(
-                &__bootloader_state_start as *const u32 as usize,
-                &__bootloader_state_end as *const u32 as usize,
-            )
-        };
-
-        trace!("DFU: 0x{:x} - 0x{:x}", dfu.from, dfu.to);
-        trace!("STATE: 0x{:x} - 0x{:x}", state.from, state.to);
-        FirmwareUpdater::new(dfu, state)
-    }
-}
-
-impl FirmwareUpdater {
-    /// Create a firmware updater instance with partition ranges for the update and state partitions.
-    pub const fn new(dfu: Partition, state: Partition) -> Self {
-        Self { dfu, state }
-    }
-
-    /// Return the length of the DFU area
-    pub fn firmware_len(&self) -> usize {
-        self.dfu.len()
-    }
-
-    /// Obtain the current state.
-    ///
-    /// This is useful to check if the bootloader has just done a swap, in order
-    /// to do verifications and self-tests of the new image before calling
-    /// `mark_booted`.
-    #[cfg(feature = "nightly")]
-    pub async fn get_state<F: AsyncNorFlash>(
-        &mut self,
-        flash: &mut F,
-        aligned: &mut [u8],
-    ) -> Result<State, FirmwareUpdaterError> {
-        flash.read(self.state.from as u32, aligned).await?;
-
-        if !aligned.iter().any(|&b| b != SWAP_MAGIC) {
-            Ok(State::Swap)
-        } else {
-            Ok(State::Boot)
-        }
-    }
-
-    /// Verify the DFU given a public key. If there is an error then DO NOT
-    /// proceed with updating the firmware as it must be signed with a
-    /// corresponding private key (otherwise it could be malicious firmware).
-    ///
-    /// Mark to trigger firmware swap on next boot if verify suceeds.
-    ///
-    /// If the "ed25519-salty" feature is set (or another similar feature) then the signature is expected to have
-    /// been generated from a SHA-512 digest of the firmware bytes.
-    ///
-    /// If no signature feature is set then this method will always return a
-    /// signature error.
-    ///
-    /// # Safety
-    ///
-    /// The `_aligned` buffer must have a size of F::WRITE_SIZE, and follow the alignment rules for the flash being read from
-    /// and written to.
-    #[cfg(feature = "_verify")]
-    pub async fn verify_and_mark_updated<F: AsyncNorFlash>(
-        &mut self,
-        _flash: &mut F,
-        _public_key: &[u8],
-        _signature: &[u8],
-        _update_len: usize,
-        _aligned: &mut [u8],
-    ) -> Result<(), FirmwareUpdaterError> {
-        let _end = self.dfu.from + _update_len;
-        let _read_size = _aligned.len();
-
-        assert_eq!(_aligned.len(), F::WRITE_SIZE);
-        assert!(_end <= self.dfu.to);
-
-        #[cfg(feature = "ed25519-dalek")]
-        {
-            use ed25519_dalek::{Digest, PublicKey, Sha512, Signature, SignatureError, Verifier};
-
-            let into_signature_error = |e: SignatureError| FirmwareUpdaterError::Signature(e.into());
-
-            let public_key = PublicKey::from_bytes(_public_key).map_err(into_signature_error)?;
-            let signature = Signature::from_bytes(_signature).map_err(into_signature_error)?;
-
-            let mut digest = Sha512::new();
-
-            let mut offset = self.dfu.from;
-            let last_offset = _end / _read_size * _read_size;
-
-            while offset < last_offset {
-                _flash.read(offset as u32, _aligned).await?;
-                digest.update(&_aligned);
-                offset += _read_size;
-            }
-
-            let remaining = _end % _read_size;
-
-            if remaining > 0 {
-                _flash.read(last_offset as u32, _aligned).await?;
-                digest.update(&_aligned[0..remaining]);
-            }
-
-            public_key
-                .verify(&digest.finalize(), &signature)
-                .map_err(into_signature_error)?
-        }
-        #[cfg(feature = "ed25519-salty")]
-        {
-            use salty::constants::{PUBLICKEY_SERIALIZED_LENGTH, SIGNATURE_SERIALIZED_LENGTH};
-            use salty::{PublicKey, Sha512, Signature};
-
-            fn into_signature_error<E>(_: E) -> FirmwareUpdaterError {
-                FirmwareUpdaterError::Signature(signature::Error::default())
-            }
-
-            let public_key: [u8; PUBLICKEY_SERIALIZED_LENGTH] = _public_key.try_into().map_err(into_signature_error)?;
-            let public_key = PublicKey::try_from(&public_key).map_err(into_signature_error)?;
-            let signature: [u8; SIGNATURE_SERIALIZED_LENGTH] = _signature.try_into().map_err(into_signature_error)?;
-            let signature = Signature::try_from(&signature).map_err(into_signature_error)?;
-
-            let mut digest = Sha512::new();
-
-            let mut offset = self.dfu.from;
-            let last_offset = _end / _read_size * _read_size;
-
-            while offset < last_offset {
-                _flash.read(offset as u32, _aligned).await?;
-                digest.update(&_aligned);
-                offset += _read_size;
-            }
-
-            let remaining = _end % _read_size;
-
-            if remaining > 0 {
-                _flash.read(last_offset as u32, _aligned).await?;
-                digest.update(&_aligned[0..remaining]);
-            }
-
-            let message = digest.finalize();
-            let r = public_key.verify(&message, &signature);
-            trace!(
-                "Verifying with public key {}, signature {} and message {} yields ok: {}",
-                public_key.to_bytes(),
-                signature.to_bytes(),
-                message,
-                r.is_ok()
-            );
-            r.map_err(into_signature_error)?
-        }
-
-        self.set_magic(_aligned, SWAP_MAGIC, _flash).await
-    }
-
-    /// Mark to trigger firmware swap on next boot.
-    ///
-    /// # Safety
-    ///
-    /// The `aligned` buffer must have a size of F::WRITE_SIZE, and follow the alignment rules for the flash being written to.
-    #[cfg(not(feature = "_verify"))]
-    #[cfg(feature = "nightly")]
-    pub async fn mark_updated<F: AsyncNorFlash>(
-        &mut self,
-        flash: &mut F,
-        aligned: &mut [u8],
-    ) -> Result<(), FirmwareUpdaterError> {
-        assert_eq!(aligned.len(), F::WRITE_SIZE);
-        self.set_magic(aligned, SWAP_MAGIC, flash).await
-    }
-
-    /// Mark firmware boot successful and stop rollback on reset.
-    ///
-    /// # Safety
-    ///
-    /// The `aligned` buffer must have a size of F::WRITE_SIZE, and follow the alignment rules for the flash being written to.
-    #[cfg(feature = "nightly")]
-    pub async fn mark_booted<F: AsyncNorFlash>(
-        &mut self,
-        flash: &mut F,
-        aligned: &mut [u8],
-    ) -> Result<(), FirmwareUpdaterError> {
-        assert_eq!(aligned.len(), F::WRITE_SIZE);
-        self.set_magic(aligned, BOOT_MAGIC, flash).await
-    }
-
-    #[cfg(feature = "nightly")]
-    async fn set_magic<F: AsyncNorFlash>(
-        &mut self,
-        aligned: &mut [u8],
-        magic: u8,
-        flash: &mut F,
-    ) -> Result<(), FirmwareUpdaterError> {
-        flash.read(self.state.from as u32, aligned).await?;
-
-        if aligned.iter().any(|&b| b != magic) {
-            aligned.fill(0);
-
-            flash.write(self.state.from as u32, aligned).await?;
-            flash.erase(self.state.from as u32, self.state.to as u32).await?;
-
-            aligned.fill(magic);
-            flash.write(self.state.from as u32, aligned).await?;
-        }
-        Ok(())
-    }
-
-    /// Write data to a flash page.
-    ///
-    /// The buffer must follow alignment requirements of the target flash and a multiple of page size big.
-    ///
-    /// # Safety
-    ///
-    /// Failing to meet alignment and size requirements may result in a panic.
-    #[cfg(feature = "nightly")]
-    pub async fn write_firmware<F: AsyncNorFlash>(
-        &mut self,
-        offset: usize,
-        data: &[u8],
-        flash: &mut F,
-        block_size: usize,
-    ) -> Result<(), FirmwareUpdaterError> {
-        assert!(data.len() >= F::ERASE_SIZE);
-
-        flash
-            .erase(
-                (self.dfu.from + offset) as u32,
-                (self.dfu.from + offset + data.len()) as u32,
-            )
-            .await?;
-
-        trace!(
-            "Erased from {} to {}",
-            self.dfu.from + offset,
-            self.dfu.from + offset + data.len()
-        );
-
-        FirmwareWriter(self.dfu)
-            .write_block(offset, data, flash, block_size)
-            .await?;
-
-        Ok(())
-    }
-
-    /// Prepare for an incoming DFU update by erasing the entire DFU area and
-    /// returning a `FirmwareWriter`.
-    ///
-    /// Using this instead of `write_firmware` allows for an optimized API in
-    /// exchange for added complexity.
-    #[cfg(feature = "nightly")]
-    pub async fn prepare_update<F: AsyncNorFlash>(
-        &mut self,
-        flash: &mut F,
-    ) -> Result<FirmwareWriter, FirmwareUpdaterError> {
-        flash.erase((self.dfu.from) as u32, (self.dfu.to) as u32).await?;
-
-        trace!("Erased from {} to {}", self.dfu.from, self.dfu.to);
-
-        Ok(FirmwareWriter(self.dfu))
-    }
-
-    //
-    // Blocking API
-    //
-
-    /// Obtain the current state.
-    ///
-    /// This is useful to check if the bootloader has just done a swap, in order
-    /// to do verifications and self-tests of the new image before calling
-    /// `mark_booted`.
-    pub fn get_state_blocking<F: NorFlash>(
-        &mut self,
-        flash: &mut F,
-        aligned: &mut [u8],
-    ) -> Result<State, FirmwareUpdaterError> {
-        flash.read(self.state.from as u32, aligned)?;
-
-        if !aligned.iter().any(|&b| b != SWAP_MAGIC) {
-            Ok(State::Swap)
-        } else {
-            Ok(State::Boot)
-        }
-    }
-
-    /// Verify the DFU given a public key. If there is an error then DO NOT
-    /// proceed with updating the firmware as it must be signed with a
-    /// corresponding private key (otherwise it could be malicious firmware).
-    ///
-    /// Mark to trigger firmware swap on next boot if verify suceeds.
-    ///
-    /// If the "ed25519-salty" feature is set (or another similar feature) then the signature is expected to have
-    /// been generated from a SHA-512 digest of the firmware bytes.
-    ///
-    /// If no signature feature is set then this method will always return a
-    /// signature error.
-    ///
-    /// # Safety
-    ///
-    /// The `_aligned` buffer must have a size of F::WRITE_SIZE, and follow the alignment rules for the flash being read from
-    /// and written to.
-    #[cfg(feature = "_verify")]
-    pub fn verify_and_mark_updated_blocking<F: NorFlash>(
-        &mut self,
-        _flash: &mut F,
-        _public_key: &[u8],
-        _signature: &[u8],
-        _update_len: usize,
-        _aligned: &mut [u8],
-    ) -> Result<(), FirmwareUpdaterError> {
-        let _end = self.dfu.from + _update_len;
-        let _read_size = _aligned.len();
-
-        assert_eq!(_aligned.len(), F::WRITE_SIZE);
-        assert!(_end <= self.dfu.to);
-
-        #[cfg(feature = "ed25519-dalek")]
-        {
-            use ed25519_dalek::{Digest, PublicKey, Sha512, Signature, SignatureError, Verifier};
-
-            let into_signature_error = |e: SignatureError| FirmwareUpdaterError::Signature(e.into());
-
-            let public_key = PublicKey::from_bytes(_public_key).map_err(into_signature_error)?;
-            let signature = Signature::from_bytes(_signature).map_err(into_signature_error)?;
-
-            let mut digest = Sha512::new();
-
-            let mut offset = self.dfu.from;
-            let last_offset = _end / _read_size * _read_size;
-
-            while offset < last_offset {
-                _flash.read(offset as u32, _aligned)?;
-                digest.update(&_aligned);
-                offset += _read_size;
-            }
-
-            let remaining = _end % _read_size;
-
-            if remaining > 0 {
-                _flash.read(last_offset as u32, _aligned)?;
-                digest.update(&_aligned[0..remaining]);
-            }
-
-            public_key
-                .verify(&digest.finalize(), &signature)
-                .map_err(into_signature_error)?
-        }
-        #[cfg(feature = "ed25519-salty")]
-        {
-            use salty::constants::{PUBLICKEY_SERIALIZED_LENGTH, SIGNATURE_SERIALIZED_LENGTH};
-            use salty::{PublicKey, Sha512, Signature};
-
-            fn into_signature_error<E>(_: E) -> FirmwareUpdaterError {
-                FirmwareUpdaterError::Signature(signature::Error::default())
-            }
-
-            let public_key: [u8; PUBLICKEY_SERIALIZED_LENGTH] = _public_key.try_into().map_err(into_signature_error)?;
-            let public_key = PublicKey::try_from(&public_key).map_err(into_signature_error)?;
-            let signature: [u8; SIGNATURE_SERIALIZED_LENGTH] = _signature.try_into().map_err(into_signature_error)?;
-            let signature = Signature::try_from(&signature).map_err(into_signature_error)?;
-
-            let mut digest = Sha512::new();
-
-            let mut offset = self.dfu.from;
-            let last_offset = _end / _read_size * _read_size;
-
-            while offset < last_offset {
-                _flash.read(offset as u32, _aligned)?;
-                digest.update(&_aligned);
-                offset += _read_size;
-            }
-
-            let remaining = _end % _read_size;
-
-            if remaining > 0 {
-                _flash.read(last_offset as u32, _aligned)?;
-                digest.update(&_aligned[0..remaining]);
-            }
-
-            let message = digest.finalize();
-            let r = public_key.verify(&message, &signature);
-            trace!(
-                "Verifying with public key {}, signature {} and message {} yields ok: {}",
-                public_key.to_bytes(),
-                signature.to_bytes(),
-                message,
-                r.is_ok()
-            );
-            r.map_err(into_signature_error)?
-        }
-
-        self.set_magic_blocking(_aligned, SWAP_MAGIC, _flash)
-    }
-
-    /// Mark to trigger firmware swap on next boot.
-    ///
-    /// # Safety
-    ///
-    /// The `aligned` buffer must have a size of F::WRITE_SIZE, and follow the alignment rules for the flash being written to.
-    #[cfg(not(feature = "_verify"))]
-    pub fn mark_updated_blocking<F: NorFlash>(
-        &mut self,
-        flash: &mut F,
-        aligned: &mut [u8],
-    ) -> Result<(), FirmwareUpdaterError> {
-        assert_eq!(aligned.len(), F::WRITE_SIZE);
-        self.set_magic_blocking(aligned, SWAP_MAGIC, flash)
-    }
-
-    /// Mark firmware boot successful and stop rollback on reset.
-    ///
-    /// # Safety
-    ///
-    /// The `aligned` buffer must have a size of F::WRITE_SIZE, and follow the alignment rules for the flash being written to.
-    pub fn mark_booted_blocking<F: NorFlash>(
-        &mut self,
-        flash: &mut F,
-        aligned: &mut [u8],
-    ) -> Result<(), FirmwareUpdaterError> {
-        assert_eq!(aligned.len(), F::WRITE_SIZE);
-        self.set_magic_blocking(aligned, BOOT_MAGIC, flash)
-    }
-
-    fn set_magic_blocking<F: NorFlash>(
-        &mut self,
-        aligned: &mut [u8],
-        magic: u8,
-        flash: &mut F,
-    ) -> Result<(), FirmwareUpdaterError> {
-        flash.read(self.state.from as u32, aligned)?;
-
-        if aligned.iter().any(|&b| b != magic) {
-            aligned.fill(0);
-
-            flash.write(self.state.from as u32, aligned)?;
-            flash.erase(self.state.from as u32, self.state.to as u32)?;
-
-            aligned.fill(magic);
-            flash.write(self.state.from as u32, aligned)?;
-        }
-        Ok(())
-    }
-
-    /// Write data to a flash page.
-    ///
-    /// The buffer must follow alignment requirements of the target flash and a multiple of page size big.
-    ///
-    /// # Safety
-    ///
-    /// Failing to meet alignment and size requirements may result in a panic.
-    pub fn write_firmware_blocking<F: NorFlash>(
-        &mut self,
-        offset: usize,
-        data: &[u8],
-        flash: &mut F,
-        block_size: usize,
-    ) -> Result<(), FirmwareUpdaterError> {
-        assert!(data.len() >= F::ERASE_SIZE);
-
-        flash.erase(
-            (self.dfu.from + offset) as u32,
-            (self.dfu.from + offset + data.len()) as u32,
-        )?;
-
-        trace!(
-            "Erased from {} to {}",
-            self.dfu.from + offset,
-            self.dfu.from + offset + data.len()
-        );
-
-        FirmwareWriter(self.dfu).write_block_blocking(offset, data, flash, block_size)?;
-
-        Ok(())
-    }
-
-    /// Prepare for an incoming DFU update by erasing the entire DFU area and
-    /// returning a `FirmwareWriter`.
-    ///
-    /// Using this instead of `write_firmware_blocking` allows for an optimized
-    /// API in exchange for added complexity.
-    pub fn prepare_update_blocking<F: NorFlash>(
-        &mut self,
-        flash: &mut F,
-    ) -> Result<FirmwareWriter, FirmwareUpdaterError> {
-        flash.erase((self.dfu.from) as u32, (self.dfu.to) as u32)?;
-
-        trace!("Erased from {} to {}", self.dfu.from, self.dfu.to);
-
-        Ok(FirmwareWriter(self.dfu))
-    }
-}
-
-/// FirmwareWriter allows writing blocks to an already erased flash.
-pub struct FirmwareWriter(Partition);
-
-impl FirmwareWriter {
-    /// Write data to a flash page.
-    ///
-    /// The buffer must follow alignment requirements of the target flash and a multiple of page size big.
-    ///
-    /// # Safety
-    ///
-    /// Failing to meet alignment and size requirements may result in a panic.
-    #[cfg(feature = "nightly")]
-    pub async fn write_block<F: AsyncNorFlash>(
-        &mut self,
-        offset: usize,
-        data: &[u8],
-        flash: &mut F,
-        block_size: usize,
-    ) -> Result<(), F::Error> {
-        trace!(
-            "Writing firmware at offset 0x{:x} len {}",
-            self.0.from + offset,
-            data.len()
-        );
-
-        let mut write_offset = self.0.from + offset;
-        for chunk in data.chunks(block_size) {
-            trace!("Wrote chunk at {}: {:?}", write_offset, chunk);
-            flash.write(write_offset as u32, chunk).await?;
-            write_offset += chunk.len();
-        }
-        /*
-        trace!("Wrote data, reading back for verification");
-
-        let mut buf: [u8; 4096] = [0; 4096];
-        let mut data_offset = 0;
-        let mut read_offset = self.dfu.from + offset;
-        for chunk in buf.chunks_mut(block_size) {
-            flash.read(read_offset as u32, chunk).await?;
-            trace!("Read chunk at {}: {:?}", read_offset, chunk);
-            assert_eq!(&data[data_offset..data_offset + block_size], chunk);
-            read_offset += chunk.len();
-            data_offset += chunk.len();
-        }
-        */
-
-        Ok(())
-    }
-
-    /// Write data to a flash page.
-    ///
-    /// The buffer must follow alignment requirements of the target flash and a multiple of page size big.
-    ///
-    /// # Safety
-    ///
-    /// Failing to meet alignment and size requirements may result in a panic.
-    pub fn write_block_blocking<F: NorFlash>(
-        &mut self,
-        offset: usize,
-        data: &[u8],
-        flash: &mut F,
-        block_size: usize,
-    ) -> Result<(), F::Error> {
-        trace!(
-            "Writing firmware at offset 0x{:x} len {}",
-            self.0.from + offset,
-            data.len()
-        );
-
-        let mut write_offset = self.0.from + offset;
-        for chunk in data.chunks(block_size) {
-            trace!("Wrote chunk at {}: {:?}", write_offset, chunk);
-            flash.write(write_offset as u32, chunk)?;
-            write_offset += chunk.len();
-        }
-        /*
-        trace!("Wrote data, reading back for verification");
-
-        let mut buf: [u8; 4096] = [0; 4096];
-        let mut data_offset = 0;
-        let mut read_offset = self.dfu.from + offset;
-        for chunk in buf.chunks_mut(block_size) {
-            flash.read(read_offset as u32, chunk).await?;
-            trace!("Read chunk at {}: {:?}", read_offset, chunk);
-            assert_eq!(&data[data_offset..data_offset + block_size], chunk);
-            read_offset += chunk.len();
-            data_offset += chunk.len();
-        }
-        */
-
-        Ok(())
-    }
-}
-
 #[cfg(test)]
 mod tests {
-    use core::convert::Infallible;
-
-    use embedded_storage::nor_flash::ErrorType;
-    use embedded_storage_async::nor_flash::ReadNorFlash as AsyncReadNorFlash;
     use futures::executor::block_on;
 
     use super::*;
+    use crate::mem_flash::MemFlash;
 
     /*
     #[test]
@@ -1234,18 +72,14 @@ mod tests {
         const ACTIVE: Partition = Partition::new(4096, 61440);
         const DFU: Partition = Partition::new(61440, 122880);
 
-        let mut flash = MemFlash::<131072, 4096, 4>([0xff; 131072]);
-        flash.0[0..4].copy_from_slice(&[BOOT_MAGIC; 4]);
+        let mut flash = MemFlash::<131072, 4096, 4>::default();
+        flash.mem[0..4].copy_from_slice(&[BOOT_MAGIC; 4]);
         let mut flash = SingleFlashConfig::new(&mut flash);
 
         let mut bootloader: BootLoader = BootLoader::new(ACTIVE, DFU, STATE);
 
-        let mut magic = [0; 4];
         let mut page = [0; 4096];
-        assert_eq!(
-            State::Boot,
-            bootloader.prepare_boot(&mut flash, &mut magic, &mut page).unwrap()
-        );
+        assert_eq!(State::Boot, bootloader.prepare_boot(&mut flash, &mut page).unwrap());
     }
 
     #[test]
@@ -1254,66 +88,49 @@ mod tests {
         const STATE: Partition = Partition::new(0, 4096);
         const ACTIVE: Partition = Partition::new(4096, 61440);
         const DFU: Partition = Partition::new(61440, 122880);
-        let mut flash = MemFlash::<131072, 4096, 4>([0xff; 131072]);
+        let mut flash = MemFlash::<131072, 4096, 4>::random();
 
-        let original: [u8; ACTIVE.len()] = [rand::random::<u8>(); ACTIVE.len()];
-        let update: [u8; DFU.len()] = [rand::random::<u8>(); DFU.len()];
+        let original = [rand::random::<u8>(); ACTIVE.size() as usize];
+        let update = [rand::random::<u8>(); ACTIVE.size() as usize];
         let mut aligned = [0; 4];
 
-        for i in ACTIVE.from..ACTIVE.to {
-            flash.0[i] = original[i - ACTIVE.from];
-        }
+        flash.program(ACTIVE.from, &original).unwrap();
 
         let mut bootloader: BootLoader = BootLoader::new(ACTIVE, DFU, STATE);
         let mut updater = FirmwareUpdater::new(DFU, STATE);
-        let mut offset = 0;
-        for chunk in update.chunks(4096) {
-            block_on(updater.write_firmware(offset, chunk, &mut flash, 4096)).unwrap();
-            offset += chunk.len();
-        }
+        block_on(updater.write_firmware(0, &update, &mut flash)).unwrap();
         block_on(updater.mark_updated(&mut flash, &mut aligned)).unwrap();
 
-        let mut magic = [0; 4];
-        let mut page = [0; 4096];
+        let mut page = [0; 1024];
         assert_eq!(
             State::Swap,
             bootloader
-                .prepare_boot(&mut SingleFlashConfig::new(&mut flash), &mut magic, &mut page)
+                .prepare_boot(&mut SingleFlashConfig::new(&mut flash), &mut page)
                 .unwrap()
         );
 
-        for i in ACTIVE.from..ACTIVE.to {
-            assert_eq!(flash.0[i], update[i - ACTIVE.from], "Index {}", i);
-        }
-
+        flash.assert_eq(ACTIVE.from, &update);
         // First DFU page is untouched
-        for i in DFU.from + 4096..DFU.to {
-            assert_eq!(flash.0[i], original[i - DFU.from - 4096], "Index {}", i);
-        }
+        flash.assert_eq(DFU.from + 4096, &original);
 
         // Running again should cause a revert
         assert_eq!(
             State::Swap,
             bootloader
-                .prepare_boot(&mut SingleFlashConfig::new(&mut flash), &mut magic, &mut page)
+                .prepare_boot(&mut SingleFlashConfig::new(&mut flash), &mut page)
                 .unwrap()
         );
 
-        for i in ACTIVE.from..ACTIVE.to {
-            assert_eq!(flash.0[i], original[i - ACTIVE.from], "Index {}", i);
-        }
-
+        flash.assert_eq(ACTIVE.from, &original);
         // Last page is untouched
-        for i in DFU.from..DFU.to - 4096 {
-            assert_eq!(flash.0[i], update[i - DFU.from], "Index {}", i);
-        }
+        flash.assert_eq(DFU.from, &update);
 
         // Mark as booted
         block_on(updater.mark_booted(&mut flash, &mut aligned)).unwrap();
         assert_eq!(
             State::Boot,
             bootloader
-                .prepare_boot(&mut SingleFlashConfig::new(&mut flash), &mut magic, &mut page)
+                .prepare_boot(&mut SingleFlashConfig::new(&mut flash), &mut page)
                 .unwrap()
         );
     }
@@ -1325,50 +142,34 @@ mod tests {
         const ACTIVE: Partition = Partition::new(4096, 16384);
         const DFU: Partition = Partition::new(0, 16384);
 
-        let mut active = MemFlash::<16384, 4096, 8>([0xff; 16384]);
-        let mut dfu = MemFlash::<16384, 2048, 8>([0xff; 16384]);
-        let mut state = MemFlash::<4096, 128, 4>([0xff; 4096]);
+        let mut active = MemFlash::<16384, 4096, 8>::random();
+        let mut dfu = MemFlash::<16384, 2048, 8>::random();
+        let mut state = MemFlash::<4096, 128, 4>::random();
         let mut aligned = [0; 4];
 
-        let original: [u8; ACTIVE.len()] = [rand::random::<u8>(); ACTIVE.len()];
-        let update: [u8; DFU.len()] = [rand::random::<u8>(); DFU.len()];
+        let original = [rand::random::<u8>(); ACTIVE.size() as usize];
+        let update = [rand::random::<u8>(); ACTIVE.size() as usize];
 
-        for i in ACTIVE.from..ACTIVE.to {
-            active.0[i] = original[i - ACTIVE.from];
-        }
+        active.program(ACTIVE.from, &original).unwrap();
 
         let mut updater = FirmwareUpdater::new(DFU, STATE);
 
-        let mut offset = 0;
-        for chunk in update.chunks(2048) {
-            block_on(updater.write_firmware(offset, chunk, &mut dfu, chunk.len())).unwrap();
-            offset += chunk.len();
-        }
+        block_on(updater.write_firmware(0, &update, &mut dfu)).unwrap();
         block_on(updater.mark_updated(&mut state, &mut aligned)).unwrap();
 
         let mut bootloader: BootLoader = BootLoader::new(ACTIVE, DFU, STATE);
-        let mut magic = [0; 4];
         let mut page = [0; 4096];
 
         assert_eq!(
             State::Swap,
             bootloader
-                .prepare_boot(
-                    &mut MultiFlashConfig::new(&mut active, &mut state, &mut dfu),
-                    &mut magic,
-                    &mut page
-                )
+                .prepare_boot(&mut MultiFlashConfig::new(&mut active, &mut state, &mut dfu), &mut page)
                 .unwrap()
         );
 
-        for i in ACTIVE.from..ACTIVE.to {
-            assert_eq!(active.0[i], update[i - ACTIVE.from], "Index {}", i);
-        }
-
+        active.assert_eq(ACTIVE.from, &update);
         // First DFU page is untouched
-        for i in DFU.from + 4096..DFU.to {
-            assert_eq!(dfu.0[i], original[i - DFU.from - 4096], "Index {}", i);
-        }
+        dfu.assert_eq(DFU.from + 4096, &original);
     }
 
     #[test]
@@ -1379,57 +180,35 @@ mod tests {
         const DFU: Partition = Partition::new(0, 16384);
 
         let mut aligned = [0; 4];
-        let mut active = MemFlash::<16384, 2048, 4>([0xff; 16384]);
-        let mut dfu = MemFlash::<16384, 4096, 8>([0xff; 16384]);
-        let mut state = MemFlash::<4096, 128, 4>([0xff; 4096]);
+        let mut active = MemFlash::<16384, 2048, 4>::random();
+        let mut dfu = MemFlash::<16384, 4096, 8>::random();
+        let mut state = MemFlash::<4096, 128, 4>::random();
 
-        let original: [u8; ACTIVE.len()] = [rand::random::<u8>(); ACTIVE.len()];
-        let update: [u8; DFU.len()] = [rand::random::<u8>(); DFU.len()];
+        let original = [rand::random::<u8>(); ACTIVE.size() as usize];
+        let update = [rand::random::<u8>(); ACTIVE.size() as usize];
 
-        for i in ACTIVE.from..ACTIVE.to {
-            active.0[i] = original[i - ACTIVE.from];
-        }
+        active.program(ACTIVE.from, &original).unwrap();
 
         let mut updater = FirmwareUpdater::new(DFU, STATE);
 
-        let mut offset = 0;
-        for chunk in update.chunks(4096) {
-            block_on(updater.write_firmware(offset, chunk, &mut dfu, chunk.len())).unwrap();
-            offset += chunk.len();
-        }
+        block_on(updater.write_firmware(0, &update, &mut dfu)).unwrap();
         block_on(updater.mark_updated(&mut state, &mut aligned)).unwrap();
 
         let mut bootloader: BootLoader = BootLoader::new(ACTIVE, DFU, STATE);
-        let mut magic = [0; 4];
         let mut page = [0; 4096];
         assert_eq!(
             State::Swap,
             bootloader
                 .prepare_boot(
                     &mut MultiFlashConfig::new(&mut active, &mut state, &mut dfu,),
-                    &mut magic,
                     &mut page
                 )
                 .unwrap()
         );
 
-        for i in ACTIVE.from..ACTIVE.to {
-            assert_eq!(active.0[i], update[i - ACTIVE.from], "Index {}", i);
-        }
-
+        active.assert_eq(ACTIVE.from, &update);
         // First DFU page is untouched
-        for i in DFU.from + 4096..DFU.to {
-            assert_eq!(dfu.0[i], original[i - DFU.from - 4096], "Index {}", i);
-        }
-    }
-
-    #[test]
-    #[should_panic]
-    fn test_range_asserts() {
-        const ACTIVE: Partition = Partition::new(4096, 4194304);
-        const DFU: Partition = Partition::new(4194304, 2 * 4194304);
-        const STATE: Partition = Partition::new(0, 4096);
-        assert_partitions(ACTIVE, DFU, STATE, 4096, 4);
+        dfu.assert_eq(DFU.from + 4096, &original);
     }
 
     #[test]
@@ -1458,13 +237,13 @@ mod tests {
 
         const STATE: Partition = Partition::new(0, 4096);
         const DFU: Partition = Partition::new(4096, 8192);
-        let mut flash = MemFlash::<8192, 4096, 4>([0xff; 8192]);
+        let mut flash = MemFlash::<8192, 4096, 4>::default();
 
         let firmware_len = firmware.len();
 
         let mut write_buf = [0; 4096];
         write_buf[0..firmware_len].copy_from_slice(firmware);
-        NorFlash::write(&mut flash, DFU.from as u32, &write_buf).unwrap();
+        DFU.write_blocking(&mut flash, 0, &write_buf).unwrap();
 
         // On with the test
 
@@ -1476,117 +255,9 @@ mod tests {
             &mut flash,
             &public_key.to_bytes(),
             &signature.to_bytes(),
-            firmware_len,
+            firmware_len as u32,
             &mut aligned,
         ))
         .is_ok());
     }
-    struct MemFlash<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize>([u8; SIZE]);
-
-    impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> NorFlash
-        for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
-    {
-        const WRITE_SIZE: usize = WRITE_SIZE;
-        const ERASE_SIZE: usize = ERASE_SIZE;
-        fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
-            let from = from as usize;
-            let to = to as usize;
-            assert!(from % ERASE_SIZE == 0);
-            assert!(to % ERASE_SIZE == 0, "To: {}, erase size: {}", to, ERASE_SIZE);
-            for i in from..to {
-                self.0[i] = 0xFF;
-            }
-            Ok(())
-        }
-
-        fn write(&mut self, offset: u32, data: &[u8]) -> Result<(), Self::Error> {
-            assert!(data.len() % WRITE_SIZE == 0);
-            assert!(offset as usize % WRITE_SIZE == 0);
-            assert!(offset as usize + data.len() <= SIZE);
-
-            self.0[offset as usize..offset as usize + data.len()].copy_from_slice(data);
-
-            Ok(())
-        }
-    }
-
-    impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> ErrorType
-        for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
-    {
-        type Error = Infallible;
-    }
-
-    impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> ReadNorFlash
-        for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
-    {
-        const READ_SIZE: usize = 1;
-
-        fn read(&mut self, offset: u32, buf: &mut [u8]) -> Result<(), Self::Error> {
-            let len = buf.len();
-            buf[..].copy_from_slice(&self.0[offset as usize..offset as usize + len]);
-            Ok(())
-        }
-
-        fn capacity(&self) -> usize {
-            SIZE
-        }
-    }
-
-    impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> super::Flash
-        for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
-    {
-        const BLOCK_SIZE: usize = ERASE_SIZE;
-        const ERASE_VALUE: u8 = 0xFF;
-    }
-
-    impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> AsyncReadNorFlash
-        for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
-    {
-        const READ_SIZE: usize = 1;
-
-        async fn read(&mut self, offset: u32, buf: &mut [u8]) -> Result<(), Self::Error> {
-            let len = buf.len();
-            buf[..].copy_from_slice(&self.0[offset as usize..offset as usize + len]);
-            Ok(())
-        }
-
-        fn capacity(&self) -> usize {
-            SIZE
-        }
-    }
-
-    impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> AsyncNorFlash
-        for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
-    {
-        const WRITE_SIZE: usize = WRITE_SIZE;
-        const ERASE_SIZE: usize = ERASE_SIZE;
-
-        async fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
-            let from = from as usize;
-            let to = to as usize;
-            assert!(from % ERASE_SIZE == 0);
-            assert!(to % ERASE_SIZE == 0);
-            for i in from..to {
-                self.0[i] = 0xFF;
-            }
-            Ok(())
-        }
-
-        async fn write(&mut self, offset: u32, data: &[u8]) -> Result<(), Self::Error> {
-            info!("Writing {} bytes to 0x{:x}", data.len(), offset);
-            assert!(data.len() % WRITE_SIZE == 0);
-            assert!(offset as usize % WRITE_SIZE == 0);
-            assert!(
-                offset as usize + data.len() <= SIZE,
-                "OFFSET: {}, LEN: {}, FLASH SIZE: {}",
-                offset,
-                data.len(),
-                SIZE
-            );
-
-            self.0[offset as usize..offset as usize + data.len()].copy_from_slice(data);
-
-            Ok(())
-        }
-    }
 }
diff --git a/embassy-boot/boot/src/mem_flash.rs b/embassy-boot/boot/src/mem_flash.rs
new file mode 100644
index 000000000..c62379b24
--- /dev/null
+++ b/embassy-boot/boot/src/mem_flash.rs
@@ -0,0 +1,164 @@
+#![allow(unused)]
+
+use core::ops::{Bound, Range, RangeBounds};
+
+use embedded_storage::nor_flash::{ErrorType, NorFlash, NorFlashError, NorFlashErrorKind, ReadNorFlash};
+use embedded_storage_async::nor_flash::{NorFlash as AsyncNorFlash, ReadNorFlash as AsyncReadNorFlash};
+/// In-memory flash backed by a `SIZE`-byte array, with `ERASE_SIZE`/`WRITE_SIZE` alignment assertions.
+pub struct MemFlash<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> {
+    pub mem: [u8; SIZE], // Raw contents, indexed by flash offset.
+    pub pending_write_successes: Option<usize>, // `Some(n)`: n more writes succeed, then writes fail.
+}
+/// Error returned by `MemFlash` writes once `pending_write_successes` has counted down to zero.
+#[derive(Debug)]
+pub struct MemFlashError;
+
+impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE> {
+    /// Creates a flash with every byte set to `fill` and no injected write failures.
+    pub const fn new(fill: u8) -> Self {
+        let mem = [fill; SIZE];
+        let pending_write_successes = None;
+        Self { mem, pending_write_successes }
+    }
+
+    /// Creates a flash in which every byte is drawn separately from `rand::random`.
+    #[cfg(test)]
+    pub fn random() -> Self {
+        let mut flash = Self::new(0);
+        flash.mem.iter_mut().for_each(|slot| *slot = rand::random::<u8>());
+        flash
+    }
+
+    /// Copies `bytes` to `offset` directly, without the erased-target check done by `NorFlash::write`.
+    ///
+    /// Panics unless `offset` and `bytes.len()` are multiples of `WRITE_SIZE` and the range fits in `SIZE`.
+    pub fn program(&mut self, offset: u32, bytes: &[u8]) -> Result<(), MemFlashError> {
+        let offset = offset as usize;
+        assert!(bytes.len() % WRITE_SIZE == 0);
+        assert!(offset % WRITE_SIZE == 0);
+        assert!(offset + bytes.len() <= SIZE);
+        let target = &mut self.mem[offset..offset + bytes.len()];
+        target.copy_from_slice(bytes);
+        Ok(())
+    }
+
+    /// Asserts that the contents starting at `offset` equal `expectation`; the reported index is relative.
+    pub fn assert_eq(&self, offset: u32, expectation: &[u8]) {
+        let base = offset as usize;
+        for (i, expected) in expectation.iter().enumerate() {
+            assert_eq!(self.mem[base + i], *expected, "Index {}", i);
+        }
+    }
+}
+// The default flash is filled with 0xFF, the same value `erase` writes.
+impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> Default
+    for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
+{
+    fn default() -> Self {
+        Self::new(0xFF)
+    }
+}
+// All `embedded_storage` flash operations on `MemFlash` report failures as `MemFlashError`.
+impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> ErrorType
+    for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
+{
+    type Error = MemFlashError;
+}
+// `MemFlashError` carries no detail, so it always maps to `NorFlashErrorKind::Other`.
+impl NorFlashError for MemFlashError {
+    fn kind(&self) -> NorFlashErrorKind {
+        NorFlashErrorKind::Other
+    }
+}
+
+impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> ReadNorFlash
+    for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
+{
+    const READ_SIZE: usize = 1;
+    /// Fills `bytes` from the backing array starting at `offset`; panics if the range runs past `SIZE`.
+    fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> {
+        let span = offset as usize..offset as usize + bytes.len();
+        bytes.copy_from_slice(&self.mem[span]);
+        Ok(())
+    }
+
+    fn capacity(&self) -> usize {
+        SIZE
+    }
+}
+
+impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> NorFlash
+    for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
+{
+    const WRITE_SIZE: usize = WRITE_SIZE;
+    const ERASE_SIZE: usize = ERASE_SIZE;
+    /// Asserts that both bounds are multiples of `ERASE_SIZE`, then sets bytes `from..to` to 0xFF.
+    fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
+        let from = from as usize;
+        let to = to as usize;
+        assert!(from % ERASE_SIZE == 0);
+        assert!(to % ERASE_SIZE == 0, "To: {}, erase size: {}", to, ERASE_SIZE);
+        for i in from..to {
+            self.mem[i] = 0xFF;
+        }
+        Ok(())
+    }
+    /// Writes `bytes` at `offset`; panics on misalignment, on overrunning `SIZE`, or on a non-erased byte.
+    fn write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Self::Error> {
+        let offset = offset as usize;
+        assert!(bytes.len() % WRITE_SIZE == 0);
+        assert!(offset % WRITE_SIZE == 0);
+        assert!(offset + bytes.len() <= SIZE);
+        // Failure injection: `Some(n)` consumes one success per call; at zero, fail before touching memory.
+        if let Some(pending_successes) = self.pending_write_successes {
+            if pending_successes > 0 {
+                self.pending_write_successes = Some(pending_successes - 1);
+            } else {
+                return Err(MemFlashError);
+            }
+        }
+        // Each target byte must still be 0xFF; `enumerate` before `skip` keeps absolute offsets for the message.
+        for ((offset, mem_byte), new_byte) in self
+            .mem
+            .iter_mut()
+            .enumerate()
+            .skip(offset)
+            .take(bytes.len())
+            .zip(bytes)
+        {
+            assert_eq!(0xFF, *mem_byte, "Offset {} is not erased", offset);
+            *mem_byte = *new_byte;
+        }
+
+        Ok(())
+    }
+}
+
+impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> AsyncReadNorFlash
+    for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
+{
+    const READ_SIZE: usize = 1;
+    /// Async wrapper that forwards to the blocking `ReadNorFlash::read`; the body contains no `.await`.
+    async fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> {
+        <Self as ReadNorFlash>::read(self, offset, bytes)
+    }
+
+    fn capacity(&self) -> usize {
+        <Self as ReadNorFlash>::capacity(self)
+    }
+}
+
+impl<const SIZE: usize, const ERASE_SIZE: usize, const WRITE_SIZE: usize> AsyncNorFlash
+    for MemFlash<SIZE, ERASE_SIZE, WRITE_SIZE>
+{
+    const WRITE_SIZE: usize = WRITE_SIZE;
+    const ERASE_SIZE: usize = ERASE_SIZE;
+    /// Async wrapper that forwards to the blocking `NorFlash::erase`.
+    async fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
+        <Self as NorFlash>::erase(self, from, to)
+    }
+    /// Async wrapper that forwards to the blocking `NorFlash::write`, including its failure injection.
+    async fn write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Self::Error> {
+        <Self as NorFlash>::write(self, offset, bytes)
+    }
+}
diff --git a/embassy-boot/boot/src/partition.rs b/embassy-boot/boot/src/partition.rs
new file mode 100644
index 000000000..7529059b6
--- /dev/null
+++ b/embassy-boot/boot/src/partition.rs
@@ -0,0 +1,139 @@
+use embedded_storage::nor_flash::{NorFlash, ReadNorFlash};
+use embedded_storage_async::nor_flash::{NorFlash as AsyncNorFlash, ReadNorFlash as AsyncReadNorFlash};
+
+/// A region in flash used by the bootloader, covering flash offsets `from..to`.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct Partition {
+    /// The offset into the flash where the partition starts (inclusive).
+    pub from: u32,
+    /// The offset into the flash where the partition ends (exclusive; `size()` is `to - from`).
+    pub to: u32,
+}
+
+impl Partition {
+    /// Create a new partition covering flash offsets `from..to`
+    pub const fn new(from: u32, to: u32) -> Self {
+        Self { from, to }
+    }
+
+    /// Return the size of the partition, computed as `to - from`
+    pub const fn size(&self) -> u32 {
+        self.to - self.from
+    }
+
+    /// Read into `bytes` starting `offset` past the partition start (async; `offset` is not range-checked)
+    pub async fn read<F: AsyncReadNorFlash>(
+        &self,
+        flash: &mut F,
+        offset: u32,
+        bytes: &mut [u8],
+    ) -> Result<(), F::Error> {
+        let offset = self.from + offset;
+        flash.read(offset, bytes).await
+    }
+
+    /// Write `bytes` starting `offset` past the partition start (async; `offset` is not range-checked)
+    pub async fn write<F: AsyncNorFlash>(&self, flash: &mut F, offset: u32, bytes: &[u8]) -> Result<(), F::Error> {
+        let offset = self.from + offset;
+        flash.write(offset, bytes).await?;
+        trace!("Wrote from 0x{:x} len {}", offset, bytes.len());
+        Ok(())
+    }
+
+    /// Erase the partition-relative range `from..to` (async; both bounds are offsets from the partition start)
+    pub async fn erase<F: AsyncNorFlash>(&self, flash: &mut F, from: u32, to: u32) -> Result<(), F::Error> {
+        let from = self.from + from;
+        let to = self.from + to;
+        flash.erase(from, to).await?;
+        trace!("Erased from 0x{:x} to 0x{:x}", from, to);
+        Ok(())
+    }
+
+    /// Erase the entire partition, i.e. flash offsets `self.from..self.to` (async)
+    pub(crate) async fn wipe<F: AsyncNorFlash>(&self, flash: &mut F) -> Result<(), F::Error> {
+        let from = self.from;
+        let to = self.to;
+        flash.erase(from, to).await?;
+        trace!("Wiped from 0x{:x} to 0x{:x}", from, to);
+        Ok(())
+    }
+
+    /// Blocking counterpart of `read`: read into `bytes` starting `offset` past the partition start
+    pub fn read_blocking<F: ReadNorFlash>(&self, flash: &mut F, offset: u32, bytes: &mut [u8]) -> Result<(), F::Error> {
+        let offset = self.from + offset;
+        flash.read(offset, bytes)
+    }
+
+    /// Blocking counterpart of `write`: write `bytes` starting `offset` past the partition start
+    pub fn write_blocking<F: NorFlash>(&self, flash: &mut F, offset: u32, bytes: &[u8]) -> Result<(), F::Error> {
+        let offset = self.from + offset;
+        flash.write(offset, bytes)?;
+        trace!("Wrote from 0x{:x} len {}", offset, bytes.len());
+        Ok(())
+    }
+
+    /// Blocking counterpart of `erase`: erase the partition-relative range `from..to`
+    pub fn erase_blocking<F: NorFlash>(&self, flash: &mut F, from: u32, to: u32) -> Result<(), F::Error> {
+        let from = self.from + from;
+        let to = self.from + to;
+        flash.erase(from, to)?;
+        trace!("Erased from 0x{:x} to 0x{:x}", from, to);
+        Ok(())
+    }
+
+    /// Blocking counterpart of `wipe`: erase flash offsets `self.from..self.to`
+    pub(crate) fn wipe_blocking<F: NorFlash>(&self, flash: &mut F) -> Result<(), F::Error> {
+        let from = self.from;
+        let to = self.to;
+        flash.erase(from, to)?;
+        trace!("Wiped from 0x{:x} to 0x{:x}", from, to);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::mem_flash::MemFlash;
+    use crate::Partition;
+
+    #[test]
+    fn can_erase() {
+        let mut flash = MemFlash::<1024, 64, 4>::new(0x00);
+        let partition = Partition::new(256, 512);
+
+        partition.erase_blocking(&mut flash, 64, 192).unwrap();
+        // Offsets 0..320 (everything below partition start 256 + 64) must keep the 0x00 fill.
+        for (index, byte) in flash.mem.iter().copied().enumerate().take(256 + 64) {
+            assert_eq!(0x00, byte, "Index {}", index);
+        }
+        // Offsets 320..448 (partition-relative 64..192) must now read as erased.
+        for (index, byte) in flash.mem.iter().copied().enumerate().skip(256 + 64).take(128) {
+            assert_eq!(0xFF, byte, "Index {}", index);
+        }
+        // Everything from offset 448 to the end of the flash must be untouched.
+        for (index, byte) in flash.mem.iter().copied().enumerate().skip(256 + 64 + 128) {
+            assert_eq!(0x00, byte, "Index {}", index);
+        }
+    }
+
+    #[test]
+    fn can_wipe() {
+        let mut flash = MemFlash::<1024, 64, 4>::new(0x00);
+        let partition = Partition::new(256, 512);
+
+        partition.wipe_blocking(&mut flash).unwrap();
+        // Offsets below the partition start (0..256) must keep the 0x00 fill.
+        for (index, byte) in flash.mem.iter().copied().enumerate().take(256) {
+            assert_eq!(0x00, byte, "Index {}", index);
+        }
+        // The whole partition (256..512) must read as erased.
+        for (index, byte) in flash.mem.iter().copied().enumerate().skip(256).take(256) {
+            assert_eq!(0xFF, byte, "Index {}", index);
+        }
+        // Everything from the partition end (512) onwards must be untouched.
+        for (index, byte) in flash.mem.iter().copied().enumerate().skip(512) {
+            assert_eq!(0x00, byte, "Index {}", index);
+        }
+    }
+}
diff --git a/embassy-boot/nrf/src/lib.rs b/embassy-boot/nrf/src/lib.rs
index 5cc6ba448..48bbd7e2a 100644
--- a/embassy-boot/nrf/src/lib.rs
+++ b/embassy-boot/nrf/src/lib.rs
@@ -11,13 +11,12 @@ use embassy_nrf::wdt;
 use embedded_storage::nor_flash::{ErrorType, NorFlash, ReadNorFlash};
 
 /// A bootloader for nRF devices.
-pub struct BootLoader {
+pub struct BootLoader<const BUFFER_SIZE: usize = PAGE_SIZE> {
     boot: embassy_boot::BootLoader,
-    magic: AlignedBuffer<4>,
-    page: AlignedBuffer<PAGE_SIZE>,
+    aligned_buf: AlignedBuffer<BUFFER_SIZE>,
 }
 
-impl Default for BootLoader {
+impl Default for BootLoader<PAGE_SIZE> {
     /// Create a new bootloader instance using parameters from linker script
     fn default() -> Self {
         extern "C" {
@@ -31,20 +30,20 @@ impl Default for BootLoader {
 
         let active = unsafe {
             Partition::new(
-                &__bootloader_active_start as *const u32 as usize,
-                &__bootloader_active_end as *const u32 as usize,
+                &__bootloader_active_start as *const u32 as u32,
+                &__bootloader_active_end as *const u32 as u32,
             )
         };
         let dfu = unsafe {
             Partition::new(
-                &__bootloader_dfu_start as *const u32 as usize,
-                &__bootloader_dfu_end as *const u32 as usize,
+                &__bootloader_dfu_start as *const u32 as u32,
+                &__bootloader_dfu_end as *const u32 as u32,
             )
         };
         let state = unsafe {
             Partition::new(
-                &__bootloader_state_start as *const u32 as usize,
-                &__bootloader_state_end as *const u32 as usize,
+                &__bootloader_state_start as *const u32 as u32,
+                &__bootloader_state_end as *const u32 as u32,
             )
         };
 
@@ -56,20 +55,19 @@ impl Default for BootLoader {
     }
 }
 
-impl BootLoader {
+impl<const BUFFER_SIZE: usize> BootLoader<BUFFER_SIZE> {
     /// Create a new bootloader instance using the supplied partitions for active, dfu and state.
     pub fn new(active: Partition, dfu: Partition, state: Partition) -> Self {
         Self {
             boot: embassy_boot::BootLoader::new(active, dfu, state),
-            magic: AlignedBuffer([0; 4]),
-            page: AlignedBuffer([0; PAGE_SIZE]),
+            aligned_buf: AlignedBuffer([0; BUFFER_SIZE]),
         }
     }
 
     /// Inspect the bootloader state and perform actions required before booting, such as swapping
     /// firmware.
     pub fn prepare<F: FlashConfig>(&mut self, flash: &mut F) -> usize {
-        match self.boot.prepare_boot(flash, &mut self.magic.0, &mut self.page.0) {
+        match self.boot.prepare_boot(flash, &mut self.aligned_buf.0) {
             Ok(_) => self.boot.boot_address(),
             Err(_) => panic!("boot prepare error!"),
         }
diff --git a/embassy-boot/rp/src/lib.rs b/embassy-boot/rp/src/lib.rs
index 6df34133e..c3cb22299 100644
--- a/embassy-boot/rp/src/lib.rs
+++ b/embassy-boot/rp/src/lib.rs
@@ -5,33 +5,31 @@
 mod fmt;
 
 pub use embassy_boot::{AlignedBuffer, BootFlash, FirmwareUpdater, FlashConfig, Partition, SingleFlashConfig, State};
-use embassy_rp::flash::{Flash, ERASE_SIZE, WRITE_SIZE};
+use embassy_rp::flash::{Flash, ERASE_SIZE};
 use embassy_rp::peripherals::{FLASH, WATCHDOG};
 use embassy_rp::watchdog::Watchdog;
 use embassy_time::Duration;
 use embedded_storage::nor_flash::{ErrorType, NorFlash, ReadNorFlash};
 
 /// A bootloader for RP2040 devices.
-pub struct BootLoader {
+pub struct BootLoader<const BUFFER_SIZE: usize = ERASE_SIZE> {
     boot: embassy_boot::BootLoader,
-    magic: AlignedBuffer<WRITE_SIZE>,
-    page: AlignedBuffer<ERASE_SIZE>,
+    aligned_buf: AlignedBuffer<BUFFER_SIZE>,
 }
 
-impl BootLoader {
+impl<const BUFFER_SIZE: usize> BootLoader<BUFFER_SIZE> {
     /// Create a new bootloader instance using the supplied partitions for active, dfu and state.
     pub fn new(active: Partition, dfu: Partition, state: Partition) -> Self {
         Self {
             boot: embassy_boot::BootLoader::new(active, dfu, state),
-            magic: AlignedBuffer([0; WRITE_SIZE]),
-            page: AlignedBuffer([0; ERASE_SIZE]),
+            aligned_buf: AlignedBuffer([0; BUFFER_SIZE]),
         }
     }
 
     /// Inspect the bootloader state and perform actions required before booting, such as swapping
     /// firmware.
     pub fn prepare<F: FlashConfig>(&mut self, flash: &mut F) -> usize {
-        match self.boot.prepare_boot(flash, self.magic.as_mut(), self.page.as_mut()) {
+        match self.boot.prepare_boot(flash, self.aligned_buf.as_mut()) {
             Ok(_) => embassy_rp::flash::FLASH_BASE + self.boot.boot_address(),
             Err(_) => panic!("boot prepare error!"),
         }
@@ -54,7 +52,7 @@ impl BootLoader {
     }
 }
 
-impl Default for BootLoader {
+impl Default for BootLoader<ERASE_SIZE> {
     /// Create a new bootloader instance using parameters from linker script
     fn default() -> Self {
         extern "C" {
@@ -68,20 +66,20 @@ impl Default for BootLoader {
 
         let active = unsafe {
             Partition::new(
-                &__bootloader_active_start as *const u32 as usize,
-                &__bootloader_active_end as *const u32 as usize,
+                &__bootloader_active_start as *const u32 as u32,
+                &__bootloader_active_end as *const u32 as u32,
             )
         };
         let dfu = unsafe {
             Partition::new(
-                &__bootloader_dfu_start as *const u32 as usize,
-                &__bootloader_dfu_end as *const u32 as usize,
+                &__bootloader_dfu_start as *const u32 as u32,
+                &__bootloader_dfu_end as *const u32 as u32,
             )
         };
         let state = unsafe {
             Partition::new(
-                &__bootloader_state_start as *const u32 as usize,
-                &__bootloader_state_end as *const u32 as usize,
+                &__bootloader_state_start as *const u32 as u32,
+                &__bootloader_state_end as *const u32 as u32,
             )
         };
 
diff --git a/embassy-boot/stm32/src/lib.rs b/embassy-boot/stm32/src/lib.rs
index 82f712c4d..94404697f 100644
--- a/embassy-boot/stm32/src/lib.rs
+++ b/embassy-boot/stm32/src/lib.rs
@@ -7,26 +7,24 @@ mod fmt;
 pub use embassy_boot::{AlignedBuffer, BootFlash, FirmwareUpdater, FlashConfig, Partition, SingleFlashConfig, State};
 
 /// A bootloader for STM32 devices.
-pub struct BootLoader<const PAGE_SIZE: usize, const WRITE_SIZE: usize> {
+pub struct BootLoader<const BUFFER_SIZE: usize> {
     boot: embassy_boot::BootLoader,
-    magic: AlignedBuffer<WRITE_SIZE>,
-    page: AlignedBuffer<PAGE_SIZE>,
+    aligned_buf: AlignedBuffer<BUFFER_SIZE>,
 }
 
-impl<const PAGE_SIZE: usize, const WRITE_SIZE: usize> BootLoader<PAGE_SIZE, WRITE_SIZE> {
+impl<const BUFFER_SIZE: usize> BootLoader<BUFFER_SIZE> {
     /// Create a new bootloader instance using the supplied partitions for active, dfu and state.
     pub fn new(active: Partition, dfu: Partition, state: Partition) -> Self {
         Self {
             boot: embassy_boot::BootLoader::new(active, dfu, state),
-            magic: AlignedBuffer([0; WRITE_SIZE]),
-            page: AlignedBuffer([0; PAGE_SIZE]),
+            aligned_buf: AlignedBuffer([0; BUFFER_SIZE]),
         }
     }
 
     /// Inspect the bootloader state and perform actions required before booting, such as swapping
     /// firmware.
     pub fn prepare<F: FlashConfig>(&mut self, flash: &mut F) -> usize {
-        match self.boot.prepare_boot(flash, self.magic.as_mut(), self.page.as_mut()) {
+        match self.boot.prepare_boot(flash, self.aligned_buf.as_mut()) {
             Ok(_) => embassy_stm32::flash::FLASH_BASE + self.boot.boot_address(),
             Err(_) => panic!("boot prepare error!"),
         }
@@ -49,7 +47,7 @@ impl<const PAGE_SIZE: usize, const WRITE_SIZE: usize> BootLoader<PAGE_SIZE, WRIT
     }
 }
 
-impl<const PAGE_SIZE: usize, const WRITE_SIZE: usize> Default for BootLoader<PAGE_SIZE, WRITE_SIZE> {
+impl<const BUFFER_SIZE: usize> Default for BootLoader<BUFFER_SIZE> {
     /// Create a new bootloader instance using parameters from linker script
     fn default() -> Self {
         extern "C" {
@@ -63,20 +61,20 @@ impl<const PAGE_SIZE: usize, const WRITE_SIZE: usize> Default for BootLoader<PAG
 
         let active = unsafe {
             Partition::new(
-                &__bootloader_active_start as *const u32 as usize,
-                &__bootloader_active_end as *const u32 as usize,
+                &__bootloader_active_start as *const u32 as u32,
+                &__bootloader_active_end as *const u32 as u32,
             )
         };
         let dfu = unsafe {
             Partition::new(
-                &__bootloader_dfu_start as *const u32 as usize,
-                &__bootloader_dfu_end as *const u32 as usize,
+                &__bootloader_dfu_start as *const u32 as u32,
+                &__bootloader_dfu_end as *const u32 as u32,
             )
         };
         let state = unsafe {
             Partition::new(
-                &__bootloader_state_start as *const u32 as usize,
-                &__bootloader_state_end as *const u32 as usize,
+                &__bootloader_state_start as *const u32 as u32,
+                &__bootloader_state_end as *const u32 as u32,
             )
         };
 
diff --git a/embassy-cortex-m/src/executor.rs b/embassy-cortex-m/src/executor.rs
deleted file mode 100644
index 558539e73..000000000
--- a/embassy-cortex-m/src/executor.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-//! Executor specific to cortex-m devices.
-
-use core::cell::UnsafeCell;
-use core::mem::MaybeUninit;
-
-use atomic_polyfill::{AtomicBool, Ordering};
-use cortex_m::interrupt::InterruptNumber;
-use cortex_m::peripheral::NVIC;
-pub use embassy_executor::*;
-
-#[derive(Clone, Copy)]
-struct N(u16);
-unsafe impl cortex_m::interrupt::InterruptNumber for N {
-    fn number(self) -> u16 {
-        self.0
-    }
-}
-
-fn pend_by_number(n: u16) {
-    cortex_m::peripheral::NVIC::pend(N(n))
-}
-
-/// Interrupt mode executor.
-///
-/// This executor runs tasks in interrupt mode. The interrupt handler is set up
-/// to poll tasks, and when a task is woken the interrupt is pended from software.
-///
-/// This allows running async tasks at a priority higher than thread mode. One
-/// use case is to leave thread mode free for non-async tasks. Another use case is
-/// to run multiple executors: one in thread mode for low priority tasks and another in
-/// interrupt mode for higher priority tasks. Higher priority tasks will preempt lower
-/// priority ones.
-///
-/// It is even possible to run multiple interrupt mode executors at different priorities,
-/// by assigning different priorities to the interrupts. For an example on how to do this,
-/// See the 'multiprio' example for 'embassy-nrf'.
-///
-/// To use it, you have to pick an interrupt that won't be used by the hardware.
-/// Some chips reserve some interrupts for this purpose, sometimes named "software interrupts" (SWI).
-/// If this is not the case, you may use an interrupt from any unused peripheral.
-///
-/// It is somewhat more complex to use, it's recommended to use the thread-mode
-/// [`Executor`] instead, if it works for your use case.
-pub struct InterruptExecutor {
-    started: AtomicBool,
-    executor: UnsafeCell<MaybeUninit<raw::Executor>>,
-}
-
-unsafe impl Send for InterruptExecutor {}
-unsafe impl Sync for InterruptExecutor {}
-
-impl InterruptExecutor {
-    /// Create a new, not started `InterruptExecutor`.
-    #[inline]
-    pub const fn new() -> Self {
-        Self {
-            started: AtomicBool::new(false),
-            executor: UnsafeCell::new(MaybeUninit::uninit()),
-        }
-    }
-
-    /// Executor interrupt callback.
-    ///
-    /// # Safety
-    ///
-    /// You MUST call this from the interrupt handler, and from nowhere else.
-    pub unsafe fn on_interrupt(&'static self) {
-        let executor = unsafe { (&*self.executor.get()).assume_init_ref() };
-        executor.poll();
-    }
-
-    /// Start the executor.
-    ///
-    /// This initializes the executor, enables the interrupt, and returns.
-    /// The executor keeps running in the background through the interrupt.
-    ///
-    /// This returns a [`SendSpawner`] you can use to spawn tasks on it. A [`SendSpawner`]
-    /// is returned instead of a [`Spawner`](embassy_executor::Spawner) because the executor effectively runs in a
-    /// different "thread" (the interrupt), so spawning tasks on it is effectively
-    /// sending them.
-    ///
-    /// To obtain a [`Spawner`](embassy_executor::Spawner) for this executor, use [`Spawner::for_current_executor()`](embassy_executor::Spawner::for_current_executor()) from
-    /// a task running in it.
-    ///
-    /// # Interrupt requirements
-    ///
-    /// You must write the interrupt handler yourself, and make it call [`on_interrupt()`](Self::on_interrupt).
-    ///
-    /// This method already enables (unmasks) the interrupt, you must NOT do it yourself.
-    ///
-    /// You must set the interrupt priority before calling this method. You MUST NOT
-    /// do it after.
-    ///
-    pub fn start(&'static self, irq: impl InterruptNumber) -> SendSpawner {
-        if self
-            .started
-            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
-            .is_err()
-        {
-            panic!("InterruptExecutor::start() called multiple times on the same executor.");
-        }
-
-        unsafe {
-            (&mut *self.executor.get()).as_mut_ptr().write(raw::Executor::new(
-                |ctx| pend_by_number(ctx as u16),
-                irq.number() as *mut (),
-            ))
-        }
-
-        let executor = unsafe { (&*self.executor.get()).assume_init_ref() };
-
-        unsafe { NVIC::unmask(irq) }
-
-        executor.spawner().make_send()
-    }
-}
diff --git a/embassy-cortex-m/src/lib.rs b/embassy-cortex-m/src/lib.rs
index fba23367b..e4b713a06 100644
--- a/embassy-cortex-m/src/lib.rs
+++ b/embassy-cortex-m/src/lib.rs
@@ -5,6 +5,6 @@
 // This mod MUST go first, so that the others see its macros.
 pub(crate) mod fmt;
 
-pub mod executor;
+pub use embassy_executor as executor;
 pub mod interrupt;
 pub mod peripheral;
diff --git a/embassy-embedded-hal/Cargo.toml b/embassy-embedded-hal/Cargo.toml
index 45eb0d43d..c509d6ee5 100644
--- a/embassy-embedded-hal/Cargo.toml
+++ b/embassy-embedded-hal/Cargo.toml
@@ -19,8 +19,8 @@ nightly = ["embedded-hal-async", "embedded-storage-async"]
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../embassy-sync" }
 embedded-hal-02 = { package = "embedded-hal", version = "0.2.6", features = ["unproven"] }
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9" }
-embedded-hal-async = { version = "=0.2.0-alpha.0", optional = true }
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10" }
+embedded-hal-async = { version = "=0.2.0-alpha.1", optional = true }
 embedded-storage = "0.3.0"
 embedded-storage-async = { version = "0.4.0", optional = true }
 nb = "1.0.0"
diff --git a/embassy-embedded-hal/src/adapter.rs b/embassy-embedded-hal/src/adapter.rs
index a49f8df4b..ee919bd84 100644
--- a/embassy-embedded-hal/src/adapter.rs
+++ b/embassy-embedded-hal/src/adapter.rs
@@ -36,27 +36,22 @@ where
     E: embedded_hal_1::i2c::Error + 'static,
     T: blocking::i2c::WriteRead<Error = E> + blocking::i2c::Read<Error = E> + blocking::i2c::Write<Error = E>,
 {
-    async fn read<'a>(&'a mut self, address: u8, buffer: &'a mut [u8]) -> Result<(), Self::Error> {
-        self.wrapped.read(address, buffer)
+    async fn read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Self::Error> {
+        self.wrapped.read(address, read)
     }
 
-    async fn write<'a>(&'a mut self, address: u8, bytes: &'a [u8]) -> Result<(), Self::Error> {
-        self.wrapped.write(address, bytes)
+    async fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Self::Error> {
+        self.wrapped.write(address, write)
     }
 
-    async fn write_read<'a>(
-        &'a mut self,
+    async fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+        self.wrapped.write_read(address, write, read)
+    }
+
+    async fn transaction(
+        &mut self,
         address: u8,
-        bytes: &'a [u8],
-        buffer: &'a mut [u8],
-    ) -> Result<(), Self::Error> {
-        self.wrapped.write_read(address, bytes, buffer)
-    }
-
-    async fn transaction<'a, 'b>(
-        &'a mut self,
-        address: u8,
-        operations: &'a mut [embedded_hal_async::i2c::Operation<'b>],
+        operations: &mut [embedded_hal_1::i2c::Operation<'_>],
     ) -> Result<(), Self::Error> {
         let _ = address;
         let _ = operations;
diff --git a/embassy-embedded-hal/src/lib.rs b/embassy-embedded-hal/src/lib.rs
index 8da042228..a23fbdc41 100644
--- a/embassy-embedded-hal/src/lib.rs
+++ b/embassy-embedded-hal/src/lib.rs
@@ -1,7 +1,7 @@
 #![cfg_attr(not(feature = "std"), no_std)]
 #![cfg_attr(
     feature = "nightly",
-    feature(type_alias_impl_trait, async_fn_in_trait, impl_trait_projections)
+    feature(type_alias_impl_trait, async_fn_in_trait, impl_trait_projections, try_blocks)
 )]
 #![cfg_attr(feature = "nightly", allow(incomplete_features))]
 #![warn(missing_docs)]
diff --git a/embassy-embedded-hal/src/shared_bus/asynch/i2c.rs b/embassy-embedded-hal/src/shared_bus/asynch/i2c.rs
index c5e1fd415..829554045 100644
--- a/embassy-embedded-hal/src/shared_bus/asynch/i2c.rs
+++ b/embassy-embedded-hal/src/shared_bus/asynch/i2c.rs
@@ -54,35 +54,35 @@ where
     M: RawMutex + 'static,
     BUS: i2c::I2c + 'static,
 {
-    async fn read<'a>(&'a mut self, address: u8, buffer: &'a mut [u8]) -> Result<(), I2cDeviceError<BUS::Error>> {
+    async fn read(&mut self, address: u8, read: &mut [u8]) -> Result<(), I2cDeviceError<BUS::Error>> {
         let mut bus = self.bus.lock().await;
-        bus.read(address, buffer).await.map_err(I2cDeviceError::I2c)?;
+        bus.read(address, read).await.map_err(I2cDeviceError::I2c)?;
         Ok(())
     }
 
-    async fn write<'a>(&'a mut self, address: u8, bytes: &'a [u8]) -> Result<(), I2cDeviceError<BUS::Error>> {
+    async fn write(&mut self, address: u8, write: &[u8]) -> Result<(), I2cDeviceError<BUS::Error>> {
         let mut bus = self.bus.lock().await;
-        bus.write(address, bytes).await.map_err(I2cDeviceError::I2c)?;
+        bus.write(address, write).await.map_err(I2cDeviceError::I2c)?;
         Ok(())
     }
 
-    async fn write_read<'a>(
-        &'a mut self,
+    async fn write_read(
+        &mut self,
         address: u8,
-        wr_buffer: &'a [u8],
-        rd_buffer: &'a mut [u8],
+        write: &[u8],
+        read: &mut [u8],
     ) -> Result<(), I2cDeviceError<BUS::Error>> {
         let mut bus = self.bus.lock().await;
-        bus.write_read(address, wr_buffer, rd_buffer)
+        bus.write_read(address, write, read)
             .await
             .map_err(I2cDeviceError::I2c)?;
         Ok(())
     }
 
-    async fn transaction<'a, 'b>(
-        &'a mut self,
+    async fn transaction(
+        &mut self,
         address: u8,
-        operations: &'a mut [embedded_hal_async::i2c::Operation<'b>],
+        operations: &mut [embedded_hal_async::i2c::Operation<'_>],
     ) -> Result<(), I2cDeviceError<BUS::Error>> {
         let _ = address;
         let _ = operations;
@@ -121,25 +121,25 @@ where
     M: RawMutex + 'static,
     BUS: i2c::I2c + SetConfig + 'static,
 {
-    async fn read<'a>(&'a mut self, address: u8, buffer: &'a mut [u8]) -> Result<(), I2cDeviceError<BUS::Error>> {
+    async fn read(&mut self, address: u8, buffer: &mut [u8]) -> Result<(), I2cDeviceError<BUS::Error>> {
         let mut bus = self.bus.lock().await;
         bus.set_config(&self.config);
         bus.read(address, buffer).await.map_err(I2cDeviceError::I2c)?;
         Ok(())
     }
 
-    async fn write<'a>(&'a mut self, address: u8, bytes: &'a [u8]) -> Result<(), I2cDeviceError<BUS::Error>> {
+    async fn write(&mut self, address: u8, bytes: &[u8]) -> Result<(), I2cDeviceError<BUS::Error>> {
         let mut bus = self.bus.lock().await;
         bus.set_config(&self.config);
         bus.write(address, bytes).await.map_err(I2cDeviceError::I2c)?;
         Ok(())
     }
 
-    async fn write_read<'a>(
-        &'a mut self,
+    async fn write_read(
+        &mut self,
         address: u8,
-        wr_buffer: &'a [u8],
-        rd_buffer: &'a mut [u8],
+        wr_buffer: &[u8],
+        rd_buffer: &mut [u8],
     ) -> Result<(), I2cDeviceError<BUS::Error>> {
         let mut bus = self.bus.lock().await;
         bus.set_config(&self.config);
@@ -149,11 +149,7 @@ where
         Ok(())
     }
 
-    async fn transaction<'a, 'b>(
-        &'a mut self,
-        address: u8,
-        operations: &'a mut [embedded_hal_async::i2c::Operation<'b>],
-    ) -> Result<(), I2cDeviceError<BUS::Error>> {
+    async fn transaction(&mut self, address: u8, operations: &mut [i2c::Operation<'_>]) -> Result<(), Self::Error> {
         let _ = address;
         let _ = operations;
         todo!()
diff --git a/embassy-embedded-hal/src/shared_bus/asynch/spi.rs b/embassy-embedded-hal/src/shared_bus/asynch/spi.rs
index d25716655..b5549a6cd 100644
--- a/embassy-embedded-hal/src/shared_bus/asynch/spi.rs
+++ b/embassy-embedded-hal/src/shared_bus/asynch/spi.rs
@@ -25,12 +25,11 @@
 //! let spi_dev2 = SpiDevice::new(spi_bus, cs_pin2);
 //! let display2 = ST7735::new(spi_dev2, dc2, rst2, Default::default(), 160, 128);
 //! ```
-use core::future::Future;
 
 use embassy_sync::blocking_mutex::raw::RawMutex;
 use embassy_sync::mutex::Mutex;
 use embedded_hal_1::digital::OutputPin;
-use embedded_hal_1::spi::ErrorType;
+use embedded_hal_1::spi::Operation;
 use embedded_hal_async::spi;
 
 use crate::shared_bus::SpiDeviceError;
@@ -57,33 +56,92 @@ where
     type Error = SpiDeviceError<BUS::Error, CS::Error>;
 }
 
-unsafe impl<M, BUS, CS> spi::SpiDevice for SpiDevice<'_, M, BUS, CS>
+impl<M, BUS, CS> spi::SpiDeviceRead for SpiDevice<'_, M, BUS, CS>
 where
-    M: RawMutex + 'static,
-    BUS: spi::SpiBusFlush + 'static,
+    M: RawMutex,
+    BUS: spi::SpiBusRead,
     CS: OutputPin,
 {
-    type Bus = BUS;
-
-    async fn transaction<R, F, Fut>(&mut self, f: F) -> Result<R, Self::Error>
-    where
-        F: FnOnce(*mut Self::Bus) -> Fut,
-        Fut: Future<Output = Result<R, <Self::Bus as ErrorType>::Error>>,
-    {
+    async fn read_transaction(&mut self, operations: &mut [&mut [u8]]) -> Result<(), Self::Error> {
         let mut bus = self.bus.lock().await;
         self.cs.set_low().map_err(SpiDeviceError::Cs)?;
 
-        let f_res = f(&mut *bus).await;
+        let op_res: Result<(), BUS::Error> = try {
+            for buf in operations {
+                bus.read(buf).await?;
+            }
+        };
 
         // On failure, it's important to still flush and deassert CS.
         let flush_res = bus.flush().await;
         let cs_res = self.cs.set_high();
 
-        let f_res = f_res.map_err(SpiDeviceError::Spi)?;
+        let op_res = op_res.map_err(SpiDeviceError::Spi)?;
         flush_res.map_err(SpiDeviceError::Spi)?;
         cs_res.map_err(SpiDeviceError::Cs)?;
 
-        Ok(f_res)
+        Ok(op_res)
+    }
+}
+
+impl<M, BUS, CS> spi::SpiDeviceWrite for SpiDevice<'_, M, BUS, CS>
+where
+    M: RawMutex,
+    BUS: spi::SpiBusWrite,
+    CS: OutputPin,
+{
+    async fn write_transaction(&mut self, operations: &[&[u8]]) -> Result<(), Self::Error> {
+        let mut bus = self.bus.lock().await;
+        self.cs.set_low().map_err(SpiDeviceError::Cs)?;
+
+        let op_res: Result<(), BUS::Error> = try {
+            for buf in operations {
+                bus.write(buf).await?;
+            }
+        };
+
+        // On failure, it's important to still flush and deassert CS.
+        let flush_res = bus.flush().await;
+        let cs_res = self.cs.set_high();
+
+        let op_res = op_res.map_err(SpiDeviceError::Spi)?;
+        flush_res.map_err(SpiDeviceError::Spi)?;
+        cs_res.map_err(SpiDeviceError::Cs)?;
+
+        Ok(op_res)
+    }
+}
+
+impl<M, BUS, CS> spi::SpiDevice for SpiDevice<'_, M, BUS, CS>
+where
+    M: RawMutex,
+    BUS: spi::SpiBus,
+    CS: OutputPin,
+{
+    async fn transaction(&mut self, operations: &mut [spi::Operation<'_, u8>]) -> Result<(), Self::Error> {
+        let mut bus = self.bus.lock().await;
+        self.cs.set_low().map_err(SpiDeviceError::Cs)?;
+
+        let op_res: Result<(), BUS::Error> = try {
+            for op in operations {
+                match op {
+                    Operation::Read(buf) => bus.read(buf).await?,
+                    Operation::Write(buf) => bus.write(buf).await?,
+                    Operation::Transfer(read, write) => bus.transfer(read, write).await?,
+                    Operation::TransferInPlace(buf) => bus.transfer_in_place(buf).await?,
+                }
+            }
+        };
+
+        // On failure, it's important to still flush and deassert CS.
+        let flush_res = bus.flush().await;
+        let cs_res = self.cs.set_high();
+
+        let op_res = op_res.map_err(SpiDeviceError::Spi)?;
+        flush_res.map_err(SpiDeviceError::Spi)?;
+        cs_res.map_err(SpiDeviceError::Cs)?;
+
+        Ok(op_res)
     }
 }
 
@@ -114,33 +172,94 @@ where
     type Error = SpiDeviceError<BUS::Error, CS::Error>;
 }
 
-unsafe impl<M, BUS, CS> spi::SpiDevice for SpiDeviceWithConfig<'_, M, BUS, CS>
+impl<M, BUS, CS> spi::SpiDeviceWrite for SpiDeviceWithConfig<'_, M, BUS, CS>
 where
-    M: RawMutex + 'static,
-    BUS: spi::SpiBusFlush + SetConfig + 'static,
+    M: RawMutex,
+    BUS: spi::SpiBusWrite + SetConfig,
     CS: OutputPin,
 {
-    type Bus = BUS;
-
-    async fn transaction<R, F, Fut>(&mut self, f: F) -> Result<R, Self::Error>
-    where
-        F: FnOnce(*mut Self::Bus) -> Fut,
-        Fut: Future<Output = Result<R, <Self::Bus as ErrorType>::Error>>,
-    {
+    async fn write_transaction(&mut self, operations: &[&[u8]]) -> Result<(), Self::Error> {
         let mut bus = self.bus.lock().await;
         bus.set_config(&self.config);
         self.cs.set_low().map_err(SpiDeviceError::Cs)?;
 
-        let f_res = f(&mut *bus).await;
+        let op_res: Result<(), BUS::Error> = try {
+            for buf in operations {
+                bus.write(buf).await?;
+            }
+        };
 
         // On failure, it's important to still flush and deassert CS.
         let flush_res = bus.flush().await;
         let cs_res = self.cs.set_high();
 
-        let f_res = f_res.map_err(SpiDeviceError::Spi)?;
+        let op_res = op_res.map_err(SpiDeviceError::Spi)?;
         flush_res.map_err(SpiDeviceError::Spi)?;
         cs_res.map_err(SpiDeviceError::Cs)?;
 
-        Ok(f_res)
+        Ok(op_res)
+    }
+}
+
+impl<M, BUS, CS> spi::SpiDeviceRead for SpiDeviceWithConfig<'_, M, BUS, CS>
+where
+    M: RawMutex,
+    BUS: spi::SpiBusRead + SetConfig,
+    CS: OutputPin,
+{
+    async fn read_transaction(&mut self, operations: &mut [&mut [u8]]) -> Result<(), Self::Error> {
+        let mut bus = self.bus.lock().await;
+        bus.set_config(&self.config);
+        self.cs.set_low().map_err(SpiDeviceError::Cs)?;
+
+        let op_res: Result<(), BUS::Error> = try {
+            for buf in operations {
+                bus.read(buf).await?;
+            }
+        };
+
+        // On failure, it's important to still flush and deassert CS.
+        let flush_res = bus.flush().await;
+        let cs_res = self.cs.set_high();
+
+        let op_res = op_res.map_err(SpiDeviceError::Spi)?;
+        flush_res.map_err(SpiDeviceError::Spi)?;
+        cs_res.map_err(SpiDeviceError::Cs)?;
+
+        Ok(op_res)
+    }
+}
+
+impl<M, BUS, CS> spi::SpiDevice for SpiDeviceWithConfig<'_, M, BUS, CS>
+where
+    M: RawMutex,
+    BUS: spi::SpiBus + SetConfig,
+    CS: OutputPin,
+{
+    async fn transaction(&mut self, operations: &mut [spi::Operation<'_, u8>]) -> Result<(), Self::Error> {
+        let mut bus = self.bus.lock().await;
+        bus.set_config(&self.config);
+        self.cs.set_low().map_err(SpiDeviceError::Cs)?;
+
+        let op_res: Result<(), BUS::Error> = try {
+            for op in operations {
+                match op {
+                    Operation::Read(buf) => bus.read(buf).await?,
+                    Operation::Write(buf) => bus.write(buf).await?,
+                    Operation::Transfer(read, write) => bus.transfer(read, write).await?,
+                    Operation::TransferInPlace(buf) => bus.transfer_in_place(buf).await?,
+                }
+            }
+        };
+
+        // On failure, it's important to still flush and deassert CS.
+        let flush_res = bus.flush().await;
+        let cs_res = self.cs.set_high();
+
+        let op_res = op_res.map_err(SpiDeviceError::Spi)?;
+        flush_res.map_err(SpiDeviceError::Spi)?;
+        cs_res.map_err(SpiDeviceError::Cs)?;
+
+        Ok(op_res)
     }
 }
diff --git a/embassy-embedded-hal/src/shared_bus/blocking/i2c.rs b/embassy-embedded-hal/src/shared_bus/blocking/i2c.rs
index 892000b26..1fe520e6c 100644
--- a/embassy-embedded-hal/src/shared_bus/blocking/i2c.rs
+++ b/embassy-embedded-hal/src/shared_bus/blocking/i2c.rs
@@ -72,34 +72,6 @@ where
         let _ = operations;
         todo!()
     }
-
-    fn write_iter<B: IntoIterator<Item = u8>>(&mut self, addr: u8, bytes: B) -> Result<(), Self::Error> {
-        let _ = addr;
-        let _ = bytes;
-        todo!()
-    }
-
-    fn write_iter_read<B: IntoIterator<Item = u8>>(
-        &mut self,
-        addr: u8,
-        bytes: B,
-        buffer: &mut [u8],
-    ) -> Result<(), Self::Error> {
-        let _ = addr;
-        let _ = bytes;
-        let _ = buffer;
-        todo!()
-    }
-
-    fn transaction_iter<'a, O: IntoIterator<Item = Operation<'a>>>(
-        &mut self,
-        address: u8,
-        operations: O,
-    ) -> Result<(), Self::Error> {
-        let _ = address;
-        let _ = operations;
-        todo!()
-    }
 }
 
 impl<'a, M, BUS, E> embedded_hal_02::blocking::i2c::Write for I2cDevice<'_, M, BUS>
@@ -204,32 +176,4 @@ where
         let _ = operations;
         todo!()
     }
-
-    fn write_iter<B: IntoIterator<Item = u8>>(&mut self, addr: u8, bytes: B) -> Result<(), Self::Error> {
-        let _ = addr;
-        let _ = bytes;
-        todo!()
-    }
-
-    fn write_iter_read<B: IntoIterator<Item = u8>>(
-        &mut self,
-        addr: u8,
-        bytes: B,
-        buffer: &mut [u8],
-    ) -> Result<(), Self::Error> {
-        let _ = addr;
-        let _ = bytes;
-        let _ = buffer;
-        todo!()
-    }
-
-    fn transaction_iter<'a, O: IntoIterator<Item = Operation<'a>>>(
-        &mut self,
-        address: u8,
-        operations: O,
-    ) -> Result<(), Self::Error> {
-        let _ = address;
-        let _ = operations;
-        todo!()
-    }
 }
diff --git a/embassy-embedded-hal/src/shared_bus/blocking/spi.rs b/embassy-embedded-hal/src/shared_bus/blocking/spi.rs
index 4a08dc36e..7982ffb6e 100644
--- a/embassy-embedded-hal/src/shared_bus/blocking/spi.rs
+++ b/embassy-embedded-hal/src/shared_bus/blocking/spi.rs
@@ -23,8 +23,7 @@ use core::cell::RefCell;
 use embassy_sync::blocking_mutex::raw::RawMutex;
 use embassy_sync::blocking_mutex::Mutex;
 use embedded_hal_1::digital::OutputPin;
-use embedded_hal_1::spi;
-use embedded_hal_1::spi::SpiBusFlush;
+use embedded_hal_1::spi::{self, Operation, SpiBus, SpiBusRead, SpiBusWrite};
 
 use crate::shared_bus::SpiDeviceError;
 use crate::SetConfig;
@@ -50,30 +49,85 @@ where
     type Error = SpiDeviceError<BUS::Error, CS::Error>;
 }
 
-impl<BUS, M, CS> embedded_hal_1::spi::SpiDevice for SpiDevice<'_, M, BUS, CS>
+impl<BUS, M, CS> embedded_hal_1::spi::SpiDeviceRead for SpiDevice<'_, M, BUS, CS>
 where
     M: RawMutex,
-    BUS: SpiBusFlush,
+    BUS: SpiBusRead,
     CS: OutputPin,
 {
-    type Bus = BUS;
-
-    fn transaction<R>(&mut self, f: impl FnOnce(&mut Self::Bus) -> Result<R, BUS::Error>) -> Result<R, Self::Error> {
+    fn read_transaction(&mut self, operations: &mut [&mut [u8]]) -> Result<(), Self::Error> {
         self.bus.lock(|bus| {
             let mut bus = bus.borrow_mut();
             self.cs.set_low().map_err(SpiDeviceError::Cs)?;
 
-            let f_res = f(&mut bus);
+            let op_res = operations.iter_mut().try_for_each(|buf| bus.read(buf));
 
             // On failure, it's important to still flush and deassert CS.
             let flush_res = bus.flush();
             let cs_res = self.cs.set_high();
 
-            let f_res = f_res.map_err(SpiDeviceError::Spi)?;
+            let op_res = op_res.map_err(SpiDeviceError::Spi)?;
             flush_res.map_err(SpiDeviceError::Spi)?;
             cs_res.map_err(SpiDeviceError::Cs)?;
 
-            Ok(f_res)
+            Ok(op_res)
+        })
+    }
+}
+
+impl<BUS, M, CS> embedded_hal_1::spi::SpiDeviceWrite for SpiDevice<'_, M, BUS, CS>
+where
+    M: RawMutex,
+    BUS: SpiBusWrite,
+    CS: OutputPin,
+{
+    fn write_transaction(&mut self, operations: &[&[u8]]) -> Result<(), Self::Error> {
+        self.bus.lock(|bus| {
+            let mut bus = bus.borrow_mut();
+            self.cs.set_low().map_err(SpiDeviceError::Cs)?;
+
+            let op_res = operations.iter().try_for_each(|buf| bus.write(buf));
+
+            // On failure, it's important to still flush and deassert CS.
+            let flush_res = bus.flush();
+            let cs_res = self.cs.set_high();
+
+            let op_res = op_res.map_err(SpiDeviceError::Spi)?;
+            flush_res.map_err(SpiDeviceError::Spi)?;
+            cs_res.map_err(SpiDeviceError::Cs)?;
+
+            Ok(op_res)
+        })
+    }
+}
+
+impl<BUS, M, CS> embedded_hal_1::spi::SpiDevice for SpiDevice<'_, M, BUS, CS>
+where
+    M: RawMutex,
+    BUS: SpiBus,
+    CS: OutputPin,
+{
+    fn transaction(&mut self, operations: &mut [Operation<'_, u8>]) -> Result<(), Self::Error> {
+        self.bus.lock(|bus| {
+            let mut bus = bus.borrow_mut();
+            self.cs.set_low().map_err(SpiDeviceError::Cs)?;
+
+            let op_res = operations.iter_mut().try_for_each(|op| match op {
+                Operation::Read(buf) => bus.read(buf),
+                Operation::Write(buf) => bus.write(buf),
+                Operation::Transfer(read, write) => bus.transfer(read, write),
+                Operation::TransferInPlace(buf) => bus.transfer_in_place(buf),
+            });
+
+            // On failure, it's important to still flush and deassert CS.
+            let flush_res = bus.flush();
+            let cs_res = self.cs.set_high();
+
+            let op_res = op_res.map_err(SpiDeviceError::Spi)?;
+            flush_res.map_err(SpiDeviceError::Spi)?;
+            cs_res.map_err(SpiDeviceError::Cs)?;
+
+            Ok(op_res)
         })
     }
 }
@@ -89,11 +143,11 @@ where
         self.bus.lock(|bus| {
             let mut bus = bus.borrow_mut();
             self.cs.set_low().map_err(SpiDeviceError::Cs)?;
-            let f_res = bus.transfer(words);
+            let op_res = bus.transfer(words);
             let cs_res = self.cs.set_high();
-            let f_res = f_res.map_err(SpiDeviceError::Spi)?;
+            let op_res = op_res.map_err(SpiDeviceError::Spi)?;
             cs_res.map_err(SpiDeviceError::Cs)?;
-            Ok(f_res)
+            Ok(op_res)
         })
     }
 }
@@ -110,11 +164,11 @@ where
         self.bus.lock(|bus| {
             let mut bus = bus.borrow_mut();
             self.cs.set_low().map_err(SpiDeviceError::Cs)?;
-            let f_res = bus.write(words);
+            let op_res = bus.write(words);
             let cs_res = self.cs.set_high();
-            let f_res = f_res.map_err(SpiDeviceError::Spi)?;
+            let op_res = op_res.map_err(SpiDeviceError::Spi)?;
             cs_res.map_err(SpiDeviceError::Cs)?;
-            Ok(f_res)
+            Ok(op_res)
         })
     }
 }
@@ -146,30 +200,85 @@ where
     type Error = SpiDeviceError<BUS::Error, CS::Error>;
 }
 
-impl<BUS, M, CS> embedded_hal_1::spi::SpiDevice for SpiDeviceWithConfig<'_, M, BUS, CS>
+impl<BUS, M, CS> embedded_hal_1::spi::SpiDeviceRead for SpiDeviceWithConfig<'_, M, BUS, CS>
 where
     M: RawMutex,
-    BUS: SpiBusFlush + SetConfig,
+    BUS: SpiBusRead + SetConfig,
     CS: OutputPin,
 {
-    type Bus = BUS;
-
-    fn transaction<R>(&mut self, f: impl FnOnce(&mut Self::Bus) -> Result<R, BUS::Error>) -> Result<R, Self::Error> {
+    fn read_transaction(&mut self, operations: &mut [&mut [u8]]) -> Result<(), Self::Error> {
         self.bus.lock(|bus| {
             let mut bus = bus.borrow_mut();
             bus.set_config(&self.config);
             self.cs.set_low().map_err(SpiDeviceError::Cs)?;
 
-            let f_res = f(&mut bus);
+            let op_res = operations.iter_mut().try_for_each(|buf| bus.read(buf));
 
             // On failure, it's important to still flush and deassert CS.
             let flush_res = bus.flush();
             let cs_res = self.cs.set_high();
 
-            let f_res = f_res.map_err(SpiDeviceError::Spi)?;
+            let op_res = op_res.map_err(SpiDeviceError::Spi)?;
             flush_res.map_err(SpiDeviceError::Spi)?;
             cs_res.map_err(SpiDeviceError::Cs)?;
-            Ok(f_res)
+            Ok(op_res)
+        })
+    }
+}
+
+impl<BUS, M, CS> embedded_hal_1::spi::SpiDeviceWrite for SpiDeviceWithConfig<'_, M, BUS, CS>
+where
+    M: RawMutex,
+    BUS: SpiBusWrite + SetConfig,
+    CS: OutputPin,
+{
+    fn write_transaction(&mut self, operations: &[&[u8]]) -> Result<(), Self::Error> {
+        self.bus.lock(|bus| {
+            let mut bus = bus.borrow_mut();
+            bus.set_config(&self.config);
+            self.cs.set_low().map_err(SpiDeviceError::Cs)?;
+
+            let op_res = operations.iter().try_for_each(|buf| bus.write(buf));
+
+            // On failure, it's important to still flush and deassert CS.
+            let flush_res = bus.flush();
+            let cs_res = self.cs.set_high();
+
+            let op_res = op_res.map_err(SpiDeviceError::Spi)?;
+            flush_res.map_err(SpiDeviceError::Spi)?;
+            cs_res.map_err(SpiDeviceError::Cs)?;
+            Ok(op_res)
+        })
+    }
+}
+
+impl<BUS, M, CS> embedded_hal_1::spi::SpiDevice for SpiDeviceWithConfig<'_, M, BUS, CS>
+where
+    M: RawMutex,
+    BUS: SpiBus + SetConfig,
+    CS: OutputPin,
+{
+    fn transaction(&mut self, operations: &mut [Operation<'_, u8>]) -> Result<(), Self::Error> {
+        self.bus.lock(|bus| {
+            let mut bus = bus.borrow_mut();
+            bus.set_config(&self.config);
+            self.cs.set_low().map_err(SpiDeviceError::Cs)?;
+
+            let op_res = operations.iter_mut().try_for_each(|op| match op {
+                Operation::Read(buf) => bus.read(buf),
+                Operation::Write(buf) => bus.write(buf),
+                Operation::Transfer(read, write) => bus.transfer(read, write),
+                Operation::TransferInPlace(buf) => bus.transfer_in_place(buf),
+            });
+
+            // On failure, it's important to still flush and deassert CS.
+            let flush_res = bus.flush();
+            let cs_res = self.cs.set_high();
+
+            let op_res = op_res.map_err(SpiDeviceError::Spi)?;
+            flush_res.map_err(SpiDeviceError::Spi)?;
+            cs_res.map_err(SpiDeviceError::Cs)?;
+            Ok(op_res)
         })
     }
 }
diff --git a/embassy-executor/Cargo.toml b/embassy-executor/Cargo.toml
index c2868eb98..29e1bd478 100644
--- a/embassy-executor/Cargo.toml
+++ b/embassy-executor/Cargo.toml
@@ -14,30 +14,42 @@ categories = [
 [package.metadata.embassy_docs]
 src_base = "https://github.com/embassy-rs/embassy/blob/embassy-executor-v$VERSION/embassy-executor/src/"
 src_base_git = "https://github.com/embassy-rs/embassy/blob/$COMMIT/embassy-executor/src/"
-features = ["nightly", "defmt"]
+features = ["nightly", "defmt", "pender-callback"]
 flavors = [
-    { name = "std",                       target = "x86_64-unknown-linux-gnu",   features = ["std"] },
-    { name = "wasm",                      target = "wasm32-unknown-unknown",     features = ["wasm"] },
-    { name = "thumbv6m-none-eabi",        target = "thumbv6m-none-eabi",         features = [] },
-    { name = "thumbv7m-none-eabi",        target = "thumbv7m-none-eabi",         features = [] },
-    { name = "thumbv7em-none-eabi",       target = "thumbv7em-none-eabi",        features = [] },
-    { name = "thumbv7em-none-eabihf",     target = "thumbv7em-none-eabihf",      features = [] },
-    { name = "thumbv8m.base-none-eabi",   target = "thumbv8m.base-none-eabi",    features = [] },
-    { name = "thumbv8m.main-none-eabi",   target = "thumbv8m.main-none-eabi",    features = [] },
-    { name = "thumbv8m.main-none-eabihf", target = "thumbv8m.main-none-eabihf",  features = [] },
+    { name = "std",             target = "x86_64-unknown-linux-gnu",     features = ["arch-std", "executor-thread"] },
+    { name = "wasm",            target = "wasm32-unknown-unknown",       features = ["arch-wasm", "executor-thread"] },
+    { name = "cortex-m",        target = "thumbv7em-none-eabi",          features = ["arch-cortex-m", "executor-thread", "executor-interrupt"] },
+    { name = "riscv32",         target = "riscv32imac-unknown-none-elf", features = ["arch-riscv32", "executor-thread"] },
 ]
 
 [package.metadata.docs.rs]
-features = ["std", "nightly", "defmt"]
+default-target = "thumbv7em-none-eabi"
+targets = ["thumbv7em-none-eabi"]
+features = ["nightly", "defmt", "pender-callback", "arch-cortex-m", "executor-thread", "executor-interrupt"]
 
 [features]
-default = []
-std = ["critical-section/std"]
-wasm = ["dep:wasm-bindgen", "dep:js-sys"]
+
+# Architecture
+_arch = [] # internal marker, enabled by every `arch-*` feature below (i.e. some arch was picked)
+arch-std = ["_arch", "critical-section/std"]
+arch-cortex-m = ["_arch", "dep:cortex-m"]
+arch-xtensa = ["_arch"]
+arch-riscv32 = ["_arch"]
+arch-wasm = ["_arch", "dep:wasm-bindgen", "dep:js-sys"]
+
+# Enable creating a `Pender` from an arbitrary function pointer callback.
+pender-callback = []
+
+# Enable the thread-mode executor (using WFE/SEV in Cortex-M, WFI in other embedded archs)
+executor-thread = []
+# Enable the interrupt-mode executor (available in Cortex-M only)
+executor-interrupt = []
 
 # Enable nightly-only features
 nightly = []
 
+turbowakers = []
+
 integrated-timers = ["dep:embassy-time"]
 
 # Trace interrupt invocations with rtos-trace.
@@ -53,9 +65,11 @@ embassy-macros  = { version = "0.1.0", path = "../embassy-macros" }
 embassy-time  = { version = "0.1.0", path = "../embassy-time", optional = true}
 atomic-polyfill = "1.0.1"
 critical-section = "1.1"
-cfg-if = "1.0.0"
 static_cell = "1.0"
 
-# WASM dependencies
+# arch-cortex-m dependencies
+cortex-m = { version = "0.7.6", optional = true }
+
+# arch-wasm dependencies
 wasm-bindgen = { version = "0.2.82", optional = true }
 js-sys = { version = "0.3", optional = true }
diff --git a/embassy-executor/src/arch/cortex_m.rs b/embassy-executor/src/arch/cortex_m.rs
index 4b27a264e..d6a55c4c7 100644
--- a/embassy-executor/src/arch/cortex_m.rs
+++ b/embassy-executor/src/arch/cortex_m.rs
@@ -1,59 +1,209 @@
-use core::arch::asm;
-use core::marker::PhantomData;
-use core::ptr;
+#[cfg(feature = "executor-thread")]
+pub use thread::*;
+#[cfg(feature = "executor-thread")]
+mod thread {
+    use core::arch::asm;
+    use core::marker::PhantomData;
 
-use super::{raw, Spawner};
+    #[cfg(feature = "nightly")]
+    pub use embassy_macros::main_cortex_m as main;
 
-/// Thread mode executor, using WFE/SEV.
-///
-/// This is the simplest and most common kind of executor. It runs on
-/// thread mode (at the lowest priority level), and uses the `WFE` ARM instruction
-/// to sleep when it has no more work to do. When a task is woken, a `SEV` instruction
-/// is executed, to make the `WFE` exit from sleep and poll the task.
-///
-/// This executor allows for ultra low power consumption for chips where `WFE`
-/// triggers low-power sleep without extra steps. If your chip requires extra steps,
-/// you may use [`raw::Executor`] directly to program custom behavior.
-pub struct Executor {
-    inner: raw::Executor,
-    not_send: PhantomData<*mut ()>,
-}
+    use crate::raw::{Pender, PenderInner};
+    use crate::{raw, Spawner};
 
-impl Executor {
-    /// Create a new Executor.
-    pub fn new() -> Self {
-        Self {
-            inner: raw::Executor::new(|_| unsafe { asm!("sev") }, ptr::null_mut()),
-            not_send: PhantomData,
+    #[derive(Copy, Clone)]
+    pub(crate) struct ThreadPender;
+
+    impl ThreadPender {
+        pub(crate) fn pend(self) {
+            unsafe { core::arch::asm!("sev") }
         }
     }
 
-    /// Run the executor.
+    /// Thread mode executor, using WFE/SEV.
     ///
-    /// The `init` closure is called with a [`Spawner`] that spawns tasks on
-    /// this executor. Use it to spawn the initial task(s). After `init` returns,
-    /// the executor starts running the tasks.
+    /// This is the simplest and most common kind of executor. It runs on
+    /// thread mode (at the lowest priority level), and uses the `WFE` ARM instruction
+    /// to sleep when it has no more work to do. When a task is woken, a `SEV` instruction
+    /// is executed, to make the `WFE` exit from sleep and poll the task.
     ///
-    /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
-    /// for example by passing it as an argument to the initial tasks.
-    ///
-    /// This function requires `&'static mut self`. This means you have to store the
-    /// Executor instance in a place where it'll live forever and grants you mutable
-    /// access. There's a few ways to do this:
-    ///
-    /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
-    /// - a `static mut` (unsafe)
-    /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
-    ///
-    /// This function never returns.
-    pub fn run(&'static mut self, init: impl FnOnce(Spawner)) -> ! {
-        init(self.inner.spawner());
+    /// This executor allows for ultra low power consumption for chips where `WFE`
+    /// triggers low-power sleep without extra steps. If your chip requires extra steps,
+    /// you may use [`raw::Executor`] directly to program custom behavior.
+    pub struct Executor {
+        inner: raw::Executor,
+        not_send: PhantomData<*mut ()>,
+    }
+
+    impl Executor {
+        /// Create a new Executor.
+        pub fn new() -> Self {
+            Self {
+                inner: raw::Executor::new(Pender(PenderInner::Thread(ThreadPender))),
+                not_send: PhantomData,
+            }
+        }
+
+        /// Run the executor.
+        ///
+        /// The `init` closure is called with a [`Spawner`] that spawns tasks on
+        /// this executor. Use it to spawn the initial task(s). After `init` returns,
+        /// the executor starts running the tasks.
+        ///
+        /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
+        /// for example by passing it as an argument to the initial tasks.
+        ///
+        /// This function requires `&'static mut self`. This means you have to store the
+        /// Executor instance in a place where it'll live forever and that grants you mutable
+        /// access. There are a few ways to do this:
+        ///
+        /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
+        /// - a `static mut` (unsafe)
+        /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
+        ///
+        /// This function never returns.
+        pub fn run(&'static mut self, init: impl FnOnce(Spawner)) -> ! {
+            init(self.inner.spawner());
+
+            loop {
+                unsafe {
+                    self.inner.poll();
+                    asm!("wfe");
+                };
+            }
+        }
+    }
+}
+
+#[cfg(feature = "executor-interrupt")]
+pub use interrupt::*;
+#[cfg(feature = "executor-interrupt")]
+mod interrupt {
+    use core::cell::UnsafeCell;
+    use core::mem::MaybeUninit;
+
+    use atomic_polyfill::{AtomicBool, Ordering};
+    use cortex_m::interrupt::InterruptNumber;
+    use cortex_m::peripheral::NVIC;
+
+    use crate::raw::{self, Pender, PenderInner};
+
+    #[derive(Clone, Copy)]
+    pub(crate) struct InterruptPender(u16);
+
+    impl InterruptPender {
+        pub(crate) fn pend(self) {
+            // STIR is faster, but is only available in v7 and higher.
+            #[cfg(not(armv6m))]
+            {
+                let mut nvic: cortex_m::peripheral::NVIC = unsafe { core::mem::transmute(()) };
+                nvic.request(self);
+            }
+
+            #[cfg(armv6m)]
+            cortex_m::peripheral::NVIC::pend(self);
+        }
+    }
+
+    unsafe impl cortex_m::interrupt::InterruptNumber for InterruptPender {
+        fn number(self) -> u16 {
+            self.0
+        }
+    }
+
+    /// Interrupt mode executor.
+    ///
+    /// This executor runs tasks in interrupt mode. The interrupt handler is set up
+    /// to poll tasks, and when a task is woken the interrupt is pended from software.
+    ///
+    /// This allows running async tasks at a priority higher than thread mode. One
+    /// use case is to leave thread mode free for non-async tasks. Another use case is
+    /// to run multiple executors: one in thread mode for low priority tasks and another in
+    /// interrupt mode for higher priority tasks. Higher priority tasks will preempt lower
+    /// priority ones.
+    ///
+    /// It is even possible to run multiple interrupt mode executors at different priorities,
+    /// by assigning different priorities to the interrupts. For an example of how to do this,
+    /// see the 'multiprio' example for 'embassy-nrf'.
+    ///
+    /// To use it, you have to pick an interrupt that won't be used by the hardware.
+    /// Some chips reserve some interrupts for this purpose, sometimes named "software interrupts" (SWI).
+    /// If this is not the case, you may use an interrupt from any unused peripheral.
+    ///
+    /// It is somewhat more complex to use, so it's recommended to use the thread-mode
+    /// [`Executor`] instead if it works for your use case.
+    pub struct InterruptExecutor {
+        started: AtomicBool,
+        executor: UnsafeCell<MaybeUninit<raw::Executor>>,
+    }
+
+    unsafe impl Send for InterruptExecutor {}
+    unsafe impl Sync for InterruptExecutor {}
+
+    impl InterruptExecutor {
+        /// Create a new, not started `InterruptExecutor`.
+        #[inline]
+        pub const fn new() -> Self {
+            Self {
+                started: AtomicBool::new(false),
+                executor: UnsafeCell::new(MaybeUninit::uninit()),
+            }
+        }
+
+        /// Executor interrupt callback.
+        ///
+        /// # Safety
+        ///
+        /// You MUST call this from the interrupt handler, and from nowhere else.
+        pub unsafe fn on_interrupt(&'static self) {
+            let executor = unsafe { (&*self.executor.get()).assume_init_ref() };
+            executor.poll();
+        }
+
+        /// Start the executor.
+        ///
+        /// This initializes the executor, enables the interrupt, and returns.
+        /// The executor keeps running in the background through the interrupt.
+        ///
+        /// This returns a [`SendSpawner`](crate::SendSpawner) you can use to spawn tasks on it. A `SendSpawner`
+        /// is returned instead of a [`Spawner`](crate::Spawner) because the executor effectively runs in a
+        /// different "thread" (the interrupt), so spawning tasks on it is effectively
+        /// sending them.
+        ///
+        /// To obtain a [`Spawner`](crate::Spawner) for this executor, use [`Spawner::for_current_executor()`](crate::Spawner::for_current_executor()) from
+        /// a task running in it.
+        ///
+        /// # Interrupt requirements
+        ///
+        /// You must write the interrupt handler yourself, and make it call [`on_interrupt()`](Self::on_interrupt).
+        ///
+        /// This method already enables (unmasks) the interrupt; you must NOT do it yourself.
+        ///
+        /// You must set the interrupt priority before calling this method. You MUST NOT
+        /// do it after.
+        ///
+        pub fn start(&'static self, irq: impl InterruptNumber) -> crate::SendSpawner {
+            if self
+                .started
+                .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
+                .is_err()
+            {
+                panic!("InterruptExecutor::start() called multiple times on the same executor.");
+            }
 
-        loop {
             unsafe {
-                self.inner.poll();
-                asm!("wfe");
-            };
+                (&mut *self.executor.get())
+                    .as_mut_ptr()
+                    .write(raw::Executor::new(Pender(PenderInner::Interrupt(InterruptPender(
+                        irq.number(),
+                    )))))
+            }
+
+            let executor = unsafe { (&*self.executor.get()).assume_init_ref() };
+
+            unsafe { NVIC::unmask(irq) }
+
+            executor.spawner().make_send()
         }
     }
 }
diff --git a/embassy-executor/src/arch/riscv32.rs b/embassy-executor/src/arch/riscv32.rs
index e97a56cda..f66daeae4 100644
--- a/embassy-executor/src/arch/riscv32.rs
+++ b/embassy-executor/src/arch/riscv32.rs
@@ -1,72 +1,83 @@
-use core::marker::PhantomData;
-use core::ptr;
-use core::sync::atomic::{AtomicBool, Ordering};
+#[cfg(feature = "executor-interrupt")]
+compile_error!("`executor-interrupt` is not supported with `arch-riscv32`.");
 
-use super::{raw, Spawner};
+#[cfg(feature = "executor-thread")]
+pub use thread::*;
+#[cfg(feature = "executor-thread")]
+mod thread {
+    use core::marker::PhantomData;
+    use core::sync::atomic::{AtomicBool, Ordering};
 
-/// global atomic used to keep track of whether there is work to do since sev() is not available on RISCV
-///
-static SIGNAL_WORK_THREAD_MODE: AtomicBool = AtomicBool::new(false);
+    use crate::raw::{Pender, PenderInner};
+    use crate::{raw, Spawner};
 
-/// RISCV32 Executor
-pub struct Executor {
-    inner: raw::Executor,
-    not_send: PhantomData<*mut ()>,
-}
+    #[derive(Copy, Clone)]
+    pub(crate) struct ThreadPender;
 
-impl Executor {
-    /// Create a new Executor.
-    pub fn new() -> Self {
-        Self {
-            // use Signal_Work_Thread_Mode as substitute for local interrupt register
-            inner: raw::Executor::new(
-                |_| {
-                    SIGNAL_WORK_THREAD_MODE.store(true, Ordering::SeqCst);
-                },
-                ptr::null_mut(),
-            ),
-            not_send: PhantomData,
+    impl ThreadPender {
+        #[allow(unused)]
+        pub(crate) fn pend(self) {
+            SIGNAL_WORK_THREAD_MODE.store(true, core::sync::atomic::Ordering::SeqCst);
         }
     }
 
-    /// Run the executor.
-    ///
-    /// The `init` closure is called with a [`Spawner`] that spawns tasks on
-    /// this executor. Use it to spawn the initial task(s). After `init` returns,
-    /// the executor starts running the tasks.
-    ///
-    /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
-    /// for example by passing it as an argument to the initial tasks.
-    ///
-    /// This function requires `&'static mut self`. This means you have to store the
-    /// Executor instance in a place where it'll live forever and grants you mutable
-    /// access. There's a few ways to do this:
-    ///
-    /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
-    /// - a `static mut` (unsafe)
-    /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
-    ///
-    /// This function never returns.
-    pub fn run(&'static mut self, init: impl FnOnce(Spawner)) -> ! {
-        init(self.inner.spawner());
+    /// Global atomic flag used to keep track of whether there is work to do, since `sev` is not available on RISC-V.
+    static SIGNAL_WORK_THREAD_MODE: AtomicBool = AtomicBool::new(false);
 
-        loop {
-            unsafe {
-                self.inner.poll();
-                // we do not care about race conditions between the load and store operations, interrupts
-                //will only set this value to true.
-                critical_section::with(|_| {
-                    // if there is work to do, loop back to polling
-                    // TODO can we relax this?
-                    if SIGNAL_WORK_THREAD_MODE.load(Ordering::SeqCst) {
-                        SIGNAL_WORK_THREAD_MODE.store(false, Ordering::SeqCst);
-                    }
-                    // if not, wait for interrupt
-                    else {
-                        core::arch::asm!("wfi");
-                    }
-                });
-                // if an interrupt occurred while waiting, it will be serviced here
+    /// RISCV32 Executor
+    pub struct Executor {
+        inner: raw::Executor,
+        not_send: PhantomData<*mut ()>,
+    }
+
+    impl Executor {
+        /// Create a new Executor.
+        pub fn new() -> Self {
+            Self {
+                inner: raw::Executor::new(Pender(PenderInner::Thread(ThreadPender))),
+                not_send: PhantomData,
+            }
+        }
+
+        /// Run the executor.
+        ///
+        /// The `init` closure is called with a [`Spawner`] that spawns tasks on
+        /// this executor. Use it to spawn the initial task(s). After `init` returns,
+        /// the executor starts running the tasks.
+        ///
+        /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
+        /// for example by passing it as an argument to the initial tasks.
+        ///
+        /// This function requires `&'static mut self`. This means you have to store the
+        /// Executor instance in a place where it'll live forever and that grants you mutable
+        /// access. There are a few ways to do this:
+        ///
+        /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
+        /// - a `static mut` (unsafe)
+        /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
+        ///
+        /// This function never returns.
+        pub fn run(&'static mut self, init: impl FnOnce(Spawner)) -> ! {
+            init(self.inner.spawner());
+
+            loop {
+                unsafe {
+                    self.inner.poll();
+                    // We do not care about race conditions between the load and store operations: interrupts
+                    // will only set this value to true.
+                    critical_section::with(|_| {
+                        // if there is work to do, loop back to polling
+                        // TODO can we relax this?
+                        if SIGNAL_WORK_THREAD_MODE.load(Ordering::SeqCst) {
+                            SIGNAL_WORK_THREAD_MODE.store(false, Ordering::SeqCst);
+                        }
+                        // if not, wait for interrupt
+                        else {
+                            core::arch::asm!("wfi");
+                        }
+                    });
+                    // if an interrupt occurred while waiting, it will be serviced here
+                }
             }
         }
     }
diff --git a/embassy-executor/src/arch/std.rs b/embassy-executor/src/arch/std.rs
index 701f0eb18..4e4a178f0 100644
--- a/embassy-executor/src/arch/std.rs
+++ b/embassy-executor/src/arch/std.rs
@@ -1,84 +1,100 @@
-use std::marker::PhantomData;
-use std::sync::{Condvar, Mutex};
+#[cfg(feature = "executor-interrupt")]
+compile_error!("`executor-interrupt` is not supported with `arch-std`.");
 
-use super::{raw, Spawner};
+#[cfg(feature = "executor-thread")]
+pub use thread::*;
+#[cfg(feature = "executor-thread")]
+mod thread {
+    use std::marker::PhantomData;
+    use std::sync::{Condvar, Mutex};
 
-/// Single-threaded std-based executor.
-pub struct Executor {
-    inner: raw::Executor,
-    not_send: PhantomData<*mut ()>,
-    signaler: &'static Signaler,
-}
+    #[cfg(feature = "nightly")]
+    pub use embassy_macros::main_std as main;
 
-impl Executor {
-    /// Create a new Executor.
-    pub fn new() -> Self {
-        let signaler = &*Box::leak(Box::new(Signaler::new()));
-        Self {
-            inner: raw::Executor::new(
-                |p| unsafe {
-                    let s = &*(p as *const () as *const Signaler);
-                    s.signal()
-                },
-                signaler as *const _ as _,
-            ),
-            not_send: PhantomData,
-            signaler,
+    use crate::raw::{Pender, PenderInner};
+    use crate::{raw, Spawner};
+
+    #[derive(Copy, Clone)]
+    pub(crate) struct ThreadPender(&'static Signaler);
+
+    impl ThreadPender {
+        #[allow(unused)]
+        pub(crate) fn pend(self) {
+            self.0.signal()
         }
     }
 
-    /// Run the executor.
-    ///
-    /// The `init` closure is called with a [`Spawner`] that spawns tasks on
-    /// this executor. Use it to spawn the initial task(s). After `init` returns,
-    /// the executor starts running the tasks.
-    ///
-    /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
-    /// for example by passing it as an argument to the initial tasks.
-    ///
-    /// This function requires `&'static mut self`. This means you have to store the
-    /// Executor instance in a place where it'll live forever and grants you mutable
-    /// access. There's a few ways to do this:
-    ///
-    /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
-    /// - a `static mut` (unsafe)
-    /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
-    ///
-    /// This function never returns.
-    pub fn run(&'static mut self, init: impl FnOnce(Spawner)) -> ! {
-        init(self.inner.spawner());
-
-        loop {
-            unsafe { self.inner.poll() };
-            self.signaler.wait()
-        }
+    /// Single-threaded std-based executor.
+    pub struct Executor {
+        inner: raw::Executor,
+        not_send: PhantomData<*mut ()>,
+        signaler: &'static Signaler,
     }
-}
 
-struct Signaler {
-    mutex: Mutex<bool>,
-    condvar: Condvar,
-}
+    impl Executor {
+        /// Create a new Executor.
+        pub fn new() -> Self {
+            let signaler = &*Box::leak(Box::new(Signaler::new()));
+            Self {
+                inner: raw::Executor::new(Pender(PenderInner::Thread(ThreadPender(signaler)))),
+                not_send: PhantomData,
+                signaler,
+            }
+        }
 
-impl Signaler {
-    fn new() -> Self {
-        Self {
-            mutex: Mutex::new(false),
-            condvar: Condvar::new(),
+        /// Run the executor.
+        ///
+        /// The `init` closure is called with a [`Spawner`] that spawns tasks on
+        /// this executor. Use it to spawn the initial task(s). After `init` returns,
+        /// the executor starts running the tasks.
+        ///
+        /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
+        /// for example by passing it as an argument to the initial tasks.
+        ///
+        /// This function requires `&'static mut self`. This means you have to store the
+        /// Executor instance in a place where it'll live forever and that grants you mutable
+        /// access. There are a few ways to do this:
+        ///
+        /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
+        /// - a `static mut` (unsafe)
+        /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
+        ///
+        /// This function never returns.
+        pub fn run(&'static mut self, init: impl FnOnce(Spawner)) -> ! {
+            init(self.inner.spawner());
+
+            loop {
+                unsafe { self.inner.poll() };
+                self.signaler.wait()
+            }
         }
     }
 
-    fn wait(&self) {
-        let mut signaled = self.mutex.lock().unwrap();
-        while !*signaled {
-            signaled = self.condvar.wait(signaled).unwrap();
-        }
-        *signaled = false;
+    struct Signaler {
+        mutex: Mutex<bool>,
+        condvar: Condvar,
     }
 
-    fn signal(&self) {
-        let mut signaled = self.mutex.lock().unwrap();
-        *signaled = true;
-        self.condvar.notify_one();
+    impl Signaler {
+        fn new() -> Self {
+            Self {
+                mutex: Mutex::new(false),
+                condvar: Condvar::new(),
+            }
+        }
+
+        fn wait(&self) {
+            let mut signaled = self.mutex.lock().unwrap();
+            while !*signaled {
+                signaled = self.condvar.wait(signaled).unwrap();
+            }
+            *signaled = false;
+        }
+
+        fn signal(&self) {
+            let mut signaled = self.mutex.lock().unwrap();
+            *signaled = true;
+            self.condvar.notify_one();
+        }
     }
 }
diff --git a/embassy-executor/src/arch/wasm.rs b/embassy-executor/src/arch/wasm.rs
index 98091cfbb..08ab16b99 100644
--- a/embassy-executor/src/arch/wasm.rs
+++ b/embassy-executor/src/arch/wasm.rs
@@ -1,74 +1,88 @@
-use core::marker::PhantomData;
+#[cfg(feature = "executor-interrupt")]
+compile_error!("`executor-interrupt` is not supported with `arch-wasm`.");
 
-use js_sys::Promise;
-use wasm_bindgen::prelude::*;
+#[cfg(feature = "executor-thread")]
+pub use thread::*;
+#[cfg(feature = "executor-thread")]
+mod thread {
 
-use super::raw::util::UninitCell;
-use super::raw::{self};
-use super::Spawner;
+    use core::marker::PhantomData;
 
-/// WASM executor, wasm_bindgen to schedule tasks on the JS event loop.
-pub struct Executor {
-    inner: raw::Executor,
-    ctx: &'static WasmContext,
-    not_send: PhantomData<*mut ()>,
-}
+    #[cfg(feature = "nightly")]
+    pub use embassy_macros::main_wasm as main;
+    use js_sys::Promise;
+    use wasm_bindgen::prelude::*;
 
-pub(crate) struct WasmContext {
-    promise: Promise,
-    closure: UninitCell<Closure<dyn FnMut(JsValue)>>,
-}
+    use crate::raw::util::UninitCell;
+    use crate::raw::{Pender, PenderInner};
+    use crate::{raw, Spawner};
 
-impl WasmContext {
-    pub fn new() -> Self {
-        Self {
-            promise: Promise::resolve(&JsValue::undefined()),
-            closure: UninitCell::uninit(),
-        }
+    /// WASM executor, using wasm_bindgen to schedule tasks on the JS event loop.
+    pub struct Executor {
+        inner: raw::Executor,
+        ctx: &'static WasmContext,
+        not_send: PhantomData<*mut ()>,
     }
-}
 
-impl Executor {
-    /// Create a new Executor.
-    pub fn new() -> Self {
-        let ctx = &*Box::leak(Box::new(WasmContext::new()));
-        let inner = raw::Executor::new(
-            |p| unsafe {
-                let ctx = &*(p as *const () as *const WasmContext);
-                let _ = ctx.promise.then(ctx.closure.as_mut());
-            },
-            ctx as *const _ as _,
-        );
-        Self {
-            inner,
-            not_send: PhantomData,
-            ctx,
+    pub(crate) struct WasmContext {
+        promise: Promise,
+        closure: UninitCell<Closure<dyn FnMut(JsValue)>>,
+    }
+
+    #[derive(Copy, Clone)]
+    pub(crate) struct ThreadPender(&'static WasmContext);
+
+    impl ThreadPender {
+        #[allow(unused)]
+        pub(crate) fn pend(self) {
+            let _ = self.0.promise.then(unsafe { self.0.closure.as_mut() });
         }
     }
 
-    /// Run the executor.
-    ///
-    /// The `init` closure is called with a [`Spawner`] that spawns tasks on
-    /// this executor. Use it to spawn the initial task(s). After `init` returns,
-    /// the executor starts running the tasks.
-    ///
-    /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
-    /// for example by passing it as an argument to the initial tasks.
-    ///
-    /// This function requires `&'static mut self`. This means you have to store the
-    /// Executor instance in a place where it'll live forever and grants you mutable
-    /// access. There's a few ways to do this:
-    ///
-    /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
-    /// - a `static mut` (unsafe)
-    /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
-    pub fn start(&'static mut self, init: impl FnOnce(Spawner)) {
-        unsafe {
-            let executor = &self.inner;
-            self.ctx.closure.write(Closure::new(move |_| {
-                executor.poll();
-            }));
-            init(self.inner.spawner());
+    impl WasmContext {
+        pub fn new() -> Self {
+            Self {
+                promise: Promise::resolve(&JsValue::undefined()),
+                closure: UninitCell::uninit(),
+            }
+        }
+    }
+
+    impl Executor {
+        /// Create a new Executor.
+        pub fn new() -> Self {
+            let ctx = &*Box::leak(Box::new(WasmContext::new()));
+            Self {
+                inner: raw::Executor::new(Pender(PenderInner::Thread(ThreadPender(ctx)))),
+                not_send: PhantomData,
+                ctx,
+            }
+        }
+
+        /// Run the executor.
+        ///
+        /// The `init` closure is called with a [`Spawner`] that spawns tasks on
+        /// this executor. Use it to spawn the initial task(s). After `init` returns,
+        /// the executor starts running the tasks.
+        ///
+        /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
+        /// for example by passing it as an argument to the initial tasks.
+        ///
+        /// This function requires `&'static mut self`. This means you have to store the
+        /// Executor instance in a place where it'll live forever and that grants you mutable
+        /// access. There are a few ways to do this:
+        ///
+        /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
+        /// - a `static mut` (unsafe)
+        /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
+        pub fn start(&'static mut self, init: impl FnOnce(Spawner)) {
+            unsafe {
+                let executor = &self.inner;
+                self.ctx.closure.write(Closure::new(move |_| {
+                    executor.poll();
+                }));
+                init(self.inner.spawner());
+            }
         }
     }
 }
diff --git a/embassy-executor/src/arch/xtensa.rs b/embassy-executor/src/arch/xtensa.rs
index 4ee0d9f78..61ea92c16 100644
--- a/embassy-executor/src/arch/xtensa.rs
+++ b/embassy-executor/src/arch/xtensa.rs
@@ -1,73 +1,84 @@
-use core::marker::PhantomData;
-use core::ptr;
-use core::sync::atomic::{AtomicBool, Ordering};
+#[cfg(feature = "executor-interrupt")]
+compile_error!("`executor-interrupt` is not supported with `arch-xtensa`.");
 
-use super::{raw, Spawner};
+#[cfg(feature = "executor-thread")]
+pub use thread::*;
+#[cfg(feature = "executor-thread")]
+mod thread {
+    use core::marker::PhantomData;
+    use core::sync::atomic::{AtomicBool, Ordering};
 
-/// global atomic used to keep track of whether there is work to do since sev() is not available on Xtensa
-///
-static SIGNAL_WORK_THREAD_MODE: AtomicBool = AtomicBool::new(false);
+    use crate::raw::{Pender, PenderInner};
+    use crate::{raw, Spawner};
 
-/// Xtensa Executor
-pub struct Executor {
-    inner: raw::Executor,
-    not_send: PhantomData<*mut ()>,
-}
+    #[derive(Copy, Clone)]
+    pub(crate) struct ThreadPender;
 
-impl Executor {
-    /// Create a new Executor.
-    pub fn new() -> Self {
-        Self {
-            // use Signal_Work_Thread_Mode as substitute for local interrupt register
-            inner: raw::Executor::new(
-                |_| {
-                    SIGNAL_WORK_THREAD_MODE.store(true, Ordering::SeqCst);
-                },
-                ptr::null_mut(),
-            ),
-            not_send: PhantomData,
+    impl ThreadPender {
+        #[allow(unused)]
+        pub(crate) fn pend(self) {
+            SIGNAL_WORK_THREAD_MODE.store(true, core::sync::atomic::Ordering::SeqCst);
         }
     }
 
-    /// Run the executor.
-    ///
-    /// The `init` closure is called with a [`Spawner`] that spawns tasks on
-    /// this executor. Use it to spawn the initial task(s). After `init` returns,
-    /// the executor starts running the tasks.
-    ///
-    /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
-    /// for example by passing it as an argument to the initial tasks.
-    ///
-    /// This function requires `&'static mut self`. This means you have to store the
-    /// Executor instance in a place where it'll live forever and grants you mutable
-    /// access. There's a few ways to do this:
-    ///
-    /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
-    /// - a `static mut` (unsafe)
-    /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
-    ///
-    /// This function never returns.
-    pub fn run(&'static mut self, init: impl FnOnce(Spawner)) -> ! {
-        init(self.inner.spawner());
+    /// Global atomic used to keep track of whether there is work to do, since `sev()` is not available on Xtensa.
+    static SIGNAL_WORK_THREAD_MODE: AtomicBool = AtomicBool::new(false);
 
-        loop {
-            unsafe {
-                self.inner.poll();
-                // we do not care about race conditions between the load and store operations, interrupts
-                // will only set this value to true.
-                // if there is work to do, loop back to polling
-                // TODO can we relax this?
-                critical_section::with(|_| {
-                    if SIGNAL_WORK_THREAD_MODE.load(Ordering::SeqCst) {
-                        SIGNAL_WORK_THREAD_MODE.store(false, Ordering::SeqCst);
-                    } else {
-                        // waiti sets the PS.INTLEVEL when slipping into sleep
-                        // because critical sections in Xtensa are implemented via increasing
-                        // PS.INTLEVEL the critical section ends here
-                        // take care not add code after `waiti` if it needs to be inside the CS
-                        core::arch::asm!("waiti 0"); // critical section ends here
-                    }
-                });
+    /// Xtensa Executor
+    pub struct Executor {
+        inner: raw::Executor,
+        not_send: PhantomData<*mut ()>,
+    }
+
+    impl Executor {
+        /// Create a new Executor.
+        pub fn new() -> Self {
+            Self {
+                inner: raw::Executor::new(Pender(PenderInner::Thread(ThreadPender))),
+                not_send: PhantomData,
+            }
+        }
+
+        /// Run the executor.
+        ///
+        /// The `init` closure is called with a [`Spawner`] that spawns tasks on
+        /// this executor. Use it to spawn the initial task(s). After `init` returns,
+        /// the executor starts running the tasks.
+        ///
+        /// To spawn more tasks later, you may keep copies of the [`Spawner`] (it is `Copy`),
+        /// for example by passing it as an argument to the initial tasks.
+        ///
+        /// This function requires `&'static mut self`. This means you have to store the
+        /// Executor instance in a place where it'll live forever and that grants you mutable
+        /// access. There are a few ways to do this:
+        ///
+        /// - a [StaticCell](https://docs.rs/static_cell/latest/static_cell/) (safe)
+        /// - a `static mut` (unsafe)
+        /// - a local variable in a function you know never returns (like `fn main() -> !`), upgrading its lifetime with `transmute`. (unsafe)
+        ///
+        /// This function never returns.
+        pub fn run(&'static mut self, init: impl FnOnce(Spawner)) -> ! {
+            init(self.inner.spawner());
+
+            loop {
+                unsafe {
+                    self.inner.poll();
+                    // we do not care about race conditions between the load and store operations, interrupts
+                    // will only set this value to true.
+                    // if there is work to do, loop back to polling
+                    // TODO can we relax this?
+                    critical_section::with(|_| {
+                        if SIGNAL_WORK_THREAD_MODE.load(Ordering::SeqCst) {
+                            SIGNAL_WORK_THREAD_MODE.store(false, Ordering::SeqCst);
+                        } else {
+                            // waiti sets the PS.INTLEVEL when slipping into sleep
+                            // because critical sections in Xtensa are implemented via increasing
+                            // PS.INTLEVEL the critical section ends here
+                            // take care not to add code after `waiti` if it needs to be inside the CS
+                            core::arch::asm!("waiti 0"); // critical section ends here
+                        }
+                    });
+                }
             }
         }
     }
diff --git a/embassy-executor/src/lib.rs b/embassy-executor/src/lib.rs
index 4c7e2f4cd..3ce687eb6 100644
--- a/embassy-executor/src/lib.rs
+++ b/embassy-executor/src/lib.rs
@@ -1,5 +1,5 @@
-#![cfg_attr(not(any(feature = "std", feature = "wasm")), no_std)]
-#![cfg_attr(all(feature = "nightly", target_arch = "xtensa"), feature(asm_experimental_arch))]
+#![cfg_attr(not(any(feature = "arch-std", feature = "arch-wasm")), no_std)]
+#![cfg_attr(all(feature = "nightly", feature = "arch-xtensa"), feature(asm_experimental_arch))]
 #![allow(clippy::new_without_default)]
 #![doc = include_str!("../README.md")]
 #![warn(missing_docs)]
@@ -10,47 +10,43 @@ pub(crate) mod fmt;
 #[cfg(feature = "nightly")]
 pub use embassy_macros::task;
 
-cfg_if::cfg_if! {
-    if #[cfg(cortex_m)] {
-        #[path="arch/cortex_m.rs"]
-        mod arch;
-        pub use arch::*;
-        #[cfg(feature = "nightly")]
-        pub use embassy_macros::main_cortex_m as main;
-    }
-    else if #[cfg(target_arch="riscv32")] {
-        #[path="arch/riscv32.rs"]
-        mod arch;
-        pub use arch::*;
-        #[cfg(feature = "nightly")]
-        pub use embassy_macros::main_riscv as main;
-    }
-    else if #[cfg(all(target_arch="xtensa", feature = "nightly"))] {
-        #[path="arch/xtensa.rs"]
-        mod arch;
-        pub use arch::*;
-    }
-    else if #[cfg(feature="wasm")] {
-        #[path="arch/wasm.rs"]
-        mod arch;
-        pub use arch::*;
-        #[cfg(feature = "nightly")]
-        pub use embassy_macros::main_wasm as main;
-    }
-    else if #[cfg(feature="std")] {
-        #[path="arch/std.rs"]
-        mod arch;
-        pub use arch::*;
-        #[cfg(feature = "nightly")]
-        pub use embassy_macros::main_std as main;
-    }
+macro_rules! check_at_most_one {
+    (@amo [$($feats:literal)*] [] [$($res:tt)*]) => {
+        #[cfg(any($($res)*))]
+        compile_error!(concat!("At most one of these features can be enabled at the same time:", $(" `", $feats, "`",)*));
+    };
+    (@amo $feats:tt [$curr:literal $($rest:literal)*] [$($res:tt)*]) => {
+        check_at_most_one!(@amo $feats [$($rest)*] [$($res)* $(all(feature=$curr, feature=$rest),)*]);
+    };
+    ($($f:literal),*$(,)?) => {
+        check_at_most_one!(@amo [$($f)*] [$($f)*] []);
+    };
 }
+check_at_most_one!("arch-cortex-m", "arch-riscv32", "arch-xtensa", "arch-std", "arch-wasm",);
 
+#[cfg(feature = "_arch")]
+#[cfg_attr(feature = "arch-cortex-m", path = "arch/cortex_m.rs")]
+#[cfg_attr(feature = "arch-riscv32", path = "arch/riscv32.rs")]
+#[cfg_attr(feature = "arch-xtensa", path = "arch/xtensa.rs")]
+#[cfg_attr(feature = "arch-std", path = "arch/std.rs")]
+#[cfg_attr(feature = "arch-wasm", path = "arch/wasm.rs")]
+mod arch;
+
+#[cfg(feature = "_arch")]
+pub use arch::*;
+
+pub mod raw;
+
+mod spawner;
+pub use spawner::*;
+
+/// Implementation details for embassy macros.
+/// Do not use. Used for macros and HALs only. Not covered by semver guarantees.
 #[doc(hidden)]
-/// Implementation details for embassy macros. DO NOT USE.
-pub mod export {
+pub mod _export {
     #[cfg(feature = "rtos-trace")]
     pub use rtos_trace::trace;
+    pub use static_cell::StaticCell;
 
     /// Expands the given block of code when `embassy-executor` is compiled with
     /// the `rtos-trace-interrupt` feature.
@@ -70,14 +66,3 @@ pub mod export {
         ($($tt:tt)*) => {};
     }
 }
-
-pub mod raw;
-
-mod spawner;
-pub use spawner::*;
-
-/// Do not use. Used for macros and HALs only. Not covered by semver guarantees.
-#[doc(hidden)]
-pub mod _export {
-    pub use static_cell::StaticCell;
-}
diff --git a/embassy-executor/src/raw/mod.rs b/embassy-executor/src/raw/mod.rs
index 15ff18fc8..bd0cff26b 100644
--- a/embassy-executor/src/raw/mod.rs
+++ b/embassy-executor/src/raw/mod.rs
@@ -11,6 +11,7 @@ mod run_queue;
 #[cfg(feature = "integrated-timers")]
 mod timer_queue;
 pub(crate) mod util;
+#[cfg_attr(feature = "turbowakers", path = "waker_turbo.rs")]
 mod waker;
 
 use core::future::Future;
@@ -18,11 +19,9 @@ use core::marker::PhantomData;
 use core::mem;
 use core::pin::Pin;
 use core::ptr::NonNull;
-use core::sync::atomic::AtomicPtr;
 use core::task::{Context, Poll};
 
 use atomic_polyfill::{AtomicU32, Ordering};
-use critical_section::CriticalSection;
 #[cfg(feature = "integrated-timers")]
 use embassy_time::driver::{self, AlarmHandle};
 #[cfg(feature = "integrated-timers")]
@@ -289,10 +288,60 @@ impl<F: Future + 'static, const N: usize> TaskPool<F, N> {
     }
 }
 
+#[derive(Clone, Copy)]
+pub(crate) enum PenderInner {
+    #[cfg(feature = "executor-thread")]
+    Thread(crate::arch::ThreadPender),
+    #[cfg(feature = "executor-interrupt")]
+    Interrupt(crate::arch::InterruptPender),
+    #[cfg(feature = "pender-callback")]
+    Callback { func: fn(*mut ()), context: *mut () },
+}
+
+unsafe impl Send for PenderInner {}
+unsafe impl Sync for PenderInner {}
+
+/// Platform/architecture-specific action executed when an executor has pending work.
+///
+/// When a task within an executor is woken, the `Pender` is called. This does a
+/// platform/architecture-specific action to signal there is pending work in the executor.
+/// When this happens, you must arrange for [`Executor::poll`] to be called.
+///
+/// You can think of it as a waker, but for the whole executor.
+pub struct Pender(pub(crate) PenderInner);
+
+impl Pender {
+    /// Create a `Pender` that will call an arbitrary function pointer.
+    ///
+    /// # Arguments
+    ///
+    /// - `func`: The function pointer to call.
+    /// - `context`: Opaque context pointer that will be passed to the function pointer.
+    #[cfg(feature = "pender-callback")]
+    pub fn new_from_callback(func: fn(*mut ()), context: *mut ()) -> Self {
+        Self(PenderInner::Callback {
+            func,
+            context: context.into(),
+        })
+    }
+}
+
+impl Pender {
+    pub(crate) fn pend(&self) {
+        match self.0 {
+            #[cfg(feature = "executor-thread")]
+            PenderInner::Thread(x) => x.pend(),
+            #[cfg(feature = "executor-interrupt")]
+            PenderInner::Interrupt(x) => x.pend(),
+            #[cfg(feature = "pender-callback")]
+            PenderInner::Callback { func, context } => func(context),
+        }
+    }
+}
+
 pub(crate) struct SyncExecutor {
     run_queue: RunQueue,
-    signal_fn: fn(*mut ()),
-    signal_ctx: AtomicPtr<()>,
+    pender: Pender,
 
     #[cfg(feature = "integrated-timers")]
     pub(crate) timer_queue: timer_queue::TimerQueue,
@@ -301,16 +350,13 @@ pub(crate) struct SyncExecutor {
 }
 
 impl SyncExecutor {
-    pub(crate) fn new(signal_fn: fn(*mut ()), signal_ctx: *mut ()) -> Self {
+    pub(crate) fn new(pender: Pender) -> Self {
         #[cfg(feature = "integrated-timers")]
         let alarm = unsafe { unwrap!(driver::allocate_alarm()) };
-        #[cfg(feature = "integrated-timers")]
-        driver::set_alarm_callback(alarm, signal_fn, signal_ctx);
 
         Self {
             run_queue: RunQueue::new(),
-            signal_fn,
-            signal_ctx: AtomicPtr::new(signal_ctx),
+            pender,
 
             #[cfg(feature = "integrated-timers")]
             timer_queue: timer_queue::TimerQueue::new(),
@@ -326,30 +372,37 @@ impl SyncExecutor {
     /// - `task` must be set up to run in this executor.
     /// - `task` must NOT be already enqueued (in this executor or another one).
     #[inline(always)]
-    unsafe fn enqueue(&self, cs: CriticalSection, task: TaskRef) {
+    unsafe fn enqueue(&self, task: TaskRef) {
         #[cfg(feature = "rtos-trace")]
         trace::task_ready_begin(task.as_ptr() as u32);
 
-        if self.run_queue.enqueue(cs, task) {
-            (self.signal_fn)(self.signal_ctx.load(Ordering::Relaxed))
+        if self.run_queue.enqueue(task) {
+            self.pender.pend();
         }
     }
 
+    #[cfg(feature = "integrated-timers")]
+    fn alarm_callback(ctx: *mut ()) {
+        let this: &Self = unsafe { &*(ctx as *const Self) };
+        this.pender.pend();
+    }
+
     pub(super) unsafe fn spawn(&'static self, task: TaskRef) {
         task.header().executor.set(Some(self));
 
         #[cfg(feature = "rtos-trace")]
         trace::task_new(task.as_ptr() as u32);
 
-        critical_section::with(|cs| {
-            self.enqueue(cs, task);
-        })
+        self.enqueue(task);
     }
 
     /// # Safety
     ///
     /// Same as [`Executor::poll`], plus you must only call this on the thread this executor was created.
     pub(crate) unsafe fn poll(&'static self) {
+        #[cfg(feature = "integrated-timers")]
+        driver::set_alarm_callback(self.alarm, Self::alarm_callback, self as *const _ as *mut ());
+
         #[allow(clippy::never_loop)]
         loop {
             #[cfg(feature = "integrated-timers")]
@@ -416,14 +469,14 @@ impl SyncExecutor {
 ///
 /// - To get the executor to do work, call `poll()`. This will poll all queued tasks (all tasks
 ///   that "want to run").
-/// - You must supply a `signal_fn`. The executor will call it to notify you it has work
+/// - You must supply a [`Pender`]. The executor will call it to notify you it has work
 ///   to do. You must arrange for `poll()` to be called as soon as possible.
 ///
-/// `signal_fn` can be called from *any* context: any thread, any interrupt priority
+/// The [`Pender`] can be called from *any* context: any thread, any interrupt priority
 /// level, etc. It may be called synchronously from any `Executor` method call as well.
 /// You must deal with this correctly.
 ///
-/// In particular, you must NOT call `poll` directly from `signal_fn`, as this violates
+/// In particular, you must NOT call `poll` directly from the pender callback, as this violates
 /// the requirement for `poll` to not be called reentrantly.
 #[repr(transparent)]
 pub struct Executor {
@@ -436,15 +489,15 @@ impl Executor {
     pub(crate) unsafe fn wrap(inner: &SyncExecutor) -> &Self {
         mem::transmute(inner)
     }
+
     /// Create a new executor.
     ///
-    /// When the executor has work to do, it will call `signal_fn` with
-    /// `signal_ctx` as argument.
+    /// When the executor has work to do, it will call the [`Pender`].
     ///
-    /// See [`Executor`] docs for details on `signal_fn`.
-    pub fn new(signal_fn: fn(*mut ()), signal_ctx: *mut ()) -> Self {
+    /// See [`Executor`] docs for details on `Pender`.
+    pub fn new(pender: Pender) -> Self {
         Self {
-            inner: SyncExecutor::new(signal_fn, signal_ctx),
+            inner: SyncExecutor::new(pender),
             _not_sync: PhantomData,
         }
     }
@@ -467,16 +520,16 @@ impl Executor {
     /// This loops over all tasks that are queued to be polled (i.e. they're
     /// freshly spawned or they've been woken). Other tasks are not polled.
     ///
-    /// You must call `poll` after receiving a call to `signal_fn`. It is OK
-    /// to call `poll` even when not requested by `signal_fn`, but it wastes
+    /// You must call `poll` after receiving a call to the [`Pender`]. It is OK
+    /// to call `poll` even when not requested by the `Pender`, but it wastes
     /// energy.
     ///
     /// # Safety
     ///
     /// You must NOT call `poll` reentrantly on the same executor.
     ///
-    /// In particular, note that `poll` may call `signal_fn` synchronously. Therefore, you
-    /// must NOT directly call `poll()` from your `signal_fn`. Instead, `signal_fn` has to
+    /// In particular, note that `poll` may call the `Pender` synchronously. Therefore, you
+    /// must NOT directly call `poll()` from the `Pender` callback. Instead, the callback has to
     /// somehow schedule for `poll()` to be called later, at a time you know for sure there's
     /// no `poll()` already running.
     pub unsafe fn poll(&'static self) {
@@ -496,24 +549,25 @@ impl Executor {
 ///
 /// You can obtain a `TaskRef` from a `Waker` using [`task_from_waker`].
 pub fn wake_task(task: TaskRef) {
-    critical_section::with(|cs| {
-        let header = task.header();
-        let state = header.state.load(Ordering::Relaxed);
+    let header = task.header();
 
+    let res = header.state.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |state| {
         // If already scheduled, or if not started,
         if (state & STATE_RUN_QUEUED != 0) || (state & STATE_SPAWNED == 0) {
-            return;
+            None
+        } else {
+            // Mark it as scheduled
+            Some(state | STATE_RUN_QUEUED)
         }
+    });
 
-        // Mark it as scheduled
-        header.state.store(state | STATE_RUN_QUEUED, Ordering::Relaxed);
-
+    if res.is_ok() {
         // We have just marked the task as scheduled, so enqueue it.
         unsafe {
             let executor = header.executor.get().unwrap_unchecked();
-            executor.enqueue(cs, task);
+            executor.enqueue(task);
         }
-    })
+    }
 }
 
 #[cfg(feature = "integrated-timers")]
diff --git a/embassy-executor/src/raw/run_queue.rs b/embassy-executor/src/raw/run_queue.rs
index 362157535..a88174a0c 100644
--- a/embassy-executor/src/raw/run_queue.rs
+++ b/embassy-executor/src/raw/run_queue.rs
@@ -2,7 +2,6 @@ use core::ptr;
 use core::ptr::NonNull;
 
 use atomic_polyfill::{AtomicPtr, Ordering};
-use critical_section::CriticalSection;
 
 use super::{TaskHeader, TaskRef};
 
@@ -46,11 +45,18 @@ impl RunQueue {
     ///
     /// `item` must NOT be already enqueued in any queue.
     #[inline(always)]
-    pub(crate) unsafe fn enqueue(&self, _cs: CriticalSection, task: TaskRef) -> bool {
-        let prev = self.head.load(Ordering::Relaxed);
-        task.header().run_queue_item.next.store(prev, Ordering::Relaxed);
-        self.head.store(task.as_ptr() as _, Ordering::Relaxed);
-        prev.is_null()
+    pub(crate) unsafe fn enqueue(&self, task: TaskRef) -> bool {
+        let mut was_empty = false;
+
+        self.head
+            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |prev| {
+                was_empty = prev.is_null();
+                task.header().run_queue_item.next.store(prev, Ordering::Relaxed);
+                Some(task.as_ptr() as *mut _)
+            })
+            .ok();
+
+        was_empty
     }
 
     /// Empty the queue, then call `on_task` for each task that was in the queue.
diff --git a/embassy-executor/src/raw/waker_turbo.rs b/embassy-executor/src/raw/waker_turbo.rs
new file mode 100644
index 000000000..435a0ff7e
--- /dev/null
+++ b/embassy-executor/src/raw/waker_turbo.rs
@@ -0,0 +1,34 @@
+use core::ptr::NonNull;
+use core::task::Waker;
+
+use super::{wake_task, TaskHeader, TaskRef};
+
+pub(crate) unsafe fn from_task(p: TaskRef) -> Waker {
+    Waker::from_turbo_ptr(NonNull::new_unchecked(p.as_ptr() as _))
+}
+
+/// Get a task pointer from a waker.
+///
+/// This can be used as an optimization in wait queues to store task pointers
+/// (1 word) instead of full Wakers (2 words). This saves a bit of RAM and helps
+/// avoid dynamic dispatch.
+///
+/// You can use the returned task pointer to wake the task with [`wake_task`](super::wake_task).
+///
+/// # Panics
+///
+/// Panics if the waker is not created by the Embassy executor.
+pub fn task_from_waker(waker: &Waker) -> TaskRef {
+    let ptr = waker.as_turbo_ptr().as_ptr();
+
+    // safety: our wakers are always created with `TaskRef::as_ptr`
+    unsafe { TaskRef::from_ptr(ptr as *const TaskHeader) }
+}
+
+#[inline(never)]
+#[no_mangle]
+fn _turbo_wake(ptr: NonNull<()>) {
+    // safety: our wakers are always created with `TaskRef::as_ptr`
+    let task = unsafe { TaskRef::from_ptr(ptr.as_ptr() as *const TaskHeader) };
+    wake_task(task)
+}
diff --git a/embassy-lora/Cargo.toml b/embassy-lora/Cargo.toml
index cbe78e592..604358c5b 100644
--- a/embassy-lora/Cargo.toml
+++ b/embassy-lora/Cargo.toml
@@ -9,9 +9,9 @@ src_base = "https://github.com/embassy-rs/embassy/blob/embassy-lora-v$VERSION/em
 src_base_git = "https://github.com/embassy-rs/embassy/blob/$COMMIT/embassy-lora/src/"
 features = ["time", "defmt"]
 flavors = [
-    { name = "sx126x", target = "thumbv7em-none-eabihf",   features = ["sx126x"] },
-    { name = "sx127x",  target = "thumbv7em-none-eabihf", features = ["sx127x", "embassy-stm32/stm32wl55jc-cm4", "embassy-stm32/time-driver-any"] },
-    { name = "stm32wl", target = "thumbv7em-none-eabihf", features = ["stm32wl", "embassy-stm32/stm32wl55jc-cm4", "embassy-stm32/time-driver-any"] },
+    { name = "sx126x",  target = "thumbv7em-none-eabihf", features = ["sx126x"] },
+    { name = "sx127x",  target = "thumbv7em-none-eabihf", features = ["sx127x"] },
+    { name = "stm32wl", target = "thumbv7em-none-eabihf", features = ["stm32wl", "embassy-stm32?/stm32wl55jc-cm4", "embassy-stm32?/time-driver-any"] },
 ]
 
 [lib]
@@ -19,7 +19,7 @@ flavors = [
 [features]
 sx126x = []
 sx127x = []
-stm32wl = ["embassy-stm32", "embassy-stm32/subghz"]
+stm32wl = ["dep:embassy-stm32"]
 time = []
 defmt = ["dep:defmt", "lorawan/defmt", "lorawan-device/defmt"]
 
@@ -31,8 +31,8 @@ log = { version = "0.4.14", optional = true }
 embassy-time = { version = "0.1.0", path = "../embassy-time" }
 embassy-sync = { version = "0.1.0", path = "../embassy-sync" }
 embassy-stm32 = { version = "0.1.0", path = "../embassy-stm32", default-features = false, optional = true }
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9" }
-embedded-hal-async = { version = "=0.2.0-alpha.0" }
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10" }
+embedded-hal-async = { version = "=0.2.0-alpha.1" }
 embassy-hal-common = { version = "0.1.0", path = "../embassy-hal-common", default-features = false }
 futures = { version = "0.3.17", default-features = false, features = [ "async-await" ] }
 embedded-hal = { version = "0.2", features = ["unproven"] }
diff --git a/embassy-macros/src/macros/cortex_m_interrupt_take.rs b/embassy-macros/src/macros/cortex_m_interrupt_take.rs
index e2ebf98c7..4806d1c12 100644
--- a/embassy-macros/src/macros/cortex_m_interrupt_take.rs
+++ b/embassy-macros/src/macros/cortex_m_interrupt_take.rs
@@ -10,12 +10,12 @@ pub fn run(name: syn::Ident) -> Result<TokenStream, TokenStream> {
     let (isr_enter, isr_exit) = (
         quote! {
             ::embassy_executor::rtos_trace_interrupt! {
-                ::embassy_executor::export::trace::isr_enter();
+                ::embassy_executor::_export::trace::isr_enter();
             }
         },
         quote! {
             ::embassy_executor::rtos_trace_interrupt! {
-                ::embassy_executor::export::trace::isr_exit();
+                ::embassy_executor::_export::trace::isr_exit();
             }
         },
     );
diff --git a/embassy-nrf/Cargo.toml b/embassy-nrf/Cargo.toml
index 4e62ca89e..4a4e7c9f9 100644
--- a/embassy-nrf/Cargo.toml
+++ b/embassy-nrf/Cargo.toml
@@ -87,8 +87,8 @@ embassy-embedded-hal = {version = "0.1.0", path = "../embassy-embedded-hal" }
 embassy-usb-driver = {version = "0.1.0", path = "../embassy-usb-driver", optional=true }
 
 embedded-hal-02 = { package = "embedded-hal", version = "0.2.6", features = ["unproven"] }
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9", optional = true}
-embedded-hal-async = { version = "=0.2.0-alpha.0", optional = true}
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10", optional = true}
+embedded-hal-async = { version = "=0.2.0-alpha.1", optional = true}
 embedded-io = { version = "0.4.0", features = ["async"], optional = true }
 
 defmt = { version = "0.3", optional = true }
diff --git a/embassy-nrf/src/twim.rs b/embassy-nrf/src/twim.rs
index ef4c929a3..9ae569609 100644
--- a/embassy-nrf/src/twim.rs
+++ b/embassy-nrf/src/twim.rs
@@ -846,20 +846,6 @@ mod eh1 {
             self.blocking_write(address, buffer)
         }
 
-        fn write_iter<B>(&mut self, _address: u8, _bytes: B) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            todo!();
-        }
-
-        fn write_iter_read<B>(&mut self, _address: u8, _bytes: B, _buffer: &mut [u8]) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            todo!();
-        }
-
         fn write_read(&mut self, address: u8, wr_buffer: &[u8], rd_buffer: &mut [u8]) -> Result<(), Self::Error> {
             self.blocking_write_read(address, wr_buffer, rd_buffer)
         }
@@ -871,13 +857,6 @@ mod eh1 {
         ) -> Result<(), Self::Error> {
             todo!();
         }
-
-        fn transaction_iter<'a, O>(&mut self, _address: u8, _operations: O) -> Result<(), Self::Error>
-        where
-            O: IntoIterator<Item = embedded_hal_1::i2c::Operation<'a>>,
-        {
-            todo!();
-        }
     }
 }
 
@@ -885,28 +864,22 @@ mod eh1 {
 mod eha {
     use super::*;
     impl<'d, T: Instance> embedded_hal_async::i2c::I2c for Twim<'d, T> {
-        async fn read<'a>(&'a mut self, address: u8, buffer: &'a mut [u8]) -> Result<(), Error> {
-            self.read(address, buffer).await
+        async fn read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Self::Error> {
+            self.read(address, read).await
         }
 
-        async fn write<'a>(&'a mut self, address: u8, bytes: &'a [u8]) -> Result<(), Error> {
-            self.write(address, bytes).await
+        async fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Self::Error> {
+            self.write(address, write).await
+        }
+        async fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+            self.write_read(address, write, read).await
         }
 
-        async fn write_read<'a>(
-            &'a mut self,
+        async fn transaction(
+            &mut self,
             address: u8,
-            wr_buffer: &'a [u8],
-            rd_buffer: &'a mut [u8],
-        ) -> Result<(), Error> {
-            self.write_read(address, wr_buffer, rd_buffer).await
-        }
-
-        async fn transaction<'a, 'b>(
-            &'a mut self,
-            address: u8,
-            operations: &'a mut [embedded_hal_async::i2c::Operation<'b>],
-        ) -> Result<(), Error> {
+            operations: &mut [embedded_hal_1::i2c::Operation<'_>],
+        ) -> Result<(), Self::Error> {
             let _ = address;
             let _ = operations;
             todo!()
diff --git a/embassy-rp/Cargo.toml b/embassy-rp/Cargo.toml
index 209c665b0..cb9c7be77 100644
--- a/embassy-rp/Cargo.toml
+++ b/embassy-rp/Cargo.toml
@@ -65,9 +65,9 @@ rp2040-pac2 = { git = "https://github.com/embassy-rs/rp2040-pac2", rev="017e3c90
 #rp2040-pac2 = { path = "../../rp2040-pac2", features = ["rt"] }
 
 embedded-hal-02 = { package = "embedded-hal", version = "0.2.6", features = ["unproven"] }
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9", optional = true}
-embedded-hal-async = { version = "=0.2.0-alpha.0", optional = true}
-embedded-hal-nb = { version = "=1.0.0-alpha.1", optional = true}
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10", optional = true}
+embedded-hal-async = { version = "=0.2.0-alpha.1", optional = true}
+embedded-hal-nb = { version = "=1.0.0-alpha.2", optional = true}
 
 paste = "1.0"
 pio-proc = {version= "0.2", optional = true}
diff --git a/embassy-rp/src/gpio.rs b/embassy-rp/src/gpio.rs
index fb45ef7cf..98e182868 100644
--- a/embassy-rp/src/gpio.rs
+++ b/embassy-rp/src/gpio.rs
@@ -437,6 +437,37 @@ impl<'d, T: Pin> OutputOpenDrain<'d, T> {
     pub fn is_low(&self) -> bool {
         self.pin.is_low()
     }
+
+    /// Returns current pin level
+    #[inline]
+    pub fn get_level(&self) -> Level {
+        self.is_high().into()
+    }
+
+    #[inline]
+    pub async fn wait_for_high(&mut self) {
+        self.pin.wait_for_high().await;
+    }
+
+    #[inline]
+    pub async fn wait_for_low(&mut self) {
+        self.pin.wait_for_low().await;
+    }
+
+    #[inline]
+    pub async fn wait_for_rising_edge(&mut self) {
+        self.pin.wait_for_rising_edge().await;
+    }
+
+    #[inline]
+    pub async fn wait_for_falling_edge(&mut self) {
+        self.pin.wait_for_falling_edge().await;
+    }
+
+    #[inline]
+    pub async fn wait_for_any_edge(&mut self) {
+        self.pin.wait_for_any_edge().await;
+    }
 }
 
 /// GPIO flexible pin.
@@ -1117,4 +1148,32 @@ mod eh1 {
             Ok(())
         }
     }
+
+    #[cfg(feature = "nightly")]
+    impl<'d, T: Pin> embedded_hal_async::digital::Wait for OutputOpenDrain<'d, T> {
+        async fn wait_for_high(&mut self) -> Result<(), Self::Error> {
+            self.wait_for_high().await;
+            Ok(())
+        }
+
+        async fn wait_for_low(&mut self) -> Result<(), Self::Error> {
+            self.wait_for_low().await;
+            Ok(())
+        }
+
+        async fn wait_for_rising_edge(&mut self) -> Result<(), Self::Error> {
+            self.wait_for_rising_edge().await;
+            Ok(())
+        }
+
+        async fn wait_for_falling_edge(&mut self) -> Result<(), Self::Error> {
+            self.wait_for_falling_edge().await;
+            Ok(())
+        }
+
+        async fn wait_for_any_edge(&mut self) -> Result<(), Self::Error> {
+            self.wait_for_any_edge().await;
+            Ok(())
+        }
+    }
 }
diff --git a/embassy-rp/src/i2c.rs b/embassy-rp/src/i2c.rs
index e48e16d81..40e85c66f 100644
--- a/embassy-rp/src/i2c.rs
+++ b/embassy-rp/src/i2c.rs
@@ -490,14 +490,14 @@ impl<'d, T: Instance + 'd, M: Mode> I2c<'d, T, M> {
         }
     }
 
-    fn read_blocking_internal(&mut self, buffer: &mut [u8], restart: bool, send_stop: bool) -> Result<(), Error> {
-        if buffer.is_empty() {
+    fn read_blocking_internal(&mut self, read: &mut [u8], restart: bool, send_stop: bool) -> Result<(), Error> {
+        if read.is_empty() {
             return Err(Error::InvalidReadBufferLength);
         }
 
         let p = T::regs();
-        let lastindex = buffer.len() - 1;
-        for (i, byte) in buffer.iter_mut().enumerate() {
+        let lastindex = read.len() - 1;
+        for (i, byte) in read.iter_mut().enumerate() {
             let first = i == 0;
             let last = i == lastindex;
 
@@ -524,15 +524,15 @@ impl<'d, T: Instance + 'd, M: Mode> I2c<'d, T, M> {
         Ok(())
     }
 
-    fn write_blocking_internal(&mut self, bytes: &[u8], send_stop: bool) -> Result<(), Error> {
-        if bytes.is_empty() {
+    fn write_blocking_internal(&mut self, write: &[u8], send_stop: bool) -> Result<(), Error> {
+        if write.is_empty() {
             return Err(Error::InvalidWriteBufferLength);
         }
 
         let p = T::regs();
 
-        for (i, byte) in bytes.iter().enumerate() {
-            let last = i == bytes.len() - 1;
+        for (i, byte) in write.iter().enumerate() {
+            let last = i == write.len() - 1;
 
             // NOTE(unsafe) We have &mut self
             unsafe {
@@ -572,21 +572,21 @@ impl<'d, T: Instance + 'd, M: Mode> I2c<'d, T, M> {
     // Blocking public API
     // =========================
 
-    pub fn blocking_read(&mut self, address: u8, buffer: &mut [u8]) -> Result<(), Error> {
+    pub fn blocking_read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Error> {
         Self::setup(address.into())?;
-        self.read_blocking_internal(buffer, true, true)
+        self.read_blocking_internal(read, true, true)
         // Automatic Stop
     }
 
-    pub fn blocking_write(&mut self, address: u8, bytes: &[u8]) -> Result<(), Error> {
+    pub fn blocking_write(&mut self, address: u8, write: &[u8]) -> Result<(), Error> {
         Self::setup(address.into())?;
-        self.write_blocking_internal(bytes, true)
+        self.write_blocking_internal(write, true)
     }
 
-    pub fn blocking_write_read(&mut self, address: u8, bytes: &[u8], buffer: &mut [u8]) -> Result<(), Error> {
+    pub fn blocking_write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Error> {
         Self::setup(address.into())?;
-        self.write_blocking_internal(bytes, false)?;
-        self.read_blocking_internal(buffer, true, true)
+        self.write_blocking_internal(write, false)?;
+        self.read_blocking_internal(read, true, true)
         // Automatic Stop
     }
 }
@@ -644,48 +644,22 @@ mod eh1 {
     }
 
     impl<'d, T: Instance, M: Mode> embedded_hal_1::i2c::I2c for I2c<'d, T, M> {
-        fn read(&mut self, address: u8, buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_read(address, buffer)
+        fn read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_read(address, read)
         }
 
-        fn write(&mut self, address: u8, buffer: &[u8]) -> Result<(), Self::Error> {
-            self.blocking_write(address, buffer)
+        fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Self::Error> {
+            self.blocking_write(address, write)
         }
 
-        fn write_iter<B>(&mut self, address: u8, bytes: B) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            let mut peekable = bytes.into_iter().peekable();
-            Self::setup(address.into())?;
-
-            while let Some(tx) = peekable.next() {
-                self.write_blocking_internal(&[tx], peekable.peek().is_none())?;
-            }
-            Ok(())
+        fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_write_read(address, write, read)
         }
 
-        fn write_iter_read<B>(&mut self, address: u8, bytes: B, buffer: &mut [u8]) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            let peekable = bytes.into_iter().peekable();
-            Self::setup(address.into())?;
-
-            for tx in peekable {
-                self.write_blocking_internal(&[tx], false)?
-            }
-            self.read_blocking_internal(buffer, true, true)
-        }
-
-        fn write_read(&mut self, address: u8, wr_buffer: &[u8], rd_buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_write_read(address, wr_buffer, rd_buffer)
-        }
-
-        fn transaction<'a>(
+        fn transaction(
             &mut self,
             address: u8,
-            operations: &mut [embedded_hal_1::i2c::Operation<'a>],
+            operations: &mut [embedded_hal_1::i2c::Operation<'_>],
         ) -> Result<(), Self::Error> {
             Self::setup(address.into())?;
             for i in 0..operations.len() {
@@ -697,22 +671,6 @@ mod eh1 {
             }
             Ok(())
         }
-
-        fn transaction_iter<'a, O>(&mut self, address: u8, operations: O) -> Result<(), Self::Error>
-        where
-            O: IntoIterator<Item = embedded_hal_1::i2c::Operation<'a>>,
-        {
-            Self::setup(address.into())?;
-            let mut peekable = operations.into_iter().peekable();
-            while let Some(operation) = peekable.next() {
-                let last = peekable.peek().is_none();
-                match operation {
-                    embedded_hal_1::i2c::Operation::Read(buf) => self.read_blocking_internal(buf, false, last)?,
-                    embedded_hal_1::i2c::Operation::Write(buf) => self.write_blocking_internal(buf, last)?,
-                }
-            }
-            Ok(())
-        }
     }
 }
 #[cfg(all(feature = "unstable-traits", feature = "nightly"))]
@@ -727,36 +685,29 @@ mod nightly {
         A: AddressMode + Into<u16> + 'static,
         T: Instance + 'd,
     {
-        async fn read<'a>(&'a mut self, address: A, read: &'a mut [u8]) -> Result<(), Self::Error> {
+        async fn read(&mut self, address: A, read: &mut [u8]) -> Result<(), Self::Error> {
             let addr: u16 = address.into();
 
             Self::setup(addr)?;
             self.read_async_internal(read, false, true).await
         }
 
-        async fn write<'a>(&'a mut self, address: A, write: &'a [u8]) -> Result<(), Self::Error> {
+        async fn write(&mut self, address: A, write: &[u8]) -> Result<(), Self::Error> {
             let addr: u16 = address.into();
 
             Self::setup(addr)?;
             self.write_async_internal(write.iter().copied(), true).await
         }
-        async fn write_read<'a>(
-            &'a mut self,
-            address: A,
-            write: &'a [u8],
-            read: &'a mut [u8],
-        ) -> Result<(), Self::Error> {
+
+        async fn write_read(&mut self, address: A, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
             let addr: u16 = address.into();
 
             Self::setup(addr)?;
             self.write_async_internal(write.iter().cloned(), false).await?;
             self.read_async_internal(read, false, true).await
         }
-        async fn transaction<'a, 'b>(
-            &'a mut self,
-            address: A,
-            operations: &'a mut [Operation<'b>],
-        ) -> Result<(), Self::Error> {
+
+        async fn transaction(&mut self, address: A, operations: &mut [Operation<'_>]) -> Result<(), Self::Error> {
             let addr: u16 = address.into();
 
             let mut iterator = operations.iter_mut();
diff --git a/embassy-rp/src/spi.rs b/embassy-rp/src/spi.rs
index ebd621ecf..742a35d49 100644
--- a/embassy-rp/src/spi.rs
+++ b/embassy-rp/src/spi.rs
@@ -19,6 +19,7 @@ pub enum Error {
 }
 
 #[non_exhaustive]
+#[derive(Clone)]
 pub struct Config {
     pub frequency: u32,
     pub phase: Phase,
diff --git a/embassy-rp/src/uart/buffered.rs b/embassy-rp/src/uart/buffered.rs
index 1a573b311..c620ed08c 100644
--- a/embassy-rp/src/uart/buffered.rs
+++ b/embassy-rp/src/uart/buffered.rs
@@ -175,6 +175,10 @@ impl<'d, T: Instance> BufferedUartRx<'d, T> {
 
     fn read<'a>(buf: &'a mut [u8]) -> impl Future<Output = Result<usize, Error>> + 'a {
         poll_fn(move |cx| {
+            if buf.is_empty() {
+                return Poll::Ready(Ok(0));
+            }
+
             let state = T::state();
             let mut rx_reader = unsafe { state.rx_buf.reader() };
             let n = rx_reader.pop(|data| {
@@ -202,6 +206,10 @@ impl<'d, T: Instance> BufferedUartRx<'d, T> {
     }
 
     pub fn blocking_read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
+        if buf.is_empty() {
+            return Ok(0);
+        }
+
         loop {
             let state = T::state();
             let mut rx_reader = unsafe { state.rx_buf.reader() };
@@ -293,6 +301,10 @@ impl<'d, T: Instance> BufferedUartTx<'d, T> {
 
     fn write<'a>(buf: &'a [u8]) -> impl Future<Output = Result<usize, Error>> + 'a {
         poll_fn(move |cx| {
+            if buf.is_empty() {
+                return Poll::Ready(Ok(0));
+            }
+
             let state = T::state();
             let mut tx_writer = unsafe { state.tx_buf.writer() };
             let n = tx_writer.push(|data| {
@@ -327,6 +339,10 @@ impl<'d, T: Instance> BufferedUartTx<'d, T> {
     }
 
     pub fn blocking_write(&mut self, buf: &[u8]) -> Result<usize, Error> {
+        if buf.is_empty() {
+            return Ok(0);
+        }
+
         loop {
             let state = T::state();
             let mut tx_writer = unsafe { state.tx_buf.writer() };
diff --git a/embassy-rp/src/uart/mod.rs b/embassy-rp/src/uart/mod.rs
index 682243a27..a945f2295 100644
--- a/embassy-rp/src/uart/mod.rs
+++ b/embassy-rp/src/uart/mod.rs
@@ -405,10 +405,6 @@ impl<'d, T: Instance + 'd, M: Mode> Uart<'d, T, M> {
                 Parity::ParityEven => (true, true),
             };
 
-            // PL011 needs a (dummy) line control register write to latch in the
-            // divisors. We don't want to actually change LCR contents here.
-            r.uartlcr_h().modify(|_| {});
-
             r.uartlcr_h().write(|w| {
                 w.set_wlen(config.data_bits.bits());
                 w.set_stp2(config.stop_bits == StopBits::STOP2);
@@ -458,6 +454,10 @@ impl<'d, T: Instance + 'd, M: Mode> Uart<'d, T, M> {
             // Load PL011's baud divisor registers
             r.uartibrd().write_value(pac::uart::regs::Uartibrd(baud_ibrd));
             r.uartfbrd().write_value(pac::uart::regs::Uartfbrd(baud_fbrd));
+
+            // PL011 needs a (dummy) line control register write to latch in the
+            // divisors. We don't want to actually change LCR contents here.
+            r.uartlcr_h().modify(|_| {});
         }
     }
 }
diff --git a/embassy-stm32/Cargo.toml b/embassy-stm32/Cargo.toml
index 14ec3d70a..a8ebacd25 100644
--- a/embassy-stm32/Cargo.toml
+++ b/embassy-stm32/Cargo.toml
@@ -8,10 +8,7 @@ license = "MIT OR Apache-2.0"
 src_base = "https://github.com/embassy-rs/embassy/blob/embassy-stm32-v$VERSION/embassy-stm32/src/"
 src_base_git = "https://github.com/embassy-rs/embassy/blob/$COMMIT/embassy-stm32/src/"
 
-# TODO: sdmmc
-# TODO: net
-# TODO: subghz
-features = ["nightly", "defmt", "unstable-pac", "unstable-traits", "exti", "time-driver-any"]
+features = ["nightly", "defmt", "unstable-pac", "unstable-traits", "exti", "time-driver-any", "time"]
 flavors = [
     { regex_feature = "stm32f0.*", target = "thumbv6m-none-eabi" },
     { regex_feature = "stm32f1.*", target = "thumbv7m-none-eabi" },
@@ -22,6 +19,7 @@ flavors = [
     { regex_feature = "stm32c0.*", target = "thumbv6m-none-eabi" },
     { regex_feature = "stm32g0.*", target = "thumbv6m-none-eabi" },
     { regex_feature = "stm32g4.*", target = "thumbv7em-none-eabi" },
+    { regex_feature = "stm32h5.*", target = "thumbv8m.main-none-eabihf" },
     { regex_feature = "stm32h7.*", target = "thumbv7em-none-eabi" },
     { regex_feature = "stm32l0.*", target = "thumbv6m-none-eabi" },
     { regex_feature = "stm32l1.*", target = "thumbv7m-none-eabi" },
@@ -44,9 +42,9 @@ embassy-net-driver = { version = "0.1.0", path = "../embassy-net-driver" }
 embassy-usb-driver = {version = "0.1.0", path = "../embassy-usb-driver", optional = true }
 
 embedded-hal-02 = { package = "embedded-hal", version = "0.2.6", features = ["unproven"] }
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9", optional = true}
-embedded-hal-async = { version = "=0.2.0-alpha.0", optional = true}
-embedded-hal-nb = { version = "=1.0.0-alpha.1", optional = true}
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10", optional = true}
+embedded-hal-async = { version = "=0.2.0-alpha.1", optional = true}
+embedded-hal-nb = { version = "=1.0.0-alpha.2", optional = true}
 
 embedded-storage = "0.3.0"
 
@@ -60,7 +58,7 @@ sdio-host = "0.5.0"
 embedded-sdmmc = { git = "https://github.com/embassy-rs/embedded-sdmmc-rs", rev = "46d1b1c2ff13e31e282ec1e352421721694f126a", optional = true }
 critical-section = "1.1"
 atomic-polyfill = "1.0.1"
-stm32-metapac = { version = "2", features = ["rt"] }
+stm32-metapac = "5"
 vcell = "0.1.3"
 bxcan = "0.7.0"
 nb = "1.0.0"
@@ -69,15 +67,18 @@ seq-macro = "0.3.0"
 cfg-if = "1.0.0"
 embedded-io = { version = "0.4.0", features = ["async"], optional = true }
 
+[dev-dependencies]
+critical-section = { version = "1.1", features = ["std"] }
+
 [build-dependencies]
 proc-macro2 = "1.0.36"
 quote = "1.0.15"
-stm32-metapac = { version = "2", default-features = false, features = ["metadata"]}
+stm32-metapac = { version = "5", default-features = false, features = ["metadata"]}
 
 [features]
+default = ["stm32-metapac/rt"]
 defmt = ["dep:defmt", "bxcan/unstable-defmt", "embassy-sync/defmt", "embassy-executor/defmt", "embassy-embedded-hal/defmt", "embassy-hal-common/defmt", "embedded-io?/defmt", "embassy-usb-driver?/defmt", "embassy-net-driver/defmt"]
 memory-x = ["stm32-metapac/memory-x"]
-subghz = []
 exti = []
 
 # Enables additional driver features that depend on embassy-time
@@ -830,6 +831,37 @@ stm32g4a1ke = [ "stm32-metapac/stm32g4a1ke" ]
 stm32g4a1me = [ "stm32-metapac/stm32g4a1me" ]
 stm32g4a1re = [ "stm32-metapac/stm32g4a1re" ]
 stm32g4a1ve = [ "stm32-metapac/stm32g4a1ve" ]
+stm32h503cb = [ "stm32-metapac/stm32h503cb" ]
+stm32h503eb = [ "stm32-metapac/stm32h503eb" ]
+stm32h503kb = [ "stm32-metapac/stm32h503kb" ]
+stm32h503rb = [ "stm32-metapac/stm32h503rb" ]
+stm32h562ag = [ "stm32-metapac/stm32h562ag" ]
+stm32h562ai = [ "stm32-metapac/stm32h562ai" ]
+stm32h562ig = [ "stm32-metapac/stm32h562ig" ]
+stm32h562ii = [ "stm32-metapac/stm32h562ii" ]
+stm32h562rg = [ "stm32-metapac/stm32h562rg" ]
+stm32h562ri = [ "stm32-metapac/stm32h562ri" ]
+stm32h562vg = [ "stm32-metapac/stm32h562vg" ]
+stm32h562vi = [ "stm32-metapac/stm32h562vi" ]
+stm32h562zg = [ "stm32-metapac/stm32h562zg" ]
+stm32h562zi = [ "stm32-metapac/stm32h562zi" ]
+stm32h563ag = [ "stm32-metapac/stm32h563ag" ]
+stm32h563ai = [ "stm32-metapac/stm32h563ai" ]
+stm32h563ig = [ "stm32-metapac/stm32h563ig" ]
+stm32h563ii = [ "stm32-metapac/stm32h563ii" ]
+stm32h563mi = [ "stm32-metapac/stm32h563mi" ]
+stm32h563rg = [ "stm32-metapac/stm32h563rg" ]
+stm32h563ri = [ "stm32-metapac/stm32h563ri" ]
+stm32h563vg = [ "stm32-metapac/stm32h563vg" ]
+stm32h563vi = [ "stm32-metapac/stm32h563vi" ]
+stm32h563zg = [ "stm32-metapac/stm32h563zg" ]
+stm32h563zi = [ "stm32-metapac/stm32h563zi" ]
+stm32h573ai = [ "stm32-metapac/stm32h573ai" ]
+stm32h573ii = [ "stm32-metapac/stm32h573ii" ]
+stm32h573mi = [ "stm32-metapac/stm32h573mi" ]
+stm32h573ri = [ "stm32-metapac/stm32h573ri" ]
+stm32h573vi = [ "stm32-metapac/stm32h573vi" ]
+stm32h573zi = [ "stm32-metapac/stm32h573zi" ]
 stm32h723ve = [ "stm32-metapac/stm32h723ve" ]
 stm32h723vg = [ "stm32-metapac/stm32h723vg" ]
 stm32h723ze = [ "stm32-metapac/stm32h723ze" ]
@@ -1312,6 +1344,22 @@ stm32l562qe = [ "stm32-metapac/stm32l562qe" ]
 stm32l562re = [ "stm32-metapac/stm32l562re" ]
 stm32l562ve = [ "stm32-metapac/stm32l562ve" ]
 stm32l562ze = [ "stm32-metapac/stm32l562ze" ]
+stm32u535cb = [ "stm32-metapac/stm32u535cb" ]
+stm32u535cc = [ "stm32-metapac/stm32u535cc" ]
+stm32u535ce = [ "stm32-metapac/stm32u535ce" ]
+stm32u535je = [ "stm32-metapac/stm32u535je" ]
+stm32u535nc = [ "stm32-metapac/stm32u535nc" ]
+stm32u535ne = [ "stm32-metapac/stm32u535ne" ]
+stm32u535rb = [ "stm32-metapac/stm32u535rb" ]
+stm32u535rc = [ "stm32-metapac/stm32u535rc" ]
+stm32u535re = [ "stm32-metapac/stm32u535re" ]
+stm32u535vc = [ "stm32-metapac/stm32u535vc" ]
+stm32u535ve = [ "stm32-metapac/stm32u535ve" ]
+stm32u545ce = [ "stm32-metapac/stm32u545ce" ]
+stm32u545je = [ "stm32-metapac/stm32u545je" ]
+stm32u545ne = [ "stm32-metapac/stm32u545ne" ]
+stm32u545re = [ "stm32-metapac/stm32u545re" ]
+stm32u545ve = [ "stm32-metapac/stm32u545ve" ]
 stm32u575ag = [ "stm32-metapac/stm32u575ag" ]
 stm32u575ai = [ "stm32-metapac/stm32u575ai" ]
 stm32u575cg = [ "stm32-metapac/stm32u575cg" ]
@@ -1333,6 +1381,32 @@ stm32u585qi = [ "stm32-metapac/stm32u585qi" ]
 stm32u585ri = [ "stm32-metapac/stm32u585ri" ]
 stm32u585vi = [ "stm32-metapac/stm32u585vi" ]
 stm32u585zi = [ "stm32-metapac/stm32u585zi" ]
+stm32u595ai = [ "stm32-metapac/stm32u595ai" ]
+stm32u595aj = [ "stm32-metapac/stm32u595aj" ]
+stm32u595qi = [ "stm32-metapac/stm32u595qi" ]
+stm32u595qj = [ "stm32-metapac/stm32u595qj" ]
+stm32u595ri = [ "stm32-metapac/stm32u595ri" ]
+stm32u595rj = [ "stm32-metapac/stm32u595rj" ]
+stm32u595vi = [ "stm32-metapac/stm32u595vi" ]
+stm32u595vj = [ "stm32-metapac/stm32u595vj" ]
+stm32u595zi = [ "stm32-metapac/stm32u595zi" ]
+stm32u595zj = [ "stm32-metapac/stm32u595zj" ]
+stm32u599bj = [ "stm32-metapac/stm32u599bj" ]
+stm32u599ni = [ "stm32-metapac/stm32u599ni" ]
+stm32u599nj = [ "stm32-metapac/stm32u599nj" ]
+stm32u599vi = [ "stm32-metapac/stm32u599vi" ]
+stm32u599vj = [ "stm32-metapac/stm32u599vj" ]
+stm32u599zi = [ "stm32-metapac/stm32u599zi" ]
+stm32u599zj = [ "stm32-metapac/stm32u599zj" ]
+stm32u5a5aj = [ "stm32-metapac/stm32u5a5aj" ]
+stm32u5a5qj = [ "stm32-metapac/stm32u5a5qj" ]
+stm32u5a5rj = [ "stm32-metapac/stm32u5a5rj" ]
+stm32u5a5vj = [ "stm32-metapac/stm32u5a5vj" ]
+stm32u5a5zj = [ "stm32-metapac/stm32u5a5zj" ]
+stm32u5a9bj = [ "stm32-metapac/stm32u5a9bj" ]
+stm32u5a9nj = [ "stm32-metapac/stm32u5a9nj" ]
+stm32u5a9vj = [ "stm32-metapac/stm32u5a9vj" ]
+stm32u5a9zj = [ "stm32-metapac/stm32u5a9zj" ]
 stm32wb10cc = [ "stm32-metapac/stm32wb10cc" ]
 stm32wb15cc = [ "stm32-metapac/stm32wb15cc" ]
 stm32wb30ce = [ "stm32-metapac/stm32wb30ce" ]
diff --git a/embassy-stm32/build.rs b/embassy-stm32/build.rs
index 3780c5a40..b01e8ba45 100644
--- a/embassy-stm32/build.rs
+++ b/embassy-stm32/build.rs
@@ -3,9 +3,9 @@ use std::fmt::Write as _;
 use std::path::PathBuf;
 use std::{env, fs};
 
-use proc_macro2::TokenStream;
+use proc_macro2::{Ident, TokenStream};
 use quote::{format_ident, quote};
-use stm32_metapac::metadata::METADATA;
+use stm32_metapac::metadata::{MemoryRegionKind, METADATA};
 
 fn main() {
     let chip_name = match env::vars()
@@ -50,10 +50,13 @@ fn main() {
                 // We *shouldn't* have singletons for these, but the HAL currently requires
                 // singletons, for using with RccPeripheral to enable/disable clocks to them.
                 "rcc" => {
-                    if r.version.starts_with("h7") {
+                    if r.version.starts_with("h5") || r.version.starts_with("h7") || r.version.starts_with("f4") {
                         singletons.push("MCO1".to_string());
                         singletons.push("MCO2".to_string());
                     }
+                    if r.version.starts_with("l4") {
+                        singletons.push("MCO".to_string());
+                    }
                     singletons.push(p.name.to_string());
                 }
                 //"dbgmcu" => {}
@@ -103,6 +106,94 @@ fn main() {
         }
     });
 
+    // ========
+    // Generate FLASH regions
+    let mut flash_regions = TokenStream::new();
+    let flash_memory_regions: Vec<_> = METADATA
+        .memory
+        .iter()
+        .filter(|x| x.kind == MemoryRegionKind::Flash && x.settings.is_some())
+        .collect();
+    for region in flash_memory_regions.iter() {
+        let region_name = format_ident!("{}", get_flash_region_name(region.name));
+        let bank_variant = format_ident!(
+            "{}",
+            if region.name.starts_with("BANK_1") {
+                "Bank1"
+            } else if region.name.starts_with("BANK_2") {
+                "Bank2"
+            } else if region.name == "OTP" {
+                "Otp"
+            } else {
+                continue;
+            }
+        );
+        let base = region.address;
+        let size = region.size;
+        let settings = region.settings.as_ref().unwrap();
+        let erase_size = settings.erase_size;
+        let write_size = settings.write_size;
+        let erase_value = settings.erase_value;
+
+        flash_regions.extend(quote! {
+            pub const #region_name: crate::flash::FlashRegion = crate::flash::FlashRegion {
+                bank: crate::flash::FlashBank::#bank_variant,
+                base: #base,
+                size: #size,
+                erase_size: #erase_size,
+                write_size: #write_size,
+                erase_value: #erase_value,
+            };
+        });
+
+        let region_type = format_ident!("{}", get_flash_region_type_name(region.name));
+        flash_regions.extend(quote! {
+            #[cfg(flash)]
+            pub struct #region_type<'d>(pub &'static crate::flash::FlashRegion, pub(crate) embassy_hal_common::PeripheralRef<'d, crate::peripherals::FLASH>,);
+        });
+    }
+
+    let (fields, (inits, region_names)): (Vec<TokenStream>, (Vec<TokenStream>, Vec<Ident>)) = flash_memory_regions
+        .iter()
+        .map(|f| {
+            let region_name = get_flash_region_name(f.name);
+            let field_name = format_ident!("{}", region_name.to_lowercase());
+            let field_type = format_ident!("{}", get_flash_region_type_name(f.name));
+            let field = quote! {
+                pub #field_name: #field_type<'d>
+            };
+            let region_name = format_ident!("{}", region_name);
+            let init = quote! {
+                #field_name: #field_type(&#region_name, unsafe { p.clone_unchecked()})
+            };
+
+            (field, (init, region_name))
+        })
+        .unzip();
+
+    let regions_len = flash_memory_regions.len();
+    flash_regions.extend(quote! {
+        #[cfg(flash)]
+        pub struct FlashLayout<'d> {
+            #(#fields),*
+        }
+
+        #[cfg(flash)]
+        impl<'d> FlashLayout<'d> {
+            pub(crate) fn new(mut p: embassy_hal_common::PeripheralRef<'d, crate::peripherals::FLASH>) -> Self {
+                Self {
+                    #(#inits),*
+                }
+            }
+        }
+
+        pub const FLASH_REGIONS: [&crate::flash::FlashRegion; #regions_len] = [
+            #(&#region_names),*
+        ];
+    });
+
+    g.extend(quote! { pub mod flash_regions { #flash_regions } });
+
     // ========
     // Generate DMA IRQs.
 
@@ -258,6 +349,7 @@ fn main() {
         (("i2c", "SCL"), quote!(crate::i2c::SclPin)),
         (("rcc", "MCO_1"), quote!(crate::rcc::McoPin)),
         (("rcc", "MCO_2"), quote!(crate::rcc::McoPin)),
+        (("rcc", "MCO"), quote!(crate::rcc::McoPin)),
         (("dcmi", "D0"), quote!(crate::dcmi::D0Pin)),
         (("dcmi", "D1"), quote!(crate::dcmi::D1Pin)),
         (("dcmi", "D2"), quote!(crate::dcmi::D2Pin)),
@@ -447,13 +539,25 @@ fn main() {
                     // MCO is special
                     if pin.signal.starts_with("MCO_") {
                         // Supported in H7 only for now
-                        if regs.version.starts_with("h7") {
+                        if regs.version.starts_with("h5")
+                            || regs.version.starts_with("h7")
+                            || regs.version.starts_with("f4")
+                        {
                             peri = format_ident!("{}", pin.signal.replace("_", ""));
                         } else {
                             continue;
                         }
                     }
 
+                    if pin.signal == "MCO" {
+                        // Supported in L4 only for now
+                        if regs.version.starts_with("l4") {
+                            peri = format_ident!("MCO");
+                        } else {
+                            continue;
+                        }
+                    }
+
                     g.extend(quote! {
                         pin_trait_impl!(#tr, #peri, #pin_name, #af);
                     })
@@ -565,11 +669,25 @@ fn main() {
     // ========
     // Write foreach_foo! macrotables
 
+    let mut flash_regions_table: Vec<Vec<String>> = Vec::new();
     let mut interrupts_table: Vec<Vec<String>> = Vec::new();
     let mut peripherals_table: Vec<Vec<String>> = Vec::new();
     let mut pins_table: Vec<Vec<String>> = Vec::new();
     let mut dma_channels_table: Vec<Vec<String>> = Vec::new();
 
+    for m in METADATA
+        .memory
+        .iter()
+        .filter(|m| m.kind == MemoryRegionKind::Flash && m.settings.is_some())
+    {
+        let settings = m.settings.as_ref().unwrap();
+        let mut row = Vec::new();
+        row.push(get_flash_region_type_name(m.name));
+        row.push(settings.write_size.to_string());
+        row.push(settings.erase_size.to_string());
+        flash_regions_table.push(row);
+    }
+
     let gpio_base = METADATA.peripherals.iter().find(|p| p.name == "GPIOA").unwrap().address as u32;
     let gpio_stride = 0x400;
 
@@ -666,6 +784,7 @@ fn main() {
 
     let mut m = String::new();
 
+    make_table(&mut m, "foreach_flash_region", &flash_regions_table);
     make_table(&mut m, "foreach_interrupt", &interrupts_table);
     make_table(&mut m, "foreach_peripheral", &peripherals_table);
     make_table(&mut m, "foreach_pin", &pins_table);
@@ -818,3 +937,19 @@ macro_rules! {} {{
     )
     .unwrap();
 }
+
+fn get_flash_region_name(name: &str) -> String {
+    let name = name.replace("BANK_", "BANK").replace("REGION_", "REGION");
+    if name.contains("REGION") {
+        name
+    } else {
+        name + "_REGION"
+    }
+}
+
+fn get_flash_region_type_name(name: &str) -> String {
+    get_flash_region_name(name)
+        .replace("BANK", "Bank")
+        .replace("REGION", "Region")
+        .replace("_", "")
+}
diff --git a/embassy-stm32/src/adc/mod.rs b/embassy-stm32/src/adc/mod.rs
index ec49dace7..56ecd63ca 100644
--- a/embassy-stm32/src/adc/mod.rs
+++ b/embassy-stm32/src/adc/mod.rs
@@ -7,21 +7,18 @@
 #[cfg_attr(adc_v4, path = "v4.rs")]
 mod _version;
 
-#[cfg(not(any(adc_f1, adc_v1)))]
+#[cfg(not(adc_f1))]
 mod resolution;
-#[cfg(not(adc_v1))]
 mod sample_time;
 
 #[allow(unused)]
 pub use _version::*;
-#[cfg(not(any(adc_f1, adc_v1)))]
+#[cfg(not(adc_f1))]
 pub use resolution::Resolution;
-#[cfg(not(adc_v1))]
 pub use sample_time::SampleTime;
 
 use crate::peripherals;
 
-#[cfg(not(adc_v1))]
 pub struct Adc<'d, T: Instance> {
     #[allow(unused)]
     adc: crate::PeripheralRef<'d, T>,
@@ -44,9 +41,9 @@ pub(crate) mod sealed {
     }
 }
 
-#[cfg(not(any(adc_f1, adc_v2, adc_v4)))]
+#[cfg(not(any(adc_f1, adc_v1, adc_v2, adc_v4)))]
 pub trait Instance: sealed::Instance + crate::Peripheral<P = Self> {}
-#[cfg(any(adc_f1, adc_v2, adc_v4))]
+#[cfg(any(adc_f1, adc_v1, adc_v2, adc_v4))]
 pub trait Instance: sealed::Instance + crate::Peripheral<P = Self> + crate::rcc::RccPeripheral {}
 
 pub trait AdcPin<T: Instance>: sealed::AdcPin<T> {}
diff --git a/embassy-stm32/src/adc/resolution.rs b/embassy-stm32/src/adc/resolution.rs
index 62b52a46c..67fb9b8c0 100644
--- a/embassy-stm32/src/adc/resolution.rs
+++ b/embassy-stm32/src/adc/resolution.rs
@@ -1,4 +1,4 @@
-#[cfg(any(adc_v2, adc_v3, adc_g0))]
+#[cfg(any(adc_v1, adc_v2, adc_v3, adc_g0))]
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum Resolution {
     TwelveBit,
@@ -19,7 +19,7 @@ pub enum Resolution {
 
 impl Default for Resolution {
     fn default() -> Self {
-        #[cfg(any(adc_v2, adc_v3, adc_g0))]
+        #[cfg(any(adc_v1, adc_v2, adc_v3, adc_g0))]
         {
             Self::TwelveBit
         }
@@ -40,7 +40,7 @@ impl From<Resolution> for crate::pac::adc::vals::Res {
             Resolution::TwelveBit => crate::pac::adc::vals::Res::TWELVEBIT,
             Resolution::TenBit => crate::pac::adc::vals::Res::TENBIT,
             Resolution::EightBit => crate::pac::adc::vals::Res::EIGHTBIT,
-            #[cfg(any(adc_v2, adc_v3, adc_g0))]
+            #[cfg(any(adc_v1, adc_v2, adc_v3, adc_g0))]
             Resolution::SixBit => crate::pac::adc::vals::Res::SIXBIT,
         }
     }
@@ -56,7 +56,7 @@ impl Resolution {
             Resolution::TwelveBit => (1 << 12) - 1,
             Resolution::TenBit => (1 << 10) - 1,
             Resolution::EightBit => (1 << 8) - 1,
-            #[cfg(any(adc_v2, adc_v3, adc_g0))]
+            #[cfg(any(adc_v1, adc_v2, adc_v3, adc_g0))]
             Resolution::SixBit => (1 << 6) - 1,
         }
     }
diff --git a/embassy-stm32/src/adc/sample_time.rs b/embassy-stm32/src/adc/sample_time.rs
index bc5fb1d6f..0faa1e3c0 100644
--- a/embassy-stm32/src/adc/sample_time.rs
+++ b/embassy-stm32/src/adc/sample_time.rs
@@ -25,7 +25,7 @@ macro_rules! impl_sample_time {
     };
 }
 
-#[cfg(adc_f1)]
+#[cfg(any(adc_f1, adc_v1))]
 impl_sample_time!(
     "1.5",
     Cycles1_5,
diff --git a/embassy-stm32/src/adc/v1.rs b/embassy-stm32/src/adc/v1.rs
index 8b1378917..82a8c3efb 100644
--- a/embassy-stm32/src/adc/v1.rs
+++ b/embassy-stm32/src/adc/v1.rs
@@ -1 +1,171 @@
+use embassy_hal_common::into_ref;
+use embedded_hal_02::blocking::delay::DelayUs;
 
+use crate::adc::{Adc, AdcPin, Instance, InternalChannel, Resolution, SampleTime};
+use crate::peripherals::ADC;
+use crate::Peripheral;
+
+pub const VDDA_CALIB_MV: u32 = 3300; // NOTE(review): presumably millivolts; not used in this file — confirm
+pub const VREF_INT: u32 = 1230; // NOTE(review): presumably millivolts; not used in this file — confirm
+/// Marker for internal ADC channel 18; handed out by `Adc::enable_vbat`.
+pub struct Vbat;
+impl InternalChannel<ADC> for Vbat {}
+impl super::sealed::InternalChannel<ADC> for Vbat {
+    fn channel(&self) -> u8 {
+        18
+    }
+}
+/// Marker for internal ADC channel 17; handed out by `Adc::enable_vref`.
+pub struct Vref;
+impl InternalChannel<ADC> for Vref {}
+impl super::sealed::InternalChannel<ADC> for Vref {
+    fn channel(&self) -> u8 {
+        17
+    }
+}
+/// Marker for internal ADC channel 16; handed out by `Adc::enable_temperature`.
+pub struct Temperature;
+impl InternalChannel<ADC> for Temperature {}
+impl super::sealed::InternalChannel<ADC> for Temperature {
+    fn channel(&self) -> u8 {
+        16
+    }
+}
+
+impl<'d, T: Instance> Adc<'d, T> {
+    /// Enables and resets the ADC, waits 1μs via `delay`, then runs `calibrate` before returning the driver.
+    pub fn new(adc: impl Peripheral<P = T> + 'd, delay: &mut impl DelayUs<u32>) -> Self {
+        into_ref!(adc);
+        T::enable();
+        T::reset();
+        // Delay 1μs when using HSI14 as the ADC clock.
+        //
+        // Table 57. ADC characteristics
+        // tstab = 14 * 1/fadc
+        delay.delay_us(1);
+
+        let s = Self {
+            adc,
+            sample_time: Default::default(),
+        };
+        s.calibrate();
+        s
+    }
+    /// Sets `vbaten` in `ccr` and returns the `Vbat` channel marker; `_delay` is not used.
+    pub fn enable_vbat(&self, _delay: &mut impl DelayUs<u32>) -> Vbat {
+        // SMP must be ≥ 56 ADC clock cycles when using HSI14.
+        //
+        // 6.3.20 Vbat monitoring characteristics
+        // ts_vbat ≥ 4μs
+        unsafe {
+            T::regs().ccr().modify(|reg| reg.set_vbaten(true));
+        }
+        Vbat
+    }
+    /// Sets `vrefen` in `ccr`, waits 10μs via `delay`, and returns the `Vref` channel marker.
+    pub fn enable_vref(&self, delay: &mut impl DelayUs<u32>) -> Vref {
+        // Table 28. Embedded internal reference voltage
+        // tstart = 10μs
+        unsafe {
+            T::regs().ccr().modify(|reg| reg.set_vrefen(true));
+        }
+        delay.delay_us(10);
+        Vref
+    }
+    /// Sets `tsen` in `ccr`, waits 10μs via `delay`, and returns the `Temperature` channel marker.
+    pub fn enable_temperature(&self, delay: &mut impl DelayUs<u32>) -> Temperature {
+        // SMP must be ≥ 56 ADC clock cycles when using HSI14.
+        //
+        // 6.3.19 Temperature sensor characteristics
+        // tstart ≤ 10μs
+        // ts_temp ≥ 4μs
+        unsafe {
+            T::regs().ccr().modify(|reg| reg.set_tsen(true));
+        }
+        delay.delay_us(10);
+        Temperature
+    }
+    /// Disables the ADC if enabled, clears `dmaen`, then sets `adcal` and spins until it reads back clear.
+    fn calibrate(&self) {
+        unsafe {
+            // A.7.1 ADC calibration code example
+            if T::regs().cr().read().aden() {
+                T::regs().cr().modify(|reg| reg.set_addis(true));
+            }
+            while T::regs().cr().read().aden() {
+                // spin
+            }
+            T::regs().cfgr1().modify(|reg| reg.set_dmaen(false));
+            T::regs().cr().modify(|reg| reg.set_adcal(true));
+            while T::regs().cr().read().adcal() {
+                // spin
+            }
+        }
+    }
+    /// Stores the sample time that `read_channel` programs into `smpr` before each conversion.
+    pub fn set_sample_time(&mut self, sample_time: SampleTime) {
+        self.sample_time = sample_time;
+    }
+    /// Writes `resolution` into the `res` field of `cfgr1`.
+    pub fn set_resolution(&mut self, resolution: Resolution) {
+        unsafe {
+            T::regs().cfgr1().modify(|reg| reg.set_res(resolution.into()));
+        }
+    }
+    /// Puts `pin` into analog mode and returns one blocking conversion of its channel.
+    pub fn read<P>(&mut self, pin: &mut P) -> u16
+    where
+        P: AdcPin<T> + crate::gpio::sealed::Pin,
+    {
+        let channel = pin.channel();
+        unsafe {
+            pin.set_as_analog();
+            self.read_channel(channel)
+        }
+    }
+    /// Returns one blocking conversion of the channel reported by `channel.channel()`.
+    pub fn read_internal(&mut self, channel: &mut impl InternalChannel<T>) -> u16 {
+        let channel = channel.channel();
+        unsafe { self.read_channel(channel) }
+    }
+    /// Enables the ADC, converts `channel` once (spinning on `eoc`), disables the ADC and returns the sample.
+    unsafe fn read_channel(&mut self, channel: u8) -> u16 {
+        // A.7.2 ADC enable sequence code example
+        if T::regs().isr().read().adrdy() {
+            T::regs().isr().modify(|reg| reg.set_adrdy(true));
+        }
+        T::regs().cr().modify(|reg| reg.set_aden(true));
+        while !T::regs().isr().read().adrdy() {
+            // ES0233, 2.4.3 ADEN bit cannot be set immediately after the ADC calibration
+            // Workaround: When the ADC calibration is complete (ADCAL = 0), keep setting the
+            // ADEN bit until the ADRDY flag goes high.
+            T::regs().cr().modify(|reg| reg.set_aden(true));
+        }
+
+        T::regs().isr().modify(|reg| {
+            reg.set_eoc(true);
+            reg.set_eosmp(true);
+        });
+
+        // A.7.5 Single conversion sequence code example - Software trigger
+        T::regs().chselr().write(|reg| reg.set_chselx(channel as usize, true));
+        T::regs().smpr().modify(|reg| reg.set_smp(self.sample_time.into()));
+        T::regs().cr().modify(|reg| reg.set_adstart(true));
+        while !T::regs().isr().read().eoc() {
+            // spin
+        }
+        let value = T::regs().dr().read().0 as u16;
+
+        // A.7.3 ADC disable code example
+        T::regs().cr().modify(|reg| reg.set_adstp(true));
+        while T::regs().cr().read().adstp() {
+            // spin
+        }
+        T::regs().cr().modify(|reg| reg.set_addis(true));
+        while T::regs().cr().read().aden() {
+            // spin
+        }
+
+        value
+    }
+}
diff --git a/embassy-stm32/src/dma/gpdma.rs b/embassy-stm32/src/dma/gpdma.rs
index 442fee48e..6f26fd194 100644
--- a/embassy-stm32/src/dma/gpdma.rs
+++ b/embassy-stm32/src/dma/gpdma.rs
@@ -190,6 +190,10 @@ mod low_level_api {
         fence(Ordering::SeqCst);
 
         let ch = dma.ch(channel_number as _);
+
+        // Reset ch
+        ch.cr().write(|w| w.set_reset(true));
+
         ch.llr().write(|_| {}); // no linked list
         ch.tr1().write(|w| {
             w.set_sdw(data_size.into());
@@ -252,7 +256,7 @@ mod low_level_api {
     /// Gets the running status of the channel
     pub unsafe fn is_running(dma: Gpdma, ch: u8) -> bool {
         let ch = dma.ch(ch as _);
-        !ch.sr().read().idlef()
+        !ch.sr().read().tcf()
     }
 
     /// Gets the total remaining transfers for the channel
@@ -291,7 +295,10 @@ mod low_level_api {
         }
 
         if sr.suspf() || sr.tcf() {
-            ch.cr().write(|w| w.set_reset(true));
+            // disable all xxIEs to prevent the irq from firing again.
+            ch.cr().write(|_| {});
+
+            // Wake the future. It'll look at tcf and see it's set.
             STATE.channels[state_index].waker.wake();
         }
     }
diff --git a/embassy-stm32/src/eth/v2/mod.rs b/embassy-stm32/src/eth/v2/mod.rs
index fcb4a296c..d49b1f767 100644
--- a/embassy-stm32/src/eth/v2/mod.rs
+++ b/embassy-stm32/src/eth/v2/mod.rs
@@ -9,7 +9,7 @@ pub(crate) use self::descriptors::{RDes, RDesRing, TDes, TDesRing};
 use super::*;
 use crate::gpio::sealed::{AFType, Pin as _};
 use crate::gpio::{AnyPin, Speed};
-use crate::pac::{ETH, RCC, SYSCFG};
+use crate::pac::ETH;
 use crate::Peripheral;
 
 const MTU: usize = 1514; // 14 Ethernet header + 1500 IP packet
@@ -60,16 +60,33 @@ impl<'d, T: Instance, P: PHY> Ethernet<'d, T, P> {
         unsafe {
             // Enable the necessary Clocks
             // NOTE(unsafe) We have exclusive access to the registers
+            #[cfg(not(rcc_h5))]
             critical_section::with(|_| {
-                RCC.apb4enr().modify(|w| w.set_syscfgen(true));
-                RCC.ahb1enr().modify(|w| {
+                crate::pac::RCC.apb4enr().modify(|w| w.set_syscfgen(true));
+                crate::pac::RCC.ahb1enr().modify(|w| {
                     w.set_eth1macen(true);
                     w.set_eth1txen(true);
                     w.set_eth1rxen(true);
                 });
 
                 // RMII
-                SYSCFG.pmcr().modify(|w| w.set_epis(0b100));
+                crate::pac::SYSCFG.pmcr().modify(|w| w.set_epis(0b100));
+            });
+
+            #[cfg(rcc_h5)]
+            critical_section::with(|_| {
+                crate::pac::RCC.apb3enr().modify(|w| w.set_sbsen(true));
+
+                crate::pac::RCC.ahb1enr().modify(|w| {
+                    w.set_ethen(true);
+                    w.set_ethtxen(true);
+                    w.set_ethrxen(true);
+                });
+
+                // RMII
+                crate::pac::SBS
+                    .pmcr()
+                    .modify(|w| w.set_eth_sel_phy(crate::pac::sbs::vals::EthSelPhy::B_0X4));
             });
 
             config_pins!(ref_clk, mdio, mdc, crs, rx_d0, rx_d1, tx_d0, tx_d1, tx_en);
diff --git a/embassy-stm32/src/exti.rs b/embassy-stm32/src/exti.rs
index e1ce09a49..10109e56a 100644
--- a/embassy-stm32/src/exti.rs
+++ b/embassy-stm32/src/exti.rs
@@ -25,11 +25,11 @@ fn cpu_regs() -> pac::exti::Exti {
     EXTI
 }
 
-#[cfg(not(any(exti_c0, exti_g0, exti_l5, gpio_v1, exti_u5)))]
+#[cfg(not(any(exti_c0, exti_g0, exti_l5, gpio_v1, exti_u5, exti_h5, exti_h50)))]
 fn exticr_regs() -> pac::syscfg::Syscfg {
     pac::SYSCFG
 }
-#[cfg(any(exti_c0, exti_g0, exti_l5, exti_u5))]
+#[cfg(any(exti_c0, exti_g0, exti_l5, exti_u5, exti_h5, exti_h50))]
 fn exticr_regs() -> pac::exti::Exti {
     EXTI
 }
@@ -39,9 +39,9 @@ fn exticr_regs() -> pac::afio::Afio {
 }
 
 pub unsafe fn on_irq() {
-    #[cfg(not(any(exti_c0, exti_g0, exti_l5, exti_u5)))]
+    #[cfg(not(any(exti_c0, exti_g0, exti_l5, exti_u5, exti_h5, exti_h50)))]
     let bits = EXTI.pr(0).read().0;
-    #[cfg(any(exti_c0, exti_g0, exti_l5, exti_u5))]
+    #[cfg(any(exti_c0, exti_g0, exti_l5, exti_u5, exti_h5, exti_h50))]
     let bits = EXTI.rpr(0).read().0 | EXTI.fpr(0).read().0;
 
     // Mask all the channels that fired.
@@ -53,9 +53,9 @@ pub unsafe fn on_irq() {
     }
 
     // Clear pending
-    #[cfg(not(any(exti_c0, exti_g0, exti_l5, exti_u5)))]
+    #[cfg(not(any(exti_c0, exti_g0, exti_l5, exti_u5, exti_h5, exti_h50)))]
     EXTI.pr(0).write_value(Lines(bits));
-    #[cfg(any(exti_c0, exti_g0, exti_l5, exti_u5))]
+    #[cfg(any(exti_c0, exti_g0, exti_l5, exti_u5, exti_h5, exti_h50))]
     {
         EXTI.rpr(0).write_value(Lines(bits));
         EXTI.fpr(0).write_value(Lines(bits));
@@ -213,9 +213,9 @@ impl<'a> ExtiInputFuture<'a> {
             EXTI.ftsr(0).modify(|w| w.set_line(pin, falling));
 
             // clear pending bit
-            #[cfg(not(any(exti_c0, exti_g0, exti_l5, exti_u5)))]
+            #[cfg(not(any(exti_c0, exti_g0, exti_l5, exti_u5, exti_h5, exti_h50)))]
             EXTI.pr(0).write(|w| w.set_line(pin, true));
-            #[cfg(any(exti_c0, exti_g0, exti_l5, exti_u5))]
+            #[cfg(any(exti_c0, exti_g0, exti_l5, exti_u5, exti_h5, exti_h50))]
             {
                 EXTI.rpr(0).write(|w| w.set_line(pin, true));
                 EXTI.fpr(0).write(|w| w.set_line(pin, true));
@@ -364,7 +364,7 @@ pub(crate) unsafe fn init() {
 
     foreach_exti_irq!(enable_irq);
 
-    #[cfg(not(any(rcc_wb, rcc_wl5, rcc_wle, stm32f1)))]
+    #[cfg(not(any(rcc_wb, rcc_wl5, rcc_wle, stm32f1, exti_h5, exti_h50)))]
     <crate::peripherals::SYSCFG as crate::rcc::sealed::RccPeripheral>::enable();
     #[cfg(stm32f1)]
     <crate::peripherals::AFIO as crate::rcc::sealed::RccPeripheral>::enable();
diff --git a/embassy-stm32/src/flash/common.rs b/embassy-stm32/src/flash/common.rs
new file mode 100644
index 000000000..8235d6f08
--- /dev/null
+++ b/embassy-stm32/src/flash/common.rs
@@ -0,0 +1,211 @@
+use atomic_polyfill::{fence, Ordering};
+use embassy_hal_common::drop::OnDrop;
+use embassy_hal_common::{into_ref, PeripheralRef};
+
+use super::{family, Error, FlashLayout, FlashRegion, FlashSector, FLASH_BASE, FLASH_SIZE, WRITE_SIZE};
+use crate::flash::FlashBank;
+use crate::Peripheral;
+
+/// Blocking driver for the whole flash (`FLASH_BASE`/`FLASH_SIZE`), holding the `FLASH` peripheral for `'d`.
+pub struct Flash<'d> {
+    inner: PeripheralRef<'d, crate::peripherals::FLASH>,
+}
+impl<'d> Flash<'d> {
+    pub fn new(p: impl Peripheral<P = crate::peripherals::FLASH> + 'd) -> Self {
+        into_ref!(p);
+        Self { inner: p }
+    }
+    /// Consumes the driver and builds a `FlashLayout` from the released peripheral reference.
+    pub fn into_regions(self) -> FlashLayout<'d> {
+        FlashLayout::new(self.release())
+    }
+    /// Reads into `bytes` from `offset` (relative to `FLASH_BASE`); the range is checked against `FLASH_SIZE`.
+    pub fn blocking_read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Error> {
+        blocking_read(FLASH_BASE as u32, FLASH_SIZE as u32, offset, bytes)
+    }
+    /// Programs `bytes` at `offset` (relative to `FLASH_BASE`); offset and length must be `WRITE_SIZE` multiples.
+    pub fn blocking_write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Error> {
+        unsafe { blocking_write(FLASH_BASE as u32, FLASH_SIZE as u32, offset, bytes) }
+    }
+    /// Erases the sectors covering `from..to` (relative to `FLASH_BASE`); both ends must be sector starts.
+    pub fn blocking_erase(&mut self, from: u32, to: u32) -> Result<(), Error> {
+        unsafe { blocking_erase(FLASH_BASE as u32, from, to) }
+    }
+    /// Consumes the driver and returns a `clone_unchecked` copy of its peripheral reference.
+    pub(crate) fn release(self) -> PeripheralRef<'d, crate::peripherals::FLASH> {
+        let mut flash = self;
+        unsafe { flash.inner.clone_unchecked() }
+    }
+}
+
+/// Copies `bytes.len()` bytes starting at `base + offset` into `bytes`; `Error::Size` if the range exceeds `size`.
+fn blocking_read(base: u32, size: u32, offset: u32, bytes: &mut [u8]) -> Result<(), Error> {
+    // Compare against the remaining space so that `offset + len` cannot wrap around `u32`.
+    if offset > size || bytes.len() > (size - offset) as usize {
+        return Err(Error::Size);
+    }
+    let flash_data = unsafe { core::slice::from_raw_parts((base + offset) as *const u8, bytes.len()) };
+    bytes.copy_from_slice(flash_data);
+    Ok(())
+}
+
+/// Programs `bytes` at `base + offset` in `WRITE_SIZE` chunks, each chunk inside its own critical section.
+/// `Error::Size` if the range exceeds `size`; `Error::Unaligned` if offset or length is not a `WRITE_SIZE` multiple.
+unsafe fn blocking_write(base: u32, size: u32, offset: u32, bytes: &[u8]) -> Result<(), Error> {
+    // Compare against the remaining space so that `offset + len` cannot wrap around `u32`.
+    if offset > size || bytes.len() > (size - offset) as usize {
+        return Err(Error::Size);
+    }
+    if offset % WRITE_SIZE as u32 != 0 || bytes.len() % WRITE_SIZE != 0 {
+        return Err(Error::Unaligned);
+    }
+    let mut address = base + offset;
+    trace!("Writing {} bytes at 0x{:x}", bytes.len(), address);
+    for chunk in bytes.chunks(WRITE_SIZE) {
+        critical_section::with(|_| {
+            family::clear_all_err();
+            fence(Ordering::SeqCst);
+            family::unlock();
+            fence(Ordering::SeqCst);
+            family::begin_write();
+            fence(Ordering::SeqCst);
+
+            let _on_drop = OnDrop::new(|| {
+                family::end_write();
+                fence(Ordering::SeqCst);
+                family::lock();
+            });
+            family::blocking_write(address, chunk.try_into().unwrap())
+        })?;
+        address += WRITE_SIZE as u32;
+    }
+    Ok(())
+}
+
+/// Validates the whole range `base + from .. base + to`, then erases it one sector per critical section.
+/// `from`/`to` must lie on sector boundaries (else `Error::Unaligned`); the range is not checked against a size.
+unsafe fn blocking_erase(base: u32, from: u32, to: u32) -> Result<(), Error> {
+    let start_address = base + from;
+    let end_address = base + to;
+    let regions = family::get_flash_regions();
+    // Test if the address range is aligned at sector base addresses
+    let mut address = start_address;
+    while address < end_address {
+        let sector = get_sector(address, regions);
+        if sector.start != address {
+            return Err(Error::Unaligned);
+        }
+        address += sector.size;
+    }
+    if address != end_address {
+        return Err(Error::Unaligned);
+    }
+
+    trace!("Erasing from 0x{:x} to 0x{:x}", start_address, end_address);
+    let mut address = start_address;
+    while address < end_address {
+        let sector = get_sector(address, regions);
+        trace!("Erasing sector: {:?}", sector);
+
+        critical_section::with(|_| {
+            family::clear_all_err();
+            fence(Ordering::SeqCst);
+            family::unlock();
+            fence(Ordering::SeqCst);
+
+            let _on_drop = OnDrop::new(|| {
+                family::lock();
+            });
+
+            family::blocking_erase_sector(&sector)
+        })?;
+        address += sector.size;
+    }
+    Ok(())
+}
+
+/// Locates the erase sector containing `address` by walking `regions` in order; panics if none matches.
+pub(crate) fn get_sector(address: u32, regions: &[&FlashRegion]) -> FlashSector {
+    // `sectors_before` counts the sectors of earlier regions in the same bank; it restarts on a bank change.
+    let (mut bank, mut sectors_before) = (FlashBank::Bank1, 0);
+    for region in regions {
+        if bank != region.bank {
+            bank = region.bank;
+            sectors_before = 0;
+        }
+        if address >= region.end() {
+            sectors_before += region.sectors();
+            continue;
+        }
+        let nth = (address - region.base) / region.erase_size;
+        return FlashSector {
+            bank: region.bank,
+            index_in_bank: sectors_before + nth as u8,
+            start: region.base + nth * region.erase_size,
+            size: region.erase_size,
+        };
+    }
+    // Reached only when `address` is at or beyond the end of every region.
+    panic!("Flash sector not found");
+}
+
+// Blocking operations on a single region: `offset`, `from` and `to` are relative to `self.base`.
+impl FlashRegion {
+    pub fn blocking_read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Error> {
+        blocking_read(self.base, self.size, offset, bytes)
+    }
+    pub fn blocking_write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Error> {
+        unsafe { blocking_write(self.base, self.size, offset, bytes) }
+    }
+    /// Erases whole sectors in `from..to`; unlike read/write, `self.size` is not passed to the helper.
+    pub fn blocking_erase(&mut self, from: u32, to: u32) -> Result<(), Error> {
+        unsafe { blocking_erase(self.base, from, to) }
+    }
+}
+
+// For each generated region type: inherent blocking accessors plus the `embedded_storage` NOR-flash traits.
+foreach_flash_region! {
+    ($type_name:ident, $write_size:literal, $erase_size:literal) => {
+        impl crate::_generated::flash_regions::$type_name<'_> {
+            pub fn blocking_read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Error> {
+                blocking_read(self.0.base, self.0.size, offset, bytes)
+            }
+            pub fn blocking_write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Error> {
+                unsafe { blocking_write(self.0.base, self.0.size, offset, bytes) }
+            }
+            // Unlike read/write, the region size (`self.0.size`) is not passed to the erase helper.
+            pub fn blocking_erase(&mut self, from: u32, to: u32) -> Result<(), Error> {
+                unsafe { blocking_erase(self.0.base, from, to) }
+            }
+        }
+
+        impl embedded_storage::nor_flash::ErrorType for crate::_generated::flash_regions::$type_name<'_> {
+            type Error = Error;
+        }
+
+        impl embedded_storage::nor_flash::ReadNorFlash for crate::_generated::flash_regions::$type_name<'_> {
+            const READ_SIZE: usize = 1;
+
+            fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> {
+                self.blocking_read(offset, bytes)
+            }
+
+            fn capacity(&self) -> usize {
+                self.0.size as usize
+            }
+        }
+
+        impl embedded_storage::nor_flash::NorFlash for crate::_generated::flash_regions::$type_name<'_> {
+            const WRITE_SIZE: usize = $write_size;
+            const ERASE_SIZE: usize = $erase_size;
+
+            fn write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Self::Error> {
+                self.blocking_write(offset, bytes)
+            }
+
+            fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
+                self.blocking_erase(from, to)
+            }
+        }
+    };
+}
diff --git a/embassy-stm32/src/flash/f3.rs b/embassy-stm32/src/flash/f3.rs
index 1cb08ee1a..10a09c42c 100644
--- a/embassy-stm32/src/flash/f3.rs
+++ b/embassy-stm32/src/flash/f3.rs
@@ -1,9 +1,16 @@
 use core::convert::TryInto;
 use core::ptr::write_volatile;
 
+use atomic_polyfill::{fence, Ordering};
+
+use super::{FlashRegion, FlashSector, FLASH_REGIONS, WRITE_SIZE};
 use crate::flash::Error;
 use crate::pac;
 
+pub const fn get_flash_regions() -> &'static [&'static FlashRegion] {
+    &FLASH_REGIONS
+}
+
 pub(crate) unsafe fn lock() {
     pac::FLASH.cr().modify(|w| w.set_lock(true));
 }
@@ -13,58 +20,55 @@ pub(crate) unsafe fn unlock() {
     pac::FLASH.keyr().write(|w| w.set_fkeyr(0xCDEF_89AB));
 }
 
-pub(crate) unsafe fn blocking_write(offset: u32, buf: &[u8]) -> Result<(), Error> {
+pub(crate) unsafe fn begin_write() {
+    assert_eq!(0, WRITE_SIZE % 2);
+
     pac::FLASH.cr().write(|w| w.set_pg(true));
-
-    let ret = {
-        let mut ret: Result<(), Error> = Ok(());
-        let mut offset = offset;
-        for chunk in buf.chunks(2) {
-            write_volatile(offset as *mut u16, u16::from_le_bytes(chunk[0..2].try_into().unwrap()));
-            offset += chunk.len() as u32;
-
-            ret = blocking_wait_ready();
-            if ret.is_err() {
-                break;
-            }
-        }
-        ret
-    };
-
-    pac::FLASH.cr().write(|w| w.set_pg(false));
-
-    ret
 }
 
-pub(crate) unsafe fn blocking_erase(from: u32, to: u32) -> Result<(), Error> {
-    for page in (from..to).step_by(super::ERASE_SIZE) {
-        pac::FLASH.cr().modify(|w| {
-            w.set_per(true);
-        });
+pub(crate) unsafe fn end_write() {
+    pac::FLASH.cr().write(|w| w.set_pg(false));
+}
 
-        pac::FLASH.ar().write(|w| w.set_far(page));
+pub(crate) unsafe fn blocking_write(start_address: u32, buf: &[u8; WRITE_SIZE]) -> Result<(), Error> {
+    let mut address = start_address;
+    for chunk in buf.chunks(2) {
+        write_volatile(address as *mut u16, u16::from_le_bytes(chunk.try_into().unwrap()));
+        address += chunk.len() as u32;
 
-        pac::FLASH.cr().modify(|w| {
-            w.set_strt(true);
-        });
-
-        let mut ret: Result<(), Error> = blocking_wait_ready();
-
-        if !pac::FLASH.sr().read().eop() {
-            trace!("FLASH: EOP not set");
-            ret = Err(Error::Prog);
-        } else {
-            pac::FLASH.sr().write(|w| w.set_eop(true));
-        }
-
-        pac::FLASH.cr().modify(|w| w.set_per(false));
-
-        clear_all_err();
-        if ret.is_err() {
-            return ret;
-        }
+        // prevents parallelism errors
+        fence(Ordering::SeqCst);
     }
 
+    blocking_wait_ready()
+}
+
+pub(crate) unsafe fn blocking_erase_sector(sector: &FlashSector) -> Result<(), Error> {
+    pac::FLASH.cr().modify(|w| {
+        w.set_per(true);
+    });
+
+    pac::FLASH.ar().write(|w| w.set_far(sector.start));
+
+    pac::FLASH.cr().modify(|w| {
+        w.set_strt(true);
+    });
+
+    let mut ret: Result<(), Error> = blocking_wait_ready();
+
+    if !pac::FLASH.sr().read().eop() {
+        trace!("FLASH: EOP not set");
+        ret = Err(Error::Prog);
+    } else {
+        pac::FLASH.sr().write(|w| w.set_eop(true));
+    }
+
+    pac::FLASH.cr().modify(|w| w.set_per(false));
+
+    clear_all_err();
+    if ret.is_err() {
+        return ret;
+    }
     Ok(())
 }
 
@@ -82,7 +86,7 @@ pub(crate) unsafe fn clear_all_err() {
     });
 }
 
-pub(crate) unsafe fn blocking_wait_ready() -> Result<(), Error> {
+unsafe fn blocking_wait_ready() -> Result<(), Error> {
     loop {
         let sr = pac::FLASH.sr().read();
 
diff --git a/embassy-stm32/src/flash/f4.rs b/embassy-stm32/src/flash/f4.rs
index 9e23a8adf..2ce9df69f 100644
--- a/embassy-stm32/src/flash/f4.rs
+++ b/embassy-stm32/src/flash/f4.rs
@@ -2,27 +2,108 @@ use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, Ordering};
 
-use super::{ERASE_SIZE, FLASH_BASE, FLASH_SIZE};
+use super::{FlashRegion, FlashSector, FLASH_REGIONS, WRITE_SIZE};
 use crate::flash::Error;
 use crate::pac;
 
-const SECOND_BANK_SECTOR_START: u32 = 12;
+#[cfg(any(stm32f427, stm32f429, stm32f437, stm32f439, stm32f469, stm32f479))]
+mod alt_regions {
+    use embassy_hal_common::PeripheralRef;
+    use stm32_metapac::FLASH_SIZE;
 
-unsafe fn is_dual_bank() -> bool {
-    match FLASH_SIZE / 1024 {
-        // 1 MB devices depend on configuration
-        1024 => {
-            if cfg!(any(stm32f427, stm32f429, stm32f437, stm32f439, stm32f469, stm32f479)) {
-                pac::FLASH.optcr().read().db1m()
-            } else {
-                false
+    use crate::_generated::flash_regions::{BANK1_REGION1, BANK1_REGION2, BANK1_REGION3};
+    use crate::flash::{Bank1Region1, Bank1Region2, Flash, FlashBank, FlashRegion};
+    use crate::peripherals::FLASH;
+
+    pub const ALT_BANK1_REGION3: FlashRegion = FlashRegion {
+        size: 3 * BANK1_REGION3.erase_size,
+        ..BANK1_REGION3
+    };
+    pub const ALT_BANK2_REGION1: FlashRegion = FlashRegion {
+        bank: FlashBank::Bank2,
+        base: BANK1_REGION1.base + FLASH_SIZE as u32 / 2,
+        ..BANK1_REGION1
+    };
+    pub const ALT_BANK2_REGION2: FlashRegion = FlashRegion {
+        bank: FlashBank::Bank2,
+        base: BANK1_REGION2.base + FLASH_SIZE as u32 / 2,
+        ..BANK1_REGION2
+    };
+    pub const ALT_BANK2_REGION3: FlashRegion = FlashRegion {
+        bank: FlashBank::Bank2,
+        base: BANK1_REGION3.base + FLASH_SIZE as u32 / 2,
+        size: 3 * BANK1_REGION3.erase_size,
+        ..BANK1_REGION3
+    };
+
+    pub const ALT_FLASH_REGIONS: [&FlashRegion; 6] = [
+        &BANK1_REGION1,
+        &BANK1_REGION2,
+        &ALT_BANK1_REGION3,
+        &ALT_BANK2_REGION1,
+        &ALT_BANK2_REGION2,
+        &ALT_BANK2_REGION3,
+    ];
+
+    pub type AltBank1Region1<'d> = Bank1Region1<'d>;
+    pub type AltBank1Region2<'d> = Bank1Region2<'d>;
+    pub struct AltBank1Region3<'d>(pub &'static FlashRegion, PeripheralRef<'d, FLASH>);
+    pub struct AltBank2Region1<'d>(pub &'static FlashRegion, PeripheralRef<'d, FLASH>);
+    pub struct AltBank2Region2<'d>(pub &'static FlashRegion, PeripheralRef<'d, FLASH>);
+    pub struct AltBank2Region3<'d>(pub &'static FlashRegion, PeripheralRef<'d, FLASH>);
+
+    pub struct AltFlashLayout<'d> {
+        pub bank1_region1: AltBank1Region1<'d>,
+        pub bank1_region2: AltBank1Region2<'d>,
+        pub bank1_region3: AltBank1Region3<'d>,
+        pub bank2_region1: AltBank2Region1<'d>,
+        pub bank2_region2: AltBank2Region2<'d>,
+        pub bank2_region3: AltBank2Region3<'d>,
+    }
+
+    impl<'d> Flash<'d> {
+        pub fn into_alt_regions(self) -> AltFlashLayout<'d> {
+            unsafe { crate::pac::FLASH.optcr().modify(|r| r.set_db1m(true)) };
+
+            // SAFETY: We never expose the cloned peripheral references, and their instance is not public.
+            // Also, all flash region operations are protected with a cs.
+            let mut p = self.release();
+            AltFlashLayout {
+                bank1_region1: Bank1Region1(&BANK1_REGION1, unsafe { p.clone_unchecked() }),
+                bank1_region2: Bank1Region2(&BANK1_REGION2, unsafe { p.clone_unchecked() }),
+                bank1_region3: AltBank1Region3(&ALT_BANK1_REGION3, unsafe { p.clone_unchecked() }),
+                bank2_region1: AltBank2Region1(&ALT_BANK2_REGION1, unsafe { p.clone_unchecked() }),
+                bank2_region2: AltBank2Region2(&ALT_BANK2_REGION2, unsafe { p.clone_unchecked() }),
+                bank2_region3: AltBank2Region3(&ALT_BANK2_REGION3, unsafe { p.clone_unchecked() }),
             }
         }
-        // 2 MB devices are always dual bank
-        2048 => true,
-        // All other devices are single bank
-        _ => false,
     }
+
+    impl Drop for AltFlashLayout<'_> {
+        fn drop(&mut self) {
+            unsafe {
+                super::lock();
+                crate::pac::FLASH.optcr().modify(|r| r.set_db1m(false))
+            };
+        }
+    }
+}
+
+#[cfg(any(stm32f427, stm32f429, stm32f437, stm32f439, stm32f469, stm32f479))]
+pub use alt_regions::{AltFlashLayout, ALT_FLASH_REGIONS};
+
+#[cfg(any(stm32f427, stm32f429, stm32f437, stm32f439, stm32f469, stm32f479))]
+pub fn get_flash_regions() -> &'static [&'static FlashRegion] {
+    if unsafe { pac::FLASH.optcr().read().db1m() } {
+        &ALT_FLASH_REGIONS
+    } else {
+        &FLASH_REGIONS
+    }
+}
+
+#[cfg(not(any(stm32f427, stm32f429, stm32f437, stm32f439, stm32f469, stm32f479)))]
+pub const fn get_flash_regions() -> &'static [&'static FlashRegion] {
+    &FLASH_REGIONS
 }
 
 pub(crate) unsafe fn lock() {
@@ -34,93 +115,34 @@ pub(crate) unsafe fn unlock() {
     pac::FLASH.keyr().write(|w| w.set_key(0xCDEF_89AB));
 }
 
-pub(crate) unsafe fn blocking_write(offset: u32, buf: &[u8]) -> Result<(), Error> {
+pub(crate) unsafe fn begin_write() {
+    assert_eq!(0, WRITE_SIZE % 4);
+
     pac::FLASH.cr().write(|w| {
         w.set_pg(true);
         w.set_psize(pac::flash::vals::Psize::PSIZE32);
     });
+}
 
-    let ret = {
-        let mut ret: Result<(), Error> = Ok(());
-        let mut offset = offset;
-        for chunk in buf.chunks(super::WRITE_SIZE) {
-            for val in chunk.chunks(4) {
-                write_volatile(offset as *mut u32, u32::from_le_bytes(val[0..4].try_into().unwrap()));
-                offset += val.len() as u32;
-
-                // prevents parallelism errors
-                fence(Ordering::SeqCst);
-            }
-
-            ret = blocking_wait_ready();
-            if ret.is_err() {
-                break;
-            }
-        }
-        ret
-    };
-
+pub(crate) unsafe fn end_write() {
     pac::FLASH.cr().write(|w| w.set_pg(false));
-
-    ret
 }
 
-struct FlashSector {
-    index: u8,
-    size: u32,
-}
+pub(crate) unsafe fn blocking_write(start_address: u32, buf: &[u8; WRITE_SIZE]) -> Result<(), Error> {
+    let mut address = start_address;
+    for val in buf.chunks(4) {
+        write_volatile(address as *mut u32, u32::from_le_bytes(val.try_into().unwrap()));
+        address += val.len() as u32;
 
-fn get_sector(addr: u32, dual_bank: bool) -> FlashSector {
-    let offset = addr - FLASH_BASE as u32;
-
-    let bank_size = match dual_bank {
-        true => FLASH_SIZE / 2,
-        false => FLASH_SIZE,
-    } as u32;
-
-    let bank = offset / bank_size;
-    let offset_in_bank = offset % bank_size;
-
-    let index_in_bank = if offset_in_bank >= ERASE_SIZE as u32 / 2 {
-        4 + offset_in_bank / ERASE_SIZE as u32
-    } else {
-        offset_in_bank / (ERASE_SIZE as u32 / 8)
-    };
-
-    // First 4 sectors are 16KB, then one 64KB, and rest are 128KB
-    let size = match index_in_bank {
-        0..=3 => 16 * 1024,
-        4 => 64 * 1024,
-        _ => 128 * 1024,
-    };
-
-    let index = if bank == 1 {
-        SECOND_BANK_SECTOR_START + index_in_bank
-    } else {
-        index_in_bank
-    } as u8;
-
-    FlashSector { index, size }
-}
-
-pub(crate) unsafe fn blocking_erase(from: u32, to: u32) -> Result<(), Error> {
-    let mut addr = from;
-    let dual_bank = is_dual_bank();
-
-    while addr < to {
-        let sector = get_sector(addr, dual_bank);
-        erase_sector(sector.index)?;
-        addr += sector.size;
+        // prevents parallelism errors
+        fence(Ordering::SeqCst);
     }
 
-    Ok(())
+    blocking_wait_ready()
 }
 
-unsafe fn erase_sector(sector: u8) -> Result<(), Error> {
-    let bank = sector / SECOND_BANK_SECTOR_START as u8;
-    let snb = (bank << 4) + (sector % SECOND_BANK_SECTOR_START as u8);
-
-    trace!("Erasing sector: {}", sector);
+pub(crate) unsafe fn blocking_erase_sector(sector: &FlashSector) -> Result<(), Error> {
+    let snb = ((sector.bank as u8) << 4) + sector.index_in_bank;
 
     pac::FLASH.cr().modify(|w| {
         w.set_ser(true);
@@ -148,7 +170,7 @@ pub(crate) unsafe fn clear_all_err() {
     });
 }
 
-pub(crate) unsafe fn blocking_wait_ready() -> Result<(), Error> {
+unsafe fn blocking_wait_ready() -> Result<(), Error> {
     loop {
         let sr = pac::FLASH.sr().read();
 
@@ -173,3 +195,85 @@ pub(crate) unsafe fn blocking_wait_ready() -> Result<(), Error> {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::flash::{get_sector, FlashBank};
+
+    // Probes `get_sector` at the first and last byte of representative
+    // sectors, first with `FLASH_REGIONS` and then with `ALT_FLASH_REGIONS`.
+    #[test]
+    #[cfg(stm32f429)]
+    fn can_get_sector() {
+        const SMALL_SECTOR_SIZE: u32 = 16 * 1024;
+        const MEDIUM_SECTOR_SIZE: u32 = 64 * 1024;
+        const LARGE_SECTOR_SIZE: u32 = 128 * 1024;
+
+        // `FLASH_REGIONS`: every probed address is expected to resolve to bank 1.
+        let assert_sector = |index_in_bank: u8, start: u32, size: u32, address: u32| {
+            assert_eq!(
+                FlashSector {
+                    bank: FlashBank::Bank1,
+                    index_in_bank,
+                    start,
+                    size
+                },
+                get_sector(address, &FLASH_REGIONS)
+            )
+        };
+
+        assert_sector(0, 0x0800_0000, SMALL_SECTOR_SIZE, 0x0800_0000);
+        assert_sector(0, 0x0800_0000, SMALL_SECTOR_SIZE, 0x0800_3FFF);
+        assert_sector(3, 0x0800_C000, SMALL_SECTOR_SIZE, 0x0800_C000);
+        assert_sector(3, 0x0800_C000, SMALL_SECTOR_SIZE, 0x0800_FFFF);
+
+        assert_sector(4, 0x0801_0000, MEDIUM_SECTOR_SIZE, 0x0801_0000);
+        assert_sector(4, 0x0801_0000, MEDIUM_SECTOR_SIZE, 0x0801_FFFF);
+
+        assert_sector(5, 0x0802_0000, LARGE_SECTOR_SIZE, 0x0802_0000);
+        assert_sector(5, 0x0802_0000, LARGE_SECTOR_SIZE, 0x0803_FFFF);
+        assert_sector(11, 0x080E_0000, LARGE_SECTOR_SIZE, 0x080E_0000);
+        assert_sector(11, 0x080E_0000, LARGE_SECTOR_SIZE, 0x080F_FFFF);
+
+        // `ALT_FLASH_REGIONS`: the same address range is expected to split
+        // between `Bank1` and `Bank2` at 0x0808_0000.
+        let assert_sector = |bank: FlashBank, index_in_bank: u8, start: u32, size: u32, address: u32| {
+            assert_eq!(
+                FlashSector {
+                    bank,
+                    index_in_bank,
+                    start,
+                    size
+                },
+                get_sector(address, &ALT_FLASH_REGIONS)
+            )
+        };
+
+        assert_sector(FlashBank::Bank1, 0, 0x0800_0000, SMALL_SECTOR_SIZE, 0x0800_0000);
+        assert_sector(FlashBank::Bank1, 0, 0x0800_0000, SMALL_SECTOR_SIZE, 0x0800_3FFF);
+        assert_sector(FlashBank::Bank1, 3, 0x0800_C000, SMALL_SECTOR_SIZE, 0x0800_C000);
+        assert_sector(FlashBank::Bank1, 3, 0x0800_C000, SMALL_SECTOR_SIZE, 0x0800_FFFF);
+
+        assert_sector(FlashBank::Bank1, 4, 0x0801_0000, MEDIUM_SECTOR_SIZE, 0x0801_0000);
+        assert_sector(FlashBank::Bank1, 4, 0x0801_0000, MEDIUM_SECTOR_SIZE, 0x0801_FFFF);
+
+        assert_sector(FlashBank::Bank1, 5, 0x0802_0000, LARGE_SECTOR_SIZE, 0x0802_0000);
+        assert_sector(FlashBank::Bank1, 5, 0x0802_0000, LARGE_SECTOR_SIZE, 0x0803_FFFF);
+        assert_sector(FlashBank::Bank1, 7, 0x0806_0000, LARGE_SECTOR_SIZE, 0x0806_0000);
+        assert_sector(FlashBank::Bank1, 7, 0x0806_0000, LARGE_SECTOR_SIZE, 0x0807_FFFF);
+
+        assert_sector(FlashBank::Bank2, 0, 0x0808_0000, SMALL_SECTOR_SIZE, 0x0808_0000);
+        assert_sector(FlashBank::Bank2, 0, 0x0808_0000, SMALL_SECTOR_SIZE, 0x0808_3FFF);
+        assert_sector(FlashBank::Bank2, 3, 0x0808_C000, SMALL_SECTOR_SIZE, 0x0808_C000);
+        assert_sector(FlashBank::Bank2, 3, 0x0808_C000, SMALL_SECTOR_SIZE, 0x0808_FFFF);
+
+        assert_sector(FlashBank::Bank2, 4, 0x0809_0000, MEDIUM_SECTOR_SIZE, 0x0809_0000);
+        assert_sector(FlashBank::Bank2, 4, 0x0809_0000, MEDIUM_SECTOR_SIZE, 0x0809_FFFF);
+
+        assert_sector(FlashBank::Bank2, 5, 0x080A_0000, LARGE_SECTOR_SIZE, 0x080A_0000);
+        assert_sector(FlashBank::Bank2, 5, 0x080A_0000, LARGE_SECTOR_SIZE, 0x080B_FFFF);
+        assert_sector(FlashBank::Bank2, 7, 0x080E_0000, LARGE_SECTOR_SIZE, 0x080E_0000);
+        assert_sector(FlashBank::Bank2, 7, 0x080E_0000, LARGE_SECTOR_SIZE, 0x080F_FFFF);
+    }
+}
diff --git a/embassy-stm32/src/flash/f7.rs b/embassy-stm32/src/flash/f7.rs
index dd0d8439d..6427d5a09 100644
--- a/embassy-stm32/src/flash/f7.rs
+++ b/embassy-stm32/src/flash/f7.rs
@@ -2,9 +2,14 @@ use core::convert::TryInto;
 use core::ptr::write_volatile;
 use core::sync::atomic::{fence, Ordering};
 
+use super::{FlashRegion, FlashSector, FLASH_REGIONS, WRITE_SIZE};
 use crate::flash::Error;
 use crate::pac;
 
+pub const fn get_flash_regions() -> &'static [&'static FlashRegion] {
+    &FLASH_REGIONS
+}
+
 pub(crate) unsafe fn lock() {
     pac::FLASH.cr().modify(|w| w.set_lock(true));
 }
@@ -14,64 +19,36 @@ pub(crate) unsafe fn unlock() {
     pac::FLASH.keyr().write(|w| w.set_key(0xCDEF_89AB));
 }
 
-pub(crate) unsafe fn blocking_write(offset: u32, buf: &[u8]) -> Result<(), Error> {
+pub(crate) unsafe fn begin_write() {
+    assert_eq!(0, WRITE_SIZE % 4);
+
     pac::FLASH.cr().write(|w| {
         w.set_pg(true);
         w.set_psize(pac::flash::vals::Psize::PSIZE32);
     });
-
-    let ret = {
-        let mut ret: Result<(), Error> = Ok(());
-        let mut offset = offset;
-        for chunk in buf.chunks(super::WRITE_SIZE) {
-            for val in chunk.chunks(4) {
-                write_volatile(offset as *mut u32, u32::from_le_bytes(val[0..4].try_into().unwrap()));
-                offset += val.len() as u32;
-
-                // prevents parallelism errors
-                fence(Ordering::SeqCst);
-            }
-
-            ret = blocking_wait_ready();
-            if ret.is_err() {
-                break;
-            }
-        }
-        ret
-    };
-
-    pac::FLASH.cr().write(|w| w.set_pg(false));
-
-    ret
 }
 
-pub(crate) unsafe fn blocking_erase(from: u32, to: u32) -> Result<(), Error> {
-    let start_sector = if from >= (super::FLASH_BASE + super::ERASE_SIZE / 2) as u32 {
-        4 + (from - super::FLASH_BASE as u32) / super::ERASE_SIZE as u32
-    } else {
-        (from - super::FLASH_BASE as u32) / (super::ERASE_SIZE as u32 / 8)
-    };
+pub(crate) unsafe fn end_write() {
+    pac::FLASH.cr().write(|w| w.set_pg(false));
+}
 
-    let end_sector = if to >= (super::FLASH_BASE + super::ERASE_SIZE / 2) as u32 {
-        4 + (to - super::FLASH_BASE as u32) / super::ERASE_SIZE as u32
-    } else {
-        (to - super::FLASH_BASE as u32) / (super::ERASE_SIZE as u32 / 8)
-    };
+pub(crate) unsafe fn blocking_write(start_address: u32, buf: &[u8; WRITE_SIZE]) -> Result<(), Error> {
+    let mut address = start_address;
+    for val in buf.chunks(4) {
+        write_volatile(address as *mut u32, u32::from_le_bytes(val.try_into().unwrap()));
+        address += val.len() as u32;
 
-    for sector in start_sector..end_sector {
-        let ret = erase_sector(sector as u8);
-        if ret.is_err() {
-            return ret;
-        }
+        // prevents parallelism errors
+        fence(Ordering::SeqCst);
     }
 
-    Ok(())
+    blocking_wait_ready()
 }
 
-unsafe fn erase_sector(sector: u8) -> Result<(), Error> {
+pub(crate) unsafe fn blocking_erase_sector(sector: &FlashSector) -> Result<(), Error> {
     pac::FLASH.cr().modify(|w| {
         w.set_ser(true);
-        w.set_snb(sector)
+        w.set_snb(sector.index_in_bank)
     });
 
     pac::FLASH.cr().modify(|w| {
@@ -107,7 +84,7 @@ pub(crate) unsafe fn clear_all_err() {
     });
 }
 
-pub(crate) unsafe fn blocking_wait_ready() -> Result<(), Error> {
+unsafe fn blocking_wait_ready() -> Result<(), Error> {
     loop {
         let sr = pac::FLASH.sr().read();
 
@@ -132,3 +109,79 @@ pub(crate) unsafe fn blocking_wait_ready() -> Result<(), Error> {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::flash::{get_sector, FlashBank};
+
+    // Expected F732 layout: sectors 0-3 are 16 KiB, sector 4 is 64 KiB and
+    // the sectors from 5 up are 128 KiB, all in bank 1.
+    #[test]
+    #[cfg(stm32f732)]
+    fn can_get_sector() {
+        const SMALL_SECTOR_SIZE: u32 = 16 * 1024;
+        const MEDIUM_SECTOR_SIZE: u32 = 64 * 1024;
+        const LARGE_SECTOR_SIZE: u32 = 128 * 1024;
+
+        let assert_sector = |index_in_bank: u8, start: u32, size: u32, address: u32| {
+            assert_eq!(
+                FlashSector {
+                    bank: FlashBank::Bank1,
+                    index_in_bank,
+                    start,
+                    size
+                },
+                get_sector(address, &FLASH_REGIONS)
+            )
+        };
+
+        assert_sector(0, 0x0800_0000, SMALL_SECTOR_SIZE, 0x0800_0000);
+        assert_sector(0, 0x0800_0000, SMALL_SECTOR_SIZE, 0x0800_3FFF);
+        assert_sector(3, 0x0800_C000, SMALL_SECTOR_SIZE, 0x0800_C000);
+        assert_sector(3, 0x0800_C000, SMALL_SECTOR_SIZE, 0x0800_FFFF);
+
+        assert_sector(4, 0x0801_0000, MEDIUM_SECTOR_SIZE, 0x0801_0000);
+        assert_sector(4, 0x0801_0000, MEDIUM_SECTOR_SIZE, 0x0801_FFFF);
+
+        assert_sector(5, 0x0802_0000, LARGE_SECTOR_SIZE, 0x0802_0000);
+        assert_sector(5, 0x0802_0000, LARGE_SECTOR_SIZE, 0x0803_FFFF);
+        assert_sector(7, 0x0806_0000, LARGE_SECTOR_SIZE, 0x0806_0000);
+        assert_sector(7, 0x0806_0000, LARGE_SECTOR_SIZE, 0x0807_FFFF);
+    }
+
+    // Expected F769 layout: sectors 0-3 are 32 KiB, sector 4 is 128 KiB and
+    // the sectors from 5 up are 256 KiB, all in bank 1.
+    #[test]
+    #[cfg(stm32f769)]
+    fn can_get_sector() {
+        const SMALL_SECTOR_SIZE: u32 = 32 * 1024;
+        const MEDIUM_SECTOR_SIZE: u32 = 128 * 1024;
+        const LARGE_SECTOR_SIZE: u32 = 256 * 1024;
+
+        let assert_sector = |index_in_bank: u8, start: u32, size: u32, address: u32| {
+            assert_eq!(
+                FlashSector {
+                    bank: FlashBank::Bank1,
+                    index_in_bank,
+                    start,
+                    size
+                },
+                get_sector(address, &FLASH_REGIONS)
+            )
+        };
+
+        assert_sector(0, 0x0800_0000, SMALL_SECTOR_SIZE, 0x0800_0000);
+        assert_sector(0, 0x0800_0000, SMALL_SECTOR_SIZE, 0x0800_7FFF);
+        assert_sector(3, 0x0801_8000, SMALL_SECTOR_SIZE, 0x0801_8000);
+        assert_sector(3, 0x0801_8000, SMALL_SECTOR_SIZE, 0x0801_FFFF);
+
+        assert_sector(4, 0x0802_0000, MEDIUM_SECTOR_SIZE, 0x0802_0000);
+        assert_sector(4, 0x0802_0000, MEDIUM_SECTOR_SIZE, 0x0803_FFFF);
+
+        assert_sector(5, 0x0804_0000, LARGE_SECTOR_SIZE, 0x0804_0000);
+        assert_sector(5, 0x0804_0000, LARGE_SECTOR_SIZE, 0x0807_FFFF);
+        assert_sector(7, 0x080C_0000, LARGE_SECTOR_SIZE, 0x080C_0000);
+        assert_sector(7, 0x080C_0000, LARGE_SECTOR_SIZE, 0x080F_FFFF);
+    }
+}
diff --git a/embassy-stm32/src/flash/h7.rs b/embassy-stm32/src/flash/h7.rs
index 7de95ac11..4f38d50c0 100644
--- a/embassy-stm32/src/flash/h7.rs
+++ b/embassy-stm32/src/flash/h7.rs
@@ -1,13 +1,20 @@
 use core::convert::TryInto;
 use core::ptr::write_volatile;
 
+use atomic_polyfill::{fence, Ordering};
+
+use super::{FlashRegion, FlashSector, BANK1_REGION, FLASH_REGIONS, WRITE_SIZE};
 use crate::flash::Error;
 use crate::pac;
 
-const SECOND_BANK_OFFSET: usize = 0x0010_0000;
-
+/// True when `FLASH_REGIONS` has exactly two entries, which this module treats as dual-bank.
 const fn is_dual_bank() -> bool {
-    super::FLASH_SIZE / 2 > super::ERASE_SIZE
+    FLASH_REGIONS.len() == 2
+}
+
+/// Returns the `FLASH_REGIONS` table; `const` like the other flash family modules.
+pub const fn get_flash_regions() -> &'static [&'static FlashRegion] {
+    &FLASH_REGIONS
 }
 
 pub(crate) unsafe fn lock() {
@@ -20,90 +25,72 @@ pub(crate) unsafe fn lock() {
 pub(crate) unsafe fn unlock() {
     pac::FLASH.bank(0).keyr().write(|w| w.set_keyr(0x4567_0123));
     pac::FLASH.bank(0).keyr().write(|w| w.set_keyr(0xCDEF_89AB));
-
     if is_dual_bank() {
         pac::FLASH.bank(1).keyr().write(|w| w.set_keyr(0x4567_0123));
         pac::FLASH.bank(1).keyr().write(|w| w.set_keyr(0xCDEF_89AB));
     }
 }
 
-pub(crate) unsafe fn blocking_write(offset: u32, buf: &[u8]) -> Result<(), Error> {
-    let bank = if !is_dual_bank() || (offset - super::FLASH_BASE as u32) < SECOND_BANK_OFFSET as u32 {
+/// Only checks that `WRITE_SIZE` is a whole number of 32-bit words; the
+/// programming setup is done per call in `blocking_write`.
+pub(crate) unsafe fn begin_write() {
+    assert_eq!(0, WRITE_SIZE % 4);
+}
+
+/// No-op: `blocking_write` clears `PG` itself before returning.
+pub(crate) unsafe fn end_write() {}
+
+/// Programs one `WRITE_SIZE`-byte block at `start_address`, one 32-bit word at a time.
+///
+/// Stops at the first word for which `blocking_wait_ready` reports an error
+/// and returns that error; returns `Ok(())` once every word has been written.
+pub(crate) unsafe fn blocking_write(start_address: u32, buf: &[u8; WRITE_SIZE]) -> Result<(), Error> {
+    // We cannot have the write setup sequence in begin_write as it depends on the address
+    let bank = if start_address < BANK1_REGION.end() {
         pac::FLASH.bank(0)
     } else {
         pac::FLASH.bank(1)
     };
-
     bank.cr().write(|w| {
         w.set_pg(true);
         w.set_psize(2); // 32 bits at once
     });
-
     cortex_m::asm::isb();
     cortex_m::asm::dsb();
-    core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst);
+    fence(Ordering::SeqCst);
 
-    let ret = {
-        let mut ret: Result<(), Error> = Ok(());
-        let mut offset = offset;
-        'outer: for chunk in buf.chunks(super::WRITE_SIZE) {
-            for val in chunk.chunks(4) {
-                trace!("Writing at {:x}", offset);
-                write_volatile(offset as *mut u32, u32::from_le_bytes(val[0..4].try_into().unwrap()));
-                offset += val.len() as u32;
+    // Status of the most recently programmed word.
+    let mut res: Result<(), Error> = Ok(());
+    let mut address = start_address;
+    for val in buf.chunks(4) {
+        write_volatile(address as *mut u32, u32::from_le_bytes(val.try_into().unwrap()));
+        address += val.len() as u32;
 
-                ret = blocking_wait_ready(bank);
-                bank.sr().modify(|w| {
-                    if w.eop() {
-                        w.set_eop(true);
-                    }
-                });
-                if ret.is_err() {
-                    break 'outer;
-                }
+        res = blocking_wait_ready(bank);
+        bank.sr().modify(|w| {
+            if w.eop() {
+                w.set_eop(true);
             }
+        });
+        if res.is_err() {
+            break;
         }
-        ret
-    };
+    }
 
     bank.cr().write(|w| w.set_pg(false));
 
     cortex_m::asm::isb();
     cortex_m::asm::dsb();
-    core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst);
+    fence(Ordering::SeqCst);
 
-    ret
+    res
 }
 
-pub(crate) unsafe fn blocking_erase(from: u32, to: u32) -> Result<(), Error> {
-    let from = from - super::FLASH_BASE as u32;
-    let to = to - super::FLASH_BASE as u32;
-
-    let (start, end) = if to <= super::FLASH_SIZE as u32 {
-        let start_sector = from / super::ERASE_SIZE as u32;
-        let end_sector = to / super::ERASE_SIZE as u32;
-        (start_sector, end_sector)
-    } else {
-        error!("Attempting to write outside of defined sectors {:x} {:x}", from, to);
-        return Err(Error::Unaligned);
-    };
-
-    trace!("Erasing sectors from {} to {}", start, end);
-    for sector in start..end {
-        let bank = if sector >= 8 { 1 } else { 0 };
-        let ret = erase_sector(pac::FLASH.bank(bank), (sector % 8) as u8);
-        if ret.is_err() {
-            return ret;
-        }
-    }
-
-    Ok(())
-}
-
-unsafe fn erase_sector(bank: pac::flash::Bank, sector: u8) -> Result<(), Error> {
+pub(crate) unsafe fn blocking_erase_sector(sector: &FlashSector) -> Result<(), Error> {
+    let bank = pac::FLASH.bank(sector.bank as usize);
     bank.cr().modify(|w| {
         w.set_ser(true);
-        w.set_snb(sector)
+        w.set_snb(sector.index_in_bank)
     });
 
     bank.cr().modify(|w| {
@@ -160,7 +139,7 @@ unsafe fn bank_clear_all_err(bank: pac::flash::Bank) {
     });
 }
 
-pub(crate) unsafe fn blocking_wait_ready(bank: pac::flash::Bank) -> Result<(), Error> {
+unsafe fn blocking_wait_ready(bank: pac::flash::Bank) -> Result<(), Error> {
     loop {
         let sr = bank.sr().read();
 
diff --git a/embassy-stm32/src/flash/l.rs b/embassy-stm32/src/flash/l.rs
index 5048a3314..7d9cc6ea3 100644
--- a/embassy-stm32/src/flash/l.rs
+++ b/embassy-stm32/src/flash/l.rs
@@ -1,9 +1,15 @@
-use core::convert::TryInto;
 use core::ptr::write_volatile;
 
+use atomic_polyfill::{fence, Ordering};
+
+use super::{FlashRegion, FlashSector, FLASH_REGIONS, WRITE_SIZE};
 use crate::flash::Error;
 use crate::pac;
 
+pub const fn get_flash_regions() -> &'static [&'static FlashRegion] {
+    &FLASH_REGIONS
+}
+
 pub(crate) unsafe fn lock() {
     #[cfg(any(flash_wl, flash_wb, flash_l4))]
     pac::FLASH.cr().modify(|w| w.set_lock(true));
@@ -33,82 +39,75 @@ pub(crate) unsafe fn unlock() {
     }
 }
 
-pub(crate) unsafe fn blocking_write(offset: u32, buf: &[u8]) -> Result<(), Error> {
+pub(crate) unsafe fn begin_write() {
+    assert_eq!(0, WRITE_SIZE % 4);
+
     #[cfg(any(flash_wl, flash_wb, flash_l4))]
     pac::FLASH.cr().write(|w| w.set_pg(true));
-
-    let ret = {
-        let mut ret: Result<(), Error> = Ok(());
-        let mut offset = offset;
-        for chunk in buf.chunks(super::WRITE_SIZE) {
-            for val in chunk.chunks(4) {
-                write_volatile(offset as *mut u32, u32::from_le_bytes(val[0..4].try_into().unwrap()));
-                offset += val.len() as u32;
-            }
-
-            ret = blocking_wait_ready();
-            if ret.is_err() {
-                break;
-            }
-        }
-        ret
-    };
-
-    #[cfg(any(flash_wl, flash_wb, flash_l4))]
-    pac::FLASH.cr().write(|w| w.set_pg(false));
-
-    ret
 }
 
-pub(crate) unsafe fn blocking_erase(from: u32, to: u32) -> Result<(), Error> {
-    for page in (from..to).step_by(super::ERASE_SIZE) {
-        #[cfg(any(flash_l0, flash_l1))]
-        {
-            pac::FLASH.pecr().modify(|w| {
-                w.set_erase(true);
-                w.set_prog(true);
-            });
+pub(crate) unsafe fn end_write() {
+    #[cfg(any(flash_wl, flash_wb, flash_l4))]
+    pac::FLASH.cr().write(|w| w.set_pg(false));
+}
 
-            write_volatile(page as *mut u32, 0xFFFFFFFF);
-        }
+pub(crate) unsafe fn blocking_write(start_address: u32, buf: &[u8; WRITE_SIZE]) -> Result<(), Error> {
+    let mut address = start_address;
+    for val in buf.chunks(4) {
+        write_volatile(address as *mut u32, u32::from_le_bytes(val.try_into().unwrap()));
+        address += val.len() as u32;
 
-        #[cfg(any(flash_wl, flash_wb, flash_l4))]
-        {
-            let idx = (page - super::FLASH_BASE as u32) / super::ERASE_SIZE as u32;
-
-            #[cfg(flash_l4)]
-            let (idx, bank) = if idx > 255 { (idx - 256, true) } else { (idx, false) };
-
-            pac::FLASH.cr().modify(|w| {
-                w.set_per(true);
-                w.set_pnb(idx as u8);
-                #[cfg(any(flash_wl, flash_wb))]
-                w.set_strt(true);
-                #[cfg(any(flash_l4))]
-                w.set_start(true);
-                #[cfg(any(flash_l4))]
-                w.set_bker(bank);
-            });
-        }
-
-        let ret: Result<(), Error> = blocking_wait_ready();
-
-        #[cfg(any(flash_wl, flash_wb, flash_l4))]
-        pac::FLASH.cr().modify(|w| w.set_per(false));
-
-        #[cfg(any(flash_l0, flash_l1))]
-        pac::FLASH.pecr().modify(|w| {
-            w.set_erase(false);
-            w.set_prog(false);
-        });
-
-        clear_all_err();
-        if ret.is_err() {
-            return ret;
-        }
+        // prevents parallelism errors
+        fence(Ordering::SeqCst);
     }
 
-    Ok(())
+    blocking_wait_ready()
+}
+
+pub(crate) unsafe fn blocking_erase_sector(sector: &FlashSector) -> Result<(), Error> {
+    #[cfg(any(flash_l0, flash_l1))]
+    {
+        pac::FLASH.pecr().modify(|w| {
+            w.set_erase(true);
+            w.set_prog(true);
+        });
+
+        write_volatile(sector.start as *mut u32, 0xFFFFFFFF);
+    }
+
+    #[cfg(any(flash_wl, flash_wb, flash_l4))]
+    {
+        let idx = (sector.start - super::FLASH_BASE as u32) / super::BANK1_REGION.erase_size as u32;
+
+        #[cfg(flash_l4)]
+        let (idx, bank) = if idx > 255 { (idx - 256, true) } else { (idx, false) };
+
+        pac::FLASH.cr().modify(|w| {
+            w.set_per(true);
+            w.set_pnb(idx as u8);
+            #[cfg(any(flash_wl, flash_wb))]
+            w.set_strt(true);
+            #[cfg(any(flash_l4))]
+            w.set_start(true);
+            #[cfg(any(flash_l4))]
+            w.set_bker(bank);
+        });
+    }
+
+    let ret: Result<(), Error> = blocking_wait_ready();
+
+    #[cfg(any(flash_wl, flash_wb, flash_l4))]
+    pac::FLASH.cr().modify(|w| w.set_per(false));
+
+    #[cfg(any(flash_l0, flash_l1))]
+    pac::FLASH.pecr().modify(|w| {
+        w.set_erase(false);
+        w.set_prog(false);
+    });
+
+    clear_all_err();
+
+    ret
 }
 
 pub(crate) unsafe fn clear_all_err() {
@@ -149,7 +148,7 @@ pub(crate) unsafe fn clear_all_err() {
     });
 }
 
-pub(crate) unsafe fn blocking_wait_ready() -> Result<(), Error> {
+unsafe fn blocking_wait_ready() -> Result<(), Error> {
     loop {
         let sr = pac::FLASH.sr().read();
 
diff --git a/embassy-stm32/src/flash/mod.rs b/embassy-stm32/src/flash/mod.rs
index b7166a437..231ff1f9e 100644
--- a/embassy-stm32/src/flash/mod.rs
+++ b/embassy-stm32/src/flash/mod.rs
@@ -1,89 +1,87 @@
-use embassy_hal_common::{into_ref, PeripheralRef};
-use embedded_storage::nor_flash::{ErrorType, NorFlash, NorFlashError, NorFlashErrorKind, ReadNorFlash};
+use embedded_storage::nor_flash::{NorFlashError, NorFlashErrorKind};
 
-pub use crate::pac::{ERASE_SIZE, ERASE_VALUE, FLASH_BASE, FLASH_SIZE, WRITE_SIZE};
-use crate::peripherals::FLASH;
-use crate::Peripheral;
-const FLASH_END: usize = FLASH_BASE + FLASH_SIZE;
+#[cfg(flash)]
+mod common;
 
-#[cfg_attr(any(flash_wl, flash_wb, flash_l0, flash_l1, flash_l4), path = "l.rs")]
+#[cfg(flash)]
+pub use common::*;
+
+pub use crate::_generated::flash_regions::*;
+pub use crate::pac::{FLASH_BASE, FLASH_SIZE, WRITE_SIZE};
+
+/// One contiguous flash region; family modules return a table of these from `get_flash_regions()`.
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct FlashRegion {
+    /// Bank this region belongs to.
+    pub bank: FlashBank,
+    /// Address of the first byte of the region.
+    pub base: u32,
+    /// Length of the region in bytes.
+    pub size: u32,
+    /// Size in bytes of one erasable unit (sector or page) of the region.
+    pub erase_size: u32,
+    /// Write granularity in bytes (presumably; not exercised in this module).
+    pub write_size: u32,
+    /// Value of an erased byte (presumably; not exercised in this module).
+    pub erase_value: u8,
+}
+
+/// One erasable sector, as handed to the family-specific `blocking_erase_sector`.
+#[derive(Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub struct FlashSector {
+    /// Bank containing the sector.
+    pub bank: FlashBank,
+    /// Sector number counted from the start of its bank.
+    pub index_in_bank: u8,
+    /// Address of the first byte of the sector.
+    pub start: u32,
+    /// Length of the sector in bytes.
+    pub size: u32,
+}
+
+/// Flash bank identifier; family code casts `Bank1`/`Bank2` to hardware bank numbers.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum FlashBank {
+    /// First bank (discriminant 0).
+    Bank1 = 0,
+    /// Second bank (discriminant 1).
+    Bank2 = 1,
+    /// Presumably the one-time-programmable area; takes the next discriminant (2).
+    Otp,
+}
+
+impl FlashRegion {
+    /// Address one past the last byte of the region (`base + size`).
+    pub const fn end(&self) -> u32 {
+        self.base + self.size
+    }
+
+    /// Number of erase units in the region (`size / erase_size`).
+    ///
+    /// The quotient is truncated to `u8`, so a region with more than 255 units wraps.
+    pub const fn sectors(&self) -> u8 {
+        (self.size / self.erase_size) as u8
+    }
+}
+
+#[cfg_attr(any(flash_l0, flash_l1, flash_l4, flash_wl, flash_wb), path = "l.rs")]
 #[cfg_attr(flash_f3, path = "f3.rs")]
 #[cfg_attr(flash_f4, path = "f4.rs")]
 #[cfg_attr(flash_f7, path = "f7.rs")]
 #[cfg_attr(flash_h7, path = "h7.rs")]
+#[cfg_attr(
+    not(any(
+        flash_l0, flash_l1, flash_l4, flash_wl, flash_wb, flash_f3, flash_f4, flash_f7, flash_h7
+    )),
+    path = "other.rs"
+)]
 mod family;
 
-pub struct Flash<'d> {
-    _inner: PeripheralRef<'d, FLASH>,
-}
-
-impl<'d> Flash<'d> {
-    pub fn new(p: impl Peripheral<P = FLASH> + 'd) -> Self {
-        into_ref!(p);
-        Self { _inner: p }
-    }
-
-    pub fn blocking_read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Error> {
-        let offset = FLASH_BASE as u32 + offset;
-        if offset as usize >= FLASH_END || offset as usize + bytes.len() > FLASH_END {
-            return Err(Error::Size);
-        }
-
-        let flash_data = unsafe { core::slice::from_raw_parts(offset as *const u8, bytes.len()) };
-        bytes.copy_from_slice(flash_data);
-        Ok(())
-    }
-
-    pub fn blocking_write(&mut self, offset: u32, buf: &[u8]) -> Result<(), Error> {
-        let offset = FLASH_BASE as u32 + offset;
-        if offset as usize + buf.len() > FLASH_END {
-            return Err(Error::Size);
-        }
-        if offset as usize % WRITE_SIZE != 0 || buf.len() as usize % WRITE_SIZE != 0 {
-            return Err(Error::Unaligned);
-        }
-        trace!("Writing {} bytes at 0x{:x}", buf.len(), offset);
-
-        self.clear_all_err();
-
-        unsafe {
-            family::unlock();
-            let res = family::blocking_write(offset, buf);
-            family::lock();
-            res
-        }
-    }
-
-    pub fn blocking_erase(&mut self, from: u32, to: u32) -> Result<(), Error> {
-        let from = FLASH_BASE as u32 + from;
-        let to = FLASH_BASE as u32 + to;
-        if to < from || to as usize > FLASH_END {
-            return Err(Error::Size);
-        }
-        if (from as usize % ERASE_SIZE) != 0 || (to as usize % ERASE_SIZE) != 0 {
-            return Err(Error::Unaligned);
-        }
-
-        self.clear_all_err();
-
-        unsafe {
-            family::unlock();
-            let res = family::blocking_erase(from, to);
-            family::lock();
-            res
-        }
-    }
-
-    fn clear_all_err(&mut self) {
-        unsafe { family::clear_all_err() };
-    }
-}
-
-impl Drop for Flash<'_> {
-    fn drop(&mut self) {
-        unsafe { family::lock() };
-    }
-}
+#[allow(unused_imports)]
+pub use family::*;
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 #[cfg_attr(feature = "defmt", derive(defmt::Format))]
@@ -97,10 +75,6 @@ pub enum Error {
     Parallelism,
 }
 
-impl<'d> ErrorType for Flash<'d> {
-    type Error = Error;
-}
-
 impl NorFlashError for Error {
     fn kind(&self) -> NorFlashErrorKind {
         match self {
@@ -110,28 +84,3 @@ impl NorFlashError for Error {
         }
     }
 }
-
-impl<'d> ReadNorFlash for Flash<'d> {
-    const READ_SIZE: usize = WRITE_SIZE;
-
-    fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> {
-        self.blocking_read(offset, bytes)
-    }
-
-    fn capacity(&self) -> usize {
-        FLASH_SIZE
-    }
-}
-
-impl<'d> NorFlash for Flash<'d> {
-    const WRITE_SIZE: usize = WRITE_SIZE;
-    const ERASE_SIZE: usize = ERASE_SIZE;
-
-    fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
-        self.blocking_erase(from, to)
-    }
-
-    fn write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Self::Error> {
-        self.blocking_write(offset, bytes)
-    }
-}
diff --git a/embassy-stm32/src/flash/other.rs b/embassy-stm32/src/flash/other.rs
new file mode 100644
index 000000000..c151cb828
--- /dev/null
+++ b/embassy-stm32/src/flash/other.rs
@@ -0,0 +1,42 @@
+//! Fallback flash backend, selected when no family-specific module applies.
+//!
+//! Only the region table is usable; every hardware operation panics.
+
+// Presumably silences dead-code warnings where nothing calls these stubs.
+#![allow(unused)]
+
+use super::{Error, FlashRegion, FlashSector, FLASH_REGIONS, WRITE_SIZE};
+
+/// Returns the `FLASH_REGIONS` table.
+pub const fn get_flash_regions() -> &'static [&'static FlashRegion] {
+    &FLASH_REGIONS
+}
+
+/// Stub: always panics.
+pub(crate) unsafe fn lock() {
+    unimplemented!("flash lock is not implemented for this chip");
+}
+/// Stub: always panics.
+pub(crate) unsafe fn unlock() {
+    unimplemented!("flash unlock is not implemented for this chip");
+}
+/// Stub: always panics.
+pub(crate) unsafe fn begin_write() {
+    unimplemented!("flash write is not implemented for this chip");
+}
+/// Stub: always panics.
+pub(crate) unsafe fn end_write() {
+    unimplemented!("flash write is not implemented for this chip");
+}
+/// Stub: always panics.
+pub(crate) unsafe fn blocking_write(_start_address: u32, _buf: &[u8; WRITE_SIZE]) -> Result<(), Error> {
+    unimplemented!("flash write is not implemented for this chip");
+}
+/// Stub: always panics.
+pub(crate) unsafe fn blocking_erase_sector(_sector: &FlashSector) -> Result<(), Error> {
+    unimplemented!("flash erase is not implemented for this chip");
+}
+/// Stub: always panics.
+pub(crate) unsafe fn clear_all_err() {
+    unimplemented!("flash error handling is not implemented for this chip");
+}
diff --git a/embassy-stm32/src/i2c/timeout.rs b/embassy-stm32/src/i2c/timeout.rs
index 4fca1ca2b..939e2750e 100644
--- a/embassy-stm32/src/i2c/timeout.rs
+++ b/embassy-stm32/src/i2c/timeout.rs
@@ -28,64 +28,64 @@ impl<'d, T: Instance, TXDMA, RXDMA> TimeoutI2c<'d, T, TXDMA, RXDMA> {
     }
 
     /// Blocking read with a custom timeout
-    pub fn blocking_read_timeout(&mut self, addr: u8, buffer: &mut [u8], timeout: Duration) -> Result<(), Error> {
-        self.i2c.blocking_read_timeout(addr, buffer, timeout_fn(timeout))
+    pub fn blocking_read_timeout(&mut self, addr: u8, read: &mut [u8], timeout: Duration) -> Result<(), Error> {
+        self.i2c.blocking_read_timeout(addr, read, timeout_fn(timeout))
     }
 
     /// Blocking read with default timeout, provided in [`TimeoutI2c::new()`]
-    pub fn blocking_read(&mut self, addr: u8, buffer: &mut [u8]) -> Result<(), Error> {
-        self.blocking_read_timeout(addr, buffer, self.timeout)
+    pub fn blocking_read(&mut self, addr: u8, read: &mut [u8]) -> Result<(), Error> {
+        self.blocking_read_timeout(addr, read, self.timeout)
     }
 
     /// Blocking write with a custom timeout
-    pub fn blocking_write_timeout(&mut self, addr: u8, bytes: &[u8], timeout: Duration) -> Result<(), Error> {
-        self.i2c.blocking_write_timeout(addr, bytes, timeout_fn(timeout))
+    pub fn blocking_write_timeout(&mut self, addr: u8, write: &[u8], timeout: Duration) -> Result<(), Error> {
+        self.i2c.blocking_write_timeout(addr, write, timeout_fn(timeout))
     }
 
     /// Blocking write with default timeout, provided in [`TimeoutI2c::new()`]
-    pub fn blocking_write(&mut self, addr: u8, bytes: &[u8]) -> Result<(), Error> {
-        self.blocking_write_timeout(addr, bytes, self.timeout)
+    pub fn blocking_write(&mut self, addr: u8, write: &[u8]) -> Result<(), Error> {
+        self.blocking_write_timeout(addr, write, self.timeout)
     }
 
     /// Blocking write-read with a custom timeout
     pub fn blocking_write_read_timeout(
         &mut self,
         addr: u8,
-        bytes: &[u8],
-        buffer: &mut [u8],
+        write: &[u8],
+        read: &mut [u8],
         timeout: Duration,
     ) -> Result<(), Error> {
         self.i2c
-            .blocking_write_read_timeout(addr, bytes, buffer, timeout_fn(timeout))
+            .blocking_write_read_timeout(addr, write, read, timeout_fn(timeout))
     }
 
     /// Blocking write-read with default timeout, provided in [`TimeoutI2c::new()`]
-    pub fn blocking_write_read(&mut self, addr: u8, bytes: &[u8], buffer: &mut [u8]) -> Result<(), Error> {
-        self.blocking_write_read_timeout(addr, bytes, buffer, self.timeout)
+    pub fn blocking_write_read(&mut self, addr: u8, write: &[u8], read: &mut [u8]) -> Result<(), Error> {
+        self.blocking_write_read_timeout(addr, write, read, self.timeout)
     }
 }
 
 impl<'d, T: Instance, TXDMA, RXDMA> embedded_hal_02::blocking::i2c::Read for TimeoutI2c<'d, T, TXDMA, RXDMA> {
     type Error = Error;
 
-    fn read(&mut self, addr: u8, buffer: &mut [u8]) -> Result<(), Self::Error> {
-        self.blocking_read(addr, buffer)
+    fn read(&mut self, addr: u8, read: &mut [u8]) -> Result<(), Self::Error> {
+        self.blocking_read(addr, read)
     }
 }
 
 impl<'d, T: Instance, TXDMA, RXDMA> embedded_hal_02::blocking::i2c::Write for TimeoutI2c<'d, T, TXDMA, RXDMA> {
     type Error = Error;
 
-    fn write(&mut self, addr: u8, bytes: &[u8]) -> Result<(), Self::Error> {
-        self.blocking_write(addr, bytes)
+    fn write(&mut self, addr: u8, write: &[u8]) -> Result<(), Self::Error> {
+        self.blocking_write(addr, write)
     }
 }
 
 impl<'d, T: Instance, TXDMA, RXDMA> embedded_hal_02::blocking::i2c::WriteRead for TimeoutI2c<'d, T, TXDMA, RXDMA> {
     type Error = Error;
 
-    fn write_read(&mut self, addr: u8, bytes: &[u8], buffer: &mut [u8]) -> Result<(), Self::Error> {
-        self.blocking_write_read(addr, bytes, buffer)
+    fn write_read(&mut self, addr: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+        self.blocking_write_read(addr, write, read)
     }
 }
 
@@ -98,45 +98,24 @@ mod eh1 {
     }
 
     impl<'d, T: Instance, TXDMA, RXDMA> embedded_hal_1::i2c::I2c for TimeoutI2c<'d, T, TXDMA, RXDMA> {
-        fn read(&mut self, address: u8, buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_read(address, buffer)
+        fn read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_read(address, read)
         }
 
-        fn write(&mut self, address: u8, buffer: &[u8]) -> Result<(), Self::Error> {
-            self.blocking_write(address, buffer)
+        fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Self::Error> {
+            self.blocking_write(address, write)
         }
 
-        fn write_iter<B>(&mut self, _address: u8, _bytes: B) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            todo!();
+        fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_write_read(address, write, read)
         }
 
-        fn write_iter_read<B>(&mut self, _address: u8, _bytes: B, _buffer: &mut [u8]) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            todo!();
-        }
-
-        fn write_read(&mut self, address: u8, wr_buffer: &[u8], rd_buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_write_read(address, wr_buffer, rd_buffer)
-        }
-
-        fn transaction<'a>(
+        fn transaction(
             &mut self,
             _address: u8,
-            _operations: &mut [embedded_hal_1::i2c::Operation<'a>],
+            _operations: &mut [embedded_hal_1::i2c::Operation<'_>],
         ) -> Result<(), Self::Error> {
             todo!();
         }
-
-        fn transaction_iter<'a, O>(&mut self, _address: u8, _operations: O) -> Result<(), Self::Error>
-        where
-            O: IntoIterator<Item = embedded_hal_1::i2c::Operation<'a>>,
-        {
-            todo!();
-        }
     }
 }
diff --git a/embassy-stm32/src/i2c/v1.rs b/embassy-stm32/src/i2c/v1.rs
index f140e2b0d..4b47f0eb1 100644
--- a/embassy-stm32/src/i2c/v1.rs
+++ b/embassy-stm32/src/i2c/v1.rs
@@ -307,18 +307,18 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         }
     }
 
-    pub fn blocking_read(&mut self, addr: u8, buffer: &mut [u8]) -> Result<(), Error> {
-        self.blocking_read_timeout(addr, buffer, || Ok(()))
+    pub fn blocking_read(&mut self, addr: u8, read: &mut [u8]) -> Result<(), Error> {
+        self.blocking_read_timeout(addr, read, || Ok(()))
     }
 
     pub fn blocking_write_timeout(
         &mut self,
         addr: u8,
-        bytes: &[u8],
+        write: &[u8],
         check_timeout: impl Fn() -> Result<(), Error>,
     ) -> Result<(), Error> {
         unsafe {
-            self.write_bytes(addr, bytes, &check_timeout)?;
+            self.write_bytes(addr, write, &check_timeout)?;
             // Send a STOP condition
             T::regs().cr1().modify(|reg| reg.set_stop(true));
             // Wait for STOP condition to transmit.
@@ -331,49 +331,49 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         Ok(())
     }
 
-    pub fn blocking_write(&mut self, addr: u8, bytes: &[u8]) -> Result<(), Error> {
-        self.blocking_write_timeout(addr, bytes, || Ok(()))
+    pub fn blocking_write(&mut self, addr: u8, write: &[u8]) -> Result<(), Error> {
+        self.blocking_write_timeout(addr, write, || Ok(()))
     }
 
     pub fn blocking_write_read_timeout(
         &mut self,
         addr: u8,
-        bytes: &[u8],
-        buffer: &mut [u8],
+        write: &[u8],
+        read: &mut [u8],
         check_timeout: impl Fn() -> Result<(), Error>,
     ) -> Result<(), Error> {
-        unsafe { self.write_bytes(addr, bytes, &check_timeout)? };
-        self.blocking_read_timeout(addr, buffer, &check_timeout)?;
+        unsafe { self.write_bytes(addr, write, &check_timeout)? };
+        self.blocking_read_timeout(addr, read, &check_timeout)?;
 
         Ok(())
     }
 
-    pub fn blocking_write_read(&mut self, addr: u8, bytes: &[u8], buffer: &mut [u8]) -> Result<(), Error> {
-        self.blocking_write_read_timeout(addr, bytes, buffer, || Ok(()))
+    pub fn blocking_write_read(&mut self, addr: u8, write: &[u8], read: &mut [u8]) -> Result<(), Error> {
+        self.blocking_write_read_timeout(addr, write, read, || Ok(()))
     }
 }
 
 impl<'d, T: Instance> embedded_hal_02::blocking::i2c::Read for I2c<'d, T> {
     type Error = Error;
 
-    fn read(&mut self, addr: u8, buffer: &mut [u8]) -> Result<(), Self::Error> {
-        self.blocking_read(addr, buffer)
+    fn read(&mut self, addr: u8, read: &mut [u8]) -> Result<(), Self::Error> {
+        self.blocking_read(addr, read)
     }
 }
 
 impl<'d, T: Instance> embedded_hal_02::blocking::i2c::Write for I2c<'d, T> {
     type Error = Error;
 
-    fn write(&mut self, addr: u8, bytes: &[u8]) -> Result<(), Self::Error> {
-        self.blocking_write(addr, bytes)
+    fn write(&mut self, addr: u8, write: &[u8]) -> Result<(), Self::Error> {
+        self.blocking_write(addr, write)
     }
 }
 
 impl<'d, T: Instance> embedded_hal_02::blocking::i2c::WriteRead for I2c<'d, T> {
     type Error = Error;
 
-    fn write_read(&mut self, addr: u8, bytes: &[u8], buffer: &mut [u8]) -> Result<(), Self::Error> {
-        self.blocking_write_read(addr, bytes, buffer)
+    fn write_read(&mut self, addr: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+        self.blocking_write_read(addr, write, read)
     }
 }
 
@@ -402,46 +402,25 @@ mod eh1 {
     }
 
     impl<'d, T: Instance> embedded_hal_1::i2c::I2c for I2c<'d, T> {
-        fn read(&mut self, address: u8, buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_read(address, buffer)
+        fn read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_read(address, read)
         }
 
-        fn write(&mut self, address: u8, buffer: &[u8]) -> Result<(), Self::Error> {
-            self.blocking_write(address, buffer)
+        fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Self::Error> {
+            self.blocking_write(address, write)
         }
 
-        fn write_iter<B>(&mut self, _address: u8, _bytes: B) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            todo!();
+        fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_write_read(address, write, read)
         }
 
-        fn write_iter_read<B>(&mut self, _address: u8, _bytes: B, _buffer: &mut [u8]) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            todo!();
-        }
-
-        fn write_read(&mut self, address: u8, wr_buffer: &[u8], rd_buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_write_read(address, wr_buffer, rd_buffer)
-        }
-
-        fn transaction<'a>(
+        fn transaction(
             &mut self,
             _address: u8,
-            _operations: &mut [embedded_hal_1::i2c::Operation<'a>],
+            _operations: &mut [embedded_hal_1::i2c::Operation<'_>],
         ) -> Result<(), Self::Error> {
             todo!();
         }
-
-        fn transaction_iter<'a, O>(&mut self, _address: u8, _operations: O) -> Result<(), Self::Error>
-        where
-            O: IntoIterator<Item = embedded_hal_1::i2c::Operation<'a>>,
-        {
-            todo!();
-        }
     }
 }
 
diff --git a/embassy-stm32/src/i2c/v2.rs b/embassy-stm32/src/i2c/v2.rs
index 06ff07b21..7218f7706 100644
--- a/embassy-stm32/src/i2c/v2.rs
+++ b/embassy-stm32/src/i2c/v2.rs
@@ -262,7 +262,7 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
             if T::regs().isr().read().txis() {
                 T::regs().txdr().write(|w| w.set_txdata(0));
             }
-            if T::regs().isr().read().txe() {
+            if !T::regs().isr().read().txe() {
                 T::regs().isr().modify(|w| w.set_txe(true))
             }
         }
@@ -345,12 +345,12 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     fn read_internal(
         &mut self,
         address: u8,
-        buffer: &mut [u8],
+        read: &mut [u8],
         restart: bool,
         check_timeout: impl Fn() -> Result<(), Error>,
     ) -> Result<(), Error> {
-        let completed_chunks = buffer.len() / 255;
-        let total_chunks = if completed_chunks * 255 == buffer.len() {
+        let completed_chunks = read.len() / 255;
+        let total_chunks = if completed_chunks * 255 == read.len() {
             completed_chunks
         } else {
             completed_chunks + 1
@@ -360,7 +360,7 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         unsafe {
             Self::master_read(
                 address,
-                buffer.len().min(255),
+                read.len().min(255),
                 Stop::Automatic,
                 last_chunk_idx != 0,
                 restart,
@@ -368,7 +368,7 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
             )?;
         }
 
-        for (number, chunk) in buffer.chunks_mut(255).enumerate() {
+        for (number, chunk) in read.chunks_mut(255).enumerate() {
             if number != 0 {
                 // NOTE(unsafe) We have &mut self
                 unsafe {
@@ -391,12 +391,12 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     fn write_internal(
         &mut self,
         address: u8,
-        bytes: &[u8],
+        write: &[u8],
         send_stop: bool,
         check_timeout: impl Fn() -> Result<(), Error>,
     ) -> Result<(), Error> {
-        let completed_chunks = bytes.len() / 255;
-        let total_chunks = if completed_chunks * 255 == bytes.len() {
+        let completed_chunks = write.len() / 255;
+        let total_chunks = if completed_chunks * 255 == write.len() {
             completed_chunks
         } else {
             completed_chunks + 1
@@ -410,14 +410,14 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         unsafe {
             Self::master_write(
                 address,
-                bytes.len().min(255),
+                write.len().min(255),
                 Stop::Software,
                 last_chunk_idx != 0,
                 &check_timeout,
             )?;
         }
 
-        for (number, chunk) in bytes.chunks(255).enumerate() {
+        for (number, chunk) in write.chunks(255).enumerate() {
             if number != 0 {
                 // NOTE(unsafe) We have &mut self
                 unsafe {
@@ -448,7 +448,7 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     async fn write_dma_internal(
         &mut self,
         address: u8,
-        bytes: &[u8],
+        write: &[u8],
         first_slice: bool,
         last_slice: bool,
         check_timeout: impl Fn() -> Result<(), Error>,
@@ -456,7 +456,7 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     where
         TXDMA: crate::i2c::TxDma<T>,
     {
-        let total_len = bytes.len();
+        let total_len = write.len();
         let completed_chunks = total_len / 255;
         let total_chunks = if completed_chunks * 255 == total_len {
             completed_chunks
@@ -476,7 +476,7 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
 
             let ch = &mut self.tx_dma;
             let request = ch.request();
-            crate::dma::write(ch, request, bytes, dst)
+            crate::dma::write(ch, request, write, dst)
         };
 
         let state = T::state();
@@ -641,25 +641,25 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     // =========================
     //  Async public API
 
-    pub async fn write(&mut self, address: u8, bytes: &[u8]) -> Result<(), Error>
+    pub async fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Error>
     where
         TXDMA: crate::i2c::TxDma<T>,
     {
-        if bytes.is_empty() {
-            self.write_internal(address, bytes, true, || Ok(()))
+        if write.is_empty() {
+            self.write_internal(address, write, true, || Ok(()))
         } else {
-            self.write_dma_internal(address, bytes, true, true, || Ok(())).await
+            self.write_dma_internal(address, write, true, true, || Ok(())).await
         }
     }
 
-    pub async fn write_vectored(&mut self, address: u8, bytes: &[&[u8]]) -> Result<(), Error>
+    pub async fn write_vectored(&mut self, address: u8, write: &[&[u8]]) -> Result<(), Error>
     where
         TXDMA: crate::i2c::TxDma<T>,
     {
-        if bytes.is_empty() {
+        if write.is_empty() {
             return Err(Error::ZeroLengthTransfer);
         }
-        let mut iter = bytes.iter();
+        let mut iter = write.iter();
 
         let mut first = true;
         let mut current = iter.next();
@@ -685,21 +685,21 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         }
     }
 
-    pub async fn write_read(&mut self, address: u8, bytes: &[u8], buffer: &mut [u8]) -> Result<(), Error>
+    pub async fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Error>
     where
         TXDMA: super::TxDma<T>,
         RXDMA: super::RxDma<T>,
     {
-        if bytes.is_empty() {
-            self.write_internal(address, bytes, false, || Ok(()))?;
+        if write.is_empty() {
+            self.write_internal(address, write, false, || Ok(()))?;
         } else {
-            self.write_dma_internal(address, bytes, true, true, || Ok(())).await?;
+            self.write_dma_internal(address, write, true, true, || Ok(())).await?;
         }
 
-        if buffer.is_empty() {
-            self.read_internal(address, buffer, true, || Ok(()))?;
+        if read.is_empty() {
+            self.read_internal(address, read, true, || Ok(()))?;
         } else {
-            self.read_dma_internal(address, buffer, true, || Ok(())).await?;
+            self.read_dma_internal(address, read, true, || Ok(())).await?;
         }
 
         Ok(())
@@ -711,57 +711,57 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
     pub fn blocking_read_timeout(
         &mut self,
         address: u8,
-        buffer: &mut [u8],
+        read: &mut [u8],
         check_timeout: impl Fn() -> Result<(), Error>,
     ) -> Result<(), Error> {
-        self.read_internal(address, buffer, false, &check_timeout)
+        self.read_internal(address, read, false, &check_timeout)
         // Automatic Stop
     }
 
-    pub fn blocking_read(&mut self, address: u8, buffer: &mut [u8]) -> Result<(), Error> {
-        self.blocking_read_timeout(address, buffer, || Ok(()))
+    pub fn blocking_read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Error> {
+        self.blocking_read_timeout(address, read, || Ok(()))
     }
 
     pub fn blocking_write_timeout(
         &mut self,
         address: u8,
-        bytes: &[u8],
+        write: &[u8],
         check_timeout: impl Fn() -> Result<(), Error>,
     ) -> Result<(), Error> {
-        self.write_internal(address, bytes, true, &check_timeout)
+        self.write_internal(address, write, true, &check_timeout)
     }
 
-    pub fn blocking_write(&mut self, address: u8, bytes: &[u8]) -> Result<(), Error> {
-        self.blocking_write_timeout(address, bytes, || Ok(()))
+    pub fn blocking_write(&mut self, address: u8, write: &[u8]) -> Result<(), Error> {
+        self.blocking_write_timeout(address, write, || Ok(()))
     }
 
     pub fn blocking_write_read_timeout(
         &mut self,
         address: u8,
-        bytes: &[u8],
-        buffer: &mut [u8],
+        write: &[u8],
+        read: &mut [u8],
         check_timeout: impl Fn() -> Result<(), Error>,
     ) -> Result<(), Error> {
-        self.write_internal(address, bytes, false, &check_timeout)?;
-        self.read_internal(address, buffer, true, &check_timeout)
+        self.write_internal(address, write, false, &check_timeout)?;
+        self.read_internal(address, read, true, &check_timeout)
         // Automatic Stop
     }
 
-    pub fn blocking_write_read(&mut self, address: u8, bytes: &[u8], buffer: &mut [u8]) -> Result<(), Error> {
-        self.blocking_write_read_timeout(address, bytes, buffer, || Ok(()))
+    pub fn blocking_write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Error> {
+        self.blocking_write_read_timeout(address, write, read, || Ok(()))
     }
 
     pub fn blocking_write_vectored_timeout(
         &mut self,
         address: u8,
-        bytes: &[&[u8]],
+        write: &[&[u8]],
         check_timeout: impl Fn() -> Result<(), Error>,
     ) -> Result<(), Error> {
-        if bytes.is_empty() {
+        if write.is_empty() {
             return Err(Error::ZeroLengthTransfer);
         }
-        let first_length = bytes[0].len();
-        let last_slice_index = bytes.len() - 1;
+        let first_length = write[0].len();
+        let last_slice_index = write.len() - 1;
 
         // NOTE(unsafe) We have &mut self
         unsafe {
@@ -774,7 +774,7 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
             )?;
         }
 
-        for (idx, slice) in bytes.iter().enumerate() {
+        for (idx, slice) in write.iter().enumerate() {
             let slice_len = slice.len();
             let completed_chunks = slice_len / 255;
             let total_chunks = if completed_chunks * 255 == slice_len {
@@ -828,8 +828,8 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
         Ok(())
     }
 
-    pub fn blocking_write_vectored(&mut self, address: u8, bytes: &[&[u8]]) -> Result<(), Error> {
-        self.blocking_write_vectored_timeout(address, bytes, || Ok(()))
+    pub fn blocking_write_vectored(&mut self, address: u8, write: &[&[u8]]) -> Result<(), Error> {
+        self.blocking_write_vectored_timeout(address, write, || Ok(()))
     }
 }
 
@@ -847,16 +847,16 @@ mod eh02 {
     impl<'d, T: Instance> embedded_hal_02::blocking::i2c::Write for I2c<'d, T> {
         type Error = Error;
 
-        fn write(&mut self, address: u8, bytes: &[u8]) -> Result<(), Self::Error> {
-            self.blocking_write(address, bytes)
+        fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Self::Error> {
+            self.blocking_write(address, write)
         }
     }
 
     impl<'d, T: Instance> embedded_hal_02::blocking::i2c::WriteRead for I2c<'d, T> {
         type Error = Error;
 
-        fn write_read(&mut self, address: u8, bytes: &[u8], buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_write_read(address, bytes, buffer)
+        fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_write_read(address, write, read)
         }
     }
 }
@@ -1010,46 +1010,25 @@ mod eh1 {
     }
 
     impl<'d, T: Instance> embedded_hal_1::i2c::I2c for I2c<'d, T, NoDma, NoDma> {
-        fn read(&mut self, address: u8, buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_read(address, buffer)
+        fn read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_read(address, read)
         }
 
-        fn write(&mut self, address: u8, buffer: &[u8]) -> Result<(), Self::Error> {
-            self.blocking_write(address, buffer)
+        fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Self::Error> {
+            self.blocking_write(address, write)
         }
 
-        fn write_iter<B>(&mut self, _address: u8, _bytes: B) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            todo!();
+        fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
+            self.blocking_write_read(address, write, read)
         }
 
-        fn write_iter_read<B>(&mut self, _address: u8, _bytes: B, _buffer: &mut [u8]) -> Result<(), Self::Error>
-        where
-            B: IntoIterator<Item = u8>,
-        {
-            todo!();
-        }
-
-        fn write_read(&mut self, address: u8, wr_buffer: &[u8], rd_buffer: &mut [u8]) -> Result<(), Self::Error> {
-            self.blocking_write_read(address, wr_buffer, rd_buffer)
-        }
-
-        fn transaction<'a>(
+        fn transaction(
             &mut self,
             _address: u8,
-            _operations: &mut [embedded_hal_1::i2c::Operation<'a>],
+            _operations: &mut [embedded_hal_1::i2c::Operation<'_>],
         ) -> Result<(), Self::Error> {
             todo!();
         }
-
-        fn transaction_iter<'a, O>(&mut self, _address: u8, _operations: O) -> Result<(), Self::Error>
-        where
-            O: IntoIterator<Item = embedded_hal_1::i2c::Operation<'a>>,
-        {
-            todo!();
-        }
     }
 }
 
@@ -1059,27 +1038,22 @@ mod eha {
     use super::*;
 
     impl<'d, T: Instance, TXDMA: TxDma<T>, RXDMA: RxDma<T>> embedded_hal_async::i2c::I2c for I2c<'d, T, TXDMA, RXDMA> {
-        async fn read<'a>(&'a mut self, address: u8, read: &'a mut [u8]) -> Result<(), Self::Error> {
+        async fn read(&mut self, address: u8, read: &mut [u8]) -> Result<(), Self::Error> {
             self.read(address, read).await
         }
 
-        async fn write<'a>(&'a mut self, address: u8, write: &'a [u8]) -> Result<(), Self::Error> {
+        async fn write(&mut self, address: u8, write: &[u8]) -> Result<(), Self::Error> {
             self.write(address, write).await
         }
 
-        async fn write_read<'a>(
-            &'a mut self,
-            address: u8,
-            write: &'a [u8],
-            read: &'a mut [u8],
-        ) -> Result<(), Self::Error> {
+        async fn write_read(&mut self, address: u8, write: &[u8], read: &mut [u8]) -> Result<(), Self::Error> {
             self.write_read(address, write, read).await
         }
 
-        async fn transaction<'a, 'b>(
-            &'a mut self,
+        async fn transaction(
+            &mut self,
             address: u8,
-            operations: &'a mut [embedded_hal_1::i2c::Operation<'b>],
+            operations: &mut [embedded_hal_1::i2c::Operation<'_>],
         ) -> Result<(), Self::Error> {
             let _ = address;
             let _ = operations;
diff --git a/embassy-stm32/src/lib.rs b/embassy-stm32/src/lib.rs
index 8dc4df2dc..d4d7155bd 100644
--- a/embassy-stm32/src/lib.rs
+++ b/embassy-stm32/src/lib.rs
@@ -43,9 +43,6 @@ pub mod i2c;
 
 #[cfg(crc)]
 pub mod crc;
-#[cfg(any(
-    flash_l0, flash_l1, flash_wl, flash_wb, flash_l4, flash_f3, flash_f4, flash_f7, flash_h7
-))]
 pub mod flash;
 pub mod pwm;
 #[cfg(quadspi)]
@@ -56,6 +53,8 @@ pub mod rng;
 pub mod sdmmc;
 #[cfg(spi)]
 pub mod spi;
+#[cfg(stm32wl)]
+pub mod subghz;
 #[cfg(usart)]
 pub mod usart;
 #[cfg(all(usb, feature = "time"))]
@@ -65,9 +64,6 @@ pub mod usb_otg;
 #[cfg(iwdg)]
 pub mod wdg;
 
-#[cfg(feature = "subghz")]
-pub mod subghz;
-
 // This must go last, so that it sees all the impl_foo! macros defined earlier.
 pub(crate) mod _generated {
     #![allow(dead_code)]
diff --git a/embassy-stm32/src/pwm/complementary_pwm.rs b/embassy-stm32/src/pwm/complementary_pwm.rs
new file mode 100644
index 000000000..13edfbaa3
--- /dev/null
+++ b/embassy-stm32/src/pwm/complementary_pwm.rs
@@ -0,0 +1,124 @@
+use core::marker::PhantomData;
+
+use embassy_hal_common::{into_ref, PeripheralRef};
+pub use stm32_metapac::timer::vals::Ckd;
+
+use super::simple_pwm::*;
+use super::*;
+#[allow(unused_imports)]
+use crate::gpio::sealed::{AFType, Pin};
+use crate::gpio::AnyPin;
+use crate::time::Hertz;
+use crate::Peripheral;
+
+pub struct ComplementaryPwmPin<'d, Perip, Channel> {
+    _pin: PeripheralRef<'d, AnyPin>,
+    phantom: PhantomData<(Perip, Channel)>,
+}
+
+macro_rules! complementary_channel_impl {
+    ($new_chx:ident, $channel:ident, $pin_trait:ident, $complementary_pin_trait:ident) => {
+        impl<'d, Perip: CaptureCompare16bitInstance> ComplementaryPwmPin<'d, Perip, $channel> {
+            pub fn $new_chx(pin: impl Peripheral<P = impl $complementary_pin_trait<Perip>> + 'd) -> Self {
+                into_ref!(pin);
+                critical_section::with(|_| unsafe {
+                    pin.set_low();
+                    pin.set_as_af(pin.af_num(), AFType::OutputPushPull);
+                    #[cfg(gpio_v2)]
+                    pin.set_speed(crate::gpio::Speed::VeryHigh);
+                });
+                ComplementaryPwmPin {
+                    _pin: pin.map_into(),
+                    phantom: PhantomData,
+                }
+            }
+        }
+    };
+}
+
+complementary_channel_impl!(new_ch1, Ch1, Channel1Pin, Channel1ComplementaryPin);
+complementary_channel_impl!(new_ch2, Ch2, Channel2Pin, Channel2ComplementaryPin);
+complementary_channel_impl!(new_ch3, Ch3, Channel3Pin, Channel3ComplementaryPin);
+complementary_channel_impl!(new_ch4, Ch4, Channel4Pin, Channel4ComplementaryPin);
+
+pub struct ComplementaryPwm<'d, T> {
+    inner: PeripheralRef<'d, T>,
+}
+
+impl<'d, T: ComplementaryCaptureCompare16bitInstance> ComplementaryPwm<'d, T> {
+    pub fn new(
+        tim: impl Peripheral<P = T> + 'd,
+        _ch1: Option<PwmPin<'d, T, Ch1>>,
+        _ch1n: Option<ComplementaryPwmPin<'d, T, Ch1>>,
+        _ch2: Option<PwmPin<'d, T, Ch2>>,
+        _ch2n: Option<ComplementaryPwmPin<'d, T, Ch2>>,
+        _ch3: Option<PwmPin<'d, T, Ch3>>,
+        _ch3n: Option<ComplementaryPwmPin<'d, T, Ch3>>,
+        _ch4: Option<PwmPin<'d, T, Ch4>>,
+        _ch4n: Option<ComplementaryPwmPin<'d, T, Ch4>>,
+        freq: Hertz,
+    ) -> Self {
+        Self::new_inner(tim, freq)
+    }
+
+    fn new_inner(tim: impl Peripheral<P = T> + 'd, freq: Hertz) -> Self {
+        into_ref!(tim);
+
+        T::enable();
+        <T as crate::rcc::sealed::RccPeripheral>::reset();
+
+        let mut this = Self { inner: tim };
+
+        this.inner.set_frequency(freq);
+        this.inner.start();
+
+        unsafe {
+            this.inner.enable_outputs(true);
+
+            this.inner
+                .set_output_compare_mode(Channel::Ch1, OutputCompareMode::PwmMode1);
+            this.inner
+                .set_output_compare_mode(Channel::Ch2, OutputCompareMode::PwmMode1);
+            this.inner
+                .set_output_compare_mode(Channel::Ch3, OutputCompareMode::PwmMode1);
+            this.inner
+                .set_output_compare_mode(Channel::Ch4, OutputCompareMode::PwmMode1);
+        }
+        this
+    }
+
+    pub fn enable(&mut self, channel: Channel) {
+        unsafe {
+            self.inner.enable_channel(channel, true);
+            self.inner.enable_complementary_channel(channel, true);
+        }
+    }
+
+    pub fn disable(&mut self, channel: Channel) {
+        unsafe {
+            self.inner.enable_complementary_channel(channel, false);
+            self.inner.enable_channel(channel, false);
+        }
+    }
+
+    pub fn set_freq(&mut self, freq: Hertz) {
+        self.inner.set_frequency(freq);
+    }
+
+    pub fn get_max_duty(&self) -> u16 {
+        unsafe { self.inner.get_max_compare_value() }
+    }
+
+    pub fn set_duty(&mut self, channel: Channel, duty: u16) {
+        assert!(duty < self.get_max_duty());
+        unsafe { self.inner.set_compare_value(channel, duty) }
+    }
+
+    pub fn set_dead_time_clock_division(&mut self, value: Ckd) {
+        unsafe { self.inner.set_dead_time_clock_division(value) }
+    }
+
+    pub fn set_dead_time_value(&mut self, value: u8) {
+        unsafe { self.inner.set_dead_time_value(value) }
+    }
+}
diff --git a/embassy-stm32/src/pwm/mod.rs b/embassy-stm32/src/pwm/mod.rs
index d3713391c..0bef07089 100644
--- a/embassy-stm32/src/pwm/mod.rs
+++ b/embassy-stm32/src/pwm/mod.rs
@@ -1,5 +1,8 @@
+pub mod complementary_pwm;
 pub mod simple_pwm;
 
+use stm32_metapac::timer::vals::Ckd;
+
 #[cfg(feature = "unstable-pac")]
 pub mod low_level {
     pub use super::sealed::*;
@@ -67,6 +70,14 @@ pub(crate) mod sealed {
         unsafe fn get_max_compare_value(&self) -> u16;
     }
 
+    pub trait ComplementaryCaptureCompare16bitInstance: CaptureCompare16bitInstance {
+        unsafe fn set_dead_time_clock_division(&mut self, value: Ckd);
+
+        unsafe fn set_dead_time_value(&mut self, value: u8);
+
+        unsafe fn enable_complementary_channel(&mut self, channel: Channel, enable: bool);
+    }
+
     pub trait CaptureCompare32bitInstance: crate::timer::sealed::GeneralPurpose32bitInstance {
         unsafe fn set_output_compare_mode(&mut self, channel: Channel, mode: OutputCompareMode);
 
@@ -82,6 +93,12 @@ pub trait CaptureCompare16bitInstance:
     sealed::CaptureCompare16bitInstance + crate::timer::GeneralPurpose16bitInstance + 'static
 {
 }
+
+pub trait ComplementaryCaptureCompare16bitInstance:
+    sealed::ComplementaryCaptureCompare16bitInstance + crate::timer::AdvancedControlInstance + 'static
+{
+}
+
 pub trait CaptureCompare32bitInstance:
     sealed::CaptureCompare32bitInstance + CaptureCompare16bitInstance + crate::timer::GeneralPurpose32bitInstance + 'static
 {
@@ -209,6 +226,29 @@ foreach_interrupt! {
         impl CaptureCompare16bitInstance for crate::peripherals::$inst {
 
         }
+
+        impl crate::pwm::sealed::ComplementaryCaptureCompare16bitInstance for crate::peripherals::$inst {
+            unsafe fn set_dead_time_clock_division(&mut self, value: Ckd) {
+                use crate::timer::sealed::AdvancedControlInstance;
+                Self::regs_advanced().cr1().modify(|w| w.set_ckd(value));
+            }
+
+            unsafe fn set_dead_time_value(&mut self, value: u8) {
+                use crate::timer::sealed::AdvancedControlInstance;
+                Self::regs_advanced().bdtr().modify(|w| w.set_dtg(value));
+            }
+
+            unsafe fn enable_complementary_channel(&mut self, channel: Channel, enable: bool) {
+                use crate::timer::sealed::AdvancedControlInstance;
+                Self::regs_advanced()
+                    .ccer()
+                    .modify(|w| w.set_ccne(channel.raw(), enable));
+            }
+        }
+
+        impl ComplementaryCaptureCompare16bitInstance for crate::peripherals::$inst {
+
+        }
     };
 }
 
diff --git a/embassy-stm32/src/rcc/f4.rs b/embassy-stm32/src/rcc/f4.rs
index 200bcce9c..2a17eb9b0 100644
--- a/embassy-stm32/src/rcc/f4.rs
+++ b/embassy-stm32/src/rcc/f4.rs
@@ -1,8 +1,16 @@
+use core::marker::PhantomData;
+
+use embassy_hal_common::into_ref;
+use stm32_metapac::rcc::vals::{Mco1, Mco2, Mcopre};
+
 use super::sealed::RccPeripheral;
+use crate::gpio::sealed::AFType;
+use crate::gpio::Speed;
 use crate::pac::rcc::vals::{Hpre, Ppre, Sw};
 use crate::pac::{FLASH, PWR, RCC};
 use crate::rcc::{set_freqs, Clocks};
 use crate::time::Hertz;
+use crate::{peripherals, Peripheral};
 
 /// HSI speed
 pub const HSI_FREQ: Hertz = Hertz(16_000_000);
@@ -96,6 +104,164 @@ unsafe fn setup_pll(pllsrcclk: u32, use_hse: bool, pllsysclk: Option<u32>, pll48
     }
 }
 
+pub enum McoClock {
+    DIV1,
+    DIV2,
+    DIV3,
+    DIV4,
+    DIV5,
+}
+
+impl McoClock {
+    fn into_raw(&self) -> Mcopre {
+        match self {
+            McoClock::DIV1 => Mcopre::DIV1,
+            McoClock::DIV2 => Mcopre::DIV2,
+            McoClock::DIV3 => Mcopre::DIV3,
+            McoClock::DIV4 => Mcopre::DIV4,
+            McoClock::DIV5 => Mcopre::DIV5,
+        }
+    }
+}
+
+#[derive(Copy, Clone)]
+pub enum Mco1Source {
+    Hsi,
+    Lse,
+    Hse,
+    Pll,
+}
+
+impl Default for Mco1Source {
+    fn default() -> Self {
+        Self::Hsi
+    }
+}
+
+pub trait McoSource {
+    type Raw;
+
+    fn into_raw(&self) -> Self::Raw;
+}
+
+impl McoSource for Mco1Source {
+    type Raw = Mco1;
+    fn into_raw(&self) -> Self::Raw {
+        match self {
+            Mco1Source::Hsi => Mco1::HSI,
+            Mco1Source::Lse => Mco1::LSE,
+            Mco1Source::Hse => Mco1::HSE,
+            Mco1Source::Pll => Mco1::PLL,
+        }
+    }
+}
+
+#[derive(Copy, Clone)]
+pub enum Mco2Source {
+    SysClk,
+    Plli2s,
+    Hse,
+    Pll,
+}
+
+impl Default for Mco2Source {
+    fn default() -> Self {
+        Self::SysClk
+    }
+}
+
+impl McoSource for Mco2Source {
+    type Raw = Mco2;
+    fn into_raw(&self) -> Self::Raw {
+        match self {
+            Mco2Source::SysClk => Mco2::SYSCLK,
+            Mco2Source::Plli2s => Mco2::PLLI2S,
+            Mco2Source::Hse => Mco2::HSE,
+            Mco2Source::Pll => Mco2::PLL,
+        }
+    }
+}
+
+pub(crate) mod sealed {
+    use stm32_metapac::rcc::vals::Mcopre;
+    pub trait McoInstance {
+        type Source;
+        unsafe fn apply_clock_settings(source: Self::Source, prescaler: Mcopre);
+    }
+}
+
+pub trait McoInstance: sealed::McoInstance + 'static {}
+
+pin_trait!(McoPin, McoInstance);
+
+impl sealed::McoInstance for peripherals::MCO1 {
+    type Source = Mco1;
+    unsafe fn apply_clock_settings(source: Self::Source, prescaler: Mcopre) {
+        RCC.cfgr().modify(|w| {
+            w.set_mco1(source);
+            w.set_mco1pre(prescaler);
+        });
+        match source {
+            Mco1::PLL => {
+                RCC.cr().modify(|w| w.set_pllon(true));
+                while !RCC.cr().read().pllrdy() {}
+            }
+            Mco1::HSI => {
+                RCC.cr().modify(|w| w.set_hsion(true));
+                while !RCC.cr().read().hsirdy() {}
+            }
+            _ => {}
+        }
+    }
+}
+impl McoInstance for peripherals::MCO1 {}
+
+impl sealed::McoInstance for peripherals::MCO2 {
+    type Source = Mco2;
+    unsafe fn apply_clock_settings(source: Self::Source, prescaler: Mcopre) {
+        RCC.cfgr().modify(|w| {
+            w.set_mco2(source);
+            w.set_mco2pre(prescaler);
+        });
+        match source {
+            Mco2::PLL => {
+                RCC.cr().modify(|w| w.set_pllon(true));
+                while !RCC.cr().read().pllrdy() {}
+            }
+            #[cfg(not(stm32f410))]
+            Mco2::PLLI2S => {
+                RCC.cr().modify(|w| w.set_plli2son(true));
+                while !RCC.cr().read().plli2srdy() {}
+            }
+            _ => {}
+        }
+    }
+}
+impl McoInstance for peripherals::MCO2 {}
+
+pub struct Mco<'d, T: McoInstance> {
+    phantom: PhantomData<&'d mut T>,
+}
+
+impl<'d, T: McoInstance> Mco<'d, T> {
+    pub fn new(
+        _peri: impl Peripheral<P = T> + 'd,
+        pin: impl Peripheral<P = impl McoPin<T>> + 'd,
+        source: impl McoSource<Raw = T::Source>,
+        prescaler: McoClock,
+    ) -> Self {
+        into_ref!(pin);
+
+        critical_section::with(|_| unsafe {
+            T::apply_clock_settings(source.into_raw(), prescaler.into_raw());
+            pin.set_as_af(pin.af_num(), AFType::OutputPushPull);
+            pin.set_speed(Speed::VeryHigh);
+        });
+
+        Self { phantom: PhantomData }
+    }
+}
+
 unsafe fn flash_setup(sysclk: u32) {
     use crate::pac::flash::vals::Latency;
 
diff --git a/embassy-stm32/src/rcc/h5.rs b/embassy-stm32/src/rcc/h5.rs
new file mode 100644
index 000000000..17fbc6056
--- /dev/null
+++ b/embassy-stm32/src/rcc/h5.rs
@@ -0,0 +1,606 @@
+use core::marker::PhantomData;
+
+use stm32_metapac::rcc::vals::{Hpre, Ppre, Timpre};
+
+use crate::pac::pwr::vals::Vos;
+use crate::pac::rcc::vals::{Hseext, Hsidiv, Mco1, Mco2, Pllrge, Pllsrc, Pllvcosel, Sw};
+use crate::pac::{FLASH, PWR, RCC};
+use crate::rcc::{set_freqs, Clocks};
+use crate::time::Hertz;
+use crate::{peripherals, Peripheral};
+
+/// HSI speed
+pub const HSI_FREQ: Hertz = Hertz(64_000_000);
+
+/// CSI speed
+pub const CSI_FREQ: Hertz = Hertz(4_000_000);
+
+/// HSI48 speed
+pub const HSI48_FREQ: Hertz = Hertz(48_000_000);
+
+/// LSI speed
+pub const LSI_FREQ: Hertz = Hertz(32_000);
+
+const VCO_MIN: u32 = 150_000_000;
+const VCO_MAX: u32 = 420_000_000;
+const VCO_WIDE_MIN: u32 = 128_000_000;
+const VCO_WIDE_MAX: u32 = 560_000_000;
+
+/// Voltage Scale
+///
+/// Represents the voltage range feeding the CPU core. The maximum core
+/// clock frequency depends on this value.
+#[derive(Copy, Clone, PartialEq)]
+pub enum VoltageScale {
+    /// VOS 0 range VCORE 1.30V - 1.40V
+    Scale0,
+    /// VOS 1 range VCORE 1.15V - 1.26V
+    Scale1,
+    /// VOS 2 range VCORE 1.05V - 1.15V
+    Scale2,
+    /// VOS 3 range VCORE 0.95V - 1.05V
+    Scale3,
+}
+
+pub enum HseMode {
+    /// Crystal/ceramic oscillator (HSEBYP=0).
+    Oscillator,
+    /// External analog clock, low swing (HSEBYP=1, HSEEXT=0).
+    BypassAnalog,
+    /// External digital clock, full swing (HSEBYP=1, HSEEXT=1).
+    BypassDigital,
+}
+
+pub struct Hse {
+    /// HSE frequency.
+    pub freq: Hertz,
+    /// HSE mode.
+    pub mode: HseMode,
+}
+
+pub enum Hsi {
+    /// 64 MHz (HSI not divided).
+    Mhz64,
+    /// 32 MHz (HSI divided by 2).
+    Mhz32,
+    /// 16 MHz (HSI divided by 4).
+    Mhz16,
+    /// 8 MHz (HSI divided by 8).
+    Mhz8,
+}
+
+pub enum Sysclk {
+    /// HSI selected as sysclk
+    HSI,
+    /// HSE selected as sysclk
+    HSE,
+    /// CSI selected as sysclk
+    CSI,
+    /// PLL1_P selected as sysclk
+    Pll1P,
+}
+
+pub enum PllSource {
+    Hsi,
+    Csi,
+    Hse,
+}
+
+pub struct Pll {
+    /// Source clock selection.
+    pub source: PllSource,
+
+    /// PLL pre-divider (DIVM). Must be between 1 and 63.
+    pub prediv: u8,
+
+    /// PLL multiplication factor. Must be between 4 and 512.
+    pub mul: u16,
+
+    /// PLL P division factor. If None, PLL P output is disabled. Must be between 1 and 128.
+    /// On PLL1, it must be even (in particular, it cannot be 1.)
+    pub divp: Option<u16>,
+    /// PLL Q division factor. If None, PLL Q output is disabled. Must be between 1 and 128.
+    pub divq: Option<u16>,
+    /// PLL R division factor. If None, PLL R output is disabled. Must be between 1 and 128.
+    pub divr: Option<u16>,
+}
+
+/// AHB prescaler
+#[derive(Clone, Copy, PartialEq)]
+pub enum AHBPrescaler {
+    NotDivided,
+    Div2,
+    Div4,
+    Div8,
+    Div16,
+    Div64,
+    Div128,
+    Div256,
+    Div512,
+}
+
+impl AHBPrescaler {
+    fn div(&self, clk: Hertz) -> Hertz {
+        match self {
+            Self::NotDivided => clk,
+            Self::Div2 => clk / 2u32,
+            Self::Div4 => clk / 4u32,
+            Self::Div8 => clk / 8u32,
+            Self::Div16 => clk / 16u32,
+            Self::Div64 => clk / 64u32,
+            Self::Div128 => clk / 128u32,
+            Self::Div256 => clk / 256u32,
+            Self::Div512 => clk / 512u32,
+        }
+    }
+}
+
+/// APB prescaler
+#[derive(Clone, Copy)]
+pub enum APBPrescaler {
+    NotDivided,
+    Div2,
+    Div4,
+    Div8,
+    Div16,
+}
+
+impl APBPrescaler {
+    fn div(&self, clk: Hertz) -> Hertz {
+        match self {
+            Self::NotDivided => clk,
+            Self::Div2 => clk / 2u32,
+            Self::Div4 => clk / 4u32,
+            Self::Div8 => clk / 8u32,
+            Self::Div16 => clk / 16u32,
+        }
+    }
+
+    fn div_tim(&self, clk: Hertz, tim: TimerPrescaler) -> Hertz {
+        match (tim, self) {
+            // The timers kernel clock is equal to rcc_hclk1 if PPRE1 or PPRE2 corresponds to a
+            // division by 1 or 2, else it is equal to 2 x Frcc_pclk1 or 2 x Frcc_pclk2
+            (TimerPrescaler::DefaultX2, Self::NotDivided) => clk,
+            (TimerPrescaler::DefaultX2, Self::Div2) => clk,
+            (TimerPrescaler::DefaultX2, Self::Div4) => clk / 2u32,
+            (TimerPrescaler::DefaultX2, Self::Div8) => clk / 4u32,
+            (TimerPrescaler::DefaultX2, Self::Div16) => clk / 8u32,
+            // The timers kernel clock is equal to 2 x Frcc_pclk1 or 2 x Frcc_pclk2 if PPRE1 or PPRE2
+            // corresponds to a division by 1, 2 or 4, else it is equal to 4 x Frcc_pclk1 or 4 x Frcc_pclk2
+            // this makes NO SENSE and is different than in the H7. Mistake in the RM??
+            (TimerPrescaler::DefaultX4, Self::NotDivided) => clk * 2u32,
+            (TimerPrescaler::DefaultX4, Self::Div2) => clk,
+            (TimerPrescaler::DefaultX4, Self::Div4) => clk / 2u32,
+            (TimerPrescaler::DefaultX4, Self::Div8) => clk / 2u32,
+            (TimerPrescaler::DefaultX4, Self::Div16) => clk / 4u32,
+        }
+    }
+}
+
+/// Timer clock prescaler selection (converted to the RCC `Timpre` value).
+#[derive(Clone, Copy)]
+pub enum TimerPrescaler {
+    DefaultX2,
+    DefaultX4,
+}
+
+impl From<TimerPrescaler> for Timpre {
+    fn from(value: TimerPrescaler) -> Self {
+        match value {
+            TimerPrescaler::DefaultX2 => Timpre::DEFAULTX2,
+            TimerPrescaler::DefaultX4 => Timpre::DEFAULTX4,
+        }
+    }
+}
+
+impl From<APBPrescaler> for Ppre {
+    fn from(val: APBPrescaler) -> Ppre {
+        match val {
+            APBPrescaler::NotDivided => Ppre::DIV1,
+            APBPrescaler::Div2 => Ppre::DIV2,
+            APBPrescaler::Div4 => Ppre::DIV4,
+            APBPrescaler::Div8 => Ppre::DIV8,
+            APBPrescaler::Div16 => Ppre::DIV16,
+        }
+    }
+}
+
+impl From<AHBPrescaler> for Hpre {
+    fn from(val: AHBPrescaler) -> Hpre {
+        match val {
+            AHBPrescaler::NotDivided => Hpre::DIV1,
+            AHBPrescaler::Div2 => Hpre::DIV2,
+            AHBPrescaler::Div4 => Hpre::DIV4,
+            AHBPrescaler::Div8 => Hpre::DIV8,
+            AHBPrescaler::Div16 => Hpre::DIV16,
+            AHBPrescaler::Div64 => Hpre::DIV64,
+            AHBPrescaler::Div128 => Hpre::DIV128,
+            AHBPrescaler::Div256 => Hpre::DIV256,
+            AHBPrescaler::Div512 => Hpre::DIV512,
+        }
+    }
+}
+
+/// Configuration of the core clocks
+#[non_exhaustive]
+pub struct Config {
+    pub hsi: Option<Hsi>,
+    pub hse: Option<Hse>,
+    pub csi: bool,
+    pub hsi48: bool,
+    pub sys: Sysclk,
+
+    pub pll1: Option<Pll>,
+    pub pll2: Option<Pll>,
+    #[cfg(rcc_h5)]
+    pub pll3: Option<Pll>,
+
+    pub ahb_pre: AHBPrescaler,
+    pub apb1_pre: APBPrescaler,
+    pub apb2_pre: APBPrescaler,
+    pub apb3_pre: APBPrescaler,
+    pub timer_prescaler: TimerPrescaler,
+
+    pub voltage_scale: VoltageScale,
+}
+
+impl Default for Config {
+    fn default() -> Self {
+        Self {
+            hsi: Some(Hsi::Mhz64),
+            hse: None,
+            csi: false,
+            hsi48: false,
+            sys: Sysclk::HSI,
+            pll1: None,
+            pll2: None,
+            #[cfg(rcc_h5)]
+            pll3: None,
+
+            ahb_pre: AHBPrescaler::NotDivided,
+            apb1_pre: APBPrescaler::NotDivided,
+            apb2_pre: APBPrescaler::NotDivided,
+            apb3_pre: APBPrescaler::NotDivided,
+            timer_prescaler: TimerPrescaler::DefaultX2,
+
+            voltage_scale: VoltageScale::Scale3,
+        }
+    }
+}
+
+pub(crate) mod sealed {
+    pub trait McoInstance {
+        type Source;
+        unsafe fn apply_clock_settings(source: Self::Source, prescaler: u8);
+    }
+}
+
+pub trait McoInstance: sealed::McoInstance + 'static {}
+
+pin_trait!(McoPin, McoInstance);
+
+macro_rules! impl_peri {
+    ($peri:ident, $source:ident, $set_source:ident, $set_prescaler:ident) => {
+        impl sealed::McoInstance for peripherals::$peri {
+            type Source = $source;
+
+            unsafe fn apply_clock_settings(source: Self::Source, prescaler: u8) {
+                RCC.cfgr().modify(|w| {
+                    w.$set_source(source);
+                    w.$set_prescaler(prescaler);
+                });
+            }
+        }
+
+        impl McoInstance for peripherals::$peri {}
+    };
+}
+
+impl_peri!(MCO1, Mco1, set_mco1, set_mco1pre);
+impl_peri!(MCO2, Mco2, set_mco2, set_mco2pre);
+
+pub struct Mco<'d, T: McoInstance> {
+    phantom: PhantomData<&'d mut T>,
+}
+
+impl<'d, T: McoInstance> Mco<'d, T> {
+    pub fn new(
+        _peri: impl Peripheral<P = T> + 'd,
+        _pin: impl Peripheral<P = impl McoPin<T>> + 'd,
+        _source: T::Source,
+    ) -> Self {
+        todo!();
+    }
+}
+
+/// Applies `config`: voltage scale, oscillators, PLLs, bus prescalers and sysclk, then calls `set_freqs`.
+/// Panics if the selected sysclk source is not enabled or is faster than the voltage scale allows.
+pub(crate) unsafe fn init(config: Config) {
+    let (vos, max_clk) = match config.voltage_scale {
+        VoltageScale::Scale0 => (Vos::SCALE0, Hertz(250_000_000)),
+        VoltageScale::Scale1 => (Vos::SCALE1, Hertz(200_000_000)),
+        VoltageScale::Scale2 => (Vos::SCALE2, Hertz(150_000_000)),
+        VoltageScale::Scale3 => (Vos::SCALE3, Hertz(100_000_000)),
+    };
+
+    // Configure voltage scale.
+    PWR.voscr().modify(|w| w.set_vos(vos));
+    while !PWR.vossr().read().vosrdy() {}
+
+    // Configure HSI
+    let hsi = match config.hsi {
+        None => {
+            RCC.cr().modify(|w| w.set_hsion(false));
+            None
+        }
+        Some(hsi) => {
+            let (freq, hsidiv) = match hsi {
+                Hsi::Mhz64 => (HSI_FREQ / 1u32, Hsidiv::DIV1),
+                Hsi::Mhz32 => (HSI_FREQ / 2u32, Hsidiv::DIV2),
+                Hsi::Mhz16 => (HSI_FREQ / 4u32, Hsidiv::DIV4),
+                Hsi::Mhz8 => (HSI_FREQ / 8u32, Hsidiv::DIV8),
+            };
+            RCC.cr().modify(|w| {
+                w.set_hsidiv(hsidiv);
+                w.set_hsion(true);
+            });
+            while !RCC.cr().read().hsirdy() {}
+            Some(freq)
+        }
+    };
+
+    // Configure HSE
+    let hse = match config.hse {
+        None => {
+            RCC.cr().modify(|w| w.set_hseon(false));
+            None
+        }
+        Some(hse) => {
+            let (byp, ext) = match hse.mode {
+                HseMode::Oscillator => (false, Hseext::ANALOG),
+                HseMode::BypassAnalog => (true, Hseext::ANALOG),
+                HseMode::BypassDigital => (true, Hseext::DIGITAL),
+            };
+            RCC.cr().modify(|w| {
+                w.set_hsebyp(byp);
+                w.set_hseext(ext);
+            });
+            RCC.cr().modify(|w| w.set_hseon(true));
+            while !RCC.cr().read().hserdy() {}
+            Some(hse.freq)
+        }
+    };
+
+    // Configure HSI48.
+    RCC.cr().modify(|w| w.set_hsi48on(config.hsi48));
+    let _hsi48 = match config.hsi48 {
+        false => None,
+        true => {
+            while !RCC.cr().read().hsi48rdy() {}
+            Some(HSI48_FREQ)
+        }
+    };
+
+    // Configure CSI.
+    RCC.cr().modify(|w| w.set_csion(config.csi));
+    let csi = match config.csi {
+        false => None,
+        true => {
+            while !RCC.cr().read().csirdy() {}
+            Some(CSI_FREQ)
+        }
+    };
+
+    // Configure PLLs.
+    let pll_input = PllInput { csi, hse, hsi };
+    let pll1 = init_pll(0, config.pll1, &pll_input);
+    let _pll2 = init_pll(1, config.pll2, &pll_input);
+    #[cfg(rcc_h5)]
+    let _pll3 = init_pll(2, config.pll3, &pll_input);
+
+    // Configure sysclk
+    let (sys, sw) = match config.sys {
+        Sysclk::HSI => (unwrap!(hsi), Sw::HSI),
+        Sysclk::HSE => (unwrap!(hse), Sw::HSE),
+        Sysclk::CSI => (unwrap!(csi), Sw::CSI),
+        Sysclk::Pll1P => (unwrap!(pll1.p), Sw::PLL1),
+    };
+    assert!(sys <= max_clk);
+
+    let hclk = config.ahb_pre.div(sys);
+    let apb1 = config.apb1_pre.div(hclk);
+    let apb1_tim = config.apb1_pre.div_tim(hclk, config.timer_prescaler);
+    let apb2 = config.apb2_pre.div(hclk);
+    let apb2_tim = config.apb2_pre.div_tim(hclk, config.timer_prescaler);
+    let apb3 = config.apb3_pre.div(hclk);
+
+    // Program flash wait states for the new HCLK before switching sysclk below.
+    flash_setup(hclk, config.voltage_scale);
+
+    // Set hpre
+    let hpre = config.ahb_pre.into();
+    RCC.cfgr2().modify(|w| w.set_hpre(hpre));
+    while RCC.cfgr2().read().hpre() != hpre {}
+
+    // set ppre
+    RCC.cfgr2().modify(|w| {
+        w.set_ppre1(config.apb1_pre.into());
+        w.set_ppre2(config.apb2_pre.into());
+        w.set_ppre3(config.apb3_pre.into());
+    });
+    RCC.cfgr().modify(|w| w.set_timpre(config.timer_prescaler.into()));
+
+    RCC.cfgr().modify(|w| w.set_sw(sw));
+    while RCC.cfgr().read().sws() != sw {}
+
+    set_freqs(Clocks {
+        sys,
+        ahb1: hclk,
+        ahb2: hclk,
+        ahb3: hclk,
+        ahb4: hclk,
+        apb1,
+        apb2,
+        apb3,
+        apb1_tim,
+        apb2_tim,
+        adc: None,
+    });
+}
+
+struct PllInput {
+    hsi: Option<Hertz>,
+    hse: Option<Hertz>,
+    csi: Option<Hertz>,
+}
+
+struct PllOutput {
+    p: Option<Hertz>,
+    #[allow(dead_code)]
+    q: Option<Hertz>,
+    #[allow(dead_code)]
+    r: Option<Hertz>,
+}
+
+/// Configures PLL `num` (0-based index) from `config`, or stops it when `config` is `None`.
+/// Returns the enabled P/Q/R output frequencies; panics if the source is off or a parameter is out of range.
+unsafe fn init_pll(num: usize, config: Option<Pll>, input: &PllInput) -> PllOutput {
+    let Some(config) = config else {
+        // Stop PLL
+        RCC.cr().modify(|w| w.set_pllon(num, false));
+        while RCC.cr().read().pllrdy(num) {}
+
+        // "To save power when PLL1 is not used, the value of PLL1M must be set to 0."
+        RCC.pllcfgr(num).write(|w| {
+            w.set_divm(0);
+        });
+        return PllOutput {
+            p: None,
+            q: None,
+            r: None,
+        };
+    };
+
+    assert!(1 <= config.prediv && config.prediv <= 63);
+    assert!(4 <= config.mul && config.mul <= 512);
+
+    let (in_clk, src) = match config.source {
+        PllSource::Hsi => (unwrap!(input.hsi), Pllsrc::HSI),
+        PllSource::Hse => (unwrap!(input.hse), Pllsrc::HSE),
+        PllSource::Csi => (unwrap!(input.csi), Pllsrc::CSI),
+    };
+    let ref_clk = in_clk / config.prediv as u32;
+
+    let ref_range = match ref_clk.0 {
+        ..=1_999_999 => Pllrge::RANGE1,
+        ..=3_999_999 => Pllrge::RANGE2,
+        ..=7_999_999 => Pllrge::RANGE4,
+        ..=16_000_000 => Pllrge::RANGE8,
+        x => panic!("pll ref_clk out of range: {} hz", x),
+    };
+
+    // The smaller range (150 to 420 MHz) must
+    // be chosen when the reference clock frequency is lower than 2 MHz.
+    let wide_allowed = ref_range != Pllrge::RANGE1;
+
+    // ref_clk (up to 16 MHz) times mul (up to 512) can exceed u32::MAX: fail loudly instead of wrapping.
+    let vco_clk = Hertz(unwrap!(ref_clk.0.checked_mul(u32::from(config.mul))));
+    let vco_range = match vco_clk.0 {
+        VCO_MIN..=VCO_MAX => Pllvcosel::MEDIUMVCO,
+        VCO_WIDE_MIN..=VCO_WIDE_MAX if wide_allowed => Pllvcosel::WIDEVCO,
+        x => panic!("pll vco_clk out of range: {} hz", x),
+    };
+
+    let p = config.divp.map(|div| {
+        assert!(1 <= div && div <= 128);
+        if num == 0 {
+            // on PLL1, DIVP must be even.
+            assert!(div % 2 == 0);
+        }
+        vco_clk / div
+    });
+    let q = config.divq.map(|div| {
+        assert!(1 <= div && div <= 128);
+        vco_clk / div
+    });
+    let r = config.divr.map(|div| {
+        assert!(1 <= div && div <= 128);
+        vco_clk / div
+    });
+
+    RCC.pllcfgr(num).write(|w| {
+        w.set_pllsrc(src);
+        w.set_divm(config.prediv);
+        w.set_pllvcosel(vco_range);
+        w.set_pllrge(ref_range);
+        w.set_pllfracen(false);
+        w.set_pllpen(p.is_some());
+        w.set_pllqen(q.is_some());
+        w.set_pllren(r.is_some());
+    });
+    RCC.plldivr(num).write(|w| {
+        w.set_plln(config.mul - 1);
+        w.set_pllp((config.divp.unwrap_or(1) - 1) as u8);
+        w.set_pllq((config.divq.unwrap_or(1) - 1) as u8);
+        w.set_pllr((config.divr.unwrap_or(1) - 1) as u8);
+    });
+
+    RCC.cr().modify(|w| w.set_pllon(num, true));
+    while !RCC.cr().read().pllrdy(num) {}
+
+    PllOutput { p, q, r }
+}
+
+/// Programs the flash `LATENCY` and `WRHIGHFREQ` fields in `FLASH.ACR` for `clk` at voltage scale `vos`.
+fn flash_setup(clk: Hertz, vos: VoltageScale) {
+    // RM0481 Rev 1, table 37
+    // LATENCY  WRHIGHFREQ  VOS3           VOS2            VOS1            VOS0
+    //      0           0   0 to 20 MHz    0 to 30 MHz     0 to 34 MHz     0 to 42 MHz
+    //      1           0   20 to 40 MHz   30 to 60 MHz    34 to 68 MHz    42 to 84 MHz
+    //      2           1   40 to 60 MHz   60 to 90 MHz    68 to 102 MHz   84 to 126 MHz
+    //      3           1   60 to 80 MHz   90 to 120 MHz   102 to 136 MHz  126 to 168 MHz
+    //      4           2   80 to 100 MHz  120 to 150 MHz  136 to 170 MHz  168 to 210 MHz
+    //      5           2                                  170 to 200 MHz  210 to 250 MHz
+
+    // Upper bounds are inclusive; a `clk` above the last row for `vos` reaches `unreachable!()`.
+    let (latency, wrhighfreq) = match (vos, clk.0) {
+        (VoltageScale::Scale0, ..=42_000_000) => (0, 0),
+        (VoltageScale::Scale0, ..=84_000_000) => (1, 0),
+        (VoltageScale::Scale0, ..=126_000_000) => (2, 1),
+        (VoltageScale::Scale0, ..=168_000_000) => (3, 1),
+        (VoltageScale::Scale0, ..=210_000_000) => (4, 2),
+        (VoltageScale::Scale0, ..=250_000_000) => (5, 2),
+
+        (VoltageScale::Scale1, ..=34_000_000) => (0, 0),
+        (VoltageScale::Scale1, ..=68_000_000) => (1, 0),
+        (VoltageScale::Scale1, ..=102_000_000) => (2, 1),
+        (VoltageScale::Scale1, ..=136_000_000) => (3, 1),
+        (VoltageScale::Scale1, ..=170_000_000) => (4, 2),
+        (VoltageScale::Scale1, ..=200_000_000) => (5, 2),
+
+        (VoltageScale::Scale2, ..=30_000_000) => (0, 0),
+        (VoltageScale::Scale2, ..=60_000_000) => (1, 0),
+        (VoltageScale::Scale2, ..=90_000_000) => (2, 1),
+        (VoltageScale::Scale2, ..=120_000_000) => (3, 1),
+        (VoltageScale::Scale2, ..=150_000_000) => (4, 2),
+
+        (VoltageScale::Scale3, ..=20_000_000) => (0, 0),
+        (VoltageScale::Scale3, ..=40_000_000) => (1, 0),
+        (VoltageScale::Scale3, ..=60_000_000) => (2, 1),
+        (VoltageScale::Scale3, ..=80_000_000) => (3, 1),
+        (VoltageScale::Scale3, ..=100_000_000) => (4, 2),
+
+        _ => unreachable!(),
+    };
+
+    debug!("flash: latency={} wrhighfreq={}", latency, wrhighfreq);
+
+    // NOTE(unsafe) Atomic write
+    unsafe {
+        FLASH.acr().write(|w| {
+            w.set_wrhighfreq(wrhighfreq);
+            w.set_latency(latency);
+        });
+        while FLASH.acr().read().latency() != latency {}
+    }
+}
diff --git a/embassy-stm32/src/rcc/l4.rs b/embassy-stm32/src/rcc/l4.rs
index e650490fe..c1bf7d0cd 100644
--- a/embassy-stm32/src/rcc/l4.rs
+++ b/embassy-stm32/src/rcc/l4.rs
@@ -1,7 +1,15 @@
+use core::marker::PhantomData;
+
+use embassy_hal_common::into_ref;
+use stm32_metapac::rcc::vals::{Mcopre, Mcosel};
+
+use crate::gpio::sealed::AFType;
+use crate::gpio::Speed;
 use crate::pac::rcc::vals::{Hpre, Msirange, Pllsrc, Ppre, Sw};
 use crate::pac::{FLASH, RCC};
 use crate::rcc::{set_freqs, Clocks};
 use crate::time::Hertz;
+use crate::{peripherals, Peripheral};
 
 /// HSI speed
 pub const HSI_FREQ: Hertz = Hertz(16_000_000);
@@ -298,6 +306,131 @@ impl Default for Config {
     }
 }
 
+pub enum McoClock {
+    DIV1,
+    DIV2,
+    DIV4,
+    DIV8,
+    DIV16,
+}
+
+impl McoClock {
+    fn into_raw(&self) -> Mcopre {
+        match self {
+            McoClock::DIV1 => Mcopre::DIV1,
+            McoClock::DIV2 => Mcopre::DIV2,
+            McoClock::DIV4 => Mcopre::DIV4,
+            McoClock::DIV8 => Mcopre::DIV8,
+            McoClock::DIV16 => Mcopre::DIV16,
+        }
+    }
+}
+
+#[derive(Copy, Clone)]
+pub enum Mco1Source {
+    Disabled,
+    Lse,
+    Lsi,
+    Hse,
+    Hsi16,
+    PllClk,
+    SysClk,
+    Msi,
+    #[cfg(not(any(stm32l471, stm32l475, stm32l476, stm32l486)))]
+    Hsi48,
+}
+
+impl Default for Mco1Source {
+    fn default() -> Self {
+        Self::Hsi16
+    }
+}
+
+pub trait McoSource {
+    type Raw;
+
+    fn into_raw(&self) -> Self::Raw;
+}
+
+impl McoSource for Mco1Source {
+    type Raw = Mcosel;
+    fn into_raw(&self) -> Self::Raw {
+        match self {
+            Mco1Source::Disabled => Mcosel::NOCLOCK,
+            Mco1Source::Lse => Mcosel::LSE,
+            Mco1Source::Lsi => Mcosel::LSI,
+            Mco1Source::Hse => Mcosel::HSE,
+            Mco1Source::Hsi16 => Mcosel::HSI16,
+            Mco1Source::PllClk => Mcosel::PLL,
+            Mco1Source::SysClk => Mcosel::SYSCLK,
+            Mco1Source::Msi => Mcosel::MSI,
+            #[cfg(not(any(stm32l471, stm32l475, stm32l476, stm32l486)))]
+            Mco1Source::Hsi48 => Mcosel::HSI48,
+        }
+    }
+}
+
+pub(crate) mod sealed {
+    use stm32_metapac::rcc::vals::Mcopre;
+    pub trait McoInstance {
+        type Source;
+        unsafe fn apply_clock_settings(source: Self::Source, prescaler: Mcopre);
+    }
+}
+
+pub trait McoInstance: sealed::McoInstance + 'static {}
+
+pin_trait!(McoPin, McoInstance);
+
+impl sealed::McoInstance for peripherals::MCO {
+    type Source = Mcosel;
+
+    unsafe fn apply_clock_settings(source: Self::Source, prescaler: Mcopre) {
+        RCC.cfgr().modify(|w| {
+            w.set_mcosel(source);
+            w.set_mcopre(prescaler);
+        });
+
+        match source {
+            Mcosel::HSI16 => {
+                RCC.cr().modify(|w| w.set_hsion(true));
+                while !RCC.cr().read().hsirdy() {}
+            }
+            #[cfg(not(any(stm32l471, stm32l475, stm32l476, stm32l486)))]
+            Mcosel::HSI48 => {
+                RCC.crrcr().modify(|w| w.set_hsi48on(true));
+                while !RCC.crrcr().read().hsi48rdy() {}
+            }
+            _ => {}
+        }
+    }
+}
+
+impl McoInstance for peripherals::MCO {}
+
+pub struct Mco<'d, T: McoInstance> {
+    phantom: PhantomData<&'d mut T>,
+}
+
+impl<'d, T: McoInstance> Mco<'d, T> {
+    pub fn new(
+        _peri: impl Peripheral<P = T> + 'd,
+        pin: impl Peripheral<P = impl McoPin<T>> + 'd,
+        source: impl McoSource<Raw = T::Source>,
+        prescaler: McoClock,
+    ) -> Self {
+        into_ref!(pin);
+
+        critical_section::with(|_| unsafe {
+            T::apply_clock_settings(source.into_raw(), prescaler.into_raw());
+            pin.set_as_af(pin.af_num(), AFType::OutputPushPull);
+            pin.set_speed(Speed::VeryHigh);
+        });
+
+        Self { phantom: PhantomData }
+    }
+}
+
 pub(crate) unsafe fn init(config: Config) {
     let (sys_clk, sw) = match config.mux {
         ClockSrc::MSI(range) => {
diff --git a/embassy-stm32/src/rcc/mod.rs b/embassy-stm32/src/rcc/mod.rs
index d4bd3d6b8..d6a31f17b 100644
--- a/embassy-stm32/src/rcc/mod.rs
+++ b/embassy-stm32/src/rcc/mod.rs
@@ -21,6 +21,7 @@ use crate::time::Hertz;
 #[cfg_attr(rcc_u5, path = "u5.rs")]
 #[cfg_attr(rcc_wb, path = "wb.rs")]
 #[cfg_attr(any(rcc_wl5, rcc_wle), path = "wl.rs")]
+#[cfg_attr(any(rcc_h5, rcc_h50), path = "h5.rs")]
 mod _version;
 pub use _version::*;
 
@@ -36,7 +37,7 @@ pub struct Clocks {
     pub apb2: Hertz,
     #[cfg(not(any(rcc_c0, rcc_g0)))]
     pub apb2_tim: Hertz,
-    #[cfg(any(rcc_wl5, rcc_wle, rcc_u5))]
+    #[cfg(any(rcc_wl5, rcc_wle, rcc_h5, rcc_h50, rcc_u5))]
     pub apb3: Hertz,
     #[cfg(any(rcc_h7, rcc_h7ab))]
     pub apb4: Hertz,
@@ -44,14 +45,16 @@ pub struct Clocks {
     // AHB
     pub ahb1: Hertz,
     #[cfg(any(
-        rcc_l4, rcc_l5, rcc_f2, rcc_f4, rcc_f410, rcc_f7, rcc_h7, rcc_h7ab, rcc_g4, rcc_u5, rcc_wb, rcc_wl5, rcc_wle
+        rcc_l4, rcc_l5, rcc_f2, rcc_f4, rcc_f410, rcc_f7, rcc_h5, rcc_h50, rcc_h7, rcc_h7ab, rcc_g4, rcc_u5, rcc_wb,
+        rcc_wl5, rcc_wle
     ))]
     pub ahb2: Hertz,
     #[cfg(any(
-        rcc_l4, rcc_l5, rcc_f2, rcc_f4, rcc_f410, rcc_f7, rcc_h7, rcc_h7ab, rcc_u5, rcc_wb, rcc_wl5, rcc_wle
+        rcc_l4, rcc_l5, rcc_f2, rcc_f4, rcc_f410, rcc_f7, rcc_h5, rcc_h50, rcc_h7, rcc_h7ab, rcc_u5, rcc_wb, rcc_wl5,
+        rcc_wle
     ))]
     pub ahb3: Hertz,
-    #[cfg(any(rcc_h7, rcc_h7ab))]
+    #[cfg(any(rcc_h5, rcc_h50, rcc_h7, rcc_h7ab))]
     pub ahb4: Hertz,
 
     #[cfg(any(rcc_f2, rcc_f4, rcc_f410, rcc_f7))]
@@ -60,7 +63,7 @@ pub struct Clocks {
     #[cfg(stm32f1)]
     pub adc: Hertz,
 
-    #[cfg(any(rcc_h7, rcc_h7ab))]
+    #[cfg(any(rcc_h5, rcc_h50, rcc_h7, rcc_h7ab))]
     pub adc: Option<Hertz>,
 }
 
diff --git a/embassy-stm32/src/spi/mod.rs b/embassy-stm32/src/spi/mod.rs
index 1f1708873..481ea4abc 100644
--- a/embassy-stm32/src/spi/mod.rs
+++ b/embassy-stm32/src/spi/mod.rs
@@ -258,7 +258,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
                 w.set_spe(true);
             });
         }
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         unsafe {
             T::REGS.ifcr().write(|w| w.0 = 0xffff_ffff);
             T::REGS.cfg2().modify(|w| {
@@ -317,7 +317,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
             });
         }
 
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         unsafe {
             T::REGS.cfg2().modify(|w| {
                 w.set_cpha(cpha);
@@ -330,7 +330,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
     pub fn get_current_config(&self) -> Config {
         #[cfg(any(spi_v1, spi_f1, spi_v2))]
         let cfg = unsafe { T::REGS.cr1().read() };
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         let cfg = unsafe { T::REGS.cfg2().read() };
         let polarity = if cfg.cpol() == vals::Cpol::IDLELOW {
             Polarity::IdleLow
@@ -383,7 +383,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
                 w.set_spe(true);
             });
         }
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         unsafe {
             T::REGS.cr1().modify(|w| {
                 w.set_csusp(true);
@@ -429,7 +429,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
             T::REGS.cr1().modify(|w| {
                 w.set_spe(true);
             });
-            #[cfg(any(spi_v3, spi_v4))]
+            #[cfg(any(spi_v3, spi_v4, spi_v5))]
             T::REGS.cr1().modify(|w| {
                 w.set_cstart(true);
             });
@@ -459,7 +459,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
         }
 
         // SPIv3 clears rxfifo on SPE=0
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         flush_rx_fifo(T::REGS);
 
         set_rxdmaen(T::REGS, true);
@@ -481,7 +481,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
             T::REGS.cr1().modify(|w| {
                 w.set_spe(true);
             });
-            #[cfg(any(spi_v3, spi_v4))]
+            #[cfg(any(spi_v3, spi_v4, spi_v5))]
             T::REGS.cr1().modify(|w| {
                 w.set_cstart(true);
             });
@@ -514,7 +514,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
         }
 
         // SPIv3 clears rxfifo on SPE=0
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         flush_rx_fifo(T::REGS);
 
         set_rxdmaen(T::REGS, true);
@@ -534,7 +534,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
             T::REGS.cr1().modify(|w| {
                 w.set_spe(true);
             });
-            #[cfg(any(spi_v3, spi_v4))]
+            #[cfg(any(spi_v3, spi_v4, spi_v5))]
             T::REGS.cr1().modify(|w| {
                 w.set_cstart(true);
             });
@@ -619,9 +619,9 @@ impl<'d, T: Instance, Tx, Rx> Drop for Spi<'d, T, Tx, Rx> {
     }
 }
 
-#[cfg(not(any(spi_v3, spi_v4)))]
+#[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
 use vals::Br;
-#[cfg(any(spi_v3, spi_v4))]
+#[cfg(any(spi_v3, spi_v4, spi_v5))]
 use vals::Mbr as Br;
 
 fn compute_baud_rate(clocks: Hertz, freq: Hertz) -> Br {
@@ -647,17 +647,17 @@ trait RegsExt {
 
 impl RegsExt for Regs {
     fn tx_ptr<W>(&self) -> *mut W {
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         let dr = self.dr();
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         let dr = self.txdr();
         dr.ptr() as *mut W
     }
 
     fn rx_ptr<W>(&self) -> *mut W {
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         let dr = self.dr();
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         let dr = self.rxdr();
         dr.ptr() as *mut W
     }
@@ -667,22 +667,22 @@ fn check_error_flags(sr: regs::Sr) -> Result<(), Error> {
     if sr.ovr() {
         return Err(Error::Overrun);
     }
-    #[cfg(not(any(spi_f1, spi_v3, spi_v4)))]
+    #[cfg(not(any(spi_f1, spi_v3, spi_v4, spi_v5)))]
     if sr.fre() {
         return Err(Error::Framing);
     }
-    #[cfg(any(spi_v3, spi_v4))]
+    #[cfg(any(spi_v3, spi_v4, spi_v5))]
     if sr.tifre() {
         return Err(Error::Framing);
     }
     if sr.modf() {
         return Err(Error::ModeFault);
     }
-    #[cfg(not(any(spi_v3, spi_v4)))]
+    #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
     if sr.crcerr() {
         return Err(Error::Crc);
     }
-    #[cfg(any(spi_v3, spi_v4))]
+    #[cfg(any(spi_v3, spi_v4, spi_v5))]
     if sr.crce() {
         return Err(Error::Crc);
     }
@@ -696,11 +696,11 @@ fn spin_until_tx_ready(regs: Regs) -> Result<(), Error> {
 
         check_error_flags(sr)?;
 
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         if sr.txe() {
             return Ok(());
         }
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         if sr.txp() {
             return Ok(());
         }
@@ -713,11 +713,11 @@ fn spin_until_rx_ready(regs: Regs) -> Result<(), Error> {
 
         check_error_flags(sr)?;
 
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         if sr.rxne() {
             return Ok(());
         }
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         if sr.rxp() {
             return Ok(());
         }
@@ -726,11 +726,11 @@ fn spin_until_rx_ready(regs: Regs) -> Result<(), Error> {
 
 fn flush_rx_fifo(regs: Regs) {
     unsafe {
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         while regs.sr().read().rxne() {
             let _ = regs.dr().read();
         }
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         while regs.sr().read().rxp() {
             let _ = regs.rxdr().read();
         }
@@ -739,11 +739,11 @@ fn flush_rx_fifo(regs: Regs) {
 
 fn set_txdmaen(regs: Regs, val: bool) {
     unsafe {
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         regs.cr2().modify(|reg| {
             reg.set_txdmaen(val);
         });
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         regs.cfg1().modify(|reg| {
             reg.set_txdmaen(val);
         });
@@ -752,11 +752,11 @@ fn set_txdmaen(regs: Regs, val: bool) {
 
 fn set_rxdmaen(regs: Regs, val: bool) {
     unsafe {
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         regs.cr2().modify(|reg| {
             reg.set_rxdmaen(val);
         });
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         regs.cfg1().modify(|reg| {
             reg.set_rxdmaen(val);
         });
@@ -768,9 +768,9 @@ fn finish_dma(regs: Regs) {
         #[cfg(spi_v2)]
         while regs.sr().read().ftlvl() > 0 {}
 
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         while !regs.sr().read().txc() {}
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         while regs.sr().read().bsy() {}
 
         // Disable the spi peripheral
@@ -780,12 +780,12 @@ fn finish_dma(regs: Regs) {
 
         // The peripheral automatically disables the DMA stream on completion without error,
         // but it does not clear the RXDMAEN/TXDMAEN flag in CR2.
-        #[cfg(not(any(spi_v3, spi_v4)))]
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
         regs.cr2().modify(|reg| {
             reg.set_txdmaen(false);
             reg.set_rxdmaen(false);
         });
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         regs.cfg1().modify(|reg| {
             reg.set_txdmaen(false);
             reg.set_rxdmaen(false);
@@ -799,7 +799,7 @@ fn transfer_word<W: Word>(regs: Regs, tx_word: W) -> Result<W, Error> {
     unsafe {
         ptr::write_volatile(regs.tx_ptr(), tx_word);
 
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         regs.cr1().modify(|reg| reg.set_cstart(true));
     }
 
@@ -970,7 +970,7 @@ pub(crate) mod sealed {
             }
         }
 
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         pub fn dsize(&self) -> u8 {
             match self {
                 WordSize::EightBit => 0b0111,
@@ -978,7 +978,7 @@ pub(crate) mod sealed {
             }
         }
 
-        #[cfg(any(spi_v3, spi_v4))]
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
         pub fn _frxth(&self) -> vals::Fthlv {
             match self {
                 WordSize::EightBit => vals::Fthlv::ONEFRAME,
diff --git a/embassy-stm32/src/time.rs b/embassy-stm32/src/time.rs
index 975517a48..f08abe331 100644
--- a/embassy-stm32/src/time.rs
+++ b/embassy-stm32/src/time.rs
@@ -1,7 +1,9 @@
 //! Time units
 
+use core::ops::{Div, Mul};
+
 /// Hertz
-#[derive(PartialEq, PartialOrd, Clone, Copy, Debug, Eq)]
+#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Copy, Debug)]
 #[cfg_attr(feature = "defmt", derive(defmt::Format))]
 pub struct Hertz(pub u32);
 
@@ -33,3 +35,45 @@ pub fn khz(kilohertz: u32) -> Hertz {
 pub fn mhz(megahertz: u32) -> Hertz {
     Hertz::mhz(megahertz)
 }
+
+impl Mul<u32> for Hertz {
+    type Output = Hertz;
+    fn mul(self, rhs: u32) -> Self::Output {
+        Hertz(self.0 * rhs)
+    }
+}
+
+impl Div<u32> for Hertz {
+    type Output = Hertz;
+    fn div(self, rhs: u32) -> Self::Output {
+        Hertz(self.0 / rhs)
+    }
+}
+
+impl Mul<u16> for Hertz {
+    type Output = Hertz;
+    fn mul(self, rhs: u16) -> Self::Output {
+        self * (rhs as u32)
+    }
+}
+
+impl Div<u16> for Hertz {
+    type Output = Hertz;
+    fn div(self, rhs: u16) -> Self::Output {
+        self / (rhs as u32)
+    }
+}
+
+impl Mul<u8> for Hertz {
+    type Output = Hertz;
+    fn mul(self, rhs: u8) -> Self::Output {
+        self * (rhs as u32)
+    }
+}
+
+impl Div<u8> for Hertz {
+    type Output = Hertz;
+    fn div(self, rhs: u8) -> Self::Output {
+        self / (rhs as u32)
+    }
+}
diff --git a/embassy-stm32/src/usart/buffered.rs b/embassy-stm32/src/usart/buffered.rs
index cd7d72f91..3e23e7ca1 100644
--- a/embassy-stm32/src/usart/buffered.rs
+++ b/embassy-stm32/src/usart/buffered.rs
@@ -1,55 +1,51 @@
-use core::cell::RefCell;
 use core::future::poll_fn;
-use core::sync::atomic::{compiler_fence, Ordering};
+use core::slice;
 use core::task::Poll;
 
-use embassy_cortex_m::peripheral::{PeripheralMutex, PeripheralState, StateStorage};
-use embassy_hal_common::ring_buffer::RingBuffer;
-use embassy_sync::waitqueue::WakerRegistration;
+use embassy_cortex_m::interrupt::Interrupt;
+use embassy_hal_common::atomic_ring_buffer::RingBuffer;
+use embassy_sync::waitqueue::AtomicWaker;
 
 use super::*;
 
-pub struct State<'d, T: BasicInstance>(StateStorage<StateInner<'d, T>>);
-impl<'d, T: BasicInstance> State<'d, T> {
+pub struct State {
+    rx_waker: AtomicWaker,
+    rx_buf: RingBuffer,
+
+    tx_waker: AtomicWaker,
+    tx_buf: RingBuffer,
+}
+
+impl State {
     pub const fn new() -> Self {
-        Self(StateStorage::new())
+        Self {
+            rx_buf: RingBuffer::new(),
+            tx_buf: RingBuffer::new(),
+            rx_waker: AtomicWaker::new(),
+            tx_waker: AtomicWaker::new(),
+        }
     }
 }
 
-struct StateInner<'d, T: BasicInstance> {
-    phantom: PhantomData<&'d mut T>,
-
-    rx_waker: WakerRegistration,
-    rx: RingBuffer<'d>,
-
-    tx_waker: WakerRegistration,
-    tx: RingBuffer<'d>,
-}
-
-unsafe impl<'d, T: BasicInstance> Send for StateInner<'d, T> {}
-unsafe impl<'d, T: BasicInstance> Sync for StateInner<'d, T> {}
-
 pub struct BufferedUart<'d, T: BasicInstance> {
-    inner: RefCell<PeripheralMutex<'d, StateInner<'d, T>>>,
+    rx: BufferedUartRx<'d, T>,
+    tx: BufferedUartTx<'d, T>,
 }
 
-pub struct BufferedUartTx<'u, 'd, T: BasicInstance> {
-    inner: &'u BufferedUart<'d, T>,
+pub struct BufferedUartTx<'d, T: BasicInstance> {
+    phantom: PhantomData<&'d mut T>,
 }
 
-pub struct BufferedUartRx<'u, 'd, T: BasicInstance> {
-    inner: &'u BufferedUart<'d, T>,
+pub struct BufferedUartRx<'d, T: BasicInstance> {
+    phantom: PhantomData<&'d mut T>,
 }
 
-impl<'d, T: BasicInstance> Unpin for BufferedUart<'d, T> {}
-
 impl<'d, T: BasicInstance> BufferedUart<'d, T> {
     pub fn new(
-        state: &'d mut State<'d, T>,
         peri: impl Peripheral<P = T> + 'd,
+        irq: impl Peripheral<P = T::Interrupt> + 'd,
         rx: impl Peripheral<P = impl RxPin<T>> + 'd,
         tx: impl Peripheral<P = impl TxPin<T>> + 'd,
-        irq: impl Peripheral<P = T::Interrupt> + 'd,
         tx_buffer: &'d mut [u8],
         rx_buffer: &'d mut [u8],
         config: Config,
@@ -57,15 +53,14 @@ impl<'d, T: BasicInstance> BufferedUart<'d, T> {
         T::enable();
         T::reset();
 
-        Self::new_inner(state, peri, rx, tx, irq, tx_buffer, rx_buffer, config)
+        Self::new_inner(peri, irq, rx, tx, tx_buffer, rx_buffer, config)
     }
 
     pub fn new_with_rtscts(
-        state: &'d mut State<'d, T>,
         peri: impl Peripheral<P = T> + 'd,
+        irq: impl Peripheral<P = T::Interrupt> + 'd,
         rx: impl Peripheral<P = impl RxPin<T>> + 'd,
         tx: impl Peripheral<P = impl TxPin<T>> + 'd,
-        irq: impl Peripheral<P = T::Interrupt> + 'd,
         rts: impl Peripheral<P = impl RtsPin<T>> + 'd,
         cts: impl Peripheral<P = impl CtsPin<T>> + 'd,
         tx_buffer: &'d mut [u8],
@@ -86,16 +81,15 @@ impl<'d, T: BasicInstance> BufferedUart<'d, T> {
             });
         }
 
-        Self::new_inner(state, peri, rx, tx, irq, tx_buffer, rx_buffer, config)
+        Self::new_inner(peri, irq, rx, tx, tx_buffer, rx_buffer, config)
     }
 
     #[cfg(not(usart_v1))]
     pub fn new_with_de(
-        state: &'d mut State<'d, T>,
         peri: impl Peripheral<P = T> + 'd,
+        irq: impl Peripheral<P = T::Interrupt> + 'd,
         rx: impl Peripheral<P = impl RxPin<T>> + 'd,
         tx: impl Peripheral<P = impl TxPin<T>> + 'd,
-        irq: impl Peripheral<P = T::Interrupt> + 'd,
         de: impl Peripheral<P = impl DePin<T>> + 'd,
         tx_buffer: &'d mut [u8],
         rx_buffer: &'d mut [u8],
@@ -113,23 +107,27 @@ impl<'d, T: BasicInstance> BufferedUart<'d, T> {
             });
         }
 
-        Self::new_inner(state, peri, rx, tx, irq, tx_buffer, rx_buffer, config)
+        Self::new_inner(peri, irq, rx, tx, tx_buffer, rx_buffer, config)
     }
 
     fn new_inner(
-        state: &'d mut State<'d, T>,
         _peri: impl Peripheral<P = T> + 'd,
+        irq: impl Peripheral<P = T::Interrupt> + 'd,
         rx: impl Peripheral<P = impl RxPin<T>> + 'd,
         tx: impl Peripheral<P = impl TxPin<T>> + 'd,
-        irq: impl Peripheral<P = T::Interrupt> + 'd,
         tx_buffer: &'d mut [u8],
         rx_buffer: &'d mut [u8],
         config: Config,
     ) -> BufferedUart<'d, T> {
         into_ref!(_peri, rx, tx, irq);
 
-        let r = T::regs();
+        let state = T::buffered_state();
+        let len = tx_buffer.len();
+        unsafe { state.tx_buf.init(tx_buffer.as_mut_ptr(), len) };
+        let len = rx_buffer.len();
+        unsafe { state.rx_buf.init(rx_buffer.as_mut_ptr(), len) };
 
+        let r = T::regs();
         unsafe {
             rx.set_as_af(rx.af_num(), AFType::Input);
             tx.set_as_af(tx.af_num(), AFType::OutputPushPull);
@@ -147,273 +145,259 @@ impl<'d, T: BasicInstance> BufferedUart<'d, T> {
             });
         }
 
+        irq.set_handler(on_interrupt::<T>);
+        irq.unpend();
+        irq.enable();
+
         Self {
-            inner: RefCell::new(PeripheralMutex::new(irq, &mut state.0, move || StateInner {
-                phantom: PhantomData,
-                tx: RingBuffer::new(tx_buffer),
-                tx_waker: WakerRegistration::new(),
-
-                rx: RingBuffer::new(rx_buffer),
-                rx_waker: WakerRegistration::new(),
-            })),
+            rx: BufferedUartRx { phantom: PhantomData },
+            tx: BufferedUartTx { phantom: PhantomData },
         }
     }
 
-    pub fn split<'u>(&'u mut self) -> (BufferedUartRx<'u, 'd, T>, BufferedUartTx<'u, 'd, T>) {
-        (BufferedUartRx { inner: self }, BufferedUartTx { inner: self })
+    pub fn split(self) -> (BufferedUartTx<'d, T>, BufferedUartRx<'d, T>) {
+        (self.tx, self.rx)
     }
+}
 
-    async fn inner_read<'a>(&'a self, buf: &'a mut [u8]) -> Result<usize, Error> {
+impl<'d, T: BasicInstance> BufferedUartRx<'d, T> {
+    async fn read(&self, buf: &mut [u8]) -> Result<usize, Error> {
         poll_fn(move |cx| {
-            let mut do_pend = false;
-            let mut inner = self.inner.borrow_mut();
-            let res = inner.with(|state| {
-                compiler_fence(Ordering::SeqCst);
+            let state = T::buffered_state();
+            let mut rx_reader = unsafe { state.rx_buf.reader() };
+            let data = rx_reader.pop_slice();
 
-                // We have data ready in buffer? Return it.
-                let data = state.rx.pop_buf();
-                if !data.is_empty() {
-                    let len = data.len().min(buf.len());
-                    buf[..len].copy_from_slice(&data[..len]);
+            if !data.is_empty() {
+                let len = data.len().min(buf.len());
+                buf[..len].copy_from_slice(&data[..len]);
 
-                    if state.rx.is_full() {
-                        do_pend = true;
-                    }
-                    state.rx.pop(len);
+                let do_pend = state.rx_buf.is_full();
+                rx_reader.pop_done(len);
 
-                    return Poll::Ready(Ok(len));
+                if do_pend {
+                    unsafe { T::Interrupt::steal().pend() };
                 }
 
+                return Poll::Ready(Ok(len));
+            }
+
+            state.rx_waker.register(cx.waker());
+            Poll::Pending
+        })
+        .await
+    }
+
+    fn blocking_read(&self, buf: &mut [u8]) -> Result<usize, Error> {
+        loop {
+            let state = T::buffered_state();
+            let mut rx_reader = unsafe { state.rx_buf.reader() };
+            let data = rx_reader.pop_slice();
+
+            if !data.is_empty() {
+                let len = data.len().min(buf.len());
+                buf[..len].copy_from_slice(&data[..len]);
+
+                let do_pend = state.rx_buf.is_full();
+                rx_reader.pop_done(len);
+
+                if do_pend {
+                    unsafe { T::Interrupt::steal().pend() };
+                }
+
+                return Ok(len);
+            }
+        }
+    }
+
+    async fn fill_buf(&self) -> Result<&[u8], Error> {
+        poll_fn(move |cx| {
+            let state = T::buffered_state();
+            let mut rx_reader = unsafe { state.rx_buf.reader() };
+            let (p, n) = rx_reader.pop_buf();
+            if n == 0 {
                 state.rx_waker.register(cx.waker());
-                Poll::Pending
-            });
-
-            if do_pend {
-                inner.pend();
+                return Poll::Pending;
             }
 
-            res
+            let buf = unsafe { slice::from_raw_parts(p, n) };
+            Poll::Ready(Ok(buf))
         })
         .await
     }
 
-    fn inner_blocking_read(&self, buf: &mut [u8]) -> Result<usize, Error> {
-        loop {
-            let mut do_pend = false;
-            let mut inner = self.inner.borrow_mut();
-            let n = inner.with(|state| {
-                compiler_fence(Ordering::SeqCst);
+    fn consume(&self, amt: usize) {
+        let state = T::buffered_state();
+        let mut rx_reader = unsafe { state.rx_buf.reader() };
+        let full = state.rx_buf.is_full();
+        rx_reader.pop_done(amt);
+        if full {
+            unsafe { T::Interrupt::steal().pend() };
+        }
+    }
+}
 
-                // We have data ready in buffer? Return it.
-                let data = state.rx.pop_buf();
-                if !data.is_empty() {
-                    let len = data.len().min(buf.len());
-                    buf[..len].copy_from_slice(&data[..len]);
+impl<'d, T: BasicInstance> BufferedUartTx<'d, T> {
+    async fn write(&self, buf: &[u8]) -> Result<usize, Error> {
+        poll_fn(move |cx| {
+            let state = T::buffered_state();
+            let empty = state.tx_buf.is_empty();
 
-                    if state.rx.is_full() {
-                        do_pend = true;
-                    }
-                    state.rx.pop(len);
-
-                    return len;
-                }
-
-                0
-            });
-
-            if do_pend {
-                inner.pend();
+            let mut tx_writer = unsafe { state.tx_buf.writer() };
+            let data = tx_writer.push_slice();
+            if data.is_empty() {
+                state.tx_waker.register(cx.waker());
+                return Poll::Pending;
             }
 
-            if n > 0 {
+            let n = data.len().min(buf.len());
+            data[..n].copy_from_slice(&buf[..n]);
+            tx_writer.push_done(n);
+
+            if empty {
+                unsafe { T::Interrupt::steal() }.pend();
+            }
+
+            Poll::Ready(Ok(n))
+        })
+        .await
+    }
+
+    async fn flush(&self) -> Result<(), Error> {
+        poll_fn(move |cx| {
+            let state = T::buffered_state();
+            if !state.tx_buf.is_empty() {
+                state.tx_waker.register(cx.waker());
+                return Poll::Pending;
+            }
+
+            Poll::Ready(Ok(()))
+        })
+        .await
+    }
+
+    fn blocking_write(&self, buf: &[u8]) -> Result<usize, Error> {
+        loop {
+            let state = T::buffered_state();
+            let empty = state.tx_buf.is_empty();
+
+            let mut tx_writer = unsafe { state.tx_buf.writer() };
+            let data = tx_writer.push_slice();
+            if !data.is_empty() {
+                let n = data.len().min(buf.len());
+                data[..n].copy_from_slice(&buf[..n]);
+                tx_writer.push_done(n);
+
+                if empty {
+                    unsafe { T::Interrupt::steal() }.pend();
+                }
+
                 return Ok(n);
             }
         }
     }
 
-    async fn inner_write<'a>(&'a self, buf: &'a [u8]) -> Result<usize, Error> {
-        poll_fn(move |cx| {
-            let mut inner = self.inner.borrow_mut();
-            let (poll, empty) = inner.with(|state| {
-                let empty = state.tx.is_empty();
-                let tx_buf = state.tx.push_buf();
-                if tx_buf.is_empty() {
-                    state.tx_waker.register(cx.waker());
-                    return (Poll::Pending, empty);
-                }
-
-                let n = core::cmp::min(tx_buf.len(), buf.len());
-                tx_buf[..n].copy_from_slice(&buf[..n]);
-                state.tx.push(n);
-
-                (Poll::Ready(Ok(n)), empty)
-            });
-            if empty {
-                inner.pend();
-            }
-            poll
-        })
-        .await
-    }
-
-    async fn inner_flush<'a>(&'a self) -> Result<(), Error> {
-        poll_fn(move |cx| {
-            self.inner.borrow_mut().with(|state| {
-                if !state.tx.is_empty() {
-                    state.tx_waker.register(cx.waker());
-                    return Poll::Pending;
-                }
-
-                Poll::Ready(Ok(()))
-            })
-        })
-        .await
-    }
-
-    fn inner_blocking_write(&self, buf: &[u8]) -> Result<usize, Error> {
+    fn blocking_flush(&self) -> Result<(), Error> {
         loop {
-            let mut inner = self.inner.borrow_mut();
-            let (n, empty) = inner.with(|state| {
-                let empty = state.tx.is_empty();
-                let tx_buf = state.tx.push_buf();
-                if tx_buf.is_empty() {
-                    return (0, empty);
-                }
-
-                let n = core::cmp::min(tx_buf.len(), buf.len());
-                tx_buf[..n].copy_from_slice(&buf[..n]);
-                state.tx.push(n);
-
-                (n, empty)
-            });
-            if empty {
-                inner.pend();
-            }
-            if n != 0 {
-                return Ok(n);
-            }
-        }
-    }
-
-    fn inner_blocking_flush(&self) -> Result<(), Error> {
-        loop {
-            if !self.inner.borrow_mut().with(|state| state.tx.is_empty()) {
+            let state = T::buffered_state();
+            if state.tx_buf.is_empty() {
                 return Ok(());
             }
         }
     }
-
-    async fn inner_fill_buf<'a>(&'a self) -> Result<&'a [u8], Error> {
-        poll_fn(move |cx| {
-            self.inner.borrow_mut().with(|state| {
-                compiler_fence(Ordering::SeqCst);
-
-                // We have data ready in buffer? Return it.
-                let buf = state.rx.pop_buf();
-                if !buf.is_empty() {
-                    let buf: &[u8] = buf;
-                    // Safety: buffer lives as long as uart
-                    let buf: &[u8] = unsafe { core::mem::transmute(buf) };
-                    return Poll::Ready(Ok(buf));
-                }
-
-                state.rx_waker.register(cx.waker());
-                Poll::<Result<&[u8], Error>>::Pending
-            })
-        })
-        .await
-    }
-
-    fn inner_consume(&self, amt: usize) {
-        let mut inner = self.inner.borrow_mut();
-        let signal = inner.with(|state| {
-            let full = state.rx.is_full();
-            state.rx.pop(amt);
-            full
-        });
-        if signal {
-            inner.pend();
-        }
-    }
 }
 
-impl<'d, T: BasicInstance> StateInner<'d, T>
-where
-    Self: 'd,
-{
-    fn on_rx(&mut self) {
-        let r = T::regs();
+impl<'d, T: BasicInstance> Drop for BufferedUartRx<'d, T> {
+    fn drop(&mut self) {
+        let state = T::buffered_state();
         unsafe {
-            let sr = sr(r).read();
-            clear_interrupt_flags(r, sr);
+            state.rx_buf.deinit();
 
-            // This read also clears the error and idle interrupt flags on v1.
-            let b = rdr(r).read_volatile();
-
-            if sr.rxne() {
-                if sr.pe() {
-                    warn!("Parity error");
-                }
-                if sr.fe() {
-                    warn!("Framing error");
-                }
-                if sr.ne() {
-                    warn!("Noise error");
-                }
-                if sr.ore() {
-                    warn!("Overrun error");
-                }
-
-                let buf = self.rx.push_buf();
-                if !buf.is_empty() {
-                    buf[0] = b;
-                    self.rx.push(1);
-                } else {
-                    warn!("RX buffer full, discard received byte");
-                }
-
-                if self.rx.is_full() {
-                    self.rx_waker.wake();
-                }
-            }
-
-            if sr.idle() {
-                self.rx_waker.wake();
-            };
-        }
-    }
-
-    fn on_tx(&mut self) {
-        let r = T::regs();
-        unsafe {
-            if sr(r).read().txe() {
-                let buf = self.tx.pop_buf();
-                if !buf.is_empty() {
-                    r.cr1().modify(|w| {
-                        w.set_txeie(true);
-                    });
-                    tdr(r).write_volatile(buf[0].into());
-                    self.tx.pop(1);
-                    self.tx_waker.wake();
-                } else {
-                    // Disable interrupt until we have something to transmit again
-                    r.cr1().modify(|w| {
-                        w.set_txeie(false);
-                    });
-                }
+            // TX is inactive if the buffer is not available.
+            // We can now unregister the interrupt handler
+            if state.tx_buf.len() == 0 {
+                T::Interrupt::steal().disable();
             }
         }
     }
 }
 
-impl<'d, T: BasicInstance> PeripheralState for StateInner<'d, T>
-where
-    Self: 'd,
-{
-    type Interrupt = T::Interrupt;
-    fn on_interrupt(&mut self) {
-        self.on_rx();
-        self.on_tx();
+impl<'d, T: BasicInstance> Drop for BufferedUartTx<'d, T> {
+    fn drop(&mut self) {
+        let state = T::buffered_state();
+        unsafe {
+            state.tx_buf.deinit();
+
+            // RX is inactive if the buffer is not available.
+            // We can now unregister the interrupt handler
+            if state.rx_buf.len() == 0 {
+                T::Interrupt::steal().disable();
+            }
+        }
+    }
+}
+
+unsafe fn on_interrupt<T: BasicInstance>(_: *mut ()) {
+    let r = T::regs();
+    let state = T::buffered_state();
+
+    // RX
+    unsafe {
+        let sr = sr(r).read();
+        clear_interrupt_flags(r, sr);
+
+        if sr.rxne() {
+            if sr.pe() {
+                warn!("Parity error");
+            }
+            if sr.fe() {
+                warn!("Framing error");
+            }
+            if sr.ne() {
+                warn!("Noise error");
+            }
+            if sr.ore() {
+                warn!("Overrun error");
+            }
+
+            let mut rx_writer = state.rx_buf.writer();
+            let buf = rx_writer.push_slice();
+            if !buf.is_empty() {
+                // This read also clears the error and idle interrupt flags on v1.
+                buf[0] = rdr(r).read_volatile();
+                rx_writer.push_done(1);
+            } else {
+                // FIXME: Should we disable any further RX interrupts when the buffer becomes full?
+            }
+
+            if state.rx_buf.is_full() {
+                state.rx_waker.wake();
+            }
+        }
+
+        if sr.idle() {
+            state.rx_waker.wake();
+        };
+    }
+
+    // TX
+    unsafe {
+        if sr(r).read().txe() {
+            let mut tx_reader = state.tx_buf.reader();
+            let buf = tx_reader.pop_slice();
+            if !buf.is_empty() {
+                r.cr1().modify(|w| {
+                    w.set_txeie(true);
+                });
+                tdr(r).write_volatile(buf[0].into());
+                tx_reader.pop_done(1);
+                state.tx_waker.wake();
+            } else {
+                // Disable interrupt until we have something to transmit again
+                r.cr1().modify(|w| {
+                    w.set_txeie(false);
+                });
+            }
+        }
     }
 }
 
@@ -427,94 +411,284 @@ impl<'d, T: BasicInstance> embedded_io::Io for BufferedUart<'d, T> {
     type Error = Error;
 }
 
-impl<'u, 'd, T: BasicInstance> embedded_io::Io for BufferedUartRx<'u, 'd, T> {
+impl<'d, T: BasicInstance> embedded_io::Io for BufferedUartRx<'d, T> {
     type Error = Error;
 }
 
-impl<'u, 'd, T: BasicInstance> embedded_io::Io for BufferedUartTx<'u, 'd, T> {
+impl<'d, T: BasicInstance> embedded_io::Io for BufferedUartTx<'d, T> {
     type Error = Error;
 }
 
 impl<'d, T: BasicInstance> embedded_io::asynch::Read for BufferedUart<'d, T> {
     async fn read(&mut self, buf: &mut [u8]) -> Result<usize, Self::Error> {
-        self.inner_read(buf).await
+        self.rx.read(buf).await
     }
 }
 
-impl<'u, 'd, T: BasicInstance> embedded_io::asynch::Read for BufferedUartRx<'u, 'd, T> {
+impl<'d, T: BasicInstance> embedded_io::asynch::Read for BufferedUartRx<'d, T> {
     async fn read(&mut self, buf: &mut [u8]) -> Result<usize, Self::Error> {
-        self.inner.inner_read(buf).await
+        Self::read(self, buf).await
     }
 }
 
 impl<'d, T: BasicInstance> embedded_io::asynch::BufRead for BufferedUart<'d, T> {
     async fn fill_buf(&mut self) -> Result<&[u8], Self::Error> {
-        self.inner_fill_buf().await
+        self.rx.fill_buf().await
     }
 
     fn consume(&mut self, amt: usize) {
-        self.inner_consume(amt)
+        self.rx.consume(amt)
     }
 }
 
-impl<'u, 'd, T: BasicInstance> embedded_io::asynch::BufRead for BufferedUartRx<'u, 'd, T> {
+impl<'d, T: BasicInstance> embedded_io::asynch::BufRead for BufferedUartRx<'d, T> {
     async fn fill_buf(&mut self) -> Result<&[u8], Self::Error> {
-        self.inner.inner_fill_buf().await
+        Self::fill_buf(self).await
     }
 
     fn consume(&mut self, amt: usize) {
-        self.inner.inner_consume(amt)
+        Self::consume(self, amt)
     }
 }
 
 impl<'d, T: BasicInstance> embedded_io::asynch::Write for BufferedUart<'d, T> {
     async fn write(&mut self, buf: &[u8]) -> Result<usize, Self::Error> {
-        self.inner_write(buf).await
+        self.tx.write(buf).await
     }
 
     async fn flush(&mut self) -> Result<(), Self::Error> {
-        self.inner_flush().await
+        self.tx.flush().await
     }
 }
 
-impl<'u, 'd, T: BasicInstance> embedded_io::asynch::Write for BufferedUartTx<'u, 'd, T> {
+impl<'d, T: BasicInstance> embedded_io::asynch::Write for BufferedUartTx<'d, T> {
     async fn write(&mut self, buf: &[u8]) -> Result<usize, Self::Error> {
-        self.inner.inner_write(buf).await
+        Self::write(self, buf).await
     }
 
     async fn flush(&mut self) -> Result<(), Self::Error> {
-        self.inner.inner_flush().await
+        Self::flush(self).await
     }
 }
 
 impl<'d, T: BasicInstance> embedded_io::blocking::Read for BufferedUart<'d, T> {
     fn read(&mut self, buf: &mut [u8]) -> Result<usize, Self::Error> {
-        self.inner_blocking_read(buf)
+        self.rx.blocking_read(buf)
     }
 }
 
-impl<'u, 'd, T: BasicInstance> embedded_io::blocking::Read for BufferedUartRx<'u, 'd, T> {
+impl<'d, T: BasicInstance> embedded_io::blocking::Read for BufferedUartRx<'d, T> {
     fn read(&mut self, buf: &mut [u8]) -> Result<usize, Self::Error> {
-        self.inner.inner_blocking_read(buf)
+        self.blocking_read(buf)
     }
 }
 
 impl<'d, T: BasicInstance> embedded_io::blocking::Write for BufferedUart<'d, T> {
     fn write(&mut self, buf: &[u8]) -> Result<usize, Self::Error> {
-        self.inner_blocking_write(buf)
+        self.tx.blocking_write(buf)
     }
 
     fn flush(&mut self) -> Result<(), Self::Error> {
-        self.inner_blocking_flush()
+        self.tx.blocking_flush()
     }
 }
 
-impl<'u, 'd, T: BasicInstance> embedded_io::blocking::Write for BufferedUartTx<'u, 'd, T> {
+impl<'d, T: BasicInstance> embedded_io::blocking::Write for BufferedUartTx<'d, T> {
     fn write(&mut self, buf: &[u8]) -> Result<usize, Self::Error> {
-        self.inner.inner_blocking_write(buf)
+        Self::blocking_write(self, buf)
     }
 
     fn flush(&mut self) -> Result<(), Self::Error> {
-        self.inner.inner_blocking_flush()
+        Self::blocking_flush(self)
+    }
+}
+
+mod eh02 {
+    use super::*;
+
+    impl<'d, T: BasicInstance> embedded_hal_02::serial::Read<u8> for BufferedUartRx<'d, T> {
+        type Error = Error;
+
+        fn read(&mut self) -> Result<u8, nb::Error<Self::Error>> {
+            let r = T::regs();
+            unsafe {
+                let sr = sr(r).read();
+                if sr.pe() {
+                    rdr(r).read_volatile();
+                    Err(nb::Error::Other(Error::Parity))
+                } else if sr.fe() {
+                    rdr(r).read_volatile();
+                    Err(nb::Error::Other(Error::Framing))
+                } else if sr.ne() {
+                    rdr(r).read_volatile();
+                    Err(nb::Error::Other(Error::Noise))
+                } else if sr.ore() {
+                    rdr(r).read_volatile();
+                    Err(nb::Error::Other(Error::Overrun))
+                } else if sr.rxne() {
+                    Ok(rdr(r).read_volatile())
+                } else {
+                    Err(nb::Error::WouldBlock)
+                }
+            }
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_02::blocking::serial::Write<u8> for BufferedUartTx<'d, T> {
+        type Error = Error;
+
+        fn bwrite_all(&mut self, mut buffer: &[u8]) -> Result<(), Self::Error> {
+            while !buffer.is_empty() {
+                match self.blocking_write(buffer) {
+                    Ok(0) => panic!("zero-length write."),
+                    Ok(n) => buffer = &buffer[n..],
+                    Err(e) => return Err(e),
+                }
+            }
+            Ok(())
+        }
+
+        fn bflush(&mut self) -> Result<(), Self::Error> {
+            self.blocking_flush()
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_02::serial::Read<u8> for BufferedUart<'d, T> {
+        type Error = Error;
+
+        fn read(&mut self) -> Result<u8, nb::Error<Self::Error>> {
+            embedded_hal_02::serial::Read::read(&mut self.rx)
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_02::blocking::serial::Write<u8> for BufferedUart<'d, T> {
+        type Error = Error;
+
+        fn bwrite_all(&mut self, mut buffer: &[u8]) -> Result<(), Self::Error> {
+            while !buffer.is_empty() {
+                match self.tx.blocking_write(buffer) {
+                    Ok(0) => panic!("zero-length write."),
+                    Ok(n) => buffer = &buffer[n..],
+                    Err(e) => return Err(e),
+                }
+            }
+            Ok(())
+        }
+
+        fn bflush(&mut self) -> Result<(), Self::Error> {
+            self.tx.blocking_flush()
+        }
+    }
+}
+
+#[cfg(feature = "unstable-traits")]
+mod eh1 {
+    use super::*;
+
+    impl<'d, T: BasicInstance> embedded_hal_1::serial::ErrorType for BufferedUart<'d, T> {
+        type Error = Error;
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_1::serial::ErrorType for BufferedUartTx<'d, T> {
+        type Error = Error;
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_1::serial::ErrorType for BufferedUartRx<'d, T> {
+        type Error = Error;
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_nb::serial::Read for BufferedUartRx<'d, T> {
+        fn read(&mut self) -> nb::Result<u8, Self::Error> {
+            embedded_hal_02::serial::Read::read(self)
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_1::serial::Write for BufferedUartTx<'d, T> {
+        fn write(&mut self, buffer: &[u8]) -> Result<(), Self::Error> {
+            self.blocking_write(buffer).map(drop)
+        }
+
+        fn flush(&mut self) -> Result<(), Self::Error> {
+            self.blocking_flush()
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_nb::serial::Write for BufferedUartTx<'d, T> {
+        fn write(&mut self, char: u8) -> nb::Result<(), Self::Error> {
+            self.blocking_write(&[char]).map(drop).map_err(nb::Error::Other)
+        }
+
+        fn flush(&mut self) -> nb::Result<(), Self::Error> {
+            self.blocking_flush().map_err(nb::Error::Other)
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_nb::serial::Read for BufferedUart<'d, T> {
+        fn read(&mut self) -> Result<u8, nb::Error<Self::Error>> {
+            embedded_hal_02::serial::Read::read(&mut self.rx)
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_1::serial::Write for BufferedUart<'d, T> {
+        fn write(&mut self, buffer: &[u8]) -> Result<(), Self::Error> {
+            self.tx.blocking_write(buffer).map(drop)
+        }
+
+        fn flush(&mut self) -> Result<(), Self::Error> {
+            self.tx.blocking_flush()
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_nb::serial::Write for BufferedUart<'d, T> {
+        fn write(&mut self, char: u8) -> nb::Result<(), Self::Error> {
+            self.tx.blocking_write(&[char]).map(drop).map_err(nb::Error::Other)
+        }
+
+        fn flush(&mut self) -> nb::Result<(), Self::Error> {
+            self.tx.blocking_flush().map_err(nb::Error::Other)
+        }
+    }
+}
+
+#[cfg(all(
+    feature = "unstable-traits",
+    feature = "nightly",
+    feature = "_todo_embedded_hal_serial"
+))]
+mod eha {
+    use core::future::Future;
+
+    use super::*;
+
+    impl<'d, T: BasicInstance> embedded_hal_async::serial::Write for BufferedUartTx<'d, T> {
+        async fn write(&mut self, buf: &[u8]) -> Result<(), Self::Error> {
+            Self::write(buf)
+        }
+
+        async fn flush(&mut self) -> Result<(), Self::Error> {
+            Self::flush()
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_async::serial::Read for BufferedUartRx<'d, T> {
+        async fn read(&mut self, buf: &mut [u8]) -> Result<(), Self::Error> {
+            Self::read(buf)
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_async::serial::Write for BufferedUart<'d, T> {
+        async fn write(&mut self, buf: &[u8]) -> Result<(), Self::Error> {
+            self.tx.write(buf)
+        }
+
+        async fn flush(&mut self) -> Result<(), Self::Error> {
+            self.tx.flush()
+        }
+    }
+
+    impl<'d, T: BasicInstance> embedded_hal_async::serial::Read for BufferedUart<'d, T> {
+        async fn read(&mut self, buf: &mut [u8]) -> Result<(), Self::Error> {
+            self.rx.read(buf)
+        }
     }
 }
diff --git a/embassy-stm32/src/usart/mod.rs b/embassy-stm32/src/usart/mod.rs
index f80323e37..a42eede18 100644
--- a/embassy-stm32/src/usart/mod.rs
+++ b/embassy-stm32/src/usart/mod.rs
@@ -1112,6 +1112,9 @@ pub(crate) mod sealed {
 
         fn regs() -> Regs;
         fn state() -> &'static State;
+
+        #[cfg(feature = "nightly")]
+        fn buffered_state() -> &'static buffered::State;
     }
 
     pub trait FullInstance: BasicInstance {
@@ -1147,6 +1150,12 @@ macro_rules! impl_lpuart {
                 static STATE: crate::usart::sealed::State = crate::usart::sealed::State::new();
                 &STATE
             }
+
+            #[cfg(feature = "nightly")]
+            fn buffered_state() -> &'static buffered::State {
+                static STATE: buffered::State = buffered::State::new();
+                &STATE
+            }
         }
 
         impl BasicInstance for peripherals::$inst {}
diff --git a/embassy-stm32/src/usb/usb.rs b/embassy-stm32/src/usb/usb.rs
index 0355c5f14..ad68eaba2 100644
--- a/embassy-stm32/src/usb/usb.rs
+++ b/embassy-stm32/src/usb/usb.rs
@@ -12,22 +12,29 @@ use embassy_usb_driver as driver;
 use embassy_usb_driver::{
     Direction, EndpointAddress, EndpointAllocError, EndpointError, EndpointInfo, EndpointType, Event, Unsupported,
 };
-use pac::common::{Reg, RW};
-use pac::usb::vals::{EpType, Stat};
 
 use super::{DmPin, DpPin, Instance};
 use crate::gpio::sealed::AFType;
 use crate::interrupt::InterruptExt;
 use crate::pac::usb::regs;
+use crate::pac::usb::vals::{EpType, Stat};
+use crate::pac::USBRAM;
 use crate::rcc::sealed::RccPeripheral;
-use crate::{pac, Peripheral};
+use crate::Peripheral;
 
 const EP_COUNT: usize = 8;
 
-#[cfg(any(usb_v1_x1, usb_v1_x2))]
-const EP_MEMORY_SIZE: usize = 512;
-#[cfg(not(any(usb_v1_x1, usb_v1_x2)))]
-const EP_MEMORY_SIZE: usize = 1024;
+#[cfg(any(usbram_16x1_512, usbram_16x2_512))]
+const USBRAM_SIZE: usize = 512; // limit enforced by `alloc_ep_mem` when handing out endpoint buffers
+#[cfg(usbram_16x2_1024)]
+const USBRAM_SIZE: usize = 1024;
+#[cfg(usbram_32_2048)]
+const USBRAM_SIZE: usize = 2048;
+
+#[cfg(not(usbram_32_2048))]
+const USBRAM_ALIGN: usize = 2; // bytes per USBRAM access: buffers are copied as `u16` words
+#[cfg(usbram_32_2048)]
+const USBRAM_ALIGN: usize = 4; // bytes per USBRAM access: buffers are copied as `u32` words
 
 const NEW_AW: AtomicWaker = AtomicWaker::new();
 static BUS_WAKER: AtomicWaker = NEW_AW;
@@ -57,25 +64,60 @@ fn invariant(mut r: regs::Epr) -> regs::Epr {
     r
 }
 
+fn align_len_up(len: u16) -> u16 {
+    ((len as usize + USBRAM_ALIGN - 1) / USBRAM_ALIGN * USBRAM_ALIGN) as u16 // round `len` up to the next multiple of USBRAM_ALIGN
+}
+
 // Returns (actual_len, len_bits)
 fn calc_out_len(len: u16) -> (u16, u16) {
     match len {
-        2..=62 => ((len + 1) / 2 * 2, ((len + 1) / 2) << 10),
-        63..=480 => ((len + 31) / 32 * 32, (((len + 31) / 32 - 1) << 10) | 0x8000),
+        // NOTE: 2..=62 would work with 16bit USBRAM, but with 32bit 62 aligns up to 64 and 64 / 2 << 10 == 0x8000. Limit it to 60 for simplicity.
+        2..=60 => (align_len_up(len), align_len_up(len) / 2 << 10), // aligned length counted in 2-byte units, placed from bit 10 up
+        61..=1024 => ((len + 31) / 32 * 32, (((len + 31) / 32 - 1) << 10) | 0x8000), // 32-byte units minus one from bit 10 up, bit 15 set
         _ => panic!("invalid OUT length {}", len),
     }
 }
-fn ep_in_addr<T: Instance>(index: usize) -> Reg<u16, RW> {
-    T::regs().ep_mem(index * 4 + 0)
+
+#[cfg(not(usbram_32_2048))]
+mod btable { // four USBRAM words per endpoint index: IN addr, IN len, OUT addr, OUT len (offsets 0..=3)
+    use super::*;
+
+    pub(super) unsafe fn write_in<T: Instance>(index: usize, addr: u16) {
+        USBRAM.mem(index * 4 + 0).write_value(addr);
+    }
+
+    pub(super) unsafe fn write_in_len<T: Instance>(index: usize, _addr: u16, len: u16) { // `_addr` is unused here; the 32-bit variant of this module needs it
+        USBRAM.mem(index * 4 + 1).write_value(len);
+    }
+
+    pub(super) unsafe fn write_out<T: Instance>(index: usize, addr: u16, max_len_bits: u16) {
+        USBRAM.mem(index * 4 + 2).write_value(addr);
+        USBRAM.mem(index * 4 + 3).write_value(max_len_bits);
+    }
+
+    pub(super) unsafe fn read_out_len<T: Instance>(index: usize) -> u16 {
+        USBRAM.mem(index * 4 + 3).read() // same word that `write_out` fills with `max_len_bits`
+    }
 }
-fn ep_in_len<T: Instance>(index: usize) -> Reg<u16, RW> {
-    T::regs().ep_mem(index * 4 + 1)
-}
-fn ep_out_addr<T: Instance>(index: usize) -> Reg<u16, RW> {
-    T::regs().ep_mem(index * 4 + 2)
-}
-fn ep_out_len<T: Instance>(index: usize) -> Reg<u16, RW> {
-    T::regs().ep_mem(index * 4 + 3)
+#[cfg(usbram_32_2048)]
+mod btable { // two USBRAM words per endpoint index: IN at `index * 2`, OUT at `index * 2 + 1`
+    use super::*;
+
+    pub(super) unsafe fn write_in<T: Instance>(_index: usize, _addr: u16) {} // no-op: `write_in_len` writes the address together with the length
+
+    pub(super) unsafe fn write_in_len<T: Instance>(index: usize, addr: u16, len: u16) {
+        USBRAM.mem(index * 2).write_value((addr as u32) | ((len as u32) << 16)); // address in the low 16 bits, length in the high 16 bits
+    }
+
+    pub(super) unsafe fn write_out<T: Instance>(index: usize, addr: u16, max_len_bits: u16) {
+        USBRAM
+            .mem(index * 2 + 1)
+            .write_value((addr as u32) | ((max_len_bits as u32) << 16)); // address in the low 16 bits, length bits in the high 16 bits
+    }
+
+    pub(super) unsafe fn read_out_len<T: Instance>(index: usize) -> u16 {
+        (USBRAM.mem(index * 2 + 1).read() >> 16) as u16 // high half of the word that `write_out` fills
+    }
 }
 
 struct EndpointBuffer<T: Instance> {
@@ -87,23 +129,25 @@ struct EndpointBuffer<T: Instance> {
 impl<T: Instance> EndpointBuffer<T> {
     fn read(&mut self, buf: &mut [u8]) {
         assert!(buf.len() <= self.len as usize);
-        for i in 0..((buf.len() + 1) / 2) {
-            let val = unsafe { T::regs().ep_mem(self.addr as usize / 2 + i).read() };
-            buf[i * 2] = val as u8;
-            if i * 2 + 1 < buf.len() {
-                buf[i * 2 + 1] = (val >> 8) as u8;
-            }
+        for i in 0..(buf.len() + USBRAM_ALIGN - 1) / USBRAM_ALIGN { // one pass per USBRAM word, rounding up for a partial last word
+            let val = unsafe { USBRAM.mem(self.addr as usize / USBRAM_ALIGN + i).read() }; // byte address converted to a word index
+            let n = USBRAM_ALIGN.min(buf.len() - i * USBRAM_ALIGN); // bytes of this word that still fit in `buf`
+            buf[i * USBRAM_ALIGN..][..n].copy_from_slice(&val.to_le_bytes()[..n]); // little-endian: lowest byte of the word goes first
         }
     }
 
     fn write(&mut self, buf: &[u8]) {
         assert!(buf.len() <= self.len as usize);
-        for i in 0..((buf.len() + 1) / 2) {
-            let mut val = buf[i * 2] as u16;
-            if i * 2 + 1 < buf.len() {
-                val |= (buf[i * 2 + 1] as u16) << 8;
-            }
-            unsafe { T::regs().ep_mem(self.addr as usize / 2 + i).write_value(val) };
+        for i in 0..(buf.len() + USBRAM_ALIGN - 1) / USBRAM_ALIGN { // one pass per USBRAM word, rounding up for a partial last word
+            let mut val = [0u8; USBRAM_ALIGN]; // zero-filled, so a partial last word is padded with zeros
+            let n = USBRAM_ALIGN.min(buf.len() - i * USBRAM_ALIGN); // bytes of `buf` left for this word
+            val[..n].copy_from_slice(&buf[i * USBRAM_ALIGN..][..n]);
+
+            #[cfg(not(usbram_32_2048))]
+            let val = u16::from_le_bytes(val); // 2-byte words when USBRAM_ALIGN is 2
+            #[cfg(usbram_32_2048)]
+            let val = u32::from_le_bytes(val); // 4-byte words when USBRAM_ALIGN is 4
+            unsafe { USBRAM.mem(self.addr as usize / USBRAM_ALIGN + i).write_value(val) }; // byte address converted to a word index
         }
     }
 }
@@ -139,8 +183,12 @@ impl<'d, T: Instance> Driver<'d, T> {
         #[cfg(stm32l5)]
         unsafe {
             crate::peripherals::PWR::enable();
+            crate::pac::PWR.cr2().modify(|w| w.set_usv(true));
+        }
 
-            pac::PWR.cr2().modify(|w| w.set_usv(true));
+        #[cfg(pwr_h5)]
+        unsafe {
+            crate::pac::PWR.usbscr().modify(|w| w.set_usb33sv(true))
         }
 
         unsafe {
@@ -256,8 +304,9 @@ impl<'d, T: Instance> Driver<'d, T> {
     }
 
     fn alloc_ep_mem(&mut self, len: u16) -> u16 {
+        assert!(len as usize % USBRAM_ALIGN == 0);
         let addr = self.ep_mem_free;
-        if addr + len > EP_MEMORY_SIZE as _ {
+        if addr + len > USBRAM_SIZE as _ {
             panic!("Endpoint memory full");
         }
         self.ep_mem_free += len;
@@ -306,10 +355,7 @@ impl<'d, T: Instance> Driver<'d, T> {
                 let addr = self.alloc_ep_mem(len);
 
                 trace!("  len_bits = {:04x}", len_bits);
-                unsafe {
-                    ep_out_addr::<T>(index).write_value(addr);
-                    ep_out_len::<T>(index).write_value(len_bits);
-                }
+                unsafe { btable::write_out::<T>(index, addr, len_bits) }
 
                 EndpointBuffer {
                     addr,
@@ -321,13 +367,11 @@ impl<'d, T: Instance> Driver<'d, T> {
                 assert!(!ep.used_in);
                 ep.used_in = true;
 
-                let len = (max_packet_size + 1) / 2 * 2;
+                let len = align_len_up(max_packet_size);
                 let addr = self.alloc_ep_mem(len);
 
-                unsafe {
-                    ep_in_addr::<T>(index).write_value(addr);
-                    // ep_in_len is written when actually TXing packets.
-                }
+                // ep_in_len is written when actually TXing packets.
+                unsafe { btable::write_in::<T>(index, addr) }
 
                 EndpointBuffer {
                     addr,
@@ -398,7 +442,7 @@ impl<'d, T: Instance> driver::Driver<'d> for Driver<'d, T> {
                 w.set_ctrm(true);
             });
 
-            #[cfg(usb_v3)]
+            #[cfg(any(usb_v3, usb_v4))]
             regs.bcdr().write(|w| w.set_dppu(true))
         }
 
@@ -633,12 +677,12 @@ impl<'d, T: Instance, D> Endpoint<'d, T, D> {
     fn write_data(&mut self, buf: &[u8]) {
         let index = self.info.addr.index();
         self.buf.write(buf);
-        unsafe { ep_in_len::<T>(index).write_value(buf.len() as _) };
+        unsafe { btable::write_in_len::<T>(index, self.buf.addr, buf.len() as _) } // store the IN length; the 32-bit btable also rewrites the address
     }
 
     fn read_data(&mut self, buf: &mut [u8]) -> Result<usize, EndpointError> {
         let index = self.info.addr.index();
-        let rx_len = unsafe { ep_out_len::<T>(index).read() as usize } & 0x3FF;
+        let rx_len = unsafe { btable::read_out_len::<T>(index) as usize } & 0x3FF;
         trace!("READ DONE, rx_len = {}", rx_len);
         if rx_len > buf.len() {
             return Err(EndpointError::BufferOverflow);
diff --git a/embassy-stm32/src/usb_otg/mod.rs b/embassy-stm32/src/usb_otg/mod.rs
index 84fef78cb..193e0df0d 100644
--- a/embassy-stm32/src/usb_otg/mod.rs
+++ b/embassy-stm32/src/usb_otg/mod.rs
@@ -89,6 +89,9 @@ foreach_interrupt!(
                 } else if #[cfg(stm32h7)] {
                     const FIFO_DEPTH_WORDS: u16 = 1024;
                     const ENDPOINT_COUNT: usize = 9;
+                } else if #[cfg(stm32u5)] {
+                    const FIFO_DEPTH_WORDS: u16 = 320;
+                    const ENDPOINT_COUNT: usize = 6;
                 } else {
                     compile_error!("USB_OTG_FS peripheral is not supported by this chip.");
                 }
@@ -137,6 +140,9 @@ foreach_interrupt!(
                 ))] {
                     const FIFO_DEPTH_WORDS: u16 = 1024;
                     const ENDPOINT_COUNT: usize = 9;
+                } else if #[cfg(stm32u5)] {
+                    const FIFO_DEPTH_WORDS: u16 = 1024;
+                    const ENDPOINT_COUNT: usize = 9;
                 } else {
                     compile_error!("USB_OTG_HS peripheral is not supported by this chip.");
                 }
diff --git a/embassy-sync/Cargo.toml b/embassy-sync/Cargo.toml
index 7b5d3ce48..e4871e718 100644
--- a/embassy-sync/Cargo.toml
+++ b/embassy-sync/Cargo.toml
@@ -25,6 +25,7 @@ features = ["nightly"]
 [features]
 nightly = ["embedded-io/async"]
 std = []
+turbowakers = []
 
 [dependencies]
 defmt = { version = "0.3", optional = true }
diff --git a/embassy-sync/src/waitqueue/atomic_waker.rs b/embassy-sync/src/waitqueue/atomic_waker.rs
new file mode 100644
index 000000000..63fe04a6e
--- /dev/null
+++ b/embassy-sync/src/waitqueue/atomic_waker.rs
@@ -0,0 +1,41 @@
+use core::cell::Cell;
+use core::task::Waker;
+
+use crate::blocking_mutex::raw::CriticalSectionRawMutex;
+use crate::blocking_mutex::Mutex;
+
+/// Utility struct to register and wake a single waker through `&self`, guarded by a critical section.
+pub struct AtomicWaker {
+    waker: Mutex<CriticalSectionRawMutex, Cell<Option<Waker>>>,
+}
+
+impl AtomicWaker {
+    /// Create a new `AtomicWaker` with no waker registered.
+    pub const fn new() -> Self {
+        Self {
+            waker: Mutex::const_new(CriticalSectionRawMutex::new(), Cell::new(None)),
+        }
+    }
+
+    /// Register a waker. Overwrites the previous waker, if any.
+    pub fn register(&self, w: &Waker) {
+        critical_section::with(|cs| {
+            let cell = self.waker.borrow(cs);
+            cell.set(match cell.replace(None) {
+                Some(w2) if (w2.will_wake(w)) => Some(w2), // same wake target: keep the stored waker and skip the clone
+                _ => Some(w.clone()),
+            })
+        })
+    }
+
+    /// Wake the registered waker, if any. The waker stays registered afterwards.
+    pub fn wake(&self) {
+        critical_section::with(|cs| {
+            let cell = self.waker.borrow(cs);
+            if let Some(w) = cell.replace(None) {
+                w.wake_by_ref();
+                cell.set(Some(w)); // put it back so later `wake()` calls still reach it
+            }
+        })
+    }
+}
diff --git a/embassy-sync/src/waitqueue/atomic_waker_turbo.rs b/embassy-sync/src/waitqueue/atomic_waker_turbo.rs
new file mode 100644
index 000000000..5c6a96ec8
--- /dev/null
+++ b/embassy-sync/src/waitqueue/atomic_waker_turbo.rs
@@ -0,0 +1,30 @@
+use core::ptr;
+use core::ptr::NonNull;
+use core::sync::atomic::{AtomicPtr, Ordering};
+use core::task::Waker;
+
+/// Utility struct to register and wake a waker, kept as a raw pointer in an `AtomicPtr`.
+pub struct AtomicWaker {
+    waker: AtomicPtr<()>,
+}
+
+impl AtomicWaker {
+    /// Create a new `AtomicWaker` with no waker registered (null pointer).
+    pub const fn new() -> Self {
+        Self {
+            waker: AtomicPtr::new(ptr::null_mut()),
+        }
+    }
+
+    /// Register a waker. Overwrites the previous waker, if any. Only its turbo pointer is stored; `w` is not cloned.
+    pub fn register(&self, w: &Waker) {
+        self.waker.store(w.as_turbo_ptr().as_ptr() as _, Ordering::Release);
+    }
+
+    /// Wake the registered waker, if any. The stored pointer is left in place.
+    pub fn wake(&self) {
+        if let Some(ptr) = NonNull::new(self.waker.load(Ordering::Acquire)) { // null means nothing has been registered yet
+            unsafe { Waker::from_turbo_ptr(ptr) }.wake(); // NOTE(review): relies on `ptr` coming from `as_turbo_ptr` in `register` — confirm the safety contract
+        }
+    }
+}
diff --git a/embassy-sync/src/waitqueue/mod.rs b/embassy-sync/src/waitqueue/mod.rs
index 6661a6b61..6b0b0c64e 100644
--- a/embassy-sync/src/waitqueue/mod.rs
+++ b/embassy-sync/src/waitqueue/mod.rs
@@ -1,7 +1,11 @@
 //! Async low-level wait queues
 
-mod waker;
-pub use waker::*;
+#[cfg_attr(feature = "turbowakers", path = "atomic_waker_turbo.rs")]
+mod atomic_waker;
+pub use atomic_waker::*;
+
+mod waker_registration;
+pub use waker_registration::*;
 
 mod multi_waker;
 pub use multi_waker::*;
diff --git a/embassy-sync/src/waitqueue/waker.rs b/embassy-sync/src/waitqueue/waker_registration.rs
similarity index 63%
rename from embassy-sync/src/waitqueue/waker.rs
rename to embassy-sync/src/waitqueue/waker_registration.rs
index 9ce94a089..9b666e7c4 100644
--- a/embassy-sync/src/waitqueue/waker.rs
+++ b/embassy-sync/src/waitqueue/waker_registration.rs
@@ -1,10 +1,6 @@
-use core::cell::Cell;
 use core::mem;
 use core::task::Waker;
 
-use crate::blocking_mutex::raw::CriticalSectionRawMutex;
-use crate::blocking_mutex::Mutex;
-
 /// Utility struct to register and wake a waker.
 #[derive(Debug, Default)]
 pub struct WakerRegistration {
@@ -54,39 +50,3 @@ impl WakerRegistration {
         self.waker.is_some()
     }
 }
-
-/// Utility struct to register and wake a waker.
-pub struct AtomicWaker {
-    waker: Mutex<CriticalSectionRawMutex, Cell<Option<Waker>>>,
-}
-
-impl AtomicWaker {
-    /// Create a new `AtomicWaker`.
-    pub const fn new() -> Self {
-        Self {
-            waker: Mutex::const_new(CriticalSectionRawMutex::new(), Cell::new(None)),
-        }
-    }
-
-    /// Register a waker. Overwrites the previous waker, if any.
-    pub fn register(&self, w: &Waker) {
-        critical_section::with(|cs| {
-            let cell = self.waker.borrow(cs);
-            cell.set(match cell.replace(None) {
-                Some(w2) if (w2.will_wake(w)) => Some(w2),
-                _ => Some(w.clone()),
-            })
-        })
-    }
-
-    /// Wake the registered waker, if any.
-    pub fn wake(&self) {
-        critical_section::with(|cs| {
-            let cell = self.waker.borrow(cs);
-            if let Some(w) = cell.replace(None) {
-                w.wake_by_ref();
-                cell.set(Some(w));
-            }
-        })
-    }
-}
diff --git a/embassy-time/Cargo.toml b/embassy-time/Cargo.toml
index 5b14814a1..38d31f1c4 100644
--- a/embassy-time/Cargo.toml
+++ b/embassy-time/Cargo.toml
@@ -152,8 +152,8 @@ defmt = { version = "0.3", optional = true }
 log = { version = "0.4.14", optional = true }
 
 embedded-hal-02 = { package = "embedded-hal", version = "0.2.6" }
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9", optional = true}
-embedded-hal-async = { version = "=0.2.0-alpha.0", optional = true}
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10", optional = true}
+embedded-hal-async = { version = "=0.2.0-alpha.1", optional = true}
 
 futures-util = { version = "0.3.17", default-features = false }
 embassy-sync = { version = "0.1", path = "../embassy-sync" }
diff --git a/embassy-time/src/delay.rs b/embassy-time/src/delay.rs
index 0ca176abd..cf1918724 100644
--- a/embassy-time/src/delay.rs
+++ b/embassy-time/src/delay.rs
@@ -19,14 +19,12 @@ mod eh1 {
     use super::*;
 
     impl embedded_hal_1::delay::DelayUs for Delay {
-        type Error = core::convert::Infallible;
-
-        fn delay_us(&mut self, us: u32) -> Result<(), Self::Error> {
-            Ok(block_for(Duration::from_micros(us as u64)))
+        fn delay_us(&mut self, us: u32) {
+            block_for(Duration::from_micros(us as u64))
         }
 
-        fn delay_ms(&mut self, ms: u32) -> Result<(), Self::Error> {
-            Ok(block_for(Duration::from_millis(ms as u64)))
+        fn delay_ms(&mut self, ms: u32) {
+            block_for(Duration::from_millis(ms as u64))
         }
     }
 }
@@ -37,14 +35,12 @@ mod eha {
     use crate::Timer;
 
     impl embedded_hal_async::delay::DelayUs for Delay {
-        type Error = core::convert::Infallible;
-
-        async fn delay_us(&mut self, micros: u32) -> Result<(), Self::Error> {
-            Ok(Timer::after(Duration::from_micros(micros as _)).await)
+        async fn delay_us(&mut self, micros: u32) {
+            Timer::after(Duration::from_micros(micros as _)).await
         }
 
-        async fn delay_ms(&mut self, millis: u32) -> Result<(), Self::Error> {
-            Ok(Timer::after(Duration::from_millis(millis as _)).await)
+        async fn delay_ms(&mut self, millis: u32) {
+            Timer::after(Duration::from_millis(millis as _)).await
         }
     }
 }
diff --git a/examples/boot/application/nrf/Cargo.toml b/examples/boot/application/nrf/Cargo.toml
index 888993255..e75c73cbd 100644
--- a/examples/boot/application/nrf/Cargo.toml
+++ b/examples/boot/application/nrf/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync" }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly"] }
 embassy-nrf = { version = "0.1.0", path = "../../../../embassy-nrf", features = ["time-driver-rtc1", "gpiote", "nightly"] }
 embassy-boot = { version = "0.1.0", path = "../../../../embassy-boot/boot" }
diff --git a/examples/boot/application/rp/Cargo.toml b/examples/boot/application/rp/Cargo.toml
index 8d826790b..8de2d2ebd 100644
--- a/examples/boot/application/rp/Cargo.toml
+++ b/examples/boot/application/rp/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync" }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly"] }
 embassy-rp = { version = "0.1.0", path = "../../../../embassy-rp", features = ["time-driver", "unstable-traits", "nightly"] }
 embassy-boot-rp = { version = "0.1.0", path = "../../../../embassy-boot/rp" }
diff --git a/examples/boot/application/stm32f3/Cargo.toml b/examples/boot/application/stm32f3/Cargo.toml
index aa279fb76..083607de5 100644
--- a/examples/boot/application/stm32f3/Cargo.toml
+++ b/examples/boot/application/stm32f3/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../../../embassy-stm32", features = ["unstable-traits", "nightly", "stm32f303re", "time-driver-any", "exti"]  }
 embassy-boot-stm32 = { version = "0.1.0", path = "../../../../embassy-boot/stm32" }
diff --git a/examples/boot/application/stm32f7/Cargo.toml b/examples/boot/application/stm32f7/Cargo.toml
index 1ec0643a6..74f508515 100644
--- a/examples/boot/application/stm32f7/Cargo.toml
+++ b/examples/boot/application/stm32f7/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../../../embassy-stm32", features = ["unstable-traits", "nightly", "stm32f767zi", "time-driver-any", "exti"]  }
 embassy-boot-stm32 = { version = "0.1.0", path = "../../../../embassy-boot/stm32" }
diff --git a/examples/boot/application/stm32h7/Cargo.toml b/examples/boot/application/stm32h7/Cargo.toml
index a4eefe2a5..898b9a47e 100644
--- a/examples/boot/application/stm32h7/Cargo.toml
+++ b/examples/boot/application/stm32h7/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync" }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../../../embassy-stm32", features = ["unstable-traits", "nightly", "stm32h743zi", "time-driver-any", "exti"]  }
 embassy-boot-stm32 = { version = "0.1.0", path = "../../../../embassy-boot/stm32" }
diff --git a/examples/boot/application/stm32l0/Cargo.toml b/examples/boot/application/stm32l0/Cargo.toml
index 36eada29b..e142c8481 100644
--- a/examples/boot/application/stm32l0/Cargo.toml
+++ b/examples/boot/application/stm32l0/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../../../embassy-stm32", features = ["unstable-traits", "nightly", "stm32l072cz", "time-driver-any", "exti", "memory-x"]  }
 embassy-boot-stm32 = { version = "0.1.0", path = "../../../../embassy-boot/stm32" }
diff --git a/examples/boot/application/stm32l1/Cargo.toml b/examples/boot/application/stm32l1/Cargo.toml
index 67efda748..f0e92e1ac 100644
--- a/examples/boot/application/stm32l1/Cargo.toml
+++ b/examples/boot/application/stm32l1/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../../../embassy-stm32", features = ["unstable-traits", "nightly", "stm32l151cb-a", "time-driver-any", "exti"]  }
 embassy-boot-stm32 = { version = "0.1.0", path = "../../../../embassy-boot/stm32" }
diff --git a/examples/boot/application/stm32l4/Cargo.toml b/examples/boot/application/stm32l4/Cargo.toml
index 4b2e02dd2..87689e9a9 100644
--- a/examples/boot/application/stm32l4/Cargo.toml
+++ b/examples/boot/application/stm32l4/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../../../embassy-stm32", features = ["unstable-traits", "nightly", "stm32l475vg", "time-driver-any", "exti"]  }
 embassy-boot-stm32 = { version = "0.1.0", path = "../../../../embassy-boot/stm32" }
diff --git a/examples/boot/application/stm32wl/Cargo.toml b/examples/boot/application/stm32wl/Cargo.toml
index fecbfc51d..a6708bf51 100644
--- a/examples/boot/application/stm32wl/Cargo.toml
+++ b/examples/boot/application/stm32wl/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../../../embassy-time", features = ["nightly", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../../../embassy-stm32", features = ["unstable-traits", "nightly", "stm32wl55jc-cm4", "time-driver-any", "exti"]  }
 embassy-boot-stm32 = { version = "0.1.0", path = "../../../../embassy-boot/stm32" }
diff --git a/examples/boot/bootloader/nrf/src/main.rs b/examples/boot/bootloader/nrf/src/main.rs
index aca3b857a..8818a23b8 100644
--- a/examples/boot/bootloader/nrf/src/main.rs
+++ b/examples/boot/bootloader/nrf/src/main.rs
@@ -27,9 +27,11 @@ fn main() -> ! {
     wdt_config.run_during_sleep = true;
     wdt_config.run_during_debug_halt = false;
 
-    let start = bl.prepare(&mut SingleFlashConfig::new(&mut BootFlash::<_, 4096>::new(
-        WatchdogFlash::start(Nvmc::new(p.NVMC), p.WDT, wdt_config),
-    )));
+    let start = bl.prepare(&mut SingleFlashConfig::new(&mut BootFlash::new(WatchdogFlash::start(
+        Nvmc::new(p.NVMC),
+        p.WDT,
+        wdt_config,
+    ))));
     unsafe { bl.load(start) }
 }
 
diff --git a/examples/boot/bootloader/rp/src/main.rs b/examples/boot/bootloader/rp/src/main.rs
index fb7f0522b..8129591fa 100644
--- a/examples/boot/bootloader/rp/src/main.rs
+++ b/examples/boot/bootloader/rp/src/main.rs
@@ -5,7 +5,6 @@ use cortex_m_rt::{entry, exception};
 #[cfg(feature = "defmt")]
 use defmt_rtt as _;
 use embassy_boot_rp::*;
-use embassy_rp::flash::ERASE_SIZE;
 use embassy_time::Duration;
 
 const FLASH_SIZE: usize = 2 * 1024 * 1024;
@@ -24,7 +23,7 @@ fn main() -> ! {
 
     let mut bl: BootLoader = BootLoader::default();
     let flash = WatchdogFlash::<FLASH_SIZE>::start(p.FLASH, p.WATCHDOG, Duration::from_secs(8));
-    let mut flash = BootFlash::<_, ERASE_SIZE>::new(flash);
+    let mut flash = BootFlash::new(flash);
     let start = bl.prepare(&mut SingleFlashConfig::new(&mut flash));
     core::mem::drop(flash);
 
diff --git a/examples/boot/bootloader/stm32/src/main.rs b/examples/boot/bootloader/stm32/src/main.rs
index 4b17cd799..49c21920b 100644
--- a/examples/boot/bootloader/stm32/src/main.rs
+++ b/examples/boot/bootloader/stm32/src/main.rs
@@ -5,7 +5,7 @@ use cortex_m_rt::{entry, exception};
 #[cfg(feature = "defmt")]
 use defmt_rtt as _;
 use embassy_boot_stm32::*;
-use embassy_stm32::flash::{Flash, ERASE_SIZE, ERASE_VALUE, WRITE_SIZE};
+use embassy_stm32::flash::Flash;
 
 #[entry]
 fn main() -> ! {
@@ -19,9 +19,10 @@ fn main() -> ! {
         }
     */
 
-    let mut bl: BootLoader<ERASE_SIZE, WRITE_SIZE> = BootLoader::default();
+    let mut bl: BootLoader<2048> = BootLoader::default();
     let flash = Flash::new(p.FLASH);
-    let mut flash = BootFlash::<_, ERASE_SIZE, ERASE_VALUE>::new(flash);
+    let layout = flash.into_regions();
+    let mut flash = BootFlash::new(layout.bank1_region);
     let start = bl.prepare(&mut SingleFlashConfig::new(&mut flash));
     core::mem::drop(flash);
     unsafe { bl.load(start) }
diff --git a/examples/nrf-rtos-trace/Cargo.toml b/examples/nrf-rtos-trace/Cargo.toml
index d8c24dfad..7910b372a 100644
--- a/examples/nrf-rtos-trace/Cargo.toml
+++ b/examples/nrf-rtos-trace/Cargo.toml
@@ -17,7 +17,7 @@ log = [
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync" }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features=["rtos-trace", "rtos-trace-interrupt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "rtos-trace", "rtos-trace-interrupt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time" }
 embassy-nrf = { version = "0.1.0", path = "../../embassy-nrf", features = ["nrf52840", "time-driver-rtc1", "gpiote", "unstable-pac"] }
 
diff --git a/examples/nrf52840/Cargo.toml b/examples/nrf52840/Cargo.toml
index cc88d92c7..3ece24066 100644
--- a/examples/nrf52840/Cargo.toml
+++ b/examples/nrf52840/Cargo.toml
@@ -12,7 +12,7 @@ nightly = ["embassy-executor/nightly", "embassy-nrf/nightly", "embassy-net/night
 [dependencies]
 embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "executor-interrupt", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime"] }
 embassy-nrf = { version = "0.1.0", path = "../../embassy-nrf", features = ["defmt", "nrf52840", "time-driver-rtc1", "gpiote", "unstable-pac", "time"] }
 embassy-net = { version = "0.1.0", path = "../../embassy-net", features = ["defmt", "tcp", "dhcpv4", "medium-ethernet"], optional = true }
diff --git a/examples/nrf5340/Cargo.toml b/examples/nrf5340/Cargo.toml
index e88ddf2f7..4134db46f 100644
--- a/examples/nrf5340/Cargo.toml
+++ b/examples/nrf5340/Cargo.toml
@@ -9,7 +9,7 @@ embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = [
     "defmt",
 ] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = [
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", 
     "nightly",
     "defmt",
     "integrated-timers",
diff --git a/examples/rp/.cargo/config.toml b/examples/rp/.cargo/config.toml
index d1c8c1c5a..2ee6fcb00 100644
--- a/examples/rp/.cargo/config.toml
+++ b/examples/rp/.cargo/config.toml
@@ -1,5 +1,5 @@
 [target.'cfg(all(target_arch = "arm", target_os = "none"))']
-runner = "probe-run --chip RP2040"
+runner = "probe-rs-cli run --chip RP2040"
 
 [build]
 target = "thumbv6m-none-eabi"        # Cortex-M0 and Cortex-M0+
diff --git a/examples/rp/Cargo.toml b/examples/rp/Cargo.toml
index 1e8870ed7..63d0ac82a 100644
--- a/examples/rp/Cargo.toml
+++ b/examples/rp/Cargo.toml
@@ -6,8 +6,9 @@ license = "MIT OR Apache-2.0"
 
 
 [dependencies]
+embassy-embedded-hal = { version = "0.1.0", path = "../../embassy-embedded-hal", features = ["defmt"] }
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime"] }
 embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["defmt", "unstable-traits", "nightly", "unstable-pac", "time-driver", "pio", "critical-section-impl"] }
 embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
@@ -30,8 +31,8 @@ display-interface = "0.4.1"
 byte-slice-cast = { version = "1.2.0", default-features = false }
 smart-leds = "0.3.0"
 
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9" }
-embedded-hal-async = "0.2.0-alpha.0"
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10" }
+embedded-hal-async = "0.2.0-alpha.1"
 embedded-io = { version = "0.4.0", features = ["async", "defmt"] }
 embedded-storage = { version = "0.3" }
 static_cell = "1.0.0"
diff --git a/examples/rp/src/bin/spi_display.rs b/examples/rp/src/bin/spi_display.rs
index 778cad3fa..85a19ce07 100644
--- a/examples/rp/src/bin/spi_display.rs
+++ b/examples/rp/src/bin/spi_display.rs
@@ -5,10 +5,13 @@
 use core::cell::RefCell;
 
 use defmt::*;
+use embassy_embedded_hal::shared_bus::blocking::spi::SpiDeviceWithConfig;
 use embassy_executor::Spawner;
 use embassy_rp::gpio::{Level, Output};
 use embassy_rp::spi;
 use embassy_rp::spi::{Blocking, Spi};
+use embassy_sync::blocking_mutex::raw::NoopRawMutex;
+use embassy_sync::blocking_mutex::Mutex;
 use embassy_time::Delay;
 use embedded_graphics::image::{Image, ImageRawLE};
 use embedded_graphics::mono_font::ascii::FONT_10X20;
@@ -21,10 +24,9 @@ use st7789::{Orientation, ST7789};
 use {defmt_rtt as _, panic_probe as _};
 
 use crate::my_display_interface::SPIDeviceInterface;
-use crate::shared_spi::SpiDeviceWithCs;
 use crate::touch::Touch;
 
-//const DISPLAY_FREQ: u32 = 64_000_000;
+const DISPLAY_FREQ: u32 = 64_000_000;
 const TOUCH_FREQ: u32 = 200_000;
 
 #[embassy_executor::main]
@@ -43,16 +45,20 @@ async fn main(_spawner: Spawner) {
     //let touch_irq = p.PIN_17;
 
     // create SPI
-    let mut config = spi::Config::default();
-    config.frequency = TOUCH_FREQ; // use the lowest freq
-    config.phase = spi::Phase::CaptureOnSecondTransition;
-    config.polarity = spi::Polarity::IdleHigh;
+    let mut display_config = spi::Config::default();
+    display_config.frequency = DISPLAY_FREQ;
+    display_config.phase = spi::Phase::CaptureOnSecondTransition;
+    display_config.polarity = spi::Polarity::IdleHigh;
+    let mut touch_config = spi::Config::default();
+    touch_config.frequency = TOUCH_FREQ;
+    touch_config.phase = spi::Phase::CaptureOnSecondTransition;
+    touch_config.polarity = spi::Polarity::IdleHigh;
 
-    let spi: Spi<'_, _, Blocking> = Spi::new_blocking(p.SPI1, clk, mosi, miso, config);
-    let spi_bus = RefCell::new(spi);
+    let spi: Spi<'_, _, Blocking> = Spi::new_blocking(p.SPI1, clk, mosi, miso, touch_config.clone());
+    let spi_bus: Mutex<NoopRawMutex, _> = Mutex::new(RefCell::new(spi));
 
-    let display_spi = SpiDeviceWithCs::new(&spi_bus, Output::new(display_cs, Level::High));
-    let touch_spi = SpiDeviceWithCs::new(&spi_bus, Output::new(touch_cs, Level::High));
+    let display_spi = SpiDeviceWithConfig::new(&spi_bus, Output::new(display_cs, Level::High), display_config);
+    let touch_spi = SpiDeviceWithConfig::new(&spi_bus, Output::new(touch_cs, Level::High), touch_config);
 
     let mut touch = Touch::new(touch_spi);
 
@@ -104,85 +110,9 @@ async fn main(_spawner: Spawner) {
     }
 }
 
-mod shared_spi {
-    use core::cell::RefCell;
-    use core::fmt::Debug;
-
-    use embedded_hal_1::digital::OutputPin;
-    use embedded_hal_1::spi;
-    use embedded_hal_1::spi::SpiDevice;
-
-    #[derive(Copy, Clone, Eq, PartialEq, Debug)]
-    pub enum SpiDeviceWithCsError<BUS, CS> {
-        #[allow(unused)] // will probably use in the future when adding a flush() to SpiBus
-        Spi(BUS),
-        Cs(CS),
-    }
-
-    impl<BUS, CS> spi::Error for SpiDeviceWithCsError<BUS, CS>
-    where
-        BUS: spi::Error + Debug,
-        CS: Debug,
-    {
-        fn kind(&self) -> spi::ErrorKind {
-            match self {
-                Self::Spi(e) => e.kind(),
-                Self::Cs(_) => spi::ErrorKind::Other,
-            }
-        }
-    }
-
-    pub struct SpiDeviceWithCs<'a, BUS, CS> {
-        bus: &'a RefCell<BUS>,
-        cs: CS,
-    }
-
-    impl<'a, BUS, CS> SpiDeviceWithCs<'a, BUS, CS> {
-        pub fn new(bus: &'a RefCell<BUS>, cs: CS) -> Self {
-            Self { bus, cs }
-        }
-    }
-
-    impl<'a, BUS, CS> spi::ErrorType for SpiDeviceWithCs<'a, BUS, CS>
-    where
-        BUS: spi::ErrorType,
-        CS: OutputPin,
-    {
-        type Error = SpiDeviceWithCsError<BUS::Error, CS::Error>;
-    }
-
-    impl<'a, BUS, CS> SpiDevice for SpiDeviceWithCs<'a, BUS, CS>
-    where
-        BUS: spi::SpiBusFlush,
-        CS: OutputPin,
-    {
-        type Bus = BUS;
-
-        fn transaction<R>(
-            &mut self,
-            f: impl FnOnce(&mut Self::Bus) -> Result<R, BUS::Error>,
-        ) -> Result<R, Self::Error> {
-            let mut bus = self.bus.borrow_mut();
-            self.cs.set_low().map_err(SpiDeviceWithCsError::Cs)?;
-
-            let f_res = f(&mut bus);
-
-            // On failure, it's important to still flush and deassert CS.
-            let flush_res = bus.flush();
-            let cs_res = self.cs.set_high();
-
-            let f_res = f_res.map_err(SpiDeviceWithCsError::Spi)?;
-            flush_res.map_err(SpiDeviceWithCsError::Spi)?;
-            cs_res.map_err(SpiDeviceWithCsError::Cs)?;
-
-            Ok(f_res)
-        }
-    }
-}
-
 /// Driver for the XPT2046 resistive touchscreen sensor
 mod touch {
-    use embedded_hal_1::spi::{SpiBus, SpiBusRead, SpiBusWrite, SpiDevice};
+    use embedded_hal_1::spi::{Operation, SpiDevice};
 
     struct Calibration {
         x1: i32,
@@ -209,7 +139,6 @@ mod touch {
     impl<SPI> Touch<SPI>
     where
         SPI: SpiDevice,
-        SPI::Bus: SpiBus,
     {
         pub fn new(spi: SPI) -> Self {
             Self { spi }
@@ -219,13 +148,12 @@ mod touch {
             let mut x = [0; 2];
             let mut y = [0; 2];
             self.spi
-                .transaction(|bus| {
-                    bus.write(&[0x90])?;
-                    bus.read(&mut x)?;
-                    bus.write(&[0xd0])?;
-                    bus.read(&mut y)?;
-                    Ok(())
-                })
+                .transaction(&mut [
+                    Operation::Write(&[0x90]),
+                    Operation::Read(&mut x),
+                    Operation::Write(&[0xd0]),
+                    Operation::Read(&mut y),
+                ])
                 .unwrap();
 
             let x = (u16::from_be_bytes(x) >> 3) as i32;
@@ -247,7 +175,7 @@ mod touch {
 mod my_display_interface {
     use display_interface::{DataFormat, DisplayError, WriteOnlyDataCommand};
     use embedded_hal_1::digital::OutputPin;
-    use embedded_hal_1::spi::{SpiBusWrite, SpiDevice};
+    use embedded_hal_1::spi::SpiDeviceWrite;
 
     /// SPI display interface.
     ///
@@ -259,8 +187,7 @@ mod my_display_interface {
 
     impl<SPI, DC> SPIDeviceInterface<SPI, DC>
     where
-        SPI: SpiDevice,
-        SPI::Bus: SpiBusWrite,
+        SPI: SpiDeviceWrite,
         DC: OutputPin,
     {
         /// Create new SPI interface for communciation with a display driver
@@ -271,42 +198,27 @@ mod my_display_interface {
 
     impl<SPI, DC> WriteOnlyDataCommand for SPIDeviceInterface<SPI, DC>
     where
-        SPI: SpiDevice,
-        SPI::Bus: SpiBusWrite,
+        SPI: SpiDeviceWrite,
         DC: OutputPin,
     {
         fn send_commands(&mut self, cmds: DataFormat<'_>) -> Result<(), DisplayError> {
-            let r = self.spi.transaction(|bus| {
-                // 1 = data, 0 = command
-                if let Err(_) = self.dc.set_low() {
-                    return Ok(Err(DisplayError::DCError));
-                }
+            // 1 = data, 0 = command
+            self.dc.set_low().map_err(|_| DisplayError::DCError)?;
 
-                // Send words over SPI
-                send_u8(bus, cmds)?;
-
-                Ok(Ok(()))
-            });
-            r.map_err(|_| DisplayError::BusWriteError)?
+            send_u8(&mut self.spi, cmds).map_err(|_| DisplayError::BusWriteError)?;
+            Ok(())
         }
 
         fn send_data(&mut self, buf: DataFormat<'_>) -> Result<(), DisplayError> {
-            let r = self.spi.transaction(|bus| {
-                // 1 = data, 0 = command
-                if let Err(_) = self.dc.set_high() {
-                    return Ok(Err(DisplayError::DCError));
-                }
+            // 1 = data, 0 = command
+            self.dc.set_high().map_err(|_| DisplayError::DCError)?;
 
-                // Send words over SPI
-                send_u8(bus, buf)?;
-
-                Ok(Ok(()))
-            });
-            r.map_err(|_| DisplayError::BusWriteError)?
+            send_u8(&mut self.spi, buf).map_err(|_| DisplayError::BusWriteError)?;
+            Ok(())
         }
     }
 
-    fn send_u8<T: SpiBusWrite>(spi: &mut T, words: DataFormat<'_>) -> Result<(), T::Error> {
+    fn send_u8<T: SpiDeviceWrite>(spi: &mut T, words: DataFormat<'_>) -> Result<(), T::Error> {
         match words {
             DataFormat::U8(slice) => spi.write(slice),
             DataFormat::U16(slice) => {
diff --git a/examples/std/Cargo.toml b/examples/std/Cargo.toml
index 8087df09a..ff08e378c 100644
--- a/examples/std/Cargo.toml
+++ b/examples/std/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["log"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["log", "std", "nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-std", "executor-thread", "log", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["log", "std", "nightly"] }
 embassy-net = { version = "0.1.0", path = "../../embassy-net", features=[ "std", "nightly", "log", "medium-ethernet", "tcp", "udp", "dns", "dhcpv4", "unstable-traits", "proto-ipv6"] }
 embassy-net-driver = { version = "0.1.0", path = "../../embassy-net-driver" }
diff --git a/examples/stm32c0/Cargo.toml b/examples/stm32c0/Cargo.toml
index 0095a680c..3b1d888f6 100644
--- a/examples/stm32c0/Cargo.toml
+++ b/examples/stm32c0/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "time-driver-any", "stm32c031c6", "memory-x", "unstable-pac", "exti"]  }
 
diff --git a/examples/stm32f0/Cargo.toml b/examples/stm32f0/Cargo.toml
index 89d99b6d3..5c82c5579 100644
--- a/examples/stm32f0/Cargo.toml
+++ b/examples/stm32f0/Cargo.toml
@@ -13,7 +13,7 @@ defmt = "0.3"
 defmt-rtt = "0.4"
 panic-probe = "0.3"
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "executor-interrupt", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "memory-x", "stm32f091rc", "time-driver-any", "exti", "unstable-pac"] }
 static_cell = "1.0"
diff --git a/examples/stm32f0/src/bin/adc.rs b/examples/stm32f0/src/bin/adc.rs
new file mode 100644
index 000000000..8ed9f98f8
--- /dev/null
+++ b/examples/stm32f0/src/bin/adc.rs
@@ -0,0 +1,35 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::adc::{Adc, SampleTime};
+use embassy_time::{Delay, Duration, Timer};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) { // ADC example: logs the raw reading of PA1 and its millivolt value every 100 ms
+    let p = embassy_stm32::init(Default::default());
+    info!("Hello World!");
+
+    let mut adc = Adc::new(p.ADC, &mut Delay);
+    adc.set_sample_time(SampleTime::Cycles71_5);
+    let mut pin = p.PA1; // input pin read in the loop below
+    // Read the internal reference once up front; the closure below divides by this sample.
+    let mut vrefint = adc.enable_vref(&mut Delay);
+    let vrefint_sample = adc.read_internal(&mut vrefint);
+    let convert_to_millivolts = |sample| {
+        // From https://www.st.com/resource/en/datasheet/stm32f031c6.pdf
+        // 6.3.4 Embedded reference voltage
+        const VREFINT_MV: u32 = 1230; // mV
+        // Scale in u32 (sample * VREFINT_MV / vrefint_sample), then narrow the result to u16.
+        (u32::from(sample) * VREFINT_MV / u32::from(vrefint_sample)) as u16
+    };
+
+    loop {
+        let v = adc.read(&mut pin);
+        info!("--> {} - {} mV", v, convert_to_millivolts(v)); // raw reading, then converted value
+        Timer::after(Duration::from_millis(100)).await;
+    }
+}
diff --git a/examples/stm32f0/src/bin/multiprio.rs b/examples/stm32f0/src/bin/multiprio.rs
index e0dc8c989..430a805fc 100644
--- a/examples/stm32f0/src/bin/multiprio.rs
+++ b/examples/stm32f0/src/bin/multiprio.rs
@@ -62,7 +62,7 @@ use core::mem;
 use cortex_m::peripheral::NVIC;
 use cortex_m_rt::entry;
 use defmt::*;
-use embassy_stm32::executor::{Executor, InterruptExecutor};
+use embassy_executor::{Executor, InterruptExecutor};
 use embassy_stm32::interrupt;
 use embassy_stm32::pac::Interrupt;
 use embassy_time::{Duration, Instant, Timer};
diff --git a/examples/stm32f1/Cargo.toml b/examples/stm32f1/Cargo.toml
index 53f369b3a..99f37cdda 100644
--- a/examples/stm32f1/Cargo.toml
+++ b/examples/stm32f1/Cargo.toml
@@ -6,9 +6,9 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
-embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32f103c8", "unstable-pac", "memory-x", "time-driver-any"]  }
+embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32f103c8", "unstable-pac", "memory-x", "time-driver-any", "unstable-traits" ]  }
 embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
 embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
 
diff --git a/examples/stm32f2/Cargo.toml b/examples/stm32f2/Cargo.toml
index afaf9a0c9..ffb232310 100644
--- a/examples/stm32f2/Cargo.toml
+++ b/examples/stm32f2/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32f207zg", "unstable-pac", "memory-x", "time-driver-any", "exti"]  }
 
diff --git a/examples/stm32f3/Cargo.toml b/examples/stm32f3/Cargo.toml
index 69ebef786..38f11201d 100644
--- a/examples/stm32f3/Cargo.toml
+++ b/examples/stm32f3/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "executor-interrupt", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32f303ze", "unstable-pac", "memory-x", "time-driver-any", "exti"]  }
 embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
diff --git a/examples/stm32f3/src/bin/flash.rs b/examples/stm32f3/src/bin/flash.rs
index baa7484d0..e40ad4fc0 100644
--- a/examples/stm32f3/src/bin/flash.rs
+++ b/examples/stm32f3/src/bin/flash.rs
@@ -15,7 +15,7 @@ async fn main(_spawner: Spawner) {
 
     const ADDR: u32 = 0x26000;
 
-    let mut f = Flash::new(p.FLASH);
+    let mut f = Flash::new(p.FLASH).into_regions().bank1_region;
 
     info!("Reading...");
     let mut buf = [0u8; 8];
diff --git a/examples/stm32f3/src/bin/multiprio.rs b/examples/stm32f3/src/bin/multiprio.rs
index 77df51ac7..5d010f799 100644
--- a/examples/stm32f3/src/bin/multiprio.rs
+++ b/examples/stm32f3/src/bin/multiprio.rs
@@ -62,7 +62,7 @@ use core::mem;
 use cortex_m::peripheral::NVIC;
 use cortex_m_rt::entry;
 use defmt::*;
-use embassy_stm32::executor::{Executor, InterruptExecutor};
+use embassy_executor::{Executor, InterruptExecutor};
 use embassy_stm32::interrupt;
 use embassy_stm32::pac::Interrupt;
 use embassy_time::{Duration, Instant, Timer};
diff --git a/examples/stm32f4/Cargo.toml b/examples/stm32f4/Cargo.toml
index 7a7bab5bb..d967d8501 100644
--- a/examples/stm32f4/Cargo.toml
+++ b/examples/stm32f4/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "executor-interrupt", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "unstable-traits", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "unstable-traits", "defmt", "stm32f429zi", "unstable-pac", "memory-x", "time-driver-any", "exti"]  }
 embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
diff --git a/examples/stm32f4/src/bin/flash.rs b/examples/stm32f4/src/bin/flash.rs
index 7ea068a42..bd3a7c95e 100644
--- a/examples/stm32f4/src/bin/flash.rs
+++ b/examples/stm32f4/src/bin/flash.rs
@@ -5,7 +5,6 @@
 use defmt::{info, unwrap};
 use embassy_executor::Spawner;
 use embassy_stm32::flash::Flash;
-use embedded_storage::nor_flash::{NorFlash, ReadNorFlash};
 use {defmt_rtt as _, panic_probe as _};
 
 #[embassy_executor::main]
@@ -13,6 +12,8 @@ async fn main(_spawner: Spawner) {
     let p = embassy_stm32::init(Default::default());
     info!("Hello Flash!");
 
+    // One can also call `into_regions()` to get access to NorFlash implementations
+    // for each of the unique characteristics.
     let mut f = Flash::new(p.FLASH);
 
     // Sector 5
@@ -30,19 +31,19 @@ fn test_flash(f: &mut Flash, offset: u32, size: u32) {
 
     info!("Reading...");
     let mut buf = [0u8; 32];
-    unwrap!(f.read(offset, &mut buf));
+    unwrap!(f.blocking_read(offset, &mut buf));
     info!("Read: {=[u8]:x}", buf);
 
     info!("Erasing...");
-    unwrap!(f.erase(offset, offset + size));
+    unwrap!(f.blocking_erase(offset, offset + size));
 
     info!("Reading...");
     let mut buf = [0u8; 32];
-    unwrap!(f.read(offset, &mut buf));
+    unwrap!(f.blocking_read(offset, &mut buf));
     info!("Read after erase: {=[u8]:x}", buf);
 
     info!("Writing...");
-    unwrap!(f.write(
+    unwrap!(f.blocking_write(
         offset,
         &[
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
@@ -52,7 +53,7 @@ fn test_flash(f: &mut Flash, offset: u32, size: u32) {
 
     info!("Reading...");
     let mut buf = [0u8; 32];
-    unwrap!(f.read(offset, &mut buf));
+    unwrap!(f.blocking_read(offset, &mut buf));
     info!("Read: {=[u8]:x}", buf);
     assert_eq!(
         &buf[..],
diff --git a/examples/stm32f4/src/bin/mco.rs b/examples/stm32f4/src/bin/mco.rs
new file mode 100644
index 000000000..2b9ceebc3
--- /dev/null
+++ b/examples/stm32f4/src/bin/mco.rs
@@ -0,0 +1,30 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::gpio::{Level, Output, Speed};
+use embassy_stm32::rcc::{Mco, Mco1Source, Mco2Source, McoClock};
+use embassy_time::{Duration, Timer};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) { // MCO example: sets up both MCO outputs, then blinks an LED forever
+    let p = embassy_stm32::init(Default::default());
+    info!("Hello World!");
+    // MCO1 on PA8: Hsi source, DIV1. MCO2 on PC9: Pll source, DIV4.
+    let _mco1 = Mco::new(p.MCO1, p.PA8, Mco1Source::Hsi, McoClock::DIV1); // NOTE(review): binding presumably kept so the handle lives for all of main - confirm
+    let _mco2 = Mco::new(p.MCO2, p.PC9, Mco2Source::Pll, McoClock::DIV4);
+    let mut led = Output::new(p.PB7, Level::High, Speed::Low);
+    // Toggle the PB7 output every 300 ms, logging each state.
+    loop {
+        info!("high");
+        led.set_high();
+        Timer::after(Duration::from_millis(300)).await;
+
+        info!("low");
+        led.set_low();
+        Timer::after(Duration::from_millis(300)).await;
+    }
+}
diff --git a/examples/stm32f4/src/bin/multiprio.rs b/examples/stm32f4/src/bin/multiprio.rs
index 77df51ac7..5d010f799 100644
--- a/examples/stm32f4/src/bin/multiprio.rs
+++ b/examples/stm32f4/src/bin/multiprio.rs
@@ -62,7 +62,7 @@ use core::mem;
 use cortex_m::peripheral::NVIC;
 use cortex_m_rt::entry;
 use defmt::*;
-use embassy_stm32::executor::{Executor, InterruptExecutor};
+use embassy_executor::{Executor, InterruptExecutor};
 use embassy_stm32::interrupt;
 use embassy_stm32::pac::Interrupt;
 use embassy_time::{Duration, Instant, Timer};
diff --git a/examples/stm32f4/src/bin/pwm_complementary.rs b/examples/stm32f4/src/bin/pwm_complementary.rs
new file mode 100644
index 000000000..6e17f3fd3
--- /dev/null
+++ b/examples/stm32f4/src/bin/pwm_complementary.rs
@@ -0,0 +1,77 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::pwm::complementary_pwm::{Ckd, ComplementaryPwm, ComplementaryPwmPin};
+use embassy_stm32::pwm::simple_pwm::PwmPin;
+use embassy_stm32::pwm::Channel;
+use embassy_stm32::time::khz;
+use embassy_time::{Duration, Timer};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let p = embassy_stm32::init(Default::default());
+    info!("Hello World!");
+    // Channel 1 pin on PE9 and its complementary pin on PA7, both driven by TIM1 (constructed with khz(10)).
+    let ch1 = PwmPin::new_ch1(p.PE9);
+    let ch1n = ComplementaryPwmPin::new_ch1(p.PA7);
+    let mut pwm = ComplementaryPwm::new(
+        p.TIM1,
+        Some(ch1),
+        Some(ch1n),
+        None, // presumably the remaining channel / complementary pin slots, left unused - confirm against ComplementaryPwm::new
+        None,
+        None,
+        None,
+        None,
+        None,
+        khz(10),
+    );
+
+    /*
+        Dead-time = T_clk * T_dts * T_dtg
+
+        T_dts:
+        This bit-field indicates the division ratio between the timer clock (CK_INT) frequency and the
+        dead-time and sampling clock (tDTS) used by the dead-time generators and the digital filters
+        (ETR, TIx),
+        00: tDTS=tCK_INT
+        01: tDTS=2*tCK_INT
+        10: tDTS=4*tCK_INT
+
+        T_dtg:
+        This bit-field defines the duration of the dead-time inserted between the complementary
+        outputs. DT corresponds to this duration.
+        DTG[7:5]=0xx => DT=DTG[7:0]x tdtg with tdtg=tDTS.
+        DTG[7:5]=10x => DT=(64+DTG[5:0])xtdtg with Tdtg=2xtDTS.
+        DTG[7:5]=110 => DT=(32+DTG[4:0])xtdtg with Tdtg=8xtDTS.
+        DTG[7:5]=111 => DT=(32+DTG[4:0])xtdtg with Tdtg=16xtDTS.
+        Example if TDTS=125ns (8MHz), dead-time possible values are:
+        0 to 15875 ns by 125 ns steps,
+        16 us to 31750 ns by 250 ns steps,
+        32 us to 63us by 1 us steps,
+        64 us to 126 us by 2 us steps
+    */
+    pwm.set_dead_time_clock_division(Ckd::DIV1);
+    pwm.set_dead_time_value(0); // DTG = 0: per the table above, DT = DTG[7:0] x tdtg = 0
+
+    let max = pwm.get_max_duty();
+    pwm.enable(Channel::Ch1);
+
+    info!("PWM initialized");
+    info!("PWM max duty {}", max);
+    // Step the Ch1 duty through 0, max/4, max/2 and max-1, holding each value for 300 ms.
+    loop {
+        pwm.set_duty(Channel::Ch1, 0);
+        Timer::after(Duration::from_millis(300)).await;
+        pwm.set_duty(Channel::Ch1, max / 4);
+        Timer::after(Duration::from_millis(300)).await;
+        pwm.set_duty(Channel::Ch1, max / 2);
+        Timer::after(Duration::from_millis(300)).await;
+        pwm.set_duty(Channel::Ch1, max - 1);
+        Timer::after(Duration::from_millis(300)).await;
+    }
+}
diff --git a/examples/stm32f4/src/bin/usart_buffered.rs b/examples/stm32f4/src/bin/usart_buffered.rs
index dd171fe13..a93f8baeb 100644
--- a/examples/stm32f4/src/bin/usart_buffered.rs
+++ b/examples/stm32f4/src/bin/usart_buffered.rs
@@ -5,7 +5,7 @@
 use defmt::*;
 use embassy_executor::Spawner;
 use embassy_stm32::interrupt;
-use embassy_stm32::usart::{BufferedUart, Config, State};
+use embassy_stm32::usart::{BufferedUart, Config};
 use embedded_io::asynch::BufRead;
 use {defmt_rtt as _, panic_probe as _};
 
@@ -16,20 +16,10 @@ async fn main(_spawner: Spawner) {
 
     let config = Config::default();
 
-    let mut state = State::new();
     let irq = interrupt::take!(USART3);
     let mut tx_buf = [0u8; 32];
     let mut rx_buf = [0u8; 32];
-    let mut buf_usart = BufferedUart::new(
-        &mut state,
-        p.USART3,
-        p.PD9,
-        p.PD8,
-        irq,
-        &mut tx_buf,
-        &mut rx_buf,
-        config,
-    );
+    let mut buf_usart = BufferedUart::new(p.USART3, irq, p.PD9, p.PD8, &mut tx_buf, &mut rx_buf, config);
 
     loop {
         let buf = buf_usart.fill_buf().await.unwrap();
diff --git a/examples/stm32f7/Cargo.toml b/examples/stm32f7/Cargo.toml
index ea4cbd808..74e7bf53d 100644
--- a/examples/stm32f7/Cargo.toml
+++ b/examples/stm32f7/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32f767zi", "unstable-pac", "time-driver-any", "exti"]  }
 embassy-net = { path = "../../embassy-net", features = ["defmt", "nightly", "tcp", "dhcpv4", "medium-ethernet"] }
diff --git a/examples/stm32f7/src/bin/flash.rs b/examples/stm32f7/src/bin/flash.rs
index 4a7bca1fa..aabfe8557 100644
--- a/examples/stm32f7/src/bin/flash.rs
+++ b/examples/stm32f7/src/bin/flash.rs
@@ -14,12 +14,12 @@ async fn main(_spawner: Spawner) {
     let p = embassy_stm32::init(Default::default());
     info!("Hello Flash!");
 
-    const ADDR: u32 = 0x8_0000;
+    const ADDR: u32 = 0x8_0000; // This is the offset into the third region, the absolute address is 4x32K + 128K + 0x8_0000.
 
     // wait a bit before accessing the flash
     Timer::after(Duration::from_millis(300)).await;
 
-    let mut f = Flash::new(p.FLASH);
+    let mut f = Flash::new(p.FLASH).into_regions().bank1_region3;
 
     info!("Reading...");
     let mut buf = [0u8; 32];
diff --git a/examples/stm32g0/Cargo.toml b/examples/stm32g0/Cargo.toml
index e7273c9fc..03bdbcea3 100644
--- a/examples/stm32g0/Cargo.toml
+++ b/examples/stm32g0/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "time-driver-any", "stm32g071rb", "memory-x", "unstable-pac", "exti"]  }
 
diff --git a/examples/stm32g4/Cargo.toml b/examples/stm32g4/Cargo.toml
index 8a57a8ef0..4e4150350 100644
--- a/examples/stm32g4/Cargo.toml
+++ b/examples/stm32g4/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "time-driver-any", "stm32g491re", "memory-x", "unstable-pac", "exti"]  }
 embassy-hal-common = {version = "0.1.0", path = "../../embassy-hal-common" }
diff --git a/examples/stm32h5/.cargo/config.toml b/examples/stm32h5/.cargo/config.toml
new file mode 100644
index 000000000..c8b864b6c
--- /dev/null
+++ b/examples/stm32h5/.cargo/config.toml
@@ -0,0 +1,8 @@
+[target.thumbv8m.main-none-eabihf]
+runner = 'probe-rs-cli run --chip STM32H563ZITx'
+
+[build]
+target = "thumbv8m.main-none-eabihf"
+
+[env]
+DEFMT_LOG = "trace"
diff --git a/examples/stm32h5/Cargo.toml b/examples/stm32h5/Cargo.toml
new file mode 100644
index 000000000..b77d376ca
--- /dev/null
+++ b/examples/stm32h5/Cargo.toml
@@ -0,0 +1,71 @@
+[package]
+edition = "2021"
+name = "embassy-stm32h5-examples"
+version = "0.1.0"
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
+embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "unstable-traits", "tick-hz-32_768"] }
+embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32h563zi", "time-driver-any", "exti", "unstable-pac", "unstable-traits"] }
+embassy-net = { path = "../../embassy-net", features = ["defmt", "nightly", "tcp", "dhcpv4", "medium-ethernet", "unstable-traits", "proto-ipv6"] }
+embedded-io = { version = "0.4.0", features = ["async"] }
+embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
+
+defmt = "0.3"
+defmt-rtt = "0.4"
+
+cortex-m = { version = "0.7.6", features = ["critical-section-single-core"] }
+cortex-m-rt = "0.7.0"
+embedded-hal = "0.2.6"
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10" }
+embedded-hal-async = { version = "=0.2.0-alpha.1" }
+embedded-nal-async = "0.4.0"
+panic-probe = { version = "0.3", features = ["print-defmt"] }
+futures = { version = "0.3.17", default-features = false, features = ["async-await"] }
+heapless = { version = "0.7.5", default-features = false }
+rand_core = "0.6.3"
+critical-section = "1.1"
+micromath = "2.0.0"
+stm32-fmc = "0.2.4"
+embedded-storage = "0.3.0"
+static_cell = "1.0"
+
+# cargo build/run
+[profile.dev]
+codegen-units = 1
+debug = 2
+debug-assertions = true # <-
+incremental = false
+opt-level = 3 # <-
+overflow-checks = true # <-
+
+# cargo test
+[profile.test]
+codegen-units = 1
+debug = 2
+debug-assertions = true # <-
+incremental = false
+opt-level = 3 # <-
+overflow-checks = true # <-
+
+# cargo build/run --release
+[profile.release]
+codegen-units = 1
+debug = 2
+debug-assertions = false # <-
+incremental = false
+lto = 'fat'
+opt-level = 3 # <-
+overflow-checks = false # <-
+
+# cargo bench
+[profile.bench]
+codegen-units = 1
+debug = 2
+debug-assertions = false # <-
+incremental = false
+lto = 'fat'
+opt-level = 3 # <-
+overflow-checks = false # <-
diff --git a/examples/stm32h5/build.rs b/examples/stm32h5/build.rs
new file mode 100644
index 000000000..8cd32d7ed
--- /dev/null
+++ b/examples/stm32h5/build.rs
@@ -0,0 +1,5 @@
+fn main() {
+    println!("cargo:rustc-link-arg-bins=--nmagic");
+    println!("cargo:rustc-link-arg-bins=-Tlink.x");
+    println!("cargo:rustc-link-arg-bins=-Tdefmt.x");
+}
diff --git a/examples/stm32h5/memory.x b/examples/stm32h5/memory.x
new file mode 100644
index 000000000..456061509
--- /dev/null
+++ b/examples/stm32h5/memory.x
@@ -0,0 +1,5 @@
+MEMORY
+{
+    FLASH : ORIGIN = 0x08000000, LENGTH = 0x200000
+    RAM   : ORIGIN = 0x20000000, LENGTH =  0x50000
+}
diff --git a/examples/stm32h5/src/bin/blinky.rs b/examples/stm32h5/src/bin/blinky.rs
new file mode 100644
index 000000000..f9bf90d2e
--- /dev/null
+++ b/examples/stm32h5/src/bin/blinky.rs
@@ -0,0 +1,27 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::gpio::{Level, Output, Speed};
+use embassy_time::{Duration, Timer};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let p = embassy_stm32::init(Default::default());
+    info!("Hello World!");
+
+    let mut led = Output::new(p.PB0, Level::High, Speed::Low);
+
+    loop {
+        info!("high");
+        led.set_high();
+        Timer::after(Duration::from_millis(500)).await;
+
+        info!("low");
+        led.set_low();
+        Timer::after(Duration::from_millis(500)).await;
+    }
+}
diff --git a/examples/stm32h5/src/bin/button_exti.rs b/examples/stm32h5/src/bin/button_exti.rs
new file mode 100644
index 000000000..dfe587d41
--- /dev/null
+++ b/examples/stm32h5/src/bin/button_exti.rs
@@ -0,0 +1,27 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::exti::ExtiInput;
+use embassy_stm32::gpio::{Input, Pull};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let p = embassy_stm32::init(Default::default());
+    info!("Hello World!");
+
+    let button = Input::new(p.PC13, Pull::Down);
+    let mut button = ExtiInput::new(button, p.EXTI13);
+
+    info!("Press the USER button...");
+
+    loop {
+        button.wait_for_rising_edge().await;
+        info!("Pressed!");
+        button.wait_for_falling_edge().await;
+        info!("Released!");
+    }
+}
diff --git a/examples/stm32h5/src/bin/eth.rs b/examples/stm32h5/src/bin/eth.rs
new file mode 100644
index 000000000..6d650da9e
--- /dev/null
+++ b/examples/stm32h5/src/bin/eth.rs
@@ -0,0 +1,133 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_net::tcp::TcpSocket;
+use embassy_net::{Ipv4Address, Stack, StackResources};
+use embassy_stm32::eth::generic_smi::GenericSMI;
+use embassy_stm32::eth::{Ethernet, PacketQueue};
+use embassy_stm32::peripherals::ETH;
+use embassy_stm32::rcc::{AHBPrescaler, APBPrescaler, Hse, HseMode, Pll, PllSource, Sysclk, VoltageScale};
+use embassy_stm32::rng::Rng;
+use embassy_stm32::time::Hertz;
+use embassy_stm32::{interrupt, Config};
+use embassy_time::{Duration, Timer};
+use embedded_io::asynch::Write;
+use rand_core::RngCore;
+use static_cell::StaticCell;
+use {defmt_rtt as _, panic_probe as _};
+
+macro_rules! singleton {
+    ($val:expr) => {{
+        type T = impl Sized;
+        static STATIC_CELL: StaticCell<T> = StaticCell::new();
+        let (x,) = STATIC_CELL.init(($val,));
+        x
+    }};
+}
+
+type Device = Ethernet<'static, ETH, GenericSMI>;
+
+#[embassy_executor::task]
+async fn net_task(stack: &'static Stack<Device>) -> ! {
+    stack.run().await
+}
+
+#[embassy_executor::main]
+async fn main(spawner: Spawner) -> ! {
+    let mut config = Config::default();
+    config.rcc.hsi = None;
+    config.rcc.hsi48 = true; // needed for rng
+    config.rcc.hse = Some(Hse {
+        freq: Hertz(8_000_000),
+        mode: HseMode::BypassDigital,
+    });
+    config.rcc.pll1 = Some(Pll {
+        source: PllSource::Hse,
+        prediv: 2,
+        mul: 125,
+        divp: Some(2), // 8 MHz / 2 * 125 / 2 = 250 MHz, selected as the system clock below
+        divq: Some(2),
+        divr: None,
+    });
+    config.rcc.ahb_pre = AHBPrescaler::NotDivided;
+    config.rcc.apb1_pre = APBPrescaler::NotDivided;
+    config.rcc.apb2_pre = APBPrescaler::NotDivided;
+    config.rcc.apb3_pre = APBPrescaler::NotDivided;
+    config.rcc.sys = Sysclk::Pll1P;
+    config.rcc.voltage_scale = VoltageScale::Scale0;
+    let p = embassy_stm32::init(config);
+    info!("Hello World!");
+
+    // Generate a random seed for the network stack from the hardware RNG.
+    let mut rng = Rng::new(p.RNG);
+    let mut seed = [0; 8];
+    rng.fill_bytes(&mut seed);
+    let seed = u64::from_le_bytes(seed);
+
+    let eth_int = interrupt::take!(ETH);
+    let mac_addr = [0x00, 0x00, 0xDE, 0xAD, 0xBE, 0xEF];
+
+    let device = Ethernet::new(
+        singleton!(PacketQueue::<4, 4>::new()),
+        p.ETH,
+        eth_int,
+        p.PA1,
+        p.PA2,
+        p.PC1,
+        p.PA7,
+        p.PC4,
+        p.PC5,
+        p.PG13,
+        p.PB15,
+        p.PG11,
+        GenericSMI,
+        mac_addr,
+        0,
+    );
+
+    let config = embassy_net::Config::Dhcp(Default::default());
+    //let config = embassy_net::Config::Static(embassy_net::StaticConfig {
+    //    address: Ipv4Cidr::new(Ipv4Address::new(10, 42, 0, 61), 24),
+    //    dns_servers: Vec::new(),
+    //    gateway: Some(Ipv4Address::new(10, 42, 0, 1)),
+    //});
+
+    // Init network stack
+    let stack = &*singleton!(Stack::new(device, config, singleton!(StackResources::<2>::new()), seed));
+
+    // Launch network task
+    unwrap!(spawner.spawn(net_task(stack)));
+
+    info!("Network task initialized");
+
+    // Socket buffers are reused by every connection attempt of the loop below.
+    let mut rx_buffer = [0; 1024];
+    let mut tx_buffer = [0; 1024];
+
+    loop {
+        let mut socket = TcpSocket::new(stack, &mut rx_buffer, &mut tx_buffer);
+
+        socket.set_timeout(Some(embassy_net::SmolDuration::from_secs(10)));
+
+        let remote_endpoint = (Ipv4Address::new(10, 42, 0, 1), 8000);
+        info!("connecting...");
+        let r = socket.connect(remote_endpoint).await;
+        if let Err(e) = r {
+            info!("connect error: {:?}", e);
+            Timer::after(Duration::from_secs(3)).await;
+            continue;
+        }
+        info!("connected!");
+        loop {
+            let r = socket.write_all(b"Hello\n").await;
+            if let Err(e) = r {
+                info!("write error: {:?}", e);
+                break; // stop writing to this socket; the outer loop creates a new one and reconnects
+            }
+            Timer::after(Duration::from_secs(1)).await;
+        }
+    }
+}
diff --git a/examples/stm32h5/src/bin/i2c.rs b/examples/stm32h5/src/bin/i2c.rs
new file mode 100644
index 000000000..6cbf58bbc
--- /dev/null
+++ b/examples/stm32h5/src/bin/i2c.rs
@@ -0,0 +1,44 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::i2c::{Error, I2c, TimeoutI2c};
+use embassy_stm32::interrupt;
+use embassy_stm32::time::Hertz;
+use embassy_time::Duration;
+use {defmt_rtt as _, panic_probe as _};
+
+const ADDRESS: u8 = 0x5F;
+const WHOAMI: u8 = 0x0F;
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    info!("Hello world!");
+    let p = embassy_stm32::init(Default::default());
+
+    let irq = interrupt::take!(I2C2_EV);
+    let mut i2c = I2c::new(
+        p.I2C2,
+        p.PB10,
+        p.PB11,
+        irq,
+        p.GPDMA1_CH4,
+        p.GPDMA1_CH5,
+        Hertz(100_000),
+        Default::default(),
+    );
+
+    // I2C bus can freeze if SCL line is shorted or due to a broken device that clock stretches for too long.
+    // TimeoutI2c allows recovering from such errors by throwing `Error::Timeout` after a given delay.
+    let mut timeout_i2c = TimeoutI2c::new(&mut i2c, Duration::from_millis(1000));
+
+    let mut data = [0u8; 1];
+
+    match timeout_i2c.blocking_write_read(ADDRESS, &[WHOAMI], &mut data) {
+        Ok(()) => info!("Whoami: {}", data[0]),
+        Err(Error::Timeout) => error!("Operation timed out"),
+        Err(e) => error!("I2c Error: {:?}", e),
+    }
+}
diff --git a/examples/stm32h5/src/bin/rng.rs b/examples/stm32h5/src/bin/rng.rs
new file mode 100644
index 000000000..af9be0b62
--- /dev/null
+++ b/examples/stm32h5/src/bin/rng.rs
@@ -0,0 +1,20 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::rng::Rng;
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let p = embassy_stm32::init(Default::default());
+    info!("Hello World!");
+
+    let mut rng = Rng::new(p.RNG);
+
+    let mut buf = [0u8; 16];
+    unwrap!(rng.async_fill_bytes(&mut buf).await);
+    info!("random bytes: {:02x}", buf);
+}
diff --git a/examples/stm32h5/src/bin/usart.rs b/examples/stm32h5/src/bin/usart.rs
new file mode 100644
index 000000000..405f18ec7
--- /dev/null
+++ b/examples/stm32h5/src/bin/usart.rs
@@ -0,0 +1,43 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use cortex_m_rt::entry;
+use defmt::*;
+use embassy_executor::Executor;
+use embassy_stm32::dma::NoDma;
+use embassy_stm32::interrupt;
+use embassy_stm32::usart::{Config, Uart};
+use static_cell::StaticCell;
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::task]
+async fn main_task() {
+    let p = embassy_stm32::init(Default::default());
+
+    let config = Config::default();
+    let irq = interrupt::take!(UART7);
+    let mut usart = Uart::new(p.UART7, p.PF6, p.PF7, irq, NoDma, NoDma, config);
+
+    unwrap!(usart.blocking_write(b"Hello Embassy World!\r\n"));
+    info!("wrote Hello, starting echo");
+
+    let mut buf = [0u8; 1];
+    loop {
+        unwrap!(usart.blocking_read(&mut buf));
+        unwrap!(usart.blocking_write(&buf));
+    }
+}
+
+static EXECUTOR: StaticCell<Executor> = StaticCell::new();
+
+#[entry]
+fn main() -> ! {
+    info!("Hello World!");
+
+    let executor = EXECUTOR.init(Executor::new());
+
+    executor.run(|spawner| {
+        unwrap!(spawner.spawn(main_task()));
+    })
+}
diff --git a/examples/stm32h5/src/bin/usart_dma.rs b/examples/stm32h5/src/bin/usart_dma.rs
new file mode 100644
index 000000000..43d791aae
--- /dev/null
+++ b/examples/stm32h5/src/bin/usart_dma.rs
@@ -0,0 +1,46 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use core::fmt::Write;
+
+use cortex_m_rt::entry;
+use defmt::*;
+use embassy_executor::Executor;
+use embassy_stm32::dma::NoDma;
+use embassy_stm32::interrupt;
+use embassy_stm32::usart::{Config, Uart};
+use heapless::String;
+use static_cell::StaticCell;
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::task]
+async fn main_task() {
+    let p = embassy_stm32::init(Default::default());
+
+    let config = Config::default();
+    let irq = interrupt::take!(UART7);
+    let mut usart = Uart::new(p.UART7, p.PF6, p.PF7, irq, p.GPDMA1_CH0, NoDma, config);
+
+    for n in 0u32.. {
+        let mut s: String<128> = String::new();
+        core::write!(&mut s, "Hello DMA World {}!\r\n", n).unwrap();
+
+        usart.write(s.as_bytes()).await.ok();
+
+        info!("wrote DMA");
+    }
+}
+
+static EXECUTOR: StaticCell<Executor> = StaticCell::new();
+
+#[entry]
+fn main() -> ! {
+    info!("Hello World!");
+
+    let executor = EXECUTOR.init(Executor::new());
+
+    executor.run(|spawner| {
+        unwrap!(spawner.spawn(main_task()));
+    })
+}
diff --git a/examples/stm32h5/src/bin/usart_split.rs b/examples/stm32h5/src/bin/usart_split.rs
new file mode 100644
index 000000000..16a499582
--- /dev/null
+++ b/examples/stm32h5/src/bin/usart_split.rs
@@ -0,0 +1,58 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::dma::NoDma;
+use embassy_stm32::interrupt;
+use embassy_stm32::peripherals::{GPDMA1_CH1, UART7};
+use embassy_stm32::usart::{Config, Uart, UartRx};
+use embassy_sync::blocking_mutex::raw::ThreadModeRawMutex;
+use embassy_sync::channel::Channel;
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::task] // NOTE(review): no `spawn(writer(..))` is visible in this file — confirm it is still wanted
+async fn writer(mut usart: Uart<'static, UART7, NoDma, NoDma>) {
+    unwrap!(usart.blocking_write(b"Hello Embassy World!\r\n"));
+    info!("wrote Hello, starting echo");
+
+    let mut buf = [0u8; 1]; // one-byte buffer: the loop below writes back each byte it reads
+    loop {
+        unwrap!(usart.blocking_read(&mut buf));
+        unwrap!(usart.blocking_write(&buf));
+    }
+}
+
+static CHANNEL: Channel<ThreadModeRawMutex, [u8; 8], 1> = Channel::new();
+
+#[embassy_executor::main]
+async fn main(spawner: Spawner) -> ! {
+    let p = embassy_stm32::init(Default::default());
+    info!("Hello World!");
+
+    let config = Config::default();
+    let irq = interrupt::take!(UART7);
+    let mut usart = Uart::new(p.UART7, p.PF6, p.PF7, irq, p.GPDMA1_CH0, p.GPDMA1_CH1, config);
+    unwrap!(usart.blocking_write(b"Type 8 chars to echo!\r\n"));
+
+    let (mut tx, rx) = usart.split();
+
+    unwrap!(spawner.spawn(reader(rx)));
+
+    loop {
+        let buf = CHANNEL.recv().await;
+        info!("writing...");
+        unwrap!(tx.write(&buf).await);
+    }
+}
+
+#[embassy_executor::task]
+async fn reader(mut rx: UartRx<'static, UART7, GPDMA1_CH1>) {
+    let mut buf = [0; 8];
+    loop {
+        info!("reading...");
+        unwrap!(rx.read(&mut buf).await);
+        CHANNEL.send(buf).await;
+    }
+}
diff --git a/examples/stm32h5/src/bin/usb_serial.rs b/examples/stm32h5/src/bin/usb_serial.rs
new file mode 100644
index 000000000..6af269c1d
--- /dev/null
+++ b/examples/stm32h5/src/bin/usb_serial.rs
@@ -0,0 +1,128 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::{panic, *};
+use embassy_executor::Spawner;
+use embassy_stm32::rcc::{AHBPrescaler, APBPrescaler, Hse, HseMode, Pll, PllSource, Sysclk, VoltageScale};
+use embassy_stm32::time::Hertz;
+use embassy_stm32::usb::{Driver, Instance};
+use embassy_stm32::{interrupt, pac, Config};
+use embassy_usb::class::cdc_acm::{CdcAcmClass, State};
+use embassy_usb::driver::EndpointError;
+use embassy_usb::Builder;
+use futures::future::join;
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let mut config = Config::default();
+    config.rcc.hsi = None;
+    config.rcc.hsi48 = true; // needed for usb
+    config.rcc.hse = Some(Hse {
+        freq: Hertz(8_000_000),
+        mode: HseMode::BypassDigital,
+    });
+    config.rcc.pll1 = Some(Pll {
+        source: PllSource::Hse,
+        prediv: 2,
+        mul: 125,
+        divp: Some(2), // 250mhz
+        divq: None,
+        divr: None,
+    });
+    config.rcc.ahb_pre = AHBPrescaler::Div2;
+    config.rcc.apb1_pre = APBPrescaler::Div4;
+    config.rcc.apb2_pre = APBPrescaler::Div2;
+    config.rcc.apb3_pre = APBPrescaler::Div4;
+    config.rcc.sys = Sysclk::Pll1P;
+    config.rcc.voltage_scale = VoltageScale::Scale0;
+    let p = embassy_stm32::init(config);
+
+    info!("Hello World!");
+
+    unsafe {
+        pac::RCC.ccipr4().write(|w| {
+            w.set_usbsel(pac::rcc::vals::Usbsel::HSI48);
+        });
+    }
+
+    // Create the driver, from the HAL.
+    let irq = interrupt::take!(USB_DRD_FS);
+    let driver = Driver::new(p.USB, irq, p.PA12, p.PA11);
+
+    // Create embassy-usb Config
+    let mut config = embassy_usb::Config::new(0xc0de, 0xcafe);
+    config.manufacturer = Some("Embassy");
+    config.product = Some("USB-serial example");
+    config.serial_number = Some("12345678");
+
+    // Required for Windows compatibility.
+    // https://developer.nordicsemi.com/nRF_Connect_SDK/doc/1.9.1/kconfig/CONFIG_CDC_ACM_IAD.html#help
+    config.device_class = 0xEF;
+    config.device_sub_class = 0x02;
+    config.device_protocol = 0x01;
+    config.composite_with_iads = true;
+
+    // Create embassy-usb DeviceBuilder using the driver and config.
+    // It needs some buffers for building the descriptors.
+    let mut device_descriptor = [0; 256];
+    let mut config_descriptor = [0; 256];
+    let mut bos_descriptor = [0; 256];
+    let mut control_buf = [0; 64];
+
+    let mut state = State::new();
+
+    let mut builder = Builder::new(
+        driver,
+        config,
+        &mut device_descriptor,
+        &mut config_descriptor,
+        &mut bos_descriptor,
+        &mut control_buf,
+    );
+
+    // Create classes on the builder.
+    let mut class = CdcAcmClass::new(&mut builder, &mut state, 64);
+
+    // Build the builder.
+    let mut usb = builder.build();
+
+    // Run the USB device.
+    let usb_fut = usb.run();
+
+    // Do stuff with the class!
+    let echo_fut = async {
+        loop {
+            class.wait_connection().await;
+            info!("Connected");
+            let _ = echo(&mut class).await;
+            info!("Disconnected");
+        }
+    };
+
+    // Run everything concurrently.
+    // If we had made everything `'static` above instead, we could do this using separate tasks instead.
+    join(usb_fut, echo_fut).await;
+}
+
+struct Disconnected {}
+
+impl From<EndpointError> for Disconnected {
+    fn from(val: EndpointError) -> Self {
+        match val {
+            EndpointError::BufferOverflow => panic!("Buffer overflow"),
+            EndpointError::Disabled => Disconnected {},
+        }
+    }
+}
+
+async fn echo<'d, T: Instance + 'd>(class: &mut CdcAcmClass<'d, Driver<'d, T>>) -> Result<(), Disconnected> {
+    let mut buf = [0; 64];
+    loop {
+        let n = class.read_packet(&mut buf).await?;
+        let data = &buf[..n];
+        info!("data: {:x}", data);
+        class.write_packet(data).await?;
+    }
+}
diff --git a/examples/stm32h7/Cargo.toml b/examples/stm32h7/Cargo.toml
index a04134789..154f5a987 100644
--- a/examples/stm32h7/Cargo.toml
+++ b/examples/stm32h7/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "unstable-traits", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32h743bi", "time-driver-any", "exti", "unstable-pac", "unstable-traits"] }
 embassy-net = { path = "../../embassy-net", features = ["defmt", "nightly", "tcp", "dhcpv4", "medium-ethernet", "unstable-traits", "proto-ipv6"] }
@@ -19,8 +19,8 @@ defmt-rtt = "0.4"
 cortex-m = { version = "0.7.6", features = ["critical-section-single-core"] }
 cortex-m-rt = "0.7.0"
 embedded-hal = "0.2.6"
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9" }
-embedded-hal-async = { version = "=0.2.0-alpha.0" }
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10" }
+embedded-hal-async = { version = "=0.2.0-alpha.1" }
 embedded-nal-async = "0.4.0"
 panic-probe = { version = "0.3", features = ["print-defmt"] }
 futures = { version = "0.3.17", default-features = false, features = ["async-await"] }
diff --git a/examples/stm32h7/src/bin/flash.rs b/examples/stm32h7/src/bin/flash.rs
index ee86bdbf6..7ee9838c9 100644
--- a/examples/stm32h7/src/bin/flash.rs
+++ b/examples/stm32h7/src/bin/flash.rs
@@ -14,12 +14,12 @@ async fn main(_spawner: Spawner) {
     let p = embassy_stm32::init(Default::default());
     info!("Hello Flash!");
 
-    const ADDR: u32 = 0x08_0000;
+    const ADDR: u32 = 0; // This is the offset into bank 2, the absolute address is 0x8_0000
 
     // wait a bit before accessing the flash
     Timer::after(Duration::from_millis(300)).await;
 
-    let mut f = Flash::new(p.FLASH);
+    let mut f = Flash::new(p.FLASH).into_regions().bank2_region;
 
     info!("Reading...");
     let mut buf = [0u8; 32];
diff --git a/examples/stm32l0/Cargo.toml b/examples/stm32l0/Cargo.toml
index 86933a629..413d5c18f 100644
--- a/examples/stm32l0/Cargo.toml
+++ b/examples/stm32l0/Cargo.toml
@@ -10,7 +10,7 @@ nightly = ["embassy-stm32/nightly", "embassy-lora", "lorawan-device", "lorawan",
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["defmt", "stm32l072cz", "time-driver-any", "exti", "unstable-traits", "memory-x"]  }
 embassy-lora = { version = "0.1.0", path = "../../embassy-lora", features = ["sx127x", "time", "defmt"], optional = true}
diff --git a/examples/stm32l0/src/bin/flash.rs b/examples/stm32l0/src/bin/flash.rs
index ffe4fb10b..337425028 100644
--- a/examples/stm32l0/src/bin/flash.rs
+++ b/examples/stm32l0/src/bin/flash.rs
@@ -15,7 +15,7 @@ async fn main(_spawner: Spawner) {
 
     const ADDR: u32 = 0x26000;
 
-    let mut f = Flash::new(p.FLASH);
+    let mut f = Flash::new(p.FLASH).into_regions().bank1_region;
 
     info!("Reading...");
     let mut buf = [0u8; 8];
diff --git a/examples/stm32l0/src/bin/usart_irq.rs b/examples/stm32l0/src/bin/usart_irq.rs
index 8e84cd092..465347004 100644
--- a/examples/stm32l0/src/bin/usart_irq.rs
+++ b/examples/stm32l0/src/bin/usart_irq.rs
@@ -5,7 +5,7 @@
 use defmt::*;
 use embassy_executor::Spawner;
 use embassy_stm32::interrupt;
-use embassy_stm32::usart::{BufferedUart, Config, State};
+use embassy_stm32::usart::{BufferedUart, Config};
 use embedded_io::asynch::{Read, Write};
 use {defmt_rtt as _, panic_probe as _};
 
@@ -20,20 +20,8 @@ async fn main(_spawner: Spawner) {
     let mut config = Config::default();
     config.baudrate = 9600;
 
-    let mut state = State::new();
     let irq = interrupt::take!(USART2);
-    let mut usart = unsafe {
-        BufferedUart::new(
-            &mut state,
-            p.USART2,
-            p.PA3,
-            p.PA2,
-            irq,
-            &mut TX_BUFFER,
-            &mut RX_BUFFER,
-            config,
-        )
-    };
+    let mut usart = unsafe { BufferedUart::new(p.USART2, irq, p.PA3, p.PA2, &mut TX_BUFFER, &mut RX_BUFFER, config) };
 
     usart.write_all(b"Hello Embassy World!\r\n").await.unwrap();
     info!("wrote Hello, starting echo");
diff --git a/examples/stm32l1/Cargo.toml b/examples/stm32l1/Cargo.toml
index 6e3b2103c..cd9508d57 100644
--- a/examples/stm32l1/Cargo.toml
+++ b/examples/stm32l1/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32l151cb-a", "time-driver-any", "memory-x"]  }
 
diff --git a/examples/stm32l1/src/bin/flash.rs b/examples/stm32l1/src/bin/flash.rs
index 476ed51a4..38feb0d76 100644
--- a/examples/stm32l1/src/bin/flash.rs
+++ b/examples/stm32l1/src/bin/flash.rs
@@ -15,7 +15,7 @@ async fn main(_spawner: Spawner) {
 
     const ADDR: u32 = 0x26000;
 
-    let mut f = Flash::new(p.FLASH);
+    let mut f = Flash::new(p.FLASH).into_regions().bank1_region;
 
     info!("Reading...");
     let mut buf = [0u8; 8];
diff --git a/examples/stm32l4/Cargo.toml b/examples/stm32l4/Cargo.toml
index 644c90b1a..fa39df6db 100644
--- a/examples/stm32l4/Cargo.toml
+++ b/examples/stm32l4/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-embedded-hal = { version = "0.1.0", path = "../../embassy-embedded-hal" }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "unstable-pac", "stm32l4s5vi", "time-driver-any", "exti", "unstable-traits"]  }
@@ -18,8 +18,8 @@ defmt-rtt = "0.4"
 cortex-m = { version = "0.7.6", features = ["critical-section-single-core"] }
 cortex-m-rt = "0.7.0"
 embedded-hal = "0.2.6"
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9" }
-embedded-hal-async = { version = "=0.2.0-alpha.0" }
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10" }
+embedded-hal-async = { version = "=0.2.0-alpha.1" }
 panic-probe = { version = "0.3", features = ["print-defmt"] }
 futures = { version = "0.3.17", default-features = false, features = ["async-await"] }
 heapless = { version = "0.7.5", default-features = false }
diff --git a/examples/stm32l4/src/bin/mco.rs b/examples/stm32l4/src/bin/mco.rs
new file mode 100644
index 000000000..dea0c66e0
--- /dev/null
+++ b/examples/stm32l4/src/bin/mco.rs
@@ -0,0 +1,27 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::gpio::{Level, Output, Speed};
+use embassy_stm32::rcc::{Mco, Mco1Source, McoClock};
+use embassy_time::{Duration, Timer};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let p = embassy_stm32::init(Default::default());
+    info!("Hello World!");
+
+    let _mco = Mco::new(p.MCO, p.PA8, Mco1Source::Hsi16, McoClock::DIV1);
+
+    let mut led = Output::new(p.PB14, Level::High, Speed::Low);
+
+    loop {
+        led.set_high();
+        Timer::after(Duration::from_millis(300)).await;
+        led.set_low();
+        Timer::after(Duration::from_millis(300)).await;
+    }
+}
diff --git a/examples/stm32l5/Cargo.toml b/examples/stm32l5/Cargo.toml
index f880328dc..1c662b9da 100644
--- a/examples/stm32l5/Cargo.toml
+++ b/examples/stm32l5/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "unstable-pac", "stm32l552ze", "time-driver-any", "exti", "unstable-traits", "memory-x"]  }
 embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
diff --git a/examples/stm32u5/Cargo.toml b/examples/stm32u5/Cargo.toml
index 2b02eda92..ebef0a4f7 100644
--- a/examples/stm32u5/Cargo.toml
+++ b/examples/stm32u5/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "unstable-pac", "stm32u585ai", "time-driver-any", "memory-x" ]  }
 embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
diff --git a/examples/stm32wb/Cargo.toml b/examples/stm32wb/Cargo.toml
index e27b4527c..ddf9729e6 100644
--- a/examples/stm32wb/Cargo.toml
+++ b/examples/stm32wb/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32wb55cc", "time-driver-any", "exti"]  }
 
diff --git a/examples/stm32wl/Cargo.toml b/examples/stm32wl/Cargo.toml
index 690481bbf..0d2194ea2 100644
--- a/examples/stm32wl/Cargo.toml
+++ b/examples/stm32wl/Cargo.toml
@@ -6,9 +6,9 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
-embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32wl55jc-cm4", "time-driver-any", "memory-x", "subghz", "unstable-pac", "exti"]  }
+embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "stm32wl55jc-cm4", "time-driver-any", "memory-x", "unstable-pac", "exti"]  }
 embassy-lora = { version = "0.1.0", path = "../../embassy-lora", features = ["stm32wl", "time", "defmt"] }
 
 lorawan-device = { version = "0.8.0", default-features = false, features = ["async"] }
diff --git a/examples/stm32wl/src/bin/flash.rs b/examples/stm32wl/src/bin/flash.rs
index 2a8880624..e6bc2865c 100644
--- a/examples/stm32wl/src/bin/flash.rs
+++ b/examples/stm32wl/src/bin/flash.rs
@@ -15,7 +15,7 @@ async fn main(_spawner: Spawner) {
 
     const ADDR: u32 = 0x36000;
 
-    let mut f = Flash::new(p.FLASH);
+    let mut f = Flash::new(p.FLASH).into_regions().bank1_region;
 
     info!("Reading...");
     let mut buf = [0u8; 8];
diff --git a/examples/wasm/Cargo.toml b/examples/wasm/Cargo.toml
index e0e799a34..430d0b4c7 100644
--- a/examples/wasm/Cargo.toml
+++ b/examples/wasm/Cargo.toml
@@ -9,7 +9,7 @@ crate-type = ["cdylib"]
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["log"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["log", "wasm", "nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-wasm", "executor-thread", "log", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["log", "wasm", "nightly"] }
 
 wasm-logger = "0.2.0"
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index da75fa53a..9785cd9eb 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,7 +1,7 @@
 # Before upgrading check that everything is available on all tier1 targets here:
 # https://rust-lang.github.io/rustup-components-history
 [toolchain]
-channel = "nightly-2023-02-07"
+channel = "nightly-2023-04-02"
 components = [ "rust-src", "rustfmt", "llvm-tools-preview" ]
 targets = [
     "thumbv7em-none-eabi",
@@ -9,5 +9,6 @@ targets = [
     "thumbv6m-none-eabi",
     "thumbv7em-none-eabihf",
     "thumbv8m.main-none-eabihf",
+    "riscv32imac-unknown-none-elf",
     "wasm32-unknown-unknown",
 ]
diff --git a/tests/nrf/Cargo.toml b/tests/nrf/Cargo.toml
index 2a4e8cf41..912749e5d 100644
--- a/tests/nrf/Cargo.toml
+++ b/tests/nrf/Cargo.toml
@@ -7,7 +7,7 @@ license = "MIT OR Apache-2.0"
 [dependencies]
 embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt", "nightly"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "nightly", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "nightly", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "nightly", "defmt-timestamp-uptime"] }
 embassy-nrf = { version = "0.1.0", path = "../../embassy-nrf", features = ["defmt", "nightly", "unstable-traits", "nrf52840", "time-driver-rtc1", "gpiote", "unstable-pac"] }
 embedded-io = { version = "0.4.0", features = ["async"] }
diff --git a/tests/nrf/src/bin/timer.rs b/tests/nrf/src/bin/timer.rs
new file mode 100644
index 000000000..9b9b5fb28
--- /dev/null
+++ b/tests/nrf/src/bin/timer.rs
@@ -0,0 +1,25 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::{assert, info};
+use embassy_executor::Spawner;
+use embassy_time::{Duration, Instant, Timer};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let _p = embassy_nrf::init(Default::default()); // return value is not used afterwards
+    info!("Hello World!");
+
+    let start = Instant::now();
+    Timer::after(Duration::from_millis(100)).await; // request a 100 ms sleep
+    let end = Instant::now();
+    let ms = (end - start).as_millis(); // measured time across the sleep, in ms
+    info!("slept for {} ms", ms);
+    assert!(ms >= 99); // lower bound: 1 ms below the requested 100 ms
+    assert!(ms < 110); // upper bound: less than 10 ms above the requested 100 ms
+
+    info!("Test OK");
+    cortex_m::asm::bkpt(); // NOTE(review): presumably signals completion to the test runner; confirm
+}
diff --git a/tests/rp/Cargo.toml b/tests/rp/Cargo.toml
index 572a9ce88..463a370fe 100644
--- a/tests/rp/Cargo.toml
+++ b/tests/rp/Cargo.toml
@@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt"] }
 embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["nightly", "defmt", "unstable-pac", "unstable-traits", "time-driver", "critical-section-impl"]  }
 embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
@@ -17,8 +17,8 @@ defmt-rtt = "0.4"
 cortex-m = { version = "0.7.6" }
 cortex-m-rt = "0.7.0"
 embedded-hal = "0.2.6"
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9" }
-embedded-hal-async = { version = "=0.2.0-alpha.0" }
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10" }
+embedded-hal-async = { version = "=0.2.0-alpha.1" }
 panic-probe = { version = "0.3.0", features = ["print-defmt"] }
 futures = { version = "0.3.17", default-features = false, features = ["async-await"] }
 embedded-io = { version = "0.4.0", features = ["async"] }
diff --git a/tests/stm32/Cargo.toml b/tests/stm32/Cargo.toml
index 08a775eae..bd181f235 100644
--- a/tests/stm32/Cargo.toml
+++ b/tests/stm32/Cargo.toml
@@ -11,11 +11,12 @@ stm32g071rb = ["embassy-stm32/stm32g071rb"]     # Nucleo
 stm32g491re = ["embassy-stm32/stm32g491re"]     # Nucleo
 stm32h755zi = ["embassy-stm32/stm32h755zi-cm7"] # Nucleo
 stm32wb55rg = ["embassy-stm32/stm32wb55rg"]     # Nucleo
+stm32h563zi = ["embassy-stm32/stm32h563zi"]     # Nucleo
 stm32u585ai = ["embassy-stm32/stm32u585ai"]     # IoT board
 
 [dependencies]
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
-embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
+embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "tick-hz-32_768"] }
 embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["nightly", "defmt", "unstable-pac", "memory-x", "time-driver-tim2"]  }
 
@@ -25,8 +26,8 @@ defmt-rtt = "0.4"
 cortex-m = { version = "0.7.6", features = ["critical-section-single-core"] }
 cortex-m-rt = "0.7.0"
 embedded-hal = "0.2.6"
-embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9" }
-embedded-hal-async = { version = "=0.2.0-alpha.0" }
+embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.10" }
+embedded-hal-async = { version = "=0.2.0-alpha.1" }
 panic-probe = { version = "0.3.0", features = ["print-defmt"] }
 
 [profile.dev]
diff --git a/tests/stm32/src/bin/gpio.rs b/tests/stm32/src/bin/gpio.rs
index 18fd85d44..6a36df8cc 100644
--- a/tests/stm32/src/bin/gpio.rs
+++ b/tests/stm32/src/bin/gpio.rs
@@ -30,6 +30,8 @@ async fn main(_spawner: Spawner) {
     let (mut a, mut b) = (p.PB6, p.PB7);
     #[cfg(feature = "stm32u585ai")]
     let (mut a, mut b) = (p.PD9, p.PD8);
+    #[cfg(feature = "stm32h563zi")]
+    let (mut a, mut b) = (p.PB6, p.PB7);
 
     // Test initial output
     {
diff --git a/tests/stm32/src/bin/spi.rs b/tests/stm32/src/bin/spi.rs
index 1c5dc87c0..bf8098b1b 100644
--- a/tests/stm32/src/bin/spi.rs
+++ b/tests/stm32/src/bin/spi.rs
@@ -17,22 +17,24 @@ async fn main(_spawner: Spawner) {
     info!("Hello World!");
 
     #[cfg(feature = "stm32f103c8")]
-    let (sck, mosi, miso) = (p.PA5, p.PA7, p.PA6);
+    let (spi, sck, mosi, miso) = (p.SPI1, p.PA5, p.PA7, p.PA6);
     #[cfg(feature = "stm32f429zi")]
-    let (sck, mosi, miso) = (p.PA5, p.PA7, p.PA6);
+    let (spi, sck, mosi, miso) = (p.SPI1, p.PA5, p.PA7, p.PA6);
     #[cfg(feature = "stm32h755zi")]
-    let (sck, mosi, miso) = (p.PA5, p.PB5, p.PA6);
+    let (spi, sck, mosi, miso) = (p.SPI1, p.PA5, p.PB5, p.PA6);
     #[cfg(feature = "stm32g491re")]
-    let (sck, mosi, miso) = (p.PA5, p.PA7, p.PA6);
+    let (spi, sck, mosi, miso) = (p.SPI1, p.PA5, p.PA7, p.PA6);
     #[cfg(feature = "stm32g071rb")]
-    let (sck, mosi, miso) = (p.PA5, p.PA7, p.PA6);
+    let (spi, sck, mosi, miso) = (p.SPI1, p.PA5, p.PA7, p.PA6);
     #[cfg(feature = "stm32wb55rg")]
-    let (sck, mosi, miso) = (p.PA5, p.PA7, p.PA6);
+    let (spi, sck, mosi, miso) = (p.SPI1, p.PA5, p.PA7, p.PA6);
     #[cfg(feature = "stm32u585ai")]
-    let (sck, mosi, miso) = (p.PE13, p.PE15, p.PE14);
+    let (spi, sck, mosi, miso) = (p.SPI1, p.PE13, p.PE15, p.PE14);
+    #[cfg(feature = "stm32h563zi")]
+    let (spi, sck, mosi, miso) = (p.SPI4, p.PE12, p.PE14, p.PE13);
 
     let mut spi = Spi::new(
-        p.SPI1,
+        spi,
         sck,  // Arduino D13
         mosi, // Arduino D11
         miso, // Arduino D12
diff --git a/tests/stm32/src/bin/spi_dma.rs b/tests/stm32/src/bin/spi_dma.rs
index cb2152e0b..b3dad8132 100644
--- a/tests/stm32/src/bin/spi_dma.rs
+++ b/tests/stm32/src/bin/spi_dma.rs
@@ -16,22 +16,24 @@ async fn main(_spawner: Spawner) {
     info!("Hello World!");
 
     #[cfg(feature = "stm32f103c8")]
-    let (sck, mosi, miso, tx_dma, rx_dma) = (p.PA5, p.PA7, p.PA6, p.DMA1_CH3, p.DMA1_CH2);
+    let (spi, sck, mosi, miso, tx_dma, rx_dma) = (p.SPI1, p.PA5, p.PA7, p.PA6, p.DMA1_CH3, p.DMA1_CH2);
     #[cfg(feature = "stm32f429zi")]
-    let (sck, mosi, miso, tx_dma, rx_dma) = (p.PA5, p.PA7, p.PA6, p.DMA2_CH3, p.DMA2_CH2);
+    let (spi, sck, mosi, miso, tx_dma, rx_dma) = (p.SPI1, p.PA5, p.PA7, p.PA6, p.DMA2_CH3, p.DMA2_CH2);
     #[cfg(feature = "stm32h755zi")]
-    let (sck, mosi, miso, tx_dma, rx_dma) = (p.PA5, p.PB5, p.PA6, p.DMA1_CH0, p.DMA1_CH1);
+    let (spi, sck, mosi, miso, tx_dma, rx_dma) = (p.SPI1, p.PA5, p.PB5, p.PA6, p.DMA1_CH0, p.DMA1_CH1);
     #[cfg(feature = "stm32g491re")]
-    let (sck, mosi, miso, tx_dma, rx_dma) = (p.PA5, p.PA7, p.PA6, p.DMA1_CH1, p.DMA1_CH2);
+    let (spi, sck, mosi, miso, tx_dma, rx_dma) = (p.SPI1, p.PA5, p.PA7, p.PA6, p.DMA1_CH1, p.DMA1_CH2);
     #[cfg(feature = "stm32g071rb")]
-    let (sck, mosi, miso, tx_dma, rx_dma) = (p.PA5, p.PA7, p.PA6, p.DMA1_CH1, p.DMA1_CH2);
+    let (spi, sck, mosi, miso, tx_dma, rx_dma) = (p.SPI1, p.PA5, p.PA7, p.PA6, p.DMA1_CH1, p.DMA1_CH2);
     #[cfg(feature = "stm32wb55rg")]
-    let (sck, mosi, miso, tx_dma, rx_dma) = (p.PA5, p.PA7, p.PA6, p.DMA1_CH1, p.DMA1_CH2);
+    let (spi, sck, mosi, miso, tx_dma, rx_dma) = (p.SPI1, p.PA5, p.PA7, p.PA6, p.DMA1_CH1, p.DMA1_CH2);
     #[cfg(feature = "stm32u585ai")]
-    let (sck, mosi, miso, tx_dma, rx_dma) = (p.PE13, p.PE15, p.PE14, p.GPDMA1_CH0, p.GPDMA1_CH1);
+    let (spi, sck, mosi, miso, tx_dma, rx_dma) = (p.SPI1, p.PE13, p.PE15, p.PE14, p.GPDMA1_CH0, p.GPDMA1_CH1);
+    #[cfg(feature = "stm32h563zi")]
+    let (spi, sck, mosi, miso, tx_dma, rx_dma) = (p.SPI4, p.PE12, p.PE14, p.PE13, p.GPDMA1_CH0, p.GPDMA1_CH1);
 
     let mut spi = Spi::new(
-        p.SPI1,
+        spi,
         sck,  // Arduino D13
         mosi, // Arduino D11
         miso, // Arduino D12
diff --git a/tests/stm32/src/bin/usart.rs b/tests/stm32/src/bin/usart.rs
index af55867f2..52409567c 100644
--- a/tests/stm32/src/bin/usart.rs
+++ b/tests/stm32/src/bin/usart.rs
@@ -32,6 +32,8 @@ async fn main(_spawner: Spawner) {
     let (tx, rx, usart, irq) = (p.PB6, p.PB7, p.USART1, interrupt::take!(USART1));
     #[cfg(feature = "stm32u585ai")]
     let (tx, rx, usart, irq) = (p.PD8, p.PD9, p.USART3, interrupt::take!(USART3));
+    #[cfg(feature = "stm32h563zi")]
+    let (tx, rx, usart, irq) = (p.PB6, p.PB7, p.LPUART1, interrupt::take!(LPUART1));
 
     let config = Config::default();
     let mut usart = Uart::new(usart, rx, tx, irq, NoDma, NoDma, config);
diff --git a/tests/stm32/src/bin/usart_dma.rs b/tests/stm32/src/bin/usart_dma.rs
index d12605a9a..3f70791c1 100644
--- a/tests/stm32/src/bin/usart_dma.rs
+++ b/tests/stm32/src/bin/usart_dma.rs
@@ -62,6 +62,15 @@ async fn main(_spawner: Spawner) {
         p.GPDMA1_CH0,
         p.GPDMA1_CH1,
     );
+    #[cfg(feature = "stm32h563zi")]
+    let (tx, rx, usart, irq, tx_dma, rx_dma) = (
+        p.PB6,
+        p.PB7,
+        p.LPUART1,
+        interrupt::take!(LPUART1),
+        p.GPDMA1_CH0,
+        p.GPDMA1_CH1,
+    );
 
     let config = Config::default();
     let mut usart = Uart::new(usart, rx, tx, irq, tx_dma, rx_dma, config);