diff --git a/ci.sh b/ci.sh
index 760d6fbfb..3fe1b1ce8 100755
--- a/ci.sh
+++ b/ci.sh
@@ -133,16 +133,16 @@ cargo batch  \
     --- build --release --manifest-path examples/stm32u5/Cargo.toml --target thumbv8m.main-none-eabihf --out-dir out/examples/stm32u5 \
     --- build --release --manifest-path examples/stm32wb/Cargo.toml --target thumbv7em-none-eabihf --out-dir out/examples/stm32wb \
     --- build --release --manifest-path examples/stm32wl/Cargo.toml --target thumbv7em-none-eabihf --out-dir out/examples/stm32wl \
-    --- build --release --manifest-path examples/boot/application/nrf/Cargo.toml --target thumbv7em-none-eabi --features embassy-nrf/nrf52840 --out-dir out/examples/boot/nrf --bin b \
-    --- build --release --manifest-path examples/boot/application/nrf/Cargo.toml --target thumbv8m.main-none-eabihf --features embassy-nrf/nrf9160-ns --out-dir out/examples/boot/nrf --bin b \
-    --- build --release --manifest-path examples/boot/application/rp/Cargo.toml --target thumbv6m-none-eabi --out-dir out/examples/boot/rp --bin b \
-    --- build --release --manifest-path examples/boot/application/stm32f3/Cargo.toml --target thumbv7em-none-eabi --out-dir out/examples/boot/stm32f3 --bin b \
-    --- build --release --manifest-path examples/boot/application/stm32f7/Cargo.toml --target thumbv7em-none-eabi --out-dir out/examples/boot/stm32f7 --bin b \
-    --- build --release --manifest-path examples/boot/application/stm32h7/Cargo.toml --target thumbv7em-none-eabi --out-dir out/examples/boot/stm32h7 --bin b \
-    --- build --release --manifest-path examples/boot/application/stm32l0/Cargo.toml --target thumbv6m-none-eabi --out-dir out/examples/boot/stm32l0 --bin b \
-    --- build --release --manifest-path examples/boot/application/stm32l1/Cargo.toml --target thumbv7m-none-eabi --out-dir out/examples/boot/stm32l1 --bin b \
-    --- build --release --manifest-path examples/boot/application/stm32l4/Cargo.toml --target thumbv7em-none-eabi --out-dir out/examples/boot/stm32l4 --bin b \
-    --- build --release --manifest-path examples/boot/application/stm32wl/Cargo.toml --target thumbv7em-none-eabihf --out-dir out/examples/boot/stm32wl --bin b \
+    --- build --release --manifest-path examples/boot/application/nrf/Cargo.toml --target thumbv7em-none-eabi --features embassy-nrf/nrf52840,skip-include --out-dir out/examples/boot/nrf  \
+    --- build --release --manifest-path examples/boot/application/nrf/Cargo.toml --target thumbv8m.main-none-eabihf --features embassy-nrf/nrf9160-ns,skip-include --out-dir out/examples/boot/nrf \
+    --- build --release --manifest-path examples/boot/application/rp/Cargo.toml --target thumbv6m-none-eabi --features skip-include --out-dir out/examples/boot/rp \
+    --- build --release --manifest-path examples/boot/application/stm32f3/Cargo.toml --target thumbv7em-none-eabi --features skip-include --out-dir out/examples/boot/stm32f3 \
+    --- build --release --manifest-path examples/boot/application/stm32f7/Cargo.toml --target thumbv7em-none-eabi --features skip-include --out-dir out/examples/boot/stm32f7 \
+    --- build --release --manifest-path examples/boot/application/stm32h7/Cargo.toml --target thumbv7em-none-eabi --features skip-include --out-dir out/examples/boot/stm32h7 \
+    --- build --release --manifest-path examples/boot/application/stm32l0/Cargo.toml --target thumbv6m-none-eabi --features skip-include --out-dir out/examples/boot/stm32l0 \
+    --- build --release --manifest-path examples/boot/application/stm32l1/Cargo.toml --target thumbv7m-none-eabi --features skip-include --out-dir out/examples/boot/stm32l1 \
+    --- build --release --manifest-path examples/boot/application/stm32l4/Cargo.toml --target thumbv7em-none-eabi --features skip-include --out-dir out/examples/boot/stm32l4 \
+    --- build --release --manifest-path examples/boot/application/stm32wl/Cargo.toml --target thumbv7em-none-eabihf --features skip-include --out-dir out/examples/boot/stm32wl \
     --- build --release --manifest-path examples/boot/bootloader/nrf/Cargo.toml --target thumbv7em-none-eabi --features embassy-nrf/nrf52840 \
     --- build --release --manifest-path examples/boot/bootloader/nrf/Cargo.toml --target thumbv8m.main-none-eabihf --features embassy-nrf/nrf9160-ns \
     --- build --release --manifest-path examples/boot/bootloader/rp/Cargo.toml --target thumbv6m-none-eabi \
diff --git a/embassy-boot/boot/src/firmware_updater/asynch.rs b/embassy-boot/boot/src/firmware_updater/asynch.rs
index 0b3f88313..20731ee0a 100644
--- a/embassy-boot/boot/src/firmware_updater/asynch.rs
+++ b/embassy-boot/boot/src/firmware_updater/asynch.rs
@@ -56,6 +56,16 @@ impl<DFU: NorFlash, STATE: NorFlash> FirmwareUpdater<DFU, STATE> {
         }
     }
 
+    // Make sure we are running a booted firmware to avoid reverting to a bad state.
+    async fn verify_booted(&mut self, aligned: &mut [u8]) -> Result<(), FirmwareUpdaterError> {
+        assert_eq!(aligned.len(), STATE::WRITE_SIZE);
+        if self.get_state(aligned).await? == State::Boot {
+            Ok(())
+        } else {
+            Err(FirmwareUpdaterError::BadState)
+        }
+    }
+
     /// Obtain the current state.
     ///
     /// This is useful to check if the bootloader has just done a swap, in order
@@ -98,6 +108,8 @@ impl<DFU: NorFlash, STATE: NorFlash> FirmwareUpdater<DFU, STATE> {
         assert_eq!(_aligned.len(), STATE::WRITE_SIZE);
         assert!(_update_len <= self.dfu.capacity() as u32);
 
+        self.verify_booted(_aligned).await?;
+
         #[cfg(feature = "ed25519-dalek")]
         {
             use ed25519_dalek::{PublicKey, Signature, SignatureError, Verifier};
@@ -217,8 +229,16 @@ impl<DFU: NorFlash, STATE: NorFlash> FirmwareUpdater<DFU, STATE> {
     /// # Safety
     ///
     /// Failing to meet alignment and size requirements may result in a panic.
-    pub async fn write_firmware(&mut self, offset: usize, data: &[u8]) -> Result<(), FirmwareUpdaterError> {
+    pub async fn write_firmware(
+        &mut self,
+        aligned: &mut [u8],
+        offset: usize,
+        data: &[u8],
+    ) -> Result<(), FirmwareUpdaterError> {
         assert!(data.len() >= DFU::ERASE_SIZE);
+        assert_eq!(aligned.len(), STATE::WRITE_SIZE);
+
+        self.verify_booted(aligned).await?;
 
         self.dfu.erase(offset as u32, (offset + data.len()) as u32).await?;
 
@@ -232,7 +252,14 @@ impl<DFU: NorFlash, STATE: NorFlash> FirmwareUpdater<DFU, STATE> {
     ///
     /// Using this instead of `write_firmware` allows for an optimized API in
     /// exchange for added complexity.
-    pub async fn prepare_update(&mut self) -> Result<&mut DFU, FirmwareUpdaterError> {
+    ///
+    /// # Safety
+    ///
+    /// The `aligned` buffer must have a size of STATE::WRITE_SIZE, and follow the alignment rules for the flash being written to.
+    pub async fn prepare_update(&mut self, aligned: &mut [u8]) -> Result<&mut DFU, FirmwareUpdaterError> {
+        assert_eq!(aligned.len(), STATE::WRITE_SIZE);
+        self.verify_booted(aligned).await?;
+
         self.dfu.erase(0, self.dfu.capacity() as u32).await?;
 
         Ok(&mut self.dfu)
@@ -255,13 +282,14 @@ mod tests {
         let flash = Mutex::<NoopRawMutex, _>::new(MemFlash::<131072, 4096, 8>::default());
         let state = Partition::new(&flash, 0, 4096);
         let dfu = Partition::new(&flash, 65536, 65536);
+        let mut aligned = [0; 8];
 
         let update = [0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66];
         let mut to_write = [0; 4096];
         to_write[..7].copy_from_slice(update.as_slice());
 
         let mut updater = FirmwareUpdater::new(FirmwareUpdaterConfig { dfu, state });
-        block_on(updater.write_firmware(0, to_write.as_slice())).unwrap();
+        block_on(updater.write_firmware(&mut aligned, 0, to_write.as_slice())).unwrap();
         let mut chunk_buf = [0; 2];
         let mut hash = [0; 20];
         block_on(updater.hash::<Sha1>(update.len() as u32, &mut chunk_buf, &mut hash)).unwrap();
diff --git a/embassy-boot/boot/src/firmware_updater/blocking.rs b/embassy-boot/boot/src/firmware_updater/blocking.rs
index 551150c4f..f03f53e4d 100644
--- a/embassy-boot/boot/src/firmware_updater/blocking.rs
+++ b/embassy-boot/boot/src/firmware_updater/blocking.rs
@@ -58,6 +58,16 @@ impl<DFU: NorFlash, STATE: NorFlash> BlockingFirmwareUpdater<DFU, STATE> {
         }
     }
 
+    // Make sure we are running a booted firmware to avoid reverting to a bad state.
+    fn verify_booted(&mut self, aligned: &mut [u8]) -> Result<(), FirmwareUpdaterError> {
+        assert_eq!(aligned.len(), STATE::WRITE_SIZE);
+        if self.get_state(aligned)? == State::Boot {
+            Ok(())
+        } else {
+            Err(FirmwareUpdaterError::BadState)
+        }
+    }
+
     /// Obtain the current state.
     ///
     /// This is useful to check if the bootloader has just done a swap, in order
@@ -100,6 +110,8 @@ impl<DFU: NorFlash, STATE: NorFlash> BlockingFirmwareUpdater<DFU, STATE> {
         assert_eq!(_aligned.len(), STATE::WRITE_SIZE);
         assert!(_update_len <= self.dfu.capacity() as u32);
 
+        self.verify_booted(_aligned)?;
+
         #[cfg(feature = "ed25519-dalek")]
         {
             use ed25519_dalek::{PublicKey, Signature, SignatureError, Verifier};
@@ -219,8 +231,15 @@ impl<DFU: NorFlash, STATE: NorFlash> BlockingFirmwareUpdater<DFU, STATE> {
     /// # Safety
     ///
     /// Failing to meet alignment and size requirements may result in a panic.
-    pub fn write_firmware(&mut self, offset: usize, data: &[u8]) -> Result<(), FirmwareUpdaterError> {
+    pub fn write_firmware(
+        &mut self,
+        aligned: &mut [u8],
+        offset: usize,
+        data: &[u8],
+    ) -> Result<(), FirmwareUpdaterError> {
         assert!(data.len() >= DFU::ERASE_SIZE);
+        assert_eq!(aligned.len(), STATE::WRITE_SIZE);
+        self.verify_booted(aligned)?;
 
         self.dfu.erase(offset as u32, (offset + data.len()) as u32)?;
 
@@ -234,7 +253,13 @@ impl<DFU: NorFlash, STATE: NorFlash> BlockingFirmwareUpdater<DFU, STATE> {
     ///
     /// Using this instead of `write_firmware` allows for an optimized API in
     /// exchange for added complexity.
-    pub fn prepare_update(&mut self) -> Result<&mut DFU, FirmwareUpdaterError> {
+    ///
+    /// # Safety
+    ///
+    /// The `aligned` buffer must have a size of STATE::WRITE_SIZE, and follow the alignment rules for the flash being written to.
+    pub fn prepare_update(&mut self, aligned: &mut [u8]) -> Result<&mut DFU, FirmwareUpdaterError> {
+        assert_eq!(aligned.len(), STATE::WRITE_SIZE);
+        self.verify_booted(aligned)?;
         self.dfu.erase(0, self.dfu.capacity() as u32)?;
 
         Ok(&mut self.dfu)
@@ -264,7 +289,8 @@ mod tests {
         to_write[..7].copy_from_slice(update.as_slice());
 
         let mut updater = BlockingFirmwareUpdater::new(FirmwareUpdaterConfig { dfu, state });
-        updater.write_firmware(0, to_write.as_slice()).unwrap();
+        let mut aligned = [0; 8];
+        updater.write_firmware(&mut aligned, 0, to_write.as_slice()).unwrap();
         let mut chunk_buf = [0; 2];
         let mut hash = [0; 20];
         updater
diff --git a/embassy-boot/boot/src/firmware_updater/mod.rs b/embassy-boot/boot/src/firmware_updater/mod.rs
index a37984a3a..55ce8f363 100644
--- a/embassy-boot/boot/src/firmware_updater/mod.rs
+++ b/embassy-boot/boot/src/firmware_updater/mod.rs
@@ -26,6 +26,8 @@ pub enum FirmwareUpdaterError {
     Flash(NorFlashErrorKind),
     /// Signature errors.
     Signature(signature::Error),
+    /// Bad state.
+    BadState,
 }
 
 #[cfg(feature = "defmt")]
@@ -34,6 +36,7 @@ impl defmt::Format for FirmwareUpdaterError {
         match self {
             FirmwareUpdaterError::Flash(_) => defmt::write!(fmt, "FirmwareUpdaterError::Flash(_)"),
             FirmwareUpdaterError::Signature(_) => defmt::write!(fmt, "FirmwareUpdaterError::Signature(_)"),
+            FirmwareUpdaterError::BadState => defmt::write!(fmt, "FirmwareUpdaterError::BadState"),
         }
     }
 }
diff --git a/embassy-boot/boot/src/lib.rs b/embassy-boot/boot/src/lib.rs
index 45a87bd0e..016362b86 100644
--- a/embassy-boot/boot/src/lib.rs
+++ b/embassy-boot/boot/src/lib.rs
@@ -51,6 +51,8 @@ impl<const N: usize> AsMut<[u8]> for AlignedBuffer<N> {
 
 #[cfg(test)]
 mod tests {
+    #![allow(unused_imports)]
+
     use embedded_storage::nor_flash::{NorFlash, ReadNorFlash};
     #[cfg(feature = "nightly")]
     use embedded_storage_async::nor_flash::NorFlash as AsyncNorFlash;
@@ -120,9 +122,13 @@ mod tests {
             dfu: flash.dfu(),
             state: flash.state(),
         });
-        block_on(updater.write_firmware(0, &UPDATE)).unwrap();
+        block_on(updater.write_firmware(&mut aligned, 0, &UPDATE)).unwrap();
         block_on(updater.mark_updated(&mut aligned)).unwrap();
 
+        // Writing after marking updated is not allowed until marked as booted.
+        let res: Result<(), FirmwareUpdaterError> = block_on(updater.write_firmware(&mut aligned, 0, &UPDATE));
+        assert!(matches!(res, Err::<(), _>(FirmwareUpdaterError::BadState)));
+
         let flash = flash.into_blocking();
         let mut bootloader = BootLoader::new(BootLoaderConfig {
             active: flash.active(),
@@ -188,7 +194,7 @@ mod tests {
             dfu: flash.dfu(),
             state: flash.state(),
         });
-        block_on(updater.write_firmware(0, &UPDATE)).unwrap();
+        block_on(updater.write_firmware(&mut aligned, 0, &UPDATE)).unwrap();
         block_on(updater.mark_updated(&mut aligned)).unwrap();
 
         let flash = flash.into_blocking();
@@ -230,7 +236,7 @@ mod tests {
             dfu: flash.dfu(),
             state: flash.state(),
         });
-        block_on(updater.write_firmware(0, &UPDATE)).unwrap();
+        block_on(updater.write_firmware(&mut aligned, 0, &UPDATE)).unwrap();
         block_on(updater.mark_updated(&mut aligned)).unwrap();
 
         let flash = flash.into_blocking();
diff --git a/embassy-stm32/src/can/bxcan.rs b/embassy-stm32/src/can/bxcan.rs
index 85f6e99ac..88eef528f 100644
--- a/embassy-stm32/src/can/bxcan.rs
+++ b/embassy-stm32/src/can/bxcan.rs
@@ -1,22 +1,106 @@
+use core::future::poll_fn;
+use core::marker::PhantomData;
 use core::ops::{Deref, DerefMut};
+use core::task::Poll;
 
 pub use bxcan;
+use bxcan::{Data, ExtendedId, Frame, Id, StandardId};
 use embassy_hal_common::{into_ref, PeripheralRef};
+use futures::FutureExt;
 
 use crate::gpio::sealed::AFType;
+use crate::interrupt::typelevel::Interrupt;
+use crate::pac::can::vals::{Lec, RirIde};
 use crate::rcc::RccPeripheral;
-use crate::{peripherals, Peripheral};
+use crate::time::Hertz;
+use crate::{interrupt, peripherals, Peripheral};
+
+/// Interrupt handler.
+pub struct TxInterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::TXInterrupt> for TxInterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        T::regs().tsr().write(|v| {
+            v.set_rqcp(0, true);
+            v.set_rqcp(1, true);
+            v.set_rqcp(2, true);
+        });
+
+        T::state().tx_waker.wake();
+    }
+}
+
+pub struct Rx0InterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::RX0Interrupt> for Rx0InterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        // info!("rx0 irq");
+        Can::<T>::receive_fifo(RxFifo::Fifo0);
+    }
+}
+
+pub struct Rx1InterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::RX1Interrupt> for Rx1InterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        // info!("rx1 irq");
+        Can::<T>::receive_fifo(RxFifo::Fifo1);
+    }
+}
+
+pub struct SceInterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::SCEInterrupt> for SceInterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        // info!("sce irq");
+        let msr = T::regs().msr();
+        let msr_val = msr.read();
+
+        if msr_val.erri() {
+            msr.modify(|v| v.set_erri(true));
+            T::state().err_waker.wake();
+        }
+    }
+}
 
 pub struct Can<'d, T: Instance> {
     can: bxcan::Can<BxcanInstance<'d, T>>,
 }
 
+#[derive(Debug)]
+pub enum BusError {
+    Stuff,
+    Form,
+    Acknowledge,
+    BitRecessive,
+    BitDominant,
+    Crc,
+    Software,
+    BusOff,
+    BusPassive,
+    BusWarning,
+}
+
 impl<'d, T: Instance> Can<'d, T> {
-    /// Creates a new Bxcan instance, blocking for 11 recessive bits to sync with the CAN bus.
+    /// Creates a new Bxcan instance, keeping the peripheral in sleep mode.
+    /// You must call [Can::enable_non_blocking] to use the peripheral.
     pub fn new(
         peri: impl Peripheral<P = T> + 'd,
         rx: impl Peripheral<P = impl RxPin<T>> + 'd,
         tx: impl Peripheral<P = impl TxPin<T>> + 'd,
+        _irqs: impl interrupt::typelevel::Binding<T::TXInterrupt, TxInterruptHandler<T>>
+            + interrupt::typelevel::Binding<T::RX0Interrupt, Rx0InterruptHandler<T>>
+            + interrupt::typelevel::Binding<T::RX1Interrupt, Rx1InterruptHandler<T>>
+            + interrupt::typelevel::Binding<T::SCEInterrupt, SceInterruptHandler<T>>
+            + 'd,
     ) -> Self {
         into_ref!(peri, rx, tx);
 
@@ -26,30 +110,242 @@ impl<'d, T: Instance> Can<'d, T> {
         T::enable();
         T::reset();
 
-        Self {
-            can: bxcan::Can::builder(BxcanInstance(peri)).enable(),
-        }
-    }
+        {
+            use crate::pac::can::vals::{Errie, Fmpie, Tmeie};
 
-    /// Creates a new Bxcan instance, keeping the peripheral in sleep mode.
-    /// You must call [Can::enable_non_blocking] to use the peripheral.
-    pub fn new_disabled(
-        peri: impl Peripheral<P = T> + 'd,
-        rx: impl Peripheral<P = impl RxPin<T>> + 'd,
-        tx: impl Peripheral<P = impl TxPin<T>> + 'd,
-    ) -> Self {
-        into_ref!(peri, rx, tx);
+            T::regs().ier().write(|w| {
+                // TODO: fix metapac
+
+                w.set_errie(Errie(1));
+                w.set_fmpie(0, Fmpie(1));
+                w.set_fmpie(1, Fmpie(1));
+                w.set_tmeie(Tmeie(1));
+            });
+
+            T::regs().mcr().write(|w| {
+                // Enable timestamps on rx messages
+
+                w.set_ttcm(true);
+            });
+        }
+
+        unsafe {
+            T::TXInterrupt::unpend();
+            T::TXInterrupt::enable();
+
+            T::RX0Interrupt::unpend();
+            T::RX0Interrupt::enable();
+
+            T::RX1Interrupt::unpend();
+            T::RX1Interrupt::enable();
+
+            T::SCEInterrupt::unpend();
+            T::SCEInterrupt::enable();
+        }
 
         rx.set_as_af(rx.af_num(), AFType::Input);
         tx.set_as_af(tx.af_num(), AFType::OutputPushPull);
 
-        T::enable();
-        T::reset();
+        let can = bxcan::Can::builder(BxcanInstance(peri)).leave_disabled();
+        Self { can }
+    }
 
-        Self {
-            can: bxcan::Can::builder(BxcanInstance(peri)).leave_disabled(),
+    pub fn set_bitrate(&mut self, bitrate: u32) {
+        let bit_timing = Self::calc_bxcan_timings(T::frequency(), bitrate).unwrap();
+        self.can.modify_config().set_bit_timing(bit_timing).leave_disabled();
+    }
+
+    /// Queues the message to be sent but exerts backpressure
+    pub async fn write(&mut self, frame: &Frame) -> bxcan::TransmitStatus {
+        poll_fn(|cx| {
+            T::state().tx_waker.register(cx.waker());
+            if let Ok(status) = self.can.transmit(frame) {
+                return Poll::Ready(status);
+            }
+
+            Poll::Pending
+        })
+        .await
+    }
+
+    pub async fn flush(&self, mb: bxcan::Mailbox) {
+        poll_fn(|cx| {
+            T::state().tx_waker.register(cx.waker());
+            if T::regs().tsr().read().tme(mb.index()) {
+                return Poll::Ready(());
+            }
+
+            Poll::Pending
+        })
+        .await;
+    }
+
+    /// Returns a tuple of the time the message was received and the message frame
+    pub async fn read(&mut self) -> Result<(u16, bxcan::Frame), BusError> {
+        poll_fn(|cx| {
+            T::state().err_waker.register(cx.waker());
+            if let Poll::Ready((time, frame)) = T::state().rx_queue.recv().poll_unpin(cx) {
+                return Poll::Ready(Ok((time, frame)));
+            } else if let Some(err) = self.curr_error() {
+                return Poll::Ready(Err(err));
+            }
+
+            Poll::Pending
+        })
+        .await
+    }
+
+    fn curr_error(&self) -> Option<BusError> {
+        let err = { T::regs().esr().read() };
+        if err.boff() {
+            return Some(BusError::BusOff);
+        } else if err.epvf() {
+            return Some(BusError::BusPassive);
+        } else if err.ewgf() {
+            return Some(BusError::BusWarning);
+        } else if let Some(err) = err.lec().into_bus_err() {
+            return Some(err);
+        }
+        None
+    }
+
+    unsafe fn receive_fifo(fifo: RxFifo) {
+        let state = T::state();
+        let regs = T::regs();
+        let fifo_idx = match fifo {
+            RxFifo::Fifo0 => 0usize,
+            RxFifo::Fifo1 => 1usize,
+        };
+        let rfr = regs.rfr(fifo_idx);
+        let fifo = regs.rx(fifo_idx);
+
+        loop {
+            // If there are no pending messages, there is nothing to do
+            if rfr.read().fmp() == 0 {
+                return;
+            }
+
+            let rir = fifo.rir().read();
+            let id = if rir.ide() == RirIde::STANDARD {
+                Id::from(StandardId::new_unchecked(rir.stid()))
+            } else {
+                let stid = (rir.stid() & 0x7FF) as u32;
+                let exid = rir.exid() & 0x3FFFF;
+                let id = (stid << 18) | (exid as u32);
+                Id::from(ExtendedId::new_unchecked(id))
+            };
+            let data_len = fifo.rdtr().read().dlc() as usize;
+            let mut data: [u8; 8] = [0; 8];
+            data[0..4].copy_from_slice(&fifo.rdlr().read().0.to_ne_bytes());
+            data[4..8].copy_from_slice(&fifo.rdhr().read().0.to_ne_bytes());
+
+            let time = fifo.rdtr().read().time();
+            let frame = Frame::new_data(id, Data::new(&data[0..data_len]).unwrap());
+
+            rfr.modify(|v| v.set_rfom(true));
+
+            /*
+                NOTE: consensus was reached that if rx_queue is full, packets should be dropped
+            */
+            let _ = state.rx_queue.try_send((time, frame));
         }
     }
+
+    pub const fn calc_bxcan_timings(periph_clock: Hertz, can_bitrate: u32) -> Option<u32> {
+        const BS1_MAX: u8 = 16;
+        const BS2_MAX: u8 = 8;
+        const MAX_SAMPLE_POINT_PERMILL: u16 = 900;
+
+        let periph_clock = periph_clock.0;
+
+        if can_bitrate < 1000 {
+            return None;
+        }
+
+        // Ref. "Automatic Baudrate Detection in CANopen Networks", U. Koppe, MicroControl GmbH & Co. KG
+        //      CAN in Automation, 2003
+        //
+        // According to the source, optimal quanta per bit are:
+        //   Bitrate        Optimal Maximum
+        //   1000 kbps      8       10
+        //   500  kbps      16      17
+        //   250  kbps      16      17
+        //   125  kbps      16      17
+        let max_quanta_per_bit: u8 = if can_bitrate >= 1_000_000 { 10 } else { 17 };
+
+        // Computing (prescaler * BS):
+        //   BITRATE = 1 / (PRESCALER * (1 / PCLK) * (1 + BS1 + BS2))       -- See the Reference Manual
+        //   BITRATE = PCLK / (PRESCALER * (1 + BS1 + BS2))                 -- Simplified
+        // let:
+        //   BS = 1 + BS1 + BS2                                             -- Number of time quanta per bit
+        //   PRESCALER_BS = PRESCALER * BS
+        // ==>
+        //   PRESCALER_BS = PCLK / BITRATE
+        let prescaler_bs = periph_clock / can_bitrate;
+
+        // Searching for such prescaler value so that the number of quanta per bit is highest.
+        let mut bs1_bs2_sum = max_quanta_per_bit - 1;
+        while (prescaler_bs % (1 + bs1_bs2_sum) as u32) != 0 {
+            if bs1_bs2_sum <= 2 {
+                return None; // No solution
+            }
+            bs1_bs2_sum -= 1;
+        }
+
+        let prescaler = prescaler_bs / (1 + bs1_bs2_sum) as u32;
+        if (prescaler < 1) || (prescaler > 1024) {
+            return None; // No solution
+        }
+
+        // Now we have a constraint: (BS1 + BS2) == bs1_bs2_sum.
+        // We need to find such values so that the sample point is as close as possible to the optimal value,
+        // which is 87.5%, which is 7/8.
+        //
+        //   Solve[(1 + bs1)/(1 + bs1 + bs2) == 7/8, bs2]  (* Where 7/8 is 0.875, the recommended sample point location *)
+        //   {{bs2 -> (1 + bs1)/7}}
+        //
+        // Hence:
+        //   bs2 = (1 + bs1) / 7
+        //   bs1 = (7 * bs1_bs2_sum - 1) / 8
+        //
+        // Sample point location can be computed as follows:
+        //   Sample point location = (1 + bs1) / (1 + bs1 + bs2)
+        //
+        // Since the optimal solution is so close to the maximum, we prepare two solutions, and then pick the best one:
+        //   - With rounding to nearest
+        //   - With rounding to zero
+        let mut bs1 = ((7 * bs1_bs2_sum - 1) + 4) / 8; // Trying rounding to nearest first
+        let mut bs2 = bs1_bs2_sum - bs1;
+        core::assert!(bs1_bs2_sum > bs1);
+
+        let sample_point_permill = 1000 * ((1 + bs1) / (1 + bs1 + bs2)) as u16;
+        if sample_point_permill > MAX_SAMPLE_POINT_PERMILL {
+            // Nope, too far; now rounding to zero
+            bs1 = (7 * bs1_bs2_sum - 1) / 8;
+            bs2 = bs1_bs2_sum - bs1;
+        }
+
+        // Check is BS1 and BS2 are in range
+        if (bs1 < 1) || (bs1 > BS1_MAX) || (bs2 < 1) || (bs2 > BS2_MAX) {
+            return None;
+        }
+
+        // Check if final bitrate matches the requested
+        if can_bitrate != (periph_clock / (prescaler * (1 + bs1 + bs2) as u32)) {
+            return None;
+        }
+
+        // One is recommended by DS-015, CANOpen, and DeviceNet
+        let sjw = 1;
+
+        // Pack into BTR register values
+        Some((sjw - 1) << 24 | (bs1 as u32 - 1) << 16 | (bs2 as u32 - 1) << 20 | (prescaler as u32 - 1))
+    }
+}
+
+enum RxFifo {
+    Fifo0,
+    Fifo1,
 }
 
 impl<'d, T: Instance> Drop for Can<'d, T> {
@@ -76,14 +372,52 @@ impl<'d, T: Instance> DerefMut for Can<'d, T> {
 }
 
 pub(crate) mod sealed {
+    use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
+    use embassy_sync::channel::Channel;
+    use embassy_sync::waitqueue::AtomicWaker;
+
+    pub struct State {
+        pub tx_waker: AtomicWaker,
+        pub err_waker: AtomicWaker,
+        pub rx_queue: Channel<CriticalSectionRawMutex, (u16, bxcan::Frame), 32>,
+    }
+
+    impl State {
+        pub const fn new() -> Self {
+            Self {
+                tx_waker: AtomicWaker::new(),
+                err_waker: AtomicWaker::new(),
+                rx_queue: Channel::new(),
+            }
+        }
+    }
+
     pub trait Instance {
         const REGISTERS: *mut bxcan::RegisterBlock;
 
         fn regs() -> &'static crate::pac::can::Can;
+        fn state() -> &'static State;
     }
 }
 
-pub trait Instance: sealed::Instance + RccPeripheral {}
+pub trait TXInstance {
+    type TXInterrupt: crate::interrupt::typelevel::Interrupt;
+}
+
+pub trait RX0Instance {
+    type RX0Interrupt: crate::interrupt::typelevel::Interrupt;
+}
+
+pub trait RX1Instance {
+    type RX1Interrupt: crate::interrupt::typelevel::Interrupt;
+}
+
+pub trait SCEInstance {
+    type SCEInterrupt: crate::interrupt::typelevel::Interrupt;
+}
+
+pub trait InterruptableInstance: TXInstance + RX0Instance + RX1Instance + SCEInstance {}
+pub trait Instance: sealed::Instance + RccPeripheral + InterruptableInstance + 'static {}
 
 pub struct BxcanInstance<'a, T>(PeripheralRef<'a, T>);
 
@@ -99,10 +433,39 @@ foreach_peripheral!(
             fn regs() -> &'static crate::pac::can::Can {
                 &crate::pac::$inst
             }
+
+            fn state() -> &'static sealed::State {
+                static STATE: sealed::State = sealed::State::new();
+                &STATE
+            }
         }
 
         impl Instance for peripherals::$inst {}
 
+        foreach_interrupt!(
+            ($inst,can,CAN,TX,$irq:ident) => {
+                impl TXInstance for peripherals::$inst {
+                    type TXInterrupt = crate::interrupt::typelevel::$irq;
+                }
+            };
+            ($inst,can,CAN,RX0,$irq:ident) => {
+                impl RX0Instance for peripherals::$inst {
+                    type RX0Interrupt = crate::interrupt::typelevel::$irq;
+                }
+            };
+            ($inst,can,CAN,RX1,$irq:ident) => {
+                impl RX1Instance for peripherals::$inst {
+                    type RX1Interrupt = crate::interrupt::typelevel::$irq;
+                }
+            };
+            ($inst,can,CAN,SCE,$irq:ident) => {
+                impl SCEInstance for peripherals::$inst {
+                    type SCEInterrupt = crate::interrupt::typelevel::$irq;
+                }
+            };
+        );
+
+        impl InterruptableInstance for peripherals::$inst {}
     };
 );
 
@@ -143,3 +506,36 @@ foreach_peripheral!(
 
 pin_trait!(RxPin, Instance);
 pin_trait!(TxPin, Instance);
+
+trait Index {
+    fn index(&self) -> usize;
+}
+
+impl Index for bxcan::Mailbox {
+    fn index(&self) -> usize {
+        match self {
+            bxcan::Mailbox::Mailbox0 => 0,
+            bxcan::Mailbox::Mailbox1 => 1,
+            bxcan::Mailbox::Mailbox2 => 2,
+        }
+    }
+}
+
+trait IntoBusError {
+    fn into_bus_err(self) -> Option<BusError>;
+}
+
+impl IntoBusError for Lec {
+    fn into_bus_err(self) -> Option<BusError> {
+        match self {
+            Lec::STUFF => Some(BusError::Stuff),
+            Lec::FORM => Some(BusError::Form),
+            Lec::ACK => Some(BusError::Acknowledge),
+            Lec::BITRECESSIVE => Some(BusError::BitRecessive),
+            Lec::BITDOMINANT => Some(BusError::BitDominant),
+            Lec::CRC => Some(BusError::Crc),
+            Lec::CUSTOM => Some(BusError::Software),
+            _ => None,
+        }
+    }
+}
diff --git a/embassy-stm32/src/dma/bdma.rs b/embassy-stm32/src/dma/bdma.rs
index 0ad20579a..162ca9adb 100644
--- a/embassy-stm32/src/dma/bdma.rs
+++ b/embassy-stm32/src/dma/bdma.rs
@@ -338,6 +338,7 @@ impl<'a, C: Channel> Transfer<'a, C> {
 
     pub fn blocking_wait(mut self) {
         while self.is_running() {}
+        self.request_stop();
 
         // "Subsequent reads and writes cannot be moved ahead of preceding reads."
         fence(Ordering::SeqCst);
diff --git a/embassy-stm32/src/dma/gpdma.rs b/embassy-stm32/src/dma/gpdma.rs
index c600df92d..b7bcf7795 100644
--- a/embassy-stm32/src/dma/gpdma.rs
+++ b/embassy-stm32/src/dma/gpdma.rs
@@ -252,6 +252,7 @@ impl<'a, C: Channel> Transfer<'a, C> {
         super::dmamux::configure_dmamux(&mut *this.channel, request);
 
         ch.cr().write(|w| w.set_reset(true));
+        ch.fcr().write(|w| w.0 = 0xFFFF_FFFF); // clear all irqs
         ch.llr().write(|_| {}); // no linked list
         ch.tr1().write(|w| {
             w.set_sdw(data_size.into());
diff --git a/examples/boot/application/nrf/Cargo.toml b/examples/boot/application/nrf/Cargo.toml
index 5939a43b1..b98f73f39 100644
--- a/examples/boot/application/nrf/Cargo.toml
+++ b/examples/boot/application/nrf/Cargo.toml
@@ -24,3 +24,4 @@ cortex-m-rt = "0.7.0"
 [features]
 ed25519-dalek = ["embassy-boot/ed25519-dalek"]
 ed25519-salty = ["embassy-boot/ed25519-salty"]
+skip-include = []
diff --git a/examples/boot/application/nrf/src/bin/a.rs b/examples/boot/application/nrf/src/bin/a.rs
index 06c237781..021d77f3b 100644
--- a/examples/boot/application/nrf/src/bin/a.rs
+++ b/examples/boot/application/nrf/src/bin/a.rs
@@ -12,6 +12,9 @@ use embassy_nrf::wdt::{self, Watchdog};
 use embassy_sync::mutex::Mutex;
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
 
 #[embassy_executor::main]
@@ -55,13 +58,13 @@ async fn main(_spawner: Spawner) {
         button.wait_for_any_edge().await;
         if button.is_low() {
             let mut offset = 0;
+            let mut magic = [0; 4];
             for chunk in APP_B.chunks(4096) {
                 let mut buf: [u8; 4096] = [0; 4096];
                 buf[..chunk.len()].copy_from_slice(chunk);
-                updater.write_firmware(offset, &buf).await.unwrap();
+                updater.write_firmware(&mut magic, offset, &buf).await.unwrap();
                 offset += chunk.len();
             }
-            let mut magic = [0; 4];
             updater.mark_updated(&mut magic).await.unwrap();
             led.set_high();
             cortex_m::peripheral::SCB::sys_reset();
diff --git a/examples/boot/application/rp/Cargo.toml b/examples/boot/application/rp/Cargo.toml
index 4a2c5dd8f..007b6839c 100644
--- a/examples/boot/application/rp/Cargo.toml
+++ b/examples/boot/application/rp/Cargo.toml
@@ -29,6 +29,7 @@ debug = [
     "embassy-boot-rp/defmt",
     "panic-probe"
 ]
+skip-include = []
 
 [profile.release]
 debug = true
diff --git a/examples/boot/application/rp/src/bin/a.rs b/examples/boot/application/rp/src/bin/a.rs
index 69850069b..c8497494c 100644
--- a/examples/boot/application/rp/src/bin/a.rs
+++ b/examples/boot/application/rp/src/bin/a.rs
@@ -18,7 +18,11 @@ use panic_probe as _;
 #[cfg(feature = "panic-reset")]
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
+
 const FLASH_SIZE: usize = 2 * 1024 * 1024;
 
 #[embassy_executor::main]
@@ -43,7 +47,7 @@ async fn main(_s: Spawner) {
     let mut buf: AlignedBuffer<4096> = AlignedBuffer([0; 4096]);
     defmt::info!("preparing update");
     let writer = updater
-        .prepare_update()
+        .prepare_update(&mut buf.0[..1])
         .map_err(|e| defmt::warn!("E: {:?}", defmt::Debug2Format(&e)))
         .unwrap();
     defmt::info!("writer created, starting write");
diff --git a/examples/boot/application/stm32f3/Cargo.toml b/examples/boot/application/stm32f3/Cargo.toml
index 24abd90d4..5b3faf8f8 100644
--- a/examples/boot/application/stm32f3/Cargo.toml
+++ b/examples/boot/application/stm32f3/Cargo.toml
@@ -26,3 +26,4 @@ defmt = [
       "embassy-stm32/defmt",
       "embassy-boot-stm32/defmt",
 ]
+skip-include = []
diff --git a/examples/boot/application/stm32f3/src/bin/a.rs b/examples/boot/application/stm32f3/src/bin/a.rs
index c94676f09..c0a11d699 100644
--- a/examples/boot/application/stm32f3/src/bin/a.rs
+++ b/examples/boot/application/stm32f3/src/bin/a.rs
@@ -13,6 +13,9 @@ use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
 use embassy_sync::mutex::Mutex;
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
 
 #[embassy_executor::main]
@@ -31,13 +34,13 @@ async fn main(_spawner: Spawner) {
     let mut updater = FirmwareUpdater::new(config);
     button.wait_for_falling_edge().await;
     let mut offset = 0;
+    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     for chunk in APP_B.chunks(2048) {
         let mut buf: [u8; 2048] = [0; 2048];
         buf[..chunk.len()].copy_from_slice(chunk);
-        updater.write_firmware(offset, &buf).await.unwrap();
+        updater.write_firmware(magic.as_mut(), offset, &buf).await.unwrap();
         offset += chunk.len();
     }
-    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     updater.mark_updated(magic.as_mut()).await.unwrap();
     led.set_low();
     cortex_m::peripheral::SCB::sys_reset();
diff --git a/examples/boot/application/stm32f7/Cargo.toml b/examples/boot/application/stm32f7/Cargo.toml
index 529a01aad..b6a6f9cd8 100644
--- a/examples/boot/application/stm32f7/Cargo.toml
+++ b/examples/boot/application/stm32f7/Cargo.toml
@@ -16,6 +16,7 @@ defmt = { version = "0.3", optional = true }
 defmt-rtt = { version = "0.4", optional = true }
 panic-reset = { version = "0.1.1" }
 embedded-hal = { version = "0.2.6" }
+embedded-storage = "0.3.0"
 
 cortex-m = { version = "0.7.6", features = ["inline-asm", "critical-section-single-core"] }
 cortex-m-rt = "0.7.0"
@@ -26,3 +27,4 @@ defmt = [
       "embassy-stm32/defmt",
       "embassy-boot-stm32/defmt",
 ]
+skip-include = []
diff --git a/examples/boot/application/stm32f7/src/bin/a.rs b/examples/boot/application/stm32f7/src/bin/a.rs
index fc2702c91..dea682a96 100644
--- a/examples/boot/application/stm32f7/src/bin/a.rs
+++ b/examples/boot/application/stm32f7/src/bin/a.rs
@@ -2,6 +2,8 @@
 #![no_main]
 #![feature(type_alias_impl_trait)]
 
+use core::cell::RefCell;
+
 #[cfg(feature = "defmt-rtt")]
 use defmt_rtt::*;
 use embassy_boot_stm32::{AlignedBuffer, BlockingFirmwareUpdater, FirmwareUpdaterConfig};
@@ -9,8 +11,13 @@ use embassy_executor::Spawner;
 use embassy_stm32::exti::ExtiInput;
 use embassy_stm32::flash::{Flash, WRITE_SIZE};
 use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
+use embassy_sync::blocking_mutex::Mutex;
+use embedded_storage::nor_flash::NorFlash;
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
 
 #[embassy_executor::main]
@@ -27,16 +34,16 @@ async fn main(_spawner: Spawner) {
 
     let config = FirmwareUpdaterConfig::from_linkerfile_blocking(&flash);
     let mut updater = BlockingFirmwareUpdater::new(config);
-    let mut writer = updater.prepare_update().unwrap();
+    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
+    let writer = updater.prepare_update(magic.as_mut()).unwrap();
     button.wait_for_rising_edge().await;
     let mut offset = 0;
     let mut buf = AlignedBuffer([0; 4096]);
     for chunk in APP_B.chunks(4096) {
         buf.as_mut()[..chunk.len()].copy_from_slice(chunk);
         writer.write(offset, buf.as_ref()).unwrap();
-        offset += chunk.len();
+        offset += chunk.len() as u32;
     }
-    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     updater.mark_updated(magic.as_mut()).unwrap();
     led.set_low();
     cortex_m::peripheral::SCB::sys_reset();
diff --git a/examples/boot/application/stm32h7/Cargo.toml b/examples/boot/application/stm32h7/Cargo.toml
index d7539a53f..0a7e19b1d 100644
--- a/examples/boot/application/stm32h7/Cargo.toml
+++ b/examples/boot/application/stm32h7/Cargo.toml
@@ -16,6 +16,7 @@ defmt = { version = "0.3", optional = true }
 defmt-rtt = { version = "0.4", optional = true }
 panic-reset = { version = "0.1.1" }
 embedded-hal = { version = "0.2.6" }
+embedded-storage = "0.3.0"
 
 cortex-m = { version = "0.7.6", features = ["inline-asm", "critical-section-single-core"] }
 cortex-m-rt = "0.7.0"
@@ -26,3 +27,4 @@ defmt = [
       "embassy-stm32/defmt",
       "embassy-boot-stm32/defmt",
 ]
+skip-include = []
diff --git a/examples/boot/application/stm32h7/src/bin/a.rs b/examples/boot/application/stm32h7/src/bin/a.rs
index 1a54464d0..719176692 100644
--- a/examples/boot/application/stm32h7/src/bin/a.rs
+++ b/examples/boot/application/stm32h7/src/bin/a.rs
@@ -2,6 +2,8 @@
 #![no_main]
 #![feature(type_alias_impl_trait)]
 
+use core::cell::RefCell;
+
 #[cfg(feature = "defmt-rtt")]
 use defmt_rtt::*;
 use embassy_boot_stm32::{AlignedBuffer, BlockingFirmwareUpdater, FirmwareUpdaterConfig};
@@ -9,8 +11,13 @@ use embassy_executor::Spawner;
 use embassy_stm32::exti::ExtiInput;
 use embassy_stm32::flash::{Flash, WRITE_SIZE};
 use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
+use embassy_sync::blocking_mutex::Mutex;
+use embedded_storage::nor_flash::NorFlash;
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
 
 #[embassy_executor::main]
@@ -26,17 +33,17 @@ async fn main(_spawner: Spawner) {
     led.set_high();
 
     let config = FirmwareUpdaterConfig::from_linkerfile_blocking(&flash);
+    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     let mut updater = BlockingFirmwareUpdater::new(config);
-    let mut writer = updater.prepare_update().unwrap();
+    let writer = updater.prepare_update(magic.as_mut()).unwrap();
     button.wait_for_rising_edge().await;
     let mut offset = 0;
     let mut buf = AlignedBuffer([0; 4096]);
     for chunk in APP_B.chunks(4096) {
         buf.as_mut()[..chunk.len()].copy_from_slice(chunk);
         writer.write(offset, buf.as_ref()).unwrap();
-        offset += chunk.len();
+        offset += chunk.len() as u32;
     }
-    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     updater.mark_updated(magic.as_mut()).unwrap();
     led.set_low();
     cortex_m::peripheral::SCB::sys_reset();
diff --git a/examples/boot/application/stm32l0/Cargo.toml b/examples/boot/application/stm32l0/Cargo.toml
index e90da259b..998df4dc0 100644
--- a/examples/boot/application/stm32l0/Cargo.toml
+++ b/examples/boot/application/stm32l0/Cargo.toml
@@ -26,3 +26,4 @@ defmt = [
       "embassy-stm32/defmt",
       "embassy-boot-stm32/defmt",
 ]
+skip-include = []
diff --git a/examples/boot/application/stm32l0/src/bin/a.rs b/examples/boot/application/stm32l0/src/bin/a.rs
index 4033ac590..ce80056e6 100644
--- a/examples/boot/application/stm32l0/src/bin/a.rs
+++ b/examples/boot/application/stm32l0/src/bin/a.rs
@@ -4,22 +4,26 @@
 
 #[cfg(feature = "defmt-rtt")]
 use defmt_rtt::*;
-use embassy_boot_stm32::{AlignedBuffer, FirmwareUpdater};
+use embassy_boot_stm32::{AlignedBuffer, FirmwareUpdater, FirmwareUpdaterConfig};
 use embassy_embedded_hal::adapter::BlockingAsync;
 use embassy_executor::Spawner;
 use embassy_stm32::exti::ExtiInput;
 use embassy_stm32::flash::{Flash, WRITE_SIZE};
 use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
+use embassy_sync::mutex::Mutex;
 use embassy_time::{Duration, Timer};
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
 
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
     let p = embassy_stm32::init(Default::default());
     let flash = Flash::new_blocking(p.FLASH);
-    let mut flash = BlockingAsync::new(flash);
+    let flash = Mutex::new(BlockingAsync::new(flash));
 
     let button = Input::new(p.PB2, Pull::Up);
     let mut button = ExtiInput::new(button, p.EXTI2);
@@ -28,18 +32,19 @@ async fn main(_spawner: Spawner) {
 
     led.set_high();
 
-    let mut updater = FirmwareUpdater::default();
+    let config = FirmwareUpdaterConfig::from_linkerfile(&flash);
+    let mut updater = FirmwareUpdater::new(config);
     button.wait_for_falling_edge().await;
     let mut offset = 0;
+    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     for chunk in APP_B.chunks(128) {
         let mut buf: [u8; 128] = [0; 128];
         buf[..chunk.len()].copy_from_slice(chunk);
-        updater.write_firmware(offset, &buf, &mut flash, 128).await.unwrap();
+        updater.write_firmware(magic.as_mut(), offset, &buf).await.unwrap();
         offset += chunk.len();
     }
 
-    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
-    updater.mark_updated(&mut flash, magic.as_mut()).await.unwrap();
+    updater.mark_updated(magic.as_mut()).await.unwrap();
     led.set_low();
     Timer::after(Duration::from_secs(1)).await;
     cortex_m::peripheral::SCB::sys_reset();
diff --git a/examples/boot/application/stm32l1/Cargo.toml b/examples/boot/application/stm32l1/Cargo.toml
index 8ac0fac85..10b58c172 100644
--- a/examples/boot/application/stm32l1/Cargo.toml
+++ b/examples/boot/application/stm32l1/Cargo.toml
@@ -26,3 +26,4 @@ defmt = [
       "embassy-stm32/defmt",
       "embassy-boot-stm32/defmt",
 ]
+skip-include = []
diff --git a/examples/boot/application/stm32l1/src/bin/a.rs b/examples/boot/application/stm32l1/src/bin/a.rs
index 00ddda636..1e9bf3cb9 100644
--- a/examples/boot/application/stm32l1/src/bin/a.rs
+++ b/examples/boot/application/stm32l1/src/bin/a.rs
@@ -10,9 +10,13 @@ use embassy_executor::Spawner;
 use embassy_stm32::exti::ExtiInput;
 use embassy_stm32::flash::{Flash, WRITE_SIZE};
 use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
+use embassy_sync::mutex::Mutex;
 use embassy_time::{Duration, Timer};
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
 
 #[embassy_executor::main]
@@ -31,15 +35,15 @@ async fn main(_spawner: Spawner) {
     let config = FirmwareUpdaterConfig::from_linkerfile(&flash);
     let mut updater = FirmwareUpdater::new(config);
     button.wait_for_falling_edge().await;
+    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     let mut offset = 0;
     for chunk in APP_B.chunks(128) {
         let mut buf: [u8; 128] = [0; 128];
         buf[..chunk.len()].copy_from_slice(chunk);
-        updater.write_firmware(offset, &buf).await.unwrap();
+        updater.write_firmware(magic.as_mut(), offset, &buf).await.unwrap();
         offset += chunk.len();
     }
 
-    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     updater.mark_updated(magic.as_mut()).await.unwrap();
     led.set_low();
     Timer::after(Duration::from_secs(1)).await;
diff --git a/examples/boot/application/stm32l4/Cargo.toml b/examples/boot/application/stm32l4/Cargo.toml
index ec79acdeb..713a6527e 100644
--- a/examples/boot/application/stm32l4/Cargo.toml
+++ b/examples/boot/application/stm32l4/Cargo.toml
@@ -26,3 +26,4 @@ defmt = [
       "embassy-stm32/defmt",
       "embassy-boot-stm32/defmt",
 ]
+skip-include = []
diff --git a/examples/boot/application/stm32l4/src/bin/a.rs b/examples/boot/application/stm32l4/src/bin/a.rs
index 54579e4ac..a514ab5be 100644
--- a/examples/boot/application/stm32l4/src/bin/a.rs
+++ b/examples/boot/application/stm32l4/src/bin/a.rs
@@ -10,8 +10,12 @@ use embassy_executor::Spawner;
 use embassy_stm32::exti::ExtiInput;
 use embassy_stm32::flash::{Flash, WRITE_SIZE};
 use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
+use embassy_sync::mutex::Mutex;
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
 
 #[embassy_executor::main]
@@ -29,15 +33,15 @@ async fn main(_spawner: Spawner) {
     let config = FirmwareUpdaterConfig::from_linkerfile(&flash);
     let mut updater = FirmwareUpdater::new(config);
     button.wait_for_falling_edge().await;
+    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     let mut offset = 0;
     for chunk in APP_B.chunks(2048) {
         let mut buf: [u8; 2048] = [0; 2048];
         buf[..chunk.len()].copy_from_slice(chunk);
-        updater.write_firmware(offset, &buf).await.unwrap();
+        updater.write_firmware(magic.as_mut(), offset, &buf).await.unwrap();
         offset += chunk.len();
     }
-    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
-    updater.mark_updated(&mut flash, magic.as_mut()).await.unwrap();
+    updater.mark_updated(magic.as_mut()).await.unwrap();
     led.set_low();
     cortex_m::peripheral::SCB::sys_reset();
 }
diff --git a/examples/boot/application/stm32wl/Cargo.toml b/examples/boot/application/stm32wl/Cargo.toml
index dfaece6cf..4c8bbd73f 100644
--- a/examples/boot/application/stm32wl/Cargo.toml
+++ b/examples/boot/application/stm32wl/Cargo.toml
@@ -26,3 +26,4 @@ defmt = [
       "embassy-stm32/defmt",
       "embassy-boot-stm32/defmt",
 ]
+skip-include = []
diff --git a/examples/boot/application/stm32wl/src/bin/a.rs b/examples/boot/application/stm32wl/src/bin/a.rs
index 0c6fa05f9..52a197a5c 100644
--- a/examples/boot/application/stm32wl/src/bin/a.rs
+++ b/examples/boot/application/stm32wl/src/bin/a.rs
@@ -4,21 +4,25 @@
 
 #[cfg(feature = "defmt-rtt")]
 use defmt_rtt::*;
-use embassy_boot_stm32::{AlignedBuffer, FirmwareUpdater};
+use embassy_boot_stm32::{AlignedBuffer, FirmwareUpdater, FirmwareUpdaterConfig};
 use embassy_embedded_hal::adapter::BlockingAsync;
 use embassy_executor::Spawner;
 use embassy_stm32::exti::ExtiInput;
 use embassy_stm32::flash::{Flash, WRITE_SIZE};
 use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
+use embassy_sync::mutex::Mutex;
 use panic_reset as _;
 
+#[cfg(feature = "skip-include")]
+static APP_B: &[u8] = &[0, 1, 2, 3];
+#[cfg(not(feature = "skip-include"))]
 static APP_B: &[u8] = include_bytes!("../../b.bin");
 
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
     let p = embassy_stm32::init(Default::default());
     let flash = Flash::new_blocking(p.FLASH);
-    let mut flash = Mutex::new(BlockingAsync::new(flash));
+    let flash = Mutex::new(BlockingAsync::new(flash));
 
     let button = Input::new(p.PA0, Pull::Up);
     let mut button = ExtiInput::new(button, p.EXTI0);
@@ -30,15 +34,15 @@ async fn main(_spawner: Spawner) {
     let mut updater = FirmwareUpdater::new(config);
     button.wait_for_falling_edge().await;
     //defmt::info!("Starting update");
+    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     let mut offset = 0;
     for chunk in APP_B.chunks(2048) {
         let mut buf: [u8; 2048] = [0; 2048];
         buf[..chunk.len()].copy_from_slice(chunk);
         //        defmt::info!("Writing chunk at 0x{:x}", offset);
-        updater.write_firmware(offset, &buf).await.unwrap();
+        updater.write_firmware(magic.as_mut(), offset, &buf).await.unwrap();
         offset += chunk.len();
     }
-    let mut magic = AlignedBuffer([0; WRITE_SIZE]);
     updater.mark_updated(magic.as_mut()).await.unwrap();
     //defmt::info!("Marked as updated");
     led.set_low();
diff --git a/examples/stm32f4/src/bin/can.rs b/examples/stm32f4/src/bin/can.rs
index e8377b9a1..da8955053 100644
--- a/examples/stm32f4/src/bin/can.rs
+++ b/examples/stm32f4/src/bin/can.rs
@@ -4,12 +4,21 @@
 
 use cortex_m_rt::entry;
 use defmt::*;
+use embassy_stm32::bind_interrupts;
 use embassy_stm32::can::bxcan::filter::Mask32;
 use embassy_stm32::can::bxcan::{Fifo, Frame, StandardId};
-use embassy_stm32::can::Can;
+use embassy_stm32::can::{Can, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, TxInterruptHandler};
 use embassy_stm32::gpio::{Input, Pull};
+use embassy_stm32::peripherals::CAN1;
 use {defmt_rtt as _, panic_probe as _};
 
+bind_interrupts!(struct Irqs {
+    CAN1_RX0 => Rx0InterruptHandler<CAN1>;
+    CAN1_RX1 => Rx1InterruptHandler<CAN1>;
+    CAN1_SCE => SceInterruptHandler<CAN1>;
+    CAN1_TX => TxInterruptHandler<CAN1>;
+});
+
 #[entry]
 fn main() -> ! {
     info!("Hello World!");
@@ -23,7 +32,7 @@ fn main() -> ! {
     let rx_pin = Input::new(&mut p.PA11, Pull::Up);
     core::mem::forget(rx_pin);
 
-    let mut can = Can::new(p.CAN1, p.PA11, p.PA12);
+    let mut can = Can::new(p.CAN1, p.PA11, p.PA12, Irqs);
 
     can.modify_filters().enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
 
diff --git a/tests/stm32/Cargo.toml b/tests/stm32/Cargo.toml
index 487ef4626..365f631b7 100644
--- a/tests/stm32/Cargo.toml
+++ b/tests/stm32/Cargo.toml
@@ -7,7 +7,7 @@ autobins = false
 
 [features]
 stm32f103c8 = ["embassy-stm32/stm32f103c8", "not-gpdma"]     # Blue Pill
-stm32f429zi = ["embassy-stm32/stm32f429zi", "chrono", "not-gpdma"]     # Nucleo "sdmmc"
+stm32f429zi = ["embassy-stm32/stm32f429zi", "chrono", "can", "not-gpdma"]     # Nucleo "sdmmc"
 stm32g071rb = ["embassy-stm32/stm32g071rb", "not-gpdma"]     # Nucleo
 stm32c031c6 = ["embassy-stm32/stm32c031c6", "not-gpdma"]     # Nucleo
 stm32g491re = ["embassy-stm32/stm32g491re", "not-gpdma"]     # Nucleo
@@ -18,6 +18,7 @@ stm32u585ai = ["embassy-stm32/stm32u585ai"]     # IoT board
 
 sdmmc = []
 chrono = ["embassy-stm32/chrono", "dep:chrono"]
+can = []
 ble = ["dep:embassy-stm32-wpan"]
 not-gpdma = []
 
@@ -52,6 +53,11 @@ name = "tl_mbox"
 path = "src/bin/tl_mbox.rs"
 required-features = [ "ble",]
 
+[[bin]]
+name = "can"
+path = "src/bin/can.rs"
+required-features = [ "can",]
+
 [[bin]]
 name = "gpio"
 path = "src/bin/gpio.rs"
diff --git a/tests/stm32/src/bin/can.rs b/tests/stm32/src/bin/can.rs
new file mode 100644
index 000000000..33d63d546
--- /dev/null
+++ b/tests/stm32/src/bin/can.rs
@@ -0,0 +1,78 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+// required-features: can
+
+#[path = "../common.rs"]
+mod common;
+use common::*;
+use embassy_executor::Spawner;
+use embassy_stm32::bind_interrupts;
+use embassy_stm32::can::bxcan::filter::Mask32;
+use embassy_stm32::can::bxcan::{Fifo, Frame, StandardId};
+use embassy_stm32::can::{Can, Rx0InterruptHandler, Rx1InterruptHandler, SceInterruptHandler, TxInterruptHandler};
+use embassy_stm32::gpio::{Input, Pull};
+use embassy_stm32::peripherals::CAN1;
+use {defmt_rtt as _, panic_probe as _};
+
+bind_interrupts!(struct Irqs {
+    CAN1_RX0 => Rx0InterruptHandler<CAN1>;
+    CAN1_RX1 => Rx1InterruptHandler<CAN1>;
+    CAN1_SCE => SceInterruptHandler<CAN1>;
+    CAN1_TX => TxInterruptHandler<CAN1>;
+});
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let mut p = embassy_stm32::init(config());
+    info!("Hello World!");
+
+    // HW is connected as follows:
+    // PB13 -> PD0
+    // PB12 -> PD1
+
+    // The next two lines are a workaround for testing without transceiver.
+    // To synchronise to the bus the RX input needs to see a high level.
+    // Use `mem::forget()` to release the borrow on the pin but keep the
+    // pull-up resistor enabled.
+    let rx_pin = Input::new(&mut p.PD0, Pull::Up);
+    core::mem::forget(rx_pin);
+
+    let mut can = Can::new(p.CAN1, p.PD0, p.PD1, Irqs);
+
+    info!("Configuring can...");
+
+    can.modify_filters().enable_bank(0, Fifo::Fifo0, Mask32::accept_all());
+
+    can.set_bitrate(1_000_000);
+    can.modify_config()
+        .set_loopback(true) // Receive own frames
+        .set_silent(true)
+        // .set_bit_timing(0x001c0003)
+        .enable();
+
+    info!("Can configured");
+
+    let mut i: u8 = 0;
+    loop {
+        let tx_frame = Frame::new_data(unwrap!(StandardId::new(i as _)), [i]);
+
+        info!("Transmitting frame...");
+        can.write(&tx_frame).await;
+
+        info!("Receiving frame...");
+        let (time, rx_frame) = can.read().await.unwrap();
+
+        info!("loopback time {}", time);
+        info!("loopback frame {=u8}", rx_frame.data().unwrap()[0]);
+
+        i += 1;
+        if i > 10 {
+            break;
+        }
+    }
+
+    info!("Test OK");
+    cortex_m::asm::bkpt();
+}
diff --git a/tests/stm32/src/bin/usart_dma.rs b/tests/stm32/src/bin/usart_dma.rs
index 50dd2893e..c34d9574b 100644
--- a/tests/stm32/src/bin/usart_dma.rs
+++ b/tests/stm32/src/bin/usart_dma.rs
@@ -69,24 +69,27 @@ async fn main(_spawner: Spawner) {
     const LEN: usize = 128;
     let mut tx_buf = [0; LEN];
     let mut rx_buf = [0; LEN];
-    for i in 0..LEN {
-        tx_buf[i] = i as u8;
-    }
 
     let (mut tx, mut rx) = usart.split();
 
-    let tx_fut = async {
-        tx.write(&tx_buf).await.unwrap();
-    };
-    let rx_fut = async {
-        rx.read(&mut rx_buf).await.unwrap();
-    };
+    for n in 0..42 {
+        for i in 0..LEN {
+            tx_buf[i] = (i ^ n) as u8;
+        }
 
-    // note: rx needs to be polled first, to workaround this bug:
-    // https://github.com/embassy-rs/embassy/issues/1426
-    join(rx_fut, tx_fut).await;
+        let tx_fut = async {
+            tx.write(&tx_buf).await.unwrap();
+        };
+        let rx_fut = async {
+            rx.read(&mut rx_buf).await.unwrap();
+        };
 
-    assert_eq!(tx_buf, rx_buf);
+        // note: rx needs to be polled first, to workaround this bug:
+        // https://github.com/embassy-rs/embassy/issues/1426
+        join(rx_fut, tx_fut).await;
+
+        assert_eq!(tx_buf, rx_buf);
+    }
 
     info!("Test OK");
     cortex_m::asm::bkpt();