diff --git a/.vscode/settings.json b/.vscode/settings.json
index 0c195a13b..220d25914 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -15,10 +15,11 @@
   "rust-analyzer.cargo.target": "thumbv7em-none-eabi",
   //"rust-analyzer.cargo.target": "thumbv8m.main-none-eabihf",
   "rust-analyzer.cargo.features": [
-    "stm32f103c8",
+    "stm32f446re",
     "time-driver-any",
     "unstable-pac",
     "exti",
+    "rt",
   ],
   "rust-analyzer.linkedProjects": [
     // Uncomment ONE line for the chip you want to work on.
diff --git a/embassy-stm32/src/i2s.rs b/embassy-stm32/src/i2s.rs
index c5a606b21..9b80dc1d0 100644
--- a/embassy-stm32/src/i2s.rs
+++ b/embassy-stm32/src/i2s.rs
@@ -2,6 +2,7 @@
 use embassy_hal_internal::into_ref;
 
 use crate::gpio::{AFType, AnyPin, SealedPin};
+use crate::mode::Async;
 use crate::pac::spi::vals;
 use crate::spi::{Config as SpiConfig, *};
 use crate::time::Hertz;
@@ -152,15 +153,15 @@ impl Default for Config {
 }
 
 /// I2S driver.
-pub struct I2S<'d, T: Instance, Tx, Rx> {
-    _peri: Spi<'d, T, Tx, Rx>,
+pub struct I2S<'d, T: Instance> {
+    _peri: Spi<'d, T, Async>,
     sd: Option<PeripheralRef<'d, AnyPin>>,
     ws: Option<PeripheralRef<'d, AnyPin>>,
     ck: Option<PeripheralRef<'d, AnyPin>>,
     mck: Option<PeripheralRef<'d, AnyPin>>,
 }
 
-impl<'d, T: Instance, Tx, Rx> I2S<'d, T, Tx, Rx> {
+impl<'d, T: Instance> I2S<'d, T> {
     /// Note: Full-Duplex modes are not supported at this time
     pub fn new(
         peri: impl Peripheral<P = T> + 'd,
@@ -168,8 +169,8 @@ impl<'d, T: Instance, Tx, Rx> I2S<'d, T, Tx, Rx> {
         ws: impl Peripheral<P = impl WsPin<T>> + 'd,
         ck: impl Peripheral<P = impl CkPin<T>> + 'd,
         mck: impl Peripheral<P = impl MckPin<T>> + 'd,
-        txdma: impl Peripheral<P = Tx> + 'd,
-        rxdma: impl Peripheral<P = Rx> + 'd,
+        txdma: impl Peripheral<P = impl TxDma<T>> + 'd,
+        rxdma: impl Peripheral<P = impl RxDma<T>> + 'd,
         freq: Hertz,
         config: Config,
     ) -> Self {
@@ -265,24 +266,17 @@ impl<'d, T: Instance, Tx, Rx> I2S<'d, T, Tx, Rx> {
     }
 
     /// Write audio data.
-    pub async fn write<W: Word>(&mut self, data: &[W]) -> Result<(), Error>
-    where
-        Tx: TxDma<T>,
-    {
+    pub async fn write<W: Word>(&mut self, data: &[W]) -> Result<(), Error> {
         self._peri.write(data).await
     }
 
     /// Read audio data.
-    pub async fn read<W: Word>(&mut self, data: &mut [W]) -> Result<(), Error>
-    where
-        Tx: TxDma<T>,
-        Rx: RxDma<T>,
-    {
+    pub async fn read<W: Word>(&mut self, data: &mut [W]) -> Result<(), Error> {
         self._peri.read(data).await
     }
 }
 
-impl<'d, T: Instance, Tx, Rx> Drop for I2S<'d, T, Tx, Rx> {
+impl<'d, T: Instance> Drop for I2S<'d, T> {
     fn drop(&mut self) {
         self.sd.as_ref().map(|x| x.set_as_disconnected());
         self.ws.as_ref().map(|x| x.set_as_disconnected());
diff --git a/embassy-stm32/src/lib.rs b/embassy-stm32/src/lib.rs
index ea17f8477..1f0f85936 100644
--- a/embassy-stm32/src/lib.rs
+++ b/embassy-stm32/src/lib.rs
@@ -17,6 +17,29 @@ include!(concat!(env!("OUT_DIR"), "/_macros.rs"));
 // Utilities
 pub mod time;
 mod traits;
+/// Marker types that select whether a peripheral driver operates in blocking or async mode.
+pub mod mode {
+    trait SealedMode {} // private supertrait: code outside this module cannot implement `Mode`
+
+    /// Operating mode for a peripheral; implemented only by [`Blocking`] and [`Async`].
+    #[allow(private_bounds)] // `SealedMode` is deliberately private so that `Mode` is sealed
+    pub trait Mode: SealedMode {}
+
+    macro_rules! impl_mode { // implements both the sealing trait and `Mode` for one marker type
+        ($name:ident) => {
+            impl SealedMode for $name {}
+            impl Mode for $name {}
+        };
+    }
+
+    /// Marker type for drivers operating in blocking mode.
+    pub struct Blocking;
+    /// Marker type for drivers operating in async mode.
+    pub struct Async;
+
+    impl_mode!(Blocking);
+    impl_mode!(Async);
+}
 
 // Always-present hardware
 pub mod dma;
diff --git a/embassy-stm32/src/spi/mod.rs b/embassy-stm32/src/spi/mod.rs
index 450975f18..a4465e289 100644
--- a/embassy-stm32/src/spi/mod.rs
+++ b/embassy-stm32/src/spi/mod.rs
@@ -1,6 +1,7 @@
 //! Serial Peripheral Interface (SPI)
 #![macro_use]
 
+use core::marker::PhantomData;
 use core::ptr;
 
 use embassy_embedded_hal::SetConfig;
@@ -8,8 +9,9 @@ use embassy_futures::join::join;
 use embassy_hal_internal::{into_ref, PeripheralRef};
 pub use embedded_hal_02::spi::{Mode, Phase, Polarity, MODE_0, MODE_1, MODE_2, MODE_3};
 
-use crate::dma::{slice_ptr_parts, word, Transfer};
-use crate::gpio::{AFType, AnyPin, Pull, SealedPin as _};
+use crate::dma::{slice_ptr_parts, word, AnyChannel, Request, Transfer};
+use crate::gpio::{AFType, AnyPin, Pull, SealedPin as _, Speed};
+use crate::mode::{Async, Blocking, Mode as PeriMode};
 use crate::pac::spi::{regs, vals, Spi as Regs};
 use crate::rcc::RccPeripheral;
 use crate::time::Hertz;
@@ -81,163 +83,37 @@ impl Config {
             BitOrder::MsbFirst => vals::Lsbfirst::MSBFIRST,
         }
     }
-}
 
+    fn sck_pull_mode(&self) -> Pull { // pull direction for SCK derived from the configured clock polarity
+        match self.mode.polarity {
+            Polarity::IdleLow => Pull::Down, // clock idles low -> pull SCK down
+            Polarity::IdleHigh => Pull::Up, // clock idles high -> pull SCK up
+        }
+    }
+}
 /// SPI driver.
-pub struct Spi<'d, T: Instance, Tx, Rx> {
+pub struct Spi<'d, T: Instance, M: PeriMode> {
     _peri: PeripheralRef<'d, T>,
     sck: Option<PeripheralRef<'d, AnyPin>>,
     mosi: Option<PeripheralRef<'d, AnyPin>>,
     miso: Option<PeripheralRef<'d, AnyPin>>,
-    txdma: PeripheralRef<'d, Tx>,
-    rxdma: PeripheralRef<'d, Rx>,
+    txdma: Option<(PeripheralRef<'d, AnyChannel>, Request)>,
+    rxdma: Option<(PeripheralRef<'d, AnyChannel>, Request)>,
+    _phantom: PhantomData<M>,
     current_word_size: word_impl::Config,
 }
 
-impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
-    /// Create a new SPI driver.
-    pub fn new(
-        peri: impl Peripheral<P = T> + 'd,
-        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
-        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
-        miso: impl Peripheral<P = impl MisoPin<T>> + 'd,
-        txdma: impl Peripheral<P = Tx> + 'd,
-        rxdma: impl Peripheral<P = Rx> + 'd,
-        config: Config,
-    ) -> Self {
-        into_ref!(peri, sck, mosi, miso);
-
-        let sck_pull_mode = match config.mode.polarity {
-            Polarity::IdleLow => Pull::Down,
-            Polarity::IdleHigh => Pull::Up,
-        };
-
-        sck.set_as_af_pull(sck.af_num(), AFType::OutputPushPull, sck_pull_mode);
-        sck.set_speed(crate::gpio::Speed::VeryHigh);
-        mosi.set_as_af(mosi.af_num(), AFType::OutputPushPull);
-        mosi.set_speed(crate::gpio::Speed::VeryHigh);
-        miso.set_as_af(miso.af_num(), AFType::Input);
-        miso.set_speed(crate::gpio::Speed::VeryHigh);
-
-        Self::new_inner(
-            peri,
-            Some(sck.map_into()),
-            Some(mosi.map_into()),
-            Some(miso.map_into()),
-            txdma,
-            rxdma,
-            config,
-        )
-    }
-
-    /// Create a new SPI driver, in RX-only mode (only MISO pin, no MOSI).
-    pub fn new_rxonly(
-        peri: impl Peripheral<P = T> + 'd,
-        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
-        miso: impl Peripheral<P = impl MisoPin<T>> + 'd,
-        txdma: impl Peripheral<P = Tx> + 'd, // TODO remove
-        rxdma: impl Peripheral<P = Rx> + 'd,
-        config: Config,
-    ) -> Self {
-        into_ref!(sck, miso);
-        sck.set_as_af(sck.af_num(), AFType::OutputPushPull);
-        sck.set_speed(crate::gpio::Speed::VeryHigh);
-        miso.set_as_af(miso.af_num(), AFType::Input);
-        miso.set_speed(crate::gpio::Speed::VeryHigh);
-
-        Self::new_inner(
-            peri,
-            Some(sck.map_into()),
-            None,
-            Some(miso.map_into()),
-            txdma,
-            rxdma,
-            config,
-        )
-    }
-
-    /// Create a new SPI driver, in TX-only mode (only MOSI pin, no MISO).
-    pub fn new_txonly(
-        peri: impl Peripheral<P = T> + 'd,
-        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
-        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
-        txdma: impl Peripheral<P = Tx> + 'd,
-        rxdma: impl Peripheral<P = Rx> + 'd, // TODO remove
-        config: Config,
-    ) -> Self {
-        into_ref!(sck, mosi);
-        sck.set_as_af(sck.af_num(), AFType::OutputPushPull);
-        sck.set_speed(crate::gpio::Speed::VeryHigh);
-        mosi.set_as_af(mosi.af_num(), AFType::OutputPushPull);
-        mosi.set_speed(crate::gpio::Speed::VeryHigh);
-
-        Self::new_inner(
-            peri,
-            Some(sck.map_into()),
-            Some(mosi.map_into()),
-            None,
-            txdma,
-            rxdma,
-            config,
-        )
-    }
-
-    /// Create a new SPI driver, in TX-only mode, without SCK pin.
-    ///
-    /// This can be useful for bit-banging non-SPI protocols.
-    pub fn new_txonly_nosck(
-        peri: impl Peripheral<P = T> + 'd,
-        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
-        txdma: impl Peripheral<P = Tx> + 'd,
-        rxdma: impl Peripheral<P = Rx> + 'd, // TODO: remove
-        config: Config,
-    ) -> Self {
-        into_ref!(mosi);
-        mosi.set_as_af_pull(mosi.af_num(), AFType::OutputPushPull, Pull::Down);
-        mosi.set_speed(crate::gpio::Speed::Medium);
-
-        Self::new_inner(peri, None, Some(mosi.map_into()), None, txdma, rxdma, config)
-    }
-
-    #[cfg(stm32wl)]
-    /// Useful for on chip peripherals like SUBGHZ which are hardwired.
-    pub fn new_subghz(
-        peri: impl Peripheral<P = T> + 'd,
-        txdma: impl Peripheral<P = Tx> + 'd,
-        rxdma: impl Peripheral<P = Rx> + 'd,
-    ) -> Self {
-        // see RM0453 rev 1 section 7.2.13 page 291
-        // The SUBGHZSPI_SCK frequency is obtained by PCLK3 divided by two.
-        // The SUBGHZSPI_SCK clock maximum speed must not exceed 16 MHz.
-        let pclk3_freq = <peripherals::SUBGHZSPI as crate::rcc::SealedRccPeripheral>::frequency().0;
-        let freq = Hertz(core::cmp::min(pclk3_freq / 2, 16_000_000));
-        let mut config = Config::default();
-        config.mode = MODE_0;
-        config.bit_order = BitOrder::MsbFirst;
-        config.frequency = freq;
-        Self::new_inner(peri, None, None, None, txdma, rxdma, config)
-    }
-
-    #[allow(dead_code)]
-    pub(crate) fn new_internal(
-        peri: impl Peripheral<P = T> + 'd,
-        txdma: impl Peripheral<P = Tx> + 'd,
-        rxdma: impl Peripheral<P = Rx> + 'd,
-        config: Config,
-    ) -> Self {
-        Self::new_inner(peri, None, None, None, txdma, rxdma, config)
-    }
-
+impl<'d, T: Instance, M: PeriMode> Spi<'d, T, M> {
     fn new_inner(
         peri: impl Peripheral<P = T> + 'd,
         sck: Option<PeripheralRef<'d, AnyPin>>,
         mosi: Option<PeripheralRef<'d, AnyPin>>,
         miso: Option<PeripheralRef<'d, AnyPin>>,
-        txdma: impl Peripheral<P = Tx> + 'd,
-        rxdma: impl Peripheral<P = Rx> + 'd,
+        txdma: Option<(PeripheralRef<'d, AnyChannel>, Request)>,
+        rxdma: Option<(PeripheralRef<'d, AnyChannel>, Request)>,
         config: Config,
     ) -> Self {
-        into_ref!(peri, txdma, rxdma);
+        into_ref!(peri);
 
         let pclk = T::frequency();
         let freq = config.frequency;
@@ -336,6 +212,7 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
             txdma,
             rxdma,
             current_word_size: <u8 as SealedWord>::CONFIG,
+            _phantom: PhantomData,
         }
     }
 
@@ -462,169 +339,6 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
         self.current_word_size = word_size;
     }
 
-    /// SPI write, using DMA.
-    pub async fn write<W: Word>(&mut self, data: &[W]) -> Result<(), Error>
-    where
-        Tx: TxDma<T>,
-    {
-        if data.is_empty() {
-            return Ok(());
-        }
-
-        self.set_word_size(W::CONFIG);
-        T::REGS.cr1().modify(|w| {
-            w.set_spe(false);
-        });
-
-        let tx_request = self.txdma.request();
-        let tx_dst = T::REGS.tx_ptr();
-        let tx_f = unsafe { Transfer::new_write(&mut self.txdma, tx_request, data, tx_dst, Default::default()) };
-
-        set_txdmaen(T::REGS, true);
-        T::REGS.cr1().modify(|w| {
-            w.set_spe(true);
-        });
-        #[cfg(any(spi_v3, spi_v4, spi_v5))]
-        T::REGS.cr1().modify(|w| {
-            w.set_cstart(true);
-        });
-
-        tx_f.await;
-
-        finish_dma(T::REGS);
-
-        Ok(())
-    }
-
-    /// SPI read, using DMA.
-    pub async fn read<W: Word>(&mut self, data: &mut [W]) -> Result<(), Error>
-    where
-        Tx: TxDma<T>,
-        Rx: RxDma<T>,
-    {
-        if data.is_empty() {
-            return Ok(());
-        }
-
-        self.set_word_size(W::CONFIG);
-        T::REGS.cr1().modify(|w| {
-            w.set_spe(false);
-        });
-
-        // SPIv3 clears rxfifo on SPE=0
-        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
-        flush_rx_fifo(T::REGS);
-
-        set_rxdmaen(T::REGS, true);
-
-        let clock_byte_count = data.len();
-
-        let rx_request = self.rxdma.request();
-        let rx_src = T::REGS.rx_ptr();
-        let rx_f = unsafe { Transfer::new_read(&mut self.rxdma, rx_request, rx_src, data, Default::default()) };
-
-        let tx_request = self.txdma.request();
-        let tx_dst = T::REGS.tx_ptr();
-        let clock_byte = 0x00u8;
-        let tx_f = unsafe {
-            Transfer::new_write_repeated(
-                &mut self.txdma,
-                tx_request,
-                &clock_byte,
-                clock_byte_count,
-                tx_dst,
-                Default::default(),
-            )
-        };
-
-        set_txdmaen(T::REGS, true);
-        T::REGS.cr1().modify(|w| {
-            w.set_spe(true);
-        });
-        #[cfg(any(spi_v3, spi_v4, spi_v5))]
-        T::REGS.cr1().modify(|w| {
-            w.set_cstart(true);
-        });
-
-        join(tx_f, rx_f).await;
-
-        finish_dma(T::REGS);
-
-        Ok(())
-    }
-
-    async fn transfer_inner<W: Word>(&mut self, read: *mut [W], write: *const [W]) -> Result<(), Error>
-    where
-        Tx: TxDma<T>,
-        Rx: RxDma<T>,
-    {
-        let (_, rx_len) = slice_ptr_parts(read);
-        let (_, tx_len) = slice_ptr_parts(write);
-        assert_eq!(rx_len, tx_len);
-        if rx_len == 0 {
-            return Ok(());
-        }
-
-        self.set_word_size(W::CONFIG);
-        T::REGS.cr1().modify(|w| {
-            w.set_spe(false);
-        });
-
-        // SPIv3 clears rxfifo on SPE=0
-        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
-        flush_rx_fifo(T::REGS);
-
-        set_rxdmaen(T::REGS, true);
-
-        let rx_request = self.rxdma.request();
-        let rx_src = T::REGS.rx_ptr();
-        let rx_f = unsafe { Transfer::new_read_raw(&mut self.rxdma, rx_request, rx_src, read, Default::default()) };
-
-        let tx_request = self.txdma.request();
-        let tx_dst = T::REGS.tx_ptr();
-        let tx_f = unsafe { Transfer::new_write_raw(&mut self.txdma, tx_request, write, tx_dst, Default::default()) };
-
-        set_txdmaen(T::REGS, true);
-        T::REGS.cr1().modify(|w| {
-            w.set_spe(true);
-        });
-        #[cfg(any(spi_v3, spi_v4, spi_v5))]
-        T::REGS.cr1().modify(|w| {
-            w.set_cstart(true);
-        });
-
-        join(tx_f, rx_f).await;
-
-        finish_dma(T::REGS);
-
-        Ok(())
-    }
-
-    /// Bidirectional transfer, using DMA.
-    ///
-    /// This transfers both buffers at the same time, so it is NOT equivalent to `write` followed by `read`.
-    ///
-    /// The transfer runs for `max(read.len(), write.len())` bytes. If `read` is shorter extra bytes are ignored.
-    /// If `write` is shorter it is padded with zero bytes.
-    pub async fn transfer<W: Word>(&mut self, read: &mut [W], write: &[W]) -> Result<(), Error>
-    where
-        Tx: TxDma<T>,
-        Rx: RxDma<T>,
-    {
-        self.transfer_inner(read, write).await
-    }
-
-    /// In-place bidirectional transfer, using DMA.
-    ///
-    /// This writes the contents of `data` on MOSI, and puts the received data on MISO in `data`, at the same time.
-    pub async fn transfer_in_place<W: Word>(&mut self, data: &mut [W]) -> Result<(), Error>
-    where
-        Tx: TxDma<T>,
-        Rx: RxDma<T>,
-    {
-        self.transfer_inner(data, data).await
-    }
-
     /// Blocking write.
     pub fn blocking_write<W: Word>(&mut self, words: &[W]) -> Result<(), Error> {
         T::REGS.cr1().modify(|w| w.set_spe(true));
@@ -682,7 +396,338 @@ impl<'d, T: Instance, Tx, Rx> Spi<'d, T, Tx, Rx> {
     }
 }
 
-impl<'d, T: Instance, Tx, Rx> Drop for Spi<'d, T, Tx, Rx> {
+impl<'d, T: Instance> Spi<'d, T, Blocking> {
+    /// Create a new blocking SPI driver.
+    pub fn new_blocking(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
+        miso: impl Peripheral<P = impl MisoPin<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(
+            peri,
+            new_pin!(sck, AFType::OutputPushPull, Speed::VeryHigh, config.sck_pull_mode()),
+            new_pin!(mosi, AFType::OutputPushPull, Speed::VeryHigh),
+            new_pin!(miso, AFType::Input, Speed::VeryHigh),
+            None,
+            None,
+            config,
+        )
+    }
+
+    /// Create a new blocking SPI driver, in RX-only mode (only MISO pin, no MOSI).
+    pub fn new_blocking_rxonly(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        miso: impl Peripheral<P = impl MisoPin<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(
+            peri,
+            new_pin!(sck, AFType::OutputPushPull, Speed::VeryHigh, config.sck_pull_mode()),
+            None,
+            new_pin!(miso, AFType::Input, Speed::VeryHigh),
+            None,
+            None,
+            config,
+        )
+    }
+
+    /// Create a new blocking SPI driver, in TX-only mode (only MOSI pin, no MISO).
+    pub fn new_blocking_txonly(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(
+            peri,
+            new_pin!(sck, AFType::OutputPushPull, Speed::VeryHigh, config.sck_pull_mode()),
+            new_pin!(mosi, AFType::OutputPushPull, Speed::VeryHigh),
+            None,
+            None,
+            None,
+            config,
+        )
+    }
+
+    /// Create a new blocking SPI driver, in TX-only mode, without SCK pin.
+    ///
+    /// This can be useful for bit-banging non-SPI protocols. MOSI is pulled down and runs at medium speed.
+    pub fn new_blocking_txonly_nosck(
+        peri: impl Peripheral<P = T> + 'd,
+        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(
+            peri,
+            None, // no SCK pin
+            new_pin!(mosi, AFType::OutputPushPull, Speed::Medium, Pull::Down),
+            None, // no MISO pin
+            None, // blocking mode: no TX DMA
+            None, // blocking mode: no RX DMA
+            config,
+        )
+    }
+}
+
+impl<'d, T: Instance> Spi<'d, T, Async> {
+    /// Create a new SPI driver.
+    pub fn new(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
+        miso: impl Peripheral<P = impl MisoPin<T>> + 'd,
+        txdma: impl Peripheral<P = impl TxDma<T>> + 'd,
+        rxdma: impl Peripheral<P = impl RxDma<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(
+            peri,
+            new_pin!(sck, AFType::OutputPushPull, Speed::VeryHigh, config.sck_pull_mode()),
+            new_pin!(mosi, AFType::OutputPushPull, Speed::VeryHigh),
+            new_pin!(miso, AFType::Input, Speed::VeryHigh),
+            new_dma!(txdma),
+            new_dma!(rxdma),
+            config,
+        )
+    }
+
+    /// Create a new SPI driver, in RX-only mode (only MISO pin, no MOSI); no TX DMA channel is stored.
+    pub fn new_rxonly(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        miso: impl Peripheral<P = impl MisoPin<T>> + 'd,
+        rxdma: impl Peripheral<P = impl RxDma<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(
+            peri,
+            new_pin!(sck, AFType::OutputPushPull, Speed::VeryHigh, config.sck_pull_mode()),
+            None, // no MOSI pin
+            new_pin!(miso, AFType::Input, Speed::VeryHigh),
+            None, // NOTE(review): `read()`/`write()`/`transfer*()` unwrap `txdma`, so they panic on this driver
+            new_dma!(rxdma),
+            config,
+        )
+    }
+
+    /// Create a new SPI driver, in TX-only mode (only MOSI pin, no MISO); no RX DMA channel is stored.
+    pub fn new_txonly(
+        peri: impl Peripheral<P = T> + 'd,
+        sck: impl Peripheral<P = impl SckPin<T>> + 'd,
+        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
+        txdma: impl Peripheral<P = impl TxDma<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(
+            peri,
+            new_pin!(sck, AFType::OutputPushPull, Speed::VeryHigh, config.sck_pull_mode()),
+            new_pin!(mosi, AFType::OutputPushPull, Speed::VeryHigh),
+            None, // no MISO pin
+            new_dma!(txdma),
+            None, // no RX DMA: `read()` and `transfer*()` unwrap `rxdma` and panic on this driver
+            config,
+        )
+    }
+
+    /// Create a new SPI driver, in TX-only mode, without SCK pin.
+    ///
+    /// This can be useful for bit-banging non-SPI protocols. MOSI is pulled down and runs at medium speed.
+    pub fn new_txonly_nosck(
+        peri: impl Peripheral<P = T> + 'd,
+        mosi: impl Peripheral<P = impl MosiPin<T>> + 'd,
+        txdma: impl Peripheral<P = impl TxDma<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(
+            peri,
+            None, // no SCK pin
+            new_pin!(mosi, AFType::OutputPushPull, Speed::Medium, Pull::Down),
+            None, // no MISO pin
+            new_dma!(txdma),
+            None, // no RX DMA: only `write()` is usable on this driver
+            config,
+        )
+    }
+
+    #[cfg(stm32wl)]
+    /// Useful for on chip peripherals like SUBGHZ which are hardwired.
+    pub fn new_subghz(
+        peri: impl Peripheral<P = T> + 'd,
+        txdma: impl Peripheral<P = impl TxDma<T>> + 'd,
+        rxdma: impl Peripheral<P = impl RxDma<T>> + 'd,
+    ) -> Self {
+        // see RM0453 rev 1 section 7.2.13 page 291
+        // The SUBGHZSPI_SCK frequency is obtained by PCLK3 divided by two.
+        // The SUBGHZSPI_SCK clock maximum speed must not exceed 16 MHz.
+        let pclk3_freq = <peripherals::SUBGHZSPI as crate::rcc::SealedRccPeripheral>::frequency().0;
+        let freq = Hertz(core::cmp::min(pclk3_freq / 2, 16_000_000));
+        let mut config = Config::default();
+        config.mode = MODE_0;
+        config.bit_order = BitOrder::MsbFirst;
+        config.frequency = freq;
+
+        Self::new_inner(peri, None, None, None, new_dma!(txdma), new_dma!(rxdma), config)
+    }
+
+    #[allow(dead_code)]
+    pub(crate) fn new_internal(
+        peri: impl Peripheral<P = T> + 'd,
+        txdma: impl Peripheral<P = impl TxDma<T>> + 'd,
+        rxdma: impl Peripheral<P = impl RxDma<T>> + 'd,
+        config: Config,
+    ) -> Self {
+        Self::new_inner(peri, None, None, None, new_dma!(txdma), new_dma!(rxdma), config)
+    }
+
+    /// SPI write, using DMA. Panics if the driver was created without a TX DMA channel.
+    pub async fn write<W: Word>(&mut self, data: &[W]) -> Result<(), Error> {
+        if data.is_empty() { // nothing to send: return before touching the peripheral
+            return Ok(());
+        }
+
+        self.set_word_size(W::CONFIG);
+        T::REGS.cr1().modify(|w| {
+            w.set_spe(false); // keep the SPI disabled while the DMA transfer is set up
+        });
+
+        let (txdma, tx_request) = self.txdma.as_mut().unwrap(); // panics when `txdma` is `None`
+        let tx_dst = T::REGS.tx_ptr();
+        let tx_f = unsafe { Transfer::new_write(txdma, *tx_request, data, tx_dst, Default::default()) };
+
+        set_txdmaen(T::REGS, true);
+        T::REGS.cr1().modify(|w| {
+            w.set_spe(true); // re-enable the SPI now that the transfer exists
+        });
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
+        T::REGS.cr1().modify(|w| {
+            w.set_cstart(true); // these SPI versions additionally need CSTART set
+        });
+
+        tx_f.await;
+
+        finish_dma(T::REGS);
+
+        Ok(())
+    }
+
+    /// SPI read, using DMA. Panics unless both RX and TX DMA channels are present (TX clocks out 0x00).
+    pub async fn read<W: Word>(&mut self, data: &mut [W]) -> Result<(), Error> {
+        if data.is_empty() { // nothing to receive: return before touching the peripheral
+            return Ok(());
+        }
+
+        self.set_word_size(W::CONFIG);
+        T::REGS.cr1().modify(|w| {
+            w.set_spe(false); // keep the SPI disabled while both DMA transfers are set up
+        });
+
+        // On SPI v3/v4/v5 clearing SPE already empties the RX FIFO, so only older versions flush it here.
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
+        flush_rx_fifo(T::REGS);
+
+        set_rxdmaen(T::REGS, true);
+
+        let clock_byte_count = data.len(); // one dummy TX item per item to receive
+
+        let (rxdma, rx_request) = self.rxdma.as_mut().unwrap(); // panics when `rxdma` is `None`
+        let rx_src = T::REGS.rx_ptr();
+        let rx_f = unsafe { Transfer::new_read(rxdma, *rx_request, rx_src, data, Default::default()) };
+
+        let (txdma, tx_request) = self.txdma.as_mut().unwrap(); // panics when `txdma` is `None` (e.g. `new_rxonly`)
+        let tx_dst = T::REGS.tx_ptr();
+        let clock_byte = 0x00u8; // dummy value sent repeatedly while receiving
+        let tx_f = unsafe {
+            Transfer::new_write_repeated(
+                txdma,
+                *tx_request,
+                &clock_byte,
+                clock_byte_count,
+                tx_dst,
+                Default::default(),
+            )
+        };
+
+        set_txdmaen(T::REGS, true);
+        T::REGS.cr1().modify(|w| {
+            w.set_spe(true); // re-enable the SPI now that both transfers exist
+        });
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
+        T::REGS.cr1().modify(|w| {
+            w.set_cstart(true); // these SPI versions additionally need CSTART set
+        });
+
+        join(tx_f, rx_f).await; // wait for both directions to complete
+
+        finish_dma(T::REGS);
+
+        Ok(())
+    }
+
+    async fn transfer_inner<W: Word>(&mut self, read: *mut [W], write: *const [W]) -> Result<(), Error> {
+        let (_, rx_len) = slice_ptr_parts(read);
+        let (_, tx_len) = slice_ptr_parts(write);
+        assert_eq!(rx_len, tx_len); // both buffers must have the same length; panics otherwise
+        if rx_len == 0 { // empty transfer: return before touching the peripheral
+            return Ok(());
+        }
+
+        self.set_word_size(W::CONFIG);
+        T::REGS.cr1().modify(|w| {
+            w.set_spe(false); // keep the SPI disabled while both DMA transfers are set up
+        });
+
+        // On SPI v3/v4/v5 clearing SPE already empties the RX FIFO, so only older versions flush it here.
+        #[cfg(not(any(spi_v3, spi_v4, spi_v5)))]
+        flush_rx_fifo(T::REGS);
+
+        set_rxdmaen(T::REGS, true);
+
+        let (rxdma, rx_request) = self.rxdma.as_mut().unwrap(); // panics when `rxdma` is `None`
+        let rx_src = T::REGS.rx_ptr();
+        let rx_f = unsafe { Transfer::new_read_raw(rxdma, *rx_request, rx_src, read, Default::default()) };
+
+        let (txdma, tx_request) = self.txdma.as_mut().unwrap(); // panics when `txdma` is `None`
+        let tx_dst = T::REGS.tx_ptr();
+        let tx_f = unsafe { Transfer::new_write_raw(txdma, *tx_request, write, tx_dst, Default::default()) };
+
+        set_txdmaen(T::REGS, true);
+        T::REGS.cr1().modify(|w| {
+            w.set_spe(true); // re-enable the SPI now that both transfers exist
+        });
+        #[cfg(any(spi_v3, spi_v4, spi_v5))]
+        T::REGS.cr1().modify(|w| {
+            w.set_cstart(true); // these SPI versions additionally need CSTART set
+        });
+
+        join(tx_f, rx_f).await; // wait for both directions to complete
+
+        finish_dma(T::REGS);
+
+        Ok(())
+    }
+
+    /// Bidirectional transfer, using DMA.
+    ///
+    /// This transfers both buffers at the same time, so it is NOT equivalent to `write` followed by `read`.
+    ///
+    /// `read` and `write` must have the same length: the lengths are asserted equal and a mismatch panics.
+    /// No truncation or zero-padding is performed. Also panics if either DMA channel is missing.
+    pub async fn transfer<W: Word>(&mut self, read: &mut [W], write: &[W]) -> Result<(), Error> {
+        self.transfer_inner(read, write).await
+    }
+
+    /// In-place bidirectional transfer, using DMA.
+    ///
+    /// This sends the contents of `data` on MOSI and, at the same time, overwrites `data` with what is received on MISO.
+    pub async fn transfer_in_place<W: Word>(&mut self, data: &mut [W]) -> Result<(), Error> {
+        self.transfer_inner(data, data).await // the same buffer serves as both the read and the write side
+    }
+}
+
+impl<'d, T: Instance, M: PeriMode> Drop for Spi<'d, T, M> {
     fn drop(&mut self) {
         self.sck.as_ref().map(|x| x.set_as_disconnected());
         self.mosi.as_ref().map(|x| x.set_as_disconnected());
@@ -900,7 +945,7 @@ fn transfer_word<W: Word>(regs: Regs, tx_word: W) -> Result<W, Error> {
 // some marker traits. For details, see https://github.com/rust-embedded/embedded-hal/pull/289
 macro_rules! impl_blocking {
     ($w:ident) => {
-        impl<'d, T: Instance, Tx, Rx> embedded_hal_02::blocking::spi::Write<$w> for Spi<'d, T, Tx, Rx> {
+        impl<'d, T: Instance, M: PeriMode> embedded_hal_02::blocking::spi::Write<$w> for Spi<'d, T, M> {
             type Error = Error;
 
             fn write(&mut self, words: &[$w]) -> Result<(), Self::Error> {
@@ -908,7 +953,7 @@ macro_rules! impl_blocking {
             }
         }
 
-        impl<'d, T: Instance, Tx, Rx> embedded_hal_02::blocking::spi::Transfer<$w> for Spi<'d, T, Tx, Rx> {
+        impl<'d, T: Instance, M: PeriMode> embedded_hal_02::blocking::spi::Transfer<$w> for Spi<'d, T, M> {
             type Error = Error;
 
             fn transfer<'w>(&mut self, words: &'w mut [$w]) -> Result<&'w [$w], Self::Error> {
@@ -922,11 +967,11 @@ macro_rules! impl_blocking {
 impl_blocking!(u8);
 impl_blocking!(u16);
 
-impl<'d, T: Instance, Tx, Rx> embedded_hal_1::spi::ErrorType for Spi<'d, T, Tx, Rx> {
+impl<'d, T: Instance, M: PeriMode> embedded_hal_1::spi::ErrorType for Spi<'d, T, M> {
     type Error = Error;
 }
 
-impl<'d, T: Instance, W: Word, Tx, Rx> embedded_hal_1::spi::SpiBus<W> for Spi<'d, T, Tx, Rx> {
+impl<'d, T: Instance, W: Word, M: PeriMode> embedded_hal_1::spi::SpiBus<W> for Spi<'d, T, M> {
     fn flush(&mut self) -> Result<(), Self::Error> {
         Ok(())
     }
@@ -959,7 +1004,7 @@ impl embedded_hal_1::spi::Error for Error {
     }
 }
 
-impl<'d, T: Instance, Tx: TxDma<T>, Rx: RxDma<T>, W: Word> embedded_hal_async::spi::SpiBus<W> for Spi<'d, T, Tx, Rx> {
+impl<'d, T: Instance, W: Word> embedded_hal_async::spi::SpiBus<W> for Spi<'d, T, Async> {
     async fn flush(&mut self) -> Result<(), Self::Error> {
         Ok(())
     }
@@ -1094,7 +1139,7 @@ foreach_peripheral!(
     };
 );
 
-impl<'d, T: Instance, Tx, Rx> SetConfig for Spi<'d, T, Tx, Rx> {
+impl<'d, T: Instance, M: PeriMode> SetConfig for Spi<'d, T, M> {
     type Config = Config;
     type ConfigError = ();
     fn set_config(&mut self, config: &Self::Config) -> Result<(), ()> {
diff --git a/embassy-stm32/src/traits.rs b/embassy-stm32/src/traits.rs
index 13f695821..539302c49 100644
--- a/embassy-stm32/src/traits.rs
+++ b/embassy-stm32/src/traits.rs
@@ -69,3 +69,26 @@ macro_rules! dma_trait_impl {
         }
     };
 }
+
+macro_rules! new_dma { // turns a DMA channel argument into `Some((channel, request))`
+    ($name:ident) => {{
+        let dma = $name.into_ref();
+        let req = dma.request(); // read first; `map_into()` below presumably consumes `dma`
+        Some((dma.map_into(), req))
+    }};
+}
+
+macro_rules! new_pin { // configures a pin for its alternate function and yields `Some(converted pin)`
+    ($name:ident, $aftype:expr, $speed:expr) => {{ // variant without a pull argument
+        let pin = $name.into_ref();
+        pin.set_as_af(pin.af_num(), $aftype);
+        pin.set_speed($speed);
+        Some(pin.map_into())
+    }};
+    ($name:ident, $aftype:expr, $speed:expr, $pull:expr) => {{ // variant with an explicit pull setting
+        let pin = $name.into_ref();
+        pin.set_as_af_pull(pin.af_num(), $aftype, $pull);
+        pin.set_speed($speed);
+        Some(pin.map_into())
+    }};
+}
diff --git a/examples/stm32f4/src/bin/spi.rs b/examples/stm32f4/src/bin/spi.rs
index dc9141c62..970d819fc 100644
--- a/examples/stm32f4/src/bin/spi.rs
+++ b/examples/stm32f4/src/bin/spi.rs
@@ -3,7 +3,6 @@
 
 use cortex_m_rt::entry;
 use defmt::*;
-use embassy_stm32::dma::NoDma;
 use embassy_stm32::gpio::{Level, Output, Speed};
 use embassy_stm32::spi::{Config, Spi};
 use embassy_stm32::time::Hertz;
@@ -18,7 +17,7 @@ fn main() -> ! {
     let mut spi_config = Config::default();
     spi_config.frequency = Hertz(1_000_000);
 
-    let mut spi = Spi::new(p.SPI3, p.PC10, p.PC12, p.PC11, NoDma, NoDma, spi_config);
+    let mut spi = Spi::new_blocking(p.SPI3, p.PC10, p.PC12, p.PC11, spi_config);
 
     let mut cs = Output::new(p.PE0, Level::High, Speed::VeryHigh);
 
diff --git a/examples/stm32f4/src/bin/ws2812_spi.rs b/examples/stm32f4/src/bin/ws2812_spi.rs
index 56ccb67b8..e00d14327 100644
--- a/examples/stm32f4/src/bin/ws2812_spi.rs
+++ b/examples/stm32f4/src/bin/ws2812_spi.rs
@@ -13,8 +13,8 @@
 #![no_std]
 #![no_main]
 
+use embassy_stm32::spi;
 use embassy_stm32::time::khz;
-use embassy_stm32::{dma, spi};
 use embassy_time::{Duration, Ticker, Timer};
 use {defmt_rtt as _, panic_probe as _};
 
@@ -78,7 +78,7 @@ async fn main(_spawner: embassy_executor::Spawner) {
     spi_config.frequency = khz(12_800);
 
     // Since we only output waveform, then the Rx and Sck and RxDma it is not considered
-    let mut ws2812_spi = spi::Spi::new_txonly_nosck(dp.SPI1, dp.PB5, dp.DMA2_CH3, dma::NoDma, spi_config);
+    let mut ws2812_spi = spi::Spi::new_txonly_nosck(dp.SPI1, dp.PB5, dp.DMA2_CH3, spi_config);
 
     // flip color at 2 Hz
     let mut ticker = Ticker::every(Duration::from_millis(500));
diff --git a/examples/stm32g0/src/bin/spi_neopixel.rs b/examples/stm32g0/src/bin/spi_neopixel.rs
index c5ea51721..2deee271d 100644
--- a/examples/stm32g0/src/bin/spi_neopixel.rs
+++ b/examples/stm32g0/src/bin/spi_neopixel.rs
@@ -4,7 +4,6 @@
 use defmt::*;
 use embassy_executor::Spawner;
 use embassy_stm32::dma::word::U5;
-use embassy_stm32::dma::NoDma;
 use embassy_stm32::spi::{Config, Spi};
 use embassy_stm32::time::Hertz;
 use embassy_time::Timer;
@@ -77,7 +76,7 @@ async fn main(_spawner: Spawner) {
 
     let mut config = Config::default();
     config.frequency = Hertz(4_000_000);
-    let mut spi = Spi::new_txonly_nosck(p.SPI1, p.PB5, p.DMA1_CH3, NoDma, config);
+    let mut spi = Spi::new_txonly_nosck(p.SPI1, p.PB5, p.DMA1_CH3, config);
 
     let mut neopixels = Ws2812::new();
 
diff --git a/examples/stm32h7/src/bin/spi.rs b/examples/stm32h7/src/bin/spi.rs
index aed27723a..aaebdc346 100644
--- a/examples/stm32h7/src/bin/spi.rs
+++ b/examples/stm32h7/src/bin/spi.rs
@@ -7,7 +7,7 @@ use core::str::from_utf8;
 use cortex_m_rt::entry;
 use defmt::*;
 use embassy_executor::Executor;
-use embassy_stm32::dma::NoDma;
+use embassy_stm32::mode::Blocking;
 use embassy_stm32::peripherals::SPI3;
 use embassy_stm32::time::mhz;
 use embassy_stm32::{spi, Config};
@@ -16,7 +16,7 @@ use static_cell::StaticCell;
 use {defmt_rtt as _, panic_probe as _};
 
 #[embassy_executor::task]
-async fn main_task(mut spi: spi::Spi<'static, SPI3, NoDma, NoDma>) {
+async fn main_task(mut spi: spi::Spi<'static, SPI3, Blocking>) {
     for n in 0u32.. {
         let mut write: String<128> = String::new();
         core::write!(&mut write, "Hello DMA World {}!\r\n", n).unwrap();
@@ -62,7 +62,7 @@ fn main() -> ! {
     let mut spi_config = spi::Config::default();
     spi_config.frequency = mhz(1);
 
-    let spi = spi::Spi::new(p.SPI3, p.PB3, p.PB5, p.PB4, NoDma, NoDma, spi_config);
+    let spi = spi::Spi::new_blocking(p.SPI3, p.PB3, p.PB5, p.PB4, spi_config);
 
     let executor = EXECUTOR.init(Executor::new());
 
diff --git a/examples/stm32h7/src/bin/spi_dma.rs b/examples/stm32h7/src/bin/spi_dma.rs
index 54d4d7656..3d3c724eb 100644
--- a/examples/stm32h7/src/bin/spi_dma.rs
+++ b/examples/stm32h7/src/bin/spi_dma.rs
@@ -7,15 +7,15 @@ use core::str::from_utf8;
 use cortex_m_rt::entry;
 use defmt::*;
 use embassy_executor::Executor;
-use embassy_stm32::peripherals::{DMA1_CH3, DMA1_CH4, SPI3};
+use embassy_stm32::mode::Async;
 use embassy_stm32::time::mhz;
-use embassy_stm32::{spi, Config};
+use embassy_stm32::{peripherals, spi, Config};
 use heapless::String;
 use static_cell::StaticCell;
 use {defmt_rtt as _, panic_probe as _};
 
 #[embassy_executor::task]
-async fn main_task(mut spi: spi::Spi<'static, SPI3, DMA1_CH3, DMA1_CH4>) {
+async fn main_task(mut spi: spi::Spi<'static, peripherals::SPI3, Async>) {
     for n in 0u32.. {
         let mut write: String<128> = String::new();
         let mut read = [0; 128];
diff --git a/examples/stm32l0/src/bin/spi.rs b/examples/stm32l0/src/bin/spi.rs
index f23a537b8..8e0cfdedb 100644
--- a/examples/stm32l0/src/bin/spi.rs
+++ b/examples/stm32l0/src/bin/spi.rs
@@ -3,7 +3,6 @@
 
 use defmt::*;
 use embassy_executor::Spawner;
-use embassy_stm32::dma::NoDma;
 use embassy_stm32::gpio::{Level, Output, Speed};
 use embassy_stm32::spi::{Config, Spi};
 use embassy_stm32::time::Hertz;
@@ -17,7 +16,7 @@ async fn main(_spawner: Spawner) {
     let mut spi_config = Config::default();
     spi_config.frequency = Hertz(1_000_000);
 
-    let mut spi = Spi::new(p.SPI1, p.PB3, p.PA7, p.PA6, NoDma, NoDma, spi_config);
+    let mut spi = Spi::new_blocking(p.SPI1, p.PB3, p.PA7, p.PA6, spi_config);
 
     let mut cs = Output::new(p.PA15, Level::High, Speed::VeryHigh);
 
diff --git a/examples/stm32l1/src/bin/spi.rs b/examples/stm32l1/src/bin/spi.rs
index 8be686c5a..eabf1bac2 100644
--- a/examples/stm32l1/src/bin/spi.rs
+++ b/examples/stm32l1/src/bin/spi.rs
@@ -3,7 +3,6 @@
 
 use defmt::*;
 use embassy_executor::Spawner;
-use embassy_stm32::dma::NoDma;
 use embassy_stm32::gpio::{Level, Output, Speed};
 use embassy_stm32::spi::{Config, Spi};
 use embassy_stm32::time::Hertz;
@@ -17,7 +16,7 @@ async fn main(_spawner: Spawner) {
     let mut spi_config = Config::default();
     spi_config.frequency = Hertz(1_000_000);
 
-    let mut spi = Spi::new(p.SPI1, p.PA5, p.PA7, p.PA6, NoDma, NoDma, spi_config);
+    let mut spi = Spi::new_blocking(p.SPI1, p.PA5, p.PA7, p.PA6, spi_config);
 
     let mut cs = Output::new(p.PA4, Level::High, Speed::VeryHigh);
 
diff --git a/examples/stm32l4/src/bin/spe_adin1110_http_server.rs b/examples/stm32l4/src/bin/spe_adin1110_http_server.rs
index 77aa929ab..a99d08924 100644
--- a/examples/stm32l4/src/bin/spe_adin1110_http_server.rs
+++ b/examples/stm32l4/src/bin/spe_adin1110_http_server.rs
@@ -23,18 +23,23 @@ use embassy_futures::select::{select, Either};
 use embassy_futures::yield_now;
 use embassy_net::tcp::TcpSocket;
 use embassy_net::{Ipv4Address, Ipv4Cidr, Stack, StackResources, StaticConfigV4};
+use embassy_net_adin1110::{Device, Runner, ADIN1110};
+use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
+use embassy_stm32::i2c::{self, Config as I2C_Config, I2c};
+use embassy_stm32::mode::Async;
+use embassy_stm32::rng::{self, Rng};
+use embassy_stm32::spi::{Config as SPI_Config, Spi};
+use embassy_stm32::time::Hertz;
+use embassy_stm32::{bind_interrupts, exti, pac, peripherals};
 use embassy_time::{Delay, Duration, Ticker, Timer};
 use embedded_hal_async::i2c::I2c as I2cBus;
+use embedded_hal_bus::spi::ExclusiveDevice;
 use embedded_io::Write as bWrite;
 use embedded_io_async::Write;
-use hal::gpio::{Input, Level, Output, Speed};
-use hal::i2c::{self, I2c};
-use hal::rng::{self, Rng};
-use hal::{bind_interrupts, exti, pac, peripherals};
 use heapless::Vec;
+use panic_probe as _;
 use rand::RngCore;
 use static_cell::StaticCell;
-use {embassy_stm32 as hal, panic_probe as _};
 
 bind_interrupts!(struct Irqs {
     I2C3_EV => i2c::EventInterruptHandler<peripherals::I2C3>;
@@ -42,13 +47,6 @@ bind_interrupts!(struct Irqs {
     RNG => rng::InterruptHandler<peripherals::RNG>;
 });
 
-use embassy_net_adin1110::{Device, Runner, ADIN1110};
-use embedded_hal_bus::spi::ExclusiveDevice;
-use hal::gpio::Pull;
-use hal::i2c::Config as I2C_Config;
-use hal::spi::{Config as SPI_Config, Spi};
-use hal::time::Hertz;
-
 // Basic settings
 // MAC-address used by the adin1110
 const MAC: [u8; 6] = [0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff];
@@ -57,7 +55,7 @@ const IP_ADDRESS: Ipv4Cidr = Ipv4Cidr::new(Ipv4Address([192, 168, 1, 5]), 24);
 // Listen port for the webserver
 const HTTP_LISTEN_PORT: u16 = 80;
 
-pub type SpeSpi = Spi<'static, peripherals::SPI2, peripherals::DMA1_CH1, peripherals::DMA1_CH2>;
+pub type SpeSpi = Spi<'static, peripherals::SPI2, Async>;
 pub type SpeSpiCs = ExclusiveDevice<SpeSpi, Output<'static>, Delay>;
 pub type SpeInt = exti::ExtiInput<'static>;
 pub type SpeRst = Output<'static>;
diff --git a/examples/stm32l4/src/bin/spi.rs b/examples/stm32l4/src/bin/spi.rs
index 6653e4516..5693a3765 100644
--- a/examples/stm32l4/src/bin/spi.rs
+++ b/examples/stm32l4/src/bin/spi.rs
@@ -2,7 +2,6 @@
 #![no_main]
 
 use defmt::*;
-use embassy_stm32::dma::NoDma;
 use embassy_stm32::gpio::{Level, Output, Speed};
 use embassy_stm32::spi::{Config, Spi};
 use embassy_stm32::time::Hertz;
@@ -17,7 +16,7 @@ fn main() -> ! {
     let mut spi_config = Config::default();
     spi_config.frequency = Hertz(1_000_000);
 
-    let mut spi = Spi::new(p.SPI3, p.PC10, p.PC12, p.PC11, NoDma, NoDma, spi_config);
+    let mut spi = Spi::new_blocking(p.SPI3, p.PC10, p.PC12, p.PC11, spi_config);
 
     let mut cs = Output::new(p.PE0, Level::High, Speed::VeryHigh);
 
diff --git a/examples/stm32l4/src/bin/spi_blocking_async.rs b/examples/stm32l4/src/bin/spi_blocking_async.rs
index 68dbb70ad..1f1089101 100644
--- a/examples/stm32l4/src/bin/spi_blocking_async.rs
+++ b/examples/stm32l4/src/bin/spi_blocking_async.rs
@@ -4,7 +4,6 @@
 use defmt::*;
 use embassy_embedded_hal::adapter::BlockingAsync;
 use embassy_executor::Spawner;
-use embassy_stm32::dma::NoDma;
 use embassy_stm32::gpio::{Input, Level, Output, Pull, Speed};
 use embassy_stm32::spi::{Config, Spi};
 use embassy_stm32::time::Hertz;
@@ -19,7 +18,7 @@ async fn main(_spawner: Spawner) {
     let mut spi_config = Config::default();
     spi_config.frequency = Hertz(1_000_000);
 
-    let spi = Spi::new(p.SPI3, p.PC10, p.PC12, p.PC11, NoDma, NoDma, spi_config);
+    let spi = Spi::new_blocking(p.SPI3, p.PC10, p.PC12, p.PC11, spi_config);
 
     let mut spi = BlockingAsync::new(spi);
 
diff --git a/tests/stm32/src/bin/spi.rs b/tests/stm32/src/bin/spi.rs
index b0bdd477f..59cb0cfd3 100644
--- a/tests/stm32/src/bin/spi.rs
+++ b/tests/stm32/src/bin/spi.rs
@@ -6,7 +6,6 @@ mod common;
 use common::*;
 use defmt::assert_eq;
 use embassy_executor::Spawner;
-use embassy_stm32::dma::NoDma;
 use embassy_stm32::spi::{self, Spi};
 use embassy_stm32::time::Hertz;
 
@@ -23,11 +22,11 @@ async fn main(_spawner: Spawner) {
     let mut spi_config = spi::Config::default();
     spi_config.frequency = Hertz(1_000_000);
 
-    let mut spi = Spi::new(
+    let mut spi = Spi::new_blocking(
         spi, sck,  // Arduino D13
         mosi, // Arduino D11
         miso, // Arduino D12
-        NoDma, NoDma, spi_config,
+        spi_config,
     );
 
     let data: [u8; 9] = [0x00, 0xFF, 0xAA, 0x55, 0xC0, 0xFF, 0xEE, 0xC0, 0xDE];