diff --git a/examples/rpi-pico-w/build.rs b/examples/rpi-pico-w/build.rs
index 3f915f931..d4c3ec89d 100644
--- a/examples/rpi-pico-w/build.rs
+++ b/examples/rpi-pico-w/build.rs
@@ -14,23 +14,23 @@ use std::io::Write;
 use std::path::PathBuf;
 
 fn main() {
-    // Put `memory.x` in our output directory and ensure it's
-    // on the linker search path.
-    let out = &PathBuf::from(env::var_os("OUT_DIR").unwrap());
-    File::create(out.join("memory.x"))
-        .unwrap()
-        .write_all(include_bytes!("memory.x"))
-        .unwrap();
-    println!("cargo:rustc-link-search={}", out.display());
+    // // Put `memory.x` in our output directory and ensure it's
+    // // on the linker search path.
+    // let out = &PathBuf::from(env::var_os("OUT_DIR").unwrap());
+    // File::create(out.join("memory.x"))
+    //     .unwrap()
+    //     .write_all(include_bytes!("memory.x"))
+    //     .unwrap();
+    // println!("cargo:rustc-link-search={}", out.display());
 
-    // By default, Cargo will re-run a build script whenever
-    // any file in the project changes. By specifying `memory.x`
-    // here, we ensure the build script is only re-run when
-    // `memory.x` is changed.
-    println!("cargo:rerun-if-changed=memory.x");
+    // // By default, Cargo will re-run a build script whenever
+    // // any file in the project changes. By specifying `memory.x`
+    // // here, we ensure the build script is only re-run when
+    // // `memory.x` is changed.
+    // println!("cargo:rerun-if-changed=memory.x");
 
-    println!("cargo:rustc-link-arg-bins=--nmagic");
-    println!("cargo:rustc-link-arg-bins=-Tlink.x");
-    println!("cargo:rustc-link-arg-bins=-Tlink-rp.x");
-    println!("cargo:rustc-link-arg-bins=-Tdefmt.x");
+    // println!("cargo:rustc-link-arg-bins=--nmagic");
+    // println!("cargo:rustc-link-arg-bins=-Tlink.x");
+    // println!("cargo:rustc-link-arg-bins=-Tlink-rp.x");
+    // println!("cargo:rustc-link-arg-bins=-Tdefmt.x");
 }
diff --git a/examples/rpi-pico-w/src/main.rs b/examples/rpi-pico-w/src/main.rs
index c706e121d..f768af193 100644
--- a/examples/rpi-pico-w/src/main.rs
+++ b/examples/rpi-pico-w/src/main.rs
@@ -161,6 +161,17 @@ impl ErrorType for MySpi {
     type Error = Infallible;
 }
 
+impl cyw43::SpiBusCyw43<u32> for MySpi {
+    async fn cmd_write<'a>(&'a mut self, write: &'a [u32]) -> Result<(), Self::Error> {
+        self.write(write).await
+    }
+
+    async fn cmd_read<'a>(&'a mut self, write: &'a [u32], read: &'a mut [u32]) -> Result<(), Self::Error> {
+        self.write(write).await?;
+        self.read(read).await
+    }
+}
+
 impl SpiBusFlush for MySpi {
     async fn flush(&mut self) -> Result<(), Self::Error> {
         Ok(())
diff --git a/src/bus.rs b/src/bus.rs
index f64c0abba..1c8bb9893 100644
--- a/src/bus.rs
+++ b/src/bus.rs
@@ -2,10 +2,23 @@ use core::slice;
 
 use embassy_time::{Duration, Timer};
 use embedded_hal_1::digital::OutputPin;
+use embedded_hal_1::spi::ErrorType;
 use embedded_hal_async::spi::{transaction, SpiBusRead, SpiBusWrite, SpiDevice};
 
 use crate::consts::*;
 
+/// Custom Spi Trait that _only_ supports the bus operation of the cyw43
+pub trait SpiBusCyw43<Word: 'static + Copy>: ErrorType {
+    /// Issues a write command on the bus
+    /// Frist 32 bits of `word` are expected to be a cmd word
+    async fn cmd_write<'a>(&'a mut self, write: &'a [Word]) -> Result<(), Self::Error>;
+
+    /// Issues a read command on the bus
+    /// `write` is expected to be a 32 bit cmd word
+    /// `read` will contain the response of the device
+    async fn cmd_read<'a>(&'a mut self, write: &'a [Word], read: &'a mut [Word]) -> Result<(), Self::Error>;
+}
+
 pub(crate) struct Bus<PWR, SPI> {
     backplane_window: u32,
     pwr: PWR,
@@ -16,7 +29,7 @@ impl<PWR, SPI> Bus<PWR, SPI>
 where
     PWR: OutputPin,
     SPI: SpiDevice,
-    SPI::Bus: SpiBusRead<u32> + SpiBusWrite<u32>,
+    SPI::Bus: SpiBusCyw43<u32>,
 {
     pub(crate) fn new(pwr: PWR, spi: SPI) -> Self {
         Self {
@@ -52,8 +65,9 @@ where
         let cmd = cmd_word(READ, INC_ADDR, FUNC_WLAN, 0, len_in_u8);
         let len_in_u32 = (len_in_u8 as usize + 3) / 4;
         transaction!(&mut self.spi, |bus| async {
-            bus.write(&[cmd]).await?;
-            bus.read(&mut buf[..len_in_u32]).await?;
+            // bus.write(&[cmd]).await?;
+            // bus.read(&mut buf[..len_in_u32]).await?;
+            bus.cmd_read(slice::from_ref(&cmd), &mut buf[..len_in_u32]).await?;
             Ok(())
         })
         .await
@@ -62,9 +76,16 @@ where
 
     pub async fn wlan_write(&mut self, buf: &[u32]) {
         let cmd = cmd_word(WRITE, INC_ADDR, FUNC_WLAN, 0, buf.len() as u32 * 4);
+        //TODO try to remove copy?
+        let mut cmd_buf = [0_u32; 513];
+        cmd_buf[0] = cmd;
+        cmd_buf[1..][..buf.len()].copy_from_slice(buf);
+
         transaction!(&mut self.spi, |bus| async {
-            bus.write(&[cmd]).await?;
-            bus.write(buf).await?;
+            // bus.write(&[cmd]).await?;
+            // bus.write(buf).await?;
+
+            bus.cmd_write(&cmd_buf).await?;
             Ok(())
         })
         .await
@@ -79,7 +100,7 @@ where
         // To simplify, enforce 4-align for now.
         assert!(addr % 4 == 0);
 
-        let mut buf = [0u32; BACKPLANE_MAX_TRANSFER_SIZE / 4];
+        let mut buf = [0u32; BACKPLANE_MAX_TRANSFER_SIZE / 4 + 1];
 
         while !data.is_empty() {
             // Ensure transfer doesn't cross a window boundary.
@@ -93,20 +114,23 @@ where
             let cmd = cmd_word(READ, INC_ADDR, FUNC_BACKPLANE, window_offs, len as u32);
 
             transaction!(&mut self.spi, |bus| async {
-                bus.write(&[cmd]).await?;
+                // bus.write(&[cmd]).await?;
 
-                // 4-byte response delay.
-                let mut junk = [0; 1];
-                bus.read(&mut junk).await?;
+                // // 4-byte response delay.
+                // let mut junk = [0; 1];
+                // bus.read(&mut junk).await?;
 
-                // Read data
-                bus.read(&mut buf[..(len + 3) / 4]).await?;
+                // // Read data
+                // bus.read(&mut buf[..(len + 3) / 4]).await?;
+
+                bus.cmd_read(slice::from_ref(&cmd), &mut buf[..(len + 3) / 4 + 1])
+                    .await?;
                 Ok(())
             })
             .await
             .unwrap();
 
-            data[..len].copy_from_slice(&slice8_mut(&mut buf)[..len]);
+            data[..len].copy_from_slice(&slice8_mut(&mut buf[1..])[..len]);
 
             // Advance ptr.
             addr += len as u32;
@@ -121,7 +145,7 @@ where
         // To simplify, enforce 4-align for now.
         assert!(addr % 4 == 0);
 
-        let mut buf = [0u32; BACKPLANE_MAX_TRANSFER_SIZE / 4];
+        let mut buf = [0u32; BACKPLANE_MAX_TRANSFER_SIZE / 4 + 1];
 
         while !data.is_empty() {
             // Ensure transfer doesn't cross a window boundary.
@@ -129,15 +153,19 @@ where
             let window_remaining = BACKPLANE_WINDOW_SIZE - window_offs as usize;
 
             let len = data.len().min(BACKPLANE_MAX_TRANSFER_SIZE).min(window_remaining);
-            slice8_mut(&mut buf)[..len].copy_from_slice(&data[..len]);
+            slice8_mut(&mut buf[1..])[..len].copy_from_slice(&data[..len]);
 
             self.backplane_set_window(addr).await;
 
             let cmd = cmd_word(WRITE, INC_ADDR, FUNC_BACKPLANE, window_offs, len as u32);
+            buf[0] = cmd;
 
             transaction!(&mut self.spi, |bus| async {
-                bus.write(&[cmd]).await?;
-                bus.write(&buf[..(len + 3) / 4]).await?;
+                // bus.write(&[cmd]).await?;
+                // bus.write(&buf[..(len + 3) / 4]).await?;
+
+                bus.cmd_write(&buf[..(len + 3) / 4 + 1]).await?;
+
                 Ok(())
             })
             .await
@@ -253,28 +281,36 @@ where
 
     async fn readn(&mut self, func: u32, addr: u32, len: u32) -> u32 {
         let cmd = cmd_word(READ, INC_ADDR, func, addr, len);
-        let mut buf = [0; 1];
+        let mut buf = [0; 2];
+        let len = if func == FUNC_BACKPLANE { 2 } else { 1 };
 
         transaction!(&mut self.spi, |bus| async {
-            bus.write(&[cmd]).await?;
-            if func == FUNC_BACKPLANE {
-                // 4-byte response delay.
-                bus.read(&mut buf).await?;
-            }
-            bus.read(&mut buf).await?;
+            // bus.write(&[cmd]).await?;
+            // if func == FUNC_BACKPLANE {
+            //     // 4-byte response delay.
+            //     bus.read(&mut buf).await?;
+            // }
+            // bus.read(&mut buf).await?;
+
+            bus.cmd_read(slice::from_ref(&cmd), &mut buf[..len]).await?;
             Ok(())
         })
         .await
         .unwrap();
 
-        buf[0]
+        if func == FUNC_BACKPLANE {
+            buf[1]
+        } else {
+            buf[0]
+        }
     }
 
     async fn writen(&mut self, func: u32, addr: u32, val: u32, len: u32) {
         let cmd = cmd_word(WRITE, INC_ADDR, func, addr, len);
 
         transaction!(&mut self.spi, |bus| async {
-            bus.write(&[cmd, val]).await?;
+            // bus.write(&[cmd, val]).await?;
+            bus.cmd_write(&[cmd, val]).await?;
             Ok(())
         })
         .await
@@ -283,11 +319,14 @@ where
 
     async fn read32_swapped(&mut self, addr: u32) -> u32 {
         let cmd = cmd_word(READ, INC_ADDR, FUNC_BUS, addr, 4);
+        let cmd = swap16(cmd);
         let mut buf = [0; 1];
 
         transaction!(&mut self.spi, |bus| async {
-            bus.write(&[swap16(cmd)]).await?;
-            bus.read(&mut buf).await?;
+            // bus.write(&[swap16(cmd)]).await?;
+            // bus.read(&mut buf).await?;
+
+            bus.cmd_read(slice::from_ref(&cmd), &mut buf).await?;
             Ok(())
         })
         .await
@@ -298,9 +337,12 @@ where
 
     async fn write32_swapped(&mut self, addr: u32, val: u32) {
         let cmd = cmd_word(WRITE, INC_ADDR, FUNC_BUS, addr, 4);
+        let buf = [swap16(cmd), swap16(val)];
 
         transaction!(&mut self.spi, |bus| async {
-            bus.write(&[swap16(cmd), swap16(val)]).await?;
+            // bus.write(&[swap16(cmd), swap16(val)]).await?;
+
+            bus.cmd_write(&buf).await?;
             Ok(())
         })
         .await
diff --git a/src/lib.rs b/src/lib.rs
index 5733506ac..7bf3992cd 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,7 @@
 #![no_std]
 #![no_main]
-#![feature(type_alias_impl_trait, concat_bytes)]
+#![allow(incomplete_features)]
+#![feature(async_fn_in_trait, type_alias_impl_trait, concat_bytes)]
 #![deny(unused_must_use)]
 
 // This mod MUST go first, so that the others see its macros.
@@ -24,6 +25,7 @@ use embedded_hal_1::digital::OutputPin;
 use embedded_hal_async::spi::{SpiBusRead, SpiBusWrite, SpiDevice};
 
 use crate::bus::Bus;
+pub use crate::bus::SpiBusCyw43;
 use crate::consts::*;
 use crate::events::Event;
 use crate::structs::*;
@@ -512,7 +514,7 @@ pub async fn new<'a, PWR, SPI>(
 where
     PWR: OutputPin,
     SPI: SpiDevice,
-    SPI::Bus: SpiBusRead<u32> + SpiBusWrite<u32>,
+    SPI::Bus: SpiBusCyw43<u32>,
 {
     let (ch_runner, device) = ch::new(&mut state.ch, [0; 6]);
     let state_ch = ch_runner.state_runner();
@@ -551,7 +553,7 @@ impl<'a, PWR, SPI> Runner<'a, PWR, SPI>
 where
     PWR: OutputPin,
     SPI: SpiDevice,
-    SPI::Bus: SpiBusRead<u32> + SpiBusWrite<u32>,
+    SPI::Bus: SpiBusCyw43<u32>,
 {
     async fn init(&mut self, firmware: &[u8]) {
         self.bus.init().await;