diff --git a/embassy-nrf/Cargo.toml b/embassy-nrf/Cargo.toml
index 858ff1f6e..5299c04a8 100644
--- a/embassy-nrf/Cargo.toml
+++ b/embassy-nrf/Cargo.toml
@@ -18,7 +18,7 @@ flavors = [
 [features]
 
 # Enable nightly-only features
-nightly = ["embassy/nightly", "embedded-hal-1", "embedded-hal-async", "embassy-usb"]
+nightly = ["embassy/nightly", "embedded-hal-1", "embedded-hal-async", "embassy-usb", "embedded-storage-async"]
 
 # Reexport the PAC for the currently enabled chip at `embassy_nrf::pac`.
 # This is unstable because semver-minor (non-breaking) releases of embassy-nrf may major-bump (breaking) the PAC version.
@@ -80,6 +80,7 @@ critical-section = "0.2.5"
 rand_core = "0.6.3"
 fixed = "1.10.0"
 embedded-storage = "0.3.0"
+embedded-storage-async = { version = "0.3.0", optional = true }
 cfg-if = "1.0.0"
 
 nrf52805-pac  = { version = "0.11.0", optional = true, features = [ "rt" ] }
diff --git a/embassy-nrf/src/qspi.rs b/embassy-nrf/src/qspi.rs
index 89262ac05..8902879f8 100644
--- a/embassy-nrf/src/qspi.rs
+++ b/embassy-nrf/src/qspi.rs
@@ -60,16 +60,18 @@ impl Default for Config {
 #[cfg_attr(feature = "defmt", derive(defmt::Format))]
 #[non_exhaustive]
 pub enum Error {
+    OutOfBounds,
     // TODO add "not in data memory" error and check for it
 }
 
-pub struct Qspi<'d, T: Instance> {
+pub struct Qspi<'d, T: Instance, const FLASH_SIZE: usize> {
+    irq: T::Interrupt,
     dpm_enabled: bool,
     phantom: PhantomData<&'d mut T>,
 }
 
-impl<'d, T: Instance> Qspi<'d, T> {
-    pub async fn new(
+impl<'d, T: Instance, const FLASH_SIZE: usize> Qspi<'d, T, FLASH_SIZE> {
+    pub fn new(
         _qspi: impl Unborrow<Target = T> + 'd,
         irq: impl Unborrow<Target = T::Interrupt> + 'd,
         sck: impl Unborrow<Target = impl GpioPin> + 'd,
@@ -79,7 +81,7 @@ impl<'d, T: Instance> Qspi<'d, T> {
         io2: impl Unborrow<Target = impl GpioPin> + 'd,
         io3: impl Unborrow<Target = impl GpioPin> + 'd,
         config: Config,
-    ) -> Qspi<'d, T> {
+    ) -> Qspi<'d, T, FLASH_SIZE> {
         unborrow!(irq, sck, csn, io0, io1, io2, io3);
 
         let r = T::regs();
@@ -142,6 +144,7 @@ impl<'d, T: Instance> Qspi<'d, T> {
 
         let mut res = Self {
             dpm_enabled: config.deep_power_down.is_some(),
+            irq,
             phantom: PhantomData,
         };
 
@@ -150,7 +153,7 @@ impl<'d, T: Instance> Qspi<'d, T> {
 
         r.tasks_activate.write(|w| w.tasks_activate().bit(true));
 
-        res.wait_ready().await;
+        res.blocking_wait_ready();
 
         res
     }
@@ -173,8 +176,36 @@ impl<'d, T: Instance> Qspi<'d, T> {
     ) -> Result<(), Error> {
         let bomb = DropBomb::new();
 
+        let len = core::cmp::max(req.len(), resp.len()) as u8;
+        self.custom_instruction_start(opcode, req, len)?;
+
+        self.wait_ready().await;
+
+        self.custom_instruction_finish(resp)?;
+
+        bomb.defuse();
+
+        Ok(())
+    }
+
+    /// Blocking variant of `custom_instruction`: sends `opcode` with `req`, busy-waits for
+    /// the peripheral to report ready, then copies the reply into `resp`.
+    pub fn blocking_custom_instruction(
+        &mut self,
+        opcode: u8,
+        req: &[u8],
+        resp: &mut [u8],
+    ) -> Result<(), Error> {
+        // `custom_instruction_start` only validates `req`, so bound `resp` (and `len`) here.
+        assert!(resp.len() <= 8);
+        let len = core::cmp::max(req.len(), resp.len()) as u8;
+        self.custom_instruction_start(opcode, req, len)?;
+        self.blocking_wait_ready();
+        self.custom_instruction_finish(resp)
+    }
+
+    fn custom_instruction_start(&mut self, opcode: u8, req: &[u8], len: u8) -> Result<(), Error> {
         assert!(req.len() <= 8);
-        assert!(resp.len() <= 8);
 
         let mut dat0: u32 = 0;
         let mut dat1: u32 = 0;
@@ -190,8 +221,6 @@ impl<'d, T: Instance> Qspi<'d, T> {
             }
         }
 
-        let len = core::cmp::max(req.len(), resp.len()) as u8;
-
         let r = T::regs();
         r.cinstrdat0.write(|w| unsafe { w.bits(dat0) });
         r.cinstrdat1.write(|w| unsafe { w.bits(dat1) });
@@ -210,9 +239,10 @@ impl<'d, T: Instance> Qspi<'d, T> {
             let w = w.lfstop().bit(false);
             w
         });
+        Ok(())
+    }
 
-        self.wait_ready().await;
-
+    fn custom_instruction_finish(&mut self, resp: &mut [u8]) -> Result<(), Error> {
         let r = T::regs();
 
         let dat0 = r.cinstrdat0.read().bits();
@@ -227,9 +257,6 @@ impl<'d, T: Instance> Qspi<'d, T> {
                 resp[i] = (dat1 >> (i * 8)) as u8;
             }
         }
-
-        bomb.defuse();
-
         Ok(())
     }
 
@@ -246,12 +273,22 @@ impl<'d, T: Instance> Qspi<'d, T> {
         .await
     }
 
-    pub async fn read(&mut self, address: usize, data: &mut [u8]) -> Result<(), Error> {
-        let bomb = DropBomb::new();
+    /// Busy-waits until the READY event register reads non-zero.
+    ///
+    /// The event is only polled here, not cleared.
+    fn blocking_wait_ready(&mut self) {
+        let regs = T::regs();
+        // Spin on the event register until it becomes non-zero.
+        while regs.events_ready.read().bits() == 0 {}
+    }
 
+    fn start_read(&mut self, address: usize, data: &mut [u8]) -> Result<(), Error> {
         assert_eq!(data.as_ptr() as u32 % 4, 0);
         assert_eq!(data.len() as u32 % 4, 0);
         assert_eq!(address as u32 % 4, 0);
+        if address > FLASH_SIZE {
+            return Err(Error::OutOfBounds);
+        }
 
         let r = T::regs();
 
@@ -269,19 +306,20 @@ impl<'d, T: Instance> Qspi<'d, T> {
         r.intenset.write(|w| w.ready().set());
         r.tasks_readstart.write(|w| w.tasks_readstart().bit(true));
 
-        self.wait_ready().await;
-
-        bomb.defuse();
-
         Ok(())
     }
 
-    pub async fn write(&mut self, address: usize, data: &[u8]) -> Result<(), Error> {
-        let bomb = DropBomb::new();
-
+    fn start_write(&mut self, address: usize, data: &[u8]) -> Result<(), Error> {
+        //info!("start_write ptr {}", data.as_ptr() as u32);
         assert_eq!(data.as_ptr() as u32 % 4, 0);
+        //info!("start_write OK ptr");
         assert_eq!(data.len() as u32 % 4, 0);
+        //info!("start_write OK len");
         assert_eq!(address as u32 % 4, 0);
+        //info!("start_write OK addr");
+        if address > FLASH_SIZE {
+            return Err(Error::OutOfBounds);
+        }
 
         let r = T::regs();
         r.write
@@ -298,17 +336,14 @@ impl<'d, T: Instance> Qspi<'d, T> {
         r.intenset.write(|w| w.ready().set());
         r.tasks_writestart.write(|w| w.tasks_writestart().bit(true));
 
-        self.wait_ready().await;
-
-        bomb.defuse();
-
         Ok(())
     }
 
-    pub async fn erase(&mut self, address: usize) -> Result<(), Error> {
-        let bomb = DropBomb::new();
-
+    fn start_erase(&mut self, address: usize) -> Result<(), Error> {
         assert_eq!(address as u32 % 4096, 0);
+        if address > FLASH_SIZE {
+            return Err(Error::OutOfBounds);
+        }
 
         let r = T::regs();
         r.erase
@@ -320,15 +355,65 @@ impl<'d, T: Instance> Qspi<'d, T> {
         r.intenset.write(|w| w.ready().set());
         r.tasks_erasestart.write(|w| w.tasks_erasestart().bit(true));
 
+        Ok(())
+    }
+
+    pub async fn read(&mut self, address: usize, data: &mut [u8]) -> Result<(), Error> {
+        // Start first: `DropBomb` panics if dropped armed, which an early `Err` would trigger.
+        self.start_read(address, data)?;
+        let bomb = DropBomb::new();
         self.wait_ready().await;
 
         bomb.defuse();
 
         Ok(())
     }
+
+    /// Writes `data` to flash at `address`, awaiting completion of the transfer.
+    pub async fn write(&mut self, address: usize, data: &[u8]) -> Result<(), Error> {
+        // Arm the bomb only once the transfer has started. `DropBomb` panics when dropped
+        // un-defused, so arming it first turned an early `Err` from `start_write` into a panic.
+        self.start_write(address, data)?;
+        let bomb = DropBomb::new();
+
+        self.wait_ready().await;
+
+        bomb.defuse();
+
+        Ok(())
+    }
+
+    /// Starts an erase at `address` (must be 4096-byte aligned) and awaits completion.
+    pub async fn erase(&mut self, address: usize) -> Result<(), Error> {
+        // Arm the bomb after the start: dropping it armed on an early `Err` would panic.
+        self.start_erase(address)?;
+        let bomb = DropBomb::new();
+        self.wait_ready().await;
+
+        bomb.defuse();
+        Ok(())
+    }
+
+    /// Reads into `data` from flash at `address`, busy-waiting for completion.
+    pub fn blocking_read(&mut self, address: usize, data: &mut [u8]) -> Result<(), Error> {
+        // Only wait when the transfer was actually started.
+        self.start_read(address, data).map(|()| self.blocking_wait_ready())
+    }
+
+    /// Writes `data` to flash at `address`, busy-waiting for completion.
+    pub fn blocking_write(&mut self, address: usize, data: &[u8]) -> Result<(), Error> {
+        // Only wait when the transfer was actually started.
+        self.start_write(address, data).map(|()| self.blocking_wait_ready())
+    }
+
+    /// Starts an erase at `address` and busy-waits for completion.
+    pub fn blocking_erase(&mut self, address: usize) -> Result<(), Error> {
+        // Only wait when the erase was actually started.
+        self.start_erase(address).map(|()| self.blocking_wait_ready())
+    }
 }
 
-impl<'d, T: Instance> Drop for Qspi<'d, T> {
+impl<'d, T: Instance, const FLASH_SIZE: usize> Drop for Qspi<'d, T, FLASH_SIZE> {
     fn drop(&mut self) {
         let r = T::regs();
 
@@ -358,6 +443,8 @@ impl<'d, T: Instance> Drop for Qspi<'d, T> {
 
         r.enable.write(|w| w.enable().disabled());
 
+        self.irq.disable();
+
         // Note: we do NOT deconfigure CSN here. If DPM is in use and we disconnect CSN,
         // leaving it floating, the flash chip might read it as zero which would cause it to
         // spuriously exit DPM.
@@ -371,6 +458,90 @@ impl<'d, T: Instance> Drop for Qspi<'d, T> {
     }
 }
 
+use embedded_storage::nor_flash::{
+    ErrorType, NorFlash, NorFlashError, NorFlashErrorKind, ReadNorFlash,
+};
+
+impl<'d, T: Instance, const FLASH_SIZE: usize> ErrorType for Qspi<'d, T, FLASH_SIZE> {
+    type Error = Error;
+}
+
+impl NorFlashError for Error {
+    fn kind(&self) -> NorFlashErrorKind {
+        let Self::OutOfBounds = self; // stops compiling once another variant is added
+        NorFlashErrorKind::OutOfBounds
+    }
+}
+impl<'d, T: Instance, const FLASH_SIZE: usize> ReadNorFlash for Qspi<'d, T, FLASH_SIZE> {
+    // Matches the 4-byte alignment asserted by the driver's read path.
+    const READ_SIZE: usize = 4;
+
+    fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> {
+        self.blocking_read(offset as usize, bytes)
+    }
+
+    fn capacity(&self) -> usize {
+        FLASH_SIZE
+    }
+}
+
+impl<'d, T: Instance, const FLASH_SIZE: usize> NorFlash for Qspi<'d, T, FLASH_SIZE> {
+    const WRITE_SIZE: usize = 4;
+    const ERASE_SIZE: usize = 4096;
+
+    /// Erases `from..to` one `ERASE_SIZE` step at a time, stopping at the first error.
+    fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> {
+        (from as usize..to as usize)
+            .step_by(<Self as NorFlash>::ERASE_SIZE)
+            .try_for_each(|address| self.blocking_erase(address))
+    }
+
+    /// Writes `bytes` at `offset` using the blocking driver path.
+    fn write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Self::Error> {
+        self.blocking_write(offset as usize, bytes)
+    }
+}
+
+cfg_if::cfg_if! {
+    if #[cfg(feature = "nightly")]
+    {
+        use embedded_storage_async::nor_flash::{AsyncNorFlash, AsyncReadNorFlash};
+        use core::future::Future;
+
+        impl<'d, T: Instance, const FLASH_SIZE: usize> AsyncNorFlash for Qspi<'d, T, FLASH_SIZE> {
+            const WRITE_SIZE: usize = <Self as NorFlash>::WRITE_SIZE;
+            const ERASE_SIZE: usize = <Self as NorFlash>::ERASE_SIZE;
+            // Forwards to the inherent async `write`, converting the `u32` offset to `usize`.
+            type WriteFuture<'a> = impl Future<Output = Result<(), Self::Error>> + 'a where Self: 'a;
+            fn write<'a>(&'a mut self, offset: u32, data: &'a [u8]) -> Self::WriteFuture<'a> {
+                async move { self.write(offset as usize, data).await }
+            }
+            // Erases `from..to` in `ERASE_SIZE` steps via the inherent async `erase`; the first error aborts.
+            type EraseFuture<'a> = impl Future<Output = Result<(), Self::Error>> + 'a where Self: 'a;
+            fn erase<'a>(&'a mut self, from: u32, to: u32) -> Self::EraseFuture<'a> {
+                async move {
+                    for address in (from as usize..to as usize).step_by(<Self as AsyncNorFlash>::ERASE_SIZE) {
+                        self.erase(address).await?
+                    }
+                    Ok(())
+                }
+            }
+        }
+
+        impl<'d, T: Instance, const FLASH_SIZE: usize> AsyncReadNorFlash for Qspi<'d, T, FLASH_SIZE> {
+            const READ_SIZE: usize = 4;
+            type ReadFuture<'a> = impl Future<Output = Result<(), Self::Error>> + 'a where Self: 'a;
+            fn read<'a>(&'a mut self, address: u32, data: &'a mut [u8]) -> Self::ReadFuture<'a> {
+                async move { self.read(address as usize, data).await }
+            }
+
+            fn capacity(&self) -> usize {
+                FLASH_SIZE
+            }
+        }
+    }
+}
+
 pub(crate) mod sealed {
     use embassy::waitqueue::AtomicWaker;
 
diff --git a/examples/nrf/src/bin/qspi.rs b/examples/nrf/src/bin/qspi.rs
index ba60716c8..8673b7961 100644
--- a/examples/nrf/src/bin/qspi.rs
+++ b/examples/nrf/src/bin/qspi.rs
@@ -26,10 +26,9 @@ async fn main(_spawner: Spawner, p: Peripherals) {
     config.write_page_size = qspi::WritePageSize::_256BYTES;
 
     let irq = interrupt::take!(QSPI);
-    let mut q = qspi::Qspi::new(
+    let mut q: qspi::Qspi<_, 67108864> = qspi::Qspi::new(
         p.QSPI, irq, p.P0_19, p.P0_17, p.P0_20, p.P0_21, p.P0_22, p.P0_23, config,
-    )
-    .await;
+    );
 
     let mut id = [1; 3];
     unwrap!(q.custom_instruction(0x9F, &[], &mut id).await);
diff --git a/examples/nrf/src/bin/qspi_lowpower.rs b/examples/nrf/src/bin/qspi_lowpower.rs
index a8184cd63..255ce5d5e 100644
--- a/examples/nrf/src/bin/qspi_lowpower.rs
+++ b/examples/nrf/src/bin/qspi_lowpower.rs
@@ -32,7 +32,7 @@ async fn main(_spawner: Spawner, mut p: Peripherals) {
             exit_time: 3,  // tRDP = 35uS
         });
 
-        let mut q = qspi::Qspi::new(
+        let mut q: qspi::Qspi<_, 67108864> = qspi::Qspi::new(
             &mut p.QSPI,
             &mut irq,
             &mut p.P0_19,
@@ -42,8 +42,7 @@ async fn main(_spawner: Spawner, mut p: Peripherals) {
             &mut p.P0_22,
             &mut p.P0_23,
             config,
-        )
-        .await;
+        );
 
         let mut id = [1; 3];
         unwrap!(q.custom_instruction(0x9F, &[], &mut id).await);