diff --git a/embassy-rp/Cargo.toml b/embassy-rp/Cargo.toml
--- a/embassy-rp/Cargo.toml
+++ b/embassy-rp/Cargo.toml
@@ -26,6 +26,7 @@ time-driver = []
 rom-func-cache = []
 intrinsics = []
 rom-v2-intrinsics = []
+pio = ["dep:pio", "dep:pio-proc"]
 # Enable nightly-only features
 nightly = ["embassy-executor/nightly", "embedded-hal-1", "embedded-hal-async", "embassy-embedded-hal/nightly", "dep:embassy-usb-driver", "dep:embedded-io"]
@@ -64,3 +65,10 @@ embedded-hal-02 = { package = "embedded-hal", version = "0.2.6", features = ["un
 embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9", optional = true}
 embedded-hal-async = { version = "=0.2.0-alpha.0", optional = true}
 embedded-hal-nb = { version = "=1.0.0-alpha.1", optional = true}
+paste = "1.0"
+pio-proc = {version= "0.2", optional = true}
+pio = {version= "0.2", optional = true}
+
+# Override the crates.io `pio` crate with the upstream git repository.
+[patch.crates-io]
+pio = {git = "https://github.com/rp-rs/pio-rs.git"}
diff --git a/embassy-rp/src/gpio.rs b/embassy-rp/src/gpio.rs
--- a/embassy-rp/src/gpio.rs
+++ b/embassy-rp/src/gpio.rs
@@ -48,6 +48,21 @@ pub enum Pull {
+/// Drive strength of an output
+#[derive(Debug, Eq, PartialEq)]
+pub enum Drive {
+    _2mA,
+    _4mA,
+    _8mA,
+    _12mA,
+/// Slew rate of an output
+#[derive(Debug, Eq, PartialEq)]
+pub enum SlewRate {
+    Fast,
+    Slow,
 /// A GPIO bank with up to 32 pins.
 #[derive(Debug, Eq, PartialEq)]
 pub enum Bank {
@@ -459,13 +474,40 @@ impl<'d, T: Pin> Flex<'d, T> {
     pub fn set_pull(&mut self, pull: Pull) {
         unsafe {
-            self.pin.pad_ctrl().write(|w| {
+            self.pin.pad_ctrl().modify(|w| {
-                match pull {
-                    Pull::Up => w.set_pue(true),
-                    Pull::Down => w.set_pde(true),
-                    Pull::None => {}
-                }
+                let (pu, pd) = match pull {
+                    Pull::Up => (true, false),
+                    Pull::Down => (false, true),
+                    Pull::None => (false, false),
+                };
+                w.set_pue(pu);
+                w.set_pde(pd);
+            });
+        }
+    }
+    /// Set the pin's drive strength.
+    ///
+    /// Only the drive field of the pad control register is modified.
+    #[inline]
+    pub fn set_drive_strength(&mut self, strength: Drive) {
+        // Translate the HAL enum into the PAC's pad-control value.
+        let level = match strength {
+            Drive::_2mA => pac::pads::vals::Drive::_2MA,
+            Drive::_4mA => pac::pads::vals::Drive::_4MA,
+            Drive::_8mA => pac::pads::vals::Drive::_8MA,
+            Drive::_12mA => pac::pads::vals::Drive::_12MA,
+        };
+        unsafe {
+            self.pin.pad_ctrl().modify(|pad| pad.set_drive(level));
+        }
+    }
+    /// Set the pin's slew rate.
+    #[inline]
+    pub fn set_slew_rate(&mut self, slew_rate: SlewRate) {
+        unsafe {
+            self.pin.pad_ctrl().modify(|w| {
+                w.set_slewfast(slew_rate == SlewRate::Fast);
diff --git a/embassy-rp/src/lib.rs b/embassy-rp/src/lib.rs
--- a/embassy-rp/src/lib.rs
+++ b/embassy-rp/src/lib.rs
@@ -12,6 +12,14 @@ pub mod dma;
 pub mod gpio;
 pub mod i2c;
 pub mod interrupt;
+#[cfg(feature = "pio")]
+pub mod pio;
+#[cfg(feature = "pio")]
+pub mod pio_instr_util;
+#[cfg(feature = "pio")]
+pub mod relocate;
 pub mod rom_data;
 pub mod rtc;
 pub mod spi;
@@ -102,6 +110,9 @@ embassy_hal_common::peripherals! {
+    PIO0,
+    PIO1,
 #[link_section = ".boot2"]
diff --git a/embassy-rp/src/pio.rs b/embassy-rp/src/pio.rs
new file mode 100644
--- /dev/null
+++ b/embassy-rp/src/pio.rs
@@ -0,0 +1,1259 @@
+use core::future::Future;
+use core::marker::PhantomData;
+use core::pin::Pin as FuturePin;
+use core::sync::atomic::{compiler_fence, Ordering};
+use core::task::{Context, Poll};
+use embassy_cortex_m::interrupt::{Interrupt, InterruptExt};
+use embassy_hal_common::PeripheralRef;
+use embassy_sync::waitqueue::AtomicWaker;
+use crate::dma::{self, Channel, Transfer};
+use crate::gpio::sealed::Pin as SealedPin;
+use crate::gpio::{Drive, Pin, Pull, SlewRate};
+use crate::pac::dma::vals::{DataSize, TreqSel};
+use crate::{interrupt, pac, peripherals};
+/// Register blocks of both PIO peripherals, indexed by PIO number.
+const PIOS: [&pac::pio::Pio; 2] = [&pac::PIO0, &pac::PIO1];
+// Helper constants used to const-initialize the waker arrays below.
+const NEW_AW: AtomicWaker = AtomicWaker::new();
+const PIO_WAKERS_INIT: [AtomicWaker; 4] = [NEW_AW; 4];
+/// Wakers for futures waiting on a TX FIFO, indexed as `[pio][state machine]`.
+static FIFO_OUT_WAKERS: [[AtomicWaker; 4]; 2] = [PIO_WAKERS_INIT; 2];
+/// Wakers for futures waiting on an RX FIFO, indexed as `[pio][state machine]`.
+static FIFO_IN_WAKERS: [[AtomicWaker; 4]; 2] = [PIO_WAKERS_INIT; 2];
+/// Wakers for futures waiting on a PIO IRQ flag, indexed as `[pio][irq number]`.
+static IRQ_WAKERS: [[AtomicWaker; 4]; 2] = [PIO_WAKERS_INIT; 2];
+pub enum FifoJoin {
+    /// Both TX and RX fifo is enabled
+    Duplex,
+    /// Rx fifo twice as deep. TX fifo disabled
+    RxOnly,
+    /// Tx fifo twice as deep. RX fifo disabled
+    TxOnly,
+pub enum ShiftDirection {
+    Right = 1,
+    Left = 0,
+// Bit positions of the per-state-machine interrupt sources in the PIO interrupt
+// registers. Each constant is the bit for index 0; it is shifted left by the
+// state machine (or IRQ flag) number to select the others.
+/// RX FIFO not empty.
+const RXNEMPTY_MASK: u32 = 1 << 0;
+/// TX FIFO not full.
+const TXNFULL_MASK: u32 = 1 << 4;
+/// PIO IRQ flag raised.
+const SMIRQ_MASK: u32 = 1 << 8;
+unsafe fn PIO0_IRQ_1() {
+    use crate::pac;
+    let ints = pac::PIO0.irqs(1).ints().read().0;
+    let inte = pac::PIO0.irqs(1).inte();
+    for i in 0..4 {
+        // Check RXNEMPTY
+        if ints & (RXNEMPTY_MASK << i) != 0 {
+            inte.modify(|m| {
+                m.0 &= !(RXNEMPTY_MASK << i);
+            });
+            FIFO_IN_WAKERS[0][i].wake();
+        }
+        // Check IRQ flags
+        if ints & (SMIRQ_MASK << i) != 0 {
+            inte.modify(|m| {
+                m.0 &= !(SMIRQ_MASK << i);
+            });
+            IRQ_WAKERS[0][i].wake();
+        }
+    }
+unsafe fn PIO1_IRQ_1() {
+    use crate::pac;
+    let ints = pac::PIO1.irqs(1).ints().read().0;
+    let inte = pac::PIO1.irqs(1).inte();
+    for i in 0..4 {
+        // Check all RXNEMPTY
+        if ints & (RXNEMPTY_MASK << i) != 0 {
+            inte.modify(|m| {
+                m.0 &= !(RXNEMPTY_MASK << i);
+            });
+            FIFO_IN_WAKERS[1][i].wake();
+        }
+        // Check IRQ flags
+        if ints & (SMIRQ_MASK << i) != 0 {
+            inte.modify(|m| {
+                m.0 &= !(SMIRQ_MASK << i);
+            });
+            IRQ_WAKERS[1][i].wake();
+        }
+    }
+unsafe fn PIO0_IRQ_0() {
+    use crate::pac;
+    let ints = pac::PIO0.irqs(0).ints().read().0;
+    let inte = pac::PIO0.irqs(0).inte();
+    //debug!("!{:04x}",ints);
+    // Check all TXNFULL
+    for i in 0..4 {
+        if ints & (TXNFULL_MASK << i) != 0 {
+            inte.modify(|m| {
+                m.0 &= !(TXNFULL_MASK << i);
+            });
+            FIFO_OUT_WAKERS[0][i].wake();
+        }
+    }
+unsafe fn PIO1_IRQ_0() {
+    let ints = pac::PIO1.irqs(0).ints().read().0;
+    let inte = pac::PIO1.irqs(0).inte();
+    // Check all TXNFULL
+    for i in 0..4 {
+        if ints & (TXNFULL_MASK << i) != 0 {
+            inte.modify(|m| {
+                m.0 &= !(TXNFULL_MASK << i);
+            });
+            FIFO_OUT_WAKERS[1][i].wake();
+        }
+    }
+/// Future that waits for TX-FIFO to become writable
+pub struct FifoOutFuture<'a, PIO: PioInstance, SM: PioStateMachine + Unpin> {
+    sm: &'a mut SM,
+    pio: PhantomData<PIO>,
+    value: u32,
+impl<'a, PIO: PioInstance, SM: PioStateMachine + Unpin> FifoOutFuture<'a, PIO, SM> {
+    /// Create a future that pushes `value` into the TX FIFO of `sm`.
+    ///
+    /// Sets the priority of the PIO's `IrqOut` interrupt to `P3` and enables it.
+    pub fn new(sm: &'a mut SM, value: u32) -> Self {
+        critical_section::with(|_| unsafe {
+            let tx_irq = PIO::IrqOut::steal();
+            tx_irq.set_priority(interrupt::Priority::P3);
+            tx_irq.enable();
+        });
+        Self {
+            sm,
+            pio: PhantomData,
+            value,
+        }
+    }
+impl<'d, PIO: PioInstance, SM: PioStateMachine + Unpin> Future for FifoOutFuture<'d, PIO, SM> {
+    type Output = ();
+    /// Try to push the stored value into the TX FIFO.
+    ///
+    /// Returns `Ready` once the value was written. Otherwise registers the task's
+    /// waker, enables the TXNFULL interrupt source for this state machine and
+    /// returns `Pending`.
+    fn poll(self: FuturePin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        //debug!("Poll {},{}", PIO::PIO_NO, SM);
+        let value = self.value;
+        if self.get_mut().sm.try_push_tx(value) {
+            Poll::Ready(())
+        } else {
+            // The waker is registered before the interrupt source is enabled below.
+            FIFO_OUT_WAKERS[PIO::PIO_NO as usize][SM::Sm::SM_NO as usize].register(cx.waker());
+            unsafe {
+                let irq = PIO::IrqOut::steal();
+                // The interrupt line is disabled while INTE is read-modify-written.
+                irq.disable();
+                critical_section::with(|_| {
+                    PIOS[PIO::PIO_NO as usize].irqs(0).inte().modify(|m| {
+                        m.0 |= TXNFULL_MASK << SM::Sm::SM_NO;
+                    });
+                });
+                irq.enable();
+            }
+            // debug!("Pending");
+            Poll::Pending
+        }
+    }
+impl<'d, PIO: PioInstance, SM: PioStateMachine + Unpin> Drop for FifoOutFuture<'d, PIO, SM> {
+    /// Disable the TXNFULL interrupt source for this state machine.
+    fn drop(&mut self) {
+        let sm_bit = TXNFULL_MASK << SM::Sm::SM_NO;
+        critical_section::with(|_| unsafe {
+            PIOS[PIO::PIO_NO as usize].irqs(0).inte().modify(|inte| {
+                inte.0 &= !sm_bit;
+            });
+        });
+    }
+/// Future that waits for RX-FIFO to become readable
+pub struct FifoInFuture<'a, PIO: PioInstance, SM: PioStateMachine> {
+    sm: &'a mut SM,
+    pio: PhantomData<PIO>,
+impl<'a, PIO: PioInstance, SM: PioStateMachine> FifoInFuture<'a, PIO, SM> {
+    /// Create a future that pulls one word from the RX FIFO of `sm`.
+    ///
+    /// Sets the priority of the PIO's `IrqIn` interrupt to `P3` and enables it.
+    pub fn new(sm: &'a mut SM) -> Self {
+        critical_section::with(|_| unsafe {
+            let rx_irq = PIO::IrqIn::steal();
+            rx_irq.set_priority(interrupt::Priority::P3);
+            rx_irq.enable();
+        });
+        Self {
+            sm,
+            pio: PhantomData,
+        }
+    }
+impl<'d, PIO: PioInstance, SM: PioStateMachine> Future for FifoInFuture<'d, PIO, SM> {
+    type Output = u32;
+    /// Try to pull one word from the RX FIFO.
+    ///
+    /// Returns `Ready(word)` if the FIFO held data. Otherwise registers the task's
+    /// waker, enables the RXNEMPTY interrupt source for this state machine and
+    /// returns `Pending`.
+    fn poll(mut self: FuturePin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        //debug!("Poll {},{}", PIO::PIO_NO, SM);
+        if let Some(v) = self.sm.try_pull_rx() {
+            Poll::Ready(v)
+        } else {
+            // The waker is registered before the interrupt source is enabled below.
+            FIFO_IN_WAKERS[PIO::PIO_NO as usize][SM::Sm::SM_NO as usize].register(cx.waker());
+            unsafe {
+                let irq = PIO::IrqIn::steal();
+                // The interrupt line is disabled while INTE is read-modify-written.
+                irq.disable();
+                critical_section::with(|_| {
+                    PIOS[PIO::PIO_NO as usize].irqs(1).inte().modify(|m| {
+                        m.0 |= RXNEMPTY_MASK << SM::Sm::SM_NO;
+                    });
+                });
+                irq.enable();
+            }
+            //debug!("Pending");
+            Poll::Pending
+        }
+    }
+impl<'d, PIO: PioInstance, SM: PioStateMachine> Drop for FifoInFuture<'d, PIO, SM> {
+    /// Disable the RXNEMPTY interrupt source for this state machine.
+    fn drop(&mut self) {
+        let sm_bit = RXNEMPTY_MASK << SM::Sm::SM_NO;
+        critical_section::with(|_| unsafe {
+            PIOS[PIO::PIO_NO as usize].irqs(1).inte().modify(|inte| {
+                inte.0 &= !sm_bit;
+            });
+        });
+    }
+/// Future that waits for IRQ
+pub struct IrqFuture<PIO: PioInstance> {
+    pio: PhantomData<PIO>,
+    irq_no: u8,
+impl<'a, PIO: PioInstance> IrqFuture<PIO> {
+    /// Create a future that waits for PIO IRQ flag `irq_no`.
+    ///
+    /// Sets the priority of the PIO's `IrqSm` interrupt to `P3` and enables it.
+    pub fn new(irq_no: u8) -> Self {
+        critical_section::with(|_| unsafe {
+            let sm_irq = PIO::IrqSm::steal();
+            sm_irq.set_priority(interrupt::Priority::P3);
+            sm_irq.enable();
+        });
+        Self {
+            pio: PhantomData,
+            irq_no,
+        }
+    }
+impl<'d, PIO: PioInstance> Future for IrqFuture<PIO> {
+    type Output = ();
+    /// Check whether the awaited IRQ flag is set.
+    ///
+    /// If the flag is set it is cleared (by writing its bit back) and `Ready` is
+    /// returned. Otherwise the task's waker is registered, the matching interrupt
+    /// source is enabled and `Pending` is returned.
+    fn poll(self: FuturePin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        //debug!("Poll {},{}", PIO::PIO_NO, SM);
+        // Check if IRQ flag is already set
+        if critical_section::with(|_| unsafe {
+            let irq_flags = PIOS[PIO::PIO_NO as usize].irq();
+            if irq_flags.read().0 & (1 << self.irq_no) != 0 {
+                // Write only this flag's bit back to the IRQ register.
+                irq_flags.write(|m| {
+                    m.0 = 1 << self.irq_no;
+                });
+                true
+            } else {
+                false
+            }
+        }) {
+            return Poll::Ready(());
+        }
+        // NOTE(review): IRQ_WAKERS has 4 entries per PIO, so `irq_no >= 4` panics here.
+        IRQ_WAKERS[PIO::PIO_NO as usize][self.irq_no as usize].register(cx.waker());
+        unsafe {
+            let irq = PIO::IrqSm::steal();
+            // The interrupt line is disabled while INTE is read-modify-written.
+            irq.disable();
+            critical_section::with(|_| {
+                PIOS[PIO::PIO_NO as usize].irqs(1).inte().modify(|m| {
+                    m.0 |= SMIRQ_MASK << self.irq_no;
+                });
+            });
+            irq.enable();
+        }
+        Poll::Pending
+    }
+impl<'d, PIO: PioInstance> Drop for IrqFuture<PIO> {
+    /// Disable the interrupt source belonging to the awaited IRQ flag.
+    fn drop(&mut self) {
+        critical_section::with(|_| unsafe {
+            let inte = PIOS[PIO::PIO_NO as usize].irqs(1).inte();
+            inte.modify(|bits| {
+                bits.0 &= !(SMIRQ_MASK << self.irq_no);
+            });
+        });
+    }
+pub struct PioPin<PIO: PioInstance> {
+    pin_bank: u8,
+    pio: PhantomData<PIO>,
+impl<PIO: PioInstance> PioPin<PIO> {
+    /// Set the pin's drive strength.
+    ///
+    /// Only the drive field of the pad control register is modified.
+    #[inline]
+    pub fn set_drive_strength(&mut self, strength: Drive) {
+        // Translate the HAL enum into the PAC's pad-control value.
+        let level = match strength {
+            Drive::_2mA => pac::pads::vals::Drive::_2MA,
+            Drive::_4mA => pac::pads::vals::Drive::_4MA,
+            Drive::_8mA => pac::pads::vals::Drive::_8MA,
+            Drive::_12mA => pac::pads::vals::Drive::_12MA,
+        };
+        unsafe {
+            self.pad_ctrl().modify(|pad| pad.set_drive(level));
+        }
+    }
+    /// Set the pin's slew rate.
+    ///
+    /// `SlewRate::Fast` sets the pad's `SLEWFAST` bit; any other value clears it.
+    #[inline]
+    pub fn set_slew_rate(&mut self, slew_rate: SlewRate) {
+        let fast = slew_rate == SlewRate::Fast;
+        unsafe {
+            self.pad_ctrl().modify(|pad| pad.set_slewfast(fast));
+        }
+    }
+    /// Set the pin's pull.
+    ///
+    /// Both pull enables are always written, so switching from one pull to the
+    /// other (or selecting `Pull::None`) clears whichever pull was enabled before.
+    #[inline]
+    pub fn set_pull(&mut self, pull: Pull) {
+        let (pu, pd) = match pull {
+            Pull::Up => (true, false),
+            Pull::Down => (false, true),
+            Pull::None => (false, false),
+        };
+        unsafe {
+            // `modify` keeps the remaining pad settings (drive, slew, schmitt) intact.
+            self.pad_ctrl().modify(|w| {
+                w.set_pue(pu);
+                w.set_pde(pd);
+            });
+        }
+    }
+    /// Enable or disable the pin's Schmitt trigger.
+    ///
+    /// Only the `SCHMITT` bit of the pad control register is modified.
+    #[inline]
+    pub fn set_schmitt(&mut self, enable: bool) {
+        unsafe {
+            self.pad_ctrl().modify(|w| {
+                w.set_schmitt(enable);
+            });
+        }
+    }
+    /// Enable or disable bypassing of the input synchronizer for this pin.
+    ///
+    /// In the PIO `INPUT_SYNC_BYPASS` register a set bit means the synchronizer is
+    /// bypassed, so `bypass == true` sets this pin's bit and `bypass == false`
+    /// clears it. The bits of all other pins are left unchanged.
+    pub fn set_input_sync_bypass<'a>(&mut self, bypass: bool) {
+        let mask = 1 << self.pin();
+        unsafe {
+            PIOS[PIO::PIO_NO as usize]
+                .input_sync_bypass()
+                .modify(|w| *w = if bypass { *w | mask } else { *w & !mask });
+        }
+    }
+    /// Return the pin number, as reported by the sealed pin trait's `_pin()`.
+    pub fn pin(&self) -> u8 {
+        self._pin()
+    }
+impl<PIO: PioInstance> SealedPin for PioPin<PIO> {
+    /// Return the `pin_bank` value captured when this `PioPin` was created.
+    fn pin_bank(&self) -> u8 {
+        self.pin_bank
+    }
+pub struct PioStateMachineInstance<PIO: PioInstance, SM: SmInstance> {
+    pio: PhantomData<PIO>,
+    sm: PhantomData<SM>,
+impl<PIO: PioInstance, SM: SmInstance> PioStateMachine for PioStateMachineInstance<PIO, SM> {
+    type Pio = PIO;
+    type Sm = SM;
+pub trait PioStateMachine: Sized + Unpin {
+    type Pio: PioInstance;
+    type Sm: SmInstance;
+    /// Number of the PIO block this state machine belongs to.
+    fn pio_no(&self) -> u8 {
+        Self::Pio::PIO_NO
+    }
+    /// Number of this state machine within its PIO block.
+    fn sm_no(&self) -> u8 {
+        Self::Sm::SM_NO
+    }
+    /// Write this state machine's bit to the `SM_RESTART` field of the PIO `CTRL` register.
+    fn restart(&mut self) {
+        let sm_bit = 1u8 << Self::Sm::SM_NO;
+        unsafe {
+            let ctrl = PIOS[Self::Pio::PIO_NO as usize].ctrl();
+            ctrl.modify(|c| c.set_sm_restart(sm_bit));
+        }
+    }
+    /// Set or clear this state machine's bit in the `SM_ENABLE` field of `CTRL`.
+    ///
+    /// The enable bits of the other state machines are preserved.
+    fn set_enable(&mut self, enable: bool) {
+        let sm_bit = 1u8 << Self::Sm::SM_NO;
+        unsafe {
+            PIOS[Self::Pio::PIO_NO as usize].ctrl().modify(|c| {
+                let others = c.sm_enable() & !sm_bit;
+                c.set_sm_enable(if enable { others | sm_bit } else { others });
+            });
+        }
+    }
+    /// Whether this state machine's bit is set in the `SM_ENABLE` field of `CTRL`.
+    fn is_enabled(&self) -> bool {
+        let sm_bit = 1u8 << Self::Sm::SM_NO;
+        let enabled = unsafe { PIOS[Self::Pio::PIO_NO as usize].ctrl().read().sm_enable() };
+        enabled & sm_bit != 0
+    }
+    /// Whether this state machine's `TXEMPTY` flag is set in `FSTAT`.
+    fn is_tx_empty(&self) -> bool {
+        let sm_bit = 1u8 << Self::Sm::SM_NO;
+        let flags = unsafe { PIOS[Self::Pio::PIO_NO as usize].fstat().read().txempty() };
+        flags & sm_bit != 0
+    }
+    /// Whether this state machine's `TXFULL` flag is set in `FSTAT`.
+    fn is_tx_full(&self) -> bool {
+        let sm_bit = 1u8 << Self::Sm::SM_NO;
+        let flags = unsafe { PIOS[Self::Pio::PIO_NO as usize].fstat().read().txfull() };
+        flags & sm_bit != 0
+    }
+    /// Whether this state machine's `RXEMPTY` flag is set in `FSTAT`.
+    fn is_rx_empty(&self) -> bool {
+        let sm_bit = 1u8 << Self::Sm::SM_NO;
+        let flags = unsafe { PIOS[Self::Pio::PIO_NO as usize].fstat().read().rxempty() };
+        flags & sm_bit != 0
+    }
+    /// Whether this state machine's `RXFULL` flag is set in `FSTAT`.
+    fn is_rx_full(&self) -> bool {
+        let sm_bit = 1u8 << Self::Sm::SM_NO;
+        let flags = unsafe { PIOS[Self::Pio::PIO_NO as usize].fstat().read().rxfull() };
+        flags & sm_bit != 0
+    }
+    /// Read this state machine's TX level from `FLEVEL`.
+    ///
+    /// Each state machine occupies 8 bits of `FLEVEL`; the low nibble is returned.
+    fn tx_level(&self) -> u8 {
+        let shift = Self::Sm::SM_NO * 8;
+        let levels = unsafe { PIOS[Self::Pio::PIO_NO as usize].flevel().read().0 };
+        (levels >> shift) as u8 & 0x0f
+    }
+    /// Read this state machine's RX level from `FLEVEL`.
+    ///
+    /// Each state machine occupies 8 bits of `FLEVEL`; the high nibble is returned.
+    fn rx_level(&self) -> u8 {
+        let shift = Self::Sm::SM_NO * 8 + 4;
+        let levels = unsafe { PIOS[Self::Pio::PIO_NO as usize].flevel().read().0 };
+        (levels >> shift) as u8 & 0x0f
+    }
+    /// Write `v` to this state machine's TX FIFO register.
+    ///
+    /// No check for a full FIFO is made here; see `try_push_tx`.
+    fn push_tx(&mut self, v: u32) {
+        unsafe {
+            let txf = PIOS[Self::Pio::PIO_NO as usize].txf(Self::Sm::SM_NO as usize);
+            txf.write_value(v);
+        }
+    }
+    /// Push `v` into the TX FIFO unless it is full.
+    ///
+    /// Returns `true` if the value was written, `false` if the FIFO was full.
+    fn try_push_tx(&mut self, v: u32) -> bool {
+        let has_room = !self.is_tx_full();
+        if has_room {
+            self.push_tx(v);
+        }
+        has_room
+    }
+    /// Read one word from this state machine's RX FIFO register.
+    ///
+    /// No check for an empty FIFO is made here; see `try_pull_rx`.
+    fn pull_rx(&mut self) -> u32 {
+        unsafe { PIOS[Self::Pio::PIO_NO as usize].rxf(Self::Sm::SM_NO as usize).read() }
+    }
+    /// Pull one word from the RX FIFO, or return `None` if it is empty.
+    fn try_pull_rx(&mut self) -> Option<u32> {
+        if self.is_rx_empty() {
+            None
+        } else {
+            Some(self.pull_rx())
+        }
+    }
+    /// Set the clock divider of this state machine.
+    ///
+    /// `div_x_256` is written to the `CLKDIV` register shifted left by 8 bits.
+    fn set_clkdiv(&mut self, div_x_256: u32) {
+        let raw = div_x_256 << 8;
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.clkdiv().write(|w| w.0 = raw);
+        }
+    }
+    /// Read the clock divider of this state machine.
+    ///
+    /// Returns the raw `CLKDIV` register value shifted right by 8 bits, i.e. the
+    /// same representation `set_clkdiv` accepts.
+    fn get_clkdiv(&self) -> u32 {
+        let raw = unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.clkdiv().read().0
+        };
+        raw >> 8
+    }
+    /// Write this state machine's bit to the `CLKDIV_RESTART` field of `CTRL`.
+    fn clkdiv_restart(&mut self) {
+        let sm_bit = 1u8 << Self::Sm::SM_NO;
+        unsafe {
+            let ctrl = PIOS[Self::Pio::PIO_NO as usize].ctrl();
+            ctrl.modify(|c| c.set_clkdiv_restart(sm_bit));
+        }
+    }
+    /// Set the `SIDE_EN` bit in this state machine's `EXECCTRL` register.
+    fn set_side_enable(&self, enable: bool) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.execctrl().modify(|exec| exec.set_side_en(enable));
+        }
+    }
+    /// Read the `SIDE_EN` bit from this state machine's `EXECCTRL` register.
+    fn is_side_enabled(&self) -> bool {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.execctrl().read().side_en()
+        }
+    }
+    /// Set the `SIDE_PINDIR` bit in this state machine's `EXECCTRL` register.
+    fn set_side_pindir(&mut self, pindir: bool) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.execctrl().modify(|exec| exec.set_side_pindir(pindir));
+        }
+    }
+    /// Read the `SIDE_PINDIR` bit from this state machine's `EXECCTRL` register.
+    fn is_side_pindir(&self) -> bool {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.execctrl().read().side_pindir()
+        }
+    }
+    /// Set the `JMP_PIN` field in this state machine's `EXECCTRL` register.
+    fn set_jmp_pin(&mut self, pin: u8) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.execctrl().modify(|exec| exec.set_jmp_pin(pin));
+        }
+    }
+    /// Read the `JMP_PIN` field from this state machine's `EXECCTRL` register.
+    fn get_jmp_pin(&mut self) -> u8 {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.execctrl().read().jmp_pin()
+        }
+    }
+    /// Set the wrapping addresses of this state machine.
+    ///
+    /// `source` is written to `WRAP_TOP` and `target` to `WRAP_BOTTOM` in `EXECCTRL`.
+    fn set_wrap(&self, source: u8, target: u8) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.execctrl().modify(|exec| {
+                exec.set_wrap_top(source);
+                exec.set_wrap_bottom(target);
+            });
+        }
+    }
+    /// Read the wrapping addresses as `(source, target)`, i.e. `(WRAP_TOP, WRAP_BOTTOM)`.
+    fn get_wrap(&self) -> (u8, u8) {
+        let exec = unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.execctrl().read()
+        };
+        (exec.wrap_top(), exec.wrap_bottom())
+    }
+    /// Configure FIFO joining via the `FJOIN_RX` / `FJOIN_TX` bits of `SHIFTCTRL`.
+    ///
+    /// `RxOnly` sets only `FJOIN_RX`, `TxOnly` sets only `FJOIN_TX`, and `Duplex`
+    /// clears both.
+    fn set_fifo_join(&mut self, join: FifoJoin) {
+        let join_rx = matches!(join, FifoJoin::RxOnly);
+        let join_tx = matches!(join, FifoJoin::TxOnly);
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().modify(|shift| {
+                shift.set_fjoin_rx(join_rx);
+                shift.set_fjoin_tx(join_tx);
+            });
+        }
+    }
+    /// Read the FIFO join configuration from `SHIFTCTRL`.
+    ///
+    /// If both join bits are set (an invalid state) `RxOnly` is reported.
+    fn get_fifo_join(&self) -> FifoJoin {
+        let shift = unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().read()
+        };
+        match (shift.fjoin_rx(), shift.fjoin_tx()) {
+            (true, _) => FifoJoin::RxOnly,
+            (false, true) => FifoJoin::TxOnly,
+            (false, false) => FifoJoin::Duplex,
+        }
+    }
+    /// Flush the FIFOs by toggling `FJOIN_RX` twice.
+    ///
+    /// After the second toggle `FJOIN_RX` is back at its previous value.
+    fn clear_fifos(&mut self) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            let shiftctrl = sm.shiftctrl();
+            for _ in 0..2 {
+                shiftctrl.modify(|shift| {
+                    shift.set_fjoin_rx(!shift.fjoin_rx());
+                });
+            }
+        }
+    }
+    /// Set the `PULL_THRESH` field in this state machine's `SHIFTCTRL` register.
+    fn set_pull_threshold(&mut self, threshold: u8) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().modify(|shift| shift.set_pull_thresh(threshold));
+        }
+    }
+    /// Read the `PULL_THRESH` field from this state machine's `SHIFTCTRL` register.
+    fn get_pull_threshold(&self) -> u8 {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().read().pull_thresh()
+        }
+    }
+    /// Set the `PUSH_THRESH` field in this state machine's `SHIFTCTRL` register.
+    fn set_push_threshold(&mut self, threshold: u8) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().modify(|shift| shift.set_push_thresh(threshold));
+        }
+    }
+    /// Read the `PUSH_THRESH` field from this state machine's `SHIFTCTRL` register.
+    fn get_push_threshold(&self) -> u8 {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().read().push_thresh()
+        }
+    }
+    /// Set the output shift direction (`OUT_SHIFTDIR` in `SHIFTCTRL`).
+    ///
+    /// The bit is set for `ShiftDirection::Right` and cleared for `Left`.
+    fn set_out_shift_dir(&mut self, dir: ShiftDirection) {
+        let shift_right = dir == ShiftDirection::Right;
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().modify(|shift| shift.set_out_shiftdir(shift_right));
+        }
+    }
+    /// Read the output shift direction (`OUT_SHIFTDIR` in `SHIFTCTRL`).
+    fn get_out_shiftdir(&self) -> ShiftDirection {
+        let shifts_right = unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().read().out_shiftdir()
+        };
+        if shifts_right {
+            ShiftDirection::Right
+        } else {
+            ShiftDirection::Left
+        }
+    }
+    /// Set the input shift direction (`IN_SHIFTDIR` in `SHIFTCTRL`).
+    ///
+    /// The bit is set for `ShiftDirection::Right` and cleared for `Left`.
+    fn set_in_shift_dir(&mut self, dir: ShiftDirection) {
+        let shift_right = dir == ShiftDirection::Right;
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().modify(|shift| shift.set_in_shiftdir(shift_right));
+        }
+    }
+    /// Read the input shift direction (`IN_SHIFTDIR` in `SHIFTCTRL`).
+    fn get_in_shiftdir(&self) -> ShiftDirection {
+        let shifts_right = unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().read().in_shiftdir()
+        };
+        if shifts_right {
+            ShiftDirection::Right
+        } else {
+            ShiftDirection::Left
+        }
+    }
+    /// Set the `AUTOPULL` bit in this state machine's `SHIFTCTRL` register.
+    fn set_autopull(&mut self, auto: bool) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().modify(|shift| shift.set_autopull(auto));
+        }
+    }
+    /// Read the `AUTOPULL` bit from this state machine's `SHIFTCTRL` register.
+    fn is_autopull(&self) -> bool {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().read().autopull()
+        }
+    }
+    /// Set the `AUTOPUSH` bit in this state machine's `SHIFTCTRL` register.
+    fn set_autopush(&mut self, auto: bool) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().modify(|shift| shift.set_autopush(auto));
+        }
+    }
+    /// Read the `AUTOPUSH` bit from this state machine's `SHIFTCTRL` register.
+    fn is_autopush(&self) -> bool {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.shiftctrl().read().autopush()
+        }
+    }
+    /// Read the value of this state machine's `ADDR` register.
+    fn get_addr(&self) -> u8 {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.addr().read().addr()
+        }
+    }
+    /// Set the `SIDESET_COUNT` field in this state machine's `PINCTRL` register.
+    fn set_sideset_count(&mut self, count: u8) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.pinctrl().modify(|pins| pins.set_sideset_count(count));
+        }
+    }
+    /// Read the `SIDESET_COUNT` field from this state machine's `PINCTRL` register.
+    fn get_sideset_count(&self) -> u8 {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.pinctrl().read().sideset_count()
+        }
+    }
+    /// Route `pin` to this PIO block and wrap it in a `PioPin`.
+    ///
+    /// The pin's IO control register is overwritten with the function select for
+    /// PIO1 when `PIO_NO == 1`, and for PIO0 otherwise.
+    fn make_pio_pin(&self, pin: impl Pin) -> PioPin<Self::Pio> {
+        let funcsel = if Self::Pio::PIO_NO == 1 {
+            pac::io::vals::Gpio0ctrlFuncsel::PIO1_0
+        } else {
+            // PIO == 0
+            pac::io::vals::Gpio0ctrlFuncsel::PIO0_0
+        }
+        .0;
+        unsafe {
+            pin.io().ctrl().write(|w| {
+                w.set_funcsel(funcsel);
+            });
+        }
+        PioPin {
+            pin_bank: pin.pin_bank(),
+            pio: PhantomData,
+        }
+    }
+    /// Set the `SIDESET_BASE` field of `PINCTRL` to the number of `base_pin`.
+    fn set_sideset_base_pin(&mut self, base_pin: &PioPin<Self::Pio>) {
+        let base = base_pin.pin();
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.pinctrl().modify(|pins| pins.set_sideset_base(base));
+        }
+    }
+    /// Read the `SIDESET_BASE` field from this state machine's `PINCTRL` register.
+    fn get_sideset_base(&self) -> u8 {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.pinctrl().read().sideset_base()
+        }
+    }
+    /// Set the range of pins affected by a `set` instruction.
+    ///
+    /// Writes `base` to `SET_BASE` and `count` to `SET_COUNT` in `PINCTRL`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `base + count` is greater than 32.
+    fn set_set_range(&mut self, base: u8, count: u8) {
+        // Widen before adding so large arguments cannot wrap around in `u8`. A range
+        // that ends on pin 31 (`base + count == 32`) is valid, matching the check
+        // performed by `set_set_pins`.
+        assert!(u32::from(base) + u32::from(count) <= 32);
+        unsafe {
+            PIOS[Self::Pio::PIO_NO as usize]
+                .sm(Self::Sm::SM_NO as usize)
+                .pinctrl()
+                .modify(|w| {
+                    w.set_set_base(base);
+                    w.set_set_count(count)
+                });
+        }
+    }
+    /// Read the pin range used by `set` instructions as `(base, count)`.
+    fn get_set_range(&self) -> (u8, u8) {
+        let pins = unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.pinctrl().read()
+        };
+        (pins.set_base(), pins.set_count())
+    }
+    /// Set the `IN_BASE` field of `PINCTRL` to the number of `base`.
+    fn set_in_base_pin(&mut self, base: &PioPin<Self::Pio>) {
+        let first_pin = base.pin();
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.pinctrl().modify(|pins| pins.set_in_base(first_pin));
+        }
+    }
+    /// Read the `IN_BASE` field from this state machine's `PINCTRL` register.
+    fn get_in_base(&self) -> u8 {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.pinctrl().read().in_base()
+        }
+    }
+    /// Set the range of pins affected by an `out` instruction.
+    ///
+    /// Writes `base` to `OUT_BASE` and `count` to `OUT_COUNT` in `PINCTRL`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `base + count` is greater than 32.
+    fn set_out_range(&mut self, base: u8, count: u8) {
+        // Widen before adding so large arguments cannot wrap around in `u8`. A range
+        // that ends on pin 31 (`base + count == 32`) is valid, matching the check
+        // performed by `set_out_pins`.
+        assert!(u32::from(base) + u32::from(count) <= 32);
+        unsafe {
+            PIOS[Self::Pio::PIO_NO as usize]
+                .sm(Self::Sm::SM_NO as usize)
+                .pinctrl()
+                .modify(|w| {
+                    w.set_out_base(base);
+                    w.set_out_count(count)
+                });
+        }
+    }
+    /// Read the pin range used by `out` instructions as `(base, count)`.
+    fn get_out_range(&self) -> (u8, u8) {
+        let pins = unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.pinctrl().read()
+        };
+        (pins.out_base(), pins.out_count())
+    }
+    /// Use `pins` as the pins affected by `out` instructions.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `pins` is empty, if the range would extend past pin 31, or if the
+    /// pins are not sequential starting at `pins[0]`.
+    fn set_out_pins<'a, 'b: 'a>(&'a mut self, pins: &'b [&PioPin<Self::Pio>]) {
+        let count = pins.len();
+        assert!(count >= 1);
+        let start = pins[0].pin() as usize;
+        assert!(start + pins.len() <= 32);
+        for (offset, p) in pins.iter().enumerate() {
+            assert!(p.pin() as usize == start + offset, "Pins must be sequential");
+        }
+        self.set_out_range(start as u8, count as u8);
+    }
+    /// Use `pins` as the pins affected by `set` instructions.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `pins` is empty, if the range would extend past pin 31, or if the
+    /// pins are not sequential starting at `pins[0]`.
+    fn set_set_pins<'a, 'b: 'a>(&'a mut self, pins: &'b [&PioPin<Self::Pio>]) {
+        let count = pins.len();
+        assert!(count >= 1);
+        let start = pins[0].pin() as usize;
+        assert!(start + pins.len() <= 32);
+        for (offset, p) in pins.iter().enumerate() {
+            assert!(p.pin() as usize == start + offset, "Pins must be sequential");
+        }
+        self.set_set_range(start as u8, count as u8);
+    }
+    /// Read the raw value of this state machine's `INSTR` register.
+    fn get_current_instr() -> u32 {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.instr().read().0
+        }
+    }
+    /// Write `instr` to this state machine's `INSTR` register.
+    fn exec_instr(&mut self, instr: u16) {
+        unsafe {
+            let sm = PIOS[Self::Pio::PIO_NO as usize].sm(Self::Sm::SM_NO as usize);
+            sm.instr().write(|w| w.set_instr(instr));
+        }
+    }
+    /// Forward `instrs` to the module-level `write_instr`, starting at `start`.
+    ///
+    /// The last argument combines `MEM_USED_BY_STATEMACHINE` with this state
+    /// machine's number.
+    // NOTE(review): presumably this tags the written memory as owned by this state
+    // machine; confirm against the free function `write_instr`.
+    fn write_instr<I>(&mut self, start: usize, instrs: I)
+    where
+        I: Iterator<Item = u16>,
+    {
+        let owner = MEM_USED_BY_STATEMACHINE | Self::Sm::SM_NO as u32;
+        write_instr(Self::Pio::PIO_NO, start, instrs, owner);
+    }
+    /// Whether PIO IRQ flag `irq_no` is currently set.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `irq_no` is 8 or greater.
+    fn is_irq_set(&self, irq_no: u8) -> bool {
+        assert!(irq_no < 8);
+        let flags = unsafe { PIOS[Self::Pio::PIO_NO as usize].irq().read().0 };
+        flags & (1 << irq_no) != 0
+    }
+    /// Clear PIO IRQ flag `irq_no` by writing its bit to the `IRQ` register.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `irq_no` is 8 or greater.
+    fn clear_irq(&mut self, irq_no: usize) {
+        assert!(irq_no < 8);
+        let flag = 1 << irq_no;
+        unsafe {
+            PIOS[Self::Pio::PIO_NO as usize].irq().write(|w| w.set_irq(flag));
+        }
+    }
+    /// Return a future that completes once `value` has been pushed into the TX FIFO.
+    fn wait_push<'a>(&'a mut self, value: u32) -> FifoOutFuture<'a, Self::Pio, Self> {
+        FifoOutFuture::new(self, value)
+    }
+    /// Return a future that resolves to the next word pulled from the RX FIFO.
+    fn wait_pull<'a>(&'a mut self) -> FifoInFuture<'a, Self::Pio, Self> {
+        FifoInFuture::new(self)
+    }
+    /// Return a future that completes once PIO IRQ flag `irq_no` is set.
+    fn wait_irq(&self, irq_no: u8) -> IrqFuture<Self::Pio> {
+        IrqFuture::new(irq_no)
+    }
+    /// Reports whether this state machine's `txstall` bit is set in FDEBUG.
+    ///
+    /// After sampling, the same bit is written back to FDEBUG
+    /// (presumably write-1-to-clear - confirm against the datasheet).
+    fn has_tx_stalled(&self) -> bool {
+        unsafe {
+            let fdebug = PIOS[Self::Pio::PIO_NO as usize].fdebug();
+            let ret = fdebug.read().txstall() & (1 << Self::Sm::SM_NO) != 0;
+            fdebug.write(|w| w.set_txstall(1 << Self::Sm::SM_NO));
+            ret
+        }
+    }
+    /// Reports whether this state machine's `txover` bit is set in FDEBUG.
+    ///
+    /// After sampling, the same bit is written back to FDEBUG
+    /// (presumably write-1-to-clear - confirm against the datasheet).
+    fn has_tx_overflowed(&self) -> bool {
+        unsafe {
+            let fdebug = PIOS[Self::Pio::PIO_NO as usize].fdebug();
+            let ret = fdebug.read().txover() & (1 << Self::Sm::SM_NO) != 0;
+            fdebug.write(|w| w.set_txover(1 << Self::Sm::SM_NO));
+            ret
+        }
+    }
+    /// Reports whether this state machine's `rxstall` bit is set in FDEBUG.
+    ///
+    /// After sampling, the same bit is written back to FDEBUG
+    /// (presumably write-1-to-clear - confirm against the datasheet).
+    fn has_rx_stalled(&self) -> bool {
+        unsafe {
+            let fdebug = PIOS[Self::Pio::PIO_NO as usize].fdebug();
+            let ret = fdebug.read().rxstall() & (1 << Self::Sm::SM_NO) != 0;
+            fdebug.write(|w| w.set_rxstall(1 << Self::Sm::SM_NO));
+            ret
+        }
+    }
+    /// Reports whether this state machine's `rxunder` bit is set in FDEBUG.
+    ///
+    /// After sampling, the same bit is written back to FDEBUG
+    /// (presumably write-1-to-clear - confirm against the datasheet).
+    fn has_rx_underflowed(&self) -> bool {
+        unsafe {
+            let fdebug = PIOS[Self::Pio::PIO_NO as usize].fdebug();
+            let ret = fdebug.read().rxunder() & (1 << Self::Sm::SM_NO) != 0;
+            fdebug.write(|w| w.set_rxunder(1 << Self::Sm::SM_NO));
+            ret
+        }
+    }
+    /// Starts a DMA transfer that feeds `data` into this state machine's TX FIFO.
+    ///
+    /// The channel reads `data.len()` 32-bit words from `data` (incrementing read
+    /// address) and writes each one to the fixed TX FIFO register address. The
+    /// returned `Transfer` is built from the channel `ch`.
+    fn dma_push<'a, C: Channel>(&'a self, ch: PeripheralRef<'a, C>, data: &'a [u32]) -> Transfer<'a, C> {
+        unsafe {
+            dma::init();
+            let pio_no = Self::Pio::PIO_NO;
+            let sm_no = Self::Sm::SM_NO;
+            let p = ch.regs();
+            p.read_addr().write_value(data.as_ptr() as u32);
+            p.write_addr()
+                .write_value(PIOS[pio_no as usize].txf(sm_no as usize).ptr() as u32);
+            p.trans_count().write_value(data.len() as u32);
+            // Writing the control register last: it also sets the enable bit.
+            p.ctrl_trig().write(|w| {
+                // Set TX DREQ for this statemachine
+                w.set_treq_sel(TreqSel(pio_no * 8 + sm_no));
+                w.set_data_size(DataSize::SIZE_WORD);
+                w.set_chain_to(ch.number());
+                w.set_incr_read(true);
+                w.set_incr_write(false);
+                w.set_en(true);
+            });
+            compiler_fence(Ordering::SeqCst);
+        }
+        Transfer::new(ch)
+    }
+    /// Starts a DMA transfer that drains this state machine's RX FIFO into `data`.
+    ///
+    /// The channel reads `data.len()` 32-bit words from the fixed RX FIFO register
+    /// address and stores them into `data` (incrementing write address). The
+    /// returned `Transfer` is built from the channel `ch`.
+    fn dma_pull<'a, C: Channel>(&'a self, ch: PeripheralRef<'a, C>, data: &'a mut [u32]) -> Transfer<'a, C> {
+        unsafe {
+            dma::init();
+            let pio_no = Self::Pio::PIO_NO;
+            let sm_no = Self::Sm::SM_NO;
+            let p = ch.regs();
+            // The DMA engine writes through this pointer, so derive it from the
+            // mutable borrow rather than from a shared one.
+            p.write_addr().write_value(data.as_mut_ptr() as u32);
+            p.read_addr()
+                .write_value(PIOS[pio_no as usize].rxf(sm_no as usize).ptr() as u32);
+            p.trans_count().write_value(data.len() as u32);
+            // Writing the control register last: it also sets the enable bit.
+            p.ctrl_trig().write(|w| {
+                // Set RX DREQ for this statemachine (offset by 4 from the TX DREQ used in dma_push)
+                w.set_treq_sel(TreqSel(pio_no * 8 + sm_no + 4));
+                w.set_data_size(DataSize::SIZE_WORD);
+                w.set_chain_to(ch.number());
+                w.set_incr_read(false);
+                w.set_incr_write(true);
+                w.set_en(true);
+            });
+            compiler_fence(Ordering::SeqCst);
+        }
+        Transfer::new(ch)
+    }
+// This is a bit array containing 4 bits for every word in the PIO instruction memory.
+// Bit 3-2
+//const MEM_USE_MASK: u32 = 0b1100;
+// Word is unused.
+const MEM_NOT_USED: u32 = 0b0000;
+// Word was written through a state machine (low two bits then hold its number).
+const MEM_USED_BY_STATEMACHINE: u32 = 0b0100;
+// Word was written through the PIO-common handle.
+const MEM_USED_BY_COMMON: u32 = 0b1000;
+// Bit 1-0 is the number of the state machine
+//const MEM_STATE_MASK: u32 = 0b0011;
+// Should use mutex if running on multiple cores
+// Layout: [pio_no][addr >> 3], each u32 packing the 4-bit status of 8 consecutive words.
+static mut INSTR_MEM_STATUS: &'static mut [[u32; 4]; 2] = &mut [[0; 4]; 2];
+// Returns the 4-bit usage status recorded for instruction word `addr` of PIO `pio_no`.
+// `addr >> 3` selects the u32, `(addr & 0x07) * 4` the nibble inside it.
+fn instr_mem_get_status(pio_no: u8, addr: u8) -> u32 {
+    ((unsafe { INSTR_MEM_STATUS[pio_no as usize][(addr >> 3) as usize] }) >> ((addr & 0x07) * 4)) & 0xf
+// Stores `status` as the usage status of instruction word `addr` of PIO `pio_no`,
+// replacing only that word's nibble in the status table.
+fn instr_mem_set_status(pio_no: u8, addr: u8, status: u32) {
+    let w = unsafe { &mut INSTR_MEM_STATUS[pio_no as usize][(addr >> 3) as usize] };
+    let shift = (addr & 0x07) * 4;
+    // Clear the old nibble, then OR in the new one.
+    *w = (*w & !(0xf << shift)) | (status << shift);
+// Returns `true` if instruction word `addr` of PIO `pio_no` is recorded as `MEM_NOT_USED`.
+fn instr_mem_is_free(pio_no: u8, addr: u8) -> bool {
+    instr_mem_get_status(pio_no, addr) == MEM_NOT_USED
+/// Zero-sized handle for the parts of PIO block `PIO` exposed through [`PioCommon`].
+pub struct PioCommonInstance<PIO: PioInstance> {
+    // Only carries the PIO type; no runtime data.
+    pio: PhantomData<PIO>,
+// All `PioCommon` methods use their default implementations; only the PIO type is bound.
+impl<PIO: PioInstance> PioCommon for PioCommonInstance<PIO> {
+    type Pio = PIO;
+/// Writes `instrs` into the instruction memory of PIO block `pio_no`, starting at
+/// address `start`, and records `mem_user` as the status of every word written.
+///
+/// Panics if a target address lies outside the 32-word instruction memory or if
+/// the target word is already marked as used.
+fn write_instr<I>(pio_no: u8, start: usize, instrs: I, mem_user: u32)
+    I: Iterator<Item = u16>,
+    for (i, instr) in instrs.enumerate() {
+        let addr = i + start;
+        // Validate before narrowing to u8: the status table only covers 32 words,
+        // and a plain `as u8` would silently wrap addresses of 256 and above.
+        assert!(addr < 32, "PIO instruction memory address {} out of range", addr);
+        let addr = addr as u8;
+        assert!(
+            instr_mem_is_free(pio_no, addr),
+            "Trying to write already used PIO instruction memory at {}",
+            addr
+        );
+        unsafe {
+            PIOS[pio_no as usize].instr_mem(addr as usize).write(|w| {
+                w.set_instr_mem(instr);
+            });
+            instr_mem_set_status(pio_no, addr, mem_user);
+        }
+    }
+/// Operations on a PIO block that are not tied to a single state machine.
+pub trait PioCommon: Sized {
+    /// The PIO block this handle refers to.
+    type Pio: PioInstance;
+    /// Writes `instrs` into instruction memory starting at `start`, tagging each
+    /// word as `MEM_USED_BY_COMMON`.
+    fn write_instr<I>(&mut self, start: usize, instrs: I)
+    where
+        I: Iterator<Item = u16>,
+    {
+        // `self` is only taken to require exclusive access; no instance state is read.
+        let _ = self;
+        write_instr(Self::Pio::PIO_NO, start, instrs, MEM_USED_BY_COMMON);
+    }
+    /// Clears IRQ flag `irq_no` by writing its bit to the IRQ register.
+    ///
+    /// Panics if `irq_no` is 8 or greater.
+    fn clear_irq(&mut self, irq_no: usize) {
+        assert!(irq_no < 8);
+        unsafe { PIOS[Self::Pio::PIO_NO as usize].irq().write(|w| w.set_irq(1 << irq_no)) }
+    }
+    /// Writes `mask` to the IRQ register, addressing several IRQ flags at once.
+    fn clear_irqs(&mut self, mask: u8) {
+        unsafe { PIOS[Self::Pio::PIO_NO as usize].irq().write(|w| w.set_irq(mask)) }
+    }
+    /// Writes bit `irq_no` to the IRQ_FORCE register.
+    ///
+    /// Panics if `irq_no` is 8 or greater.
+    fn force_irq(&mut self, irq_no: usize) {
+        assert!(irq_no < 8);
+        unsafe {
+            PIOS[Self::Pio::PIO_NO as usize]
+                .irq_force()
+                .write(|w| w.set_irq_force(1 << irq_no))
+        }
+    }
+    /// Updates the input-sync-bypass register: bits selected by `mask` take their
+    /// value from `bypass`, all other bits keep their current value.
+    fn set_input_sync_bypass<'a>(&'a mut self, bypass: u32, mask: u32) {
+        unsafe {
+            PIOS[Self::Pio::PIO_NO as usize]
+                .input_sync_bypass()
+                .modify(|w| *w = (*w & !mask) | (bypass & mask));
+        }
+    }
+    /// Returns the current value of the input-sync-bypass register.
+    fn get_input_sync_bypass(&self) -> u32 {
+        unsafe { PIOS[Self::Pio::PIO_NO as usize].input_sync_bypass().read() }
+    }
+// Identifies a specific state machine inside a PIO device.
+// The state machine number is carried in the const generic `SM_NO`.
+pub struct SmInstanceBase<const SM_NO: u8> {}
+/// Type-level description of a state machine.
+pub trait SmInstance: Unpin {
+    /// Number of the state machine, used to index per-state-machine registers.
+    const SM_NO: u8;
+// Forwards the const generic parameter as the trait's associated constant.
+impl<const SM_NO: u8> SmInstance for SmInstanceBase<SM_NO> {
+    const SM_NO: u8 = SM_NO;
+/// Implemented by PIO peripheral singletons so they can be split into a common
+/// handle and one handle per state machine.
+pub trait PioPeripherial: Sized {
+    /// The PIO block this peripheral corresponds to.
+    type Pio: PioInstance;
+    /// Returns the number of the PIO block.
+    fn pio(&self) -> u8 {
+        let _ = self;
+        Self::Pio::PIO_NO
+    }
+    /// Consumes the peripheral and returns the common handle followed by the
+    /// handles for state machines 0 to 3.
+    fn split(
+        self,
+    ) -> (
+        PioCommonInstance<Self::Pio>,
+        PioStateMachineInstance<Self::Pio, SmInstanceBase<0>>,
+        PioStateMachineInstance<Self::Pio, SmInstanceBase<1>>,
+        PioStateMachineInstance<Self::Pio, SmInstanceBase<2>>,
+        PioStateMachineInstance<Self::Pio, SmInstanceBase<3>>,
+    ) {
+        let _ = self;
+        // All handles are zero-sized; their identity lives purely in the type parameters.
+        let common = PioCommonInstance { pio: PhantomData };
+        let sm0 = PioStateMachineInstance {
+            sm: PhantomData,
+            pio: PhantomData,
+        };
+        let sm1 = PioStateMachineInstance {
+            sm: PhantomData,
+            pio: PhantomData,
+        };
+        let sm2 = PioStateMachineInstance {
+            sm: PhantomData,
+            pio: PhantomData,
+        };
+        let sm3 = PioStateMachineInstance {
+            sm: PhantomData,
+            pio: PhantomData,
+        };
+        (common, sm0, sm1, sm2, sm3)
+    }
+// Identifies a specific PIO device.
+// The device number is carried in the const generic `PIO_NO`.
+pub struct PioInstanceBase<const PIO_NO: u8> {}
+/// Type-level description of a PIO block.
+pub trait PioInstance: Unpin {
+    /// Number of the PIO block, used to index `PIOS`.
+    const PIO_NO: u8;
+    // Interrupt types associated with this block.
+    // NOTE(review): the names suggest FIFO-out, FIFO-in and state-machine IRQ sources - confirm at the use sites.
+    type IrqOut: Interrupt;
+    type IrqIn: Interrupt;
+    type IrqSm: Interrupt;
+// PIO block 0. Note that `IrqIn` and `IrqSm` both map to PIO0_IRQ_1.
+impl PioInstance for PioInstanceBase<0> {
+    const PIO_NO: u8 = 0;
+    type IrqOut = interrupt::PIO0_IRQ_0;
+    type IrqIn = interrupt::PIO0_IRQ_1;
+    type IrqSm = interrupt::PIO0_IRQ_1;
+// PIO block 1. Note that `IrqIn` and `IrqSm` both map to PIO1_IRQ_1.
+impl PioInstance for PioInstanceBase<1> {
+    const PIO_NO: u8 = 1;
+    type IrqOut = interrupt::PIO1_IRQ_0;
+    type IrqIn = interrupt::PIO1_IRQ_1;
+    type IrqSm = interrupt::PIO1_IRQ_1;
+// Short names for the two PIO blocks and the four state machines of each block.
+pub type Pio0 = PioInstanceBase<0>;
+pub type Pio1 = PioInstanceBase<1>;
+pub type Sm0 = SmInstanceBase<0>;
+pub type Sm1 = SmInstanceBase<1>;
+pub type Sm2 = SmInstanceBase<2>;
+pub type Sm3 = SmInstanceBase<3>;
+// Implements `PioPeripherial` for the peripheral singleton `$name`, binding it to
+// `PioInstanceBase<$pio>`.
+macro_rules! impl_pio_sm {
+    ($name:ident, $pio:expr) => {
+        impl PioPeripherial for peripherals::$name {
+            type Pio = PioInstanceBase<$pio>;
+        }
+    };
+// Instantiate for both PIO peripherals.
+impl_pio_sm!(PIO0, 0);
+impl_pio_sm!(PIO1, 1);
diff --git a/embassy-rp/src/pio_instr_util.rs b/embassy-rp/src/pio_instr_util.rs
new file mode 100644
index 000000000..ae26ff1dc
--- /dev/null
+++ b/embassy-rp/src/pio_instr_util.rs
@@ -0,0 +1,90 @@
+use pio::{InSource, InstructionOperands, JmpCondition, OutDestination, SetDestination};
+use crate::pio::PioStateMachine;
+/// Pushes `value` to the TX FIFO of `sm`, then has `sm` execute `OUT X, 32`.
+pub fn set_x<SM: PioStateMachine>(sm: &mut SM, value: u32) {
+    // Encoded once at compile time.
+    const OUT_X: u16 = InstructionOperands::OUT {
+        bit_count: 32,
+        destination: OutDestination::X,
+    }
+    .encode();
+    sm.push_tx(value);
+    sm.exec_instr(OUT_X);
+/// Has `sm` execute `IN X, 32`, then returns the word pulled from its RX FIFO.
+pub fn get_x<SM: PioStateMachine>(sm: &mut SM) -> u32 {
+    // Encoded once at compile time.
+    const IN_X: u16 = InstructionOperands::IN {
+        bit_count: 32,
+        source: InSource::X,
+    }
+    .encode();
+    sm.exec_instr(IN_X);
+    sm.pull_rx()
+/// Pushes `value` to the TX FIFO of `sm`, then has `sm` execute `OUT Y, 32`.
+pub fn set_y<SM: PioStateMachine>(sm: &mut SM, value: u32) {
+    // Encoded once at compile time.
+    const OUT_Y: u16 = InstructionOperands::OUT {
+        bit_count: 32,
+        destination: OutDestination::Y,
+    }
+    .encode();
+    sm.push_tx(value);
+    sm.exec_instr(OUT_Y);
+/// Has `sm` execute `IN Y, 32`, then returns the word pulled from its RX FIFO.
+pub fn get_y<SM: PioStateMachine>(sm: &mut SM) -> u32 {
+    // Encoded once at compile time.
+    const IN_Y: u16 = InstructionOperands::IN {
+        bit_count: 32,
+        source: InSource::Y,
+    }
+    .encode();
+    sm.exec_instr(IN_Y);
+    sm.pull_rx()
+/// Has `sm` execute `SET PINDIRS, data`.
+pub fn set_pindir<SM: PioStateMachine>(sm: &mut SM, data: u8) {
+    let operands = InstructionOperands::SET {
+        destination: SetDestination::PINDIRS,
+        data,
+    };
+    sm.exec_instr(operands.encode());
+/// Has `sm` execute `SET PINS, data`.
+pub fn set_pin<SM: PioStateMachine>(sm: &mut SM, data: u8) {
+    let operands = InstructionOperands::SET {
+        destination: SetDestination::PINS,
+        data,
+    };
+    sm.exec_instr(operands.encode());
+/// Pushes `data` to the TX FIFO of `sm`, then has `sm` execute `OUT PINS, 32`.
+pub fn set_out_pin<SM: PioStateMachine>(sm: &mut SM, data: u32) {
+    // Encoded once at compile time.
+    const OUT_PINS: u16 = InstructionOperands::OUT {
+        bit_count: 32,
+        destination: OutDestination::PINS,
+    }
+    .encode();
+    sm.push_tx(data);
+    sm.exec_instr(OUT_PINS);
+/// Pushes `data` to the TX FIFO of `sm`, then has `sm` execute `OUT PINDIRS, 32`.
+pub fn set_out_pindir<SM: PioStateMachine>(sm: &mut SM, data: u32) {
+    // Encoded once at compile time.
+    const OUT_PINDIRS: u16 = InstructionOperands::OUT {
+        bit_count: 32,
+        destination: OutDestination::PINDIRS,
+    }
+    .encode();
+    sm.push_tx(data);
+    sm.exec_instr(OUT_PINDIRS);
+/// Has `sm` execute an unconditional `JMP` to instruction address `to_addr`.
+pub fn exec_jmp<SM: PioStateMachine>(sm: &mut SM, to_addr: u8) {
+    let operands = InstructionOperands::JMP {
+        condition: JmpCondition::Always,
+        address: to_addr,
+    };
+    sm.exec_instr(operands.encode());
diff --git a/embassy-rp/src/relocate.rs b/embassy-rp/src/relocate.rs
new file mode 100644
--- /dev/null
+++ b/embassy-rp/src/relocate.rs
@@ -0,0 +1,77 @@
+use core::iter::Iterator;
+use pio::{Program, SideSet, Wrap};
+/// Iterator adapter that yields program instructions with JMP targets shifted by `offset`.
+pub struct CodeIterator<'a, I>
+    I: Iterator<Item = &'a u16>,
+    // Source of the unrelocated instruction words.
+    iter: I,
+    // Amount added to the address field of every JMP instruction.
+    offset: u8,
+impl<'a, I: Iterator<Item = &'a u16>> CodeIterator<'a, I> {
+    /// Creates an iterator over `iter` that relocates JMP targets by `offset`.
+    pub fn new(iter: I, offset: u8) -> CodeIterator<'a, I> {
+        CodeIterator { iter, offset }
+    }
+impl<'a, I> Iterator for CodeIterator<'a, I>
+    I: Iterator<Item = &'a u16>,
+    type Item = u16;
+    /// Yields the next instruction word, with its address field relocated by
+    /// `offset` when the word is a JMP (top three bits zero).
+    ///
+    /// Panics if a relocated JMP target falls outside the instruction memory.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next().map(|&instr| {
+            if instr & 0b1110_0000_0000_0000 == 0 {
+                // this is a JMP instruction -> add offset to address
+                // Add in u16 so a large offset cannot wrap around in u8 and slip
+                // past the range check below.
+                let address = (instr & 0b1_1111) + u16::from(self.offset);
+                assert!(
+                    address < pio::RP2040_MAX_PROGRAM_SIZE as u16,
+                    "Invalid JMP out of the program after offset addition"
+                );
+                instr & (!0b11111) | address
+            } else {
+                instr
+            }
+        })
+    }
+/// A borrowed PIO program together with the instruction address it is placed at.
+pub struct RelocatedProgram<'a, const PROGRAM_SIZE: usize> {
+    // The unmodified program.
+    program: &'a Program<PROGRAM_SIZE>,
+    // Address added to JMP targets and wrap bounds.
+    origin: u8,
+impl<'a, const PROGRAM_SIZE: usize> RelocatedProgram<'a, PROGRAM_SIZE> {
+    /// Wraps `program` at its declared origin, or at address 0 if it declares none.
+    pub fn new(program: &Program<PROGRAM_SIZE>) -> RelocatedProgram<PROGRAM_SIZE> {
+        RelocatedProgram {
+            program,
+            origin: program.origin.unwrap_or(0),
+        }
+    }
+    /// Wraps `program` at the caller-supplied `origin`.
+    pub fn new_with_origin(program: &Program<PROGRAM_SIZE>, origin: u8) -> RelocatedProgram<PROGRAM_SIZE> {
+        RelocatedProgram { origin, program }
+    }
+    /// Returns an iterator over the program's instructions with JMP targets relocated.
+    pub fn code(&'a self) -> CodeIterator<'a, core::slice::Iter<'a, u16>> {
+        let words = self.program.code.iter();
+        CodeIterator::new(words, self.origin)
+    }
+    /// Returns the program's wrap bounds shifted by the origin.
+    pub fn wrap(&self) -> Wrap {
+        let Wrap { source, target } = self.program.wrap;
+        Wrap {
+            source: source + self.origin,
+            target: target + self.origin,
+        }
+    }
+    /// Returns the program's side-set configuration unchanged.
+    pub fn side_set(&self) -> SideSet {
+        self.program.side_set
+    }
+    /// Returns the address the program is placed at.
+    pub fn origin(&self) -> u8 {
+        self.origin
+    }
diff --git a/embassy-stm32/build.rs b/embassy-stm32/build.rs
--- a/embassy-stm32/build.rs
+++ b/embassy-stm32/build.rs
@@ -244,11 +244,13 @@ fn main() {
         (("usart", "CTS"), quote!(crate::usart::CtsPin)),
         (("usart", "RTS"), quote!(crate::usart::RtsPin)),
         (("usart", "CK"), quote!(crate::usart::CkPin)),
+        (("usart", "DE"), quote!(crate::usart::DePin)),
         (("lpuart", "TX"), quote!(crate::usart::TxPin)),
         (("lpuart", "RX"), quote!(crate::usart::RxPin)),
         (("lpuart", "CTS"), quote!(crate::usart::CtsPin)),
         (("lpuart", "RTS"), quote!(crate::usart::RtsPin)),
         (("lpuart", "CK"), quote!(crate::usart::CkPin)),
+        (("lpuart", "DE"), quote!(crate::usart::DePin)),
         (("spi", "SCK"), quote!(crate::spi::SckPin)),
         (("spi", "MOSI"), quote!(crate::spi::MosiPin)),
         (("spi", "MISO"), quote!(crate::spi::MisoPin)),
diff --git a/embassy-stm32/src/usart/buffered.rs b/embassy-stm32/src/usart/buffered.rs
index d024bedcf..874af1d73 100644
--- a/embassy-stm32/src/usart/buffered.rs
+++ b/embassy-stm32/src/usart/buffered.rs
@@ -89,6 +89,33 @@ impl<'d, T: BasicInstance> BufferedUart<'d, T> {
         Self::new_inner(state, peri, rx, tx, irq, tx_buffer, rx_buffer, config)
+    /// Creates a buffered UART that also uses a `DePin`.
+    ///
+    /// Enables and resets the peripheral, switches `de` to its alternate function as
+    /// a push-pull output and sets `dem` in CR3, then delegates to `new_inner`.
+    /// Not compiled when `usart_v1` is set.
+    #[cfg(not(usart_v1))]
+    pub fn new_with_de(
+        state: &'d mut State<'d, T>,
+        peri: impl Peripheral<P = T> + 'd,
+        rx: impl Peripheral<P = impl RxPin<T>> + 'd,
+        tx: impl Peripheral<P = impl TxPin<T>> + 'd,
+        irq: impl Peripheral<P = T::Interrupt> + 'd,
+        de: impl Peripheral<P = impl DePin<T>> + 'd,
+        tx_buffer: &'d mut [u8],
+        rx_buffer: &'d mut [u8],
+        config: Config,
+    ) -> BufferedUart<'d, T> {
+        into_ref!(de);
+        // Reset first so the CR3 write below starts from the reset state.
+        T::enable();
+        T::reset();
+        unsafe {
+            de.set_as_af(de.af_num(), AFType::OutputPushPull);
+            // NOTE(review): `dem` is presumably the hardware driver-enable mode bit - confirm in the reference manual.
+            T::regs().cr3().write(|w| {
+                w.set_dem(true);
+            });
+        }
+        Self::new_inner(state, peri, rx, tx, irq, tx_buffer, rx_buffer, config)
+    }
     fn new_inner(
         state: &'d mut State<'d, T>,
         _peri: impl Peripheral<P = T> + 'd,
diff --git a/embassy-stm32/src/usart/mod.rs b/embassy-stm32/src/usart/mod.rs
index aea054a4b..ea75361fa 100644
--- a/embassy-stm32/src/usart/mod.rs
+++ b/embassy-stm32/src/usart/mod.rs
@@ -646,6 +646,31 @@ impl<'d, T: BasicInstance, TxDma, RxDma> Uart<'d, T, TxDma, RxDma> {
         Self::new_inner(peri, rx, tx, irq, tx_dma, rx_dma, config)
+    /// Creates a UART that also uses a `DePin`.
+    ///
+    /// Enables and resets the peripheral, switches `de` to its alternate function as
+    /// a push-pull output and sets `dem` in CR3, then delegates to `new_inner`.
+    /// Not compiled when `usart_v1` is set.
+    #[cfg(not(usart_v1))]
+    pub fn new_with_de(
+        peri: impl Peripheral<P = T> + 'd,
+        rx: impl Peripheral<P = impl RxPin<T>> + 'd,
+        tx: impl Peripheral<P = impl TxPin<T>> + 'd,
+        irq: impl Peripheral<P = T::Interrupt> + 'd,
+        de: impl Peripheral<P = impl DePin<T>> + 'd,
+        tx_dma: impl Peripheral<P = TxDma> + 'd,
+        rx_dma: impl Peripheral<P = RxDma> + 'd,
+        config: Config,
+    ) -> Self {
+        into_ref!(de);
+        // Reset first so the CR3 write below starts from the reset state.
+        T::enable();
+        T::reset();
+        unsafe {
+            de.set_as_af(de.af_num(), AFType::OutputPushPull);
+            // NOTE(review): `dem` is presumably the hardware driver-enable mode bit - confirm in the reference manual.
+            T::regs().cr3().write(|w| {
+                w.set_dem(true);
+            });
+        }
+        Self::new_inner(peri, rx, tx, irq, tx_dma, rx_dma, config)
+    }
     fn new_inner(
         peri: impl Peripheral<P = T> + 'd,
         rx: impl Peripheral<P = impl RxPin<T>> + 'd,
@@ -1040,6 +1065,7 @@ pin_trait!(TxPin, BasicInstance);
 pin_trait!(CtsPin, BasicInstance);
 pin_trait!(RtsPin, BasicInstance);
 pin_trait!(CkPin, BasicInstance);
+pin_trait!(DePin, BasicInstance);
 dma_trait!(TxDma, BasicInstance);
 dma_trait!(RxDma, BasicInstance);
diff --git a/examples/rp/Cargo.toml b/examples/rp/Cargo.toml
index 60a8ba94d..b07c471af 100644
--- a/examples/rp/Cargo.toml
+++ b/examples/rp/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0"
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
 embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime"] }
-embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["defmt", "unstable-traits", "nightly", "unstable-pac", "time-driver"] }
+embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["defmt", "unstable-traits", "nightly", "unstable-pac", "time-driver", "pio"] }
 embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
 embassy-net = { version = "0.1.0", path = "../../embassy-net", features = ["defmt", "nightly", "tcp", "dhcpv4", "medium-ethernet", "pool-16"] }
 embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
@@ -34,6 +34,11 @@ embedded-io = { version = "0.4.0", features = ["async", "defmt"] }
 embedded-storage = { version = "0.3" }
 static_cell = "1.0.0"
 log = "0.4"
+pio-proc = "0.2"
+pio = "0.2"
 debug = true
+pio = {git = "https://github.com/rp-rs/pio-rs.git"}
diff --git a/examples/rp/src/bin/pio_async.rs b/examples/rp/src/bin/pio_async.rs
new file mode 100644
index 000000000..45a8c73f7
--- /dev/null
+++ b/examples/rp/src/bin/pio_async.rs
@@ -0,0 +1,112 @@
+use defmt::info;
+use embassy_executor::Spawner;
+use embassy_rp::gpio::{AnyPin, Pin};
+use embassy_rp::pio::{Pio0, PioPeripherial, PioStateMachine, PioStateMachineInstance, ShiftDirection, Sm0, Sm1, Sm2};
+use embassy_rp::pio_instr_util;
+use embassy_rp::relocate::RelocatedProgram;
+use {defmt_rtt as _, panic_probe as _};
+// Task for state machine 0: loads a program that shifts words out on `pin`
+// one bit at a time, then keeps feeding it words through the TX FIFO.
+async fn pio_task_sm0(mut sm: PioStateMachineInstance<Pio0, Sm0>, pin: AnyPin) {
+    // Setup sm0
+    // Send data serially to pin
+    let prg = pio_proc::pio_asm!(
+        ".origin 16",
+        "set pindirs, 1",
+        ".wrap_target",
+        "out pins,1 [19]",
+        ".wrap",
+    );
+    let relocated = RelocatedProgram::new(&prg.program);
+    let out_pin = sm.make_pio_pin(pin);
+    let pio_pins = [&out_pin];
+    sm.set_out_pins(&pio_pins);
+    sm.write_instr(relocated.origin() as usize, relocated.code());
+    // Start execution at the first instruction of the loaded program.
+    pio_instr_util::exec_jmp(&mut sm, relocated.origin());
+    sm.set_clkdiv((125e6 / 20.0 / 2e2 * 256.0) as u32);
+    sm.set_set_range(0, 1);
+    let pio::Wrap { source, target } = relocated.wrap();
+    sm.set_wrap(source, target);
+    sm.set_autopull(true);
+    sm.set_out_shift_dir(ShiftDirection::Left);
+    sm.set_enable(true);
+    let mut v = 0x0f0caffa;
+    loop {
+        sm.wait_push(v).await;
+        // Log the word that was actually pushed before toggling it for the next round.
+        info!("Pushed {:032b} to FIFO", v);
+        v ^= 0xffff;
+    }
+// Task for state machine 1: loads a program that repeatedly shifts the value of X
+// into the ISR, then logs every word that arrives through the RX FIFO.
+async fn pio_task_sm1(mut sm: PioStateMachineInstance<Pio0, Sm1>) {
+    // Setup sm1
+    // Read 0b10101 repeatedly until ISR is full
+    let prg = pio_proc::pio_asm!(".origin 8", "set x, 0x15", ".wrap_target", "in x, 5 [31]", ".wrap",);
+    let relocated = RelocatedProgram::new(&prg.program);
+    sm.write_instr(relocated.origin() as usize, relocated.code());
+    // Start execution at the first instruction of the loaded program.
+    pio_instr_util::exec_jmp(&mut sm, relocated.origin());
+    sm.set_clkdiv((125e6 / 2e3 * 256.0) as u32);
+    sm.set_set_range(0, 0);
+    let pio::Wrap { source, target } = relocated.wrap();
+    sm.set_wrap(source, target);
+    sm.set_autopush(true);
+    sm.set_in_shift_dir(ShiftDirection::Right);
+    sm.set_enable(true);
+    loop {
+        let rx = sm.wait_pull().await;
+        info!("Pulled {:032b} from FIFO", rx);
+    }
+// Task for state machine 2: loads a program that counts X down in a delay loop and
+// then raises IRQ 3, and logs each time that IRQ is awaited successfully.
+async fn pio_task_sm2(mut sm: PioStateMachineInstance<Pio0, Sm2>) {
+    // Setup sm2
+    // Repeatedly trigger IRQ 3
+    let prg = pio_proc::pio_asm!(
+        ".origin 0",
+        ".wrap_target",
+        "set x,10",
+        "delay:",
+        "jmp x-- delay [15]",
+        "irq 3 [15]",
+        ".wrap",
+    );
+    let relocated = RelocatedProgram::new(&prg.program);
+    sm.write_instr(relocated.origin() as usize, relocated.code());
+    let pio::Wrap { source, target } = relocated.wrap();
+    sm.set_wrap(source, target);
+    // Start execution at the first instruction of the loaded program.
+    pio_instr_util::exec_jmp(&mut sm, relocated.origin());
+    sm.set_clkdiv((125e6 / 2e3 * 256.0) as u32);
+    sm.set_enable(true);
+    loop {
+        sm.wait_irq(3).await;
+        info!("IRQ trigged");
+    }
+// Entry point: splits PIO0 into its state machines and spawns one task for each
+// of the first three. The common handle and state machine 3 are unused.
+async fn main(spawner: Spawner) {
+    let p = embassy_rp::init(Default::default());
+    let pio = p.PIO0;
+    let (_, sm0, sm1, sm2, ..) = pio.split();
+    // sm0 drives GPIO 0; the other two tasks need no pin.
+    spawner.spawn(pio_task_sm0(sm0, p.PIN_0.degrade())).unwrap();
+    spawner.spawn(pio_task_sm1(sm1)).unwrap();
+    spawner.spawn(pio_task_sm2(sm2)).unwrap();
diff --git a/examples/rp/src/bin/pio_dma.rs b/examples/rp/src/bin/pio_dma.rs
new file mode 100644
index 000000000..b19ef4083
--- /dev/null
+++ b/examples/rp/src/bin/pio_dma.rs
@@ -0,0 +1,69 @@
+use defmt::info;
+use embassy_executor::Spawner;
+use embassy_futures::join::join;
+use embassy_rp::pio::{PioPeripherial, PioStateMachine, ShiftDirection};
+use embassy_rp::relocate::RelocatedProgram;
+use embassy_rp::{pio_instr_util, Peripheral};
+use {defmt_rtt as _, panic_probe as _};
+// Reverses the order of the eight 4-bit nibbles in `v`.
+fn swap_nibbles(v: u32) -> u32 {
+    // Swap the two nibbles inside every byte.
+    let nibbles = ((v & 0x0f0f_0f0f) << 4) | ((v & 0xf0f0_f0f0) >> 4);
+    // Swap the two bytes inside every half-word.
+    let bytes = ((nibbles & 0x00ff_00ff) << 8) | ((nibbles & 0xff00_ff00) >> 8);
+    // Swap the two half-words.
+    bytes.rotate_left(16)
+// Entry point: runs a PIO program that moves data from the TX FIFO to the RX FIFO
+// four bits at a time, feeds and drains it with two DMA channels, and checks that
+// every received word equals `swap_nibbles` of the word sent.
+async fn main(_spawner: Spawner) {
+    let p = embassy_rp::init(Default::default());
+    let pio = p.PIO0;
+    // Only state machine 0 is used.
+    let (_, mut sm, ..) = pio.split();
+    let prg = pio_proc::pio_asm!(
+        ".origin 0",
+        "set pindirs,1",
+        ".wrap_target",
+        "set y,7",
+        "loop:",
+        "out x,4",
+        "in x,4",
+        "jmp y--, loop",
+        ".wrap",
+    );
+    let relocated = RelocatedProgram::new(&prg.program);
+    sm.write_instr(relocated.origin() as usize, relocated.code());
+    // Start execution at the first instruction of the loaded program.
+    pio_instr_util::exec_jmp(&mut sm, relocated.origin());
+    sm.set_clkdiv((125e6 / 10e3 * 256.0) as u32);
+    let pio::Wrap { source, target } = relocated.wrap();
+    sm.set_wrap(source, target);
+    sm.set_autopull(true);
+    sm.set_autopush(true);
+    sm.set_pull_threshold(32);
+    sm.set_push_threshold(32);
+    // Opposite shift directions for output and input.
+    sm.set_out_shift_dir(ShiftDirection::Right);
+    sm.set_in_shift_dir(ShiftDirection::Left);
+    sm.set_enable(true);
+    let mut dma_out_ref = p.DMA_CH0.into_ref();
+    let mut dma_in_ref = p.DMA_CH1.into_ref();
+    // Fill the output buffer with a simple deterministic sequence.
+    let mut dout = [0x12345678u32; 29];
+    for i in 1..dout.len() {
+        dout[i] = (dout[i - 1] & 0x0fff_ffff) * 13 + 7;
+    }
+    let mut din = [0u32; 29];
+    loop {
+        // Run both DMA transfers concurrently and wait for both to finish.
+        join(
+            sm.dma_push(dma_out_ref.reborrow(), &dout),
+            sm.dma_pull(dma_in_ref.reborrow(), &mut din),
+        )
+        .await;
+        for i in 0..din.len() {
+            assert_eq!(din[i], swap_nibbles(dout[i]));
+        }
+        info!("Swapped {} words", dout.len());
+    }