diff --git a/embassy-hal-internal/src/atomic_ring_buffer.rs b/embassy-hal-internal/src/atomic_ring_buffer.rs
index b4f2cec28..34ceac852 100644
--- a/embassy-hal-internal/src/atomic_ring_buffer.rs
+++ b/embassy-hal-internal/src/atomic_ring_buffer.rs
@@ -1,6 +1,6 @@
 //! Atomic reusable ringbuffer.
-use core::slice;
 use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
+use core::{ptr, slice};
 
 /// Atomic reusable ringbuffer
 ///
@@ -73,6 +73,7 @@ impl RingBuffer {
     pub unsafe fn deinit(&self) {
         // Ordering: it's OK to use `Relaxed` because this is not called
         // concurrently with other methods.
+        self.buf.store(ptr::null_mut(), Ordering::Relaxed);
         self.len.store(0, Ordering::Relaxed);
         self.start.store(0, Ordering::Relaxed);
         self.end.store(0, Ordering::Relaxed);
@@ -82,20 +83,46 @@ impl RingBuffer {
     ///
     /// # Safety
     ///
-    /// Only one reader can exist at a time.
+    /// - Only one reader can exist at a time.
+    /// - Ringbuffer must be initialized.
     pub unsafe fn reader(&self) -> Reader<'_> {
         Reader(self)
     }
 
+    /// Try creating a reader; returns `None` if the ring buffer is not initialized.
+    ///
+    /// # Safety
+    ///
+    /// Only one reader can exist at a time.
+    pub unsafe fn try_reader(&self) -> Option<Reader<'_>> {
+        if self.buf.load(Ordering::Relaxed).is_null() {
+            return None;
+        }
+        Some(Reader(self))
+    }
+
     /// Create a writer.
     ///
     /// # Safety
     ///
-    /// Only one writer can exist at a time.
+    /// - Only one writer can exist at a time.
+    /// - Ringbuffer must be initialized.
     pub unsafe fn writer(&self) -> Writer<'_> {
         Writer(self)
     }
 
+    /// Try creating a writer; returns `None` if the ring buffer is not initialized.
+    ///
+    /// # Safety
+    ///
+    /// Only one writer can exist at a time.
+    pub unsafe fn try_writer(&self) -> Option<Writer<'_>> {
+        if self.buf.load(Ordering::Relaxed).is_null() {
+            return None;
+        }
+        Some(Writer(self))
+    }
+
     /// Return length of buffer.
     pub fn len(&self) -> usize {
         self.len.load(Ordering::Relaxed)
diff --git a/embassy-nrf/src/buffered_uarte.rs b/embassy-nrf/src/buffered_uarte.rs
index fb72422bd..b04c96e09 100644
--- a/embassy-nrf/src/buffered_uarte.rs
+++ b/embassy-nrf/src/buffered_uarte.rs
@@ -17,29 +17,26 @@ use core::task::Poll;
 
 use embassy_hal_internal::atomic_ring_buffer::RingBuffer;
 use embassy_hal_internal::{into_ref, PeripheralRef};
-use embassy_sync::waitqueue::AtomicWaker;
 // Re-export SVD variants to allow user to directly set values
 pub use pac::uarte0::{baudrate::BAUDRATE_A as Baudrate, config::PARITY_A as Parity};
 
 use crate::gpio::sealed::Pin;
-use crate::gpio::{self, AnyPin, Pin as GpioPin, PselBits};
+use crate::gpio::{AnyPin, Pin as GpioPin, PselBits};
 use crate::interrupt::typelevel::Interrupt;
 use crate::ppi::{
     self, AnyConfigurableChannel, AnyGroup, Channel, ConfigurableChannel, Event, Group, Ppi, PpiGroup, Task,
 };
 use crate::timer::{Instance as TimerInstance, Timer};
-use crate::uarte::{apply_workaround_for_enable_anomaly, Config, Instance as UarteInstance};
+use crate::uarte::{configure, drop_tx_rx, Config, Instance as UarteInstance};
 use crate::{interrupt, pac, Peripheral};
 
 mod sealed {
     use super::*;
 
     pub struct State {
-        pub tx_waker: AtomicWaker,
         pub tx_buf: RingBuffer,
         pub tx_count: AtomicUsize,
 
-        pub rx_waker: AtomicWaker,
         pub rx_buf: RingBuffer,
         pub rx_started: AtomicBool,
         pub rx_started_count: AtomicU8,
@@ -61,11 +58,9 @@ pub(crate) use sealed::State;
 impl State {
     pub(crate) const fn new() -> Self {
         Self {
-            tx_waker: AtomicWaker::new(),
             tx_buf: RingBuffer::new(),
             tx_count: AtomicUsize::new(0),
 
-            rx_waker: AtomicWaker::new(),
             rx_buf: RingBuffer::new(),
             rx_started: AtomicBool::new(false),
             rx_started_count: AtomicU8::new(0),
@@ -84,128 +79,131 @@ impl<U: UarteInstance> interrupt::typelevel::Handler<U::Interrupt> for Interrupt
     unsafe fn on_interrupt() {
         //trace!("irq: start");
         let r = U::regs();
+        let ss = U::state();
         let s = U::buffered_state();
 
-        let buf_len = s.rx_buf.len();
-        let half_len = buf_len / 2;
-        let mut tx = unsafe { s.tx_buf.reader() };
-        let mut rx = unsafe { s.rx_buf.writer() };
+        if let Some(mut rx) = unsafe { s.rx_buf.try_writer() } {
+            let buf_len = s.rx_buf.len();
+            let half_len = buf_len / 2;
 
-        if r.events_error.read().bits() != 0 {
-            r.events_error.reset();
-            let errs = r.errorsrc.read();
-            r.errorsrc.write(|w| unsafe { w.bits(errs.bits()) });
+            if r.events_error.read().bits() != 0 {
+                r.events_error.reset();
+                let errs = r.errorsrc.read();
+                r.errorsrc.write(|w| unsafe { w.bits(errs.bits()) });
 
-            if errs.overrun().bit() {
-                panic!("BufferedUarte overrun");
+                if errs.overrun().bit() {
+                    panic!("BufferedUarte overrun");
+                }
             }
-        }
 
-        // Received some bytes, wake task.
-        if r.inten.read().rxdrdy().bit_is_set() && r.events_rxdrdy.read().bits() != 0 {
-            r.intenclr.write(|w| w.rxdrdy().clear());
-            r.events_rxdrdy.reset();
-            s.rx_waker.wake();
-        }
+            // Received some bytes, wake task.
+            if r.inten.read().rxdrdy().bit_is_set() && r.events_rxdrdy.read().bits() != 0 {
+                r.intenclr.write(|w| w.rxdrdy().clear());
+                r.events_rxdrdy.reset();
+                ss.rx_waker.wake();
+            }
 
-        if r.events_endrx.read().bits() != 0 {
-            //trace!("  irq_rx: endrx");
-            r.events_endrx.reset();
+            if r.events_endrx.read().bits() != 0 {
+                //trace!("  irq_rx: endrx");
+                r.events_endrx.reset();
 
-            let val = s.rx_ended_count.load(Ordering::Relaxed);
-            s.rx_ended_count.store(val.wrapping_add(1), Ordering::Relaxed);
-        }
+                let val = s.rx_ended_count.load(Ordering::Relaxed);
+                s.rx_ended_count.store(val.wrapping_add(1), Ordering::Relaxed);
+            }
 
-        if r.events_rxstarted.read().bits() != 0 || !s.rx_started.load(Ordering::Relaxed) {
-            //trace!("  irq_rx: rxstarted");
-            let (ptr, len) = rx.push_buf();
-            if len >= half_len {
-                r.events_rxstarted.reset();
+            if r.events_rxstarted.read().bits() != 0 || !s.rx_started.load(Ordering::Relaxed) {
+                //trace!("  irq_rx: rxstarted");
+                let (ptr, len) = rx.push_buf();
+                if len >= half_len {
+                    r.events_rxstarted.reset();
 
-                //trace!("  irq_rx: starting second {:?}", half_len);
+                    //trace!("  irq_rx: starting second {:?}", half_len);
 
-                // Set up the DMA read
-                r.rxd.ptr.write(|w| unsafe { w.ptr().bits(ptr as u32) });
-                r.rxd.maxcnt.write(|w| unsafe { w.maxcnt().bits(half_len as _) });
+                    // Set up the DMA read
+                    r.rxd.ptr.write(|w| unsafe { w.ptr().bits(ptr as u32) });
+                    r.rxd.maxcnt.write(|w| unsafe { w.maxcnt().bits(half_len as _) });
 
-                let chn = s.rx_ppi_ch.load(Ordering::Relaxed);
+                    let chn = s.rx_ppi_ch.load(Ordering::Relaxed);
 
-                // Enable endrx -> startrx PPI channel.
-                // From this point on, if endrx happens, startrx is automatically fired.
-                ppi::regs().chenset.write(|w| unsafe { w.bits(1 << chn) });
+                    // Enable endrx -> startrx PPI channel.
+                    // From this point on, if endrx happens, startrx is automatically fired.
+                    ppi::regs().chenset.write(|w| unsafe { w.bits(1 << chn) });
 
-                // It is possible that endrx happened BEFORE enabling the PPI. In this case
-                // the PPI channel doesn't trigger, and we'd hang. We have to detect this
-                // and manually start.
+                    // It is possible that endrx happened BEFORE enabling the PPI. In this case
+                    // the PPI channel doesn't trigger, and we'd hang. We have to detect this
+                    // and manually start.
 
-                // check again in case endrx has happened between the last check and now.
-                if r.events_endrx.read().bits() != 0 {
-                    //trace!("  irq_rx: endrx");
-                    r.events_endrx.reset();
+                    // check again in case endrx has happened between the last check and now.
+                    if r.events_endrx.read().bits() != 0 {
+                        //trace!("  irq_rx: endrx");
+                        r.events_endrx.reset();
 
-                    let val = s.rx_ended_count.load(Ordering::Relaxed);
-                    s.rx_ended_count.store(val.wrapping_add(1), Ordering::Relaxed);
+                        let val = s.rx_ended_count.load(Ordering::Relaxed);
+                        s.rx_ended_count.store(val.wrapping_add(1), Ordering::Relaxed);
+                    }
+
+                    let rx_ended = s.rx_ended_count.load(Ordering::Relaxed);
+                    let rx_started = s.rx_started_count.load(Ordering::Relaxed);
+
+                    // If we started the same amount of transfers as ended, the last rxend has
+                    // already occurred.
+                    let rxend_happened = rx_started == rx_ended;
+
+                    // Check if the PPI channel is still enabled. The PPI channel disables itself
+                    // when it fires, so if it's still enabled it hasn't fired.
+                    let ppi_ch_enabled = ppi::regs().chen.read().bits() & (1 << chn) != 0;
+
+                    // if rxend happened, and the ppi channel hasn't fired yet, the rxend got missed.
+                    // this condition also naturally matches if `!started`, needed to kickstart the DMA.
+                    if rxend_happened && ppi_ch_enabled {
+                        //trace!("manually starting.");
+
+                        // disable the ppi ch, it's of no use anymore.
+                        ppi::regs().chenclr.write(|w| unsafe { w.bits(1 << chn) });
+
+                        // manually start
+                        r.tasks_startrx.write(|w| unsafe { w.bits(1) });
+                    }
+
+                    rx.push_done(half_len);
+
+                    s.rx_started_count.store(rx_started.wrapping_add(1), Ordering::Relaxed);
+                    s.rx_started.store(true, Ordering::Relaxed);
+                } else {
+                    //trace!("  irq_rx: rxstarted no buf");
+                    r.intenclr.write(|w| w.rxstarted().clear());
                 }
-
-                let rx_ended = s.rx_ended_count.load(Ordering::Relaxed);
-                let rx_started = s.rx_started_count.load(Ordering::Relaxed);
-
-                // If we started the same amount of transfers as ended, the last rxend has
-                // already occured.
-                let rxend_happened = rx_started == rx_ended;
-
-                // Check if the PPI channel is still enabled. The PPI channel disables itself
-                // when it fires, so if it's still enabled it hasn't fired.
-                let ppi_ch_enabled = ppi::regs().chen.read().bits() & (1 << chn) != 0;
-
-                // if rxend happened, and the ppi channel hasn't fired yet, the rxend got missed.
-                // this condition also naturally matches if `!started`, needed to kickstart the DMA.
-                if rxend_happened && ppi_ch_enabled {
-                    //trace!("manually starting.");
-
-                    // disable the ppi ch, it's of no use anymore.
-                    ppi::regs().chenclr.write(|w| unsafe { w.bits(1 << chn) });
-
-                    // manually start
-                    r.tasks_startrx.write(|w| unsafe { w.bits(1) });
-                }
-
-                rx.push_done(half_len);
-
-                s.rx_started_count.store(rx_started.wrapping_add(1), Ordering::Relaxed);
-                s.rx_started.store(true, Ordering::Relaxed);
-            } else {
-                //trace!("  irq_rx: rxstarted no buf");
-                r.intenclr.write(|w| w.rxstarted().clear());
             }
         }
 
         // =============================
 
-        // TX end
-        if r.events_endtx.read().bits() != 0 {
-            r.events_endtx.reset();
+        if let Some(mut tx) = unsafe { s.tx_buf.try_reader() } {
+            // TX end
+            if r.events_endtx.read().bits() != 0 {
+                r.events_endtx.reset();
 
-            let n = s.tx_count.load(Ordering::Relaxed);
-            //trace!("  irq_tx: endtx {:?}", n);
-            tx.pop_done(n);
-            s.tx_waker.wake();
-            s.tx_count.store(0, Ordering::Relaxed);
-        }
+                let n = s.tx_count.load(Ordering::Relaxed);
+                //trace!("  irq_tx: endtx {:?}", n);
+                tx.pop_done(n);
+                ss.tx_waker.wake();
+                s.tx_count.store(0, Ordering::Relaxed);
+            }
 
-        // If not TXing, start.
-        if s.tx_count.load(Ordering::Relaxed) == 0 {
-            let (ptr, len) = tx.pop_buf();
-            if len != 0 {
-                //trace!("  irq_tx: starting {:?}", len);
-                s.tx_count.store(len, Ordering::Relaxed);
+            // If not TXing, start.
+            if s.tx_count.load(Ordering::Relaxed) == 0 {
+                let (ptr, len) = tx.pop_buf();
+                if len != 0 {
+                    //trace!("  irq_tx: starting {:?}", len);
+                    s.tx_count.store(len, Ordering::Relaxed);
 
-                // Set up the DMA write
-                r.txd.ptr.write(|w| unsafe { w.ptr().bits(ptr as u32) });
-                r.txd.maxcnt.write(|w| unsafe { w.maxcnt().bits(len as _) });
+                    // Set up the DMA write
+                    r.txd.ptr.write(|w| unsafe { w.ptr().bits(ptr as u32) });
+                    r.txd.maxcnt.write(|w| unsafe { w.maxcnt().bits(len as _) });
 
-                // Start UARTE Transmit transaction
-                r.tasks_starttx.write(|w| unsafe { w.bits(1) });
+                    // Start UARTE Transmit transaction
+                    r.tasks_starttx.write(|w| unsafe { w.bits(1) });
+                }
             }
         }
 
@@ -215,11 +213,8 @@ impl<U: UarteInstance> interrupt::typelevel::Handler<U::Interrupt> for Interrupt
 
 /// Buffered UARTE driver.
 pub struct BufferedUarte<'d, U: UarteInstance, T: TimerInstance> {
-    _peri: PeripheralRef<'d, U>,
-    timer: Timer<'d, T>,
-    _ppi_ch1: Ppi<'d, AnyConfigurableChannel, 1, 1>,
-    _ppi_ch2: Ppi<'d, AnyConfigurableChannel, 1, 2>,
-    _ppi_group: PpiGroup<'d, AnyGroup>,
+    tx: BufferedUarteTx<'d, U>,
+    rx: BufferedUarteRx<'d, U, T>,
 }
 
 impl<'d, U: UarteInstance, T: TimerInstance> Unpin for BufferedUarte<'d, U, T> {}
@@ -243,7 +238,7 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
         rx_buffer: &'d mut [u8],
         tx_buffer: &'d mut [u8],
     ) -> Self {
-        into_ref!(rxd, txd, ppi_ch1, ppi_ch2, ppi_group);
+        into_ref!(uarte, timer, rxd, txd, ppi_ch1, ppi_ch2, ppi_group);
         Self::new_inner(
             uarte,
             timer,
@@ -280,7 +275,7 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
         rx_buffer: &'d mut [u8],
         tx_buffer: &'d mut [u8],
     ) -> Self {
-        into_ref!(rxd, txd, cts, rts, ppi_ch1, ppi_ch2, ppi_group);
+        into_ref!(uarte, timer, rxd, txd, cts, rts, ppi_ch1, ppi_ch2, ppi_group);
         Self::new_inner(
             uarte,
             timer,
@@ -298,8 +293,8 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
     }
 
     fn new_inner(
-        peri: impl Peripheral<P = U> + 'd,
-        timer: impl Peripheral<P = T> + 'd,
+        peri: PeripheralRef<'d, U>,
+        timer: PeripheralRef<'d, T>,
         ppi_ch1: PeripheralRef<'d, AnyConfigurableChannel>,
         ppi_ch2: PeripheralRef<'d, AnyConfigurableChannel>,
         ppi_group: PeripheralRef<'d, AnyGroup>,
@@ -311,16 +306,127 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
         rx_buffer: &'d mut [u8],
         tx_buffer: &'d mut [u8],
     ) -> Self {
-        into_ref!(peri, timer);
+        configure(U::regs(), config, cts.is_some());
 
-        assert!(rx_buffer.len() % 2 == 0);
+        let tx = BufferedUarteTx::new_innerer(unsafe { peri.clone_unchecked() }, txd, cts, tx_buffer);
+        let rx = BufferedUarteRx::new_innerer(peri, timer, ppi_ch1, ppi_ch2, ppi_group, rxd, rts, rx_buffer);
 
+        U::Interrupt::pend();
+        unsafe { U::Interrupt::enable() };
+
+        U::state().tx_rx_refcount.store(2, Ordering::Relaxed);
+
+        Self { tx, rx }
+    }
+
+    /// Adjust the baud rate to the provided value.
+    pub fn set_baudrate(&mut self, baudrate: Baudrate) {
         let r = U::regs();
+        r.baudrate.write(|w| w.baudrate().variant(baudrate));
+    }
 
-        let hwfc = cts.is_some();
+    /// Split the UART in reader and writer parts.
+    ///
+    /// This allows reading and writing concurrently from independent tasks.
+    pub fn split(self) -> (BufferedUarteRx<'d, U, T>, BufferedUarteTx<'d, U>) {
+        (self.rx, self.tx)
+    }
 
-        rxd.conf().write(|w| w.input().connect().drive().h0h1());
-        r.psel.rxd.write(|w| unsafe { w.bits(rxd.psel_bits()) });
+    /// Split the UART in reader and writer parts, by reference.
+    ///
+    /// The returned halves borrow from `self`, so you can drop them and go back to using
+    /// the "un-split" `self`. This allows temporarily splitting the UART.
+    pub fn split_by_ref(&mut self) -> (&mut BufferedUarteRx<'d, U, T>, &mut BufferedUarteTx<'d, U>) {
+        (&mut self.rx, &mut self.tx)
+    }
+
+    /// Pull some bytes from this source into the specified buffer, returning how many bytes were read.
+    pub async fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
+        self.rx.read(buf).await
+    }
+
+    /// Return the contents of the internal buffer, filling it with more data from the inner reader if it is empty.
+    pub async fn fill_buf(&mut self) -> Result<&[u8], Error> {
+        self.rx.fill_buf().await
+    }
+
+    /// Tell this buffer that `amt` bytes have been consumed from the buffer, so they should no longer be returned in calls to `fill_buf`.
+    pub fn consume(&mut self, amt: usize) {
+        self.rx.consume(amt)
+    }
+
+    /// Write a buffer into this writer, returning how many bytes were written.
+    pub async fn write(&mut self, buf: &[u8]) -> Result<usize, Error> {
+        self.tx.write(buf).await
+    }
+
+    /// Flush this output stream, ensuring that all intermediately buffered contents reach their destination.
+    pub async fn flush(&mut self) -> Result<(), Error> {
+        self.tx.flush().await
+    }
+}
+
+/// Writer part of the buffered UARTE driver.
+pub struct BufferedUarteTx<'d, U: UarteInstance> {
+    _peri: PeripheralRef<'d, U>,
+}
+
+impl<'d, U: UarteInstance> BufferedUarteTx<'d, U> {
+    /// Create a new BufferedUarteTx without hardware flow control.
+    pub fn new(
+        uarte: impl Peripheral<P = U> + 'd,
+        _irq: impl interrupt::typelevel::Binding<U::Interrupt, InterruptHandler<U>> + 'd,
+        txd: impl Peripheral<P = impl GpioPin> + 'd,
+        config: Config,
+        tx_buffer: &'d mut [u8],
+    ) -> Self {
+        into_ref!(uarte, txd);
+        Self::new_inner(uarte, txd.map_into(), None, config, tx_buffer)
+    }
+
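+    /// Create a new BufferedUarteTx with hardware flow control (CTS).
+    ///
+    /// Only the transmit half is created, so `cts` is the only flow-control pin
+    /// taken and only `tx_buffer` is used. There is no `rx_buffer` here, so the
+    /// even-length requirement of the RX constructors does not apply.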
+    pub fn new_with_cts(
+        uarte: impl Peripheral<P = U> + 'd,
+        _irq: impl interrupt::typelevel::Binding<U::Interrupt, InterruptHandler<U>> + 'd,
+        txd: impl Peripheral<P = impl GpioPin> + 'd,
+        cts: impl Peripheral<P = impl GpioPin> + 'd,
+        config: Config,
+        tx_buffer: &'d mut [u8],
+    ) -> Self {
+        into_ref!(uarte, txd, cts);
+        Self::new_inner(uarte, txd.map_into(), Some(cts.map_into()), config, tx_buffer)
+    }
+
+    fn new_inner(
+        peri: PeripheralRef<'d, U>,
+        txd: PeripheralRef<'d, AnyPin>,
+        cts: Option<PeripheralRef<'d, AnyPin>>,
+        config: Config,
+        tx_buffer: &'d mut [u8],
+    ) -> Self {
+        configure(U::regs(), config, cts.is_some());
+
+        let this = Self::new_innerer(peri, txd, cts, tx_buffer);
+
+        U::Interrupt::pend();
+        unsafe { U::Interrupt::enable() };
+
+        U::state().tx_rx_refcount.store(1, Ordering::Relaxed);
+
+        this
+    }
+
+    fn new_innerer(
+        peri: PeripheralRef<'d, U>,
+        txd: PeripheralRef<'d, AnyPin>,
+        cts: Option<PeripheralRef<'d, AnyPin>>,
+        tx_buffer: &'d mut [u8],
+    ) -> Self {
+        let r = U::regs();
 
         txd.set_high();
         txd.conf().write(|w| w.dir().output().drive().h0h1());
@@ -331,6 +437,203 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
         }
         r.psel.cts.write(|w| unsafe { w.bits(cts.psel_bits()) });
 
+        // Initialize state
+        let s = U::buffered_state();
+        s.tx_count.store(0, Ordering::Relaxed);
+        let len = tx_buffer.len();
+        unsafe { s.tx_buf.init(tx_buffer.as_mut_ptr(), len) };
+
+        r.events_txstarted.reset();
+
+        // Enable interrupts
+        r.intenset.write(|w| {
+            w.endtx().set();
+            w
+        });
+
+        Self { _peri: peri }
+    }
+
+    /// Write a buffer into this writer, returning how many bytes were written.
+    pub async fn write(&mut self, buf: &[u8]) -> Result<usize, Error> {
+        poll_fn(move |cx| {
+            //trace!("poll_write: {:?}", buf.len());
+            let ss = U::state();
+            let s = U::buffered_state();
+            let mut tx = unsafe { s.tx_buf.writer() };
+
+            let tx_buf = tx.push_slice();
+            if tx_buf.is_empty() {
+                //trace!("poll_write: pending");
+                ss.tx_waker.register(cx.waker());
+                return Poll::Pending;
+            }
+
+            let n = min(tx_buf.len(), buf.len());
+            tx_buf[..n].copy_from_slice(&buf[..n]);
+            tx.push_done(n);
+
+            //trace!("poll_write: queued {:?}", n);
+
+            compiler_fence(Ordering::SeqCst);
+            U::Interrupt::pend();
+
+            Poll::Ready(Ok(n))
+        })
+        .await
+    }
+
+    /// Flush this output stream, ensuring that all intermediately buffered contents reach their destination.
+    pub async fn flush(&mut self) -> Result<(), Error> {
+        poll_fn(move |cx| {
+            //trace!("poll_flush");
+            let ss = U::state();
+            let s = U::buffered_state();
+            if !s.tx_buf.is_empty() {
+                //trace!("poll_flush: pending");
+                ss.tx_waker.register(cx.waker());
+                return Poll::Pending;
+            }
+
+            Poll::Ready(Ok(()))
+        })
+        .await
+    }
+}
+
+impl<'a, U: UarteInstance> Drop for BufferedUarteTx<'a, U> {
+    fn drop(&mut self) {
+        let r = U::regs();
+
+        r.intenclr.write(|w| {
+            w.txdrdy().set_bit();
+            w.txstarted().set_bit();
+            w.txstopped().set_bit();
+            w
+        });
+        r.events_txstopped.reset();
+        r.tasks_stoptx.write(|w| unsafe { w.bits(1) });
+        while r.events_txstopped.read().bits() == 0 {}
+
+        let s = U::buffered_state();
+        unsafe { s.tx_buf.deinit() }
+
+        let s = U::state();
+        drop_tx_rx(r, s);
+    }
+}
+
+/// Reader part of the buffered UARTE driver.
+pub struct BufferedUarteRx<'d, U: UarteInstance, T: TimerInstance> {
+    _peri: PeripheralRef<'d, U>,
+    timer: Timer<'d, T>,
+    _ppi_ch1: Ppi<'d, AnyConfigurableChannel, 1, 1>,
+    _ppi_ch2: Ppi<'d, AnyConfigurableChannel, 1, 2>,
+    _ppi_group: PpiGroup<'d, AnyGroup>,
+}
+
+impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarteRx<'d, U, T> {
+    /// Create a new BufferedUarteRx without hardware flow control.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `rx_buffer.len()` is odd.
+    pub fn new(
+        uarte: impl Peripheral<P = U> + 'd,
+        timer: impl Peripheral<P = T> + 'd,
+        ppi_ch1: impl Peripheral<P = impl ConfigurableChannel> + 'd,
+        ppi_ch2: impl Peripheral<P = impl ConfigurableChannel> + 'd,
+        ppi_group: impl Peripheral<P = impl Group> + 'd,
+        _irq: impl interrupt::typelevel::Binding<U::Interrupt, InterruptHandler<U>> + 'd,
+        rxd: impl Peripheral<P = impl GpioPin> + 'd,
+        config: Config,
+        rx_buffer: &'d mut [u8],
+    ) -> Self {
+        into_ref!(uarte, timer, rxd, ppi_ch1, ppi_ch2, ppi_group);
+        Self::new_inner(
+            uarte,
+            timer,
+            ppi_ch1.map_into(),
+            ppi_ch2.map_into(),
+            ppi_group.map_into(),
+            rxd.map_into(),
+            None,
+            config,
+            rx_buffer,
+        )
+    }
+
+    /// Create a new BufferedUarteRx with hardware flow control (RTS).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `rx_buffer.len()` is odd.
+    pub fn new_with_rts(
+        uarte: impl Peripheral<P = U> + 'd,
+        timer: impl Peripheral<P = T> + 'd,
+        ppi_ch1: impl Peripheral<P = impl ConfigurableChannel> + 'd,
+        ppi_ch2: impl Peripheral<P = impl ConfigurableChannel> + 'd,
+        ppi_group: impl Peripheral<P = impl Group> + 'd,
+        _irq: impl interrupt::typelevel::Binding<U::Interrupt, InterruptHandler<U>> + 'd,
+        rxd: impl Peripheral<P = impl GpioPin> + 'd,
+        rts: impl Peripheral<P = impl GpioPin> + 'd,
+        config: Config,
+        rx_buffer: &'d mut [u8],
+    ) -> Self {
+        into_ref!(uarte, timer, rxd, rts, ppi_ch1, ppi_ch2, ppi_group);
+        Self::new_inner(
+            uarte,
+            timer,
+            ppi_ch1.map_into(),
+            ppi_ch2.map_into(),
+            ppi_group.map_into(),
+            rxd.map_into(),
+            Some(rts.map_into()),
+            config,
+            rx_buffer,
+        )
+    }
+
+    fn new_inner(
+        peri: PeripheralRef<'d, U>,
+        timer: PeripheralRef<'d, T>,
+        ppi_ch1: PeripheralRef<'d, AnyConfigurableChannel>,
+        ppi_ch2: PeripheralRef<'d, AnyConfigurableChannel>,
+        ppi_group: PeripheralRef<'d, AnyGroup>,
+        rxd: PeripheralRef<'d, AnyPin>,
+        rts: Option<PeripheralRef<'d, AnyPin>>,
+        config: Config,
+        rx_buffer: &'d mut [u8],
+    ) -> Self {
+        configure(U::regs(), config, rts.is_some());
+
+        let this = Self::new_innerer(peri, timer, ppi_ch1, ppi_ch2, ppi_group, rxd, rts, rx_buffer);
+
+        U::Interrupt::pend();
+        unsafe { U::Interrupt::enable() };
+
+        U::state().tx_rx_refcount.store(1, Ordering::Relaxed);
+
+        this
+    }
+
+    fn new_innerer(
+        peri: PeripheralRef<'d, U>,
+        timer: PeripheralRef<'d, T>,
+        ppi_ch1: PeripheralRef<'d, AnyConfigurableChannel>,
+        ppi_ch2: PeripheralRef<'d, AnyConfigurableChannel>,
+        ppi_group: PeripheralRef<'d, AnyGroup>,
+        rxd: PeripheralRef<'d, AnyPin>,
+        rts: Option<PeripheralRef<'d, AnyPin>>,
+        rx_buffer: &'d mut [u8],
+    ) -> Self {
+        assert!(rx_buffer.len() % 2 == 0);
+
+        let r = U::regs();
+
+        rxd.conf().write(|w| w.input().connect().drive().h0h1());
+        r.psel.rxd.write(|w| unsafe { w.bits(rxd.psel_bits()) });
+
         if let Some(pin) = &rts {
             pin.set_high();
             pin.conf().write(|w| w.dir().output().drive().h0h1());
@@ -339,35 +642,21 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
 
         // Initialize state
         let s = U::buffered_state();
-        s.tx_count.store(0, Ordering::Relaxed);
         s.rx_started_count.store(0, Ordering::Relaxed);
         s.rx_ended_count.store(0, Ordering::Relaxed);
         s.rx_started.store(false, Ordering::Relaxed);
-        let len = tx_buffer.len();
-        unsafe { s.tx_buf.init(tx_buffer.as_mut_ptr(), len) };
         let len = rx_buffer.len();
         unsafe { s.rx_buf.init(rx_buffer.as_mut_ptr(), len) };
 
-        // Configure
-        r.config.write(|w| {
-            w.hwfc().bit(hwfc);
-            w.parity().variant(config.parity);
-            w
-        });
-        r.baudrate.write(|w| w.baudrate().variant(config.baudrate));
-
         // clear errors
         let errors = r.errorsrc.read().bits();
         r.errorsrc.write(|w| unsafe { w.bits(errors) });
 
         r.events_rxstarted.reset();
-        r.events_txstarted.reset();
         r.events_error.reset();
         r.events_endrx.reset();
-        r.events_endtx.reset();
 
         // Enable interrupts
-        r.intenclr.write(|w| unsafe { w.bits(!0) });
         r.intenset.write(|w| {
             w.endtx().set();
             w.rxstarted().set();
@@ -376,10 +665,6 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
             w
         });
 
-        // Enable UARTE instance
-        apply_workaround_for_enable_anomaly(r);
-        r.enable.write(|w| w.enable().enabled());
-
         // Configure byte counter.
         let timer = Timer::new_counter(timer);
         timer.cc(1).write(rx_buffer.len() as u32 * 2);
@@ -401,9 +686,6 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
         ppi_ch2.disable();
         ppi_group.add_channel(&ppi_ch2);
 
-        U::Interrupt::pend();
-        unsafe { U::Interrupt::enable() };
-
         Self {
             _peri: peri,
             timer,
@@ -413,80 +695,24 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
         }
     }
 
-    fn pend_irq() {
-        U::Interrupt::pend()
-    }
-
-    /// Adjust the baud rate to the provided value.
-    pub fn set_baudrate(&mut self, baudrate: Baudrate) {
-        let r = U::regs();
-        r.baudrate.write(|w| w.baudrate().variant(baudrate));
-    }
-
-    /// Split the UART in reader and writer parts.
-    ///
-    /// This allows reading and writing concurrently from independent tasks.
-    pub fn split<'u>(&'u mut self) -> (BufferedUarteRx<'u, 'd, U, T>, BufferedUarteTx<'u, 'd, U, T>) {
-        (BufferedUarteRx { inner: self }, BufferedUarteTx { inner: self })
-    }
-
-    async fn inner_read(&self, buf: &mut [u8]) -> Result<usize, Error> {
-        let data = self.inner_fill_buf().await?;
+    /// Pull some bytes from this source into the specified buffer, returning how many bytes were read.
+    pub async fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
+        let data = self.fill_buf().await?;
         let n = data.len().min(buf.len());
         buf[..n].copy_from_slice(&data[..n]);
-        self.inner_consume(n);
+        self.consume(n);
         Ok(n)
     }
 
-    async fn inner_write<'a>(&'a self, buf: &'a [u8]) -> Result<usize, Error> {
-        poll_fn(move |cx| {
-            //trace!("poll_write: {:?}", buf.len());
-            let s = U::buffered_state();
-            let mut tx = unsafe { s.tx_buf.writer() };
-
-            let tx_buf = tx.push_slice();
-            if tx_buf.is_empty() {
-                //trace!("poll_write: pending");
-                s.tx_waker.register(cx.waker());
-                return Poll::Pending;
-            }
-
-            let n = min(tx_buf.len(), buf.len());
-            tx_buf[..n].copy_from_slice(&buf[..n]);
-            tx.push_done(n);
-
-            //trace!("poll_write: queued {:?}", n);
-
-            compiler_fence(Ordering::SeqCst);
-            Self::pend_irq();
-
-            Poll::Ready(Ok(n))
-        })
-        .await
-    }
-
-    async fn inner_flush<'a>(&'a self) -> Result<(), Error> {
-        poll_fn(move |cx| {
-            //trace!("poll_flush");
-            let s = U::buffered_state();
-            if !s.tx_buf.is_empty() {
-                //trace!("poll_flush: pending");
-                s.tx_waker.register(cx.waker());
-                return Poll::Pending;
-            }
-
-            Poll::Ready(Ok(()))
-        })
-        .await
-    }
-
-    async fn inner_fill_buf<'a>(&'a self) -> Result<&'a [u8], Error> {
+    /// Return the contents of the internal buffer, filling it with more data from the inner reader if it is empty.
+    pub async fn fill_buf(&mut self) -> Result<&[u8], Error> {
         poll_fn(move |cx| {
             compiler_fence(Ordering::SeqCst);
             //trace!("poll_read");
 
             let r = U::regs();
             let s = U::buffered_state();
+            let ss = U::state();
 
             // Read the RXDRDY counter.
             T::regs().tasks_capture[0].write(|w| unsafe { w.bits(1) });
@@ -510,7 +736,7 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
             let len = s.rx_buf.len();
             if start == end {
                 //trace!("  empty");
-                s.rx_waker.register(cx.waker());
+                ss.rx_waker.register(cx.waker());
                 r.intenset.write(|w| w.rxdrdy().set_bit());
                 return Poll::Pending;
             }
@@ -532,7 +758,8 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
         .await
     }
 
-    fn inner_consume(&self, amt: usize) {
+    /// Tell this buffer that `amt` bytes have been consumed from the buffer, so they should no longer be returned in calls to `fill_buf`.
+    pub fn consume(&mut self, amt: usize) {
         if amt == 0 {
             return;
         }
@@ -542,69 +769,31 @@ impl<'d, U: UarteInstance, T: TimerInstance> BufferedUarte<'d, U, T> {
         rx.pop_done(amt);
         U::regs().intenset.write(|w| w.rxstarted().set());
     }
-
-    /// Pull some bytes from this source into the specified buffer, returning how many bytes were read.
-    pub async fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
-        self.inner_read(buf).await
-    }
-
-    /// Return the contents of the internal buffer, filling it with more data from the inner reader if it is empty.
-    pub async fn fill_buf(&mut self) -> Result<&[u8], Error> {
-        self.inner_fill_buf().await
-    }
-
-    /// Tell this buffer that `amt` bytes have been consumed from the buffer, so they should no longer be returned in calls to `fill_buf`.
-    pub fn consume(&mut self, amt: usize) {
-        self.inner_consume(amt)
-    }
-
-    /// Write a buffer into this writer, returning how many bytes were written.
-    pub async fn write(&mut self, buf: &[u8]) -> Result<usize, Error> {
-        self.inner_write(buf).await
-    }
-
-    /// Flush this output stream, ensuring that all intermediately buffered contents reach their destination.
-    pub async fn flush(&mut self) -> Result<(), Error> {
-        self.inner_flush().await
-    }
 }
 
-/// Reader part of the buffered UARTE driver.
-pub struct BufferedUarteTx<'u, 'd, U: UarteInstance, T: TimerInstance> {
-    inner: &'u BufferedUarte<'d, U, T>,
-}
+impl<'a, U: UarteInstance, T: TimerInstance> Drop for BufferedUarteRx<'a, U, T> {
+    fn drop(&mut self) {
+        self._ppi_group.disable_all();
 
-impl<'u, 'd, U: UarteInstance, T: TimerInstance> BufferedUarteTx<'u, 'd, U, T> {
-    /// Write a buffer into this writer, returning how many bytes were written.
-    pub async fn write(&mut self, buf: &[u8]) -> Result<usize, Error> {
-        self.inner.inner_write(buf).await
-    }
+        let r = U::regs();
 
-    /// Flush this output stream, ensuring that all intermediately buffered contents reach their destination.
-    pub async fn flush(&mut self) -> Result<(), Error> {
-        self.inner.inner_flush().await
-    }
-}
+        self.timer.stop();
 
-/// Writer part of the buffered UARTE driver.
-pub struct BufferedUarteRx<'u, 'd, U: UarteInstance, T: TimerInstance> {
-    inner: &'u BufferedUarte<'d, U, T>,
-}
+        r.intenclr.write(|w| {
+            w.rxdrdy().set_bit();
+            w.rxstarted().set_bit();
+            w.rxto().set_bit();
+            w
+        });
+        r.events_rxto.reset();
+        r.tasks_stoprx.write(|w| unsafe { w.bits(1) });
+        while r.events_rxto.read().bits() == 0 {}
 
-impl<'u, 'd, U: UarteInstance, T: TimerInstance> BufferedUarteRx<'u, 'd, U, T> {
-    /// Pull some bytes from this source into the specified buffer, returning how many bytes were read.
-    pub async fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
-        self.inner.inner_read(buf).await
-    }
+        let s = U::buffered_state();
+        unsafe { s.rx_buf.deinit() }
 
-    /// Return the contents of the internal buffer, filling it with more data from the inner reader if it is empty.
-    pub async fn fill_buf(&mut self) -> Result<&[u8], Error> {
-        self.inner.inner_fill_buf().await
-    }
-
-    /// Tell this buffer that `amt` bytes have been consumed from the buffer, so they should no longer be returned in calls to `fill_buf`.
-    pub fn consume(&mut self, amt: usize) {
-        self.inner.inner_consume(amt)
+        let s = U::state();
+        drop_tx_rx(r, s);
     }
 }
 
@@ -621,95 +810,63 @@ mod _embedded_io {
         type Error = Error;
     }
 
-    impl<'u, 'd, U: UarteInstance, T: TimerInstance> embedded_io_async::ErrorType for BufferedUarteRx<'u, 'd, U, T> {
+    impl<'d, U: UarteInstance, T: TimerInstance> embedded_io_async::ErrorType for BufferedUarteRx<'d, U, T> {
         type Error = Error;
     }
 
-    impl<'u, 'd, U: UarteInstance, T: TimerInstance> embedded_io_async::ErrorType for BufferedUarteTx<'u, 'd, U, T> {
+    impl<'d, U: UarteInstance> embedded_io_async::ErrorType for BufferedUarteTx<'d, U> {
         type Error = Error;
     }
 
     impl<'d, U: UarteInstance, T: TimerInstance> embedded_io_async::Read for BufferedUarte<'d, U, T> {
         async fn read(&mut self, buf: &mut [u8]) -> Result<usize, Self::Error> {
-            self.inner_read(buf).await
+            self.read(buf).await
         }
     }
 
-    impl<'u, 'd: 'u, U: UarteInstance, T: TimerInstance> embedded_io_async::Read for BufferedUarteRx<'u, 'd, U, T> {
+    impl<'d: 'd, U: UarteInstance, T: TimerInstance> embedded_io_async::Read for BufferedUarteRx<'d, U, T> {
         async fn read(&mut self, buf: &mut [u8]) -> Result<usize, Self::Error> {
-            self.inner.inner_read(buf).await
+            self.read(buf).await
         }
     }
 
     impl<'d, U: UarteInstance, T: TimerInstance> embedded_io_async::BufRead for BufferedUarte<'d, U, T> {
         async fn fill_buf(&mut self) -> Result<&[u8], Self::Error> {
-            self.inner_fill_buf().await
+            self.fill_buf().await
         }
 
         fn consume(&mut self, amt: usize) {
-            self.inner_consume(amt)
+            self.consume(amt)
         }
     }
 
-    impl<'u, 'd: 'u, U: UarteInstance, T: TimerInstance> embedded_io_async::BufRead for BufferedUarteRx<'u, 'd, U, T> {
+    impl<'d: 'd, U: UarteInstance, T: TimerInstance> embedded_io_async::BufRead for BufferedUarteRx<'d, U, T> {
         async fn fill_buf(&mut self) -> Result<&[u8], Self::Error> {
-            self.inner.inner_fill_buf().await
+            self.fill_buf().await
         }
 
         fn consume(&mut self, amt: usize) {
-            self.inner.inner_consume(amt)
+            self.consume(amt)
         }
     }
 
     impl<'d, U: UarteInstance, T: TimerInstance> embedded_io_async::Write for BufferedUarte<'d, U, T> {
         async fn write(&mut self, buf: &[u8]) -> Result<usize, Self::Error> {
-            self.inner_write(buf).await
+            self.write(buf).await
         }
 
         async fn flush(&mut self) -> Result<(), Self::Error> {
-            self.inner_flush().await
+            self.flush().await
         }
     }
 
-    impl<'u, 'd: 'u, U: UarteInstance, T: TimerInstance> embedded_io_async::Write for BufferedUarteTx<'u, 'd, U, T> {
+    impl<'d: 'd, U: UarteInstance> embedded_io_async::Write for BufferedUarteTx<'d, U> {
         async fn write(&mut self, buf: &[u8]) -> Result<usize, Self::Error> {
-            self.inner.inner_write(buf).await
+            self.write(buf).await
         }
 
         async fn flush(&mut self) -> Result<(), Self::Error> {
-            self.inner.inner_flush().await
-        }
-    }
-}
-
-impl<'a, U: UarteInstance, T: TimerInstance> Drop for BufferedUarte<'a, U, T> {
-    fn drop(&mut self) {
-        self._ppi_group.disable_all();
-
-        let r = U::regs();
-
-        self.timer.stop();
-
-        r.inten.reset();
-        r.events_rxto.reset();
-        r.tasks_stoprx.write(|w| unsafe { w.bits(1) });
-        r.events_txstopped.reset();
-        r.tasks_stoptx.write(|w| unsafe { w.bits(1) });
-
-        while r.events_txstopped.read().bits() == 0 {}
-        while r.events_rxto.read().bits() == 0 {}
-
-        r.enable.write(|w| w.enable().disabled());
-
-        gpio::deconfigure_pin(r.psel.rxd.read().bits());
-        gpio::deconfigure_pin(r.psel.txd.read().bits());
-        gpio::deconfigure_pin(r.psel.rts.read().bits());
-        gpio::deconfigure_pin(r.psel.cts.read().bits());
-
-        let s = U::buffered_state();
-        unsafe {
-            s.rx_buf.deinit();
-            s.tx_buf.deinit();
+            self.flush().await
         }
     }
 }
diff --git a/embassy-nrf/src/uarte.rs b/embassy-nrf/src/uarte.rs
index 9e5b85dea..cbd5dccbc 100644
--- a/embassy-nrf/src/uarte.rs
+++ b/embassy-nrf/src/uarte.rs
@@ -115,7 +115,7 @@ impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandl
         let endrx = r.events_endrx.read().bits();
         let error = r.events_error.read().bits();
         if endrx != 0 || error != 0 {
-            s.endrx_waker.wake();
+            s.rx_waker.wake();
             if endrx != 0 {
                 r.intenclr.write(|w| w.endrx().clear());
             }
@@ -124,7 +124,7 @@ impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandl
             }
         }
         if r.events_endtx.read().bits() != 0 {
-            s.endtx_waker.wake();
+            s.tx_waker.wake();
             r.intenclr.write(|w| w.endtx().clear());
         }
     }
@@ -159,7 +159,7 @@ impl<'d, T: Instance> Uarte<'d, T> {
         txd: impl Peripheral<P = impl GpioPin> + 'd,
         config: Config,
     ) -> Self {
-        into_ref!(rxd, txd);
+        into_ref!(uarte, rxd, txd);
         Self::new_inner(uarte, rxd.map_into(), txd.map_into(), None, None, config)
     }
 
@@ -173,7 +173,7 @@ impl<'d, T: Instance> Uarte<'d, T> {
         rts: impl Peripheral<P = impl GpioPin> + 'd,
         config: Config,
     ) -> Self {
-        into_ref!(rxd, txd, cts, rts);
+        into_ref!(uarte, rxd, txd, cts, rts);
         Self::new_inner(
             uarte,
             rxd.map_into(),
@@ -185,17 +185,22 @@ impl<'d, T: Instance> Uarte<'d, T> {
     }
 
     fn new_inner(
-        uarte: impl Peripheral<P = T> + 'd,
+        uarte: PeripheralRef<'d, T>,
         rxd: PeripheralRef<'d, AnyPin>,
         txd: PeripheralRef<'d, AnyPin>,
         cts: Option<PeripheralRef<'d, AnyPin>>,
         rts: Option<PeripheralRef<'d, AnyPin>>,
         config: Config,
     ) -> Self {
-        into_ref!(uarte);
-
         let r = T::regs();
 
+        let hardware_flow_control = match (rts.is_some(), cts.is_some()) {
+            (false, false) => false,
+            (true, true) => true,
+            _ => panic!("RTS and CTS pins must be either both set or none set."),
+        };
+        configure(r, config, hardware_flow_control);
+
         rxd.conf().write(|w| w.input().connect().drive().h0h1());
         r.psel.rxd.write(|w| unsafe { w.bits(rxd.psel_bits()) });
 
@@ -217,13 +222,6 @@ impl<'d, T: Instance> Uarte<'d, T> {
         T::Interrupt::unpend();
         unsafe { T::Interrupt::enable() };
 
-        let hardware_flow_control = match (rts.is_some(), cts.is_some()) {
-            (false, false) => false,
-            (true, true) => true,
-            _ => panic!("RTS and CTS pins must be either both set or none set."),
-        };
-        configure(r, config, hardware_flow_control);
-
         let s = T::state();
         s.tx_rx_refcount.store(2, Ordering::Relaxed);
 
@@ -242,6 +240,14 @@ impl<'d, T: Instance> Uarte<'d, T> {
         (self.tx, self.rx)
     }
 
+    /// Split the UART into transmitter and receiver halves, by reference.
+    ///
+    /// The returned halves borrow from `self`, so you can drop them and go back to using
+    /// the "un-split" `self`. This allows temporarily splitting the UART.
+    pub fn split_by_ref(&mut self) -> (&mut UarteTx<'d, T>, &mut UarteRx<'d, T>) {
+        (&mut self.tx, &mut self.rx)
+    }
+
     /// Split the Uarte into the transmitter and receiver with idle support parts.
     ///
     /// This is useful to concurrently transmit and receive from independent tasks.
@@ -291,7 +297,7 @@ impl<'d, T: Instance> Uarte<'d, T> {
     }
 }
 
-fn configure(r: &RegisterBlock, config: Config, hardware_flow_control: bool) {
+pub(crate) fn configure(r: &RegisterBlock, config: Config, hardware_flow_control: bool) {
     r.config.write(|w| {
         w.hwfc().bit(hardware_flow_control);
         w.parity().variant(config.parity);
@@ -307,6 +313,12 @@ fn configure(r: &RegisterBlock, config: Config, hardware_flow_control: bool) {
     r.events_rxstarted.reset();
     r.events_txstarted.reset();
 
+    // Disconnect all pin selections; the constructors select the pins they use afterwards.
+    r.psel.txd.write(|w| w.connect().disconnected());
+    r.psel.rxd.write(|w| w.connect().disconnected());
+    r.psel.cts.write(|w| w.connect().disconnected());
+    r.psel.rts.write(|w| w.connect().disconnected());
+
     // Enable
     apply_workaround_for_enable_anomaly(r);
     r.enable.write(|w| w.enable().enabled());
@@ -320,7 +332,7 @@ impl<'d, T: Instance> UarteTx<'d, T> {
         txd: impl Peripheral<P = impl GpioPin> + 'd,
         config: Config,
     ) -> Self {
-        into_ref!(txd);
+        into_ref!(uarte, txd);
         Self::new_inner(uarte, txd.map_into(), None, config)
     }
 
@@ -332,20 +344,20 @@ impl<'d, T: Instance> UarteTx<'d, T> {
         cts: impl Peripheral<P = impl GpioPin> + 'd,
         config: Config,
     ) -> Self {
-        into_ref!(txd, cts);
+        into_ref!(uarte, txd, cts);
         Self::new_inner(uarte, txd.map_into(), Some(cts.map_into()), config)
     }
 
     fn new_inner(
-        uarte: impl Peripheral<P = T> + 'd,
+        uarte: PeripheralRef<'d, T>,
         txd: PeripheralRef<'d, AnyPin>,
         cts: Option<PeripheralRef<'d, AnyPin>>,
         config: Config,
     ) -> Self {
-        into_ref!(uarte);
-
         let r = T::regs();
 
+        configure(r, config, cts.is_some());
+
         txd.set_high();
         txd.conf().write(|w| w.dir().output().drive().s0s1());
         r.psel.txd.write(|w| unsafe { w.bits(txd.psel_bits()) });
@@ -355,12 +367,6 @@ impl<'d, T: Instance> UarteTx<'d, T> {
         }
         r.psel.cts.write(|w| unsafe { w.bits(cts.psel_bits()) });
 
-        r.psel.rxd.write(|w| w.connect().disconnected());
-        r.psel.rts.write(|w| w.connect().disconnected());
-
-        let hardware_flow_control = cts.is_some();
-        configure(r, config, hardware_flow_control);
-
         T::Interrupt::unpend();
         unsafe { T::Interrupt::enable() };
 
@@ -425,7 +431,7 @@ impl<'d, T: Instance> UarteTx<'d, T> {
         r.tasks_starttx.write(|w| unsafe { w.bits(1) });
 
         poll_fn(|cx| {
-            s.endtx_waker.register(cx.waker());
+            s.tx_waker.register(cx.waker());
             if r.events_endtx.read().bits() != 0 {
                 return Poll::Ready(());
             }
@@ -516,7 +522,7 @@ impl<'d, T: Instance> UarteRx<'d, T> {
         rxd: impl Peripheral<P = impl GpioPin> + 'd,
         config: Config,
     ) -> Self {
-        into_ref!(rxd);
+        into_ref!(uarte, rxd);
         Self::new_inner(uarte, rxd.map_into(), None, config)
     }
 
@@ -528,7 +534,7 @@ impl<'d, T: Instance> UarteRx<'d, T> {
         rts: impl Peripheral<P = impl GpioPin> + 'd,
         config: Config,
     ) -> Self {
-        into_ref!(rxd, rts);
+        into_ref!(uarte, rxd, rts);
         Self::new_inner(uarte, rxd.map_into(), Some(rts.map_into()), config)
     }
 
@@ -541,15 +547,15 @@ impl<'d, T: Instance> UarteRx<'d, T> {
     }
 
     fn new_inner(
-        uarte: impl Peripheral<P = T> + 'd,
+        uarte: PeripheralRef<'d, T>,
         rxd: PeripheralRef<'d, AnyPin>,
         rts: Option<PeripheralRef<'d, AnyPin>>,
         config: Config,
     ) -> Self {
-        into_ref!(uarte);
-
         let r = T::regs();
 
+        configure(r, config, rts.is_some());
+
         rxd.conf().write(|w| w.input().connect().drive().h0h1());
         r.psel.rxd.write(|w| unsafe { w.bits(rxd.psel_bits()) });
 
@@ -559,15 +565,9 @@ impl<'d, T: Instance> UarteRx<'d, T> {
         }
         r.psel.rts.write(|w| unsafe { w.bits(rts.psel_bits()) });
 
-        r.psel.txd.write(|w| w.connect().disconnected());
-        r.psel.cts.write(|w| w.connect().disconnected());
-
         T::Interrupt::unpend();
         unsafe { T::Interrupt::enable() };
 
-        let hardware_flow_control = rts.is_some();
-        configure(r, config, hardware_flow_control);
-
         let s = T::state();
         s.tx_rx_refcount.store(1, Ordering::Relaxed);
 
@@ -672,7 +672,7 @@ impl<'d, T: Instance> UarteRx<'d, T> {
         r.tasks_startrx.write(|w| unsafe { w.bits(1) });
 
         let result = poll_fn(|cx| {
-            s.endrx_waker.register(cx.waker());
+            s.rx_waker.register(cx.waker());
 
             if let Err(e) = self.check_and_clear_errors() {
                 r.tasks_stoprx.write(|w| unsafe { w.bits(1) });
@@ -819,7 +819,7 @@ impl<'d, T: Instance, U: TimerInstance> UarteRxWithIdle<'d, T, U> {
         r.tasks_startrx.write(|w| unsafe { w.bits(1) });
 
         let result = poll_fn(|cx| {
-            s.endrx_waker.register(cx.waker());
+            s.rx_waker.register(cx.waker());
 
             if let Err(e) = self.rx.check_and_clear_errors() {
                 r.tasks_stoprx.write(|w| unsafe { w.bits(1) });
@@ -962,15 +962,15 @@ pub(crate) mod sealed {
     use super::*;
 
     pub struct State {
-        pub endrx_waker: AtomicWaker,
-        pub endtx_waker: AtomicWaker,
+        pub rx_waker: AtomicWaker,
+        pub tx_waker: AtomicWaker,
         pub tx_rx_refcount: AtomicU8,
     }
     impl State {
         pub const fn new() -> Self {
             Self {
-                endrx_waker: AtomicWaker::new(),
-                endtx_waker: AtomicWaker::new(),
+                rx_waker: AtomicWaker::new(),
+                tx_waker: AtomicWaker::new(),
                 tx_rx_refcount: AtomicU8::new(0),
             }
         }
diff --git a/tests/nrf52840/src/bin/buffered_uart.rs b/tests/nrf52840/src/bin/buffered_uart.rs
index 354d787b4..a01d66d85 100644
--- a/tests/nrf52840/src/bin/buffered_uart.rs
+++ b/tests/nrf52840/src/bin/buffered_uart.rs
@@ -15,7 +15,7 @@ bind_interrupts!(struct Irqs {
 
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
-    let p = embassy_nrf::init(Default::default());
+    let mut p = embassy_nrf::init(Default::default());
     let mut config = uarte::Config::default();
     config.parity = uarte::Parity::EXCLUDED;
     config.baudrate = uarte::Baudrate::BAUD1M;
@@ -23,55 +23,58 @@ async fn main(_spawner: Spawner) {
     let mut tx_buffer = [0u8; 1024];
     let mut rx_buffer = [0u8; 1024];
 
-    let mut u = BufferedUarte::new(
-        p.UARTE0,
-        p.TIMER0,
-        p.PPI_CH0,
-        p.PPI_CH1,
-        p.PPI_GROUP0,
-        Irqs,
-        p.P1_03,
-        p.P1_02,
-        config.clone(),
-        &mut rx_buffer,
-        &mut tx_buffer,
-    );
+        // Run twice to check that tearing down and recreating the `BufferedUarte` works.
+    for _ in 0..2 {
+        let u = BufferedUarte::new(
+            &mut p.UARTE0,
+            &mut p.TIMER0,
+            &mut p.PPI_CH0,
+            &mut p.PPI_CH1,
+            &mut p.PPI_GROUP0,
+            Irqs,
+            &mut p.P1_03,
+            &mut p.P1_02,
+            config.clone(),
+            &mut rx_buffer,
+            &mut tx_buffer,
+        );
 
-    info!("uarte initialized!");
+        info!("uarte initialized!");
 
-    let (mut rx, mut tx) = u.split();
+        let (mut rx, mut tx) = u.split();
 
-    const COUNT: usize = 40_000;
+        const COUNT: usize = 40_000;
 
-    let tx_fut = async {
-        let mut tx_buf = [0; 215];
-        let mut i = 0;
-        while i < COUNT {
-            let n = tx_buf.len().min(COUNT - i);
-            let tx_buf = &mut tx_buf[..n];
-            for (j, b) in tx_buf.iter_mut().enumerate() {
-                *b = (i + j) as u8;
+        let tx_fut = async {
+            let mut tx_buf = [0; 215];
+            let mut i = 0;
+            while i < COUNT {
+                let n = tx_buf.len().min(COUNT - i);
+                let tx_buf = &mut tx_buf[..n];
+                for (j, b) in tx_buf.iter_mut().enumerate() {
+                    *b = (i + j) as u8;
+                }
+                let n = unwrap!(tx.write(tx_buf).await);
+                i += n;
             }
-            let n = unwrap!(tx.write(tx_buf).await);
-            i += n;
-        }
-    };
-    let rx_fut = async {
-        let mut i = 0;
-        while i < COUNT {
-            let buf = unwrap!(rx.fill_buf().await);
+        };
+        let rx_fut = async {
+            let mut i = 0;
+            while i < COUNT {
+                let buf = unwrap!(rx.fill_buf().await);
 
-            for &b in buf {
-                assert_eq!(b, i as u8);
-                i = i + 1;
+                for &b in buf {
+                    assert_eq!(b, i as u8);
+                    i = i + 1;
+                }
+
+                let n = buf.len();
+                rx.consume(n);
             }
+        };
 
-            let n = buf.len();
-            rx.consume(n);
-        }
-    };
-
-    join(rx_fut, tx_fut).await;
+        join(rx_fut, tx_fut).await;
+    }
 
     info!("Test OK");
     cortex_m::asm::bkpt();
diff --git a/tests/nrf52840/src/bin/buffered_uart_full.rs b/tests/nrf52840/src/bin/buffered_uart_full.rs
index e59c75ba9..62edaed25 100644
--- a/tests/nrf52840/src/bin/buffered_uart_full.rs
+++ b/tests/nrf52840/src/bin/buffered_uart_full.rs
@@ -23,7 +23,7 @@ async fn main(_spawner: Spawner) {
     let mut tx_buffer = [0u8; 1024];
     let mut rx_buffer = [0u8; 1024];
 
-    let mut u = BufferedUarte::new(
+    let u = BufferedUarte::new(
         p.UARTE0,
         p.TIMER0,
         p.PPI_CH0,
diff --git a/tests/nrf52840/src/bin/buffered_uart_halves.rs b/tests/nrf52840/src/bin/buffered_uart_halves.rs
new file mode 100644
index 000000000..54a9fef5b
--- /dev/null
+++ b/tests/nrf52840/src/bin/buffered_uart_halves.rs
@@ -0,0 +1,82 @@
+#![no_std]
+#![no_main]
+teleprobe_meta::target!(b"nrf52840-dk");
+
+use defmt::{assert_eq, *};
+use embassy_executor::Spawner;
+use embassy_futures::join::join;
+use embassy_nrf::buffered_uarte::{self, BufferedUarteRx, BufferedUarteTx};
+use embassy_nrf::{bind_interrupts, peripherals, uarte};
+use {defmt_rtt as _, panic_probe as _};
+
+bind_interrupts!(struct Irqs {
+    UARTE0_UART0 => buffered_uarte::InterruptHandler<peripherals::UARTE0>;
+    UARTE1 => buffered_uarte::InterruptHandler<peripherals::UARTE1>;
+});
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let mut p = embassy_nrf::init(Default::default());
+    let mut config = uarte::Config::default();
+    config.parity = uarte::Parity::EXCLUDED;
+    config.baudrate = uarte::Baudrate::BAUD1M;
+
+    let mut tx_buffer = [0u8; 1024];
+    let mut rx_buffer = [0u8; 1024];
+
+    // Run twice to check that tearing down and recreating `BufferedUarteTx`/`BufferedUarteRx` works.
+    for _ in 0..2 {
+        const COUNT: usize = 40_000;
+
+        let mut tx = BufferedUarteTx::new(&mut p.UARTE1, Irqs, &mut p.P1_02, config.clone(), &mut tx_buffer);
+
+        let mut rx = BufferedUarteRx::new(
+            &mut p.UARTE0,
+            &mut p.TIMER0,
+            &mut p.PPI_CH0,
+            &mut p.PPI_CH1,
+            &mut p.PPI_GROUP0,
+            Irqs,
+            &mut p.P1_03,
+            config.clone(),
+            &mut rx_buffer,
+        );
+
+        let tx_fut = async {
+            info!("tx initialized!");
+
+            let mut tx_buf = [0; 215];
+            let mut i = 0;
+            while i < COUNT {
+                let n = tx_buf.len().min(COUNT - i);
+                let tx_buf = &mut tx_buf[..n];
+                for (j, b) in tx_buf.iter_mut().enumerate() {
+                    *b = (i + j) as u8;
+                }
+                let n = unwrap!(tx.write(tx_buf).await);
+                i += n;
+            }
+        };
+        let rx_fut = async {
+            info!("rx initialized!");
+
+            let mut i = 0;
+            while i < COUNT {
+                let buf = unwrap!(rx.fill_buf().await);
+
+                for &b in buf {
+                    assert_eq!(b, i as u8);
+                    i = i + 1;
+                }
+
+                let n = buf.len();
+                rx.consume(n);
+            }
+        };
+
+        join(rx_fut, tx_fut).await;
+    }
+
+    info!("Test OK");
+    cortex_m::asm::bkpt();
+}