diff --git a/embassy-extras/src/peripheral.rs b/embassy-extras/src/peripheral.rs
index e2435d63f..68972c543 100644
--- a/embassy-extras/src/peripheral.rs
+++ b/embassy-extras/src/peripheral.rs
@@ -1,7 +1,6 @@
 use core::cell::UnsafeCell;
 use core::marker::{PhantomData, PhantomPinned};
 use core::pin::Pin;
-use core::sync::atomic::{compiler_fence, Ordering};
 
 use embassy::interrupt::{Interrupt, InterruptExt};
 
@@ -39,8 +38,6 @@ impl<S: PeripheralState> PeripheralMutex<S> {
         }
 
         this.irq.disable();
-        compiler_fence(Ordering::SeqCst);
-
         this.irq.set_handler(|p| {
             // Safety: it's OK to get a &mut to the state, since
             // - We're in the IRQ, no one else can't preempt us
@@ -50,8 +47,6 @@ impl<S: PeripheralState> PeripheralMutex<S> {
         });
         this.irq
             .set_handler_context((&mut this.state) as *mut _ as *mut ());
-
-        compiler_fence(Ordering::SeqCst);
         this.irq.enable();
 
         this.irq_setup_done = true;
@@ -61,14 +56,11 @@ impl<S: PeripheralState> PeripheralMutex<S> {
         let this = unsafe { self.get_unchecked_mut() };
 
         this.irq.disable();
-        compiler_fence(Ordering::SeqCst);
 
         // Safety: it's OK to get a &mut to the state, since the irq is disabled.
         let state = unsafe { &mut *this.state.get() };
-
         let r = f(state, &mut this.irq);
 
-        compiler_fence(Ordering::SeqCst);
         this.irq.enable();
 
         r
diff --git a/embassy-extras/src/peripheral_shared.rs b/embassy-extras/src/peripheral_shared.rs
index 73906698e..c62113396 100644
--- a/embassy-extras/src/peripheral_shared.rs
+++ b/embassy-extras/src/peripheral_shared.rs
@@ -1,7 +1,6 @@
 use core::cell::UnsafeCell;
 use core::marker::{PhantomData, PhantomPinned};
 use core::pin::Pin;
-use core::sync::atomic::{compiler_fence, Ordering};
 
 use embassy::interrupt::{Interrupt, InterruptExt};
 
@@ -39,16 +38,12 @@ impl<S: PeripheralState> Peripheral<S> {
         }
 
         this.irq.disable();
-        compiler_fence(Ordering::SeqCst);
-
         this.irq.set_handler(|p| {
             let state = unsafe { &*(p as *const S) };
             state.on_interrupt();
         });
         this.irq
             .set_handler_context((&this.state) as *const _ as *mut ());
-
-        compiler_fence(Ordering::SeqCst);
         this.irq.enable();
 
         this.irq_setup_done = true;
diff --git a/embassy-macros/src/lib.rs b/embassy-macros/src/lib.rs
index bcf9dd4ed..a14c374fa 100644
--- a/embassy-macros/src/lib.rs
+++ b/embassy-macros/src/lib.rs
@@ -172,12 +172,14 @@ pub fn interrupt_take(item: TokenStream) -> TokenStream {
                     static HANDLER: ::embassy::interrupt::Handler;
                 }
 
-                let func = HANDLER.func.load(::embassy::export::atomic::Ordering::Acquire);
-                let ctx = HANDLER.ctx.load(::embassy::export::atomic::Ordering::Acquire);
+                let func = HANDLER.func.load(::embassy::export::atomic::Ordering::Relaxed);
+                let ctx = HANDLER.ctx.load(::embassy::export::atomic::Ordering::Relaxed);
+                // Keep the null check: `remove_handler` stores a null pointer, and
+                // transmuting null into a `fn` pointer is undefined behavior.
                 if !func.is_null() {
                     let func: fn(*mut ()) = ::core::mem::transmute(func);
                     func(ctx)
                 }
             }
 
             static TAKEN: ::embassy::export::atomic::AtomicBool = ::embassy::export::atomic::AtomicBool::new(false);
diff --git a/embassy/src/executor/raw.rs b/embassy/src/executor/raw.rs
index 7e981b084..52512c533 100644
--- a/embassy/src/executor/raw.rs
+++ b/embassy/src/executor/raw.rs
@@ -4,9 +4,9 @@ use core::cmp::min;
 use core::future::Future;
 use core::marker::PhantomData;
 use core::pin::Pin;
-use core::ptr;
 use core::ptr::NonNull;
 use core::task::{Context, Poll, Waker};
+use core::{mem, ptr};
 
 use super::run_queue::{RunQueue, RunQueueItem};
 use super::timer_queue::{TimerQueue, TimerQueueItem};
@@ -143,6 +143,10 @@ impl<F: Future + 'static> Task<F> {
             }
             Poll::Pending => {}
         }
+
+        // the compiler is emitting a virtual call for waker drop, but we know
+        // it's a noop for our waker.
+        mem::forget(waker);
     }
 }
 
diff --git a/embassy/src/interrupt.rs b/embassy/src/interrupt.rs
index a4285a9fe..99d7af753 100644
--- a/embassy/src/interrupt.rs
+++ b/embassy/src/interrupt.rs
@@ -1,7 +1,7 @@
 use core::ptr;
 use cortex_m::peripheral::NVIC;
 
-use atomic_polyfill::{AtomicPtr, Ordering};
+use atomic_polyfill::{compiler_fence, AtomicPtr, Ordering};
 
 pub use embassy_macros::interrupt_declare as declare;
 pub use embassy_macros::interrupt_take as take;
@@ -58,22 +58,27 @@ pub trait InterruptExt: Interrupt {
 
 impl<T: Interrupt + ?Sized> InterruptExt for T {
     fn set_handler(&self, func: unsafe fn(*mut ())) {
+        compiler_fence(Ordering::SeqCst);
         let handler = unsafe { self.__handler() };
-        handler.func.store(func as *mut (), Ordering::Release);
+        handler.func.store(func as *mut (), Ordering::Relaxed);
+        compiler_fence(Ordering::SeqCst);
     }
 
     fn remove_handler(&self) {
+        compiler_fence(Ordering::SeqCst);
         let handler = unsafe { self.__handler() };
-        handler.func.store(ptr::null_mut(), Ordering::Release);
+        handler.func.store(ptr::null_mut(), Ordering::Relaxed);
+        compiler_fence(Ordering::SeqCst);
     }
 
     fn set_handler_context(&self, ctx: *mut ()) {
         let handler = unsafe { self.__handler() };
-        handler.ctx.store(ctx, Ordering::Release);
+        handler.ctx.store(ctx, Ordering::Relaxed);
     }
 
     #[inline]
     fn enable(&self) {
+        compiler_fence(Ordering::SeqCst);
         unsafe {
             NVIC::unmask(NrWrap(self.number()));
         }
@@ -82,6 +87,7 @@ impl<T: Interrupt + ?Sized> InterruptExt for T {
     #[inline]
     fn disable(&self) {
         NVIC::mask(NrWrap(self.number()));
+        compiler_fence(Ordering::SeqCst);
     }
 
     #[inline]
diff --git a/embassy/src/util/forever.rs b/embassy/src/util/forever.rs
index efa96f30e..0432fa51e 100644
--- a/embassy/src/util/forever.rs
+++ b/embassy/src/util/forever.rs
@@ -31,6 +31,7 @@ unsafe impl<T> Send for Forever<T> {}
 unsafe impl<T> Sync for Forever<T> {}
 
 impl<T> Forever<T> {
+    #[inline(always)]
     pub const fn new() -> Self {
         Self {
             used: AtomicBool::new(false),
@@ -43,10 +44,11 @@ impl<T> Forever<T> {
     /// Panics if this `Forever` already has a value.
     ///
     /// Returns a mutable reference to the stored value.
+    #[inline(always)]
     pub fn put(&'static self, val: T) -> &'static mut T {
         if self
             .used
-            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
+            .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
             .is_err()
         {
             panic!("Forever.put() called multiple times");
@@ -60,6 +62,35 @@ impl<T> Forever<T> {
         }
     }
 
+    /// Store the value returned by `val` in this `Forever`, claiming it forever.
+    ///
+    /// The closure is only called after the cell has been claimed, and its
+    /// result is written directly into the `Forever`'s storage.
+    ///
+    /// Panics if this `Forever` already has a value.
+    ///
+    /// Returns a mutable reference to the stored value.
+    #[inline(always)]
+    pub fn put_with(&'static self, val: impl FnOnce() -> T) -> &'static mut T {
+        if self
+            .used
+            .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
+            .is_err()
+        {
+            panic!("Forever.put_with() called multiple times");
+        }
+
+        // Safety: the successful compare_exchange above means this is the only
+        // `put`/`put_with` call that can hand out a reference to the storage.
+        unsafe {
+            let p = self.t.get();
+            let p = (&mut *p).as_mut_ptr();
+            p.write(val());
+            &mut *p
+        }
+    }
+
+    #[inline(always)]
     pub unsafe fn steal(&'static self) -> &'static mut T {
         let p = self.t.get();
         let p = (&mut *p).as_mut_ptr();