From cd9a65ba3969460cd7ecb9200f12a828c26205aa Mon Sep 17 00:00:00 2001
From: Dario Nieuwenhuis <dirbaio@dirbaio.net>
Date: Fri, 23 Dec 2022 20:45:51 +0100
Subject: [PATCH 1/2] stm32/usb: use separate irq flags.

- Fixes race condition that could cause losing irqs (because `if flags != 0` was clearing all)
- Doesn't need CAS, which is nice for thumbv6.
---
 embassy-stm32/src/usb/usb.rs | 66 ++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/embassy-stm32/src/usb/usb.rs b/embassy-stm32/src/usb/usb.rs
index 460abfe28..cba4915c1 100644
--- a/embassy-stm32/src/usb/usb.rs
+++ b/embassy-stm32/src/usb/usb.rs
@@ -2,10 +2,9 @@
 
 use core::future::poll_fn;
 use core::marker::PhantomData;
-use core::sync::atomic::Ordering;
+use core::sync::atomic::{AtomicBool, Ordering};
 use core::task::Poll;
 
-use atomic_polyfill::{AtomicBool, AtomicU8};
 use embassy_hal_common::into_ref;
 use embassy_sync::waitqueue::AtomicWaker;
 use embassy_time::{block_for, Duration};
@@ -35,10 +34,9 @@ static BUS_WAKER: AtomicWaker = NEW_AW;
 static EP0_SETUP: AtomicBool = AtomicBool::new(false);
 static EP_IN_WAKERS: [AtomicWaker; EP_COUNT] = [NEW_AW; EP_COUNT];
 static EP_OUT_WAKERS: [AtomicWaker; EP_COUNT] = [NEW_AW; EP_COUNT];
-static IRQ_FLAGS: AtomicU8 = AtomicU8::new(0);
-const IRQ_FLAG_RESET: u8 = 0x01;
-const IRQ_FLAG_SUSPEND: u8 = 0x02;
-const IRQ_FLAG_RESUME: u8 = 0x04;
+static IRQ_RESET: AtomicBool = AtomicBool::new(false);
+static IRQ_SUSPEND: AtomicBool = AtomicBool::new(false);
+static IRQ_RESUME: AtomicBool = AtomicBool::new(false);
 
 fn convert_type(t: EndpointType) -> EpType {
     match t {
@@ -184,47 +182,51 @@ impl<'d, T: Instance> Driver<'d, T> {
 
             let istr = regs.istr().read();
 
-            let mut flags: u8 = 0;
-
             if istr.susp() {
                 //trace!("USB IRQ: susp");
-                flags |= IRQ_FLAG_SUSPEND;
+                IRQ_SUSPEND.store(true, Ordering::Relaxed);
                 regs.cntr().modify(|w| {
                     w.set_fsusp(true);
                     w.set_lpmode(true);
-                })
+                });
+
+                // Write 0 to clear.
+                let mut clear = regs::Istr(!0);
+                clear.set_susp(false);
+                regs.istr().write_value(clear);
+
+                // Wake main thread.
+                BUS_WAKER.wake();
             }
 
             if istr.wkup() {
                 //trace!("USB IRQ: wkup");
-                flags |= IRQ_FLAG_RESUME;
+                IRQ_RESUME.store(true, Ordering::Relaxed);
                 regs.cntr().modify(|w| {
                     w.set_fsusp(false);
                     w.set_lpmode(false);
-                })
+                });
+
+                // Write 0 to clear.
+                let mut clear = regs::Istr(!0);
+                clear.set_wkup(false);
+                regs.istr().write_value(clear);
+
+                // Wake main thread.
+                BUS_WAKER.wake();
             }
 
             if istr.reset() {
                 //trace!("USB IRQ: reset");
-                flags |= IRQ_FLAG_RESET;
+                IRQ_RESET.store(true, Ordering::Relaxed);
 
                 // Write 0 to clear.
                 let mut clear = regs::Istr(!0);
                 clear.set_reset(false);
                 regs.istr().write_value(clear);
-            }
 
-            if flags != 0 {
-                // Send irqs to main thread.
-                IRQ_FLAGS.fetch_or(flags, Ordering::AcqRel);
+                // Wake main thread.
                 BUS_WAKER.wake();
-
-                // Clear them
-                let mut mask = regs::Istr(0);
-                mask.set_wkup(true);
-                mask.set_susp(true);
-                mask.set_reset(true);
-                regs.istr().write_value(regs::Istr(!(istr.0 & mask.0)));
             }
 
             if istr.ctr() {
@@ -436,17 +438,15 @@ impl<'d, T: Instance> driver::Bus for Bus<'d, T> {
             if self.inited {
                 let regs = T::regs();
 
-                let flags = IRQ_FLAGS.load(Ordering::Acquire);
-
-                if flags & IRQ_FLAG_RESUME != 0 {
-                    IRQ_FLAGS.fetch_and(!IRQ_FLAG_RESUME, Ordering::AcqRel);
+                if IRQ_RESUME.load(Ordering::Acquire) {
+                    IRQ_RESUME.store(false, Ordering::Relaxed);
                     return Poll::Ready(Event::Resume);
                 }
 
-                if flags & IRQ_FLAG_RESET != 0 {
-                    IRQ_FLAGS.fetch_and(!IRQ_FLAG_RESET, Ordering::AcqRel);
+                if IRQ_RESET.load(Ordering::Acquire) {
+                    IRQ_RESET.store(false, Ordering::Relaxed);
 
-                    trace!("RESET REGS WRITINGINGING");
+                    trace!("RESET");
                     regs.daddr().write(|w| {
                         w.set_ef(true);
                         w.set_add(0);
@@ -475,8 +475,8 @@ impl<'d, T: Instance> driver::Bus for Bus<'d, T> {
                     return Poll::Ready(Event::Reset);
                 }
 
-                if flags & IRQ_FLAG_SUSPEND != 0 {
-                    IRQ_FLAGS.fetch_and(!IRQ_FLAG_SUSPEND, Ordering::AcqRel);
+                if IRQ_SUSPEND.load(Ordering::Acquire) {
+                    IRQ_SUSPEND.store(false, Ordering::Relaxed);
                     return Poll::Ready(Event::Suspend);
                 }
 

From 10c9cc31b14a356e58833fd6c81456251ab3fce9 Mon Sep 17 00:00:00 2001
From: Dario Nieuwenhuis <dirbaio@dirbaio.net>
Date: Fri, 23 Dec 2022 20:46:49 +0100
Subject: [PATCH 2/2] Remove unnecessary use of atomic-polyfill.

Only use it when CAS is actually needed.
---
 .vscode/settings.json                | 11 ++++++-----
 embassy-executor/src/arch/riscv32.rs |  3 +--
 embassy-executor/src/arch/xtensa.rs  |  3 +--
 embassy-net/Cargo.toml               |  1 -
 embassy-net/src/tcp.rs               |  2 +-
 embassy-rp/src/multicore.rs          |  4 +---
 embassy-rp/src/usb.rs                |  3 +--
 embassy-stm32/src/adc/v4.rs          |  3 ++-
 embassy-stm32/src/flash/f4.rs        |  3 +--
 embassy-stm32/src/flash/f7.rs        |  3 +--
 embassy-stm32/src/flash/h7.rs        |  4 ++--
 embassy-stm32/src/i2c/v2.rs          |  5 +++--
 embassy-stm32/src/usart/buffered.rs  |  2 +-
 embassy-stm32/src/usart/mod.rs       |  2 +-
 embassy-sync/Cargo.toml              |  1 -
 15 files changed, 22 insertions(+), 28 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 62e5a362f..fd2e4afb1 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -4,7 +4,8 @@
   "rust-analyzer.checkOnSave.noDefaultFeatures": true,
   "rust-analyzer.cargo.noDefaultFeatures": true,
   "rust-analyzer.procMacro.enable": true,
-  "rust-analyzer.cargo.target": "thumbv7em-none-eabi",
+  //"rust-analyzer.cargo.target": "thumbv7em-none-eabi",
+  "rust-analyzer.cargo.target": "thumbv8m.main-none-eabihf",
   "rust-analyzer.cargo.features": [
     // These are needed to prevent embassy-net from failing to build
     //"embassy-net/medium-ethernet",
@@ -12,19 +13,19 @@
     //"embassy-net/pool-16",
     //"time-tick-16mhz",
     //"defmt-timestamp-uptime",
-    "nightly",
+    //"nightly",
     //"unstable-traits",
   ],
   "rust-analyzer.linkedProjects": [
     // Declare for the target you wish to develop
     //"embassy-executor/Cargo.toml",
     //"embassy-sync/Cargo.toml",
-    "examples/nrf/Cargo.toml",
+    //"examples/nrf/Cargo.toml",
     // "examples/nrf-rtos-trace/Cargo.toml",
     // "examples/rp/Cargo.toml",
     // "examples/std/Cargo.toml",
     // "examples/stm32f0/Cargo.toml",
-    // "examples/stm32f1/Cargo.toml",
+    //"examples/stm32f1/Cargo.toml",
     // "examples/stm32f2/Cargo.toml",
     // "examples/stm32f3/Cargo.toml",
     // "examples/stm32f4/Cargo.toml",
@@ -35,7 +36,7 @@
     // "examples/stm32l0/Cargo.toml",
     // "examples/stm32l1/Cargo.toml",
     // "examples/stm32l4/Cargo.toml",
-    // "examples/stm32l5/Cargo.toml",
+    "examples/stm32l5/Cargo.toml",
     // "examples/stm32u5/Cargo.toml",
     // "examples/stm32wb/Cargo.toml",
     // "examples/stm32wb55/Cargo.toml",
diff --git a/embassy-executor/src/arch/riscv32.rs b/embassy-executor/src/arch/riscv32.rs
index 2a4b006da..e97a56cda 100644
--- a/embassy-executor/src/arch/riscv32.rs
+++ b/embassy-executor/src/arch/riscv32.rs
@@ -1,7 +1,6 @@
 use core::marker::PhantomData;
 use core::ptr;
-
-use atomic_polyfill::{AtomicBool, Ordering};
+use core::sync::atomic::{AtomicBool, Ordering};
 
 use super::{raw, Spawner};
 
diff --git a/embassy-executor/src/arch/xtensa.rs b/embassy-executor/src/arch/xtensa.rs
index f908aaa70..4ee0d9f78 100644
--- a/embassy-executor/src/arch/xtensa.rs
+++ b/embassy-executor/src/arch/xtensa.rs
@@ -1,7 +1,6 @@
 use core::marker::PhantomData;
 use core::ptr;
-
-use atomic_polyfill::{AtomicBool, Ordering};
+use core::sync::atomic::{AtomicBool, Ordering};
 
 use super::{raw, Spawner};
 
diff --git a/embassy-net/Cargo.toml b/embassy-net/Cargo.toml
index ac338843d..2d853762e 100644
--- a/embassy-net/Cargo.toml
+++ b/embassy-net/Cargo.toml
@@ -51,7 +51,6 @@ generic-array = { version = "0.14.4", default-features = false }
 stable_deref_trait = { version = "1.2.0", default-features = false }
 futures = { version = "0.3.17", default-features = false, features = [ "async-await" ] }
 atomic-pool = "1.0"
-atomic-polyfill = "1.0.1"
 embedded-nal-async = { version = "0.3.0", optional = true }
 
 [dependencies.smoltcp]
diff --git a/embassy-net/src/tcp.rs b/embassy-net/src/tcp.rs
index 0dc8da73a..55cbda455 100644
--- a/embassy-net/src/tcp.rs
+++ b/embassy-net/src/tcp.rs
@@ -329,8 +329,8 @@ pub mod client {
     use core::cell::UnsafeCell;
     use core::mem::MaybeUninit;
     use core::ptr::NonNull;
+    use core::sync::atomic::{AtomicBool, Ordering};
 
-    use atomic_polyfill::{AtomicBool, Ordering};
     use embedded_nal_async::IpAddr;
 
     use super::*;
diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
index 8290c62a7..e51fc218a 100644
--- a/embassy-rp/src/multicore.rs
+++ b/embassy-rp/src/multicore.rs
@@ -29,9 +29,7 @@
 //! ```
 
 use core::mem::ManuallyDrop;
-use core::sync::atomic::{compiler_fence, Ordering};
-
-use atomic_polyfill::AtomicBool;
+use core::sync::atomic::{compiler_fence, AtomicBool, Ordering};
 
 use crate::interrupt::{Interrupt, InterruptExt};
 use crate::peripherals::CORE1;
diff --git a/embassy-rp/src/usb.rs b/embassy-rp/src/usb.rs
index dfc2e9da6..8361736a9 100644
--- a/embassy-rp/src/usb.rs
+++ b/embassy-rp/src/usb.rs
@@ -1,10 +1,9 @@
 use core::future::poll_fn;
 use core::marker::PhantomData;
 use core::slice;
-use core::sync::atomic::Ordering;
+use core::sync::atomic::{compiler_fence, Ordering};
 use core::task::Poll;
 
-use atomic_polyfill::compiler_fence;
 use embassy_hal_common::into_ref;
 use embassy_sync::waitqueue::AtomicWaker;
 use embassy_usb_driver as driver;
diff --git a/embassy-stm32/src/adc/v4.rs b/embassy-stm32/src/adc/v4.rs
index f5aa0e63d..4707b7c95 100644
--- a/embassy-stm32/src/adc/v4.rs
+++ b/embassy-stm32/src/adc/v4.rs
@@ -1,4 +1,5 @@
-use atomic_polyfill::{AtomicU8, Ordering};
+use core::sync::atomic::{AtomicU8, Ordering};
+
 use embedded_hal_02::blocking::delay::DelayUs;
 use pac::adc::vals::{Adcaldif, Boost, Difsel, Exten, Pcsel};
 use pac::adccommon::vals::Presc;
diff --git a/embassy-stm32/src/flash/f4.rs b/embassy-stm32/src/flash/f4.rs
index b8327ce4e..9e23a8adf 100644
--- a/embassy-stm32/src/flash/f4.rs
+++ b/embassy-stm32/src/flash/f4.rs
@@ -1,7 +1,6 @@
 use core::convert::TryInto;
 use core::ptr::write_volatile;
-
-use atomic_polyfill::{fence, Ordering};
+use core::sync::atomic::{fence, Ordering};
 
 use super::{ERASE_SIZE, FLASH_BASE, FLASH_SIZE};
 use crate::flash::Error;
diff --git a/embassy-stm32/src/flash/f7.rs b/embassy-stm32/src/flash/f7.rs
index 6d47b78a0..dd0d8439d 100644
--- a/embassy-stm32/src/flash/f7.rs
+++ b/embassy-stm32/src/flash/f7.rs
@@ -1,7 +1,6 @@
 use core::convert::TryInto;
 use core::ptr::write_volatile;
-
-use atomic_polyfill::{fence, Ordering};
+use core::sync::atomic::{fence, Ordering};
 
 use crate::flash::Error;
 use crate::pac;
diff --git a/embassy-stm32/src/flash/h7.rs b/embassy-stm32/src/flash/h7.rs
index 3178b3be9..7de95ac11 100644
--- a/embassy-stm32/src/flash/h7.rs
+++ b/embassy-stm32/src/flash/h7.rs
@@ -41,7 +41,7 @@ pub(crate) unsafe fn blocking_write(offset: u32, buf: &[u8]) -> Result<(), Error
 
     cortex_m::asm::isb();
     cortex_m::asm::dsb();
-    atomic_polyfill::fence(atomic_polyfill::Ordering::SeqCst);
+    core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst);
 
     let ret = {
         let mut ret: Result<(), Error> = Ok(());
@@ -70,7 +70,7 @@ pub(crate) unsafe fn blocking_write(offset: u32, buf: &[u8]) -> Result<(), Error
 
     cortex_m::asm::isb();
     cortex_m::asm::dsb();
-    atomic_polyfill::fence(atomic_polyfill::Ordering::SeqCst);
+    core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst);
 
     ret
 }
diff --git a/embassy-stm32/src/i2c/v2.rs b/embassy-stm32/src/i2c/v2.rs
index 47dc7d2a4..4622635bf 100644
--- a/embassy-stm32/src/i2c/v2.rs
+++ b/embassy-stm32/src/i2c/v2.rs
@@ -1,8 +1,8 @@
 use core::cmp;
 use core::future::poll_fn;
+use core::sync::atomic::{AtomicUsize, Ordering};
 use core::task::Poll;
 
-use atomic_polyfill::{AtomicUsize, Ordering};
 use embassy_embedded_hal::SetConfig;
 use embassy_hal_common::drop::OnDrop;
 use embassy_hal_common::{into_ref, PeripheralRef};
@@ -131,7 +131,8 @@ impl<'d, T: Instance, TXDMA, RXDMA> I2c<'d, T, TXDMA, RXDMA> {
 
         if isr.tcr() || isr.tc() {
             let state = T::state();
-            state.chunks_transferred.fetch_add(1, Ordering::Relaxed);
+            let transferred = state.chunks_transferred.load(Ordering::Relaxed);
+            state.chunks_transferred.store(transferred + 1, Ordering::Relaxed);
             state.waker.wake();
         }
         // The flag can only be cleared by writting to nbytes, we won't do that here, so disable
diff --git a/embassy-stm32/src/usart/buffered.rs b/embassy-stm32/src/usart/buffered.rs
index 874af1d73..a27fcc1ca 100644
--- a/embassy-stm32/src/usart/buffered.rs
+++ b/embassy-stm32/src/usart/buffered.rs
@@ -1,8 +1,8 @@
 use core::cell::RefCell;
 use core::future::poll_fn;
+use core::sync::atomic::{compiler_fence, Ordering};
 use core::task::Poll;
 
-use atomic_polyfill::{compiler_fence, Ordering};
 use embassy_cortex_m::peripheral::{PeripheralMutex, PeripheralState, StateStorage};
 use embassy_hal_common::ring_buffer::RingBuffer;
 use embassy_sync::waitqueue::WakerRegistration;
diff --git a/embassy-stm32/src/usart/mod.rs b/embassy-stm32/src/usart/mod.rs
index ea75361fa..6f8b6a9e8 100644
--- a/embassy-stm32/src/usart/mod.rs
+++ b/embassy-stm32/src/usart/mod.rs
@@ -2,9 +2,9 @@
 
 use core::future::poll_fn;
 use core::marker::PhantomData;
+use core::sync::atomic::{compiler_fence, Ordering};
 use core::task::Poll;
 
-use atomic_polyfill::{compiler_fence, Ordering};
 use embassy_cortex_m::interrupt::InterruptExt;
 use embassy_futures::select::{select, Either};
 use embassy_hal_common::drop::OnDrop;
diff --git a/embassy-sync/Cargo.toml b/embassy-sync/Cargo.toml
index 1eeb94c9a..7b5d3ce48 100644
--- a/embassy-sync/Cargo.toml
+++ b/embassy-sync/Cargo.toml
@@ -31,7 +31,6 @@ defmt = { version = "0.3", optional = true }
 log = { version = "0.4.14", optional = true }
 
 futures-util = { version = "0.3.17", default-features = false }
-atomic-polyfill = "1.0.1"
 critical-section = "1.1"
 heapless = "0.7.5"
 cfg-if = "1.0.0"