diff --git a/embassy-stm32/build.rs b/embassy-stm32/build.rs
index 15bb8ea62..057c4cee2 100644
--- a/embassy-stm32/build.rs
+++ b/embassy-stm32/build.rs
@@ -484,7 +484,7 @@ fn main() {
                 let expr = if let Some(mux) = self.chained_muxes.get(&v.name) {
                     self.gen_mux(mux)
                 } else {
-                    self.gen_clock(&v.name)
+                    self.gen_clock(v.name)
                 };
                 match_arms.extend(quote! {
                     crate::pac::rcc::vals::#enum_name::#variant_name => #expr,
@@ -1139,11 +1139,18 @@ fn main() {
         (("timer", "CH2"), quote!(crate::timer::Ch2Dma)),
         (("timer", "CH3"), quote!(crate::timer::Ch3Dma)),
         (("timer", "CH4"), quote!(crate::timer::Ch4Dma)),
+        (("cordic", "WRITE"), quote!(crate::cordic::WriteDma)), // FIXME: stm32u5a crash on Cordic driver
+        (("cordic", "READ"), quote!(crate::cordic::ReadDma)),   // FIXME: stm32u5a crash on Cordic driver
     ]
     .into();
 
     for p in METADATA.peripherals {
         if let Some(regs) = &p.registers {
+            // FIXME: stm32u5a crash on Cordic driver
+            if chip_name.starts_with("stm32u5a") && regs.kind == "cordic" {
+                continue;
+            }
+
             let mut dupe = HashSet::new();
             for ch in p.dma_channels {
                 // Some chips have multiple request numbers for the same (peri, signal, channel) combos.
diff --git a/embassy-stm32/src/cordic/enums.rs b/embassy-stm32/src/cordic/enums.rs
new file mode 100644
index 000000000..e8695fac7
--- /dev/null
+++ b/embassy-stm32/src/cordic/enums.rs
@@ -0,0 +1,80 @@
+/// CORDIC function
+///
+/// The discriminant is the raw value handed to `vals::Func::from_bits` when the function is selected.
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Function {
+    Cos = 0,
+    Sin,
+    Phase,
+    Modulus,
+    Arctan,
+    Cosh,
+    Sinh,
+    Arctanh,
+    Ln,
+    Sqrt,
+}
+
+/// CORDIC precision
+///
+/// The discriminant is the raw value handed to `vals::Precision::from_bits` when the precision is set.
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy, Default)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Precision {
+    Iters4 = 1,
+    Iters8,
+    Iters12,
+    Iters16,
+    Iters20,
+    #[default]
+    Iters24, // this value is recommended by Reference Manual
+    Iters28,
+    Iters32,
+    Iters36,
+    Iters40,
+    Iters44,
+    Iters48,
+    Iters52,
+    Iters56,
+    Iters60,
+}
+
+/// CORDIC scale
+///
+/// The discriminant is the raw value handed to `vals::Scale::from_bits` when the scale is set.
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy, Default, PartialEq)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Scale {
+    #[default]
+    Arg1Res1 = 0,
+    Arg1o2Res2,
+    Arg1o4Res4,
+    Arg1o8Res8,
+    Arg1o16Res16,
+    Arg1o32Res32,
+    Arg1o64Res64,
+    Arg1o128Res128,
+}
+
+/// CORDIC argument/result register access count
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy, Default)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum AccessCount {
+    #[default]
+    One,
+    Two,
+}
+
+/// CORDIC argument/result data width
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Copy)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Width {
+    Bits32,
+    Bits16,
+}
diff --git a/embassy-stm32/src/cordic/errors.rs b/embassy-stm32/src/cordic/errors.rs
new file mode 100644
index 000000000..3c70fc9e7
--- /dev/null
+++ b/embassy-stm32/src/cordic/errors.rs
@@ -0,0 +1,144 @@
+use super::{Function, Scale};
+
+/// Error for [Cordic](super::Cordic)
+#[derive(Debug)]
+pub enum CordicError {
+    /// The requested function/scale combination was rejected by [Config::new](super::Config::new)
+    ConfigError(ConfigError),
+    /// Argument slice has an odd length although every calculation consumes two arguments
+    ArgumentLengthIncorrect,
+    /// Result buffer is too short to hold every result
+    ResultLengthNotEnough,
+    /// Input value is out of range for Q1.x format
+    NumberOutOfRange(NumberOutOfRange),
+    /// Argument value is outside the range accepted by the configured function and scale
+    ArgError(ArgError),
+}
+
+impl From<ConfigError> for CordicError {
+    fn from(err: ConfigError) -> Self {
+        CordicError::ConfigError(err)
+    }
+}
+
+impl From<NumberOutOfRange> for CordicError {
+    fn from(err: NumberOutOfRange) -> Self {
+        CordicError::NumberOutOfRange(err)
+    }
+}
+
+impl From<ArgError> for CordicError {
+    fn from(err: ArgError) -> Self {
+        CordicError::ArgError(err)
+    }
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for CordicError {
+    /// Wrapped errors are forwarded to their own formatter; length errors print a fixed message.
+    fn format(&self, fmt: defmt::Formatter) {
+        match self {
+            Self::ConfigError(inner) => defmt::write!(fmt, "{}", inner),
+            Self::NumberOutOfRange(inner) => defmt::write!(fmt, "{}", inner),
+            Self::ArgError(inner) => defmt::write!(fmt, "{}", inner),
+            // the length errors carry no payload
+            Self::ArgumentLengthIncorrect => defmt::write!(fmt, "Argument length incorrect"),
+            Self::ResultLengthNotEnough => defmt::write!(fmt, "Output buffer length is not long enough"),
+        }
+    }
+}
+
+/// Error returned when a [Config](super::Config) pairs a function with a scale it does not support
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct ConfigError {
+    pub(super) func: Function,
+    pub(super) scale_range: [u8; 2],
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for ConfigError {
+    /// Print the offending function followed by the scale value (or inclusive range) it accepts.
+    fn format(&self, fmt: defmt::Formatter) {
+        let [lowest, highest] = self.scale_range;
+        defmt::write!(fmt, "For FUNCTION: {},", self.func);
+
+        match lowest == highest {
+            // exactly one SCALE value is legal
+            true => defmt::write!(fmt, " SCALE value should be {}", lowest),
+            // an inclusive range of SCALE values is legal
+            false => {
+                defmt::write!(fmt, " SCALE value should be {} <= SCALE <= {}", lowest, highest)
+            }
+        }
+    }
+}
+
+/// Input value lies outside the `-1.0..=1.0` range accepted by the Q1.x conversion helpers
+#[allow(missing_docs)]
+#[derive(Debug)]
+pub enum NumberOutOfRange {
+    BelowLowerBound,
+    AboveUpperBound,
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for NumberOutOfRange {
+    /// Tell the user which bound of the accepted interval was violated.
+    fn format(&self, fmt: defmt::Formatter) {
+        // one fixed message per violated bound
+        match self {
+            Self::AboveUpperBound => defmt::write!(fmt, "input value should be equal or less than 1"),
+            Self::BelowLowerBound => defmt::write!(fmt, "input value should be equal or greater than -1"),
+        }
+    }
+}
+
+/// Error returned when an argument is outside the range the configured function (and scale) accepts
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct ArgError {
+    pub(super) func: Function,
+    pub(super) scale: Option<Scale>,
+    pub(super) arg_range: [f32; 2], // bounds are only used for display, so f32 precision is sufficient
+    pub(super) inclusive_upper_bound: bool,
+    pub(super) arg_type: ArgType,
+}
+
+#[cfg(feature = "defmt")]
+impl defmt::Format for ArgError {
+    /// Print the function, the scale (when the error carries one)
+    /// and the range the argument must lie in.
+    fn format(&self, fmt: defmt::Formatter) {
+        let [lower, upper] = self.arg_range;
+
+        defmt::write!(fmt, "For FUNCTION: {},", self.func);
+
+        // the scale part is skipped when the error carries no scale
+        match self.scale {
+            Some(scale) => defmt::write!(fmt, " when SCALE is {},", scale),
+            None => {}
+        }
+
+        defmt::write!(fmt, " {} should be", self.arg_type);
+
+        match self.inclusive_upper_bound {
+            // closed interval: the upper bound itself is accepted
+            true => {
+                defmt::write!(fmt, " {} <= {} <= {}", lower, self.arg_type, upper)
+            }
+
+            // half-open interval: the upper bound itself is rejected
+            false => {
+                defmt::write!(fmt, " {} <= {} < {}", lower, self.arg_type, upper)
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub(super) enum ArgType {
+    Arg1,
+    Arg2,
+}
diff --git a/embassy-stm32/src/cordic/mod.rs b/embassy-stm32/src/cordic/mod.rs
new file mode 100644
index 000000000..6bbc48f2b
--- /dev/null
+++ b/embassy-stm32/src/cordic/mod.rs
@@ -0,0 +1,617 @@
+//! coordinate rotation digital computer (CORDIC)
+
+use embassy_hal_internal::drop::OnDrop;
+use embassy_hal_internal::{into_ref, Peripheral, PeripheralRef};
+
+use crate::{dma, peripherals};
+
+mod enums;
+pub use enums::*;
+
+mod errors;
+pub use errors::*;
+
+mod sealed;
+use self::sealed::SealedInstance;
+
+pub mod utils;
+
+/// CORDIC driver: holds the peripheral reference and the [Config] that `reconfigure` applies to it
+pub struct Cordic<'d, T: Instance> {
+    peri: PeripheralRef<'d, T>,
+    config: Config,
+}
+
+/// CORDIC instance trait
+#[allow(private_bounds)]
+pub trait Instance: SealedInstance + Peripheral<P = Self> + crate::rcc::RccPeripheral {}
+
+/// CORDIC configuration: a function/precision/scale triple that is validated by [Config::new]
+#[derive(Debug)]
+pub struct Config {
+    function: Function,
+    precision: Precision,
+    scale: Scale,
+}
+
+impl Config {
+    /// Build a configuration, rejecting any `scale` that `function` does not support.
+    ///
+    /// Returns [CordicError::ConfigError] when the function/scale pair is refused by `check_scale`.
+    pub fn new(function: Function, precision: Precision, scale: Scale) -> Result<Self, CordicError> {
+        let candidate = Self {
+            function,
+            precision,
+            scale,
+        };
+
+        match candidate.check_scale() {
+            Ok(()) => Ok(candidate),
+            Err(bad_scale) => Err(CordicError::from(bad_scale)),
+        }
+    }
+
+    /// Check that the configured scale is one the configured function accepts;
+    /// on failure the error carries the inclusive range of raw scale values that would be legal.
+    fn check_scale(&self) -> Result<(), ConfigError> {
+        use Function::*;
+
+        // inclusive [min, max] of the raw scale value each function accepts
+        let allowed: [u8; 2] = match self.function {
+            Cos | Sin | Phase | Modulus => [0, 0],
+            Arctan => [0, 7],
+            Cosh | Sinh | Arctanh => [1, 1],
+            Ln => [1, 4],
+            Sqrt => [0, 2],
+        };
+
+        let requested = self.scale as u8;
+        if (allowed[0]..=allowed[1]).contains(&requested) {
+            return Ok(());
+        }
+
+        // report the accepted range together with the offending function
+        Err(ConfigError {
+            func: self.function,
+            scale_range: allowed,
+        })
+    }
+}
+
+// common methods
+impl<'d, T: Instance> Cordic<'d, T> {
+    /// Create a Cordic driver instance and apply `config` to the peripheral
+    ///
+    /// Note:  
+    /// If you need a peripheral -> CORDIC -> peripheral mode,  
+    /// you may want to set the argument count and data widths afterwards with [Self::extra_config]
+    pub fn new(peri: impl Peripheral<P = T> + 'd, config: Config) -> Self {
+        T::enable_and_reset();
+
+        into_ref!(peri);
+
+        let mut instance = Self { peri, config };
+
+        instance.reconfigure();
+
+        instance
+    }
+
+    /// Replace the stored config and apply it to the peripheral via [Self::reconfigure]
+    pub fn set_config(&mut self, config: Config) {
+        self.config = config;
+        self.reconfigure();
+    }
+
+    /// Set the argument access count and the argument/result data widths.
+    pub fn extra_config(&mut self, arg_cnt: AccessCount, arg_width: Width, res_width: Width) {
+        self.peri.set_argument_count(arg_cnt);
+        self.peri.set_data_width(arg_width, res_width);
+    }
+
+    fn clean_rrdy_flag(&mut self) {
+        while self.peri.ready_to_read() {
+            self.peri.read_result();
+        }
+    }
+
+    /// Disable IRQ and DMA, clean RRDY, set ARG2 to +1 (0x7FFFFFFF), then apply the stored config
+    pub fn reconfigure(&mut self) {
+        // reset ARG2 to +1 by running a throw-away two-argument calculation whose second argument is +1
+        {
+            self.peri.disable_irq();
+            self.peri.disable_read_dma();
+            self.peri.disable_write_dma();
+            self.clean_rrdy_flag();
+
+            self.peri.set_func(Function::Cos);
+            self.peri.set_precision(Precision::Iters4);
+            self.peri.set_scale(Scale::Arg1Res1);
+            self.peri.set_argument_count(AccessCount::Two);
+            self.peri.set_data_width(Width::Bits32, Width::Bits32);
+            self.peri.write_argument(0x0u32);
+            self.peri.write_argument(0x7FFFFFFFu32);
+
+            self.clean_rrdy_flag();
+        }
+
+        self.peri.set_func(self.config.function);
+        self.peri.set_precision(self.config.precision);
+        self.peri.set_scale(self.config.scale);
+
+        // NRES is deliberately not set here: every "calc"-ish function sets it itself,
+        // because ARGSIZE and RESSIZE can differ between calls and NRES has to follow them.
+    }
+}
+
+impl<'d, T: Instance> Drop for Cordic<'d, T> {
+    fn drop(&mut self) {
+        T::disable();
+    }
+}
+
+// q1.31 related
+impl<'d, T: Instance> Cordic<'d, T> {
+    /// Run a blocking CORDIC calculation in q1.31 format and return the number of results written to `res`  
+    ///
+    /// Notice:  
+    /// If you set `arg1_only` to `true`, make sure ARG2 has already been set to the desired value.  
+    /// This function won't set ARG2 to +1 before or after each round of calculation.  
+    /// If you want to make sure ARG2 is set to +1, consider running [.reconfigure()](Self::reconfigure).
+    pub fn blocking_calc_32bit(
+        &mut self,
+        arg: &[u32],
+        res: &mut [u32],
+        arg1_only: bool,
+        res1_only: bool,
+    ) -> Result<usize, CordicError> {
+        if arg.is_empty() {
+            return Ok(0);
+        }
+
+        let res_cnt = Self::check_arg_res_length_32bit(arg.len(), res.len(), arg1_only, res1_only)?;
+
+        self.peri
+            .set_argument_count(if arg1_only { AccessCount::One } else { AccessCount::Two });
+
+        self.peri
+            .set_result_count(if res1_only { AccessCount::One } else { AccessCount::Two });
+
+        self.peri.set_data_width(Width::Bits32, Width::Bits32);
+
+        let mut cnt = 0;
+
+        match arg1_only {
+            true => {
+                // To use the CORDIC preload feature, the first value is special:
+                // it is written to the CORDIC WDATA register outside of the loop
+                let first_value = arg[0];
+
+                // preload the 1st value to CORDIC, which starts the first calculation
+                self.peri.write_argument(first_value);
+
+                for &arg1 in &arg[1..] {
+                    // preload arg1 (for the next calculation)
+                    self.peri.write_argument(arg1);
+
+                    // then read the result of the current calculation
+                    res[cnt] = self.peri.read_result();
+                    cnt += 1;
+                    if !res1_only {
+                        res[cnt] = self.peri.read_result();
+                        cnt += 1;
+                    }
+                }
+
+                // read the result of the last calculation
+                res[cnt] = self.peri.read_result();
+                cnt += 1;
+                if !res1_only {
+                    res[cnt] = self.peri.read_result();
+                    // no further increment: this is the last result slot
+                }
+            }
+            false => {
+                // To use the CORDIC preload feature, the first and last values are special:
+                // they are written to the CORDIC WDATA register outside of the loop
+                let first_value = arg[0];
+                let last_value = arg[arg.len() - 1];
+
+                let paired_args = &arg[1..arg.len() - 1];
+
+                // preload the 1st value (arg1 of the first calculation) to CORDIC
+                self.peri.write_argument(first_value);
+
+                for args in paired_args.chunks(2) {
+                    let arg2 = args[0];
+                    let arg1 = args[1];
+
+                    // write arg2 (for the current calculation) first, which starts the calculation
+                    self.peri.write_argument(arg2);
+
+                    // preload arg1 (for the next calculation)
+                    self.peri.write_argument(arg1);
+
+                    // then read the result of the current calculation
+                    res[cnt] = self.peri.read_result();
+                    cnt += 1;
+                    if !res1_only {
+                        res[cnt] = self.peri.read_result();
+                        cnt += 1;
+                    }
+                }
+
+                // write the last value (arg2 of the last calculation) to CORDIC, then read its result
+                self.peri.write_argument(last_value);
+                res[cnt] = self.peri.read_result();
+                cnt += 1;
+                if !res1_only {
+                    res[cnt] = self.peri.read_result();
+                    // no further increment: this is the last result slot
+                }
+            }
+        }
+
+        // at this point every one of the `res_cnt` result slots has been written
+
+        Ok(res_cnt)
+    }
+
+    /// Run an async (DMA driven) CORDIC calculation in q1.31 format
+    ///
+    /// Notice:  
+    /// If you set `arg1_only` to `true`, make sure ARG2 has already been set to the desired value.  
+    /// This function won't set ARG2 to +1 before or after each round of calculation.  
+    /// If you want to make sure ARG2 is set to +1, consider running [.reconfigure()](Self::reconfigure).
+    pub async fn async_calc_32bit(
+        &mut self,
+        write_dma: impl Peripheral<P = impl WriteDma<T>>,
+        read_dma: impl Peripheral<P = impl ReadDma<T>>,
+        arg: &[u32],
+        res: &mut [u32],
+        arg1_only: bool,
+        res1_only: bool,
+    ) -> Result<usize, CordicError> {
+        if arg.is_empty() {
+            return Ok(0);
+        }
+
+        let res_cnt = Self::check_arg_res_length_32bit(arg.len(), res.len(), arg1_only, res1_only)?;
+
+        let active_res_buf = &mut res[..res_cnt];
+
+        into_ref!(write_dma, read_dma);
+
+        self.peri
+            .set_argument_count(if arg1_only { AccessCount::One } else { AccessCount::Two });
+
+        self.peri
+            .set_result_count(if res1_only { AccessCount::One } else { AccessCount::Two });
+
+        self.peri.set_data_width(Width::Bits32, Width::Bits32);
+
+        let write_req = write_dma.request();
+        let read_req = read_dma.request();
+
+        self.peri.enable_write_dma();
+        self.peri.enable_read_dma();
+
+        let _on_drop = OnDrop::new(|| {
+            self.peri.disable_write_dma();
+            self.peri.disable_read_dma();
+        });
+
+        unsafe {
+            let write_transfer = dma::Transfer::new_write(
+                &mut write_dma,
+                write_req,
+                arg,
+                T::regs().wdata().as_ptr() as *mut _,
+                Default::default(),
+            );
+
+            let read_transfer = dma::Transfer::new_read(
+                &mut read_dma,
+                read_req,
+                T::regs().rdata().as_ptr() as *mut _,
+                active_res_buf,
+                Default::default(),
+            );
+
+            embassy_futures::join::join(write_transfer, read_transfer).await;
+        }
+
+        Ok(res_cnt)
+    }
+
+    /// Validate the argument/result buffer lengths for a q1.31 calculation.
+    ///
+    /// Returns the number of result words that will be produced, or an error when `arg_len` is odd
+    /// while two arguments are consumed per calculation, or when `res_len` is too small.
+    fn check_arg_res_length_32bit(
+        arg_len: usize,
+        res_len: usize,
+        arg1_only: bool,
+        res1_only: bool,
+    ) -> Result<usize, CordicError> {
+        let two_args_per_calc = !arg1_only;
+
+        if two_args_per_calc && arg_len % 2 != 0 {
+            return Err(CordicError::ArgumentLengthIncorrect);
+        }
+
+        // two result words per calculation double the count, two argument words per calculation halve it
+        let words_out = if res1_only { arg_len } else { arg_len * 2 };
+        let required = if two_args_per_calc { words_out / 2 } else { words_out };
+
+        if res_len < required {
+            Err(CordicError::ResultLengthNotEnough)
+        } else {
+            Ok(required)
+        }
+    }
+}
+
+// q1.15 related
+impl<'d, T: Instance> Cordic<'d, T> {
+    /// Run a blocking CORDIC calculation in q1.15 format.
+    ///
+    /// Each `u32` in `arg` carries two packed q1.15 arguments and each `u32` stored into `res`
+    /// carries two packed q1.15 results; packing and unpacking them is the caller's responsibility.
+    ///
+    /// The argument/result access counts are set to one and both data widths to 16 bits before the calculation starts.
+    ///
+    /// Returns the number of words stored into `res`, which equals `arg.len()`.
+    /// An empty `arg` yields `Ok(0)` without touching the peripheral.
+    ///
+    /// # Errors
+    ///
+    /// [CordicError::ResultLengthNotEnough] when `res` is shorter than `arg`.
+    pub fn blocking_calc_16bit(&mut self, arg: &[u32], res: &mut [u32]) -> Result<usize, CordicError> {
+        let (first_word, remaining_words) = match arg.split_first() {
+            Some((first, rest)) => (*first, rest),
+            None => return Ok(0),
+        };
+
+        let word_cnt = arg.len();
+        if res.len() < word_cnt {
+            return Err(CordicError::ResultLengthNotEnough);
+        }
+
+        // a q1.15 word holds both arguments (or both results),
+        // so one register access per calculation is enough in each direction
+        self.peri.set_argument_count(AccessCount::One);
+        self.peri.set_result_count(AccessCount::One);
+
+        self.peri.set_data_width(Width::Bits16, Width::Bits16);
+
+        // writing the first word starts the first calculation
+        self.peri.write_argument(first_word);
+
+        // preload the next word, then collect the result of the calculation that is already running
+        for (slot, &next_word) in res.iter_mut().zip(remaining_words) {
+            self.peri.write_argument(next_word);
+            *slot = self.peri.read_result();
+        }
+
+        // nothing is left to preload: collect the result of the last calculation
+        res[word_cnt - 1] = self.peri.read_result();
+
+        Ok(word_cnt)
+    }
+
+    /// Run an async (DMA driven) CORDIC calculation in q1.15 format  
+    ///
+    /// Notice:  
+    /// The caller is responsible for merging two u16 arguments into one u32 word, and/or splitting one u32 word into two u16 results.
+    pub async fn async_calc_16bit(
+        &mut self,
+        write_dma: impl Peripheral<P = impl WriteDma<T>>,
+        read_dma: impl Peripheral<P = impl ReadDma<T>>,
+        arg: &[u32],
+        res: &mut [u32],
+    ) -> Result<usize, CordicError> {
+        if arg.is_empty() {
+            return Ok(0);
+        }
+
+        if arg.len() > res.len() {
+            return Err(CordicError::ResultLengthNotEnough);
+        }
+
+        let res_cnt = arg.len();
+
+        let active_res_buf = &mut res[..res_cnt];
+
+        into_ref!(write_dma, read_dma);
+
+        // In q1.15 mode, a single write/read transfers both arguments/results
+        self.peri.set_argument_count(AccessCount::One);
+        self.peri.set_result_count(AccessCount::One);
+
+        self.peri.set_data_width(Width::Bits16, Width::Bits16);
+
+        let write_req = write_dma.request();
+        let read_req = read_dma.request();
+
+        self.peri.enable_write_dma();
+        self.peri.enable_read_dma();
+
+        let _on_drop = OnDrop::new(|| {
+            self.peri.disable_write_dma();
+            self.peri.disable_read_dma();
+        });
+
+        unsafe {
+            let write_transfer = dma::Transfer::new_write(
+                &mut write_dma,
+                write_req,
+                arg,
+                T::regs().wdata().as_ptr() as *mut _,
+                Default::default(),
+            );
+
+            let read_transfer = dma::Transfer::new_read(
+                &mut read_dma,
+                read_req,
+                T::regs().rdata().as_ptr() as *mut _,
+                active_res_buf,
+                Default::default(),
+            );
+
+            embassy_futures::join::join(write_transfer, read_transfer).await;
+        }
+
+        Ok(res_cnt)
+    }
+}
+
+macro_rules! check_arg_value {
+    ($func_arg1_name:ident, $func_arg2_name:ident, $float_type:ty) => {
+        impl<'d, T: Instance> Cordic<'d, T> {
+            /// Check that the ARG1 value is within the range accepted by the configured FUNCTION and SCALE
+            pub fn $func_arg1_name(&self, arg: $float_type) -> Result<(), ArgError> {
+                let config = &self.config;
+
+                use Function::*;
+
+                struct Arg1ErrInfo {
+                    scale: Option<Scale>,
+                    range: [f32; 2], // f32 is sufficient, it is only used for error display
+                    inclusive_upper_bound: bool,
+                }
+
+                let err_info = match config.function {
+                    Cos | Sin | Phase | Modulus | Arctan if !(-1.0..=1.0).contains(arg) => Some(Arg1ErrInfo {
+                        scale: None,
+                        range: [-1.0, 1.0],
+                        inclusive_upper_bound: true,
+                    }),
+
+                    Cosh | Sinh if !(-0.559..=0.559).contains(arg) => Some(Arg1ErrInfo {
+                        scale: None,
+                        range: [-0.559, 0.559],
+                        inclusive_upper_bound: true,
+                    }),
+
+                    Arctanh if !(-0.403..=0.403).contains(arg) => Some(Arg1ErrInfo {
+                        scale: None,
+                        range: [-0.403, 0.403],
+                        inclusive_upper_bound: true,
+                    }),
+
+                    Ln => match config.scale {
+                        Scale::Arg1o2Res2 if !(0.0535..0.5).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o2Res2),
+                            range: [0.0535, 0.5],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o4Res4 if !(0.25..0.75).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o4Res4),
+                            range: [0.25, 0.75],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o8Res8 if !(0.375..0.875).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o8Res8),
+                            range: [0.375, 0.875],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o16Res16 if !(0.4375..0.584).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o16Res16),
+                            range: [0.4375, 0.584],
+                            inclusive_upper_bound: false,
+                        }),
+
+                        Scale::Arg1o2Res2 | Scale::Arg1o4Res4 | Scale::Arg1o8Res8 | Scale::Arg1o16Res16 => None,
+
+                        _ => unreachable!(),
+                    },
+
+                    Sqrt => match config.scale {
+                        Scale::Arg1Res1 if !(0.027..0.75).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1Res1),
+                            range: [0.027, 0.75],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o2Res2 if !(0.375..0.875).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o2Res2),
+                            range: [0.375, 0.875],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1o4Res4 if !(0.4375..0.584).contains(arg) => Some(Arg1ErrInfo {
+                            scale: Some(Scale::Arg1o4Res4),
+                            range: [0.4375, 0.584],
+                            inclusive_upper_bound: false,
+                        }),
+                        Scale::Arg1Res1 | Scale::Arg1o2Res2 | Scale::Arg1o4Res4 => None,
+                        _ => unreachable!(),
+                    },
+
+                    Cos | Sin | Phase | Modulus | Arctan | Cosh | Sinh | Arctanh => None,
+                };
+
+                if let Some(err) = err_info {
+                    return Err(ArgError {
+                        func: config.function,
+                        scale: err.scale,
+                        arg_range: err.range,
+                        inclusive_upper_bound: err.inclusive_upper_bound,
+                        arg_type: ArgType::Arg1,
+                    });
+                }
+
+                Ok(())
+            }
+
+            /// Check that the ARG2 value is within the range accepted by the configured FUNCTION
+            pub fn $func_arg2_name(&self, arg: $float_type) -> Result<(), ArgError> {
+                let config = &self.config;
+
+                use Function::*;
+
+                struct Arg2ErrInfo {
+                    range: [f32; 2], // f32 is sufficient, it is only used for error display
+                }
+
+                let err_info = match config.function {
+                    Cos | Sin if !(0.0..=1.0).contains(arg) => Some(Arg2ErrInfo { range: [0.0, 1.0] }),
+
+                    Phase | Modulus if !(-1.0..=1.0).contains(arg) => Some(Arg2ErrInfo { range: [-1.0, 1.0] }),
+
+                    Cos | Sin | Phase | Modulus | Arctan | Cosh | Sinh | Arctanh | Ln | Sqrt => None,
+                };
+
+                if let Some(err) = err_info {
+                    return Err(ArgError {
+                        func: config.function,
+                        scale: None,
+                        arg_range: err.range,
+                        inclusive_upper_bound: true,
+                        arg_type: ArgType::Arg2,
+                    });
+                }
+
+                Ok(())
+            }
+        }
+    };
+}
+
+check_arg_value!(check_f64_arg1, check_f64_arg2, &f64);
+check_arg_value!(check_f32_arg1, check_f32_arg2, &f32);
+
+foreach_interrupt!(
+    ($inst:ident, cordic, $block:ident, GLOBAL, $irq:ident) => {
+        impl Instance for peripherals::$inst {
+        }
+
+        impl SealedInstance for peripherals::$inst {
+            fn regs() -> crate::pac::cordic::Cordic {
+                crate::pac::$inst
+            }
+        }
+    };
+);
+
+dma_trait!(WriteDma, Instance);
+dma_trait!(ReadDma, Instance);
diff --git a/embassy-stm32/src/cordic/sealed.rs b/embassy-stm32/src/cordic/sealed.rs
new file mode 100644
index 000000000..8f0bd1830
--- /dev/null
+++ b/embassy-stm32/src/cordic/sealed.rs
@@ -0,0 +1,116 @@
+use super::*;
+use crate::pac::cordic::vals;
+
+/// Register-level CORDIC operations; every method acts on the register block returned by `regs()`
+pub(super) trait SealedInstance {
+    /// Get access to CORDIC registers
+    fn regs() -> crate::pac::cordic::Cordic;
+
+    /// Set FUNC from the discriminant of `func`
+    fn set_func(&self, func: Function) {
+        Self::regs()
+            .csr()
+            .modify(|v| v.set_func(vals::Func::from_bits(func as u8)));
+    }
+
+    /// Set PRECISION from the discriminant of `precision`
+    fn set_precision(&self, precision: Precision) {
+        Self::regs()
+            .csr()
+            .modify(|v| v.set_precision(vals::Precision::from_bits(precision as u8)))
+    }
+
+    /// Set SCALE from the discriminant of `scale`
+    fn set_scale(&self, scale: Scale) {
+        Self::regs()
+            .csr()
+            .modify(|v| v.set_scale(vals::Scale::from_bits(scale as u8)))
+    }
+
+    /// Enable global interrupt (set IEN)
+    fn enable_irq(&self) {
+        Self::regs().csr().modify(|v| v.set_ien(true))
+    }
+
+    /// Disable global interrupt (clear IEN)
+    fn disable_irq(&self) {
+        Self::regs().csr().modify(|v| v.set_ien(false))
+    }
+
+    /// Enable Read DMA (set DMAREN)
+    fn enable_read_dma(&self) {
+        Self::regs().csr().modify(|v| {
+            v.set_dmaren(true);
+        })
+    }
+
+    /// Disable Read DMA (clear DMAREN)
+    fn disable_read_dma(&self) {
+        Self::regs().csr().modify(|v| {
+            v.set_dmaren(false);
+        })
+    }
+
+    /// Enable Write DMA (set DMAWEN)
+    fn enable_write_dma(&self) {
+        Self::regs().csr().modify(|v| {
+            v.set_dmawen(true);
+        })
+    }
+
+    /// Disable Write DMA (clear DMAWEN)
+    fn disable_write_dma(&self) {
+        Self::regs().csr().modify(|v| {
+            v.set_dmawen(false);
+        })
+    }
+
+    /// Set NARGS value (argument register access count)
+    fn set_argument_count(&self, n: AccessCount) {
+        Self::regs().csr().modify(|v| {
+            v.set_nargs(match n {
+                AccessCount::One => vals::Num::NUM1,
+                AccessCount::Two => vals::Num::NUM2,
+            })
+        })
+    }
+
+    /// Set NRES value (result register access count)
+    fn set_result_count(&self, n: AccessCount) {
+        Self::regs().csr().modify(|v| {
+            v.set_nres(match n {
+                AccessCount::One => vals::Num::NUM1,
+                AccessCount::Two => vals::Num::NUM2,
+            });
+        })
+    }
+
+    /// Set ARGSIZE and RESSIZE value (argument and result data width)
+    fn set_data_width(&self, arg: Width, res: Width) {
+        Self::regs().csr().modify(|v| {
+            v.set_argsize(match arg {
+                Width::Bits32 => vals::Size::BITS32,
+                Width::Bits16 => vals::Size::BITS16,
+            });
+            v.set_ressize(match res {
+                Width::Bits32 => vals::Size::BITS32,
+                Width::Bits16 => vals::Size::BITS16,
+            })
+        })
+    }
+
+    /// Read RRDY flag
+    fn ready_to_read(&self) -> bool {
+        Self::regs().csr().read().rrdy()
+    }
+
+    /// Write `arg` to WDATA
+    fn write_argument(&self, arg: u32) {
+        Self::regs().wdata().write_value(arg)
+    }
+
+    /// Read the current value of RDATA
+    fn read_result(&self) -> u32 {
+        Self::regs().rdata().read()
+    }
+}
diff --git a/embassy-stm32/src/cordic/utils.rs b/embassy-stm32/src/cordic/utils.rs
new file mode 100644
index 000000000..008f50270
--- /dev/null
+++ b/embassy-stm32/src/cordic/utils.rs
@@ -0,0 +1,62 @@
+//! Common math utils
+use super::errors::NumberOutOfRange;
+
+macro_rules! floating_fixed_convert {
+    ($f_to_q:ident, $q_to_f:ident, $unsigned_bin_typ:ty, $signed_bin_typ:ty, $float_ty:ty, $offset:literal) => {
+        /// Convert a floating-point value in `[-1.0, 1.0]` to fixed-point format; other values are rejected.
+        pub fn $f_to_q(value: $float_ty) -> Result<$unsigned_bin_typ, NumberOutOfRange> {
+            // SCALE is 2^offset and MIN_POSITIVE is 2^(-offset); both are exact, so no bit-pattern transmute is needed.
+            const SCALE: $float_ty = (1 as $unsigned_bin_typ << $offset) as $float_ty;
+            const MIN_POSITIVE: $float_ty = 1.0 / SCALE;
+
+            // NOTE: NaN compares false in both range checks below, and the final `as` cast turns it into 0.
+            if value < -1.0 {
+                return Err(NumberOutOfRange::BelowLowerBound);
+            }
+
+            if value > 1.0 {
+                return Err(NumberOutOfRange::AboveUpperBound);
+            }
+
+            let value = if 1.0 - MIN_POSITIVE < value && value <= 1.0 {
+                // make an exception for values in (1.0 - 2^{-x}, 1.0]:
+                // convert them to the max representable value of the q1.x format
+                (1.0 as $float_ty) - MIN_POSITIVE
+            } else {
+                value
+            };
+
+            // It's necessary to cast the float value to a signed integer before converting it to an unsigned value.
+            // Since the register value is actually a "signed value", an "as" cast keeps the binary representation but marks it as unsigned for register writing.
+            // see https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast
+            Ok((value * SCALE) as $signed_bin_typ as $unsigned_bin_typ)
+        }
+
+        #[inline(always)]
+        /// Convert a fixed-point value to floating-point format.
+        pub fn $q_to_f(value: $unsigned_bin_typ) -> $float_ty {
+            // It's necessary to cast the unsigned integer to a signed integer before converting it to a float value.
+            // Since the register value is actually a "signed value", an "as" cast keeps the binary representation but marks it as signed.
+            // see https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast
+            (value as $signed_bin_typ as $float_ty) / ((1 as $unsigned_bin_typ << $offset) as $float_ty)
+        }
+    };
+}
+
+floating_fixed_convert!(
+    f64_to_q1_31,
+    q1_31_to_f64,
+    u32,
+    i32,
+    f64,
+    31 // number of fractional bits: values are scaled by 2^31
+);
+
+floating_fixed_convert!(
+    f32_to_q1_15,
+    q1_15_to_f32,
+    u16,
+    i16,
+    f32,
+    15 // number of fractional bits: values are scaled by 2^15
+);
diff --git a/embassy-stm32/src/lib.rs b/embassy-stm32/src/lib.rs
index 8b826e5ac..99d8b5036 100644
--- a/embassy-stm32/src/lib.rs
+++ b/embassy-stm32/src/lib.rs
@@ -32,6 +32,9 @@ pub mod timer;
 pub mod adc;
 #[cfg(can)]
 pub mod can;
+// FIXME: the Cordic driver causes stm32u5a5zj to crash
+#[cfg(all(cordic, not(any(stm32u5a5, stm32u5a9))))]
+pub mod cordic;
 #[cfg(crc)]
 pub mod crc;
 #[cfg(cryp)]
@@ -244,7 +247,7 @@ pub fn init(config: Config) -> Peripherals {
 
         #[cfg(dbgmcu)]
         crate::pac::DBGMCU.cr().modify(|cr| {
-            #[cfg(any(dbgmcu_h5))]
+            #[cfg(dbgmcu_h5)]
             {
                 cr.set_stop(config.enable_debug_during_sleep);
                 cr.set_standby(config.enable_debug_during_sleep);
diff --git a/examples/stm32h5/src/bin/cordic.rs b/examples/stm32h5/src/bin/cordic.rs
new file mode 100644
index 000000000..73e873574
--- /dev/null
+++ b/examples/stm32h5/src/bin/cordic.rs
@@ -0,0 +1,78 @@
+#![no_std]
+#![no_main]
+
+use defmt::*;
+use embassy_executor::Spawner;
+use embassy_stm32::cordic::{self, utils};
+use {defmt_rtt as _, panic_probe as _};
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let mut dp = embassy_stm32::init(Default::default());
+
+    let mut cordic = cordic::Cordic::new(
+        &mut dp.CORDIC,
+        unwrap!(cordic::Config::new(
+            cordic::Function::Sin,
+            Default::default(),
+            Default::default(),
+        )),
+    );
+
+    // The output buffer lengths are not strict: anything larger than the minimum required is ok.
+    let mut output_f64 = [0f64; 19];
+    let mut output_u32 = [0u32; 21];
+
+    // tips:
+    // The CORDIC peripheral places strict limits on its input values; you can also use the ".check_argX_fXX()" methods
+    // to make sure your input values are compatible with the current CORDIC setup.
+    let arg1 = [-1.0, -0.5, 0.0, 0.5, 1.0]; // for trigonometric functions, an ARG1 angle in [-pi, pi] should be mapped to [-1, 1]
+    let arg2 = [0.5]; // and for the Sin function, ARG2 should be in [0, 1]
+
+    let mut input_buf = [0u32; 9];
+
+    // convert the first ARG1/ARG2 pair from floating point to fixed point
+    input_buf[0] = unwrap!(utils::f64_to_q1_31(arg1[0]));
+    input_buf[1] = unwrap!(utils::f64_to_q1_31(arg2[0]));
+
+    // If the input is short, blocking mode can be used to minimize overhead.
+    let cnt0 = unwrap!(cordic.blocking_calc_32bit(
+        &input_buf[..2], // the input length is strict, since the driver uses it to determine the calculation count
+        &mut output_u32,
+        false,
+        false
+    ));
+
+    // convert the results from fixed point into floating point
+    for (&u32_val, f64_val) in output_u32[..cnt0].iter().zip(output_f64.iter_mut()) {
+        *f64_val = utils::q1_31_to_f64(u32_val);
+    }
+
+    // convert the remaining inputs from floating point to fixed point
+    //
+    // the first value of arg1 has already been used, so start from arg1[1..]
+    for (&f64_val, u32_val) in arg1[1..].iter().zip(input_buf.iter_mut()) {
+        *u32_val = unwrap!(utils::f64_to_q1_31(f64_val));
+    }
+
+    // If the calculation is a little longer, async mode can make use of DMA and let the core do other work.
+    let cnt1 = unwrap!(
+        cordic
+            .async_calc_32bit(
+                &mut dp.GPDMA1_CH0,
+                &mut dp.GPDMA1_CH1,
+                &input_buf[..arg1.len() - 1], // limit the input buf to the values written by the loop above
+                &mut output_u32,
+                true,
+                false
+            )
+            .await
+    );
+
+    // convert the results from fixed point into floating point, appending after the blocking-mode results
+    for (&u32_val, f64_val) in output_u32[..cnt1].iter().zip(output_f64[cnt0..cnt0 + cnt1].iter_mut()) {
+        *f64_val = utils::q1_31_to_f64(u32_val);
+    }
+
+    println!("result: {}", output_f64[..cnt0 + cnt1]);
+}
diff --git a/tests/stm32/Cargo.toml b/tests/stm32/Cargo.toml
index e42470004..e09083111 100644
--- a/tests/stm32/Cargo.toml
+++ b/tests/stm32/Cargo.toml
@@ -14,8 +14,8 @@ stm32f429zi = ["embassy-stm32/stm32f429zi", "chrono", "eth", "stop", "can", "not
 stm32f446re = ["embassy-stm32/stm32f446re", "chrono", "stop", "can", "not-gpdma", "dac", "sdmmc"]
 stm32f767zi = ["embassy-stm32/stm32f767zi", "chrono", "not-gpdma", "eth", "rng"]
 stm32g071rb = ["embassy-stm32/stm32g071rb", "cm0", "not-gpdma", "dac", "ucpd"]
-stm32g491re = ["embassy-stm32/stm32g491re", "chrono", "stop", "not-gpdma", "rng", "fdcan"]
-stm32h563zi = ["embassy-stm32/stm32h563zi", "chrono", "eth", "rng", "hash"]
+stm32g491re = ["embassy-stm32/stm32g491re", "chrono", "stop", "not-gpdma", "rng", "fdcan", "cordic"]
+stm32h563zi = ["embassy-stm32/stm32h563zi", "chrono", "eth", "rng", "hash", "cordic"]
 stm32h753zi = ["embassy-stm32/stm32h753zi", "chrono", "not-gpdma", "eth", "rng", "fdcan", "hash", "cryp"]
 stm32h755zi = ["embassy-stm32/stm32h755zi-cm7", "chrono", "not-gpdma", "eth", "dac", "rng", "fdcan", "hash", "cryp"]
 stm32h7a3zi = ["embassy-stm32/stm32h7a3zi", "not-gpdma", "rng", "fdcan"]
@@ -25,8 +25,8 @@ stm32l496zg = ["embassy-stm32/stm32l496zg", "not-gpdma", "rng"]
 stm32l4a6zg = ["embassy-stm32/stm32l4a6zg", "chrono", "not-gpdma", "rng", "hash"]
 stm32l4r5zi = ["embassy-stm32/stm32l4r5zi", "chrono", "not-gpdma", "rng"]
 stm32l552ze = ["embassy-stm32/stm32l552ze", "not-gpdma", "rng", "hash"]
-stm32u585ai = ["embassy-stm32/stm32u585ai", "chrono", "rng", "hash"]
-stm32u5a5zj = ["embassy-stm32/stm32u5a5zj", "chrono", "rng", "hash"]
+stm32u585ai = ["embassy-stm32/stm32u585ai", "chrono", "rng", "hash", "cordic"]
+stm32u5a5zj = ["embassy-stm32/stm32u5a5zj", "chrono", "rng", "hash"] # FIXME: the cordic test causes it to crash
 stm32wb55rg = ["embassy-stm32/stm32wb55rg", "chrono", "not-gpdma", "ble", "mac" , "rng"]
 stm32wba52cg = ["embassy-stm32/stm32wba52cg", "chrono", "rng", "hash"]
 stm32wl55jc = ["embassy-stm32/stm32wl55jc-cm4", "not-gpdma", "rng", "chrono"]
@@ -48,6 +48,7 @@ embassy-stm32-wpan = []
 not-gpdma = []
 dac = []
 ucpd = []
+cordic = ["dep:num-traits"]
 
 cm0 = ["portable-atomic/unsafe-assume-single-core"]
 
@@ -83,6 +84,7 @@ chrono = { version = "^0.4", default-features = false, optional = true}
 sha2 = { version = "0.10.8", default-features = false }
 hmac = "0.12.1"
 aes-gcm = {version = "0.10.3", default-features = false, features = ["aes", "heapless"] }
+num-traits = {version="0.2", default-features = false,features = ["libm"], optional = true}
 
 # BEGIN TESTS
 # Generated by gen_test.py. DO NOT EDIT.
@@ -91,6 +93,11 @@ name = "can"
 path = "src/bin/can.rs"
 required-features = [ "can",]
 
+[[bin]]
+name = "cordic"
+path = "src/bin/cordic.rs"
+required-features = [ "rng", "cordic",]
+
 [[bin]]
 name = "cryp"
 path = "src/bin/cryp.rs"
diff --git a/tests/stm32/gen_test.py b/tests/stm32/gen_test.py
index 8ff156c0e..daf714376 100644
--- a/tests/stm32/gen_test.py
+++ b/tests/stm32/gen_test.py
@@ -14,7 +14,7 @@ for f in sorted(glob('./src/bin/*.rs')):
     with open(f, 'r') as f:
         for line in f:
             if line.startswith('// required-features:'):
-                features = line.split(':', 2)[1].strip().split(',')
+                features = [feature.strip() for feature in line.split(':', 2)[1].strip().split(',')]
 
     tests[name] = features
 
diff --git a/tests/stm32/src/bin/cordic.rs b/tests/stm32/src/bin/cordic.rs
new file mode 100644
index 000000000..400e10207
--- /dev/null
+++ b/tests/stm32/src/bin/cordic.rs
@@ -0,0 +1,135 @@
+// required-features: rng, cordic
+
+// Test Cordic driver, with Q1.31 format, Sin function, at 24 iterations (aka PRECISION = 6), using DMA transfer
+
+#![no_std]
+#![no_main]
+
+#[path = "../common.rs"]
+mod common;
+use common::*;
+use embassy_executor::Spawner;
+use embassy_stm32::cordic::utils;
+use embassy_stm32::{bind_interrupts, cordic, peripherals, rng};
+use num_traits::Float;
+use {defmt_rtt as _, panic_probe as _};
+
+bind_interrupts!(struct Irqs {
+   RNG => rng::InterruptHandler<peripherals::RNG>;
+});
+
+/* input value control, can be changed */
+
+const INPUT_U32_COUNT: usize = 9;
+const INPUT_U8_COUNT: usize = 4 * INPUT_U32_COUNT;
+
+// Assume the first calculation needs 2 arguments and the rest need 1 argument each,
+// and that every calculation generates 2 results.
+const OUTPUT_LENGTH: usize = (INPUT_U32_COUNT - 1) * 2;
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) {
+    let dp = embassy_stm32::init(config());
+
+    //
+    // use the RNG to generate random Q1.31 values
+    //
+    // we don't generate floating-point values, since not every bit pattern is a valid floating-point value,
+    // and Q1.31 only accepts a fixed range of values.
+
+    let mut rng = rng::Rng::new(dp.RNG, Irqs);
+
+    let mut input_buf_u8 = [0u8; INPUT_U8_COUNT];
+    defmt::unwrap!(rng.async_fill_bytes(&mut input_buf_u8).await);
+
+    // every [u8; 4] becomes one u32 (a Q1.31 value); the sizes match (INPUT_U8_COUNT = 4 * INPUT_U32_COUNT) and any bits form a valid u32
+    let mut input_q1_31 = unsafe { core::mem::transmute::<[u8; INPUT_U8_COUNT], [u32; INPUT_U32_COUNT]>(input_buf_u8) };
+
+    // ARG2 for the Sin function should be inside [0, 1]; clearing the MSB of a Q1.31 value makes sure it is no less than 0.
+    input_q1_31[1] &= !(1u32 << 31);
+
+    //
+    // CORDIC calculation
+    //
+
+    let mut output_q1_31 = [0u32; OUTPUT_LENGTH];
+
+    // set up the Cordic driver
+    let mut cordic = cordic::Cordic::new(
+        dp.CORDIC,
+        defmt::unwrap!(cordic::Config::new(
+            cordic::Function::Sin,
+            Default::default(),
+            Default::default(),
+        )),
+    );
+
+    #[cfg(feature = "stm32g491re")]
+    let (mut write_dma, mut read_dma) = (dp.DMA1_CH4, dp.DMA1_CH5);
+
+    #[cfg(any(feature = "stm32h563zi", feature = "stm32u585ai", feature = "stm32u5a5zj"))]
+    let (mut write_dma, mut read_dma) = (dp.GPDMA1_CH0, dp.GPDMA1_CH1);
+
+    // calculate the first result using blocking mode
+    let cnt0 = defmt::unwrap!(cordic.blocking_calc_32bit(&input_q1_31[..2], &mut output_q1_31, false, false));
+
+    // calculate the remaining results using async mode
+    let cnt1 = defmt::unwrap!(
+        cordic
+            .async_calc_32bit(
+                &mut write_dma,
+                &mut read_dma,
+                &input_q1_31[2..],
+                &mut output_q1_31[cnt0..],
+                true,
+                false,
+            )
+            .await
+    );
+
+    // the total number of output values should equal our output buffer size
+    defmt::assert_eq!(cnt0 + cnt1, output_q1_31.len());
+
+    let mut cordic_result_f64 = [0.0f64; OUTPUT_LENGTH];
+
+    for (f64_val, u32_val) in cordic_result_f64.iter_mut().zip(output_q1_31) {
+        *f64_val = utils::q1_31_to_f64(u32_val);
+    }
+
+    //
+    // software calculation
+    //
+
+    let mut software_result_f64 = [0.0f64; OUTPUT_LENGTH];
+
+    let arg2 = utils::q1_31_to_f64(input_q1_31[1]);
+
+    for (&arg1, res) in input_q1_31
+        .iter()
+        .enumerate()
+        .filter_map(|(idx, val)| if idx != 1 { Some(val) } else { None })
+        .zip(software_result_f64.chunks_mut(2))
+    {
+        let arg1 = utils::q1_31_to_f64(arg1);
+
+        let (raw_res1, raw_res2) = (arg1 * core::f64::consts::PI).sin_cos();
+        (res[0], res[1]) = (raw_res1 * arg2, raw_res2 * arg2);
+    }
+
+    //
+    // check that the results are the same
+    //
+
+    for (cordic_res, software_res) in cordic_result_f64[..cnt0 + cnt1]
+        .chunks(2)
+        .zip(software_result_f64.chunks(2))
+    {
+        for (cord_res, soft_res) in cordic_res.iter().zip(software_res.iter()) {
+            // 2.0.powi(-19) is the max residual error for the Sin function, in q1.31 format, with 24 iterations (aka PRECISION = 6)
+            defmt::assert!((cord_res - soft_res).abs() <= 2.0.powi(-19));
+        }
+    }
+
+    info!("Test OK");
+    cortex_m::asm::bkpt();
+}