From 72bbfec39d3f826c1a8dd485af2da4bcbdd32e35 Mon Sep 17 00:00:00 2001 From: Caleb Garrett <47389035+caleb-garrett@users.noreply.github.com> Date: Sat, 3 Feb 2024 16:10:00 -0500 Subject: [PATCH] Added hash DMA implementation. --- embassy-stm32/build.rs | 1 + embassy-stm32/src/hash/mod.rs | 143 ++++++++++++++----------------- examples/stm32f7/src/bin/hash.rs | 20 +++-- 3 files changed, 79 insertions(+), 85 deletions(-) diff --git a/embassy-stm32/build.rs b/embassy-stm32/build.rs index 948ce3aff..1a68dfc9d 100644 --- a/embassy-stm32/build.rs +++ b/embassy-stm32/build.rs @@ -1015,6 +1015,7 @@ fn main() { (("dac", "CH1"), quote!(crate::dac::DacDma1)), (("dac", "CH2"), quote!(crate::dac::DacDma2)), (("timer", "UP"), quote!(crate::timer::UpDma)), + (("hash", "IN"), quote!(crate::hash::Dma)), ] .into(); diff --git a/embassy-stm32/src/hash/mod.rs b/embassy-stm32/src/hash/mod.rs index 4e37e60e1..ac4854f80 100644 --- a/embassy-stm32/src/hash/mod.rs +++ b/embassy-stm32/src/hash/mod.rs @@ -2,11 +2,13 @@ use core::cmp::min; use core::future::poll_fn; use core::marker::PhantomData; +use core::ptr; use core::task::Poll; use embassy_hal_internal::{into_ref, PeripheralRef}; use embassy_sync::waitqueue::AtomicWaker; +use crate::dma::Transfer; use crate::peripherals::HASH; use stm32_metapac::hash::regs::*; @@ -18,7 +20,6 @@ use crate::{interrupt, pac, peripherals, Peripheral}; const NUM_CONTEXT_REGS: usize = 51; #[cfg(hash_v2)] const NUM_CONTEXT_REGS: usize = 54; -const HASH_BUFFER_LEN: usize = 68; const DIGEST_BLOCK_SIZE: usize = 64; static HASH_WAKER: AtomicWaker = AtomicWaker::new(); @@ -74,8 +75,7 @@ pub enum DataType { /// Stores the state of the HASH peripheral for suspending/resuming /// digest calculation. pub struct Context { - first_word_sent: bool, - buffer: [u8; HASH_BUFFER_LEN], + buffer: [u8; DIGEST_BLOCK_SIZE], buflen: usize, algo: Algorithm, format: DataType, @@ -86,17 +86,19 @@ pub struct Context { } /// HASH driver. 
-pub struct Hash<'d, T: Instance> { +pub struct Hash<'d, T: Instance, D: Dma<T>> { _peripheral: PeripheralRef<'d, T>, + dma: PeripheralRef<'d, D>, } -impl<'d, T: Instance> Hash<'d, T> { +impl<'d, T: Instance, D: Dma<T>> Hash<'d, T, D> { /// Instantiates, resets, and enables the HASH peripheral. - pub fn new(peripheral: impl Peripheral<P = T> + 'd) -> Self { + pub fn new(peripheral: impl Peripheral<P = T> + 'd, dma: impl Peripheral<P = D>
+ 'd) -> Self { HASH::enable_and_reset(); - into_ref!(peripheral); + into_ref!(peripheral, dma); let instance = Self { _peripheral: peripheral, + dma: dma, }; T::Interrupt::unpend(); @@ -109,8 +111,7 @@ impl<'d, T: Instance> Hash<'d, T> { pub async fn start(&mut self, algorithm: Algorithm, format: DataType) -> Context { // Define a context for this new computation. let mut ctx = Context { - first_word_sent: false, - buffer: [0; 68], + buffer: [0; DIGEST_BLOCK_SIZE], buflen: 0, algo: algorithm, format: format, @@ -134,6 +135,11 @@ impl<'d, T: Instance> Hash<'d, T> { } T::regs().cr().modify(|w| w.set_algo0(algo0)); T::regs().cr().modify(|w| w.set_algo1(algo1)); + + // Enable multiple DMA transfers. + T::regs().cr().modify(|w| w.set_mdmat(true)); + + // Set init to load the context registers. Necessary before storing context. T::regs().cr().modify(|w| w.set_init(true)); // Store and return the state of the peripheral. @@ -145,8 +151,8 @@ impl<'d, T: Instance> Hash<'d, T> { /// then updates the state with the provided data. /// Peripheral state is saved upon return. pub async fn update(&mut self, ctx: &mut Context, input: &[u8]) { - let mut data_waiting = input.len() + ctx.buflen; - if data_waiting < DIGEST_BLOCK_SIZE || (data_waiting < ctx.buffer.len() && !ctx.first_word_sent) { + let data_waiting = input.len() + ctx.buflen; + if data_waiting < DIGEST_BLOCK_SIZE { // There isn't enough data to digest a block, so append it to the buffer. ctx.buffer[ctx.buflen..ctx.buflen + input.len()].copy_from_slice(input); ctx.buflen += input.len(); @@ -159,65 +165,35 @@ impl<'d, T: Instance> Hash<'d, T> { let mut ilen_remaining = input.len(); let mut input_start = 0; - // Handle first block. - if !ctx.first_word_sent { - let empty_len = ctx.buffer.len() - ctx.buflen; + // First ingest the data in the buffer. + let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen; + if empty_len > 0 { let copy_len = min(empty_len, ilen_remaining); - // Fill the buffer. 
- if copy_len > 0 { - ctx.buffer[ctx.buflen..ctx.buflen + copy_len].copy_from_slice(&input[0..copy_len]); - ctx.buflen += copy_len; - ilen_remaining -= copy_len; - input_start += copy_len; - } - assert_eq!(ctx.buflen, HASH_BUFFER_LEN); - self.accumulate(ctx.buffer.as_slice()); - data_waiting -= ctx.buflen; - ctx.buflen = 0; - ctx.first_word_sent = true; + ctx.buffer[ctx.buflen..ctx.buflen + copy_len].copy_from_slice(&input[input_start..input_start + copy_len]); + ctx.buflen += copy_len; + ilen_remaining -= copy_len; + input_start += copy_len; } + self.accumulate(&ctx.buffer).await; + ctx.buflen = 0; - if data_waiting < 64 { - // There isn't enough data remaining to process another block, so store it. - assert_eq!(ctx.buflen, 0); - ctx.buffer[0..ilen_remaining].copy_from_slice(&input[input_start..input_start + ilen_remaining]); - ctx.buflen += ilen_remaining; + // Move any extra data to the now-empty buffer. + let leftovers = ilen_remaining % DIGEST_BLOCK_SIZE; + if leftovers > 0 { + assert!(ilen_remaining >= leftovers); + ctx.buffer[0..leftovers].copy_from_slice(&input[input.len() - leftovers..input.len()]); + ctx.buflen += leftovers; + ilen_remaining -= leftovers; } else { - let mut total_data_sent = 0; - - // First ingest the data in the buffer. - let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen; - if empty_len > 0 { - let copy_len = min(empty_len, ilen_remaining); - ctx.buffer[ctx.buflen..ctx.buflen + copy_len] - .copy_from_slice(&input[input_start..input_start + copy_len]); - ctx.buflen += copy_len; - ilen_remaining -= copy_len; - input_start += copy_len; - } - assert_eq!(ctx.buflen % 64, 0); - self.accumulate(&ctx.buffer[0..64]); - total_data_sent += ctx.buflen; - ctx.buflen = 0; - - // Move any extra data to the now-empty buffer. 
- let leftovers = ilen_remaining % 64; - if leftovers > 0 { - assert!(ilen_remaining >= leftovers); - ctx.buffer[0..leftovers].copy_from_slice(&input[input.len() - leftovers..input.len()]); - ctx.buflen += leftovers; - ilen_remaining -= leftovers; - } - assert_eq!(ilen_remaining % 64, 0); - - // Hash the remaining data. - self.accumulate(&input[input_start..input_start + ilen_remaining]); - - total_data_sent += ilen_remaining; - assert_eq!(total_data_sent % 64, 0); - assert!(total_data_sent >= 64); + ctx.buffer + .copy_from_slice(&input[input.len() - DIGEST_BLOCK_SIZE..input.len()]); + ctx.buflen += DIGEST_BLOCK_SIZE; + ilen_remaining -= DIGEST_BLOCK_SIZE; } + // Hash the remaining data. + self.accumulate(&input[input_start..input_start + ilen_remaining]).await; + // Save the peripheral context. self.store_context(ctx).await; } @@ -228,12 +204,12 @@ impl<'d, T: Instance> Hash<'d, T> { // Restore the peripheral state. self.load_context(&ctx); - // Hash the leftover bytes, if any. - self.accumulate(&ctx.buffer[0..ctx.buflen]); - ctx.buflen = 0; + // Must be cleared prior to the last DMA transfer. + T::regs().cr().modify(|w| w.set_mdmat(false)); - //Start the digest calculation. - T::regs().str().write(|w| w.set_dcal(true)); + // Hash the leftover bytes, if any. + self.accumulate(&ctx.buffer[0..ctx.buflen]).await; + ctx.buflen = 0; // Wait for completion. poll_fn(|cx| { @@ -272,19 +248,30 @@ impl<'d, T: Instance> Hash<'d, T> { } /// Push data into the hash core. - fn accumulate(&mut self, input: &[u8]) { + async fn accumulate(&mut self, input: &[u8]) { + // Ignore an input length of 0. + if input.len() == 0 { + return; + } + // Set the number of valid bits. 
let num_valid_bits: u8 = (8 * (input.len() % 4)) as u8; T::regs().str().modify(|w| w.set_nblw(num_valid_bits)); - let mut i = 0; - while i < input.len() { - let mut word: [u8; 4] = [0; 4]; - let copy_idx = min(i + 4, input.len()); - word[0..copy_idx - i].copy_from_slice(&input[i..copy_idx]); - T::regs().din().write_value(u32::from_ne_bytes(word)); - i += 4; + // Configure DMA to transfer input to hash core. + let dma_request = self.dma.request(); + let dst_ptr = T::regs().din().as_ptr(); + let mut num_words = input.len() / 4; + if input.len() % 4 > 0 { + num_words += 1; } + let src_ptr = ptr::slice_from_raw_parts(input.as_ptr().cast(), num_words); + let dma_transfer = + unsafe { Transfer::new_write_raw(&mut self.dma, dma_request, src_ptr, dst_ptr, Default::default()) }; + T::regs().cr().modify(|w| w.set_dmae(true)); + + // Wait for the transfer to complete. + dma_transfer.await; } /// Save the peripheral state to a context. @@ -361,3 +348,5 @@ foreach_interrupt!( } }; ); + +dma_trait!(Dma, Instance); diff --git a/examples/stm32f7/src/bin/hash.rs b/examples/stm32f7/src/bin/hash.rs index 1fd0e87eb..a9f5aa197 100644 --- a/examples/stm32f7/src/bin/hash.rs +++ b/examples/stm32f7/src/bin/hash.rs @@ -4,27 +4,30 @@ use defmt::info; use embassy_executor::Spawner; use embassy_stm32::Config; -use embassy_time::{Duration, Instant}; +use embassy_time::Instant; use {defmt_rtt as _, panic_probe as _}; use embassy_stm32::hash::*; use sha2::{Digest, Sha256}; -const TEST_STRING_1: &[u8] = b"hello world"; - #[embassy_executor::main] async fn main(_spawner: Spawner) -> ! 
{ let config = Config::default(); let p = embassy_stm32::init(config); + let test_1: &[u8] = b"as;dfhaslfhas;oifvnasd;nifvnhasd;nifvhndlkfghsd;nvfnahssdfgsdafgsasdfasdfasdfasdfasdfghjklmnbvcalskdjghalskdjgfbaslkdjfgbalskdjgbalskdjbdfhsdfhsfghsfghfgh"; + let test_2: &[u8] = b"fdhalksdjfhlasdjkfhalskdjfhgal;skdjfgalskdhfjgalskdjfglafgadfgdfgdafgaadsfgfgdfgadrgsyfthxfgjfhklhjkfgukhulkvhlvhukgfhfsrghzdhxyfufynufyuszeradrtydyytserr"; + + let mut hw_hasher = Hash::new(p.HASH, p.DMA2_CH7); + let hw_start_time = Instant::now(); // Compute a digest in hardware. - let mut hw_hasher = Hash::new(p.HASH); - let mut context = hw_hasher.start(Algorithm::SHA256, DataType::Width8); - hw_hasher.update(&mut context, TEST_STRING_1); + let mut context = hw_hasher.start(Algorithm::SHA256, DataType::Width8).await; + hw_hasher.update(&mut context, test_1).await; + hw_hasher.update(&mut context, test_2).await; let mut buffer: [u8; 32] = [0; 32]; - let hw_digest = hw_hasher.finish(context, &mut buffer); + let hw_digest = hw_hasher.finish(context, &mut buffer).await; let hw_end_time = Instant::now(); let hw_execution_time = hw_end_time - hw_start_time; @@ -33,7 +36,8 @@ async fn main(_spawner: Spawner) -> ! { // Compute a digest in software. let mut sw_hasher = Sha256::new(); - sw_hasher.update(TEST_STRING_1); + sw_hasher.update(test_1); + sw_hasher.update(test_2); let sw_digest = sw_hasher.finalize(); let sw_end_time = Instant::now();