From 31fca432a7274907c46f6ec254d54e96cb6446c6 Mon Sep 17 00:00:00 2001 From: Mary Date: Tue, 2 Mar 2021 23:50:46 +0100 Subject: [PATCH] Amadeus: Add ARM SIMD fast path (#2069) Add fast paths in the audio renderer for AArch64 in all current fast paths. --- .../Renderer/Dsp/Command/MixCommand.cs | 26 ++++++++++ .../Renderer/Dsp/Command/VolumeCommand.cs | 25 ++++++++++ .../Renderer/Dsp/DataSourceHelper.cs | 47 ++++++++++++++++++- 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs b/Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs index 566fea92ba..069688711f 100644 --- a/Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs +++ b/Ryujinx.Audio/Renderer/Dsp/Command/MixCommand.cs @@ -19,6 +19,7 @@ using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ryujinx.Audio.Renderer.Dsp.Command @@ -89,6 +90,27 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ProcessMixAdvSimd(Span outputMix, ReadOnlySpan inputMix) + { + Vector128 volumeVec = Vector128.Create(Volume); + + ReadOnlySpan> inputVec = MemoryMarshal.Cast>(inputMix); + Span> outputVec = MemoryMarshal.Cast>(outputMix); + + int sisdStart = inputVec.Length * 4; + + for (int i = 0; i < inputVec.Length; i++) + { + outputVec[i] = AdvSimd.Add(outputVec[i], AdvSimd.Ceiling(AdvSimd.Multiply(inputVec[i], volumeVec))); + } + + for (int i = sisdStart; i < inputMix.Length; i++) + { + outputMix[i] += FloatingPointHelper.MultiplyRoundUp(inputMix[i], Volume); + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private void ProcessMixSlowPath(Span outputMix, ReadOnlySpan inputMix) { @@ -108,6 +130,10 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command { ProcessMixSse41(outputMix, inputMix); } + else if (AdvSimd.IsSupported) + { + ProcessMixAdvSimd(outputMix, inputMix); + } else { ProcessMixSlowPath(outputMix, inputMix); diff --git a/Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs b/Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs index b58ae1f814..217d51c9e2 100644 --- a/Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs +++ b/Ryujinx.Audio/Renderer/Dsp/Command/VolumeCommand.cs @@ -19,6 +19,7 @@ using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ryujinx.Audio.Renderer.Dsp.Command @@ -89,6 +90,26 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command } } + private void ProcessVolumeAdvSimd(Span outputBuffer, ReadOnlySpan inputBuffer) + { + Vector128 volumeVec = Vector128.Create(Volume); + + ReadOnlySpan> inputVec = MemoryMarshal.Cast>(inputBuffer); + Span> outputVec = MemoryMarshal.Cast>(outputBuffer); + + int sisdStart = inputVec.Length * 4; + + for (int i = 0; i < inputVec.Length; i++) + { + outputVec[i] = AdvSimd.Ceiling(AdvSimd.Multiply(inputVec[i], volumeVec)); + } + + for (int i = sisdStart; i < inputBuffer.Length; i++) + { + outputBuffer[i] = FloatingPointHelper.MultiplyRoundUp(inputBuffer[i], Volume); + } + } + private void ProcessVolume(Span outputBuffer, ReadOnlySpan inputBuffer) { if (Avx.IsSupported) @@ -99,6 +120,10 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command { ProcessVolumeSse41(outputBuffer, inputBuffer); } + else if (AdvSimd.IsSupported) + { + ProcessVolumeAdvSimd(outputBuffer, inputBuffer); + } else { ProcessVolumeSlowPath(outputBuffer, inputBuffer); diff --git a/Ryujinx.Audio/Renderer/Dsp/DataSourceHelper.cs b/Ryujinx.Audio/Renderer/Dsp/DataSourceHelper.cs index c951452925..373776b71b 100644 --- a/Ryujinx.Audio/Renderer/Dsp/DataSourceHelper.cs +++ b/Ryujinx.Audio/Renderer/Dsp/DataSourceHelper.cs @@ -26,6 +26,7 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; using static Ryujinx.Audio.Renderer.Parameter.VoiceInParameter; @@ -320,6 +321,24 @@ namespace Ryujinx.Audio.Renderer.Dsp } } + private static void ToFloatAdvSimd(Span output, ReadOnlySpan input, int sampleCount) + { + ReadOnlySpan> inputVec = MemoryMarshal.Cast>(input); + Span> outputVec = MemoryMarshal.Cast>(output); + + int sisdStart = inputVec.Length * 4; + + for (int i = 0; i < inputVec.Length; i++) + { + outputVec[i] = AdvSimd.ConvertToSingle(inputVec[i]); + } + + for (int i = sisdStart; i < sampleCount; i++) + { + output[i] = input[i]; + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ToFloatSlow(Span output, ReadOnlySpan input, int sampleCount) { @@ -339,6 +358,10 @@ namespace Ryujinx.Audio.Renderer.Dsp { ToFloatSse2(output, input, sampleCount); } + else if (AdvSimd.IsSupported) + { + ToFloatAdvSimd(output, input, sampleCount); + } else { ToFloatSlow(output, input, sampleCount); @@ -372,7 +395,25 @@ namespace Ryujinx.Audio.Renderer.Dsp for (int i = 0; i < inputVec.Length; i++) { - outputVec[i] = Avx.ConvertToVector128Int32(inputVec[i]); + outputVec[i] = Sse2.ConvertToVector128Int32(inputVec[i]); + } + + for (int i = sisdStart; i < sampleCount; i++) + { + output[i] = (int)input[i]; + } + } + + public static void ToIntAdvSimd(Span output, ReadOnlySpan input, int sampleCount) + { + ReadOnlySpan> inputVec = MemoryMarshal.Cast>(input); + Span> outputVec = MemoryMarshal.Cast>(output); + + int sisdStart = inputVec.Length * 4; + + for (int i = 0; i < inputVec.Length; i++) + { + outputVec[i] = AdvSimd.ConvertToInt32RoundToZero(inputVec[i]); } for (int i = sisdStart; i < sampleCount; i++) @@ -400,6 +441,10 @@ namespace Ryujinx.Audio.Renderer.Dsp { ToIntSse2(output, input, sampleCount); } + else if (AdvSimd.IsSupported) + { + ToIntAdvSimd(output, input, sampleCount); + } else { ToIntSlow(output, input, sampleCount);