From aaf8be7579ee667bc7263df64eee053b0e27608f Mon Sep 17 00:00:00 2001 From: Kristian McGurran Date: Thu, 26 Jun 2025 13:11:20 +0100 Subject: [PATCH 1/2] Basic Vectorised Arithmetic --- .../AccelerateWrapperUnitTests.cpp | 84 ++++++++++++++----- 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/VectorisationWrappers/AccelerateWrapperUnitTests.cpp b/VectorisationWrappers/AccelerateWrapperUnitTests.cpp index 43be4bc..2ff48f2 100644 --- a/VectorisationWrappers/AccelerateWrapperUnitTests.cpp +++ b/VectorisationWrappers/AccelerateWrapperUnitTests.cpp @@ -3,50 +3,88 @@ #include #include "Accelerate/AccelerateWrapper.h" #include +#include -TEST_CASE("VDSP wrapper testing", "[VDSP Wrapper]") + +TEMPLATE_TEST_CASE ("VDSP wrapper testing", "[VDSP Wrapper]", float, double) { - std::vector input (256); - std::vector output (256); + using T = TestType; + + constexpr auto bufferSize = 1024; - input.assign (256, 0.0f); - output.assign (256, 1.0f); + std::vector input (bufferSize); + std::vector output (bufferSize); + + input.assign (bufferSize, 0.0f); + output.assign (bufferSize, 1.0f); SECTION ("Addition Test") { - Vectorised::additionVectorised (input.data(), output.data(), 256); + Vectorised::additionVectorised (input.data(), output.data(), bufferSize); + + for (const auto& value : output) + { + REQUIRE (value == static_cast(1)); + } } SECTION ("Subtraction Test") { - Vectorised::subtractionVectorised (input.data(), output.data(), 256); + input.assign (bufferSize, 5.0f); + output.assign (bufferSize, 4.0f); + + // Todo: This call order is confusing + Vectorised::subtractionVectorised (output.data(), input.data(), bufferSize); + + for (const auto& value : output) + { + REQUIRE (value == static_cast(1)); + } } SECTION ("Multiplication Test") { - Vectorised::multiplicationVectorised (input.data(), output.data(), 256); - } + input.assign (bufferSize, 2.0f); + output.assign (bufferSize, 1.0f); - SECTION ("Division Test") - { - Vectorised::divisionVectorised (input.data(), output.data(), 256); - } + // Todo: Order is confusing + Vectorised::multiplicationVectorised (input.data(), output.data(), bufferSize); - SECTION ("Division Test") - { - Vectorised::tanhVectorised (input.data(), output.data(), 256); + for (const auto& value : input) + { + REQUIRE (value == static_cast(2)); + } } SECTION ("Division Test") { - Vectorised::sineVectorised (input.data(), 256); - } + input.assign (bufferSize, 2.0f); + output.assign (bufferSize, 2.0f); - SECTION ("Division Test") - { - const auto lowerLimit = -1.f; - const auto upperLimit = -1.f; + // Todo: Function input confusing + Vectorised::divisionVectorised (input.data(), output.data(), bufferSize); - Vectorised::hardClipVectorised (input.data(), output.data(), 256, &lowerLimit, &upperLimit); + for (const auto& value : input) + { + REQUIRE (value == static_cast(1)); + } } + + // SECTION ("Division Test") + // { + // Vectorised::tanhVectorised (input.data(), output.data(), 256); + // } + // + // SECTION ("Division Test") + // { + // Vectorised::sineVectorised (input.data(), 256); + // } + // + // SECTION ("Division Test") + // { + // const auto lowerLimit = -1.f; + // const auto upperLimit = -1.f; + // + // Vectorised::hardClipVectorised (input.data(), output.data(), 256, &lowerLimit, &upperLimit); + // } } \ No newline at end of file From fa8b8ee1e53aeb95694bb517613516d622769c25 Mon Sep 17 00:00:00 2001 From: Kristian McGurran Date: Thu, 26 Jun 2025 18:16:53 +0100 Subject: [PATCH 2/2] Cleaning up basic vectorised functions --- CMakeLists.txt | 4 +- ...lerateWrapper.h => AccelerateArithmetic.h} | 24 +++++----- ....cpp => AccelerateArithmeticUnitTests.cpp} | 45 +++++++++---------- 3 files changed, 34 insertions(+), 39 deletions(-) rename VectorisationWrappers/Accelerate/{AccelerateWrapper.h => AccelerateArithmetic.h} (63%) rename VectorisationWrappers/{AccelerateWrapperUnitTests.cpp => AccelerateArithmeticUnitTests.cpp} (52%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f8f97d..78f4cec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,10 +5,10 @@ set (CMAKE_CXX_STANDARD 23) add_subdirectory (Catch2) if (APPLE) - add_executable (accelerate_DSP VectorisationWrappers/Accelerate/AccelerateWrapper.h + add_executable (accelerate_DSP VectorisationWrappers/Accelerate/AccelerateArithmetic.h VectorisationWrappers/Accelerate/AccelerateRange.h VectorisationWrappers/Accelerate/AccelerateUtilities.h - VectorisationWrappers/AccelerateWrapperUnitTests.cpp + VectorisationWrappers/AccelerateArithmeticUnitTests.cpp VectorisationWrappers/AccelerateUtilitiesUnitTests.cpp VectorisationWrappers/AccelerateRangeUnitTests.cpp) diff --git a/VectorisationWrappers/Accelerate/AccelerateWrapper.h b/VectorisationWrappers/Accelerate/AccelerateArithmetic.h similarity index 63% rename from VectorisationWrappers/Accelerate/AccelerateWrapper.h rename to VectorisationWrappers/Accelerate/AccelerateArithmetic.h index 53af170..4f5572c 100644 --- a/VectorisationWrappers/Accelerate/AccelerateWrapper.h +++ b/VectorisationWrappers/Accelerate/AccelerateArithmetic.h @@ -7,47 +7,47 @@ namespace Vectorised { // Vectorised Addition function for VDSP template - static void additionVectorised (T* BufferA, const T* BufferB, const int BufferSize) + static void additionVectorised (const T* BufferA, const T* BufferB, T* OutputBuffer, const int BufferSize, const int BufferAStride = 1, const int BufferBStride = 1, const int OutputStride = 1) { if constexpr (std::is_same_v) - vDSP_vadd (BufferA, 1, BufferB, 1, BufferA, 1, BufferSize); + vDSP_vadd (BufferA, BufferAStride, BufferB, BufferBStride, OutputBuffer, OutputStride, BufferSize); else if constexpr (std::is_same_v) - vDSP_vaddD (BufferA, 1, BufferB, 1, BufferA, 1, BufferSize); + vDSP_vaddD (BufferA, BufferAStride, BufferB, BufferBStride, OutputBuffer, OutputStride, BufferSize); else static_assert (sizeof(T) == 0, "Unsupported type for addition"); } // Vectorised Addition function for VDSP template - static void subtractionVectorised (T* BufferA, const T* BufferB, const int BufferSize) + static void subtractionVectorised (const T* BufferA, const T* BufferB, T* OutputBuffer, const int BufferSize, const int BufferAStride = 1, const int BufferBStride = 1, const int OutputStride = 1) { if constexpr (std::is_same_v) - vDSP_vsub (BufferA, 1, BufferB, 1, BufferA, 1, BufferSize); + vDSP_vsub (BufferA, BufferAStride, BufferB, BufferBStride, OutputBuffer, OutputStride, BufferSize); else if constexpr (std::is_same_v) - vDSP_vsubD (BufferA, 1, BufferB, 1, BufferA, 1, BufferSize); + vDSP_vsubD (BufferA, BufferAStride, BufferB, BufferBStride, OutputBuffer, OutputStride, BufferSize); else static_assert (sizeof(T) == 0, "Unsupported type for addition"); } // Vectorised Multiplication function using vdsp template - static void multiplicationVectorised(T* BufferA, const T* BufferB, const int BufferSize) + static void multiplicationVectorised (const T* BufferA, const T* BufferB, T* OutputBuffer, const int BufferSize, const int BufferAStride = 1, const int BufferBStride = 1, const int OutputStride = 1) { if constexpr (std::is_same_v) - vDSP_vmul (BufferA, 1, BufferB, 1, BufferA, 1, BufferSize); + vDSP_vmul (BufferA, BufferAStride, BufferB, BufferBStride, OutputBuffer, OutputStride, BufferSize); else if constexpr (std::is_same_v) - vDSP_vmulD (BufferA, 1, BufferB, 1, BufferA, 1, BufferSize); + vDSP_vmulD (BufferA, BufferAStride, BufferB, BufferBStride, OutputBuffer, OutputStride, BufferSize); else static_assert (sizeof(T) == 0, "Unsupported type for addition"); } template - static void divisionVectorised (T* OutputPtr, T* InputPtr, const int BufferSize) + static void divisionVectorised (const T* BufferA, const T* BufferB, T* OutputBuffer, const int BufferSize) { if constexpr (std::is_same_v) - vvdivf (OutputPtr, InputPtr, OutputPtr, &BufferSize); + vvdivf (OutputBuffer, BufferA, BufferB, &BufferSize); else if constexpr (std::is_same_v) - vvdiv (OutputPtr, InputPtr, OutputPtr, &BufferSize); + vvdiv (OutputBuffer, BufferA, BufferB, &BufferSize); } diff --git a/VectorisationWrappers/AccelerateWrapperUnitTests.cpp b/VectorisationWrappers/AccelerateArithmeticUnitTests.cpp similarity index 52% rename from VectorisationWrappers/AccelerateWrapperUnitTests.cpp rename to VectorisationWrappers/AccelerateArithmeticUnitTests.cpp index 2ff48f2..d948e61 100644 --- a/VectorisationWrappers/AccelerateWrapperUnitTests.cpp +++ b/VectorisationWrappers/AccelerateArithmeticUnitTests.cpp @@ -1,28 +1,26 @@ // Unit tests for VDSP WRAPPER -#include -#include "Accelerate/AccelerateWrapper.h" +#include "Accelerate/AccelerateArithmetic.h" #include #include - TEMPLATE_TEST_CASE ("VDSP wrapper testing", "[VDSP Wrapper]", float, double) { using T = TestType; constexpr auto bufferSize = 1024; - std::vector input (bufferSize); - std::vector output (bufferSize); + std::vector BufferA (bufferSize); + std::vector BufferB (bufferSize); - input.assign (bufferSize, 0.0f); - output.assign (bufferSize, 1.0f); + BufferA.assign (bufferSize, 0.0f); + BufferB.assign (bufferSize, 1.0f); SECTION ("Addition Test") { - Vectorised::additionVectorised (input.data(), output.data(), bufferSize); + Vectorised::additionVectorised (BufferA.data(), BufferB.data(), BufferB.data(), bufferSize); - for (const auto& value : output) + for (const auto& value : BufferB) { REQUIRE (value == static_cast(1)); } @@ -30,13 +28,12 @@ TEMPLATE_TEST_CASE ("VDSP wrapper testing", "[VDSP Wrapper]", float, double) SECTION ("Subtraction Test") { - input.assign (bufferSize, 5.0f); - output.assign (bufferSize, 4.0f); + BufferA.assign (bufferSize, 4.0f); + BufferB.assign (bufferSize, 5.0f); - // Todo: This call order is confusing - Vectorised::subtractionVectorised (output.data(), input.data(), bufferSize); + Vectorised::subtractionVectorised (BufferA.data(), BufferB.data(), BufferB.data(), bufferSize); - for (const auto& value : output) + for (const auto& value : BufferB) { REQUIRE (value == static_cast(1)); } @@ -44,13 +41,12 @@ TEMPLATE_TEST_CASE ("VDSP wrapper testing", "[VDSP Wrapper]", float, double) SECTION ("Multiplication Test") { - input.assign (bufferSize, 2.0f); - output.assign (bufferSize, 1.0f); + BufferA.assign (bufferSize, 2.0f); + BufferB.assign (bufferSize, 1.0f); - // Todo: Order is confusing - Vectorised::multiplicationVectorised (input.data(), output.data(), bufferSize); + Vectorised::multiplicationVectorised (BufferA.data(), BufferB.data(), BufferB.data(), bufferSize); - for (const auto& value : input) + for (const auto& value : BufferB) { REQUIRE (value == static_cast(2)); } @@ -58,18 +54,17 @@ TEMPLATE_TEST_CASE ("VDSP wrapper testing", "[VDSP Wrapper]", float, double) SECTION ("Division Test") { - input.assign (bufferSize, 2.0f); - output.assign (bufferSize, 2.0f); + BufferA.assign (bufferSize, 2.0f); + BufferB.assign (bufferSize, 2.0f); - // Todo: Function input confusing - Vectorised::divisionVectorised (input.data(), output.data(), bufferSize); + Vectorised::divisionVectorised (BufferA.data(), BufferB.data(), BufferB.data(), bufferSize); - for (const auto& value : input) + for (const auto& value : BufferB) { REQUIRE (value == static_cast(1)); } } - + // SECTION ("Division Test") // { // Vectorised::tanhVectorised (input.data(), output.data(), 256);