diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32e18f5..add2b6f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,8 +6,8 @@ on: branches: [main] jobs: - build: - name: Build project + test-debug: + name: Build project and test in debug mode runs-on: ubuntu-latest strategy: matrix: @@ -20,13 +20,16 @@ jobs: uses: actions/checkout@v4 - name: Setup Python 3.10 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.10' + - name: Install pytest + run: pip install pytest + - name: Install Boost Python and Python dev headers run: | - sudo apt-get install -y \ + sudo apt-get update && sudo apt-get install -y \ libboost-python-dev \ python3-dev @@ -49,37 +52,78 @@ jobs: - name: Build in Debug Mode run: | - mkdir build && cd build && cmake -DCMake_Build_Type=Debug .. + mkdir build && cd build + cmake -DCMake_Build_Type=Debug -DBUILD_TESTS=ON .. make + - name: Run Unit tests + run: cd build && ctest --output-on-failure --verbose + + - name: Upload artifacts + if: failure() + uses: actions/upload-artifact@v4 + with: + name: debug-build-output + path: build/ + retention-days: 2 + + test-release: + name: Build project and test in release mode + runs-on: ubuntu-latest + strategy: + matrix: + gcc-version: [13] + cmake-version: ['3.31.3'] + fail-fast: true + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install pytest + run: pip install pytest + + - name: Install Boost Python and Python dev headers + run: | + sudo apt-get update && sudo apt-get install -y \ + libboost-python-dev \ + python3-dev + + - name: Setup CMake + uses: jwlawson/actions-setup-cmake@v2 + with: + cmake-version: ${{ matrix.cmake-version }} + + - name: Setup GCC + run: | + sudo apt-get update + sudo apt-get install -y gcc-${{ matrix.gcc-version }} g++-${{ matrix.gcc-version }} + sudo update-alternatives --install 
/usr/bin/gcc gcc /usr/bin/gcc-${{ matrix.gcc-version }} 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${{ matrix.gcc-version }} 100 + + - name: Verify versions + run: | + gcc --version + cmake --version + - name: Build in Release Mode run: | - cd build - echo "Deleting the debug build files in $(pwd)/build" - rm -rf * - cmake -DCMake_Build_Type=Release .. + mkdir build && cd build + cmake -DCMake_Build_Type=Release -DBUILD_TESTS=ON .. make + - name: Run Unit tests + run: cd build && ctest --output-on-failure --verbose + - name: Upload artifacts + if: failure() uses: actions/upload-artifact@v4 with: - name: build-output + name: release-build-output path: build/ - retention-days: 1 - - # example of artifacts - # - name: Upload artifacts - # uses: actions/upload-artifact@v4 - # with: - # name: build-${{ matrix.gcc-version }}-${{ github.run_id }} - # path: output/ - # - #test: - # needs: build # Runs after build job - # runs-on: ubuntu-latest - # steps: - # - name: Download artifact - # uses: actions/download-artifact@v4 - # with: - # name: build-output - # path: output/ \ No newline at end of file + retention-days: 2 \ No newline at end of file diff --git a/.gitignore b/.gitignore index a0e8554..ef9f13c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ build .vscode unit_tests_backend -*.txt \ No newline at end of file +*.txt +python_lib/dl_lib/_compiled +*__pycache__* \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 620f07b..b09189f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,11 +5,10 @@ project(dllib VERSION 1.0.0 LANGUAGES CXX) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}) -set(LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}) # to link shared libs. TODO: can we get rid of this, as it may cost runtime? 
set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(PYTHON_MODULE_DIR "${CMAKE_SOURCE_DIR}/python_lib/dl_lib/_compiled") # to enable find boost, see https://stackoverflow.com/a/79147222 if(POLICY CMP0167) @@ -104,10 +103,9 @@ include_directories("${PROJECT_SOURCE_DIR}/src" "${PROJECT_SOURCE_DIR}/examples") add_subdirectory(src) -add_subdirectory(examples) -option(BUILD_TESTING "Build tests" ON) -if(BUILD_TESTING) +option(BUILD_TESTS "Build tests" OFF) +if(BUILD_TESTS) enable_testing() add_subdirectory(tests) endif() \ No newline at end of file diff --git a/python_lib/dl_lib/__init__.py b/python_lib/dl_lib/__init__.py new file mode 100644 index 0000000..9817df9 --- /dev/null +++ b/python_lib/dl_lib/__init__.py @@ -0,0 +1,3 @@ +from ._compiled._core import Tensor, Dimension, Device, Ones, Zeros, Gaussian + +__all__ = ['Tensor', 'Device', 'Dimension'] \ No newline at end of file diff --git a/python_lib/dl_lib/nn/__init__.py b/python_lib/dl_lib/nn/__init__.py new file mode 100644 index 0000000..75fefbc --- /dev/null +++ b/python_lib/dl_lib/nn/__init__.py @@ -0,0 +1,4 @@ +#from .._compiled._layers import FfLayer, ReLU +#from .._compiled._core import Tensor # re-export if needed + +#__all__ = ['FfLayer', 'ReLU'] \ No newline at end of file diff --git a/readme.md b/readme.md index f2d3688..bba8bee 100644 --- a/readme.md +++ b/readme.md @@ -2,6 +2,10 @@ A from-scratch deep learning framework in modern C++ with Python bindings. +## Motivation + +Built to understand deep learning frameworks from first principles - from computational graphs to gradient computation to optimization algorithms. + ## Features - **Computational Graph**: Dynamic graph construction with automatic differentiation @@ -38,15 +42,16 @@ Roadmap: mkdir build && cd build cmake .. 
make -./run_tests +ctest ``` ## Required -- Python 3 (we test with 3.10, but it should work with any version) +- Compiler capable of C++20 at least (we test with gcc 12.3.0) - Boost Python - Cmake > 3.24 -- Compiler capable of C++20 at least (we test with gcc 12.3.0) +- Python 3 (we test with 3.10, but it should work with any version) +- pytest for unit tests (we use 9.0.2) ## Troubleshooting @@ -55,10 +60,6 @@ make The implementation of the Python wrapper does not work on MSVC6/7 in its current form. This is due to an issue that arises from Boost Python in combination with these compilers. Workarounds are proposed, but not implemented. More information here [here](https://beta.boost.org/doc/libs/develop/libs/python/doc/html/tutorial/tutorial/exposing.html). -## Motivation - -Built to understand deep learning frameworks from first principles - from computational graphs to gradient computation to optimization algorithms. - ## License MIT diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 15b916c..0d564e2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,12 +2,12 @@ add_subdirectory(backend) add_subdirectory(python) -target_link_libraries(py_data_modeling +target_link_libraries(_core ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} BackendCore) -target_include_directories(py_data_modeling PRIVATE +target_include_directories(_core PRIVATE ${PYTHON_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS}) diff --git a/src/backend/computational_graph/getter_node.cpp b/src/backend/computational_graph/getter_node.cpp index 7cc86a4..e1a3ac0 100644 --- a/src/backend/computational_graph/getter_node.cpp +++ b/src/backend/computational_graph/getter_node.cpp @@ -15,6 +15,23 @@ using namespace std; using namespace graph; vector< shared_ptr > GetterNode::backward(const Tensor& upstreamGrad) { - assert(!upstreamGrad.getRequiresGrad()); - return { make_shared(upstreamGrad.createDeepCopy()) }; + // upstreamGrad is scalar by definition + assert(!upstreamGrad.getRequiresGrad() && 
upstreamGrad.getDims().nDims()==1); + + auto res = make_shared(parents[0]->getDims(), parents[0]->getDevice(), false); + for(tensorSize_t i=0; igetSize(); i++){ + res->setItem(0, i); + } + + if(std::holds_alternative(idx)){ + res->setItem(upstreamGrad.getItem(0), std::get(idx)); + } + else if(std::holds_alternative(idx)){ + res->setItem(upstreamGrad.getItem(0), std::get(idx)); + } + else{ + __throw_runtime_error("Idx variant in unexpected state"); + } + + return { std::move(res) }; } \ No newline at end of file diff --git a/src/backend/computational_graph/getter_node.h b/src/backend/computational_graph/getter_node.h index 78777e0..e55b2d5 100644 --- a/src/backend/computational_graph/getter_node.h +++ b/src/backend/computational_graph/getter_node.h @@ -13,6 +13,9 @@ #include "graph_node.h" +#include +#include + namespace graph{ /** * @brief When calling a get function, say as in @@ -21,9 +24,17 @@ namespace graph{ * */ class GetterNode final : public GraphNode { + using multiDimIdx_t = std::vector; + + private: + const std::variant idx; + public: - explicit GetterNode(std::shared_ptr t) - : GraphNode({std::move(t)}) {} + explicit GetterNode(std::shared_ptr t, const tensorSize_t idx) + : GraphNode({std::move(t)}), idx{idx} {} + + explicit GetterNode(std::shared_ptr t, const multiDimIdx_t& idx) + : GraphNode({std::move(t)}), idx{idx} {} GetterNode(const GetterNode& other) = delete; GetterNode& operator=(const GetterNode& other) = delete; diff --git a/src/backend/computational_graph/graph_creation.cpp b/src/backend/computational_graph/graph_creation.cpp index fd701d2..1955493 100644 --- a/src/backend/computational_graph/graph_creation.cpp +++ b/src/backend/computational_graph/graph_creation.cpp @@ -103,7 +103,7 @@ shared_ptr graph::get(const shared_ptr& t, tensorSize_t idx) { t->getDevice()); if(t->getRequiresGrad()){ - res->setCgNode(std::make_shared(t)); + res->setCgNode(std::make_shared(t, idx)); assert(res->getRequiresGrad()); } return res; @@ -115,13 +115,25 @@ 
shared_ptr graph::get(const shared_ptr& t, tensorSize_t idx) { * * loss = loss + other.get(i), we need to make sure get(i) can map to computational graph. */ -shared_ptr graph::get(const shared_ptr& t, vector&& idx) { +shared_ptr graph::get(const shared_ptr& t, const vector& idx) { ftype val = t->getItem(std::move(idx)); auto res = make_shared(std::vector{1}, std::vector{val}, t->getDevice()); if(t->getRequiresGrad()){ - res->setCgNode(std::make_shared(t)); + res->setCgNode(std::make_shared(t, idx)); assert(res->getRequiresGrad()); } return res; +} + +/** + * @brief Takes the sum of the whole tensor, then returns result as vector. + */ +shared_ptr graph::sumTensor(const shared_ptr t) { + auto res = make_shared(std::vector{1}, std::vector{0.0}, + t->getDevice(), t->getRequiresGrad()); + for(tensorSize_t i=0; igetSize(); i++){ + res = graph::add(res, graph::get(t, i)); + } + return res; } \ No newline at end of file diff --git a/src/backend/computational_graph/graph_creation.h b/src/backend/computational_graph/graph_creation.h index f1e972c..f68cb4c 100644 --- a/src/backend/computational_graph/graph_creation.h +++ b/src/backend/computational_graph/graph_creation.h @@ -16,22 +16,25 @@ #include namespace graph { + // Arithmetic operations std::shared_ptr mul(const std::shared_ptr left, const std::shared_ptr right); - - std::shared_ptr add(const std::shared_ptr left, const std::shared_ptr right); - - std::shared_ptr matmul(const std::shared_ptr left, const std::shared_ptr right); - std::shared_ptr mul(const std::shared_ptr left, ftype scalar); std::shared_ptr mul(ftype scalar, const std::shared_ptr left); + std::shared_ptr add(const std::shared_ptr left, const std::shared_ptr right); std::shared_ptr add(const std::shared_ptr left, ftype scalar); std::shared_ptr add(ftype scalar, const std::shared_ptr left); + std::shared_ptr matmul(const std::shared_ptr left, const std::shared_ptr right); + std::shared_ptr sub(const std::shared_ptr left, ftype scalar); std::shared_ptr 
div(const std::shared_ptr left, ftype scalar); + // Getter methods std::shared_ptr get(const std::shared_ptr& t, tensorSize_t idx); - std::shared_ptr get(const std::shared_ptr& t, std::vector&& idx); + std::shared_ptr get(const std::shared_ptr& t, const std::vector& idx); + + // Composite operations + std::shared_ptr sumTensor(const std::shared_ptr t); } \ No newline at end of file diff --git a/src/backend/computational_graph/topological_sort.cpp b/src/backend/computational_graph/topological_sort.cpp index 60fcf3d..d6250ca 100644 --- a/src/backend/computational_graph/topological_sort.cpp +++ b/src/backend/computational_graph/topological_sort.cpp @@ -36,10 +36,11 @@ bool TopologicalSort::hasCycles(const Tensor* root) { assert(start->cgNode); stack tStack; + unordered_set visited; - auto pushParentsOnStack = [&tStack](const Tensor* t){ + auto pushParentsOnStack = [&tStack, &visited](const Tensor* t){ for(auto parent: t->cgNode->getParents()){ - if(parent->cgNode){ + if(parent->cgNode && !visited.contains(parent.get())){ tStack.push(parent.get()); } } @@ -57,6 +58,7 @@ bool TopologicalSort::hasCycles(const Tensor* root) { tStack.pop(); pushParentsOnStack(t); + visited.insert(t); } return false; diff --git a/src/backend/data_modeling/dim_type.h b/src/backend/data_modeling/dim_type.h index 1d0aa77..babdc96 100644 --- a/src/backend/data_modeling/dim_type.h +++ b/src/backend/data_modeling/dim_type.h @@ -79,9 +79,18 @@ class Dimension final { return this->dims == other.dims; } + bool operator==(const std::vector& other) const { + assert(size!=0); + return this->dims == other; + } + bool operator!=(const Dimension& other) const { return !(*this == other); } + bool operator!=(const std::vector& other) const { + return !(*this == other); + } + friend std::ostream& operator<<(std::ostream& os, const Dimension& d) noexcept; }; \ No newline at end of file diff --git a/src/backend/data_modeling/tensor.cpp b/src/backend/data_modeling/tensor.cpp index 10b6813..04a94aa 100644 --- 
a/src/backend/data_modeling/tensor.cpp +++ b/src/backend/data_modeling/tensor.cpp @@ -529,7 +529,7 @@ void Tensor::backward() { /** * @brief Get gradients */ -const shared_ptr& Tensor::getGrads() const { +shared_ptr Tensor::getGrads() const { if(!grads){ __throw_runtime_error("Tensor has no gradients."); } @@ -540,9 +540,7 @@ const shared_ptr& Tensor::getGrads() const { * @brief Sometimes we do accept negative dim-values. In accordance with e.g. * NumPy we map from the end to the beginning in that case. */ -tensorDim_t Tensor::mapDim(const int dim, optional dimOpt) const { - const auto& dims = dimOpt ? dimOpt.value() : this->dims; - +tensorDim_t Tensor::mapDim(const int dim, const Dimension& dims) { if(dim>=0){ return dim; } @@ -642,8 +640,8 @@ void Tensor::transposeImpl2D(Tensor& target, const int dim1, const int dim2) con const auto smallDim = dim1Mapped < dim2Mapped ? dim2Mapped : dim1Mapped; // largeDimSize >= smallDimSize - const auto largeDimSize = getTotalDimSize(largeDim); - const auto smallDimSize = getTotalDimSize(smallDim); + const auto largeDimOffset = getDimOffset(largeDim, dims); + const auto smallDimOffset = getDimOffset(smallDim, dims); auto transposedValues = make_unique(source.values->getDevice()); transposedValues->resize(source.values->getSize()); @@ -651,9 +649,9 @@ void Tensor::transposeImpl2D(Tensor& target, const int dim1, const int dim2) con tensorSize_t resIdx = 0; for(tensorSize_t smallDimCount=0; smallDimCount&& idx) const { - return computeIdx(idx); +tensorSize_t Tensor::computeLinearIdx(const std::vector&& idx, const Dimension& dims) { + return computeLinearIdx(idx, dims); } /** @@ -838,7 +836,7 @@ tensorSize_t Tensor::computeIdx(const std::vector&& idx) const { * * WARNING: Does not check for overflow. 
*/ -tensorSize_t Tensor::computeIdx(const std::vector& idx) const { +tensorSize_t Tensor::computeLinearIdx(const std::vector& idx, const Dimension& dims) { if(idx.size()!=dims.nDims()) { __throw_invalid_argument("Number of idxs must match number of dimensions."); } @@ -862,7 +860,7 @@ tensorSize_t Tensor::computeIdx(const std::vector& idx) const { * @brief Gets the total size of a dimension. E.g. if dims=(2, 3, 4), * the offset of dim1 is 3*4==12, and that of dim0 is 2*3*4==24. */ -tensorSize_t Tensor::getTotalDimSize(const tensorDim_t dim) const { +tensorSize_t Tensor::getDimOffset(const tensorDim_t dim, const Dimension& dims) { tensorSize_t res = 1; // minimum possible dimsize for(size_t idx = dims.nDims()-1; idx>dim; idx--){ @@ -876,26 +874,22 @@ tensorSize_t Tensor::getTotalDimSize(const tensorDim_t dim) const { /** * @brief Like overload, but accepts negative dims. */ -tensorSize_t Tensor::getTotalDimSize(const int dim) const { - return getTotalDimSize(mapDim(dim)); +tensorSize_t Tensor::getDimOffset(const int dim, const Dimension& dims) { + return getDimOffset(mapDim(dim, dims), dims); } /** * @brief No explanation needed. */ -ftype Tensor::getItem(const std::vector&& idx) const { - return (*values)[computeIdx(idx)]; -} - -Tensor Tensor::getAsTensor(const std::vector&& idx) const { - return Tensor({1}, {(*values)[computeIdx(idx)]}, values->getDevice(), requiresGrad); +ftype Tensor::getItem(const std::vector& idx) const { + return (*values)[computeLinearIdx(idx, dims)]; } /** * @brief Special getter, indexes the contained underlying array linearly. * Can lead to unexpected results in multidimensional tensors. */ -ftype Tensor::getItem(tensorDim_t idx) const { +ftype Tensor::getItem(tensorSize_t idx) const { return (*values)[idx]; } @@ -914,8 +908,8 @@ ftype Tensor::getItem(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tens /** * @brief No explanation needed. 
*/ -void Tensor::setItem(ftype item, const std::vector&& idx) { - (*values)[computeIdx(idx)] = item; +void Tensor::setItem(ftype item, const std::vector& idx) { + (*values)[computeLinearIdx(idx, dims)] = item; } /** diff --git a/src/backend/data_modeling/tensor.h b/src/backend/data_modeling/tensor.h index 4f02101..9c1384b 100644 --- a/src/backend/data_modeling/tensor.h +++ b/src/backend/data_modeling/tensor.h @@ -49,7 +49,7 @@ constexpr const char* DeviceToString(Device d) { return ""; // suppress } -class Tensor final { +class Tensor final : public std::enable_shared_from_this { friend class graph::TopologicalSort; private: @@ -132,11 +132,12 @@ class Tensor final { void transposeImpl(Tensor& target, const int dim1, const int dim2) const noexcept; // convenience functions that appear in multiple places - tensorSize_t computeIdx(const std::vector&& idx) const; - tensorSize_t computeIdx(const std::vector& idx) const; - tensorSize_t getTotalDimSize(const tensorDim_t dim) const; - tensorSize_t getTotalDimSize(const int dim) const; - tensorDim_t mapDim(const int dim, std::optional dimsOpt=std::nullopt) const; + static tensorSize_t computeLinearIdx(const std::vector&& idx, const Dimension& dims); + static tensorSize_t computeLinearIdx(const std::vector& idx, const Dimension& dims); + + static tensorSize_t getDimOffset(const tensorDim_t dim, const Dimension& dims); + static tensorSize_t getDimOffset(const int dim, const Dimension& dims); + static tensorDim_t mapDim(const int dim, const Dimension& dims); friend void printValuesCpu(std::ostream& os, const Tensor& t); @@ -157,11 +158,11 @@ class Tensor final { values->resize(this->dims.getSize()); } - explicit Tensor(const std::vector& dims, std::vector&& initValues, bool requiresGrad=false) : + explicit Tensor(const std::vector& dims, const std::vector& initValues, bool requiresGrad=false) : Tensor{dims, std::move(initValues), Tensor::getDefaultDevice(), requiresGrad} { } - explicit Tensor(const std::vector& dims, 
std::vector&& initValues, Device d, bool requiresGrad=false) : + explicit Tensor(const std::vector& dims, const std::vector& initValues, Device d, bool requiresGrad=false) : Tensor{dims, d, requiresGrad} { for(tensorSize_t i=0; isetItem(initValues[i], i); @@ -218,7 +219,7 @@ class Tensor final { void backward(); bool hasGrads() const noexcept { return grads!=nullptr; } - const std::shared_ptr& getGrads() const; + std::shared_ptr getGrads() const; void transposeThis() noexcept; void transposeThis(int dim1, int dim2) noexcept; @@ -231,21 +232,19 @@ class Tensor final { friend std::ostream& operator<<(std::ostream& os, const Tensor& t) noexcept; // for convenience we provide some simple getters - ftype getItem(tensorDim_t idx) const; + ftype getItem(tensorSize_t idx) const; ftype getItem(tensorDim_t idx0, tensorDim_t idx1) const; ftype getItem(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2) const; ftype getItem(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3) const; - ftype getItem(const std::vector&& idx) const; - - Tensor getAsTensor(const std::vector&& idx) const; + ftype getItem(const std::vector& idx) const; // for convenience we provide some simple setters void setItem(ftype item, tensorDim_t idx); void setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1); void setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2); void setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3); - void setItem(ftype item, const std::vector&& idx); + void setItem(ftype item, const std::vector& idx); void setDevice(const Device d) noexcept; Device getDevice() const noexcept; @@ -266,6 +265,17 @@ class Tensor final { requiresGrad = true; } + std::shared_ptr getSharedPtr() const { + try { + return std::const_pointer_cast(shared_from_this()); + } + catch (const std::bad_weak_ptr&) { + throw std::runtime_error( + "Tensor must be managed by shared_ptr for autograd operations" + ); + } + } + // these 
two should not be exposed to the python interface static void setDefaultDevice(const Device d) noexcept; static Device getDefaultDevice() noexcept; diff --git a/src/backend/data_modeling/tensor_functions.cpp b/src/backend/data_modeling/tensor_functions.cpp index 6bfae2d..3ac032e 100644 --- a/src/backend/data_modeling/tensor_functions.cpp +++ b/src/backend/data_modeling/tensor_functions.cpp @@ -65,14 +65,14 @@ shared_ptr TensorFunctions::makeSharedTensor(const vector& } shared_ptr TensorFunctions::makeSharedTensor(const vector& dims, - vector&& initValues, + const vector& initValues, bool requiresGrad) { - return make_shared(dims, std::move(initValues), requiresGrad); + return make_shared(dims, initValues, requiresGrad); } shared_ptr TensorFunctions::makeSharedTensor(const vector& dims, - vector&& initValues, + const vector& initValues, Device d, bool requiresGrad){ - return make_shared(dims, std::move(initValues), d, requiresGrad); + return make_shared(dims, initValues, d, requiresGrad); } \ No newline at end of file diff --git a/src/backend/data_modeling/tensor_functions.h b/src/backend/data_modeling/tensor_functions.h index e3818bf..78f68fe 100644 --- a/src/backend/data_modeling/tensor_functions.h +++ b/src/backend/data_modeling/tensor_functions.h @@ -40,11 +40,11 @@ namespace TensorFunctions { // class name acts as namespace for us std::shared_ptr makeSharedTensor(const std::vector& dims, Device d, bool requiresGrad=false); std::shared_ptr makeSharedTensor(const std::vector& dims, - std::vector&& initValues, + const std::vector& initValues, bool requiresGrad=false); std::shared_ptr makeSharedTensor(const std::vector& dims, - std::vector&& initValues, + const std::vector& initValues, Device d, bool requiresGrad=false); // Tensor manipulation diff --git a/src/backend/networks/sequential.h b/src/backend/networks/sequential.h index a66e251..f05fafd 100644 --- a/src/backend/networks/sequential.h +++ b/src/backend/networks/sequential.h @@ -23,7 +23,7 @@ class 
SequentialNetwork { bool assertDims(const layers::LayerBase& layer) const noexcept; template - requires (std::derived_from< std::remove_const_t, layers::LayerBase >) + requires (std::derived_from< std::remove_cvref_t, layers::LayerBase >) void addLayer(T&& layer) { if(!assertDims(layer)){ // TODO: show warning that the dims don't match diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index a13adbc..6c5370d 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -5,17 +5,12 @@ include_directories( # remove the lib... prefix set(CMAKE_SHARED_MODULE_PREFIX "") -add_library(py_data_modeling MODULE +add_library(_core MODULE data_modeling/py_data_modeling.cpp + data_modeling/py_data_modeling_util.cpp ) -set_target_properties(py_data_modeling PROPERTIES PREFIX "") # don't add "lib" in front - -#target_link_libraries(py_data_modeling ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} BackendCore) -#target_include_directories(py_data_modeling PRIVATE ${PYTHON_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS}) - -#add_library(py_layers MODULE -# layers/py_layers.cpp -# ) -#set_target_properties(py_layers PROPERTIES PREFIX "") # don't add "lib" in front - -#target_link_libraries(layers INTERFACE data_modeling) \ No newline at end of file + +set_target_properties(_core PROPERTIES + PREFIX "" + OUTPUT_NAME "_core" + LIBRARY_OUTPUT_DIRECTORY ${PYTHON_MODULE_DIR}) \ No newline at end of file diff --git a/src/python/data_modeling/py_data_modeling.cpp b/src/python/data_modeling/py_data_modeling.cpp index cbd6b27..88b4bec 100644 --- a/src/python/data_modeling/py_data_modeling.cpp +++ b/src/python/data_modeling/py_data_modeling.cpp @@ -1,119 +1,221 @@ /** - * @file tensor.cpp + * @file py_data_modeling.cpp * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) * @brief * @version 0.1 - * @date 2026-01-11 + * @date 2026-02-21 * * @copyright Copyright (c) 2026 * */ -#include "py_data_modeling.h" +#include "data_modeling/tensor.h" -#include -#include +#include 
"py_data_modeling_util.h" +#include "python_templates.h" +#include "custom_converters.h" -using namespace boost::python; +#include "data_modeling/tensor.h" +#include "data_modeling/tensor_functions.h" +#include "computational_graph/graph_creation.h" -ftype Py_DataModeling::tensorGetItem(const Tensor& self, boost::python::object index) { - extract int_extractor(index); - - // Single integer index (1D) - if(int_extractor.check()) { - auto i0 = static_cast(int_extractor()); - return self.getItem(i0); - } - - // Tuple index (2D, 3D, or 4D, or list) - if (PySequence_Check(index.ptr())) { - int len = PySequence_Length(index.ptr()); - - // Dispatch to convenience functions for 1-4 args - if (len == 1) { - auto i0 = static_cast(extract(index[0])); - return self.getItem(i0); - } - else if (len == 2) { - auto i0 = static_cast(extract(index[0])); - auto i1 = static_cast(extract(index[1])); - return self.getItem(i0, i1); - } - else if (len == 3) { - auto i0 = static_cast(extract(index[0])); - auto i1 = static_cast(extract(index[1])); - auto i2 = static_cast(extract(index[2])); - return self.getItem(i0, i1, i2); - } - else if (len == 4) { - auto i0 = static_cast(extract(index[0])); - auto i1 = static_cast(extract(index[1])); - auto i2 = static_cast(extract(index[2])); - auto i3 = static_cast(extract(index[3])); - return self.getItem(i0, i1, i2, i3); - } - else { - // Arbitrary length - use vector version - std::vector indices; - for (int i = 0; i < len; ++i) { - indices.push_back(static_cast(extract(index[i]))); - } - return self.getItem(std::move(indices)); - } - } +#include +#include +#include + +BOOST_PYTHON_MODULE(_core) +{ + using namespace boost::python; + + // some macros to make code below easier to read + #define WRAP_TENSOR_METHOD_1(method) \ + +[](const Tensor& self, const Tensor& other) -> std::shared_ptr { \ + return std::make_shared(self.method(other)); \ + } + + #define WRAP_SCALAR(method, T) \ + +[](const Tensor& self, T val) -> std::shared_ptr { \ + return 
std::make_shared(self.method(val)); \ + } + + #define WRAP_SCALAR_REVERSE(op, T) \ + +[](const Tensor& self, T val) -> std::shared_ptr { \ + return std::make_shared(val op self); \ + } + + // different, since those are not methods anymore + #define WRAP_FREE_MEMBER_FUNC_1(fPtr, T1, T2) \ + +[](const Tensor& self, int v1, int v2) -> std::shared_ptr { \ + return std::make_shared((self.*fPtr)(v1, v2)); \ + } + + #define WRAP_FREE_MEMBER_FUNC_2(fPtr, T1, T2, T3) \ + +[](const Tensor& self, T1 v1, T2 v2, T3 v3) -> std::shared_ptr { \ + return std::make_shared((self.*fPtr)(v1, v2, v3)); \ + } + + #define WRAP_FREE_FUNC_1(fPtr, T1) \ + +[](T1 v1) -> std::shared_ptr { \ + return std::make_shared((*fPtr)(v1)); \ + } + + #define WRAP_FREE_FUNC_2(fPtr, T1, T2) \ + +[](T1 v1, T2 v2) -> std::shared_ptr { \ + return std::make_shared((*fPtr)(v1, v2)); \ + } + + #define WRAP_FREE_FUNC_3(fPtr, T1, T2, T3) \ + +[](T1 v1, T2 v2, T3 v3) -> std::shared_ptr { \ + return std::make_shared((*fPtr)(v1, v2, v3)); \ + } + + #define WRAP_FREE_FUNC_4(fPtr, T) \ + +[](const Tensor& self, T val) -> std::shared_ptr { \ + return (*fPtr)(self.getSharedPtr(), val); \ + } + + #define WRAP_FREE_FUNC_5(fPtr) \ + +[](const Tensor& self, const Tensor& other) -> std::shared_ptr { \ + return (*fPtr)(self.getSharedPtr(), other.getSharedPtr()); \ + } + + #define WRAP_FREE_FUNC_6(fPtr, T) \ + +[](const Tensor& self, T val) -> std::shared_ptr { \ + return (*fPtr)(val, self.getSharedPtr()); \ + } + + #define WRAP_FREE_FUNC_7(fPtr) \ + +[](const Tensor& self) -> std::shared_ptr { \ + return (*fPtr)(self.getSharedPtr()); \ + } + + #define WRAP_FUNC_AND_CONVERT_DTYPE_1(method) \ + +[](const Tensor& self, int v1) -> ftype { \ + return self.method(static_cast(v1)); \ + } + + #define WRAP_FUNC_AND_CONVERT_DTYPE_2(method) \ + +[](const Tensor& self, int v1, int v2) -> ftype { \ + return self.method(static_cast(v1), static_cast(v2)); \ + } + + #define WRAP_FUNC_AND_CONVERT_DTYPE_3(method) \ + +[](const Tensor& self, int 
v1, int v2, int v3) -> ftype { \ + return self.method(static_cast(v1), static_cast(v2), \ + static_cast(v3)); \ + } + + #define WRAP_FUNC_AND_CONVERT_DTYPE_4(method) \ + +[](const Tensor& self, int v1, int v2, int v3, int v4) -> ftype { \ + return self.method(static_cast(v1), static_cast(v2), \ + static_cast(v3), static_cast(v4)); \ + } + + // classes + class_("Dimension", no_init) + .add_property("list", &Dimension::getItem) + .def("__str__", &Py_Util::toString) + .def("__eq__", Py_DataModeling::dimEquals1) + .def("__eq__", Py_DataModeling::dimEquals2) + .def("__ne__", Py_DataModeling::nDimEquals1) + .def("__ne__", Py_DataModeling::nDimEquals2) + ; + + enum_("Device") + .value("CPU", Device::CPU) + .value("CUDA", Device::CUDA) + ; + + // register implicit dtype conversion + custom_converters::PyListToVectorConverter(); + custom_converters::PyListToVectorConverter(); + + // to convert std::shared_ptr to std::shared_ptr> in Python + boost::python::register_ptr_to_python< std::shared_ptr >(); + + // we manage via shared_ptr, since we deleted copy-ctor + class_, boost::noncopyable>("Tensor", no_init) + .def(init&, optional >()) + .def(init&, Device, optional >()) + .def(init&, const std::vector&, optional >()) + .def(init&, const std::vector&, Device, optional >()) - PyErr_SetString(PyExc_TypeError, "Index must be a number of up to 4integers or a list"); - throw_error_already_set(); - return 0.0; // Never reached -} - -void Py_DataModeling::tensorSetItem(Tensor& self, boost::python::object index, ftype value) { - extract int_extractor(index); - if(int_extractor.check()) { - auto i0 = static_cast(int_extractor()); - self.setItem(value, i0); - return; - } + // static creation methods + .def("ones", WRAP_FREE_FUNC_1(Py_DataModeling::Ones0, std::vector)) + .def("ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones1, std::vector, Device)) + .def("ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones2, std::vector, const bool)) + .def("ones", WRAP_FREE_FUNC_3(Py_DataModeling::Ones3, 
std::vector, Device, const bool)).staticmethod("ones") + + .def("zeros", WRAP_FREE_FUNC_1(Py_DataModeling::Zeros0, std::vector)) + .def("zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros1, std::vector, Device)) + .def("zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros2, std::vector, const bool)) + .def("zeros", WRAP_FREE_FUNC_3(Py_DataModeling::Zeros3, std::vector, Device, const bool)).staticmethod("zeros") + + .def("gauss", WRAP_FREE_FUNC_1(Py_DataModeling::Gaussian0, std::vector)) + .def("gauss", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian1, std::vector, Device)) + .def("gauss", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian2, std::vector, const bool)) + .def("gauss", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian3, std::vector, Device, const bool)).staticmethod("gauss") + + // properties + .add_property("device", &Tensor::getDevice, &Tensor::setDevice) + .add_property("dims", make_function(&Tensor::getDims, return_internal_reference<>())) + .add_property("grads", make_function(&Tensor::getGrads)) + .add_property("requiresGrad", &Tensor::getRequiresGrad, &Tensor::setRequiresGrad) + + // operators + .def("__str__", &Py_Util::toString) + .def("__repr__", &Py_Util::toString) + .def("__len__", &Tensor::getSize) + .def("__getitem__", WRAP_FREE_FUNC_4(&Py_DataModeling::getItemAsTensor1, tensorSize_t)) + .def("__getitem__", WRAP_FREE_FUNC_4(&Py_DataModeling::getItemAsTensor2, std::vector)) + .def("__setitem__", &Py_DataModeling::tensorSetItem) + + // arithmetics + .def("__matmul__", WRAP_FREE_FUNC_5(Py_DataModeling::matmul)) + .def("__add__", WRAP_FREE_FUNC_5(Py_DataModeling::elementwiseadd)) // elementwise add + .def("__add__", WRAP_FREE_FUNC_4(Py_DataModeling::scalaradd, ftype)) + .def("__radd__", WRAP_FREE_FUNC_6(Py_DataModeling::rscalaradd, ftype)) + + .def("__mul__", WRAP_FREE_FUNC_5(Py_DataModeling::elementwisemul)) // elementwise mult + .def("__mul__", WRAP_FREE_FUNC_4(Py_DataModeling::scalarmul, ftype)) + .def("__rmul__", WRAP_FREE_FUNC_6(Py_DataModeling::rscalarmul, ftype)) 
- // Tuple index (2D, 3D, or 4D, or list) - extract tuple_extractor(index); - if (PySequence_Check(index.ptr())) { - int len = PySequence_Length(index.ptr()); + .def("__sub__", WRAP_FREE_FUNC_4(Py_DataModeling::scalarsub, ftype)) + .def("__truediv__", WRAP_FREE_FUNC_4(Py_DataModeling::scalardiv, ftype)) + + // member functions + .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_1(Tensor::getItem)) + .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_2(Tensor::getItem)) + .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_3(Tensor::getItem)) + .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_4(Tensor::getItem)) + .def("getitem", Py_DataModeling::getItemVector) // the vector arg + + .def("sum", WRAP_FREE_FUNC_7(&(graph::sumTensor))) - // Dispatch to convenience functions for 1-4 args - if (len == 1) { - auto i0 = static_cast(extract(index[0])); - self.setItem(value, i0); - } - else if (len == 2) { - auto i0 = static_cast(extract(index[0])); - auto i1 = static_cast(extract(index[1])); - self.setItem(value, i0, i1); - } - else if (len == 3) { - auto i0 = static_cast(extract(index[0])); - auto i1 = static_cast(extract(index[1])); - auto i2 = static_cast(extract(index[2])); - self.setItem(value, i0, i1, i2); - } - else if (len == 4) { - auto i0 = static_cast(extract(index[0])); - auto i1 = static_cast(extract(index[1])); - auto i2 = static_cast(extract(index[2])); - auto i3 = static_cast(extract(index[3])); - self.setItem(value, i0, i1, i2, i3); - } - else { - // Arbitrary length - use vector version - std::vector indices; - for (int i = 0; i < len; ++i) { - indices.push_back(static_cast(extract(index[i]))); - } - self.setItem(value, std::move(indices)); - } - return; - } + .def("reset", Py_DataModeling::reset1) + .def("reset", Py_DataModeling::reset2) + + .def("transpose", WRAP_FREE_MEMBER_FUNC_1(Py_DataModeling::transpose1, int, int)) + .def("transpose", WRAP_FREE_MEMBER_FUNC_2(Py_DataModeling::transpose2, int, int, bool)) + .def("transposeThis", Py_DataModeling::transposeThis1) + 
.def("transposeThis", Py_DataModeling::transposeThis2) - PyErr_SetString(PyExc_TypeError, "Index must be a number of up to 4integers or a list"); - throw_error_already_set(); + .def("backward", &Tensor::backward) + ; + + // functions + def("Ones", WRAP_FREE_FUNC_1(Py_DataModeling::Ones0, std::vector)); + def("Ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones1, std::vector, Device)); + def("Ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones2, std::vector, const bool)); + def("Ones", WRAP_FREE_FUNC_3(Py_DataModeling::Ones3, std::vector, Device, const bool)); + + def("Zeros", WRAP_FREE_FUNC_1(Py_DataModeling::Zeros0, std::vector)); + def("Zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros1, std::vector, Device)); + def("Zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros2, std::vector, const bool)); + def("Zeros", WRAP_FREE_FUNC_3(Py_DataModeling::Zeros3, std::vector, Device, const bool)); + + def("Gaussian", WRAP_FREE_FUNC_1(Py_DataModeling::Gaussian0, std::vector)); + def("Gaussian", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian1, std::vector, Device)); + def("Gaussian", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian2, std::vector, const bool)); + def("Gaussian", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian3, std::vector, Device, const bool)); } \ No newline at end of file diff --git a/src/python/data_modeling/py_data_modeling.h b/src/python/data_modeling/py_data_modeling.h deleted file mode 100644 index 981dddd..0000000 --- a/src/python/data_modeling/py_data_modeling.h +++ /dev/null @@ -1,183 +0,0 @@ -/** - * @file tensor.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-01-11 - * - * @copyright Copyright (c) 2026 - * - */ - -#pragma once - -#include "data_modeling/tensor.h" -#include "data_modeling/tensor_functions.h" - -#include "python_templates.h" -#include "custom_converters.h" - -#include -#include -#include -#include - -namespace Py_DataModeling { - ftype tensorGetItem(const Tensor& self, boost::python::object index); - void 
tensorSetItem(Tensor& self, boost::python::object index, ftype value); - - // need wrappers for default arguments, see - // https://beta.boost.org/doc/libs/develop/libs/python/doc/html/tutorial/tutorial/functions.html - auto OnesWrapper0(std::vector dims) { - return TensorFunctions::Ones(std::move(dims)); - } - - auto OnesWrapper1(std::vector dims, Device d) { - return TensorFunctions::Ones(std::move(dims), d); - } - - auto ZerosWrapper0(std::vector dims) { - return TensorFunctions::Zeros(std::move(dims)); - } - - auto ZerosWrapper1(std::vector dims, Device d) { - return TensorFunctions::Zeros(std::move(dims), d); - } - - auto GaussianWrapper0(std::vector dims) { - return TensorFunctions::Gaussian(std::move(dims)); - } - - auto GaussianWrapper1(std::vector dims, Device d) { - return TensorFunctions::Gaussian(std::move(dims), d); - } - - Tensor (*Ones0)(std::vector) = &OnesWrapper0; - Tensor (*Ones1)(std::vector, Device) = &OnesWrapper1; - Tensor (*Ones2)(std::vector, const bool) = &(TensorFunctions::Ones); - Tensor (*Ones3)(std::vector, Device, const bool) = &(TensorFunctions::Ones); - - Tensor (*Zeros0)(std::vector) = &ZerosWrapper0; - Tensor (*Zeros1)(std::vector, Device) = &ZerosWrapper1; - Tensor (*Zeros2)(std::vector, const bool) = &(TensorFunctions::Zeros); - Tensor (*Zeros3)(std::vector, Device, const bool) = &(TensorFunctions::Zeros); - - Tensor (*Gaussian0)(std::vector) = &GaussianWrapper0; - Tensor (*Gaussian1)(std::vector, Device) = &GaussianWrapper1; - Tensor (*Gaussian2)(std::vector, const bool) = &(TensorFunctions::Gaussian); - Tensor (*Gaussian3)(std::vector, Device, const bool) = &(TensorFunctions::Gaussian); - - void (Tensor::*reset1)(const ftype) = &Tensor::reset; - void (Tensor::*reset2)(const utility::InitClass) = &Tensor::reset; - - void (Tensor::*transposeThis1)() = &Tensor::transposeThis; - void (Tensor::*transposeThis2)(int, int) = &Tensor::transposeThis; - Tensor (Tensor::*transpose1)(int, int) const = &Tensor::transpose; - Tensor 
(Tensor::*transpose2)(int, int, bool) const = &Tensor::transpose; -} - -BOOST_PYTHON_MODULE(py_data_modeling) -{ - using namespace boost::python; - - // some macros to make code below easier to read - #define WRAP_TENSOR_METHOD_1(method) \ - +[](const Tensor& self, const Tensor& other) -> std::shared_ptr { \ - return std::make_shared(self.method(other)); \ - } - - #define WRAP_SCALAR(method, T) \ - +[](const Tensor& self, T val) -> std::shared_ptr { \ - return std::make_shared(self.method(val)); \ - } - - #define WRAP_SCALAR_REVERSE(op, T) \ - +[](const Tensor& self, T val) -> std::shared_ptr { \ - return std::make_shared(val op self); \ - } - - // different, since those are not methods anymore - #define WRAP_FREE_MEMBER_FUNC_1(fPtr, T1, T2) \ - +[](const Tensor& self, int v1, int v2) -> std::shared_ptr { \ - return std::make_shared((self.*fPtr)(v1, v2)); \ - } - - #define WRAP_FREE_MEMBER_FUNC_2(fPtr, T1, T2, T3) \ - +[](const Tensor& self, T1 v1, T2 v2, T3 v3) -> std::shared_ptr { \ - return std::make_shared((self.*fPtr)(v1, v2, v3)); \ - } - - #define WRAP_FREE_FUNC_1(fPtr, T1) \ - +[](T1 v1) -> std::shared_ptr { \ - return std::make_shared((*fPtr)(v1)); \ - } - - #define WRAP_FREE_FUNC_2(fPtr, T1, T2) \ - +[](T1 v1, T2 v2) -> std::shared_ptr { \ - return std::make_shared((*fPtr)(v1, v2)); \ - } - - #define WRAP_FREE_FUNC_3(fPtr, T1, T2, T3) \ - +[](T1 v1, T2 v2, T3 v3) -> std::shared_ptr { \ - return std::make_shared((*fPtr)(v1, v2, v3)); \ - } - - // register implicit dtype conversion - converters::PyListToVectorConverter(); - - // classes - class_("Dimension", no_init) - .add_property("list", &Dimension::getItem) - .def("__str__", &Py_Util::toString) - ; - - enum_("Device") - .value("CPU", Device::CPU) - .value("CUDA", Device::CUDA) - ; - - // we manage via shared_ptr, since we deleted copy-ctor - class_, boost::noncopyable>("Tensor", no_init) - .def(init&, optional >()) - .def(init&, optional >()) - .add_property("device", &Tensor::getDevice, 
&Tensor::setDevice) - .add_property("dims", make_function(&Tensor::getDims, return_internal_reference<>())) - .add_property("grads", make_function(&Tensor::getGrads, return_internal_reference<>())) - .def("__str__", &Py_Util::toString) - .def("__repr__", &Py_Util::toString) - .def("__getitem__", &Py_DataModeling::tensorGetItem) - .def("__setitem__", &Py_DataModeling::tensorSetItem) - .def("__matmul__", WRAP_TENSOR_METHOD_1(matmul)) - .def("__add__", WRAP_TENSOR_METHOD_1(operator+)) // elementwise add - .def("__mul__", WRAP_TENSOR_METHOD_1(operator*)) // elementwise mult - .def("__mul__", WRAP_SCALAR(operator*, float)) - .def("__rmul__", WRAP_SCALAR_REVERSE(*, float)) - .def("__add__", WRAP_SCALAR(operator+, float)) - .def("__radd__", WRAP_SCALAR_REVERSE(+, float)) - .def("__sub__", WRAP_SCALAR(operator-, float)) - .def("__truediv__", WRAP_SCALAR(operator/, float)) - .def("reset", Py_DataModeling::reset1) - .def("reset", Py_DataModeling::reset2) - .def("transpose", WRAP_FREE_MEMBER_FUNC_1(Py_DataModeling::transpose1, int, int)) - .def("transpose", WRAP_FREE_MEMBER_FUNC_2(Py_DataModeling::transpose2, int, int, bool)) - .def("transposeThis", Py_DataModeling::transposeThis1) - .def("transposeThis", Py_DataModeling::transposeThis2) - .def("backward", &Tensor::backward) - ; - - // functions - def("Ones", WRAP_FREE_FUNC_1(Py_DataModeling::Ones0, std::vector)); - def("Ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones1, std::vector, Device)); - def("Ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones2, std::vector, const bool)); - def("Ones", WRAP_FREE_FUNC_3(Py_DataModeling::Ones3, std::vector, Device, const bool)); - - def("Zeros", WRAP_FREE_FUNC_1(Py_DataModeling::Zeros0, std::vector)); - def("Zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros1, std::vector, Device)); - def("Zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros2, std::vector, const bool)); - def("Zeros", WRAP_FREE_FUNC_3(Py_DataModeling::Zeros3, std::vector, Device, const bool)); - - def("Gaussian", 
WRAP_FREE_FUNC_1(Py_DataModeling::Gaussian0, std::vector)); - def("Gaussian", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian1, std::vector, Device)); - def("Gaussian", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian2, std::vector, const bool)); - def("Gaussian", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian3, std::vector, Device, const bool)); -} \ No newline at end of file diff --git a/src/python/data_modeling/py_data_modeling_util.cpp b/src/python/data_modeling/py_data_modeling_util.cpp new file mode 100644 index 0000000..d495300 --- /dev/null +++ b/src/python/data_modeling/py_data_modeling_util.cpp @@ -0,0 +1,119 @@ +/** + * @file py_data_modeling_util.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-21 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "py_data_modeling_util.h" + +#include +#include + +using namespace boost::python; + +ftype Py_DataModeling::tensorGetItem(const Tensor& self, boost::python::object index) { + extract int_extractor(index); + + // Single integer index (1D) + if(int_extractor.check()) { + auto i0 = static_cast(int_extractor()); + return self.getItem(i0); + } + + // Tuple index (2D, 3D, or 4D, or list) + if (PySequence_Check(index.ptr())) { + int len = PySequence_Length(index.ptr()); + + // Dispatch to convenience functions for 1-4 args + if (len == 1) { + auto i0 = static_cast(extract(index[0])); + return self.getItem(i0); + } + else if (len == 2) { + auto i0 = static_cast(extract(index[0])); + auto i1 = static_cast(extract(index[1])); + return self.getItem(i0, i1); + } + else if (len == 3) { + auto i0 = static_cast(extract(index[0])); + auto i1 = static_cast(extract(index[1])); + auto i2 = static_cast(extract(index[2])); + return self.getItem(i0, i1, i2); + } + else if (len == 4) { + auto i0 = static_cast(extract(index[0])); + auto i1 = static_cast(extract(index[1])); + auto i2 = static_cast(extract(index[2])); + auto i3 = static_cast(extract(index[3])); + return 
self.getItem(i0, i1, i2, i3); + } + else { + // Arbitrary length - use vector version + std::vector indices; + for (int i = 0; i < len; ++i) { + indices.push_back(static_cast(extract(index[i]))); + } + return self.getItem(std::move(indices)); + } + } + + PyErr_SetString(PyExc_TypeError, "Index must be a number of up to 4integers or a list"); + throw_error_already_set(); + return 0.0; // Never reached +} + +void Py_DataModeling::tensorSetItem(Tensor& self, boost::python::object index, ftype value) { + extract int_extractor(index); + if(int_extractor.check()) { + auto i0 = static_cast(int_extractor()); + self.setItem(value, i0); + return; + } + + // Tuple index (2D, 3D, or 4D, or list) + extract tuple_extractor(index); + if (PySequence_Check(index.ptr())) { + int len = PySequence_Length(index.ptr()); + + // Dispatch to convenience functions for 1-4 args + if (len == 1) { + auto i0 = static_cast(extract(index[0])); + self.setItem(value, i0); + } + else if (len == 2) { + auto i0 = static_cast(extract(index[0])); + auto i1 = static_cast(extract(index[1])); + self.setItem(value, i0, i1); + } + else if (len == 3) { + auto i0 = static_cast(extract(index[0])); + auto i1 = static_cast(extract(index[1])); + auto i2 = static_cast(extract(index[2])); + self.setItem(value, i0, i1, i2); + } + else if (len == 4) { + auto i0 = static_cast(extract(index[0])); + auto i1 = static_cast(extract(index[1])); + auto i2 = static_cast(extract(index[2])); + auto i3 = static_cast(extract(index[3])); + self.setItem(value, i0, i1, i2, i3); + } + else { + // Arbitrary length - use vector version + std::vector indices; + for (int i = 0; i < len; ++i) { + indices.push_back(static_cast(extract(index[i]))); + } + self.setItem(value, std::move(indices)); + } + return; + } + + PyErr_SetString(PyExc_TypeError, "Index must be a number of up to 4integers or a list"); + throw_error_already_set(); +} \ No newline at end of file diff --git a/src/python/data_modeling/py_data_modeling_util.h 
b/src/python/data_modeling/py_data_modeling_util.h new file mode 100644 index 0000000..82a8343 --- /dev/null +++ b/src/python/data_modeling/py_data_modeling_util.h @@ -0,0 +1,135 @@ +/** + * @file util.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief Helper and wrapper functions + * @version 0.1 + * @date 2026-02-21 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "data_modeling/dim_type.h" + +#include "data_modeling/tensor.h" +#include "data_modeling/tensor_functions.h" +#include "computational_graph/graph_creation.h" + +#include +#include + +#include + +namespace Py_DataModeling { + + /********************************************************************************************************* + ********************************************** Dimension ************************************************* + *********************************************************************************************************/ + + inline bool (Dimension::*dimEquals1)(const Dimension&) const = &Dimension::operator==; + inline bool (Dimension::*dimEquals2)(const std::vector&) const = &Dimension::operator==; + + inline bool (Dimension::*nDimEquals1)(const Dimension&) const = &Dimension::operator!=; + inline bool (Dimension::*nDimEquals2)(const std::vector&) const = &Dimension::operator!=; + /********************************************************************************************************* + *********************************************** Tensor *************************************************** + *********************************************************************************************************/ + + ftype tensorGetItem(const Tensor& self, boost::python::object index); + void tensorSetItem(Tensor& self, boost::python::object index, ftype value); + + // need wrappers for default arguments, see + // https://beta.boost.org/doc/libs/develop/libs/python/doc/html/tutorial/tutorial/functions.html + inline auto OnesWrapper0(std::vector 
dims) { + return TensorFunctions::Ones(std::move(dims)); + } + + inline auto OnesWrapper1(std::vector dims, Device d) { + return TensorFunctions::Ones(std::move(dims), d); + } + + inline auto ZerosWrapper0(std::vector dims) { + return TensorFunctions::Zeros(std::move(dims)); + } + + inline auto ZerosWrapper1(std::vector dims, Device d) { + return TensorFunctions::Zeros(std::move(dims), d); + } + + inline auto GaussianWrapper0(std::vector dims) { + return TensorFunctions::Gaussian(std::move(dims)); + } + + inline auto GaussianWrapper1(std::vector dims, Device d) { + return TensorFunctions::Gaussian(std::move(dims), d); + } + + inline Tensor (*Ones0)(std::vector) = &OnesWrapper0; + inline Tensor (*Ones1)(std::vector, Device) = &OnesWrapper1; + inline Tensor (*Ones2)(std::vector, const bool) = &(TensorFunctions::Ones); + inline Tensor (*Ones3)(std::vector, Device, const bool) = &(TensorFunctions::Ones); + + inline Tensor (*Zeros0)(std::vector) = &ZerosWrapper0; + inline Tensor (*Zeros1)(std::vector, Device) = &ZerosWrapper1; + inline Tensor (*Zeros2)(std::vector, const bool) = &(TensorFunctions::Zeros); + inline Tensor (*Zeros3)(std::vector, Device, const bool) = &(TensorFunctions::Zeros); + + inline Tensor (*Gaussian0)(std::vector) = &GaussianWrapper0; + inline Tensor (*Gaussian1)(std::vector, Device) = &GaussianWrapper1; + inline Tensor (*Gaussian2)(std::vector, const bool) = &(TensorFunctions::Gaussian); + inline Tensor (*Gaussian3)(std::vector, Device, const bool) = &(TensorFunctions::Gaussian); + + inline void (Tensor::*reset1)(const ftype) = &Tensor::reset; + inline void (Tensor::*reset2)(const utility::InitClass) = &Tensor::reset; + + inline void (Tensor::*transposeThis1)() = &Tensor::transposeThis; + inline void (Tensor::*transposeThis2)(int, int) = &Tensor::transposeThis; + inline Tensor (Tensor::*transpose1)(int, int) const = &Tensor::transpose; + inline Tensor (Tensor::*transpose2)(int, int, bool) const = &Tensor::transpose; + + inline ftype 
(Tensor::*getItemVector)(const std::vector&) const = &Tensor::getItem; + + /********************************************************************************************************* + ***************************************** Graph creation ************************************************* + *********************************************************************************************************/ + + // multiplications + inline std::shared_ptr (*elementwisemul) + (const std::shared_ptr left, const std::shared_ptr right) = &(graph::mul); + + inline std::shared_ptr (*scalarmul) + (const std::shared_ptr, ftype) = &(graph::mul); + + inline std::shared_ptr (*rscalarmul) + (ftype, const std::shared_ptr) = &(graph::mul); + + // additions + inline std::shared_ptr (*elementwiseadd) + (const std::shared_ptr left, const std::shared_ptr right) = &(graph::add); + + inline std::shared_ptr (*scalaradd) + (const std::shared_ptr, ftype) = &(graph::add); + + inline std::shared_ptr (*rscalaradd) + (ftype, const std::shared_ptr) = &(graph::add); + + // matmul + inline std::shared_ptr (*matmul) + (const std::shared_ptr left, const std::shared_ptr right) = &(graph::matmul); + + // sub, div + inline std::shared_ptr (*scalarsub) + (const std::shared_ptr, ftype) = &(graph::sub); + + inline std::shared_ptr (*scalardiv) + (const std::shared_ptr, ftype) = &(graph::div); + + // get + inline std::shared_ptr (*getItemAsTensor1) + (const std::shared_ptr& t, tensorSize_t idx) = &(graph::get); + + inline std::shared_ptr (*getItemAsTensor2) + (const std::shared_ptr& t, const std::vector& idx) = &(graph::get); +} \ No newline at end of file diff --git a/src/python/python_utility/custom_converters.h b/src/python/python_utility/custom_converters.h index 8bc4f10..5114d7f 100644 --- a/src/python/python_utility/custom_converters.h +++ b/src/python/python_utility/custom_converters.h @@ -19,13 +19,14 @@ #include #include -namespace converters { +namespace custom_converters { /** * @brief We use this class 
to convert Python lists of int into vectors of * internal types, such as tensorDim_t. */ template - requires ( std::is_integral_v< std::remove_const_t >) + requires ( std::is_integral_v< T > || + std::is_floating_point_v< T >) struct PyListToVectorConverter { using rvalueFromPythonData = boost::python::converter::rvalue_from_python_stage1_data; @@ -40,7 +41,7 @@ namespace converters { * internal types, such as tensorDim_t. */ template - requires ( std::is_integral_v< std::remove_const_t >) + requires ( std::is_integral_v< T >) struct PyIntToIntegralValueConverter { using rvalueFromPythonData = boost::python::converter::rvalue_from_python_stage1_data; @@ -56,8 +57,9 @@ namespace converters { /******************************************************************************************/ template -requires ( std::is_integral_v< std::remove_const_t >) -converters::PyListToVectorConverter::PyListToVectorConverter() { +requires ( std::is_integral_v< T > || + std::is_floating_point_v< T >) +custom_converters::PyListToVectorConverter::PyListToVectorConverter() { using namespace boost::python; // register converter with Boost.Python's conversion system @@ -69,8 +71,9 @@ converters::PyListToVectorConverter::PyListToVectorConverter() { } template -requires ( std::is_integral_v< std::remove_const_t >) -void* converters::PyListToVectorConverter::convertible(PyObject* obj_ptr) { +requires ( std::is_integral_v< T > || + std::is_floating_point_v< T >) +void* custom_converters::PyListToVectorConverter::convertible(PyObject* obj_ptr) { using namespace boost::python; if (!PySequence_Check(obj_ptr)) @@ -80,8 +83,9 @@ void* converters::PyListToVectorConverter::convertible(PyObject* obj_ptr) { } template -requires ( std::is_integral_v< std::remove_const_t >) -void converters::PyListToVectorConverter::construct(PyObject* obj_ptr, rvalueFromPythonData* data) { +requires ( std::is_integral_v< T > || + std::is_floating_point_v< T >) +void 
custom_converters::PyListToVectorConverter::construct(PyObject* obj_ptr, rvalueFromPythonData* data) { using namespace boost::python; @@ -96,8 +100,15 @@ void converters::PyListToVectorConverter::construct(PyObject* obj_ptr, rvalue // Fill it with converted values for (int i = 0; i < len(py_list); ++i) { - int val = extract(py_list[i]); - vec->push_back(static_cast(val)); + + if constexpr(std::is_integral_v< T >){ + auto val = extract(py_list[i]); + vec->push_back(static_cast(val)); + } + else if constexpr(std::is_floating_point_v< T >) { + auto val = extract(py_list[i]); + vec->push_back(static_cast(val)); + } } // Tell Boost.Python where the constructed object is @@ -105,8 +116,8 @@ void converters::PyListToVectorConverter::construct(PyObject* obj_ptr, rvalue } template -requires ( std::is_integral_v< std::remove_const_t >) -converters::PyIntToIntegralValueConverter::PyIntToIntegralValueConverter() { +requires ( std::is_integral_v< T >) +custom_converters::PyIntToIntegralValueConverter::PyIntToIntegralValueConverter() { using namespace boost::python; // register converter with Boost.Python's conversion system @@ -118,8 +129,8 @@ converters::PyIntToIntegralValueConverter::PyIntToIntegralValueConverter() { } template -requires ( std::is_integral_v< std::remove_const_t >) -void* converters::PyIntToIntegralValueConverter::convertible(PyObject* obj_ptr) { +requires ( std::is_integral_v< T >) +void* custom_converters::PyIntToIntegralValueConverter::convertible(PyObject* obj_ptr) { using namespace boost::python; if (!PyLong_Check(obj_ptr)) @@ -129,8 +140,8 @@ void* converters::PyIntToIntegralValueConverter::convertible(PyObject* obj_pt } template -requires ( std::is_integral_v< std::remove_const_t >) -void converters::PyIntToIntegralValueConverter::construct(PyObject* obj_ptr, rvalueFromPythonData* data) { +requires ( std::is_integral_v< T >) +void custom_converters::PyIntToIntegralValueConverter::construct(PyObject* obj_ptr, rvalueFromPythonData* data) { using 
namespace boost::python; // Extract Python int diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d4fdd04..3b258b9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -19,10 +19,17 @@ target_link_libraries(unit_tests_backend PRIVATE include(GoogleTest) gtest_discover_tests(unit_tests_backend) -#find_package(Python3 COMPONENTS Interpreter) -#if(Python3_FOUND) -# add_test(NAME python_tests -# COMMAND ${Python3_EXECUTABLE} -m pytest ${CMAKE_CURRENT_SOURCE_DIR}/python -# WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} -# ) -#endif() \ No newline at end of file +find_package(Python3 COMPONENTS Interpreter) +if(Python3_FOUND) + # replace the placeholder variables and copy resulting file in .py file + add_test( + NAME python_tests + COMMAND ${Python3_EXECUTABLE} -m pytest + ${CMAKE_CURRENT_SOURCE_DIR}/python + ) + + # Set environment for Python to find the module + set_tests_properties(python_tests PROPERTIES + ENVIRONMENT "PYTHONPATH=${PYTHON_MODULE_DIR}:$ENV{PYTHONPATH}" + ) +endif() \ No newline at end of file diff --git a/tests/backend/test_computational_graph.cpp b/tests/backend/test_computational_graph.cpp index 62062a1..d2a686f 100644 --- a/tests/backend/test_computational_graph.cpp +++ b/tests/backend/test_computational_graph.cpp @@ -16,86 +16,119 @@ #include "computational_graph/graph_creation.h" +#include + +TEST(AutogradTest, ThrowsIfNoGradientSet) { + auto t1 = TensorFunctions::makeSharedTensor({1}, {3.0}, false); + auto t2 = TensorFunctions::makeSharedTensor({1}, {2.0}, false); + + auto loss = graph::add(t1, t2); + + EXPECT_THROW(loss->backward(), std::runtime_error); +} + TEST(AutogradTest, SimpleAddition) { auto t1 = TensorFunctions::makeSharedTensor({1}, {3.0}, true); auto t2 = TensorFunctions::makeSharedTensor({1}, {2.0}, true); - auto res = graph::add(t1, t2); - auto loss = graph::mul(res, res); + auto t3 = graph::add(t1, t2); + auto loss = graph::mul(t3, t3); loss->backward(); - EXPECT_NEAR(t1->getGrads()->getItem(0), 10.0f, 1e-5); - 
EXPECT_NEAR(t2->getGrads()->getItem(0), 10.0f, 1e-5); + EXPECT_NEAR(t1->getGrads()->getItem(0), 10.0, 1e-5); + EXPECT_NEAR(t2->getGrads()->getItem(0), 10.0, 1e-5); } TEST(AutogradTest, ScalarMultiplication) { auto t1 = TensorFunctions::makeSharedTensor({1}, {2.0}, true); auto t2 = TensorFunctions::makeSharedTensor({1}, {3.0}, true); - auto res = graph::mul(t1, t2); - auto loss = graph::mul(res, res); + auto t3 = graph::mul(t1, t2); + auto loss = graph::mul(t3, t3); loss->backward(); - EXPECT_NEAR(t1->getGrads()->getItem(0), 36.0f, 1e-5); - EXPECT_NEAR(t2->getGrads()->getItem(0), 24.0f, 1e-5); + ASSERT_DOUBLE_EQ(t1->getGrads()->getItem(0), 36.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->getItem(0), 24.0); } TEST(AutogradTest, MatMul) { auto t1 = TensorFunctions::makeSharedTensor({2, 3}, {1, 2, 3, 4, 5, 6}, true); auto t2 = TensorFunctions::makeSharedTensor({3, 2}, {1, 2, 3, 4, 5, 6}, true); - auto res = graph::matmul(t1, t2); + auto t3 = graph::matmul(t1, t2); - auto loss = TensorFunctions::makeSharedTensor({1}, {0.0f}, true); - for (size_t i = 0; i < res->getSize(); ++i) { - loss = graph::add(loss, graph::get(res, i)); + auto loss = TensorFunctions::makeSharedTensor({1}, {0.0}, true); + for (size_t i = 0; i < t3->getSize(); ++i) { + loss = graph::add(loss, graph::get(t3, i)); } loss->backward(); EXPECT_TRUE(t1->hasGrads()); EXPECT_TRUE(t2->hasGrads()); + + // dL/dt1 = dloss/dt3 @ t2^t = Ones({2, 2}) @ t2^t + ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({0, 0}), 3.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({0, 1}), 7.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({0, 2}), 11.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({1, 0}), 3.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({1, 1}), 7.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({1, 2}), 11.0); + + // dL/dt2 = t1^t @ dloss/dt3 = t1^t @ Ones({2, 2}) + ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({0, 0}), 5.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({0, 1}), 5.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({1, 0}), 
7.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({1, 1}), 7.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({2, 0}), 9.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({2, 1}), 9.0); } -/* TEST(AutogradTest, ChainRule) { - Tensor x({1}, {2.0f}, true); +TEST(AutogradTest, ChainRule) { + auto x = TensorFunctions::makeSharedTensor({1}, {2.0}, true); - Tensor y = x * x; // y = x^2 - Tensor z = y + x; // z = x^2 + x - Tensor loss = z * z; // loss = (x^2 + x)^2 + auto y = graph::mul(x, x); // y = x^2 + auto z = graph::add(x, y); // z = x^2 + x + auto loss = graph::mul(z, z); // loss = (x^2 + x)^2 - loss.backward(); + loss->backward(); // dloss/dx = 2(x^2 + x) * (2x + 1) // At x=2: 2(4 + 2) * (4 + 1) = 2 * 6 * 5 = 60 - EXPECT_NEAR(loss.getGrads()->getItem(0), 60.0f, 1e-4); -} */ + ASSERT_DOUBLE_EQ(x->getGrads()->getItem(0), 60.0); +} -/* TEST(AutogradTest, ReLU) { - Tensor x({3}, {-1.0f, 0.0f, 2.0f}, true); +TEST(AutogradTest, MultiVariateChainRule) { + auto x = TensorFunctions::makeSharedTensor({2}, {1.0, 2.0}, true); - Tensor y = relu(x); // [0, 0, 2] - Tensor loss = sum(y); // loss = 2 + auto y = graph::mul(x, 3.0); // y = [3, 6] + auto loss = TensorFunctions::makeSharedTensor({1}, {0.0}, true); + for(int i=0; igetSize(); i++){ + loss = graph::add(loss, graph::get(y, i)); + } // loss = 9 - loss.backward(); + loss->backward(); - // Gradient: [0, 0, 1] (only where input > 0) - EXPECT_NEAR(t.getGrads()->getItem(0), 0.0f, 1e-5); - EXPECT_NEAR(t.getGrads()->getItem(1), 0.0f, 1e-5); - EXPECT_NEAR(t.getGrads()->getItem(2), 1.0f, 1e-5); + // dloss/dx = scalar = 3 + ASSERT_DOUBLE_EQ(x->getGrads()->getItem(0), 3.0); + ASSERT_DOUBLE_EQ(x->getGrads()->getItem(1), 3.0); + + ASSERT_DOUBLE_EQ(y->getGrads()->getItem(0), 1.0); + ASSERT_DOUBLE_EQ(y->getGrads()->getItem(1), 1.0); } -TEST(AutogradTest, ScalarMultiplication) { - Tensor x({2}, {1.0f, 2.0f}, true); +/* TEST(AutogradTest, ReLU) { + Tensor x({3}, {-1.0, 0.0, 2.0}, true); - Tensor y = x * 3.0f; // y = [3, 6] - Tensor loss = 
sum(y); // loss = 9 + Tensor y = relu(x); // [0, 0, 2] + Tensor loss = sum(y); // loss = 2 loss.backward(); - // dloss/dx = scalar = 3 - EXPECT_NEAR(t.getGrads()->getItem(0), 3.0f, 1e-5); - EXPECT_NEAR(t.getGrads()->getItem(1), 3.0f, 1e-5); + // Gradient: [0, 0, 1] (only where input > 0) + EXPECT_NEAR(t.getGrads()->getItem(0), 0.0, 1e-5); + EXPECT_NEAR(t.getGrads()->getItem(1), 0.0, 1e-5); + EXPECT_NEAR(t.getGrads()->getItem(2), 1.0, 1e-5); } */ \ No newline at end of file diff --git a/tests/backend/test_data_modeling.cpp b/tests/backend/test_data_modeling.cpp index 4afc140..3bff63f 100644 --- a/tests/backend/test_data_modeling.cpp +++ b/tests/backend/test_data_modeling.cpp @@ -16,6 +16,19 @@ #include +TEST(TensorOpsTest, TestCtor) { + auto t = Tensor({2, 2}, {2.0, 3.0, 4.0, 5.0}, Device::CPU, false); + + ASSERT_EQ(t.getDims(), Dimension({2, 2})); + ASSERT_EQ(t.getDevice(), Device::CPU); + ASSERT_TRUE(!t.getRequiresGrad()); + + ASSERT_DOUBLE_EQ(t.getItem(0, 0), 2.0); + ASSERT_DOUBLE_EQ(t.getItem(0, 1), 3.0); + ASSERT_DOUBLE_EQ(t.getItem(1, 0), 4.0); + ASSERT_DOUBLE_EQ(t.getItem(1, 1), 5.0); +} + TEST(TensorOpsTest, ScalarAddWorks) { auto t1 = TensorFunctions::Ones({2, 2}, false); @@ -29,6 +42,27 @@ TEST(TensorOpsTest, ScalarAddWorks) { } } +TEST(TensorOpsTest, TensorAddWorks) { + auto t1 = TensorFunctions::Ones({2, 2}, false); + auto t2 = TensorFunctions::Ones({2, 2}, false) * 4; + + auto res = t1 + t2; + + constexpr ftype sum = 5.0; + for(auto i=0; i{2, 2}; - ASSERT_EQ(res.getDims().toVector(), expectedDims); + auto t1 = TensorFunctions::Ones({2, 2}, false); + auto t2 = TensorFunctions::Ones({3, 2}, false); - constexpr ftype resSum = 3.0; - for(auto i=0; i