From a7928661dc94bee5a2c89dc75c833c866f611869 Mon Sep 17 00:00:00 2001 From: "liyuchenmike@gmail.com" Date: Thu, 8 Sep 2016 11:57:58 +0800 Subject: [PATCH 1/7] SINGA-236 memory pool \n Implemented the following features: \n 1. A memory pool facility to manage Block data allocated by the memory pool. \n 2. Add relevant test cases. --- include/singa/core/common.h | 13 ++- include/singa/core/memory.h | 52 +++++++++++ src/core/memory/memory.cc | 153 ++++++++++++++++++++++++++++++- test/singa/test_memory.cc | 174 ++++++++++++++++++++++++++++++++++++ 4 files changed, 387 insertions(+), 5 deletions(-) diff --git a/include/singa/core/common.h b/include/singa/core/common.h index dc552c1d61..e7c7ea268f 100644 --- a/include/singa/core/common.h +++ b/include/singa/core/common.h @@ -36,7 +36,10 @@ #ifdef USE_OPENCL -#include "singa/utils/opencl_utils.h" +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#include +#include #endif // USE_OPENCL using std::atomic; @@ -62,6 +65,9 @@ class Block { // Disabled as it is not used currently. // Block(void* ptr, size_t size, size_t offset, std::shared_ptr> // ref) : data_(ptr), size_(size), offset_(offset), ref_count_(ref) {} + + // TODO(wangwei) check if the set is correct and add lock if shared sturcture is allowed + void set_data(void* ptr) { data_ = ptr; } void* mutable_data() { initialized_ = true; return static_cast(data_) + offset_; @@ -107,8 +113,9 @@ typedef struct _Context { #endif // USE_CUDA #ifdef USE_OPENCL - // This stores the context ID of the OpenCL context controlled by ViennaCL. 
- long vcl_ctx_id; + std::shared_ptr> kernels; + cl::CommandQueue ocl_cmdq; + cl::Context ocl_ctx; #endif } Context; diff --git a/include/singa/core/memory.h b/include/singa/core/memory.h index f664f95ced..2d2e78b191 100644 --- a/include/singa/core/memory.h +++ b/include/singa/core/memory.h @@ -23,6 +23,7 @@ #include #include "singa/proto/core.pb.h" #include "singa/singa_config.h" +#include "singa/core/common.h" #ifdef USE_CUDA #include "cnmem.h" @@ -50,6 +51,57 @@ class DeviceMemPool { // size_t init_size_ = 0, max_size_ = 0; }; +class CppMemPool { + public: + // initial pool size (MB), and the size of each memory uint in the memory pool (KB) + CppMemPool(size_t init_size_mb = 256, size_t uint_size_kb = 1); + + // return a new pool based on the current pool + // once returned, the old pool will be invalid + // re-initial with pool size (MB), and set the size of each memory uint in the memory pool (KB) + void RsetMemPool(size_t init_size_mb = 256, size_t uint_size_kb = 1); + + // create the memory requested, if size is larger than memUintSize, malloc from system call + // is_ptr_null indicate whether the pointer is null and if so we will initialize it in the malloc function, + // otherwise we will use the ptr directly and access its data and functions. + // after the malloc, the data pointer of the block will be changed and the orginal data pointer will be lost. + void Malloc(Block** ptr, const size_t size, bool is_ptr_null = true); + void Free(Block* ptr); + + std::pair GetMemUsage(); + size_t GetNumFreeUints(){return numUints - numAllocatedUintsInPool;}; + + // release all memory. + // all pointers allocated in the pool must be freed before calling the descturctor. 
+ ~CppMemPool(); + + protected: + // each structure define a memory uint in the memory pool + // the structure is a static double linked list + struct _Uint { + struct _Uint *pPrev, *pNext; + Block* pBlk; + }; + + // pointer to the memory pool + void* pMemPool; + + // head pointer to allocated memory uint + struct _Uint* pAllocatedMemUint; + // head pointer to free memory uint + struct _Uint* pFreeMemUint; + + // the size of each memory uint with/out the meta data of the uint + size_t memUintSize, memUintSizeNoMeta; + + // the number of memory uints in the pool + size_t numUints; + // the number of allocated uints which are resided in the memory pool + size_t numAllocatedUintsInPool; + // the number of allocated uints including the ones resided outside the memory pool + size_t numAllocatedUints; +}; + #ifdef USE_CUDA class CnMemPool : public DeviceMemPool { public: diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc index cb33a48cdd..45ff96ddc4 100644 --- a/src/core/memory/memory.cc +++ b/src/core/memory/memory.cc @@ -21,8 +21,157 @@ #include "singa/proto/core.pb.h" #include -#ifdef USE_CUDA namespace singa { + +std::pair CppMemPool::GetMemUsage() { + size_t total,free; + total = memUintSize * numUints; + free = total - memUintSize * numAllocatedUintsInPool; + return std::make_pair(free,total); +} + +CppMemPool::CppMemPool(size_t init_size_mb, size_t uint_size_kb) { + pMemPool = NULL ; + pAllocatedMemUint = pFreeMemUint = NULL; + memUintSize = memUintSizeNoMeta = 0; + numUints = numAllocatedUintsInPool = numAllocatedUints = 0; + RsetMemPool(init_size_mb,uint_size_kb); +} + + +void CppMemPool::RsetMemPool(size_t init_size_mb, size_t uint_size_kb) { + + if(numAllocatedUintsInPool == 0) { // in the case the pool is empty + // setting up the parameters in the memory pool + const size_t kNBytesPerKB = (1u << 10); + const size_t kNBytesPerMB = (1u << 20); + memUintSize = uint_size_kb * kNBytesPerKB; + memUintSizeNoMeta = memUintSize - sizeof(struct _Uint); 
+ size_t poolSize = init_size_mb * kNBytesPerMB; + bool memAligned = poolSize % memUintSize == 0; + numUints = memAligned ? (poolSize / memUintSize) : (poolSize / memUintSize + 1); + CHECK_GE(numUints,1); + poolSize = memUintSize * numUints; + + // intialize the memory pool + pMemPool = malloc(poolSize); + CHECK(pMemPool != NULL); + for(size_t idx = 0; idx < numUints; idx++) { + struct _Uint *pCurUint = (struct _Uint*)((char *)pMemPool + idx * memUintSize); + pCurUint->pPrev = NULL; + pCurUint->pNext = pFreeMemUint; + if(pFreeMemUint != NULL) { + pFreeMemUint->pPrev = pCurUint; + } + pFreeMemUint = pCurUint; + pCurUint->pBlk = NULL; + } + } else { // the pool is not empty, create a new one and copy the old to the new one + CppMemPool* pNewPool = new CppMemPool(init_size_mb, uint_size_kb); + struct _Uint* pCurUint = pAllocatedMemUint; + for(size_t idx = 0; idx < numAllocatedUintsInPool; idx++) { + Block* pOldBlk = pCurUint->pBlk; + void* pData = pOldBlk->mutable_data(); + pNewPool->Malloc(&pOldBlk, pOldBlk->size(), false); + size_t copySize = pOldBlk->size() - pOldBlk->offset(); + memcpy(pOldBlk->mutable_data(),pData,copySize); + pCurUint = pCurUint->pNext; + } + // swap the new pool with the current + std::swap(pNewPool->pMemPool,pMemPool); + std::swap(pNewPool->pAllocatedMemUint,pAllocatedMemUint); + std::swap(pNewPool->pFreeMemUint,pFreeMemUint); + std::swap(pNewPool->memUintSize,memUintSize); + std::swap(pNewPool->memUintSizeNoMeta,memUintSizeNoMeta); + std::swap(pNewPool->numUints,numUints); + std::swap(pNewPool->numAllocatedUintsInPool,numAllocatedUintsInPool); + pNewPool->numAllocatedUints = 0; + delete pNewPool; + } +} + +void CppMemPool::Malloc(Block** ptr, const size_t size, bool is_ptr_null) { + numAllocatedUints++; + // the size is larger than the memory uint size + if(size > memUintSizeNoMeta || pFreeMemUint == NULL) { + void* pData = malloc(size); + if(is_ptr_null) { + *ptr = new Block(pData,size); + } else { + CHECK_EQ((*ptr)->size(),size); + 
(*ptr)->set_data(pData); + } + return; + } + + // otherwise retrieve from one of the memory uint + numAllocatedUintsInPool++; + struct _Uint *pCurUint = pFreeMemUint; + pFreeMemUint = pCurUint->pNext; + if(pFreeMemUint != NULL) { + pFreeMemUint->pPrev = NULL; + } + + pCurUint->pNext = pAllocatedMemUint; + if(pAllocatedMemUint != NULL) { + pAllocatedMemUint->pPrev = pCurUint; + } + + pAllocatedMemUint = pCurUint; + void* pData = (void*)((char *)pCurUint + sizeof(struct _Uint)); + if(is_ptr_null) { + *ptr = new Block(pData,size); + } else { + CHECK_EQ((*ptr)->size(),size); + (*ptr)->set_data(pData); + } + CHECK(pCurUint->pBlk == NULL); + pCurUint->pBlk = *ptr; +} + +void CppMemPool::Free(Block* ptr) { + void* pData = ptr->mutable_data(); + if(pMemPool < pData && pData < (void*)((char*)pMemPool + numUints * memUintSize)) { + struct _Uint *pCurUint = (struct _Uint*)((char*)pData-sizeof(struct _Uint)); + CHECK(ptr == pCurUint->pBlk); + + if(pCurUint == pAllocatedMemUint) { + pAllocatedMemUint = pCurUint->pNext; + if(pAllocatedMemUint != NULL) { + pAllocatedMemUint->pPrev = NULL; + } + } else { + struct _Uint *pCurPrevUint = pCurUint->pPrev; + pCurUint->pPrev = NULL; + pCurPrevUint->pNext = pCurUint->pNext; + if(pCurUint->pNext != NULL) { + pCurUint->pNext->pPrev = pCurPrevUint; + } + } + + pCurUint->pNext = pFreeMemUint; + if(pFreeMemUint != NULL) { + pFreeMemUint->pPrev = pCurUint; + } + + pFreeMemUint = pCurUint; + pCurUint->pBlk = NULL; + numAllocatedUintsInPool--; + } + else { + free(pData); + } + numAllocatedUints--; + delete ptr; +} + +CppMemPool::~CppMemPool() { + CHECK_EQ(numAllocatedUints,0); + free(pMemPool); +} + + +#ifdef USE_CUDA std::atomic CnMemPool::pool_count(0); std::pair CnMemPool::GetMemUsage() { size_t free, total; @@ -107,5 +256,5 @@ void CudaMemPool::Free(void *ptr) { cudaError_t status = cudaFree(ptr); CHECK_EQ(status, cudaError_t::cudaSuccess); } -} #endif +} diff --git a/test/singa/test_memory.cc b/test/singa/test_memory.cc index 
33a374724e..4e0dfff065 100644 --- a/test/singa/test_memory.cc +++ b/test/singa/test_memory.cc @@ -25,6 +25,180 @@ #include "singa/singa_config.h" #include "singa/utils/timer.h" #include "singa/utils/cuda_utils.h" +#include + +// this tests allocated a number of memory blocks in the memory pool +// the pool consists of 1024 uints and each uint has a size of 1000 bytes +// we malloc 1024 blocks where half of the block will reside outside the pool, +// and the other half will be inside the pool +TEST(CppMemPool, Malloc) { + singa::CppMemPool pool(1,1); + const int numOfTests = 1024; + const size_t dataSizeSmall = 1000; + const size_t dataSizeLarge = 2000; + singa::Block** pptr = new singa::Block*[numOfTests]; + + for(int i = 0; i < numOfTests; i++) { + const size_t dataSize = (i%2) ? dataSizeSmall : dataSizeLarge; + pool.Malloc(&(pptr[i]),dataSize); + int* data = static_cast(pptr[i]->mutable_data()); + for(int idx = 0; idx < (int)dataSize/4; idx++) { + data[idx] = i; + } + data = static_cast(pptr[i]->mutable_data()); + int sum = 0; + for(int idx = 0; idx < (int)dataSize/4; idx++) { + sum += data[idx]; + } + CHECK_EQ(sum,i*dataSize/4); + } + CHECK_EQ(512,pool.GetNumFreeUints()); + + for(int i = 0; i < numOfTests; i++) { + pool.Free(pptr[i]); + } + CHECK_EQ(1024,pool.GetNumFreeUints()); + + delete[] pptr; +} + +// this tests intialize a pool with size 2M bytes and each memory unit has a size of 2048 bytes +// we then allocated 1024 memory block with half of the blocks with size 2000 and the other half with size 1000 +// then we reset the pool to size 1M bytes and memory uint size to 1000 bytes to test the reset function +TEST(CppMemPool, MallocAndRest) { + singa::CppMemPool pool(2,2); + const int numOfTests = 1024; + const size_t dataSizeSmall = 1000; + const size_t dataSizeLarge = 2000; + singa::Block** pptr = new singa::Block*[numOfTests]; + + for(int i = 0; i < numOfTests; i++) { + const size_t dataSize = (i%2) ? 
dataSizeSmall : dataSizeLarge; + pool.Malloc(&(pptr[i]),dataSize); + int* data = static_cast(pptr[i]->mutable_data()); + for(int idx = 0; idx < (int)dataSize/4; idx++) { + data[idx] = i; + } + data = static_cast(pptr[i]->mutable_data()); + int sum = 0; + for(int idx = 0; idx < (int)dataSize/4; idx++) { + sum += data[idx]; + } + CHECK_EQ(sum,i*dataSize/4); + } + CHECK_EQ(0,pool.GetNumFreeUints()); + + pool.RsetMemPool(1,1); + CHECK_EQ(512,pool.GetNumFreeUints()); + for(int i = 0; i < numOfTests; i++) { + const size_t dataSize = (i%2) ? dataSizeSmall : dataSizeLarge; + int* data = static_cast(pptr[i]->mutable_data()); + for(int idx = 0; idx < (int)dataSize/4; idx++) { + data[idx] = i; + } + data = static_cast(pptr[i]->mutable_data()); + int sum = 0; + for(int idx = 0; idx < (int)dataSize/4; idx++) { + sum += data[idx]; + } + CHECK_EQ(sum,i*dataSize/4); + } + + for(int i = 0; i < numOfTests; i++) { + pool.Free(pptr[i]); + } + CHECK_EQ(1024,pool.GetNumFreeUints()); + + delete[] pptr; +} + +// this tests initialize a pool with size 1M bytes and uint size of 1024 bytes +// then 1024 memory blocks are allocated, half of them in the pool and the other half outside the pool +// subsequently, we randomly free 512 blocks and after that allocate them back to the pool +// after reset the pool to a size of 2M bytes and uint size of 2048 bytes, +// we free all memory blocks allocated. +TEST(CppMemPool, RandomFree) { + singa::CppMemPool pool(1,1); + const int numOfTests = 1024; + const size_t dataSizeSmall = 1000; + const size_t dataSizeLarge = 2000; + singa::Block** pptr = new singa::Block*[numOfTests]; + + for(int i = 0; i < numOfTests; i++) { + const size_t dataSize = (i%2) ? 
dataSizeSmall : dataSizeLarge; + pool.Malloc(&(pptr[i]),dataSize); + int* data = static_cast(pptr[i]->mutable_data()); + for(int idx = 0; idx < (int)dataSize/4; idx++) { + data[idx] = i; + } + data = static_cast(pptr[i]->mutable_data()); + int sum = 0; + for(int idx = 0; idx < (int)dataSize/4; idx++) { + sum += data[idx]; + } + CHECK_EQ(sum,i*dataSize/4); + } + CHECK_EQ(512,pool.GetNumFreeUints()); + + // randomized free pointers + int* randomPool = new int[numOfTests]; + for(int i = 0; i < numOfTests; i++) { + randomPool[i] = i; + } + int iter = 0; + while(iter != numOfTests/2) { // random free half of the memory blocks + int pos = std::rand() % (numOfTests-iter); + int i = randomPool[pos]; + std::swap(randomPool[pos],randomPool[numOfTests-1-iter]); + + // check value before deletion + const size_t dataSize = (i%2) ? dataSizeSmall : dataSizeLarge; + int* data = static_cast(pptr[i]->mutable_data()); + for(int idx = 0; idx < (int)dataSize/4; idx++) { + data[idx] = i; + } + data = static_cast(pptr[i]->mutable_data()); + int sum = 0; + for(int idx = 0; idx < (int)dataSize/4; idx++) { + sum += data[idx]; + } + CHECK_EQ(sum,i*dataSize/4); + + pool.Free(pptr[i]); + iter++; + } + + // test the unfreed memory block value + for(int pos = 0; pos < numOfTests/2; pos++) { + int i = randomPool[pos]; + const size_t dataSize = (i%2) ? dataSizeSmall : dataSizeLarge; + int* data = static_cast(pptr[i]->mutable_data()); + for(int idx = 0; idx < (int)dataSize/4; idx++) { + data[idx] = i; + } + data = static_cast(pptr[i]->mutable_data()); + int sum = 0; + for(int idx = 0; idx < (int)dataSize/4; idx++) { + sum += data[idx]; + } + CHECK_EQ(sum,i*dataSize/4); + } + + for(int pos = numOfTests/2; pos < numOfTests; pos++) { + int i = randomPool[pos]; + const size_t dataSize = (i%2) ? 
dataSizeSmall : dataSizeLarge; + pool.Malloc(&(pptr[i]),dataSize); + } + + pool.RsetMemPool(2,2); + for(int i = 0; i < numOfTests; i++) { + pool.Free(pptr[i]); + } + CHECK_EQ(1024,pool.GetNumFreeUints()); + + delete[] randomPool; + delete[] pptr; +} #ifdef USE_CUDA /* From ac5c59f4346ed52633f1ff5fbc30fc52b65c4658 Mon Sep 17 00:00:00 2001 From: "liyuchenmike@gmail.com" Date: Wed, 14 Sep 2016 11:14:08 +0800 Subject: [PATCH 2/7] updated the memory pool structure so that no fixed size pool is required --- include/singa/core/memory.h | 50 +++----- src/core/memory/memory.cc | 225 +++++++++++++++--------------------- test/singa/test_memory.cc | 128 +++++++++----------- 3 files changed, 166 insertions(+), 237 deletions(-) diff --git a/include/singa/core/memory.h b/include/singa/core/memory.h index 2d2e78b191..d0a8afa807 100644 --- a/include/singa/core/memory.h +++ b/include/singa/core/memory.h @@ -53,53 +53,33 @@ class DeviceMemPool { class CppMemPool { public: - // initial pool size (MB), and the size of each memory uint in the memory pool (KB) - CppMemPool(size_t init_size_mb = 256, size_t uint_size_kb = 1); + CppMemPool(); - // return a new pool based on the current pool - // once returned, the old pool will be invalid - // re-initial with pool size (MB), and set the size of each memory uint in the memory pool (KB) - void RsetMemPool(size_t init_size_mb = 256, size_t uint_size_kb = 1); - - // create the memory requested, if size is larger than memUintSize, malloc from system call - // is_ptr_null indicate whether the pointer is null and if so we will initialize it in the malloc function, - // otherwise we will use the ptr directly and access its data and functions. - // after the malloc, the data pointer of the block will be changed and the orginal data pointer will be lost. 
- void Malloc(Block** ptr, const size_t size, bool is_ptr_null = true); + Block* Malloc(const size_t size); void Free(Block* ptr); - std::pair GetMemUsage(); - size_t GetNumFreeUints(){return numUints - numAllocatedUintsInPool;}; + // get the free and total size of the memory pool (in terms of bytes) + std::pair GetMemUsage(){return std::make_pair(freeSize,memPoolSize);}; - // release all memory. - // all pointers allocated in the pool must be freed before calling the descturctor. ~CppMemPool(); - protected: - // each structure define a memory uint in the memory pool - // the structure is a static double linked list + private: + // each structure define a memory uint in the memory pool + // the structure is a static double linked list struct _Uint { struct _Uint *pPrev, *pNext; Block* pBlk; }; - // pointer to the memory pool - void* pMemPool; + // total size held by the memory pool (in terms of bytes) + size_t memPoolSize; + // total free size by the memory pool (in terms of bytes) + size_t freeSize; - // head pointer to allocated memory uint - struct _Uint* pAllocatedMemUint; - // head pointer to free memory uint - struct _Uint* pFreeMemUint; - - // the size of each memory uint with/out the meta data of the uint - size_t memUintSize, memUintSizeNoMeta; - - // the number of memory uints in the pool - size_t numUints; - // the number of allocated uints which are resided in the memory pool - size_t numAllocatedUintsInPool; - // the number of allocated uints including the ones resided outside the memory pool - size_t numAllocatedUints; + // each pointer in this array keeps a head of the allocated memory uints of different size (power of 2) + struct _Uint **ppAllocUints; + // each pointer in this array keeps a head of the allocated memory uints of different size (power of 2) + struct _Uint **ppFreeUints; }; #ifdef USE_CUDA diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc index 45ff96ddc4..1abbbb3262 100644 --- a/src/core/memory/memory.cc +++ 
b/src/core/memory/memory.cc @@ -20,154 +20,119 @@ #include "singa/utils/logging.h" #include "singa/proto/core.pb.h" #include - -namespace singa { - -std::pair CppMemPool::GetMemUsage() { - size_t total,free; - total = memUintSize * numUints; - free = total - memUintSize * numAllocatedUintsInPool; - return std::make_pair(free,total); +/* +int get_pos(size_t size) { + int result = 0; + while(size > 1) { + result++; + size = size/2; + } + return result; } +*/ +namespace singa { -CppMemPool::CppMemPool(size_t init_size_mb, size_t uint_size_kb) { - pMemPool = NULL ; - pAllocatedMemUint = pFreeMemUint = NULL; - memUintSize = memUintSizeNoMeta = 0; - numUints = numAllocatedUintsInPool = numAllocatedUints = 0; - RsetMemPool(init_size_mb,uint_size_kb); +CppMemPool::CppMemPool() { + memPoolSize = 0; + freeSize = 0; + ppAllocUints = (struct _Uint**)malloc(64*sizeof(struct _Uint*)); + ppFreeUints = (struct _Uint**)malloc(64*sizeof(struct _Uint*)); + for(int i = 0; i < 64; i++) { + ppAllocUints[i] = NULL; + ppFreeUints[i] = NULL; + } } -void CppMemPool::RsetMemPool(size_t init_size_mb, size_t uint_size_kb) { - - if(numAllocatedUintsInPool == 0) { // in the case the pool is empty - // setting up the parameters in the memory pool - const size_t kNBytesPerKB = (1u << 10); - const size_t kNBytesPerMB = (1u << 20); - memUintSize = uint_size_kb * kNBytesPerKB; - memUintSizeNoMeta = memUintSize - sizeof(struct _Uint); - size_t poolSize = init_size_mb * kNBytesPerMB; - bool memAligned = poolSize % memUintSize == 0; - numUints = memAligned ? 
(poolSize / memUintSize) : (poolSize / memUintSize + 1); - CHECK_GE(numUints,1); - poolSize = memUintSize * numUints; - - // intialize the memory pool - pMemPool = malloc(poolSize); - CHECK(pMemPool != NULL); - for(size_t idx = 0; idx < numUints; idx++) { - struct _Uint *pCurUint = (struct _Uint*)((char *)pMemPool + idx * memUintSize); - pCurUint->pPrev = NULL; - pCurUint->pNext = pFreeMemUint; - if(pFreeMemUint != NULL) { - pFreeMemUint->pPrev = pCurUint; - } - pFreeMemUint = pCurUint; - pCurUint->pBlk = NULL; +Block* CppMemPool::Malloc(const size_t size) { + CHECK(size > 0); + Block *pAllocBlk = NULL; + int pos = 63 - __builtin_clzll(size); + + struct _Uint*& pAllocUint = ppAllocUints[pos]; + struct _Uint*& pFreeUint = ppFreeUints[pos]; + struct _Uint* pCurUint = NULL; + size_t memSize = pow(2,pos); + size_t blkSize = (size % memSize == 0) ? memSize : memSize*2; + blkSize += sizeof(struct _Uint); + + if(pFreeUint == NULL) { // if no available free blocks + memPoolSize += blkSize; + pCurUint = (struct _Uint*)malloc(blkSize); + pCurUint->pPrev = NULL; + pCurUint->pNext = pAllocUint; + if(pAllocUint != NULL) { + pAllocUint->pPrev = pCurUint; + } + pAllocUint = pCurUint; + pAllocBlk = new Block((char*)(pCurUint) + sizeof(struct _Uint), size); + pCurUint->pBlk = pAllocBlk; + } else { + freeSize -= blkSize; + pCurUint = pFreeUint; + pFreeUint = pCurUint->pNext; + if(pFreeUint != NULL) { + pFreeUint->pPrev = NULL; } - } else { // the pool is not empty, create a new one and copy the old to the new one - CppMemPool* pNewPool = new CppMemPool(init_size_mb, uint_size_kb); - struct _Uint* pCurUint = pAllocatedMemUint; - for(size_t idx = 0; idx < numAllocatedUintsInPool; idx++) { - Block* pOldBlk = pCurUint->pBlk; - void* pData = pOldBlk->mutable_data(); - pNewPool->Malloc(&pOldBlk, pOldBlk->size(), false); - size_t copySize = pOldBlk->size() - pOldBlk->offset(); - memcpy(pOldBlk->mutable_data(),pData,copySize); - pCurUint = pCurUint->pNext; + + pCurUint->pNext = pAllocUint; 
+ if(pAllocUint != NULL) { + pAllocUint->pPrev = pCurUint; } - // swap the new pool with the current - std::swap(pNewPool->pMemPool,pMemPool); - std::swap(pNewPool->pAllocatedMemUint,pAllocatedMemUint); - std::swap(pNewPool->pFreeMemUint,pFreeMemUint); - std::swap(pNewPool->memUintSize,memUintSize); - std::swap(pNewPool->memUintSizeNoMeta,memUintSizeNoMeta); - std::swap(pNewPool->numUints,numUints); - std::swap(pNewPool->numAllocatedUintsInPool,numAllocatedUintsInPool); - pNewPool->numAllocatedUints = 0; - delete pNewPool; + pAllocUint = pCurUint; + pAllocBlk = pCurUint->pBlk; } + return pAllocBlk; } -void CppMemPool::Malloc(Block** ptr, const size_t size, bool is_ptr_null) { - numAllocatedUints++; - // the size is larger than the memory uint size - if(size > memUintSizeNoMeta || pFreeMemUint == NULL) { - void* pData = malloc(size); - if(is_ptr_null) { - *ptr = new Block(pData,size); - } else { - CHECK_EQ((*ptr)->size(),size); - (*ptr)->set_data(pData); +void CppMemPool::Free(Block* ptr) { + void* pData = ptr->mutable_data(); + struct _Uint *pCurUint = (struct _Uint*)((char*)pData-sizeof(struct _Uint)); + int pos = 63 - __builtin_clzll(ptr->size()); + struct _Uint*& pAllocUint = ppAllocUints[pos]; + struct _Uint*& pFreeUint = ppFreeUints[pos]; + size_t memSize = pow(2,pos); + size_t blkSize = (ptr->size() % memSize == 0) ? 
memSize : memSize*2; + blkSize += sizeof(struct _Uint); + freeSize += blkSize; + + if(pCurUint == pAllocUint) { + pAllocUint = pCurUint->pNext; + if(pAllocUint != NULL) { + pAllocUint->pPrev = NULL; + } + } else { + struct _Uint *pCurPrevUint = pCurUint->pPrev; + pCurUint->pPrev = NULL; + pCurPrevUint->pNext = pCurUint->pNext; + if(pCurUint->pNext != NULL) { + pCurUint->pNext->pPrev = pCurPrevUint; } - return; } - // otherwise retrieve from one of the memory uint - numAllocatedUintsInPool++; - struct _Uint *pCurUint = pFreeMemUint; - pFreeMemUint = pCurUint->pNext; - if(pFreeMemUint != NULL) { - pFreeMemUint->pPrev = NULL; - } - - pCurUint->pNext = pAllocatedMemUint; - if(pAllocatedMemUint != NULL) { - pAllocatedMemUint->pPrev = pCurUint; - } - - pAllocatedMemUint = pCurUint; - void* pData = (void*)((char *)pCurUint + sizeof(struct _Uint)); - if(is_ptr_null) { - *ptr = new Block(pData,size); - } else { - CHECK_EQ((*ptr)->size(),size); - (*ptr)->set_data(pData); - } - CHECK(pCurUint->pBlk == NULL); - pCurUint->pBlk = *ptr; + pCurUint->pNext = pFreeUint; + if(pFreeUint != NULL) { + pFreeUint->pPrev = pCurUint; + } + pFreeUint = pCurUint; } -void CppMemPool::Free(Block* ptr) { - void* pData = ptr->mutable_data(); - if(pMemPool < pData && pData < (void*)((char*)pMemPool + numUints * memUintSize)) { - struct _Uint *pCurUint = (struct _Uint*)((char*)pData-sizeof(struct _Uint)); - CHECK(ptr == pCurUint->pBlk); - - if(pCurUint == pAllocatedMemUint) { - pAllocatedMemUint = pCurUint->pNext; - if(pAllocatedMemUint != NULL) { - pAllocatedMemUint->pPrev = NULL; - } - } else { - struct _Uint *pCurPrevUint = pCurUint->pPrev; - pCurUint->pPrev = NULL; - pCurPrevUint->pNext = pCurUint->pNext; - if(pCurUint->pNext != NULL) { - pCurUint->pNext->pPrev = pCurPrevUint; - } - } - pCurUint->pNext = pFreeMemUint; - if(pFreeMemUint != NULL) { - pFreeMemUint->pPrev = pCurUint; +CppMemPool::~CppMemPool() { + // traverse all lists to delete the memory + for(int pos = 0; pos < 64; pos++) { + 
for(int i = 0; i < 2; i++) { + struct _Uint *pCurUint = i == 0 ? ppAllocUints[pos] : ppFreeUints[pos]; + while(pCurUint != NULL) { + struct _Uint *pNextUint = pCurUint->pNext; + free(pCurUint->pBlk); + free(pCurUint); + pCurUint = pNextUint; + } } - - pFreeMemUint = pCurUint; - pCurUint->pBlk = NULL; - numAllocatedUintsInPool--; } - else { - free(pData); - } - numAllocatedUints--; - delete ptr; -} - -CppMemPool::~CppMemPool() { - CHECK_EQ(numAllocatedUints,0); - free(pMemPool); + free(ppAllocUints); + free(ppFreeUints); } diff --git a/test/singa/test_memory.cc b/test/singa/test_memory.cc index 4e0dfff065..8a6101575c 100644 --- a/test/singa/test_memory.cc +++ b/test/singa/test_memory.cc @@ -27,46 +27,59 @@ #include "singa/utils/cuda_utils.h" #include -// this tests allocated a number of memory blocks in the memory pool -// the pool consists of 1024 uints and each uint has a size of 1000 bytes -// we malloc 1024 blocks where half of the block will reside outside the pool, -// and the other half will be inside the pool -TEST(CppMemPool, Malloc) { - singa::CppMemPool pool(1,1); - const int numOfTests = 1024; - const size_t dataSizeSmall = 1000; - const size_t dataSizeLarge = 2000; - singa::Block** pptr = new singa::Block*[numOfTests]; - - for(int i = 0; i < numOfTests; i++) { - const size_t dataSize = (i%2) ? 
dataSizeSmall : dataSizeLarge; - pool.Malloc(&(pptr[i]),dataSize); - int* data = static_cast(pptr[i]->mutable_data()); - for(int idx = 0; idx < (int)dataSize/4; idx++) { - data[idx] = i; +TEST(CppMemPool, Compare) { + singa::CppMemPool pool; + const int numOfOuterLoops =1000; + const int numOfInnerLoops = 100; + const size_t allocSize = 1024; + int** pp = new int*[numOfInnerLoops]; + singa::Block** ppBlk = new singa::Block*[numOfInnerLoops]; + + double alloc_time = 0; + double free_time = 0; + time_t start,end; + singa::Timer t; + for (int i = 0; i < numOfOuterLoops; i++) { + start = clock(); + for(int j = 0; j < numOfInnerLoops; j++) { + pp[j] = (int*)malloc(allocSize); } - data = static_cast(pptr[i]->mutable_data()); - int sum = 0; - for(int idx = 0; idx < (int)dataSize/4; idx++) { - sum += data[idx]; + end = clock(); + alloc_time += end-start; + start = clock(); + for(int j = 0; j < numOfInnerLoops; j++) { + free(pp[j]); } - CHECK_EQ(sum,i*dataSize/4); - } - CHECK_EQ(512,pool.GetNumFreeUints()); - - for(int i = 0; i < numOfTests; i++) { - pool.Free(pptr[i]); - } - CHECK_EQ(1024,pool.GetNumFreeUints()); - - delete[] pptr; + end = clock(); + free_time += end-start; + } + int kernel_time = t.Elapsed(); + + t.Tick(); + alloc_time = free_time = 0; + for (int i = 0; i < numOfOuterLoops; i++) { + start = clock(); + for(int j = 0; j < numOfInnerLoops; j++) { + ppBlk[j] = pool.Malloc(allocSize); + } + end = clock(); + alloc_time += end-start; + start = clock(); + for(int j = 0; j < numOfInnerLoops; j++) { + pool.Free(ppBlk[j]); + } + end = clock(); + free_time += end-start; + } + int mempool_time = t.Elapsed(); + EXPECT_GT(kernel_time,mempool_time); + delete pp; + delete ppBlk; } -// this tests intialize a pool with size 2M bytes and each memory unit has a size of 2048 bytes -// we then allocated 1024 memory block with half of the blocks with size 2000 and the other half with size 1000 -// then we reset the pool to size 1M bytes and memory uint size to 1000 bytes to test 
the reset function -TEST(CppMemPool, MallocAndRest) { - singa::CppMemPool pool(2,2); +// this tests allocated a number of memory blocks in the memory pool +TEST(CppMemPool, Malloc) { + singa::CppMemPool pool; const int numOfTests = 1024; const size_t dataSizeSmall = 1000; const size_t dataSizeLarge = 2000; @@ -74,7 +87,7 @@ TEST(CppMemPool, MallocAndRest) { for(int i = 0; i < numOfTests; i++) { const size_t dataSize = (i%2) ? dataSizeSmall : dataSizeLarge; - pool.Malloc(&(pptr[i]),dataSize); + pptr[i] = pool.Malloc(dataSize); int* data = static_cast(pptr[i]->mutable_data()); for(int idx = 0; idx < (int)dataSize/4; idx++) { data[idx] = i; @@ -86,39 +99,17 @@ TEST(CppMemPool, MallocAndRest) { } CHECK_EQ(sum,i*dataSize/4); } - CHECK_EQ(0,pool.GetNumFreeUints()); - - pool.RsetMemPool(1,1); - CHECK_EQ(512,pool.GetNumFreeUints()); - for(int i = 0; i < numOfTests; i++) { - const size_t dataSize = (i%2) ? dataSizeSmall : dataSizeLarge; - int* data = static_cast(pptr[i]->mutable_data()); - for(int idx = 0; idx < (int)dataSize/4; idx++) { - data[idx] = i; - } - data = static_cast(pptr[i]->mutable_data()); - int sum = 0; - for(int idx = 0; idx < (int)dataSize/4; idx++) { - sum += data[idx]; - } - CHECK_EQ(sum,i*dataSize/4); - } - for(int i = 0; i < numOfTests; i++) { pool.Free(pptr[i]); } - CHECK_EQ(1024,pool.GetNumFreeUints()); - delete[] pptr; } -// this tests initialize a pool with size 1M bytes and uint size of 1024 bytes -// then 1024 memory blocks are allocated, half of them in the pool and the other half outside the pool + +// we allocate 1024 memory blocks // subsequently, we randomly free 512 blocks and after that allocate them back to the pool -// after reset the pool to a size of 2M bytes and uint size of 2048 bytes, -// we free all memory blocks allocated. 
TEST(CppMemPool, RandomFree) { - singa::CppMemPool pool(1,1); + singa::CppMemPool pool; const int numOfTests = 1024; const size_t dataSizeSmall = 1000; const size_t dataSizeLarge = 2000; @@ -126,7 +117,7 @@ TEST(CppMemPool, RandomFree) { for(int i = 0; i < numOfTests; i++) { const size_t dataSize = (i%2) ? dataSizeSmall : dataSizeLarge; - pool.Malloc(&(pptr[i]),dataSize); + pptr[i] = pool.Malloc(dataSize); int* data = static_cast(pptr[i]->mutable_data()); for(int idx = 0; idx < (int)dataSize/4; idx++) { data[idx] = i; @@ -138,7 +129,6 @@ TEST(CppMemPool, RandomFree) { } CHECK_EQ(sum,i*dataSize/4); } - CHECK_EQ(512,pool.GetNumFreeUints()); // randomized free pointers int* randomPool = new int[numOfTests]; @@ -187,15 +177,9 @@ TEST(CppMemPool, RandomFree) { for(int pos = numOfTests/2; pos < numOfTests; pos++) { int i = randomPool[pos]; const size_t dataSize = (i%2) ? dataSizeSmall : dataSizeLarge; - pool.Malloc(&(pptr[i]),dataSize); + pptr[i] = pool.Malloc(dataSize); } - - pool.RsetMemPool(2,2); - for(int i = 0; i < numOfTests; i++) { - pool.Free(pptr[i]); - } - CHECK_EQ(1024,pool.GetNumFreeUints()); - + delete[] randomPool; delete[] pptr; } From 5ad387a3738d147df80f9415e71cd2eb24673868 Mon Sep 17 00:00:00 2001 From: aaronwwf Date: Tue, 6 Sep 2016 22:24:36 +0800 Subject: [PATCH 3/7] SINGA-244 Separating swig interface and python binding files - move swig interface files to src/api - move python to root folder - move python related cmake functions to python cmake files - use add_library OBJECT command to build singa_objects, which are then used by the singa shared library and the python wrapper library, avoiding building twice - todo, add java binding --- CMakeLists.txt | 19 ++- cmake/Dependencies.cmake | 2 +- cmake/Protobuf.cmake | 31 ----- cmake/Utils.cmake | 70 ---------- include/singa/core/common.h | 2 +- python/CMakeLists.txt | 147 ++++++++++++++++++++ {src/python => python}/setup.py.in | 0 {src/python => python}/singa/__init__.py | 0 {src/python => python}/singa/command.py | 0 
{src/python => python}/singa/device.py | 0 {src/python => python}/singa/initializer.py | 0 {src/python => python}/singa/layer.py | 0 {src/python => python}/singa/loss.py | 0 {src/python => python}/singa/metric.py | 0 {src/python => python}/singa/model.py | 0 {src/python => python}/singa/net.py | 0 {src/python => python}/singa/optimizer.py | 0 {src/python => python}/singa/tensor.py | 0 {src/python => python}/singa/utils.py | 0 src/CMakeLists.txt | 68 ++------- src/api/config.i | 4 + src/{python/swig => api}/config.i.in | 0 src/{python/swig => api}/core_device.i | 0 src/{python/swig => api}/core_tensor.i | 0 src/{python/swig => api}/model_layer.i | 0 src/{python/swig => api}/model_loss.i | 0 src/{python/swig => api}/model_metric.i | 0 src/{python/swig => api}/model_optimizer.i | 0 src/{python/swig => api}/numpy.i | 0 src/{python/swig => api}/singa.i | 0 30 files changed, 181 insertions(+), 162 deletions(-) delete mode 100644 cmake/Protobuf.cmake delete mode 100644 cmake/Utils.cmake create mode 100644 python/CMakeLists.txt rename {src/python => python}/setup.py.in (100%) rename {src/python => python}/singa/__init__.py (100%) rename {src/python => python}/singa/command.py (100%) rename {src/python => python}/singa/device.py (100%) rename {src/python => python}/singa/initializer.py (100%) rename {src/python => python}/singa/layer.py (100%) rename {src/python => python}/singa/loss.py (100%) rename {src/python => python}/singa/metric.py (100%) rename {src/python => python}/singa/model.py (100%) rename {src/python => python}/singa/net.py (100%) rename {src/python => python}/singa/optimizer.py (100%) rename {src/python => python}/singa/tensor.py (100%) rename {src/python => python}/singa/utils.py (100%) create mode 100644 src/api/config.i rename src/{python/swig => api}/config.i.in (100%) rename src/{python/swig => api}/core_device.i (100%) rename src/{python/swig => api}/core_tensor.i (100%) rename src/{python/swig => api}/model_layer.i (100%) rename src/{python/swig => 
api}/model_loss.i (100%) rename src/{python/swig => api}/model_metric.i (100%) rename src/{python/swig => api}/model_optimizer.i (100%) rename src/{python/swig => api}/numpy.i (100%) rename src/{python/swig => api}/singa.i (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 611cee46e3..762839b39e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,22 +38,23 @@ SET(SINGA_INCLUDE_DIR INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR}) OPTION(USE_CBLAS "Use CBlas libs" ON) -OPTION(USE_CUDA "Use Cuda libs" OFF) +OPTION(USE_CUDA "Use Cuda libs" ON) OPTION(USE_CUDNN "Use Cudnn libs" ON) OPTION(USE_OPENCV "Use opencv" OFF) OPTION(USE_LMDB "Use LMDB libs" OFF) -OPTION(USE_PYTHON "Generate py wrappers" OFF) +OPTION(USE_PYTHON "Generate py wrappers" ON) +OPTION(USE_JAVA "Generate java wrappers" OFF) OPTION(USE_OPENCL "Use OpenCL" OFF) OPTION(ENABLE_DIST "enable distributed training" OFF) INCLUDE("cmake/Dependencies.cmake") -INCLUDE("cmake/Utils.cmake") +#INCLUDE("cmake/Utils.cmake") ADD_DEFINITIONS(-DUSE_CMAKE) #message(STATUS "${SINGA_INCLUDE_DIR}") CONFIGURE_FILE ( "${PROJECT_SOURCE_DIR}/cmake/Templates/singa_config.h.in" - "${PROJECT_BINARY_DIR}/include/singa/singa_config.h") + "${PROJECT_SOURCE_DIR}/include/singa/singa_config.h") #set(SINGA_CONFIGURE_SRC "${PROJECT_BINARY_DIR}/singa_config.h") #LIST(APPEND SRCS ${SINGA_CONFIGURE_SRCS} ${PROJECT_BINARY_DIR}/singa_config.h) @@ -64,6 +65,7 @@ SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) IF (USE_CUDA) ADD_SUBDIRECTORY(lib/cnmem) LIST(APPEND SINGA_LINKER_LIBS cnmem) + SET(global_cuda_objs "") ENDIF() # TODO(wangwei) detect the ev lib @@ -71,10 +73,19 @@ IF (ENABLE_DIST) LIST(APPEND SINGA_LINKER_LIBS ev) ENDIF() +INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/include") ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(test) ADD_SUBDIRECTORY(examples) +IF (USE_PYTHON) + ADD_SUBDIRECTORY(python) +ENDIF() + +IF (USE_JAVA) + ADD_SUBDIRECTORY(java) +ENDIF() + INSTALL(DIRECTORY include/singa DESTINATION ${CMAKE_INSTALL_PREFIX}/include) 
INSTALL(FILES ${CMAKE_BINARY_DIR}/include/singa/singa_config.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include/singa) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index a4121519c0..d4e68ac9ad 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -24,7 +24,7 @@ FIND_PACKAGE( Protobuf REQUIRED ) INCLUDE_DIRECTORIES(SYSTEM ${PROTOBUF_INCLUDE_DIR}) MESSAGE(STATUS "proto libs " ${PROTOBUF_LIBRARIES}) LIST(APPEND SINGA_LINKER_LIBS ${PROTOBUF_LIBRARIES}) -INCLUDE("cmake/Protobuf.cmake") +#INCLUDE("cmake/Protobuf.cmake") FIND_PACKAGE(Glog) IF(GLOG_FOUND) diff --git a/cmake/Protobuf.cmake b/cmake/Protobuf.cmake deleted file mode 100644 index 70cf0fea16..0000000000 --- a/cmake/Protobuf.cmake +++ /dev/null @@ -1,31 +0,0 @@ -# This script is taken from -# https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake -# and modified to our compilation. - -function(PROTOBUF_GENERATE_PYTHON OUTPUT) - if(NOT ARGN) - message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called - without any proto files") - return() - endif(NOT ARGN) - - set(${OUTPUT}) - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(FIL_WE ${FIL} NAME_WE) - get_filename_component(PATH ${FIL} PATH) - - list(APPEND ${OUTPUT} "${CMAKE_BINARY_DIR}/python/singa/proto/${FIL_WE}_pb2.py") - - add_custom_command( - OUTPUT "${CMAKE_BINARY_DIR}/python/singa/proto/${FIL_WE}_pb2.py" - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS --python_out ${CMAKE_BINARY_DIR}/python/singa/proto - --proto_path ${PATH} ${ABS_FIL} - DEPENDS ${ABS_FIL} - COMMENT "Running Python protocol buffer compiler on ${FIL}" VERBATIM) - endforeach() - - set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) - set(${OUTPUT} ${${OUTPUT}} PARENT_SCOPE) -endfunction() diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake deleted file mode 100644 index a0373b8314..0000000000 --- a/cmake/Utils.cmake +++ /dev/null @@ -1,70 +0,0 @@ -# -# Licensed to the 
Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -macro(swig_generate_cxx pylist_variable) - if(NOT EXISTS "${CMKAE_BINARY_DIR}/python") - execute_process( - COMMAND mkdir ${CMAKE_BINARY_DIR}/python - COMMAND mkdir ${CMAKE_BINARY_DIR}/python/singa - COMMAND mkdir ${CMAKE_BINARY_DIR}/python/singa/proto - ERROR_QUIET) - endif() - execute_process( - COMMAND swig -c++ -python -I${CMAKE_SOURCE_DIR}/include - -outdir ${CMAKE_BINARY_DIR}/python/singa - ${ARGN}) - - set(${pylist_variable} "${CMAKE_SOURCE_DIR}/src/python/swig/singa_wrap.cxx") -endmacro() - -function (create_symlinks) - # Do nothing if building in-source - if (${CMAKE_CURRENT_BINARY_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) - return() - endif() - - foreach (path_file ${ARGN}) - get_filename_component(folder ${path_file} PATH) - - # Create REAL folder - file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/${folder}") - - # Delete symlink if it exists - file(REMOVE "${CMAKE_BINARY_DIR}/${path_file}") - - # Get OS dependent path to use in `execute_process` - file(TO_NATIVE_PATH "${CMAKE_BINARY_DIR}/${path_file}" link) - file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${path_file}" target) - - if (UNIX) - set(command ln -s ${target} ${link}) - else() - set(command cmd.exe /c mklink ${link} ${target}) - 
endif() - - execute_process(COMMAND ${command} - RESULT_VARIABLE result - ERROR_VARIABLE output) - - if (NOT ${result} EQUAL 0) - message(FATAL_ERROR "Could not create symbolic link for: ${target} --> ${output}") - endif() - - endforeach(path_file) -endfunction(create_symlinks) diff --git a/include/singa/core/common.h b/include/singa/core/common.h index e7c7ea268f..9b6e02b7ed 100644 --- a/include/singa/core/common.h +++ b/include/singa/core/common.h @@ -20,7 +20,7 @@ #define SINGA_CORE_COMMON_H_ #include #include -#include "./singa/singa_config.h" +#include "singa/singa_config.h" #include #include #include "singa/utils/logging.h" diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt new file mode 100644 index 0000000000..8bf8319611 --- /dev/null +++ b/python/CMakeLists.txt @@ -0,0 +1,147 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This following function is taken from +# https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake +# and modified to our compilation. 
+function(PROTOBUF_GENERATE_PYTHON OUTPUT) + if(NOT ARGN) + message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called + without any proto files") + return() + endif(NOT ARGN) + + set(${OUTPUT}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + get_filename_component(PATH ${FIL} PATH) + + list(APPEND ${OUTPUT} "${CMAKE_BINARY_DIR}/python/singa/proto/${FIL_WE}_pb2.py") + + add_custom_command( + OUTPUT "${CMAKE_BINARY_DIR}/python/singa/proto/${FIL_WE}_pb2.py" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS --python_out ${CMAKE_BINARY_DIR}/python/singa/proto + --proto_path ${PATH} ${ABS_FIL} + DEPENDS ${ABS_FIL} + COMMENT "Running Python protocol buffer compiler on ${FIL}" VERBATIM) + endforeach() + + set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) + set(${OUTPUT} ${${OUTPUT}} PARENT_SCOPE) +endfunction() + +function (create_symlinks) + # Do nothing if building in-source + if (${CMAKE_CURRENT_BINARY_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) + return() + endif() + + foreach (path_file ${ARGN}) + get_filename_component(folder ${path_file} PATH) + + # Create REAL folder + #file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/${folder}") + + # Delete symlink if it exists + file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/${path_file}") + + # Get OS dependent path to use in `execute_process` + file(TO_NATIVE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${path_file}" link) + file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${path_file}" target) + + if (UNIX) + set(command ln -s ${target} ${link}) + else() + set(command cmd.exe /c mklink ${link} ${target}) + endif() + + execute_process(COMMAND ${command} + RESULT_VARIABLE result + ERROR_VARIABLE output) + + if (NOT ${result} EQUAL 0) + message(FATAL_ERROR "Could not create symbolic link for: ${target} --> ${output}") + endif() + + endforeach(path_file) +endfunction(create_symlinks) + + +# generate protobuf sources +FILE(GLOB proto_files 
${CMAKE_SOURCE_DIR}/src/proto/*.proto) +PROTOBUF_GENERATE_PYTHON(proto_pys ${proto_files}) +#MESSAGE(STATUS "proto pys: ${proto_pys}") + +# generate cxx and wrap.py +if(NOT EXISTS "${CMKAE_BINARY_DIR}/python") + execute_process( + COMMAND mkdir ${CMAKE_BINARY_DIR}/python + COMMAND mkdir ${CMAKE_BINARY_DIR}/python/singa + COMMAND mkdir ${CMAKE_BINARY_DIR}/python/singa/proto + ERROR_QUIET) +endif() +execute_process( + COMMAND mkdir ${CMAKE_BINARY_DIR}/src/api + COMMAND swig -c++ -python -I${CMAKE_SOURCE_DIR}/include + -outdir ${CMAKE_BINARY_DIR}/python/singa + -o ${CMAKE_BINARY_DIR}/src/api/singa_wrap.cxx + ${CMAKE_SOURCE_DIR}/src/api/singa.i ) + +set(python_srcs "${CMAKE_BINARY_DIR}/src/api/singa_wrap.cxx") + +#Create symlinks for all python source files Do not omit !!!RELATIVE!!! +file(GLOB_RECURSE python_source_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.py) +create_symlinks(${python_source_files}) + + +IF(USE_CUDA) +# remain this custom command to avoid cuda objs can't find +ADD_CUSTOM_COMMAND( + OUTPUT ${global_cuda_objs} + COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/" + ) +ENDIF(USE_CUDA) + +ADD_LIBRARY(_singa_wrap SHARED $<TARGET_OBJECTS:singa_objects> ${python_srcs} ${proto_pys} ${global_cuda_objs}) +TARGET_LINK_LIBRARIES(_singa_wrap ${SINGA_LINKER_LIBS} ${PYTHON_LIBRARIES}) +TARGET_INCLUDE_DIRECTORIES(_singa_wrap PRIVATE ${PYTHON_INCLUDE_DIRS}) +SET_TARGET_PROPERTIES(_singa_wrap +PROPERTIES PREFIX "" +LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/python/singa +) + +#SETUP +SET(SETUP_PY_IN "setup.py.in") +SET(SETUP_PY "${CMAKE_BINARY_DIR}/python/setup.py") +CONFIGURE_FILE(${SETUP_PY_IN} ${SETUP_PY}) + +#create python/singa/proto/__init__.py +FILE(WRITE ${CMAKE_BINARY_DIR}/python/singa/proto/__init__.py "") +#MESSAGE(STATUS "apple: ${APPLE}") +IF(APPLE) +ADD_CUSTOM_TARGET( + change_suffix ALL + COMMAND ${CMAKE_COMMAND} -E rename "${CMAKE_BINARY_DIR}/python/singa/_singa_wrap.dylib" "${CMAKE_BINARY_DIR}/python/singa/_singa_wrap.so" + COMMENT "change .dylib to .so in mac
system" +) +ADD_DEPENDENCIES(change_suffix _singa_wrap) +ENDIF(APPLE) + + diff --git a/src/python/setup.py.in b/python/setup.py.in similarity index 100% rename from src/python/setup.py.in rename to python/setup.py.in diff --git a/src/python/singa/__init__.py b/python/singa/__init__.py similarity index 100% rename from src/python/singa/__init__.py rename to python/singa/__init__.py diff --git a/src/python/singa/command.py b/python/singa/command.py similarity index 100% rename from src/python/singa/command.py rename to python/singa/command.py diff --git a/src/python/singa/device.py b/python/singa/device.py similarity index 100% rename from src/python/singa/device.py rename to python/singa/device.py diff --git a/src/python/singa/initializer.py b/python/singa/initializer.py similarity index 100% rename from src/python/singa/initializer.py rename to python/singa/initializer.py diff --git a/src/python/singa/layer.py b/python/singa/layer.py similarity index 100% rename from src/python/singa/layer.py rename to python/singa/layer.py diff --git a/src/python/singa/loss.py b/python/singa/loss.py similarity index 100% rename from src/python/singa/loss.py rename to python/singa/loss.py diff --git a/src/python/singa/metric.py b/python/singa/metric.py similarity index 100% rename from src/python/singa/metric.py rename to python/singa/metric.py diff --git a/src/python/singa/model.py b/python/singa/model.py similarity index 100% rename from src/python/singa/model.py rename to python/singa/model.py diff --git a/src/python/singa/net.py b/python/singa/net.py similarity index 100% rename from src/python/singa/net.py rename to python/singa/net.py diff --git a/src/python/singa/optimizer.py b/python/singa/optimizer.py similarity index 100% rename from src/python/singa/optimizer.py rename to python/singa/optimizer.py diff --git a/src/python/singa/tensor.py b/python/singa/tensor.py similarity index 100% rename from src/python/singa/tensor.py rename to python/singa/tensor.py diff --git 
a/src/python/singa/utils.py b/python/singa/utils.py similarity index 100% rename from src/python/singa/utils.py rename to python/singa/utils.py diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b4a88f5921..0752884bfa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,10 +20,9 @@ FILE(GLOB proto_files proto/*.proto) protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_files}) -INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/include") +#MESSAGE(STATUS "proto_srcs: ${proto_srcs}") LIST(APPEND singa_sources ${proto_hdrs} ${proto_srcs}) -SET(PREVIOUS_LINKER_LIBS ${SINGA_LINKER_LIBS}) AUX_SOURCE_DIRECTORY(utils utils_source) LIST(APPEND singa_sources ${utils_source}) @@ -32,6 +31,8 @@ AUX_SOURCE_DIRECTORY(core/device core_source) AUX_SOURCE_DIRECTORY(core/memory core_source) AUX_SOURCE_DIRECTORY(core/scheduler core_source) AUX_SOURCE_DIRECTORY(core/tensor core_source) +LIST(APPEND singa_sources ${core_source}) + IF (USE_CUDA) FILE(GLOB_RECURSE cuda_source core "*.cu") SET(FLAGS_BACKUP ${CMAKE_CXX_FLAGS}) @@ -45,7 +46,8 @@ IF (USE_CUDA) include_directories("${CMAKE_CURRENT_SOURCE_DIR}/core/tensor") SET(CMAKE_CXX_FLAGS ${FLAGS_BACKUP}) ENDIF (USE_CUDA) -LIST(APPEND singa_sources ${core_source} ${cuda_objs}) + +SET(global_cuda_objs ${cuda_objs} PARENT_SCOPE) AUX_SOURCE_DIRECTORY(model model_source) AUX_SOURCE_DIRECTORY(model/layer model_source) @@ -58,7 +60,7 @@ LIST(APPEND singa_sources ${model_source}) AUX_SOURCE_DIRECTORY(io io_source) AUX_SOURCE_DIRECTORY(io/network io_source) LIST(APPEND singa_sources ${io_source}) -ADD_LIBRARY(singa SHARED ${singa_sources}) + ADD_CUSTOM_TARGET( copy_protobuf COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/include/singa/proto" @@ -71,59 +73,15 @@ FOREACH(fil ${proto_hdrs}) COMMAND ${CMAKE_COMMAND} -E copy ${fil} "${CMAKE_BINARY_DIR}/include/singa/proto" ) ENDFOREACH() -ADD_DEPENDENCIES(singa copy_protobuf) -TARGET_LINK_LIBRARIES(singa ${SINGA_LINKER_LIBS}) -#MESSAGE(STATUS "HEADERS: ${proto_hdrs}") - 
-IF(USE_PYTHON) - - protobuf_generate_python(proto_pys ${proto_files}) - #MESSAGE(STATUS "proto pys: ${proto_pys}") - FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i") - CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i.in" "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i") - - FILE(GLOB python_files python/swig/singa.i) - # delete old .cxx file - FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/singa_wrap.cxx") - # generate cxx and wrap.py - swig_generate_cxx(python_srcs ${python_files}) +ADD_LIBRARY(singa_objects OBJECT ${singa_sources}) +ADD_DEPENDENCIES(singa_objects copy_protobuf) - #FILE(COPY python/ DESTINATION ${CMAKE_BINARY_DIR}/python/singa FILES_MATCHING PATTERN "swig" EXCLUDE PATTERN "*.py") - #Create symlinks for all python source files Do not omit !!!RELATIVE!!! - file(GLOB_RECURSE python_source_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.py) - - create_symlinks(${python_source_files}) - - ADD_LIBRARY(_singa_wrap SHARED ${python_srcs} ${singa_sources} ${cuda_objs} ${proto_pys}) - SET(WRAPPER_LINKER_LIBS "${PREVIOUS_LINKER_LIBS}") - TARGET_LINK_LIBRARIES(_singa_wrap ${WRAPPER_LINKER_LIBS} ${PYTHON_LIBRARIES}) - TARGET_INCLUDE_DIRECTORIES(_singa_wrap PRIVATE ${PYTHON_INCLUDE_DIRS}) - ADD_DEPENDENCIES(_singa_wrap singa ) - #message(STATUS "PREVIOUS_LINKER_LIBS ${PREVIOUS_LINKER_LIBS}") - - SET_TARGET_PROPERTIES(_singa_wrap - PROPERTIES PREFIX "" - LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/python/singa - ) - - #SETUP - SET(SETUP_PY_IN "python/setup.py.in") - SET(SETUP_PY "${CMAKE_BINARY_DIR}/python/setup.py") - CONFIGURE_FILE(${SETUP_PY_IN} ${SETUP_PY}) - - #create python/singa/proto/__init__.py - FILE(WRITE ${CMAKE_BINARY_DIR}/python/singa/proto/__init__.py "") - #MESSAGE(STATUS "apple: ${APPLE}") - IF(APPLE) - ADD_CUSTOM_TARGET( - change_suffix ALL - COMMAND ${CMAKE_COMMAND} -E rename "${CMAKE_BINARY_DIR}/python/singa/_singa_wrap.dylib" "${CMAKE_BINARY_DIR}/python/singa/_singa_wrap.so" - COMMENT "change .dylib to 
.so in mac system" - ) - ADD_DEPENDENCIES(change_suffix _singa_wrap) - ENDIF(APPLE) +ADD_LIBRARY(singa SHARED $<TARGET_OBJECTS:singa_objects> ${cuda_objs}) +TARGET_LINK_LIBRARIES(singa ${SINGA_LINKER_LIBS}) -ENDIF(USE_PYTHON) +#pass configure infor to swig +FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/api/config.i") +CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/api/config.i.in" "${CMAKE_CURRENT_SOURCE_DIR}/api/config.i") diff --git a/src/api/config.i b/src/api/config.i new file mode 100644 index 0000000000..cfbcd46866 --- /dev/null +++ b/src/api/config.i @@ -0,0 +1,4 @@ +// Pass in cmake configurations to swig +#define USE_CUDA 1 +#define USE_CUDNN 1 +#define CUDNN_VERSION_SWIG 5005 diff --git a/src/python/swig/config.i.in b/src/api/config.i.in similarity index 100% rename from src/python/swig/config.i.in rename to src/api/config.i.in diff --git a/src/python/swig/core_device.i b/src/api/core_device.i similarity index 100% rename from src/python/swig/core_device.i rename to src/api/core_device.i diff --git a/src/python/swig/core_tensor.i b/src/api/core_tensor.i similarity index 100% rename from src/python/swig/core_tensor.i rename to src/api/core_tensor.i diff --git a/src/python/swig/model_layer.i b/src/api/model_layer.i similarity index 100% rename from src/python/swig/model_layer.i rename to src/api/model_layer.i diff --git a/src/python/swig/model_loss.i b/src/api/model_loss.i similarity index 100% rename from src/python/swig/model_loss.i rename to src/api/model_loss.i diff --git a/src/python/swig/model_metric.i b/src/api/model_metric.i similarity index 100% rename from src/python/swig/model_metric.i rename to src/api/model_metric.i diff --git a/src/python/swig/model_optimizer.i b/src/api/model_optimizer.i similarity index 100% rename from src/python/swig/model_optimizer.i rename to src/api/model_optimizer.i diff --git a/src/python/swig/numpy.i b/src/api/numpy.i similarity index 100% rename from src/python/swig/numpy.i rename to src/api/numpy.i diff --git a/src/python/swig/singa.i b/src/api/singa.i
similarity index 100% rename from src/python/swig/singa.i rename to src/api/singa.i From 07a71a8137ba711268c9ad4f17a7984342990723 Mon Sep 17 00:00:00 2001 From: aaronwwf Date: Wed, 7 Sep 2016 15:09:07 +0800 Subject: [PATCH 4/7] SINGA-244 Separating swig interface and python binding files - add java binding cmake files - todo: add test code --- CMakeLists.txt | 5 +- cmake/Dependencies.cmake | 6 ++ java/CMakeLists.txt | 138 +++++++++++++++++++++++++++++++++++++++ src/api/.gitignore | 1 + src/api/config.i | 4 -- src/api/config.i.in | 2 + src/api/core_tensor.i | 3 + src/proto/core.proto | 2 + src/proto/io.proto | 1 + src/proto/model.proto | 1 + test/java/Test.java | 12 ++++ 11 files changed, 169 insertions(+), 6 deletions(-) create mode 100644 java/CMakeLists.txt create mode 100644 src/api/.gitignore delete mode 100644 src/api/config.i create mode 100644 test/java/Test.java diff --git a/CMakeLists.txt b/CMakeLists.txt index 762839b39e..e799348fa0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,7 +38,7 @@ SET(SINGA_INCLUDE_DIR INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR}) OPTION(USE_CBLAS "Use CBlas libs" ON) -OPTION(USE_CUDA "Use Cuda libs" ON) +OPTION(USE_CUDA "Use Cuda libs" OFF) OPTION(USE_CUDNN "Use Cudnn libs" ON) OPTION(USE_OPENCV "Use opencv" OFF) OPTION(USE_LMDB "Use LMDB libs" OFF) @@ -54,7 +54,7 @@ ADD_DEFINITIONS(-DUSE_CMAKE) CONFIGURE_FILE ( "${PROJECT_SOURCE_DIR}/cmake/Templates/singa_config.h.in" - "${PROJECT_SOURCE_DIR}/include/singa/singa_config.h") + "${PROJECT_BINARY_DIR}/include/singa/singa_config.h") #set(SINGA_CONFIGURE_SRC "${PROJECT_BINARY_DIR}/singa_config.h") #LIST(APPEND SRCS ${SINGA_CONFIGURE_SRCS} ${PROJECT_BINARY_DIR}/singa_config.h) @@ -83,6 +83,7 @@ IF (USE_PYTHON) ENDIF() IF (USE_JAVA) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing ") ADD_SUBDIRECTORY(java) ENDIF() diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index d4e68ac9ad..5f3d6a7e51 100644 --- a/cmake/Dependencies.cmake +++ 
b/cmake/Dependencies.cmake @@ -92,3 +92,9 @@ IF(USE_PYTHON) FIND_PACKAGE(PythonInterp 2.7 REQUIRED) FIND_PACKAGE(SWIG 3.0 REQUIRED) ENDIF() + +IF(USE_JAVA) + FIND_PACKAGE(Java) + FIND_PACKAGE(JNI ) + FIND_PACKAGE(SWIG 3.0 REQUIRED) +ENDIF() diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt new file mode 100644 index 0000000000..ef384611ff --- /dev/null +++ b/java/CMakeLists.txt @@ -0,0 +1,138 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This following function is taken from +# https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake +# and modified to our compilation. 
+function(PROTOBUF_GENERATE_JAVA OUTPUT) + if(NOT ARGN) + message(SEND_ERROR "Error: PROTOBUF_GENERATE_JAVA() called + without any proto files") + return() + endif(NOT ARGN) + + set(${OUTPUT}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + get_filename_component(PATH ${FIL} PATH) + + list(APPEND ${OUTPUT} "${CMAKE_BINARY_DIR}/java/singa/proto/${FIL_WE}.java") + + add_custom_command( + OUTPUT "${CMAKE_BINARY_DIR}/java/singa/proto/${FIL_WE}.java" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS --java_out ${CMAKE_BINARY_DIR}/java + --proto_path ${PATH} ${ABS_FIL} + DEPENDS ${ABS_FIL} + COMMENT "Running java protocol buffer compiler on ${FIL}" VERBATIM) + endforeach() + + set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) + set(${OUTPUT} ${${OUTPUT}} PARENT_SCOPE) +endfunction() + +function (create_symlinks) + # Do nothing if building in-source + if (${CMAKE_CURRENT_BINARY_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) + return() + endif() + + foreach (path_file ${ARGN}) + get_filename_component(folder ${path_file} PATH) + + # Delete symlink if it exists + file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/${path_file}") + + # Get OS dependent path to use in `execute_process` + file(TO_NATIVE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${path_file}" link) + file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${path_file}" target) + + if (UNIX) + set(command ln -s ${target} ${link}) + else() + set(command cmd.exe /c mklink ${link} ${target}) + endif() + + execute_process(COMMAND ${command} + RESULT_VARIABLE result + ERROR_VARIABLE output) + + if (NOT ${result} EQUAL 0) + message(FATAL_ERROR "Could not create symbolic link for: ${target} --> ${output}") + endif() + + endforeach(path_file) +endfunction(create_symlinks) + + +# generate protobuf sources +FILE(GLOB proto_files ${CMAKE_SOURCE_DIR}/src/proto/*.proto) +PROTOBUF_GENERATE_JAVA(proto_javas ${proto_files}) +MESSAGE(STATUS "proto javas: 
${proto_javas}") + +# generate cxx and wrap.py +if(NOT EXISTS "${CMKAE_BINARY_DIR}/java") + execute_process( + COMMAND mkdir ${CMAKE_BINARY_DIR}/java + COMMAND mkdir ${CMAKE_BINARY_DIR}/java/singa + COMMAND mkdir ${CMAKE_BINARY_DIR}/java/singa/proto + ERROR_QUIET) +endif() +execute_process( + COMMAND mkdir ${CMAKE_BINARY_DIR}/src/api + COMMAND swig -c++ -java -I${CMAKE_SOURCE_DIR}/include + -I${JAVA_INCLUDE_PATH} -I${JAVA_INCLUDE_PATH2} + -outdir ${CMAKE_BINARY_DIR}/java/singa + -package singa + -o ${CMAKE_BINARY_DIR}/src/api/singa_java_wrap.cxx + ${CMAKE_SOURCE_DIR}/src/api/singa.i ) + +#MESSAGE(STATUS "java include: ${JAVA_INCLUDE_DIRS} ${JNI_INCLUDE_DIRS} ${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2}") +set(java_srcs "${CMAKE_BINARY_DIR}/src/api/singa_java_wrap.cxx") + +#Create symlinks for all java source files Do not omit !!!RELATIVE!!! +file(GLOB_RECURSE java_source_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.java) +create_symlinks(${java_source_files}) + +IF(USE_CUDA) +# remain this custom command to avoid cuda objs can't find +ADD_CUSTOM_COMMAND( + OUTPUT ${global_cuda_objs} + COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/" + ) +ENDIF(USE_CUDA) + + +ADD_LIBRARY(singa_wrap SHARED $<TARGET_OBJECTS:singa_objects> ${java_srcs} ${proto_javas} ${global_cuda_objs}) +TARGET_LINK_LIBRARIES(singa_wrap ${SINGA_LINKER_LIBS} ${JNI_LIBRARIES}) +TARGET_INCLUDE_DIRECTORIES(singa_wrap PRIVATE ${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2}) + +SET_TARGET_PROPERTIES(singa_wrap + PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/java/singa +) + +IF(APPLE) +ADD_CUSTOM_TARGET( + change_suffix ALL + COMMAND ${CMAKE_COMMAND} -E rename "${CMAKE_BINARY_DIR}/java/singa/libsinga_wrap.dylib" "${CMAKE_BINARY_DIR}/java/singa/libsinga_wrap.so" + COMMENT "change .dylib to .so in mac system" +) +ADD_DEPENDENCIES(change_suffix singa_wrap) +ENDIF(APPLE) + diff --git a/src/api/.gitignore b/src/api/.gitignore new file mode 100644 index 0000000000..adb5d03a3b --- /dev/null +++ b/src/api/.gitignore @@ -0,0
+1 @@ +config.i diff --git a/src/api/config.i b/src/api/config.i deleted file mode 100644 index cfbcd46866..0000000000 --- a/src/api/config.i +++ /dev/null @@ -1,4 +0,0 @@ -// Pass in cmake configurations to swig -#define USE_CUDA 1 -#define USE_CUDNN 1 -#define CUDNN_VERSION_SWIG 5005 diff --git a/src/api/config.i.in b/src/api/config.i.in index 5743ba30f9..7da916ccba 100644 --- a/src/api/config.i.in +++ b/src/api/config.i.in @@ -1,4 +1,6 @@ // Pass in cmake configurations to swig #cmakedefine01 USE_CUDA #cmakedefine01 USE_CUDNN +#cmakedefine01 USE_PYTHON +#cmakedefine01 USE_JAVA #cmakedefine CUDNN_VERSION_SWIG ${CUDNN_VERSION_SWIG} diff --git a/src/api/core_tensor.i b/src/api/core_tensor.i index 60f8b45a55..d85e7f0818 100644 --- a/src/api/core_tensor.i +++ b/src/api/core_tensor.i @@ -44,10 +44,13 @@ using singa::DataType; %} %shared_ptr(singa::Device) +#if USE_PYTHON %include "numpy.i" %init %{ import_array(); %} +#endif //USE_PYTHON + %apply (float *IN_ARRAY1, int DIM1) { (const float *src, const size_t num) } diff --git a/src/proto/core.proto b/src/proto/core.proto index c88bee9f4f..dc5ed34564 100644 --- a/src/proto/core.proto +++ b/src/proto/core.proto @@ -18,6 +18,8 @@ package singa; +option java_package = "singa.proto"; + // TODO(wangwei) check protobuf version to include the syntax //syntax = "proto2"; diff --git a/src/proto/io.proto b/src/proto/io.proto index f349f742e5..ab5698389a 100644 --- a/src/proto/io.proto +++ b/src/proto/io.proto @@ -18,6 +18,7 @@ package singa; +option java_package = "singa.proto"; message EncoderConf { optional string type = 1 [default = "jpg2proto"]; diff --git a/src/proto/model.proto b/src/proto/model.proto index 3df68e2e8b..6be5e0a4b3 100644 --- a/src/proto/model.proto +++ b/src/proto/model.proto @@ -18,6 +18,7 @@ package singa; +option java_package = "singa.proto"; /// \file layer.proto is adapted from [Caffe](https://github.com/BVLC/caffe/)'s /// proto file with commit id c419f8517b1e1b3d7a07fe212fc6c90a70b519ea. 
We /// use caffe's protocol for configuring layer hyper-parameters for easy diff --git a/test/java/Test.java b/test/java/Test.java new file mode 100644 index 0000000000..f9e541198c --- /dev/null +++ b/test/java/Test.java @@ -0,0 +1,12 @@ +import singa.*; + +public class Test { + static { + System.loadLibrary("singa_wrap"); + } + + public static void main(String argv[]) { + Tensor t = new Tensor(); + System.out.println(t); + } +} From 46fb1e37a8e07dbcf4ad509863bf0c0fcb6a7193 Mon Sep 17 00:00:00 2001 From: Ruan Pingcheng Date: Wed, 7 Sep 2016 22:09:43 +0800 Subject: [PATCH 5/7] SINGA-245 Float as the first operand can not multiply with a tensor object --- python/singa/loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/singa/loss.py b/python/singa/loss.py index c88290bc4b..8b99ad3d8f 100644 --- a/python/singa/loss.py +++ b/python/singa/loss.py @@ -105,7 +105,7 @@ class SquaredError(Loss): It is implemented using Python Tensor operations. ''' def __init__(self): - super(Loss, SquaredError).__init__() + super(Loss, self).__init__() self.err = None def forward(self, flag, x, y): @@ -122,7 +122,7 @@ def forward(self, flag, x, y): a Tensor with one error value per sample ''' self.err = x - y - return 0.5 * tensor.squared(self.err) + return tensor.square(self.err) * 0.5 def backward(self): '''Compute the gradient of x w.r.t the error. @@ -138,4 +138,4 @@ def evaluate(self, flag, x, y): Returns: a float value as the averaged error ''' - return tensor.sum(0.5 * tensor.squared(x - y)) / x.size() + return tensor.sum(tensor.square(x - y) * 0.5) / x.size() From 67b338dd1be3c4f41998fc4bdc6188fc53937f51 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 8 Sep 2016 22:48:46 +0800 Subject: [PATCH 6/7] SINGA-245 Float as the first operand can not multiply with a tensor object Add reverse add/sub/mult/div for float-tensor operations. 
add unit tests in test_tensor.py --- python/singa/loss.py | 3 ++- python/singa/tensor.py | 24 ++++++++++++++++++++++++ test/python/test_tensor.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/python/singa/loss.py b/python/singa/loss.py index 8b99ad3d8f..526e4d09fb 100644 --- a/python/singa/loss.py +++ b/python/singa/loss.py @@ -95,6 +95,7 @@ class SoftmaxCrossEntropy(Loss): ''' def __init__(self): + super(SoftmaxCrossEntropy, self).__init__() self.swig_loss = singa.SoftmaxCrossEntropy() @@ -105,7 +106,7 @@ class SquaredError(Loss): It is implemented using Python Tensor operations. ''' def __init__(self): - super(Loss, self).__init__() + super(SquaredError, self).__init__() self.err = None def forward(self, flag, x, y): diff --git a/python/singa/tensor.py b/python/singa/tensor.py index f6bca432bf..102448335c 100644 --- a/python/singa/tensor.py +++ b/python/singa/tensor.py @@ -372,6 +372,7 @@ def __idiv__(self, x): ''' python operators (+, -, *, /, <, <=, >, >=) for singa binary operators + https://docs.python.org/2/library/operator.html#mapping-operators-to-functions ''' def __add__(self, rhs): @@ -441,6 +442,29 @@ def __ge__(self, rhs): return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs) + def __radd__(self, lhs): + lhs = float(lhs) + return _call_singa_func(singa.Add_Tf, self.singa_tensor, lhs) + + def __rsub__(self, lhs): + lhs = float(lhs) + ret = _call_singa_func(singa.Sub_Tf, self.singa_tensor, lhs) + ret *= -1 + return ret + + def __rmul__(self, lhs): + lhs = float(lhs) + return _call_singa_func(singa.EltwiseMul_Tf, self.singa_tensor, lhs) + + def __rdiv__(self, lhs): + lhs = float(lhs) + one = Tensor(self.shape, self.device, self.dtype) + one.set_value(1) + one *= lhs + return _call_singa_func(singa.Div_TT, one.singa_tensor,\ + self.singa_tensor) + + ''' python functions for global functions in Tensor.h ''' diff --git a/test/python/test_tensor.py b/test/python/test_tensor.py index
2374adce19..a1f220bc44 100644 --- a/test/python/test_tensor.py +++ b/test/python/test_tensor.py @@ -133,5 +133,34 @@ def test_random(self): self.assertAlmostEqual(tensor.average(x), 1, 3) + def test_radd(self): + x = tensor.Tensor((3,)) + x.set_value(1) + y = 1 + x + self.assertEqual(tensor.average(y), 2.) + + + def test_rsub(self): + x = tensor.Tensor((3,)) + x.set_value(1) + y = 1 - x + self.assertEqual(tensor.average(y), 0.) + + + def test_rmul(self): + x = tensor.Tensor((3,)) + x.set_value(1) + y = 2 * x + self.assertEqual(tensor.average(y), 2.) + + + def test_rdiv(self): + x = tensor.Tensor((3,)) + x.set_value(1) + y = 2 / x + self.assertEqual(tensor.average(y), 2.) + + + if __name__ == '__main__': unittest.main() From 646c804424f70814c8d9a87a62d73b2a7e15b634 Mon Sep 17 00:00:00 2001 From: xiezl Date: Fri, 9 Sep 2016 11:54:54 +0800 Subject: [PATCH 7/7] SINGA-244 Separating swig interface and python binding files Remove dependencies on copy_protobuf target. --- src/CMakeLists.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0752884bfa..5f7020494e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,20 +62,23 @@ AUX_SOURCE_DIRECTORY(io/network io_source) LIST(APPEND singa_sources ${io_source}) ADD_CUSTOM_TARGET( - copy_protobuf + copy_protobuf ALL COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/include/singa/proto" DEPENDS ${proto_hdrs} COMMENT "Copying Protobuf headers" ) FOREACH(fil ${proto_hdrs}) + GET_FILENAME_COMPONENT(filename ${fil} NAME) ADD_CUSTOM_COMMAND( - TARGET copy_protobuf PRE_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${fil} "${CMAKE_BINARY_DIR}/include/singa/proto" + TARGET copy_protobuf POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fil} + "${CMAKE_BINARY_DIR}/include/singa/proto/${filename}" + DEPENDS ${proto_files} ) ENDFOREACH() ADD_LIBRARY(singa_objects OBJECT ${singa_sources}) -ADD_DEPENDENCIES(singa_objects copy_protobuf) 
+#ADD_DEPENDENCIES(singa_objects copy_protobuf) ADD_LIBRARY(singa SHARED $<TARGET_OBJECTS:singa_objects> ${cuda_objs}) TARGET_LINK_LIBRARIES(singa ${SINGA_LINKER_LIBS})