diff --git a/PROJ3_WIN/565Rasterizer.sdf b/PROJ3_WIN/565Rasterizer.sdf new file mode 100644 index 0000000..f98cebc Binary files /dev/null and b/PROJ3_WIN/565Rasterizer.sdf differ diff --git a/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj index 1077f39..8393c8e 100755 --- a/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj +++ b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj @@ -86,6 +86,8 @@ $(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include;C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.0/C/common/inc;../shared/glew/includes;../shared/freeglut/includes + true + compute_20,sm_20 diff --git a/Project Readme.md b/Project Readme.md new file mode 100644 index 0000000..6c873ea --- /dev/null +++ b/Project Readme.md @@ -0,0 +1,18 @@ +CUDA RASTERIZER + + +FEATURES IMPLEMENTED: + +- I have completed all the necessary parts of the project. +- In the 3 additional features, I have implemented interpolated colors, back-face culling and scissor test. +- Camera position, light position, light color, rectangle for the scissor test can be modified in the function cudaRasterizeCore(). + + +SCREENSHOTS: + +- Screenshots of the renders are in the 'renders' directory. 
+ + +BLOG POST: + +experiencetheforce.blogspot.com \ No newline at end of file diff --git a/renders/Thumbs.db b/renders/Thumbs.db new file mode 100644 index 0000000..e515932 Binary files /dev/null and b/renders/Thumbs.db differ diff --git a/renders/upload 1.png b/renders/upload 1.png new file mode 100644 index 0000000..dd9fdfd Binary files /dev/null and b/renders/upload 1.png differ diff --git a/renders/upload 2.png b/renders/upload 2.png new file mode 100644 index 0000000..0c385a5 Binary files /dev/null and b/renders/upload 2.png differ diff --git a/renders/upload 3.png b/renders/upload 3.png new file mode 100644 index 0000000..2c8c243 Binary files /dev/null and b/renders/upload 3.png differ diff --git a/renders/upload 4.png b/renders/upload 4.png new file mode 100644 index 0000000..b1ac823 Binary files /dev/null and b/renders/upload 4.png differ diff --git a/src/main.cpp b/src/main.cpp index dfb689a..3323c8f 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,354 +1,358 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#include "main.h" - -//------------------------------- -//-------------MAIN-------------- -//------------------------------- - -int main(int argc, char** argv){ - - bool loadedScene = false; - for(int i=1; ibuildVBOs(); - delete loader; - loadedScene = true; - } - } - - if(!loadedScene){ - cout << "Usage: mesh=[obj file]" << endl; - return 0; - } - - frame = 0; - seconds = time (NULL); - fpstracker = 0; - - // Launch CUDA/GL - #ifdef __APPLE__ - // Needed in OSX to force use of OpenGL3.2 - glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3); - glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 2); - glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); - glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); - init(); - #else - init(argc, argv); - #endif - - initCuda(); - - 
initVAO(); - initTextures(); - - GLuint passthroughProgram; - passthroughProgram = initShader("shaders/passthroughVS.glsl", "shaders/passthroughFS.glsl"); - - glUseProgram(passthroughProgram); - glActiveTexture(GL_TEXTURE0); - - #ifdef __APPLE__ - // send into GLFW main loop - while(1){ - display(); - if (glfwGetKey(GLFW_KEY_ESC) == GLFW_PRESS || !glfwGetWindowParam( GLFW_OPENED )){ - kernelCleanup(); - cudaDeviceReset(); - exit(0); - } - } - - glfwTerminate(); - #else - glutDisplayFunc(display); - glutKeyboardFunc(keyboard); - - glutMainLoop(); - #endif - kernelCleanup(); - return 0; -} - -//------------------------------- -//---------RUNTIME STUFF--------- -//------------------------------- - -void runCuda(){ - // Map OpenGL buffer object for writing from CUDA on a single GPU - // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer - dptr=NULL; - - vbo = mesh->getVBO(); - vbosize = mesh->getVBOsize(); - - float newcbo[] = {0.0, 1.0, 0.0, - 0.0, 0.0, 1.0, - 1.0, 0.0, 0.0}; - cbo = newcbo; - cbosize = 9; - - ibo = mesh->getIBO(); - ibosize = mesh->getIBOsize(); - - cudaGLMapBufferObject((void**)&dptr, pbo); - cudaRasterizeCore(dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize); - cudaGLUnmapBufferObject(pbo); - - vbo = NULL; - cbo = NULL; - ibo = NULL; - - frame++; - fpstracker++; - -} - -#ifdef __APPLE__ - - void display(){ - runCuda(); - time_t seconds2 = time (NULL); - - if(seconds2-seconds >= 1){ - - fps = fpstracker/(seconds2-seconds); - fpstracker = 0; - seconds = seconds2; - - } - - string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS"; - - glfwSetWindowTitle(title.c_str()); - - - glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); - glBindTexture(GL_TEXTURE_2D, displayImage); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, - GL_RGBA, GL_UNSIGNED_BYTE, NULL); - - - glClear(GL_COLOR_BUFFER_BIT); - - // VAO, shader program, and texture already bound - 
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); - - glfwSwapBuffers(); - } - -#else - - void display(){ - runCuda(); - time_t seconds2 = time (NULL); - - if(seconds2-seconds >= 1){ - - fps = fpstracker/(seconds2-seconds); - fpstracker = 0; - seconds = seconds2; - - } - - string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS"; - glutSetWindowTitle(title.c_str()); - - glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); - glBindTexture(GL_TEXTURE_2D, displayImage); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, - GL_RGBA, GL_UNSIGNED_BYTE, NULL); - - glClear(GL_COLOR_BUFFER_BIT); - - // VAO, shader program, and texture already bound - glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); - - glutPostRedisplay(); - glutSwapBuffers(); - } - - void keyboard(unsigned char key, int x, int y) - { - switch (key) - { - case(27): - shut_down(1); - break; - } - } - -#endif - -//------------------------------- -//----------SETUP STUFF---------- -//------------------------------- - -#ifdef __APPLE__ - void init(){ - - if (glfwInit() != GL_TRUE){ - shut_down(1); - } - - // 16 bit color, no depth, alpha or stencil buffers, windowed - if (glfwOpenWindow(width, height, 5, 6, 5, 0, 0, 0, GLFW_WINDOW) != GL_TRUE){ - shut_down(1); - } - - // Set up vertex array object, texture stuff - initVAO(); - initTextures(); - } -#else - void init(int argc, char* argv[]){ - glutInit(&argc, argv); - glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); - glutInitWindowSize(width, height); - glutCreateWindow("CIS565 Rasterizer"); - - // Init GLEW - glewInit(); - GLenum err = glewInit(); - if (GLEW_OK != err) - { - /* Problem: glewInit failed, something is seriously wrong. */ - std::cout << "glewInit failed, aborting." 
<< std::endl; - exit (1); - } - - initVAO(); - initTextures(); - } -#endif - -void initPBO(GLuint* pbo){ - if (pbo) { - // set up vertex data parameter - int num_texels = width*height; - int num_values = num_texels * 4; - int size_tex_data = sizeof(GLubyte) * num_values; - - // Generate a buffer ID called a PBO (Pixel Buffer Object) - glGenBuffers(1,pbo); - // Make this the current UNPACK buffer (OpenGL is state-based) - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo); - // Allocate data for the buffer. 4-channel 8-bit image - glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY); - cudaGLRegisterBufferObject( *pbo ); - } -} - -void initCuda(){ - // Use device with highest Gflops/s - cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); - - initPBO(&pbo); - - // Clean up on program exit - atexit(cleanupCuda); - - runCuda(); -} - -void initTextures(){ - glGenTextures(1,&displayImage); - glBindTexture(GL_TEXTURE_2D, displayImage); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, - GL_UNSIGNED_BYTE, NULL); -} - -void initVAO(void){ - GLfloat vertices[] = - { - -1.0f, -1.0f, - 1.0f, -1.0f, - 1.0f, 1.0f, - -1.0f, 1.0f, - }; - - GLfloat texcoords[] = - { - 1.0f, 1.0f, - 0.0f, 1.0f, - 0.0f, 0.0f, - 1.0f, 0.0f - }; - - GLushort indices[] = { 0, 1, 3, 3, 1, 2 }; - - GLuint vertexBufferObjID[3]; - glGenBuffers(3, vertexBufferObjID); - - glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[0]); - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); - glVertexAttribPointer((GLuint)positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(positionLocation); - - glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[1]); - glBufferData(GL_ARRAY_BUFFER, sizeof(texcoords), texcoords, GL_STATIC_DRAW); - glVertexAttribPointer((GLuint)texcoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); - 
glEnableVertexAttribArray(texcoordsLocation); - - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBufferObjID[2]); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); -} - -GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath){ - GLuint program = glslUtility::createProgram(vertexShaderPath, fragmentShaderPath, attributeLocations, 2); - GLint location; - - glUseProgram(program); - - if ((location = glGetUniformLocation(program, "u_image")) != -1) - { - glUniform1i(location, 0); - } - - return program; -} - -//------------------------------- -//---------CLEANUP STUFF--------- -//------------------------------- - -void cleanupCuda(){ - if(pbo) deletePBO(&pbo); - if(displayImage) deleteTexture(&displayImage); -} - -void deletePBO(GLuint* pbo){ - if (pbo) { - // unregister this buffer object with CUDA - cudaGLUnregisterBufferObject(*pbo); - - glBindBuffer(GL_ARRAY_BUFFER, *pbo); - glDeleteBuffers(1, pbo); - - *pbo = (GLuint)NULL; - } -} - -void deleteTexture(GLuint* tex){ - glDeleteTextures(1, tex); - *tex = (GLuint)NULL; -} - -void shut_down(int return_code){ - kernelCleanup(); - cudaDeviceReset(); - #ifdef __APPLE__ - glfwTerminate(); - #endif - exit(return_code); -} +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#include "main.h" + +//------------------------------- +//-------------MAIN-------------- +//------------------------------- + +int main(int argc, char** argv){ + + bool loadedScene = false; + for(int i=1; ibuildVBOs(); + delete loader; + loadedScene = true; + } + } + + if(!loadedScene){ + cout << "Usage: mesh=[obj file]" << endl; + return 0; + } + + frame = 0; + seconds = time (NULL); + fpstracker = 0; + + // Launch CUDA/GL + #ifdef __APPLE__ + // Needed in OSX to force use of OpenGL3.2 + glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3); + 
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 2); + glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); + glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); + init(); + #else + init(argc, argv); + #endif + + initCuda(); + + initVAO(); + initTextures(); + + GLuint passthroughProgram; + passthroughProgram = initShader("shaders/passthroughVS.glsl", "shaders/passthroughFS.glsl"); + + glUseProgram(passthroughProgram); + glActiveTexture(GL_TEXTURE0); + + #ifdef __APPLE__ + // send into GLFW main loop + while(1){ + display(); + if (glfwGetKey(GLFW_KEY_ESC) == GLFW_PRESS || !glfwGetWindowParam( GLFW_OPENED )){ + kernelCleanup(); + cudaDeviceReset(); + exit(0); + } + } + + glfwTerminate(); + #else + glutDisplayFunc(display); + glutKeyboardFunc(keyboard); + + glutMainLoop(); + #endif + kernelCleanup(); + return 0; +} + +//------------------------------- +//---------RUNTIME STUFF--------- +//------------------------------- + +void runCuda(){ + // Map OpenGL buffer object for writing from CUDA on a single GPU + // No data is moved (Win & Linux). 
When mapped to CUDA, OpenGL should not use this buffer + dptr=NULL; + + vbo = mesh->getVBO(); + vbosize = mesh->getVBOsize(); + + nbo = mesh->getNBO(); + nbosize = mesh->getNBOsize(); + + float newcbo[] = {0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5}; + cbo = newcbo; + cbosize = 9; + + ibo = mesh->getIBO(); + ibosize = mesh->getIBOsize(); + + cudaGLMapBufferObject((void**)&dptr, pbo); + cudaRasterizeCore(dptr, glm::vec2(width, height), frame, vbo, vbosize, nbo, nbosize, cbo, cbosize, ibo, ibosize); + cudaGLUnmapBufferObject(pbo); + + vbo = NULL; + nbo = NULL; + cbo = NULL; + ibo = NULL; + + frame++; + fpstracker++; + +} + +#ifdef __APPLE__ + + void display(){ + runCuda(); + time_t seconds2 = time (NULL); + + if(seconds2-seconds >= 1){ + + fps = fpstracker/(seconds2-seconds); + fpstracker = 0; + seconds = seconds2; + + } + + string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS"; + + glfwSetWindowTitle(title.c_str()); + + + glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); + glBindTexture(GL_TEXTURE_2D, displayImage); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, + GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + + glClear(GL_COLOR_BUFFER_BIT); + + // VAO, shader program, and texture already bound + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); + + glfwSwapBuffers(); + } + +#else + + void display(){ + runCuda(); + time_t seconds2 = time (NULL); + + if(seconds2-seconds >= 1){ + + fps = fpstracker/(seconds2-seconds); + fpstracker = 0; + seconds = seconds2; + + } + + string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS"; + glutSetWindowTitle(title.c_str()); + + glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); + glBindTexture(GL_TEXTURE_2D, displayImage); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, + GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + glClear(GL_COLOR_BUFFER_BIT); + + // VAO, shader program, and texture already bound + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); + + 
glutPostRedisplay(); + glutSwapBuffers(); + } + + void keyboard(unsigned char key, int x, int y) + { + switch (key) + { + case(27): + shut_down(1); + break; + } + } + +#endif + +//------------------------------- +//----------SETUP STUFF---------- +//------------------------------- + +#ifdef __APPLE__ + void init(){ + + if (glfwInit() != GL_TRUE){ + shut_down(1); + } + + // 16 bit color, no depth, alpha or stencil buffers, windowed + if (glfwOpenWindow(width, height, 5, 6, 5, 0, 0, 0, GLFW_WINDOW) != GL_TRUE){ + shut_down(1); + } + + // Set up vertex array object, texture stuff + initVAO(); + initTextures(); + } +#else + void init(int argc, char* argv[]){ + glutInit(&argc, argv); + glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); + glutInitWindowSize(width, height); + glutCreateWindow("CIS565 Rasterizer"); + + // Init GLEW + glewInit(); + GLenum err = glewInit(); + if (GLEW_OK != err) + { + /* Problem: glewInit failed, something is seriously wrong. */ + std::cout << "glewInit failed, aborting." << std::endl; + exit (1); + } + + initVAO(); + initTextures(); + } +#endif + +void initPBO(GLuint* pbo){ + if (pbo) { + // set up vertex data parameter + int num_texels = width*height; + int num_values = num_texels * 4; + int size_tex_data = sizeof(GLubyte) * num_values; + + // Generate a buffer ID called a PBO (Pixel Buffer Object) + glGenBuffers(1,pbo); + // Make this the current UNPACK buffer (OpenGL is state-based) + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo); + // Allocate data for the buffer. 
4-channel 8-bit image + glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY); + cudaGLRegisterBufferObject( *pbo ); + } +} + +void initCuda(){ + // Use device with highest Gflops/s + cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); + + initPBO(&pbo); + + // Clean up on program exit + atexit(cleanupCuda); + + runCuda(); +} + +void initTextures(){ + glGenTextures(1,&displayImage); + glBindTexture(GL_TEXTURE_2D, displayImage); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, + GL_UNSIGNED_BYTE, NULL); +} + +void initVAO(void){ + GLfloat vertices[] = + { + -1.0f, -1.0f, + 1.0f, -1.0f, + 1.0f, 1.0f, + -1.0f, 1.0f, + }; + + GLfloat texcoords[] = + { + 1.0f, 1.0f, + 0.0f, 1.0f, + 0.0f, 0.0f, + 1.0f, 0.0f + }; + + GLushort indices[] = { 0, 1, 3, 3, 1, 2 }; + + GLuint vertexBufferObjID[3]; + glGenBuffers(3, vertexBufferObjID); + + glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[0]); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); + glVertexAttribPointer((GLuint)positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); + glEnableVertexAttribArray(positionLocation); + + glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[1]); + glBufferData(GL_ARRAY_BUFFER, sizeof(texcoords), texcoords, GL_STATIC_DRAW); + glVertexAttribPointer((GLuint)texcoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); + glEnableVertexAttribArray(texcoordsLocation); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBufferObjID[2]); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); +} + +GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath){ + GLuint program = glslUtility::createProgram(vertexShaderPath, fragmentShaderPath, attributeLocations, 2); + GLint location; + + glUseProgram(program); + + if ((location = glGetUniformLocation(program, "u_image")) != -1) + { + 
glUniform1i(location, 0); + } + + return program; +} + +//------------------------------- +//---------CLEANUP STUFF--------- +//------------------------------- + +void cleanupCuda(){ + if(pbo) deletePBO(&pbo); + if(displayImage) deleteTexture(&displayImage); +} + +void deletePBO(GLuint* pbo){ + if (pbo) { + // unregister this buffer object with CUDA + cudaGLUnregisterBufferObject(*pbo); + + glBindBuffer(GL_ARRAY_BUFFER, *pbo); + glDeleteBuffers(1, pbo); + + *pbo = (GLuint)NULL; + } +} + +void deleteTexture(GLuint* tex){ + glDeleteTextures(1, tex); + *tex = (GLuint)NULL; +} + +void shut_down(int return_code){ + kernelCleanup(); + cudaDeviceReset(); + #ifdef __APPLE__ + glfwTerminate(); + #endif + exit(return_code); +} diff --git a/src/main.h b/src/main.h index 63bf0fa..393ec99 100755 --- a/src/main.h +++ b/src/main.h @@ -1,105 +1,107 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#ifndef MAIN_H -#define MAIN_H - -#ifdef __APPLE__ - #include -#else - #include - #include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "glslUtility.h" -#include "glm/glm.hpp" -#include "rasterizeKernels.h" -#include "utilities.h" -#include "ObjCore/objloader.h" - -using namespace std; - -//------------------------------- -//------------GL STUFF----------- -//------------------------------- -int frame; -int fpstracker; -double seconds; -int fps = 0; -GLuint positionLocation = 0; -GLuint texcoordsLocation = 1; -const char *attributeLocations[] = { "Position", "Tex" }; -GLuint pbo = (GLuint)NULL; -GLuint displayImage; -uchar4 *dptr; - -obj* mesh; - -float* vbo; -int vbosize; -float* cbo; -int cbosize; -int* ibo; -int ibosize; - -//------------------------------- -//----------CUDA STUFF----------- -//------------------------------- - -int width=800; 
int height=800; - -//------------------------------- -//-------------MAIN-------------- -//------------------------------- - -int main(int argc, char** argv); - -//------------------------------- -//---------RUNTIME STUFF--------- -//------------------------------- - -void runCuda(); - -#ifdef __APPLE__ - void display(); -#else - void display(); - void keyboard(unsigned char key, int x, int y); -#endif - -//------------------------------- -//----------SETUP STUFF---------- -//------------------------------- - -#ifdef __APPLE__ - void init(); -#else - void init(int argc, char* argv[]); -#endif - -void initPBO(GLuint* pbo); -void initCuda(); -void initTextures(); -void initVAO(); -GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath); - -//------------------------------- -//---------CLEANUP STUFF--------- -//------------------------------- - -void cleanupCuda(); -void deletePBO(GLuint* pbo); -void deleteTexture(GLuint* tex); -void shut_down(int return_code); - +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#ifndef MAIN_H +#define MAIN_H + +#ifdef __APPLE__ + #include +#else + #include + #include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "glslUtility.h" +#include "glm/glm.hpp" +#include "rasterizeKernels.h" +#include "utilities.h" +#include "ObjCore/objloader.h" + +using namespace std; + +//------------------------------- +//------------GL STUFF----------- +//------------------------------- +int frame; +int fpstracker; +double seconds; +int fps = 0; +GLuint positionLocation = 0; +GLuint texcoordsLocation = 1; +const char *attributeLocations[] = { "Position", "Tex" }; +GLuint pbo = (GLuint)NULL; +GLuint displayImage; +uchar4 *dptr; + +obj* mesh; + +float* vbo; +int vbosize; +float* nbo; +int nbosize; 
+float* cbo; +int cbosize; +int* ibo; +int ibosize; + +//------------------------------- +//----------CUDA STUFF----------- +//------------------------------- + +int width=800; int height=800; + +//------------------------------- +//-------------MAIN-------------- +//------------------------------- + +int main(int argc, char** argv); + +//------------------------------- +//---------RUNTIME STUFF--------- +//------------------------------- + +void runCuda(); + +#ifdef __APPLE__ + void display(); +#else + void display(); + void keyboard(unsigned char key, int x, int y); +#endif + +//------------------------------- +//----------SETUP STUFF---------- +//------------------------------- + +#ifdef __APPLE__ + void init(); +#else + void init(int argc, char* argv[]); +#endif + +void initPBO(GLuint* pbo); +void initCuda(); +void initTextures(); +void initVAO(); +GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath); + +//------------------------------- +//---------CLEANUP STUFF--------- +//------------------------------- + +void cleanupCuda(); +void deletePBO(GLuint* pbo); +void deleteTexture(GLuint* tex); +void shut_down(int return_code); + #endif \ No newline at end of file diff --git a/src/rasterizeKernels.cu b/src/rasterizeKernels.cu index 826ec80..1c06ffe 100755 --- a/src/rasterizeKernels.cu +++ b/src/rasterizeKernels.cu @@ -1,267 +1,409 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#include -#include -#include -#include -#include -#include "rasterizeKernels.h" -#include "rasterizeTools.h" - -glm::vec3* framebuffer; -fragment* depthbuffer; -float* device_vbo; -float* device_cbo; -int* device_ibo; -triangle* primitives; - -void checkCUDAError(const char *msg) { - cudaError_t err = cudaGetLastError(); - if( cudaSuccess != err) { - fprintf(stderr, "Cuda error: %s: %s.\n", msg, 
cudaGetErrorString( err) ); - exit(EXIT_FAILURE); - } -} - -//Handy dandy little hashing function that provides seeds for random number generation -__host__ __device__ unsigned int hash(unsigned int a){ - a = (a+0x7ed55d16) + (a<<12); - a = (a^0xc761c23c) ^ (a>>19); - a = (a+0x165667b1) + (a<<5); - a = (a+0xd3a2646c) ^ (a<<9); - a = (a+0xfd7046c5) + (a<<3); - a = (a^0xb55a4f09) ^ (a>>16); - return a; -} - -//Writes a given fragment to a fragment buffer at a given location -__host__ __device__ void writeToDepthbuffer(int x, int y, fragment frag, fragment* depthbuffer, glm::vec2 resolution){ - if(x255){ - color.x = 255; - } - - if(color.y>255){ - color.y = 255; - } - - if(color.z>255){ - color.z = 255; - } - - // Each thread writes one pixel location in the texture (textel) - PBOpos[index].w = 0; - PBOpos[index].x = color.x; - PBOpos[index].y = color.y; - PBOpos[index].z = color.z; - } -} - -//TODO: Implement a vertex shader -__global__ void vertexShadeKernel(float* vbo, int vbosize){ - int index = (blockIdx.x * blockDim.x) + threadIdx.x; - if(index>>(resolution, framebuffer, glm::vec3(0,0,0)); - - fragment frag; - frag.color = glm::vec3(0,0,0); - frag.normal = glm::vec3(0,0,0); - frag.position = glm::vec3(0,0,-10000); - clearDepthBuffer<<>>(resolution, depthbuffer,frag); - - //------------------------------ - //memory stuff - //------------------------------ - primitives = NULL; - cudaMalloc((void**)&primitives, (ibosize/3)*sizeof(triangle)); - - device_ibo = NULL; - cudaMalloc((void**)&device_ibo, ibosize*sizeof(int)); - cudaMemcpy( device_ibo, ibo, ibosize*sizeof(int), cudaMemcpyHostToDevice); - - device_vbo = NULL; - cudaMalloc((void**)&device_vbo, vbosize*sizeof(float)); - cudaMemcpy( device_vbo, vbo, vbosize*sizeof(float), cudaMemcpyHostToDevice); - - device_cbo = NULL; - cudaMalloc((void**)&device_cbo, cbosize*sizeof(float)); - cudaMemcpy( device_cbo, cbo, cbosize*sizeof(float), cudaMemcpyHostToDevice); - - tileSize = 32; - int primitiveBlocks = 
ceil(((float)vbosize/3)/((float)tileSize)); - - //------------------------------ - //vertex shader - //------------------------------ - vertexShadeKernel<<>>(device_vbo, vbosize); - - cudaDeviceSynchronize(); - //------------------------------ - //primitive assembly - //------------------------------ - primitiveBlocks = ceil(((float)ibosize/3)/((float)tileSize)); - primitiveAssemblyKernel<<>>(device_vbo, vbosize, device_cbo, cbosize, device_ibo, ibosize, primitives); - - cudaDeviceSynchronize(); - //------------------------------ - //rasterization - //------------------------------ - rasterizationKernel<<>>(primitives, ibosize/3, depthbuffer, resolution); - - cudaDeviceSynchronize(); - //------------------------------ - //fragment shader - //------------------------------ - fragmentShadeKernel<<>>(depthbuffer, resolution); - - cudaDeviceSynchronize(); - //------------------------------ - //write fragments to framebuffer - //------------------------------ - render<<>>(resolution, depthbuffer, framebuffer); - sendImageToPBO<<>>(PBOpos, resolution, framebuffer); - - cudaDeviceSynchronize(); - - kernelCleanup(); - - checkCUDAError("Kernel failed!"); -} - -void kernelCleanup(){ - cudaFree( primitives ); - cudaFree( device_vbo ); - cudaFree( device_cbo ); - cudaFree( device_ibo ); - cudaFree( framebuffer ); - cudaFree( depthbuffer ); -} - +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#include +#include +#include +#include +#include +#include "rasterizeKernels.h" +#include "rasterizeTools.h" + +glm::vec3* framebuffer; +fragment* depthbuffer; +float* device_vbo; +float* device_nbo; +float* device_cbo; +int* device_ibo; +triangle* primitives; + +void checkCUDAError(const char *msg) { + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + fprintf(stderr, "Cuda error: %s: %s.\n", msg, 
cudaGetErrorString( err) ); + exit(EXIT_FAILURE); + } +} + +//Handy dandy little hashing function that provides seeds for random number generation +__host__ __device__ unsigned int hash(unsigned int a){ + a = (a+0x7ed55d16) + (a<<12); + a = (a^0xc761c23c) ^ (a>>19); + a = (a+0x165667b1) + (a<<5); + a = (a+0xd3a2646c) ^ (a<<9); + a = (a+0xfd7046c5) + (a<<3); + a = (a^0xb55a4f09) ^ (a>>16); + return a; +} + +//Writes a given fragment to a fragment buffer at a given location +__host__ __device__ void writeToDepthbuffer(int x, int y, fragment frag, fragment* depthbuffer, glm::vec2 resolution){ + if(x255){ + color.x = 255; + } + + if(color.y>255){ + color.y = 255; + } + + if(color.z>255){ + color.z = 255; + } + + // Each thread writes one pixel location in the texture (textel) + PBOpos[index].w = 0; + PBOpos[index].x = color.x; + PBOpos[index].y = color.y; + PBOpos[index].z = color.z; + } +} + +//TODO: Implement a vertex shader +__global__ void vertexShadeKernel(float* vbo, int vbosize, float* nbo, int nbosize, glm::mat4 MVP){ + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if(index 0.0f) + { + primitives[index].culled = true; + //printf("->"); + } + } + +} + + +//TODO: Implement a rasterization method, such as scanline. 
+__global__ void rasterizationKernel(triangle* primitives, int primitivesCount, fragment* depthbuffer, glm::vec2 resolution){ + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if(index depthbuffer[(int)(resolution.x * resolution.y) - ind].position.z) + { + depthbuffer[ind].position = tempPosition; + //depthbuffer[ind].color = glm::vec3(1,1,1); + depthbuffer[ind].color = getColorAtCoordinate(barycentric, curTriangle); + depthbuffer[ind].normal = getNormalAtCoordinate(barycentric, curTriangle); + } + } + } + } + } + } +} + +/* Fragment shader. Expects a 2D launch with one thread per pixel (x from blockIdx.x/threadIdx.x, y from blockIdx.y/threadIdx.y). For each in-range pixel it normalizes the stored normal, builds the direction from the fragment position to lightPosition, scales the fragment color by 6 * |dot(normal, lightDirection)| * lightColor, and resets the scissored flag to false. Threads whose x or y falls outside the resolution do nothing. */ +__global__ void fragmentShadeKernel(fragment* depthbuffer, glm::vec2 resolution, glm::vec3 lightPosition, glm::vec3 lightColor){ + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * resolution.x); + /* pixel coordinates are 0-based, so the last valid column/row is resolution-1; a <= test would let x == width wrap into the next row and y == height index past the end of depthbuffer */ + if(x < resolution.x && y < resolution.y){ + + glm::vec3 lightDirection = glm::normalize(lightPosition - depthbuffer[index].position); + depthbuffer[index].normal = glm::normalize(depthbuffer[index].normal); + //float factor = max(glm::dot(depthbuffer[index].normal, lightDirection), 0.0f); + float factor = abs(glm::dot(depthbuffer[index].normal, lightDirection)); + depthbuffer[index].color = 6 * factor * lightColor * depthbuffer[index].color; + depthbuffer[index].scissored = false; + } +} + + +/* Scissor test. Expects a 2D launch with one thread per pixel. A pixel passes (scissored = false) when rectPos <= (x, y) < rectPos + dimensions; every other in-range pixel is marked scissored = true. The lower edge is inclusive so a rectangle anchored at (0,0) keeps column 0 and row 0. */ +__global__ void scissorTest(glm::vec2 resolution, fragment* depthbuffer, glm::vec2 rectPos, glm::vec2 dimensions){ + + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * resolution.x); + + /* strict upper bound keeps threads from the grid's padding out of depthbuffer */ + if(x < resolution.x && y < resolution.y){ + if(x >= rectPos.x && x < rectPos.x + dimensions.x && y >= rectPos.y && y < rectPos.y + dimensions.y) + depthbuffer[index].scissored = false; + else + depthbuffer[index].scissored = true; + } +} + +//Writes fragment colors to the framebuffer +__global__ void render(glm::vec2 resolution, fragment* depthbuffer, glm::vec3* framebuffer){ + + int x = (blockIdx.x 
* blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * resolution.x); + + /* x and y are 0-based, so valid pixels satisfy x < width and y < height; the bounds test is evaluated first so depthbuffer[index] is only read for in-range pixels. Scissored fragments are skipped and keep whatever the framebuffer already holds. */ + if(x < resolution.x && y < resolution.y && depthbuffer[index].scissored == false){ + framebuffer[index] = depthbuffer[index].color; + } +} + +// Wrapper for the __global__ call that sets up the kernel calls and does a ton of memory management +void cudaRasterizeCore(uchar4* PBOpos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* nbo, int nbosize, float* cbo, int cbosize, int* ibo, int ibosize){ + + // set up crucial magic + int tileSize = 8; + dim3 threadsPerBlock(tileSize, tileSize); + dim3 fullBlocksPerGrid((int)ceil(float(resolution.x)/float(tileSize)), (int)ceil(float(resolution.y)/float(tileSize))); + + //set up framebuffer + framebuffer = NULL; + cudaMalloc((void**)&framebuffer, (int)resolution.x*(int)resolution.y*sizeof(glm::vec3)); + + //set up depthbuffer + depthbuffer = NULL; + cudaMalloc((void**)&depthbuffer, (int)resolution.x*(int)resolution.y*sizeof(fragment)); + + //kernel launches to black out accumulated/unaccumulated pixel buffers and clear our scattering states + clearImage<<>>(resolution, framebuffer, glm::vec3(0,0,0)); + + fragment frag; + frag.color = glm::vec3(0,0,0); + frag.normal = glm::vec3(0,0,0); + frag.position = glm::vec3(0,0,-10000); + clearDepthBuffer<<>>(resolution, depthbuffer,frag); + + //------------------------------ + //memory stuff + //------------------------------ + primitives = NULL; + cudaMalloc((void**)&primitives, (ibosize/3)*sizeof(triangle)); + + device_ibo = NULL; + cudaMalloc((void**)&device_ibo, ibosize*sizeof(int)); + cudaMemcpy( device_ibo, ibo, ibosize*sizeof(int), cudaMemcpyHostToDevice); + + device_vbo = NULL; + cudaMalloc((void**)&device_vbo, vbosize*sizeof(float)); + cudaMemcpy( device_vbo, vbo, vbosize*sizeof(float), cudaMemcpyHostToDevice); + + device_nbo = NULL; + cudaMalloc((void**)&device_nbo, nbosize*sizeof(float)); + cudaMemcpy( device_nbo, nbo, nbosize*sizeof(float), 
cudaMemcpyHostToDevice); + + device_cbo = NULL; + cudaMalloc((void**)&device_cbo, cbosize*sizeof(float)); + cudaMemcpy( device_cbo, cbo, cbosize*sizeof(float), cudaMemcpyHostToDevice); + + tileSize = 32; + int primitiveBlocks = ceil(((float)vbosize/3)/((float)tileSize)); + + //------------------------------ + //vertex shader + //------------------------------ + glm::vec3 camPos = glm::vec3(0,0,5); + glm::mat4 model = glm::mat4(1.0f); + glm::mat4 view = glm::lookAt(camPos, glm::vec3(0,0,0), glm::vec3(0,1,0)); + glm::mat4 projection = glm::perspective(75.0f, resolution.x/resolution.y, 0.1f, 100.0f); + glm::vec4 viewport = glm::vec4(0, 0, resolution.x, resolution.y); + glm::mat4 MVP = projection * view * model; + + vertexShadeKernel<<>>(device_vbo, vbosize, device_nbo, nbosize, MVP); + + cudaDeviceSynchronize(); + //------------------------------ + //primitive assembly + //------------------------------ + primitiveBlocks = ceil(((float)ibosize/3)/((float)tileSize)); + primitiveAssemblyKernel<<>>(device_vbo, vbosize, device_nbo, nbosize, device_cbo, cbosize, device_ibo, ibosize, primitives); + + cudaDeviceSynchronize(); + //------------------------------ + //backface culling + //------------------------------ + backfaceCullingKernel<<>>(primitives, ibosize/3, camPos, model, view, projection, viewport); + + cudaDeviceSynchronize(); + //------------------------------ + //rasterization + //------------------------------ + rasterizationKernel<<>>(primitives, ibosize/3, depthbuffer, resolution); + + cudaDeviceSynchronize(); + //------------------------------ + //fragment shader + //------------------------------ + glm::vec3 lpos = glm::vec3(5,5,10); + glm::vec3 lcol = glm::vec3(1,1,1); + fragmentShadeKernel<<>>(depthbuffer, resolution, lpos, lcol); + + cudaDeviceSynchronize(); + //------------------------------ + //scissor test + //------------------------------ + glm::vec2 rectPos = glm::vec2(0,0); + glm::vec2 dimensions = glm::vec2(1000,1000); + 
scissorTest<<>>(resolution, depthbuffer, rectPos, dimensions); + + cudaDeviceSynchronize(); + //------------------------------ + //write fragments to framebuffer + //------------------------------ + render<<>>(resolution, depthbuffer, framebuffer); + sendImageToPBO<<>>(PBOpos, resolution, framebuffer); + + cudaDeviceSynchronize(); + + kernelCleanup(); + + checkCUDAError("Kernel failed!"); +} + +void kernelCleanup(){ + cudaFree( primitives ); + cudaFree( device_vbo ); + cudaFree( device_cbo ); + cudaFree( device_ibo ); + cudaFree( framebuffer ); + cudaFree( depthbuffer ); +} + diff --git a/src/rasterizeKernels.h b/src/rasterizeKernels.h index bef3653..8a779aa 100755 --- a/src/rasterizeKernels.h +++ b/src/rasterizeKernels.h @@ -1,17 +1,18 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#ifndef RASTERIZEKERNEL_H -#define RASTERIZEKERNEL_H - -#include -#include -#include -#include -#include -#include "glm/glm.hpp" - -void kernelCleanup(); -void cudaRasterizeCore(uchar4* pos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* cbo, int cbosize, int* ibo, int ibosize); - -#endif //RASTERIZEKERNEL_H +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#ifndef RASTERIZEKERNEL_H +#define RASTERIZEKERNEL_H + +#include +#include +#include +#include +#include +#include "glm/glm.hpp" +#include "glm/gtc/matrix_transform.hpp" + +void kernelCleanup(); +void cudaRasterizeCore(uchar4* pos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* nbo, int nbosize, float* cbo, int cbosize, int* ibo, int ibosize); + +#endif //RASTERIZEKERNEL_H diff --git a/src/rasterizeTools.h b/src/rasterizeTools.h index e9b5dcc..e03096b 100755 --- 
a/src/rasterizeTools.h +++ b/src/rasterizeTools.h @@ -1,78 +1,103 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#ifndef RASTERIZETOOLS_H -#define RASTERIZETOOLS_H - -#include -#include "glm/glm.hpp" -#include "utilities.h" -#include "cudaMat4.h" - -struct triangle { - glm::vec3 p0; - glm::vec3 p1; - glm::vec3 p2; - glm::vec3 c0; - glm::vec3 c1; - glm::vec3 c2; -}; - -struct fragment{ - glm::vec3 color; - glm::vec3 normal; - glm::vec3 position; -}; - -//Multiplies a cudaMat4 matrix and a vec4 -__host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v){ - glm::vec3 r(1,1,1); - r.x = (m.x.x*v.x)+(m.x.y*v.y)+(m.x.z*v.z)+(m.x.w*v.w); - r.y = (m.y.x*v.x)+(m.y.y*v.y)+(m.y.z*v.z)+(m.y.w*v.w); - r.z = (m.z.x*v.x)+(m.z.y*v.y)+(m.z.z*v.z)+(m.z.w*v.w); - return r; -} - -//LOOK: finds the axis aligned bounding box for a given triangle -__host__ __device__ void getAABBForTriangle(triangle tri, glm::vec3& minpoint, glm::vec3& maxpoint){ - minpoint = glm::vec3(min(min(tri.p0.x, tri.p1.x),tri.p2.x), - min(min(tri.p0.y, tri.p1.y),tri.p2.y), - min(min(tri.p0.z, tri.p1.z),tri.p2.z)); - maxpoint = glm::vec3(max(max(tri.p0.x, tri.p1.x),tri.p2.x), - max(max(tri.p0.y, tri.p1.y),tri.p2.y), - max(max(tri.p0.z, tri.p1.z),tri.p2.z)); -} - -//LOOK: calculates the signed area of a given triangle -__host__ __device__ float calculateSignedArea(triangle tri){ - return 0.5*((tri.p2.x - tri.p0.x)*(tri.p1.y - tri.p0.y) - (tri.p1.x - tri.p0.x)*(tri.p2.y - tri.p0.y)); -} - -//LOOK: helper function for calculating barycentric coordinates -__host__ __device__ float calculateBarycentricCoordinateValue(glm::vec2 a, glm::vec2 b, glm::vec2 c, triangle tri){ - triangle baryTri; - baryTri.p0 = glm::vec3(a,0); baryTri.p1 = glm::vec3(b,0); baryTri.p2 = glm::vec3(c,0); - return calculateSignedArea(baryTri)/calculateSignedArea(tri); -} - 
-//LOOK: calculates barycentric coordinates -__host__ __device__ glm::vec3 calculateBarycentricCoordinate(triangle tri, glm::vec2 point){ - float beta = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), point, glm::vec2(tri.p2.x,tri.p2.y), tri); - float gamma = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), glm::vec2(tri.p1.x,tri.p1.y), point, tri); - float alpha = 1.0-beta-gamma; - return glm::vec3(alpha,beta,gamma); -} - -//LOOK: checks if a barycentric coordinate is within the boundaries of a triangle -__host__ __device__ bool isBarycentricCoordInBounds(glm::vec3 barycentricCoord){ - return barycentricCoord.x >= 0.0 && barycentricCoord.x <= 1.0 && - barycentricCoord.y >= 0.0 && barycentricCoord.y <= 1.0 && - barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0; -} - -//LOOK: for a given barycentric coordinate, return the corresponding z position on the triangle -__host__ __device__ float getZAtCoordinate(glm::vec3 barycentricCoord, triangle tri){ - return -(barycentricCoord.x*tri.p0.z + barycentricCoord.y*tri.p1.z + barycentricCoord.z*tri.p2.z); -} - +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#ifndef RASTERIZETOOLS_H +#define RASTERIZETOOLS_H + +#include +#include "glm/glm.hpp" +#include "utilities.h" +#include "cudaMat4.h" + +struct triangle { + glm::vec3 p0; + glm::vec3 p1; + glm::vec3 p2; + glm::vec3 n0; + glm::vec3 n1; + glm::vec3 n2; + glm::vec3 c0; + glm::vec3 c1; + glm::vec3 c2; + bool culled; +}; + +struct fragment{ + glm::vec3 color; + glm::vec3 normal; + glm::vec3 position; + bool scissored; +}; + +//Multiplies a cudaMat4 matrix and a vec4 +__host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v){ + glm::vec3 r(1,1,1); + r.x = (m.x.x*v.x)+(m.x.y*v.y)+(m.x.z*v.z)+(m.x.w*v.w); + r.y = (m.y.x*v.x)+(m.y.y*v.y)+(m.y.z*v.z)+(m.y.w*v.w); + r.z 
= (m.z.x*v.x)+(m.z.y*v.y)+(m.z.z*v.z)+(m.z.w*v.w); + return r; +} + +//LOOK: finds the axis aligned bounding box for a given triangle +__host__ __device__ void getAABBForTriangle(triangle tri, glm::vec3& minpoint, glm::vec3& maxpoint){ + minpoint = glm::vec3(min(min(tri.p0.x, tri.p1.x),tri.p2.x), + min(min(tri.p0.y, tri.p1.y),tri.p2.y), + min(min(tri.p0.z, tri.p1.z),tri.p2.z)); + maxpoint = glm::vec3(max(max(tri.p0.x, tri.p1.x),tri.p2.x), + max(max(tri.p0.y, tri.p1.y),tri.p2.y), + max(max(tri.p0.z, tri.p1.z),tri.p2.z)); +} + +//LOOK: calculates the signed area of a given triangle +__host__ __device__ float calculateSignedArea(triangle tri){ + return 0.5*((tri.p2.x - tri.p0.x)*(tri.p1.y - tri.p0.y) - (tri.p1.x - tri.p0.x)*(tri.p2.y - tri.p0.y)); +} + +//LOOK: helper function for calculating barycentric coordinates +__host__ __device__ float calculateBarycentricCoordinateValue(glm::vec2 a, glm::vec2 b, glm::vec2 c, triangle tri){ + triangle baryTri; + baryTri.p0 = glm::vec3(a,0); baryTri.p1 = glm::vec3(b,0); baryTri.p2 = glm::vec3(c,0); + return calculateSignedArea(baryTri)/calculateSignedArea(tri); +} + +//LOOK: calculates barycentric coordinates +__host__ __device__ glm::vec3 calculateBarycentricCoordinate(triangle tri, glm::vec2 point){ + float beta = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), point, glm::vec2(tri.p2.x,tri.p2.y), tri); + float gamma = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), glm::vec2(tri.p1.x,tri.p1.y), point, tri); + float alpha = 1.0-beta-gamma; + return glm::vec3(alpha,beta,gamma); +} + +//LOOK: checks if a barycentric coordinate is within the boundaries of a triangle +__host__ __device__ bool isBarycentricCoordInBounds(glm::vec3 barycentricCoord){ + return barycentricCoord.x >= 0.0 && barycentricCoord.x <= 1.0 && + barycentricCoord.y >= 0.0 && barycentricCoord.y <= 1.0 && + barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0; +} + +//LOOK: for a given barycentric coordinate, return the 
corresponding z position on the triangle +__host__ __device__ float getZAtCoordinate(glm::vec3 barycentricCoord, triangle tri){ + return -(barycentricCoord.x*tri.p0.z + barycentricCoord.y*tri.p1.z + barycentricCoord.z*tri.p2.z); +} + +__host__ __device__ glm::vec3 getColorAtCoordinate(glm::vec3 barycentricCoord, triangle tri){ + + glm::vec3 color; + color.x = barycentricCoord.x * tri.c0.x + barycentricCoord.y * tri.c1.x + barycentricCoord.z * tri.c2.x; + color.y = barycentricCoord.x * tri.c0.y + barycentricCoord.y * tri.c1.y + barycentricCoord.z * tri.c2.y; + color.z = barycentricCoord.x * tri.c0.z + barycentricCoord.y * tri.c1.z + barycentricCoord.z * tri.c2.z; + + return color; +} + +__host__ __device__ glm::vec3 getNormalAtCoordinate(glm::vec3 barycentricCoord, triangle tri){ + + glm::vec3 normal; + normal.x = barycentricCoord.x * tri.n0.x + barycentricCoord.y * tri.n1.x + barycentricCoord.z * tri.n2.x; + normal.y = barycentricCoord.x * tri.n0.y + barycentricCoord.y * tri.n1.y + barycentricCoord.z * tri.n2.y; + normal.z = barycentricCoord.x * tri.n0.z + barycentricCoord.y * tri.n1.z + barycentricCoord.z * tri.n2.z; + + return normal; +} + #endif \ No newline at end of file