diff --git a/CMakeLists.txt b/CMakeLists.txt index dff84f8..275f3a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,10 +97,3 @@ target_link_libraries(${CMAKE_PROJECT_NAME} ${CORELIBS} ) -add_custom_command( - TARGET ${CMAKE_PROJECT_NAME} - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CMAKE_SOURCE_DIR}/shaders - ${CMAKE_BINARY_DIR}/shaders - ) diff --git a/README.md b/README.md index 41b91f0..0a39774 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,93 @@ CUDA Rasterizer =============== -[CLICK ME FOR INSTRUCTION OF THIS PROJECT](./INSTRUCTION.md) - **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Yu Sun +* [LinkedIn](https://www.linkedin.com/in/yusun3/) +* Tested on: Tested on: Windows 10 , i7-6700HQ CPU @ 2.60GHz × 8 , GeForce GTX 960M/PCIe/SSE2, 7.7GB Memory (Personal Laptop) + +## Introduction +In this project, a basic rasterized graphic pipeline is implemented using cuda. The pipeline includes +vertex shading, primitive assembly, rasterization, fragment shading, and a framebuffer. The special features I implement +for this project includes + +``` +* Rasterization for lines and points +* Back-face culling +* UV texture mapping with bilinear texture filtering and perspective correct texture coordinates. +* Super-sample Anti-Aliasing +``` + +Forgive my briefness for the README, I'll add more things if I get more time later. + +***Rasterization for Points, Lines and Triangles*** + +Rasterization for points is straightforward by computing the corresponding pixel index and color. + +Rasterization for lines is approximated using the Bresenham Algorithm since a naive approach would lead to artifact +coming from fixed grid resolution, and rounding float pixel locations to integer pixel locations + +Rasterization for traingles is achieved by using barycentric coordinates + +Milk Car | Duck | Flower +:-------------------------:|:-------------------------:|:-------------------------: +![](img/milkcar.gif) | ![](img/duck.gif) | ![](img/flower.gif) + +***Texture Mapping and Prospective Correction with Bilinear Interpolation*** + +The texture mapping can be achieved by using uv coordinates that warp 2D textures onto 3D mesh. Techniques used to make +the texture look better include bilinear interpolation and perspective correction +using the depth information. + +A comparision of texture mapping with and without perspective correction is shown below. + +Scene without Perspective Correction | Scene with Perspective Correction +:-------------------------:|:-------------------------: + | + +Bilinear interpolation is basically a techique used to prevent aliasing effect and make the resulting image +looks more natural and smooth by taking the color of surrounding pixels into account while generating the final +color for a specific pixel. Since the effect isn't that obvious, I picked one image that I found through Google Image +to demonstrate the effect. + +![](img/interpolation.png) + +***Back-face Culling*** + +Back-face culling is intended to reduce the amount of computation by eliminating the pixels that cannot be captured from +the camera. However, while implementing it I found that I didn't see a significant speed up, and it actually creates some +funny effect. + +Scene without Back-face Culling | Scene with Back-face Culling +:-------------------------:|:-------------------------: +![](img/no_culling.PNG) | ![](img/culling.PNG) + +The things that are culled out can be seen below + +![](img/culled.gif) + +***Super-Sampled Antialiasing*** + +By super-sampling, one is essentially making more grids and creating higher resolution. This is a sacrifice on memory to +give more details to the display. The difference can be seen below. + +SSAA Factor = 1 | SSAA Factor= 4 +:-------------------------:|:-------------------------: +![](img/ssaa=1.gif) | ![](img/ssaa=4.gif) + + +## Performance Analysis + +Below is a graph demonstrating the amount of time spent on each stage of the graphics pipeline for different type of scenes. +Without much surprise, it can be seen that most time of the computation is spent on primitive rasterization. -### (TODO: Your README) +![](img/compute.png) -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +Notice how computation increase while we move closer to the rendered object due to the increase amount of checks we need to perform. +![](img/z.png) + ### Credits diff --git a/img/check.PNG b/img/check.PNG new file mode 100755 index 0000000..3d59dac Binary files /dev/null and b/img/check.PNG differ diff --git a/img/check_corrected.PNG b/img/check_corrected.PNG new file mode 100755 index 0000000..c7bbeab Binary files /dev/null and b/img/check_corrected.PNG differ diff --git a/img/compute.png b/img/compute.png new file mode 100644 index 0000000..38e9f3a Binary files /dev/null and b/img/compute.png differ diff --git a/img/culled.gif b/img/culled.gif new file mode 100755 index 0000000..8b1d258 Binary files /dev/null and b/img/culled.gif differ diff --git a/img/culling.PNG b/img/culling.PNG new file mode 100755 index 0000000..96e27a8 Binary files /dev/null and b/img/culling.PNG differ diff --git a/img/duck.gif b/img/duck.gif new file mode 100755 index 0000000..49422fc Binary files /dev/null and b/img/duck.gif differ diff --git a/img/flower.gif b/img/flower.gif new file mode 100755 index 0000000..bfb70d6 Binary files /dev/null and b/img/flower.gif differ diff --git a/img/interpolation.png b/img/interpolation.png new file mode 100644 index 0000000..d5ef0eb Binary files /dev/null and b/img/interpolation.png differ diff --git a/img/milkcar.gif b/img/milkcar.gif new file mode 100755 index 0000000..c6570ac Binary files /dev/null and b/img/milkcar.gif differ diff --git a/img/no_culling.PNG b/img/no_culling.PNG new file mode 100755 index 0000000..0f8957d Binary files /dev/null and b/img/no_culling.PNG differ diff --git a/img/ssaa=1.gif b/img/ssaa=1.gif new file mode 100755 index 0000000..67154e6 Binary files /dev/null and b/img/ssaa=1.gif differ diff --git a/img/ssaa=4.gif b/img/ssaa=4.gif new file mode 100755 index 0000000..6dd78d3 Binary files /dev/null and b/img/ssaa=4.gif differ diff --git a/img/z.png b/img/z.png new file mode 100644 index 0000000..733c109 Binary files /dev/null and b/img/z.png differ diff --git a/src/main.cpp b/src/main.cpp index 7986959..679f2cc 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,10 +1,10 @@ /** - * @file main.cpp - * @brief Main file for CUDA rasterizer. Handles CUDA-GL interop for display. - * @authors Skeleton code: Yining Karl Li, Kai Ninomiya, Shuai Shao (Shrek) - * @date 2012-2016 - * @copyright University of Pennsylvania - */ +* @file main.cpp +* @brief Main file for CUDA rasterizer. Handles CUDA-GL interop for display. +* @authors Skeleton code: Yining Karl Li, Kai Ninomiya, Shuai Shao (Shrek) +* @date 2012-2016 +* @copyright University of Pennsylvania +*/ @@ -12,42 +12,52 @@ #define STB_IMAGE_IMPLEMENTATION #define TINYGLTF_LOADER_IMPLEMENTATION + #include +//------------------------------- +//---------RUNTIME STUFF--------- +//------------------------------- +float scale = 1.0f; +float x_trans = 0.0f, y_trans = 0.0f, z_trans = -10.0f; +float x_angle = 0.0f, y_angle = 0.0f; +int primitive_type = 0; + //------------------------------- //-------------MAIN-------------- //------------------------------- + int main(int argc, char **argv) { if (argc != 2) { cout << "Usage: [gltf file]. Press Enter to exit" << endl; - getchar(); + getchar(); return 0; } - tinygltf::Scene scene; - tinygltf::TinyGLTFLoader loader; - std::string err; - std::string input_filename(argv[1]); - std::string ext = getFilePathExtension(input_filename); - - bool ret = false; - if (ext.compare("glb") == 0) { - // assume binary glTF. - ret = loader.LoadBinaryFromFile(&scene, &err, input_filename.c_str()); - } else { - // assume ascii glTF. - ret = loader.LoadASCIIFromFile(&scene, &err, input_filename.c_str()); - } + tinygltf::Scene scene; + tinygltf::TinyGLTFLoader loader; + std::string err; + std::string input_filename(argv[1]); + std::string ext = getFilePathExtension(input_filename); + + bool ret = false; + if (ext.compare("glb") == 0) { + // assume binary glTF. + ret = loader.LoadBinaryFromFile(&scene, &err, input_filename.c_str()); + } else { + // assume ascii glTF. + ret = loader.LoadASCIIFromFile(&scene, &err, input_filename.c_str()); + } - if (!err.empty()) { - printf("Err: %s\n", err.c_str()); - } + if (!err.empty()) { + printf("Err: %s\n", err.c_str()); + } - if (!ret) { - printf("Failed to parse glTF\n"); - return -1; - } + if (!ret) { + printf("Failed to parse glTF\n"); + return -1; + } frame = 0; @@ -68,7 +78,7 @@ void mainLoop() { glfwPollEvents(); runCuda(); - time_t seconds2 = time (NULL); + time_t seconds2 = time(NULL); if (seconds2 - seconds >= 1) { @@ -77,7 +87,8 @@ void mainLoop() { seconds = seconds2; } - string title = "CIS565 Rasterizer | " + utilityCore::convertIntToString((int)fps) + " FPS"; + string title = "CIS565 Rasterizer | " + utilityCore::convertIntToString((int) fps) + " FPS" + + "; Z = " + utilityCore::convertIntToString((int) z_trans); glfwSetWindowTitle(window, title.c_str()); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo); @@ -86,52 +97,48 @@ void mainLoop() { glClear(GL_COLOR_BUFFER_BIT); // VAO, shader program, and texture already bound - glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); glfwSwapBuffers(window); } glfwDestroyWindow(window); glfwTerminate(); } -//------------------------------- -//---------RUNTIME STUFF--------- -//------------------------------- -float scale = 1.0f; -float x_trans = 0.0f, y_trans = 0.0f, z_trans = -10.0f; -float x_angle = 0.0f, y_angle = 0.0f; void runCuda() { // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer dptr = NULL; - glm::mat4 P = glm::frustum(-scale * ((float)width) / ((float)height), - scale * ((float)width / (float)height), - -scale, scale, 1.0, 1000.0); + glm::mat4 P = glm::frustum(-scale * ((float) width) / ((float) height), + scale * ((float) width / (float) height), + -scale, scale, 1.0, 1000.0); - glm::mat4 V = glm::mat4(1.0f); + glm::mat4 V = glm::mat4(1.0f); - glm::mat4 M = - glm::translate(glm::vec3(x_trans, y_trans, z_trans)) - * glm::rotate(x_angle, glm::vec3(1.0f, 0.0f, 0.0f)) - * glm::rotate(y_angle, glm::vec3(0.0f, 1.0f, 0.0f)); + glm::mat4 M = + glm::translate(glm::vec3(x_trans, y_trans, z_trans)) + * glm::rotate(x_angle, glm::vec3(1.0f, 0.0f, 0.0f)) + * glm::rotate(y_angle, glm::vec3(0.0f, 1.0f, 0.0f)); - glm::mat3 MV_normal = glm::transpose(glm::inverse(glm::mat3(V) * glm::mat3(M))); - glm::mat4 MV = V * M; - glm::mat4 MVP = P * MV; + glm::mat3 MV_normal = glm::transpose(glm::inverse(glm::mat3(V) * glm::mat3(M))); + glm::mat4 MV = V * M; + glm::mat4 MVP = P * MV; - cudaGLMapBufferObject((void **)&dptr, pbo); - rasterize(dptr, MVP, MV, MV_normal); + cudaGLMapBufferObject((void **) &dptr, pbo); + rasterize(dptr, MVP, MV, MV_normal, primitive_type); cudaGLUnmapBufferObject(pbo); frame++; fpstracker++; + y_angle += 0.04f; + } //------------------------------- //----------SETUP STUFF---------- //------------------------------- -bool init(const tinygltf::Scene & scene) { +bool init(const tinygltf::Scene &scene) { glfwSetErrorCallback(errorCallback); if (!glfwInit()) { @@ -158,30 +165,32 @@ bool init(const tinygltf::Scene & scene) { initVAO(); initTextures(); initCuda(); - initPBO(); - - // Mouse Control Callbacks - glfwSetMouseButtonCallback(window, mouseButtonCallback); - glfwSetCursorPosCallback(window, mouseMotionCallback); - glfwSetScrollCallback(window, mouseWheelCallback); - - { - std::map >::const_iterator it( - scene.scenes.begin()); - std::map >::const_iterator itEnd( - scene.scenes.end()); - - for (; it != itEnd; it++) { - for (size_t i = 0; i < it->second.size(); i++) { - std::cout << it->second[i] - << ((i != (it->second.size() - 1)) ? ", " : ""); - } - std::cout << " ] " << std::endl; - } - } + initPBO(); + + // Mouse Control Callbacks + glfwSetMouseButtonCallback(window, mouseButtonCallback); + glfwSetCursorPosCallback(window, mouseMotionCallback); + glfwSetScrollCallback(window, mouseWheelCallback); + + { + std::map < std::string, std::vector < std::string > > ::const_iterator + it( + scene.scenes.begin()); + std::map < std::string, std::vector < std::string > > ::const_iterator + itEnd( + scene.scenes.end()); + + for (; it != itEnd; it++) { + for (size_t i = 0; i < it->second.size(); i++) { + std::cout << it->second[i] + << ((i != (it->second.size() - 1)) ? ", " : ""); + } + std::cout << " ] " << std::endl; + } + } - rasterizeSetBuffers(scene); + rasterizeSetBuffers(scene); GLuint passthroughProgram; passthroughProgram = initShader(); @@ -225,38 +234,38 @@ void initTextures() { glBindTexture(GL_TEXTURE_2D, displayImage); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, - GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, + GL_UNSIGNED_BYTE, NULL); } void initVAO(void) { GLfloat vertices[] = { - -1.0f, -1.0f, - 1.0f, -1.0f, - 1.0f, 1.0f, - -1.0f, 1.0f, + -1.0f, -1.0f, + 1.0f, -1.0f, + 1.0f, 1.0f, + -1.0f, 1.0f, }; GLfloat texcoords[] = { - 1.0f, 1.0f, - 0.0f, 1.0f, - 0.0f, 0.0f, - 1.0f, 0.0f + 1.0f, 1.0f, + 0.0f, 1.0f, + 0.0f, 0.0f, + 1.0f, 0.0f }; - GLushort indices[] = { 0, 1, 3, 3, 1, 2 }; + GLushort indices[] = {0, 1, 3, 3, 1, 2}; GLuint vertexBufferObjID[3]; glGenBuffers(3, vertexBufferObjID); glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[0]); glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); - glVertexAttribPointer((GLuint)positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); + glVertexAttribPointer((GLuint) positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); glEnableVertexAttribArray(positionLocation); glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[1]); glBufferData(GL_ARRAY_BUFFER, sizeof(texcoords), texcoords, GL_STATIC_DRAW); - glVertexAttribPointer((GLuint)texcoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); + glVertexAttribPointer((GLuint) texcoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); glEnableVertexAttribArray(texcoordsLocation); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBufferObjID[2]); @@ -265,7 +274,7 @@ void initVAO(void) { GLuint initShader() { - const char *attribLocations[] = { "Position", "Tex" }; + const char *attribLocations[] = {"Position", "Tex"}; GLuint program = glslUtility::createDefaultProgram(attribLocations, 2); GLint location; @@ -298,13 +307,13 @@ void deletePBO(GLuint *pbo) { glBindBuffer(GL_ARRAY_BUFFER, *pbo); glDeleteBuffers(1, pbo); - *pbo = (GLuint)NULL; + *pbo = (GLuint) NULL; } } void deleteTexture(GLuint *tex) { glDeleteTextures(1, tex); - *tex = (GLuint)NULL; + *tex = (GLuint) NULL; } void shut_down(int return_code) { @@ -327,6 +336,9 @@ void errorCallback(int error, const char *description) { void keyCallback(GLFWwindow *window, int key, int scancode, int action, int mods) { if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) { glfwSetWindowShouldClose(window, GL_TRUE); + } else if (key == GLFW_KEY_N && action == GLFW_RELEASE) { + primitive_type++; + if (primitive_type > 2) primitive_type = 0; } } @@ -334,9 +346,9 @@ void keyCallback(GLFWwindow *window, int key, int scancode, int action, int mods //----- util ----------------- //---------------------------- static std::string getFilePathExtension(const std::string &FileName) { - if (FileName.find_last_of(".") != std::string::npos) - return FileName.substr(FileName.find_last_of(".") + 1); - return ""; + if (FileName.find_last_of(".") != std::string::npos) + return FileName.substr(FileName.find_last_of(".") + 1); + return ""; } @@ -345,56 +357,48 @@ static std::string getFilePathExtension(const std::string &FileName) { //---- Mouse control ---------- //----------------------------- -enum ControlState { NONE = 0, ROTATE, TRANSLATE }; +enum ControlState { + NONE = 0, ROTATE, TRANSLATE +}; ControlState mouseState = NONE; -void mouseButtonCallback(GLFWwindow* window, int button, int action, int mods) -{ - if (action == GLFW_PRESS) - { - if (button == GLFW_MOUSE_BUTTON_LEFT) - { - mouseState = ROTATE; - } - else if (button == GLFW_MOUSE_BUTTON_RIGHT) - { - mouseState = TRANSLATE; - } - - } - else if (action == GLFW_RELEASE) - { - mouseState = NONE; - } + +void mouseButtonCallback(GLFWwindow *window, int button, int action, int mods) { + if (action == GLFW_PRESS) { + if (button == GLFW_MOUSE_BUTTON_LEFT) { + mouseState = ROTATE; + } else if (button == GLFW_MOUSE_BUTTON_RIGHT) { + mouseState = TRANSLATE; + } + + } else if (action == GLFW_RELEASE) { + mouseState = NONE; + } } -double lastx = (double)width / 2; -double lasty = (double)height / 2; -void mouseMotionCallback(GLFWwindow* window, double xpos, double ypos) -{ - const double s_r = 0.01; - const double s_t = 0.01; - - double diffx = xpos - lastx; - double diffy = ypos - lasty; - lastx = xpos; - lasty = ypos; - - if (mouseState == ROTATE) - { - //rotate - x_angle += (float)s_r * diffy; - y_angle += (float)s_r * diffx; - } - else if (mouseState == TRANSLATE) - { - //translate - x_trans += (float)(s_t * diffx); - y_trans += (float)(-s_t * diffy); - } +double lastx = (double) width / 2; +double lasty = (double) height / 2; + +void mouseMotionCallback(GLFWwindow *window, double xpos, double ypos) { + const double s_r = 0.01; + const double s_t = 0.01; + + double diffx = xpos - lastx; + double diffy = ypos - lasty; + lastx = xpos; + lasty = ypos; + + if (mouseState == ROTATE) { + //rotate + x_angle += (float) s_r * diffy; + y_angle += (float) s_r * diffx; + } else if (mouseState == TRANSLATE) { + //translate + x_trans += (float) (s_t * diffx); + y_trans += (float) (-s_t * diffy); + } } -void mouseWheelCallback(GLFWwindow* window, double xoffset, double yoffset) -{ - const double s = 1.0; // sensitivity - z_trans += (float)(s * yoffset); +void mouseWheelCallback(GLFWwindow *window, double xoffset, double yoffset) { + const double s = 1.0; // sensitivity + z_trans += (float) (s * yoffset); } diff --git a/src/rasterize.cu b/src/rasterize.cu index 1262a09..4b3cdb8 100644 --- a/src/rasterize.cu +++ b/src/rasterize.cu @@ -1,10 +1,10 @@ /** - * @file rasterize.cu - * @brief CUDA-accelerated rasterization pipeline. - * @authors Skeleton code: Yining Karl Li, Kai Ninomiya, Shuai Shao (Shrek) - * @date 2012-2016 - * @copyright University of Pennsylvania & STUDENT - */ +* @file rasterize.cu +* @brief CUDA-accelerated rasterization pipeline. +* @authors Skeleton code: Yining Karl Li, Kai Ninomiya, Shuai Shao (Shrek) +* @date 2012-2016 +* @copyright University of Pennsylvania & STUDENT +*/ #include #include @@ -17,104 +17,137 @@ #include "rasterize.h" #include #include +#include +#include + +#define SSAA_RES 1 +#define CORRECT_PROSPECTIVE 1 +#define BACKFACE_CULLING 0 + +//#define LINE 0 +//#define POINT 0 +//#define TRIANGLE 1 + +#ifndef imax +#define imax(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef imin +#define imin(a, b) (((a) < (b)) ? (a) : (b)) +#endif +#define SCREENGAMMA 2.2 + +template +__host__ __device__ + +void swap(T &a, T &b) { + T tmp(a); + a = b; + b = tmp; +} namespace { - typedef unsigned short VertexIndex; - typedef glm::vec3 VertexAttributePosition; - typedef glm::vec3 VertexAttributeNormal; - typedef glm::vec2 VertexAttributeTexcoord; - typedef unsigned char TextureData; - - typedef unsigned char BufferByte; - - enum PrimitiveType{ - Point = 1, - Line = 2, - Triangle = 3 - }; - - struct VertexOut { - glm::vec4 pos; - - // TODO: add new attributes to your VertexOut - // The attributes listed below might be useful, - // but always feel free to modify on your own - - glm::vec3 eyePos; // eye space position used for shading - glm::vec3 eyeNor; // eye space normal used for shading, cuz normal will go wrong after perspective transformation - // glm::vec3 col; - glm::vec2 texcoord0; - TextureData* dev_diffuseTex = NULL; - // int texWidth, texHeight; - // ... - }; - - struct Primitive { - PrimitiveType primitiveType = Triangle; // C++ 11 init - VertexOut v[3]; - }; - - struct Fragment { - glm::vec3 color; - - // TODO: add new attributes to your Fragment - // The attributes listed below might be useful, - // but always feel free to modify on your own - - // glm::vec3 eyePos; // eye space position used for shading - // glm::vec3 eyeNor; - // VertexAttributeTexcoord texcoord0; - // TextureData* dev_diffuseTex; - // ... - }; - - struct PrimitiveDevBufPointers { - int primitiveMode; //from tinygltfloader macro - PrimitiveType primitiveType; - int numPrimitives; - int numIndices; - int numVertices; - - // Vertex In, const after loaded - VertexIndex* dev_indices; - VertexAttributePosition* dev_position; - VertexAttributeNormal* dev_normal; - VertexAttributeTexcoord* dev_texcoord0; - - // Materials, add more attributes when needed - TextureData* dev_diffuseTex; - int diffuseTexWidth; - int diffuseTexHeight; - // TextureData* dev_specularTex; - // TextureData* dev_normalTex; - // ... - - // Vertex Out, vertex used for rasterization, this is changing every frame - VertexOut* dev_verticesOut; - - // TODO: add more attributes when needed - }; + typedef unsigned short VertexIndex; + typedef glm::vec3 VertexAttributePosition; + typedef glm::vec3 VertexAttributeNormal; + typedef glm::vec2 VertexAttributeTexcoord; + typedef unsigned char TextureData; + + typedef unsigned char BufferByte; + + enum PrimitiveType { + Point = 1, + Line = 2, + Triangle = 3 + }; + + struct VertexOut { + glm::vec4 pos; + + // TODO: add new attributes to your VertexOut + // The attributes listed below might be useful, + // but always feel free to modify on your own + + glm::vec3 eyePos; // eye space position used for shading + glm::vec3 eyeNor; // eye space normal used for shading, cuz normal will go wrong after perspective transformation + glm::vec3 col; + glm::vec2 texcoord0; + TextureData *dev_diffuseTex = NULL; + int texWidth, texHeight; + // ... + }; + + struct Primitive { + PrimitiveType primitiveType = Triangle; // C++ 11 init + VertexOut v[3]; + bool cull = false; + }; + + struct Fragment { + glm::vec3 color; + + // TODO: add new attributes to your Fragment + // The attributes listed below might be useful, + // but always feel free to modify on your own + + glm::vec3 eyePos; // eye space position used for shading + glm::vec3 eyeNor; + VertexAttributeTexcoord texcoord0; + TextureData *dev_diffuseTex; + int texWidth, texHeight; + // ... + }; + + struct PrimitiveDevBufPointers { + int primitiveMode; //from tinygltfloader macro + PrimitiveType primitiveType; + int numPrimitives; + int numIndices; + int numVertices; + + // Vertex In, const after loaded + VertexIndex *dev_indices; + VertexAttributePosition *dev_position; + VertexAttributeNormal *dev_normal; + VertexAttributeTexcoord *dev_texcoord0; + + // Materials, add more attributes when needed + TextureData *dev_diffuseTex; + int diffuseTexWidth; + int diffuseTexHeight; + // TextureData* dev_specularTex; + // TextureData* dev_normalTex; + // ... + + // Vertex Out, vertex used for rasterization, this is changing every frame + VertexOut *dev_verticesOut; + + // TODO: add more attributes when needed + }; } -static std::map> mesh2PrimitivesMap; +static std::map > mesh2PrimitivesMap; static int width = 0; static int height = 0; - +static int screen_width = 0; +static int screen_height = 0; static int totalNumPrimitives = 0; + static Primitive *dev_primitives = NULL; static Fragment *dev_fragmentBuffer = NULL; static glm::vec3 *dev_framebuffer = NULL; -static int * dev_depth = NULL; // you might need this buffer when doing depth test +static int *dev_depth = NULL; // you might need this buffer when doing depth test +static cudaEvent_t start, stop; /** - * Kernel that writes the image to the OpenGL PBO directly. - */ -__global__ +* Kernel that writes the image to the OpenGL PBO directly. +*/ +__global__ void sendImageToPBO(uchar4 *pbo, int w, int h, glm::vec3 *image) { int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; @@ -122,9 +155,16 @@ void sendImageToPBO(uchar4 *pbo, int w, int h, glm::vec3 *image) { if (x < w && y < h) { glm::vec3 color; - color.x = glm::clamp(image[index].x, 0.0f, 1.0f) * 255.0; - color.y = glm::clamp(image[index].y, 0.0f, 1.0f) * 255.0; - color.z = glm::clamp(image[index].z, 0.0f, 1.0f) * 255.0; + for (int i = 0; i < SSAA_RES; i++) { + for (int j = 0; j < SSAA_RES; j++) { + int ss_index = x * SSAA_RES + i + (y * SSAA_RES + j) * w * SSAA_RES; + color.x += glm::clamp(image[ss_index].x, 0.0f, 1.0f) * 255.f; + color.y += glm::clamp(image[ss_index].y, 0.0f, 1.0f) * 255.f; + color.z += glm::clamp(image[ss_index].z, 0.0f, 1.0f) * 255.f; + } + } + color /= (float)(SSAA_RES * SSAA_RES); + // Each thread writes one pixel location in the texture (textel) pbo[index].w = 0; pbo[index].x = color.x; @@ -133,53 +173,113 @@ void sendImageToPBO(uchar4 *pbo, int w, int h, glm::vec3 *image) { } } -/** +__host__ __device__ + +glm::vec3 getRGBColor(const int idx, const TextureData *texture) { + return glm::vec3(texture[idx] / 255.0f, texture[idx + 1] / 255.0f, texture[idx + 2] / 255.0f); +} + + +__host__ __device__ + +glm::vec3 textureMapping(const int w, const int h, const glm::vec2 uv, const TextureData *texture) { + // bilinear interpolation wikipedia + float _x = w * uv.x; + float _y = h * uv.y; + int x = (int)_x; + int y = (int)_y; + + glm::vec3 q00 = getRGBColor(3 * (x + y * w), texture); + glm::vec3 q10 = getRGBColor(3 * (x + 1 + y * w), texture); + glm::vec3 q01 = getRGBColor(3 * (x + (y + 1) * w), texture); + glm::vec3 q11 = getRGBColor(3 * (x + 1 + (y + 1) * w), texture); + + float dx = _x - x; + float dy = _y - y; + + return (q00 * (1.f - dx) * (1.f - dy) + q10 * (1.f - dy) * dx + q01 * (1.f - dx) * dy + q11 * dx * dy); +} + + +/** * Writes fragment colors to the framebuffer */ __global__ -void render(int w, int h, Fragment *fragmentBuffer, glm::vec3 *framebuffer) { +void render(int w, int h, Fragment *fragmentBuffer, glm::vec3 *framebuffer, int primitive_type) { int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; int index = x + (y * w); if (x < w && y < h) { - framebuffer[index] = fragmentBuffer[index].color; - - // TODO: add your fragment shader code here + // TODO: add your fragment shader code here + // texture mapping + if (primitive_type == 0) { + if (fragmentBuffer[index].dev_diffuseTex != NULL) { + glm::vec3 diffuseTexture = textureMapping(fragmentBuffer[index].texWidth, fragmentBuffer[index].texHeight, + fragmentBuffer[index].texcoord0, + fragmentBuffer[index].dev_diffuseTex); + // wikipedia blin phong shading model + glm::vec3 lightPos = glm::vec3(50.f); + glm::vec3 lightColor = glm::vec3(1.0f); + glm::vec3 ambientColor = glm::vec3(0.9f); + glm::vec3 specular = glm::vec3(0.9f); + float lightPower = 1.2; + + glm::vec3 lightDir = glm::normalize(lightPos - fragmentBuffer[index].eyePos); + glm::vec3 eyeDir = glm::normalize(-fragmentBuffer[index].eyePos); + float lambertian = imax(glm::dot(fragmentBuffer[index].eyeNor, lightDir), 0); + + specular *= pow(imax(glm::dot(glm::normalize(lightDir + eyeDir), fragmentBuffer[index].eyeNor), 0), 16.0f); + + glm::vec3 color = + ambientColor * 0.1f * lightColor + (diffuseTexture * lambertian + specular) * lightColor * lightPower; + + color = pow(color, glm::vec3(1.f / SCREENGAMMA)); + + framebuffer[index] = color; + } + else { + framebuffer[index] = fragmentBuffer[index].color; + } + + } + else { + framebuffer[index] = fragmentBuffer[index].color; + } } } /** - * Called once at the beginning of the program to allocate memory. - */ +* Called once at the beginning of the program to allocate memory. +*/ void rasterizeInit(int w, int h) { - width = w; - height = h; - cudaFree(dev_fragmentBuffer); - cudaMalloc(&dev_fragmentBuffer, width * height * sizeof(Fragment)); - cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment)); + width = w * SSAA_RES; + height = h * SSAA_RES; + screen_width = w; + screen_height = h; + cudaFree(dev_fragmentBuffer); + cudaMalloc(&dev_fragmentBuffer, width * height * sizeof(Fragment)); + cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment)); cudaFree(dev_framebuffer); - cudaMalloc(&dev_framebuffer, width * height * sizeof(glm::vec3)); + cudaMalloc(&dev_framebuffer, width * height * sizeof(glm::vec3)); cudaMemset(dev_framebuffer, 0, width * height * sizeof(glm::vec3)); - - cudaFree(dev_depth); - cudaMalloc(&dev_depth, width * height * sizeof(int)); - checkCUDAError("rasterizeInit"); + cudaFree(dev_depth); + cudaMalloc(&dev_depth, width * height * sizeof(int)); + + checkCUDAError("rasterizeInit"); } __global__ -void initDepth(int w, int h, int * depth) -{ - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < w && y < h) - { - int index = x + (y * w); - depth[index] = INT_MAX; - } +void initDepth(int w, int h, int *depth) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < w && y < h) { + int index = x + (y * w); + depth[index] = INT_MAX; + } } @@ -187,590 +287,867 @@ void initDepth(int w, int h, int * depth) * kern function with support for stride to sometimes replace cudaMemcpy * One thread is responsible for copying one component */ -__global__ -void _deviceBufferCopy(int N, BufferByte* dev_dst, const BufferByte* dev_src, int n, int byteStride, int byteOffset, int componentTypeByteSize) { - - // Attribute (vec3 position) - // component (3 * float) - // byte (4 * byte) - - // id of component - int i = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (i < N) { - int count = i / n; - int offset = i - count * n; // which component of the attribute - - for (int j = 0; j < componentTypeByteSize; j++) { - - dev_dst[count * componentTypeByteSize * n - + offset * componentTypeByteSize - + j] - - = - - dev_src[byteOffset - + count * (byteStride == 0 ? componentTypeByteSize * n : byteStride) - + offset * componentTypeByteSize - + j]; - } - } - +__global__ +void _deviceBufferCopy(int N, BufferByte *dev_dst, const BufferByte *dev_src, int n, int byteStride, int byteOffset, + int componentTypeByteSize) { + + // Attribute (vec3 position) + // component (3 * float) + // byte (4 * byte) + + // id of component + int i = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (i < N) { + int count = i / n; + int offset = i - count * n; // which component of the attribute + + for (int j = 0; j < componentTypeByteSize; j++) { + + dev_dst[count * componentTypeByteSize * n + + offset * componentTypeByteSize + + j] + + = + + dev_src[byteOffset + + count * (byteStride == 0 ? componentTypeByteSize * n : byteStride) + + offset * componentTypeByteSize + + j]; + } + } + } __global__ void _nodeMatrixTransform( - int numVertices, - VertexAttributePosition* position, - VertexAttributeNormal* normal, - glm::mat4 MV, glm::mat3 MV_normal) { - - // vertex id - int vid = (blockIdx.x * blockDim.x) + threadIdx.x; - if (vid < numVertices) { - position[vid] = glm::vec3(MV * glm::vec4(position[vid], 1.0f)); - normal[vid] = glm::normalize(MV_normal * normal[vid]); - } + int numVertices, + VertexAttributePosition *position, + VertexAttributeNormal *normal, + glm::mat4 MV, glm::mat3 MV_normal) { + + // vertex id + int vid = (blockIdx.x * blockDim.x) + threadIdx.x; + if (vid < numVertices) { + position[vid] = glm::vec3(MV * glm::vec4(position[vid], 1.0f)); + normal[vid] = glm::normalize(MV_normal * normal[vid]); + } } -glm::mat4 getMatrixFromNodeMatrixVector(const tinygltf::Node & n) { - - glm::mat4 curMatrix(1.0); - - const std::vector &m = n.matrix; - if (m.size() > 0) { - // matrix, copy it - - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - curMatrix[i][j] = (float)m.at(4 * i + j); - } - } - } else { - // no matrix, use rotation, scale, translation - - if (n.translation.size() > 0) { - curMatrix[3][0] = n.translation[0]; - curMatrix[3][1] = n.translation[1]; - curMatrix[3][2] = n.translation[2]; - } - - if (n.rotation.size() > 0) { - glm::mat4 R; - glm::quat q; - q[0] = n.rotation[0]; - q[1] = n.rotation[1]; - q[2] = n.rotation[2]; - - R = glm::mat4_cast(q); - curMatrix = curMatrix * R; - } - - if (n.scale.size() > 0) { - curMatrix = curMatrix * glm::scale(glm::vec3(n.scale[0], n.scale[1], n.scale[2])); - } - } - - return curMatrix; -} +glm::mat4 getMatrixFromNodeMatrixVector(const tinygltf::Node &n) { -void traverseNode ( - std::map & n2m, - const tinygltf::Scene & scene, - const std::string & nodeString, - const glm::mat4 & parentMatrix - ) -{ - const tinygltf::Node & n = scene.nodes.at(nodeString); - glm::mat4 M = parentMatrix * getMatrixFromNodeMatrixVector(n); - n2m.insert(std::pair(nodeString, M)); - - auto it = n.children.begin(); - auto itEnd = n.children.end(); - - for (; it != itEnd; ++it) { - traverseNode(n2m, scene, *it, M); - } -} + glm::mat4 curMatrix(1.0); -void rasterizeSetBuffers(const tinygltf::Scene & scene) { + const std::vector &m = n.matrix; + if (m.size() > 0) { + // matrix, copy it - totalNumPrimitives = 0; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + curMatrix[i][j] = (float)m.at(4 * i + j); + } + } + } + else { + // no matrix, use rotation, scale, translation + + if (n.translation.size() > 0) { + curMatrix[3][0] = n.translation[0]; + curMatrix[3][1] = n.translation[1]; + curMatrix[3][2] = n.translation[2]; + } + + if (n.rotation.size() > 0) { + glm::mat4 R; + glm::quat q; + q[0] = n.rotation[0]; + q[1] = n.rotation[1]; + q[2] = n.rotation[2]; + + R = glm::mat4_cast(q); + curMatrix = curMatrix * R; + } + + if (n.scale.size() > 0) { + curMatrix = curMatrix * glm::scale(glm::vec3(n.scale[0], n.scale[1], n.scale[2])); + } + } - std::map bufferViewDevPointers; + return curMatrix; +} - // 1. copy all `bufferViews` to device memory - { - std::map::const_iterator it( - scene.bufferViews.begin()); - std::map::const_iterator itEnd( - scene.bufferViews.end()); +void traverseNode( + std::map &n2m, + const tinygltf::Scene &scene, + const std::string &nodeString, + const glm::mat4 &parentMatrix +) { + const tinygltf::Node &n = scene.nodes.at(nodeString); + glm::mat4 M = parentMatrix * getMatrixFromNodeMatrixVector(n); + n2m.insert(std::pair(nodeString, M)); + + auto it = n.children.begin(); + auto itEnd = n.children.end(); + + for (; it != itEnd; ++it) { + traverseNode(n2m, scene, *it, M); + } +} - for (; it != itEnd; it++) { - const std::string key = it->first; - const tinygltf::BufferView &bufferView = it->second; - if (bufferView.target == 0) { - continue; // Unsupported bufferView. - } +void rasterizeSetBuffers(const tinygltf::Scene &scene) { - const tinygltf::Buffer &buffer = scene.buffers.at(bufferView.buffer); + totalNumPrimitives = 0; - BufferByte* dev_bufferView; - cudaMalloc(&dev_bufferView, bufferView.byteLength); - cudaMemcpy(dev_bufferView, &buffer.data.front() + bufferView.byteOffset, bufferView.byteLength, cudaMemcpyHostToDevice); + std::map < std::string, BufferByte * > bufferViewDevPointers; - checkCUDAError("Set BufferView Device Mem"); + // 1. copy all `bufferViews` to device memory + { + std::map::const_iterator it( + scene.bufferViews.begin()); + std::map::const_iterator itEnd( + scene.bufferViews.end()); - bufferViewDevPointers.insert(std::make_pair(key, dev_bufferView)); + for (; it != itEnd; it++) { + const std::string key = it->first; + const tinygltf::BufferView &bufferView = it->second; + if (bufferView.target == 0) { + continue; // Unsupported bufferView. + } - } - } + const tinygltf::Buffer &buffer = scene.buffers.at(bufferView.buffer); + BufferByte *dev_bufferView; + cudaMalloc(&dev_bufferView, bufferView.byteLength); + cudaMemcpy(dev_bufferView, &buffer.data.front() + bufferView.byteOffset, bufferView.byteLength, + cudaMemcpyHostToDevice); + checkCUDAError("Set BufferView Device Mem"); - // 2. for each mesh: - // for each primitive: - // build device buffer of indices, materail, and each attributes - // and store these pointers in a map - { + bufferViewDevPointers.insert(std::make_pair(key, dev_bufferView)); - std::map nodeString2Matrix; - auto rootNodeNamesList = scene.scenes.at(scene.defaultScene); - - { - auto it = rootNodeNamesList.begin(); - auto itEnd = rootNodeNamesList.end(); - for (; it != itEnd; ++it) { - traverseNode(nodeString2Matrix, scene, *it, glm::mat4(1.0f)); - } - } + } + } - // parse through node to access mesh - auto itNode = nodeString2Matrix.begin(); - auto itEndNode = nodeString2Matrix.end(); - for (; itNode != itEndNode; ++itNode) { - - const tinygltf::Node & N = scene.nodes.at(itNode->first); - const glm::mat4 & matrix = itNode->second; - const glm::mat3 & matrixNormal = glm::transpose(glm::inverse(glm::mat3(matrix))); - - auto itMeshName = N.meshes.begin(); - auto itEndMeshName = N.meshes.end(); - - for (; itMeshName != itEndMeshName; ++itMeshName) { - - const tinygltf::Mesh & mesh = scene.meshes.at(*itMeshName); - - auto res = mesh2PrimitivesMap.insert(std::pair>(mesh.name, std::vector())); - std::vector & primitiveVector = (res.first)->second; - - // for each primitive - for (size_t i = 0; i < mesh.primitives.size(); i++) { - const tinygltf::Primitive &primitive = mesh.primitives[i]; - - if (primitive.indices.empty()) - return; - - // TODO: add new attributes for your PrimitiveDevBufPointers when you add new attributes - VertexIndex* dev_indices = NULL; - VertexAttributePosition* dev_position = NULL; - VertexAttributeNormal* dev_normal = NULL; - VertexAttributeTexcoord* dev_texcoord0 = NULL; - - // ----------Indices------------- - - const tinygltf::Accessor &indexAccessor = scene.accessors.at(primitive.indices); - const tinygltf::BufferView &bufferView = scene.bufferViews.at(indexAccessor.bufferView); - BufferByte* dev_bufferView = bufferViewDevPointers.at(indexAccessor.bufferView); - - // assume type is SCALAR for indices - int n = 1; - int numIndices = indexAccessor.count; - int componentTypeByteSize = sizeof(VertexIndex); - int byteLength = numIndices * n * componentTypeByteSize; - - dim3 numThreadsPerBlock(128); - dim3 numBlocks((numIndices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); - cudaMalloc(&dev_indices, byteLength); - _deviceBufferCopy << > > ( - numIndices, - (BufferByte*)dev_indices, - dev_bufferView, - n, - indexAccessor.byteStride, - indexAccessor.byteOffset, - componentTypeByteSize); - - - checkCUDAError("Set Index Buffer"); - - - // ---------Primitive Info------- - - // Warning: LINE_STRIP is not supported in tinygltfloader - int numPrimitives; - PrimitiveType primitiveType; - switch (primitive.mode) { - case TINYGLTF_MODE_TRIANGLES: - primitiveType = PrimitiveType::Triangle; - numPrimitives = numIndices / 3; - break; - case TINYGLTF_MODE_TRIANGLE_STRIP: - primitiveType = PrimitiveType::Triangle; - numPrimitives = numIndices - 2; - break; - case TINYGLTF_MODE_TRIANGLE_FAN: - primitiveType = PrimitiveType::Triangle; - numPrimitives = numIndices - 2; - break; - case TINYGLTF_MODE_LINE: - primitiveType = PrimitiveType::Line; - numPrimitives = numIndices / 2; - break; - case TINYGLTF_MODE_LINE_LOOP: - primitiveType = PrimitiveType::Line; - numPrimitives = numIndices + 1; - break; - case TINYGLTF_MODE_POINTS: - primitiveType = PrimitiveType::Point; - numPrimitives = numIndices; - break; - default: - // output error - break; - }; - - - // ----------Attributes------------- - - auto it(primitive.attributes.begin()); - auto itEnd(primitive.attributes.end()); - - int numVertices = 0; - // for each attribute - for (; it != itEnd; it++) { - const tinygltf::Accessor &accessor = scene.accessors.at(it->second); - const tinygltf::BufferView &bufferView = scene.bufferViews.at(accessor.bufferView); - - int n = 1; - if (accessor.type == TINYGLTF_TYPE_SCALAR) { - n = 1; - } - else if (accessor.type == TINYGLTF_TYPE_VEC2) { - n = 2; - } - else if (accessor.type == TINYGLTF_TYPE_VEC3) { - n = 3; - } - else if (accessor.type == TINYGLTF_TYPE_VEC4) { - n = 4; - } + // 2. for each mesh: + // for each primitive: + // build device buffer of indices, materail, and each attributes + // and store these pointers in a map + { + + std::map nodeString2Matrix; + auto rootNodeNamesList = scene.scenes.at(scene.defaultScene); + + { + auto it = rootNodeNamesList.begin(); + auto itEnd = rootNodeNamesList.end(); + for (; it != itEnd; ++it) { + traverseNode(nodeString2Matrix, scene, *it, glm::mat4(1.0f)); + } + } + + + // parse through node to access mesh + + auto itNode = nodeString2Matrix.begin(); + auto itEndNode = nodeString2Matrix.end(); + for (; itNode != itEndNode; ++itNode) { + + const tinygltf::Node &N = scene.nodes.at(itNode->first); + const glm::mat4 &matrix = itNode->second; + const glm::mat3 &matrixNormal = glm::transpose(glm::inverse(glm::mat3(matrix))); + + auto itMeshName = N.meshes.begin(); + auto itEndMeshName = N.meshes.end(); + + for (; itMeshName != itEndMeshName; ++itMeshName) { + + const tinygltf::Mesh &mesh = scene.meshes.at(*itMeshName); + + auto res = mesh2PrimitivesMap.insert(std::pair < std::string, std::vector < PrimitiveDevBufPointers + >> (mesh.name, std::vector())); + std::vector &primitiveVector = (res.first)->second; + + // for each primitive + for (size_t i = 0; i < mesh.primitives.size(); i++) { + const tinygltf::Primitive &primitive = mesh.primitives[i]; + + if (primitive.indices.empty()) + return; + + // TODO: add new attributes for your PrimitiveDevBufPointers when you add new attributes + VertexIndex *dev_indices = NULL; + VertexAttributePosition *dev_position = NULL; + VertexAttributeNormal *dev_normal = NULL; + VertexAttributeTexcoord *dev_texcoord0 = NULL; + + // ----------Indices------------- + + const tinygltf::Accessor &indexAccessor = scene.accessors.at(primitive.indices); + const tinygltf::BufferView &bufferView = scene.bufferViews.at(indexAccessor.bufferView); + BufferByte *dev_bufferView = bufferViewDevPointers.at(indexAccessor.bufferView); + + // assume type is SCALAR for indices + int n = 1; + int numIndices = indexAccessor.count; + int componentTypeByteSize = sizeof(VertexIndex); + int byteLength = numIndices * n * componentTypeByteSize; + + dim3 numThreadsPerBlock(128); + dim3 numBlocks((numIndices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + cudaMalloc(&dev_indices, byteLength); + _deviceBufferCopy << < numBlocks, numThreadsPerBlock >> > ( + numIndices, + (BufferByte *)dev_indices, + dev_bufferView, + n, + indexAccessor.byteStride, + indexAccessor.byteOffset, + componentTypeByteSize); + + + checkCUDAError("Set Index Buffer"); + + + // ---------Primitive Info------- + + // Warning: LINE_STRIP is not supported in tinygltfloader + int numPrimitives; + PrimitiveType primitiveType; + switch (primitive.mode) { + case TINYGLTF_MODE_TRIANGLES: + primitiveType = PrimitiveType::Triangle; + numPrimitives = numIndices / 3; + break; + case TINYGLTF_MODE_TRIANGLE_STRIP: + primitiveType = PrimitiveType::Triangle; + numPrimitives = numIndices - 2; + break; + case TINYGLTF_MODE_TRIANGLE_FAN: + primitiveType = PrimitiveType::Triangle; + numPrimitives = numIndices - 2; + break; + case TINYGLTF_MODE_LINE: + primitiveType = PrimitiveType::Line; + numPrimitives = numIndices / 2; + break; + case TINYGLTF_MODE_LINE_LOOP: + primitiveType = PrimitiveType::Line; + numPrimitives = numIndices + 1; + break; + case TINYGLTF_MODE_POINTS: + primitiveType = PrimitiveType::Point; + numPrimitives = numIndices; + break; + default: + // output error + break; + }; + + + // ----------Attributes------------- + + auto it(primitive.attributes.begin()); + auto itEnd(primitive.attributes.end()); + + int numVertices = 0; + // for each attribute + for (; it != itEnd; it++) { + const tinygltf::Accessor &accessor = scene.accessors.at(it->second); + const tinygltf::BufferView &bufferView = scene.bufferViews.at(accessor.bufferView); + + int n = 1; + if (accessor.type == TINYGLTF_TYPE_SCALAR) { + n = 1; + } + else if (accessor.type == TINYGLTF_TYPE_VEC2) { + n = 2; + } + else if (accessor.type == TINYGLTF_TYPE_VEC3) { + n = 3; + } + else if (accessor.type == TINYGLTF_TYPE_VEC4) { + n = 4; + } + + BufferByte *dev_bufferView = bufferViewDevPointers.at(accessor.bufferView); + BufferByte **dev_attribute = NULL; + + numVertices = accessor.count; + int componentTypeByteSize; + + // Note: since the type of our attribute array (dev_position) is static (float32) + // We assume the glTF model attribute type are 5126(FLOAT) here + + if (it->first.compare("POSITION") == 0) { + componentTypeByteSize = sizeof(VertexAttributePosition) / n; + dev_attribute = (BufferByte **)&dev_position; + } + else if (it->first.compare("NORMAL") == 0) { + componentTypeByteSize = sizeof(VertexAttributeNormal) / n; + dev_attribute = (BufferByte **)&dev_normal; + } + else if (it->first.compare("TEXCOORD_0") == 0) { + componentTypeByteSize = sizeof(VertexAttributeTexcoord) / n; + dev_attribute = (BufferByte **)&dev_texcoord0; + } + + std::cout << accessor.bufferView << " - " << it->second << " - " << it->first << '\n'; + + dim3 numThreadsPerBlock(128); + dim3 numBlocks((n * numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + int byteLength = numVertices * n * componentTypeByteSize; + cudaMalloc(dev_attribute, byteLength); + + _deviceBufferCopy << < numBlocks, numThreadsPerBlock >> > ( + n * numVertices, + *dev_attribute, + dev_bufferView, + n, + accessor.byteStride, + accessor.byteOffset, + componentTypeByteSize); + + std::string msg = "Set Attribute Buffer: " + it->first; + checkCUDAError(msg.c_str()); + } + + // malloc for VertexOut + VertexOut *dev_vertexOut; + cudaMalloc(&dev_vertexOut, numVertices * sizeof(VertexOut)); + checkCUDAError("Malloc VertexOut Buffer"); + + // ----------Materials------------- + + // You can only worry about this part once you started to + // implement textures for your rasterizer + TextureData *dev_diffuseTex = NULL; + int diffuseTexWidth = 0; + int diffuseTexHeight = 0; + if (!primitive.material.empty()) { + const tinygltf::Material &mat = scene.materials.at(primitive.material); + printf("material.name = %s\n", mat.name.c_str()); + + if (mat.values.find("diffuse") != mat.values.end()) { + std::string diffuseTexName = mat.values.at("diffuse").string_value; + if (scene.textures.find(diffuseTexName) != scene.textures.end()) { + const tinygltf::Texture &tex = scene.textures.at(diffuseTexName); + if (scene.images.find(tex.source) != scene.images.end()) { + const tinygltf::Image &image = scene.images.at(tex.source); + + size_t s = image.image.size() * sizeof(TextureData); + cudaMalloc(&dev_diffuseTex, s); + cudaMemcpy(dev_diffuseTex, &image.image.at(0), s, cudaMemcpyHostToDevice); + + diffuseTexWidth = image.width; + diffuseTexHeight = image.height; + + checkCUDAError("Set Texture Image data"); + } + } + } + + // TODO: write your code for other materails + // You may have to take a look at tinygltfloader + // You can also use the above code loading diffuse material as a start point + } + + + // ---------Node hierarchy transform-------- + cudaDeviceSynchronize(); + + dim3 numBlocksNodeTransform((numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + _nodeMatrixTransform << < numBlocksNodeTransform, numThreadsPerBlock >> > ( + numVertices, + dev_position, + dev_normal, + matrix, + matrixNormal); + + checkCUDAError("Node hierarchy transformation"); + + // at the end of the for loop of primitive + // push dev pointers to map + primitiveVector.push_back(PrimitiveDevBufPointers{ + primitive.mode, + primitiveType, + numPrimitives, + numIndices, + numVertices, + + dev_indices, + dev_position, + dev_normal, + dev_texcoord0, + + dev_diffuseTex, + diffuseTexWidth, + diffuseTexHeight, + + dev_vertexOut //VertexOut + }); + + totalNumPrimitives += numPrimitives; + + } // for each primitive + + } // for each mesh + + } // for each node - BufferByte * dev_bufferView = bufferViewDevPointers.at(accessor.bufferView); - BufferByte ** dev_attribute = NULL; + } - numVertices = accessor.count; - int componentTypeByteSize; - // Note: since the type of our attribute array (dev_position) is static (float32) - // We assume the glTF model attribute type are 5126(FLOAT) here + // 3. Malloc for dev_primitives + { + cudaMalloc(&dev_primitives, totalNumPrimitives * sizeof(Primitive)); + } - if (it->first.compare("POSITION") == 0) { - componentTypeByteSize = sizeof(VertexAttributePosition) / n; - dev_attribute = (BufferByte**)&dev_position; - } - else if (it->first.compare("NORMAL") == 0) { - componentTypeByteSize = sizeof(VertexAttributeNormal) / n; - dev_attribute = (BufferByte**)&dev_normal; - } - else if (it->first.compare("TEXCOORD_0") == 0) { - componentTypeByteSize = sizeof(VertexAttributeTexcoord) / n; - dev_attribute = (BufferByte**)&dev_texcoord0; - } - std::cout << accessor.bufferView << " - " << it->second << " - " << it->first << '\n'; - - dim3 numThreadsPerBlock(128); - dim3 numBlocks((n * numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); - int byteLength = numVertices * n * componentTypeByteSize; - cudaMalloc(dev_attribute, byteLength); - - _deviceBufferCopy << > > ( - n * numVertices, - *dev_attribute, - dev_bufferView, - n, - accessor.byteStride, - accessor.byteOffset, - componentTypeByteSize); - - std::string msg = "Set Attribute Buffer: " + it->first; - checkCUDAError(msg.c_str()); - } - - // malloc for VertexOut - VertexOut* dev_vertexOut; - cudaMalloc(&dev_vertexOut, numVertices * sizeof(VertexOut)); - checkCUDAError("Malloc VertexOut Buffer"); - - // ----------Materials------------- - - // You can only worry about this part once you started to - // implement textures for your rasterizer - TextureData* dev_diffuseTex = NULL; - int diffuseTexWidth = 0; - int diffuseTexHeight = 0; - if (!primitive.material.empty()) { - const tinygltf::Material &mat = scene.materials.at(primitive.material); - printf("material.name = %s\n", mat.name.c_str()); - - if (mat.values.find("diffuse") != mat.values.end()) { - std::string diffuseTexName = mat.values.at("diffuse").string_value; - if (scene.textures.find(diffuseTexName) != scene.textures.end()) { - const tinygltf::Texture &tex = scene.textures.at(diffuseTexName); - if (scene.images.find(tex.source) != scene.images.end()) { - const tinygltf::Image &image = scene.images.at(tex.source); - - size_t s = image.image.size() * sizeof(TextureData); - cudaMalloc(&dev_diffuseTex, s); - cudaMemcpy(dev_diffuseTex, &image.image.at(0), s, cudaMemcpyHostToDevice); - - diffuseTexWidth = image.width; - diffuseTexHeight = image.height; - - checkCUDAError("Set Texture Image data"); - } - } - } + // Finally, cudaFree raw dev_bufferViews + { - // TODO: write your code for other materails - // You may have to take a look at tinygltfloader - // You can also use the above code loading diffuse material as a start point - } + std::map::const_iterator it(bufferViewDevPointers.begin()); + std::map::const_iterator itEnd(bufferViewDevPointers.end()); + //bufferViewDevPointers - // ---------Node hierarchy transform-------- - cudaDeviceSynchronize(); - - dim3 numBlocksNodeTransform((numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); - _nodeMatrixTransform << > > ( - numVertices, - dev_position, - dev_normal, - matrix, - matrixNormal); + for (; it != itEnd; it++) { + cudaFree(it->second); + } - checkCUDAError("Node hierarchy transformation"); + checkCUDAError("Free BufferView Device Mem"); + } - // at the end of the for loop of primitive - // push dev pointers to map - primitiveVector.push_back(PrimitiveDevBufPointers{ - primitive.mode, - primitiveType, - numPrimitives, - numIndices, - numVertices, - dev_indices, - dev_position, - dev_normal, - dev_texcoord0, +} - dev_diffuseTex, - diffuseTexWidth, - diffuseTexHeight, - dev_vertexOut //VertexOut - }); +__global__ +void _vertexTransformAndAssembly( + int numVertices, + PrimitiveDevBufPointers primitive, + glm::mat4 MVP, glm::mat4 MV, glm::mat3 MV_normal, + int width, int height) { + + // vertex id + int vid = (blockIdx.x * blockDim.x) + threadIdx.x; + if (vid < numVertices) { + + // TODO: Apply vertex transformation here + // Multiply the MVP matrix for each vertex position, this will transform everything into clipping space + glm::vec4 pos = MVP * glm::vec4(primitive.dev_position[vid], 1.0f); + // Then divide the pos by its w element to transform into NDC space + pos /= pos.w; + // Finally transform x and y to viewport space + pos.x = (float)width * (1.f - pos.x) / 2.f; + pos.y = (float)height * (1.f - pos.y) / 2.f; + // TODO: Apply vertex assembly here + // Assemble all attribute arraies into the primitive array + primitive.dev_verticesOut[vid].pos = pos; + primitive.dev_verticesOut[vid].eyePos = glm::vec3(MV * glm::vec4(primitive.dev_position[vid], 1.0f)); + primitive.dev_verticesOut[vid].eyeNor = glm::normalize(MV_normal * primitive.dev_normal[vid]); + + if (primitive.dev_diffuseTex != NULL) { + primitive.dev_verticesOut[vid].dev_diffuseTex = primitive.dev_diffuseTex; + primitive.dev_verticesOut[vid].texcoord0 = primitive.dev_texcoord0[vid]; + primitive.dev_verticesOut[vid].texHeight = primitive.diffuseTexHeight; + primitive.dev_verticesOut[vid].texWidth = primitive.diffuseTexWidth; + + } + else { + primitive.dev_verticesOut[vid].dev_diffuseTex = NULL; + + } + } +} - totalNumPrimitives += numPrimitives; - } // for each primitive +static int curPrimitiveBeginId = 0; - } // for each mesh +__global__ +void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive *dev_primitives, + PrimitiveDevBufPointers primitive) { - } // for each node + // index id + int iid = (blockIdx.x * blockDim.x) + threadIdx.x; - } - + if (iid < numIndices) { - // 3. Malloc for dev_primitives - { - cudaMalloc(&dev_primitives, totalNumPrimitives * sizeof(Primitive)); - } - + // TODO: uncomment the following code for a start + // This is primitive assembly for triangles - // Finally, cudaFree raw dev_bufferViews - { + int pid; // id for cur primitives vector + if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) { + pid = iid / (int)primitive.primitiveType; + dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] + = primitive.dev_verticesOut[primitive.dev_indices[iid]]; + dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType].col + = glm::vec3(0.9f); + } - std::map::const_iterator it(bufferViewDevPointers.begin()); - std::map::const_iterator itEnd(bufferViewDevPointers.end()); - - //bufferViewDevPointers - for (; it != itEnd; it++) { - cudaFree(it->second); - } + // TODO: other primitive types (point, line) + } - checkCUDAError("Free BufferView Device Mem"); - } +} +struct primitive_culling { + __host__ __device__ + bool operator()(const Primitive &p) { + return p.cull; + } +}; + +// wikipedia back-face culling +__global__ void _backfaceCulling(const int numPrims, Primitive *dev_primitives) { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index < numPrims) { + glm::vec3 v1 = dev_primitives[index].v[1].eyePos - dev_primitives[index].v[0].eyePos; + glm::vec3 v2 = dev_primitives[index].v[2].eyePos - dev_primitives[index].v[0].eyePos; + glm::vec3 normal = glm::cross(v1, v2); + dev_primitives[index].cull = normal.z >= 0.f; + } } +// rasterization for points and lines +// reference: http://www.cs.cornell.edu/courses/cs4620/2010fa/lectures/07pipeline.pdf +__global__ void _rasterizePoint(const int numPrims, const int height, const int width, + Primitive *dev_primitives, Fragment *dev_fragment) { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index < numPrims) { + Primitive curr_prim = dev_primitives[index]; + glm::vec3 tri[3] = { glm::vec3(curr_prim.v[0].pos), + glm::vec3(curr_prim.v[1].pos), glm::vec3(curr_prim.v[2].pos) }; + for (int i = 0; i < 3; i++) { + int x = (int)tri[i].x; + int y = (int)tri[i].y; + if (x >= 0 && x < width && y >= 0 && y < height) { + int pixel = x + y * width; + dev_fragment[pixel].color = glm::vec3(0.4f, 0.8f, 0.4f); + } + } + } - -__global__ -void _vertexTransformAndAssembly( - int numVertices, - PrimitiveDevBufPointers primitive, - glm::mat4 MVP, glm::mat4 MV, glm::mat3 MV_normal, - int width, int height) { - - // vertex id - int vid = (blockIdx.x * blockDim.x) + threadIdx.x; - if (vid < numVertices) { - - // TODO: Apply vertex transformation here - // Multiply the MVP matrix for each vertex position, this will transform everything into clipping space - // Then divide the pos by its w element to transform into NDC space - // Finally transform x and y to viewport space - - // TODO: Apply vertex assembly here - // Assemble all attribute arraies into the primitive array - - } } +__host__ __device__ +void _bresenham(glm::vec3 pt1, glm::vec3 pt2, const int height, const int width, + Fragment *dev_fragment) { + float x1 = glm::clamp(pt1[0], 0.f, (float)(width - 1)); + float x2 = glm::clamp(pt2[0], 0.f, (float)(width - 1)); + float y1 = glm::clamp(pt1[1], 0.f, (float)(height - 1)); + float y2 = glm::clamp(pt2[1], 0.f, (float)(height - 1)); + const glm::vec3 color(0.8f, 0.8f, 0.8f); -static int curPrimitiveBeginId = 0; - -__global__ -void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_primitives, PrimitiveDevBufPointers primitive) { + bool swapped = (fabs(x2 - x1) < fabs(y2 - y1)); - // index id - int iid = (blockIdx.x * blockDim.x) + threadIdx.x; + if (swapped) { + swap(x1, y1); + swap(x2, y2); + } - if (iid < numIndices) { + if (x1 > x2) { + swap(x1, x2); + swap(y1, y2); + } - // TODO: uncomment the following code for a start - // This is primitive assembly for triangles + const float dx = x2 - x1; + const float dy = fabs(y2 - y1); + + float err = dx / 2.0f; + const int step_size = (y1 < y2) ? 1 : -1; + int y = (int)y1; + + int idx; + + for (int x = (int)x1; x < (int)x2; x++) { + if (swapped) { + idx = y + x * width; + dev_fragment[idx].color = color; + } + else { + idx = x + y * width; + dev_fragment[idx].color = color; + } + err -= dy; + if (err < 0) { + y += step_size; + err += dx; + } + } - //int pid; // id for cur primitives vector - //if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) { - // pid = iid / (int)primitive.primitiveType; - // dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] - // = primitive.dev_verticesOut[primitive.dev_indices[iid]]; - //} +} +__global__ void _rasterizeLine(const int numPrims, const int height, const int width, + Primitive *dev_primitives, Fragment *dev_fragment) { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index < numPrims) { + Primitive curr_prim = dev_primitives[index]; + glm::vec3 tri[3] = { glm::vec3(curr_prim.v[0].pos), + glm::vec3(curr_prim.v[1].pos), glm::vec3(curr_prim.v[2].pos) }; + + _bresenham(tri[0], tri[1], height, width, dev_fragment); + _bresenham(tri[0], tri[2], height, width, dev_fragment); + _bresenham(tri[1], tri[2], height, width, dev_fragment); + } - // TODO: other primitive types (point, line) - } - } +__global__ void _rasterizeTraingle(const int numPrims, const int height, const int width, + Primitive *dev_primitives, int *dev_depth, Fragment *dev_fragment) { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index < numPrims) { + Primitive curr_prim = dev_primitives[index]; + glm::vec3 tri[3] = { glm::vec3(curr_prim.v[0].pos), + glm::vec3(curr_prim.v[1].pos), glm::vec3(curr_prim.v[2].pos) }; + + AABB curr_box = getAABBForTriangle(tri); + + /* reference: cis 460 slides + * scratchpixel.com perspective correct interpolation vertex attributes & wikipedia texture mapping + */ + for (int i = imax(curr_box.min.x, 0); i < imin(curr_box.max.x, width); i++) { + for (int j = imax(curr_box.min.y, 0); j < imin(curr_box.max.y, height); j++) { + glm::vec3 bary_coord = calculateBarycentricCoordinate(tri, glm::vec2(i, j)); + int pixel = j * width + i;// huge bug here omg! + if (isBarycentricCoordInBounds(bary_coord)) { + // use color with smallest z-coordinate + int depth = static_cast(glm::clamp(-getZAtCoordinate(bary_coord, tri), -1.f, 1.f) * INT_MAX); + atomicMin(&dev_depth[pixel], depth); + + if (depth == dev_depth[pixel]) { + dev_fragment[pixel].eyeNor = glm::normalize(bary_coord.x * curr_prim.v[0].eyeNor + + bary_coord.y * curr_prim.v[1].eyeNor + + bary_coord.z * curr_prim.v[2].eyeNor); + dev_fragment[pixel].color = bary_coord.x * curr_prim.v[0].col + + bary_coord.y * curr_prim.v[1].col + + bary_coord.z * curr_prim.v[2].col; + + dev_fragment[pixel].eyePos = bary_coord.x * curr_prim.v[0].eyePos + + bary_coord.y * curr_prim.v[1].eyePos + + bary_coord.z * curr_prim.v[2].eyePos; + + +#if CORRECT_PROSPECTIVE + glm::vec3 eyePosition[3] = { glm::vec3(curr_prim.v[0].eyePos), + glm::vec3(curr_prim.v[1].eyePos), glm::vec3(curr_prim.v[2].eyePos) }; + + float z = computeOneOverZ(bary_coord, eyePosition); + + glm::vec3 uv[3] = { glm::vec3(curr_prim.v[0].texcoord0, 0.f), + glm::vec3(curr_prim.v[1].texcoord0, 0.f), + glm::vec3(curr_prim.v[2].texcoord0, 0.f) }; + + if (curr_prim.v[0].dev_diffuseTex != NULL) { + dev_fragment[pixel].dev_diffuseTex = curr_prim.v[0].dev_diffuseTex; + dev_fragment[pixel].texHeight = curr_prim.v[0].texHeight; + dev_fragment[pixel].texWidth = curr_prim.v[0].texWidth; + dev_fragment[pixel].texcoord0 = glm::vec2( + correctCoordPerspective(z, bary_coord, eyePosition, uv)); + } + else { + dev_fragment[pixel].dev_diffuseTex = NULL; + } + +#else + if (curr_prim.v[0].dev_diffuseTex != NULL) { + dev_fragment[pixel].dev_diffuseTex = curr_prim.v[0].dev_diffuseTex; + dev_fragment[pixel].texHeight = curr_prim.v[0].texHeight; + dev_fragment[pixel].texWidth = curr_prim.v[0].texWidth; + dev_fragment[pixel].texcoord0 = bary_coord.x * curr_prim.v[0].texcoord0 + + bary_coord.y * curr_prim.v[1].texcoord0 + + bary_coord.z * curr_prim.v[2].texcoord0; + } + else { + dev_fragment[pixel].dev_diffuseTex = NULL; + } +#endif + } + + } + } + } + } +} + /** - * Perform rasterization. - */ -void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal) { +* Perform rasterization. +*/ +void rasterize(uchar4 *pbo, const glm::mat4 &MVP, const glm::mat4 &MV, const glm::mat3 MV_normal, int primitive_type) { int sideLength2d = 8; dim3 blockSize2d(sideLength2d, sideLength2d); - dim3 blockCount2d((width - 1) / blockSize2d.x + 1, - (height - 1) / blockSize2d.y + 1); - - // Execute your rasterization pipeline here - // (See README for rasterization pipeline outline.) - - // Vertex Process & primitive assembly - { - curPrimitiveBeginId = 0; - dim3 numThreadsPerBlock(128); - - auto it = mesh2PrimitivesMap.begin(); - auto itEnd = mesh2PrimitivesMap.end(); - - for (; it != itEnd; ++it) { - auto p = (it->second).begin(); // each primitive - auto pEnd = (it->second).end(); - for (; p != pEnd; ++p) { - dim3 numBlocksForVertices((p->numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); - dim3 numBlocksForIndices((p->numIndices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); - - _vertexTransformAndAssembly << < numBlocksForVertices, numThreadsPerBlock >> >(p->numVertices, *p, MVP, MV, MV_normal, width, height); - checkCUDAError("Vertex Processing"); - cudaDeviceSynchronize(); - _primitiveAssembly << < numBlocksForIndices, numThreadsPerBlock >> > - (p->numIndices, - curPrimitiveBeginId, - dev_primitives, - *p); - checkCUDAError("Primitive Assembly"); - - curPrimitiveBeginId += p->numPrimitives; - } - } - - checkCUDAError("Vertex Processing and Primitive Assembly"); - } - - cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment)); - initDepth << > >(width, height, dev_depth); - - // TODO: rasterize + dim3 blockCount2d((width - 1) / blockSize2d.x + 1, + (height - 1) / blockSize2d.y + 1); + + // Execute your rasterization pipeline here + // (See README for rasterization pipeline outline.) + cudaEventCreate(&start); + cudaEventCreate(&stop); + float miliseconds = 0; + + // Vertex Process & primitive assembly + dim3 numThreadsPerBlock(128); + { + curPrimitiveBeginId = 0; + + + auto it = mesh2PrimitivesMap.begin(); + auto itEnd = mesh2PrimitivesMap.end(); + + for (; it != itEnd; ++it) { + auto p = (it->second).begin(); // each primitive + auto pEnd = (it->second).end(); + for (; p != pEnd; ++p) { + dim3 numBlocksForVertices((p->numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + dim3 numBlocksForIndices((p->numIndices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + + cudaEventRecord(start); + _vertexTransformAndAssembly << < numBlocksForVertices, numThreadsPerBlock >> > + (p->numVertices, *p, MVP, MV, MV_normal, width, height); + checkCUDAError("Vertex Processing"); + cudaDeviceSynchronize(); + + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + + cudaEventRecord(start); + _primitiveAssembly << < numBlocksForIndices, numThreadsPerBlock >> > + (p->numIndices, + curPrimitiveBeginId, + dev_primitives, + *p); + checkCUDAError("Primitive Assembly"); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + + curPrimitiveBeginId += p->numPrimitives; + } + } + + checkCUDAError("Vertex Processing and Primitive Assembly"); + } + cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment)); + + cudaEventRecord(start); + initDepth << < blockCount2d, blockSize2d >> > (width, height, dev_depth); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + + // backface culling + dim3 numBlocksForPrimitives; +#if BACKFACE_CULLING + numBlocksForPrimitives = dim3((curPrimitiveBeginId + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + + cudaEventRecord(start); + _backfaceCulling << < numBlocksForPrimitives, numThreadsPerBlock >> > (curPrimitiveBeginId, dev_primitives); + Primitive *culled_primitives = thrust::partition(thrust::device, dev_primitives, + dev_primitives + curPrimitiveBeginId, primitive_culling()); + checkCUDAError("BackFace culling error"); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + curPrimitiveBeginId = (int)(culled_primitives - dev_primitives); +#endif + + // TODO: rasterize + numBlocksForPrimitives = dim3((curPrimitiveBeginId + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + + if (primitive_type == 1) { + cudaEventRecord(start); + _rasterizePoint << < numBlocksForPrimitives, numThreadsPerBlock >> > (curPrimitiveBeginId, height, width, + dev_primitives, dev_fragmentBuffer); + checkCUDAError("Point rasterization error"); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + } + else if (primitive_type == 2) { + cudaEventRecord(start); + _rasterizeLine << < numBlocksForPrimitives, numThreadsPerBlock >> > (curPrimitiveBeginId, height, width, + dev_primitives, dev_fragmentBuffer); + checkCUDAError("Line rasterization error"); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + } + + else { + cudaEventRecord(start); + _rasterizeTraingle << < numBlocksForPrimitives, numThreadsPerBlock >> > (curPrimitiveBeginId, height, width, + dev_primitives, dev_depth, dev_fragmentBuffer); + checkCUDAError("Traingle rasterization error"); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + } // Copy depthbuffer colors into framebuffer - render << > >(width, height, dev_fragmentBuffer, dev_framebuffer); - checkCUDAError("fragment shader"); + cudaEventRecord(start); + render << < blockCount2d, blockSize2d >> > (width, height, dev_fragmentBuffer, dev_framebuffer, primitive_type); + checkCUDAError("fragment shader"); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + + // Copy framebuffer into OpenGL buffer for OpenGL previewing - sendImageToPBO<<>>(pbo, width, height, dev_framebuffer); + cudaEventRecord(start); + sendImageToPBO << < blockCount2d, blockSize2d >> > (pbo, screen_width, screen_height, dev_framebuffer); checkCUDAError("copy render result to pbo"); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); } /** - * Called once at the end of the program to free CUDA memory. - */ +* Called once at the end of the program to free CUDA memory. +*/ void rasterizeFree() { // deconstruct primitives attribute/indices device buffer - auto it(mesh2PrimitivesMap.begin()); - auto itEnd(mesh2PrimitivesMap.end()); - for (; it != itEnd; ++it) { - for (auto p = it->second.begin(); p != it->second.end(); ++p) { - cudaFree(p->dev_indices); - cudaFree(p->dev_position); - cudaFree(p->dev_normal); - cudaFree(p->dev_texcoord0); - cudaFree(p->dev_diffuseTex); + auto it(mesh2PrimitivesMap.begin()); + auto itEnd(mesh2PrimitivesMap.end()); + for (; it != itEnd; ++it) { + for (auto p = it->second.begin(); p != it->second.end(); ++p) { + cudaFree(p->dev_indices); + cudaFree(p->dev_position); + cudaFree(p->dev_normal); + cudaFree(p->dev_texcoord0); + cudaFree(p->dev_diffuseTex); - cudaFree(p->dev_verticesOut); + cudaFree(p->dev_verticesOut); - - //TODO: release other attributes and materials - } - } - //////////// + //TODO: release other attributes and materials + } + } + + //////////// cudaFree(dev_primitives); dev_primitives = NULL; - cudaFree(dev_fragmentBuffer); - dev_fragmentBuffer = NULL; + cudaFree(dev_fragmentBuffer); + dev_fragmentBuffer = NULL; cudaFree(dev_framebuffer); dev_framebuffer = NULL; - cudaFree(dev_depth); - dev_depth = NULL; + cudaFree(dev_depth); + dev_depth = NULL; checkCUDAError("rasterize Free"); -} +} \ No newline at end of file diff --git a/src/rasterize.h b/src/rasterize.h index 560aae9..ee36479 100644 --- a/src/rasterize.h +++ b/src/rasterize.h @@ -1,10 +1,10 @@ /** - * @file rasterize.h - * @brief CUDA-accelerated rasterization pipeline. - * @authors Skeleton code: Yining Karl Li, Kai Ninomiya, Shuai Shao (Shrek) - * @date 2012-2016 - * @copyright University of Pennsylvania & STUDENT - */ +* @file rasterize.h +* @brief CUDA-accelerated rasterization pipeline. +* @authors Skeleton code: Yining Karl Li, Kai Ninomiya, Shuai Shao (Shrek) +* @date 2012-2016 +* @copyright University of Pennsylvania & STUDENT +*/ #pragma once @@ -12,13 +12,13 @@ #include #include -namespace tinygltf{ - class Scene; +namespace tinygltf { + class Scene; } void rasterizeInit(int width, int height); void rasterizeSetBuffers(const tinygltf::Scene & scene); -void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal); +void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal, int primitive_type); void rasterizeFree(); diff --git a/src/rasterizeTools.h b/src/rasterizeTools.h index 46c701e..9bec127 100644 --- a/src/rasterizeTools.h +++ b/src/rasterizeTools.h @@ -99,3 +99,21 @@ float getZAtCoordinate(const glm::vec3 barycentricCoord, const glm::vec3 tri[3]) + barycentricCoord.y * tri[1].z + barycentricCoord.z * tri[2].z); } + +/** + * For a given barycentric coordinate, compute the corresponding z position + * (i.e. depth) on the triangle with perspective projection + */ + +__host__ __device__ static +float computeOneOverZ(const glm::vec3 barycentricCoord, const glm::vec3 tri[3]){ + return 1.0f / (barycentricCoord.x / tri[0].z + barycentricCoord.y / tri[1].z + barycentricCoord.z / tri[2].z); +} + +__host__ __device__ static +glm::vec3 correctCoordPerspective(const float z, glm::vec3 barycentricCoord, const glm::vec3 tri[3], const glm::vec3 coord[3]){ + return z * glm::vec3(coord[0] * barycentricCoord.x / tri[0].z + + coord[1] * barycentricCoord.y / tri[1].z + + coord[2] * barycentricCoord.z / tri[2].z); +} +