diff --git a/README.md b/README.md
index cad1abd..c4a38b1 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,104 @@
CUDA Rasterizer
===============
-[CLICK ME FOR INSTRUCTION OF THIS PROJECT](./INSTRUCTION.md)
-
**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4**
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Kaixiang Miao
+* Tested on: Windows 7, i7-3630QM @ 2.40GHz 8GB, GTX 660M 2GB (Lenovo Y580 laptop, personal computer)
+
+### Screenshot
+
+----------------
+
+#### Perspective-correct Color Interpolation
+
+
+ | duck.gltf (Blinn-Phong) |
+ cow.gltf (Blinn-Phong) |
+ flower.gltf (Lambert) |
+
+
+  |
+  |
+  |
+
+
+
+#### Bilinear Texture Mapping
+
+
+ | duck.gltf |
+ CesiumMilkTruck.gltf |
+
+
+  |
+  |
+
+
+
+#### Perspective-correct Texture Mapping
+
+
+ | Bad Mapping |
+ Nice Mapping |
+
+
+  |
+  |
+
+
+
+#### Bilinear Texture Mapping
+
+
+ | Bilinear Off |
+ Bilinear On |
+
+
+  |
+  |
+
+
+
+#### Backface Culling
+
+
+ | Backface Culling Off |
+ Backface Culling On |
+
+
+  |
+  |
+
+
+
+### Performance Analysis
+
+#### Kernel Time of Different Stages
+
+The kernel time of each stage for different .gltf files is shown below. However, although the distance from the camera to the object is fixed, the objects have different scale factors, which makes the stacked chart below less meaningful.
+
+
+
+Instead of comparing different objects, the next chart varies the distance from the object `duck.gltf` to the position of the camera.
+
+
+
+As the camera gets closer and closer, the rasterizer stage takes up more time. This illustrates that as the object takes up more and more of the screen, the workload of the rasterizer becomes heavier, because the primitives cover a larger area and more pixels need to be filled by the scanline.
+
+The following graph shows the duration of the **fragment shader** of `duck.gltf`. Although its value increases, its share of the total time decreases, because the duration of the **rasterizer stage** increases more steeply. Besides, the durations of the **vertex shader**, the **primitive assembly** and the **backface culling** do not vary much, because the number of vertices, primitives and backfaces does not depend on the distance.
+
+
+
+#### Backface Culling
+
+Backface culling greatly reduces the execution time of both the fragment shader and the rasterizer stage.
-### (TODO: Your README)
+
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+With no backface culling, the stacked chart looks different from the one shown at the beginning. The rasterizer stage takes up most of the time because the rasterizer is shading numerous redundant primitives.
+
### Credits
diff --git a/img/1.gif b/img/1.gif
new file mode 100644
index 0000000..3570ba4
Binary files /dev/null and b/img/1.gif differ
diff --git a/img/10.gif b/img/10.gif
new file mode 100644
index 0000000..cf2f450
Binary files /dev/null and b/img/10.gif differ
diff --git a/img/2.gif b/img/2.gif
new file mode 100644
index 0000000..91143fb
Binary files /dev/null and b/img/2.gif differ
diff --git a/img/20.gif b/img/20.gif
new file mode 100644
index 0000000..5f60acd
Binary files /dev/null and b/img/20.gif differ
diff --git a/img/3.gif b/img/3.gif
new file mode 100644
index 0000000..e9e158a
Binary files /dev/null and b/img/3.gif differ
diff --git a/img/4.gif b/img/4.gif
new file mode 100644
index 0000000..e2bc7af
Binary files /dev/null and b/img/4.gif differ
diff --git a/img/5.gif b/img/5.gif
new file mode 100644
index 0000000..2f44f0b
Binary files /dev/null and b/img/5.gif differ
diff --git a/img/51.gif b/img/51.gif
new file mode 100644
index 0000000..22d83b8
Binary files /dev/null and b/img/51.gif differ
diff --git a/img/6.gif b/img/6.gif
new file mode 100644
index 0000000..916bcb6
Binary files /dev/null and b/img/6.gif differ
diff --git a/img/61.gif b/img/61.gif
new file mode 100644
index 0000000..2369c79
Binary files /dev/null and b/img/61.gif differ
diff --git a/img/7.gif b/img/7.gif
new file mode 100644
index 0000000..96ccb6c
Binary files /dev/null and b/img/7.gif differ
diff --git a/img/71.gif b/img/71.gif
new file mode 100644
index 0000000..40441f5
Binary files /dev/null and b/img/71.gif differ
diff --git a/img/8.jpg b/img/8.jpg
new file mode 100644
index 0000000..5f32dde
Binary files /dev/null and b/img/8.jpg differ
diff --git a/img/81.jpg b/img/81.jpg
new file mode 100644
index 0000000..6074f2a
Binary files /dev/null and b/img/81.jpg differ
diff --git a/img/Duck.jpg b/img/Duck.jpg
new file mode 100644
index 0000000..021ed26
Binary files /dev/null and b/img/Duck.jpg differ
diff --git a/img/DuckFrag.jpg b/img/DuckFrag.jpg
new file mode 100644
index 0000000..8d580de
Binary files /dev/null and b/img/DuckFrag.jpg differ
diff --git a/img/Kernel.jpg b/img/Kernel.jpg
new file mode 100644
index 0000000..8e17108
Binary files /dev/null and b/img/Kernel.jpg differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027104041.jpg" "b/img/QQ\346\210\252\345\233\27620161027104041.jpg"
new file mode 100644
index 0000000..56b2502
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027104041.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027125805.jpg" "b/img/QQ\346\210\252\345\233\27620161027125805.jpg"
new file mode 100644
index 0000000..490ac6d
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027125805.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027125842.jpg" "b/img/QQ\346\210\252\345\233\27620161027125842.jpg"
new file mode 100644
index 0000000..f0b7c34
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027125842.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027130210.jpg" "b/img/QQ\346\210\252\345\233\27620161027130210.jpg"
new file mode 100644
index 0000000..ddada08
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027130210.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027141341.jpg" "b/img/QQ\346\210\252\345\233\27620161027141341.jpg"
new file mode 100644
index 0000000..fa08e81
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027141341.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027194239.jpg" "b/img/QQ\346\210\252\345\233\27620161027194239.jpg"
new file mode 100644
index 0000000..db5bab2
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027194239.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027195451.jpg" "b/img/QQ\346\210\252\345\233\27620161027195451.jpg"
new file mode 100644
index 0000000..895d7f2
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027195451.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027195722.jpg" "b/img/QQ\346\210\252\345\233\27620161027195722.jpg"
new file mode 100644
index 0000000..3724d68
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027195722.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027205430.jpg" "b/img/QQ\346\210\252\345\233\27620161027205430.jpg"
new file mode 100644
index 0000000..5fcbd48
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027205430.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027205528.jpg" "b/img/QQ\346\210\252\345\233\27620161027205528.jpg"
new file mode 100644
index 0000000..a543758
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027205528.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027205938.jpg" "b/img/QQ\346\210\252\345\233\27620161027205938.jpg"
new file mode 100644
index 0000000..6d960cd
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027205938.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027210034.jpg" "b/img/QQ\346\210\252\345\233\27620161027210034.jpg"
new file mode 100644
index 0000000..faf4c82
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027210034.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161027231940.jpg" "b/img/QQ\346\210\252\345\233\27620161027231940.jpg"
new file mode 100644
index 0000000..e316e51
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161027231940.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161028180142.jpg" "b/img/QQ\346\210\252\345\233\27620161028180142.jpg"
new file mode 100644
index 0000000..4287730
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161028180142.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161028183459.jpg" "b/img/QQ\346\210\252\345\233\27620161028183459.jpg"
new file mode 100644
index 0000000..3971ba2
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161028183459.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161028184002.jpg" "b/img/QQ\346\210\252\345\233\27620161028184002.jpg"
new file mode 100644
index 0000000..0e37725
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161028184002.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161028213137.jpg" "b/img/QQ\346\210\252\345\233\27620161028213137.jpg"
new file mode 100644
index 0000000..f153d35
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161028213137.jpg" differ
diff --git "a/img/QQ\346\210\252\345\233\27620161028223323.jpg" "b/img/QQ\346\210\252\345\233\27620161028223323.jpg"
new file mode 100644
index 0000000..08b6026
Binary files /dev/null and "b/img/QQ\346\210\252\345\233\27620161028223323.jpg" differ
diff --git a/img/backface.jpg b/img/backface.jpg
new file mode 100644
index 0000000..a2ba08b
Binary files /dev/null and b/img/backface.jpg differ
diff --git a/img/bitex.jpg b/img/bitex.jpg
new file mode 100644
index 0000000..c910d43
Binary files /dev/null and b/img/bitex.jpg differ
diff --git a/img/fun0.jpg b/img/fun0.jpg
new file mode 100644
index 0000000..c58f7b0
Binary files /dev/null and b/img/fun0.jpg differ
diff --git a/img/fun1.jpg b/img/fun1.jpg
new file mode 100644
index 0000000..0b78e08
Binary files /dev/null and b/img/fun1.jpg differ
diff --git a/img/nobackface.jpg b/img/nobackface.jpg
new file mode 100644
index 0000000..ecb2370
Binary files /dev/null and b/img/nobackface.jpg differ
diff --git a/img/tex.jpg b/img/tex.jpg
new file mode 100644
index 0000000..b395542
Binary files /dev/null and b/img/tex.jpg differ
diff --git a/src/main.cpp b/src/main.cpp
index a36b955..3e4b37c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -96,17 +96,19 @@ void mainLoop() {
//---------RUNTIME STUFF---------
//-------------------------------
float scale = 1.0f;
-float x_trans = 0.0f, y_trans = 0.0f, z_trans = -10.0f;
-float x_angle = 0.0f, y_angle = 0.0f;
+float x_trans = 0.0f, y_trans = 0.0f, z_trans = -5.0f;
+float x_angle = 0.0f, y_angle = 0.0f;bool flag = false;int c = 0; // flag: set to true after every rasterize() call in runCuda (its only visible reader is commented out); c: passed as the last argument of rasterize() and incremented after each call
+//float x_trans = 0.0f, y_trans = 0.0f, z_trans = -3.0f;
+//float x_angle = 0.63f, y_angle = 3.19f;bool flag = false;int c = 0;
void runCuda() {
// Map OpenGL buffer object for writing from CUDA on a single GPU
// No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
dptr = NULL;
-
- glm::mat4 P = glm::frustum(-scale * ((float)width) / ((float)height),
- scale * ((float)width / (float)height),
- -scale, scale, 1.0, 1000.0);
-
+ //y_angle += 0.01f;
+ //glm::mat4 P = glm::frustum(-scale * ((float)width) / ((float)height),
+ // scale * ((float)width / (float)height),
+ // -scale, scale, 1.0, 1000.0);
+ glm::mat4 P = glm::perspective(3.14159f / 3, 1.0f, .1f, 1000.0f);
glm::mat4 V = glm::mat4(1.0f);
glm::mat4 M =
@@ -117,9 +119,16 @@ void runCuda() {
glm::mat3 MV_normal = glm::transpose(glm::inverse(glm::mat3(V) * glm::mat3(M)));
glm::mat4 MV = V * M;
glm::mat4 MVP = P * MV;
-
+ //printf("camera:%f %f %f\n", x_angle, y_angle, z_trans);
cudaGLMapBufferObject((void **)&dptr, pbo);
- rasterize(dptr, MVP, MV, MV_normal);
+
+ //if (!flag)
+ {
+ rasterize(dptr, MVP, MV, MV_normal, c);
+ c++;
+ flag = true;
+ }
+
cudaGLUnmapBufferObject(pbo);
frame++;
diff --git a/src/rasterize.cu b/src/rasterize.cu
index 1262a09..0084c6d 100644
--- a/src/rasterize.cu
+++ b/src/rasterize.cu
@@ -5,6 +5,9 @@
* @date 2012-2016
* @copyright University of Pennsylvania & STUDENT
*/
+#include
+#include
+#include
#include
#include
@@ -17,8 +20,11 @@
#include "rasterize.h"
#include
#include
+#include
-namespace {
+
+
+//namespace
typedef unsigned short VertexIndex;
typedef glm::vec3 VertexAttributePosition;
@@ -43,16 +49,27 @@ namespace {
glm::vec3 eyePos; // eye space position used for shading
glm::vec3 eyeNor; // eye space normal used for shading, cuz normal will go wrong after perspective transformation
- // glm::vec3 col;
+ glm::vec3 col;
+
glm::vec2 texcoord0;
- TextureData* dev_diffuseTex = NULL;
- // int texWidth, texHeight;
+ TextureData* dev_diffuseTex;
+ int diffuseTexHeight;
+ int diffuseTexWidth;
+ int diffuseTexComponent;
// ...
+        // Default constructor: clears only the texture pointer; no other member is assigned.
+        VertexOut()
+            : dev_diffuseTex(NULL)
+        {}
};
struct Primitive {
- PrimitiveType primitiveType = Triangle; // C++ 11 init
+ PrimitiveType primitiveType; // assigned Triangle in the constructor below; this replaces the C++11 in-class initializer
VertexOut v[3];
+ Primitive() // assigns only primitiveType
+ {
+ primitiveType = Triangle;
+ }
};
struct Fragment {
@@ -62,10 +79,20 @@ namespace {
// The attributes listed below might be useful,
// but always feel free to modify on your own
- // glm::vec3 eyePos; // eye space position used for shading
- // glm::vec3 eyeNor;
- // VertexAttributeTexcoord texcoord0;
- // TextureData* dev_diffuseTex;
+ glm::vec3 eyePos; // eye space position used for shading
+ glm::vec3 eyeNor;
+ float z;
+ VertexAttributeTexcoord texcoord0;
+ TextureData* dev_diffuseTex;
+ int diffuseTexWidth;
+ int diffuseTexHeight;
+ int diffuseTexComponent;
+ bool hasColor;
+        // Default constructor: no texture bound (NULL) and hasColor cleared;
+        // no other member is assigned.
+        Fragment()
+            : dev_diffuseTex(NULL), hasColor(false)
+        {}
// ...
};
@@ -86,17 +113,52 @@ namespace {
TextureData* dev_diffuseTex;
int diffuseTexWidth;
int diffuseTexHeight;
+ int diffuseTexComponent;
// TextureData* dev_specularTex;
// TextureData* dev_normalTex;
// ...
// Vertex Out, vertex used for rasterization, this is changing every frame
VertexOut* dev_verticesOut;
-
+    PrimitiveDevBufPointers() {} // default constructor: assigns no member
+    // Member-wise constructor: stores every argument in the member of the matching
+    // name (tFoo -> foo). Pointer arguments are stored as given; nothing is
+    // allocated or copied here.
+    PrimitiveDevBufPointers(int tPrimitiveMode,
+        PrimitiveType tPrimitiveType,
+        int tNumPrimitives,
+        int tNumIndices,
+        int tNumVertices,
+        VertexIndex *tDev_indices,
+        VertexAttributePosition *tDev_position,
+        VertexAttributeNormal *tDev_normal,
+        VertexAttributeTexcoord *tDev_texcoord0,
+        TextureData *tDev_diffuseTex,
+        int tDiffuseTexWidth,
+        int tDiffuseTexHeight,
+        int tDiffuseTexComponent,
+        VertexOut *tDev_verticesOut)
+        : primitiveMode(tPrimitiveMode)
+        , primitiveType(tPrimitiveType)
+        , numPrimitives(tNumPrimitives)
+        , numIndices(tNumIndices)
+        , numVertices(tNumVertices)
+        , dev_indices(tDev_indices)
+        , dev_position(tDev_position)
+        , dev_normal(tDev_normal)
+        , dev_texcoord0(tDev_texcoord0)
+        , dev_diffuseTex(tDev_diffuseTex)
+        , diffuseTexWidth(tDiffuseTexWidth)
+        , diffuseTexHeight(tDiffuseTexHeight)
+        , diffuseTexComponent(tDiffuseTexComponent)
+        , dev_verticesOut(tDev_verticesOut)
+    {
+    }
// TODO: add more attributes when needed
};
-}
+
+
static std::map> mesh2PrimitivesMap;
@@ -109,6 +171,9 @@ static Primitive *dev_primitives = NULL;
static Fragment *dev_fragmentBuffer = NULL;
static glm::vec3 *dev_framebuffer = NULL;
+bool *dev_flag = NULL; // device buffer of totalNumPrimitives bools, allocated in rasterizeSetBuffers next to dev_primitives
+int *dev_mutex = NULL; // device buffer of width * height ints, allocated in rasterizeInit
+
static int * dev_depth = NULL; // you might need this buffer when doing depth test
/**
@@ -141,13 +206,45 @@ void render(int w, int h, Fragment *fragmentBuffer, glm::vec3 *framebuffer) {
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;
int index = x + (y * w);
-
- if (x < w && y < h) {
- framebuffer[index] = fragmentBuffer[index].color;
-
+    // Fragment shader. Only threads that map to a pixel may touch the buffers: with
+    // index = x + y * w, a thread whose x >= w would otherwise alias a pixel of the
+    // next row (or run past the end of the buffers), so bail out before any access.
+    if (x >= w || y >= h) {
+        return;
+    }
+
+    Fragment cacheFragment = fragmentBuffer[index];
+    if (!cacheFragment.hasColor) {
+        // hasColor is false for this pixel: write black.
+        framebuffer[index] = glm::vec3(0.0f, 0.0f, 0.0f);
+        return;
+    }
+
+    // Blinn-Phong terms; the hard-coded light position is combined directly with eyePos.
+    glm::vec3 lightPos(5.0f, 5.0f, 5.0f);
+    glm::vec3 L = glm::normalize(lightPos - cacheFragment.eyePos); // fragment -> light
+    glm::vec3 N = cacheFragment.eyeNor;
+    glm::vec3 V = glm::normalize(-cacheFragment.eyePos);           // fragment -> origin
+    glm::vec3 H = glm::normalize(V + L);                           // half vector
+    float diffuseTerm = 0.4 * glm::clamp(glm::dot(N, L), 0.0f, 1.0f);
+    float ambientTerm = 0.6f;
+
+    glm::vec3 textureColor;
+    if (cacheFragment.dev_diffuseTex != NULL) {
+        // Plain texture lookup; getBilinearTextureColor takes the same arguments (bilinear variant).
+        textureColor = getTextureColor(cacheFragment.dev_diffuseTex, cacheFragment.texcoord0,
+            cacheFragment.diffuseTexWidth, cacheFragment.diffuseTexHeight, cacheFragment.diffuseTexComponent);
+    }
+    else {
+        // Untextured: start from the fragment colour, pre-scaled by the lighting factor
+        // (the same factor is applied once more in the final expression below).
+        textureColor = cacheFragment.color * (diffuseTerm + ambientTerm);
+    }
+
+    // (diffuse + ambient) * base colour, plus a specular term with exponent 200.
+    framebuffer[index] = textureColor * (diffuseTerm + ambientTerm) + pow(max(0.0f, glm::dot(N, H)), 200.0f);
+    // TODO: add your fragment shader code here
}
/**
@@ -166,11 +263,12 @@ void rasterizeInit(int w, int h) {
cudaFree(dev_depth);
cudaMalloc(&dev_depth, width * height * sizeof(int));
+ cudaMalloc(&dev_mutex, width * height * sizeof(int));
checkCUDAError("rasterizeInit");
}
__global__
-void initDepth(int w, int h, int * depth)
+void initDepth(int w, int h, int * depth, Fragment *f)
{
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;
@@ -179,9 +277,12 @@ void initDepth(int w, int h, int * depth)
{
int index = x + (y * w);
depth[index] = INT_MAX;
+ f[index].z = 2.0f;
+ f[index].hasColor = false;
+ //f[index].texcoord0 = glm::vec2(-1.0f, -1.0f);
}
}
-
+#define SCALE 100000 // factor applied to a float depth before it is stored as an int; no trailing ';' so the macro also works inside larger expressions
/**
* kern function with support for stride to sometimes replace cudaMemcpy
@@ -523,6 +624,8 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
TextureData* dev_diffuseTex = NULL;
int diffuseTexWidth = 0;
int diffuseTexHeight = 0;
+ int diffuseTexComponent = 0;
+
if (!primitive.material.empty()) {
const tinygltf::Material &mat = scene.materials.at(primitive.material);
printf("material.name = %s\n", mat.name.c_str());
@@ -540,7 +643,8 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
diffuseTexWidth = image.width;
diffuseTexHeight = image.height;
-
+ diffuseTexComponent = image.component;
+ //printf("HH:%d\n", diffuseTexComponent);
checkCUDAError("Set Texture Image data");
}
}
@@ -567,7 +671,7 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
// at the end of the for loop of primitive
// push dev pointers to map
- primitiveVector.push_back(PrimitiveDevBufPointers{
+ primitiveVector.push_back(PrimitiveDevBufPointers(
primitive.mode,
primitiveType,
numPrimitives,
@@ -582,9 +686,10 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
dev_diffuseTex,
diffuseTexWidth,
diffuseTexHeight,
+ diffuseTexComponent,
dev_vertexOut //VertexOut
- });
+ ));
totalNumPrimitives += numPrimitives;
@@ -600,6 +705,7 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
// 3. Malloc for dev_primitives
{
cudaMalloc(&dev_primitives, totalNumPrimitives * sizeof(Primitive));
+ cudaMalloc(&dev_flag, totalNumPrimitives * sizeof(bool));
}
@@ -630,15 +736,37 @@ void _vertexTransformAndAssembly(
glm::mat4 MVP, glm::mat4 MV, glm::mat3 MV_normal,
int width, int height) {
+
+
// vertex id
int vid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (vid < numVertices) {
-
+ //printf("%d %d\n", vid, numVertices);
// TODO: Apply vertex transformation here
// Multiply the MVP matrix for each vertex position, this will transform everything into clipping space
// Then divide the pos by its w element to transform into NDC space
// Finally transform x and y to viewport space
-
+
+ primitive.dev_verticesOut[vid].pos = MVP * glm::vec4(primitive.dev_position[vid], 1.0f);
+ primitive.dev_verticesOut[vid].pos /= primitive.dev_verticesOut[vid].pos[3];
+ //primitive.dev_verticesOut[vid].pos[0] = -primitive.dev_verticesOut[vid].pos[0];
+ //primitive.dev_verticesOut[vid].pos[1] = -primitive.dev_verticesOut[vid].pos[1];
+ primitive.dev_verticesOut[vid].eyePos = glm::vec3(MV * glm::vec4(primitive.dev_position[vid], 1.0f));
+ primitive.dev_verticesOut[vid].eyeNor = glm::normalize(glm::vec3(MV_normal * primitive.dev_normal[vid]));
+ primitive.dev_verticesOut[vid].dev_diffuseTex = primitive.dev_diffuseTex;
+ primitive.dev_verticesOut[vid].diffuseTexHeight = primitive.diffuseTexHeight;
+ primitive.dev_verticesOut[vid].diffuseTexWidth = primitive.diffuseTexWidth;
+ primitive.dev_verticesOut[vid].diffuseTexComponent = primitive.diffuseTexComponent;
+ //printf("vertex:%d:%f %f %f\n\n", vid, primitive.dev_verticesOut[vid].pos[0], primitive.dev_verticesOut[vid].pos[1], primitive.dev_verticesOut[vid].pos[2]);
+ //printf("normal:%d:%f %f %f\n\n", vid, primitive.dev_verticesOut[vid].eyeNor[0], primitive.dev_verticesOut[vid].eyeNor[1], primitive.dev_verticesOut[vid].eyeNor[2]);
+ //printf("vertex:%d:%f %f %f\n\n", vid, primitive.dev_position[vid][0], primitive.dev_position[vid][1], primitive.dev_position[vid][2]);
+ if (primitive.dev_texcoord0 != NULL)
+ primitive.dev_verticesOut[vid].texcoord0 = primitive.dev_texcoord0[vid];
+ else;
+ //printf("%f %f\n", primitive.dev_verticesOut[vid].texcoord0[0], primitive.dev_verticesOut[vid].texcoord0[1]);
+ //primitive.dev_verticesOut[vid].col = glm::vec3(1.0f, 1.0f, 1.0f);
+ //primitive.dev_verticesOut[vid].col = c[vid / 3 % 3];
+ //printf("NULL TEX\n");
// TODO: Apply vertex assembly here
// Assemble all attribute arraies into the primitive array
@@ -651,34 +779,365 @@ static int curPrimitiveBeginId = 0;
__global__
void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_primitives, PrimitiveDevBufPointers primitive) {
-
+    // Triangle-list primitive assembly: one thread per index. Index iid is corner
+    // (iid % 3) of triangle (iid / 3) of this mesh; that triangle is stored at
+    // dev_primitives[curPrimitiveBeginId + iid / 3]. Only triangles are handled:
+    // primitive.primitiveMode and primitive.primitiveType are not consulted.
// index id
int iid = (blockIdx.x * blockDim.x) + threadIdx.x;
-
if (iid < numIndices) {
// TODO: uncomment the following code for a start
// This is primitive assembly for triangles
-
- //int pid; // id for cur primitives vector
- //if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) {
- // pid = iid / (int)primitive.primitiveType;
- // dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType]
- // = primitive.dev_verticesOut[primitive.dev_indices[iid]];
- //}
-
-
+        const int corner = iid % 3;                    // which vertex of the triangle
+        const int pid = iid / 3 + curPrimitiveBeginId; // slot in the global primitive array
+        // Copy the transformed vertex selected by the index buffer into its corner.
+        VertexOut &dst = dev_primitives[pid].v[corner];
+        dst = primitive.dev_verticesOut[primitive.dev_indices[iid]];
+        // dev_verticesOut carries no meaningful colour at this point, so every
+        // corner gets a constant white vertex colour.
+        dst.col = glm::vec3(1.0f, 1.0f, 1.0f);
// TODO: other primitive types (point, line)
}
}
+// Back-face test, one thread per assembled primitive. Writes flag[pid] = false when the
+// z component of (v1 - v0) x (v2 - v0), taken from the eyePos of the three vertices, is
+// negative, and true otherwise. numIndices is used as the number of primitives to test.
+__global__ void _backFaceCulling(int numIndices, Primitive* primitives, bool *flag)
+{
+    int pid = (blockIdx.x * blockDim.x) + threadIdx.x;
+    // Bounds-check before reading: threads past the end must not load primitives[pid].
+    if (pid >= numIndices)
+        return;
+
+    const Primitive &prim = primitives[pid];
+    glm::vec3 v0 = prim.v[1].eyePos - prim.v[0].eyePos;
+    glm::vec3 v1 = prim.v[2].eyePos - prim.v[0].eyePos;
+    glm::vec3 temp = glm::cross(v0, v1);
+    flag[pid] = !(temp.z < 0.0f);
+}
+
+// Compacts primitives[0 .. numIndices) in place, keeping the entries whose flag is true
+// (order preserved), and updates numIndices to the number of entries kept.
+void _primitivesCompress(int &numIndices, Primitive* primitives, bool *flag)
+{
+    thrust::device_ptr<bool> keepFlags(flag);
+    thrust::device_ptr<Primitive> primBegin(primitives);
+    // Stencil form of remove_if: element i is removed when logical_not(flag[i]) is true.
+    thrust::device_ptr<Primitive> primEnd =
+        thrust::remove_if(primBegin, primBegin + numIndices, keepFlags, thrust::logical_not<bool>());
+    // The returned iterator is the end of the kept range, so no second counting pass is needed.
+    numIndices = static_cast<int>(primEnd - primBegin);
+}
+
+// Area of the triangle spanned by the xy parts of v1 and v2: z is replaced by 0 and w is
+// dropped, then half the length of the 3D cross product is returned.
+__device__ float triangleArea(glm::vec4 v1, glm::vec4 v2) {
+    const glm::vec3 a(v1.x, v1.y, 0.0f), b(v2.x, v2.y, 0.0f);
+    return 0.5f * glm::length(glm::cross(a, b));
+}
+
+// Area of the 2D triangle with edge vectors (x1, y1) and (x2, y2): half the magnitude of
+// their 2D cross product. The reference arguments are only read.
+__device__ float tArea(float &x1, float &y1, float &x2, float &y2) {
+    return 0.5f * fabs(x1 * y2 - x2 * y1);
+}
+// Debug helper: prints the four components of v on one line.
+__device__ void print(glm::vec4 v) {
+    printf("%f %f %f %f\n", v.x, v.y, v.z, v.w);
+}
+
+// Clamps i in place to [a, b]; the lower bound is tested first.
+__device__ void clamp(int &i, int a, int b) {
+    if (i < a) { i = a; return; }
+    if (i > b) { i = b; }
+}
+// Float overload: clamps i in place to [a, b]; the lower bound is tested first.
+__device__ void clamp(float &i, float a, float b) {
+    if (i < a) { i = a; return; }
+    if (i > b) { i = b; }
+}
+__global__ void rasterizer(Fragment *fragment, int *depth, Primitive *primitive, int numPrimitives, int width, int height, int *mutex)
+{
+ //printf("RARAR\n");
+ int pid = (blockIdx.x * blockDim.x) + threadIdx.x;
+ //printf("%d\n", numPrimitives);
+ if (pid < numPrimitives) {
+ //printf("p0:%d:%f %f %f\np1:%d:%f %f %f\np2:%d:%f %f %f\n\n", pid, primitive[pid].v[0].pos[0], primitive[pid].v[0].pos[1], primitive[pid].v[0].pos[2],
+ // pid, primitive[pid].v[1].pos[0], primitive[pid].v[1].pos[1], primitive[pid].v[1].pos[2],
+ // pid, primitive[pid].v[2].pos[0], primitive[pid].v[2].pos[1], primitive[pid].v[2].pos[2]);
+ //printf("p0:%d:%f %f %f\np1:%d:%f %f %f\np2:%d:%f %f %f\n\n", pid, primitive[pid].v[0].eyeNor[0], primitive[pid].v[0].eyeNor[1], primitive[pid].v[0].eyeNor[2],
+ // pid, primitive[pid].v[1].eyeNor[0], primitive[pid].v[1].eyeNor[1], primitive[pid].v[1].eyeNor[2],
+ // pid, primitive[pid].v[2].eyeNor[0], primitive[pid].v[2].eyeNor[1], primitive[pid].v[2].eyeNor[2]);
+ Primitive cachePrimitive = primitive[pid];
+ float p0x = cachePrimitive.v[0].pos[0];
+ float p0y = cachePrimitive.v[0].pos[1];
+ float p1x = cachePrimitive.v[1].pos[0];
+ float p1y = cachePrimitive.v[1].pos[1];
+ float p2x = cachePrimitive.v[2].pos[0];
+ float p2y = cachePrimitive.v[2].pos[1];
+
+ float x1 = p1x - p0x;
+ float y1 = p1y - p0y;
+ float x2 = p2x - p0x;
+ float y2 = p2y - p0y;
+
+ float triArea = tArea(x1, y1, x2, y2);
+ //printf("t:%f\n", triArea);
+
+ float minx = 2.0f;
+ float maxx = -2.0f;
+ float miny = 2.0f;
+ float maxy = -2.0f;
+
+ minx = min(min(p0x, p1x), p2x);
+ maxx = max(max(p0x, p1x), p2x);
+ miny = min(min(p0y, p1y), p2y);
+ maxy = max(max(p0y, p1y), p2y);
+
+ //minx = min(min(cachePrimitive.v[0].pos[0], cachePrimitive.v[1].pos[0]), cachePrimitive.v[2].pos[0]);
+ //maxx = max(max(cachePrimitive.v[0].pos[0], cachePrimitive.v[1].pos[0]), cachePrimitive.v[2].pos[0]);
+ //miny = min(min(cachePrimitive.v[0].pos[1], cachePrimitive.v[1].pos[1]), cachePrimitive.v[2].pos[1]);
+ //maxy = max(max(cachePrimitive.v[0].pos[1], cachePrimitive.v[1].pos[1]), cachePrimitive.v[2].pos[1]);
+
+ int iMaxx = (-minx + 1.0f) * 0.5f * width;
+ int iMinx = (-maxx + 1.0f) * 0.5f * width;
+ int iMaxy = (-miny + 1.0f) * 0.5f * height;
+ int iMiny = (-maxy + 1.0f) * 0.5f * height;
+ // sometimes out of screen
+ clamp(iMaxx, 0, width - 1);
+ clamp(iMinx, 0, width - 1);
+ clamp(iMaxy, 0, height - 1);
+ clamp(iMiny, 0, height - 1);
+
+ //printf("%d %d %d %d\n", iMinx, iMaxx, iMiny, iMaxy);
+ //int xx0 = (-primitive[pid].v[0].pos[0] + 1) / 2 * width;
+ //int yy0 = (-primitive[pid].v[0].pos[1] + 1) / 2 * height;
+ ////printf("%d %d\n", xx0, yy0);
+ //int index = xx0 + yy0 * width;
+ //fragment[index].color = glm::vec3(1.0f, 0.0f, 0.0f);
+ //int xx1 = (-primitive[pid].v[1].pos[0] + 1) / 2 * width;
+ //int yy1 = (-primitive[pid].v[1].pos[1] + 1) / 2 * height;
+ //index = xx1 + yy1 * width;
+ //fragment[index].color = glm::vec3(1.0f, 0.0f, 0.0f);
+ //int xx2 = (-primitive[pid].v[2].pos[0] + 1) / 2 * width;
+ //int yy2 = (-primitive[pid].v[2].pos[1] + 1) / 2 * height;
+ //index = xx2 + yy2 * width;
+ //fragment[index].color = glm::vec3(1.0f, 0.0f, 0.0f);
+ float currentPt[2];
+ float s0, s1, s2;
+ double fDepth;
+ float t0, t1, t2;
+ for (int j = iMiny; j <= iMaxy; j++)
+ //int j = 20;
+ {
+ //for (int i = 0; i < 800; i++){}
+ //for (int i = iMinx; i <= iMaxx; i++){}
+ //int i = 20;
+ for (int i = iMinx; i <= iMaxx; i++)
+ //int i = 390, j = 390;
+ {
+ int index = i + j * width;
+ //fragment[index].color = glm::vec3(1.0f, 0.0f, 0.0f);
+ currentPt[0] = 1 - (float)i / width * 2;
+ currentPt[1] = 1 - (float)j / height * 2;
+ x1 = currentPt[0] - p1x;
+ y1 = currentPt[1] - p1y;
+ x2 = currentPt[0] - p2x;
+ y2 = currentPt[1] - p2y;
+ //printf("x1:%f p1x:%f y1:%f x2:%f y2:%f\n", x1, p1x, y1, x2, y2);
+ s0 = tArea(x1, y1, x2, y2);
+ x1 = currentPt[0] - p0x;
+ y1 = currentPt[1] - p0y;
+ x2 = currentPt[0] - p2x;
+ y2 = currentPt[1] - p2y;
+ s1 = tArea(x1, y1, x2, y2);
+ x1 = currentPt[0] - p0x;
+ y1 = currentPt[1] - p0y;
+ x2 = currentPt[0] - p1x;
+ y2 = currentPt[1] - p1y;
+ s2 = tArea(x1, y1, x2, y2);
+ //print(v1);
+ //print(v2);
+
+ t0 = s0 / triArea;
+
+
+ t1 = s1 / triArea;
+
+ //t2 = s2 / (triArea * cachePrimitive.v[2].pos[2]);
+ if (triArea < EPSILON)
+ t0 = t1 = 0.0f;
+ t2 = 1.0f - t1 - t0;
+
+ //if (t0 < 0)
+ // printf("t0:%f %f %f %f\n", s0, triArea, cachePrimitive.v[0].pos[2], (triArea * cachePrimitive.v[0].pos[2]));
+ //if (t1 < 0)
+ // printf("t1:%f %f %f %f\n", s1, triArea, cachePrimitive.v[1].pos[2], (triArea * cachePrimitive.v[1].pos[2]));
+ //if (t0 > 1 && t2 < 0)
+ // printf("t0:%f %f t0:%f t1:%f t2:%f\n", s0, triArea, t0, t1, t2);
+
+
+ // new
+ // why
+ //glm::vec3 triangle[3] = { glm::vec3(cachePrimitive.v[0].pos), glm::vec3(cachePrimitive.v[1].pos), glm::vec3(cachePrimitive.v[2].pos) };
+ //glm::vec3 baryCoords = calculateBarycentricCoordinate(triangle, glm::vec2(currentPt[0], currentPt[1]));
+ //float newDepth = glm::dot(baryCoords, glm::vec3(cachePrimitive.v[0].pos.z, cachePrimitive.v[1].pos.z, cachePrimitive.v[2].pos.z));
+ glm::vec3 triangle[3] = { glm::vec3(primitive[pid].v[0].pos), glm::vec3(primitive[pid].v[1].pos), glm::vec3(primitive[pid].v[2].pos) };
+
+ // AABB boundingBox = getAABBForTriangle(triangle);
+ //printf("aabb:%f\n", boundingBox.min.x);
+ //int minxpix = clamp(0, boundingBox.min.x, width - 1);
+ //int minypix = clamp(0, boundingBox.min.y, height - 1);
+ //int maxxpix = clamp(0, boundingBox.max.x, width - 1);
+ //int maxypix = clamp(0, boundingBox.max.y, height - 1);
+ glm::vec3 baryCoords = calculateBarycentricCoordinate(triangle, glm::vec2(currentPt[0], currentPt[1]));
+ float newDepth = glm::dot(baryCoords, glm::vec3(primitive[pid].v[0].pos[2], primitive[pid].v[1].pos[2], primitive[pid].v[2].pos[2]));
+ float testDepth = baryCoords[0] * primitive[pid].v[0].pos[2] + baryCoords[1] * primitive[pid].v[1].pos[2] + baryCoords[2] * primitive[pid].v[2].pos[2];
+ int iDepth = newDepth * SCALE;
+ //printf("%f %f\n", newDepth, fDepth);
+ glm::vec3 ttt[3] = {glm::vec3(cachePrimitive.v[0].pos), glm::vec3(cachePrimitive.v[1].pos), glm::vec3(cachePrimitive.v[2].pos)};
+
+ if (newDepth < 0 || newDepth > 1.0f)
+ continue;
+ //printf("area:%f %f\n", triArea, calculateSignedArea(ttt));
+ //if (newDepth < 0)
+ // printf("%f %f\ncurrent:%f %f\np0:%f %f %f\np1:%f %f %f\np2:%f %f %f\nbary: %f %f %f\ncom1:%f\ncom2:%f\ncom3:%f\n\n", newDepth, testDepth,
+ // currentPt[0], currentPt[1],
+ // primitive[pid].v[0].pos[0], primitive[pid].v[0].pos[1], primitive[pid].v[0].pos[2],
+ // primitive[pid].v[1].pos[0], primitive[pid].v[1].pos[1], primitive[pid].v[1].pos[2],
+ // primitive[pid].v[2].pos[0], primitive[pid].v[2].pos[1], primitive[pid].v[2].pos[2],
+ // baryCoords[0], baryCoords[1], baryCoords[2],
+ // baryCoords[0] * primitive[pid].v[0].pos[2], baryCoords[1] * primitive[pid].v[1].pos[2], baryCoords[2] * primitive[pid].v[2].pos[2]
+ //);
+ //printf("d:%f %d\n", newDepth, iDepth);
+ //clamp(iDepth, -INT_MAX, INT_MAX);
+ //clamp(iDepth, 0, INT_MAX);
+ //if (newDepth > 1.0f || newDepth <0.0f)
+ // continue;
+ // new
+
+ //printf("currentPT:%f %f s:%f %f %f %f area:%f\n", currentPt[0], currentPt[1], s0, s1, s2, s0 + s1 + s2, triArea);
+ if (s0 + s1 + s2 <= triArea + 0.00001f)
+ {
+ //printf("t:%f %f %f\n", t0, t1, t2);
+ //printf("currentPT:%f %f s:%f %f %f %f area:%f\n", currentPt[0], currentPt[1], s0, s1, s2, s0 + s1 + s2, triArea);
+ //printf("IN\n");
+ //if (t0 < 0 || t1 < 0 || t2 < 0)
+ // printf("bary:%f %f %f\n\n", t0, t1, t2);
+ //fDepth = t0 * primitive[pid].v[0].pos[2] + t1 * primitive[pid].v[1].pos[2] + t2 * primitive[pid].v[2].pos[2];
+ t0 /= cachePrimitive.v[0].eyePos[2];
+ t1 /= cachePrimitive.v[1].eyePos[2];
+ t2 /= cachePrimitive.v[2].eyePos[2];
+ fDepth = 1 / (t0 + t1 + t2);
+ //float ttDepth = (float)fDepth * (t0 * cachePrimitive.v[0].col + t1 * cachePrimitive.v[1].col + t2 * cachePrimitive.v[2].col);
+ //printf("%f %f\n", newDepth, fDepth);
+ //printf("bary:%f %f %f\nbary2:%f %f %f\n\n", baryCoords[0], baryCoords[1], baryCoords[2], s0/triArea, s1/triArea, s2/triArea);
+ //printf("depth:%f\n", fDepth);
+ //printf("s0:%f s1:%f s2:%f total:%f %f\n", s0, s1, s2, s0 + s1 + s2, triArea);
+ {
+ //printf("index:%d\n", index);
+ //fragment[index].color = glm::vec3(1.0f, 0.0f, 0.0f);
+ //if (atomicCAS(&mutex[index], 0, 1) == 0)
+ //{
+ // if (fDepth < depth[index])
+ // {
+ // //printf("HERE");
+ // //printf("%d\n", index);
+ // fragment[index].color = fDepth * (t0 * cachePrimitive.v[0].col + t1 * cachePrimitive.v[1].col + t2 * cachePrimitive.v[2].col);
+ // depth[index] = fDepth;
+ // }
+ // mutex[index] = 0;
+ //
+ // //if (index == 334800)
+ // // printf("ONE %d\n", mutex[index]);
+ //}
+ bool isSet = false;
+ do
+ {
+ isSet = (atomicCAS(&mutex[index], 0, 1) == 0);
+ if (isSet)
+ {
+ //if (fDepth < depth[index])
+ //if (iDepth < depth[index])
+ if (newDepth < fragment[index].z)
+ {
+ fragment[index].z = newDepth;
+ fragment[index].hasColor = true;
+ //printf("HERE");
+ //printf("%d\n", index);
+ //fragment[index].color = glm::vec3(-newDepth, -newDepth, -newDepth);
+ fragment[index].color = (float)fDepth * (t0 * cachePrimitive.v[0].col + t1 * cachePrimitive.v[1].col + t2 * cachePrimitive.v[2].col);
+//printf("p0:%d:%f %f %f\np1:%d:%f %f %f\np2:%d:%f %f %f\n\n", pid, primitive[pid].v[0].col[0], primitive[pid].v[0].col[1], primitive[pid].v[0].col[2],
+// pid, primitive[pid].v[1].col[0], primitive[pid].v[1].col[1], primitive[pid].v[1].col[2],
+// pid, primitive[pid].v[2].col[0], primitive[pid].v[2].col[1], primitive[pid].v[2].col[2]);
+// fragment[index].color = glm::dot(baryCoords, glm::vec3(cachePrimitive.v[0].pos.z, cachePrimitive.v[1].pos.z, cachePrimitive.v[2].pos.z));
+ fragment[index].eyePos[0] = (float)fDepth * (t0 * cachePrimitive.v[0].pos[0] + t1 * cachePrimitive.v[1].pos[0] + t2 * cachePrimitive.v[2].pos[0]);
+ fragment[index].eyePos[1] = (float)fDepth * (t0 * cachePrimitive.v[0].pos[1] + t1 * cachePrimitive.v[1].pos[1] + t2 * cachePrimitive.v[2].pos[1]);
+ fragment[index].eyePos[2] = fDepth;
+ fragment[index].eyeNor = (float)fDepth * (t0 * cachePrimitive.v[0].eyeNor + t1 * cachePrimitive.v[1].eyeNor + t2 * cachePrimitive.v[2].eyeNor);
+ fragment[index].texcoord0 = (float)fDepth * (t0 * cachePrimitive.v[0].texcoord0 + t1 * cachePrimitive.v[1].texcoord0 + t2 * cachePrimitive.v[2].texcoord0);
+ //fragment[index].texcoord0 = baryCoords[0] * cachePrimitive.v[0].texcoord0 + baryCoords[1] * cachePrimitive.v[1].texcoord0 + baryCoords[2] * cachePrimitive.v[2].texcoord0;
+ clamp(fragment[index].texcoord0[0], 0.0f, 1.0f);
+ clamp(fragment[index].texcoord0[1], 0.0f, 1.0f);
+ //fragment[index].color = glm::vec3(fragment[index].texcoord0.x, fragment[index].texcoord0.y, 0.0f);
+ //fragment[index].texcoord0 = baryCoords[0] * cachePrimitive.v[0].texcoord0 + baryCoords[1] * cachePrimitive.v[1].texcoord0 + baryCoords[2] * cachePrimitive.v[2].texcoord0;
+ //if (fragment[index].texcoord0[0]<0 || fragment[index].texcoord0[0]>1 || fragment[index].texcoord0[1]< 0 || fragment[index].texcoord0[1]>1)
+ //printf("bary:%f %f %f\n%f %f\n\n", baryCoords[0], baryCoords[1], baryCoords[2],
+ //fragment[index].texcoord0[0], fragment[index].texcoord0[1]);
+ //if (t0 < 0 || t1 < 0 || t2 < 0)
+ // printf("bary:%f %f %f\n%f %f\n\n", t0, t1, t2,
+ //fragment[index].texcoord0[0], fragment[index].texcoord0[1]);
+ if (cachePrimitive.v[0].dev_diffuseTex != NULL)
+ {
+ fragment[index].dev_diffuseTex = cachePrimitive.v[0].dev_diffuseTex;
+ fragment[index].diffuseTexHeight = cachePrimitive.v[0].diffuseTexHeight;
+ fragment[index].diffuseTexWidth = cachePrimitive.v[0].diffuseTexWidth;
+ fragment[index].diffuseTexComponent = cachePrimitive.v[0].diffuseTexComponent;
+ }
+//printf("%f %f %f\n\n", fragment[index].eyeNor[0], fragment[index].eyeNor[1], fragment[index].eyeNor[2]);
+ depth[index] = iDepth;
+ }
+ //mutex[index] = 0;
+
+ //if (index == 334800)
+ // printf("ONE %d\n", mutex[index]);
+ }
+ if (isSet)
+ mutex[index] = 0;
+
+ } while (!isSet);
+ //fragment[index].eyeNor =
+ //atomicMin(&depth[index], fDepth);
+ }
+ }
+ }
+ }
+ }
+}
+//float time_elapsed = 0.0f;
+//cudaEvent_t start,stop;
+//cudaEventCreate(&start);
+//cudaEventCreate(&stop);
+//cudaEventRecord( start,0);
+//
+//cudaEventRecord( stop,0);
+//cudaEventSynchronize(start);
+//cudaEventSynchronize(stop);
+//cudaEventElapsedTime(&time_elapsed,start,stop);
/**
* Perform rasterization.
*/
-void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal) {
+float stime[100];
+cudaEvent_t start,stop;
+float time_elapsed;
+void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal, int counter) {
+ //counter++;
+ FILE *fp = fopen("time.txt", "a+");
int sideLength2d = 8;
dim3 blockSize2d(sideLength2d, sideLength2d);
dim3 blockCount2d((width - 1) / blockSize2d.x + 1,
@@ -688,50 +1147,123 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g
// (See README for rasterization pipeline outline.)
// Vertex Process & primitive assembly
+ dim3 numThreadsPerBlock(128);
{
curPrimitiveBeginId = 0;
- dim3 numThreadsPerBlock(128);
+
auto it = mesh2PrimitivesMap.begin();
auto itEnd = mesh2PrimitivesMap.end();
-
+
for (; it != itEnd; ++it) {
auto p = (it->second).begin(); // each primitive
auto pEnd = (it->second).end();
- for (; p != pEnd; ++p) {
+ for (; p != pEnd; ++p)
+ //PrimitiveDevBufPointers *p;
+ //p = new PrimitiveDevBufPointers(s
+ {
dim3 numBlocksForVertices((p->numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x);
dim3 numBlocksForIndices((p->numIndices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x);
-
+ //printf("stop1\n");
+ //cudaDeviceSynchronize();
+time_elapsed = 0.0f;
+cudaEventCreate(&start);
+cudaEventCreate(&stop);
+cudaEventRecord(start,0);
_vertexTransformAndAssembly << < numBlocksForVertices, numThreadsPerBlock >> >(p->numVertices, *p, MVP, MV, MV_normal, width, height);
+cudaEventRecord( stop,0);
+cudaEventSynchronize(start);
+cudaEventSynchronize(stop);
+cudaEventElapsedTime(&time_elapsed,start,stop);
+stime[0] += time_elapsed;
+
checkCUDAError("Vertex Processing");
- cudaDeviceSynchronize();
+ //printf("stop2\n");
+ //cudaDeviceSynchronize();
+ //printf("stop3\n");
+time_elapsed = 0.0f;
+cudaEventCreate(&start);
+cudaEventCreate(&stop);
+cudaEventRecord(start,0);
_primitiveAssembly << < numBlocksForIndices, numThreadsPerBlock >> >
(p->numIndices,
curPrimitiveBeginId,
dev_primitives,
*p);
+cudaEventRecord(stop,0);
+cudaEventSynchronize(start);
+cudaEventSynchronize(stop);
+cudaEventElapsedTime(&time_elapsed,start,stop);
+stime[1] += time_elapsed;
+ //printf("stop4\n");
checkCUDAError("Primitive Assembly");
-
+ //cudaDeviceSynchronize();
+ //printf("stop5\n");
curPrimitiveBeginId += p->numPrimitives;
}
}
+
checkCUDAError("Vertex Processing and Primitive Assembly");
}
-
+ dim3 numBlocksForPrimitives((curPrimitiveBeginId + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x);
+time_elapsed = 0.0f;
+cudaEventCreate(&start);
+cudaEventCreate(&stop);
+cudaEventRecord(start,0);
+ //_backFaceCulling<< < numBlocksForPrimitives, numThreadsPerBlock >> >(curPrimitiveBeginId, dev_primitives, dev_flag);
+ //_primitivesCompress(curPrimitiveBeginId, dev_primitives, dev_flag);
+cudaEventRecord(stop,0);
+cudaEventSynchronize(start);
+cudaEventSynchronize(stop);
+cudaEventElapsedTime(&time_elapsed,start,stop);
+stime[2] += time_elapsed;
+
cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment));
- initDepth << > >(width, height, dev_depth);
-
+ initDepth << > >(width, height, dev_depth, dev_fragmentBuffer);
+ //printf("id:%d total:%d\n", curPrimitiveBeginId, totalNumPrimitives);
// TODO: rasterize
-
-
-
+
+ //printf("%d\n", (curPrimitiveBeginId + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x);
+
+ cudaMemset(dev_mutex, 0, width * height * sizeof(int));
+time_elapsed = 0.0f;
+cudaEventCreate(&start);
+cudaEventCreate(&stop);
+cudaEventRecord(start,0);
+ rasterizer<<>>(dev_fragmentBuffer, dev_depth, dev_primitives, curPrimitiveBeginId, width, height, dev_mutex);
+cudaEventRecord(stop,0);
+cudaEventSynchronize(start);
+cudaEventSynchronize(stop);
+cudaEventElapsedTime(&time_elapsed,start,stop);
+stime[3] += time_elapsed;
+
// Copy depthbuffer colors into framebuffer
+time_elapsed = 0.0f;
+cudaEventCreate(&start);
+cudaEventCreate(&stop);
+cudaEventRecord(start,0);
render << > >(width, height, dev_fragmentBuffer, dev_framebuffer);
+cudaEventRecord(stop,0);
+cudaEventSynchronize(start);
+cudaEventSynchronize(stop);
+cudaEventElapsedTime(&time_elapsed,start,stop);
+stime[4] += time_elapsed;
+
checkCUDAError("fragment shader");
// Copy framebuffer into OpenGL buffer for OpenGL previewing
sendImageToPBO<<>>(pbo, width, height, dev_framebuffer);
checkCUDAError("copy render result to pbo");
+
+if (counter == 59)
+{
+ for (int i = 0; i < 5; i++)
+ fprintf(fp, "%f ", stime[i]);
+ fprintf(fp, "\n");
+ fclose(fp);
+ printf("DONE\n");
+}
+
}
/**
@@ -772,5 +1304,31 @@ void rasterizeFree() {
cudaFree(dev_depth);
dev_depth = NULL;
+ cudaFree(dev_mutex);
+ dev_mutex = NULL;
+
+ cudaFree(dev_flag);
+ dev_flag = NULL;
checkCUDAError("rasterize Free");
}
+
+//__global__ void _AdvanceParticle(PrimitiveDevBufPointers *p, int toIndex)
+//{
+//
+//}
+
+//void paticleSystem(uchar4 *pbo, const glm::mat4 &MVP)
+//{
+// int sideLength2d = 8;
+// dim3 blockSize2d(sideLength2d, sideLength2d);
+// dim3 blockCount2d((width - 1) / blockSize2d.x + 1, (height - 1) / blockSize2d.y + 1);
+//
+// dim3 numThreadsPerBlock(128);
+// PrimitiveDevBufPointers *p[2];
+// p[0] = new PrimitiveDevBufPointers();
+// p[1] = new PrimitiveDevBufPointers();
+//
+// _AdvanceParticle();
+//
+//}
+
diff --git a/src/rasterize.h b/src/rasterize.h
index 560aae9..508277c 100644
--- a/src/rasterize.h
+++ b/src/rasterize.h
@@ -20,5 +20,5 @@ namespace tinygltf{
void rasterizeInit(int width, int height);
void rasterizeSetBuffers(const tinygltf::Scene & scene);
-void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal);
+void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal, int counter);
void rasterizeFree();
diff --git a/src/rasterizeTools.h b/src/rasterizeTools.h
index 46c701e..bb3c875 100644
--- a/src/rasterizeTools.h
+++ b/src/rasterizeTools.h
@@ -62,7 +62,9 @@ float calculateBarycentricCoordinateValue(glm::vec2 a, glm::vec2 b, glm::vec2 c,
baryTri[0] = glm::vec3(a, 0);
baryTri[1] = glm::vec3(b, 0);
baryTri[2] = glm::vec3(c, 0);
- return calculateSignedArea(baryTri) / calculateSignedArea(tri);
+ float signedArea = calculateSignedArea(tri);
+ if (fabs(signedArea) < EPSILON) return 0.0f;
+ return fabs(calculateSignedArea(baryTri) / signedArea);
}
// CHECKITOUT
@@ -73,6 +75,13 @@ __host__ __device__ static
glm::vec3 calculateBarycentricCoordinate(const glm::vec3 tri[3], glm::vec2 point) {
float beta = calculateBarycentricCoordinateValue(glm::vec2(tri[0].x, tri[0].y), point, glm::vec2(tri[2].x, tri[2].y), tri);
float gamma = calculateBarycentricCoordinateValue(glm::vec2(tri[0].x, tri[0].y), glm::vec2(tri[1].x, tri[1].y), point, tri);
+ // beta / gamma weight tri[1] / tri[2]: `point` is substituted for that vertex and the resulting
+ // area is compared with tri's. As patched, the helper returns fabs(ratio), or 0 for a degenerate tri.
+ // Disabled experiment kept for reference: clamp negative weights to zero
+ // (if (beta < 0.0f) beta = 0.f; if (gamma < 0.0f) gamma = 0.f;).
+ if (beta > 1.0f) return glm::vec3(0.0f, 1.0f, 0.0f); // beta alone exceeds 1: report all weight on tri[1]
+ else if (gamma > 1.0f) return glm::vec3(0.0f, 0.0f, 1.0f); // gamma alone exceeds 1: report all weight on tri[2]
+ else if (beta + gamma >1.0f) return glm::vec3(0.0f, beta, gamma); // alpha would be negative: return 0 for it; the triple then sums to more than 1
float alpha = 1.0 - beta - gamma;
return glm::vec3(alpha, beta, gamma);
}
@@ -99,3 +108,57 @@ float getZAtCoordinate(const glm::vec3 barycentricCoord, const glm::vec3 tri[3])
+ barycentricCoord.y * tri[1].z
+ barycentricCoord.z * tri[2].z);
}
+
+/**
+ * Nearest-texel lookup into an 8-bit interleaved texture.
+ *
+ * @param pTex      texel bytes; addressed as (x + y * w) * component, and the
+ *                  first three bytes of the selected texel are read
+ * @param texcoord  normalized coordinate; values outside [0, 1] (and NaN) are
+ *                  clamped to the nearest edge instead of indexing outside pTex
+ * @param w         texture width in texels (must be >= 1)
+ * @param h         texture height in texels (must be >= 1)
+ * @param component bytes per texel (must be >= 3, because three channels are read)
+ * @return the texel's first three channels scaled from [0, 255] to [0, 1]
+ */
+__host__ __device__ static
+glm::vec3 getTextureColor(unsigned char *pTex, glm::vec2 texcoord, int w, int h, int component) {
+    // Clamp per axis BEFORE the float->int conversion. Clamping the flattened
+    // index instead would land on a texel of the wrong row, and an unclamped
+    // coordinate reads outside pTex. The comparisons are ordered so that NaN
+    // fails the first test and becomes 0.
+    const float s = (texcoord.x > 0.f) ? (texcoord.x < 1.f ? texcoord.x : 1.f) : 0.f;
+    const float t = (texcoord.y > 0.f) ? (texcoord.y < 1.f ? texcoord.y : 1.f) : 0.f;
+
+    // Scale onto [0, w-1] x [0, h-1]; adding 0.5 before truncation rounds to
+    // the nearest texel.
+    const int x = 0.5f + (w - 1.f) * s;
+    const int y = 0.5f + (h - 1.f) * t;
+    const int base = (x + y * w) * component;
+
+    const float scale = 1.0f / 255.0f;
+    return scale * glm::vec3(pTex[base],
+                             pTex[base + 1],
+                             pTex[base + 2]);
+}
+
+/**
+ * Bilinearly filtered lookup into an 8-bit interleaved texture.
+ *
+ * Blends the four texels surrounding the sample position, weighting each by
+ * the fractional distance of the sample from it.
+ *
+ * @param pTex      texel bytes; addressed as (x + y * w) * component, and the
+ *                  first three bytes of each selected texel are read
+ * @param texcoord  normalized coordinate; values outside [0, 1] (and NaN) are
+ *                  clamped to the nearest edge instead of indexing outside pTex
+ * @param w         texture width in texels (must be >= 1)
+ * @param h         texture height in texels (must be >= 1)
+ * @param component bytes per texel (must be >= 3, because three channels are read)
+ * @return the blended first three channels scaled from [0, 255] to [0, 1]
+ */
+__host__ __device__ static
+glm::vec3 getBilinearTextureColor(unsigned char *pTex, glm::vec2 texcoord, int w, int h, int component) {
+    // Clamp per axis before any index is derived: previously only xNext/yNext
+    // were limited (and only at the upper edge), so a coordinate outside
+    // [0, 1] made x/y negative or >= w/h and read outside pTex. The comparisons
+    // are ordered so that NaN fails the first test and becomes 0.
+    const float s = (texcoord.x > 0.f) ? (texcoord.x < 1.f ? texcoord.x : 1.f) : 0.f;
+    const float t = (texcoord.y > 0.f) ? (texcoord.y < 1.f ? texcoord.y : 1.f) : 0.f;
+
+    // Continuous position in texel space and the texel at or below it.
+    const float u = (w - 1.f) * s;
+    const float v = (h - 1.f) * t;
+    const int x = floor(u);
+    const int y = floor(v);
+
+    // The fractional parts are the blend weights toward the next column / row.
+    const float u_ratio = u - x;
+    const float v_ratio = v - y;
+    const float u_opposite = 1 - u_ratio;
+    const float v_opposite = 1 - v_ratio;
+
+    // On the last column / row the "next" texel is the edge texel itself.
+    const int xNext = (x + 1 < w) ? x + 1 : w - 1;
+    const int yNext = (y + 1 < h) ? y + 1 : h - 1;
+
+    // Byte offsets of the 2x2 neighbourhood: (x,y), (xNext,y), (x,yNext), (xNext,yNext).
+    const int i0 = (x + y * w) * component;
+    const int i1 = (xNext + y * w) * component;
+    const int i2 = (x + yNext * w) * component;
+    const int i3 = (xNext + yNext * w) * component;
+
+    const glm::vec3 c0(pTex[i0], pTex[i0 + 1], pTex[i0 + 2]);
+    const glm::vec3 c1(pTex[i1], pTex[i1 + 1], pTex[i1 + 2]);
+    const glm::vec3 c2(pTex[i2], pTex[i2 + 1], pTex[i2 + 2]);
+    const glm::vec3 c3(pTex[i3], pTex[i3 + 1], pTex[i3 + 2]);
+
+    // Interpolate along x within each row, then along y between the rows.
+    const float scale = 1.0f / 255.0f;
+    return scale * ((c0 * u_opposite + c1 * u_ratio) * v_opposite + (c2 * u_opposite + c3 * u_ratio) * v_ratio);
+}
+
+#define min(a,b) ((a) < (b) ? (a) : (b)) // function-like macro: the selected argument is evaluated twice, so pass only side-effect-free expressions
+#define max(a,b) ((a) > (b) ? (a) : (b)) // same double-evaluation caveat; any later `min(`/`max(` token sequence in an including file expands to these macros
\ No newline at end of file