diff --git a/MyReadme.txt b/MyReadme.txt new file mode 100644 index 0000000..bb5c12c --- /dev/null +++ b/MyReadme.txt @@ -0,0 +1,21 @@ +*** Ray Tracer *** + +Implementation Details +The goal was to create a ray tracer on the GPU using CUDA. + +I initially thought of implementing parallelism using pixels directly. But then I realized it would be a simple loop within the kernel function executing the recursion. So I tried packing up the rays and creating threads based on how many rays were currently available. This greatly decreased the number of threads being used as most of the diffuse surfaces just returned color without any reflections or refraction. This also helped me implement refraction as each ray could now give rise to 2 rays. + +The bad part was that I did the compaction of the list on the CPU and transferred it back. So I was gaining memory by releasing threads, but I was losing time as I was going back to the CPU. I did not get time to implement the stream compaction on the GPU, which would have been the ideal solution. Maybe I will try it in the path tracer. + + +Features implemented: +* Diffuse Shading +* Phong's specular shading +* Fresnel's equation to calculate transmittance and reflectance using the refractive index. 
+* Specular Reflection +* Refraction +* Anti-aliasing + +Blog: +http://cudaraytracer.blogspot.com/ + diff --git a/PROJ1_MAC/bin/565raytracer b/PROJ1_MAC/bin/565raytracer index fb8e247..9f925f8 100755 Binary files a/PROJ1_MAC/bin/565raytracer and b/PROJ1_MAC/bin/565raytracer differ diff --git a/PROJ1_WIN/565Raytracer/565Raytracer.vcxproj b/PROJ1_WIN/565Raytracer/565Raytracer.vcxproj index fcc853d..75def98 100755 --- a/PROJ1_WIN/565Raytracer/565Raytracer.vcxproj +++ b/PROJ1_WIN/565Raytracer/565Raytracer.vcxproj @@ -92,6 +92,7 @@ $(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include;C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.0/C/common/inc;../shared/glew/includes;../shared/freeglut/includes + compute_20,sm_20 @@ -117,6 +118,7 @@ $(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include;C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.0/C/common/inc;../shared/glew/includes;../shared/freeglut/includes + compute_20,sm_20 diff --git a/PROJ1_WIN/565Raytracer/565Raytracer.vcxproj.user b/PROJ1_WIN/565Raytracer/565Raytracer.vcxproj.user index d7ca222..8a16524 100755 --- a/PROJ1_WIN/565Raytracer/565Raytracer.vcxproj.user +++ b/PROJ1_WIN/565Raytracer/565Raytracer.vcxproj.user @@ -1,7 +1,11 @@  - scene="../../scenes/sampleScene.txt" + scene=../../scenes/sampleScene_1.txt + WindowsLocalDebugger + + + scene=../../scenes/sampleScene.txt WindowsLocalDebugger \ No newline at end of file diff --git a/PROJ1_WIN/565Raytracer/renders/AntiAlias.PNG b/PROJ1_WIN/565Raytracer/renders/AntiAlias.PNG new file mode 100644 index 0000000..77dd464 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/AntiAlias.PNG differ diff --git a/PROJ1_WIN/565Raytracer/renders/AntiAliasing.bmp b/PROJ1_WIN/565Raytracer/renders/AntiAliasing.bmp new file mode 100644 index 0000000..ab2653b Binary files /dev/null and 
b/PROJ1_WIN/565Raytracer/renders/AntiAliasing.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/Diffuse.bmp b/PROJ1_WIN/565Raytracer/renders/Diffuse.bmp new file mode 100644 index 0000000..6803d43 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/Diffuse.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/DiffuseShading.bmp b/PROJ1_WIN/565Raytracer/renders/DiffuseShading.bmp new file mode 100644 index 0000000..0b2468c Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/DiffuseShading.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/Final.bmp b/PROJ1_WIN/565Raytracer/renders/Final.bmp new file mode 100644 index 0000000..92653cd Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/Final.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/Final_AA_Reflection.bmp b/PROJ1_WIN/565Raytracer/renders/Final_AA_Reflection.bmp new file mode 100644 index 0000000..dcbd69f Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/Final_AA_Reflection.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/Final_NonAA_ReflectSphere.bmp b/PROJ1_WIN/565Raytracer/renders/Final_NonAA_ReflectSphere.bmp new file mode 100644 index 0000000..b08f581 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/Final_NonAA_ReflectSphere.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/FlatShading.bmp b/PROJ1_WIN/565Raytracer/renders/FlatShading.bmp new file mode 100644 index 0000000..7b2cf34 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/FlatShading.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/Reflection_Refraction_1.3.bmp b/PROJ1_WIN/565Raytracer/renders/Reflection_Refraction_1.3.bmp new file mode 100644 index 0000000..eec2eb3 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/Reflection_Refraction_1.3.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/Reflection_Refraction_2.2.bmp b/PROJ1_WIN/565Raytracer/renders/Reflection_Refraction_2.2.bmp new file mode 100644 index 0000000..4332718 Binary files /dev/null 
and b/PROJ1_WIN/565Raytracer/renders/Reflection_Refraction_2.2.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/Refraction.bmp b/PROJ1_WIN/565Raytracer/renders/Refraction.bmp new file mode 100644 index 0000000..417bd0c Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/Refraction.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/Running.PNG b/PROJ1_WIN/565Raytracer/renders/Running.PNG new file mode 100644 index 0000000..7503f7c Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/Running.PNG differ diff --git a/PROJ1_WIN/565Raytracer/renders/Specular.bmp b/PROJ1_WIN/565Raytracer/renders/Specular.bmp new file mode 100644 index 0000000..3b89b47 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/Specular.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/TransparentShadow.bmp b/PROJ1_WIN/565Raytracer/renders/TransparentShadow.bmp new file mode 100644 index 0000000..b7f3f49 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/TransparentShadow.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/sampleScene1.0.bmp b/PROJ1_WIN/565Raytracer/renders/sampleScene1.0.bmp new file mode 100644 index 0000000..4d8a55b Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/sampleScene1.0.bmp differ diff --git a/PROJ1_WIN/565Raytracer/renders/sampleScene2.0 - Copy.bmp b/PROJ1_WIN/565Raytracer/renders/sampleScene2.0 - Copy.bmp new file mode 100644 index 0000000..12f93cb Binary files /dev/null and b/PROJ1_WIN/565Raytracer/renders/sampleScene2.0 - Copy.bmp differ diff --git a/ProjectRunning.PNG b/ProjectRunning.PNG new file mode 100644 index 0000000..7503f7c Binary files /dev/null and b/ProjectRunning.PNG differ diff --git a/scenes/sampleScene.txt b/scenes/sampleScene.txt index 936135b..72da5a3 100755 --- a/scenes/sampleScene.txt +++ b/scenes/sampleScene.txt @@ -36,7 +36,7 @@ EMITTANCE 0 MATERIAL 3 //red glossy RGB .63 .06 .04 -SPECEX 0 +SPECEX 10 SPECRGB 1 1 1 REFL 0 REFR 0 @@ -109,8 +109,8 @@ EMITTANCE 15 CAMERA RES 800 800 FOVY 
25
-ITERATIONS 5000
-FILE renders/sampleScene.bmp
+ITERATIONS 1
+FILE renders/sampleScene1.bmp
 frame 0
 EYE 0 4.5 12
 VIEW 0 0 -1
diff --git a/src/interactions.h b/src/interactions.h
index e18cfff..a077c10 100755
--- a/src/interactions.h
+++ b/src/interactions.h
@@ -40,22 +40,74 @@ __host__ __device__ bool calculateScatterAndAbsorption(ray& r, float& depth, Ab
 
 //TODO (OPTIONAL): IMPLEMENT THIS FUNCTION
 __host__ __device__ glm::vec3 calculateTransmissionDirection(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR) {
-  return glm::vec3(0,0,0);
+    // Refracts `incident` through a surface with normal `normal` using the vector form of Snell's law.
+    // Falls back to the mirror direction when total internal reflection leaves no transmitted ray.
+    // Assumes `normal` and `incident` are unit length -- TODO confirm against callers.
+    float eta = incidentIOR/transmittedIOR;
+    float cosIncident = -glm::dot(normal, incident);
+
+    // A negative cosine means the ray travels along the normal (e.g. it is leaving the object).
+    // Flip the normal so it opposes the ray; one formula then covers entering and exiting rays.
+    if (cosIncident < 0.0f)
+    {
+        normal = normal*(-1.0f);
+        cosIncident = -cosIncident;
+    }
+
+    // Squared cosine of the transmitted angle; a negative value means no transmitted ray exists.
+    float radicand = 1.0f - eta*eta*(1.0f - cosIncident*cosIncident);
+    if (radicand < 0.0f)
+    {
+        return calculateReflectionDirection(normal, incident);
+    }
+
+    return glm::normalize(normal*(eta*cosIncident - sqrt(radicand)) + incident*eta);
 }
 
 //TODO (OPTIONAL): IMPLEMENT THIS FUNCTION
 __host__ __device__ glm::vec3 calculateReflectionDirection(glm::vec3 normal, glm::vec3 incident) {
   //nothing fancy here
-  return glm::vec3(0,0,0);
+    // Mirrors `incident` about `normal`; the formula gives the same result for either normal orientation.
+    // Rr = Ri - 2N(Ri.N)
+    float dotProd = glm::dot(incident, normal);
+    return glm::normalize(incident - normal*2.0f*dotProd);
 }
 
 //TODO (OPTIONAL): IMPLEMENT THIS FUNCTION
-__host__ __device__ Fresnel calculateFresnel(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR, glm::vec3 reflectionDirection, glm::vec3 transmissionDirection) {
-  Fresnel fresnel;
-
-  fresnel.reflectionCoefficient = 1;
-  fresnel.transmissionCoefficient = 0;
-  return fresnel;
+__host__ __device__ Fresnel calculateFresnel(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR) {
+    // Unpolarized Fresnel reflectance (mean of the s- and p-polarized terms) and its complement.
+    // Only |incident . normal| is used, so the orientation of `normal` does not matter.
+    Fresnel fresnel;
+
+    // Default to a perfect mirror: used for non-positive IORs, total internal reflection,
+    // and the degenerate case where a Fresnel denominator vanishes.
+    fresnel.reflectionCoefficient = 1;
+    fresnel.transmissionCoefficient = 0;
+
+    if (transmittedIOR > 0.0 && incidentIOR > 0)
+    {
+        float cosIncidence = abs(glm::dot(incident, normal));
+        float eta = incidentIOR/transmittedIOR;
+        // cos^2 of the transmitted angle from Snell's law; negative => total internal reflection.
+        float cosTransmittedSq = 1.0f - eta*eta*(1.0f - cosIncidence*cosIncidence);
+
+        if (cosTransmittedSq >= 0.0f)
+        {
+            float cosTransmitted = sqrt(cosTransmittedSq);
+            float RsDenominator = incidentIOR*cosIncidence + transmittedIOR*cosTransmitted;
+            float RpDenominator = incidentIOR*cosTransmitted + transmittedIOR*cosIncidence;
+
+            if (RsDenominator > 0.0f && RpDenominator > 0.0f)
+            {
+                float RsRoot = (incidentIOR*cosIncidence - transmittedIOR*cosTransmitted)/RsDenominator;
+                float RpRoot = (incidentIOR*cosTransmitted - transmittedIOR*cosIncidence)/RpDenominator;
+
+                fresnel.reflectionCoefficient = (RsRoot*RsRoot + RpRoot*RpRoot)/2.0;
+                fresnel.transmissionCoefficient = 1 - fresnel.reflectionCoefficient;
+            }
+        }
+    }
+    return fresnel;
 }
 
 //LOOK: This function demonstrates cosine weighted random direction generation in a sphere!
@@ -90,7 +142,18 @@ __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere(glm::vec3 nor
 //Now that you know how cosine weighted direction generation works, try implementing non-cosine (uniform) weighted random direction generation.
 //This should be much easier than if you had to implement calculateRandomDirectionInHemisphere.
__host__ __device__ glm::vec3 getRandomDirectionInSphere(float xi1, float xi2) { - return glm::vec3(0,0,0); + float z = 1.0f - 2.0f*xi1; + float temp = 1.0f - z*z; + float r; + if (temp < 0.0f) + r = 0.0f; + else + r = sqrtf(temp); + + float phi = 2.f * PI * xi2; + float x = r * cosf(phi); + float y = r * sinf(phi); + return glm::normalize(glm::vec3(x, y, z)); } //TODO (PARTIALLY OPTIONAL): IMPLEMENT THIS FUNCTION diff --git a/src/intersections.h b/src/intersections.h index 2d9dcc8..7e96751 100755 --- a/src/intersections.h +++ b/src/intersections.h @@ -14,11 +14,13 @@ //Some forward declarations __host__ __device__ glm::vec3 getPointOnRay(ray r, float t); +__host__ __device__ glm::vec3 getPointOnRayUnnormalized(ray r, float t); __host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v); __host__ __device__ glm::vec3 getSignOfRay(ray r); __host__ __device__ glm::vec3 getInverseDirectionOfRay(ray r); __host__ __device__ float boxIntersectionTest(staticGeom sphere, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); __host__ __device__ float sphereIntersectionTest(staticGeom sphere, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); +__host__ __device__ float findIntersection(staticGeom geom, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); __host__ __device__ glm::vec3 getRandomPointOnCube(staticGeom cube, float randomSeed); //Handy dandy little hashing function that provides seeds for random number generation @@ -46,6 +48,10 @@ __host__ __device__ glm::vec3 getPointOnRay(ray r, float t){ return r.origin + float(t-.0001)*glm::normalize(r.direction); } +__host__ __device__ glm::vec3 getPointOnRayUnnormalized(ray r, float t){ + return r.origin + r.direction*t; +} + //LOOK: This is a custom function for multiplying cudaMat4 4x4 matrixes with vectors. //This is a workaround for GLM matrix multiplication not working properly on pre-Fermi NVIDIA GPUs. 
//Multiplies a cudaMat4 matrix and a vec4 and returns a vec3 clipped from the vec4
@@ -71,8 +77,104 @@ __host__ __device__ glm::vec3 getSignOfRay(ray r){
+//Clips the running [tNear, tFar] ray interval against one axis-aligned slab [slabMin, slabMax].
+//`origin` and `direction` are the ray's components along that axis. Returns false as soon as the
+//ray is known to miss the box (parallel and outside the slab, empty interval, or box behind the ray).
+__host__ __device__ bool clipRayAgainstSlab(float origin, float direction, float slabMin, float slabMax, float& tNear, float& tFar){
+    if (direction == 0)
+    {
+        // Ray is parallel to the slab planes: it can only hit if the origin lies between them
+        return !(origin < slabMin || origin > slabMax);
+    }
+
+    float t1 = (slabMin - origin)/direction;
+    float t2 = (slabMax - origin)/direction;
+
+    // Order the two plane crossings so that t1 is the entry and t2 the exit
+    if (t1 > t2)
+    {
+        float temp = t1;
+        t1 = t2;
+        t2 = temp;
+    }
+
+    // Keep the largest entry and the smallest exit seen so far
+    if (t1 > tNear)
+        tNear = t1;
+    if (t2 < tFar)
+        tFar = t2;
+
+    // Empty interval: the box is missed. Negative exit: the box is behind the ray.
+    return !(tNear > tFar || tFar < 0);
+}
+
 //TODO: IMPLEMENT THIS FUNCTION
 //Cube intersection test, return -1 if no intersection, otherwise, distance to intersection
 __host__ __device__ float boxIntersectionTest(staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){
+    // Slab-method test against the unit cube [-0.5, 0.5]^3 in the box's object space.
+    // On a hit, writes the hit point (getPointOnRay on the original ray) and the unit normal
+    // (face normal transformed by box.inverseTransposeTransform), and returns the ray parameter t.
+    // NOTE(review): t is measured along the untransformed direction, so it is only a world-space
+    // distance if r.direction is unit length -- confirm with callers.
+
+    glm::vec3 P0 = multiplyMV(box.inverseTransform, glm::vec4(r.origin, 1.0f));
+    glm::vec3 V0 = multiplyMV(box.inverseTransform, glm::vec4(r.direction, 0.0f));
+
+    ray rt;
+    rt.origin = P0;
+    rt.direction = V0;
 
-    return -1;
+    float xmin = -0.5, xmax = 0.5;
+    float ymin = -0.5, ymax = 0.5;
+    float zmin = -0.5, zmax = 0.5;
+
+    // A ray with no direction cannot hit anything (and would leave tNear/tFar at their sentinels)
+    if (rt.direction.x == 0 && rt.direction.y == 0 && rt.direction.z == 0)
+        return -1;
+
+    float tFar = 999999;
+    float tNear = -999999;
+
+    // Intersect the ray with the X, Y and Z slabs in turn
+    if (!clipRayAgainstSlab(rt.origin.x, rt.direction.x, xmin, xmax, tNear, tFar))
+        return -1;
+    if (!clipRayAgainstSlab(rt.origin.y, rt.direction.y, ymin, ymax, tNear, tFar))
+        return -1;
+    if (!clipRayAgainstSlab(rt.origin.z, rt.direction.z, zmin, zmax, tNear, tFar))
+        return -1;
+
+    // Box survived all tests: tNear is the entry point and tFar the exit point.
+    // If the entry is behind or at the origin, the ray starts inside the box or on its surface, so the exit face is the hit.
+    float t;
+    if (tNear < 1e-3f)
+    {
+        if (tFar < 1e-3f) // exit is at the origin as well: the ray only touches the surface
+            return -1;
+        t = tFar;
+    }
+    else
+    {
+        t = tNear;
+    }
+
+    // Find which face(s) the object-space hit point lies on to build the face normal
+    glm::vec3 p = getPointOnRayUnnormalized(rt, t);
+    glm::vec4 surNormalTemp = glm::vec4(0.0,0.0,0.0,0.0);
+    if (p.x <= xmin+(1e-3) && p.x >= xmin-(1e-3))
+        surNormalTemp.x = -1;
+    if (p.y <= ymin+(1e-3) && p.y >= ymin-(1e-3))
+        surNormalTemp.y = -1;
+    if (p.z <= zmin+(1e-3) && p.z >= zmin-(1e-3))
+        surNormalTemp.z = -1;
+    if (p.x <= xmax+(1e-3) && p.x >= xmax-(1e-3))
+        surNormalTemp.x = 1;
+    if (p.y <= ymax+(1e-3) && p.y >= ymax-(1e-3))
+        surNormalTemp.y = 1;
+    if (p.z <= zmax+(1e-3) && p.z >= zmax-(1e-3))
+        surNormalTemp.z = 1;
+    normal = multiplyMV(box.inverseTransposeTransform, surNormalTemp);
+    normal = glm::normalize(normal);
+
+    intersectionPoint = getPointOnRay(r, t);
+    //intersectionPoint = multiplyMV(box.transform, glm::vec4(getPointOnRay(rt, t), 1.0));
+    return t;
 }
 
 //LOOK: Here's an intersection test example from a sphere. Now you just need to figure out cube and, optionally, triangle.
@@ -101,10 +287,34 @@ __host__ __device__ float sphereIntersectionTest(staticGeom sphere, ray r, glm: if (t1 < 0 && t2 < 0) { return -1; } else if (t1 > 0 && t2 > 0) { - t = min(t1, t2); + if (t1 > 0.001) + { + if (t2 > 0.001) + { + t = min(t1, t2); + } + else + { + t = t1; + } + } + else + { + if (t2 > 0.001) + { + t = t2; + } + else + { + t = -1; + } + } } else { t = max(t1, t2); - } + } + + if(t<0.001) + return -1; glm::vec3 realIntersectionPoint = multiplyMV(sphere.transform, glm::vec4(getPointOnRay(rt, t), 1.0)); glm::vec3 realOrigin = multiplyMV(sphere.transform, glm::vec4(0,0,0,1)); @@ -115,6 +325,22 @@ __host__ __device__ float sphereIntersectionTest(staticGeom sphere, ray r, glm: return glm::length(r.origin - realIntersectionPoint); } +// An intersection function calling into each of the individual object intersection functions +__host__ __device__ float findIntersection(staticGeom geom, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){ + switch (geom.type) + { + case GEOMTYPE::SPHERE: + return sphereIntersectionTest(geom, r, intersectionPoint, normal); + case GEOMTYPE::CUBE: + return boxIntersectionTest(geom, r, intersectionPoint, normal); + case GEOMTYPE::MESH: + // TODO - To Be implemented + return -1; + default: + return -1; + } +} + //returns x,y,z half-dimensions of tightest bounding box __host__ __device__ glm::vec3 getRadiuses(staticGeom geom){ glm::vec3 origin = multiplyMV(geom.transform, glm::vec4(0,0,0,1)); @@ -176,8 +402,43 @@ __host__ __device__ glm::vec3 getRandomPointOnCube(staticGeom cube, float random //TODO: IMPLEMENT THIS FUNCTION //Generates a random point on a given sphere __host__ __device__ glm::vec3 getRandomPointOnSphere(staticGeom sphere, float randomSeed){ + thrust::default_random_engine rng(hash(randomSeed)); + thrust::uniform_real_distribution u01(-0.5, 0.5); + thrust::uniform_real_distribution u02(0.0,TWO_PI); + + // z = R sin(theta), theta = asin(z/R). 
+ // Here R = 0.5 + float radius = 0.5f; + float z = (float)u01(rng); + float theta = asin(z/radius); + float phi = (float)u02(rng); + + glm::vec3 point; + // x = R cos(theta)cos(phi) + // y = R cos(theta)sin(phi) + point.x = radius*cos(theta)*cos(phi); + point.y = radius*cos(theta)*sin(phi); + point.z = z; + + glm::vec3 randPoint = multiplyMV(sphere.transform, glm::vec4(point,1.0f)); + return randPoint; +} + +__host__ __device__ glm::vec3 getRandomPointOnObject(staticGeom geom, float randomSeed) +{ - return glm::vec3(0,0,0); + switch (geom.type) + { + case GEOMTYPE::SPHERE: + return getRandomPointOnSphere(geom, randomSeed); + case GEOMTYPE::CUBE: + return getRandomPointOnCube(geom, randomSeed); + case GEOMTYPE::MESH: + // TODO - To Be implemented + return glm::vec3(0,0,0); + default: + return glm::vec3(0,0,0); + } } #endif \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 4e94892..6e46430 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -113,20 +113,29 @@ void runCuda(){ //pack geom and material arrays geom* geoms = new geom[renderScene->objects.size()]; material* materials = new material[renderScene->materials.size()]; - - for(int i=0; iobjects.size(); i++){ + PointLight* pointLights = new PointLight[renderScene->pointLights.size()]; + + for(unsigned int i=0; iobjects.size(); ++i){ geoms[i] = renderScene->objects[i]; } - for(int i=0; imaterials.size(); i++){ + for(unsigned int i=0; imaterials.size(); ++i){ materials[i] = renderScene->materials[i]; } - + for(unsigned int i=0; ipointLights.size(); ++i){ + pointLights[i].position = renderScene->pointLights[i].position; + pointLights[i].color = renderScene->pointLights[i].color; + } // execute the kernel - cudaRaytraceCore(dptr, renderCam, targetFrame, iterations, materials, renderScene->materials.size(), geoms, renderScene->objects.size() ); + cudaRaytraceCore(dptr, renderCam, targetFrame, iterations, materials, renderScene->materials.size(), + geoms, renderScene->objects.size(), pointLights, 
renderScene->pointLights.size() ); // unmap buffer object cudaGLUnmapBufferObject(pbo); + + delete[] geoms; + delete[] materials; + delete[] pointLights; }else{ if(!finishedRender){ @@ -136,12 +145,12 @@ void runCuda(){ for(int x=0; xresolution.x; x++){ for(int y=0; yresolution.y; y++){ int index = x + (y * renderCam->resolution.x); - outputImage.writePixelRGB(x,y,renderCam->image[index]); + outputImage.writePixelRGB(x,renderCam->resolution.y-y,renderCam->image[index]); } } gammaSettings gamma; - gamma.applyGamma = true; + gamma.applyGamma = false; gamma.gamma = 1.0/2.2; gamma.divisor = renderCam->iterations; outputImage.setGammaSettings(gamma); @@ -201,7 +210,7 @@ void runCuda(){ void display(){ runCuda(); - string title = "565Raytracer | " + utilityCore::convertIntToString(iterations) + " Frames"; + string title = "Raytracer | " + utilityCore::convertIntToString(iterations) + " Frames"; glutSetWindowTitle(title.c_str()); glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); @@ -216,6 +225,8 @@ void runCuda(){ glutPostRedisplay(); glutSwapBuffers(); + + cudaDeviceReset(); } void keyboard(unsigned char key, int x, int y) @@ -259,7 +270,7 @@ void runCuda(){ glutInit(&argc, argv); glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); glutInitWindowSize(width, height); - glutCreateWindow("565Raytracer"); + glutCreateWindow("Raytracer"); // Init GLEW glewInit(); diff --git a/src/main.h b/src/main.h index 55daf50..832373f 100755 --- a/src/main.h +++ b/src/main.h @@ -41,7 +41,7 @@ using namespace std; scene* renderScene; camera* renderCam; int targetFrame; -int iterations; +unsigned int iterations; bool finishedRender; bool singleFrameMode; diff --git a/src/raytraceKernel.cu b/src/raytraceKernel.cu index dae7b03..8892ff8 100755 --- a/src/raytraceKernel.cu +++ b/src/raytraceKernel.cu @@ -17,6 +17,8 @@ #include "interactions.h" #include +__device__ int numRaysDevice = 0; + void checkCUDAError(const char *msg) { cudaError_t err = cudaGetLastError(); if( cudaSuccess != err) { @@ -38,10 +40,27 
@@ __host__ __device__ glm::vec3 generateRandomNumberFromThread(glm::vec2 resolutio //TODO: IMPLEMENT THIS FUNCTION //Function that does the initial raycast from the camera -__host__ __device__ ray raycastFromCameraKernel(glm::vec2 resolution, float time, int x, int y, glm::vec3 eye, glm::vec3 view, glm::vec3 up, glm::vec2 fov){ +__host__ __device__ ray raycastFromCameraKernel(glm::vec2 resolution, float time, float x, float y, glm::vec3 eye, glm::vec3 view, glm::vec3 up, glm::vec2 fov){ ray r; - r.origin = glm::vec3(0,0,0); - r.direction = glm::vec3(0,0,-1); + r.origin = eye; + //r.direction = glm::vec3(0,0,-1); + + glm::vec3 A = glm::normalize(glm::cross(view, up)); + glm::vec3 B = glm::normalize(glm::cross(A, view)); + + float tanVert = tan(fov.y*PI/180); + float tanHor = tan(fov.x*PI/180); + + float camDistFromScreen = (float)((resolution.y/2.0)/tanVert); + glm::vec3 C = view*camDistFromScreen; + glm::vec3 M = eye + C; + + //glm::vec3 H = A * (camDistFromScreen * tanHor); + //glm::vec3 V = B * (camDistFromScreen * tanVert); + + glm::vec3 point = M + A*(resolution.x/2.0f)*(2.0f*(x/(float)resolution.x) - 1) + B*(resolution.y/2.0f)*(2.0f*(y/(float)resolution.y) - 1); + r.direction = glm::normalize(point - eye); + return r; } @@ -60,14 +79,15 @@ __global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3* int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; - int index = x + (y * resolution.x); + int oldIndex = x + (y * resolution.x); + int newIndex = resolution.x-x + ((resolution.y - y) * resolution.x); if(x<=resolution.x && y<=resolution.y){ glm::vec3 color; - color.x = image[index].x*255.0; - color.y = image[index].x*255.0; - color.z = image[index].x*255.0; + color.x = image[oldIndex].x*255.0; + color.y = image[oldIndex].y*255.0; + color.z = image[oldIndex].z*255.0; if(color.x>255){ color.x = 255; @@ -82,86 +102,538 @@ __global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3* } 
// Each thread writes one pixel location in the texture (textel) - PBOpos[index].w = 0; - PBOpos[index].x = color.x; - PBOpos[index].y = color.y; - PBOpos[index].z = color.z; + PBOpos[newIndex].w = 0; + PBOpos[newIndex].x = color.x; + PBOpos[newIndex].y = color.y; + PBOpos[newIndex].z = color.z; } } +__global__ void ShowTotalRays() +{ + printf("Number: %d", numRaysDevice); +} + +__global__ void ResetTotalRaysCount() +{ + numRaysDevice = 0; +} + +/* +__device__ int CalculateTotalRays(int rayCnt) +{ + numRays = atomicAdd(&numRays, rayCnt); +}*/ + +__device__ float GetIntersectionValues(ray rayToBeTraced, material* materials, int numberOfMaterials, staticGeom* geoms, int numberOfGeoms, + int& minGeomNum, int& materialId, glm::vec3& minIntersectionPoint, glm::vec3& minNormal) +{ + glm::vec3 intersectionPoint; + glm::vec3 normal; + float t = -1; + + for (int geomNum = 0; geomNum < numberOfGeoms; ++geomNum) { + float tTemp = findIntersection(geoms[geomNum], rayToBeTraced, intersectionPoint, normal); + if (tTemp > 0.001f) { + if (t < 0 || tTemp < t) { + t = tTemp; + materialId = geoms[geomNum].materialid; + minIntersectionPoint = intersectionPoint; + minNormal = normal; + minGeomNum = geomNum; + } + } + } + + return t; +} + +__device__ glm::vec3 LightFeeler(glm::vec3 minIntersectionPoint, glm::vec3 minNormal, int minGeom, + staticGeom* geoms, int numberOfGeoms, material* materials, int numberOfMaterials, material currMaterial, glm::vec3 rayDirection, + int randNum, float reductionFactor) +{ + glm::vec3 intersectionPoint; + glm::vec3 normal; + glm::vec3 color = glm::vec3(0,0,0); + + float Ka = 0.3; // ambient light factor + float Kd = 0.8f; + float Ks = 0.8f; + + color = Ka * currMaterial.color; + + // Shadow Feeler + int lightNum = -1; + for (int lightNumber=0; lightNumber < numberOfGeoms; ++lightNumber) + { + if (materials[geoms[lightNumber].materialid].emittance > 0) + { + lightNum = lightNumber; + } + } + if (lightNum >= 0) + { + /* Create pseudo - random number 
generator */ + float seed = randNum * 1.0f; + int totalLightRays = 1; + for (int numLightRays = 1; numLightRays <= totalLightRays; ++numLightRays) + { + seed += 1.5f; + //glm::vec3 lightPos = getRandomPointOnObject(geoms[lightNum], seed); + glm::vec3 lightPos = glm::vec3(0,9.2,0.0f); + glm::vec3 lightColor = glm::vec3(1,1,1); + glm::vec3 lightDir = minIntersectionPoint - lightPos; + ray lightRay; + lightRay.origin = minIntersectionPoint; + lightRay.direction = glm::normalize(lightDir*(-1.0f)); + float tLightIntersect = -1; + bool blocked = false; + glm::vec3 lightColorTemp = lightColor; + for (int geomNum = 0; geomNum < numberOfGeoms; ++geomNum) + { + if (materials[geoms[geomNum].materialid].emittance > 0 && geomNum != minGeom) + { + continue; + } + else + { + tLightIntersect = findIntersection(geoms[geomNum], lightRay, intersectionPoint, normal); + if(tLightIntersect > 0) + { + if ((glm::length(lightDir) + 0.001) > tLightIntersect) + { + // Blocked by other object + if (materials[geoms[geomNum].materialid].hasRefractive > 0) + { + // Object is transparent + lightColorTemp *= (materials[geoms[geomNum].materialid].color*0.9f); + } + else + { + blocked = true; + break; + } + } + } + } + } + + lightColor = lightColorTemp; + + if (!blocked) + { + float diffuseComponent = Kd*glm::dot(glm::normalize(lightRay.direction), minNormal); + float specDotProd = glm::dot(calculateReflectionDirection(minNormal, lightRay.direction), rayDirection);//rayToBeTraced.rayValue.direction); + if (specDotProd < 0.0f) + specDotProd = 0.0f; + + float specularComponent = 0; + if (currMaterial.specularExponent > 0) + specularComponent = Ks*pow(specDotProd, currMaterial.specularExponent); + + glm::vec3 newColor = lightColor* (reductionFactor)*(currMaterial.color*diffuseComponent + currMaterial.specularColor*specularComponent)/(float)totalLightRays; + color += newColor; + } + } + } + + return color; +} + + +__device__ glm::vec3 FindNewRaysAndCalculateColor(RayInPackage rayToBeTraced, 
RayOutPackage& rayOutPackage, material* materials, int numberOfMaterials, + staticGeom* geoms, int numberOfGeoms, int minGeomNum, int materialId, glm::vec3 minIntersectionPoint, glm::vec3 minNormal, int randNum) +{ + glm::vec3 intersectionPoint; + glm::vec3 normal; + + rayOutPackage.numRays = 0; + rayOutPackage.color = glm::vec3(0,0,0); + rayOutPackage.reductionFactor = rayToBeTraced.reductionFactor; + + // Get the refractive index. If the ray is inside the object the refractive index is from object to air + float incidentIOR, transmittedIOR; + if (rayToBeTraced.isInside) + { + incidentIOR = materials[materialId].indexOfRefraction; + transmittedIOR = 1.0; + } + else + { + incidentIOR = 1.0; + transmittedIOR = materials[materialId].indexOfRefraction; + } + Fresnel fresnel; + fresnel.reflectionCoefficient = materials[materialId].hasReflective; // use the reflection coefficient suplied if the object has no refraction + + if (materials[materialId].hasRefractive > 0) + { + fresnel = calculateFresnel(minNormal, rayToBeTraced.rayValue.direction, incidentIOR, transmittedIOR); + // Get the refracted ray + glm::vec3 refractedRay; + refractedRay = calculateTransmissionDirection(minNormal, rayToBeTraced.rayValue.direction, incidentIOR, transmittedIOR); + rayOutPackage.isTransPresent = true; + rayOutPackage.rayValueTrans.direction = refractedRay; + rayOutPackage.rayValueTrans.origin = minIntersectionPoint; + rayOutPackage.index = rayToBeTraced.index; + ++rayOutPackage.numRays; + rayOutPackage.isInsideObject = !rayToBeTraced.isInside; + //rayOutPackage.reductionFactor *= fresnel.reflectionCoefficient; + rayOutPackage.color = materials[materialId].color*rayOutPackage.reductionFactor; + //atomicAdd(&numRaysDevice, 1); + } + + if (materials[materialId].hasReflective > 0) + { + // Get the reflected ray + glm::vec3 reflectedRay = calculateReflectionDirection(minNormal, rayToBeTraced.rayValue.direction); + rayOutPackage.isPresent = true; + rayOutPackage.rayValue.direction = 
reflectedRay; + rayOutPackage.rayValue.origin = minIntersectionPoint; + rayOutPackage.index = rayToBeTraced.index; + ++rayOutPackage.numRays; + rayOutPackage.reductionFactor *= fresnel.reflectionCoefficient; + rayOutPackage.color = materials[materialId].color*rayOutPackage.reductionFactor; + //atomicAdd(&numRaysDevice, 1); + } + + return LightFeeler(minIntersectionPoint, minNormal, minGeomNum, geoms, numberOfGeoms, + materials, numberOfMaterials, materials[materialId], rayToBeTraced.rayValue.direction, randNum, rayToBeTraced.reductionFactor); +} + //TODO: IMPLEMENT THIS FUNCTION //Core raytracer kernel -__global__ void raytraceRay(glm::vec2 resolution, float time, cameraData cam, int rayDepth, glm::vec3* colors, - staticGeom* geoms, int numberOfGeoms){ +__global__ void raytraceRay(glm::vec2 resolution, float time, cameraData cam, int rayDepth, float offsetX, float offsetY, glm::vec3* colors, + RayOutPackage* rayOutPackageList, material* materials, int numberOfMaterials, staticGeom* geoms, int numberOfGeoms){ - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - int index = x + (y * resolution.x); + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * resolution.x); + + int materialId; + int minGeomNum; + glm::vec3 minIntersectionPoint; + glm::vec3 minNormal; - if((x<=resolution.x && y<=resolution.y)){ + rayOutPackageList[index].rayValue = raycastFromCameraKernel(cam.resolution, time, ((float)x)+offsetX, ((float)y)+offsetY, cam.position, cam.view, cam.up, cam.fov); + rayOutPackageList[index].isPresent = false; + rayOutPackageList[index].isTransPresent = false; + rayOutPackageList[index].index = index; + rayOutPackageList[index].numRays = 0; + rayOutPackageList[index].isInsideObject = false; - colors[index] = generateRandomNumberFromThread(resolution, time, x, y); - } + float t = GetIntersectionValues(rayOutPackageList[index].rayValue, materials, 
numberOfMaterials, geoms, numberOfGeoms,
+        minGeomNum, materialId, minIntersectionPoint, minNormal);
+
+    RayInPackage rayInPack;
+    rayInPack.index = index;
+    rayInPack.isInside = false;
+    rayInPack.rayValue = rayOutPackageList[index].rayValue;
+    rayInPack.reductionFactor = 1.0f;
+    if (t > 0 && materialId < numberOfMaterials)
+    {
+        colors[index] += FindNewRaysAndCalculateColor(rayInPack, rayOutPackageList[index], materials, numberOfMaterials,
+            geoms, numberOfGeoms, minGeomNum, materialId, minIntersectionPoint, minNormal, index);
+    }
+}
+
+// Secondary-ray pass on a 1D launch: thread `index` traces raysToBeTraced[index],
+// adds the shaded colour to the pixel named by that ray's `index` field and
+// describes any follow-up rays in rayOutPackageList[index].
+// NOTE(review): the kernel has no ray-count parameter, so it cannot guard
+// against threads past the end of raysToBeTraced. The caller must launch exactly
+// as many threads as there are rays, or a count must be added to the signature.
+// NOTE(review): two rays spawned by the same pixel both `+=` the same colors
+// element without synchronisation - verify whether an atomic add is needed.
+__global__ void RayTrace(RayInPackage* raysToBeTraced, RayOutPackage* rayOutPackageList, glm::vec3* colors,
+                            material* materials, int numberOfMaterials, staticGeom* geoms, int numberOfGeoms)
+{
+    int index = (blockIdx.x * blockDim.x) + threadIdx.x;
+
+    float t = -1;
+    glm::vec3 intersectionPoint;
+    glm::vec3 normal;
+    int materialId;
+    int minGeomNum;
+    glm::vec3 minIntersectionPoint;
+    glm::vec3 minNormal;
+
+    t = GetIntersectionValues(raysToBeTraced[index].rayValue, materials, numberOfMaterials, geoms, numberOfGeoms,
+        minGeomNum, materialId, minIntersectionPoint, minNormal);
+
+    if (t > 0 && materialId < numberOfMaterials)
+    {
+        colors[raysToBeTraced[index].index] += FindNewRaysAndCalculateColor(raysToBeTraced[index], rayOutPackageList[index], materials, numberOfMaterials,
+            geoms, numberOfGeoms, minGeomNum, materialId, minIntersectionPoint, minNormal, index);
+    }
+}
+
+// Sets one pixel of `color` to black per thread (2D launch, row-major layout
+// with `resolution.x` pixels per row).
+__global__ void ResetColors(glm::vec3* color, glm::vec2 resolution)
+{
+    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+    // A grid rounded up to whole tiles contains threads outside the image; they
+    // must not write (x past the row end would alias the next row, y past the
+    // last row would run off the buffer).
+    if (x >= resolution.x || y >= resolution.y)
+        return;
+
+    int index = x + (y * resolution.x);
+    color[index] = glm::vec3(0,0,0);
+}
+
+// Adds `weight` times one sample image (`color`) into the accumulated image
+// (`finalColor`), one pixel per thread on a 2D launch.
+__global__ void CalculateColorsForAntiAliasing(glm::vec3* finalColor, glm::vec3* color, float weight, glm::vec2 resolution)
+{
+    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+    int index = x + (y * resolution.x);
+
+    // Only threads that map to a real pixel accumulate; padding threads of a
+    // rounded-up grid would otherwise read and write out of bounds.
+    if (x < resolution.x && y < resolution.y)
+        finalColor[index] += color[index]*weight;
+}
+
+// Clamps every channel of one pixel of `color` to the range [0, 1], one pixel
+// per thread on a 2D launch (row-major, `resolution.x` pixels per row).
+__global__ void ClampColors(glm::vec3* color, glm::vec2 resolution)
+{
+    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+    // Threads in the padding of a rounded-up grid have no pixel to clamp.
+    if (x >= resolution.x || y >= resolution.y)
+        return;
+
+    int index = x + (y * resolution.x);
+
+    // Work on a local copy so the pixel is read once and written once.
+    glm::vec3 pixel = color[index];
+
+    if (pixel.x > 1)
+        pixel.x = 1;
+    if (pixel.y > 1)
+        pixel.y = 1;
+    if (pixel.z > 1)
+        pixel.z = 1;
+
+    if (pixel.x < 0)
+        pixel.x = 0;
+    if (pixel.y < 0)
+        pixel.y = 0;
+    if (pixel.z < 0)
+        pixel.z = 0;
+
+    color[index] = pixel;
 }
 
 //TODO: FINISH THIS FUNCTION
 // Wrapper for the __global__ call that sets up the kernel calls and does a ton of memory management
-void cudaRaytraceCore(uchar4* PBOpos, camera* renderCam, int frame, int iterations, material* materials, int numberOfMaterials, geom* geoms, int numberOfGeoms){
-
-  int traceDepth = 1; //determines how many bounces the raytracer traces
+void cudaRaytraceCore(uchar4* PBOpos, camera* renderCam, int frame, int iterations, material* materials, int numberOfMaterials,
+                      geom* geoms, int numberOfGeoms, PointLight* pointLights, int numberOfPointLights){
-  // set up crucial magic
-  int tileSize = 8;
-  dim3 threadsPerBlock(tileSize, tileSize);
-  dim3 fullBlocksPerGrid((int)ceil(float(renderCam->resolution.x)/float(tileSize)), (int)ceil(float(renderCam->resolution.y)/float(tileSize)));
-
-  //send image to GPU
-  glm::vec3* cudaimage = NULL;
-  cudaMalloc((void**)&cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3));
-  cudaMemcpy( cudaimage, renderCam->image, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyHostToDevice);
-
-  //package geometry and materials and sent to GPU
-  staticGeom* geomList = new staticGeom[numberOfGeoms];
-  for(int i=0; iresolution;
-  cam.position = renderCam->positions[frame];
-  cam.view = renderCam->views[frame];
-  cam.up = renderCam->ups[frame];
-  cam.fov = renderCam->fov;
+    //int deviceID = -1;
+    // if(cudaSuccess == cudaGetDevice(&deviceID))
+    // {
+ // cudaDeviceProp devprop; + // cudaGetDeviceProperties(&devprop, deviceID); + // std::cout << "Thread Per Block: " << devprop.maxThreadsPerBlock << std::endl; + //} + + float offsetX = 0; + float offsetY = 0; + glm::vec3* cudaFinalImage = NULL; + cudaMalloc((void**)&cudaFinalImage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3)); + int antiAliasIter = 1; + + // set up crucial magic + int tileSize = 8; + dim3 threadsPerBlock(tileSize, tileSize); + dim3 fullBlocksPerGrid((int)ceil(float(renderCam->resolution.x)/float(tileSize)), (int)ceil(float(renderCam->resolution.y)/float(tileSize))); + + ResetColors<<>>(cudaFinalImage, renderCam->resolution); + + bool antiAlias = false; + while (antiAliasIter <= 5) + { + float weight; + if (antiAlias) + { + if (antiAliasIter == 1) + { + offsetX = 0; + offsetY = 0; + weight = 1.0f/2.0f; + } + else + { + weight = 1.0f/8.0f; + } + + if (antiAliasIter == 2) + { + offsetX = -0.33f;//(((float)(rand()%100)+0.01f)/200.0f); + offsetY = -0.33f;//(((float)(rand()%100)+0.01f)/200.0f); + } + else if (antiAliasIter == 3) + { + offsetX = -0.33f;//(((float)(rand()%100)+0.01f)/200.0f); + offsetY = 0.33f;//(((float)(rand()%100)+0.01f)/200.0f); + } + else if (antiAliasIter == 4) + { + offsetX = 0.33f;//(((float)(rand()%100)+0.01f)/200.0f); + offsetY = -0.33f;//(((float)(rand()%100)+0.01f)/200.0f); + } + else if (antiAliasIter == 5) + { + offsetX = 0.33f;//(((float)(rand()%100)+0.01f)/200.0f); + offsetY = 0.33f;//(((float)(rand()%100)+0.01f)/200.0f); + } + + ++antiAliasIter; + } + else + { + antiAliasIter = 6; + weight = 1.0; + } + + int traceDepth = 1; //determines how many bounces the raytracer traces + + //send image to GPU + glm::vec3* cudaimage = NULL; + cudaMalloc((void**)&cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3)); + //cudaMemcpy( cudaimage, renderCam->image, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyHostToDevice); + 
ResetColors<<>>(cudaimage, renderCam->resolution); + + // Create a rayPackage + // The rayPackage struct will store the ray details which can be then called using the kernel to be parallely processed + int totalPixels = (int)renderCam->resolution.x*(int)renderCam->resolution.y; + RayOutPackage* rayPackReset = new RayOutPackage[totalPixels]; + for (int cnt=0; cntresolution; + cam.position = renderCam->positions[frame]; + cam.view = renderCam->views[frame]; + cam.up = renderCam->ups[frame]; + cam.fov = renderCam->fov; + + + RayOutPackage* rayOutPackageList = NULL; + cudaMalloc((void**)&rayOutPackageList, totalPixels*sizeof(RayOutPackage)); + cudaMemcpy(rayOutPackageList, rayPackReset, totalPixels*sizeof(RayOutPackage), cudaMemcpyHostToDevice); + + //kernel launches + raytraceRay<<>>(renderCam->resolution, (float)iterations, cam, traceDepth, offsetX, offsetY, + cudaimage, rayOutPackageList, cudaMaterials, numberOfMaterials, cudageoms, numberOfGeoms); + ++traceDepth; + + // Loop through rays + int maxRayDepth = 10; + int totalRays = totalPixels; + while (traceDepth <= maxRayDepth && totalRays > 0) + { + RayOutPackage* rayPack = new RayOutPackage[totalRays]; + cudaMemcpy( rayPack, rayOutPackageList, totalRays*sizeof(RayOutPackage), cudaMemcpyDeviceToHost); + + RayInPackage* rayTracedHost = new RayInPackage[totalRays]; + int rayIndex = 0; + for (int cnt=0; cnt 0) + { + if (rayPack[cnt].isPresent) + { + rayTracedHost[rayIndex].rayValue = rayPack[cnt].rayValue; + rayTracedHost[rayIndex].index = rayPack[cnt].index; + rayTracedHost[rayIndex].isInside = false; + rayTracedHost[rayIndex].reductionFactor = rayPack[cnt].reductionFactor; + ++rayIndex; + } + if (rayPack[cnt].isTransPresent) + { + rayTracedHost[rayIndex].rayValue = rayPack[cnt].rayValueTrans; + rayTracedHost[rayIndex].index = rayPack[cnt].index; + rayTracedHost[rayIndex].isInside = rayPack[cnt].isInsideObject; + rayTracedHost[rayIndex].reductionFactor = rayPack[cnt].reductionFactor; + ++rayIndex; + } + } + } + + if 
(rayIndex <= 0) + { + // Break out of here as there are no more rays to be processed + delete[] rayTracedHost; + delete[] rayPack; + break; + } + + RayInPackage* raysToBeTraced = NULL; + cudaMalloc((void**)&raysToBeTraced, rayIndex*sizeof(RayInPackage)); + cudaMemcpy(raysToBeTraced, rayTracedHost, rayIndex*sizeof(RayInPackage), cudaMemcpyHostToDevice); + + cudaFree(rayOutPackageList); + cudaMalloc((void**)&rayOutPackageList, rayIndex*sizeof(RayOutPackage)); + cudaMemcpy(rayOutPackageList, rayPackReset, rayIndex*sizeof(RayOutPackage), cudaMemcpyHostToDevice); + + tileSize = 16; + int threadsPerBlockForRays = tileSize; + int fullBlocksPerGridForRays = ceil(float(rayIndex)/float(tileSize)); + ResetTotalRaysCount<<<1,1>>>(); + RayTrace<<>>(raysToBeTraced, rayOutPackageList, + cudaimage, cudaMaterials, numberOfMaterials, cudageoms, numberOfGeoms); + ++traceDepth; + totalRays = rayIndex; + + cudaFree(raysToBeTraced); + delete[] rayPack; + delete[] rayTracedHost; + } + + CalculateColorsForAntiAliasing<<>>(cudaFinalImage, cudaimage, weight, renderCam->resolution); + + cudaFree( rayOutPackageList); + cudaFree( cudaimage ); + cudaFree( cudageoms ); + cudaFree( cudaMaterials ); + cudaFree( cudaPointLights ); + delete[] geomList; + delete[] rayPackReset; + + // make certain the kernel has completed + cudaThreadSynchronize(); + } - //kernel launches - raytraceRay<<>>(renderCam->resolution, (float)iterations, cam, traceDepth, cudaimage, cudageoms, numberOfGeoms); + ClampColors<<>>(cudaFinalImage, renderCam->resolution); - sendImageToPBO<<>>(PBOpos, renderCam->resolution, cudaimage); + sendImageToPBO<<>>(PBOpos, renderCam->resolution, cudaFinalImage); - //retrieve image from GPU - cudaMemcpy( renderCam->image, cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyDeviceToHost); + //retrieve image from GPU + cudaMemcpy( renderCam->image, cudaFinalImage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), 
cudaMemcpyDeviceToHost); - //free up stuff, or else we'll leak memory like a madman - cudaFree( cudaimage ); - cudaFree( cudageoms ); - delete geomList; + //int a; + //std::cin >> a; + //free up stuff, or else we'll leak memory like a madman + + cudaFree( cudaFinalImage ); - // make certain the kernel has completed - cudaThreadSynchronize(); + // make certain the kernel has completed + cudaThreadSynchronize(); - checkCUDAError("Kernel failed!"); + checkCUDAError("Kernel failed!"); } diff --git a/src/raytraceKernel.h b/src/raytraceKernel.h index 331e5ce..f1402ef 100755 --- a/src/raytraceKernel.h +++ b/src/raytraceKernel.h @@ -15,6 +15,7 @@ #include "sceneStructs.h" #include -void cudaRaytraceCore(uchar4* pos, camera* renderCam, int frame, int iterations, material* materials, int numberOfMaterials, geom* geoms, int numberOfGeoms); +void cudaRaytraceCore(uchar4* pos, camera* renderCam, int frame, int iterations, material* materials, int numberOfMaterials, + geom* geoms, int numberOfGeoms, PointLight* pointLights, int numberOfPointLights); #endif diff --git a/src/scene.cpp b/src/scene.cpp index f0384b2..07220f2 100755 --- a/src/scene.cpp +++ b/src/scene.cpp @@ -26,6 +26,9 @@ scene::scene(string filename){ }else if(strcmp(tokens[0].c_str(), "CAMERA")==0){ loadCamera(); cout << " " << endl; + } else if(strcmp(tokens[0].c_str(), "POINTLIGHT")==0){ + loadPointLight(tokens[1]); + cout << " " << endl; } } } @@ -116,6 +119,7 @@ int scene::loadObject(string objectid){ newObject.scales = new glm::vec3[frameCount]; newObject.transforms = new cudaMat4[frameCount]; newObject.inverseTransforms = new cudaMat4[frameCount]; + newObject.inverseTransposeTransforms = new cudaMat4[frameCount]; for(int i=0; i tokens = utilityCore::tokenizeString(line); + if(strcmp(tokens[0].c_str(), "RGB")==0){ + glm::vec3 color( atof(tokens[1].c_str()), atof(tokens[2].c_str()), atof(tokens[3].c_str()) ); + newPointLight.color = color; + }else if(strcmp(tokens[0].c_str(), "POS")==0){ + 
newPointLight.position = glm::vec3( atof(tokens[1].c_str()), atof(tokens[2].c_str()), atof(tokens[3].c_str()) );
+            }
+        }
+
+        pointLights.push_back(newPointLight);
+    }
+}
diff --git a/src/scene.h b/src/scene.h
index 9bfa71f..4fd4cc1 100755
--- a/src/scene.h
+++ b/src/scene.h
@@ -22,12 +22,14 @@ class scene{
 	int loadMaterial(string materialid);
 	int loadObject(string objectid);
 	int loadCamera();
+	int loadPointLight(string lightid);
 public:
 	scene(string filename);
 	~scene();
 	vector objects;
 	vector materials;
+	vector pointLights;
 	camera renderCam;
 };
diff --git a/src/sceneStructs.h b/src/sceneStructs.h
index b10f1cf..16a90ec 100755
--- a/src/sceneStructs.h
+++ b/src/sceneStructs.h
@@ -27,6 +27,7 @@ struct geom {
 	glm::vec3* scales;
 	cudaMat4* transforms;
 	cudaMat4* inverseTransforms;
+	cudaMat4* inverseTransposeTransforms;
 };
 struct staticGeom {
@@ -37,6 +38,7 @@ struct staticGeom {
 	glm::vec3 scale;
 	cudaMat4 transform;
 	cudaMat4 inverseTransform;
+	cudaMat4 inverseTransposeTransform;
 };
 struct cameraData {
@@ -73,4 +75,30 @@ struct material{
 	float emittance;
 };
+struct PointLight               // One point light; scene::loadPointLight fills it and appends it to scene::pointLights.
+{
+	glm::vec3 position;         // Parsed from the three values of the light's "POS" line.
+	glm::vec3 color;            // Parsed from the three values of the light's "RGB" line.
+};
+
+struct RayInPackage             // One ray handed to the RayTrace kernel / FindNewRaysAndCalculateColor.
+{
+	ray rayValue;               // The ray that is intersected with the scene.
+	int index;                  // Element of the colour buffer this ray's shading is added to.
+	bool isInside;              // True when the ray travels inside an object; selects which side's index of refraction is the incident one.
+	float reductionFactor;      // Attenuation carried by the ray; camera rays start at 1.0f.
+};
+
+struct RayOutPackage {          // Per-thread result describing the follow-up rays produced at a hit.
+	bool isPresent;             // Set when a reflected ray was produced (stored in rayValue).
+	int index;                  // Copied from the incoming ray's index.
+	ray rayValue;               // Reflected ray; raytraceRay also stores the camera ray here before tracing.
+	bool isTransPresent;        // Set when a refracted ray was produced (stored in rayValueTrans).
+	ray rayValueTrans;          // Refracted ray.
+	bool isInsideObject;        // For the refracted ray: negation of the incoming ray's isInside.
+	glm::vec3 color;            // Material colour scaled by reductionFactor at the hit.
+	int numRays;                // Number of follow-up rays produced; incremented once per reflected/refracted ray.
+	float reductionFactor;      // Incoming factor, multiplied by the reflection coefficient when a reflected ray is produced.
+};
+
 #endif //CUDASTRUCTS_H