diff --git a/PROJ1_WIN/565Raytracer/README_images/005_phong_illumination_with_soft_shadows_and_reflections.bmp b/PROJ1_WIN/565Raytracer/README_images/005_phong_illumination_with_soft_shadows_and_reflections.bmp new file mode 100644 index 0000000..4d775c3 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/005_phong_illumination_with_soft_shadows_and_reflections.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/005_phong_illumination_with_soft_shadows_and_reflections_and_supersampled_antialiasing.bmp b/PROJ1_WIN/565Raytracer/README_images/005_phong_illumination_with_soft_shadows_and_reflections_and_supersampled_antialiasing.bmp new file mode 100644 index 0000000..0243abc Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/005_phong_illumination_with_soft_shadows_and_reflections_and_supersampled_antialiasing.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/001_flat_shading.bmp b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/001_flat_shading.bmp new file mode 100644 index 0000000..577f8a1 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/001_flat_shading.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/002_diffuse_illumination.bmp b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/002_diffuse_illumination.bmp new file mode 100644 index 0000000..f8b9aa5 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/002_diffuse_illumination.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/003_diffuse_illumination_with_hard_shadows.bmp b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/003_diffuse_illumination_with_hard_shadows.bmp new file mode 100644 index 0000000..436f6e6 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/003_diffuse_illumination_with_hard_shadows.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/PROJ1 
images/004_diffuse_illumination_with_soft_shadows.bmp b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/004_diffuse_illumination_with_soft_shadows.bmp new file mode 100644 index 0000000..5067408 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/004_diffuse_illumination_with_soft_shadows.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/005_phong_illumination_with_soft_shadows_and_reflections.bmp b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/005_phong_illumination_with_soft_shadows_and_reflections.bmp new file mode 100644 index 0000000..4d775c3 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/005_phong_illumination_with_soft_shadows_and_reflections.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/005_phong_illumination_with_soft_shadows_and_reflections_and_supersampled_antialiasing.bmp b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/005_phong_illumination_with_soft_shadows_and_reflections_and_supersampled_antialiasing.bmp new file mode 100644 index 0000000..0243abc Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/005_phong_illumination_with_soft_shadows_and_reflections_and_supersampled_antialiasing.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/006_final.bmp b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/006_final.bmp new file mode 100644 index 0000000..da6b196 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/006_final.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/weird.bmp b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/weird.bmp new file mode 100644 index 0000000..79a70fe Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/PROJ1 images/weird.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/StreamCompactionVsNot__Chart.bmp b/PROJ1_WIN/565Raytracer/README_images/StreamCompactionVsNot__Chart.bmp new 
file mode 100644 index 0000000..c8b0cb7 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/StreamCompactionVsNot__Chart.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/StreamCompactionVsNot__Graph.bmp b/PROJ1_WIN/565Raytracer/README_images/StreamCompactionVsNot__Graph.bmp new file mode 100644 index 0000000..cc53a98 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/StreamCompactionVsNot__Graph.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/StreamCompaction_graphic.bmp b/PROJ1_WIN/565Raytracer/README_images/StreamCompaction_graphic.bmp new file mode 100644 index 0000000..d4933c4 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/StreamCompaction_graphic.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/Thumbs.db b/PROJ1_WIN/565Raytracer/README_images/Thumbs.db new file mode 100644 index 0000000..bdf318b Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/Thumbs.db differ diff --git a/PROJ1_WIN/565Raytracer/README_images/caustics.bmp b/PROJ1_WIN/565Raytracer/README_images/caustics.bmp new file mode 100644 index 0000000..ed6b671 Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/caustics.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/diffuse.bmp b/PROJ1_WIN/565Raytracer/README_images/diffuse.bmp new file mode 100644 index 0000000..0dfc87f Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/diffuse.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/full global illumination with perfect specular reflection and (W.I.P.) refraction and self-implemented stream compaction.bmp b/PROJ1_WIN/565Raytracer/README_images/full global illumination with perfect specular reflection and (W.I.P.) refraction and self-implemented stream compaction.bmp new file mode 100644 index 0000000..68d050d Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/full global illumination with perfect specular reflection and (W.I.P.) 
refraction and self-implemented stream compaction.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/refraction.bmp b/PROJ1_WIN/565Raytracer/README_images/refraction.bmp new file mode 100644 index 0000000..0d2368d Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/refraction.bmp differ diff --git a/PROJ1_WIN/565Raytracer/README_images/specular.bmp b/PROJ1_WIN/565Raytracer/README_images/specular.bmp new file mode 100644 index 0000000..3cb7c6c Binary files /dev/null and b/PROJ1_WIN/565Raytracer/README_images/specular.bmp differ diff --git a/README.md b/README.md index 8f246fb..c438737 100755 --- a/README.md +++ b/README.md @@ -1,147 +1,288 @@ ------------------------------------------------------------------------------- -CIS565: Project 2: CUDA Pathtracer +CIS565 Project 2: CUDA Pathtracer ------------------------------------------------------------------------------- -Fall 2013 +Ricky Arietta Fall 2013 ------------------------------------------------------------------------------- -Due Wednesday, 10/02/13 ------------------------------------------------------------------------------- -------------------------------------------------------------------------------- -NOTE: -------------------------------------------------------------------------------- -This project requires an NVIDIA graphics card with CUDA capability! Any card after the Geforce 8xxx series will work. If you do not have an NVIDIA graphics card in the machine you are working on, feel free to use any machine in the SIG Lab or in Moore100 labs. All machines in the SIG Lab and Moore100 are equipped with CUDA capable NVIDIA graphics cards. If this too proves to be a problem, please contact Patrick or Liam as soon as possible. +This project is a highly parallel version of a Monte Carlo simulated pathtracer +implemented on the GPU, augmented from a template provided by Karl Yi and Liam +Boone. 
It works by casting out virtual rays through an image plane +from a user-defined camera and assigning pixel values based on intersections +with the defined geometry and lights. The final implementation, when built, +is capable of rendering high-quality images including full global illumination, +physically realistic area light(s) and soft shadows, geometry primitives (spheres, +cubes), supersampled anti-aliasing, perfect recursive specular reflection, and Fresnel +refraction. An example of a final render can be seen immediately below this +description. The implementation of each feature is described briefly below with +a rendered image demonstrating the development of the code. Finally, there is +some performance analysis included regarding the size and number of blocks requested +on the GPU during runtime. + +(A brief tour of the base code and a description of the scene file format +used during implementation are also included at the end of this file, adapted +from the project description provided by Patrick Cozzi and Liam Boone.) + ------------------------------------------------------------------------------- -INTRODUCTION: +Initial Ray Casting From Camera & Geometry Primitive Intersection ------------------------------------------------------------------------------- -In this project, you will extend your raytracer from Project 1 into a full CUDA based global illumination pathtracer. -For this project, you may either choose to continue working off of your codebase from Project 1, or you may choose to use the included basecode in this repository. The basecode for Project 2 is the same as the basecode for Project 1, but with some missing components you will need filled in, such as the intersection testing and camera raycasting methods. +In order to render images using ray tracing, we needed to define the set of rays +being sent out from the camera. 
The camera, defined by the user input file, +includes a resolution, field of view angle, viewing direction, up direction, etc. +Using this information, I was able to define an arbitrary image plane some distance +from the camera, orthogonal to the viewing direction vector. Then, using the +up direction and a computed third orthogonal "right" vector, I was able to +define a grid on the image plane with the same resolution as the desired image. +Then, from the camera, and given an x-index and y-index for the pixel in question, +I could compute a single ray from the camera position to the center of the corresponding +pixel in the image plane (adapted for supersampled antialiasing; see below). These +rays served as the initial rays for tracing paths through the scene. But unlike a traditional +ray tracer, these initial rays were not traced until termination -- they were inserted +into the pool that would be sampled with each wave of path traces (see Parallelization below). -How you choose to extend your raytracer into a pathtracer is a fairly open-ended problem; the supplied basecode is meant to serve as one possible set of guidelines for doing so, but you may choose any approach you want in your actual implementation, including completely scrapping the provided basecode in favor of your own from-scratch solution. +Additionally, when following these rays, I needed to be able to determine any +geometry intersections along the ray direction vector, since these intersections +define the luminance value returned to the image pixel. These functions were also taken +from my Assignment 1 ray tracing implementation. 
------------------------------------------------------------------------------- -CONTENTS: +Parallelization by Ray (Instead of by Pixel) Using Stream Compaction (Self-Implemented) ------------------------------------------------------------------------------- -The Project2 root directory contains the following subdirectories: - -* src/ contains the source code for the project. Both the Windows Visual Studio solution and the OSX makefile reference this folder for all source; the base source code compiles on OSX and Windows without modification. -* scenes/ contains an example scene description file. -* renders/ contains two example renders: the raytraced render from Project 1 (GI_no.bmp), and the same scene rendered with global illumination (GI_yes.bmp). -* PROJ1_WIN/ contains a Windows Visual Studio 2010 project and all dependencies needed for building and running on Windows 7. -* PROJ1_OSX/ contains a OSX makefile, run script, and all dependencies needed for building and running on Mac OSX 10.8. -* PROJ1_NIX/ contains a Linux makefile for building and running on Ubuntu - 12.04 LTS. Note that you will need to set the following environment - variables: - - - PATH=$PATH:/usr/local/cuda-5.5/bin - - LD_LIBRARY_PATH=/usr/local/cuda-5.5/lib64:/lib -The projects build and run exactly the same way as in Project0 and Project1. +To maximize utilization of the hardware, this path tracer is parallelized by ray and +not by pixel. When parallelizing by pixel, some ray paths die out before the maximum +ray depth is reached (either from absorption or lack of intersection) and they are a +drag on the kernel calls, while some paths are traced all the way through. + +This implementation uses a pool of rays. 
Along with the origin and direction, each of +these ray structures stores the (x,y) image coordinate associated with the ray, the +current index of refraction for the ray, a light coefficient for the ray that is a result +of being affected by diffuse absorption, and a flag indicating if the ray is alive or +dead. + +With each new wave of raycasts, the current live rays are pulled from the pool and cast +into the scene. Depending on whether or not geometry was intersected, the ray is marked +as dead or alive. If there was an intersection and the ray lives on to the next wave of +this iteration, a secondary ray is calculated and inserted back into the pool. + +After each such wave, a temporary array structure is computed, having a "true" value if the +corresponding ray in the pool is alive and a "false" value if it is dead. This temp array +is used as the basis for an inclusive scan, which is then shifted to an exclusive scan (the +total number of surviving rays is equal to the last value of the inclusive array before the +shift.) This scan array is used in a scatter call, where the live rays corresponding to these +flags are transferred to a more compact array at the index specified in the scan. This new +array thus includes only the live rays for every wave of raycasts. + +All of this code was based on the Parallel Algorithms presentation given in class. + +![Stream Compaction Graphic](https://raw.github.com/rarietta/Project2-PathTracer/master/PROJ1_WIN/565Raytracer/README_images/StreamCompaction_graphic.bmp) ------------------------------------------------------------------------------- -REQUIREMENTS: +Full Global Illumination (Soft shadows, Color bleeding, etc.) 
by Pathtracing Rays +and Properly Accumulating Luminance Values ------------------------------------------------------------------------------- -In this project, you are given code for: - -* All of the basecode from Project 1, plus: -* Intersection testing code for spheres and cubes -* Code for raycasting from the camera -You will need to implement the following features. A number of these required features you may have already implemented in Project 1. If you have, you are ahead of the curve and have less work to do! +Each ray in the scene was traced from the camera until its first bounce within the +scene. If it hit a diffuse surface, a secondary ray was randomly cast out over the +cosine weighted hemisphere around the surface normal. The light rays from the camera initially +have an RGB color value of (1.0, 1.0, 1.0). Upon diffuse reflection, some of this light is +absorbed by the surface, thus the light ray is multiplied by the RGB color of the surface +material. After an intersection and the calculation of this secondary ray, the secondary +ray is placed back in the ray pool in the place of the incoming ray (or marked as +dead if there was no intersection.) This was done for every ray that hits a diffuse +surface. Color is only set when a path happens upon a light source, in which case +the path is terminated and the current color value of the ray is multiplied by the +color of the light source times its emittance. -* Full global illumination (including soft shadows, color bleeding, etc.) by pathtracing rays through the scene. -* Properly accumulating emittance and colors to generate a final image -* Supersampled antialiasing -* Parallelization by ray instead of by pixel via string compaction -* Perfect specular reflection +Unlike ray tracing, we see that this sampling of rays over the surfaces gives us +a lot of features at no additional cost. 
It computes soft shadows without shadow +feelers, gives us full global illumination (notice the color bleeding from the +walls onto the ceiling/floor/spheres and dark spots on the corners where light gets +trapped), and computes diffuse lighting that varies with the incoming light angle. -You are also required to implement at least two of the following features. Some of these features you may have already implemented in Project 1. If you have, you may NOT resubmit those features and instead must pick two new ones to implement. +![Global Illumination](https://raw.github.com/rarietta/Project2-PathTracer/master/PROJ1_WIN/565Raytracer/README_images/diffuse.bmp) -* Additional BRDF models, such as Cook-Torrance, Ward, etc. Each BRDF model may count as a separate feature. -* Texture mapping -* Bump mapping -* Translational motion blur -* Fresnel-based Refraction, i.e. glass -* OBJ Mesh loading and rendering without KD-Tree -* Interactive camera -* Integrate an existing stackless KD-Tree library, such as CUKD (https://github.com/unvirtual/cukd) -* Depth of field +------------------------------------------------------------------------------- +Perfect Specular Reflection +------------------------------------------------------------------------------- -Alternatively, implementing just one of the following features can satisfy the "pick two" feature requirement, since these are correspondingly more difficult problems: +Unlike in diffuse reflection, when a specular material is intersected, the secondary +ray is not randomly sampled. It is perfectly reflected across the normal and sent out +as a new secondary ray. Furthermore, the incoming ray color value is not changed at all +by the specular coefficient, since in perfect specular reflection all of the light +is reflected and none is absorbed. -* Physically based subsurface scattering and transmission -* Implement and integrate your own stackless KD-Tree from scratch. 
-* Displacement mapping -* Deformational motion blur +The diffuse absorption by specular surfaces does, however, still occur because +the choice of a secondary ray type is sampled over an interval [0,1]. If the material +is specular (or refractive, see below), then a defined float value 0.3 is used as +the probability of diffuse reflection. The probabilities of specular reflection +and refraction are computed via Fresnel equations. -As yet another alternative, if you have a feature or features you really want to implement that are not on this list, let us know, and we'll probably say yes! +![Perfect Specular Reflection](https://raw.github.com/rarietta/Project2-PathTracer/master/PROJ1_WIN/565Raytracer/README_images/specular.bmp) ------------------------------------------------------------------------------- -NOTES ON GLM: +Fresnel Refraction ------------------------------------------------------------------------------- -This project uses GLM, the GL Math library, for linear algebra. You need to know two important points on how GLM is used in this project: -* In this project, indices in GLM vectors (such as vec3, vec4), are accessed via swizzling. So, instead of v[0], v.x is used, and instead of v[1], v.y is used, and so on and so forth. -* GLM Matrix operations work fine on NVIDIA Fermi cards and later, but pre-Fermi cards do not play nice with GLM matrices. As such, in this project, GLM matrices are replaced with a custom matrix struct, called a cudaMat4, found in cudaMat4.h. A custom function for multiplying glm::vec4s and cudaMat4s is provided as multiplyMV() in intersections.h. +In addition to reflection, the path tracer accounts for refractive surfaces such +as glass. We can see that the refractive glass inverts the view of the scene behind +it, and the path tracer automatically accounts for caustics formed under the glass +in the direction of the light source (note the bright spot on the wall.) 
+ +![Fresnel Refraction](https://raw.github.com/rarietta/Project2-PathTracer/master/PROJ1_WIN/565Raytracer/README_images/refraction.bmp) ------------------------------------------------------------------------------- -README +Addition of Supersampled AntiAliasing ------------------------------------------------------------------------------- -All students must replace or augment the contents of this Readme.md in a clear -manner with the following: -* A brief description of the project and the specific features you implemented. -* At least one screenshot of your project running. -* A 30 second or longer video of your project running. To create the video you - can use http://www.microsoft.com/expression/products/Encoder4_Overview.aspx -* A performance evaluation (described in detail below). +_NOTE: This feature was implemented in Assignment 1 and the code was reused in +this pathtracer. Since the pathtracer images are a little noisier due to random +sampling, I have included the raytraced images from Assignment 1 to illustrate +the supersampled antialiasing. The code in the pathtracer is exactly the same._ + +With the existing code base described up to this point, it was easy to implement +antialiasing by supersampling the pixel values. Instead of casting the same ray +through the center of the pixel with each iteration, the direction of the ray +within the bounds of the pixel was determined randomly in each iteration, and +the computed intersection illumination values were averaged over the entire series +of iterations. + +Compare the following two images. All input and scene data was identical between +the two, except the second version included supersampling of the pixels. You can +see how smooth the edges are on the spheres and cubes in this version. While there +are clear "jaggies" in the above version, the below version has none and even +corrects for tricky edge intersection cases in the corners of the box. 
+ +![Non-Antialiased](https://raw.github.com/rarietta/Project1-RayTracer/master/PROJ1_WIN/565Raytracer/README_images/005_phong_illumination_with_soft_shadows_and_reflections.bmp) + +![Antialiased](https://raw.github.com/rarietta/Project1-RayTracer/master/PROJ1_WIN/565Raytracer/README_images/005_phong_illumination_with_soft_shadows_and_reflections_and_supersampled_antialiasing.bmp) ------------------------------------------------------------------------------- PERFORMANCE EVALUATION ------------------------------------------------------------------------------- -The performance evaluation is where you will investigate how to make your CUDA -programs more efficient using the skills you've learned in class. You must have -performed at least one experiment on your code to investigate the positive or -negative effects on performance. -One such experiment would be to investigate the performance increase involved -with adding a spatial data-structure to your scene data. +To analyze the performance of the program on the GPU hardware, I decided to run +timing tests on the renders with and without stream compaction for various path +depths. -Another idea could be looking at the change in timing between various block -sizes. +I ran the program for 50 iterations both with and without stream compaction, +charting the results for traceDepths over the range [1,6]. As we can see from the +below graph and data chart, the runtime for the program without stream compaction +is initially a little smaller, since there is not the overhead of the scan and +scatter. However, as more waves are traced out, the implementation with stream +compaction becomes faster and the additional time required for each successive +trace depth is smaller than the last. This is because less and less rays are +traced after each bounce, since a number of the pool dies off. 
Alternatively, +the version without stream compaction increases linearly with the traceDepth and +quickly surpasses the stream compaction version w.r.t. runtime. -A good metric to track would be number of rays per second, or frames per -second, or number of objects displayable at 60fps. +If this was charted over higher and higher traceDepths, we would see the stream +compaction implementation flatten out while the non-compacted version would +continue to climb. -We encourage you to get creative with your tweaks. Consider places in your code -that could be considered bottlenecks and try to improve them. - -Each student should provide no more than a one page summary of their -optimizations along with tables and or graphs to visually explain any -performance differences. +![Chart](https://raw.github.com/rarietta/Project2-PathTracer/master/PROJ1_WIN/565Raytracer/README_images/StreamCompactionVsNot__Chart.bmp) +![Graph](https://raw.github.com/rarietta/Project2-PathTracer/master/PROJ1_WIN/565Raytracer/README_images/StreamCompactionVsNot__Graph.bmp) + ------------------------------------------------------------------------------- -THIRD PARTY CODE POLICY +Runtime Video ------------------------------------------------------------------------------- -* Use of any third-party code must be approved by asking on the Google group. If it is approved, all students are welcome to use it. Generally, we approve use of third-party code that is not a core part of the project. For example, for the ray tracer, we would approve using a third-party library for loading models, but would not approve copying and pasting a CUDA function for doing refraction. -* Third-party code must be credited in README.md. -* Using third-party code without its approval, including using another student's code, is an academic integrity violation, and will result in you receiving an F for the semester. 
+ +Unfortunately, since I was working in Moore 100, I was unable to download or +utilize any screen capture video software for producing runtime videos. ------------------------------------------------------------------------------- -SELF-GRADING +BASE CODE TOUR: ------------------------------------------------------------------------------- -* On the submission date, email your grade, on a scale of 0 to 100, to Liam, liamboone+cis565@gmail.com, with a one paragraph explanation. Be concise and realistic. Recall that we reserve 30 points as a sanity check to adjust your grade. Your actual grade will be (0.7 * your grade) + (0.3 * our grade). We hope to only use this in extreme cases when your grade does not realistically reflect your work - it is either too high or too low. In most cases, we plan to give you the exact grade you suggest. -* Projects are not weighted evenly, e.g., Project 0 doesn't count as much as the path tracer. We will determine the weighting at the end of the semester based on the size of each project. +The main files of interest in this project, which handle the ray-tracing +algorithm and image generation, are the following: + +* raytraceKernel.cu contains the core pathtracing CUDA kernel + +* intersections.h contains functions for geometry intersection testing and + point generation + +* interactions.h contains functions for ray-object interactions that define how + rays behave upon hitting materials and objects + +* sceneStructs.h, which contains definitions for how geometry, materials, + lights, cameras, and animation frames are stored in the renderer. ------------------------------------------------------------------------------- -SUBMISSION +TAKUAscene FORMAT: ------------------------------------------------------------------------------- -As with the previous project, you should fork this project and work inside of your fork. Upon completion, commit your finished project back to your fork, and make a pull request to the master repository. 
-You should include a README.md file in the root directory detailing the following +This project uses a custom scene description format, called TAKUAscene. +TAKUAscene files are flat text files that describe all geometry, materials, +lights, cameras, render settings, and animation frames inside of the scene. +Items in the format are delimited by new lines, and comments can be added at +the end of each line preceded with a double-slash. + +Materials are defined in the following fashion: + +* MATERIAL (material ID) //material header +* RGB (float r) (float g) (float b) //diffuse color +* SPECX (float specx) //specular exponent +* SPECRGB (float r) (float g) (float b) //specular color +* REFL (bool refl) //reflectivity flag, 0 for + no, 1 for yes +* REFR (bool refr) //refractivity flag, 0 for + no, 1 for yes +* REFRIOR (float ior) //index of refraction + for Fresnel effects +* SCATTER (float scatter) //scatter flag, 0 for + no, 1 for yes +* ABSCOEFF (float r) (float b) (float g) //absorption + coefficient for scattering +* RSCTCOEFF (float rsctcoeff) //reduced scattering + coefficient +* EMITTANCE (float emittance) //the emittance of the + material. Anything >0 makes the material a light source. + +Cameras are defined in the following fashion: + +* CAMERA //camera header +* RES (float x) (float y) //resolution +* FOVY (float fovy) //vertical field of + view half-angle. 
the horizontal angle is calculated from this and the + resolution +* ITERATIONS (float iterations) //how many + iterations to refine the image, only relevant for supersampled antialiasing, + depth of field, area lights, and other distributed raytracing applications +* FILE (string filename) //file to output + render to upon completion +* frame (frame number) //start of a frame +* EYE (float x) (float y) (float z) //camera's position in + worldspace +* VIEW (float x) (float y) (float z) //camera's view + direction +* UP (float x) (float y) (float z) //camera's up vector + +Objects are defined in the following fashion: +* OBJECT (object ID) //object header +* (cube OR sphere OR mesh) //type of object, can + be either "cube", "sphere", or "mesh". Note that cubes and spheres are unit + sized and centered at the origin. +* material (material ID) //material to + assign this object +* frame (frame number) //start of a frame +* TRANS (float transx) (float transy) (float transz) //translation +* ROTAT (float rotationx) (float rotationy) (float rotationz) //rotation +* SCALE (float scalex) (float scaley) (float scalez) //scale + +An example TAKUAscene file setting up two frames inside of a Cornell Box can be +found in the scenes/ directory. + +For meshes, note that the base code will only read in .obj files. For more +information on the .obj specification see http://en.wikipedia.org/wiki/Wavefront_.obj_file. 
+ +An example of a mesh object is as follows: -* A brief description of the project and specific features you implemented -* At least one screenshot of your project running, and at least one screenshot of the final rendered output of your pathtracer -* Instructions for building and running your project if they differ from the base code -* A link to your blog post detailing the project -* A list of all third-party code used +OBJECT 0 +mesh tetra.obj +material 0 +frame 0 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 diff --git a/scenes/sampleScene.txt b/scenes/sampleScene.txt index 52d079e..9d9ce80 100755 --- a/scenes/sampleScene.txt +++ b/scenes/sampleScene.txt @@ -1,5 +1,5 @@ -MATERIAL 0 //white diffuse -RGB 0.9 0.9 0.9 +MATERIAL 0 //yellow diffuse +RGB 1 1 1 SPECEX 0 SPECRGB 1 1 1 REFL 0 @@ -34,11 +34,11 @@ ABSCOEFF 0 0 0 RSCTCOEFF 0 EMITTANCE 0 -MATERIAL 3 //red glossy -RGB .63 .26 .24 -SPECEX 0 -SPECRGB 1 1 1 -REFL 0 +MATERIAL 3 //yellow glossy +RGB 1 1 0 +SPECEX 0.99999 +SPECRGB .6 .6 .6 +REFL 1 REFR 0 REFRIOR 2 SCATTER 0 @@ -48,9 +48,9 @@ EMITTANCE 0 MATERIAL 4 //white glossy RGB 1 1 1 -SPECEX 0 +SPECEX 0.9 SPECRGB 1 1 1 -REFL 0 +REFL 1 REFR 0 REFRIOR 2 SCATTER 0 @@ -59,7 +59,7 @@ RSCTCOEFF 0 EMITTANCE 0 MATERIAL 5 //glass -RGB 0 0 0 +RGB 1 1 1 SPECEX 0 SPECRGB 1 1 1 REFL 0 @@ -70,9 +70,9 @@ ABSCOEFF .02 5.1 5.7 RSCTCOEFF 13 EMITTANCE 0 -MATERIAL 6 //green glossy -RGB .35 .48 .29 -SPECEX 0 +MATERIAL 6 //purple glossy +RGB .5 0 .5 +SPECEX 0 SPECRGB 1 1 1 REFL 0 REFR 0 @@ -106,19 +106,27 @@ ABSCOEFF 0 0 0 RSCTCOEFF 0 EMITTANCE 15 +MATERIAL 9 //yellow diffuse +RGB 1 1 0 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + CAMERA -RES 800 800 -FOVY 25 -ITERATIONS 5000 +RES 400 400 +FOVY 25 25 +ITERATIONS 300 FILE test.bmp frame 0 EYE 0 4.5 12 VIEW 0 0 -1 UP 0 1 0 -frame 1 -EYE 0 4.5 12 -VIEW 0 0 -1 -UP 0 1 0 OBJECT 0 cube @@ -127,10 +135,6 @@ frame 0 TRANS 0 0 0 ROTAT 0 0 90 SCALE .01 10 10 -frame 1 -TRANS 0 0 0 
-ROTAT 0 0 90 -SCALE .01 10 10 OBJECT 1 cube @@ -139,10 +143,6 @@ frame 0 TRANS 0 5 -5 ROTAT 0 90 0 SCALE .01 10 10 -frame 1 -TRANS 0 5 -5 -ROTAT 0 90 0 -SCALE .01 10 10 OBJECT 2 cube @@ -151,10 +151,6 @@ frame 0 TRANS 0 10 0 ROTAT 0 0 90 SCALE .01 10 10 -frame 1 -TRANS 0 10 0 -ROTAT 0 0 90 -SCALE .01 10 10 OBJECT 3 cube @@ -163,10 +159,6 @@ frame 0 TRANS -5 5 0 ROTAT 0 0 0 SCALE .01 10 10 -frame 1 -TRANS -5 5 0 -ROTAT 0 0 0 -SCALE .01 10 10 OBJECT 4 cube @@ -175,44 +167,28 @@ frame 0 TRANS 5 5 0 ROTAT 0 0 0 SCALE .01 10 10 -frame 1 -TRANS 5 5 0 -ROTAT 0 0 0 -SCALE .01 10 10 OBJECT 5 sphere -material 4 +material 5 frame 0 -TRANS 0 2 0 +TRANS 3.5 5 1.5 ROTAT 0 180 0 -SCALE 3 3 3 -frame 1 -TRANS 0 2 0 -ROTAT 0 180 0 -SCALE 3 3 3 +SCALE 2.5 2.5 2.5 OBJECT 6 sphere material 3 frame 0 -TRANS 2 5 2 -ROTAT 0 180 0 -SCALE 2.5 2.5 2.5 -frame 1 -TRANS 2 5 2 +TRANS -2 5 -2 ROTAT 0 180 0 -SCALE 2.5 2.5 2.5 +SCALE 3 3 3 OBJECT 7 sphere -material 6 +material 9 frame 0 -TRANS -2 5 -2 -ROTAT 0 180 0 -SCALE 3 3 3 -frame 1 -TRANS -2 5 -2 +TRANS 2 4 -2.5 ROTAT 0 180 0 SCALE 3 3 3 @@ -222,8 +198,4 @@ material 8 frame 0 TRANS 0 10 0 ROTAT 0 0 90 -SCALE .3 3 3 -frame 1 -TRANS 0 10 0 -ROTAT 0 0 90 -SCALE .3 3 3 +SCALE .3 3 3 \ No newline at end of file diff --git a/src/image.cpp b/src/image.cpp index 67bf157..46b6235 100755 --- a/src/image.cpp +++ b/src/image.cpp @@ -38,7 +38,7 @@ image::~image(){ //------------------------ float image::applyGamma(float f){ - //apply gamma correction, use simple power law gamma for now. + //apply gamma correction, use simple power law gamma for now. 
TODO: sRGB return pow(f/float(gamma.divisor), gamma.gamma); } diff --git a/src/interactions.h b/src/interactions.h index a09ec95..150d704 100755 --- a/src/interactions.h +++ b/src/interactions.h @@ -8,6 +8,20 @@ #include "intersections.h" +#define DIFFUSE 0 +#define SPECULAR 1 +#define TRANSMIT 2 + +struct Fresnel { + float reflectionCoefficient; + float transmissionCoefficient; +}; + +struct AbsorptionAndScatteringProperties{ + glm::vec3 absorptionCoefficient; + float reducedScatteringCoefficient; +}; + //forward declaration __host__ __device__ bool calculateScatterAndAbsorption(ray& r, float& depth, AbsorptionAndScatteringProperties& currentAbsorptionAndScattering, glm::vec3& unabsorbedColor, material m, float randomFloatForScatteringDistance, float randomFloat2, float randomFloat3); __host__ __device__ glm::vec3 getRandomDirectionInSphere(float xi1, float xi2); @@ -16,6 +30,71 @@ __host__ __device__ glm::vec3 calculateTransmissionDirection(glm::vec3 normal, g __host__ __device__ glm::vec3 calculateReflectionDirection(glm::vec3 normal, glm::vec3 incident); __host__ __device__ Fresnel calculateFresnel(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR, glm::vec3 reflectionDirection, glm::vec3 transmissionDirection); __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere(glm::vec3 normal, float xi1, float xi2); +__host__ __device__ glm::vec3 computePhongTotal(ray& r, glm::vec3 intersection_point, glm::vec3 intersection_normal, material intersection_mtl, staticGeom* lights, int numberOfLights, staticGeom* geoms, int numberOfGeoms, material* materials, float time); +__host__ __device__ float computeShadowCoefficient(glm::vec3 intersection_point, staticGeom light, staticGeom* geoms, int numberOfGeoms, float time); + +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ glm::vec3 calculateTransmission(glm::vec3 absorptionCoefficient, float distance) { + return glm::vec3(0,0,0); +} + +//TODO (OPTIONAL): IMPLEMENT THIS 
FUNCTION +__host__ __device__ bool calculateScatterAndAbsorption(ray& r, float& depth, AbsorptionAndScatteringProperties& currentAbsorptionAndScattering, + glm::vec3& unabsorbedColor, material m, float randomFloatForScatteringDistance, float randomFloat2, float randomFloat3){ + return false; +} + +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ glm::vec3 calculateTransmissionDirection(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR) { + + float n1 = incidentIOR; + float n2 = transmittedIOR; + float n = n1 / n2; + + float c1 = glm::dot(-incident, normal); + float c2 = sqrt(1 - (n*n)*(1 - c1*c1)); + + if (c1 > 0.0f) { + normal = -normal; + c1 = -c1; + } + + glm::vec3 transmitDirection = (n*incident) + (n*c1 + c2) * normal; + return transmitDirection; +} + +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ glm::vec3 calculateReflectionDirection(glm::vec3 normal, glm::vec3 incident) { + float IdotN = glm::dot(-incident,normal); + glm::vec3 I; + if (IdotN < 0.0f) { I = incident; } + else { I = -incident; } + glm::vec3 R = glm::normalize(2*IdotN*normal - I); + return R; +} + +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ Fresnel calculateFresnel(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR, glm::vec3 reflectionDirection, glm::vec3 transmissionDirection) { + Fresnel fresnel; + + float n1 = incidentIOR; + float n2 = transmittedIOR; + float n = n1 / n2; + + float c1 = glm::dot(-incident, normal); + float c2 = sqrt(1 - (n*n)*(1 - c1*c1)); + + float R1 = glm::abs( (n1*c1 - n2*c2) / (n1*c1 + n2*c2) ) * glm::abs( (n1*c1 - n2*c2) / (n1*c1 + n2*c2) ); + float R2 = glm::abs( (n1*c2 - n2*c1) / (n1*c2 + n2*c1) ) * glm::abs( (n1*c2 - n2*c1) / (n1*c2 + n2*c1) ); + + float R = (R1 + R2) / 2.0f; + float T = 1.0 - R; + + fresnel.reflectionCoefficient = R; + fresnel.transmissionCoefficient = T; + + return fresnel; +} //LOOK: This function demonstrates cosine weighted random 
direction generation in a sphere! __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere(glm::vec3 normal, float xi1, float xi2) { @@ -26,7 +105,8 @@ __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere(glm::vec3 nor float over = sqrt(1 - up * up); // sin(theta) float around = xi2 * TWO_PI; - //Find a direction that is not the normal based off of whether or not the normal's components are all equal to sqrt(1/3) or whether or not at least one component is less than sqrt(1/3). Learned this trick from Peter Kutz. + //Find a direction that is not the normal based off of whether or not the normal's components are all equal to sqrt(1/3) + //or whether or not at least one component is less than sqrt(1/3). Learned this trick from Peter Kutz. glm::vec3 directionNotNormal; if (abs(normal.x) < SQRT_OF_ONE_THIRD) { @@ -49,7 +129,48 @@ __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere(glm::vec3 nor //Now that you know how cosine weighted direction generation works, try implementing non-cosine (uniform) weighted random direction generation. //This should be much easier than if you had to implement calculateRandomDirectionInHemisphere. __host__ __device__ glm::vec3 getRandomDirectionInSphere(float xi1, float xi2) { - return glm::vec3(0,0,0); + + float z = xi1; + float theta = xi2 * TWO_PI; + + float r = sqrt(1-z*z); + float x = r*cos(theta); + float y = r*sin(theta); + + return glm::vec3(x,y,z); } +//TODO (PARTIALLY OPTIONAL): IMPLEMENT THIS FUNCTION +//returns 0 if diffuse scatter, 1 if reflected, 2 if transmitted. 
+__host__ __device__ int calculateBSDF(ray& r, glm::vec3 intersect, glm::vec3 normal, material* m, float randomSeed){ + //AbsorptionAndScatteringProperties& currentAbsorptionAndScattering + + if (!m->hasReflective && !m->hasRefractive) { return DIFFUSE; } + + float incidentIOR = r.currentIOR; + float transmittedIOR = m->indexOfRefraction; + + glm::vec3 incident = r.direction; + glm::vec3 reflectionDirection = calculateReflectionDirection(normal, incident); + glm::vec3 transmittedDirection = calculateTransmissionDirection(normal, incident, incidentIOR, transmittedIOR); + + Fresnel fresnel = calculateFresnel(normal, r.direction, incidentIOR, transmittedIOR, reflectionDirection, transmittedDirection); + + double diffuse_range, specular_range; + diffuse_range = 0.2; + if (!m->hasRefractive) { specular_range = 1.0; } + else { specular_range = diffuse_range + (1.0 - diffuse_range) * fresnel.reflectionCoefficient * m->hasReflective; } + + thrust::default_random_engine rng(hash(randomSeed)); + thrust::uniform_real_distribution u01(0,1); + float sample = (float)u01(rng); + + if (sample < diffuse_range) + return DIFFUSE; + else if (sample < specular_range) + return SPECULAR; + else + return TRANSMIT; +}; + #endif diff --git a/src/intersections.h b/src/intersections.h index a6b9469..8422f4b 100755 --- a/src/intersections.h +++ b/src/intersections.h @@ -12,15 +12,18 @@ #include "utilities.h" #include +#define ERROR 0.75e-3 + //Some forward declarations __host__ __device__ glm::vec3 getPointOnRay(ray r, float t); __host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v); __host__ __device__ glm::vec3 getSignOfRay(ray r); __host__ __device__ glm::vec3 getInverseDirectionOfRay(ray r); __host__ __device__ float boxIntersectionTest(staticGeom sphere, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); -__host__ __device__ float boxIntersectionTest(glm::vec3 boxMin, glm::vec3 boxMax, staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); __host__ 
__device__ float sphereIntersectionTest(staticGeom sphere, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); +__host__ __device__ glm::vec3 getRandomPointOnGeom(staticGeom geom, float randomSeed); __host__ __device__ glm::vec3 getRandomPointOnCube(staticGeom cube, float randomSeed); +__host__ __device__ glm::vec3 getRandomPointOnSphere(staticGeom sphere, float randomSeed); //Handy dandy little hashing function that provides seeds for random number generation __host__ __device__ unsigned int hash(unsigned int a){ @@ -69,98 +72,71 @@ __host__ __device__ glm::vec3 getSignOfRay(ray r){ return glm::vec3((int)(inv_direction.x < 0), (int)(inv_direction.y < 0), (int)(inv_direction.z < 0)); } -//Wrapper for cube intersection test for testing against unit cubes -__host__ __device__ float boxIntersectionTest(staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){ - return boxIntersectionTest(glm::vec3(-.5,-.5,-.5), glm::vec3(.5,.5,.5), box, r, intersectionPoint, normal); +//Cube intersection test, return -1 if no intersection, otherwise, distance to intersection +__host__ __device__ float geomIntersectionTest(staticGeom geom, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){ + if (geom.type == CUBE) return boxIntersectionTest(geom, r, intersectionPoint, normal); + else if (geom.type == SPHERE) return sphereIntersectionTest(geom, r, intersectionPoint, normal); + return (float)-1.0; } +//TODO: IMPLEMENT THIS FUNCTION //Cube intersection test, return -1 if no intersection, otherwise, distance to intersection -__host__ __device__ float boxIntersectionTest(glm::vec3 boxMin, glm::vec3 boxMax, staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){ - glm::vec3 currentNormal = glm::vec3(0,0,0); - - ray ro = r; - - glm::vec3 iP0 = multiplyMV(box.inverseTransform,glm::vec4(r.origin, 1.0f)); - glm::vec3 iP1 = multiplyMV(box.inverseTransform,glm::vec4(r.origin+r.direction, 1.0f)); - glm::vec3 iV0 = iP1 - iP0; - - r.origin = iP0; - 
r.direction = glm::normalize(iV0); - - float tmin, tmax, tymin, tymax, tzmin, tzmax; - - glm::vec3 rsign = getSignOfRay(r); - glm::vec3 rInverseDirection = getInverseDirectionOfRay(r); - - if((int)rsign.x==0){ - tmin = (boxMin.x - r.origin.x) * rInverseDirection.x; - tmax = (boxMax.x - r.origin.x) * rInverseDirection.x; - }else{ - tmin = (boxMax.x - r.origin.x) * rInverseDirection.x; - tmax = (boxMin.x - r.origin.x) * rInverseDirection.x; - } - - if((int)rsign.y==0){ - tymin = (boxMin.y - r.origin.y) * rInverseDirection.y; - tymax = (boxMax.y - r.origin.y) * rInverseDirection.y; - }else{ - tymin = (boxMax.y - r.origin.y) * rInverseDirection.y; - tymax = (boxMin.y - r.origin.y) * rInverseDirection.y; - } - - if ( (tmin > tymax) || (tymin > tmax) ){ - return -1; - } - if (tymin > tmin){ - tmin = tymin; - } - if (tymax < tmax){ - tmax = tymax; - } +__host__ __device__ float boxIntersectionTest(staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){ + + glm::vec3 ro = multiplyMV(box.inverseTransform, glm::vec4(r.origin,1.0f)); + glm::vec3 rd = glm::normalize(multiplyMV(box.inverseTransform, glm::vec4(r.direction,0.0f))); - if((int)rsign.z==0){ - tzmin = (boxMin.z - r.origin.z) * rInverseDirection.z; - tzmax = (boxMax.z - r.origin.z) * rInverseDirection.z; - }else{ - tzmin = (boxMax.z - r.origin.z) * rInverseDirection.z; - tzmax = (boxMin.z - r.origin.z) * rInverseDirection.z; - } - - if ( (tmin > tzmax) || (tzmin > tmax) ){ - return -1; - } - if (tzmin > tmin){ - tmin = tzmin; - } - if (tzmax < tmax){ - tmax = tzmax; - } - if(tmin<0){ - return -1; - } - - glm::vec3 osintersect = r.origin + tmin*r.direction; - - if(abs(osintersect.x-abs(boxMax.x))<.001){ - currentNormal = glm::vec3(1,0,0); - }else if(abs(osintersect.y-abs(boxMax.y))<.001){ - currentNormal = glm::vec3(0,1,0); - }else if(abs(osintersect.z-abs(boxMax.z))<.001){ - currentNormal = glm::vec3(0,0,1); - }else if(abs(osintersect.x+abs(boxMin.x))<.001){ - currentNormal = glm::vec3(-1,0,0); - 
}else if(abs(osintersect.y+abs(boxMin.y))<.001){ - currentNormal = glm::vec3(0,-1,0); - }else if(abs(osintersect.z+abs(boxMin.z))<.001){ - currentNormal = glm::vec3(0,0,-1); - } - - intersectionPoint = multiplyMV(box.transform, glm::vec4(osintersect, 1.0)); - - - - normal = multiplyMV(box.transform, glm::vec4(currentNormal,0.0)); - return glm::length(intersectionPoint-ro.origin); + ray rt; rt.origin = ro; rt.direction = rd; + + glm::vec3 faceNormals[6]; + glm::vec3 faceCenters[6]; + faceNormals[0] = glm::vec3(0,0,-1); faceCenters[0] = glm::vec3(0,0,-0.5); + faceNormals[1] = glm::vec3(0,0,-1); faceCenters[1] = glm::vec3(0,0, 0.5); + faceNormals[2] = glm::vec3(0,-1,0); faceCenters[2] = glm::vec3(0,-0.5,0); + faceNormals[3] = glm::vec3(0, 1,0); faceCenters[3] = glm::vec3(0, 0.5,0); + faceNormals[4] = glm::vec3(-1,0,0); faceCenters[4] = glm::vec3(-0.5,0,0); + faceNormals[5] = glm::vec3( 1,0,0); faceCenters[5] = glm::vec3( 0.5,0,0); + + // closest discovered intersection + float min_t = -1.0; + int min_i = 6; + + // find intersection of ray with each plane of the box + for (unsigned int i = 0; i < 6; i++) { + glm::vec3 normal = faceNormals[i]; + glm::vec3 center = faceCenters[i]; + + float t = glm::dot((center - rt.origin), normal) / glm::dot(rt.direction, normal); + + // continue if intersection is behind camera + if (t <= 0) + continue; + + // if t is greater than the closest found intersection, skip it + if ((min_t > 0.0) && (t >= min_t)) + continue; + + // check to see if the point found is within + // the edges defined by the face + glm::vec3 P = getPointOnRay(rt,t); + if ((P.x >= (-0.5 - ERROR)) && (P.x <= (0.5 + ERROR)) && + (P.y >= (-0.5 - ERROR)) && (P.y <= (0.5 + ERROR)) && + (P.z >= (-0.5 - ERROR)) && (P.z <= (0.5 + ERROR))) + min_t = t; + min_i = i; + } + + if (min_t < 0) + return (float) -1.0; + + else { + glm::vec3 realIntersectionPoint = multiplyMV(box.transform, glm::vec4(getPointOnRay(rt, min_t), 1.0)); + glm::vec3 realNormal = 
glm::normalize(multiplyMV(box.transform, glm::vec4(faceNormals[min_i],0.0f))); + intersectionPoint = realIntersectionPoint; + normal = realNormal; + + return glm::length(r.origin - realIntersectionPoint); + } } //LOOK: Here's an intersection test example from a sphere. Now you just need to figure out cube and, optionally, triangle. @@ -177,7 +153,7 @@ __host__ __device__ float sphereIntersectionTest(staticGeom sphere, ray r, glm:: float vDotDirection = glm::dot(rt.origin, rt.direction); float radicand = vDotDirection * vDotDirection - (glm::dot(rt.origin, rt.origin) - pow(radius, 2)); if (radicand < 0){ - return -1; + return (float) -1.0; } float squareRoot = sqrt(radicand); @@ -215,6 +191,12 @@ __host__ __device__ glm::vec3 getRadiuses(staticGeom geom){ return glm::vec3(xradius, yradius, zradius); } +__host__ __device__ glm::vec3 getRandomPointOnGeom(staticGeom geom, float randomSeed){ + if (geom.type == SPHERE) { return getRandomPointOnSphere(geom, randomSeed); } + else if (geom.type == CUBE) { return getRandomPointOnCube(geom, randomSeed); } + else { return glm::vec3(0.0f, 0.0f, 0.0f); } +} + //LOOK: Example for generating a random point on an object using thrust. 
//Generates a random point on a given cube __host__ __device__ glm::vec3 getRandomPointOnCube(staticGeom cube, float randomSeed){ @@ -261,20 +243,22 @@ __host__ __device__ glm::vec3 getRandomPointOnCube(staticGeom cube, float random } +//TODO: IMPLEMENT THIS FUNCTION //Generates a random point on a given sphere __host__ __device__ glm::vec3 getRandomPointOnSphere(staticGeom sphere, float randomSeed){ - float radius=.5f; - thrust::default_random_engine rng(hash(randomSeed)); - thrust::uniform_real_distribution u01(-1,1); - thrust::uniform_real_distribution u02(0,TWO_PI); - - float theta = (float)u02(rng); - float cosphi = (float)u01(rng); - float sinphi = sqrt(1 - cosphi*cosphi); - glm::vec3 point = radius*glm::vec3(sinphi*cos(theta),sinphi*sin(theta),cosphi); - glm::vec3 randPoint = multiplyMV(sphere.transform, glm::vec4(point,1.0f)); - - return randPoint; + + float radius=.5f; + thrust::default_random_engine rng(hash(randomSeed)); + thrust::uniform_real_distribution u01(-1,1); + thrust::uniform_real_distribution u02(0,TWO_PI); + + float theta = (float)u02(rng); + float cosphi = (float)u01(rng); + float sinphi = sqrt(1 - cosphi*cosphi); + glm::vec3 point = radius*glm::vec3(sinphi*cos(theta),sinphi*sin(theta),cosphi); + glm::vec3 randPoint = multiplyMV(sphere.transform, glm::vec4(point,1.0f)); + + return randPoint; } #endif diff --git a/src/main.cpp b/src/main.cpp index 81836b1..b1ebf2a 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -105,74 +105,76 @@ void runCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). 
When mapped to CUDA, OpenGL should not use this buffer - if(iterationsiterations){ - uchar4 *dptr=NULL; - iterations++; - cudaGLMapBufferObject((void**)&dptr, pbo); + if(iterations < renderCam->iterations) { + uchar4 *dptr=NULL; + iterations++; + cudaGLMapBufferObject((void**)&dptr, pbo); - //pack geom and material arrays - geom* geoms = new geom[renderScene->objects.size()]; - material* materials = new material[renderScene->materials.size()]; + //pack geom and material arrays + geom* geoms = new geom[renderScene->objects.size()]; + material* materials = new material[renderScene->materials.size()]; - for(int i=0; iobjects.size(); i++){ - geoms[i] = renderScene->objects[i]; - } - for(int i=0; imaterials.size(); i++){ - materials[i] = renderScene->materials[i]; - } - - - // execute the kernel - cudaRaytraceCore(dptr, renderCam, targetFrame, iterations, materials, renderScene->materials.size(), geoms, renderScene->objects.size() ); + for(int i=0; iobjects.size(); i++){ + geoms[i] = renderScene->objects[i]; + } + for(int i=0; imaterials.size(); i++){ + materials[i] = renderScene->materials[i]; + } + + + // execute the kernel + cudaRaytraceCore(dptr, renderCam, targetFrame, iterations, materials, renderScene->materials.size(), geoms, renderScene->objects.size() ); - // unmap buffer object - cudaGLUnmapBufferObject(pbo); - }else{ - - if(!finishedRender){ - //output image file - image outputImage(renderCam->resolution.x, renderCam->resolution.y); - - for(int x=0; xresolution.x; x++){ - for(int y=0; yresolution.y; y++){ - int index = x + (y * renderCam->resolution.x); - outputImage.writePixelRGB(renderCam->resolution.x-1-x,y,renderCam->image[index]); - } - } + + // unmap buffer object + cudaGLUnmapBufferObject(pbo); + } + else { + + if(!finishedRender){ + //output image file + image outputImage(renderCam->resolution.x, renderCam->resolution.y); + + for(int x=0; xresolution.x; x++){ + for(int y=0; yresolution.y; y++){ + int index = x + (y * renderCam->resolution.x); + 
outputImage.writePixelRGB(renderCam->resolution.x-1-x,y,renderCam->image[index]); + } + } - gammaSettings gamma; - gamma.applyGamma = true; - gamma.gamma = 1.0; - gamma.divisor = 1.0; //renderCam->iterations; - outputImage.setGammaSettings(gamma); - string filename = renderCam->imageName; - string s; - stringstream out; - out << targetFrame; - s = out.str(); - utilityCore::replaceString(filename, ".bmp", "."+s+".bmp"); - utilityCore::replaceString(filename, ".png", "."+s+".png"); - outputImage.saveImageRGB(filename); - cout << "Saved frame " << s << " to " << filename << endl; - finishedRender = true; - if(singleFrameMode==true){ - cudaDeviceReset(); - exit(0); - } - } - if(targetFrameframes-1){ - - //clear image buffer and move onto next frame - targetFrame++; - iterations = 0; - for(int i=0; iresolution.x*renderCam->resolution.y; i++){ - renderCam->image[i] = glm::vec3(0,0,0); - } - cudaDeviceReset(); - finishedRender = false; - } - } - + gammaSettings gamma; + gamma.applyGamma = false; + gamma.gamma = 1.0/2.2; + gamma.divisor = 1.0; + outputImage.setGammaSettings(gamma); + string filename = renderCam->imageName; + string s; + stringstream out; + out << targetFrame; + s = out.str(); + utilityCore::replaceString(filename, ".bmp", "."+s+".bmp"); + utilityCore::replaceString(filename, ".png", "."+s+".png"); + outputImage.saveImageRGB(filename); + cout << "Saved frame " << s << " to " << filename << endl; + finishedRender = true; + + if(singleFrameMode==true){ + cudaDeviceReset(); + exit(0); + } + } + if(targetFrame < renderCam->frames-1){ + + //clear image buffer and move onto next frame + targetFrame++; + iterations = 0; + for(int i=0; iresolution.x*renderCam->resolution.y; i++){ + renderCam->image[i] = glm::vec3(0,0,0); + } + cudaDeviceReset(); + finishedRender = false; + } + } } #ifdef __APPLE__ diff --git a/src/raytraceKernel.cu b/src/raytraceKernel.cu index 87a65a6..ad7549c 100755 --- a/src/raytraceKernel.cu +++ b/src/raytraceKernel.cu @@ -9,12 +9,18 @@ 
#include #include #include "sceneStructs.h" +#include "glm/glm.hpp" #include "utilities.h" #include "raytraceKernel.h" #include "intersections.h" #include "interactions.h" #include -#include "glm/glm.hpp" + +#if CUDA_VERSION >= 5000 + #include +#else + #include +#endif void checkCUDAError(const char *msg) { cudaError_t err = cudaGetLastError(); @@ -35,42 +41,51 @@ __host__ __device__ glm::vec3 generateRandomNumberFromThread(glm::vec2 resolutio return glm::vec3((float) u01(rng), (float) u01(rng), (float) u01(rng)); } -//Kernel that does the initial raycast from the camera. -__host__ __device__ ray raycastFromCameraKernel(glm::vec2 resolution, float time, int x, int y, glm::vec3 eye, glm::vec3 view, glm::vec3 up, glm::vec2 fov){ - +//TODO: IMPLEMENT THIS FUNCTION +//Function that does the initial raycast from the camera +__global__ void raycastFromCameraKernel(glm::vec2 resolution, float time, glm::vec3 eye, glm::vec3 view, glm::vec3 up, glm::vec2 fov, ray* rays){ + + // pixel index for ray + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; int index = x + (y * resolution.x); - - thrust::default_random_engine rng(hash(index*time)); - thrust::uniform_real_distribution u01(0,1); - - //standard camera raycast stuff - glm::vec3 E = eye; - glm::vec3 C = view; - glm::vec3 U = up; - float fovx = fov.x; - float fovy = fov.y; - - float CD = glm::length(C); - - glm::vec3 A = glm::cross(C, U); - glm::vec3 B = glm::cross(A, C); - glm::vec3 M = E+C; - glm::vec3 H = (A*float(CD*tan(fovx*(PI/180))))/float(glm::length(A)); - glm::vec3 V = (B*float(CD*tan(-fovy*(PI/180))))/float(glm::length(B)); - - float sx = (x)/(resolution.x-1); - float sy = (y)/(resolution.y-1); + + //establish "right" camera direction + glm::normalize(eye); glm::normalize(view); + glm::vec3 right = glm::normalize(glm::cross(up, view)); - glm::vec3 P = M + (((2*sx)-1)*H) + (((2*sy)-1)*V); - glm::vec3 PmE = P-E; - glm::vec3 R = E + 
(float(200)*(PmE))/float(glm::length(PmE)); + // calculate P1 and P2 in both x and y directions + glm::vec3 image_center = eye + view; + glm::vec3 P1_X = image_center - tan((float)4.0*fov.x)*right; + glm::vec3 P2_X = image_center + tan((float)4.0*fov.x)*right; + glm::vec3 P1_Y = image_center - tan((float)4.0*fov.y)*up; + glm::vec3 P2_Y = image_center + tan((float)4.0*fov.y)*up; - glm::vec3 direction = glm::normalize(R); - //major performance cliff at this point, TODO: find out why! + glm::vec3 bottom_left = P1_X + (P1_Y - image_center); + glm::vec3 bottom_right = P2_X + (P1_Y - image_center); + glm::vec3 top_left = P1_X + (P2_Y - image_center); + + glm::vec3 imgRight = bottom_right - bottom_left; + glm::vec3 imgUp = top_left - bottom_left; + + // supersample the pixels by taking a randomly offset ray in each iteration + glm::vec3 random_offset = generateRandomNumberFromThread(resolution, time, x, y); + float x_offset = random_offset.x; + float y_offset = random_offset.y; + glm::vec3 img_point = bottom_left + ((float)x + x_offset)/(float)resolution.x*imgRight + ((float)y + y_offset)/(float)resolution.y*imgUp; + glm::vec3 direction = glm::normalize(img_point - eye); + + // return value ray r; - r.origin = eye; + r.x = x; + r.y = y; + r.alive = true; + r.origin = eye; r.direction = direction; - return r; + r.coeff = glm::vec3(1.0f, 1.0f, 1.0f); + r.currentIOR = 1.0; + + rays[index] = r; } //Kernel that blacks out a given image buffer @@ -83,7 +98,7 @@ __global__ void clearImage(glm::vec2 resolution, glm::vec3* image){ } } -//Kernel that writes the image to the OpenGL PBO directly. +//Kernel that writes the image to the OpenGL PBO directly. 
__global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3* image){ int x = (blockIdx.x * blockDim.x) + threadIdx.x; @@ -92,7 +107,7 @@ __global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3* if(x<=resolution.x && y<=resolution.y){ - glm::vec3 color; + glm::vec3 color; color.x = image[index].x*255.0; color.y = image[index].y*255.0; color.z = image[index].z*255.0; @@ -111,117 +126,290 @@ __global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3* // Each thread writes one pixel location in the texture (textel) PBOpos[index].w = 0; - PBOpos[index].x = color.x; + PBOpos[index].x = color.x; PBOpos[index].y = color.y; PBOpos[index].z = color.z; } } -//TODO: IMPLEMENT THIS FUNCTION -//Core raytracer kernel -__global__ void raytraceRay(glm::vec2 resolution, float time, float bounce, cameraData cam, int rayDepth, glm::vec3* colors, - staticGeom* geoms, int numberOfGeoms, material* materials, int numberOfMaterials){ +// perform exclusive scan +__global__ void createTempArray(ray* R_in, int* R_temp) { + + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (R_in[index].alive == true) { R_temp[index] = 1; } + else { R_temp[index] = 0; } - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - int index = x + (y * resolution.x); + __syncthreads(); +} - ray r = raycastFromCameraKernel(resolution, time, x, y, cam.position, cam.view, cam.up, cam.fov); - - if((x<=resolution.x && y<=resolution.y)){ - - float MAX_DEPTH = 100000000000000000; - float depth = MAX_DEPTH; - - for(int i=0; i-EPSILON){ - MAX_DEPTH = depth; - colors[index] = materials[geoms[i].materialid].color; - } - } +// perform exclusive scan +__global__ void inclusiveScan(int* R_scan_in, int* R_scan_out, int depth) { + + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (index >= pow(2.0, depth-1)) + R_scan_out[index] = R_scan_in[index - (int)pow(2.0, depth-1)] + R_scan_in[index]; + else + 
R_scan_out[index] = R_scan_in[index]; + + __syncthreads(); +} +// shift from inclusive to exclusive scan and store the numberOfRays +__global__ void inclusive2exclusive(int* R_inclusive, int* R_exclusive, int *numberOfRays) { + + //*numberOfRays /= 2.0; + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (index == 0) { R_exclusive = 0; } + else { R_exclusive[index] = R_inclusive[index-1]; } +} - //colors[index] = generateRandomNumberFromThread(resolution, time, x, y); - } +// shift from inclusive to exclusive scan and store the numberOfRays +__global__ void scatter(int* R_temp, int* R_scan, ray* R_in, ray* R_out) { + + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (R_temp[index] == 1) + R_out[R_scan[index]] = R_in[index]; } +// Core raytracer kernel +__global__ void raytraceRay(glm::vec2 resolution, float time, int rayDepth, glm::vec3* colors, staticGeom* geoms, + int numberOfGeoms, material* materials, int iterations, ray* rays) { + + // Find index of pixel and create empty color vector + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + // Get initial ray from camera through this position + ray inRay = rays[index]; + inRay.origin = inRay.origin + (float)ERROR*inRay.direction; + + // Allocate secondary ray + ray outRay; + + // Luminance value to be returned for ray + int colorIndex = inRay.x + (inRay.y * resolution.x); + + // Return values for the intersection test + R3Intersection intersection; + + // Find the closest geometry intersection along the ray + float t; + float min_t = -1.0; + for (int i = 0; i < numberOfGeoms; i++) { + staticGeom geom = geoms[i]; + t = geomIntersectionTest(geom, inRay, intersection.point, intersection.normal); + if ((t > ERROR) && (t < min_t || min_t < 0.0)) { + min_t = t; + intersection.material = &materials[geom.materialid]; + } + } + + if (min_t == -1.0) { + outRay.alive = false; + outRay.coeff = glm::vec3(0,0,0); + } + + else if(intersection.material->emittance > 0) + { + outRay.alive = false; + 
outRay.coeff = (inRay.coeff * intersection.material->color * intersection.material->emittance); + colors[colorIndex] += outRay.coeff / (float)iterations; + } + + else { + + int BSDF = calculateBSDF(inRay, intersection.point, intersection.normal, intersection.material, float(time*rayDepth)); + switch (BSDF) { + + case DIFFUSE: + { + glm::vec3 rand = generateRandomNumberFromThread(resolution, time*rayDepth, inRay.x, inRay.y); + outRay.direction = calculateRandomDirectionInHemisphere(intersection.normal, rand.x, rand.y); + outRay.coeff = inRay.coeff * intersection.material->color; + outRay.currentIOR = inRay.currentIOR; + } + break; + + case SPECULAR: + { + outRay.direction = calculateReflectionDirection(intersection.normal, inRay.direction); + outRay.currentIOR = inRay.currentIOR; + outRay.coeff = inRay.coeff; + } + break; + + case TRANSMIT: + { + float incidentIOR = inRay.currentIOR; + float transmittedIOR; + if (glm::dot(inRay.direction, intersection.normal) > 0) + transmittedIOR = 1.0; + else + transmittedIOR = intersection.material->indexOfRefraction; + + outRay.direction = calculateTransmissionDirection(intersection.normal, inRay.direction, incidentIOR, transmittedIOR); + outRay.coeff = inRay.coeff; + } + break; + } + + // Constant properties of outgoing ray, regardless of switch case + glm::vec3 ro = glm::vec3(intersection.point); + outRay.origin = ro; + outRay.x = inRay.x; + outRay.y = inRay.y; + outRay.alive = true; + } + rays[index] = outRay; +} //TODO: FINISH THIS FUNCTION // Wrapper for the __global__ call that sets up the kernel calls and does a ton of memory management void cudaRaytraceCore(uchar4* PBOpos, camera* renderCam, int frame, int iterations, material* materials, int numberOfMaterials, geom* geoms, int numberOfGeoms){ - int traceDepth = 1; //determines how many bounces the raytracer traces + // determines how many bounces the raytracer traces + int traceDepth = 6; - // set up crucial magic - int tileSize = 8; - dim3 threadsPerBlock(tileSize, 
tileSize); - dim3 fullBlocksPerGrid((int)ceil(float(renderCam->resolution.x)/float(tileSize)), (int)ceil(float(renderCam->resolution.y)/float(tileSize))); + // set up crucial magic + int tileSize = 8; + dim3 threadsPerBlock(tileSize, tileSize); + dim3 fullBlocksPerGrid((int)ceil(float(renderCam->resolution.x)/float(tileSize)), (int)ceil(float(renderCam->resolution.y)/float(tileSize))); - //send image to GPU - glm::vec3* cudaimage = NULL; - cudaMalloc((void**)&cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3)); - cudaMemcpy( cudaimage, renderCam->image, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyHostToDevice); + //send image to GPU + glm::vec3* cudaimage = NULL; + cudaMalloc((void**)&cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3)); + cudaMemcpy( cudaimage, renderCam->image, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyHostToDevice); - //package geometry and materials and sent to GPU - staticGeom* geomList = new staticGeom[numberOfGeoms]; - for(int i=0; iresolution; - cam.position = renderCam->positions[frame]; - cam.view = renderCam->views[frame]; - cam.up = renderCam->ups[frame]; - cam.fov = renderCam->fov; - - //kernel launches - for(int bounce = 1; bounce <= 1; ++bounce) - { - raytraceRay<<>>(renderCam->resolution, (float)iterations, (float)bounce, cam, traceDepth, cudaimage, cudageoms, numberOfGeoms, cudamaterials, numberOfMaterials); - } - sendImageToPBO<<>>(PBOpos, renderCam->resolution, cudaimage); + material* cudamaterials = NULL; + cudaMalloc((void**)&cudamaterials, numberOfMaterials*sizeof(material)); + cudaMemcpy( cudamaterials, materialList, numberOfMaterials*sizeof(material), cudaMemcpyHostToDevice); + + // package camera + cameraData cam; + cam.resolution = renderCam->resolution; + cam.position = renderCam->positions[frame]; + cam.view = renderCam->views[frame]; + cam.up = 
renderCam->ups[frame]; + cam.fov = renderCam->fov; + + // create array of rays to feed to kernel call + int numberOfRays = cam.resolution.x * cam.resolution.y; + ray* rayList = new ray[numberOfRays]; + ray* cudarays = NULL; + cudaMalloc((void**)&cudarays, numberOfRays*sizeof(ray)); + cudaMemcpy(cudarays, rayList, numberOfRays*sizeof(ray), cudaMemcpyHostToDevice); + raycastFromCameraKernel<<>>(cam.resolution, (float)iterations, cam.position, cam.view, cam.up, cam.fov, cudarays); - //retrieve image from GPU - cudaMemcpy( renderCam->image, cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyDeviceToHost); + // kernel launching loop + for (int d = 0; d < traceDepth; d++) { + + // determine block sizes & threads per block + int raytraceTileSize = 128; + dim3 raytraceThreadsPerBlock(raytraceTileSize); + dim3 raytraceFullBlocksPerGrid((int)ceil(float(numberOfRays)/float(raytraceTileSize))); + + // kernel call to trace all active rays + raytraceRay<<>>(renderCam->resolution, (float)iterations, d+1, cudaimage, cudageoms, numberOfGeoms, cudamaterials, renderCam->iterations, cudarays); + + // create array copy for stream compaction + ray* cudaR_out = NULL; + cudaMalloc((void**)&cudaR_out, numberOfRays*sizeof(ray)); + + // create temp array for stream compaction + int* cudaR_temp = NULL; + cudaMalloc((void**)&cudaR_temp, numberOfRays*sizeof(int)); + + // create scan array for stream compaction + int* cudaR_scan_in = NULL; + int* cudaR_scan_out = NULL; + cudaMalloc((void**)&cudaR_scan_in, numberOfRays*sizeof(int)); + cudaMalloc((void**)&cudaR_scan_out, numberOfRays*sizeof(int)); + + // populate the temp array and copy it to the initial state of the scan array + createTempArray<<>>(cudarays, cudaR_temp); + cudaMemcpy(cudaR_scan_in, cudaR_temp, numberOfRays*sizeof(int), cudaMemcpyDeviceToDevice); + cudaMemcpy(cudaR_scan_out, cudaR_temp, numberOfRays*sizeof(int), cudaMemcpyDeviceToDevice); + + // kernel call to perform exclusive scan + 
for (int p = 1; p <= ceil(log2((double)numberOfRays)); p++) { + inclusiveScan<<>>(cudaR_scan_in, cudaR_scan_out, p); + cudaMemcpy(cudaR_scan_in, cudaR_scan_out, numberOfRays*sizeof(int), cudaMemcpyDeviceToDevice); + } + + int* numRays = new int[1]; + cudaMemcpy(numRays, cudaR_scan_in+(numberOfRays-2), sizeof(int), cudaMemcpyDeviceToHost); + numberOfRays = numRays[0]; + + inclusive2exclusive<<>>(cudaR_scan_in, cudaR_scan_out, &numberOfRays); + scatter<<>>(cudaR_temp, cudaR_scan_out, cudarays, cudaR_out); + + // perform stream compaction on array of rays, Rout array + cudaMemcpy(cudarays, cudaR_out, numberOfRays*sizeof(ray), cudaMemcpyDeviceToDevice); + + delete numRays; + cudaFree( cudaR_out ); + cudaFree( cudaR_temp ); + cudaFree( cudaR_scan_in ); + cudaFree( cudaR_scan_out ); + } + + sendImageToPBO<<>>(PBOpos, renderCam->resolution, cudaimage); + + // retrieve image from GPU + cudaMemcpy( renderCam->image, cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyDeviceToHost); + + // free up stuff, or else we'll leak memory like a madman + cudaFree( cudaimage ); + cudaFree( cudageoms ); + cudaFree( cudamaterials ); + cudaFree( cudarays ); + delete geomList; + delete materialList; + delete rayList; - //free up stuff, or else we'll leak memory like a madman - cudaFree( cudaimage ); - cudaFree( cudageoms ); - cudaFree( cudamaterials ); - delete [] geomList; - // make certain the kernel has completed - cudaThreadSynchronize(); + // make certain the kernel has completed + cudaThreadSynchronize(); - checkCUDAError("Kernel failed!"); + checkCUDAError("Kernel failed!"); } diff --git a/src/scene.cpp b/src/scene.cpp index 415d627..b194012 100755 --- a/src/scene.cpp +++ b/src/scene.cpp @@ -42,6 +42,7 @@ int scene::loadObject(string objectid){ }else{ cout << "Loading Object " << id << "..." 
<< endl; geom newObject; + newObject.objectid = id; string line; //load object type @@ -135,8 +136,8 @@ int scene::loadObject(string objectid){ } int scene::loadCamera(){ - cout << "Loading Camera ..." << endl; - camera newCamera; + printf("Loading Camera ...\n"); + camera newCamera; float fovy; //load static properties @@ -162,6 +163,7 @@ int scene::loadCamera(){ vector positions; vector views; vector ups; + while (!line.empty() && fp_in.good()){ //check frame number diff --git a/src/sceneStructs.h b/src/sceneStructs.h index b10f1cf..0dd69e6 100755 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -16,10 +16,17 @@ enum GEOMTYPE{ SPHERE, CUBE, MESH }; struct ray { glm::vec3 origin; glm::vec3 direction; + glm::vec3 coeff; // specular/diffusive/refractive RGB coefficient + float x; // pixel's x-index + float y; // pixel's y-index + float currentIOR; // IOR of current material being traversed + bool alive; // is this ray still interacting with the scene? }; + struct geom { enum GEOMTYPE type; + int objectid; int materialid; int frames; glm::vec3* translations; @@ -31,6 +38,7 @@ struct geom { struct staticGeom { enum GEOMTYPE type; + int objectid; int materialid; glm::vec3 translation; glm::vec3 rotation; @@ -73,4 +81,10 @@ struct material{ float emittance; }; +struct R3Intersection { + glm::vec3 point; + glm::vec3 normal; + material* material; +}; + #endif //CUDASTRUCTS_H diff --git a/src/utilities.cpp b/src/utilities.cpp index 3fd4b73..7ea2ea5 100755 --- a/src/utilities.cpp +++ b/src/utilities.cpp @@ -52,6 +52,25 @@ glm::vec3 utilityCore::clampRGB(glm::vec3 color){ return color; } +glm::vec3 utilityCore::clampLight(glm::vec3 color){ + if(color.x > 1.0f){ + color.x = 1.0f; + }else if(color.x < 0.0f){ + color.x = 0.0f; + } + if(color.y > 1.0f){ + color.y = 1.0f; + }else if(color.y < 0.0f){ + color.y = 0.0f; + } + if(color.z > 1.0f){ + color.z = 1.0f; + }else if(color.z < 0.0f){ + color.z = 0.0f; + } + return color; +} + bool utilityCore::epsilonCheck(float a, float 
b){ if(fabs(fabs(a)-fabs(b)) #include "cudaMat4.h" -const float PI =3.1415926535897932384626422832795028841971; -const float TWO_PI =6.2831853071795864769252867665590057683943; -const float SQRT_OF_ONE_THIRD =0.5773502691896257645091487805019574556476; -const float E =2.7182818284590452353602874713526624977572; -const float EPSILON =.000000001; -const float ZERO_ABSORPTION_EPSILON =0.00001; -const float RAY_BIAS_AMOUNT =0.0002; +#define PI 3.1415926535897932384626422832795028841971 +#define TWO_PI 6.2831853071795864769252867665590057683943 +#define SQRT_OF_ONE_THIRD 0.5773502691896257645091487805019574556476 +#define E 2.7182818284590452353602874713526624977572 +#define EPSILON .000000001 +#define ZERO_ABSORPTION_EPSILON 0.00001 +#define RAY_BIAS_AMOUNT 0.0002 namespace utilityCore { extern float clamp(float f, float min, float max); extern bool replaceString(std::string& str, const std::string& from, const std::string& to); extern glm::vec3 clampRGB(glm::vec3 color); + extern glm::vec3 clampLight(glm::vec3 color); extern bool epsilonCheck(float a, float b); extern std::vector tokenizeString(std::string str); extern cudaMat4 glmMat4ToCudaMat4(glm::mat4 a);