diff --git a/.gitignore b/.gitignore
index 89942d9..6a06393 100644
--- a/.gitignore
+++ b/.gitignore
@@ -513,7 +513,7 @@ local.properties
 .externalToolBuilders/
 
 # Locally stored "Eclipse launch configurations"
-#*.launch
+*.launch
 
 # CDT-specific
 #.cproject
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3d976c..df23a46 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.0)
 project(cis565_path_tracer)
 
 set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 
 # Set up include and lib paths
 set(EXTERNAL "external")
@@ -84,6 +85,7 @@ target_link_libraries(${CMAKE_PROJECT_NAME}
     src
     #stream_compaction  # TODO: uncomment if using your stream compaction
     ${CORELIBS}
+    X11
     )
 
 add_custom_command(
diff --git a/Project3-CUDA-Path-Tracer.launch b/Project3-CUDA-Path-Tracer.launch
deleted file mode 100644
index 0222434..0000000
--- a/Project3-CUDA-Path-Tracer.launch
+++ /dev/null
@@ -1,22 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<launchConfiguration type="org.eclipse.cdt.launch.applicationLaunchType">
-<stringAttribute key="org.eclipse.cdt.debug.mi.core.DEBUG_NAME" value="${cuda_bin}/cuda-gdb"/>
-<stringAttribute key="org.eclipse.cdt.debug.mi.core.GDB_INIT" value=".cuda-gdbinit"/>
-<stringAttribute key="org.eclipse.cdt.debug.mi.core.protocol" value="mi"/>
-<intAttribute key="org.eclipse.cdt.launch.ATTR_BUILD_BEFORE_LAUNCH_ATTR" value="2"/>
-<stringAttribute key="org.eclipse.cdt.launch.DEBUGGER_ID" value="com.nvidia.cuda.ide.debug.cudagdb"/>
-<stringAttribute key="org.eclipse.cdt.launch.DEBUGGER_START_MODE" value="run"/>
-<stringAttribute key="org.eclipse.cdt.launch.PROGRAM_ARGUMENTS" value="scenes/sphere.txt"/>
-<stringAttribute key="org.eclipse.cdt.launch.PROGRAM_NAME" value="build/cis565_path_tracer"/>
-<stringAttribute key="org.eclipse.cdt.launch.PROJECT_ATTR" value="Project3-CUDA-Path-Tracer"/>
-<booleanAttribute key="org.eclipse.cdt.launch.PROJECT_BUILD_CONFIG_AUTO_ATTR" value="true"/>
-<stringAttribute key="org.eclipse.cdt.launch.PROJECT_BUILD_CONFIG_ID_ATTR" value=""/>
-<booleanAttribute key="org.eclipse.cdt.launch.use_terminal" value="true"/>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
-<listEntry value="/Project3-CUDA-Path-Tracer"/>
-</listAttribute>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
-<listEntry value="4"/>
-</listAttribute>
-<stringAttribute key="process_factory_id" value="org.eclipse.cdt.dsf.gdb.GdbProcessFactory"/>
-</launchConfiguration>
diff --git a/README.md b/README.md
index 110697c..e61f4fc 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,94 @@
-CUDA Path Tracer
-================
+CUDA Path Tracer
+================
 
-**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**
+**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Edward Atter
+  * [LinkedIn](https://www.linkedin.com/in/atter/)
+  * Tested on: Linux Mint 18.3 Sylvia (4.13.0-41-generic), Ryzen 7 2700X @ 3.7 GHz (base clock) 16GB, GTX 1070 Ti 8GB GDDR5 (Personal)
+  * CUDA 9
 
-### (TODO: Your README)
+![](img/intro.png)
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+## Overview
 
+This project implements a path tracer with the following features:
+ - A shading kernel capable of diffuse, specular, and refractive surfaces with Fresnel effects
+ - Antialiasing
+ - Stream compaction
+ - Material sorting
+ - First bounce caching
+ - Built in frame timer
+
+All features may be toggled by changing the defined constants in `src/pathtrace.cu`.
+
+## Performance
+
+
+#### Methodology
+
+Frame timings were calculated by taking the average over 5 frames. In general, after the initial frame, very little variance was observed. The timing for each feature was performed with all other toggleable features disabled.
+
+#### Relative Performance Impact
+
+![](img/relative-performance.png)
+
+The graph above shows the relative performance impact of each feature, represented in seconds. A negative time indicates the feature had a positive impact on performance, while a positive number represents a decrease in performance. "None" is with all features disabled and is the baseline in this test, set to 0. 
+
+Analysis of `nvidia-smi` suggests the application is GPU bound, as expected, pegging the GPU usage at 100%.
+
+    +-----------------------------------------------------------------------------+
+    | NVIDIA-SMI 396.26                 Driver Version: 396.26                    |
+    |-------------------------------+----------------------+----------------------+
+    | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
+    | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
+    |===============================+======================+======================|
+    |   0  GeForce GTX 107...  Off  | 00000000:0A:00.0  On |                  N/A |
+    | 47%   65C    P2   116W / 180W |   1055MiB /  8116MiB |    100%      Default |
+    +-------------------------------+----------------------+----------------------+
+                                                                                   
+    +-----------------------------------------------------------------------------+
+    | Processes:                                                       GPU Memory |
+    |  GPU       PID   Type   Process name                             Usage      |
+    |=============================================================================|
+    |    0      1570      G   /usr/lib/xorg/Xorg                           400MiB |
+    |    0      3563      G   cinnamon                                     140MiB |
+    |    0      4200      G   ...-token=A7907B8E58127E7F09984994A2B09AB9    48MiB |
+    |    0      6463      C   gimp-2.8                                     107MiB |
+    |    0     10622      G   ...-token=A3470AA5428C5FF1C71554B5AC8CA77C    41MiB |
+    |    0     12950    C+G   ...DA-Path-Tracer/build/cis565_path_tracer   209MiB |
+    |    0     23722      C   /usr/lib/libreoffice/program/soffice.bin     105MiB |
+    +-----------------------------------------------------------------------------+
+
+
+## Features
+
+#### Anti-Aliasing
+
+![](img/circle-no-aa.png) ![](img/circle-with-aa.png)
+
+Anti-aliasing can even out rough edges. This is done by "jittering" the rays' x and y positions slightly at each iteration. The effect is most noticeable in high-contrast situations, as in the images above. The image on the left has anti-aliasing disabled, while the image on the right has AA enabled. The image on the right appears less pixelated.
+
+Anti-aliasing had a negligible impact on performance: 0.003 seconds, which is well within the margin of error.
+
+#### Sorting
+In theory, sorting by material type should yield a large improvement. The GPU's scheduler can skip warps if each thread in the warp has returned. Sorting by material type should increase the likelihood of this happening. In addition, it should clean up memory accesses and reduce branch divergence, since most warps will consist of a single material type.
+
+Unfortunately, as shown in the graph above, this theoretical gain was not realized. The overhead of sorting is much greater than the gains it provides.
+
+#### Stream Compaction
+Interestingly, stream compaction resulted in a slight decrease in performance. In theory, we should be able to eliminate many of the dead, black rays, but at least with the current implementation, the compaction overhead was too great to yield any appreciable results.
+
+I hypothesized that mostly black images would benefit the most from stream compaction, so I tried again on the `sphere.txt` scene. The results were the same: rendering was faster without stream compaction.
+
+#### First Bounce Caching
+Caching the first bounce yielded the largest performance increase. Unfortunately, it has the effect of dampening the anti-aliasing effect (if both are enabled simultaneously), since the random jitter of the rays is cached and reused for each iteration, only being updated during a new frame.
+
+## Gallery
+
+![](img/l1.png)
+![](img/m1.png) ![](img/m2.png)
+
+![](img/g1.png) ![](img/g2.png)
+![](img/g3.png) ![](img/g4.png)
+![](img/g5.png) ![](img/g6.png)
diff --git a/external/lib/linux/libglfw3.a b/external/lib/linux/libglfw3.a
index da7ab6c..17b4438 100644
Binary files a/external/lib/linux/libglfw3.a and b/external/lib/linux/libglfw3.a differ
diff --git a/img/circle-no-aa.png b/img/circle-no-aa.png
new file mode 100644
index 0000000..ede49be
Binary files /dev/null and b/img/circle-no-aa.png differ
diff --git a/img/circle-with-aa.png b/img/circle-with-aa.png
new file mode 100644
index 0000000..af2dd04
Binary files /dev/null and b/img/circle-with-aa.png differ
diff --git a/img/g1.png b/img/g1.png
new file mode 100644
index 0000000..f4dd91d
Binary files /dev/null and b/img/g1.png differ
diff --git a/img/g2.png b/img/g2.png
new file mode 100644
index 0000000..5ca4e4e
Binary files /dev/null and b/img/g2.png differ
diff --git a/img/g3.png b/img/g3.png
new file mode 100644
index 0000000..bef54d9
Binary files /dev/null and b/img/g3.png differ
diff --git a/img/g4.png b/img/g4.png
new file mode 100644
index 0000000..6722da5
Binary files /dev/null and b/img/g4.png differ
diff --git a/img/g5.png b/img/g5.png
new file mode 100644
index 0000000..1e5cef3
Binary files /dev/null and b/img/g5.png differ
diff --git a/img/g6.png b/img/g6.png
new file mode 100644
index 0000000..82db79f
Binary files /dev/null and b/img/g6.png differ
diff --git a/img/intro.png b/img/intro.png
new file mode 100644
index 0000000..b411d50
Binary files /dev/null and b/img/intro.png differ
diff --git a/img/l1.png b/img/l1.png
new file mode 100644
index 0000000..304f023
Binary files /dev/null and b/img/l1.png differ
diff --git a/img/m1.png b/img/m1.png
new file mode 100644
index 0000000..e4c9120
Binary files /dev/null and b/img/m1.png differ
diff --git a/img/m2.png b/img/m2.png
new file mode 100644
index 0000000..230812d
Binary files /dev/null and b/img/m2.png differ
diff --git a/img/relative-performance.png b/img/relative-performance.png
new file mode 100644
index 0000000..479a103
Binary files /dev/null and b/img/relative-performance.png differ
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a1cb3fb..84d8a59 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -19,5 +19,5 @@ set(SOURCE_FILES
 
 cuda_add_library(src
     ${SOURCE_FILES}
-    OPTIONS -arch=sm_20
+    OPTIONS -arch=sm_60
     )
diff --git a/src/interactions.h b/src/interactions.h
index 5ce3628..808a7ab 100644
--- a/src/interactions.h
+++ b/src/interactions.h
@@ -2,6 +2,8 @@
 
 #include "intersections.h"
 
+// Length threshold used by refract(): when the direction vector it tests is
+// not longer than this value, refract() falls back to a mirror reflection.
+#define THRESH_INTERNAL_REFLECTION .01f
+
 // CHECKITOUT
 /**
  * Computes a cosine-weighted random direction in a hemisphere.
@@ -41,6 +43,41 @@ glm::vec3 calculateRandomDirectionInHemisphere(
         + sin(around) * over * perpendicularDirection2;
 }
 
+/**
+ * Mirror-bounce a path segment off a surface.
+ *
+ * The segment's color is tinted by material.color, its ray direction becomes
+ * the reflection of `direction` about `normal`, and its ray origin is moved
+ * to the intersection point nudged slightly along the new direction.
+ *
+ * pathSegment  segment updated in place (color, ray.direction, ray.origin)
+ * intersect    point of intersection
+ * normal       surface normal at the intersection
+ * material     supplies the color tint
+ * direction    incoming ray direction
+ */
+__host__ __device__
+void reflect(PathSegment & pathSegment,
+        glm::vec3 intersect,
+        glm::vec3 normal,
+        const Material &material,
+        glm::vec3 direction) {
+	const glm::vec3 mirrored = glm::reflect(direction, normal);
+
+	// Offset the origin along the outgoing ray so the next trace does not
+	// start exactly on the surface it just left.
+	pathSegment.ray.origin = intersect + .0001f * mirrored;
+	pathSegment.ray.direction = mirrored;
+	pathSegment.color *= material.color;
+}
+
+/**
+ * Bend a path segment through a refractive surface, falling back to a mirror
+ * bounce when the refraction is not possible (total internal reflection).
+ *
+ * pathSegment  segment updated in place (color, ray.direction, ray.origin)
+ * intersect    point of intersection
+ * normal       surface normal at the intersection
+ * material     supplies indexOfRefraction and the color tint
+ * direction    incoming ray direction (the caller passes a normalized vector)
+ */
+__host__ __device__
+void refract(PathSegment & pathSegment,
+        glm::vec3 intersect,
+        glm::vec3 normal,
+        const Material &material,
+        glm::vec3 direction) {
+	// glm::refract takes eta = (index on the incident side) / (index on the
+	// transmitted side) and a normal that opposes the incident direction.
+	float eta = material.indexOfRefraction;
+	if (glm::dot(direction, normal) < 0) {
+		// Ray opposes the normal: going from the surrounding medium (index 1)
+		// into the material, so eta = 1 / ior.
+		eta = 1.0f / eta;
+	} else {
+		// Ray travels along the normal: leaving the material, so eta = ior / 1
+		// and the normal must be flipped to face the incoming ray.
+		normal = -normal;
+	}
+
+	// glm::refract returns a zero vector on total internal reflection, so the
+	// length test has to be made on the refracted vector itself, not on the
+	// segment's previous direction.
+	const glm::vec3 refracted = glm::refract(direction, normal, eta);
+	if (glm::length(refracted) > THRESH_INTERNAL_REFLECTION) {
+		pathSegment.ray.direction = refracted;
+	} else {
+		pathSegment.ray.direction = glm::reflect(direction, normal);
+	}
+
+	pathSegment.color *= material.color;
+	// Nudge the origin along the outgoing ray to avoid re-hitting the surface.
+	pathSegment.ray.origin = intersect + .001f * pathSegment.ray.direction;
+}
+
 /**
  * Scatter a ray with some probabilities according to the material properties.
  * For example, a diffuse surface scatters in a cosine-weighted hemisphere.
@@ -71,9 +108,71 @@ void scatterRay(
 		PathSegment & pathSegment,
         glm::vec3 intersect,
         glm::vec3 normal,
-        const Material &m,
+        const Material &material,
         thrust::default_random_engine &rng) {
-    // TODO: implement this.
     // A basic implementation of pure-diffuse shading will just call the
     // calculateRandomDirectionInHemisphere defined above.
+	glm::vec3 direction = glm::normalize(pathSegment.ray.direction);
+
+	if (material.hasReflective && material.hasRefractive) {
+		thrust::uniform_real_distribution<float> u01(0,1);
+
+		float cosAngle = glm::dot(direction, normal);
+		if (cosAngle > 1) {
+			cosAngle = 1;
+		} else if (cosAngle < -1) {
+			cosAngle = -1;
+		}
+
+		float refractive_index_before = 1;
+		float refractive_index_after;
+
+		if (cosAngle > 0) {
+			std::swap(refractive_index_before, refractive_index_after);
+		} else {
+			cosAngle *= -1;
+			pathSegment.ray.origin = intersect + .001f * direction;
+		}
+
+		float refractive_index = refractive_index_before / refractive_index_after;
+		float sinAngleBefore = 1 - std::pow(cosAngle, 2);
+		if (sinAngleBefore > 0) {
+			sinAngleBefore = std::sqrt(sinAngleBefore);
+		} else {
+			sinAngleBefore = 0;
+		}
+		float sinAngleAfter = refractive_index * sinAngleBefore;
+
+		if (sinAngleAfter > 1) {
+			pathSegment.color *= material.specular.color;
+			pathSegment.ray.direction = glm::reflect(pathSegment.ray.direction, normal);
+		} else {
+			//Shlick
+			//R = R_0 + (1 - R_0) * (1 - cos(theta))^5
+			//R_0 = [(n_1 - n_2) / (n_1 + n_2)]^2
+
+			float r0 = (refractive_index_before - refractive_index_after) / (refractive_index_before + refractive_index_after);
+			r0 *= r0;
+			float cosAngleRaised = std::pow(1 - cosAngle, 5);
+			float R = r0 + (1 - r0) * cosAngleRaised;
+
+			//Randomize
+			if (R < u01(rng)) {
+				refract(pathSegment, intersect, normal, material, direction);
+			} else {
+				reflect(pathSegment, intersect, normal, material, direction);
+			}
+		}
+
+	} else if (material.hasReflective) {
+		reflect(pathSegment, intersect, normal, material, direction);
+	} else if (material.hasRefractive) {
+		refract(pathSegment, intersect, normal, material, direction);
+	} else {
+		//Diffuse
+		pathSegment.color *= material.color;
+		pathSegment.ray.direction = calculateRandomDirectionInHemisphere(normal, rng);
+		pathSegment.ray.origin = intersect + .001f * pathSegment.ray.direction;
+	}
+
 }
diff --git a/src/pathtrace.cu b/src/pathtrace.cu
index c1ec122..1c7b2be 100644
--- a/src/pathtrace.cu
+++ b/src/pathtrace.cu
@@ -4,6 +4,7 @@
 #include <thrust/execution_policy.h>
 #include <thrust/random.h>
 #include <thrust/remove.h>
+#include <time.h>
 
 #include "sceneStructs.h"
 #include "scene.h"
@@ -16,6 +17,14 @@
 
 #define ERRORCHECK 1
 
+//CONFIGURABLE OPTIONS
+// Compile-time feature toggles (1 = enabled, 0 = disabled).
+// VERBOSE: print the time of every bounce iteration (only takes effect when
+// OPTION_ENABLE_TIMER is also 1).
+#define VERBOSE					 0
+// OPTION_ENABLE_CACHE: store the depth-0 intersections and copy them back on
+// later iterations instead of recomputing them.
+#define OPTION_ENABLE_CACHE 	 0
+// OPTION_ENABLE_SORT: sort intersections and paths by material id before shading.
+#define OPTION_ENABLE_SORT 		 0
+// OPTION_ENABLE_COMPACTION: partition the path array after shading and shrink
+// num_paths to the kept range.
+#define OPTION_ENABLE_COMPACTION 0
+// OPTION_ENABLE_ANTI_ALIAS: jitter each camera ray by a random sub-pixel offset.
+#define OPTION_ENABLE_ANTI_ALIAS 1
+// OPTION_ENABLE_TIMER: time each bounce iteration with clock() and print the
+// per-frame total.
+#define OPTION_ENABLE_TIMER		 1
+
 #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
 #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
 void checkCUDAErrorFn(const char *msg, const char *file, int line) {
@@ -75,6 +84,7 @@ static PathSegment * dev_paths = NULL;
 static ShadeableIntersection * dev_intersections = NULL;
 // TODO: static variables for device memory, any extra info you need, etc
 // ...
+static ShadeableIntersection * dev_intersection_cache;
 
 void pathtraceInit(Scene *scene) {
     hst_scene = scene;
@@ -96,6 +106,7 @@ void pathtraceInit(Scene *scene) {
   	cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
 
     // TODO: initialize any extra device memeory you need
+  	cudaMalloc(&dev_intersection_cache, pixelcount * sizeof(ShadeableIntersection));
 
     checkCUDAError("pathtraceInit");
 }
@@ -108,6 +119,7 @@ void pathtraceFree() {
   	cudaFree(dev_intersections);
     // TODO: clean up any extra device memory you created
 
+  	cudaFree(dev_intersection_cache);
     checkCUDAError("pathtraceFree");
 }
 
@@ -131,10 +143,22 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path
 		segment.ray.origin = cam.position;
     segment.color = glm::vec3(1.0f, 1.0f, 1.0f);
 
-		// TODO: implement antialiasing by jittering the ray
+#if OPTION_ENABLE_ANTI_ALIAS
+		// Implement antialiasing by jittering the ray
+    	thrust::default_random_engine rng =
+    			makeSeededRandomEngine(iter, index, pathSegments[index].remainingBounces);
+    	thrust::uniform_real_distribution<float> u01(0, 1);
+
+    	float jit_x = u01(rng);
+    	float jit_y = u01(rng);
+#else
+    	float jit_x = 0;
+    	float jit_y = 0;
+#endif
+
 		segment.ray.direction = glm::normalize(cam.view
-			- cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f)
-			- cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f)
+			- cam.right * cam.pixelLength.x * (((float)x + jit_x) - (float)cam.resolution.x * 0.5f)
+			- cam.up * cam.pixelLength.y * (((float)y + jit_y) - (float)cam.resolution.y * 0.5f)
 			);
 
 		segment.pixelIndex = index;
@@ -265,6 +289,45 @@ __global__ void shadeFakeMaterial (
   }
 }
 
+/**
+ * Shade one bounce of every active path.
+ *
+ * For each path that still has bounces left:
+ *  - no intersection (t <= 0): the path is colored black and terminated;
+ *  - emissive material: the path color is multiplied by
+ *    material.color * material.emittance and the path is terminated;
+ *  - otherwise: scatterRay() updates the segment and one bounce is consumed.
+ *
+ * iter                    iteration number, used to seed the per-thread RNG
+ * num_paths               number of entries to process in both arrays
+ * shadeableIntersections  per-path intersection results (only read here)
+ * pathSegments            per-path state, updated in place
+ * materials               material table indexed by materialId
+ */
+__global__ void shadeMaterial (int iter
+		  	, int num_paths
+			, ShadeableIntersection * shadeableIntersections
+			, PathSegment * pathSegments
+			, Material * materials
+			) {
+	int idx = blockIdx.x * blockDim.x + threadIdx.x;
+	// Bounds-check BEFORE touching pathSegments: the grid is rounded up to a
+	// whole number of blocks, so trailing threads have idx >= num_paths.
+	if (idx >= num_paths) {
+		return;
+	}
+
+	int remainingBounces = pathSegments[idx].remainingBounces;
+	if (remainingBounces <= 0) {
+		// Path already terminated in an earlier bounce.
+		return;
+	}
+
+	ShadeableIntersection si = shadeableIntersections[idx];
+	//Per notes, intersection of -1 is non-existent
+	if (si.t > 0) {
+		// Seeded per (iteration, path, bounce count).
+		thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, remainingBounces);
+
+		Material material = materials[si.materialId];
+
+		//Check if we hit a light source
+		if (material.emittance) {
+			pathSegments[idx].color *= (material.color * material.emittance);
+			remainingBounces = 0;
+		} else {
+			glm::vec3 ri = getPointOnRay(pathSegments[idx].ray, si.t);
+			scatterRay(pathSegments[idx], ri, si.surfaceNormal, material, rng);
+			remainingBounces --;
+		}
+	} else {
+		//No intersection, color black
+		pathSegments[idx].color = glm::vec3(0.0f);
+		remainingBounces = 0;
+	}
+
+	//Write the local copy of remainingBounces back to the segment
+	pathSegments[idx].remainingBounces = remainingBounces;
+}
+
 // Add the current iteration's output to the overall image
 __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths)
 {
@@ -277,6 +340,23 @@ __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterati
 	}
 }
 
+/**
+ * Ordering functor used when sorting intersections by material.
+ * Returns true when the first intersection's materialId is strictly smaller
+ * than the second's; Thrust's sort takes a boolean "less than" predicate
+ * (strict weak ordering) rather than a three-way comparison.
+ */
+struct material_comparator {
+	__host__ __device__
+	bool operator() (const ShadeableIntersection &lhs, const ShadeableIntersection &rhs) {
+		return rhs.materialId > lhs.materialId;
+	}
+};
+
+/**
+ * Unary predicate used to partition the path array: paths for which it
+ * returns true are placed in front (the kept range).
+ *
+ * NOTE(review): the test is "remainingBounces is not negative", so a path
+ * whose counter is exactly 0 is still kept. shadeMaterial marks finished
+ * paths with 0, so this predicate appears to keep every path - confirm
+ * against how num_paths is used by the final gather before tightening it
+ * to "> 0".
+ */
+struct stream_comparator {
+	__host__ __device__
+	bool operator() (const PathSegment &a) {
+		const bool exhausted = a.remainingBounces < 0;
+		return !exhausted;
+	}
+};
+
 /**
  * Wrapper for the __global__ call that sets up the kernel calls and does a ton
  * of memory management
@@ -295,6 +375,10 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
 	// 1D block for path tracing
 	const int blockSize1d = 128;
 
+	clock_t timer;
+	double iteration_time = 0;
+	double total_time = 0;
+
     ///////////////////////////////////////////////////////////////////////////
 
     // Recap:
@@ -338,12 +422,53 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
 
   bool iterationComplete = false;
 	while (!iterationComplete) {
+#if OPTION_ENABLE_TIMER
+	//Start the clock
+    timer = clock();
+#endif
 
 	// clean shading chunks
 	cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
 
 	// tracing
 	dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
+
+	//CACHE
+#if OPTION_ENABLE_CACHE
+	if (depth == 0 && iter == 1) {
+		//Calculate, first run
+		computeIntersections <<<numblocksPathSegmentTracing, blockSize1d>>> (
+				depth
+				, num_paths
+				, dev_paths
+				, dev_geoms
+				, hst_scene->geoms.size()
+				, dev_intersections
+				);
+		checkCUDAError("trace one bounce");
+		//Save cache
+		cudaMemcpy(dev_intersection_cache, dev_intersections,
+				pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice);
+		//printf("CACHED!\n");
+	} else if (depth == 0 && iter != 0) {
+		//Re-use cache
+		cudaMemcpy(dev_intersections, dev_intersection_cache,
+				pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice);
+		//printf("USING CACHE\n");
+	} else {
+		//Cache not applicable (depth != 0, or iter == 0): recompute intersections
+		computeIntersections <<<numblocksPathSegmentTracing, blockSize1d>>> (
+						depth
+						, num_paths
+						, dev_paths
+						, dev_geoms
+						, hst_scene->geoms.size()
+						, dev_intersections
+						);
+		checkCUDAError("trace one bounce");
+	}
+#else
+	//NO-CACHE, Recalculate each time
 	computeIntersections <<<numblocksPathSegmentTracing, blockSize1d>>> (
 		depth
 		, num_paths
@@ -353,10 +478,11 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
 		, dev_intersections
 		);
 	checkCUDAError("trace one bounce");
+#endif
+	//Common to caching and non-caching solutions
 	cudaDeviceSynchronize();
 	depth++;
 
-
 	// TODO:
 	// --- Shading Stage ---
 	// Shade path segments based on intersections and generate new rays by
@@ -366,14 +492,36 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
   // TODO: compare between directly shading the path segments and shading
   // path segments that have been reshuffled to be contiguous in memory.
 
-  shadeFakeMaterial<<<numblocksPathSegmentTracing, blockSize1d>>> (
+#if OPTION_ENABLE_SORT
+  //Sort
+  thrust::sort_by_key(thrust::device, dev_intersections, dev_intersections + num_paths, dev_paths, material_comparator());
+#endif
+
+  shadeMaterial<<<numblocksPathSegmentTracing, blockSize1d>>> (
     iter,
     num_paths,
     dev_intersections,
     dev_paths,
     dev_materials
   );
-  iterationComplete = true; // TODO: should be based off stream compaction results.
+
+#if OPTION_ENABLE_COMPACTION
+  //Compact
+  PathSegment * segment = thrust::partition(thrust::device, dev_paths, dev_paths + num_paths, stream_comparator());
+  //We've likely removed some paths, recalculate
+  num_paths = segment - dev_paths;
+#endif
+
+  iterationComplete = traceDepth < depth || num_paths <= 0;
+
+#if OPTION_ENABLE_TIMER
+  timer = clock() - timer;
+  iteration_time = ((double)timer)/CLOCKS_PER_SEC;
+  total_time += iteration_time;
+#if VERBOSE
+  printf("Iteration completed: %f \n", iteration_time);
+#endif
+#endif
 	}
 
   // Assemble this iteration and apply it to the image
@@ -390,4 +538,7 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
             pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
 
     checkCUDAError("pathtrace");
+#if OPTION_ENABLE_TIMER
+    printf("Frame completed: %f \n", total_time);
+#endif
 }