diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3d976c..41dc21e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,7 +72,7 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
 endif()
 
 include_directories(.)
-#add_subdirectory(stream_compaction)  # TODO: uncomment if using your stream compaction
+add_subdirectory(stream_compaction)  # enabled: build the stream_compaction subdirectory
 add_subdirectory(src)
 
 cuda_add_executable(${CMAKE_PROJECT_NAME}
@@ -82,7 +82,7 @@ cuda_add_executable(${CMAKE_PROJECT_NAME}
 
 target_link_libraries(${CMAKE_PROJECT_NAME}
     src
-    #stream_compaction  # TODO: uncomment if using your stream compaction
+    stream_compaction  # enabled: link against the stream_compaction target
     ${CORELIBS}
     )
 
diff --git a/README.md b/README.md
index 110697c..98f177d 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,86 @@
 CUDA Path Tracer
 ================
-
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Liam Dugan -- Fall 2018
+  * [LinkedIn](https://www.linkedin.com/in/liam-dugan-95a961135/), [personal website](http://liamdugan.com/)
+* Tested on: Windows 10, Ryzen 5 1600 @ 3.20GHz 16GB, GTX 1070 16GB (Personal Computer)
+
+![](img/cornell.2018-10-03_03-05-39z.1695samp.png)
+
+What is Path Tracing?
+=============
+
+<img align="right" src="img/pathTracing.png">
+Path Tracing is a technique for creating images by emulating certain physical properties of light.
+
+In the real world a ray of light:
+1. Is emitted from **Light Sources**
+2. **Bounces** around a scene and changes color depending on what it hits
+3. Some hit pixels on the camera, and those get seen
+
+However, a Path Tracer does this backwards by firing rays out of the camera pixels:
+1. Rays bounce around in the scene a certain number of times
+2. If they hit a light source they terminate and color the camera pixel
+3. If they terminate without hitting a light, the pixel is colored black
+
+(Picture taken from course path tracer lecture slide 3)
+
+
+Scenes
+================
+
+Reflection / Refraction / Diffuse
+---------
+![](img/reflectionRefraction.png)
+![](img/reflection.png)
+
+In these scenes we can see not only the reflection working (in the infinite wall room), but also we can see refraction and diffuse lighting being accounted for.
+
+Depth of Field
+-------
+![](img/cornell.2018-10-03_03-05-39z.1695samp.png)
+
+Depth of Field requires jittering the generated ray based on a given distance from a focal point.
+
+Arbitrary Object Loading (with tinyObj)
+-------
+![](img/Sword.png)
+
+I loaded in .obj files using tinyObj and then used `glm::intersectRayTriangle` to check for intersections between our ray and every triangle of the mesh.
+
+Performance Optimizations
+================
+
+Total Performance Breakdown
+------
+![](img/runtimes.png)
+
+To conduct this test I measured the runtime of each of our four main kernels across 4 different scenes and normalized them relative to each other. As this result shows, the bottleneck of our pipeline is the compute-intersections kernel. This is most evident in the sword scene, where each ray has to loop over the many triangles of the mesh. This bottleneck could be relieved greatly by implementing a k-d tree or a similar bounding data structure.
+
+Otherwise our bottleneck seems to be stream compaction, which can be improved greatly by using shared memory.
+
+First Iteration Caching
+-------
+![](img/cacheFirstIteration.png)
+
+As we can see from the graph here, the average runtime of the ray generation kernel is drastically reduced when the first iteration caching is activated. We do take a performance hit at the beginning due to having to copy the generated rays into the cache but it is well worth it.
+
+Material ID Sorting
+-------
+![](img/materialIDsort.png)
+
+Sorting the rays based on material ID attempts to exploit warp coherence to get extra performance. However, it seems that in our case the added overhead of the sort was just too significant: the material ID sort performed significantly worse on both a sparsely populated scene and a heavily populated scene.
+
+Stream Compaction
+-------
+![](img/StreamCompaction.png)
+
+Taking my implementation from Project 2 for Stream Compaction and adapting it here allows us to greatly decrease the total number of rays for which to calculate intersections. This is especially true in more open scenes, where the rays have plenty of room to shoot off into the distance and terminate. On the other hand, in a closed scene (like the infinite reflection room) rays will almost always reach their depth limit unless they hit an emissive surface.
 
-### (TODO: Your README)
+## Bloopers
+Because all good things come from humble beginnings.
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+<img align="center" width=550px src="img/blooper1.png">
+<img align="center" width=550px src="img/blooper2.png">
 
diff --git a/external/include/tiny_obj_loader.h b/external/include/tiny_obj_loader.h
new file mode 100644
index 0000000..ee44076
--- /dev/null
+++ b/external/include/tiny_obj_loader.h
@@ -0,0 +1,2029 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2012-2016 Syoyo Fujita and many contributors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+//
+// version 1.0.6 : Add TINYOBJLOADER_USE_DOUBLE option(#124)
+// version 1.0.5 : Ignore `Tr` when `d` exists in MTL(#43)
+// version 1.0.4 : Support multiple filenames for 'mtllib'(#112)
+// version 1.0.3 : Support parsing texture options(#85)
+// version 1.0.2 : Improve parsing speed by about a factor of 2 for large
+// files(#105)
+// version 1.0.1 : Fixes a shape is lost if obj ends with a 'usemtl'(#104)
+// version 1.0.0 : Change data structure. Change license from BSD to MIT.
+//
+
+//
+// Use this in *one* .cc
+//   #define TINYOBJLOADER_IMPLEMENTATION
+//   #include "tiny_obj_loader.h"
+//
+
+#ifndef TINY_OBJ_LOADER_H_
+#define TINY_OBJ_LOADER_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace tinyobj {
+
+// https://en.wikipedia.org/wiki/Wavefront_.obj_file says ...
+//
+//  -blendu on | off                       # set horizontal texture blending
+//  (default on)
+//  -blendv on | off                       # set vertical texture blending
+//  (default on)
+//  -boost real_value                      # boost mip-map sharpness
+//  -mm base_value gain_value              # modify texture map values (default
+//  0 1)
+//                                         #     base_value = brightness,
+//                                         gain_value = contrast
+//  -o u [v [w]]                           # Origin offset             (default
+//  0 0 0)
+//  -s u [v [w]]                           # Scale                     (default
+//  1 1 1)
+//  -t u [v [w]]                           # Turbulence                (default
+//  0 0 0)
+//  -texres resolution                     # texture resolution to create
+//  -clamp on | off                        # only render texels in the clamped
+//  0-1 range (default off)
+//                                         #   When unclamped, textures are
+//                                         repeated across a surface,
+//                                         #   when clamped, only texels which
+//                                         fall within the 0-1
+//                                         #   range are rendered.
+//  -bm mult_value                         # bump multiplier (for bump maps
+//  only)
+//
+//  -imfchan r | g | b | m | l | z         # specifies which channel of the file
+//  is used to
+//                                         # create a scalar or bump texture.
+//                                         r:red, g:green,
+//                                         # b:blue, m:matte, l:luminance,
+//                                         z:z-depth..
+//                                         # (the default for bump is 'l' and
+//                                         for decal is 'm')
+//  bump -imfchan r bumpmap.tga            # says to use the red channel of
+//  bumpmap.tga as the bumpmap
+//
+// For reflection maps...
+//
+//   -type sphere                           # specifies a sphere for a "refl"
+//   reflection map
+//   -type cube_top    | cube_bottom |      # when using a cube map, the texture
+//   file for each
+//         cube_front  | cube_back   |      # side of the cube is specified
+//         separately
+//         cube_left   | cube_right
+
+#ifdef TINYOBJLOADER_USE_DOUBLE
+  //#pragma message "using double"
+  typedef double real_t;  // real_t is double when TINYOBJLOADER_USE_DOUBLE is defined
+#else
+  //#pragma message "using float"
+  typedef float real_t;  // otherwise real_t is float
+#endif
+
+typedef enum {
+  TEXTURE_TYPE_NONE,  // default (no recognised "-type" value)
+  TEXTURE_TYPE_SPHERE,       // "-type sphere"
+  TEXTURE_TYPE_CUBE_TOP,     // "-type cube_top"
+  TEXTURE_TYPE_CUBE_BOTTOM,  // "-type cube_bottom"
+  TEXTURE_TYPE_CUBE_FRONT,   // "-type cube_front"
+  TEXTURE_TYPE_CUBE_BACK,    // "-type cube_back"
+  TEXTURE_TYPE_CUBE_LEFT,    // "-type cube_left"
+  TEXTURE_TYPE_CUBE_RIGHT    // "-type cube_right"
+} texture_type_t;  // value of the "-type" texture option
+
+typedef struct {
+  texture_type_t type;     // -type (default TEXTURE_TYPE_NONE)
+  real_t sharpness;         // -boost (default 1.0)
+  real_t brightness;        // base_value in -mm option (default 0)
+  real_t contrast;          // gain_value in -mm option (default 1)
+  real_t origin_offset[3];  // -o u [v [w]] (default 0 0 0)
+  real_t scale[3];          // -s u [v [w]] (default 1 1 1)
+  real_t turbulence[3];     // -t u [v [w]] (default 0 0 0)
+  // int   texture_resolution; // -texres resolution (default = ?) TODO
+  bool clamp;    // -clamp (default false)
+  char imfchan;  // -imfchan (default 'l' for bump maps, 'm' otherwise)
+  bool blendu;   // -blendu (default on)
+  bool blendv;   // -blendv (default on)
+  real_t bump_multiplier;  // -bm (for bump maps only, default 1.0)
+} texture_option_t;  // options that may accompany a texture name
+
+typedef struct {
+  std::string name;  // material name
+  // Three-component terms first, then scalar terms.
+  real_t ambient[3];
+  real_t diffuse[3];
+  real_t specular[3];
+  real_t transmittance[3];
+  real_t emission[3];
+  real_t shininess;
+  real_t ior;       // index of refraction
+  real_t dissolve;  // 1 == opaque; 0 == fully transparent
+  // illumination model (see http://www.fileformat.info/format/material/)
+  int illum;
+
+  int dummy;  // Suppress padding warning.
+  // Texture file names; the trailing comment is the MTL statement for each.
+  std::string ambient_texname;             // map_Ka
+  std::string diffuse_texname;             // map_Kd
+  std::string specular_texname;            // map_Ks
+  std::string specular_highlight_texname;  // map_Ns
+  std::string bump_texname;                // map_bump, bump
+  std::string displacement_texname;        // disp
+  std::string alpha_texname;               // map_d
+  // One texture_option_t per texture name above, in the same order.
+  texture_option_t ambient_texopt;
+  texture_option_t diffuse_texopt;
+  texture_option_t specular_texopt;
+  texture_option_t specular_highlight_texopt;
+  texture_option_t bump_texopt;
+  texture_option_t displacement_texopt;
+  texture_option_t alpha_texopt;
+
+  // PBR extension
+  // http://exocortex.com/blog/extending_wavefront_mtl_to_support_pbr
+  real_t roughness;            // [0, 1] default 0
+  real_t metallic;             // [0, 1] default 0
+  real_t sheen;                // [0, 1] default 0
+  real_t clearcoat_thickness;  // [0, 1] default 0
+  real_t clearcoat_roughness;  // [0, 1] default 0
+  real_t anisotropy;           // aniso. [0, 1] default 0
+  real_t anisotropy_rotation;  // anisor. [0, 1] default 0
+  real_t pad0;  // presumably padding only, like `dummy` — TODO confirm
+  real_t pad1;  // presumably padding only, like `dummy` — TODO confirm
+  std::string roughness_texname;  // map_Pr
+  std::string metallic_texname;   // map_Pm
+  std::string sheen_texname;      // map_Ps
+  std::string emissive_texname;   // map_Ke
+  std::string normal_texname;     // norm. For normal mapping.
+  // Options for the PBR texture names above, in the same order.
+  texture_option_t roughness_texopt;
+  texture_option_t metallic_texopt;
+  texture_option_t sheen_texopt;
+  texture_option_t emissive_texopt;
+  texture_option_t normal_texopt;
+
+  int pad2;  // presumably padding only, like `dummy` — TODO confirm
+  // NOTE(review): presumably holds unrecognised MTL statements — confirm.
+  std::map<std::string, std::string> unknown_parameter;
+} material_t;
+
+typedef struct {
+  std::string name;  // tag name
+  // Tag payload, split by value type.
+  std::vector<int> intValues;
+  std::vector<real_t> floatValues;
+  std::vector<std::string> stringValues;
+} tag_t;  // element type of mesh_t::tags
+
+// Index triple: vertex, normal and texcoord are indexed independently.
+// A member set to -1 means that attribute is not used.
+typedef struct {
+  int vertex_index;
+  int normal_index;
+  int texcoord_index;
+} index_t;
+
+typedef struct {
+  std::vector<index_t> indices;
+  std::vector<unsigned char> num_face_vertices;  // The number of vertices per
+                                                 // face. 3 = triangle, 4 = quad,
+                                                 // ... Up to 255.
+  std::vector<int> material_ids;                 // per-face material ID
+  std::vector<tag_t> tags;                       // SubD tag
+} mesh_t;
+
+typedef struct {
+  std::string name;  // shape name
+  mesh_t mesh;       // face data of this shape
+} shape_t;  // element type of the `shapes` vector filled by LoadObj
+
+// Vertex attributes, stored as flat arrays of real_t.
+typedef struct {
+  std::vector<real_t> vertices;   // 'v'
+  std::vector<real_t> normals;    // 'vn'
+  std::vector<real_t> texcoords;  // 'vt'
+} attrib_t;
+
+typedef struct callback_t_ {
+  // `w` is optional and set to 1 if there is no `w` item in the `v` line.
+  void (*vertex_cb)(void *user_data, real_t x, real_t y, real_t z, real_t w);
+  void (*normal_cb)(void *user_data, real_t x, real_t y, real_t z);
+
+  // `y` and `z` are optional and set to 0 if there is no `y` and/or `z`
+  // item in the `vt` line.
+  void (*texcoord_cb)(void *user_data, real_t x, real_t y, real_t z);
+
+  // Called per 'f' line. num_indices is the number of face indices (e.g. 3
+  // for a triangle, 4 for a quad).
+  // 0 will be passed for an undefined index in index_t members.
+  void (*index_cb)(void *user_data, index_t *indices, int num_indices);
+  // `name` is the material name; `material_id` is the array index into
+  // material_t[], or -1 if the material was not found in the .mtl.
+  // Called for 'usemtl'.
+  void (*usemtl_cb)(void *user_data, const char *name, int material_id);
+  // `materials` = parsed material data.
+  void (*mtllib_cb)(void *user_data, const material_t *materials,
+                    int num_materials);
+  // There may be multiple group names
+  void (*group_cb)(void *user_data, const char **names, int num_names);
+  void (*object_cb)(void *user_data, const char *name);
+  // Every callback pointer starts out as NULL.
+  callback_t_()
+      : vertex_cb(NULL),
+        normal_cb(NULL),
+        texcoord_cb(NULL),
+        index_cb(NULL),
+        usemtl_cb(NULL),
+        mtllib_cb(NULL),
+        group_cb(NULL),
+        object_cb(NULL) {}
+} callback_t;
+
+class MaterialReader {  // abstract base: operator() is pure virtual
+ public:
+  MaterialReader() {}
+  virtual ~MaterialReader();  // defined out of line in the implementation part
+  // Implemented by subclasses. NOTE(review): presumably loads `matId` — confirm.
+  virtual bool operator()(const std::string &matId,
+                          std::vector<material_t> *materials,
+                          std::map<std::string, int> *matMap,
+                          std::string *err) = 0;
+};
+
+class MaterialFileReader : public MaterialReader {
+ public:
+  explicit MaterialFileReader(const std::string &mtl_basedir)
+      : m_mtlBaseDir(mtl_basedir) {}  // keeps a copy of the base directory
+  virtual ~MaterialFileReader() {}
+  virtual bool operator()(const std::string &matId,
+                          std::vector<material_t> *materials,
+                          std::map<std::string, int> *matMap, std::string *err);
+  // operator() is only declared here; its definition is outside this view.
+ private:
+  std::string m_mtlBaseDir;  // base directory given to the constructor
+};
+
+class MaterialStreamReader : public MaterialReader {
+ public:
+  explicit MaterialStreamReader(std::istream &inStream)
+      : m_inStream(inStream) {}  // stores a reference, not a copy
+  virtual ~MaterialStreamReader() {}
+  virtual bool operator()(const std::string &matId,
+                          std::vector<material_t> *materials,
+                          std::map<std::string, int> *matMap, std::string *err);
+  // The stream is held by reference, so it must outlive this reader.
+ private:
+  std::istream &m_inStream;
+};
+
+/// Loads .obj from a file.
+/// 'attrib', 'shapes' and 'materials' will be filled with the parsed data.
+/// 'filename' names the .obj file to load.
+/// Returns true when loading the .obj succeeds.
+/// Warning and error messages are returned in `err`.
+/// 'mtl_basedir' is optional, and used as the base directory for .mtl files.
+/// By default (`NULL`), .mtl files are searched for in the application's
+/// working directory.
+/// 'triangulate' is optional, and selects whether polygon faces in the .obj
+/// are triangulated or not.
+bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+             std::vector<material_t> *materials, std::string *err,
+             const char *filename, const char *mtl_basedir = NULL,
+             bool triangulate = true);
+
+/// Loads .obj from a stream, reporting parsed items through user callbacks.
+/// .mtl is loaded as usual and parsed material_t data will be passed to
+/// `callback.mtllib_cb`.
+/// Returns true when loading the .obj/.mtl succeeds.
+/// Warning and error messages are returned in `err`.
+/// See `examples/callback_api/` for how to use this function.
+bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
+                         void *user_data = NULL,
+                         MaterialReader *readMatFn = NULL,
+                         std::string *err = NULL);
+
+/// Loads object from a std::istream; takes an optional MaterialReader
+/// (`readMatFn`), presumably used to load materials — TODO confirm.
+/// Returns true when loading the .obj succeeds.
+/// Warning and error messages are returned in `err`.
+bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+             std::vector<material_t> *materials, std::string *err,
+             std::istream *inStream, MaterialReader *readMatFn = NULL,
+             bool triangulate = true);
+
+/// Loads materials from `inStream` into `materials` and `material_map`.
+void LoadMtl(std::map<std::string, int> *material_map,
+             std::vector<material_t> *materials, std::istream *inStream,
+             std::string *warning);
+
+}  // namespace tinyobj
+
+#endif  // TINY_OBJ_LOADER_H_
+
+#ifdef TINYOBJLOADER_IMPLEMENTATION
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <utility>
+
+#include <fstream>
+#include <sstream>
+
+namespace tinyobj {
+
+MaterialReader::~MaterialReader() {}  // out-of-line definition of the virtual destructor
+
+#define TINYOBJ_SSCANF_BUFFER_SIZE (4096)  // NOTE(review): presumably a scratch buffer size — confirm at use sites
+
+struct vertex_index {  // index triple produced by parseTriple/parseRawTriple
+  int v_idx, vt_idx, vn_idx;
+  vertex_index() : v_idx(-1), vt_idx(-1), vn_idx(-1) {}  // all members -1
+  explicit vertex_index(int idx) : v_idx(idx), vt_idx(idx), vn_idx(idx) {}
+  vertex_index(int vidx, int vtidx, int vnidx)
+      : v_idx(vidx), vt_idx(vtidx), vn_idx(vnidx) {}  // order: v, vt, vn
+};
+
+struct tag_sizes {  // the three counts returned by parseTagTriple
+  tag_sizes() : num_ints(0), num_reals(0), num_strings(0) {}  // all counts 0
+  int num_ints;
+  int num_reals;
+  int num_strings;
+};
+
+struct obj_shape {  // three real_t arrays; member names match v / vn / vt
+  std::vector<real_t> v;
+  std::vector<real_t> vn;
+  std::vector<real_t> vt;
+};
+
+// Reads one line into t; "\n", "\r" and "\r\n" all end a line. See
+// http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf
+static std::istream &safeGetline(std::istream &is, std::string &t) {
+  t.clear();
+
+  // The characters in the stream are read one-by-one using a std::streambuf.
+  // That is faster than reading them one-by-one using the std::istream.
+  // Code that uses streambuf this way must be guarded by a sentry object.
+  // The sentry object performs various tasks,
+  // such as thread synchronization and updating the stream state.
+
+  std::istream::sentry se(is, true);
+  std::streambuf *sb = is.rdbuf();
+
+  for (;;) {
+    int c = sb->sbumpc();  // take the next character from the buffer
+    switch (c) {
+      case '\n':
+        return is;
+      case '\r':
+        if (sb->sgetc() == '\n') sb->sbumpc();  // consume the LF of a CRLF
+        return is;
+      case EOF:
+        // Also handle the case when the last line has no line ending:
+        // eofbit is only set when nothing was read for this line.
+        if (t.empty()) is.setstate(std::ios::eofbit);
+        return is;
+      default:
+        t += static_cast<char>(c);  // ordinary character: append to the line
+    }
+  }
+}
+
+#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t'))  // space or tab only
+#define IS_DIGIT(x) \
+  (static_cast<unsigned int>((x) - '0') < static_cast<unsigned int>(10))  // one unsigned compare covers '0'..'9'
+#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0'))  // CR, LF or NUL
+
+// Converts a 1-based index to 0-based; a negative index is relative to n.
+static inline int fixIndex(int idx, int n) {
+  if (idx > 0) return idx - 1;  // positive: plain 1-based index
+  if (idx == 0) return 0;       // 0 is passed through as 0
+  return n + idx;  // negative value = relative
+}
+
+static inline std::string parseString(const char **token) {
+  std::string s;  // receives the next whitespace-delimited word
+  (*token) += strspn((*token), " \t");    // skip leading spaces and tabs
+  size_t e = strcspn((*token), " \t\r");  // word length: up to space/tab/CR/NUL
+  s = std::string((*token), &(*token)[e]);
+  (*token) += e;  // leave *token just after the word
+  return s;
+}
+
+static inline int parseInt(const char **token) {
+  (*token) += strspn((*token), " \t");     // skip leading spaces and tabs
+  int i = atoi((*token));                  // atoi: no error reporting
+  (*token) += strcspn((*token), " \t\r");  // advance past the whole word
+  return i;
+}
+
+// Tries to parse a floating point number located at s.
+//
+// s_end should be a location in the string where reading should absolutely
+// stop. For example at the end of the string, to prevent buffer overflows.
+//
+// Parses the following EBNF grammar:
+//   sign    = "+" | "-" ;
+//   END     = ? anything not in digit ?
+//   digit   = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+//   integer = [sign] , digit , {digit} ;
+//   decimal = integer , ["." , integer] ;
+//   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ;
+//
+//  Valid strings are for example:
+//   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41   11e2
+//
+// If the parsing is a success, result is set to the parsed value and true
+// is returned. On failure false is returned and *result is left untouched.
+//
+// The function is greedy and will parse until any of the following happens:
+//  - a non-conforming character is encountered.
+//  - s_end is reached (the character at s_end itself is never read).
+//
+// The following situations triggers a failure:
+//  - s >= s_end.
+//  - parse failure (no digits, a leading '.', or an exponent without digits).
+//
+static bool tryParseDouble(const char *s, const char *s_end, double *result) {
+  if (s >= s_end) {
+    return false;
+  }
+
+  double mantissa = 0.0;
+  // Decimal exponent parsed from the input (the E in mantissa * 10^E).
+  // The final value is assembled with ldexp, which scales by a power
+  // of two, so 10^E is split as 5^E * 2^E: the mantissa is first
+  // multiplied by 5^E and ldexp then applies the remaining 2^E.
+  // An exponent of 0 (or none at all) skips this and uses the
+  // mantissa directly.
+  // See the `assemble` label below.
+  int exponent = 0;
+
+  // NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED
+  // TO JUMP OVER DEFINITIONS.
+  char sign = '+';
+  char exp_sign = '+';
+  char const *curr = s;
+
+  // How many characters were read in a loop.
+  int read = 0;
+  // True while curr has not yet reached s_end (i.e. *curr may be read).
+  bool end_not_reached = false;
+
+  /*
+          BEGIN PARSING.
+  */
+
+  // Find out what sign we've got.
+  if (*curr == '+' || *curr == '-') {
+    sign = *curr;
+    curr++;
+  } else if (IS_DIGIT(*curr)) { /* Pass through. */
+  } else {
+    goto fail;
+  }
+
+  // Read the integer part.
+  end_not_reached = (curr != s_end);
+  while (end_not_reached && IS_DIGIT(*curr)) {
+    mantissa *= 10;
+    mantissa += static_cast<int>(*curr - 0x30);
+    curr++;
+    read++;
+    end_not_reached = (curr != s_end);
+  }
+
+  // We must make sure we actually got something.
+  if (read == 0) goto fail;
+  // We allow numbers of form "#", "###" etc.
+  if (!end_not_reached) goto assemble;
+
+  // Read the decimal part.
+  if (*curr == '.') {
+    curr++;
+    read = 1;  // position after the point; doubles as the index into pow_lut
+    end_not_reached = (curr != s_end);
+    while (end_not_reached && IS_DIGIT(*curr)) {
+      static const double pow_lut[] = {
+          1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001,
+      };
+      const int lut_entries = sizeof pow_lut / sizeof pow_lut[0];
+
+      // NOTE: Don't use powf here, it will absolutely murder precision.
+      mantissa += static_cast<int>(*curr - 0x30) *
+                  (read < lut_entries ? pow_lut[read] : std::pow(10.0, -read));
+      read++;
+      curr++;
+      end_not_reached = (curr != s_end);
+    }
+  } else if (*curr == 'e' || *curr == 'E') { /* Pass through to exponent. */
+  } else {
+    goto assemble;
+  }
+
+  if (!end_not_reached) goto assemble;
+
+  // Read the exponent part.
+  if (*curr == 'e' || *curr == 'E') {
+    curr++;
+    // Figure out if a sign is present and if it is.
+    end_not_reached = (curr != s_end);
+    if (end_not_reached && (*curr == '+' || *curr == '-')) {
+      exp_sign = *curr;
+      curr++;
+    } else if (end_not_reached && IS_DIGIT(*curr)) { /* Pass through. */
+    } else {
+      // Empty E is not allowed (this also covers input that ends at the E).
+      goto fail;
+    }
+
+    read = 0;
+    end_not_reached = (curr != s_end);
+    while (end_not_reached && IS_DIGIT(*curr)) {
+      exponent *= 10;
+      exponent += static_cast<int>(*curr - 0x30);
+      curr++;
+      read++;
+      end_not_reached = (curr != s_end);
+    }
+    exponent *= (exp_sign == '+' ? 1 : -1);
+    if (read == 0) goto fail;
+  }
+
+assemble:
+  *result =
+      (sign == '+' ? 1 : -1) *
+      (exponent ? std::ldexp(mantissa * std::pow(5.0, exponent), exponent) : mantissa);
+  return true;
+fail:
+  return false;
+}
+
+static inline real_t parseReal(const char **token, double default_value = 0.0) {
+  (*token) += strspn((*token), " \t");  // skip leading spaces and tabs
+  const char *end = (*token) + strcspn((*token), " \t\r");  // end of the word
+  double val = default_value;  // kept when the word does not parse
+  tryParseDouble((*token), end, &val);  // result ignored: failure keeps val
+  real_t f = static_cast<real_t>(val);
+  (*token) = end;  // the word is consumed whether or not it parsed
+  return f;
+}
+
+static inline void parseReal2(real_t *x, real_t *y, const char **token,
+                               const double default_x = 0.0,
+                               const double default_y = 0.0) {
+  (*x) = parseReal(token, default_x);  // first word, or default_x
+  (*y) = parseReal(token, default_y);  // second word, or default_y
+}
+
+static inline void parseReal3(real_t *x, real_t *y, real_t *z, const char **token,
+                               const double default_x = 0.0,
+                               const double default_y = 0.0,
+                               const double default_z = 0.0) {
+  (*x) = parseReal(token, default_x);  // first word, or default_x
+  (*y) = parseReal(token, default_y);  // second word, or default_y
+  (*z) = parseReal(token, default_z);  // third word, or default_z
+}
+
+static inline void parseV(real_t *x, real_t *y, real_t *z, real_t *w,
+                          const char **token, const double default_x = 0.0,
+                          const double default_y = 0.0,
+                          const double default_z = 0.0,
+                          const double default_w = 1.0) {
+  (*x) = parseReal(token, default_x);  // first word, or default_x
+  (*y) = parseReal(token, default_y);  // second word, or default_y
+  (*z) = parseReal(token, default_z);  // third word, or default_z
+  (*w) = parseReal(token, default_w);  // fourth word, or default_w (1.0)
+}
+
+static inline bool parseOnOff(const char **token, bool default_value = true) {
+  (*token) += strspn((*token), " \t");  // skip leading spaces and tabs
+  const char *end = (*token) + strcspn((*token), " \t\r");  // end of the word
+  // Prefix match: a word starting with "on" or "off"; else default_value.
+  bool ret = default_value;
+  if ((0 == strncmp((*token), "on", 2))) {
+    ret = true;
+  } else if ((0 == strncmp((*token), "off", 3))) {
+    ret = false;
+  }
+
+  (*token) = end;  // the word is consumed in every case
+  return ret;
+}
+
+static inline texture_type_t parseTextureType(
+    const char **token, texture_type_t default_value = TEXTURE_TYPE_NONE) {
+  (*token) += strspn((*token), " \t");  // skip leading spaces and tabs
+  const char *end = (*token) + strcspn((*token), " \t\r");  // end of the word
+  texture_type_t ty = default_value;  // kept when no name below matches
+  // Each comparison is a prefix match against the start of the word.
+  if ((0 == strncmp((*token), "cube_top", strlen("cube_top")))) {
+    ty = TEXTURE_TYPE_CUBE_TOP;
+  } else if ((0 == strncmp((*token), "cube_bottom", strlen("cube_bottom")))) {
+    ty = TEXTURE_TYPE_CUBE_BOTTOM;
+  } else if ((0 == strncmp((*token), "cube_left", strlen("cube_left")))) {
+    ty = TEXTURE_TYPE_CUBE_LEFT;
+  } else if ((0 == strncmp((*token), "cube_right", strlen("cube_right")))) {
+    ty = TEXTURE_TYPE_CUBE_RIGHT;
+  } else if ((0 == strncmp((*token), "cube_front", strlen("cube_front")))) {
+    ty = TEXTURE_TYPE_CUBE_FRONT;
+  } else if ((0 == strncmp((*token), "cube_back", strlen("cube_back")))) {
+    ty = TEXTURE_TYPE_CUBE_BACK;
+  } else if ((0 == strncmp((*token), "sphere", strlen("sphere")))) {
+    ty = TEXTURE_TYPE_SPHERE;
+  }
+
+  (*token) = end;  // the word is consumed in every case
+  return ty;
+}
+
+static tag_sizes parseTagTriple(const char **token) {
+  tag_sizes ts;  // counts that are absent from the input stay 0
+  // Parses "<ints>[/<reals>[/<strings>]]" at *token and advances *token.
+  ts.num_ints = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    return ts;  // only the int count was given
+  }
+  (*token)++;  // skip '/'
+
+  ts.num_reals = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    return ts;  // no string count was given
+  }
+  (*token)++;  // skip '/'
+  // After the last count, step over one delimiter but never past the NUL.
+  ts.num_strings = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '\0') (*token)++;
+  return ts;
+}
+
+// Parse triples with index offsets: i, i/j/k, i//k, i/j
+static vertex_index parseTriple(const char **token, int vsize, int vnsize,
+                                int vtsize) {
+  vertex_index vi(-1);  // members that are not present stay -1
+  // Each index goes through fixIndex() with the matching size argument.
+  vi.v_idx = fixIndex(atoi((*token)), vsize);
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    return vi;  // form "i"
+  }
+  (*token)++;  // skip '/'
+
+  // i//k
+  if ((*token)[0] == '/') {
+    (*token)++;
+    vi.vn_idx = fixIndex(atoi((*token)), vnsize);
+    (*token) += strcspn((*token), "/ \t\r");
+    return vi;
+  }
+
+  // i/j/k or i/j
+  vi.vt_idx = fixIndex(atoi((*token)), vtsize);
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    return vi;  // form "i/j"
+  }
+
+  // i/j/k
+  (*token)++;  // skip '/'
+  vi.vn_idx = fixIndex(atoi((*token)), vnsize);
+  (*token) += strcspn((*token), "/ \t\r");
+  return vi;
+}
+
+// Parse raw triples: i, i/j/k, i//k, i/j (values are stored without fixIndex)
+static vertex_index parseRawTriple(const char **token) {
+  vertex_index vi(static_cast<int>(0));  // 0 is an invalid index in OBJ
+
+  vi.v_idx = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    return vi;  // form "i"
+  }
+  (*token)++;  // skip '/'
+
+  // i//k
+  if ((*token)[0] == '/') {
+    (*token)++;
+    vi.vn_idx = atoi((*token));
+    (*token) += strcspn((*token), "/ \t\r");
+    return vi;
+  }
+
+  // i/j/k or i/j
+  vi.vt_idx = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    return vi;  // form "i/j"
+  }
+
+  // i/j/k
+  (*token)++;  // skip '/'
+  vi.vn_idx = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  return vi;
+}
+
+// Parses the argument part of a texture statement (the text that follows
+// e.g. `map_Kd `): any number of `-option value...` groups plus the texture
+// file name.
+//
+// texname  receives the file name; it is only written when a name was found.
+// texopt   is first reset to the defaults below, then updated by each option.
+// linebuf  NUL-terminated remainder of the line.
+// is_bump  selects the default `imfchan` ('l' for bump maps, 'm' otherwise).
+//
+// Returns true when a texture file name was found.
+static bool ParseTextureNameAndOption(std::string *texname,
+                                      texture_option_t *texopt,
+                                      const char *linebuf, const bool is_bump) {
+  // @todo { write more robust lexer and parser. }
+  bool found_texname = false;
+  std::string texture_name;
+
+  // Fill with default value for texopt.
+  if (is_bump) {
+    texopt->imfchan = 'l';
+  } else {
+    texopt->imfchan = 'm';
+  }
+  texopt->bump_multiplier = 1.0f;
+  texopt->clamp = false;
+  texopt->blendu = true;
+  texopt->blendv = true;
+  texopt->sharpness = 1.0f;
+  texopt->brightness = 0.0f;
+  texopt->contrast = 1.0f;
+  texopt->origin_offset[0] = 0.0f;
+  texopt->origin_offset[1] = 0.0f;
+  texopt->origin_offset[2] = 0.0f;
+  texopt->scale[0] = 1.0f;
+  texopt->scale[1] = 1.0f;
+  texopt->scale[2] = 1.0f;
+  texopt->turbulence[0] = 0.0f;
+  texopt->turbulence[1] = 0.0f;
+  texopt->turbulence[2] = 0.0f;
+  texopt->type = TEXTURE_TYPE_NONE;
+
+  const char *token = linebuf;  // Assume line ends with NULL
+
+  while (!IS_NEW_LINE((*token))) {
+    // An option branch leaves `token` on the blank that follows its value
+    // (see the -imfchan branch; presumably the parse* helpers do the same).
+    // Skip that blank first, otherwise the next `-option` would not match
+    // any keyword and would be taken for the texture file name.
+    token += strspn(token, " \t");
+    if (IS_NEW_LINE((*token))) {
+      break;  // nothing but trailing blanks was left
+    }
+
+    if ((0 == strncmp(token, "-blendu", 7)) && IS_SPACE((token[7]))) {
+      token += 8;
+      texopt->blendu = parseOnOff(&token, /* default */ true);
+    } else if ((0 == strncmp(token, "-blendv", 7)) && IS_SPACE((token[7]))) {
+      token += 8;
+      texopt->blendv = parseOnOff(&token, /* default */ true);
+    } else if ((0 == strncmp(token, "-clamp", 6)) && IS_SPACE((token[6]))) {
+      token += 7;
+      texopt->clamp = parseOnOff(&token, /* default */ true);
+    } else if ((0 == strncmp(token, "-boost", 6)) && IS_SPACE((token[6]))) {
+      token += 7;
+      texopt->sharpness = parseReal(&token, 1.0);
+    } else if ((0 == strncmp(token, "-bm", 3)) && IS_SPACE((token[3]))) {
+      token += 4;
+      texopt->bump_multiplier = parseReal(&token, 1.0);
+    } else if ((0 == strncmp(token, "-o", 2)) && IS_SPACE((token[2]))) {
+      token += 3;
+      parseReal3(&(texopt->origin_offset[0]), &(texopt->origin_offset[1]),
+                  &(texopt->origin_offset[2]), &token);
+    } else if ((0 == strncmp(token, "-s", 2)) && IS_SPACE((token[2]))) {
+      token += 3;
+      parseReal3(&(texopt->scale[0]), &(texopt->scale[1]), &(texopt->scale[2]),
+                  &token, 1.0, 1.0, 1.0);
+    } else if ((0 == strncmp(token, "-t", 2)) && IS_SPACE((token[2]))) {
+      token += 3;
+      parseReal3(&(texopt->turbulence[0]), &(texopt->turbulence[1]),
+                  &(texopt->turbulence[2]), &token);
+    } else if ((0 == strncmp(token, "-type", 5)) && IS_SPACE((token[5]))) {
+      token += 5;
+      texopt->type = parseTextureType((&token), TEXTURE_TYPE_NONE);
+    } else if ((0 == strncmp(token, "-imfchan", 8)) && IS_SPACE((token[8]))) {
+      token += 9;
+      token += strspn(token, " \t");
+      const char *end = token + strcspn(token, " \t\r");
+      if ((end - token) == 1) {  // Assume one char for -imfchan
+        texopt->imfchan = (*token);
+      }
+      token = end;
+    } else {
+      // Anything that is not a known option is the texture file name; it
+      // runs until the next blank. Leading blanks were skipped above.
+      size_t len = strcspn(token, " \t\r");
+      texture_name = std::string(token, token + len);
+      token += len;
+
+      found_texname = true;
+    }
+  }
+
+  if (found_texname) {
+    (*texname) = texture_name;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Resets every field of *material that this function knows about to its
+// default: empty names, zeroed colours, dissolve/shininess/ior of 1, zeroed
+// PBR parameters and an empty unknown-parameter table.
+static void InitMaterial(material_t *material) {
+  material_t &m = *material;
+
+  // Names of the material and of its classic texture maps.
+  m.name = "";
+  m.ambient_texname = "";
+  m.diffuse_texname = "";
+  m.specular_texname = "";
+  m.specular_highlight_texname = "";
+  m.bump_texname = "";
+  m.displacement_texname = "";
+  m.alpha_texname = "";
+
+  // Colour triples.
+  for (int channel = 0; channel != 3; ++channel) {
+    m.ambient[channel] = 0.f;
+    m.diffuse[channel] = 0.f;
+    m.specular[channel] = 0.f;
+    m.transmittance[channel] = 0.f;
+    m.emission[channel] = 0.f;
+  }
+
+  // Classic scalar parameters.
+  m.illum = 0;
+  m.dissolve = 1.f;
+  m.shininess = 1.f;
+  m.ior = 1.f;
+
+  // PBR scalar parameters.
+  m.roughness = 0.f;
+  m.metallic = 0.f;
+  m.sheen = 0.f;
+  m.clearcoat_thickness = 0.f;
+  m.clearcoat_roughness = 0.f;
+  m.anisotropy_rotation = 0.f;
+  m.anisotropy = 0.f;
+
+  // PBR texture maps.
+  m.roughness_texname = "";
+  m.metallic_texname = "";
+  m.sheen_texname = "";
+  m.emissive_texname = "";
+  m.normal_texname = "";
+
+  m.unknown_parameter.clear();
+}
+
+// Appends the faces collected in `faceGroup` to shape->mesh and stamps the
+// shape with `name` and `tags`.
+//
+// With `triangulate` set, each polygon is emitted as a triangle fan around
+// its first vertex, so polygons with fewer than three vertices produce no
+// triangles. Otherwise each polygon is emitted as-is with its vertex count.
+// Every emitted face gets `material_id`.
+//
+// Returns false (leaving *shape untouched) when `faceGroup` is empty, true
+// otherwise.
+static bool exportFaceGroupToShape(
+    shape_t *shape, const std::vector<std::vector<vertex_index> > &faceGroup,
+    const std::vector<tag_t> &tags, const int material_id,
+    const std::string &name, bool triangulate) {
+  if (faceGroup.empty()) {
+    return false;
+  }
+
+  // Flatten vertices and indices
+  for (size_t i = 0; i < faceGroup.size(); i++) {
+    const std::vector<vertex_index> &face = faceGroup[i];
+    const size_t npolys = face.size();
+
+    // A face line without any vertex carries nothing to export; indexing
+    // into it would read out of bounds.
+    if (npolys == 0) {
+      continue;
+    }
+
+    if (triangulate) {
+      // A fan needs at least three vertices. Check before touching face[1].
+      if (npolys < 3) {
+        continue;
+      }
+
+      // Polygon -> triangle fan conversion
+      index_t idx0;  // fan centre, shared by all triangles of this polygon
+      idx0.vertex_index = face[0].v_idx;
+      idx0.normal_index = face[0].vn_idx;
+      idx0.texcoord_index = face[0].vt_idx;
+
+      index_t idx1;  // previous rim vertex
+      idx1.vertex_index = face[1].v_idx;
+      idx1.normal_index = face[1].vn_idx;
+      idx1.texcoord_index = face[1].vt_idx;
+
+      for (size_t k = 2; k < npolys; k++) {
+        index_t idx2;  // current rim vertex
+        idx2.vertex_index = face[k].v_idx;
+        idx2.normal_index = face[k].vn_idx;
+        idx2.texcoord_index = face[k].vt_idx;
+
+        shape->mesh.indices.push_back(idx0);
+        shape->mesh.indices.push_back(idx1);
+        shape->mesh.indices.push_back(idx2);
+
+        shape->mesh.num_face_vertices.push_back(3);
+        shape->mesh.material_ids.push_back(material_id);
+
+        idx1 = idx2;
+      }
+    } else {
+      for (size_t k = 0; k < npolys; k++) {
+        index_t idx;
+        idx.vertex_index = face[k].v_idx;
+        idx.normal_index = face[k].vn_idx;
+        idx.texcoord_index = face[k].vt_idx;
+        shape->mesh.indices.push_back(idx);
+      }
+
+      shape->mesh.num_face_vertices.push_back(
+          static_cast<unsigned char>(npolys));
+      shape->mesh.material_ids.push_back(material_id);  // per face
+    }
+  }
+
+  shape->name = name;
+  shape->mesh.tags = tags;
+
+  return true;
+}
+
+// Splits `s` at every occurrence of `delim` and appends the pieces to
+// `elems` (existing entries are kept). Follows std::getline semantics:
+// adjacent delimiters yield empty pieces, a trailing delimiter does not add
+// an empty piece, and an empty `s` adds nothing.
+// http://stackoverflow.com/questions/236129/split-a-string-in-c
+static void SplitString(const std::string &s, char delim,
+                        std::vector<std::string> &elems) {
+  std::stringstream stream(s);
+  for (std::string piece; std::getline(stream, piece, delim);) {
+    elems.push_back(piece);
+  }
+}
+
+// Reads MTL statements from `inStream` until peek() reports -1.
+//
+// material_map  receives name -> index entries for every stored material.
+// materials     receives the parsed materials (appended).
+// inStream      source of the MTL text.
+// warning       optional; overwritten with the warnings collected here.
+//
+// A material is stored when the next `newmtl` is met (only if its name is
+// non-empty) and once more, unconditionally, at end of input. Statements that
+// match no known keyword are kept in material.unknown_parameter as
+// key/value text split at the first blank.
+void LoadMtl(std::map<std::string, int> *material_map,
+             std::vector<material_t> *materials, std::istream *inStream,
+             std::string *warning) {
+  // Create a default material anyway.
+  material_t material;
+  InitMaterial(&material);
+
+  // Issue 43. `d` wins against `Tr` since `Tr` is not in the MTL specification.
+  bool has_d = false;
+  bool has_tr = false;
+
+  std::stringstream ss;
+
+  std::string linebuf;
+  while (inStream->peek() != -1) {
+    safeGetline(*inStream, linebuf);
+
+    // Trim trailing whitespace.
+    if (linebuf.size() > 0) {
+      linebuf = linebuf.substr(0, linebuf.find_last_not_of(" \t") + 1);
+    }
+
+    // Trim newline '\r\n' or '\n'
+    if (linebuf.size() > 0) {
+      if (linebuf[linebuf.size() - 1] == '\n')
+        linebuf.erase(linebuf.size() - 1);
+    }
+    if (linebuf.size() > 0) {
+      if (linebuf[linebuf.size() - 1] == '\r')
+        linebuf.erase(linebuf.size() - 1);
+    }
+
+    // Skip if empty line.
+    if (linebuf.empty()) {
+      continue;
+    }
+
+    // Skip leading space.
+    const char *token = linebuf.c_str();
+    token += strspn(token, " \t");
+
+    assert(token);
+    if (token[0] == '\0') continue;  // empty line
+
+    if (token[0] == '#') continue;  // comment line
+
+    // new mtl
+    if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) {
+      // flush previous material.
+      if (!material.name.empty()) {
+        material_map->insert(std::pair<std::string, int>(
+            material.name, static_cast<int>(materials->size())));
+        materials->push_back(material);
+      }
+
+      // initial temporary material
+      InitMaterial(&material);
+
+      has_d = false;
+      has_tr = false;
+
+      // set new mtl name: the first blank-delimited word after the keyword.
+      // Built directly as a std::string so that an arbitrarily long name
+      // cannot overrun a fixed-size scratch buffer.
+      token += 7;
+      token += strspn(token, " \t");
+      material.name = std::string(token, strcspn(token, " \t\r"));
+      continue;
+    }
+
+    // ambient
+    if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) {
+      token += 2;
+      real_t r, g, b;
+      parseReal3(&r, &g, &b, &token);
+      material.ambient[0] = r;
+      material.ambient[1] = g;
+      material.ambient[2] = b;
+      continue;
+    }
+
+    // diffuse
+    if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) {
+      token += 2;
+      real_t r, g, b;
+      parseReal3(&r, &g, &b, &token);
+      material.diffuse[0] = r;
+      material.diffuse[1] = g;
+      material.diffuse[2] = b;
+      continue;
+    }
+
+    // specular
+    if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) {
+      token += 2;
+      real_t r, g, b;
+      parseReal3(&r, &g, &b, &token);
+      material.specular[0] = r;
+      material.specular[1] = g;
+      material.specular[2] = b;
+      continue;
+    }
+
+    // transmittance (spelled either `Kt` or `Tf`)
+    if ((token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) ||
+        (token[0] == 'T' && token[1] == 'f' && IS_SPACE((token[2])))) {
+      token += 2;
+      real_t r, g, b;
+      parseReal3(&r, &g, &b, &token);
+      material.transmittance[0] = r;
+      material.transmittance[1] = g;
+      material.transmittance[2] = b;
+      continue;
+    }
+
+    // ior(index of refraction)
+    if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) {
+      token += 2;
+      material.ior = parseReal(&token);
+      continue;
+    }
+
+    // emission
+    if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) {
+      token += 2;
+      real_t r, g, b;
+      parseReal3(&r, &g, &b, &token);
+      material.emission[0] = r;
+      material.emission[1] = g;
+      material.emission[2] = b;
+      continue;
+    }
+
+    // shininess
+    if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) {
+      token += 2;
+      material.shininess = parseReal(&token);
+      continue;
+    }
+
+    // illum model
+    if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) {
+      token += 6;
+      material.illum = parseInt(&token);
+      continue;
+    }
+
+    // dissolve
+    if ((token[0] == 'd' && IS_SPACE(token[1]))) {
+      token += 1;
+      material.dissolve = parseReal(&token);
+
+      if (has_tr) {
+        ss << "WARN: Both `d` and `Tr` parameters defined for \""
+           << material.name << "\". Use the value of `d` for dissolve."
+           << std::endl;
+      }
+      has_d = true;
+      continue;
+    }
+    if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) {
+      token += 2;
+      if (has_d) {
+        // `d` wins. Ignore `Tr` value.
+        ss << "WARN: Both `d` and `Tr` parameters defined for \""
+           << material.name << "\". Use the value of `d` for dissolve."
+           << std::endl;
+      } else {
+        // We invert value of Tr(assume Tr is in range [0, 1])
+        // NOTE: Interpretation of Tr is application(exporter) dependent. For
+        // some application(e.g. 3ds max obj exporter), Tr = d(Issue 43)
+        material.dissolve = 1.0f - parseReal(&token);
+      }
+      has_tr = true;
+      continue;
+    }
+
+    // PBR: roughness
+    if (token[0] == 'P' && token[1] == 'r' && IS_SPACE(token[2])) {
+      token += 2;
+      material.roughness = parseReal(&token);
+      continue;
+    }
+
+    // PBR: metallic
+    if (token[0] == 'P' && token[1] == 'm' && IS_SPACE(token[2])) {
+      token += 2;
+      material.metallic = parseReal(&token);
+      continue;
+    }
+
+    // PBR: sheen
+    if (token[0] == 'P' && token[1] == 's' && IS_SPACE(token[2])) {
+      token += 2;
+      material.sheen = parseReal(&token);
+      continue;
+    }
+
+    // PBR: clearcoat thickness
+    if (token[0] == 'P' && token[1] == 'c' && IS_SPACE(token[2])) {
+      token += 2;
+      material.clearcoat_thickness = parseReal(&token);
+      continue;
+    }
+
+    // PBR: clearcoat roughness
+    if ((0 == strncmp(token, "Pcr", 3)) && IS_SPACE(token[3])) {
+      token += 4;
+      material.clearcoat_roughness = parseReal(&token);
+      continue;
+    }
+
+    // PBR: anisotropy
+    if ((0 == strncmp(token, "aniso", 5)) && IS_SPACE(token[5])) {
+      token += 6;
+      material.anisotropy = parseReal(&token);
+      continue;
+    }
+
+    // PBR: anisotropy rotation
+    if ((0 == strncmp(token, "anisor", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      material.anisotropy_rotation = parseReal(&token);
+      continue;
+    }
+
+    // ambient texture
+    if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      ParseTextureNameAndOption(&(material.ambient_texname),
+                                &(material.ambient_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // diffuse texture
+    if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      ParseTextureNameAndOption(&(material.diffuse_texname),
+                                &(material.diffuse_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // specular texture
+    if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      ParseTextureNameAndOption(&(material.specular_texname),
+                                &(material.specular_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // specular highlight texture
+    if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      ParseTextureNameAndOption(&(material.specular_highlight_texname),
+                                &(material.specular_highlight_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // bump texture
+    if ((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) {
+      token += 9;
+      ParseTextureNameAndOption(&(material.bump_texname),
+                                &(material.bump_texopt), token,
+                                /* is_bump */ true);
+      continue;
+    }
+
+    // bump texture (short spelling)
+    if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) {
+      token += 5;
+      ParseTextureNameAndOption(&(material.bump_texname),
+                                &(material.bump_texopt), token,
+                                /* is_bump */ true);
+      continue;
+    }
+
+    // alpha texture
+    if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) {
+      token += 6;
+      // Handled like every other map: the name is only written when the
+      // parser finds one, so raw option text never ends up in alpha_texname.
+      ParseTextureNameAndOption(&(material.alpha_texname),
+                                &(material.alpha_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // displacement texture
+    if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) {
+      token += 5;
+      ParseTextureNameAndOption(&(material.displacement_texname),
+                                &(material.displacement_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // PBR: roughness texture
+    if ((0 == strncmp(token, "map_Pr", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      ParseTextureNameAndOption(&(material.roughness_texname),
+                                &(material.roughness_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // PBR: metallic texture
+    if ((0 == strncmp(token, "map_Pm", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      ParseTextureNameAndOption(&(material.metallic_texname),
+                                &(material.metallic_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // PBR: sheen texture
+    if ((0 == strncmp(token, "map_Ps", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      ParseTextureNameAndOption(&(material.sheen_texname),
+                                &(material.sheen_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // PBR: emissive texture
+    if ((0 == strncmp(token, "map_Ke", 6)) && IS_SPACE(token[6])) {
+      token += 7;
+      ParseTextureNameAndOption(&(material.emissive_texname),
+                                &(material.emissive_texopt), token,
+                                /* is_bump */ false);
+      continue;
+    }
+
+    // PBR: normal map texture
+    if ((0 == strncmp(token, "norm", 4)) && IS_SPACE(token[4])) {
+      token += 5;
+      ParseTextureNameAndOption(
+          &(material.normal_texname), &(material.normal_texopt), token,
+          /* is_bump */ false);  // @fixme { is_bump will be true? }
+      continue;
+    }
+
+    // unknown parameter: split at the first space, or at the first tab when
+    // the line has no space; lines without either are dropped.
+    const char *_space = strchr(token, ' ');
+    if (!_space) {
+      _space = strchr(token, '\t');
+    }
+    if (_space) {
+      std::ptrdiff_t len = _space - token;
+      std::string key(token, static_cast<size_t>(len));
+      std::string value = _space + 1;
+      material.unknown_parameter.insert(
+          std::pair<std::string, std::string>(key, value));
+    }
+  }
+  // flush last material.
+  material_map->insert(std::pair<std::string, int>(
+      material.name, static_cast<int>(materials->size())));
+  materials->push_back(material);
+
+  if (warning) {
+    (*warning) = ss.str();
+  }
+}
+
+// Opens the material file named `matId` (prefixed with m_mtlBaseDir when that
+// is non-empty) and feeds it to LoadMtl(). Returns false, appending a
+// warning to *err when err is given, if the file cannot be opened. Otherwise
+// returns true and appends any LoadMtl() warnings to *err.
+bool MaterialFileReader::operator()(const std::string &matId,
+                                    std::vector<material_t> *materials,
+                                    std::map<std::string, int> *matMap,
+                                    std::string *err) {
+  const std::string filepath =
+      m_mtlBaseDir.empty() ? matId : std::string(m_mtlBaseDir) + matId;
+
+  std::ifstream matIStream(filepath.c_str());
+  if (matIStream) {
+    std::string warning;
+    LoadMtl(matMap, materials, &matIStream, &warning);
+
+    if (err && !warning.empty()) {
+      (*err) += warning;
+    }
+    return true;
+  }
+
+  // The stream could not be opened: report and give up.
+  if (err) {
+    (*err) += "WARN: Material file [ " + filepath + " ] not found.\n";
+  }
+  return false;
+}
+
+// Feeds m_inStream to LoadMtl(); `matId` is ignored. Returns false, appending
+// a warning to *err when err is given, if the stream is in an error state.
+// Otherwise returns true and appends any LoadMtl() warnings to *err.
+bool MaterialStreamReader::operator()(const std::string &matId,
+                                      std::vector<material_t> *materials,
+                                      std::map<std::string, int> *matMap,
+                                      std::string *err) {
+  (void)matId;
+
+  if (m_inStream) {
+    std::string warning;
+    LoadMtl(matMap, materials, &m_inStream, &warning);
+
+    if (err && !warning.empty()) {
+      (*err) += warning;
+    }
+    return true;
+  }
+
+  // Unusable stream: report and give up.
+  if (err) {
+    (*err) += std::string("WARN: Material stream in error state. ") + "\n";
+  }
+  return false;
+}
+
+// Loads the OBJ file `filename` by opening it and delegating to the stream
+// overload of LoadObj() with a MaterialFileReader rooted at `mtl_basedir`.
+//
+// attrib, shapes  cleared up front, then filled by the stream overload.
+// materials       passed through to the stream overload.
+// err             optional; set to an error text when the file cannot be
+//                 opened, otherwise handed to the stream overload.
+// mtl_basedir     optional directory of the material files. A missing
+//                 trailing directory separator is added here, because the
+//                 material reader concatenates directory and file name
+//                 verbatim.
+// trianglulate    forwarded as the `triangulate` flag.
+//
+// Returns false when the file cannot be opened, otherwise the result of the
+// stream overload.
+bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+             std::vector<material_t> *materials, std::string *err,
+             const char *filename, const char *mtl_basedir, bool trianglulate) {
+  attrib->vertices.clear();
+  attrib->normals.clear();
+  attrib->texcoords.clear();
+  shapes->clear();
+
+  std::stringstream errss;
+
+  std::ifstream ifs(filename);
+  if (!ifs) {
+    errss << "Cannot open file [" << filename << "]" << std::endl;
+    if (err) {
+      (*err) = errss.str();
+    }
+    return false;
+  }
+
+  std::string baseDir;
+  if (mtl_basedir) {
+    baseDir = mtl_basedir;
+  }
+  if (!baseDir.empty()) {
+    const char last = baseDir[baseDir.size() - 1];
+#ifdef _WIN32
+    const bool ends_with_separator = (last == '/') || (last == '\\');
+#else
+    const bool ends_with_separator = (last == '/');
+#endif
+    if (!ends_with_separator) {
+      // "models" + "a.mtl" would otherwise become "modelsa.mtl".
+      baseDir += '/';
+    }
+  }
+  MaterialFileReader matFileReader(baseDir);
+
+  return LoadObj(attrib, shapes, materials, err, &ifs, &matFileReader,
+                 trianglulate);
+}
+
+// Parses OBJ statements from `inStream` until peek() reports -1.
+//
+// attrib     receives the collected `v`, `vn` and `vt` values at the end.
+// shapes     receives one shape per flushed face group (appended).
+// materials  filled through `readMatFn` when an `mtllib` line is met.
+// err        optional; warnings are appended to it.
+// readMatFn  optional material reader; `mtllib` lines are ignored without it.
+// triangulate  forwarded to exportFaceGroupToShape().
+//
+// Handles v, vn, vt, f, usemtl, mtllib, g, o and t; every other statement is
+// ignored. Always returns true.
+bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+             std::vector<material_t> *materials, std::string *err,
+             std::istream *inStream, MaterialReader *readMatFn /*= NULL*/,
+             bool triangulate) {
+  std::stringstream errss;
+
+  std::vector<real_t> v;
+  std::vector<real_t> vn;
+  std::vector<real_t> vt;
+  std::vector<tag_t> tags;
+  std::vector<std::vector<vertex_index> > faceGroup;
+  std::string name;
+
+  // material
+  std::map<std::string, int> material_map;
+  int material = -1;
+
+  shape_t shape;
+
+  std::string linebuf;
+  while (inStream->peek() != -1) {
+    safeGetline(*inStream, linebuf);
+
+    // Trim newline '\r\n' or '\n'
+    if (linebuf.size() > 0) {
+      if (linebuf[linebuf.size() - 1] == '\n')
+        linebuf.erase(linebuf.size() - 1);
+    }
+    if (linebuf.size() > 0) {
+      if (linebuf[linebuf.size() - 1] == '\r')
+        linebuf.erase(linebuf.size() - 1);
+    }
+
+    // Skip if empty line.
+    if (linebuf.empty()) {
+      continue;
+    }
+
+    // Skip leading space.
+    const char *token = linebuf.c_str();
+    token += strspn(token, " \t");
+
+    assert(token);
+    if (token[0] == '\0') continue;  // empty line
+
+    if (token[0] == '#') continue;  // comment line
+
+    // vertex
+    if (token[0] == 'v' && IS_SPACE((token[1]))) {
+      token += 2;
+      real_t x, y, z;
+      parseReal3(&x, &y, &z, &token);
+      v.push_back(x);
+      v.push_back(y);
+      v.push_back(z);
+      continue;
+    }
+
+    // normal
+    if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
+      token += 3;
+      real_t x, y, z;
+      parseReal3(&x, &y, &z, &token);
+      vn.push_back(x);
+      vn.push_back(y);
+      vn.push_back(z);
+      continue;
+    }
+
+    // texcoord
+    if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
+      token += 3;
+      real_t x, y;
+      parseReal2(&x, &y, &token);
+      vt.push_back(x);
+      vt.push_back(y);
+      continue;
+    }
+
+    // face
+    if (token[0] == 'f' && IS_SPACE((token[1]))) {
+      token += 2;
+      token += strspn(token, " \t");
+
+      std::vector<vertex_index> face;
+      face.reserve(3);
+
+      while (!IS_NEW_LINE(token[0])) {
+        vertex_index vi = parseTriple(&token, static_cast<int>(v.size() / 3),
+                                      static_cast<int>(vn.size() / 3),
+                                      static_cast<int>(vt.size() / 2));
+        face.push_back(vi);
+        size_t n = strspn(token, " \t\r");
+        token += n;
+      }
+
+      // replace with emplace_back + std::move on C++11
+      faceGroup.push_back(std::vector<vertex_index>());
+      faceGroup[faceGroup.size() - 1].swap(face);
+
+      continue;
+    }
+
+    // use mtl
+    if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
+      // The material name is the first blank-delimited word. It is built as
+      // a std::string so that a long name cannot overrun a scratch buffer.
+      token += 7;
+      token += strspn(token, " \t");
+      const std::string mtl_name(token, strcspn(token, " \t\r"));
+
+      int newMaterialId = -1;
+      const std::map<std::string, int>::const_iterator found_mtl =
+          material_map.find(mtl_name);
+      if (found_mtl != material_map.end()) {
+        newMaterialId = found_mtl->second;
+      } else {
+        // { error!! material not found }
+      }
+
+      if (newMaterialId != material) {
+        // Create per-face material. Thus we don't add `shape` to `shapes` at
+        // this time.
+        // just clear `faceGroup` after `exportFaceGroupToShape()` call.
+        exportFaceGroupToShape(&shape, faceGroup, tags, material, name,
+                               triangulate);
+        faceGroup.clear();
+        material = newMaterialId;
+      }
+
+      continue;
+    }
+
+    // load mtl
+    if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+      if (readMatFn) {
+        token += 7;
+
+        std::vector<std::string> filenames;
+        SplitString(std::string(token), ' ', filenames);
+
+        if (filenames.empty()) {
+          if (err) {
+            (*err) +=
+                "WARN: Looks like empty filename for mtllib. Use default "
+                "material. \n";
+          }
+        } else {
+          bool found = false;
+          for (size_t s = 0; s < filenames.size(); s++) {
+            // Consecutive blanks produce empty entries; handing those to the
+            // reader would make it try the bare base directory.
+            if (filenames[s].empty()) {
+              continue;
+            }
+
+            std::string err_mtl;
+            bool ok = (*readMatFn)(filenames[s].c_str(), materials,
+                                   &material_map, &err_mtl);
+            if (err && (!err_mtl.empty())) {
+              (*err) += err_mtl;  // This should be warn message.
+            }
+
+            if (ok) {
+              found = true;
+              break;
+            }
+          }
+
+          if (!found) {
+            if (err) {
+              (*err) +=
+                  "WARN: Failed to load material file(s). Use default "
+                  "material.\n";
+            }
+          }
+        }
+      }
+
+      continue;
+    }
+
+    // group name
+    if (token[0] == 'g' && IS_SPACE((token[1]))) {
+      // flush previous face group.
+      bool ret = exportFaceGroupToShape(&shape, faceGroup, tags, material, name,
+                                        triangulate);
+      if (ret) {
+        shapes->push_back(shape);
+      }
+
+      shape = shape_t();
+
+      // material = -1;
+      faceGroup.clear();
+
+      std::vector<std::string> names;
+      names.reserve(2);
+
+      // `token` still points at the keyword, so it is collected as names[0].
+      while (!IS_NEW_LINE(token[0])) {
+        std::string str = parseString(&token);
+        names.push_back(str);
+        token += strspn(token, " \t\r");  // skip tag
+      }
+
+      assert(names.size() > 0);
+
+      // names[0] must be 'g', so skip the 0th element.
+      if (names.size() > 1) {
+        name = names[1];
+      } else {
+        name = "";
+      }
+
+      continue;
+    }
+
+    // object name
+    if (token[0] == 'o' && IS_SPACE((token[1]))) {
+      // flush previous face group.
+      bool ret = exportFaceGroupToShape(&shape, faceGroup, tags, material, name,
+                                        triangulate);
+      if (ret) {
+        shapes->push_back(shape);
+      }
+
+      // material = -1;
+      faceGroup.clear();
+      shape = shape_t();
+
+      // @todo { multiple object name? }
+      // First blank-delimited word, without a fixed-size scratch buffer.
+      token += 2;
+      token += strspn(token, " \t");
+      name = std::string(token, strcspn(token, " \t\r"));
+
+      continue;
+    }
+
+    if (token[0] == 't' && IS_SPACE(token[1])) {
+      tag_t tag;
+
+      // Tag name: first blank-delimited word after the keyword.
+      token += 2;
+      token += strspn(token, " \t");
+      tag.name = std::string(token, strcspn(token, " \t\r"));
+      token += tag.name.size();
+      if (token[0] != '\0') {
+        token++;  // step over the separator, but never past the terminator
+      }
+
+      tag_sizes ts = parseTagTriple(&token);
+
+      tag.intValues.resize(static_cast<size_t>(ts.num_ints));
+
+      for (size_t i = 0; i < static_cast<size_t>(ts.num_ints); ++i) {
+        tag.intValues[i] = atoi(token);
+        token += strcspn(token, "/ \t\r");
+        if (token[0] != '\0') {
+          token++;
+        }
+      }
+
+      tag.floatValues.resize(static_cast<size_t>(ts.num_reals));
+      for (size_t i = 0; i < static_cast<size_t>(ts.num_reals); ++i) {
+        tag.floatValues[i] = parseReal(&token);
+        token += strcspn(token, "/ \t\r");
+        if (token[0] != '\0') {
+          token++;
+        }
+      }
+
+      tag.stringValues.resize(static_cast<size_t>(ts.num_strings));
+      for (size_t i = 0; i < static_cast<size_t>(ts.num_strings); ++i) {
+        // The word is read after any leading blanks, while `token` advances
+        // by the word length plus one separator, as the scanf-based code did.
+        const char *word = token + strspn(token, " \t\r");
+        tag.stringValues[i] = std::string(word, strcspn(word, " \t\r"));
+        token += tag.stringValues[i].size();
+        if (token[0] != '\0') {
+          token++;
+        }
+      }
+
+      tags.push_back(tag);
+    }
+
+    // Ignore unknown command.
+  }
+
+  bool ret = exportFaceGroupToShape(&shape, faceGroup, tags, material, name,
+                                    triangulate);
+  // exportFaceGroupToShape return false when `usemtl` is called in the last
+  // line.
+  // we also add `shape` to `shapes` when `shape.mesh` has already some
+  // faces(indices)
+  if (ret || shape.mesh.indices.size()) {
+    shapes->push_back(shape);
+  }
+  faceGroup.clear();  // for safety
+
+  if (err) {
+    (*err) += errss.str();
+  }
+
+  attrib->vertices.swap(v);
+  attrib->normals.swap(vn);
+  attrib->texcoords.swap(vt);
+
+  return true;
+}
+
+bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
+                         void *user_data /*= NULL*/,
+                         MaterialReader *readMatFn /*= NULL*/,
+                         std::string *err /*= NULL*/) {
+  std::stringstream errss;
+
+  // material
+  std::map<std::string, int> material_map;
+  int material_id = -1;  // -1 = invalid
+
+  std::vector<index_t> indices;
+  std::vector<material_t> materials;
+  std::vector<std::string> names;
+  names.reserve(2);
+  std::string name;
+  std::vector<const char *> names_out;
+
+  std::string linebuf;
+  while (inStream.peek() != -1) {
+    safeGetline(inStream, linebuf);
+
+    // Trim newline '\r\n' or '\n'
+    if (linebuf.size() > 0) {
+      if (linebuf[linebuf.size() - 1] == '\n')
+        linebuf.erase(linebuf.size() - 1);
+    }
+    if (linebuf.size() > 0) {
+      if (linebuf[linebuf.size() - 1] == '\r')
+        linebuf.erase(linebuf.size() - 1);
+    }
+
+    // Skip if empty line.
+    if (linebuf.empty()) {
+      continue;
+    }
+
+    // Skip leading space.
+    const char *token = linebuf.c_str();
+    token += strspn(token, " \t");
+
+    assert(token);
+    if (token[0] == '\0') continue;  // empty line
+
+    if (token[0] == '#') continue;  // comment line
+
+    // vertex
+    if (token[0] == 'v' && IS_SPACE((token[1]))) {
+      token += 2;
+      real_t x, y, z, w;  // w is optional. default = 1.0
+      parseV(&x, &y, &z, &w, &token);
+      if (callback.vertex_cb) {
+        callback.vertex_cb(user_data, x, y, z, w);
+      }
+      continue;
+    }
+
+    // normal
+    if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
+      token += 3;
+      real_t x, y, z;
+      parseReal3(&x, &y, &z, &token);
+      if (callback.normal_cb) {
+        callback.normal_cb(user_data, x, y, z);
+      }
+      continue;
+    }
+
+    // texcoord
+    if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
+      token += 3;
+      real_t x, y, z;  // y and z are optional. default = 0.0
+      parseReal3(&x, &y, &z, &token);
+      if (callback.texcoord_cb) {
+        callback.texcoord_cb(user_data, x, y, z);
+      }
+      continue;
+    }
+
+    // face
+    if (token[0] == 'f' && IS_SPACE((token[1]))) {
+      token += 2;
+      token += strspn(token, " \t");
+
+      indices.clear();
+      while (!IS_NEW_LINE(token[0])) {
+        vertex_index vi = parseRawTriple(&token);
+
+        index_t idx;
+        idx.vertex_index = vi.v_idx;
+        idx.normal_index = vi.vn_idx;
+        idx.texcoord_index = vi.vt_idx;
+
+        indices.push_back(idx);
+        size_t n = strspn(token, " \t\r");
+        token += n;
+      }
+
+      if (callback.index_cb && indices.size() > 0) {
+        callback.index_cb(user_data, &indices.at(0),
+                          static_cast<int>(indices.size()));
+      }
+
+      continue;
+    }
+
+    // use mtl
+    if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
+      char namebuf[TINYOBJ_SSCANF_BUFFER_SIZE];
+      token += 7;
+#ifdef _MSC_VER
+      sscanf_s(token, "%s", namebuf,
+               static_cast<unsigned int>(_countof(namebuf)));
+#else
+      std::sscanf(token, "%s", namebuf);
+#endif
+
+      int newMaterialId = -1;
+      if (material_map.find(namebuf) != material_map.end()) {
+        newMaterialId = material_map[namebuf];
+      } else {
+        // { error!! material not found }
+      }
+
+      if (newMaterialId != material_id) {
+        material_id = newMaterialId;
+      }
+
+      if (callback.usemtl_cb) {
+        callback.usemtl_cb(user_data, namebuf, material_id);
+      }
+
+      continue;
+    }
+
+    // load mtl
+    if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+      if (readMatFn) {
+        token += 7;
+
+        std::vector<std::string> filenames;
+        SplitString(std::string(token), ' ', filenames);
+
+        if (filenames.empty()) {
+          if (err) {
+            (*err) +=
+                "WARN: Looks like empty filename for mtllib. Use default "
+                "material. \n";
+          }
+        } else {
+          bool found = false;
+          for (size_t s = 0; s < filenames.size(); s++) {
+            std::string err_mtl;
+            bool ok = (*readMatFn)(filenames[s].c_str(), &materials,
+                                   &material_map, &err_mtl);
+            if (err && (!err_mtl.empty())) {
+              (*err) += err_mtl;  // This should be warn message.
+            }
+
+            if (ok) {
+              found = true;
+              break;
+            }
+          }
+
+          if (!found) {
+            if (err) {
+              (*err) +=
+                  "WARN: Failed to load material file(s). Use default "
+                  "material.\n";
+            }
+          } else {
+            if (callback.mtllib_cb) {
+              callback.mtllib_cb(user_data, &materials.at(0),
+                                 static_cast<int>(materials.size()));
+            }
+          }
+        }
+      }
+
+      continue;
+    }
+
+    // group name
+    if (token[0] == 'g' && IS_SPACE((token[1]))) {
+      names.clear();
+
+      while (!IS_NEW_LINE(token[0])) {
+        std::string str = parseString(&token);
+        names.push_back(str);
+        token += strspn(token, " \t\r");  // skip tag
+      }
+
+      assert(names.size() > 0);
+
+      // names[0] must be 'g', so skip the 0th element.
+      if (names.size() > 1) {
+        name = names[1];
+      } else {
+        name.clear();
+      }
+
+      if (callback.group_cb) {
+        if (names.size() > 1) {
+          // create const char* array.
+          names_out.resize(names.size() - 1);
+          for (size_t j = 0; j < names_out.size(); j++) {
+            names_out[j] = names[j + 1].c_str();
+          }
+          callback.group_cb(user_data, &names_out.at(0),
+                            static_cast<int>(names_out.size()));
+
+        } else {
+          callback.group_cb(user_data, NULL, 0);
+        }
+      }
+
+      continue;
+    }
+
+    // object name
+    if (token[0] == 'o' && IS_SPACE((token[1]))) {
+      // @todo { multiple object name? }
+      char namebuf[TINYOBJ_SSCANF_BUFFER_SIZE];
+      token += 2;
+#ifdef _MSC_VER
+      sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf));
+#else
+      std::sscanf(token, "%s", namebuf);
+#endif
+      std::string object_name = std::string(namebuf);
+
+      if (callback.object_cb) {
+        callback.object_cb(user_data, object_name.c_str());
+      }
+
+      continue;
+    }
+
+#if 0  // @todo
+    if (token[0] == 't' && IS_SPACE(token[1])) {
+      tag_t tag;
+
+      char namebuf[4096];
+      token += 2;
+#ifdef _MSC_VER
+      sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf));
+#else
+      std::sscanf(token, "%s", namebuf);
+#endif
+      tag.name = std::string(namebuf);
+
+      token += tag.name.size() + 1;
+
+      tag_sizes ts = parseTagTriple(&token);
+
+      tag.intValues.resize(static_cast<size_t>(ts.num_ints));
+
+      for (size_t i = 0; i < static_cast<size_t>(ts.num_ints); ++i) {
+        tag.intValues[i] = atoi(token);
+        token += strcspn(token, "/ \t\r") + 1;
+      }
+
+      tag.floatValues.resize(static_cast<size_t>(ts.num_reals));
+      for (size_t i = 0; i < static_cast<size_t>(ts.num_reals); ++i) {
+        tag.floatValues[i] = parseReal(&token);
+        token += strcspn(token, "/ \t\r") + 1;
+      }
+
+      tag.stringValues.resize(static_cast<size_t>(ts.num_strings));
+      for (size_t i = 0; i < static_cast<size_t>(ts.num_strings); ++i) {
+        char stringValueBuffer[4096];
+
+#ifdef _MSC_VER
+        sscanf_s(token, "%s", stringValueBuffer,
+                 (unsigned)_countof(stringValueBuffer));
+#else
+        std::sscanf(token, "%s", stringValueBuffer);
+#endif
+        tag.stringValues[i] = stringValueBuffer;
+        token += tag.stringValues[i].size() + 1;
+      }
+
+      tags.push_back(tag);
+    }
+#endif
+
+    // Ignore unknown command.
+  }
+
+  if (err) {
+    (*err) += errss.str();
+  }
+
+  return true;
+}
+}  // namespace tinyobj
+
+#endif
diff --git a/img/Cache First Iteration.png b/img/Cache First Iteration.png
new file mode 100644
index 0000000..60635b7
Binary files /dev/null and b/img/Cache First Iteration.png differ
diff --git a/img/IdealDiffuse.png b/img/IdealDiffuse.png
new file mode 100644
index 0000000..a59bede
Binary files /dev/null and b/img/IdealDiffuse.png differ
diff --git a/img/StreamCompaction.png b/img/StreamCompaction.png
new file mode 100644
index 0000000..3be8511
Binary files /dev/null and b/img/StreamCompaction.png differ
diff --git a/img/Sword.png b/img/Sword.png
new file mode 100644
index 0000000..c3fc8d7
Binary files /dev/null and b/img/Sword.png differ
diff --git a/img/blooper1.png b/img/blooper1.png
new file mode 100644
index 0000000..c339950
Binary files /dev/null and b/img/blooper1.png differ
diff --git a/img/blooper2.png b/img/blooper2.png
new file mode 100644
index 0000000..15e7b07
Binary files /dev/null and b/img/blooper2.png differ
diff --git a/img/blooper3.png b/img/blooper3.png
new file mode 100644
index 0000000..ce879fc
Binary files /dev/null and b/img/blooper3.png differ
diff --git a/img/cacheFirstIteration.png b/img/cacheFirstIteration.png
new file mode 100644
index 0000000..60635b7
Binary files /dev/null and b/img/cacheFirstIteration.png differ
diff --git a/img/cornell.2018-09-30_21-38-46z.3466samp.png b/img/cornell.2018-09-30_21-38-46z.3466samp.png
new file mode 100644
index 0000000..7b7d682
Binary files /dev/null and b/img/cornell.2018-09-30_21-38-46z.3466samp.png differ
diff --git a/img/cornell.2018-10-02_19-40-38z.4580samp.png b/img/cornell.2018-10-02_19-40-38z.4580samp.png
new file mode 100644
index 0000000..557b277
Binary files /dev/null and b/img/cornell.2018-10-02_19-40-38z.4580samp.png differ
diff --git a/img/cornell.2018-10-03_03-05-39z.1695samp.png b/img/cornell.2018-10-03_03-05-39z.1695samp.png
new file mode 100644
index 0000000..30c83ef
Binary files /dev/null and b/img/cornell.2018-10-03_03-05-39z.1695samp.png differ
diff --git a/img/materialIDsort.png b/img/materialIDsort.png
new file mode 100644
index 0000000..404ef73
Binary files /dev/null and b/img/materialIDsort.png differ
diff --git a/img/pathTracing.png b/img/pathTracing.png
new file mode 100644
index 0000000..23aadad
Binary files /dev/null and b/img/pathTracing.png differ
diff --git a/img/reflection.png b/img/reflection.png
new file mode 100644
index 0000000..bb0d055
Binary files /dev/null and b/img/reflection.png differ
diff --git a/img/reflectionRefraction.png b/img/reflectionRefraction.png
new file mode 100644
index 0000000..27437f3
Binary files /dev/null and b/img/reflectionRefraction.png differ
diff --git a/img/refraction.png b/img/refraction.png
new file mode 100644
index 0000000..0b82575
Binary files /dev/null and b/img/refraction.png differ
diff --git a/img/runtimes.png b/img/runtimes.png
new file mode 100644
index 0000000..5788202
Binary files /dev/null and b/img/runtimes.png differ
diff --git a/scenes/cornell.txt b/scenes/cornell.txt
index 83ff820..4a53c2e 100644
--- a/scenes/cornell.txt
+++ b/scenes/cornell.txt
@@ -6,7 +6,7 @@ SPECRGB     0 0 0
 REFL        0
 REFR        0
 REFRIOR     0
-EMITTANCE   5
+EMITTANCE   50
 
 // Diffuse white
 MATERIAL 1
@@ -48,9 +48,19 @@ REFR        0
 REFRIOR     0
 EMITTANCE   0
 
+// Specular refract 
+MATERIAL 5
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .98 .98 .98
+REFL        0
+REFR        1
+REFRIOR     1.5
+EMITTANCE   0
+
 // Camera
 CAMERA
-RES         800 800
+RES         1280 720
 FOVY        45
 ITERATIONS  5000
 DEPTH       8
@@ -108,10 +118,26 @@ TRANS       5 5 0
 ROTAT       0 0 0
 SCALE       .01 10 10
 
-// Sphere
+// Sword
 OBJECT 6
+sword
+material 1
+TRANS       3 1 -0.5
+ROTAT       0 90 0
+SCALE       3 3 3
+
+// Sphere
+OBJECT 7
 sphere
 material 4
-TRANS       -1 4 -1
+TRANS       0 4 2
 ROTAT       0 0 0
 SCALE       3 3 3
+
+// Cube
+OBJECT 8
+cube
+material 4
+TRANS       3 4 5
+ROTAT       0 0 0
+SCALE       3 3 3
\ No newline at end of file
diff --git a/scenes/cornellLots.txt b/scenes/cornellLots.txt
new file mode 100644
index 0000000..6fb6ce4
--- /dev/null
+++ b/scenes/cornellLots.txt
@@ -0,0 +1,361 @@
+// Emissive material (light)
+MATERIAL 0
+RGB         1 1 1
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   50
+
+// Diffuse white
+MATERIAL 1
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse red
+MATERIAL 2
+RGB         .85 .35 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse green
+MATERIAL 3
+RGB         .35 .85 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Specular purple
+MATERIAL 4
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .85 .35 .85
+REFL        1
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Specular refract 
+MATERIAL 5
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .98 .98 .98
+REFL        0
+REFR        1
+REFRIOR     1.5
+EMITTANCE   0
+
+// Diffuse yellow
+MATERIAL 6
+RGB         .85 .85 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Camera
+CAMERA
+RES         1280 720
+FOVY        45
+ITERATIONS  5000
+DEPTH       8
+FILE        cornell
+EYE         0.0 5 10.5
+LOOKAT      0 5 0
+UP          0 1 0
+
+
+// Ceiling light
+OBJECT 0
+cube
+material 0
+TRANS       0 10 0
+ROTAT       0 0 0
+SCALE       3 .3 3
+
+// Floor
+OBJECT 1
+cube
+material 1
+TRANS       0 0 0
+ROTAT       0 0 0
+SCALE       10 .01 10
+
+// Ceiling
+OBJECT 2
+cube
+material 1
+TRANS       0 10 0
+ROTAT       0 0 90
+SCALE       .01 10 10
+
+// Back wall
+OBJECT 3
+cube
+material 1
+TRANS       0 5 -5
+ROTAT       0 90 0
+SCALE       .01 10 10
+
+// Left wall
+OBJECT 4
+cube
+material 2
+TRANS       -5 5 0
+ROTAT       0 0 0
+SCALE       .01 10 10
+
+// Right wall
+OBJECT 5
+cube
+material 3
+TRANS       5 5 0
+ROTAT       0 0 0
+SCALE       .01 10 10
+
+// Diffuse Sphere
+OBJECT 6
+sphere
+material 6
+TRANS       -3 1 -2
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 7
+sphere
+material 6
+TRANS       -2 2 -2
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 8
+sphere
+material 6
+TRANS       -1 1 -2
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 9
+sphere
+material 6
+TRANS       0 2 -2
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 10
+sphere
+material 6
+TRANS       1 1 -2
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 11
+sphere
+material 6
+TRANS       2 2 -2
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 12
+sphere
+material 6
+TRANS       3 1 -2
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 13
+sphere
+material 6
+TRANS       1 1 -1
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 14
+sphere
+material 6
+TRANS       -1 1 -1
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Diffuse Sphere
+OBJECT 15
+sphere
+material 6
+TRANS       -3 1 -1
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 16
+sphere
+material 4
+TRANS       -3 3 -1
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 17
+sphere
+material 4
+TRANS       -2 3 0
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 18
+sphere
+material 4
+TRANS       0 1 0
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 19
+sphere
+material 4
+TRANS       2 5 0
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 20
+sphere
+material 4
+TRANS       1 4 0
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 21
+sphere
+material 4
+TRANS       1 2 1
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 22
+sphere
+material 4
+TRANS       -2 6 -1
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 23
+sphere
+material 4
+TRANS       0 3 2
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Reflected Sphere
+OBJECT 24
+sphere
+material 4
+TRANS       3 5 0
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 25
+sphere
+material 5
+TRANS       0 4 3
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 26
+sphere
+material 5
+TRANS       0 4 3
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 27
+sphere
+material 5
+TRANS       0 3 3
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 28
+sphere
+material 5
+TRANS       -1 2 3
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 29
+sphere
+material 5
+TRANS       0 1 3
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 30
+sphere
+material 5
+TRANS       -2 1 3
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 31
+sphere
+material 5
+TRANS       -2 2 3
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 32
+sphere
+material 5
+TRANS       -3 1 3
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 33
+sphere
+material 5
+TRANS       0 4 4
+ROTAT       0 0 0
+SCALE       1 1 1
+
+// Refracted Sphere
+OBJECT 34
+sphere
+material 5
+TRANS       2 4 4
+ROTAT       0 0 0
+SCALE       1 1 1
\ No newline at end of file
diff --git a/scenes/cornellMany.txt b/scenes/cornellMany.txt
new file mode 100644
index 0000000..287ea93
--- /dev/null
+++ b/scenes/cornellMany.txt
@@ -0,0 +1,153 @@
+// Emissive material (light)
+MATERIAL 0
+RGB         1 1 1
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   50
+
+// Diffuse white
+MATERIAL 1
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse red
+MATERIAL 2
+RGB         .85 .35 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse green
+MATERIAL 3
+RGB         .35 .85 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Specular purple
+MATERIAL 4
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .85 .35 .85
+REFL        1
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Specular refract 
+MATERIAL 5
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .98 .98 .98
+REFL        0
+REFR        1
+REFRIOR     1.5
+EMITTANCE   0
+
+// Diffuse yellow
+MATERIAL 6
+RGB         .85 .85 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Camera
+CAMERA
+RES         1280 720
+FOVY        45
+ITERATIONS  5000
+DEPTH       8
+FILE        cornell
+EYE         0.0 5 10.5
+LOOKAT      0 5 0
+UP          0 1 0
+
+
+// Ceiling light
+OBJECT 0
+cube
+material 0
+TRANS       0 10 0
+ROTAT       0 0 0
+SCALE       3 .3 3
+
+// Floor
+OBJECT 1
+cube
+material 1
+TRANS       0 0 0
+ROTAT       0 0 0
+SCALE       10 .01 10
+
+// Ceiling
+OBJECT 2
+cube
+material 1
+TRANS       0 10 0
+ROTAT       0 0 90
+SCALE       .01 10 10
+
+// Back wall
+OBJECT 3
+cube
+material 1
+TRANS       0 5 -5
+ROTAT       0 90 0
+SCALE       .01 10 10
+
+// Left wall
+OBJECT 4
+cube
+material 2
+TRANS       -5 5 0
+ROTAT       0 0 0
+SCALE       .01 10 10
+
+// Right wall
+OBJECT 5
+cube
+material 3
+TRANS       5 5 0
+ROTAT       0 0 0
+SCALE       .01 10 10
+
+// Diffuse Sphere
+OBJECT 6
+sphere
+material 6
+TRANS       -3 4 -2
+ROTAT       0 0 0
+SCALE       3 3 3
+
+// Reflected Sphere
+OBJECT 7
+sphere
+material 4
+TRANS       2 5 0
+ROTAT       0 0 0
+SCALE       3 3 3
+
+// Refracted Sphere
+OBJECT 8
+sphere
+material 5
+TRANS       0 4 3
+ROTAT       0 0 0
+SCALE       4 4 4
\ No newline at end of file
diff --git a/scenes/mirror.txt b/scenes/mirror.txt
new file mode 100644
index 0000000..c206eef
--- /dev/null
+++ b/scenes/mirror.txt
@@ -0,0 +1,151 @@
+// Emissive material (light)
+MATERIAL 0
+RGB         1 1 1
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   50
+
+// Diffuse white
+MATERIAL 1
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse red
+MATERIAL 2
+RGB         .85 .35 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse green
+MATERIAL 3
+RGB         .35 .85 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Specular white
+MATERIAL 4
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .98 .98 .98
+REFL        1
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Specular refract 
+MATERIAL 5
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .98 .98 .98
+REFL        0
+REFR        1
+REFRIOR     1.5
+EMITTANCE   0
+
+// Camera
+CAMERA
+RES         1280 720
+FOVY        45
+ITERATIONS  5000
+DEPTH       8
+FILE        cornell
+EYE         0.0 5 10.5
+LOOKAT      0 5 0
+UP          0 1 0
+
+
+// Ceiling light
+OBJECT 0
+cube
+material 0
+TRANS       0 10 0
+ROTAT       0 0 0
+SCALE       3 .3 3
+
+// Floor
+OBJECT 1
+cube
+material 1
+TRANS       0 0 0
+ROTAT       0 0 0
+SCALE       10 .01 10
+
+// Ceiling
+OBJECT 2
+cube
+material 1
+TRANS       0 10 0
+ROTAT       0 0 90
+SCALE       .01 10 10
+
+// Back wall
+OBJECT 3
+cube
+material 4
+TRANS       0 5 -5
+ROTAT       0 90 0
+SCALE       .01 10 10
+
+// Left wall
+OBJECT 4
+cube
+material 2
+TRANS       -5 5 0
+ROTAT       0 0 0
+SCALE       .01 10 10
+
+// Right wall
+OBJECT 5
+cube
+material 3
+TRANS       5 5 0
+ROTAT       0 0 0
+SCALE       .01 10 10
+
+// Front wall
+OBJECT 6
+cube
+material 4
+TRANS       0 5 5
+ROTAT       0 90 0
+SCALE       .01 10 10
+
+// Sword
+//OBJECT 6
+//sword
+//material 1
+//TRANS       3 1 -0.5
+//ROTAT       0 0 0
+//SCALE       3 3 3
+
+// Sphere
+OBJECT 7
+sphere
+material 1
+TRANS       0 4 2
+ROTAT       0 0 0
+SCALE       3 3 3
+
+// Cube (disabled)
+//OBJECT 8
+//cube
+//material 4
+//TRANS       3 4 5
+//ROTAT       0 0 0
+//SCALE       3 3 3
\ No newline at end of file
diff --git a/scenes/sphere.txt b/scenes/sphere.txt
index a74b545..1a8a0e5 100644
--- a/scenes/sphere.txt
+++ b/scenes/sphere.txt
@@ -8,6 +8,16 @@ REFR        0
 REFRIOR     0
 EMITTANCE   5
 
+// Non-emissive white material (refraction experiment: REFR 0, REFRIOR 0.9)
+MATERIAL 0
+RGB         1 1 1
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0.9
+EMITTANCE   0
+
 // Camera
 CAMERA
 RES         800 800
@@ -26,3 +36,11 @@ material 0
 TRANS       0 0 0
 ROTAT       0 0 0
 SCALE       3 3 3
+
+// Sphere
+OBJECT 1
+sphere
+material 1
+TRANS       -2 0 0
+ROTAT       0 0 0
+SCALE       3 3 3
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a1cb3fb..70fcd2e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,4 +1,6 @@
 set(SOURCE_FILES
+    "objLoader.h"
+    "objLoader.cpp"
     "stb.cpp"
     "image.cpp"
     "image.h"
@@ -19,5 +21,5 @@ set(SOURCE_FILES
 
 cuda_add_library(src
     ${SOURCE_FILES}
-    OPTIONS -arch=sm_20
+    OPTIONS -arch=sm_61
     )
diff --git a/src/interactions.h b/src/interactions.h
index 5ce3628..1616dc1 100644
--- a/src/interactions.h
+++ b/src/interactions.h
@@ -67,13 +67,48 @@ glm::vec3 calculateRandomDirectionInHemisphere(
  * You may need to change the parameter list for your purposes!
  */
 __host__ __device__
-void scatterRay(
-		PathSegment & pathSegment,
-        glm::vec3 intersect,
-        glm::vec3 normal,
-        const Material &m,
-        thrust::default_random_engine &rng) {
-    // TODO: implement this.
-    // A basic implementation of pure-diffuse shading will just call the
-    // calculateRandomDirectionInHemisphere defined above.
+void scatterRay(PathSegment* pathSegment, float t, glm::vec3 intersect, glm::vec3 normal, const Material &m, thrust::default_random_engine &rng) {
+  if (pathSegment != NULL) // should never be NULL, but just in case (a NULL segment is left untouched)
+  {
+    thrust::uniform_real_distribution<float> u01(0.0f, 1.0f);
+    float optionalColorScale = 1.0f; // final divisor for the color; 1.0f leaves it unchanged
+    glm::vec3 color; // NOTE(review): assigned in every branch below but never read afterwards
+    // Replace the segment's ray according to the material: reflective first, then refractive, else diffuse.
+    if (m.hasReflective > 0.0f)
+    {
+      pathSegment->ray.direction = glm::reflect(pathSegment->ray.direction, normal);
+      pathSegment->ray.origin = intersect + 0.001f * pathSegment->ray.direction; // start slightly along the new direction
+      color = m.specular.color;
+    }
+    else if (m.hasRefractive > 0.0f)
+    {
+      // first calculate the reflection coefficient using schlick's approx
+      float r0 = powf(((1 - m.indexOfRefraction) / (1 + m.indexOfRefraction)), 2.0f);
+      float cosTheta = glm::dot(pathSegment->ray.direction, normal);
+      float reflCoeff = r0 + (1 - r0) * powf((1 - cosTheta), 5.0f); // NOTE(review): computed but not used in this function
+      // Use 1/indexOfRefraction when dot(direction, normal) < 0.0001f -- presumably the ray is entering; confirm normal orientation.
+      float eta = m.indexOfRefraction;
+      if (glm::dot(pathSegment->ray.direction, normal) < 0.0001f)
+      {
+        eta = 1.0f / eta;
+      }
+      double cosI(dot(pathSegment->ray.direction, normal));
+      glm::vec3 refractedVec = (pathSegment->ray.direction * eta) - normal * (float)(-cosI + eta * cosI); // NOTE(review): not normalized here; verify against glm::refract
+      pathSegment->ray.direction = refractedVec;
+      pathSegment->ray.origin = intersect + 0.001f * pathSegment->ray.direction;
+      color = m.color;
+    }
+    else
+    {
+      pathSegment->ray.direction = calculateRandomDirectionInHemisphere(normal, rng);
+      pathSegment->ray.origin = intersect + 0.001f * pathSegment->ray.direction;
+      color = m.color;
+    }
+
+    // Taken from (attribution left unfinished by the author). Tint = m.color * (0.3 * dot(normal, +Y) + 0.7 * (1 - 0.02 * t)).
+    float lightTerm = glm::dot(normal, glm::vec3(0.0f, 1.0f, 0.0f));
+    pathSegment->color *= (m.color * lightTerm) * 0.3f + ((1.0f - t * 0.02f) * m.color) * 0.7f;
+    pathSegment->color *= u01(rng); // apply some noise because why not (scales by one uniform random sample)
+    pathSegment->color /= optionalColorScale;
+  }
 }
diff --git a/src/objLoader.cpp b/src/objLoader.cpp
new file mode 100644
index 0000000..f010ef9
--- /dev/null
+++ b/src/objLoader.cpp
@@ -0,0 +1,77 @@
+#define TINYOBJLOADER_IMPLEMENTATION // define this in only *one* .cc
+#include "tiny_obj_loader.h"
+#include <iostream>
+#include "objLoader.h"
+
+// Loads the triangulated Wavefront OBJ file `inputfile` and appends one Triangle per face
+// to `triangles`. On return, the appended triangles occupy the index range
+// [startTriangleIndex, endTriangleIndex) of `triangles`.
+// Prints a message to stderr and calls exit(1) if loading fails or a face is not a triangle.
+// Faces that lack vertex normals get a normal computed from their vertex winding.
+void loadObj(std::string inputfile, int& startTriangleIndex, int& endTriangleIndex, std::vector<Triangle>* triangles)
+{
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+
+  std::string err;
+  const bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &err, inputfile.c_str());
+
+  if (!err.empty()) { // `err` may contain warning message.
+    std::cerr << err << std::endl;
+  }
+
+  if (!ret) {
+    std::cerr << "couldn't load obj" << std::endl;
+    exit(1);
+  }
+
+  startTriangleIndex = static_cast<int>(triangles->size());
+
+  // Face indices are stored flattened per shape; index_offset tracks the first index of face f.
+  for (size_t s = 0; s < shapes.size(); s++) {
+    const tinyobj::mesh_t& mesh = shapes[s].mesh;
+    size_t index_offset = 0;
+    for (size_t f = 0; f < mesh.num_face_vertices.size(); f++) {
+      if (mesh.num_face_vertices[f] != 3)
+      {
+        std::cerr << "Error: Mesh contains polygons that aren't triangles" << std::endl;
+        exit(1);
+      }
+
+      glm::vec3 vertices[3];
+      glm::vec3 normalSum(0.0f);
+      bool hasNormals = true;
+
+      for (size_t v = 0; v < 3; v++) {
+        const tinyobj::index_t idx = mesh.indices[index_offset + v];
+        const size_t vi = 3 * static_cast<size_t>(idx.vertex_index);
+        vertices[v] = glm::vec3(attrib.vertices[vi + 0], attrib.vertices[vi + 1], attrib.vertices[vi + 2]);
+
+        // tinyobjloader reports normal_index == -1 when the face has no "vn" reference;
+        // indexing attrib.normals with it would read out of bounds.
+        if (idx.normal_index < 0) {
+          hasNormals = false;
+        } else {
+          const size_t ni = 3 * static_cast<size_t>(idx.normal_index);
+          normalSum += glm::vec3(attrib.normals[ni + 0], attrib.normals[ni + 1], attrib.normals[ni + 2]);
+        }
+      }
+      index_offset += 3;
+
+      Triangle newTriangle;
+      newTriangle.v1 = vertices[0];
+      newTriangle.v2 = vertices[1];
+      newTriangle.v3 = vertices[2];
+      if (hasNormals) {
+        newTriangle.n = normalSum / 3.0f; // one normal per face: average of the three vertex normals
+      } else {
+        // No normals in the file for this face: use the geometric normal implied by the winding.
+        newTriangle.n = glm::normalize(glm::cross(vertices[1] - vertices[0], vertices[2] - vertices[0]));
+      }
+      triangles->push_back(newTriangle);
+    }
+  }
+
+  endTriangleIndex = static_cast<int>(triangles->size());
+}
\ No newline at end of file
diff --git a/src/objLoader.h b/src/objLoader.h
new file mode 100644
index 0000000..ced3655
--- /dev/null
+++ b/src/objLoader.h
@@ -0,0 +1,6 @@
+#pragma once
+#include <string>
+#include "glm/glm.hpp"
+#include "sceneStructs.h"
+// Loads the OBJ file `inputfile`, appends its triangles to `triangles`, and reports the appended index range [startTriangleIndex, endTriangleIndex).
+void loadObj(std::string inputfile, int& startTriangleIndex, int& endTriangleIndex, std::vector<Triangle>* triangles);
\ No newline at end of file
diff --git a/src/pathtrace.cu b/src/pathtrace.cu
index c1ec122..a789897 100644
--- a/src/pathtrace.cu
+++ b/src/pathtrace.cu
@@ -4,6 +4,10 @@
 #include <thrust/execution_policy.h>
 #include <thrust/random.h>
 #include <thrust/remove.h>
+#include <thrust/device_vector.h>
+#include <thrust/sort.h>
+#include <chrono>
+#include <ctime>
 
 #include "sceneStructs.h"
 #include "scene.h"
@@ -13,8 +17,12 @@
 #include "pathtrace.h"
 #include "intersections.h"
 #include "interactions.h"
+#include "../stream_compaction/efficient.h"
 
 #define ERRORCHECK 1
+#define CACHE_FIRST_ITERATION 1
+#define DEPTH_OF_FIELD 0
+#define SORT_BY_MATERIAL 1
 
 #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
 #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
@@ -44,6 +52,55 @@ thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int de
     return thrust::default_random_engine(h);
 }
 
+// Maps two uniform samples drawn from `rng` onto a disk using the concentric (square-to-disk)
+// mapping and writes the resulting point to *newX / *newY.
+__host__ __device__
+void concentricSampleDisk(float* newX, float* newY, thrust::default_random_engine &rng)
+{
+  // get the sample
+  thrust::uniform_real_distribution<float> u01(0.0f, 1.0f);
+  float x = u01(rng);
+  float y = u01(rng);
+
+  // remap to -1 to 1
+  float xOffset = 2.f * x - 1.f;
+  float yOffset = 2.f * y - 1.f;
+
+  // The centre maps to itself; return before the divisions below, which would be 0/0 here.
+  if (xOffset == 0 && yOffset == 0)
+  {
+    *newX = 0.f;
+    *newY = 0.f;
+    return;
+  }
+
+  float theta, r;
+  if (std::abs(xOffset) > std::abs(yOffset)) {
+    r = xOffset;
+    theta = (PI / 4.f) * (yOffset / xOffset);
+  } else {
+    r = yOffset;
+    theta = (PI / 2.f) - ((PI / 4.f) * (xOffset / yOffset));
+  }
+  *newX = r * std::cos(theta);
+  *newY = r * std::sin(theta);
+}
+
+// Depth-of-field jitter: offsets the ray origin in X/Y by aperture * (concentric disk sample) and
+// re-aims the ray at getPointOnRay(ray, focalDist / |direction.z|), evaluated before the origin moves.
+__host__ __device__
+void modifyRayForDepthofField(Ray* ray, float aperture, float focalDist, thrust::default_random_engine &rng)
+{
+  float diskU, diskV;
+  concentricSampleDisk(&diskU, &diskV, rng);
+
+  const float distAlongRay = focalDist / fabs(ray->direction.z);
+  const glm::vec3 focusPoint = getPointOnRay(*ray, distAlongRay);
+
+  ray->origin += glm::vec3(diskU * aperture, diskV * aperture, 0.0f);
+  ray->direction = glm::normalize(focusPoint - ray->origin);
+}
+
 //Kernel that writes the image to the OpenGL PBO directly.
 __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution,
         int iter, glm::vec3* image) {
@@ -70,9 +127,13 @@ __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution,
 static Scene * hst_scene = NULL;
 static glm::vec3 * dev_image = NULL;
 static Geom * dev_geoms = NULL;
+static Triangle * dev_triangles = NULL;
 static Material * dev_materials = NULL;
 static PathSegment * dev_paths = NULL;
+static PathSegment * dev_paths_first_iter_cache = NULL;
 static ShadeableIntersection * dev_intersections = NULL;
+static PathSegment ** dev_paths_ptrs = NULL;
+static int * dev_material_ids = NULL;
 // TODO: static variables for device memory, any extra info you need, etc
 // ...
 
@@ -85,9 +146,14 @@ void pathtraceInit(Scene *scene) {
     cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));
 
   	cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));
+    cudaMalloc(&dev_paths_ptrs, pixelcount * sizeof(PathSegment*));
+    if (CACHE_FIRST_ITERATION) { cudaMalloc(&dev_paths_first_iter_cache, pixelcount * sizeof(PathSegment)); }
 
-  	cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
-  	cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);
+    cudaMalloc(&dev_triangles, scene->triangles.size() * sizeof(Triangle));
+    cudaMemcpy(dev_triangles, scene->triangles.data(), scene->triangles.size() * sizeof(Triangle), cudaMemcpyHostToDevice);
+
+    cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
+    cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);
 
   	cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
   	cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);
@@ -95,6 +161,8 @@ void pathtraceInit(Scene *scene) {
   	cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
   	cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
 
+  	cudaMalloc(&dev_material_ids, pixelcount * sizeof(int));
+
     // TODO: initialize any extra device memeory you need
 
     checkCUDAError("pathtraceInit");
@@ -105,7 +173,12 @@ void pathtraceFree() {
   	cudaFree(dev_paths);
   	cudaFree(dev_geoms);
   	cudaFree(dev_materials);
+    cudaFree(dev_triangles);
   	cudaFree(dev_intersections);
+    cudaFree(dev_paths_ptrs);
+    if (CACHE_FIRST_ITERATION) { cudaFree(dev_paths_first_iter_cache); }
+    cudaFree(dev_material_ids);
+
     // TODO: clean up any extra device memory you created
 
     checkCUDAError("pathtraceFree");
@@ -128,38 +201,40 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path
 		int index = x + (y * cam.resolution.x);
 		PathSegment & segment = pathSegments[index];
 
-		segment.ray.origin = cam.position;
+    segment.ray.origin = cam.position;
     segment.color = glm::vec3(1.0f, 1.0f, 1.0f);
 
-		// TODO: implement antialiasing by jittering the ray
 		segment.ray.direction = glm::normalize(cam.view
 			- cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f)
 			- cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f)
 			);
 
+    thrust::default_random_engine rng = makeSeededRandomEngine(iter, index, traceDepth);
+
+    if (DEPTH_OF_FIELD) { modifyRayForDepthofField(&segment.ray, 0.5, 10.5, rng); }
 		segment.pixelIndex = index;
 		segment.remainingBounces = traceDepth;
 	}
 }
 
-// TODO:
 // computeIntersections handles generating ray intersections ONLY.
 // Generating new rays is handled in your shader(s).
 // Feel free to modify the code below.
 __global__ void computeIntersections(
 	int depth
 	, int num_paths
-	, PathSegment * pathSegments
+	, PathSegment ** pathSegments
 	, Geom * geoms
 	, int geoms_size
 	, ShadeableIntersection * intersections
+  , Triangle * triangles
 	)
 {
 	int path_index = blockIdx.x * blockDim.x + threadIdx.x;
 
 	if (path_index < num_paths)
 	{
-		PathSegment pathSegment = pathSegments[path_index];
+		PathSegment* pathSegment_ptr = pathSegments[path_index];
 
 		float t;
 		glm::vec3 intersect_point;
@@ -176,38 +251,69 @@ __global__ void computeIntersections(
 		for (int i = 0; i < geoms_size; i++)
 		{
 			Geom & geom = geoms[i];
-
 			if (geom.type == CUBE)
 			{
-				t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+				t = boxIntersectionTest(geom, pathSegment_ptr->ray, tmp_intersect, tmp_normal, outside);
 			}
 			else if (geom.type == SPHERE)
 			{
-				t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+				t = sphereIntersectionTest(geom, pathSegment_ptr->ray, tmp_intersect, tmp_normal, outside);
 			}
+      else if (geom.type == SWORD)
+      {
+        // Triangle mesh: test in object space and keep the NEAREST hit. Reset t and
+        // outside first so a miss never reuses stale values from the previous geom.
+        t = -1.0f;
+        outside = true;
+        float nearest_obj_t = -1.0f;
+        // The object-space ray is identical for every triangle, so build it once.
+        glm::vec3 ro = multiplyMV(geom.inverseTransform, glm::vec4(pathSegment_ptr->ray.origin, 1.0f));
+        glm::vec3 rd = glm::normalize(multiplyMV(geom.inverseTransform, glm::vec4(pathSegment_ptr->ray.direction, 0.0f)));
+        glm::vec3 baryPosition;
+        for (int j = geom.startTriangleIndex; j < geom.endTriangleIndex; ++j)
+        {
+          // As before, baryPosition.z is taken as the hit distance along (ro, rd).
+          if (glm::intersectRayTriangle(ro, rd, triangles[j].v1, triangles[j].v2, triangles[j].v3, baryPosition)
+            && baryPosition.z > 0.0f
+            && (nearest_obj_t < 0.0f || baryPosition.z < nearest_obj_t))
+          {
+            nearest_obj_t = baryPosition.z;
+            // World-space hit point; t must be a world-space distance because it is
+            // compared with the other primitives' t and reused by the shading kernel.
+            tmp_intersect = multiplyMV(geom.transform, glm::vec4(ro + rd * nearest_obj_t, 1.0f));
+            t = glm::length(tmp_intersect - pathSegment_ptr->ray.origin);
+            // NOTE(review): the normal is kept in object space as before; it probably
+            // needs geom.invTranspose for rotated or scaled meshes -- confirm.
+            tmp_normal = triangles[j].n;
+          }
+        }
+      }
 			// TODO: add more intersection tests here... triangle? metaball? CSG?
 
 			// Compute the minimum t from the intersection tests to determine what
 			// scene geometry object was hit first.
 			if (t > 0.0f && t_min > t)
 			{
-				t_min = t;
-				hit_geom_index = i;
-				intersect_point = tmp_intersect;
-				normal = tmp_normal;
+        if (outside)
+        {
+          t_min = t;
+          hit_geom_index = i;
+          intersect_point = tmp_intersect;
+          normal = tmp_normal;
+        }
 			}
 		}
 
 		if (hit_geom_index == -1)
 		{
-			intersections[path_index].t = -1.0f;
+			intersections[pathSegment_ptr->pixelIndex].t = -1.0f;
 		}
 		else
 		{
 			//The ray hits something
-			intersections[path_index].t = t_min;
-			intersections[path_index].materialId = geoms[hit_geom_index].materialid;
-			intersections[path_index].surfaceNormal = normal;
+			intersections[pathSegment_ptr->pixelIndex].t = t_min;
+			intersections[pathSegment_ptr->pixelIndex].materialId = geoms[hit_geom_index].materialid;
+			intersections[pathSegment_ptr->pixelIndex].surfaceNormal = normal;
 		}
 	}
 }
@@ -223,21 +329,23 @@ __global__ void computeIntersections(
 // bump mapping.
 __global__ void shadeFakeMaterial (
   int iter
+  , int depth
   , int num_paths
 	, ShadeableIntersection * shadeableIntersections
-	, PathSegment * pathSegments
+	, PathSegment ** pathSegments
 	, Material * materials
 	)
 {
   int idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (idx < num_paths)
   {
-    ShadeableIntersection intersection = shadeableIntersections[idx];
-    if (intersection.t > 0.0f) { // if the intersection exists...
+    ShadeableIntersection intersection = shadeableIntersections[pathSegments[idx]->pixelIndex];
+    if (intersection.t > 0.0f && pathSegments[idx]->remainingBounces > 0) { // if the intersection exists...
       // Set up the RNG
       // LOOK: this is how you use thrust's RNG! Please look at
       // makeSeededRandomEngine as well.
-      thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0);
+//      long ms = std::chrono::system_clock::now().time_since_epoch().count;
+      thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, depth);
       thrust::uniform_real_distribution<float> u01(0, 1);
 
       Material material = materials[intersection.materialId];
@@ -245,22 +353,74 @@ __global__ void shadeFakeMaterial (
 
       // If the material indicates that the object was a light, "light" the ray
       if (material.emittance > 0.0f) {
-        pathSegments[idx].color *= (materialColor * material.emittance);
+        pathSegments[idx]->color *= (materialColor * material.emittance);
+        pathSegments[idx]->remainingBounces = 0;
       }
       // Otherwise, do some pseudo-lighting computation. This is actually more
       // like what you would expect from shading in a rasterizer like OpenGL.
       // TODO: replace this! you should be able to start with basically a one-liner
       else {
         float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f));
-        pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
-        pathSegments[idx].color *= u01(rng); // apply some noise because why not
+        pathSegments[idx]->color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
+        pathSegments[idx]->color *= u01(rng); // apply some noise because why not
+        pathSegments[idx]->remainingBounces--;
       }
     // If there was no intersection, color the ray black.
     // Lots of renderers use 4 channel color, RGBA, where A = alpha, often
     // used for opacity, in which case they can indicate "no opacity".
     // This can be useful for post-processing and image compositing.
     } else {
-      pathSegments[idx].color = glm::vec3(0.0f);
+      pathSegments[idx]->color = glm::vec3(0.0f);
+      pathSegments[idx]->remainingBounces = 0;
+    }
+  }
+}
+
+
+// Shades one bounce for every path in pathSegments[0, num_paths): a hit on an emissive
+// material ends the path with the light's color, any other hit is handed to scatterRay,
+// and a miss (or a path with no bounces left) is set to black and ended.
+__global__ void shadeRealMaterial (
+  int iter
+  , int depth
+  , int num_paths
+	, ShadeableIntersection * shadeableIntersections
+	, PathSegment ** pathSegments
+	, Material * materials
+	)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx < num_paths)
+  {
+    ShadeableIntersection intersection = shadeableIntersections[pathSegments[idx]->pixelIndex];
+    if (intersection.t > 0.0f && pathSegments[idx]->remainingBounces > 0) { // hit something and still has bounces left
+      // RNG seeded from (iter, idx, depth); u01 is declared but not used in this kernel.
+      thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, depth);
+      thrust::uniform_real_distribution<float> u01(0.0f, 1.0f);
+
+      Material material = materials[intersection.materialId];
+      glm::vec3 materialColor = material.color;
+
+      // Emissive material: fold the light into the path color and stop bouncing.
+      if (material.emittance > 0.0f) {
+        pathSegments[idx]->color *= (materialColor * material.emittance);
+        pathSegments[idx]->remainingBounces = 0;
+      }
+      // Non-emissive material: compute the hit point from the ray and t, let scatterRay
+      // update the segment for the next bounce (see scatterRay for what it changes),
+      // then consume one bounce.
+      else {
+        glm::vec3 intersectionPoint = getPointOnRay(pathSegments[idx]->ray, intersection.t);
+        scatterRay(pathSegments[idx], intersection.t, intersectionPoint, intersection.surfaceNormal, material, rng);
+        pathSegments[idx]->remainingBounces--;
+      }
+    // Miss, or no bounces left: the path contributes nothing, so color it black and
+    // zero remainingBounces. kernMapToBoolean keeps only paths with remainingBounces > 0,
+    // so both this branch and the emissive branch above mark the path for removal
+    // by the next stream compaction.
+    } else {
+      pathSegments[idx]->color = glm::vec3(0.0f);
+      pathSegments[idx]->remainingBounces = 0;
     }
   }
 }
@@ -277,6 +437,34 @@ __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterati
 	}
 }
 
+// Stores the address of each PathSegment so later stages can shuffle pointers instead of whole segments.
+__global__ void getPointersToPaths(int nPaths, PathSegment** dev_paths_ptrs, PathSegment* dev_paths)
+{
+  const int pathIdx = blockIdx.x * blockDim.x + threadIdx.x;
+  // Threads past the end of the array have nothing to do.
+  if (pathIdx >= nPaths) { return; }
+
+  dev_paths_ptrs[pathIdx] = dev_paths + pathIdx;
+}
+
+
+// Builds the keys for thrust::sort_by_key: one material id per path, read from the
+// intersection stored at that path's pixelIndex.
+__global__ void getMaterialIDArray(int nPaths, int* dev_materialIDs,
+  ShadeableIntersection* dev_intersections, PathSegment** dev_paths_ptrs)
+{
+  const int pathIdx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (pathIdx >= nPaths) { return; }
+
+  dev_materialIDs[pathIdx] = dev_intersections[dev_paths_ptrs[pathIdx]->pixelIndex].materialId;
+}
+
+using StreamCompaction::Common::PerformanceTimer;
+// Returns the single timer instance used for the per-stage timings in this file.
+PerformanceTimer& timer() {
+  static PerformanceTimer instance;
+  return instance;
+}
 /**
  * Wrapper for the __global__ call that sets up the kernel calls and does a ton
  * of memory management
@@ -295,6 +483,10 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
 	// 1D block for path tracing
 	const int blockSize1d = 128;
 
+  float rayGenerateTime = 0.0f;
+  float computeIntersectionsTime = 0.0f;  // accumulated with += once per bounce, so it must start at 0
+  float shadingTime = 0.0f;               // accumulated with += once per bounce
+  float streamCompactionTime = 0.0f;      // accumulated with += once per bounce
     ///////////////////////////////////////////////////////////////////////////
 
     // Recap:
@@ -323,11 +515,25 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
     //     since some shaders you write may also cause a path to terminate.
     // * Finally, add this iteration's results to the image. This has been done
     //   for you.
+  timer().startGpuTimer();
+  if (CACHE_FIRST_ITERATION)
+  {
+    // save the very first iteration into the other buffer
+    if (iter == 1)
+    {
+      generateRayFromCamera << <blocksPerGrid2d, blockSize2d >> > (cam, iter, traceDepth, dev_paths_first_iter_cache);
+      checkCUDAError("generate camera ray");
+    }
 
-    // TODO: perform one iteration of path tracing
-
-	generateRayFromCamera <<<blocksPerGrid2d, blockSize2d >>>(cam, iter, traceDepth, dev_paths);
-	checkCUDAError("generate camera ray");
+    // memcpy the cache buffer into the dev_paths buffer
+    cudaMemcpy(dev_paths, dev_paths_first_iter_cache, pixelcount * sizeof(PathSegment), cudaMemcpyDeviceToDevice);
+  }
+  else
+  {
+    // if we aren't caching then just generate rays into dev_paths always
+    generateRayFromCamera << <blocksPerGrid2d, blockSize2d >> > (cam, iter, traceDepth, dev_paths);
+    checkCUDAError("generate camera ray");
+  }
 
 	int depth = 0;
 	PathSegment* dev_path_end = dev_paths + pixelcount;
@@ -335,59 +541,75 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
 
 	// --- PathSegment Tracing Stage ---
 	// Shoot ray into scene, bounce between objects, push shading chunks
+  dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
+  getPointersToPaths << <numblocksPathSegmentTracing, blockSize1d >> > (num_paths, dev_paths_ptrs, dev_paths);
+
+  timer().endGpuTimer();
 
+  rayGenerateTime =  timer().getGpuElapsedTimeForPreviousOperation();
   bool iterationComplete = false;
 	while (!iterationComplete) {
+    timer().startGpuTimer();
+    // clean shading chunks
+    cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
+
+    // tracing
+    numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
+    computeIntersections << <numblocksPathSegmentTracing, blockSize1d >> > (depth, num_paths, dev_paths_ptrs, dev_geoms, hst_scene->geoms.size(), dev_intersections, dev_triangles);
+    checkCUDAError("trace one bounce");
+    cudaDeviceSynchronize();
+    depth++;
+    timer().endGpuTimer();
+
+    computeIntersectionsTime += timer().getGpuElapsedTimeForPreviousOperation();
+    /* 
+    --- Shading Stage ---
+    Shade path segments based on intersections and generate new rays by evaluating the BSDF.
+    Start off with just a big kernel that handles all the different materials you have in the scenefile. 
+    */
+    timer().startGpuTimer();
+
+    if (SORT_BY_MATERIAL)
+    {
+      thrust::device_ptr<int> dev_materialIDs_thrust(dev_material_ids);
+      thrust::device_ptr<PathSegment*> dev_paths_thrust(dev_paths_ptrs);
+      getMaterialIDArray << <numblocksPathSegmentTracing, blockSize1d >> > (num_paths, dev_material_ids, dev_intersections, dev_paths_ptrs);
+      thrust::sort_by_key(dev_materialIDs_thrust, dev_materialIDs_thrust + num_paths, dev_paths_thrust);
+    }
 
-	// clean shading chunks
-	cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
-
-	// tracing
-	dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
-	computeIntersections <<<numblocksPathSegmentTracing, blockSize1d>>> (
-		depth
-		, num_paths
-		, dev_paths
-		, dev_geoms
-		, hst_scene->geoms.size()
-		, dev_intersections
-		);
-	checkCUDAError("trace one bounce");
-	cudaDeviceSynchronize();
-	depth++;
-
-
-	// TODO:
-	// --- Shading Stage ---
-	// Shade path segments based on intersections and generate new rays by
-  // evaluating the BSDF.
-  // Start off with just a big kernel that handles all the different
-  // materials you have in the scenefile.
-  // TODO: compare between directly shading the path segments and shading
-  // path segments that have been reshuffled to be contiguous in memory.
-
-  shadeFakeMaterial<<<numblocksPathSegmentTracing, blockSize1d>>> (
-    iter,
-    num_paths,
-    dev_intersections,
-    dev_paths,
-    dev_materials
-  );
-  iterationComplete = true; // TODO: should be based off stream compaction results.
-	}
+    shadeRealMaterial << <numblocksPathSegmentTracing, blockSize1d >> > (iter, depth, num_paths, dev_intersections, dev_paths_ptrs, dev_materials);
+    checkCUDAError("shade real material");
+    timer().endGpuTimer();
 
+    shadingTime += timer().getGpuElapsedTimeForPreviousOperation();
+    timer().startGpuTimer();
+
+    // now we call the stream compaction
+    num_paths = StreamCompaction::Efficient::compact(num_paths, dev_paths_ptrs, dev_paths_ptrs);
+    checkCUDAError("Stream Compaction");
+
+    if (num_paths <= 0)
+    {
+      iterationComplete = true;
+    }
+    timer().endGpuTimer();
+
+    streamCompactionTime += timer().getGpuElapsedTimeForPreviousOperation();
+ 	}
+
+  std::cout << rayGenerateTime << ";" << computeIntersectionsTime << ";" << shadingTime << ";" << streamCompactionTime << std::endl;
+
+  //while (true);
   // Assemble this iteration and apply it to the image
   dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d;
-	finalGather<<<numBlocksPixels, blockSize1d>>>(num_paths, dev_image, dev_paths);
-
-    ///////////////////////////////////////////////////////////////////////////
+	finalGather<<<numBlocksPixels, blockSize1d>>>(pixelcount, dev_image, dev_paths);
 
-    // Send results to OpenGL buffer for rendering
-    sendImageToPBO<<<blocksPerGrid2d, blockSize2d>>>(pbo, cam.resolution, iter, dev_image);
+  ///////////////////////////////////////////////////////////////////////////
 
-    // Retrieve image from GPU
-    cudaMemcpy(hst_scene->state.image.data(), dev_image,
-            pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
+  // Send results to OpenGL buffer for rendering
+  sendImageToPBO << <blocksPerGrid2d, blockSize2d >> > (pbo, cam.resolution, iter, dev_image);
 
-    checkCUDAError("pathtrace");
+  // Retrieve image from GPU
+  cudaMemcpy(hst_scene->state.image.data(), dev_image, pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
+  checkCUDAError("pathtrace");
 }
diff --git a/src/scene.cpp b/src/scene.cpp
index cbae043..7ae44e1 100644
--- a/src/scene.cpp
+++ b/src/scene.cpp
@@ -1,5 +1,6 @@
 #include <iostream>
 #include "scene.h"
+#include "objLoader.h"
 #include <cstring>
 #include <glm/gtc/matrix_inverse.hpp>
 #include <glm/gtx/string_cast.hpp>
@@ -51,6 +52,10 @@ int Scene::loadGeom(string objectid) {
             } else if (strcmp(line.c_str(), "cube") == 0) {
                 cout << "Creating new cube..." << endl;
                 newGeom.type = CUBE;
+            } else if (strcmp(line.c_str(), "sword") == 0) {
+                cout << "Creating new sword..." << endl;
+                newGeom.type = SWORD;
+                loadObj("../scenes/sword2.obj", newGeom.startTriangleIndex, newGeom.endTriangleIndex, &triangles);
             }
         }
 
diff --git a/src/scene.h b/src/scene.h
index f29a917..e33cf99 100644
--- a/src/scene.h
+++ b/src/scene.h
@@ -22,5 +22,6 @@ class Scene {
 
     std::vector<Geom> geoms;
     std::vector<Material> materials;
+    std::vector<Triangle> triangles;
     RenderState state;
 };
diff --git a/src/sceneStructs.h b/src/sceneStructs.h
index b38b820..937af2b 100644
--- a/src/sceneStructs.h
+++ b/src/sceneStructs.h
@@ -10,6 +10,7 @@
 enum GeomType {
     SPHERE,
     CUBE,
+    SWORD,
 };
 
 struct Ray {
@@ -17,6 +18,13 @@ struct Ray {
     glm::vec3 direction;
 };
 
+struct Triangle {  // one mesh triangle; Scene keeps them in Scene::triangles
+  glm::vec3 v1;    // the three vertices, passed to glm::intersectRayTriangle as v1, v2, v3
+  glm::vec3 v2;
+  glm::vec3 v3;
+  glm::vec3 n;     // normal reported when a ray hits this triangle
+};
+
 struct Geom {
     enum GeomType type;
     int materialid;
@@ -26,6 +34,8 @@ struct Geom {
     glm::mat4 transform;
     glm::mat4 inverseTransform;
     glm::mat4 invTranspose;
+    int startTriangleIndex;
+    int endTriangleIndex;
 };
 
 struct Material {
diff --git a/stream_compaction/CMakeLists.txt b/stream_compaction/CMakeLists.txt
index ac358c9..4bb0dc2 100644
--- a/stream_compaction/CMakeLists.txt
+++ b/stream_compaction/CMakeLists.txt
@@ -1,7 +1,17 @@
 set(SOURCE_FILES
+    "common.h"
+    "common.cu"
+    "cpu.h"
+    "cpu.cu"
+    "naive.h"
+    "naive.cu"
+    "efficient.h"
+    "efficient.cu"
+    "thrust.h"
+    "thrust.cu"
     )
 
 cuda_add_library(stream_compaction
     ${SOURCE_FILES}
-    OPTIONS -arch=sm_20
+    OPTIONS -arch=sm_61
     )
diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu
new file mode 100644
index 0000000..dded90f
--- /dev/null
+++ b/stream_compaction/common.cu
@@ -0,0 +1,57 @@
+#include "common.h"
+
+// Prints the pending CUDA error (with msg and optional file:line) and exits; no-op on success.
+void checkCUDAErrorFn_SC(const char *msg, const char *file, int line) {
+    cudaError_t err = cudaGetLastError();
+    if (cudaSuccess == err) {
+        return;
+    }
+
+    fprintf(stderr, "CUDA error");
+    if (file) {
+        fprintf(stderr, " (%s:%d)", file, line);
+    }
+    fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err));
+    exit(EXIT_FAILURE);
+}
+
+
+namespace StreamCompaction {
+    namespace Common {
+
+        /**
+         * Writes one keep-flag per slot: 1 when the slot is inside the real array
+         * (index < n) and its path still has remainingBounces > 0, otherwise 0.
+         * Slots in [n, paddedN) are padding and always get 0.
+         */
+        __global__ void kernMapToBoolean(int n, int paddedN, int *bools, PathSegment **idata) {
+          const int slot = threadIdx.x + (blockIdx.x * blockDim.x);
+
+          if (slot < paddedN)
+          {
+            // The range test comes first so padding slots never dereference idata.
+            const bool keep = (slot < n) && (idata[slot]->remainingBounces > 0);
+            bools[slot] = keep ? 1 : 0;
+          }
+        }
+
+        /**
+         * Scatter step of stream compaction: every input slot whose flag in bools is
+         * non-zero copies its pointer to odata at the position given by indices.
+         *
+         * @param n        Number of slots of idata / bools / indices to process.
+         * @param odata    Destination array for the kept pointers.
+         * @param idata    Source array of pointers.
+         * @param bools    Keep-flags; a non-zero entry means "copy this slot".
+         * @param indices  Destination position in odata for each slot.
+         */
+        __global__ void kernScatter(int n, PathSegment **odata, PathSegment **idata, const int *bools, const int *indices) {
+          const int slot = threadIdx.x + (blockIdx.x * blockDim.x);
+          // Only in-range slots that were flagged take part; everything else writes nothing.
+          if (slot < n && bools[slot])
+          {
+            odata[indices[slot]] = idata[slot];
+          }
+        }
+    }
+}
diff --git a/stream_compaction/common.h b/stream_compaction/common.h
new file mode 100644
index 0000000..87865d0
--- /dev/null
+++ b/stream_compaction/common.h
@@ -0,0 +1,132 @@
+#pragma once
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <algorithm>
+#include <chrono>
+#include <stdexcept>
+#include "../src/sceneStructs.h"
+
+#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+#define checkCUDAError_SC(msg) checkCUDAErrorFn_SC(msg, FILENAME, __LINE__)
+
+/**
+ * Check for CUDA errors; print and exit if there was a problem.
+ */
+void checkCUDAErrorFn_SC(const char *msg, const char *file = NULL, int line = -1);
+
+// floor(log2(x)) for x >= 1; returns 0 for any x <= 1 (a negative x could loop forever before).
+inline int ilog2(int x) {
+    int lg = 0;
+    for (; x > 1; x >>= 1) { ++lg; }
+    return lg;
+}
+
+// ceil(log2(x)) for x >= 1; returns 0 for any x <= 1 (x == 0 used to reach ilog2(-1)).
+inline int ilog2ceil(int x) {
+    return x <= 1 ? 0 : ilog2(x - 1) + 1;
+}
+
+namespace StreamCompaction {
+    namespace Common {
+      __global__ void kernMapToBoolean(int n, int paddedN, int *bools, PathSegment **idata);
+
+      __global__ void kernScatter(int n, PathSegment **odata, PathSegment **idata, const int *bools, const int *indices);
+
+	    /**
+	    * Stopwatch with one CPU timer (std::chrono) and one GPU timer (CUDA events).
+	    * Starting a timer twice, or ending one that was not started, throws
+	    * std::runtime_error. Uncopyable and unmovable.
+        * Adapted from WindyDarian(https://github.com/WindyDarian)
+	    */
+	    class PerformanceTimer
+	    {
+	    public:
+		    PerformanceTimer() // creates the two CUDA events used by the GPU timer
+		    {
+			    cudaEventCreate(&event_start);
+			    cudaEventCreate(&event_end);
+		    }
+
+		    ~PerformanceTimer() // destroys the two CUDA events
+		    {
+			    cudaEventDestroy(event_start);
+			    cudaEventDestroy(event_end);
+		    }
+
+		    void startCpuTimer() // throws if the CPU timer is already running
+		    {
+			    if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); }
+			    cpu_timer_started = true;
+
+			    time_start_cpu = std::chrono::high_resolution_clock::now();
+		    }
+
+		    void endCpuTimer() // throws if the CPU timer was not started
+		    {
+			    time_end_cpu = std::chrono::high_resolution_clock::now(); // sampled before the state check
+
+			    if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); }
+
+			    std::chrono::duration<double, std::milli> duro = time_end_cpu - time_start_cpu; // elapsed time in ms
+			    prev_elapsed_time_cpu_milliseconds =
+				    static_cast<decltype(prev_elapsed_time_cpu_milliseconds)>(duro.count());
+
+			    cpu_timer_started = false;
+		    }
+
+		    void startGpuTimer() // throws if the GPU timer is already running
+		    {
+			    if (gpu_timer_started) { throw std::runtime_error("GPU timer already started"); }
+			    gpu_timer_started = true;
+
+			    cudaEventRecord(event_start);
+		    }
+
+		    void endGpuTimer() // throws if the GPU timer was not started
+		    {
+			    cudaEventRecord(event_end);
+			    cudaEventSynchronize(event_end); // wait for the end event before reading the elapsed time
+
+			    if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); }
+
+			    cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end);
+			    gpu_timer_started = false;
+		    }
+
+		    float getCpuElapsedTimeForPreviousOperation() // noexcept left off; the original note says it needs VS 2015
+		    {
+			    return prev_elapsed_time_cpu_milliseconds;
+		    }
+
+		    float getGpuElapsedTimeForPreviousOperation() // noexcept left off, as above
+		    {
+			    return prev_elapsed_time_gpu_milliseconds;
+		    }
+
+		    // copying and moving are disabled
+		    PerformanceTimer(const PerformanceTimer&) = delete;
+		    PerformanceTimer(PerformanceTimer&&) = delete;
+		    PerformanceTimer& operator=(const PerformanceTimer&) = delete;
+		    PerformanceTimer& operator=(PerformanceTimer&&) = delete;
+
+	    private:
+		    cudaEvent_t event_start = nullptr; // recorded by startGpuTimer
+		    cudaEvent_t event_end = nullptr;   // recorded by endGpuTimer
+
+		    using time_point_t = std::chrono::high_resolution_clock::time_point;
+		    time_point_t time_start_cpu;
+		    time_point_t time_end_cpu;
+
+		    bool cpu_timer_started = false; // true between startCpuTimer and endCpuTimer
+		    bool gpu_timer_started = false; // true between startGpuTimer and endGpuTimer
+
+		    float prev_elapsed_time_cpu_milliseconds = 0.f; // result of the last completed CPU timing
+		    float prev_elapsed_time_gpu_milliseconds = 0.f; // result of the last completed GPU timing
+	    };
+    }
+}
diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu
new file mode 100644
index 0000000..298b63c
--- /dev/null
+++ b/stream_compaction/cpu.cu
@@ -0,0 +1,91 @@
+#include <cstdio>
+#include "cpu.h"
+
+#include "common.h"
+
+namespace StreamCompaction {
+    namespace CPU {
+        using StreamCompaction::Common::PerformanceTimer;
+        // Shared timer for the CPU routines in this namespace.
+        PerformanceTimer& timer() {
+            static PerformanceTimer instance;
+            return instance;
+        }
+
+        /**
+         * Exclusive prefix sum on the CPU: odata[i] receives the sum of
+         * idata[0 .. i-1], so odata[0] is 0. The loop is bracketed by the CPU timer.
+         * Each odata[i] is written before idata[i] is read.
+         */
+        void scan(int n, int *odata, const int *idata) {
+          timer().startCpuTimer();
+
+          int running = 0;
+          for (int pos = 0; pos < n; ++pos)
+          {
+            odata[pos] = running;
+            running += idata[pos];
+          }
+
+          timer().endCpuTimer();
+        }
+
+        /**
+         * Copies every non-zero element of idata to the front of odata, in order,
+         * without building a scan first. The copy loop is bracketed by the CPU timer.
+         * @returns the number of elements written to odata.
+         */
+        int compactWithoutScan(int n, int *odata, const int *idata) {
+          timer().startCpuTimer();
+
+          int kept = 0;
+          for (int src = 0; src < n; ++src)
+          {
+            const int value = idata[src];
+            if (value == 0)
+            {
+              continue;  // zeros are dropped
+            }
+            odata[kept++] = value;
+          }
+
+          timer().endCpuTimer();
+          return kept;
+        }
+
+        /**
+         * CPU stream compaction in two passes, like the parallel version: an
+         * exclusive scan over the "is non-zero" flags gives every kept element its
+         * output slot, then a scatter pass copies the kept elements there.
+         *
+         * @returns the number of elements remaining after compaction.
+         */
+        int compactWithScan(int n, int *odata, const int *idata) {
+          int* slots = (int*) malloc(sizeof(int) * n);
+
+          timer().startCpuTimer();
+
+          // Pass 1: slots[i] = number of non-zero elements before position i.
+          int kept = 0;
+          for (int i = 0; i < n; ++i)
+          {
+            slots[i] = kept;
+            kept += (idata[i] != 0) ? 1 : 0;
+          }
+
+          // Pass 2: scatter every non-zero element to its slot.
+          for (int i = 0; i < n; ++i)
+          {
+            if (idata[i] != 0)
+            {
+              odata[slots[i]] = idata[i];
+            }
+          }
+
+          timer().endCpuTimer();
+
+          free(slots);
+          return kept;
+        }
+    }
+}
diff --git a/stream_compaction/cpu.h b/stream_compaction/cpu.h
new file mode 100644
index 0000000..236ce11
--- /dev/null
+++ b/stream_compaction/cpu.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "common.h"
+
+namespace StreamCompaction {
+    namespace CPU {
+	    StreamCompaction::Common::PerformanceTimer& timer();
+
+        void scan(int n, int *odata, const int *idata);
+
+        int compactWithoutScan(int n, int *odata, const int *idata);
+
+        int compactWithScan(int n, int *odata, const int *idata);
+    }
+}
diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu
new file mode 100644
index 0000000..ea614d6
--- /dev/null
+++ b/stream_compaction/efficient.cu
@@ -0,0 +1,156 @@
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include "common.h"
+#include "efficient.h"
+
+#define blockSize 128
+
+int* dev_efficientScanBuf;
+int* dev_efficientBools;
+int* dev_efficientIndices;
+PathSegment** dev_odata_buffer;
+
+// One level of the up-sweep: each thread whose index is a multiple of 2^(d+1) adds the value
+// at index + 2^d - 1 to the value at index + 2^(d+1) - 1, reading idata and writing odata.
+__global__ void kernEfficientScanUpSweep(int n, int d, int* odata, int* idata)
+{
+  const int first = threadIdx.x + (blockIdx.x * blockDim.x);
+  const int stride = 1 << (d + 1);
+  const bool active = (first < n) && (first % stride == 0);
+
+  if (active)
+  {
+    const int left = first + (1 << d) - 1;
+    const int right = first + stride - 1;
+    odata[right] = idata[right] + idata[left];
+  }
+}
+
+__global__ void kernSetFirstElementZero(int n, int* odata)  // launched <<<1, 1>>> between up-sweep and down-sweep
+{
+  odata[n - 1] = 0;  // despite the name, this clears the LAST element (index n - 1)
+}
+
+// One level of the down-sweep: for each index that is a multiple of 2^(d+1), the right slot's
+// value is copied to the left slot and the right slot becomes (old right + old left).
+__global__ void kernEfficientScanDownSweep(int n, int d, int* odata, int* idata)
+{
+  const int first = threadIdx.x + (blockIdx.x * blockDim.x);
+  const int stride = 1 << (d + 1);
+  if (first >= n || first % stride != 0) { return; }
+
+  const int left = first + (1 << d) - 1;
+  const int right = first + stride - 1;
+
+  // Read the right slot once; it feeds both outputs.
+  const int carried = idata[right];
+  odata[left] = carried;
+  odata[right] = carried + idata[left];
+}
+
+namespace StreamCompaction {
+    namespace Efficient {
+        using StreamCompaction::Common::PerformanceTimer;
+        // Shared timer for the work-efficient routines in this namespace.
+        PerformanceTimer& timer() {
+            static PerformanceTimer instance;
+            return instance;
+        }
+
+        /**
+         * Compacts an array of device PathSegment pointers, keeping only those whose
+         * segment still has remainingBounces > 0 (the test in Common::kernMapToBoolean).
+         * The result is staged in a scratch buffer, so dev_odata may equal dev_idata.
+         *
+         * @param n          The number of pointers in dev_idata.
+         * @param dev_odata  Device array that receives the surviving pointers.
+         * @param dev_idata  Device array of pointers to compact.
+         * @returns          The number of pointers remaining after compaction.
+         */
+        int compact(int n, PathSegment **dev_odata, PathSegment **dev_idata) {
+            // ilog2ceil is only meaningful for n >= 1, and there is nothing to compact anyway.
+            if (n <= 0) { return 0; }
+            const int nNextHighestPowTwo = 1 << ilog2ceil(n);
+            const int numBlocks = (nNextHighestPowTwo + blockSize - 1) / blockSize;
+            const int numLevels = ilog2ceil(nNextHighestPowTwo);
+
+            cudaMalloc((void**)&dev_efficientBools, nNextHighestPowTwo * sizeof(int));
+            checkCUDAError_SC("cudaMalloc bool buf failed");
+
+            cudaMalloc((void**)&dev_efficientScanBuf, nNextHighestPowTwo * sizeof(int));
+            checkCUDAError_SC("cudaMalloc buf failed");
+
+            cudaMalloc((void**)&dev_efficientIndices, nNextHighestPowTwo * sizeof(int));
+            checkCUDAError_SC("cudaMalloc indices failed");
+
+            cudaMalloc((void***)&dev_odata_buffer, nNextHighestPowTwo * sizeof(PathSegment*));
+            checkCUDAError_SC("cudaMalloc odata buffer failed");
+
+            // Keep-flags: 1 for live paths, 0 for dead paths and for the padding above n.
+            StreamCompaction::Common::kernMapToBoolean<< <numBlocks, blockSize >> > (n, nNextHighestPowTwo, dev_efficientBools, dev_idata);
+            checkCUDAError_SC("kern map to boolean");
+
+            // Start of the scan (inlined here). The flags are copied into the indices
+            // buffer so the scan can run there while the flags stay intact for the scatter.
+            cudaMemcpy((void*)dev_efficientIndices, (const void*)dev_efficientBools, nNextHighestPowTwo * sizeof(int), cudaMemcpyDeviceToDevice);
+            checkCUDAError_SC("cudaMemcpy idata failed");
+
+            // Up-sweep: one kernel launch per level.
+            for (int d = 0; d < numLevels; ++d)
+            {
+              // The kernel writes only some slots, so mirror the current data into the output buffer first.
+              cudaMemcpy((void*)dev_efficientScanBuf, (const void*)dev_efficientIndices, nNextHighestPowTwo * sizeof(int), cudaMemcpyDeviceToDevice);
+              checkCUDAError_SC("cudaMemcpy idata failed");
+              kernEfficientScanUpSweep << <numBlocks, blockSize >> > (nNextHighestPowTwo, d, dev_efficientScanBuf, dev_efficientIndices);
+              checkCUDAError_SC("Scan up sweep");
+              // Swap the buffers so dev_efficientIndices always holds the most recent data.
+              int* temp = dev_efficientScanBuf;
+              dev_efficientScanBuf = dev_efficientIndices;
+              dev_efficientIndices = temp;
+            }
+
+            // Clear the last slot of the array before the down-sweep.
+            kernSetFirstElementZero << <1, 1 >> > (nNextHighestPowTwo, dev_efficientIndices);
+            checkCUDAError_SC("set first element zero failed");
+
+            // Down-sweep: one kernel launch per level, from the top level down.
+            for (int d = numLevels - 1; d >= 0; --d)
+            {
+              // Same mirroring as in the up-sweep.
+              cudaMemcpy((void*)dev_efficientScanBuf, (const void*)dev_efficientIndices, nNextHighestPowTwo * sizeof(int), cudaMemcpyDeviceToDevice);
+              checkCUDAError_SC("cudaMemcpy idata failed");
+              kernEfficientScanDownSweep << <numBlocks, blockSize >> > (nNextHighestPowTwo, d, dev_efficientScanBuf, dev_efficientIndices);
+              checkCUDAError_SC("Scan downsweep");
+              // Swap the buffers again.
+              int* temp = dev_efficientScanBuf;
+              dev_efficientScanBuf = dev_efficientIndices;
+              dev_efficientIndices = temp;
+            }
+            // ------- end of scan
+
+            // The scan is exclusive, so its last entry does not count the last flag. When n is
+            // a power of two there is no padding and that flag can be 1, so add it explicitly.
+            int sizeOfCompactedStream = 0;
+            int lastFlag = 0;
+            cudaMemcpy(&sizeOfCompactedStream, dev_efficientIndices + (nNextHighestPowTwo - 1), 1 * sizeof(int), cudaMemcpyDeviceToHost);
+            cudaMemcpy(&lastFlag, dev_efficientBools + (nNextHighestPowTwo - 1), 1 * sizeof(int), cudaMemcpyDeviceToHost);
+            checkCUDAError_SC("memcpy failed");
+            sizeOfCompactedStream += lastFlag;
+
+            // Scatter the kept pointers into the scratch buffer, then copy them to the output.
+            StreamCompaction::Common::kernScatter << <numBlocks, blockSize >> > (n, dev_odata_buffer, dev_idata, dev_efficientBools, dev_efficientIndices);
+            checkCUDAError_SC("Scatter failed");
+            cudaMemcpy(dev_odata, dev_odata_buffer, sizeOfCompactedStream * sizeof(PathSegment*), cudaMemcpyDeviceToDevice);
+            checkCUDAError_SC("memcpy of compacted output failed");
+
+            // Release the per-call scratch buffers.
+            cudaFree(dev_efficientScanBuf);
+            cudaFree(dev_efficientBools);
+            cudaFree(dev_efficientIndices);
+            cudaFree(dev_odata_buffer);
+
+            return sizeOfCompactedStream;
+        }
+    }
+}
diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h
new file mode 100644
index 0000000..097e8a4
--- /dev/null
+++ b/stream_compaction/efficient.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "common.h"
+
+namespace StreamCompaction {
+    namespace Efficient {
+        StreamCompaction::Common::PerformanceTimer& timer();  // returns a PerformanceTimer by reference; presumably this namespace's timer - confirm in efficient.cu
+        // Stream compaction over PathSegment pointers; the definition returns the total size of the compacted stream.
+        int compact(int n, PathSegment **odata, PathSegment **idata);
+    }
+}
diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu
new file mode 100644
index 0000000..b8256ad
--- /dev/null
+++ b/stream_compaction/naive.cu
@@ -0,0 +1,108 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include "common.h"
+#include "naive.h"
+// Threads per block for every kernel launch in this file.
+#define blockSize 64
+// Device buffers that Naive::scan allocates, swaps between passes, and frees on each call.
+int* dev_gpuScanBuf;
+int* dev_idata;
+
+/**
+ * One pass of the naive scan: each element adds in the element that sits
+ * twoToPowerDMinusOne slots before it, if there is one.
+ *
+ * n                    element count; threads with a global index >= n do nothing
+ * twoToPowerDMinusOne  distance back to the partner element
+ * odata                buffer written by this pass
+ * idata                buffer read by this pass
+ *
+ * The caller (Naive::scan) passes two separate buffers and swaps them between passes.
+ */
+__global__ void kernNaiveScan(int n, int twoToPowerDMinusOne, int* odata, int* idata)
+{
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid < n)
+  {
+    // elements nearer the front than the offset have no partner and pass through unchanged
+    const int partner = tid - twoToPowerDMinusOne;
+    const int carried = (partner >= 0) ? idata[partner] : 0;
+    odata[tid] = carried + idata[tid];
+  }
+}
+
+/**
+ * Shifts idata one slot to the right into odata, placing 0 in slot 0.
+ *
+ * Naive::scan runs this after its kernNaiveScan passes, as the "shift it"
+ * step before the result is copied out.
+ *
+ * n      element count; threads with a global index >= n do nothing
+ * odata  buffer written (odata[0] = 0, odata[i] = idata[i - 1] for i > 0)
+ * idata  buffer read
+ */
+__global__ void kernShiftScan(int n, int* odata, int* idata)
+{
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid >= n)
+  {
+    return;
+  }
+  odata[tid] = (tid == 0) ? 0 : idata[tid - 1];
+}
+
+namespace StreamCompaction {
+    namespace Naive {
+        using StreamCompaction::Common::PerformanceTimer;
+        PerformanceTimer& timer()
+        {
+            static PerformanceTimer timer;  // one instance shared by every caller
+            return timer;
+        }
+
+        /**
+         * Performs an exclusive prefix-sum (aka scan) on idata, storing the result into odata.
+         * n      number of elements in idata and in odata; n <= 0 is a no-op
+         * odata  host buffer receiving the n results (odata[0] is 0)
+         * idata  host buffer holding the n input values
+         * Exactly n elements are read from idata and written to odata. The timed
+         * region covers the host<->device copies as well as the kernels.
+         */
+        void scan(int n, int *odata, const int *idata) {
+          if (n <= 0) return;  // nothing to scan, and a zero-block launch is invalid
+          const size_t numBytes = static_cast<size_t>(n) * sizeof(int);
+          const dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize);
+
+          cudaMalloc((void**)&dev_gpuScanBuf, numBytes);
+          checkCUDAError_SC("cudaMalloc buf failed");
+          cudaMalloc((void**)&dev_idata, numBytes);
+          checkCUDAError_SC("cudaMalloc idata failed");
+
+          timer().startGpuTimer();
+          cudaMemcpy(dev_idata, idata, numBytes, cudaMemcpyHostToDevice);
+          checkCUDAError_SC("cudaMemcpy idata failed");
+
+          // ceil(log2(n)) passes; the partner distance doubles on every pass
+          const int numPasses = ilog2ceil(n);
+          for (int pass = 1; pass <= numPasses; ++pass)
+          {
+            kernNaiveScan<<<fullBlocksPerGrid, blockSize>>>(n, 1 << (pass - 1), dev_gpuScanBuf, dev_idata);
+            checkCUDAError_SC("kernNaiveScan failed");
+            // flip flop the buffers: this pass's output is the next pass's input
+            int* temp = dev_gpuScanBuf;
+            dev_gpuScanBuf = dev_idata;
+            dev_idata = temp;
+          }
+
+          // inclusive -> exclusive: shift right by one, then copy the n results out
+          kernShiftScan<<<fullBlocksPerGrid, blockSize>>>(n, dev_gpuScanBuf, dev_idata);
+          checkCUDAError_SC("kernShiftScan failed");
+          cudaMemcpy(odata, dev_gpuScanBuf, numBytes, cudaMemcpyDeviceToHost);
+          checkCUDAError_SC("cudaMemcpy odata failed");
+          timer().endGpuTimer();
+
+          cudaFree(dev_gpuScanBuf);
+          cudaFree(dev_idata);
+        }
+    }
+}
diff --git a/stream_compaction/naive.h b/stream_compaction/naive.h
new file mode 100644
index 0000000..bf2d3b5
--- /dev/null
+++ b/stream_compaction/naive.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "common.h"
+#include <iostream>
+
+namespace StreamCompaction {
+    namespace Naive {
+        StreamCompaction::Common::PerformanceTimer& timer();  // naive.cu returns a function-local static PerformanceTimer
+        // Prefix-sum (aka scan) on idata, storing the result into odata; naive.cu shifts the result so odata[0] is 0.
+        void scan(int n, int *odata, const int *idata);
+    }
+}
diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu
new file mode 100644
index 0000000..2694e23
--- /dev/null
+++ b/stream_compaction/thrust.cu
@@ -0,0 +1,34 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <thrust/scan.h>
+#include "common.h"
+#include "thrust.h"
+
+
+namespace StreamCompaction {
+    namespace Thrust {
+        using StreamCompaction::Common::PerformanceTimer;
+        // Hands out the one PerformanceTimer instance owned by this namespace.
+        PerformanceTimer& timer()
+        {
+            static PerformanceTimer instance;
+            return instance;
+        }
+
+        /**
+         * Exclusive prefix-sum of the n ints in idata, written to odata, via Thrust.
+         * Only the thrust::exclusive_scan call sits inside the timed region.
+         */
+        void scan(int n, int *odata, const int *idata) {
+            thrust::device_vector<int> deviceIn(idata, idata + n);
+            thrust::device_vector<int> deviceOut(n);
+            PerformanceTimer& stopwatch = timer();
+            stopwatch.startGpuTimer();
+            thrust::exclusive_scan(deviceIn.begin(), deviceIn.end(), deviceOut.begin());
+            stopwatch.endGpuTimer();
+            thrust::copy(deviceOut.begin(), deviceOut.end(), odata);
+        }
+    }
+}
diff --git a/stream_compaction/thrust.h b/stream_compaction/thrust.h
new file mode 100644
index 0000000..fe98206
--- /dev/null
+++ b/stream_compaction/thrust.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "common.h"
+
+namespace StreamCompaction {
+    namespace Thrust {
+        StreamCompaction::Common::PerformanceTimer& timer();  // thrust.cu returns a function-local static PerformanceTimer
+        // Prefix-sum (aka scan) on idata, storing the result into odata; thrust.cu implements it with thrust::exclusive_scan.
+        void scan(int n, int *odata, const int *idata);
+    }
+}