diff --git a/Analysis.xlsx b/Analysis.xlsx
new file mode 100644
index 0000000..f26873d
Binary files /dev/null and b/Analysis.xlsx differ
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3d976c..c1171a4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,7 @@ set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
 set(EXTERNAL "external")
 include_directories("${EXTERNAL}")
 include_directories("${EXTERNAL}/include")
+include_directories("${EXTERNAL}/include/tinyobjloader")
 if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
     set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/osx")
 elseif(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
diff --git a/README.md b/README.md
index 110697c..f201b80 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,45 @@
 CUDA Path Tracer
-================
+======================
 
-**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**
+* Salaar Kohari
+  * LinkedIn ([https://www.linkedin.com/in/salaarkohari](https://www.linkedin.com/in/salaarkohari))
+  * Website ([http://salaar.kohari.com](http://salaar.kohari.com))
+  * University of Pennsylvania, CIS 565: GPU Programming and Architecture
+* Tested on: Windows 10, Intel Xeon @ 3.7GHz 32GB, GTX 1070 8GB (SIG Lab)
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+![](img/pathtrace.gif)
 
-### (TODO: Your README)
+### Introduction
+My GPU path tracer produces accurate renders in real time. Rays are scattered using visually accurate diffuse, reflective, and refractive lighting models. Techniques such as stream compaction and careful memory allocation help reduce iteration time. Other features of the path tracer include arbitrary mesh loading and anti-aliasing.
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+Some terms will be important for understanding the analysis. Each ray cast from the camera carries light for at most a maximum number of **bounces** before it terminates. One **iteration** is complete once every pixel's non-deterministic path has either reached the maximum number of bounces or failed to hit anything in the scene. The performance analysis focuses on the number of bounces and the average iteration time for various features.
 
+### Algorithm
+1. Initialize array of paths (project a ray from camera through each pixel)
+2. Compute the intersection of each ray with the scene along its path
+3. Stream compaction to remove terminated paths (optional)
+4. Shade rays that intersected something using reflective, refractive, or diffuse lighting, multiplying the result into the ray's current color
+5. Repeat steps 2-4 until max bounces reached or all paths terminated
+6. Add iteration results to the image, repeating steps 1-5 until max iterations reached
+
+### Images
+![Reflective Sphere](img/cornell-reflect.png)
+
+![Refractive Sphere](img/cornell-refract.png)
+
+
+
+![Anti-Aliasing](img/anti-aliasing.png)
+
+### Analysis
+![](img/paths_bounce.png)
+
+As expected, the remaining paths decay with bounces. (Cornell box with reflective sphere)
+
+![](img/iteration_bounce.png)
+
+Stream compaction seems to increase iteration time significantly compared with anti-aliasing and with caching the first-bounce intersections for future iterations. Perhaps if stream compaction were applied only for the first few bounces, it would provide a speedup when the number of bounces is large. (Cornell box with reflective sphere)
+
+![](img/obj_loader.png)
+
+For a very low-poly cube, running a bounding box test before checking all triangles is actually slower than just checking the triangles. This is not the case with a larger mesh: the Stanford bunny took about 750ms per iteration without the bounding box test, while enabling the test can reduce this to 11.72ms when the bunny is entirely offscreen. (Cornell box with reflective objects)
diff --git a/external/include/tinyobjloader/tiny_obj_loader.h b/external/include/tinyobjloader/tiny_obj_loader.h
new file mode 100644
index 0000000..828cbd8
--- /dev/null
+++ b/external/include/tinyobjloader/tiny_obj_loader.h
@@ -0,0 +1,2562 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2012-2018 Syoyo Fujita and many contributors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+//
+// version 1.2.3 : Added color space extension('-colorspace') to tex opts.
+// version 1.2.2 : Parse multiple group names.
+// version 1.2.1 : Added initial support for line('l') primitive(PR #178)
+// version 1.2.0 : Hardened implementation(#175)
+// version 1.1.1 : Support smoothing groups(#162)
+// version 1.1.0 : Support parsing vertex color(#144)
+// version 1.0.8 : Fix parsing `g` tag just after `usemtl`(#138)
+// version 1.0.7 : Support multiple tex options(#126)
+// version 1.0.6 : Add TINYOBJLOADER_USE_DOUBLE option(#124)
+// version 1.0.5 : Ignore `Tr` when `d` exists in MTL(#43)
+// version 1.0.4 : Support multiple filenames for 'mtllib'(#112)
+// version 1.0.3 : Support parsing texture options(#85)
+// version 1.0.2 : Improve parsing speed by about a factor of 2 for large
+// files(#105)
+// version 1.0.1 : Fixes a shape is lost if obj ends with a 'usemtl'(#104)
+// version 1.0.0 : Change data structure. Change license from BSD to MIT.
+//
+
+//
+// Use this in *one* .cc
+//   #define TINYOBJLOADER_IMPLEMENTATION
+//   #include "tiny_obj_loader.h"
+//
+
+#ifndef TINY_OBJ_LOADER_H_
+#define TINY_OBJ_LOADER_H_
+
+#define TINYOBJLOADER_IMPLEMENTATION
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace tinyobj {
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#if __has_warning("-Wzero-as-null-pointer-constant")
+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+
+#pragma clang diagnostic ignored "-Wpadded"
+
+#endif
+
+	// https://en.wikipedia.org/wiki/Wavefront_.obj_file says ...
+	//
+	//  -blendu on | off                       # set horizontal texture blending
+	//  (default on)
+	//  -blendv on | off                       # set vertical texture blending
+	//  (default on)
+	//  -boost real_value                      # boost mip-map sharpness
+	//  -mm base_value gain_value              # modify texture map values (default
+	//  0 1)
+	//                                         #     base_value = brightness,
+	//                                         gain_value = contrast
+	//  -o u [v [w]]                           # Origin offset             (default
+	//  0 0 0)
+	//  -s u [v [w]]                           # Scale                     (default
+	//  1 1 1)
+	//  -t u [v [w]]                           # Turbulence                (default
+	//  0 0 0)
+	//  -texres resolution                     # texture resolution to create
+	//  -clamp on | off                        # only render texels in the clamped
+	//  0-1 range (default off)
+	//                                         #   When unclamped, textures are
+	//                                         repeated across a surface,
+	//                                         #   when clamped, only texels which
+	//                                         fall within the 0-1
+	//                                         #   range are rendered.
+	//  -bm mult_value                         # bump multiplier (for bump maps
+	//  only)
+	//
+	//  -imfchan r | g | b | m | l | z         # specifies which channel of the file
+	//  is used to
+	//                                         # create a scalar or bump texture.
+	//                                         r:red, g:green,
+	//                                         # b:blue, m:matte, l:luminance,
+	//                                         z:z-depth..
+	//                                         # (the default for bump is 'l' and
+	//                                         for decal is 'm')
+	//  bump -imfchan r bumpmap.tga            # says to use the red channel of
+	//  bumpmap.tga as the bumpmap
+	//
+	// For reflection maps...
+	//
+	//   -type sphere                           # specifies a sphere for a "refl"
+	//   reflection map
+	//   -type cube_top    | cube_bottom |      # when using a cube map, the texture
+	//   file for each
+	//         cube_front  | cube_back   |      # side of the cube is specified
+	//         separately
+	//         cube_left   | cube_right
+	//
+	// TinyObjLoader extension.
+	//
+	//   -colorspace SPACE                      # Color space of the texture. e.g. 'sRGB' or 'linear'
+	// 
+
+#ifdef TINYOBJLOADER_USE_DOUBLE
+	//#pragma message "using double"
+	typedef double real_t;
+#else
+	//#pragma message "using float"
+	typedef float real_t;
+#endif
+
+	typedef enum {
+		TEXTURE_TYPE_NONE,  // default
+		TEXTURE_TYPE_SPHERE,
+		TEXTURE_TYPE_CUBE_TOP,
+		TEXTURE_TYPE_CUBE_BOTTOM,
+		TEXTURE_TYPE_CUBE_FRONT,
+		TEXTURE_TYPE_CUBE_BACK,
+		TEXTURE_TYPE_CUBE_LEFT,
+		TEXTURE_TYPE_CUBE_RIGHT
+	} texture_type_t;
+
+	typedef struct {
+		texture_type_t type;      // -type (default TEXTURE_TYPE_NONE)
+		real_t sharpness;         // -boost (default 1.0?)
+		real_t brightness;        // base_value in -mm option (default 0)
+		real_t contrast;          // gain_value in -mm option (default 1)
+		real_t origin_offset[3];  // -o u [v [w]] (default 0 0 0)
+		real_t scale[3];          // -s u [v [w]] (default 1 1 1)
+		real_t turbulence[3];     // -t u [v [w]] (default 0 0 0)
+								  // int   texture_resolution; // -texres resolution (default = ?) TODO
+		bool clamp;    // -clamp (default false)
+		char imfchan;  // -imfchan (the default for bump is 'l' and for decal is 'm')
+		bool blendu;   // -blendu (default on)
+		bool blendv;   // -blendv (default on)
+		real_t bump_multiplier;  // -bm (for bump maps only, default 1.0)
+
+								 // extension
+		std::string colorspace;   // Explicitly specify color space of stored value. Usually `sRGB` or `linear` (default empty).
+	} texture_option_t;
+
+	typedef struct {
+		std::string name;
+
+		real_t ambient[3];
+		real_t diffuse[3];
+		real_t specular[3];
+		real_t transmittance[3];
+		real_t emission[3];
+		real_t shininess;
+		real_t ior;       // index of refraction
+		real_t dissolve;  // 1 == opaque; 0 == fully transparent
+						  // illumination model (see http://www.fileformat.info/format/material/)
+		int illum;
+
+		int dummy;  // Suppress padding warning.
+
+		std::string ambient_texname;             // map_Ka
+		std::string diffuse_texname;             // map_Kd
+		std::string specular_texname;            // map_Ks
+		std::string specular_highlight_texname;  // map_Ns
+		std::string bump_texname;                // map_bump, map_Bump, bump
+		std::string displacement_texname;        // disp
+		std::string alpha_texname;               // map_d
+		std::string reflection_texname;          // refl
+
+		texture_option_t ambient_texopt;
+		texture_option_t diffuse_texopt;
+		texture_option_t specular_texopt;
+		texture_option_t specular_highlight_texopt;
+		texture_option_t bump_texopt;
+		texture_option_t displacement_texopt;
+		texture_option_t alpha_texopt;
+		texture_option_t reflection_texopt;
+
+		// PBR extension
+		// http://exocortex.com/blog/extending_wavefront_mtl_to_support_pbr
+		real_t roughness;            // [0, 1] default 0
+		real_t metallic;             // [0, 1] default 0
+		real_t sheen;                // [0, 1] default 0
+		real_t clearcoat_thickness;  // [0, 1] default 0
+		real_t clearcoat_roughness;  // [0, 1] default 0
+		real_t anisotropy;           // aniso. [0, 1] default 0
+		real_t anisotropy_rotation;  // anisor. [0, 1] default 0
+		real_t pad0;
+		std::string roughness_texname;  // map_Pr
+		std::string metallic_texname;   // map_Pm
+		std::string sheen_texname;      // map_Ps
+		std::string emissive_texname;   // map_Ke
+		std::string normal_texname;     // norm. For normal mapping.
+
+		texture_option_t roughness_texopt;
+		texture_option_t metallic_texopt;
+		texture_option_t sheen_texopt;
+		texture_option_t emissive_texopt;
+		texture_option_t normal_texopt;
+
+		int pad2;
+
+		std::map<std::string, std::string> unknown_parameter;
+	} material_t;
+
+	typedef struct {
+		std::string name;
+
+		std::vector<int> intValues;
+		std::vector<real_t> floatValues;
+		std::vector<std::string> stringValues;
+	} tag_t;
+
+	// Index struct to support different indices for vtx/normal/texcoord.
+	// -1 means not used.
+	typedef struct {
+		int vertex_index;
+		int normal_index;
+		int texcoord_index;
+	} index_t;
+
+	typedef struct {
+		std::vector<index_t> indices;
+		std::vector<unsigned char> num_face_vertices;  // The number of vertices per
+													   // face. 3 = triangle, 4 = quad,
+													   // ... Up to 255.
+		std::vector<int> material_ids;                 // per-face material ID
+		std::vector<unsigned int> smoothing_group_ids;  // per-face smoothing group
+														// ID(0 = off. positive value
+														// = group id)
+		std::vector<tag_t> tags;                        // SubD tag
+	} mesh_t;
+
+	typedef struct {
+		std::vector<int> indices;  // pairs of indices for lines
+	} path_t;
+
+	typedef struct {
+		std::string name;
+		mesh_t mesh;
+		path_t path;
+	} shape_t;
+
+	// Vertex attributes
+	typedef struct {
+		std::vector<real_t> vertices;   // 'v'
+		std::vector<real_t> normals;    // 'vn'
+		std::vector<real_t> texcoords;  // 'vt'
+		std::vector<real_t> colors;     // extension: vertex colors
+	} attrib_t;
+
+	typedef struct callback_t_ {
+		// `w` is optional and set to 1 if there is no `w` item in the `v` line
+		void(*vertex_cb)(void *user_data, real_t x, real_t y, real_t z, real_t w);
+		void(*normal_cb)(void *user_data, real_t x, real_t y, real_t z);
+
+		// `y` and `z` are optional and set to 0 if there is no `y` and/or `z` item
+		// in the `vt` line.
+		void(*texcoord_cb)(void *user_data, real_t x, real_t y, real_t z);
+
+		// called per 'f' line. num_indices is the number of face indices (e.g. 3 for
+		// triangle, 4 for quad)
+		// 0 will be passed for undefined index in index_t members.
+		void(*index_cb)(void *user_data, index_t *indices, int num_indices);
+		// `name` is the material name; `material_id` is the array index into
+		// material_t[], or -1 if the
+		// material was not found in .mtl
+		void(*usemtl_cb)(void *user_data, const char *name, int material_id);
+		// `materials` = parsed material data.
+		void(*mtllib_cb)(void *user_data, const material_t *materials,
+			int num_materials);
+		// There may be multiple group names
+		void(*group_cb)(void *user_data, const char **names, int num_names);
+		void(*object_cb)(void *user_data, const char *name);
+
+		callback_t_()
+			: vertex_cb(NULL),
+			normal_cb(NULL),
+			texcoord_cb(NULL),
+			index_cb(NULL),
+			usemtl_cb(NULL),
+			mtllib_cb(NULL),
+			group_cb(NULL),
+			object_cb(NULL) {}
+	} callback_t;
+
+	class MaterialReader {
+	public:
+		MaterialReader() {}
+		virtual ~MaterialReader();
+
+		virtual bool operator()(const std::string &matId,
+			std::vector<material_t> *materials,
+			std::map<std::string, int> *matMap,
+			std::string *err) = 0;
+	};
+
+	class MaterialFileReader : public MaterialReader {
+	public:
+		explicit MaterialFileReader(const std::string &mtl_basedir)
+			: m_mtlBaseDir(mtl_basedir) {}
+		virtual ~MaterialFileReader() {}
+		virtual bool operator()(const std::string &matId,
+			std::vector<material_t> *materials,
+			std::map<std::string, int> *matMap, std::string *err);
+
+	private:
+		std::string m_mtlBaseDir;
+	};
+
+	class MaterialStreamReader : public MaterialReader {
+	public:
+		explicit MaterialStreamReader(std::istream &inStream)
+			: m_inStream(inStream) {}
+		virtual ~MaterialStreamReader() {}
+		virtual bool operator()(const std::string &matId,
+			std::vector<material_t> *materials,
+			std::map<std::string, int> *matMap, std::string *err);
+
+	private:
+		std::istream &m_inStream;
+	};
+
+	/// Loads .obj from a file.
+	/// 'attrib', 'shapes' and 'materials' will be filled with the parsed
+	/// vertex attribute, shape and material data respectively.
+	/// Returns true when the .obj is loaded successfully.
+	/// Returns warning and error messages in `err`.
+	/// 'mtl_basedir' is optional, and is used as the base directory for the .mtl file.
+	/// By default (`NULL`), the .mtl file is searched for in the application's working
+	/// directory.
+	/// 'triangulate' is optional, and controls whether polygon faces in the .obj
+	/// are triangulated or not.
+	/// Option 'default_vcols_fallback' specifies whether vertex colors should
+	/// always be defined, even if no colors are given (fallback to white).
+	bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+		std::vector<material_t> *materials, std::string *err,
+		const char *filename, const char *mtl_basedir = NULL,
+		bool triangulate = true, bool default_vcols_fallback = true);
+
+	/// Loads .obj from a stream with custom user callbacks.
+	/// .mtl is loaded as usual and parsed material_t data will be passed to
+	/// `callback.mtllib_cb`.
+	/// Returns true when the .obj/.mtl is loaded successfully.
+	/// Returns warning and error messages in `err`.
+	/// See `examples/callback_api/` for how to use this function.
+	bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
+		void *user_data = NULL,
+		MaterialReader *readMatFn = NULL,
+		std::string *err = NULL);
+
+	/// Loads object from a std::istream, using `readMatFn` to load the
+	/// materials.
+	/// Returns true when the .obj is loaded successfully.
+	/// Returns warning and error messages in `err`.
+	bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+		std::vector<material_t> *materials, std::string *err,
+		std::istream *inStream, MaterialReader *readMatFn = NULL,
+		bool triangulate = true, bool default_vcols_fallback = true);
+
+	/// Loads materials into std::map
+	void LoadMtl(std::map<std::string, int> *material_map,
+		std::vector<material_t> *materials, std::istream *inStream,
+		std::string *warning);
+
+}  // namespace tinyobj
+
+#endif  // TINY_OBJ_LOADER_H_
+
+#ifdef TINYOBJLOADER_IMPLEMENTATION
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include <utility>
+
+#include <fstream>
+#include <sstream>
+
+namespace tinyobj {
+
+	MaterialReader::~MaterialReader() {}
+
+	struct vertex_index_t {
+		int v_idx, vt_idx, vn_idx;
+		vertex_index_t() : v_idx(-1), vt_idx(-1), vn_idx(-1) {}
+		explicit vertex_index_t(int idx) : v_idx(idx), vt_idx(idx), vn_idx(idx) {}
+		vertex_index_t(int vidx, int vtidx, int vnidx)
+			: v_idx(vidx), vt_idx(vtidx), vn_idx(vnidx) {}
+	};
+
+	// Internal data structure for face representation
+	// index + smoothing group.
+	struct face_t {
+		unsigned int
+			smoothing_group_id;  // smoothing group id. 0 = smoothing group is off.
+		int pad_;
+		std::vector<vertex_index_t> vertex_indices;  // face vertex indices.
+
+		face_t() : smoothing_group_id(0) {}
+	};
+
+	struct line_t {
+		int idx0;
+		int idx1;
+	};
+
+	struct tag_sizes {
+		tag_sizes() : num_ints(0), num_reals(0), num_strings(0) {}
+		int num_ints;
+		int num_reals;
+		int num_strings;
+	};
+
+	struct obj_shape {
+		std::vector<real_t> v;
+		std::vector<real_t> vn;
+		std::vector<real_t> vt;
+	};
+
+	// See
+	// http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf
+	static std::istream &safeGetline(std::istream &is, std::string &t) {
+		t.clear();
+
+		// The characters in the stream are read one-by-one using a std::streambuf.
+		// That is faster than reading them one-by-one using the std::istream.
+		// Code that uses streambuf this way must be guarded by a sentry object.
+		// The sentry object performs various tasks,
+		// such as thread synchronization and updating the stream state.
+
+		std::istream::sentry se(is, true);
+		std::streambuf *sb = is.rdbuf();
+
+		if (se) {
+			for (;;) {
+				int c = sb->sbumpc();
+				switch (c) {
+				case '\n':
+					return is;
+				case '\r':
+					if (sb->sgetc() == '\n') sb->sbumpc();
+					return is;
+				case EOF:
+					// Also handle the case when the last line has no line ending
+					if (t.empty()) is.setstate(std::ios::eofbit);
+					return is;
+				default:
+					t += static_cast<char>(c);
+				}
+			}
+		}
+
+		return is;
+	}
+
+#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t'))
+#define IS_DIGIT(x) \
+  (static_cast<unsigned int>((x) - '0') < static_cast<unsigned int>(10))
+#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0'))
+
+	// Converts a 1-based or negative (relative to n) index into a zero-based one in *ret; returns false for idx == 0 or a null ret.
+	static inline bool fixIndex(int idx, int n, int *ret) {
+		if (!ret) {
+			return false;
+		}
+
+		if (idx > 0) {
+			(*ret) = idx - 1;
+			return true;
+		}
+
+		if (idx == 0) {
+			// zero is not allowed according to the spec.
+			return false;
+		}
+
+		if (idx < 0) {
+			(*ret) = n + idx;  // negative value = relative to n (-1 -> n - 1); result is not range-checked
+			return true;
+		}
+
+		return false;  // never reach here.
+	}
+
+	static inline std::string parseString(const char **token) {
+		std::string s;
+		(*token) += strspn((*token), " \t");
+		size_t e = strcspn((*token), " \t\r");
+		s = std::string((*token), &(*token)[e]);
+		(*token) += e;
+		return s;
+	}
+
+	static inline int parseInt(const char **token) {
+		(*token) += strspn((*token), " \t");
+		int i = atoi((*token));
+		(*token) += strcspn((*token), " \t\r");
+		return i;
+	}
+
+	// Tries to parse a floating point number located at s.
+	//
+	// s_end should be a location in the string where reading should absolutely
+	// stop. For example at the end of the string, to prevent buffer overflows.
+	//
+	// Parses the following EBNF grammar:
+	//   sign    = "+" | "-" ;
+	//   END     = ? anything not in digit ?
+	//   digit   = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+	//   integer = [sign] , digit , {digit} ;
+	//   decimal = integer , ["." , integer] ;
+	//   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ;
+	//
+	//  Valid strings are for example:
+	//   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41   11e2
+	//
+	// If the parsing is a success, result is set to the parsed value and true
+	// is returned.
+	//
+	// The function is greedy and will parse until any of the following happens:
+	//  - a non-conforming character is encountered.
+	//  - s_end is reached.
+	//
+	// The following situations trigger a failure:
+	//  - s >= s_end.
+	//  - parse failure.
+	//
+	static bool tryParseDouble(const char *s, const char *s_end, double *result) {
+		if (s >= s_end) {
+			return false;
+		}
+
+		double mantissa = 0.0;
+		// This exponent is base 2 rather than 10.
+		// However the exponent we parse is supposed to be one of ten,
+		// thus we must take care to convert the exponent/and or the
+		// mantissa to a * 2^E, where a is the mantissa and E is the
+		// exponent.
+		// To get the final double we will use ldexp, it requires the
+		// exponent to be in base 2.
+		int exponent = 0;
+
+		// NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED
+		// TO JUMP OVER DEFINITIONS.
+		char sign = '+';
+		char exp_sign = '+';
+		char const *curr = s;
+
+		// How many characters were read in a loop.
+		int read = 0;
+		// Tells whether a loop terminated due to reaching s_end.
+		bool end_not_reached = false;
+
+		/*
+		BEGIN PARSING.
+		*/
+
+		// Find out what sign we've got.
+		if (*curr == '+' || *curr == '-') {
+			sign = *curr;
+			curr++;
+		}
+		else if (IS_DIGIT(*curr)) { /* Pass through. */
+		}
+		else {
+			goto fail;
+		}
+
+		// Read the integer part.
+		end_not_reached = (curr != s_end);
+		while (end_not_reached && IS_DIGIT(*curr)) {
+			mantissa *= 10;
+			mantissa += static_cast<int>(*curr - 0x30);
+			curr++;
+			read++;
+			end_not_reached = (curr != s_end);
+		}
+
+		// We must make sure we actually got something.
+		if (read == 0) goto fail;
+		// We allow numbers of form "#", "###" etc.
+		if (!end_not_reached) goto assemble;
+
+		// Read the decimal part.
+		if (*curr == '.') {
+			curr++;
+			read = 1;
+			end_not_reached = (curr != s_end);
+			while (end_not_reached && IS_DIGIT(*curr)) {
+				static const double pow_lut[] = {
+					1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001,
+				};
+				const int lut_entries = sizeof pow_lut / sizeof pow_lut[0];
+
+				// NOTE: Don't use powf here, it will absolutely murder precision.
+				mantissa += static_cast<int>(*curr - 0x30) *
+					(read < lut_entries ? pow_lut[read] : std::pow(10.0, -read));
+				read++;
+				curr++;
+				end_not_reached = (curr != s_end);
+			}
+		}
+		else if (*curr == 'e' || *curr == 'E') {
+		}
+		else {
+			goto assemble;
+		}
+
+		if (!end_not_reached) goto assemble;
+
+		// Read the exponent part.
+		if (*curr == 'e' || *curr == 'E') {
+			curr++;
+			// Figure out if a sign is present and, if so, record it.
+			end_not_reached = (curr != s_end);
+			if (end_not_reached && (*curr == '+' || *curr == '-')) {
+				exp_sign = *curr;
+				curr++;
+			}
+			else if (IS_DIGIT(*curr)) { /* Pass through. */
+			}
+			else {
+				// Empty E is not allowed.
+				goto fail;
+			}
+
+			read = 0;
+			end_not_reached = (curr != s_end);
+			while (end_not_reached && IS_DIGIT(*curr)) {
+				exponent *= 10;
+				exponent += static_cast<int>(*curr - 0x30);
+				curr++;
+				read++;
+				end_not_reached = (curr != s_end);
+			}
+			exponent *= (exp_sign == '+' ? 1 : -1);
+			if (read == 0) goto fail;
+		}
+
+	assemble:
+		*result = (sign == '+' ? 1 : -1) *
+			(exponent ? std::ldexp(mantissa * std::pow(5.0, exponent), exponent)
+				: mantissa);
+		return true;
+	fail:
+		return false;
+	}
+
+	static inline real_t parseReal(const char **token, double default_value = 0.0) {
+		(*token) += strspn((*token), " \t");
+		const char *end = (*token) + strcspn((*token), " \t\r");
+		double val = default_value;
+		tryParseDouble((*token), end, &val);
+		real_t f = static_cast<real_t>(val);
+		(*token) = end;
+		return f;
+	}
+
+	static inline bool parseReal(const char **token, real_t *out) {
+		(*token) += strspn((*token), " \t");
+		const char *end = (*token) + strcspn((*token), " \t\r");
+		double val;
+		bool ret = tryParseDouble((*token), end, &val);
+		if (ret) {
+			real_t f = static_cast<real_t>(val);
+			(*out) = f;
+		}
+		(*token) = end;
+		return ret;
+	}
+
+	static inline void parseReal2(real_t *x, real_t *y, const char **token,
+		const double default_x = 0.0,
+		const double default_y = 0.0) {
+		(*x) = parseReal(token, default_x);
+		(*y) = parseReal(token, default_y);
+	}
+
+	static inline void parseReal3(real_t *x, real_t *y, real_t *z,
+		const char **token, const double default_x = 0.0,
+		const double default_y = 0.0,
+		const double default_z = 0.0) {
+		(*x) = parseReal(token, default_x);
+		(*y) = parseReal(token, default_y);
+		(*z) = parseReal(token, default_z);
+	}
+
+	static inline void parseV(real_t *x, real_t *y, real_t *z, real_t *w,
+		const char **token, const double default_x = 0.0,
+		const double default_y = 0.0,
+		const double default_z = 0.0,
+		const double default_w = 1.0) {
+		(*x) = parseReal(token, default_x);
+		(*y) = parseReal(token, default_y);
+		(*z) = parseReal(token, default_z);
+		(*w) = parseReal(token, default_w);
+	}
+
+	// Extension: parse vertex with colors(6 items)
+	static inline bool parseVertexWithColor(real_t *x, real_t *y, real_t *z,
+		real_t *r, real_t *g, real_t *b,
+		const char **token,
+		const double default_x = 0.0,
+		const double default_y = 0.0,
+		const double default_z = 0.0) {
+		(*x) = parseReal(token, default_x);
+		(*y) = parseReal(token, default_y);
+		(*z) = parseReal(token, default_z);
+
+		const bool found_color = parseReal(token, r) && parseReal(token, g) && parseReal(token, b);
+
+		if (!found_color) {
+			(*r) = (*g) = (*b) = 1.0;
+		}
+
+		return found_color;
+	}
+
+	static inline bool parseOnOff(const char **token, bool default_value = true) {
+		(*token) += strspn((*token), " \t");
+		const char *end = (*token) + strcspn((*token), " \t\r");
+
+		bool ret = default_value;
+		if ((0 == strncmp((*token), "on", 2))) {
+			ret = true;
+		}
+		else if ((0 == strncmp((*token), "off", 3))) {
+			ret = false;
+		}
+
+		(*token) = end;
+		return ret;
+	}
+
+	static inline texture_type_t parseTextureType(
+		const char **token, texture_type_t default_value = TEXTURE_TYPE_NONE) {
+		(*token) += strspn((*token), " \t");
+		const char *end = (*token) + strcspn((*token), " \t\r");
+		texture_type_t ty = default_value;
+
+		if ((0 == strncmp((*token), "cube_top", strlen("cube_top")))) {
+			ty = TEXTURE_TYPE_CUBE_TOP;
+		}
+		else if ((0 == strncmp((*token), "cube_bottom", strlen("cube_bottom")))) {
+			ty = TEXTURE_TYPE_CUBE_BOTTOM;
+		}
+		else if ((0 == strncmp((*token), "cube_left", strlen("cube_left")))) {
+			ty = TEXTURE_TYPE_CUBE_LEFT;
+		}
+		else if ((0 == strncmp((*token), "cube_right", strlen("cube_right")))) {
+			ty = TEXTURE_TYPE_CUBE_RIGHT;
+		}
+		else if ((0 == strncmp((*token), "cube_front", strlen("cube_front")))) {
+			ty = TEXTURE_TYPE_CUBE_FRONT;
+		}
+		else if ((0 == strncmp((*token), "cube_back", strlen("cube_back")))) {
+			ty = TEXTURE_TYPE_CUBE_BACK;
+		}
+		else if ((0 == strncmp((*token), "sphere", strlen("sphere")))) {
+			ty = TEXTURE_TYPE_SPHERE;
+		}
+
+		(*token) = end;
+		return ty;
+	}
+
+	static tag_sizes parseTagTriple(const char **token) {
+		tag_sizes ts;
+
+		(*token) += strspn((*token), " \t");
+		ts.num_ints = atoi((*token));
+		(*token) += strcspn((*token), "/ \t\r");
+		if ((*token)[0] != '/') {
+			return ts;
+		}
+
+		(*token)++;  // Skip '/'
+
+		(*token) += strspn((*token), " \t");
+		ts.num_reals = atoi((*token));
+		(*token) += strcspn((*token), "/ \t\r");
+		if ((*token)[0] != '/') {
+			return ts;
+		}
+		(*token)++;  // Skip '/'
+
+		ts.num_strings = parseInt(token);
+
+		return ts;
+	}
+
+	// Parse triples with index offsets: i, i/j/k, i//k, i/j
+	static bool parseTriple(const char **token, int vsize, int vnsize, int vtsize,
+		vertex_index_t *ret) {
+		if (!ret) {
+			return false;
+		}
+
+		vertex_index_t vi(-1);
+
+		if (!fixIndex(atoi((*token)), vsize, &(vi.v_idx))) {
+			return false;
+		}
+
+		(*token) += strcspn((*token), "/ \t\r");
+		if ((*token)[0] != '/') {
+			(*ret) = vi;
+			return true;
+		}
+		(*token)++;
+
+		// i//k
+		if ((*token)[0] == '/') {
+			(*token)++;
+			if (!fixIndex(atoi((*token)), vnsize, &(vi.vn_idx))) {
+				return false;
+			}
+			(*token) += strcspn((*token), "/ \t\r");
+			(*ret) = vi;
+			return true;
+		}
+
+		// i/j/k or i/j
+		if (!fixIndex(atoi((*token)), vtsize, &(vi.vt_idx))) {
+			return false;
+		}
+
+		(*token) += strcspn((*token), "/ \t\r");
+		if ((*token)[0] != '/') {
+			(*ret) = vi;
+			return true;
+		}
+
+		// i/j/k
+		(*token)++;  // skip '/'
+		if (!fixIndex(atoi((*token)), vnsize, &(vi.vn_idx))) {
+			return false;
+		}
+		(*token) += strcspn((*token), "/ \t\r");
+
+		(*ret) = vi;
+
+		return true;
+	}
+
+	// Parse one face-vertex reference without any index fix-up.
+	//
+	// Accepted spellings: `i`, `i/j`, `i//k`, `i/j/k`. The numbers are stored
+	// exactly as atoi() returns them; components that are absent keep the 0 the
+	// result was constructed with. `*token` is advanced past the consumed
+	// characters.
+	static vertex_index_t parseRawTriple(const char **token) {
+		const char *&cursor = *token;
+		vertex_index_t raw(static_cast<int>(0));  // 0 is an invalid index in OBJ
+
+		raw.v_idx = atoi(cursor);
+		cursor += strcspn(cursor, "/ \t\r");
+
+		if (cursor[0] == '/') {
+			++cursor;
+
+			if (cursor[0] == '/') {
+				// i//k
+				++cursor;
+				raw.vn_idx = atoi(cursor);
+				cursor += strcspn(cursor, "/ \t\r");
+			}
+			else {
+				// i/j or i/j/k
+				raw.vt_idx = atoi(cursor);
+				cursor += strcspn(cursor, "/ \t\r");
+
+				if (cursor[0] == '/') {
+					// i/j/k
+					++cursor;
+					raw.vn_idx = atoi(cursor);
+					cursor += strcspn(cursor, "/ \t\r");
+				}
+			}
+		}
+
+		return raw;
+	}
+
+	// Parse the argument part of a texture map statement: zero or more option
+	// flags followed by the texture file name.
+	//
+	// texname  Receives the file name; written only when a name was found.
+	// texopt   Always overwritten: every option is first reset to its default
+	//          and then updated from the flags present in `linebuf`.
+	// linebuf  NUL-terminated text following the statement keyword.
+	// is_bump  Selects the default `imfchan` ('l' for bump maps, 'm' otherwise).
+	//
+	// Returns true if a texture file name was found.
+	static bool ParseTextureNameAndOption(std::string *texname,
+		texture_option_t *texopt,
+		const char *linebuf, const bool is_bump) {
+		// @todo { write more robust lexer and parser. }
+		bool found_texname = false;
+		std::string texture_name;
+
+		// Fill with default value for texopt.
+		if (is_bump) {
+			texopt->imfchan = 'l';
+		}
+		else {
+			texopt->imfchan = 'm';
+		}
+		texopt->bump_multiplier = static_cast<real_t>(1.0);
+		texopt->clamp = false;
+		texopt->blendu = true;
+		texopt->blendv = true;
+		texopt->sharpness = static_cast<real_t>(1.0);
+		texopt->brightness = static_cast<real_t>(0.0);
+		texopt->contrast = static_cast<real_t>(1.0);
+		texopt->origin_offset[0] = static_cast<real_t>(0.0);
+		texopt->origin_offset[1] = static_cast<real_t>(0.0);
+		texopt->origin_offset[2] = static_cast<real_t>(0.0);
+		texopt->scale[0] = static_cast<real_t>(1.0);
+		texopt->scale[1] = static_cast<real_t>(1.0);
+		texopt->scale[2] = static_cast<real_t>(1.0);
+		texopt->turbulence[0] = static_cast<real_t>(0.0);
+		texopt->turbulence[1] = static_cast<real_t>(0.0);
+		texopt->turbulence[2] = static_cast<real_t>(0.0);
+		texopt->type = TEXTURE_TYPE_NONE;
+		// Reset the colorspace as well. The caller reuses the same texopt object
+		// across materials, so without this a `-colorspace` value from an earlier
+		// statement would survive into a later one that does not specify it.
+		texopt->colorspace = std::string();
+
+		const char *token = linebuf;  // Assume line ends with NULL
+
+		while (!IS_NEW_LINE((*token))) {
+			token += strspn(token, " \t");  // skip space
+			if ((0 == strncmp(token, "-blendu", 7)) && IS_SPACE((token[7]))) {
+				token += 8;
+				texopt->blendu = parseOnOff(&token, /* default */ true);
+			}
+			else if ((0 == strncmp(token, "-blendv", 7)) && IS_SPACE((token[7]))) {
+				token += 8;
+				texopt->blendv = parseOnOff(&token, /* default */ true);
+			}
+			else if ((0 == strncmp(token, "-clamp", 6)) && IS_SPACE((token[6]))) {
+				token += 7;
+				texopt->clamp = parseOnOff(&token, /* default */ true);
+			}
+			else if ((0 == strncmp(token, "-boost", 6)) && IS_SPACE((token[6]))) {
+				token += 7;
+				texopt->sharpness = parseReal(&token, 1.0);
+			}
+			else if ((0 == strncmp(token, "-bm", 3)) && IS_SPACE((token[3]))) {
+				token += 4;
+				texopt->bump_multiplier = parseReal(&token, 1.0);
+			}
+			else if ((0 == strncmp(token, "-o", 2)) && IS_SPACE((token[2]))) {
+				token += 3;
+				parseReal3(&(texopt->origin_offset[0]), &(texopt->origin_offset[1]),
+					&(texopt->origin_offset[2]), &token);
+			}
+			else if ((0 == strncmp(token, "-s", 2)) && IS_SPACE((token[2]))) {
+				token += 3;
+				parseReal3(&(texopt->scale[0]), &(texopt->scale[1]), &(texopt->scale[2]),
+					&token, 1.0, 1.0, 1.0);
+			}
+			else if ((0 == strncmp(token, "-t", 2)) && IS_SPACE((token[2]))) {
+				token += 3;
+				parseReal3(&(texopt->turbulence[0]), &(texopt->turbulence[1]),
+					&(texopt->turbulence[2]), &token);
+			}
+			else if ((0 == strncmp(token, "-type", 5)) && IS_SPACE((token[5]))) {
+				// Only the keyword is skipped here; the separator is left for
+				// parseTextureType() to deal with.
+				token += 5;
+				texopt->type = parseTextureType((&token), TEXTURE_TYPE_NONE);
+			}
+			else if ((0 == strncmp(token, "-imfchan", 8)) && IS_SPACE((token[8]))) {
+				token += 9;
+				token += strspn(token, " \t");
+				const char *end = token + strcspn(token, " \t\r");
+				if ((end - token) == 1) {  // Assume one char for -imfchan
+					texopt->imfchan = (*token);
+				}
+				token = end;
+			}
+			else if ((0 == strncmp(token, "-mm", 3)) && IS_SPACE((token[3]))) {
+				token += 4;
+				parseReal2(&(texopt->brightness), &(texopt->contrast), &token, 0.0, 1.0);
+			}
+			else if ((0 == strncmp(token, "-colorspace", 11)) && IS_SPACE((token[11]))) {
+				token += 12;
+				texopt->colorspace = parseString(&token);
+			}
+			else {
+				// Assume texture filename
+#if 0
+				size_t len = strcspn(token, " \t\r");  // untile next space
+				texture_name = std::string(token, token + len);
+				token += len;
+
+				token += strspn(token, " \t");  // skip space
+#else
+				// Read filename until line end to parse filename containing whitespace
+				// TODO(syoyo): Support parsing texture option flag after the filename.
+				texture_name = std::string(token);
+				token += texture_name.length();
+#endif
+
+				found_texname = true;
+			}
+		}
+
+		if (found_texname) {
+			(*texname) = texture_name;
+			return true;
+		}
+		else {
+			return false;
+		}
+	}
+
+	// Reset the scalar, color, texture-name and unknown-parameter fields of
+	// `material` to their initial values. The per-texture `*_texopt` members are
+	// not touched here.
+	static void InitMaterial(material_t *material) {
+		const real_t kZero = static_cast<real_t>(0.0);
+		const real_t kOne = static_cast<real_t>(1.0);
+
+		// Names: the material itself and every texture slot.
+		material->name.clear();
+		material->ambient_texname.clear();
+		material->diffuse_texname.clear();
+		material->specular_texname.clear();
+		material->specular_highlight_texname.clear();
+		material->bump_texname.clear();
+		material->displacement_texname.clear();
+		material->reflection_texname.clear();
+		material->alpha_texname.clear();
+		material->roughness_texname.clear();
+		material->metallic_texname.clear();
+		material->sheen_texname.clear();
+		material->emissive_texname.clear();
+		material->normal_texname.clear();
+
+		// Three-component color terms all start at zero.
+		for (int channel = 0; channel < 3; ++channel) {
+			material->ambient[channel] = kZero;
+			material->diffuse[channel] = kZero;
+			material->specular[channel] = kZero;
+			material->transmittance[channel] = kZero;
+			material->emission[channel] = kZero;
+		}
+
+		// Classic scalar parameters.
+		material->illum = 0;
+		material->dissolve = kOne;
+		material->shininess = kOne;
+		material->ior = kOne;
+
+		// PBR scalar parameters.
+		material->roughness = kZero;
+		material->metallic = kZero;
+		material->sheen = kZero;
+		material->clearcoat_thickness = kZero;
+		material->clearcoat_roughness = kZero;
+		material->anisotropy_rotation = kZero;
+		material->anisotropy = kZero;
+
+		material->unknown_parameter.clear();
+	}
+
+	// Point-in-polygon test by edge-crossing parity.
+	// Adapted from https://wrf.ecse.rpi.edu//Research/Short_Notes/pnpoly.html
+	//
+	// nvert         Number of polygon vertices.
+	// vertx, verty  Vertex coordinates, `nvert` entries each.
+	// testx, testy  Point to classify.
+	//
+	// Returns 1 when an odd number of edges is crossed (inside), 0 otherwise.
+	template <typename T>
+	static int pnpoly(int nvert, T *vertx, T *verty, T testx, T testy) {
+		int inside = 0;
+		int prev = nvert - 1;  // the first edge joins the last vertex to vertex 0
+		for (int cur = 0; cur < nvert; prev = cur, ++cur) {
+			// Only edges whose end points lie on opposite sides of the horizontal
+			// line through the test point can be crossed.
+			const bool straddles = (verty[cur] > testy) != (verty[prev] > testy);
+			if (!straddles) {
+				continue;
+			}
+			// Toggle when the test point is left of the edge at height testy.
+			if (testx <
+				(vertx[prev] - vertx[cur]) * (testy - verty[cur]) /
+				(verty[prev] - verty[cur]) +
+				vertx[cur]) {
+				inside = !inside;
+			}
+		}
+		return inside;
+	}
+
+	// TODO(syoyo): refactor function.
+	//
+	// Append the accumulated faces and line indices to `shape`.
+	//
+	// shape        Destination; faces are appended to `shape->mesh`, line
+	//              indices are swapped into `shape->path.indices`.
+	// faceGroup    Faces to export. Faces with fewer than 3 vertices are skipped.
+	// lineGroup    Line indices; swapped with `shape->path.indices` when
+	//              non-empty, so on return it holds the shape's previous content.
+	// tags         Copied to `shape->mesh.tags` when `faceGroup` is non-empty.
+	// material_id  Stored once per emitted face.
+	// name         Copied to `shape->name` when `faceGroup` is non-empty.
+	// triangulate  When true each polygon is split into triangles by ear
+	//              clipping in a 2D projection; otherwise it is emitted as-is.
+	// v            Flat vertex coordinates, read as 3 values per vertex.
+	//
+	// Returns false only when both `faceGroup` and `lineGroup` are empty.
+	static bool exportGroupsToShape(shape_t *shape,
+		const std::vector<face_t> &faceGroup,
+		std::vector<int> &lineGroup,
+		const std::vector<tag_t> &tags,
+		const int material_id, const std::string &name,
+		bool triangulate,
+		const std::vector<real_t> &v) {
+		if (faceGroup.empty() && lineGroup.empty()) {
+			return false;
+		}
+
+		if (!faceGroup.empty()) {
+			// Flatten vertices and indices
+			for (size_t i = 0; i < faceGroup.size(); i++) {
+				const face_t &face = faceGroup[i];
+
+				size_t npolys = face.vertex_indices.size();
+
+				if (npolys < 3) {
+					// Face must have 3+ vertices.
+					continue;
+				}
+
+				vertex_index_t i0 = face.vertex_indices[0];
+				vertex_index_t i1(-1);
+				vertex_index_t i2 = face.vertex_indices[1];
+
+				if (triangulate) {
+					// find the two axes to work in
+					// Default is to project onto the y/z plane (drop x). The first
+					// non-degenerate corner found below decides which axis to drop.
+					size_t axes[2] = { 1, 2 };
+					for (size_t k = 0; k < npolys; ++k) {
+						i0 = face.vertex_indices[(k + 0) % npolys];
+						i1 = face.vertex_indices[(k + 1) % npolys];
+						i2 = face.vertex_indices[(k + 2) % npolys];
+						size_t vi0 = size_t(i0.v_idx);
+						size_t vi1 = size_t(i1.v_idx);
+						size_t vi2 = size_t(i2.v_idx);
+
+						if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) ||
+							((3 * vi2 + 2) >= v.size())) {
+							// Invalid triangle.
+							// FIXME(syoyo): Is it ok to simply skip this invalid triangle?
+							continue;
+						}
+						real_t v0x = v[vi0 * 3 + 0];
+						real_t v0y = v[vi0 * 3 + 1];
+						real_t v0z = v[vi0 * 3 + 2];
+						real_t v1x = v[vi1 * 3 + 0];
+						real_t v1y = v[vi1 * 3 + 1];
+						real_t v1z = v[vi1 * 3 + 2];
+						real_t v2x = v[vi2 * 3 + 0];
+						real_t v2y = v[vi2 * 3 + 1];
+						real_t v2z = v[vi2 * 3 + 2];
+						// Edges v0->v1 and v1->v2 of this corner.
+						real_t e0x = v1x - v0x;
+						real_t e0y = v1y - v0y;
+						real_t e0z = v1z - v0z;
+						real_t e1x = v2x - v1x;
+						real_t e1y = v2y - v1y;
+						real_t e1z = v2z - v1z;
+						// Absolute components of the cross product e0 x e1.
+						real_t cx = std::fabs(e0y * e1z - e0z * e1y);
+						real_t cy = std::fabs(e0z * e1x - e0x * e1z);
+						real_t cz = std::fabs(e0x * e1y - e0y * e1x);
+						const real_t epsilon = std::numeric_limits<real_t>::epsilon();
+						if (cx > epsilon || cy > epsilon || cz > epsilon) {
+							// found a corner
+							// cx strictly largest -> keep axes {1, 2};
+							// cz strictly largest -> axes {0, 1};
+							// otherwise           -> axes {0, 2}.
+							if (cx > cy && cx > cz) {
+							}
+							else {
+								axes[0] = 0;
+								if (cz > cx && cz > cy) axes[1] = 1;
+							}
+							break;
+						}
+					}
+
+					// Signed area of the projected polygon (sum of edge cross
+					// products). Only its sign is used below, to tell which turn
+					// direction matches the polygon's winding.
+					real_t area = 0;
+					for (size_t k = 0; k < npolys; ++k) {
+						i0 = face.vertex_indices[(k + 0) % npolys];
+						i1 = face.vertex_indices[(k + 1) % npolys];
+						size_t vi0 = size_t(i0.v_idx);
+						size_t vi1 = size_t(i1.v_idx);
+						if (((vi0 * 3 + axes[0]) >= v.size()) ||
+							((vi0 * 3 + axes[1]) >= v.size()) ||
+							((vi1 * 3 + axes[0]) >= v.size()) ||
+							((vi1 * 3 + axes[1]) >= v.size())) {
+							// Invalid index.
+							continue;
+						}
+						real_t v0x = v[vi0 * 3 + axes[0]];
+						real_t v0y = v[vi0 * 3 + axes[1]];
+						real_t v1x = v[vi1 * 3 + axes[0]];
+						real_t v1y = v[vi1 * 3 + axes[1]];
+						area += (v0x * v1y - v0y * v1x) * static_cast<real_t>(0.5);
+					}
+
+					int maxRounds = 10;  // arbitrary max loop count to protect against
+										 // unexpected errors
+
+					// Ear clipping: repeatedly cut one triangle off a working copy of
+					// the polygon until only three vertices remain or the round
+					// budget is used up. If the budget runs out first, the vertices
+					// still left in the copy are not emitted.
+					face_t remainingFace = face;  // copy
+					size_t guess_vert = 0;
+					vertex_index_t ind[3];
+					real_t vx[3];
+					real_t vy[3];
+					while (remainingFace.vertex_indices.size() > 3 && maxRounds > 0) {
+						npolys = remainingFace.vertex_indices.size();
+						if (guess_vert >= npolys) {
+							// Wrapped around the polygon once: that costs one round.
+							maxRounds -= 1;
+							guess_vert -= npolys;
+						}
+						// Candidate ear: three consecutive vertices from guess_vert.
+						for (size_t k = 0; k < 3; k++) {
+							ind[k] = remainingFace.vertex_indices[(guess_vert + k) % npolys];
+							size_t vi = size_t(ind[k].v_idx);
+							if (((vi * 3 + axes[0]) >= v.size()) ||
+								((vi * 3 + axes[1]) >= v.size())) {
+								// ???
+								// Out-of-range vertex index: use the origin as a stand-in.
+								vx[k] = static_cast<real_t>(0.0);
+								vy[k] = static_cast<real_t>(0.0);
+							}
+							else {
+								vx[k] = v[vi * 3 + axes[0]];
+								vy[k] = v[vi * 3 + axes[1]];
+							}
+						}
+						real_t e0x = vx[1] - vx[0];
+						real_t e0y = vy[1] - vy[0];
+						real_t e1x = vx[2] - vx[1];
+						real_t e1y = vy[2] - vy[1];
+						real_t cross = e0x * e1y - e0y * e1x;
+						// if an internal angle
+						// (the corner turns opposite to the sign of the polygon area,
+						// so it cannot be clipped as an ear)
+						if (cross * area < static_cast<real_t>(0.0)) {
+							guess_vert += 1;
+							continue;
+						}
+
+						// check all other verts in case they are inside this triangle
+						bool overlap = false;
+						for (size_t otherVert = 3; otherVert < npolys; ++otherVert) {
+							size_t idx = (guess_vert + otherVert) % npolys;
+
+							if (idx >= remainingFace.vertex_indices.size()) {
+								// ???
+								continue;
+							}
+
+							size_t ovi = size_t(remainingFace.vertex_indices[idx].v_idx);
+
+							if (((ovi * 3 + axes[0]) >= v.size()) ||
+								((ovi * 3 + axes[1]) >= v.size())) {
+								// ???
+								// Out-of-range vertex index: ignore it for the overlap test.
+								continue;
+							}
+							real_t tx = v[ovi * 3 + axes[0]];
+							real_t ty = v[ovi * 3 + axes[1]];
+							if (pnpoly(3, vx, vy, tx, ty)) {
+								overlap = true;
+								break;
+							}
+						}
+
+						if (overlap) {
+							guess_vert += 1;
+							continue;
+						}
+
+						// this triangle is an ear
+						{
+							index_t idx0, idx1, idx2;
+							idx0.vertex_index = ind[0].v_idx;
+							idx0.normal_index = ind[0].vn_idx;
+							idx0.texcoord_index = ind[0].vt_idx;
+							idx1.vertex_index = ind[1].v_idx;
+							idx1.normal_index = ind[1].vn_idx;
+							idx1.texcoord_index = ind[1].vt_idx;
+							idx2.vertex_index = ind[2].v_idx;
+							idx2.normal_index = ind[2].vn_idx;
+							idx2.texcoord_index = ind[2].vt_idx;
+
+							shape->mesh.indices.push_back(idx0);
+							shape->mesh.indices.push_back(idx1);
+							shape->mesh.indices.push_back(idx2);
+
+							shape->mesh.num_face_vertices.push_back(3);
+							shape->mesh.material_ids.push_back(material_id);
+							shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id);
+						}
+
+						// remove v1 from the list
+						// (shift the following entries down by one, then drop the
+						// duplicated last entry)
+						size_t removed_vert_index = (guess_vert + 1) % npolys;
+						while (removed_vert_index + 1 < npolys) {
+							remainingFace.vertex_indices[removed_vert_index] =
+								remainingFace.vertex_indices[removed_vert_index + 1];
+							removed_vert_index += 1;
+						}
+						remainingFace.vertex_indices.pop_back();
+					}
+
+					// Whatever is left as exactly three vertices is the final
+					// triangle (this is also the path taken by plain triangles).
+					if (remainingFace.vertex_indices.size() == 3) {
+						i0 = remainingFace.vertex_indices[0];
+						i1 = remainingFace.vertex_indices[1];
+						i2 = remainingFace.vertex_indices[2];
+						{
+							index_t idx0, idx1, idx2;
+							idx0.vertex_index = i0.v_idx;
+							idx0.normal_index = i0.vn_idx;
+							idx0.texcoord_index = i0.vt_idx;
+							idx1.vertex_index = i1.v_idx;
+							idx1.normal_index = i1.vn_idx;
+							idx1.texcoord_index = i1.vt_idx;
+							idx2.vertex_index = i2.v_idx;
+							idx2.normal_index = i2.vn_idx;
+							idx2.texcoord_index = i2.vt_idx;
+
+							shape->mesh.indices.push_back(idx0);
+							shape->mesh.indices.push_back(idx1);
+							shape->mesh.indices.push_back(idx2);
+
+							shape->mesh.num_face_vertices.push_back(3);
+							shape->mesh.material_ids.push_back(material_id);
+							shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id);
+						}
+					}
+				}
+				else {
+					// No triangulation: emit the polygon with all of its vertices.
+					for (size_t k = 0; k < npolys; k++) {
+						index_t idx;
+						idx.vertex_index = face.vertex_indices[k].v_idx;
+						idx.normal_index = face.vertex_indices[k].vn_idx;
+						idx.texcoord_index = face.vertex_indices[k].vt_idx;
+						shape->mesh.indices.push_back(idx);
+					}
+
+					// NOTE(review): the vertex count is narrowed to unsigned char, so
+					// a polygon with more than 255 vertices would record a wrapped
+					// count here.
+					shape->mesh.num_face_vertices.push_back(
+						static_cast<unsigned char>(npolys));
+					shape->mesh.material_ids.push_back(material_id);  // per face
+					shape->mesh.smoothing_group_ids.push_back(
+						face.smoothing_group_id);  // per face
+				}
+			}
+
+			shape->name = name;
+			shape->mesh.tags = tags;
+		}
+
+		if (!lineGroup.empty()) {
+			shape->path.indices.swap(lineGroup);
+		}
+
+		return true;
+	}
+
+	// Break `s` into pieces at every occurrence of `delim` and append the pieces
+	// to `elems` (existing entries are kept). Consecutive delimiters produce
+	// empty pieces; an empty `s` appends nothing.
+	// http://stackoverflow.com/questions/236129/split-a-string-in-c
+	static void SplitString(const std::string &s, char delim,
+		std::vector<std::string> &elems) {
+		std::stringstream stream(s);
+		for (std::string piece; std::getline(stream, piece, delim);) {
+			elems.push_back(piece);
+		}
+	}
+
+	// Read material definitions from `inStream` line by line.
+	//
+	// material_map  Receives (material name -> index into `materials`).
+	// materials     Receives the parsed materials, appended in file order.
+	// inStream      Source of the material text.
+	// warning       Optional; when non-null it is overwritten with the warnings
+	//               collected while parsing.
+	//
+	// A material is flushed when the next `newmtl` is seen (only if its name is
+	// non-empty) and once more, unconditionally, at end of input, so at least
+	// one material is always appended. Statements that are not recognised are
+	// stored in `unknown_parameter` as key/value text.
+	void LoadMtl(std::map<std::string, int> *material_map,
+		std::vector<material_t> *materials, std::istream *inStream,
+		std::string *warning) {
+		// Create a default material anyway.
+		material_t material;
+		InitMaterial(&material);
+
+		// Issue 43. `d` wins against `Tr` since `Tr` is not in the MTL specification.
+		bool has_d = false;
+		bool has_tr = false;
+
+		std::stringstream ss;
+
+		std::string linebuf;
+		while (inStream->peek() != -1) {
+			safeGetline(*inStream, linebuf);
+
+			// Trim trailing whitespace.
+			if (linebuf.size() > 0) {
+				linebuf = linebuf.substr(0, linebuf.find_last_not_of(" \t") + 1);
+			}
+
+			// Trim newline '\r\n' or '\n'
+			if (linebuf.size() > 0) {
+				if (linebuf[linebuf.size() - 1] == '\n')
+					linebuf.erase(linebuf.size() - 1);
+			}
+			if (linebuf.size() > 0) {
+				if (linebuf[linebuf.size() - 1] == '\r')
+					linebuf.erase(linebuf.size() - 1);
+			}
+
+			// Skip if empty line.
+			if (linebuf.empty()) {
+				continue;
+			}
+
+			// Skip leading space.
+			const char *token = linebuf.c_str();
+			token += strspn(token, " \t");
+
+			assert(token);
+			if (token[0] == '\0') continue;  // empty line
+
+			if (token[0] == '#') continue;  // comment line
+
+			// new mtl
+			if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) {
+				// flush previous material.
+				if (!material.name.empty()) {
+					material_map->insert(std::pair<std::string, int>(
+						material.name, static_cast<int>(materials->size())));
+					materials->push_back(material);
+				}
+
+				// initial temporary material
+				InitMaterial(&material);
+
+				has_d = false;
+				has_tr = false;
+
+				// set new mtl name
+				token += 7;
+				{
+					std::stringstream sstr;
+					sstr << token;
+					material.name = sstr.str();
+				}
+				continue;
+			}
+
+			// ambient
+			if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) {
+				token += 2;
+				real_t r, g, b;
+				parseReal3(&r, &g, &b, &token);
+				material.ambient[0] = r;
+				material.ambient[1] = g;
+				material.ambient[2] = b;
+				continue;
+			}
+
+			// diffuse
+			if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) {
+				token += 2;
+				real_t r, g, b;
+				parseReal3(&r, &g, &b, &token);
+				material.diffuse[0] = r;
+				material.diffuse[1] = g;
+				material.diffuse[2] = b;
+				continue;
+			}
+
+			// specular
+			if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) {
+				token += 2;
+				real_t r, g, b;
+				parseReal3(&r, &g, &b, &token);
+				material.specular[0] = r;
+				material.specular[1] = g;
+				material.specular[2] = b;
+				continue;
+			}
+
+			// transmittance
+			if ((token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) ||
+				(token[0] == 'T' && token[1] == 'f' && IS_SPACE((token[2])))) {
+				token += 2;
+				real_t r, g, b;
+				parseReal3(&r, &g, &b, &token);
+				material.transmittance[0] = r;
+				material.transmittance[1] = g;
+				material.transmittance[2] = b;
+				continue;
+			}
+
+			// ior(index of refraction)
+			if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) {
+				token += 2;
+				material.ior = parseReal(&token);
+				continue;
+			}
+
+			// emission
+			if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) {
+				token += 2;
+				real_t r, g, b;
+				parseReal3(&r, &g, &b, &token);
+				material.emission[0] = r;
+				material.emission[1] = g;
+				material.emission[2] = b;
+				continue;
+			}
+
+			// shininess
+			if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) {
+				token += 2;
+				material.shininess = parseReal(&token);
+				continue;
+			}
+
+			// illum model
+			if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) {
+				token += 6;
+				material.illum = parseInt(&token);
+				continue;
+			}
+
+			// dissolve
+			if ((token[0] == 'd' && IS_SPACE(token[1]))) {
+				token += 1;
+				material.dissolve = parseReal(&token);
+
+				if (has_tr) {
+					ss << "WARN: Both `d` and `Tr` parameters defined for \""
+						<< material.name << "\". Use the value of `d` for dissolve."
+						<< std::endl;
+				}
+				has_d = true;
+				continue;
+			}
+			if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) {
+				token += 2;
+				if (has_d) {
+					// `d` wins. Ignore `Tr` value.
+					ss << "WARN: Both `d` and `Tr` parameters defined for \""
+						<< material.name << "\". Use the value of `d` for dissolve."
+						<< std::endl;
+				}
+				else {
+					// We invert value of Tr(assume Tr is in range [0, 1])
+					// NOTE: Interpretation of Tr is application(exporter) dependent. For
+					// some application(e.g. 3ds max obj exporter), Tr = d(Issue 43)
+					material.dissolve = static_cast<real_t>(1.0) - parseReal(&token);
+				}
+				has_tr = true;
+				continue;
+			}
+
+			// PBR: roughness
+			if (token[0] == 'P' && token[1] == 'r' && IS_SPACE(token[2])) {
+				token += 2;
+				material.roughness = parseReal(&token);
+				continue;
+			}
+
+			// PBR: metallic
+			if (token[0] == 'P' && token[1] == 'm' && IS_SPACE(token[2])) {
+				token += 2;
+				material.metallic = parseReal(&token);
+				continue;
+			}
+
+			// PBR: sheen
+			if (token[0] == 'P' && token[1] == 's' && IS_SPACE(token[2])) {
+				token += 2;
+				material.sheen = parseReal(&token);
+				continue;
+			}
+
+			// PBR: clearcoat thickness
+			if (token[0] == 'P' && token[1] == 'c' && IS_SPACE(token[2])) {
+				token += 2;
+				material.clearcoat_thickness = parseReal(&token);
+				continue;
+			}
+
+			// PBR: clearcoat roughness
+			if ((0 == strncmp(token, "Pcr", 3)) && IS_SPACE(token[3])) {
+				token += 4;
+				material.clearcoat_roughness = parseReal(&token);
+				continue;
+			}
+
+			// PBR: anisotropy
+			if ((0 == strncmp(token, "aniso", 5)) && IS_SPACE(token[5])) {
+				token += 6;
+				material.anisotropy = parseReal(&token);
+				continue;
+			}
+
+			// PBR: anisotropy rotation
+			if ((0 == strncmp(token, "anisor", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				material.anisotropy_rotation = parseReal(&token);
+				continue;
+			}
+
+			// ambient texture
+			if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				ParseTextureNameAndOption(&(material.ambient_texname),
+					&(material.ambient_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// diffuse texture
+			if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				ParseTextureNameAndOption(&(material.diffuse_texname),
+					&(material.diffuse_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// specular texture
+			if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				ParseTextureNameAndOption(&(material.specular_texname),
+					&(material.specular_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// specular highlight texture
+			if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				ParseTextureNameAndOption(&(material.specular_highlight_texname),
+					&(material.specular_highlight_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// bump texture
+			if ((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) {
+				token += 9;
+				ParseTextureNameAndOption(&(material.bump_texname),
+					&(material.bump_texopt), token,
+					/* is_bump */ true);
+				continue;
+			}
+
+			// bump texture
+			if ((0 == strncmp(token, "map_Bump", 8)) && IS_SPACE(token[8])) {
+				token += 9;
+				ParseTextureNameAndOption(&(material.bump_texname),
+					&(material.bump_texopt), token,
+					/* is_bump */ true);
+				continue;
+			}
+
+			// bump texture
+			if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) {
+				token += 5;
+				ParseTextureNameAndOption(&(material.bump_texname),
+					&(material.bump_texopt), token,
+					/* is_bump */ true);
+				continue;
+			}
+
+			// alpha texture
+			if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) {
+				token += 6;
+				// Pre-seed with the raw text; it stays in place if the parser below
+				// does not find a file name.
+				material.alpha_texname = token;
+				ParseTextureNameAndOption(&(material.alpha_texname),
+					&(material.alpha_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// displacement texture
+			if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) {
+				token += 5;
+				ParseTextureNameAndOption(&(material.displacement_texname),
+					&(material.displacement_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// reflection map
+			if ((0 == strncmp(token, "refl", 4)) && IS_SPACE(token[4])) {
+				token += 5;
+				ParseTextureNameAndOption(&(material.reflection_texname),
+					&(material.reflection_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// PBR: roughness texture
+			if ((0 == strncmp(token, "map_Pr", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				ParseTextureNameAndOption(&(material.roughness_texname),
+					&(material.roughness_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// PBR: metallic texture
+			if ((0 == strncmp(token, "map_Pm", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				ParseTextureNameAndOption(&(material.metallic_texname),
+					&(material.metallic_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// PBR: sheen texture
+			if ((0 == strncmp(token, "map_Ps", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				ParseTextureNameAndOption(&(material.sheen_texname),
+					&(material.sheen_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// PBR: emissive texture
+			if ((0 == strncmp(token, "map_Ke", 6)) && IS_SPACE(token[6])) {
+				token += 7;
+				ParseTextureNameAndOption(&(material.emissive_texname),
+					&(material.emissive_texopt), token,
+					/* is_bump */ false);
+				continue;
+			}
+
+			// PBR: normal map texture
+			if ((0 == strncmp(token, "norm", 4)) && IS_SPACE(token[4])) {
+				token += 5;
+				ParseTextureNameAndOption(
+					&(material.normal_texname), &(material.normal_texopt), token,
+					/* is_bump */ false);  // @fixme { is_bump will be true? }
+				continue;
+			}
+
+			// unknown parameter
+			// The key ends at the first space or tab, whichever comes first.
+			// Searching for a space before considering tabs would fold part of
+			// the value into the key for lines such as "key<TAB>v1 v2".
+			const size_t key_len = strcspn(token, " \t");
+			if (token[key_len] != '\0') {
+				std::string key(token, key_len);
+				std::string value = token + key_len + 1;
+				material.unknown_parameter.insert(
+					std::pair<std::string, std::string>(key, value));
+			}
+		}
+		// flush last material.
+		material_map->insert(std::pair<std::string, int>(
+			material.name, static_cast<int>(materials->size())));
+		materials->push_back(material);
+
+		if (warning) {
+			(*warning) = ss.str();
+		}
+	}
+
+	// Open the material file named `matId` (prefixed with the reader's base
+	// directory when one is set) and parse it with LoadMtl().
+	//
+	// Returns false, appending a warning to `*err` when `err` is non-null, if
+	// the file cannot be opened. Otherwise returns true and appends any
+	// warnings produced by LoadMtl() to `*err`.
+	bool MaterialFileReader::operator()(const std::string &matId,
+		std::vector<material_t> *materials,
+		std::map<std::string, int> *matMap,
+		std::string *err) {
+		const std::string filepath =
+			m_mtlBaseDir.empty() ? matId : (std::string(m_mtlBaseDir) + matId);
+
+		std::ifstream matIStream(filepath.c_str());
+		if (!matIStream) {
+			if (err) {
+				(*err) += "WARN: Material file [ " + filepath + " ] not found.\n";
+			}
+			return false;
+		}
+
+		std::string warning;
+		LoadMtl(matMap, materials, &matIStream, &warning);
+
+		if (err && !warning.empty()) {
+			(*err) += warning;
+		}
+
+		return true;
+	}
+
+	// Parse materials from the stream held by this reader; `matId` is ignored.
+	//
+	// Returns false, appending a warning to `*err` when `err` is non-null, if
+	// the stream is in an error state. Otherwise returns true and appends any
+	// warnings produced by LoadMtl() to `*err`.
+	bool MaterialStreamReader::operator()(const std::string &matId,
+		std::vector<material_t> *materials,
+		std::map<std::string, int> *matMap,
+		std::string *err) {
+		(void)matId;
+
+		if (!m_inStream) {
+			if (err) {
+				(*err) += "WARN: Material stream in error state. \n";
+			}
+			return false;
+		}
+
+		std::string warning;
+		LoadMtl(matMap, materials, &m_inStream, &warning);
+
+		if (err && !warning.empty()) {
+			(*err) += warning;
+		}
+
+		return true;
+	}
+
+	// Load an OBJ file from disk.
+	//
+	// Clears `attrib` and `shapes`, opens `filename`, and forwards to the
+	// stream-based LoadObj() overload with a MaterialFileReader rooted at
+	// `mtl_basedir` (a platform directory separator is appended when the
+	// directory is non-empty and does not already end with one).
+	//
+	// Returns false and, when `err` is non-null, overwrites `*err` with a
+	// message if the file cannot be opened; otherwise returns whatever the
+	// stream-based overload returns.
+	bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+		std::vector<material_t> *materials, std::string *err,
+		const char *filename, const char *mtl_basedir,
+		bool trianglulate, bool default_vcols_fallback) {
+		attrib->vertices.clear();
+		attrib->normals.clear();
+		attrib->texcoords.clear();
+		attrib->colors.clear();
+		shapes->clear();
+
+		std::ifstream ifs(filename);
+		if (!ifs) {
+			if (err) {
+				std::stringstream errss;
+				errss << "Cannot open file [" << filename << "]" << std::endl;
+				(*err) = errss.str();
+			}
+			return false;
+		}
+
+#ifndef _WIN32
+		const char dirsep = '/';
+#else
+		const char dirsep = '\\';
+#endif
+		std::string baseDir;
+		if (mtl_basedir) {
+			baseDir = mtl_basedir;
+		}
+		// Make sure a non-empty base directory ends with a separator so the
+		// material file name can be appended directly.
+		if (!baseDir.empty() && baseDir[baseDir.length() - 1] != dirsep) {
+			baseDir += dirsep;
+		}
+
+		MaterialFileReader matFileReader(baseDir);
+		return LoadObj(attrib, shapes, materials, err, &ifs, &matFileReader,
+			trianglulate, default_vcols_fallback);
+	}
+
+	bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+		std::vector<material_t> *materials, std::string *err,
+		std::istream *inStream, MaterialReader *readMatFn /*= NULL*/,
+		bool triangulate, bool default_vcols_fallback) {
+		std::stringstream errss;
+
+		std::vector<real_t> v;
+		std::vector<real_t> vn;
+		std::vector<real_t> vt;
+		std::vector<real_t> vc;
+		std::vector<tag_t> tags;
+		std::vector<face_t> faceGroup;
+		std::vector<int> lineGroup;
+		std::string name;
+
+		// material
+		std::map<std::string, int> material_map;
+		int material = -1;
+
+		// smoothing group id
+		unsigned int current_smoothing_id =
+			0;  // Initial value. 0 means no smoothing.
+
+		int greatest_v_idx = -1;
+		int greatest_vn_idx = -1;
+		int greatest_vt_idx = -1;
+
+		shape_t shape;
+
+		bool found_all_colors = true;
+
+		size_t line_num = 0;
+		std::string linebuf;
+		while (inStream->peek() != -1) {
+			safeGetline(*inStream, linebuf);
+
+			line_num++;
+
+			// Trim newline '\r\n' or '\n'
+			if (linebuf.size() > 0) {
+				if (linebuf[linebuf.size() - 1] == '\n')
+					linebuf.erase(linebuf.size() - 1);
+			}
+			if (linebuf.size() > 0) {
+				if (linebuf[linebuf.size() - 1] == '\r')
+					linebuf.erase(linebuf.size() - 1);
+			}
+
+			// Skip if empty line.
+			if (linebuf.empty()) {
+				continue;
+			}
+
+			// Skip leading space.
+			const char *token = linebuf.c_str();
+			token += strspn(token, " \t");
+
+			assert(token);
+			if (token[0] == '\0') continue;  // empty line
+
+			if (token[0] == '#') continue;  // comment line
+
+											// vertex
+			if (token[0] == 'v' && IS_SPACE((token[1]))) {
+				token += 2;
+				real_t x, y, z;
+				real_t r, g, b;
+
+				found_all_colors &= parseVertexWithColor(&x, &y, &z, &r, &g, &b, &token);
+
+				v.push_back(x);
+				v.push_back(y);
+				v.push_back(z);
+
+				if (found_all_colors || default_vcols_fallback) {
+					vc.push_back(r);
+					vc.push_back(g);
+					vc.push_back(b);
+				}
+
+				continue;
+			}
+
+			// normal
+			if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
+				token += 3;
+				real_t x, y, z;
+				parseReal3(&x, &y, &z, &token);
+				vn.push_back(x);
+				vn.push_back(y);
+				vn.push_back(z);
+				continue;
+			}
+
+			// texcoord
+			if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
+				token += 3;
+				real_t x, y;
+				parseReal2(&x, &y, &token);
+				vt.push_back(x);
+				vt.push_back(y);
+				continue;
+			}
+
+			// line
+			if (token[0] == 'l' && IS_SPACE((token[1]))) {
+				token += 2;
+
+				line_t line_cache;
+				bool end_line_bit = 0;
+				while (!IS_NEW_LINE(token[0])) {
+					// get index from string
+					int idx;
+					fixIndex(parseInt(&token), 0, &idx);
+
+					size_t n = strspn(token, " \t\r");
+					token += n;
+
+					if (!end_line_bit) {
+						line_cache.idx0 = idx;
+					}
+					else {
+						line_cache.idx1 = idx;
+						lineGroup.push_back(line_cache.idx0);
+						lineGroup.push_back(line_cache.idx1);
+						line_cache = line_t();
+					}
+					end_line_bit = !end_line_bit;
+				}
+
+				continue;
+			}
+			// face
+			if (token[0] == 'f' && IS_SPACE((token[1]))) {
+				token += 2;
+				token += strspn(token, " \t");
+
+				face_t face;
+
+				face.smoothing_group_id = current_smoothing_id;
+				face.vertex_indices.reserve(3);
+
+				while (!IS_NEW_LINE(token[0])) {
+					vertex_index_t vi;
+					if (!parseTriple(&token, static_cast<int>(v.size() / 3),
+						static_cast<int>(vn.size() / 3),
+						static_cast<int>(vt.size() / 2), &vi)) {
+						if (err) {
+							(*err) = "Failed parse `f' line(e.g. zero value for face index).\n";
+						}
+						return false;
+					}
+
+					greatest_v_idx = greatest_v_idx > vi.v_idx ? greatest_v_idx : vi.v_idx;
+					greatest_vn_idx = greatest_vn_idx > vi.vn_idx ? greatest_vn_idx : vi.vn_idx;
+					greatest_vt_idx = greatest_vt_idx > vi.vt_idx ? greatest_vt_idx : vi.vt_idx;
+
+					face.vertex_indices.push_back(vi);
+					size_t n = strspn(token, " \t\r");
+					token += n;
+				}
+
+				// replace with emplace_back + std::move on C++11
+				faceGroup.push_back(face);
+
+				continue;
+			}
+
+			// use mtl
+			if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
+				token += 7;
+				std::stringstream ss;
+				ss << token;
+				std::string namebuf = ss.str();
+
+				int newMaterialId = -1;
+				if (material_map.find(namebuf) != material_map.end()) {
+					newMaterialId = material_map[namebuf];
+				}
+				else {
+					// { error!! material not found }
+				}
+
+				if (newMaterialId != material) {
+					// Create per-face material. Thus we don't add `shape` to `shapes` at
+					// this time.
+					// just clear `faceGroup` after `exportGroupsToShape()` call.
+					exportGroupsToShape(&shape, faceGroup, lineGroup, tags, material, name,
+						triangulate, v);
+					faceGroup.clear();
+					material = newMaterialId;
+				}
+
+				continue;
+			}
+
+			// load mtl
+			if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+				if (readMatFn) {
+					token += 7;
+
+					std::vector<std::string> filenames;
+					SplitString(std::string(token), ' ', filenames);
+
+					if (filenames.empty()) {
+						if (err) {
+							(*err) +=
+								"WARN: Looks like empty filename for mtllib. Use default "
+								"material. \n";
+						}
+					}
+					else {
+						bool found = false;
+						for (size_t s = 0; s < filenames.size(); s++) {
+							std::string err_mtl;
+							bool ok = (*readMatFn)(filenames[s].c_str(), materials,
+								&material_map, &err_mtl);
+							if (err && (!err_mtl.empty())) {
+								(*err) += err_mtl;  // This should be warn message.
+							}
+
+							if (ok) {
+								found = true;
+								break;
+							}
+						}
+
+						if (!found) {
+							if (err) {
+								(*err) +=
+									"WARN: Failed to load material file(s). Use default "
+									"material.\n";
+							}
+						}
+					}
+				}
+
+				continue;
+			}
+
+			// group name
+			if (token[0] == 'g' && IS_SPACE((token[1]))) {
+				// flush previous face group.
+				bool ret = exportGroupsToShape(&shape, faceGroup, lineGroup, tags,
+					material, name, triangulate, v);
+				(void)ret;  // return value not used.
+
+				if (shape.mesh.indices.size() > 0) {
+					shapes->push_back(shape);
+				}
+
+				shape = shape_t();
+
+				// material = -1;
+				faceGroup.clear();
+
+				std::vector<std::string> names;
+
+				while (!IS_NEW_LINE(token[0])) {
+					std::string str = parseString(&token);
+					names.push_back(str);
+					token += strspn(token, " \t\r");  // skip tag
+				}
+
+				// names[0] must be 'g'
+
+				if (names.size() < 2) {
+					// 'g' with empty names
+					if (err) {
+						std::stringstream ss;
+						ss << "WARN: Empty group name. line: " << line_num << "\n";
+						(*err) += ss.str();
+						name = "";
+					}
+				}
+				else {
+
+					std::stringstream ss;
+					ss << names[1];
+
+					// tinyobjloader does not support multiple groups for a primitive.
+					// Currently we concatinate multiple group names with a space to get
+					// single group name.
+
+					for (size_t i = 2; i < names.size(); i++) {
+						ss << " " << names[i];
+					}
+
+					name = ss.str();
+
+				}
+
+				continue;
+			}
+
+			// object name
+			if (token[0] == 'o' && IS_SPACE((token[1]))) {
+				// flush previous face group.
+				bool ret = exportGroupsToShape(&shape, faceGroup, lineGroup, tags,
+					material, name, triangulate, v);
+				if (ret) {
+					shapes->push_back(shape);
+				}
+
+				// material = -1;
+				faceGroup.clear();
+				shape = shape_t();
+
+				// @todo { multiple object name? }
+				token += 2;
+				std::stringstream ss;
+				ss << token;
+				name = ss.str();
+
+				continue;
+			}
+
+			if (token[0] == 't' && IS_SPACE(token[1])) {
+				const int max_tag_nums = 8192;  // FIXME(syoyo): Parameterize.
+				tag_t tag;
+
+				token += 2;
+
+				tag.name = parseString(&token);
+
+				tag_sizes ts = parseTagTriple(&token);
+
+				if (ts.num_ints < 0) {
+					ts.num_ints = 0;
+				}
+				if (ts.num_ints > max_tag_nums) {
+					ts.num_ints = max_tag_nums;
+				}
+
+				if (ts.num_reals < 0) {
+					ts.num_reals = 0;
+				}
+				if (ts.num_reals > max_tag_nums) {
+					ts.num_reals = max_tag_nums;
+				}
+
+				if (ts.num_strings < 0) {
+					ts.num_strings = 0;
+				}
+				if (ts.num_strings > max_tag_nums) {
+					ts.num_strings = max_tag_nums;
+				}
+
+				tag.intValues.resize(static_cast<size_t>(ts.num_ints));
+
+				for (size_t i = 0; i < static_cast<size_t>(ts.num_ints); ++i) {
+					tag.intValues[i] = parseInt(&token);
+				}
+
+				tag.floatValues.resize(static_cast<size_t>(ts.num_reals));
+				for (size_t i = 0; i < static_cast<size_t>(ts.num_reals); ++i) {
+					tag.floatValues[i] = parseReal(&token);
+				}
+
+				tag.stringValues.resize(static_cast<size_t>(ts.num_strings));
+				for (size_t i = 0; i < static_cast<size_t>(ts.num_strings); ++i) {
+					tag.stringValues[i] = parseString(&token);
+				}
+
+				tags.push_back(tag);
+
+				continue;
+			}
+
+			if (token[0] == 's' && IS_SPACE(token[1])) {
+				// smoothing group id
+				token += 2;
+
+				// skip space.
+				token += strspn(token, " \t");  // skip space
+
+				if (token[0] == '\0') {
+					continue;
+				}
+
+				if (token[0] == '\r' || token[1] == '\n') {
+					continue;
+				}
+
+				if (strlen(token) >= 3) {
+					if (token[0] == 'o' && token[1] == 'f' && token[2] == 'f') {
+						current_smoothing_id = 0;
+					}
+				}
+				else {
+					// assume number
+					int smGroupId = parseInt(&token);
+					if (smGroupId < 0) {
+						// parse error. force set to 0.
+						// FIXME(syoyo): Report warning.
+						current_smoothing_id = 0;
+					}
+					else {
+						current_smoothing_id = static_cast<unsigned int>(smGroupId);
+					}
+				}
+
+				continue;
+			}  // smoothing group id
+
+			   // Ignore unknown command.
+		}
+
+		// not all vertices have colors, no default colors desired? -> clear colors
+		if (!found_all_colors && !default_vcols_fallback) {
+			vc.clear();
+		}
+
+		if (greatest_v_idx >= static_cast<int>(v.size() / 3))
+		{
+			if (err) {
+				std::stringstream ss;
+				ss << "WARN: Vertex indices out of bounds.\n" << std::endl;
+				(*err) += ss.str();
+			}
+		}
+		if (greatest_vn_idx >= static_cast<int>(vn.size() / 3))
+		{
+			if (err) {
+				std::stringstream ss;
+				ss << "WARN: Vertex normal indices out of bounds.\n" << std::endl;
+				(*err) += ss.str();
+			}
+		}
+		if (greatest_vt_idx >= static_cast<int>(vt.size() / 2))
+		{
+			if (err) {
+				std::stringstream ss;
+				ss << "WARN: Vertex texcoord indices out of bounds.\n" << std::endl;
+				(*err) += ss.str();
+			}
+		}
+
+		bool ret = exportGroupsToShape(&shape, faceGroup, lineGroup, tags, material,
+			name, triangulate, v);
+		// exportGroupsToShape return false when `usemtl` is called in the last
+		// line.
+		// we also add `shape` to `shapes` when `shape.mesh` has already some
+		// faces(indices)
+		if (ret || shape.mesh.indices.size()) {
+			shapes->push_back(shape);
+		}
+		faceGroup.clear();  // for safety
+
+		if (err) {
+			(*err) += errss.str();
+		}
+
+		attrib->vertices.swap(v);
+		attrib->normals.swap(vn);
+		attrib->texcoords.swap(vt);
+		attrib->colors.swap(vc);
+
+		return true;
+	}
+
+	// Streaming OBJ parser.
+	//
+	// Reads `inStream` one line at a time and, instead of building mesh data,
+	// forwards every recognised statement to the matching function pointer in
+	// `callback` (a callback left NULL is simply skipped):
+	//   v      -> vertex_cb(user_data, x, y, z, w)
+	//   vn     -> normal_cb(user_data, x, y, z)
+	//   vt     -> texcoord_cb(user_data, x, y, z)
+	//   f      -> index_cb(user_data, indices, count), one call per face line,
+	//             indices as produced by parseRawTriple()
+	//   usemtl -> usemtl_cb(user_data, name, material_id); id is -1 when the
+	//             name is not present in the materials loaded so far
+	//   mtllib -> mtllib_cb(user_data, materials, count) after a successful load
+	//   g      -> group_cb(user_data, names, count)
+	//   o      -> object_cb(user_data, name)
+	// Blank lines, `#` comments and unknown commands are ignored.
+	//
+	// `user_data` is passed through to every callback untouched.
+	// `readMatFn` loads `mtllib` files; when NULL, `mtllib` lines are skipped.
+	// `err`, when non-NULL, has warning text appended to it.
+	//
+	// Always returns true; problems are only reported through `err`.
+	bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
+		void *user_data /*= NULL*/,
+		MaterialReader *readMatFn /*= NULL*/,
+		std::string *err /*= NULL*/) {
+		std::stringstream errss;
+
+		// material
+		std::map<std::string, int> material_map;
+		int material_id = -1;  // -1 = invalid
+
+		// Buffers reused across lines so that each statement does not
+		// reallocate them.
+		std::vector<index_t> indices;
+		std::vector<material_t> materials;
+		std::vector<std::string> names;
+		names.reserve(2);
+		std::vector<const char *> names_out;
+
+		std::string linebuf;
+		while (inStream.peek() != -1) {
+			safeGetline(inStream, linebuf);
+
+			// Trim newline '\r\n' or '\n'
+			if (linebuf.size() > 0) {
+				if (linebuf[linebuf.size() - 1] == '\n')
+					linebuf.erase(linebuf.size() - 1);
+			}
+			if (linebuf.size() > 0) {
+				if (linebuf[linebuf.size() - 1] == '\r')
+					linebuf.erase(linebuf.size() - 1);
+			}
+
+			// Skip if empty line.
+			if (linebuf.empty()) {
+				continue;
+			}
+
+			// Skip leading space.
+			const char *token = linebuf.c_str();
+			token += strspn(token, " \t");
+
+			assert(token);
+			if (token[0] == '\0') continue;  // empty line
+
+			if (token[0] == '#') continue;  // comment line
+
+			// vertex
+			if (token[0] == 'v' && IS_SPACE((token[1]))) {
+				token += 2;
+				// TODO(syoyo): Support parsing vertex color extension.
+				real_t x, y, z, w;  // w is optional. default = 1.0
+				parseV(&x, &y, &z, &w, &token);
+				if (callback.vertex_cb) {
+					callback.vertex_cb(user_data, x, y, z, w);
+				}
+				continue;
+			}
+
+			// normal
+			if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
+				token += 3;
+				real_t x, y, z;
+				parseReal3(&x, &y, &z, &token);
+				if (callback.normal_cb) {
+					callback.normal_cb(user_data, x, y, z);
+				}
+				continue;
+			}
+
+			// texcoord
+			if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
+				token += 3;
+				real_t x, y, z;  // y and z are optional. default = 0.0
+				parseReal3(&x, &y, &z, &token);
+				if (callback.texcoord_cb) {
+					callback.texcoord_cb(user_data, x, y, z);
+				}
+				continue;
+			}
+
+			// face
+			if (token[0] == 'f' && IS_SPACE((token[1]))) {
+				token += 2;
+				token += strspn(token, " \t");
+
+				// Collect every v/vt/vn triple on the line, then report the
+				// whole face with a single callback.
+				indices.clear();
+				while (!IS_NEW_LINE(token[0])) {
+					vertex_index_t vi = parseRawTriple(&token);
+
+					index_t idx;
+					idx.vertex_index = vi.v_idx;
+					idx.normal_index = vi.vn_idx;
+					idx.texcoord_index = vi.vt_idx;
+
+					indices.push_back(idx);
+					size_t n = strspn(token, " \t\r");
+					token += n;
+				}
+
+				if (callback.index_cb && indices.size() > 0) {
+					callback.index_cb(user_data, &indices.at(0),
+						static_cast<int>(indices.size()));
+				}
+
+				continue;
+			}
+
+			// use mtl
+			if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
+				token += 7;
+				std::string namebuf(token);
+
+				// Unknown material names map to -1 (invalid).
+				// { error!! material not found }
+				std::map<std::string, int>::const_iterator found_mat =
+					material_map.find(namebuf);
+				material_id =
+					(found_mat != material_map.end()) ? found_mat->second : -1;
+
+				if (callback.usemtl_cb) {
+					callback.usemtl_cb(user_data, namebuf.c_str(), material_id);
+				}
+
+				continue;
+			}
+
+			// load mtl
+			if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+				if (readMatFn) {
+					token += 7;
+
+					std::vector<std::string> filenames;
+					SplitString(std::string(token), ' ', filenames);
+
+					if (filenames.empty()) {
+						if (err) {
+							(*err) +=
+								"WARN: Looks like empty filename for mtllib. Use default "
+								"material. \n";
+						}
+					}
+					else {
+						// Try each listed file in order; the first one that
+						// loads wins.
+						bool found = false;
+						for (size_t s = 0; s < filenames.size(); s++) {
+							std::string err_mtl;
+							bool ok = (*readMatFn)(filenames[s].c_str(), &materials,
+								&material_map, &err_mtl);
+							if (err && (!err_mtl.empty())) {
+								(*err) += err_mtl;  // This should be warn message.
+							}
+
+							if (ok) {
+								found = true;
+								break;
+							}
+						}
+
+						if (!found) {
+							if (err) {
+								(*err) +=
+									"WARN: Failed to load material file(s). Use default "
+									"material.\n";
+							}
+						}
+						else {
+							if (callback.mtllib_cb) {
+								// A reader may report success without producing
+								// any material; `materials.at(0)` would throw
+								// std::out_of_range in that case. Report an
+								// empty range instead, using the same (NULL, 0)
+								// convention as group_cb below.
+								if (materials.empty()) {
+									callback.mtllib_cb(user_data, NULL, 0);
+								}
+								else {
+									callback.mtllib_cb(user_data, &materials.at(0),
+										static_cast<int>(materials.size()));
+								}
+							}
+						}
+					}
+				}
+
+				continue;
+			}
+
+			// group name
+			if (token[0] == 'g' && IS_SPACE((token[1]))) {
+				names.clear();
+
+				// `token` still points at the leading 'g', so names[0] is the
+				// command itself and the real group names start at index 1.
+				while (!IS_NEW_LINE(token[0])) {
+					std::string str = parseString(&token);
+					names.push_back(str);
+					token += strspn(token, " \t\r");  // skip tag
+				}
+
+				assert(names.size() > 0);
+
+				if (callback.group_cb) {
+					if (names.size() > 1) {
+						// create const char* array.
+						names_out.resize(names.size() - 1);
+						for (size_t j = 0; j < names_out.size(); j++) {
+							names_out[j] = names[j + 1].c_str();
+						}
+						callback.group_cb(user_data, &names_out.at(0),
+							static_cast<int>(names_out.size()));
+
+					}
+					else {
+						callback.group_cb(user_data, NULL, 0);
+					}
+				}
+
+				continue;
+			}
+
+			// object name
+			if (token[0] == 'o' && IS_SPACE((token[1]))) {
+				// @todo { multiple object name? }
+				token += 2;
+
+				// The rest of the line, verbatim, is the object name.
+				std::string object_name(token);
+
+				if (callback.object_cb) {
+					callback.object_cb(user_data, object_name.c_str());
+				}
+
+				continue;
+			}
+
+#if 0  // @todo
+			if (token[0] == 't' && IS_SPACE(token[1])) {
+				tag_t tag;
+
+				token += 2;
+				std::stringstream ss;
+				ss << token;
+				tag.name = ss.str();
+
+				token += tag.name.size() + 1;
+
+				tag_sizes ts = parseTagTriple(&token);
+
+				tag.intValues.resize(static_cast<size_t>(ts.num_ints));
+
+				for (size_t i = 0; i < static_cast<size_t>(ts.num_ints); ++i) {
+					tag.intValues[i] = atoi(token);
+					token += strcspn(token, "/ \t\r") + 1;
+				}
+
+				tag.floatValues.resize(static_cast<size_t>(ts.num_reals));
+				for (size_t i = 0; i < static_cast<size_t>(ts.num_reals); ++i) {
+					tag.floatValues[i] = parseReal(&token);
+					token += strcspn(token, "/ \t\r") + 1;
+				}
+
+				tag.stringValues.resize(static_cast<size_t>(ts.num_strings));
+				for (size_t i = 0; i < static_cast<size_t>(ts.num_strings); ++i) {
+					std::stringstream ss;
+					ss << token;
+					tag.stringValues[i] = ss.str();
+					token += tag.stringValues[i].size() + 1;
+				}
+
+				tags.push_back(tag);
+			}
+#endif
+
+			// Ignore unknown command.
+		}
+
+		if (err) {
+			(*err) += errss.str();
+		}
+
+		return true;
+	}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+}  // namespace tinyobj
+
+#endif
diff --git a/img/Lambert-Rasterize.png b/img/Lambert-Rasterize.png
new file mode 100644
index 0000000..121d8ad
Binary files /dev/null and b/img/Lambert-Rasterize.png differ
diff --git a/img/NoAA.png b/img/NoAA.png
new file mode 100644
index 0000000..8463801
Binary files /dev/null and b/img/NoAA.png differ
diff --git a/img/REFERENCE_cornell.5000samp.png b/img/REFERENCE_cornell.5000samp.png
deleted file mode 100644
index 5ceb26e..0000000
Binary files a/img/REFERENCE_cornell.5000samp.png and /dev/null differ
diff --git a/img/anti-aliasing.png b/img/anti-aliasing.png
new file mode 100644
index 0000000..556a1af
Binary files /dev/null and b/img/anti-aliasing.png differ
diff --git a/img/cornell-reflect.png b/img/cornell-reflect.png
new file mode 100644
index 0000000..117f792
Binary files /dev/null and b/img/cornell-reflect.png differ
diff --git a/img/cornell-refract.png b/img/cornell-refract.png
new file mode 100644
index 0000000..d174f0f
Binary files /dev/null and b/img/cornell-refract.png differ
diff --git a/img/iteration_bounce.png b/img/iteration_bounce.png
new file mode 100644
index 0000000..be550da
Binary files /dev/null and b/img/iteration_bounce.png differ
diff --git a/img/obj_loader.png b/img/obj_loader.png
new file mode 100644
index 0000000..b285abe
Binary files /dev/null and b/img/obj_loader.png differ
diff --git a/img/paths_bounce.png b/img/paths_bounce.png
new file mode 100644
index 0000000..e4a6c22
Binary files /dev/null and b/img/paths_bounce.png differ
diff --git a/img/pathtrace-sphere.gif b/img/pathtrace-sphere.gif
new file mode 100644
index 0000000..bc7fbfc
Binary files /dev/null and b/img/pathtrace-sphere.gif differ
diff --git a/img/pathtrace.gif b/img/pathtrace.gif
new file mode 100644
index 0000000..84c8d99
Binary files /dev/null and b/img/pathtrace.gif differ
diff --git a/img/sphere-diffuse.png b/img/sphere-diffuse.png
new file mode 100644
index 0000000..b9f4f0e
Binary files /dev/null and b/img/sphere-diffuse.png differ
diff --git a/img/sphere-reflect.png b/img/sphere-reflect.png
new file mode 100644
index 0000000..f6a66ed
Binary files /dev/null and b/img/sphere-reflect.png differ
diff --git a/img/sphere-refract.png b/img/sphere-refract.png
new file mode 100644
index 0000000..ae25232
Binary files /dev/null and b/img/sphere-refract.png differ
diff --git a/scenes/cornell.txt b/scenes/cornell.txt
index 83ff820..e2a3cd8 100644
--- a/scenes/cornell.txt
+++ b/scenes/cornell.txt
@@ -38,11 +38,21 @@ REFR        0
 REFRIOR     0
 EMITTANCE   0
 
-// Specular white
+// Refractive white
 MATERIAL 4
 RGB         .98 .98 .98
 SPECEX      0
 SPECRGB     .98 .98 .98
+REFL        0
+REFR        1
+REFRIOR     1.3
+EMITTANCE   0
+
+// Reflective white
+MATERIAL 5
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .98 .98 .98
 REFL        1
 REFR        0
 REFRIOR     0
@@ -54,7 +64,7 @@ RES         800 800
 FOVY        45
 ITERATIONS  5000
 DEPTH       8
-FILE        cornell
+FILE        C:\Users\SIG\Documents\CUDA-Path-Tracer\img\refract2
 EYE         0.0 5 10.5
 LOOKAT      0 5 0
 UP          0 1 0
@@ -112,6 +122,30 @@ SCALE       .01 10 10
 OBJECT 6
 sphere
 material 4
-TRANS       -1 4 -1
+TRANS       0 2 0
 ROTAT       0 0 0
 SCALE       3 3 3
+
+// Sphere
+OBJECT 7
+sphere
+material 5
+TRANS       0 5 -3
+ROTAT       0 0 0
+SCALE       3 3 3
+
+// Cube
+OBJECT 8
+cube
+material 2
+TRANS       1.2 1 -2
+ROTAT       0 0 0
+SCALE       2 2 2
+
+// Cube
+OBJECT 9
+cube
+material 3
+TRANS       -1.2 1 -2
+ROTAT       0 0 0
+SCALE       2 2 2
diff --git a/scenes/cornellMesh.txt b/scenes/cornellMesh.txt
new file mode 100644
index 0000000..398db16
--- /dev/null
+++ b/scenes/cornellMesh.txt
@@ -0,0 +1,115 @@
+// Emissive material (light)
+MATERIAL 0
+RGB         1 1 1
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   5
+
+// Diffuse white
+MATERIAL 1
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse red
+MATERIAL 2
+RGB         .85 .35 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse green
+MATERIAL 3
+RGB         .35 .85 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Specular white
+MATERIAL 4
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .98 .98 .98
+REFL        1
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Camera
+CAMERA
+RES         800 800
+FOVY        45
+ITERATIONS  5000
+DEPTH       6
+FILE        cornell
+EYE         0.0 5 10.5
+LOOKAT      0 5 0
+UP          0 1 0
+
+
+// Ceiling light
+OBJECT 0
+cube
+material 0
+TRANS       0 10 0
+ROTAT       0 0 0
+SCALE       3 .3 3
+
+// Floor
+OBJECT 1
+cube
+material 1
+TRANS       0 0 0
+ROTAT       0 0 0
+SCALE       10 .01 10
+
+// Ceiling
+OBJECT 2
+cube
+material 1
+TRANS       0 10 0
+ROTAT       0 0 90
+SCALE       .01 10 10
+
+// Back wall
+OBJECT 3
+cube
+material 1
+TRANS       0 5 -5
+ROTAT       0 90 0
+SCALE       .01 10 10
+
+// Left wall
+OBJECT 4
+cube
+material 2
+TRANS       -5 5 0
+ROTAT       0 0 0
+SCALE       .01 10 10
+
+// Right wall
+OBJECT 5
+cube
+material 3
+TRANS       5 5 0
+ROTAT       0 0 0
+SCALE       .01 10 10
+
+// Mesh
+OBJECT 6
+mesh
+material 4
+FILE        ../scenes/bunny.obj
diff --git a/scenes/cube.obj b/scenes/cube.obj
new file mode 100644
index 0000000..e355b94
--- /dev/null
+++ b/scenes/cube.obj
@@ -0,0 +1,33 @@
+# cube.obj
+#
+ 
+g cube
+ 
+v  0.0  0.0  0.0
+v  0.0  0.0  1.0
+v  0.0  1.0  0.0
+v  0.0  1.0  1.0
+v  1.0  0.0  0.0
+v  1.0  0.0  1.0
+v  1.0  1.0  0.0
+v  1.0  1.0  1.0
+
+vn  0.0  0.0  1.0
+vn  0.0  0.0 -1.0
+vn  0.0  1.0  0.0
+vn  0.0 -1.0  0.0
+vn  1.0  0.0  0.0
+vn -1.0  0.0  0.0
+ 
+f  1//2  7//2  5//2
+f  1//2  3//2  7//2 
+f  1//6  4//6  3//6 
+f  1//6  2//6  4//6 
+f  3//3  8//3  7//3 
+f  3//3  4//3  8//3 
+f  5//5  7//5  8//5 
+f  5//5  8//5  6//5 
+f  1//4  5//4  6//4 
+f  1//4  6//4  2//4 
+f  2//1  6//1  8//1 
+f  2//1  8//1  4//1 
\ No newline at end of file
diff --git a/scenes/sphere.obj b/scenes/sphere.obj
new file mode 100644
index 0000000..b4bde8b
--- /dev/null
+++ b/scenes/sphere.obj
@@ -0,0 +1,157 @@
+# Blender v2.74 (sub 0) OBJ File: ''
+# www.blender.org
+mtllib low-poly-sphere.mtl
+o sphere
+v 0.000000 0.500000 -0.000000
+v 0.250000 0.433000 -0.000000
+v 0.433000 0.250000 -0.000000
+v 0.500000 0.000000 0.000000
+v 0.433000 -0.250000 0.000000
+v 0.250000 -0.433000 0.000000
+v 0.000000 -0.500000 0.000000
+v 0.125000 0.433000 0.216500
+v 0.216500 0.250000 0.375000
+v 0.250000 0.000000 0.433000
+v 0.216500 -0.250000 0.375000
+v 0.125000 -0.433000 0.216500
+v -0.125000 0.433000 0.216500
+v -0.216500 0.250000 0.375000
+v -0.250000 0.000000 0.433000
+v -0.216500 -0.250000 0.375000
+v -0.125000 -0.433000 0.216500
+v -0.250000 0.433000 -0.000000
+v -0.433000 0.250000 -0.000000
+v -0.500000 0.000000 0.000000
+v -0.433000 -0.250000 0.000000
+v -0.250000 -0.433000 0.000000
+v -0.125000 0.433000 -0.216500
+v -0.216500 0.250000 -0.375000
+v -0.250000 -0.000000 -0.433000
+v -0.216500 -0.250000 -0.375000
+v -0.125000 -0.433000 -0.216500
+v 0.125000 0.433000 -0.216500
+v 0.216500 0.250000 -0.375000
+v 0.250000 -0.000000 -0.433000
+v 0.216500 -0.250000 -0.375000
+v 0.125000 -0.433000 -0.216500
+vt 0.000000 0.000000
+vt 0.166667 0.166667
+vt 0.000000 0.166667
+vt 0.166667 0.333333
+vt 0.000000 0.333333
+vt 0.166667 0.500000
+vt 0.000000 0.500000
+vt 0.166667 0.666667
+vt 0.000000 0.666667
+vt 0.166667 0.833333
+vt 0.000000 0.833333
+vt 0.166667 1.000000
+vt 0.166667 0.000000
+vt 0.333333 0.166667
+vt 0.333333 0.333333
+vt 0.333333 0.500000
+vt 0.333333 0.666667
+vt 0.333333 0.833333
+vt 0.333333 1.000000
+vt 0.333333 0.000000
+vt 0.500000 0.166667
+vt 0.500000 0.333333
+vt 0.500000 0.500000
+vt 0.500000 0.666667
+vt 0.500000 0.833333
+vt 0.500000 1.000000
+vt 0.500000 0.000000
+vt 0.666667 0.166667
+vt 0.666667 0.333333
+vt 0.666667 0.500000
+vt 0.666667 0.666667
+vt 0.666667 0.833333
+vt 0.666667 1.000000
+vt 0.666667 0.000000
+vt 0.833333 0.166667
+vt 0.833333 0.333333
+vt 0.833333 0.500000
+vt 0.833333 0.666667
+vt 0.833333 0.833333
+vt 0.833333 1.000000
+vt 0.833333 0.000000
+vt 1.000000 0.166667
+vt 1.000000 0.333333
+vt 1.000000 0.500000
+vt 1.000000 0.666667
+vt 1.000000 0.833333
+vt 1.000000 1.000000
+vn 0.256000 0.955300 0.147800
+vn 0.654600 0.654700 0.378000
+vn 0.843600 0.226100 0.487100
+vn 0.843600 -0.226100 0.487100
+vn 0.654600 -0.654700 0.378000
+vn 0.256000 -0.955300 0.147800
+vn 0.000000 0.955300 0.295600
+vn 0.000000 0.654700 0.755900
+vn 0.000000 0.226000 0.974100
+vn 0.000000 -0.226000 0.974100
+vn 0.000000 -0.654700 0.755900
+vn 0.000000 -0.955300 0.295600
+vn -0.256000 0.955300 0.147800
+vn -0.654600 0.654700 0.378000
+vn -0.843600 0.226100 0.487100
+vn -0.843600 -0.226100 0.487100
+vn -0.654600 -0.654700 0.378000
+vn -0.256000 -0.955300 0.147800
+vn -0.256000 0.955300 -0.147800
+vn -0.654600 0.654700 -0.378000
+vn -0.843600 0.226100 -0.487100
+vn -0.843600 -0.226100 -0.487100
+vn -0.654600 -0.654700 -0.378000
+vn -0.256000 -0.955300 -0.147800
+vn 0.000000 0.955300 -0.295600
+vn 0.000000 0.654700 -0.755900
+vn 0.000000 0.226000 -0.974100
+vn 0.000000 -0.226000 -0.974100
+vn 0.000000 -0.654700 -0.755900
+vn 0.000000 -0.955300 -0.295600
+vn 0.256000 0.955300 -0.147800
+vn 0.654600 0.654700 -0.378000
+vn 0.843600 0.226100 -0.487100
+vn 0.843600 -0.226100 -0.487100
+vn 0.654600 -0.654700 -0.378000
+vn 0.256000 -0.955300 -0.147800
+usemtl sphereDefault
+s off
+f 1/1/1 8/2/1 2/3/1
+f 2/3/2 8/2/2 9/4/2 3/5/2
+f 3/5/3 9/4/3 10/6/3 4/7/3
+f 4/7/4 10/6/4 11/8/4 5/9/4
+f 5/9/5 11/8/5 12/10/5 6/11/5
+f 6/11/6 12/10/6 7/12/6
+f 1/13/7 13/14/7 8/2/7
+f 8/2/8 13/14/8 14/15/8 9/4/8
+f 9/4/9 14/15/9 15/16/9 10/6/9
+f 10/6/10 15/16/10 16/17/10 11/8/10
+f 11/8/11 16/17/11 17/18/11 12/10/11
+f 12/10/12 17/18/12 7/19/12
+f 1/20/13 18/21/13 13/14/13
+f 13/14/14 18/21/14 19/22/14 14/15/14
+f 14/15/15 19/22/15 20/23/15 15/16/15
+f 15/16/16 20/23/16 21/24/16 16/17/16
+f 16/17/17 21/24/17 22/25/17 17/18/17
+f 17/18/18 22/25/18 7/26/18
+f 1/27/19 23/28/19 18/21/19
+f 18/21/20 23/28/20 24/29/20 19/22/20
+f 19/22/21 24/29/21 25/30/21 20/23/21
+f 20/23/22 25/30/22 26/31/22 21/24/22
+f 21/24/23 26/31/23 27/32/23 22/25/23
+f 22/25/24 27/32/24 7/33/24
+f 1/34/25 28/35/25 23/28/25
+f 23/28/26 28/35/26 29/36/26 24/29/26
+f 24/29/27 29/36/27 30/37/27 25/30/27
+f 25/30/28 30/37/28 31/38/28 26/31/28
+f 26/31/29 31/38/29 32/39/29 27/32/29
+f 27/32/30 32/39/30 7/40/30
+f 1/41/31 2/42/31 28/35/31
+f 28/35/32 2/42/32 3/43/32 29/36/32
+f 29/36/33 3/43/33 4/44/33 30/37/33
+f 30/37/34 4/44/34 5/45/34 31/38/34
+f 31/38/35 5/45/35 6/46/35 32/39/35
+f 32/39/36 6/46/36 7/47/36
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a1cb3fb..68832d9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,23 +1,23 @@
-set(SOURCE_FILES
-    "stb.cpp"
-    "image.cpp"
-    "image.h"
-    "interactions.h"
-    "intersections.h"
-    "glslUtility.hpp"
-    "glslUtility.cpp"
-    "pathtrace.cu"
-    "pathtrace.h"
-    "scene.cpp"
-    "scene.h"
-    "sceneStructs.h"
-    "preview.h"
-    "preview.cpp"
-    "utilities.cpp"
-    "utilities.h"
-    )
-
-cuda_add_library(src
-    ${SOURCE_FILES}
-    OPTIONS -arch=sm_20
-    )
+# Sources and headers that make up the `src` library target defined below.
+set(SOURCE_FILES
+    "stb.cpp"
+    "image.cpp"
+    "image.h"
+    "interactions.h"
+    "intersections.h"
+    "glslUtility.hpp"
+    "glslUtility.cpp"
+    "pathtrace.cu"
+    "pathtrace.h"
+    "scene.cpp"
+    "scene.h"
+    "sceneStructs.h"
+    "preview.h"
+    "preview.cpp"
+    "utilities.cpp"
+    "utilities.h"
+    )
+
+# Build everything listed above into the `src` library via cuda_add_library.
+# NOTE(review): `-arch=sm_30` is forwarded as a compiler option; presumably it
+# selects the GPU architecture to generate code for -- confirm it matches the
+# hardware you target.
+cuda_add_library(src
+    ${SOURCE_FILES}
+    OPTIONS -arch=sm_30
+    )
diff --git a/src/interactions.h b/src/interactions.h
index 5ce3628..e231e29 100644
--- a/src/interactions.h
+++ b/src/interactions.h
@@ -1,79 +1,105 @@
-#pragma once
-
-#include "intersections.h"
-
-// CHECKITOUT
-/**
- * Computes a cosine-weighted random direction in a hemisphere.
- * Used for diffuse lighting.
- */
-__host__ __device__
-glm::vec3 calculateRandomDirectionInHemisphere(
-        glm::vec3 normal, thrust::default_random_engine &rng) {
-    thrust::uniform_real_distribution<float> u01(0, 1);
-
-    float up = sqrt(u01(rng)); // cos(theta)
-    float over = sqrt(1 - up * up); // sin(theta)
-    float around = u01(rng) * TWO_PI;
-
-    // Find a direction that is not the normal based off of whether or not the
-    // normal's components are all equal to sqrt(1/3) or whether or not at
-    // least one component is less than sqrt(1/3). Learned this trick from
-    // Peter Kutz.
-
-    glm::vec3 directionNotNormal;
-    if (abs(normal.x) < SQRT_OF_ONE_THIRD) {
-        directionNotNormal = glm::vec3(1, 0, 0);
-    } else if (abs(normal.y) < SQRT_OF_ONE_THIRD) {
-        directionNotNormal = glm::vec3(0, 1, 0);
-    } else {
-        directionNotNormal = glm::vec3(0, 0, 1);
-    }
-
-    // Use not-normal direction to generate two perpendicular directions
-    glm::vec3 perpendicularDirection1 =
-        glm::normalize(glm::cross(normal, directionNotNormal));
-    glm::vec3 perpendicularDirection2 =
-        glm::normalize(glm::cross(normal, perpendicularDirection1));
-
-    return up * normal
-        + cos(around) * over * perpendicularDirection1
-        + sin(around) * over * perpendicularDirection2;
-}
-
-/**
- * Scatter a ray with some probabilities according to the material properties.
- * For example, a diffuse surface scatters in a cosine-weighted hemisphere.
- * A perfect specular surface scatters in the reflected ray direction.
- * In order to apply multiple effects to one surface, probabilistically choose
- * between them.
- * 
- * The visual effect you want is to straight-up add the diffuse and specular
- * components. You can do this in a few ways. This logic also applies to
- * combining other types of materias (such as refractive).
- * 
- * - Always take an even (50/50) split between a each effect (a diffuse bounce
- *   and a specular bounce), but divide the resulting color of either branch
- *   by its probability (0.5), to counteract the chance (0.5) of the branch
- *   being taken.
- *   - This way is inefficient, but serves as a good starting point - it
- *     converges slowly, especially for pure-diffuse or pure-specular.
- * - Pick the split based on the intensity of each material color, and divide
- *   branch result by that branch's probability (whatever probability you use).
- *
- * This method applies its changes to the Ray parameter `ray` in place.
- * It also modifies the color `color` of the ray in place.
- *
- * You may need to change the parameter list for your purposes!
- */
-__host__ __device__
-void scatterRay(
-		PathSegment & pathSegment,
-        glm::vec3 intersect,
-        glm::vec3 normal,
-        const Material &m,
-        thrust::default_random_engine &rng) {
-    // TODO: implement this.
-    // A basic implementation of pure-diffuse shading will just call the
-    // calculateRandomDirectionInHemisphere defined above.
-}
+#pragma once
+
+#include "intersections.h"
+
+// CHECKITOUT
+/**
+ * Picks a random, cosine-weighted direction in the hemisphere around
+ * `normal`. Diffuse shading uses it to choose the next bounce.
+ *
+ * Exactly two numbers are drawn from `rng`: the first sets the polar angle,
+ * the second the rotation around the normal.
+ */
+__host__ __device__
+glm::vec3 calculateRandomDirectionInHemisphere(
+        glm::vec3 normal, thrust::default_random_engine &rng) {
+    thrust::uniform_real_distribution<float> unit(0, 1);
+
+    const float cosTheta = sqrt(unit(rng));
+    const float sinTheta = sqrt(1 - cosTheta * cosTheta);
+    const float phi = unit(rng) * TWO_PI;
+
+    // Choose a coordinate axis that cannot be parallel to the normal: at
+    // least one component of a unit vector is below sqrt(1/3) unless all
+    // three are equal to it. (Trick credited to Peter Kutz.)
+    glm::vec3 helperAxis(0, 0, 1);
+    if (abs(normal.x) < SQRT_OF_ONE_THIRD) {
+        helperAxis = glm::vec3(1, 0, 0);
+    } else if (abs(normal.y) < SQRT_OF_ONE_THIRD) {
+        helperAxis = glm::vec3(0, 1, 0);
+    }
+
+    // Two directions perpendicular to the normal and to each other.
+    const glm::vec3 tangent =
+        glm::normalize(glm::cross(normal, helperAxis));
+    const glm::vec3 bitangent =
+        glm::normalize(glm::cross(normal, tangent));
+
+    return cosTheta * normal
+        + cos(phi) * sinTheta * tangent
+        + sin(phi) * sinTheta * bitangent;
+}
+
+/**
+ * Scatter a ray with some probabilities according to the material properties.
+ * For example, a diffuse surface scatters in a cosine-weighted hemisphere.
+ * A perfect specular surface scatters in the reflected ray direction.
+ * In order to apply multiple effects to one surface, probabilistically choose
+ * between them.
+ *
+ * The visual effect you want is to straight-up add the diffuse and specular
+ * components. You can do this in a few ways. This logic also applies to
+ * combining other types of materials (such as refractive).
+ *
+ * - Always take an even (50/50) split between each effect (a diffuse bounce
+ *   and a specular bounce), but divide the resulting color of either branch
+ *   by its probability (0.5), to counteract the chance (0.5) of the branch
+ *   being taken.
+ *   - This way is inefficient, but serves as a good starting point - it
+ *     converges slowly, especially for pure-diffuse or pure-specular.
+ * - Pick the split based on the intensity of each material color, and divide
+ *   branch result by that branch's probability (whatever probability you use).
+ *
+ * What this implementation does (exactly one branch per call):
+ * - `m.hasReflective` set: mirror reflection, color tinted by
+ *   `m.specular.color`. Takes precedence over `m.hasRefractive`.
+ * - `m.hasRefractive` set: one random draw picks between reflection and
+ *   refraction using Schlick's approximation of the Fresnel reflectance.
+ *   The color is left untouched in this branch.
+ * - otherwise: diffuse bounce via calculateRandomDirectionInHemisphere,
+ *   color tinted by `m.color`.
+ *
+ * `pathSegment` is modified in place: its ray gets the new direction and an
+ * origin nudged 0.001 along that direction from `intersect`, its color is
+ * tinted as described above, and `remainingBounces` is decremented.
+ *
+ * NOTE(review): glm::reflect / glm::refract presumably expect a unit-length
+ * `normal` and ray direction -- confirm callers normalize both.
+ */
+__host__ __device__
+void scatterRay(
+		PathSegment & pathSegment,
+        glm::vec3 intersect,
+        glm::vec3 normal,
+        const Material &m,
+        thrust::default_random_engine &rng) {
+	thrust::uniform_real_distribution<float> randf(0, 1);
+	const glm::vec3 incoming = pathSegment.ray.direction;
+	glm::vec3 direction;
+
+	if (m.hasReflective) { // Reflect
+		direction = glm::reflect(incoming, normal);
+		pathSegment.color *= m.specular.color;
+	} else if (m.hasRefractive) { // Refract
+		// Start from the mirror direction; it is kept whenever the Fresnel
+		// draw chooses reflection or refraction turns out to be impossible.
+		direction = glm::reflect(incoming, normal);
+
+		// Schlick's approximation for Fresnel reflection.
+		// The cosine is taken as a magnitude: when the ray travels with the
+		// normal the signed value is <= 0, which pushed the reflectance to
+		// >= 1 and made the "exiting" refraction below unreachable.
+		const float cosTheta = glm::abs(glm::dot(normal, direction))
+			/ (glm::length(normal) * glm::length(direction));
+		const float ratio = (1 - m.indexOfRefraction) / (1 + m.indexOfRefraction);
+		const float r0 = ratio * ratio;
+		const float falloff = 1 - cosTheta;
+		const float reflectance =
+			r0 + (1 - r0) * falloff * falloff * falloff * falloff * falloff;
+
+		if (randf(rng) > reflectance) {
+			// The ray is leaving the medium when it points the same way as
+			// the normal; flip the normal to face it and invert the ratio
+			// of refractive indices.
+			const bool exiting = glm::dot(normal, incoming) >= 0;
+			const glm::vec3 refracted = glm::refract(incoming,
+				exiting ? -normal : normal,
+				exiting ? m.indexOfRefraction : 1.f / m.indexOfRefraction);
+			// glm::refract yields a zero vector on total internal
+			// reflection; keep the mirror direction then, rather than
+			// continuing with a ray that has no direction.
+			if (glm::dot(refracted, refracted) > 0.f) {
+				direction = refracted;
+			}
+		}
+	} else { // Diffuse
+		direction = calculateRandomDirectionInHemisphere(normal, rng);
+		pathSegment.color *= m.color;
+	}
+
+	pathSegment.ray.direction = direction;
+	// Start slightly off the surface so the next intersection test does not
+	// immediately hit the same point again.
+	pathSegment.ray.origin = intersect + (0.001f) * pathSegment.ray.direction;
+	--pathSegment.remainingBounces;
+}
diff --git a/src/intersections.h b/src/intersections.h
index 6f23872..deadd51 100644
--- a/src/intersections.h
+++ b/src/intersections.h
@@ -2,6 +2,7 @@
 
 #include <glm/glm.hpp>
 #include <glm/gtx/intersect.hpp>
+#include <glm/gtx/normal.hpp>
 
 #include "sceneStructs.h"
 #include "utilities.h"
@@ -89,6 +90,42 @@ __host__ __device__ float boxIntersectionTest(Geom box, Ray r,
     return -1;
 }
 
+#if MESH_BOX
+// Slab test of ray r against the axis-aligned box [boxMin, boxMax]. Returns the distance
+// from r.origin to the point written to intersectionPoint, or -1 when the box is missed
+// or lies entirely behind the ray. `normal` and `outside` are accepted but never written.
+__host__ __device__ float boxIntersectionTest(const Ray &r, glm::vec3 &intersectionPoint,
+	glm::vec3 &normal, bool &outside, glm::vec3 boxMin, glm::vec3 boxMax) {
+	float tmin, tmax, tymin, tymax, tzmin, tzmax;
+	glm::vec3 invdir = 1.0f / r.direction;
+	// sign[k] is 1 when the ray runs toward -k, so bounds[sign[k]] is the plane reached first.
+	int sign[3] = { invdir.x < 0, invdir.y < 0, invdir.z < 0 };
+	glm::vec3 bounds[2] = { boxMin, boxMax };
+
+	tmin = (bounds[sign[0]].x - r.origin.x) * invdir.x;
+	tmax = (bounds[1 - sign[0]].x - r.origin.x) * invdir.x;
+	tymin = (bounds[sign[1]].y - r.origin.y) * invdir.y;
+	tymax = (bounds[1 - sign[1]].y - r.origin.y) * invdir.y;
+	if ((tmin > tymax) || (tymin > tmax))
+		return -1;
+	if (tymin > tmin) tmin = tymin;
+	if (tymax < tmax) tmax = tymax;
+
+	tzmin = (bounds[sign[2]].z - r.origin.z) * invdir.z;
+	tzmax = (bounds[1 - sign[2]].z - r.origin.z) * invdir.z;
+	if ((tmin > tzmax) || (tzmin > tmax))
+		return -1;
+	if (tzmin > tmin) tmin = tzmin;
+	if (tzmax < tmax) tmax = tzmax;
+
+	// tmax < 0 means even the exit point is behind the origin, so nothing ahead is hit.
+	if (tmax < 0)
+		return -1;
+	intersectionPoint = r.origin + r.direction * tmin;
+	return glm::length(r.origin - intersectionPoint);
+}
+#endif
+
 // CHECKITOUT
 /**
  * Test intersection between a ray and a transformed sphere. Untransformed,
@@ -142,3 +179,62 @@ __host__ __device__ float sphereIntersectionTest(Geom sphere, Ray r,
 
     return glm::length(r.origin - intersectionPoint);
 }
+
+// Ray/triangle test built on glm::intersectRayTriangle. Returns the ray parameter t of
+// the hit (-1 on a miss) and fills intersectionPoint and the geometric face normal.
+// `outside` is not written here.
+__host__ __device__ float triangleIntersectionTest(Triangle triangle, Ray r,
+		glm::vec3 &intersectionPoint, glm::vec3 &normal, bool &outside) {
+	// glm packs the result as (u, v, t): u and v weight v1 and v2, t is the ray parameter.
+	glm::vec3 bary;
+	bool success = glm::intersectRayTriangle(r.origin, r.direction, triangle.v0, triangle.v1, triangle.v2, bary);
+	if (!success) {
+		return -1;
+	}
+	float t = bary.z;
+	// Take the hit point from the ray equation rather than from the barycentric weights.
+	intersectionPoint = r.origin + r.direction * t;
+	normal = glm::triangleNormal(triangle.v0, triangle.v1, triangle.v2);
+	return t;
+}
+
+/**
+* Closest-hit search of a ray against every triangle in an array.
+*
+* @param triangles          Array of tri_size triangles; each one is tested.
+* @param intersectionPoint  Output parameter for the nearest hit point.
+* @param normal             Output parameter for the nearest hit's normal.
+* @param outside            Output param; always set to true after the loop.
+* @return                   Smallest non-negative `t`. -1 if no intersection.
+*/
+__host__ __device__ float meshIntersectionTest(Ray r, Triangle* triangles, int tri_size,
+	glm::vec3 &intersectionPoint, glm::vec3 &normal, bool &outside
+#if MESH_BOX
+	, glm::vec3 boxMin, glm::vec3 boxMax
+#endif
+) {
+#if MESH_BOX
+	// With MESH_BOX enabled, a ray that misses the bounding box skips all triangles.
+	if (boxIntersectionTest(r, intersectionPoint, normal, outside, boxMin, boxMax) < 0) {
+		return -1;
+	}
+#endif
+	float closest = -1;
+	glm::vec3 hitPoint;
+	glm::vec3 hitNormal;
+	for (int i = 0; i < tri_size; ++i) {
+		const float candidate = triangleIntersectionTest(triangles[i], r, hitPoint, hitNormal, outside);
+		// Skip misses (negative t).
+		if (!(candidate >= 0)) {
+			continue;
+		}
+		// Keep the first hit found and any later hit that is nearer.
+		if (closest < 0 || candidate < closest) {
+			closest = candidate;
+			intersectionPoint = hitPoint;
+			normal = hitNormal;
+		}
+	}
+	outside = true;
+	return closest;
+}
diff --git a/src/main.cpp b/src/main.cpp
index fe8e85e..f26599e 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,5 +1,6 @@
 #include "main.h"
 #include "preview.h"
+#include "performance.h"
 #include <cstring>
 
 static std::string startTimeString;
@@ -98,6 +99,14 @@ void saveImage() {
     //img.saveHDR(filename);  // Save a Radiance HDR file
 }
 
+using Performance::PerformanceTimer;
+PerformanceTimer& timer() // Returns a reference to the one PerformanceTimer held by this function.
+{
+	static PerformanceTimer timer; // function-local static: the same object is returned on every call
+	return timer;
+}
+
+float totalTime = 0.0f;
 void runCuda() {
     if (camchanged) {
         iteration = 0;
@@ -134,8 +143,14 @@ void runCuda() {
 
         // execute the kernel
         int frame = 0;
+		/*if(iteration < 1000)
+			timer().startCpuTimer();*/
         pathtrace(pbo_dptr, frame, iteration);
-
+		/*if (iteration <= 1000) {
+			timer().endCpuTimer();
+			totalTime += timer().getCpuElapsedTimeForPreviousOperation();
+			cout << totalTime << endl;
+		}*/
         // unmap buffer object
         cudaGLUnmapBufferObject(pbo);
     } else {
diff --git a/src/pathtrace.cu b/src/pathtrace.cu
index c1ec122..d9e5105 100644
--- a/src/pathtrace.cu
+++ b/src/pathtrace.cu
@@ -1,393 +1,471 @@
-#include <cstdio>
-#include <cuda.h>
-#include <cmath>
-#include <thrust/execution_policy.h>
-#include <thrust/random.h>
-#include <thrust/remove.h>
-
-#include "sceneStructs.h"
-#include "scene.h"
-#include "glm/glm.hpp"
-#include "glm/gtx/norm.hpp"
-#include "utilities.h"
-#include "pathtrace.h"
-#include "intersections.h"
-#include "interactions.h"
-
-#define ERRORCHECK 1
-
-#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
-#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
-void checkCUDAErrorFn(const char *msg, const char *file, int line) {
-#if ERRORCHECK
-    cudaDeviceSynchronize();
-    cudaError_t err = cudaGetLastError();
-    if (cudaSuccess == err) {
-        return;
-    }
-
-    fprintf(stderr, "CUDA error");
-    if (file) {
-        fprintf(stderr, " (%s:%d)", file, line);
-    }
-    fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err));
-#  ifdef _WIN32
-    getchar();
-#  endif
-    exit(EXIT_FAILURE);
-#endif
-}
-
-__host__ __device__
-thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) {
-    int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index);
-    return thrust::default_random_engine(h);
-}
-
-//Kernel that writes the image to the OpenGL PBO directly.
-__global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution,
-        int iter, glm::vec3* image) {
-    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
-    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
-
-    if (x < resolution.x && y < resolution.y) {
-        int index = x + (y * resolution.x);
-        glm::vec3 pix = image[index];
-
-        glm::ivec3 color;
-        color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255);
-        color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255);
-        color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255);
-
-        // Each thread writes one pixel location in the texture (textel)
-        pbo[index].w = 0;
-        pbo[index].x = color.x;
-        pbo[index].y = color.y;
-        pbo[index].z = color.z;
-    }
-}
-
-static Scene * hst_scene = NULL;
-static glm::vec3 * dev_image = NULL;
-static Geom * dev_geoms = NULL;
-static Material * dev_materials = NULL;
-static PathSegment * dev_paths = NULL;
-static ShadeableIntersection * dev_intersections = NULL;
-// TODO: static variables for device memory, any extra info you need, etc
-// ...
-
-void pathtraceInit(Scene *scene) {
-    hst_scene = scene;
-    const Camera &cam = hst_scene->state.camera;
-    const int pixelcount = cam.resolution.x * cam.resolution.y;
-
-    cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3));
-    cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));
-
-  	cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));
-
-  	cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
-  	cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);
-
-  	cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
-  	cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);
-
-  	cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
-  	cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
-
-    // TODO: initialize any extra device memeory you need
-
-    checkCUDAError("pathtraceInit");
-}
-
-void pathtraceFree() {
-    cudaFree(dev_image);  // no-op if dev_image is null
-  	cudaFree(dev_paths);
-  	cudaFree(dev_geoms);
-  	cudaFree(dev_materials);
-  	cudaFree(dev_intersections);
-    // TODO: clean up any extra device memory you created
-
-    checkCUDAError("pathtraceFree");
-}
-
-/**
-* Generate PathSegments with rays from the camera through the screen into the
-* scene, which is the first bounce of rays.
-*
-* Antialiasing - add rays for sub-pixel sampling
-* motion blur - jitter rays "in time"
-* lens effect - jitter ray origin positions based on a lens
-*/
-__global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments)
-{
-	int x = (blockIdx.x * blockDim.x) + threadIdx.x;
-	int y = (blockIdx.y * blockDim.y) + threadIdx.y;
-
-	if (x < cam.resolution.x && y < cam.resolution.y) {
-		int index = x + (y * cam.resolution.x);
-		PathSegment & segment = pathSegments[index];
-
-		segment.ray.origin = cam.position;
-    segment.color = glm::vec3(1.0f, 1.0f, 1.0f);
-
-		// TODO: implement antialiasing by jittering the ray
-		segment.ray.direction = glm::normalize(cam.view
-			- cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f)
-			- cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f)
-			);
-
-		segment.pixelIndex = index;
-		segment.remainingBounces = traceDepth;
-	}
-}
-
-// TODO:
-// computeIntersections handles generating ray intersections ONLY.
-// Generating new rays is handled in your shader(s).
-// Feel free to modify the code below.
-__global__ void computeIntersections(
-	int depth
-	, int num_paths
-	, PathSegment * pathSegments
-	, Geom * geoms
-	, int geoms_size
-	, ShadeableIntersection * intersections
-	)
-{
-	int path_index = blockIdx.x * blockDim.x + threadIdx.x;
-
-	if (path_index < num_paths)
-	{
-		PathSegment pathSegment = pathSegments[path_index];
-
-		float t;
-		glm::vec3 intersect_point;
-		glm::vec3 normal;
-		float t_min = FLT_MAX;
-		int hit_geom_index = -1;
-		bool outside = true;
-
-		glm::vec3 tmp_intersect;
-		glm::vec3 tmp_normal;
-
-		// naive parse through global geoms
-
-		for (int i = 0; i < geoms_size; i++)
-		{
-			Geom & geom = geoms[i];
-
-			if (geom.type == CUBE)
-			{
-				t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
-			}
-			else if (geom.type == SPHERE)
-			{
-				t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
-			}
-			// TODO: add more intersection tests here... triangle? metaball? CSG?
-
-			// Compute the minimum t from the intersection tests to determine what
-			// scene geometry object was hit first.
-			if (t > 0.0f && t_min > t)
-			{
-				t_min = t;
-				hit_geom_index = i;
-				intersect_point = tmp_intersect;
-				normal = tmp_normal;
-			}
-		}
-
-		if (hit_geom_index == -1)
-		{
-			intersections[path_index].t = -1.0f;
-		}
-		else
-		{
-			//The ray hits something
-			intersections[path_index].t = t_min;
-			intersections[path_index].materialId = geoms[hit_geom_index].materialid;
-			intersections[path_index].surfaceNormal = normal;
-		}
-	}
-}
-
-// LOOK: "fake" shader demonstrating what you might do with the info in
-// a ShadeableIntersection, as well as how to use thrust's random number
-// generator. Observe that since the thrust random number generator basically
-// adds "noise" to the iteration, the image should start off noisy and get
-// cleaner as more iterations are computed.
-//
-// Note that this shader does NOT do a BSDF evaluation!
-// Your shaders should handle that - this can allow techniques such as
-// bump mapping.
-__global__ void shadeFakeMaterial (
-  int iter
-  , int num_paths
-	, ShadeableIntersection * shadeableIntersections
-	, PathSegment * pathSegments
-	, Material * materials
-	)
-{
-  int idx = blockIdx.x * blockDim.x + threadIdx.x;
-  if (idx < num_paths)
-  {
-    ShadeableIntersection intersection = shadeableIntersections[idx];
-    if (intersection.t > 0.0f) { // if the intersection exists...
-      // Set up the RNG
-      // LOOK: this is how you use thrust's RNG! Please look at
-      // makeSeededRandomEngine as well.
-      thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0);
-      thrust::uniform_real_distribution<float> u01(0, 1);
-
-      Material material = materials[intersection.materialId];
-      glm::vec3 materialColor = material.color;
-
-      // If the material indicates that the object was a light, "light" the ray
-      if (material.emittance > 0.0f) {
-        pathSegments[idx].color *= (materialColor * material.emittance);
-      }
-      // Otherwise, do some pseudo-lighting computation. This is actually more
-      // like what you would expect from shading in a rasterizer like OpenGL.
-      // TODO: replace this! you should be able to start with basically a one-liner
-      else {
-        float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f));
-        pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
-        pathSegments[idx].color *= u01(rng); // apply some noise because why not
-      }
-    // If there was no intersection, color the ray black.
-    // Lots of renderers use 4 channel color, RGBA, where A = alpha, often
-    // used for opacity, in which case they can indicate "no opacity".
-    // This can be useful for post-processing and image compositing.
-    } else {
-      pathSegments[idx].color = glm::vec3(0.0f);
-    }
-  }
-}
-
-// Add the current iteration's output to the overall image
-__global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths)
-{
-	int index = (blockIdx.x * blockDim.x) + threadIdx.x;
-
-	if (index < nPaths)
-	{
-		PathSegment iterationPath = iterationPaths[index];
-		image[iterationPath.pixelIndex] += iterationPath.color;
-	}
-}
-
-/**
- * Wrapper for the __global__ call that sets up the kernel calls and does a ton
- * of memory management
- */
-void pathtrace(uchar4 *pbo, int frame, int iter) {
-    const int traceDepth = hst_scene->state.traceDepth;
-    const Camera &cam = hst_scene->state.camera;
-    const int pixelcount = cam.resolution.x * cam.resolution.y;
-
-	// 2D block for generating ray from camera
-    const dim3 blockSize2d(8, 8);
-    const dim3 blocksPerGrid2d(
-            (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
-            (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
-
-	// 1D block for path tracing
-	const int blockSize1d = 128;
-
-    ///////////////////////////////////////////////////////////////////////////
-
-    // Recap:
-    // * Initialize array of path rays (using rays that come out of the camera)
-    //   * You can pass the Camera object to that kernel.
-    //   * Each path ray must carry at minimum a (ray, color) pair,
-    //   * where color starts as the multiplicative identity, white = (1, 1, 1).
-    //   * This has already been done for you.
-    // * For each depth:
-    //   * Compute an intersection in the scene for each path ray.
-    //     A very naive version of this has been implemented for you, but feel
-    //     free to add more primitives and/or a better algorithm.
-    //     Currently, intersection distance is recorded as a parametric distance,
-    //     t, or a "distance along the ray." t = -1.0 indicates no intersection.
-    //     * Color is attenuated (multiplied) by reflections off of any object
-    //   * TODO: Stream compact away all of the terminated paths.
-    //     You may use either your implementation or `thrust::remove_if` or its
-    //     cousins.
-    //     * Note that you can't really use a 2D kernel launch any more - switch
-    //       to 1D.
-    //   * TODO: Shade the rays that intersected something or didn't bottom out.
-    //     That is, color the ray by performing a color computation according
-    //     to the shader, then generate a new ray to continue the ray path.
-    //     We recommend just updating the ray's PathSegment in place.
-    //     Note that this step may come before or after stream compaction,
-    //     since some shaders you write may also cause a path to terminate.
-    // * Finally, add this iteration's results to the image. This has been done
-    //   for you.
-
-    // TODO: perform one iteration of path tracing
-
-	generateRayFromCamera <<<blocksPerGrid2d, blockSize2d >>>(cam, iter, traceDepth, dev_paths);
-	checkCUDAError("generate camera ray");
-
-	int depth = 0;
-	PathSegment* dev_path_end = dev_paths + pixelcount;
-	int num_paths = dev_path_end - dev_paths;
-
-	// --- PathSegment Tracing Stage ---
-	// Shoot ray into scene, bounce between objects, push shading chunks
-
-  bool iterationComplete = false;
-	while (!iterationComplete) {
-
-	// clean shading chunks
-	cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
-
-	// tracing
-	dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
-	computeIntersections <<<numblocksPathSegmentTracing, blockSize1d>>> (
-		depth
-		, num_paths
-		, dev_paths
-		, dev_geoms
-		, hst_scene->geoms.size()
-		, dev_intersections
-		);
-	checkCUDAError("trace one bounce");
-	cudaDeviceSynchronize();
-	depth++;
-
-
-	// TODO:
-	// --- Shading Stage ---
-	// Shade path segments based on intersections and generate new rays by
-  // evaluating the BSDF.
-  // Start off with just a big kernel that handles all the different
-  // materials you have in the scenefile.
-  // TODO: compare between directly shading the path segments and shading
-  // path segments that have been reshuffled to be contiguous in memory.
-
-  shadeFakeMaterial<<<numblocksPathSegmentTracing, blockSize1d>>> (
-    iter,
-    num_paths,
-    dev_intersections,
-    dev_paths,
-    dev_materials
-  );
-  iterationComplete = true; // TODO: should be based off stream compaction results.
-	}
-
-  // Assemble this iteration and apply it to the image
-  dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d;
-	finalGather<<<numBlocksPixels, blockSize1d>>>(num_paths, dev_image, dev_paths);
-
-    ///////////////////////////////////////////////////////////////////////////
-
-    // Send results to OpenGL buffer for rendering
-    sendImageToPBO<<<blocksPerGrid2d, blockSize2d>>>(pbo, cam.resolution, iter, dev_image);
-
-    // Retrieve image from GPU
-    cudaMemcpy(hst_scene->state.image.data(), dev_image,
-            pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
-
-    checkCUDAError("pathtrace");
-}
+#include <cstdio>
+#include <cuda.h>
+#include <cmath>
+#include <thrust/execution_policy.h>
+#include <thrust/random.h>
+#include <thrust/remove.h>
+#include <thrust/partition.h>
+
+#include "sceneStructs.h"
+#include "scene.h"
+#include "glm/glm.hpp"
+#include "glm/gtx/norm.hpp"
+#include "utilities.h"
+#include "pathtrace.h"
+#include "intersections.h"
+#include "interactions.h"
+#include "performance.h"
+
+#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
+void checkCUDAErrorFn(const char *msg, const char *file, int line) {
+#if ERRORCHECK
+    // Synchronize first, then look at the last error recorded by the runtime.
+    cudaDeviceSynchronize();
+    const cudaError_t status = cudaGetLastError();
+    if (status != cudaSuccess) {
+        // Printed as "CUDA error (file:line): msg: description"; the process then exits.
+        fprintf(stderr, "CUDA error");
+        if (file) {
+            fprintf(stderr, " (%s:%d)", file, line);
+        }
+        fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(status));
+#  ifdef _WIN32
+        getchar();
+#  endif
+        exit(EXIT_FAILURE);
+    }
+#endif
+}
+
+// Returns an engine seeded with a hash that mixes the iteration, the index and the depth.
+__host__ __device__
+thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) {
+    return thrust::default_random_engine(static_cast<int>(utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index)));
+}
+
+// Kernel: converts the accumulated image into 8-bit color and writes it to the OpenGL PBO.
+__global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution,
+        int iter, glm::vec3* image) {
+    const int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+    const int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+    // Threads that land outside the image have nothing to write.
+    if (x >= resolution.x || y >= resolution.y) {
+        return;
+    }
+
+    const int index = x + (y * resolution.x);
+    const glm::vec3 pix = image[index];
+
+    // Divide the stored value by iter, scale by 255 and clamp to [0, 255].
+    const int red = glm::clamp((int) (pix.x / iter * 255.0), 0, 255);
+    const int green = glm::clamp((int) (pix.y / iter * 255.0), 0, 255);
+    const int blue = glm::clamp((int) (pix.z / iter * 255.0), 0, 255);
+
+    // Each thread writes one texel of the PBO; its w component is always 0.
+    uchar4 &texel = pbo[index];
+    texel.w = 0; texel.x = red; texel.y = green; texel.z = blue;
+}
+
+static Scene * hst_scene = NULL;
+static glm::vec3 * dev_image = NULL;
+static Geom * dev_geoms = NULL;
+static Material * dev_materials = NULL;
+static PathSegment * dev_paths = NULL;
+static ShadeableIntersection * dev_intersections = NULL;
+static ShadeableIntersection * dev_cache_intersect = NULL;
+static Triangle * dev_triangles = NULL;
+// TODO: static variables for device memory, any extra info you need, etc
+// ...
+
+// Allocates the device buffers used by the path tracer and uploads the scene's
+// geometry, materials and triangles. Also records the scene in hst_scene.
+void pathtraceInit(Scene *scene) {
+    hst_scene = scene;
+    const Camera &cam = hst_scene->state.camera;
+    const int pixelcount = cam.resolution.x * cam.resolution.y;
+    const size_t geomBytes = scene->geoms.size() * sizeof(Geom);
+    const size_t materialBytes = scene->materials.size() * sizeof(Material);
+    const size_t triangleBytes = scene->triangles.size() * sizeof(Triangle);
+    const size_t intersectionBytes = pixelcount * sizeof(ShadeableIntersection);
+
+    // Per-pixel buffers: image, path segments, intersections and the intersection cache.
+    cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3));
+    cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));
+    cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));
+    cudaMalloc(&dev_intersections, intersectionBytes);
+    cudaMemset(dev_intersections, 0, intersectionBytes);
+    cudaMalloc(&dev_cache_intersect, intersectionBytes);
+
+    // Scene data copied from the host-side containers.
+    cudaMalloc(&dev_geoms, geomBytes);
+    cudaMemcpy(dev_geoms, scene->geoms.data(), geomBytes, cudaMemcpyHostToDevice);
+    cudaMalloc(&dev_materials, materialBytes);
+    cudaMemcpy(dev_materials, scene->materials.data(), materialBytes, cudaMemcpyHostToDevice);
+    cudaMalloc(&dev_triangles, triangleBytes);
+    cudaMemcpy(dev_triangles, scene->triangles.data(), triangleBytes, cudaMemcpyHostToDevice);
+    checkCUDAError("pathtraceInit");
+}
+
+// Releases every device buffer made by pathtraceInit and nulls the pointers, so a
+// repeated call passes NULL to cudaFree (a no-op) instead of a stale address.
+void pathtraceFree() {
+    cudaFree(dev_image);            dev_image = NULL;
+    cudaFree(dev_paths);            dev_paths = NULL;
+    cudaFree(dev_geoms);            dev_geoms = NULL;
+    cudaFree(dev_materials);        dev_materials = NULL;
+    cudaFree(dev_intersections);    dev_intersections = NULL;
+    cudaFree(dev_cache_intersect);  dev_cache_intersect = NULL;
+    cudaFree(dev_triangles);        dev_triangles = NULL;
+    checkCUDAError("pathtraceFree");
+}
+
+/**
+* Generate PathSegments with rays from the camera through the screen into the
+* scene, which is the first bounce of rays. One thread initializes one pixel's
+* segment: origin, white color, direction, pixelIndex and remainingBounces.
+*
+* With ANTI_ALIAS enabled the sample position is shifted by a random offset
+* in [0, 1) pixels on each axis, drawn from an RNG seeded by (iter, pixel).
+*/
+__global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments)
+{
+	int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+	int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+	if (x < cam.resolution.x && y < cam.resolution.y) {
+		int index = x + (y * cam.resolution.x);
+		PathSegment & segment = pathSegments[index];
+		// Seed from the pixel index, not from segment fields: the segment is not initialized yet.
+#if ANTI_ALIAS
+		thrust::default_random_engine rng = makeSeededRandomEngine(iter, index, 0);
+		thrust::uniform_real_distribution<float> randf(0, 1);
+#endif
+		segment.ray.origin = cam.position;
+		segment.color = glm::vec3(1.0f, 1.0f, 1.0f);
+
+		// Aim through the pixel; with ANTI_ALIAS a random sub-pixel offset is added.
+		segment.ray.direction = glm::normalize(cam.view
+#if ANTI_ALIAS
+			- cam.right * cam.pixelLength.x * ((float)(x + randf(rng)) - (float)cam.resolution.x * 0.5f)
+			- cam.up * cam.pixelLength.y * ((float)(y + randf(rng)) - (float)cam.resolution.y * 0.5f)
+#else
+			- cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f)
+			- cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f)
+#endif
+			);
+
+		segment.pixelIndex = index;
+		segment.remainingBounces = traceDepth;
+	}
+}
+
+// Kernel: one thread per path. Tests the path's ray against every entry of geoms and
+// stores the nearest hit (t, materialId, surfaceNormal) in intersections[path_index],
+// or t = -1 when nothing is hit. Generating new rays is left to the shading kernel.
+// `depth` is accepted but not read here.
+__global__ void computeIntersections(
+	int depth
+	, int num_paths
+	, PathSegment * pathSegments
+	, Geom * geoms
+	, int geoms_size
+	, ShadeableIntersection * intersections
+	, Triangle * triangles
+	, int tri_size
+#if MESH_BOX
+	, glm::vec3 boxMin
+	, glm::vec3 boxMax
+#endif
+	)
+{
+	int path_index = blockIdx.x * blockDim.x + threadIdx.x;
+
+	if (path_index < num_paths)
+	{
+		PathSegment pathSegment = pathSegments[path_index];
+
+		// Closest hit found so far; hit_geom_index stays -1 until something is hit.
+		glm::vec3 intersect_point;
+		glm::vec3 normal;
+		float t_min = FLT_MAX;
+		int hit_geom_index = -1;
+		bool outside = true;
+
+		glm::vec3 tmp_intersect;
+		glm::vec3 tmp_normal;
+
+		// naive parse through global geoms
+
+		for (int i = 0; i < geoms_size; i++)
+		{
+			Geom & geom = geoms[i];
+			float t = -1.0f; // stays a miss if no branch below handles geom.type
+			if (geom.type == CUBE)
+			{
+				t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+			}
+			else if (geom.type == SPHERE)
+			{
+				t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+			}
+			else if (geom.type == MESH)
+			{
+				t = meshIntersectionTest(pathSegment.ray, triangles, tri_size, tmp_intersect, tmp_normal, outside
+#if MESH_BOX
+					, boxMin, boxMax
+#endif
+				);
+			}
+			// TODO: add more intersection tests here... triangle? metaball? CSG?
+
+			// Compute the minimum t from the intersection tests to determine what
+			// scene geometry object was hit first.
+			if (t > 0.0f && t_min > t)
+			{
+				t_min = t;
+				hit_geom_index = i;
+				intersect_point = tmp_intersect;
+				normal = tmp_normal;
+			}
+		}
+
+		if (hit_geom_index == -1)
+		{
+			intersections[path_index].t = -1.0f;
+		}
+		else
+		{
+			//The ray hits something
+			intersections[path_index].t = t_min;
+			intersections[path_index].materialId = geoms[hit_geom_index].materialid;
+			intersections[path_index].surfaceNormal = normal;
+		}
+	}
+}
+
+// Shading kernel: one thread per path segment.
+//  - Segments with no bounces left are skipped.
+//  - A miss (t <= 0) turns the segment black and terminates it.
+//  - A hit on an emissive material multiplies the color by color * emittance
+//    and terminates the segment.
+//  - Any other hit is handed to scatterRay, which updates the color, builds the
+//    next ray and uses up one bounce. If that was the last bounce the segment never
+//    reached a light, so its color is set to black.
+// The RNG is seeded from (iter, idx, remainingBounces).
+__global__ void shadeFakeMaterial (
+  int iter
+  , int num_paths
+	, ShadeableIntersection * shadeableIntersections
+	, PathSegment * pathSegments
+	, Material * materials
+	)
+{
+	int idx = blockIdx.x * blockDim.x + threadIdx.x;
+	if (idx >= num_paths)
+		return;
+
+	// Work on the segment in place; paths that already ended are left untouched.
+	PathSegment &segment = pathSegments[idx];
+	if (segment.remainingBounces == 0)
+		return;
+
+	ShadeableIntersection intersection = shadeableIntersections[idx];
+	if (!(intersection.t > 0.0f)) {
+		// If there was no intersection, color the ray black and end the path.
+		segment.color = glm::vec3(0.0f);
+		segment.remainingBounces = 0;
+		return;
+	}
+
+	Material material = materials[intersection.materialId];
+
+	// If the material indicates that the object was a light, "light" the ray
+	// and end the path there.
+	if (material.emittance > 0.0f) {
+		segment.color *= (material.color * material.emittance);
+		segment.remainingBounces = 0;
+		return;
+	}
+
+	// Set up the RNG. remainingBounces goes into the seed so each bounce of a
+	// path gets its own seed within an iteration.
+	thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces);
+
+	// scatterRay updates the color, aims the ray along the new direction and
+	// decrements remainingBounces.
+	scatterRay(segment, getPointOnRay(segment.ray, intersection.t),
+		intersection.surfaceNormal, material, rng);
+
+	// A path that spends its last bounce here ended without reaching a light.
+	// Clear its color so that whatever attenuation it accumulated is not later
+	// added to the image as if it were light.
+	if (segment.remainingBounces <= 0) {
+		segment.color = glm::vec3(0.0f);
+	}
+}
+
+// Kernel: adds each path's color to the image entry selected by its pixelIndex.
+__global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths)
+{
+	const int pathIdx = (blockIdx.x * blockDim.x) + threadIdx.x;
+	// Threads past the end of the path array have nothing to gather.
+	if (pathIdx >= nPaths) {
+		return;
+	}
+	const PathSegment &path = iterationPaths[pathIdx];
+	image[path.pixelIndex] += path.color;
+}
+
+// Unary predicate for thrust stream compaction: true while the path segment
+// still has bounces remaining.
+struct valid_path
+{
+	__host__ __device__ bool operator()(const PathSegment& segment)
+	{
+		return 0 < segment.remainingBounces;
+	}
+};
+
+// Binary comparator over ShadeableIntersection, passed to thrust::sort_by_key:
+// orders intersections by descending materialId.
+struct sort_material
+{
+	__host__ __device__ bool operator()(const ShadeableIntersection& lhs, const ShadeableIntersection& rhs)
+	{
+		return rhs.materialId < lhs.materialId;
+	}
+};
+
+/**
+ * Host-side driver for one path-tracing iteration.
+ *
+ * Steps:
+ *   1. Generate one camera ray per path into dev_paths.
+ *   2. Repeat until no paths remain or traceDepth bounces were traced:
+ *        a. obtain intersections (freshly computed, or restored from the
+ *           first-bounce cache when CACHE_INTERSECTIONS applies),
+ *        b. optionally sort intersections/paths by material (SORT_MATERIAL),
+ *        c. shade the paths and scatter new rays,
+ *        d. optionally partition finished paths to the back (STREAM_COMPACTION).
+ *   3. Accumulate the paths into dev_image, write the PBO and copy the image
+ *      back into hst_scene->state.image.
+ *
+ * @param pbo   pixel buffer handed to sendImageToPBO for display
+ * @param frame not referenced by this function
+ * @param iter  iteration counter; iteration 1 fills the first-bounce cache and
+ *              later iterations (iter > 1) reuse it
+ */
+void pathtrace(uchar4 *pbo, int frame, int iter) {
+    const int traceDepth = hst_scene->state.traceDepth;
+    const Camera &cam = hst_scene->state.camera;
+    const int pixelcount = cam.resolution.x * cam.resolution.y;
+
+	// 2D launch configuration covering the image resolution
+    const dim3 blockSize2d(8, 8);
+    const dim3 blocksPerGrid2d(
+            (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
+            (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
+
+	// 1D block size for the per-path kernels
+	const int blockSize1d = 128;
+
+    ///////////////////////////////////////////////////////////////////////////
+
+	generateRayFromCamera<<<blocksPerGrid2d, blockSize2d>>>(cam, iter, traceDepth, dev_paths);
+	checkCUDAError("generate camera ray");
+
+	int depth = 0;
+	int num_paths = pixelcount;
+
+	// --- PathSegment Tracing Stage ---
+	// Shoot ray into scene, bounce between objects, push shading chunks
+
+	bool iterationComplete = false;
+	while (!iterationComplete) {
+		const dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
+		const size_t intersectionBytes = num_paths * sizeof(ShadeableIntersection);
+		// NOTE(review): reusing first-bounce hits assumes the camera rays are
+		// identical in every iteration; if ANTI_ALIAS jitters them the cached
+		// hits will not match the rays - confirm in generateRayFromCamera.
+		const bool useCached = CACHE_INTERSECTIONS && depth == 0 && iter > 1;
+
+		if (useCached) {
+			// Restore the cached first bounce into the working buffer so that
+			// sorting and shading below always operate on dev_intersections and
+			// stay in lock-step with dev_paths.
+			cudaMemcpy(dev_intersections, dev_cache_intersect, intersectionBytes, cudaMemcpyDeviceToDevice);
+			checkCUDAError("restore cached intersections");
+		}
+		else {
+			// clean shading chunks
+			cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
+
+			// tracing
+			computeIntersections<<<numblocksPathSegmentTracing, blockSize1d>>>(
+				depth, num_paths, dev_paths, dev_geoms, hst_scene->geoms.size(),
+				dev_intersections, dev_triangles, hst_scene->triangles.size()
+#if MESH_BOX
+				, hst_scene->boxMin, hst_scene->boxMax
+#endif
+				);
+			checkCUDAError("trace one bounce");
+			cudaDeviceSynchronize();
+		}
+
+#if CACHE_INTERSECTIONS
+		if (iter == 1 && depth == 0) {
+			// Cache the first bounce BEFORE any material sort so the cache
+			// keeps the same ordering as freshly generated camera rays.
+			cudaMemcpy(dev_cache_intersect, dev_intersections, intersectionBytes, cudaMemcpyDeviceToDevice);
+		}
+#endif
+
+#if SORT_MATERIAL
+		// Reshuffle segments to have materials contiguous in memory
+		thrust::sort_by_key(thrust::device, dev_intersections, dev_intersections + num_paths, dev_paths, sort_material());
+#endif
+
+		// --- Shading Stage ---
+		// Shade path segments based on intersections and generate new rays by
+		// evaluating the BSDF.
+		shadeFakeMaterial<<<numblocksPathSegmentTracing, blockSize1d>>>(
+			iter, num_paths,
+			dev_intersections,
+			dev_paths, dev_materials
+		);
+		checkCUDAError("shade paths");
+
+#if STREAM_COMPACTION
+		// Move paths accepted by valid_path to the front; only those are traced
+		// on the next bounce. The others stay in dev_paths for the final gather.
+		PathSegment* dev_path_end = thrust::partition(thrust::device, dev_paths, dev_paths + num_paths, valid_path());
+		num_paths = static_cast<int>(dev_path_end - dev_paths);
+#endif
+
+		++depth;
+
+		iterationComplete = num_paths == 0 || depth >= traceDepth;
+	}
+
+	// Assemble this iteration and apply it to the image. The gather runs over
+	// all pixelcount paths, not just the ones still active.
+	const dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d;
+	finalGather<<<numBlocksPixels, blockSize1d>>>(pixelcount, dev_image, dev_paths);
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    // Send results to OpenGL buffer for rendering
+    sendImageToPBO<<<blocksPerGrid2d, blockSize2d>>>(pbo, cam.resolution, iter, dev_image);
+
+    // Retrieve image from GPU
+    cudaMemcpy(hst_scene->state.image.data(), dev_image,
+            pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
+
+    checkCUDAError("pathtrace");
+}
diff --git a/src/performance.h b/src/performance.h
new file mode 100644
index 0000000..d53c607
--- /dev/null
+++ b/src/performance.h
@@ -0,0 +1,101 @@
+#pragma once
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <stdexcept>
+#include <chrono>
+
+/**
+* Stopwatch utility for performance measurements.
+*
+* Offers two independent timers: a CPU timer based on
+* std::chrono::high_resolution_clock and a GPU timer based on CUDA events.
+* Each timer must be started before it is ended and cannot be started twice;
+* violations throw std::runtime_error. Elapsed times are in milliseconds.
+*
+* Uncopyable and unmovable (the instance owns two CUDA events).
+*
+* Adapted from WindyDarian(https://github.com/WindyDarian)
+*/
+
+namespace Performance {
+	class PerformanceTimer
+	{
+	public:
+		// Creates the CUDA events used by the GPU timer.
+		PerformanceTimer()
+		{
+			cudaEventCreate(&event_start);
+			cudaEventCreate(&event_end);
+		}
+
+		// Releases the CUDA events.
+		~PerformanceTimer()
+		{
+			cudaEventDestroy(event_start);
+			cudaEventDestroy(event_end);
+		}
+
+		// Starts the CPU timer; throws std::runtime_error if it is already running.
+		void startCpuTimer()
+		{
+			if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); }
+			cpu_timer_started = true;
+
+			time_start_cpu = std::chrono::high_resolution_clock::now();
+		}
+
+		// Stops the CPU timer and stores the elapsed milliseconds;
+		// throws std::runtime_error if the timer was not started.
+		void endCpuTimer()
+		{
+			// Sample the clock first so the state check is not part of the measurement.
+			time_end_cpu = std::chrono::high_resolution_clock::now();
+
+			if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); }
+
+			const std::chrono::duration<double, std::milli> elapsed = time_end_cpu - time_start_cpu;
+			prev_elapsed_time_cpu_milliseconds =
+				static_cast<decltype(prev_elapsed_time_cpu_milliseconds)>(elapsed.count());
+
+			cpu_timer_started = false;
+		}
+
+		// Starts the GPU timer by recording the start event;
+		// throws std::runtime_error if it is already running.
+		void startGpuTimer()
+		{
+			if (gpu_timer_started) { throw std::runtime_error("GPU timer already started"); }
+			gpu_timer_started = true;
+
+			cudaEventRecord(event_start);
+		}
+
+		// Stops the GPU timer: records the end event, waits for it and stores
+		// the elapsed milliseconds between both events.
+		// Throws std::runtime_error if the timer was not started.
+		void endGpuTimer()
+		{
+			// Validate before touching CUDA so a misuse neither records an
+			// event nor blocks on the device.
+			if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); }
+
+			cudaEventRecord(event_end);
+			cudaEventSynchronize(event_end);
+
+			cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end);
+			gpu_timer_started = false;
+		}
+
+		// Milliseconds measured by the last completed CPU start/end pair (0 if none).
+		float getCpuElapsedTimeForPreviousOperation() const
+		{
+			return prev_elapsed_time_cpu_milliseconds;
+		}
+
+		// Milliseconds measured by the last completed GPU start/end pair (0 if none).
+		float getGpuElapsedTimeForPreviousOperation() const
+		{
+			return prev_elapsed_time_gpu_milliseconds;
+		}
+
+		// remove copy and move functions
+		PerformanceTimer(const PerformanceTimer&) = delete;
+		PerformanceTimer(PerformanceTimer&&) = delete;
+		PerformanceTimer& operator=(const PerformanceTimer&) = delete;
+		PerformanceTimer& operator=(PerformanceTimer&&) = delete;
+
+	private:
+		cudaEvent_t event_start = nullptr;
+		cudaEvent_t event_end = nullptr;
+
+		using time_point_t = std::chrono::high_resolution_clock::time_point;
+		time_point_t time_start_cpu;
+		time_point_t time_end_cpu;
+
+		bool cpu_timer_started = false;
+		bool gpu_timer_started = false;
+
+		float prev_elapsed_time_cpu_milliseconds = 0.f;
+		float prev_elapsed_time_gpu_milliseconds = 0.f;
+	};
+}
\ No newline at end of file
diff --git a/src/scene.cpp b/src/scene.cpp
index cbae043..37f03a2 100644
--- a/src/scene.cpp
+++ b/src/scene.cpp
@@ -4,6 +4,8 @@
 #include <glm/gtc/matrix_inverse.hpp>
 #include <glm/gtx/string_cast.hpp>
 
+#include <tinyobjloader/tiny_obj_loader.h>
+
 Scene::Scene(string filename) {
     cout << "Reading scene from " << filename << " ..." << endl;
     cout << " " << endl;
@@ -51,7 +53,10 @@ int Scene::loadGeom(string objectid) {
             } else if (strcmp(line.c_str(), "cube") == 0) {
                 cout << "Creating new cube..." << endl;
                 newGeom.type = CUBE;
-            }
+			} else if (strcmp(line.c_str(), "mesh") == 0) {
+				cout << "Creating new mesh..." << endl;
+				newGeom.type = MESH;
+			}
         }
 
         //link material
@@ -74,7 +79,9 @@ int Scene::loadGeom(string objectid) {
                 newGeom.rotation = glm::vec3(atof(tokens[1].c_str()), atof(tokens[2].c_str()), atof(tokens[3].c_str()));
             } else if (strcmp(tokens[0].c_str(), "SCALE") == 0) {
                 newGeom.scale = glm::vec3(atof(tokens[1].c_str()), atof(tokens[2].c_str()), atof(tokens[3].c_str()));
-            }
+            } else if (strcmp(tokens[0].c_str(), "FILE") == 0 && newGeom.type == MESH) {
+				loadMesh(tokens[1].c_str(), newGeom);
+			}
 
             utilityCore::safeGetline(fp_in, line);
         }
@@ -186,3 +193,68 @@ int Scene::loadMaterial(string materialid) {
         return 1;
     }
 }
+
+// Scratch storage filled by tinyobj::LoadObj in Scene::loadMesh.
+tinyobj::attrib_t obj_attrib;
+std::vector<tinyobj::shape_t> obj_shapes;
+std::vector<tinyobj::material_t> obj_materials;
+
+/**
+ * Loads an OBJ file with tinyobjloader and appends one Triangle per face to
+ * `triangles`, growing the bounding box (`boxMin`, `boxMax`) to enclose every
+ * appended vertex. Only vertex positions are read; normals are ignored.
+ *
+ * @param objpath path of the OBJ file to load
+ * @param mesh    geometry the file belongs to (currently not used)
+ * @return always 1
+ * @throws std::runtime_error carrying the loader's message when loading fails
+ */
+int Scene::loadMesh(const char* objpath, Geom mesh) {
+	std::string err;
+
+	if (!tinyobj::LoadObj(&obj_attrib, &obj_shapes, &obj_materials, &err, objpath)) {
+		throw std::runtime_error(err);
+	}
+
+	// Builds the position referenced by an OBJ index (three floats per vertex).
+	const auto vertexAt = [](const tinyobj::index_t& idx) {
+		const size_t base = 3 * static_cast<size_t>(idx.vertex_index);
+		return glm::vec3(
+			obj_attrib.vertices[base],
+			obj_attrib.vertices[base + 1],
+			obj_attrib.vertices[base + 2]);
+	};
+
+	//TODO: populate hierarchical data structure
+	for (const auto& shape : obj_shapes) {
+		const auto& faces = shape.mesh;
+		size_t offset = 0;
+		for (size_t f = 0; f < faces.num_face_vertices.size(); ++f) {
+			const size_t vertexCount = faces.num_face_vertices[f];
+			if (vertexCount < 3) {
+				// Not enough indices to form a triangle; skip without reading
+				// past this face's index range.
+				offset += vertexCount;
+				continue;
+			}
+
+			// Only the first three indices of a face are used.
+			Triangle tri;
+			tri.v0 = vertexAt(faces.indices[offset]);
+			tri.v1 = vertexAt(faces.indices[offset + 1]);
+			tri.v2 = vertexAt(faces.indices[offset + 2]);
+
+			// The box members have no initializer, so seed them from the very
+			// first vertex stored; later meshes keep extending the same box.
+			if (triangles.empty()) {
+				boxMin = tri.v0;
+				boxMax = tri.v0;
+			}
+
+			// Min and max are tested independently: a vertex that lowers the
+			// minimum may also be the largest value seen so far.
+			for (int t = 0; t < 3; ++t) {
+				for (int d = 0; d < 3; ++d) {
+					if (tri[t][d] < boxMin[d]) {
+						boxMin[d] = tri[t][d];
+					}
+					if (tri[t][d] > boxMax[d]) {
+						boxMax[d] = tri[t][d];
+					}
+				}
+			}
+
+			triangles.push_back(tri);
+			offset += vertexCount;
+		}
+	}
+
+	return 1;
+}
diff --git a/src/scene.h b/src/scene.h
index f29a917..261d417 100644
--- a/src/scene.h
+++ b/src/scene.h
@@ -16,11 +16,17 @@ class Scene {
     int loadMaterial(string materialid);
     int loadGeom(string objectid);
     int loadCamera();
+	int loadMesh(const char* objpath, Geom mesh);
 public:
     Scene(string filename);
     ~Scene();
 
     std::vector<Geom> geoms;
     std::vector<Material> materials;
+	std::vector<Triangle> triangles;
+	glm::vec3 boxMin;
+	glm::vec3 boxMax;
     RenderState state;
+	
+	// TODO: define hierarchical data structure
 };
diff --git a/src/sceneStructs.h b/src/sceneStructs.h
index b38b820..575ab47 100644
--- a/src/sceneStructs.h
+++ b/src/sceneStructs.h
@@ -6,10 +6,17 @@
 #include "glm/glm.hpp"
 
 #define BACKGROUND_COLOR (glm::vec3(0.0f))
+#define ERRORCHECK 0 // NOTE(review): not referenced in the visible code; presumably enables CUDA error checks - confirm
+#define STREAM_COMPACTION 0 // 1: pathtrace() partitions dev_paths with valid_path after each shading pass
+#define ANTI_ALIAS 1 // NOTE(review): not referenced in the visible code; presumably toggles anti-aliasing - confirm
+#define CACHE_INTERSECTIONS 0 // 1: pathtrace() stores iteration 1's depth-0 intersections and reuses them when iter > 1
+#define SORT_MATERIAL 0 // 1: pathtrace() sorts intersections and paths by materialId before shading
+#define MESH_BOX 0 // 1: pathtrace() passes the scene's boxMin/boxMax to computeIntersections
 
 enum GeomType {
     SPHERE,
     CUBE,
+	MESH, // assigned by Scene::loadGeom when an object's type line is "mesh"
 };
 
 struct Ray {
@@ -28,6 +35,21 @@ struct Geom {
     glm::mat4 invTranspose;
 };
 
+// Triangle described by its three vertex positions.
+struct Triangle {
+	glm::vec3 v0;
+	glm::vec3 v1;
+	glm::vec3 v2;
+	// Returns a copy of vertex i (0..2); any other index yields v0.
+	glm::vec3 operator[](int i) const {
+		switch (i) {
+			case 1: return v1;
+			case 2: return v2;
+			default: return v0;
+		}
+	}
+};
+
 struct Material {
     glm::vec3 color;
     struct {