diff --git a/README.md b/README.md
index cad1abd..ca364b3 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,48 @@
 CUDA Rasterizer
 ===============
 
-[CLICK ME FOR INSTRUCTION OF THIS PROJECT](./INSTRUCTION.md)
-
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4**
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Richard Lee
+* Tested on: Windows 7, i7-3720QM @ 2.60GHz 8GB, GT 650M 4GB (Personal Computer)
+
+## Features
+
+* Basic Rasterization Pipeline
+* Per-Vertex Color Interpolation
+* Backface Culling
+* Texture Mapping with Bilinear Filtering and Perspective Correction
+
+#### Basic Pipeline
+
+The rasterizer implements the vertex shading, rasterization, and fragment shading stages of the graphics pipeline. It stores the glTF model as triangle primitives, which are rasterized and passed into the fragment shader buffer with a depth test before being shaded and rendered with a diffuse lighting scheme.
+
+<img src="img/truck.png" width="400" height="400">
+
+
+#### Per-Vertex Color and Texture Mapping
+
+Both per-vertex color interpolation and perspective-correct texture mapping were implemented for the rasterizer. Performing a texture lookup for each fragment had a significant performance impact on the rasterization stage, which is expected: the per-vertex method only has to interpolate the three vertex colors, whereas texture mapping adds a texture read for every fragment. However, texture mapping also gives a significant improvement in quality, as seen in the comparison below. With large triangles containing a lot of texture information, such as the milk truck model, texture mapping must be used in order to obtain a sufficient amount of detail.
+
+<img src="img/withoutTexture.png" width="180" height="180"> <img src="img/withTexture.png" width="180" height="180">
+
+<img src="img/textureChart.png" width="534" height="350">
+
+#### Bilinear Filtering
+
+Bilinear texture filtering was also implemented for the texture mapping stage: instead of using only the nearest texel, the color is linearly interpolated, horizontally and vertically, between the texels surrounding the UV coordinate. This had a small performance impact due to the additional texture lookups, but removed some aliasing on the textures.
+
+<img src="img/withoutBilinear.png" width="180" height="180"> <img src="img/withBilinear.png" width="180" height="180">
+
+<img src="img/bilinearChart.png" width="600" height="400">
+
+#### Backface Culling
 
-### (TODO: Your README)
+Backface culling was implemented with stream compaction: primitives facing away from the camera are removed before the rasterization stage. This had a negligible effect on smaller models like the duck, but gave a more noticeable speedup of the rasterization stage on larger models like the truck, due to the reduced primitive pool. In addition, this stage removes the faces of unenclosed (open) geometry that face away from the camera, as seen below.
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+<img src="img/withoutBackface.png" width="180" height="180"> <img src="img/withBackface.png" width="180" height="180">
 
+<img src="img/backfaceDuckChart.png" width="600" height="400"> <img src="img/backfaceTruckChart.png" width="600" height="400">
 
 ### Credits
 
diff --git a/img/backfaceDuckChart.png b/img/backfaceDuckChart.png
new file mode 100755
index 0000000..225007f
Binary files /dev/null and b/img/backfaceDuckChart.png differ
diff --git a/img/backfaceTruckChart.png b/img/backfaceTruckChart.png
new file mode 100755
index 0000000..16dacf4
Binary files /dev/null and b/img/backfaceTruckChart.png differ
diff --git a/img/bilinearChart.png b/img/bilinearChart.png
new file mode 100755
index 0000000..8c95c41
Binary files /dev/null and b/img/bilinearChart.png differ
diff --git a/img/textureChart.png b/img/textureChart.png
new file mode 100755
index 0000000..453a784
Binary files /dev/null and b/img/textureChart.png differ
diff --git a/img/truck.png b/img/truck.png
new file mode 100755
index 0000000..28dc9ef
Binary files /dev/null and b/img/truck.png differ
diff --git a/img/withBackface.png b/img/withBackface.png
new file mode 100755
index 0000000..8d3bdee
Binary files /dev/null and b/img/withBackface.png differ
diff --git a/img/withBilinear.png b/img/withBilinear.png
new file mode 100755
index 0000000..13d1c64
Binary files /dev/null and b/img/withBilinear.png differ
diff --git a/img/withTexture.png b/img/withTexture.png
new file mode 100755
index 0000000..82fd37e
Binary files /dev/null and b/img/withTexture.png differ
diff --git a/img/withoutBackface.png b/img/withoutBackface.png
new file mode 100755
index 0000000..4cda42a
Binary files /dev/null and b/img/withoutBackface.png differ
diff --git a/img/withoutBilinear.png b/img/withoutBilinear.png
new file mode 100755
index 0000000..7d44c29
Binary files /dev/null and b/img/withoutBilinear.png differ
diff --git a/img/withoutTexture.png b/img/withoutTexture.png
new file mode 100755
index 0000000..84b7a27
Binary files /dev/null and b/img/withoutTexture.png differ
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a57f69f..40c13cb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -6,5 +6,5 @@ set(SOURCE_FILES
 
 cuda_add_library(src
     ${SOURCE_FILES}
-    OPTIONS -arch=sm_20
+    OPTIONS -arch=sm_30
     )
diff --git a/src/main.cpp b/src/main.cpp
index a36b955..b84cec8 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -14,9 +14,23 @@
 #define TINYGLTF_LOADER_IMPLEMENTATION
 #include <util/tiny_gltf_loader.h>
 
-//-------------------------------
-//-------------MAIN--------------
-//-------------------------------
+bool textureMapping = true, bilinearFiltering = false, backfaceCulling = false;	// feature toggles handed to rasterize() by runCuda(); flipped by the T / F / B keys
+// Prints the requested feature toggles to stdout, followed by a separator line.
+void printState(bool printTextures = true, bool printFiltering = true, bool printBackface = true) {
+	if (printTextures && printFiltering && printBackface) {	// banner only when every setting is listed
+		printf("----- Settings -----\n\n");
+	}
+	if (printTextures) {
+		printf("Texture Mapping: %s\n", textureMapping ? "On" : "Off");
+	}
+	if (printFiltering) {
+		printf("Bilinear Filtering: %s\n", bilinearFiltering ? "On" : "Off");
+	}
+	if (printBackface) {
+		printf("Backface Culling: %s\n", backfaceCulling ? "On" : "Off");
+	}
+	printf("\n--------------------\n" "\n");	// separator is always printed, even for a single setting
+}
 
 int main(int argc, char **argv) {
     if (argc != 2) {
@@ -87,6 +101,10 @@ void mainLoop() {
         // VAO, shader program, and texture already bound
         glDrawElements(GL_TRIANGLES, 6,  GL_UNSIGNED_SHORT, 0);
         glfwSwapBuffers(window);
+
+		/*if (frame > 0) {
+			glfwSetWindowShouldClose(window, GL_TRUE);
+		}*/
     }
     glfwDestroyWindow(window);
     glfwTerminate();
@@ -96,7 +114,7 @@ void mainLoop() {
 //---------RUNTIME STUFF---------
 //-------------------------------
 float scale = 1.0f;
-float x_trans = 0.0f, y_trans = 0.0f, z_trans = -10.0f;
+float x_trans = 0.0f, y_trans = -1.5f, z_trans = -5.0f;
 float x_angle = 0.0f, y_angle = 0.0f;
 void runCuda() {
     // Map OpenGL buffer object for writing from CUDA on a single GPU
@@ -119,7 +137,7 @@ void runCuda() {
 	glm::mat4 MVP = P * MV;
 
     cudaGLMapBufferObject((void **)&dptr, pbo);
-	rasterize(dptr, MVP, MV, MV_normal);
+	rasterize(dptr, MVP, MV, MV_normal, textureMapping, bilinearFiltering, backfaceCulling);
     cudaGLUnmapBufferObject(pbo);
 
     frame++;
@@ -188,6 +206,8 @@ bool init(const tinygltf::Scene & scene) {
     glUseProgram(passthroughProgram);
     glActiveTexture(GL_TEXTURE0);
 
+	printState();
+
     return true;
 }
 
@@ -327,6 +347,23 @@ void keyCallback(GLFWwindow *window, int key, int scancode, int action, int mods
     if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) {
         glfwSetWindowShouldClose(window, GL_TRUE);
     }
+	if (action == GLFW_PRESS) {
+		if (key == GLFW_KEY_T)
+		{
+			textureMapping = !textureMapping;
+			printState(true, false, false);
+		}
+		else if (key == GLFW_KEY_F)
+		{
+			bilinearFiltering = !bilinearFiltering;
+			printState(false, true, false);
+		}
+		else if (key == GLFW_KEY_B)
+		{
+			backfaceCulling = !backfaceCulling;
+			printState(false, false, true);
+		}
+	}
 }
 
 //----------------------------
@@ -358,7 +395,6 @@ void mouseButtonCallback(GLFWwindow* window, int button, int action, int mods)
 		{
 			mouseState = TRANSLATE;
 		}
-
 	}
 	else if (action == GLFW_RELEASE)
 	{
diff --git a/src/rasterize.cu b/src/rasterize.cu
index 1262a09..f184330 100644
--- a/src/rasterize.cu
+++ b/src/rasterize.cu
@@ -17,86 +17,88 @@
 #include "rasterize.h"
 #include <glm/gtc/quaternion.hpp>
 #include <glm/gtc/matrix_transform.hpp>
+#include "util/utilityCore.hpp"
+#include <thrust/execution_policy.h>
+#include <thrust/device_vector.h>
+#include <thrust/remove.h>
+
+using VertexIndex = unsigned short;	// element type of the index buffer (dev_indices)
+using VertexAttributePosition = glm::vec3;
+using VertexAttributeNormal = glm::vec3;
+using VertexAttributeTexcoord = glm::vec2;
+using TextureData = unsigned char;	// one byte of texture data
+
+using BufferByte = unsigned char;
+
+enum PrimitiveType{	// the numeric value doubles as the number of indices per primitive (see _primitiveAssembly)
+	Point = 1,
+	Line = 2,
+	Triangle = 3
+};
+
+struct VertexOut {
+	glm::vec4 pos;	// after the perspective divide; x and y are additionally mapped to viewport (pixel) coordinates
+
+	// Per-vertex output of _vertexTransformAndAssembly. Every field below is
+	// written there and later copied into Primitive::v by _primitiveAssembly.
+	// The texture pointer and its dimensions are duplicated on every vertex.
+
+	glm::vec3 eyePos;	// eye space position (MV * position)
+	glm::vec3 eyeNor;	// eye space normal used for shading, because the normal is distorted by the perspective transformation
+	glm::vec3 col;	// per-vertex color: the texture sampled at texcoord0, or a constant green when there is no texture
+	glm::vec2 texcoord0;	// UV coordinate copied from the input attribute
+	TextureData* dev_diffuseTex = NULL;	// diffuse texture of the owning mesh primitive, may be NULL
+	int texWidth, texHeight;	// dimensions of dev_diffuseTex (copied from diffuseTexWidth / diffuseTexHeight)
+	// ...
+};
+
+struct Primitive {	// one assembled primitive, as consumed by the rasterizer
+	PrimitiveType primitiveType = Triangle;	// C++11 default member initializer
+	VertexOut v[3];	// filled by _primitiveAssembly; only the triangle path (all three entries) is implemented there
+};
+
+struct Fragment {
+	glm::vec3 color;	// unlit surface color written by _rasterizePrimitives; render() multiplies it by the diffuse term
+
+	// One Fragment per pixel of the fragment buffer. The rasterizer overwrites
+	// color / eyePos / eyeNor for the fragment that passes the depth test;
+	// rasterize() zeroes the whole buffer with cudaMemset before rasterizing.
+
+		glm::vec3 eyePos;	// eye space position, interpolated across the triangle
+		glm::vec3 eyeNor;	// eye space normal, interpolated across the triangle; normalized in render() before lighting
+		VertexAttributeTexcoord texcoord0;	// NOTE(review): not written by any code visible in this change
+	// TextureData* dev_diffuseTex;
+	// ...
+};
+
+struct PrimitiveDevBufPointers {	// buffers and metadata of one mesh primitive (the entries stored in mesh2PrimitivesMap)
+	int primitiveMode;	// tinygltf primitive mode macro; compared against TINYGLTF_MODE_TRIANGLES during primitive assembly
+	PrimitiveType primitiveType;	// its integer value is used as the number of indices per primitive
+	int numPrimitives;
+	int numIndices;
+	int numVertices;
+
+	// Vertex In, const after loaded
+	VertexIndex* dev_indices;	// indexes into dev_verticesOut during primitive assembly
+	VertexAttributePosition* dev_position;	// multiplied by MVP and MV in the vertex stage
+	VertexAttributeNormal* dev_normal;	// multiplied by MV_normal in the vertex stage
+	VertexAttributeTexcoord* dev_texcoord0;	// read unconditionally in the vertex stage
+
+	// Materials, add more attributes when needed
+	TextureData* dev_diffuseTex;	// may be NULL; the vertex stage then falls back to a constant color
+	int diffuseTexWidth;
+	int diffuseTexHeight;
+	// TextureData* dev_specularTex;
+	// TextureData* dev_normalTex;
+	// ...
+
+	// Vertex Out, vertex used for rasterization, this is changing every frame
+	VertexOut* dev_verticesOut;
+
+	// TODO: add more attributes when needed
+};
 
-namespace {
-
-	typedef unsigned short VertexIndex;
-	typedef glm::vec3 VertexAttributePosition;
-	typedef glm::vec3 VertexAttributeNormal;
-	typedef glm::vec2 VertexAttributeTexcoord;
-	typedef unsigned char TextureData;
-
-	typedef unsigned char BufferByte;
-
-	enum PrimitiveType{
-		Point = 1,
-		Line = 2,
-		Triangle = 3
-	};
-
-	struct VertexOut {
-		glm::vec4 pos;
-
-		// TODO: add new attributes to your VertexOut
-		// The attributes listed below might be useful, 
-		// but always feel free to modify on your own
-
-		 glm::vec3 eyePos;	// eye space position used for shading
-		 glm::vec3 eyeNor;	// eye space normal used for shading, cuz normal will go wrong after perspective transformation
-		// glm::vec3 col;
-		 glm::vec2 texcoord0;
-		 TextureData* dev_diffuseTex = NULL;
-		// int texWidth, texHeight;
-		// ...
-	};
-
-	struct Primitive {
-		PrimitiveType primitiveType = Triangle;	// C++ 11 init
-		VertexOut v[3];
-	};
-
-	struct Fragment {
-		glm::vec3 color;
-
-		// TODO: add new attributes to your Fragment
-		// The attributes listed below might be useful, 
-		// but always feel free to modify on your own
-
-		// glm::vec3 eyePos;	// eye space position used for shading
-		// glm::vec3 eyeNor;
-		// VertexAttributeTexcoord texcoord0;
-		// TextureData* dev_diffuseTex;
-		// ...
-	};
-
-	struct PrimitiveDevBufPointers {
-		int primitiveMode;	//from tinygltfloader macro
-		PrimitiveType primitiveType;
-		int numPrimitives;
-		int numIndices;
-		int numVertices;
-
-		// Vertex In, const after loaded
-		VertexIndex* dev_indices;
-		VertexAttributePosition* dev_position;
-		VertexAttributeNormal* dev_normal;
-		VertexAttributeTexcoord* dev_texcoord0;
-
-		// Materials, add more attributes when needed
-		TextureData* dev_diffuseTex;
-		int diffuseTexWidth;
-		int diffuseTexHeight;
-		// TextureData* dev_specularTex;
-		// TextureData* dev_normalTex;
-		// ...
-
-		// Vertex Out, vertex used for rasterization, this is changing every frame
-		VertexOut* dev_verticesOut;
-
-		// TODO: add more attributes when needed
-	};
 
-}
 
 static std::map<std::string, std::vector<PrimitiveDevBufPointers>> mesh2PrimitivesMap;
 
@@ -111,6 +113,8 @@ static glm::vec3 *dev_framebuffer = NULL;
 
 static int * dev_depth = NULL;	// you might need this buffer when doing depth test
 
+thrust::device_ptr<Primitive> dev_thrust_primitives;	// thrust wrapper around dev_primitives (set in rasterizeSetBuffers), used by thrust::remove_if for backface culling
+
 /**
  * Kernel that writes the image to the OpenGL PBO directly.
  */
@@ -130,7 +134,7 @@ void sendImageToPBO(uchar4 *pbo, int w, int h, glm::vec3 *image) {
         pbo[index].x = color.x;
         pbo[index].y = color.y;
         pbo[index].z = color.z;
-    }
+    } 
 }
 
 /** 
@@ -143,10 +147,12 @@ void render(int w, int h, Fragment *fragmentBuffer, glm::vec3 *framebuffer) {
     int index = x + (y * w);
 
     if (x < w && y < h) {
-        framebuffer[index] = fragmentBuffer[index].color;
-
-		// TODO: add your fragment shader code here
-
+		// diffuse lighting scheme
+		glm::vec3 lightVec = glm::normalize(-1.0f * glm::vec3(0.5, -0.5, -1));
+		glm::vec3 ambient = glm::vec3(0.1, 0.1, 0.1);
+		glm::vec3 diffuse = glm::clamp(fragmentBuffer[index].color * glm::max(glm::dot(glm::normalize(fragmentBuffer[index].eyeNor), lightVec), 0.0f), 0.0f, 1.0f);
+		framebuffer[index] = diffuse;
+		//framebuffer[index] = fragmentBuffer[index].color;
     }
 }
 
@@ -600,6 +606,7 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
 	// 3. Malloc for dev_primitives
 	{
 		cudaMalloc(&dev_primitives, totalNumPrimitives * sizeof(Primitive));
+		dev_thrust_primitives = thrust::device_pointer_cast(dev_primitives);
 	}
 	
 
@@ -621,7 +628,20 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
 
 }
 
+__device__
+glm::vec3 _getColorData(TextureData* dev_diffuseTex, int texIndex) {
+	// A texel is read as 3 consecutive bytes starting at 3 * texIndex; each channel is scaled from [0, 255] to [0, 1].
+	const TextureData* texel = dev_diffuseTex + 3 * texIndex;
+	return glm::vec3((float)texel[0] / 255.f, (float)texel[1] / 255.f, (float)texel[2] / 255.f);
+}
 
+__device__
+glm::vec3 _getTexColor(glm::vec2 texCoord, TextureData* dev_diffuseTex, int texWidth, int texHeight) {
+	// Nearest-texel lookup. Clamp to the texture edge: u or v == 1.0 (or outside [0, 1]) would otherwise index past the buffer.
+	int x = glm::clamp((int)(texCoord.x * texWidth), 0, texWidth - 1);
+	int y = glm::clamp((int)(texCoord.y * texHeight), 0, texHeight - 1);
+	return _getColorData(dev_diffuseTex, x + y * texWidth);
+}
 
 __global__ 
 void _vertexTransformAndAssembly(
@@ -633,14 +653,33 @@ void _vertexTransformAndAssembly(
 	// vertex id
 	int vid = (blockIdx.x * blockDim.x) + threadIdx.x;
 	if (vid < numVertices) {
-
-		// TODO: Apply vertex transformation here
 		// Multiply the MVP matrix for each vertex position, this will transform everything into clipping space
 		// Then divide the pos by its w element to transform into NDC space
 		// Finally transform x and y to viewport space
-
-		// TODO: Apply vertex assembly here
-		// Assemble all attribute arraies into the primitive array
+		glm::vec4 clipSpacePos = MVP * glm::vec4(primitive.dev_position[vid], 1.0f);
+		clipSpacePos /= clipSpacePos.w;
+
+		clipSpacePos.x = (width / 2) * -1 * clipSpacePos.x + (width / 2);
+		clipSpacePos.y = (height / 2) * -1 * clipSpacePos.y + (height / 2);
+
+		// Assemble all attribute arrays into the primitive array
+		primitive.dev_verticesOut[vid].pos = clipSpacePos;
+		primitive.dev_verticesOut[vid].eyePos = glm::vec3(MV * glm::vec4(primitive.dev_position[vid], 1.0f));
+		primitive.dev_verticesOut[vid].eyeNor = MV_normal * primitive.dev_normal[vid];
+		primitive.dev_verticesOut[vid].texcoord0 = primitive.dev_texcoord0[vid];
+		primitive.dev_verticesOut[vid].dev_diffuseTex = primitive.dev_diffuseTex;
+		primitive.dev_verticesOut[vid].texWidth = primitive.diffuseTexWidth;
+		primitive.dev_verticesOut[vid].texHeight = primitive.diffuseTexHeight;
+
+		if (primitive.dev_diffuseTex) {
+			primitive.dev_verticesOut[vid].col = _getTexColor(primitive.dev_texcoord0[vid],
+				primitive.dev_diffuseTex, primitive.diffuseTexWidth, primitive.diffuseTexHeight);
+		}
+		else {
+			glm::vec3 color(0.f, 1.f, 0.f);
+			//color[vid % 3] = 1.f;
+			primitive.dev_verticesOut[vid].col = color;
+		}
 		
 	}
 }
@@ -660,12 +699,12 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_
 		// TODO: uncomment the following code for a start
 		// This is primitive assembly for triangles
 
-		//int pid;	// id for cur primitives vector
-		//if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) {
-		//	pid = iid / (int)primitive.primitiveType;
-		//	dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType]
-		//		= primitive.dev_verticesOut[primitive.dev_indices[iid]];
-		//}
+		int pid;	// id for cur primitives vector
+		if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) {
+			pid = iid / (int)primitive.primitiveType;
+			dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType]
+				= primitive.dev_verticesOut[primitive.dev_indices[iid]];
+		}
 
 
 		// TODO: other primitive types (point, line)
@@ -673,16 +712,119 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_
 	
 }
 
+// Bilinear texture sample: blends the 2x2 texels surrounding texCoord (clamp-to-edge addressing).
+__device__
+glm::vec3 _getBilinearFilteredTexColor(glm::vec2 texCoord, TextureData* dev_diffuseTex, int texWidth, int texHeight) {
+	// Shift by half a texel so that texel centers fall on integer coordinates.
+	float u = texCoord.x * texWidth - 0.5f;
+	float v = texCoord.y * texHeight - 0.5f;
+	float u_floor = glm::floor(u);
+	float v_floor = glm::floor(v);
+	float u_ratio = u - u_floor;	// weight of column x1
+	float v_ratio = v - v_floor;	// weight of row y1
+	// Texel indices come from the same shifted coordinate as the weights, clamped so that neither neighbor leaves the texture.
+	int x0 = glm::clamp((int)u_floor, 0, texWidth - 1);
+	int x1 = glm::clamp((int)u_floor + 1, 0, texWidth - 1);
+	int y0 = glm::clamp((int)v_floor, 0, texHeight - 1);
+	int y1 = glm::clamp((int)v_floor + 1, 0, texHeight - 1);
+	glm::vec3 row0 = glm::mix(_getColorData(dev_diffuseTex, x0 + y0 * texWidth), _getColorData(dev_diffuseTex, x1 + y0 * texWidth), u_ratio);
+	glm::vec3 row1 = glm::mix(_getColorData(dev_diffuseTex, x0 + y1 * texWidth), _getColorData(dev_diffuseTex, x1 + y1 * texWidth), u_ratio);
+	return glm::mix(row0, row1, v_ratio);
+}
 
+// Rasterizes one primitive per thread: every pixel of the triangle's screen-clamped bounding box
+// that passes the barycentric inside test is depth tested against dev_depth; the nearest fragment's
+// interpolated attributes (and, when enabled, its texture color) are stored in dev_fragmentBuffer.
+__global__
+void _rasterizePrimitives(
+	int width, int height, 
+	int numPrimitives, 
+	Primitive* dev_primitives, 
+	Fragment *dev_fragmentBuffer, int* dev_depth,
+	bool textureMapping, bool bilinearFiltering) {
+	// primitive id
+	int pid = (blockIdx.x * blockDim.x) + threadIdx.x;
+	if (pid >= numPrimitives) {
+		return;
+	}
+	Primitive primitive = dev_primitives[pid];
+	glm::vec3 p0 = glm::vec3(primitive.v[0].pos);
+	glm::vec3 p1 = glm::vec3(primitive.v[1].pos);
+	glm::vec3 p2 = glm::vec3(primitive.v[2].pos);
+
+	// Bounding box in pixels, clamped to the screen. Both ends are inclusive.
+	int minX = glm::max(glm::floor(glm::min(p0.x, glm::min(p1.x, p2.x))), 0.0f);
+	int maxX = glm::min(glm::ceil(glm::max(p0.x, glm::max(p1.x, p2.x))), width - 1.0f);
+	int minY = glm::max(glm::floor(glm::min(p0.y, glm::min(p1.y, p2.y))), 0.0f);
+	int maxY = glm::min(glm::ceil(glm::max(p0.y, glm::max(p1.y, p2.y))), height - 1.0f);
+
+	glm::vec3 tri[3] = { p0, p1, p2 };
+	glm::vec3 eyePosTri[3] = { primitive.v[0].eyePos, primitive.v[1].eyePos, primitive.v[2].eyePos };
+	glm::vec3 eyeNorTri[3] = { primitive.v[0].eyeNor, primitive.v[1].eyeNor, primitive.v[2].eyeNor };
+	glm::vec3 colTri[3] = { primitive.v[0].col, primitive.v[1].col, primitive.v[2].col };
+
+	// Perspective correction interpolates attribute / w and 1 / w. pos.w is already 1 after the divide and
+	// NDC z is not proportional to w (it even crosses 0 inside the frustum), so eye space z is used instead.
+	// NOTE(review): assumes P is a perspective projection, i.e. clip w is proportional to eye space z - confirm.
+	float invDepthTri[3] = { 1.0f / primitive.v[0].eyePos.z, 1.0f / primitive.v[1].eyePos.z, 1.0f / primitive.v[2].eyePos.z };
+	glm::vec2 texTri[3] = { primitive.v[0].texcoord0 * invDepthTri[0], primitive.v[1].texcoord0 * invDepthTri[1], primitive.v[2].texcoord0 * invDepthTri[2] };
+
+	int texWidth = primitive.v[0].texWidth;
+	int texHeight = primitive.v[0].texHeight;
+	TextureData* dev_diffuseTex = primitive.v[0].dev_diffuseTex;
+	bool sampleTexture = textureMapping && dev_diffuseTex != NULL;
+
+	glm::vec2 pix;
+	for (pix.x = minX; pix.x <= maxX; pix.x++) {
+		for (pix.y = minY; pix.y <= maxY; pix.y++) {
+			glm::vec3 barycentricCoord = calculateBarycentricCoordinate(tri, pix);
+			if (!isBarycentricCoordInBounds(barycentricCoord)) {
+				continue;
+			}
+			int index = (int)(pix.x + pix.y * width);
+			// Quantize the depth once, so the value handed to atomicMin and the value compared
+			// against the depth buffer are the same integer (no float/int round trip).
+			int depth = (int)(getZAtCoordinate(barycentricCoord, tri) * INT_MAX);
+			atomicMin(&dev_depth[index], depth);
+			// NOTE(review): atomicMin only protects the depth value. The comparison and the attribute writes
+			// below are not atomic, so two primitives covering the same pixel can still interleave.
+			if (depth != dev_depth[index]) {
+				continue;
+			}
+			Fragment &fragment = dev_fragmentBuffer[index];
+			fragment.eyePos = interpolate(barycentricCoord, eyePosTri);
+			fragment.eyeNor = interpolate(barycentricCoord, eyeNorTri);
+			fragment.color = interpolate(barycentricCoord, colTri);
+			if (sampleTexture) {
+				glm::vec2 texCoord = interpolate2D(barycentricCoord, texTri) / interpolateFloat(barycentricCoord, invDepthTri);
+				fragment.color = bilinearFiltering
+					? _getBilinearFilteredTexColor(texCoord, dev_diffuseTex, texWidth, texHeight)
+					: _getTexColor(texCoord, dev_diffuseTex, texWidth, texHeight);
+			}
+		}
+	}
+}
+
+struct is_backface	// thrust::remove_if predicate: true for triangles that face away from the eye
+{
+	__host__ __device__ bool operator()(const Primitive& primitive)
+	{
+		// Perspective-aware test: dot the face normal with the eye-to-vertex vector (the eye is the eye space origin); normal.z alone misjudges off-axis triangles.
+		return glm::dot(glm::cross(primitive.v[1].eyePos - primitive.v[0].eyePos, primitive.v[2].eyePos - primitive.v[1].eyePos), primitive.v[0].eyePos) > 0;
+	}
+};
 
 /**
  * Perform rasterization.
  */
-void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal) {
+void rasterize(uchar4 *pbo, 
+	const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal,
+	bool textureMapping, bool bilinearFiltering, bool backfaceCulling) {
     int sideLength2d = 8;
     dim3 blockSize2d(sideLength2d, sideLength2d);
     dim3 blockCount2d((width  - 1) / blockSize2d.x + 1,
 		(height - 1) / blockSize2d.y + 1);
+	dim3 numThreadsPerBlock(128);
 
 	// Execute your rasterization pipeline here
 	// (See README for rasterization pipeline outline.)
@@ -690,7 +832,6 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g
 	// Vertex Process & primitive assembly
 	{
 		curPrimitiveBeginId = 0;
-		dim3 numThreadsPerBlock(128);
 
 		auto it = mesh2PrimitivesMap.begin();
 		auto itEnd = mesh2PrimitivesMap.end();
@@ -719,12 +860,20 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g
 		checkCUDAError("Vertex Processing and Primitive Assembly");
 	}
 	
+	int numRemainingPrimitives = totalNumPrimitives;
+	if (backfaceCulling) {
+		numRemainingPrimitives = thrust::remove_if(dev_thrust_primitives, dev_thrust_primitives + totalNumPrimitives, is_backface()) - dev_thrust_primitives;
+	}
+
 	cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment));
 	initDepth << <blockCount2d, blockSize2d >> >(width, height, dev_depth);
-	
-	// TODO: rasterize
-
+	checkCUDAError("init depth");
 
+	dim3 numBlocksForPrimitives((numRemainingPrimitives + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x);	// one thread per surviving primitive
+	if (numRemainingPrimitives > 0) {	// culling may remove everything, and a grid of 0 blocks is an invalid launch configuration
+		_rasterizePrimitives << <numBlocksForPrimitives, numThreadsPerBlock >> >(width, height, numRemainingPrimitives, dev_primitives, dev_fragmentBuffer, dev_depth, textureMapping, bilinearFiltering);
+	}
+	checkCUDAError("rasterize primitives");
 
     // Copy depthbuffer colors into framebuffer
 	render << <blockCount2d, blockSize2d >> >(width, height, dev_fragmentBuffer, dev_framebuffer);
diff --git a/src/rasterize.h b/src/rasterize.h
index 560aae9..47b4168 100644
--- a/src/rasterize.h
+++ b/src/rasterize.h
@@ -20,5 +20,5 @@ namespace tinygltf{
 void rasterizeInit(int width, int height);
 void rasterizeSetBuffers(const tinygltf::Scene & scene);
 
-void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal);
+void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal, bool textureMapping, bool bilinearFiltering, bool backfaceCulling);
 void rasterizeFree();
diff --git a/src/rasterizeTools.h b/src/rasterizeTools.h
index 46c701e..761b832 100644
--- a/src/rasterizeTools.h
+++ b/src/rasterizeTools.h
@@ -95,7 +95,30 @@ bool isBarycentricCoordInBounds(const glm::vec3 barycentricCoord) {
  */
 __host__ __device__ static
 float getZAtCoordinate(const glm::vec3 barycentricCoord, const glm::vec3 tri[3]) {
-    return -(barycentricCoord.x * tri[0].z
+    return barycentricCoord.x * tri[0].z
            + barycentricCoord.y * tri[1].z
-           + barycentricCoord.z * tri[2].z);
+           + barycentricCoord.z * tri[2].z;
 }
+
+// Barycentric blend of the three per-vertex vec3 attributes in tri.
+__host__ __device__ static
+glm::vec3 interpolate(const glm::vec3 barycentricCoord, const glm::vec3 tri[3]) {
+	const glm::vec3 &w = barycentricCoord;
+	return w.x * tri[0] + w.y * tri[1] + w.z * tri[2];
+}
+
+// Barycentric blend of the three per-vertex vec2 attributes in tri.
+__host__ __device__ static
+glm::vec2 interpolate2D(const glm::vec3 barycentricCoord, const glm::vec2 tri[3]) {
+	const glm::vec3 &w = barycentricCoord;
+	return w.x * tri[0] + w.y * tri[1] + w.z * tri[2];
+}
+
+// Barycentric blend of the three per-vertex scalar values in tri.
+__host__ __device__ static
+float interpolateFloat(const glm::vec3 barycentricCoord, const float tri[3]) {
+	const glm::vec3 &w = barycentricCoord;
+	return w.x * tri[0] + w.y * tri[1] + w.z * tri[2];
+}
+
+