diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1.sln b/Part1/PROJ_WIN/CIS565_PROJ_1.sln
index 2440bc1..2f6ff15 100644
--- a/Part1/PROJ_WIN/CIS565_PROJ_1.sln
+++ b/Part1/PROJ_WIN/CIS565_PROJ_1.sln
@@ -3,16 +3,30 @@ Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CIS565_PROJ_1", "cIS565_PROJ_1\CIS565_PROJ_1.vcxproj", "{D7BEFF7A-4902-4B7E-922B-B0417A66864C}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Matrix", "Matrix\Matrix.vcxproj", "{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
+ Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
+ Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|Win32.ActiveCfg = Debug|Win32
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|Win32.Build.0 = Debug|Win32
+ {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|x64.ActiveCfg = Debug|Win32
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|Win32.ActiveCfg = Release|Win32
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|Win32.Build.0 = Release|Win32
+ {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|x64.ActiveCfg = Release|Win32
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|Win32.ActiveCfg = Debug|Win32
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|Win32.Build.0 = Debug|Win32
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|x64.ActiveCfg = Debug|x64
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|x64.Build.0 = Debug|x64
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|Win32.ActiveCfg = Release|Win32
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|Win32.Build.0 = Release|Win32
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|x64.ActiveCfg = Release|x64
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1.suo b/Part1/PROJ_WIN/CIS565_PROJ_1.suo
index ad46c84..5fc1939 100644
Binary files a/Part1/PROJ_WIN/CIS565_PROJ_1.suo and b/Part1/PROJ_WIN/CIS565_PROJ_1.suo differ
diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj b/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj
index 4c88226..6b18a53 100644
--- a/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj
+++ b/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj
@@ -30,7 +30,7 @@
-
+
@@ -114,6 +114,6 @@
-
+
\ No newline at end of file
diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb b/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb
index b662f85..c6c1c23 100644
Binary files a/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb and b/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb differ
diff --git a/Part1/PROJ_WIN/Matrix/Matrix.vcxproj b/Part1/PROJ_WIN/Matrix/Matrix.vcxproj
new file mode 100644
index 0000000..2f3c65f
--- /dev/null
+++ b/Part1/PROJ_WIN/Matrix/Matrix.vcxproj
@@ -0,0 +1,156 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Debug
+ x64
+
+
+ Release
+ Win32
+
+
+ Release
+ x64
+
+
+
+ {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}
+ Matrix
+
+
+
+ Application
+ true
+ MultiByte
+
+
+ Application
+ true
+ MultiByte
+
+
+ Application
+ false
+ true
+ MultiByte
+
+
+ Application
+ false
+ true
+ MultiByte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+ true
+
+
+
+ Level3
+ Disabled
+ WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+
+
+ Level3
+ Disabled
+ WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ 64
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ 64
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Part1/PROJ_WIN/Matrix/kernel.cu b/Part1/PROJ_WIN/Matrix/kernel.cu
new file mode 100644
index 0000000..d3a26fb
--- /dev/null
+++ b/Part1/PROJ_WIN/Matrix/kernel.cu
@@ -0,0 +1,158 @@
+
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include
+
+
+__global__ void mat_add(const float *m1, const float *m2, float *m3) // Element-wise sum kernel: each thread stores m2[i] + m1[i] into m3[i].
+{
+ int i = threadIdx.x; // Index uses threadIdx.x only (no blockIdx term) - presumably launched as a single block; confirm at the launch site.
+ m3[i] = m2[i] + m1[i]; // No bounds check here, so every launched thread reads one element of each input and writes one element of m3.
+}
+
+__global__ void mat_sub(const float *m1, const float *m2, float *m3) // Element-wise difference kernel: each thread stores m1[i] - m2[i] into m3[i].
+{
+ int i = threadIdx.x; // Index uses threadIdx.x only (no blockIdx term) - presumably launched as a single block; confirm at the launch site.
+ m3[i] = m1[i] - m2[i]; // No bounds check here, so every launched thread reads one element of each input and writes one element of m3.
+}
+
+__global__ void mat_mult(const float *m1, const float *m2, float *m3, int matrix_size)
+{
+ int i = threadIdx.x;
+ int row = i / matrix_size;
+ int column = i - (i / matrix_size) * matrix_size;
+
+ m3[i] = 0;
+ for(int j=0; j>>(dev_m1, dev_m2, dev_m3);
+ cudaDeviceSynchronize();
+ cudaMemcpy(m3, dev_m3, matrix_width * matrix_height * sizeof(float), cudaMemcpyDeviceToHost);
+
+ for(int i=0;i>>(dev_m1, dev_m2, dev_m3);
+ cudaDeviceSynchronize();
+ cudaMemcpy(m3, dev_m3, matrix_width * matrix_height * sizeof(float), cudaMemcpyDeviceToHost);
+
+ for(int i=0;i>>(dev_m1, dev_m2, dev_m3,matrix_width);
+ cudaDeviceSynchronize();
+ cudaStatus = cudaMemcpy(m3, dev_m3, matrix_width * matrix_height * sizeof(float), cudaMemcpyDeviceToHost);
+
+ for(int i=0;i>>(numObjects,dt,dev_pos,dev_vel,dev_acc);
+ updateS<<>>(numObjects,dt,dev_pos,dev_vel,dev_acc);
+ cudaThreadSynchronize();
}
void cudaUpdateVBO(float * vbodptr, int width, int height)
diff --git a/Part1/src/main.cpp b/Part1/src/main.cpp
index ea86207..3ad3335 100644
--- a/Part1/src/main.cpp
+++ b/Part1/src/main.cpp
@@ -4,7 +4,7 @@
#include "main.h"
-#define N_FOR_VIS 5000
+#define N_FOR_VIS 2500
#define DT 0.2
#define VISUALIZE 1
//-------------------------------
diff --git a/README.md b/README.md
index 70ae0d3..5466ac0 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,32 @@
+Performance Analysis
+
+1.
+blockSize fps
+{32, 18}
+{64, 27}
+{128, 32}
+{256, 32}
+{512, 29}
+{1024, 29}
+
+So setting the block size to 128-256 is the most efficient choice.
+
+2.
+planetNum fps
+{2500, 60}
+{5000, 32}
+{7500, 15}
+{10000, 9}
+
+Obviously, fewer particles reduce the amount of computation, so the frame rate rises as planetNum decreases.
+
+3.
+If the matrix size is set to 5*5, the CPU can be more efficient, because transferring the data between host and device memory takes more time than the computation itself when the data set is this small.
+With a larger data set, the GPU computation can outperform the CPU.
+
+
+
+
Project 1
=========