diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1.sln b/Part1/PROJ_WIN/CIS565_PROJ_1.sln index 2440bc1..2f6ff15 100644 --- a/Part1/PROJ_WIN/CIS565_PROJ_1.sln +++ b/Part1/PROJ_WIN/CIS565_PROJ_1.sln @@ -3,16 +3,30 @@ Microsoft Visual Studio Solution File, Format Version 11.00 # Visual Studio 2010 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CIS565_PROJ_1", "cIS565_PROJ_1\CIS565_PROJ_1.vcxproj", "{D7BEFF7A-4902-4B7E-922B-B0417A66864C}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Matrix", "Matrix\Matrix.vcxproj", "{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 Release|Win32 = Release|Win32 + Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|Win32.ActiveCfg = Debug|Win32 {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|Win32.Build.0 = Debug|Win32 + {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|x64.ActiveCfg = Debug|Win32 {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|Win32.ActiveCfg = Release|Win32 {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|Win32.Build.0 = Release|Win32 + {D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|x64.ActiveCfg = Release|Win32 + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|Win32.ActiveCfg = Debug|Win32 + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|Win32.Build.0 = Debug|Win32 + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|x64.ActiveCfg = Debug|x64 + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|x64.Build.0 = Debug|x64 + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|Win32.ActiveCfg = Release|Win32 + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|Win32.Build.0 = Release|Win32 + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|x64.ActiveCfg = Release|x64 + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE 
diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1.suo b/Part1/PROJ_WIN/CIS565_PROJ_1.suo index ad46c84..5fc1939 100644 Binary files a/Part1/PROJ_WIN/CIS565_PROJ_1.suo and b/Part1/PROJ_WIN/CIS565_PROJ_1.suo differ diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj b/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj index 4c88226..6b18a53 100644 --- a/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj +++ b/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj @@ -30,7 +30,7 @@ - + @@ -114,6 +114,6 @@ - + \ No newline at end of file diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb b/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb index b662f85..c6c1c23 100644 Binary files a/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb and b/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb differ diff --git a/Part1/PROJ_WIN/Matrix/Matrix.vcxproj b/Part1/PROJ_WIN/Matrix/Matrix.vcxproj new file mode 100644 index 0000000..2f3c65f --- /dev/null +++ b/Part1/PROJ_WIN/Matrix/Matrix.vcxproj @@ -0,0 +1,156 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {06DD2D05-C897-4C31-9923-61D1BB0CCD6B} + Matrix + + + + Application + true + MultiByte + + + Application + true + MultiByte + + + Application + false + true + MultiByte + + + Application + false + true + MultiByte + + + + + + + + + + + + + + + + + + + + true + + + true + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + + + Level3 + Disabled + WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + Console + 
cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + 64 + + + + + Level3 + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + + + Level3 + MaxSpeed + true + true + WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + 64 + + + + + + + + + + \ No newline at end of file diff --git a/Part1/PROJ_WIN/Matrix/kernel.cu b/Part1/PROJ_WIN/Matrix/kernel.cu new file mode 100644 index 0000000..d3a26fb --- /dev/null +++ b/Part1/PROJ_WIN/Matrix/kernel.cu @@ -0,0 +1,158 @@ + +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include + + +__global__ void mat_add(const float *m1, const float *m2, float *m3) +{ + int i = threadIdx.x; + m3[i] = m2[i] + m1[i]; +} + +__global__ void mat_sub(const float *m1, const float *m2, float *m3) +{ + int i = threadIdx.x; + m3[i] = m1[i] - m2[i]; +} + +__global__ void mat_mult(const float *m1, const float *m2, float *m3, int matrix_size) +{ + int i = threadIdx.x; + int row = i / matrix_size; + int column = i - (i / matrix_size) * matrix_size; + + m3[i] = 0; + for(int j=0; j>>(dev_m1, dev_m2, dev_m3); + 
cudaDeviceSynchronize(); + cudaMemcpy(m3, dev_m3, matrix_width * matrix_height * sizeof(float), cudaMemcpyDeviceToHost); + + for(int i=0;i>>(dev_m1, dev_m2, dev_m3); + cudaDeviceSynchronize(); + cudaMemcpy(m3, dev_m3, matrix_width * matrix_height * sizeof(float), cudaMemcpyDeviceToHost); + + for(int i=0;i>>(dev_m1, dev_m2, dev_m3,matrix_width); + cudaDeviceSynchronize(); + cudaStatus = cudaMemcpy(m3, dev_m3, matrix_width * matrix_height * sizeof(float), cudaMemcpyDeviceToHost); + + for(int i=0;i>>(numObjects,dt,dev_pos,dev_vel,dev_acc); + updateS<<>>(numObjects,dt,dev_pos,dev_vel,dev_acc); + cudaThreadSynchronize(); } void cudaUpdateVBO(float * vbodptr, int width, int height) diff --git a/Part1/src/main.cpp b/Part1/src/main.cpp index ea86207..3ad3335 100644 --- a/Part1/src/main.cpp +++ b/Part1/src/main.cpp @@ -4,7 +4,7 @@ #include "main.h" -#define N_FOR_VIS 5000 +#define N_FOR_VIS 2500 #define DT 0.2 #define VISUALIZE 1 //------------------------------- diff --git a/README.md b/README.md index 70ae0d3..5466ac0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,32 @@ +Performance Analysis + +1. +blockSize fps +{32, 18} +{64, 27} +{128, 32} +{256, 32} +{512, 29} +{1024, 29} + +So setting the block size to 128-256 is the most efficient. + +2. +planetNum fps +{2500, 60} +{5000, 32} +{7500, 15} +{10000, 9} + +Obviously, fewer particles reduce the amount of computation, so the frame rate goes up. + +3. +If the matrix size is set to 5*5, the CPU can be more efficient, because transferring data between memories takes more time than the computation itself when the data set is this small. +With a larger data set, GPU computation can be better than the CPU. + + + + Project 1 =========