Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Part1/PROJ_WIN/CIS565_PROJ_1.sln
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,30 @@ Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CIS565_PROJ_1", "cIS565_PROJ_1\CIS565_PROJ_1.vcxproj", "{D7BEFF7A-4902-4B7E-922B-B0417A66864C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Matrix", "Matrix\Matrix.vcxproj", "{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|Win32.ActiveCfg = Debug|Win32
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|Win32.Build.0 = Debug|Win32
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Debug|x64.ActiveCfg = Debug|Win32
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|Win32.ActiveCfg = Release|Win32
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|Win32.Build.0 = Release|Win32
{D7BEFF7A-4902-4B7E-922B-B0417A66864C}.Release|x64.ActiveCfg = Release|Win32
{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|Win32.ActiveCfg = Debug|Win32
{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|Win32.Build.0 = Debug|Win32
{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|x64.ActiveCfg = Debug|x64
{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Debug|x64.Build.0 = Debug|x64
{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|Win32.ActiveCfg = Release|Win32
{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|Win32.Build.0 = Release|Win32
{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|x64.ActiveCfg = Release|x64
{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
Binary file modified Part1/PROJ_WIN/CIS565_PROJ_1.suo
Binary file not shown.
4 changes: 2 additions & 2 deletions Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
Expand Down Expand Up @@ -114,6 +114,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
</ImportGroup>
</Project>
Binary file modified Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb
Binary file not shown.
156 changes: 156 additions & 0 deletions Part1/PROJ_WIN/Matrix/Matrix.vcxproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{06DD2D05-C897-4C31-9923-61D1BB0CCD6B}</ProjectGuid>
<RootNamespace>Matrix</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
</PostBuildEvent>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
</PostBuildEvent>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<CudaCompile Include="kernel.cu" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
</ImportGroup>
</Project>
158 changes: 158 additions & 0 deletions Part1/PROJ_WIN/Matrix/kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>


// Element-wise sum kernel: m3[k] = m2[k] + m1[k] for the element owned by this thread.
//
// m1, m2 : input arrays (read only)
// m3     : output array
//
// The element index is taken from threadIdx.x alone, so every block of a launch
// addresses elements 0 .. blockDim.x-1. There is no bounds check: all three
// arrays must hold at least blockDim.x elements.
__global__ void mat_add(const float *m1, const float *m2, float *m3)
{
    const int elem = threadIdx.x;
    const float first = m1[elem];
    const float second = m2[elem];
    m3[elem] = second + first;
}

// Element-wise difference kernel: m3[k] = m1[k] - m2[k] for the element owned by this thread.
//
// m1 : minuend array (read only)
// m2 : subtrahend array (read only)
// m3 : output array
//
// The element index is taken from threadIdx.x alone, so every block of a launch
// addresses elements 0 .. blockDim.x-1. There is no bounds check: all three
// arrays must hold at least blockDim.x elements.
__global__ void mat_sub(const float *m1, const float *m2, float *m3)
{
    const int elem = threadIdx.x;
    const float minuend = m1[elem];
    const float subtrahend = m2[elem];
    m3[elem] = minuend - subtrahend;
}

// Square matrix product kernel: each thread computes one element of m3 = m1 * m2.
//
// m1, m2      : row-major matrix_size x matrix_size inputs (read only)
// m3          : row-major matrix_size x matrix_size output
// matrix_size : number of rows (and columns) of each matrix
//
// The output element index is taken from threadIdx.x alone, so every block of a
// launch addresses elements 0 .. blockDim.x-1. There is no bounds check: the
// launch must not use more than matrix_size * matrix_size threads per block.
__global__ void mat_mult(const float *m1, const float *m2, float *m3, int matrix_size)
{
    const int i = threadIdx.x;
    const int row = i / matrix_size;
    const int column = i % matrix_size;

    // Accumulate the dot product in a local variable and store it once.
    // Updating m3[i] inside the loop forces a global-memory read-modify-write
    // on every iteration, because m3 is not known to be distinct from m1/m2.
    float sum = 0.0f;
    for (int j = 0; j < matrix_size; j++)
    {
        sum += m1[row * matrix_size + j] * m2[j * matrix_size + column];
    }

    m3[i] = sum;
}

// Host-side element-wise sum: m3[k] = m2[k] + m1[k] for every k in
// [0, matrix_size * matrix_size).
//
// m1, m2      : input arrays (read only)
// m3          : output array
// matrix_size : side length; matrix_size * matrix_size elements are processed
void mat_add_serial(const float *m1, const float *m2, float *m3, int matrix_size)
{
    const int count = matrix_size * matrix_size;
    int idx = 0;
    while (idx < count) {
        m3[idx] = m2[idx] + m1[idx];
        ++idx;
    }
}

// Host-side element-wise difference: m3[k] = m1[k] - m2[k] for every k in
// [0, matrix_size * matrix_size).
//
// m1          : minuend array (read only)
// m2          : subtrahend array (read only)
// m3          : output array
// matrix_size : side length; matrix_size * matrix_size elements are processed
void mat_sub_serial(const float *m1, const float *m2, float *m3, int matrix_size)
{
    const int count = matrix_size * matrix_size;
    int idx = 0;
    while (idx < count) {
        m3[idx] = m1[idx] - m2[idx];
        ++idx;
    }
}

// Host-side square matrix product: m3 = m1 * m2.
//
// m1, m2      : row-major matrix_size x matrix_size inputs (read only)
// m3          : row-major matrix_size x matrix_size output
// matrix_size : number of rows (and columns) of each matrix
//
// Output elements are visited in flat row-major order; each one is cleared and
// then built up in place as the dot product of a row of m1 and a column of m2.
void mat_mult_serial(const float *m1, const float *m2, float *m3, int matrix_size)
{
    const int total = matrix_size * matrix_size;

    for (int cell = 0; cell < total; ++cell) {
        const int r = cell / matrix_size;
        const int c = cell % matrix_size;
        float *out = &m3[cell];

        *out = 0;
        for (int k = 0; k < matrix_size; ++k)
            *out += m1[r * matrix_size + k] * m2[k * matrix_size + c];
    }
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Prints a row-major width x height matrix to stdout, one "{a,b,...}" row per
// line, followed by a blank line.
static void print_matrix(const float *m, int width, int height)
{
    for (int row = 0; row < height; row++) {
        printf("{");
        for (int col = 0; col < width; col++)
            printf("%s%.0f", col == 0 ? "" : ",", m[row * width + col]);
        printf("}\n");
    }
    printf("\n");
}

// Checks the kernel launched immediately before this call, waits for it,
// copies its width x height result from dev_src to host_dst and prints it.
// Returns false (after reporting on stderr) at the first failing step.
static bool fetch_and_print(const char *launch_name, float *host_dst,
                            const float *dev_src, int width, int height)
{
    const size_t bytes = (size_t)width * height * sizeof(float);

    // A kernel launch returns no status itself: cudaGetLastError() exposes
    // launch errors, and the synchronize exposes errors raised while running.
    if (!cuda_ok(cudaGetLastError(), launch_name))
        return false;
    if (!cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
        return false;
    if (!cuda_ok(cudaMemcpy(host_dst, dev_src, bytes, cudaMemcpyDeviceToHost),
                 "cudaMemcpy (device to host)"))
        return false;

    print_matrix(host_dst, width, height);
    return true;
}

// Runs the add, subtract and multiply kernels on two fixed 5x5 matrices and
// prints each result. Returns 0 on success and 1 if any CUDA call fails.
int main()
{
    const int matrix_width = 5;
    const int matrix_height = 5;
    const int element_count = matrix_width * matrix_height;
    const size_t bytes = element_count * sizeof(float);

    const float m1[matrix_width * matrix_height] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24};
    const float m2[matrix_width * matrix_height] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24};
    float m3[matrix_width * matrix_height] = {0};

    float *dev_m1 = 0;
    float *dev_m2 = 0;
    float *dev_m3 = 0;
    bool ok = false;

    // Single-pass block so that any failure can `break` straight to cleanup.
    do {
        if (!cuda_ok(cudaSetDevice(0), "cudaSetDevice (is a CUDA-capable GPU installed?)"))
            break;

        // Allocate GPU buffers for the three matrices (two input, one output).
        if (!cuda_ok(cudaMalloc((void**)&dev_m1, bytes), "cudaMalloc"))
            break;
        if (!cuda_ok(cudaMalloc((void**)&dev_m2, bytes), "cudaMalloc"))
            break;
        if (!cuda_ok(cudaMalloc((void**)&dev_m3, bytes), "cudaMalloc"))
            break;

        // Copy the input matrices from host memory to the GPU buffers.
        if (!cuda_ok(cudaMemcpy(dev_m1, m1, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (host to device)"))
            break;
        if (!cuda_ok(cudaMemcpy(dev_m2, m2, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (host to device)"))
            break;

        // add
        mat_add<<<1, element_count>>>(dev_m1, dev_m2, dev_m3);
        if (!fetch_and_print("mat_add launch", m3, dev_m3, matrix_width, matrix_height))
            break;

        // sub
        mat_sub<<<1, element_count>>>(dev_m1, dev_m2, dev_m3);
        if (!fetch_and_print("mat_sub launch", m3, dev_m3, matrix_width, matrix_height))
            break;

        // mult (the kernel takes a single size, i.e. it treats the matrices as square)
        mat_mult<<<1, element_count>>>(dev_m1, dev_m2, dev_m3, matrix_width);
        if (!fetch_and_print("mat_mult launch", m3, dev_m3, matrix_width, matrix_height))
            break;

        ok = true;
    } while (false);

    // Release the device buffers on every path; pointers that were never
    // allocated are still null here.
    cudaFree(dev_m1);
    cudaFree(dev_m2);
    cudaFree(dev_m3);

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    if (!cuda_ok(cudaDeviceReset(), "cudaDeviceReset"))
        return 1;

    return ok ? 0 : 1;
}
Loading