Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified Part1/PROJ_WIN/CIS565_PROJ_1.suo
Binary file not shown.
4 changes: 2 additions & 2 deletions Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
Expand Down Expand Up @@ -114,6 +114,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
</ImportGroup>
</Project>
Binary file modified Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb
Binary file not shown.
1,137 changes: 569 additions & 568 deletions Part1/PROJ_WIN/src/kernel.cu.deps

Large diffs are not rendered by default.

43 changes: 42 additions & 1 deletion Part1/src/kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,56 @@ __global__ void generateCircularVelArray(int time, int N, glm::vec3 * arr, glm::
// REMEMBER : F = (G * m_a * m_b) / (r_ab ^ 2)
// Computes the gravitational acceleration acting on the body located at my_pos.
//
//   N         - number of bodies stored in their_pos
//   my_pos    - position of the body being accelerated (xyz); w is not read here
//   their_pos - array of N bodies; xyz is the position, w is used as the mass
//
// Returns the sum of the pull of every body in their_pos plus the pull of a
// central mass (starMass) sitting at the origin. Threads whose global index is
// >= N get a zero vector back, as before.
//
// Each term is a = G * m * r / (|r|^2 + eps^2)^(3/2), where eps is
// ZERO_ABSORPTION_EPSILON. The softening term keeps the denominator away from
// zero, so the body's own entry in their_pos (r == 0) contributes nothing
// (provided the epsilon is nonzero).
__device__ glm::vec3 accelerate(int N, glm::vec4 my_pos, glm::vec4 * their_pos)
{
    glm::vec3 acc(0.0f);

    const int index = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (index >= N)
    {
        return acc;
    }

    // Keep everything in single precision; a double literal or pow() would
    // silently promote the whole expression to double.
    const float gravity = (float)G;
    const float eps = (float)ZERO_ABSORPTION_EPSILON;
    const float eps2 = eps * eps;

    // Iterate over all N bodies. (glm::vec4::length() is the component count,
    // i.e. 4, so it must not be used as the loop bound.)
    for (int i = 0; i < N; i++)
    {
        const glm::vec4 other = their_pos[i];
        const float rx = other.x - my_pos.x;
        const float ry = other.y - my_pos.y;
        const float rz = other.z - my_pos.z;

        const float dist2 = rx * rx + ry * ry + rz * rz + eps2;
        const float invDist = rsqrtf(dist2);
        // G * m / dist2^(3/2)
        const float s = gravity * other.w * invDist * invDist * invDist;

        acc.x += s * rx;
        acc.y += s * ry;
        acc.z += s * rz;
    }

    // Central star at the origin: the vector from this body to it is -my_pos.
    const float sx = -my_pos.x;
    const float sy = -my_pos.y;
    const float sz = -my_pos.z;
    const float starDist2 = sx * sx + sy * sy + sz * sz + eps2;
    const float starInvDist = rsqrtf(starDist2);
    const float starS = gravity * starMass * starInvDist * starInvDist * starInvDist;

    acc.x += starS * sx;
    acc.y += starS * sy;
    acc.z += starS * sz;

    return acc;
}

// Update the acceleration of each body (one thread per body)
// Kernel: recomputes the acceleration of every body.
// One thread per body; threads whose global index is >= N do nothing.
// dt and vel are accepted for signature symmetry with updateS but are not used.
__global__ void updateF(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc)
{
    const int body = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (body >= N)
    {
        return;
    }

    // The whole position array is handed over so the helper can sum the pull
    // of the other bodies on this one.
    acc[body] = accelerate(N, pos[body], pos);
}

// Update velocity (v += a * dt) and position (advanced with the average of the old and new velocity)
// Kernel: advances every body by one time step of length dt.
// One thread per body; threads whose global index is >= N do nothing.
//   new velocity = old velocity + acc * dt
//   new position = old position + (old velocity + new velocity) * dt / 2
// Only the xyz components of pos are written; pos[i].w is left untouched.
__global__ void updateS(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc)
{
    const int body = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (body >= N)
    {
        return;
    }

    const glm::vec3 oldVel = vel[body];
    const glm::vec3 newVel = oldVel + acc[body] * dt;

    glm::vec3 where(pos[body].x, pos[body].y, pos[body].z);
    where += (oldVel + newVel) * dt / 2.0f;

    pos[body].x = where.x;
    pos[body].y = where.y;
    pos[body].z = where.z;

    vel[body] = newVel;
}

// Update the vertex buffer object
Expand Down Expand Up @@ -180,6 +217,10 @@ void initCuda(int N)
// Host-side driver for one simulation step: first recompute every body's
// acceleration (updateF), then integrate velocity and position (updateS).
//   dt - time step forwarded to both kernels
void cudaNBodyUpdateWrapper(float dt)
{
    // NOTE(review): the first launch argument is the number of blocks, yet the
    // name blockSize suggests a threads-per-block count. If so, this launches
    // a fixed grid and bodies beyond blockSize * threads-per-block are never
    // updated; the grid should then be ceil(numObjects / threads-per-block).
    // Confirm against the declarations of blockSize and threadsPerBlock.
    updateF<<< blockSize, threadsPerBlock >>>(numObjects, dt, dev_pos, dev_vel, dev_acc);
    // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its replacement.
    cudaDeviceSynchronize();

    updateS<<< blockSize, threadsPerBlock >>>(numObjects, dt, dev_pos, dev_vel, dev_acc);
    cudaDeviceSynchronize();
}

void cudaUpdateVBO(float * vbodptr, int width, int height)
Expand Down
20 changes: 20 additions & 0 deletions Part2/matrix/matrix.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix", "matrix\matrix.vcxproj", "{9D12EC35-948A-4D33-A704-5AB4EF052E8B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{9D12EC35-948A-4D33-A704-5AB4EF052E8B}.Debug|Win32.ActiveCfg = Debug|Win32
{9D12EC35-948A-4D33-A704-5AB4EF052E8B}.Debug|Win32.Build.0 = Debug|Win32
{9D12EC35-948A-4D33-A704-5AB4EF052E8B}.Release|Win32.ActiveCfg = Release|Win32
{9D12EC35-948A-4D33-A704-5AB4EF052E8B}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
86 changes: 86 additions & 0 deletions Part2/matrix/matrix/matrix.vcxproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{9D12EC35-948A-4D33-A704-5AB4EF052E8B}</ProjectGuid>
<RootNamespace>matrix</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>cudart.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CudaCompile>
<Include>
</Include>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Windows.Forms" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<CudaCompile Include="matrix_math.cu">
<FileType>Document</FileType>
</CudaCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
</ImportGroup>
</Project>
22 changes: 22 additions & 0 deletions Part2/matrix/matrix/matrix.vcxproj.filters
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<CudaCompile Include="matrix_math.cu">
<Filter>Source Files</Filter>
</CudaCompile>
</ItemGroup>
</Project>
174 changes: 174 additions & 0 deletions Part2/matrix/matrix/matrix_math.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#include <cuda_runtime.h>
#include <cuda.h>
#include <iostream>
#include <assert.h>
//#include <ctime>

using namespace std;

// Number of rows (and columns) of the square matrices used in this file.
#define WIDTH 5
// Total element count of one matrix (WIDTH * WIDTH); used for allocation and
// for the element-wise result checks in main().
#define MSIZE 25
// Grid dimension passed as the first launch argument of every kernel.
#define numBlocks 1
// Block dimension passed as the second launch argument: WIDTH x WIDTH threads,
// i.e. one thread per matrix element.
dim3 threadsPerBlock(WIDTH, WIDTH);

// Kernel: element-wise sum Pd = Ad + Bd of two WIDTH x WIDTH matrices stored
// as flat arrays indexed [row * WIDTH + col].
// Thread (threadIdx.x, threadIdx.y) handles column x of row y. Only threadIdx
// is used, so every block of a launch works on the same elements.
// Threads outside the WIDTH x WIDTH range do nothing, which keeps launches
// with a larger block from touching memory past the end of the arrays.
__global__ void matAdd(float* Ad, float *Bd, float *Pd)
{
    const int col = threadIdx.x;
    const int row = threadIdx.y;
    if (col >= WIDTH || row >= WIDTH)
    {
        return;
    }

    const int idx = row * WIDTH + col;
    Pd[idx] = Ad[idx] + Bd[idx];
}

// Kernel: element-wise difference Pd = Ad - Bd of two WIDTH x WIDTH matrices
// stored as flat arrays indexed [row * WIDTH + col].
// Thread (threadIdx.x, threadIdx.y) handles column x of row y. Only threadIdx
// is used, so every block of a launch works on the same elements.
// Threads outside the WIDTH x WIDTH range do nothing, which keeps launches
// with a larger block from touching memory past the end of the arrays.
__global__ void matSub(float* Ad, float *Bd, float *Pd)
{
    const int col = threadIdx.x;
    const int row = threadIdx.y;
    if (col >= WIDTH || row >= WIDTH)
    {
        return;
    }

    const int idx = row * WIDTH + col;
    Pd[idx] = Ad[idx] - Bd[idx];
}

// Kernel: matrix product Pd = Ad * Bd of two WIDTH x WIDTH matrices stored as
// flat arrays indexed [row * WIDTH + col].
// Thread (threadIdx.x, threadIdx.y) computes the output element in column x of
// row y as the dot product of row y of Ad with column x of Bd. Only threadIdx
// is used, so every block of a launch works on the same elements.
// Threads outside the WIDTH x WIDTH range do nothing, which keeps launches
// with a larger block from touching memory past the end of the arrays.
__global__ void matMul(float* Ad, float *Bd, float *Pd)
{
    const int col = threadIdx.x;
    const int row = threadIdx.y;
    if (col >= WIDTH || row >= WIDTH)
    {
        return;
    }

    float pValue = 0.0f;
    for (int k = 0; k < WIDTH; k++)
    {
        pValue += Ad[row * WIDTH + k] * Bd[k * WIDTH + col];
    }

    Pd[row * WIDTH + col] = pValue;
}


// CPU reference: element-wise sum P = A + B of two WIDTH x WIDTH matrices
// stored as flat arrays of WIDTH * WIDTH floats.
void matSerialAdd(float *A, float *B, float *P)
{
    // Visiting row * WIDTH + col for every (row, col) in order is the same as
    // walking the flat array front to back.
    const int count = WIDTH * WIDTH;
    for (int idx = 0; idx < count; ++idx)
    {
        P[idx] = A[idx] + B[idx];
    }
}

// CPU reference: element-wise difference P = A - B of two WIDTH x WIDTH
// matrices stored as flat arrays of WIDTH * WIDTH floats.
void matSerialSub(float *A, float *B, float *P)
{
    // Visiting row * WIDTH + col for every (row, col) in order is the same as
    // walking the flat array front to back.
    const int count = WIDTH * WIDTH;
    for (int idx = 0; idx < count; ++idx)
    {
        P[idx] = A[idx] - B[idx];
    }
}

// CPU reference: matrix product P = A * B of two WIDTH x WIDTH matrices stored
// as flat arrays indexed [row * WIDTH + col].
void matSerialMul(float *A, float *B, float *P)
{
    for (int row = 0; row < WIDTH; ++row)
    {
        const float *aRow = A + row * WIDTH;
        float *pRow = P + row * WIDTH;

        for (int col = 0; col < WIDTH; ++col)
        {
            // Dot product of row `row` of A with column `col` of B,
            // accumulated in increasing k.
            float dot = 0.0f;
            int k = 0;
            while (k < WIDTH)
            {
                dot += aRow[k] * B[k * WIDTH + col];
                ++k;
            }
            pRow[col] = dot;
        }
    }
}


// Common signature of the three kernels and of the three CPU reference routines.
typedef void (*MatOp)(float *, float *, float *);

// Returns true when err is cudaSuccess; otherwise reports the failure on stderr.
static bool cudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
    {
        return true;
    }
    cerr << "CUDA error in " << what << ": " << cudaGetErrorString(err) << endl;
    return false;
}

// Runs one kernel on the device, copies its result into P, runs the matching
// CPU reference into serialP and compares the two element by element.
// Prints successMessage and returns true when every element matches; reports
// the first problem on stderr and returns false otherwise.
static bool runAndVerify(MatOp kernel, MatOp reference, const char *successMessage,
                         float *A, float *B, float *P, float *serialP,
                         float *Ad, float *Bd, float *Pd, size_t size)
{
    kernel<<< numBlocks, threadsPerBlock >>>(Ad, Bd, Pd);
    // A kernel launch returns nothing; configuration errors have to be queried.
    if (!cudaOk(cudaGetLastError(), "kernel launch"))
    {
        return false;
    }
    // The blocking copy also waits for the kernel and surfaces execution errors.
    if (!cudaOk(cudaMemcpy(P, Pd, size, cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)"))
    {
        return false;
    }

    reference(A, B, serialP);

    // Exact comparison is intentional: the inputs are the integers 0..MSIZE-1,
    // so every sum, difference and product involved is exactly representable
    // in float.
    for (int i = 0; i < MSIZE; i++)
    {
        if (P[i] != serialP[i])
        {
            cerr << "Mismatch at element " << i << ": GPU " << P[i]
                 << " vs CPU " << serialP[i] << endl;
            return false;
        }
    }

    cout << successMessage << endl;
    return true;
}

// Fills two WIDTH x WIDTH matrices with 0..MSIZE-1, then checks the GPU
// addition, subtraction and multiplication kernels against their CPU
// references. Returns 0 when all three agree and 1 on any CUDA error or
// mismatch. The checks no longer rely on assert(), so they still run when
// NDEBUG is defined.
int main()
{
    float *A = new float[MSIZE];
    float *B = new float[MSIZE];
    float *P = new float[MSIZE];
    float *serialP = new float[MSIZE];
    for (int i = 0; i < MSIZE; i++)
    {
        A[i] = (float)i;
        B[i] = (float)i;
    }

    // Load A and B into device memory and allocate the device result buffer.
    const size_t size = MSIZE * sizeof(float);
    float *Ad = 0, *Bd = 0, *Pd = 0;

    bool ok = cudaOk(cudaMalloc((void**)&Ad, size), "cudaMalloc (Ad)")
           && cudaOk(cudaMemcpy(Ad, A, size, cudaMemcpyHostToDevice), "cudaMemcpy (A to device)")
           && cudaOk(cudaMalloc((void**)&Bd, size), "cudaMalloc (Bd)")
           && cudaOk(cudaMemcpy(Bd, B, size, cudaMemcpyHostToDevice), "cudaMemcpy (B to device)")
           && cudaOk(cudaMalloc((void**)&Pd, size), "cudaMalloc (Pd)");

    // Each step only runs if everything before it succeeded.
    ok = ok && runAndVerify(matAdd, matSerialAdd, "Matrix Addition Success!",
                            A, B, P, serialP, Ad, Bd, Pd, size);
    ok = ok && runAndVerify(matSub, matSerialSub, "Matrix Subtraction Success!",
                            A, B, P, serialP, Ad, Bd, Pd, size);
    ok = ok && runAndVerify(matMul, matSerialMul, "Matrix Dot Multiplication Success!",
                            A, B, P, serialP, Ad, Bd, Pd, size);

    // Free device memory (cudaFree on a null pointer performs no operation).
    cudaFree(Ad);
    cudaFree(Bd);
    cudaFree(Pd);

    // Free host memory.
    delete[] A;
    delete[] B;
    delete[] P;
    delete[] serialP;

    return ok ? 0 : 1;
}
Loading