Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified Part1/PROJ_WIN/CIS565_PROJ_1.suo
Binary file not shown.
4 changes: 2 additions & 2 deletions Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
Expand Down Expand Up @@ -114,6 +114,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
</ImportGroup>
</Project>
Binary file modified Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb
Binary file not shown.
1,135 changes: 568 additions & 567 deletions Part1/PROJ_WIN/src/kernel.cu.deps

Large diffs are not rendered by default.

67 changes: 63 additions & 4 deletions Part1/src/kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// Launch block shape shared by the simulation kernels.
dim3 threadsPerBlock(blockSize);

// Number of simulated bodies.
int numObjects;

// Masses used by the force computation. They are declared __device__ so that
// device code (accelerate / updateF) can read them; a single definition of
// planetMass is kept to avoid a redefinition error.
const __device__ float planetMass = 3e8;  // mass assigned to every orbiting body
const __device__ float starMass = 5e10;   // mass of the body placed at the origin

const float scene_scale = 2e2; //size of the height map in simulation space
Expand Down Expand Up @@ -89,19 +89,73 @@ __global__ void generateCircularVelArray(int time, int N, glm::vec3 * arr, glm::
// REMEMBER : F = (G * m_a * m_b) / (r_ab ^ 2)
// Sums the gravitational force acting on the body at `my_pos`.
//
//   N         : number of bodies stored in `their_pos`
//   my_pos    : position of the body being evaluated (only x, y, z are read)
//   their_pos : positions of all N bodies (only x, y, z are read)
//
// Returns the force from every body in `their_pos` (each treated as having
// mass planetMass) plus the pull of a star of mass starMass at the origin.
// Each contribution has magnitude G * m_a * m_b / r^2 and points from my_pos
// towards the other body. EPSILON is added to every distance so that a zero
// separation (including the body paired with itself) does not divide by zero.
//
// NOTE: the result is a force, not an acceleration; the caller divides by the
// body's mass.
__device__ glm::vec3 accelerate(int N, glm::vec4 my_pos, glm::vec4 * their_pos)
{
    glm::vec3 F(0.0f);

    // Threads past the end of the body array contribute nothing.
    int index = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (index >= N)
    {
        return F;
    }

    glm::vec3 my_P = glm::vec3(my_pos.x, my_pos.y, my_pos.z);

    // Planet-planet interaction. The loop must run over all N bodies:
    // their_pos->length() is the component count of a single vec4, not the
    // number of bodies.
    float pairScale = G * planetMass * planetMass;
    for (int i = 0; i < N; i++)
    {
        glm::vec3 it_P = glm::vec3(their_pos[i].x, their_pos[i].y, their_pos[i].z);
        glm::vec3 Dir = it_P - my_P;
        float it_dist = glm::length(Dir) + EPSILON;
        float FValue = pairScale / (it_dist * it_dist);
        // Dir / it_dist is the (softened) unit direction towards body i.
        F += FValue * Dir / it_dist;
    }

    // Pull of the star located at the origin.
    float starScale = G * planetMass * starMass;
    glm::vec3 DStar = glm::vec3(0, 0, 0) - my_P;
    float star_dist = glm::length(DStar) + EPSILON;
    float FValueS = starScale / (star_dist * star_dist);
    F += FValueS * DStar / star_dist;

    return F;
}

// TODO : update the acceleration of each body
// Kernel: recomputes the acceleration of every body.
// One thread per body on a 1D launch; threads whose global index is >= N exit
// immediately. `dt` and `vel` are accepted but not used here.
__global__ void updateF(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc)
{
    const int body = threadIdx.x + (blockDim.x * blockIdx.x);
    if (body >= N)
    {
        return;
    }

    // accelerate() yields a force; dividing by the body's mass gives a = F / m.
    const glm::vec3 force = accelerate(N, pos[body], pos);
    const glm::vec3 a = force / planetMass;

    acc[body].x = a.x;
    acc[body].y = a.y;
    acc[body].z = a.z;
}

// TODO : update velocity and position using a simple Euler integration scheme
// Kernel: advances every body by one explicit Euler step of length `dt`.
// One thread per body on a 1D launch; threads whose global index is >= N exit
// immediately. The position is advanced with the velocity from before this
// step, and the velocity with the stored acceleration. pos[...].w is left
// untouched.
__global__ void updateS(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc)
{
    const int body = threadIdx.x + (blockDim.x * blockIdx.x);
    if (body >= N)
    {
        return;
    }

    const glm::vec3 p0(pos[body].x, pos[body].y, pos[body].z);
    const glm::vec3 v0(vel[body].x, vel[body].y, vel[body].z);

    const glm::vec3 p1 = p0 + dt * v0;
    const glm::vec3 v1 = v0 + acc[body] * dt;

    vel[body].x = v1.x;
    vel[body].y = v1.y;
    vel[body].z = v1.z;

    pos[body].x = p1.x;
    pos[body].y = p1.y;
    pos[body].z = p1.z;
}

// Update the vertex buffer object
Expand Down Expand Up @@ -179,7 +233,12 @@ void initCuda(int N)
// TODO : Using the functions you wrote above, write a function that calls the CUDA kernels to update a single sim step
// Runs one simulation step of length `dt` on the device: first updateF
// refreshes the accelerations, then updateS integrates velocity and position.
// Both kernels are launched with one thread per body, and the call blocks
// until the device has finished.
void cudaNBodyUpdateWrapper(float dt)
{
    // A grid with zero blocks is an invalid launch configuration, so there is
    // nothing to do when no bodies exist.
    if (numObjects <= 0)
    {
        return;
    }

    // Integer ceil-division: enough blocks to cover every body.
    const int threads = (int)blockSize;
    dim3 fullBlocksPerGrid((numObjects + threads - 1) / threads);

    updateF<<<fullBlocksPerGrid, blockSize>>>(numObjects, dt, dev_pos, dev_vel, dev_acc);
    updateS<<<fullBlocksPerGrid, blockSize>>>(numObjects, dt, dev_pos, dev_vel, dev_acc);

    // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its
    // replacement with the same blocking behaviour.
    cudaDeviceSynchronize();
}

void cudaUpdateVBO(float * vbodptr, int width, int height)
Expand Down
20 changes: 20 additions & 0 deletions Part2/matrix_math_ry/matrix_math_ry.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix_math_ry", "matrix_math_ry\matrix_math_ry.vcxproj", "{51526DD6-5537-4AFA-9250-FD1635CC35C5}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{51526DD6-5537-4AFA-9250-FD1635CC35C5}.Debug|Win32.ActiveCfg = Debug|Win32
{51526DD6-5537-4AFA-9250-FD1635CC35C5}.Debug|Win32.Build.0 = Debug|Win32
{51526DD6-5537-4AFA-9250-FD1635CC35C5}.Release|Win32.ActiveCfg = Release|Win32
{51526DD6-5537-4AFA-9250-FD1635CC35C5}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
226 changes: 226 additions & 0 deletions Part2/matrix_math_ry/matrix_math_ry/matrix_math.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>

#define WIDTH 5


// Element-wise sum of two Width x Width row-major matrices: Rd = Ad + Bd.
//
// Launch layout: a single 2D block of at least Width x Width threads. Only
// threadIdx is used, so thread (tx, ty) handles column tx of row ty; threads
// outside the matrix do nothing.
__global__ void MatAddKernel (float* Ad, float* Bd, float* Rd, int Width)
{
    int tx = threadIdx.x;
    int ty = threadIdx.y;

    // Guard against blocks larger than the matrix.
    if (tx >= Width || ty >= Width)
    {
        return;
    }

    int idx = ty * Width + tx;
    Rd[idx] = Ad[idx] + Bd[idx];
}

// Element-wise difference of two Width x Width row-major matrices:
// Rd = Ad - Bd.
//
// Launch layout: a single 2D block of at least Width x Width threads. Only
// threadIdx is used, so thread (tx, ty) handles column tx of row ty; threads
// outside the matrix do nothing.
__global__ void MatMinKernel (float* Ad, float* Bd, float* Rd, int Width)
{
    int tx = threadIdx.x;
    int ty = threadIdx.y;

    // Guard against blocks larger than the matrix.
    if (tx >= Width || ty >= Width)
    {
        return;
    }

    int idx = ty * Width + tx;
    Rd[idx] = Ad[idx] - Bd[idx];
}

// Matrix product of two Width x Width row-major matrices: Rd = Ad * Bd.
//
// Launch layout: a single 2D block of at least Width x Width threads. Only
// threadIdx is used, so thread (tx, ty) computes the element at row ty,
// column tx as the dot product of row ty of Ad with column tx of Bd. Threads
// outside the matrix do nothing.
__global__ void MatMulKernel(float* Ad, float* Bd, float* Rd, int Width)
{
    // 2D thread ID
    int tx = threadIdx.x;
    int ty = threadIdx.y;

    // Guard against blocks larger than the matrix; without it the extra
    // threads would read and write outside the Width x Width range.
    if (tx >= Width || ty >= Width)
    {
        return;
    }

    float RValue = 0;
    for (int k = 0; k < Width; ++k)
    {
        RValue += Ad[ty * Width + k] * Bd[k * Width + tx];
    }

    Rd[ty * Width + tx] = RValue;
}


// Signature shared by MatMulKernel, MatAddKernel and MatMinKernel.
typedef void (*MatrixKernelFn)(float*, float*, float*, int);

// Returns true when `status` is cudaSuccess; otherwise reports the failed
// step on stderr and returns false.
static bool cudaStepOk(cudaError_t status, const char* step)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", step, cudaGetErrorString(status));
    return false;
}

// Copies the Width x Width host matrices A and B to the device, runs `kernel`
// on them with a single Width x Width thread block, and copies the result
// back into the host matrix R. Every CUDA call is checked; on failure the
// error is printed, R is left unmodified by the failed step, and the device
// buffers are still released.
static void runMatrixKernelOnDevice(MatrixKernelFn kernel, const char* kernelName,
                                    float* A, float* B, float* R, int Width)
{
    if (Width <= 0)
    {
        return; // nothing to compute, and a zero-sized block cannot be launched
    }

    size_t size = (size_t)Width * Width * sizeof(float);
    float *Ad = NULL, *Bd = NULL, *Rd = NULL;

    // Allocate device buffers and load A and B into device memory.
    bool ok = cudaStepOk(cudaMalloc((void**)&Ad, size), "cudaMalloc(Ad)")
           && cudaStepOk(cudaMalloc((void**)&Bd, size), "cudaMalloc(Bd)")
           && cudaStepOk(cudaMalloc((void**)&Rd, size), "cudaMalloc(Rd)")
           && cudaStepOk(cudaMemcpy(Ad, A, size, cudaMemcpyHostToDevice), "copy A to device")
           && cudaStepOk(cudaMemcpy(Bd, B, size, cudaMemcpyHostToDevice), "copy B to device");

    if (ok)
    {
        // The kernels index with threadIdx only, so the whole matrix has to
        // fit in one block. A Width too large for a single block makes the
        // launch fail, which cudaGetLastError() reports below.
        dim3 dimBlock(Width, Width);
        dim3 dimGrid(1, 1);
        kernel<<<dimGrid, dimBlock>>>(Ad, Bd, Rd, Width);

        // The blocking copy also waits for the kernel to finish.
        ok = cudaStepOk(cudaGetLastError(), kernelName)
          && cudaStepOk(cudaMemcpy(R, Rd, size, cudaMemcpyDeviceToHost), "copy result to host");
    }

    // cudaFree(NULL) is a no-op, so this is safe after a partial allocation.
    cudaFree(Ad); cudaFree(Bd); cudaFree(Rd);
}

// Computes R = A * B on the GPU for Width x Width row-major host matrices.
void MatrixMulOnDevice(float* A, float*B, float* R, int Width)
{
    runMatrixKernelOnDevice(MatMulKernel, "MatMulKernel launch", A, B, R, Width);
}

// Computes R = A + B on the GPU for Width x Width row-major host matrices.
void MatrixAddOnDevice(float* A, float*B, float* R, int Width)
{
    runMatrixKernelOnDevice(MatAddKernel, "MatAddKernel launch", A, B, R, Width);
}

// Computes R = A - B on the GPU for Width x Width row-major host matrices.
void MatrixMinOnDevice(float* A, float*B, float* R, int Width)
{
    runMatrixKernelOnDevice(MatMinKernel, "MatMinKernel launch", A, B, R, Width);
}

// CPU reference: R = A * B for Width x Width row-major matrices.
// Each output element is the dot product of one row of A with one column of
// B, accumulated in increasing k order.
void MatrixMulOnHost(float* A, float* B, float* R, int Width)
{
    for (int row = 0; row < Width; ++row)
    {
        float* aRow = A + row * Width;
        float* rRow = R + row * Width;
        for (int col = 0; col < Width; ++col)
        {
            float acc = 0;
            for (int k = 0; k < Width; ++k)
            {
                acc += aRow[k] * B[k * Width + col];
            }
            rRow[col] = acc;
        }
    }
}
// CPU reference: R = A + B, element by element, for Width x Width row-major
// matrices.
void MatrixAddOnHost(float* A, float* B, float* R, int Width)
{
    for (int row = 0; row < Width; ++row)
    {
        float* aRow = A + row * Width;
        float* bRow = B + row * Width;
        float* rRow = R + row * Width;
        for (int col = 0; col < Width; ++col)
        {
            rRow[col] = aRow[col] + bRow[col];
        }
    }
}

// CPU reference: R = A - B, element by element, for Width x Width row-major
// matrices.
void MatrixMinOnHost(float* A, float* B, float* R, int Width)
{
    for (int row = 0; row < Width; ++row)
    {
        float* aRow = A + row * Width;
        float* bRow = B + row * Width;
        float* rRow = R + row * Width;
        for (int col = 0; col < Width; ++col)
        {
            rRow[col] = aRow[col] - bRow[col];
        }
    }
}


// Prints `title`, then the Width x Width row-major matrix M with one decimal
// per element and a line break after every row.
static void printMatrix(const char* title, const float* M, int Width)
{
    printf("%s", title);
    for (int j = 0; j < Width * Width; j++)
    {
        printf("%.1f ", M[j]);
        if (j % Width == Width - 1) printf("\n");
    }
}

// Builds two WIDTH x WIDTH matrices holding 0, 1, 2, ... in row-major order,
// multiplies, adds and subtracts them on both the GPU and the CPU, and prints
// the inputs followed by both sets of results for visual comparison.
int main(){
    const int count = WIDTH * WIDTH;

    float* M1_h = new float[count];
    float* M2_h = new float[count];

    // GPU results; zero-initialised so a failed device call prints zeros
    // rather than uninitialised memory.
    float* R_h_mul = new float[count]();
    float* R_h_add = new float[count]();
    float* R_h_min = new float[count]();

    // CPU reference results.
    float* R_h_mulc = new float[count]();
    float* R_h_addc = new float[count]();
    float* R_h_minc = new float[count]();

    for(int i = 0; i < count; i++)
    {
        M1_h[i] = i; M2_h[i] = i;
    }

    MatrixMulOnDevice(M1_h, M2_h, R_h_mul, WIDTH);
    MatrixAddOnDevice(M1_h, M2_h, R_h_add, WIDTH);
    MatrixMinOnDevice(M1_h, M2_h, R_h_min, WIDTH);

    MatrixMulOnHost(M1_h, M2_h, R_h_mulc, WIDTH);
    MatrixAddOnHost(M1_h, M2_h, R_h_addc, WIDTH);
    MatrixMinOnHost(M1_h, M2_h, R_h_minc, WIDTH);

    //results
    printMatrix("M1 = \n", M1_h, WIDTH);
    printMatrix("\n M2 = \n", M2_h, WIDTH);

    printf("\n ============= GPU =============\n");
    printMatrix("\n Matrix Multiply: M1 * M2 = \n", R_h_mul, WIDTH);
    printMatrix("\n Matrix Addition: M1 + M2 = \n", R_h_add, WIDTH);
    printMatrix("\n Matrix Subtraction: M1 - M2 = \n", R_h_min, WIDTH);

    printf("\n ============= CPU =============\n");
    printMatrix("\n Matrix Multiply: M1 * M2 = \n", R_h_mulc, WIDTH);
    printMatrix("\n Matrix Addition: M1 + M2 = \n", R_h_addc, WIDTH);
    printMatrix("\n Matrix Subtraction: M1 - M2 = \n", R_h_minc, WIDTH);

    // Release every host buffer allocated above.
    delete[] M1_h;
    delete[] M2_h;
    delete[] R_h_mul;
    delete[] R_h_add;
    delete[] R_h_min;
    delete[] R_h_mulc;
    delete[] R_h_addc;
    delete[] R_h_minc;

    return 0;
}
Loading