Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 95 additions & 94 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,94 +1,95 @@
# The minimum version must be declared before project() so that policy
# defaults are in place when the project is created.
cmake_minimum_required(VERSION 3.1)
project(cis565_boids)

# Let IDE generators organise targets into folders.
set_property(GLOBAL PROPERTY USE_FOLDERS ON)

# Put the project's own cmake/ directory in front of any module path that was
# already configured.
set(CMAKE_MODULE_PATH
  "${CMAKE_SOURCE_DIR}/cmake"
  ${CMAKE_MODULE_PATH}
)

# Build-tree layout: static and shared libraries go to lib/, executables to bin/.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

# Host code is compiled as C++11.
set(CMAKE_CXX_STANDARD 11)

# Fall back to a Release build when neither a build type nor a list of
# configuration types was supplied.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
  # Offer the standard build types as choices in cmake-gui.
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
    Debug
    Release
    MinSizeRel
    RelWithDebInfo
  )
endif()

########################################
# CUDA Setup
########################################
# Locate the CUDA toolkit; version 10.0 or newer is required.
find_package(CUDA 10.0 REQUIRED)

# Load the project's CUDAComputesList module by name so that it is resolved
# through CMAKE_MODULE_PATH. CMAKE_MODULE_PATH is a list, so splicing it into
# a file path only works while it holds exactly one directory.
include(CUDAComputesList)

# CUDA_GENERATE_CODE is presumably defined by CUDAComputesList - TODO confirm.
list(APPEND CUDA_NVCC_FLAGS ${CUDA_GENERATE_CODE})
# NVCC flags must be separate list elements (semicolon-delimited), not one
# space-separated string.
list(APPEND CUDA_NVCC_FLAGS_DEBUG -g -G)
set(CUDA_VERBOSE_BUILD ON)

if(WIN32)
  # Make NVCC use the C++ compiler CMake selected as its host compiler.
  # FORCE overwrites any value already stored in the cache.
  set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH "Host side compiler used by NVCC" FORCE)
endif()
########################################

# OpenGL is required on every platform.
find_package(OpenGL REQUIRED)

if(NOT UNIX)
  # Non-UNIX platforms: GLFW and GLEW are looked up under the external/
  # directory and linked statically.
  set(EXTERNAL external)

  set(GLFW_ROOT_DIR ${EXTERNAL})
  set(GLFW_USE_STATIC_LIBS ON)
  find_package(GLFW REQUIRED)

  set(GLEW_ROOT_DIR ${EXTERNAL})
  set(GLEW_USE_STATIC_LIBS ON)
  find_package(GLEW REQUIRED)

  add_definitions(${GLEW_DEFINITIONS})
  include_directories(
    ${GLEW_INCLUDE_DIR}
    ${GLFW_INCLUDE_DIR}
  )
  set(LIBRARIES
    ${GLEW_LIBRARY}
    ${GLFW_LIBRARY}
    ${OPENGL_LIBRARY}
  )
else()
  # UNIX: GLFW and GLEW are found through find_package without a root hint.
  find_package(glfw3 REQUIRED)
  find_package(GLEW REQUIRED)
  set(LIBRARIES
    glfw
    ${GLEW_LIBRARIES}
    ${OPENGL_gl_LIBRARY}
  )
endif()

# GLM is looked up under external/ on all platforms.
set(GLM_ROOT_DIR external)
find_package(GLM REQUIRED)
include_directories(${GLM_INCLUDE_DIRS})

# Header files, listed so that they appear in IDE project views.
set(headers
  src/cudaMat4.hpp
  src/glslUtility.hpp
  src/kernel.h
  src/main.hpp
  src/utilityCore.hpp
)

# Host (.cpp) and device (.cu) translation units.
set(sources
  src/glslUtility.cpp
  src/kernel.cu
  src/main.cpp
  src/utilityCore.cpp
)

list(SORT headers)
list(SORT sources)

# IDE grouping only; this does not affect what gets compiled.
source_group(Headers FILES ${headers})
source_group(Sources FILES ${sources})

# The executable is named after the project.
cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers})
# The keyword-less signature is kept on purpose: FindCUDA's
# cuda_add_executable links its own libraries with the plain signature unless
# CUDA_LINK_LIBRARIES_KEYWORD is set, and CMake does not allow mixing the
# plain and keyword signatures on one target.
target_link_libraries(${CMAKE_PROJECT_NAME} ${LIBRARIES})

# Copy the shaders/ directory from the source tree into the build tree.
# PRE_BUILD is only honoured by Visual Studio generators; other generators
# treat it as PRE_LINK. VERBATIM makes argument escaping platform-independent.
add_custom_command(
  TARGET ${CMAKE_PROJECT_NAME}
  PRE_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_directory
          "${CMAKE_SOURCE_DIR}/shaders"
          "${CMAKE_BINARY_DIR}/shaders"
  VERBATIM
)
# The minimum version must be declared before project() so that policy
# defaults are in place when the project is created.
cmake_minimum_required(VERSION 3.1)
project(cis565_boids)

# Let IDE generators organise targets into folders.
set_property(GLOBAL PROPERTY USE_FOLDERS ON)

# Put the project's own cmake/ directory in front of any module path that was
# already configured.
set(CMAKE_MODULE_PATH
  "${CMAKE_SOURCE_DIR}/cmake"
  ${CMAKE_MODULE_PATH}
)

# Build-tree layout: static and shared libraries go to lib/, executables to bin/.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

# Host code is compiled as C++11.
set(CMAKE_CXX_STANDARD 11)

# Fall back to a Release build when neither a build type nor a list of
# configuration types was supplied.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
  # Offer the standard build types as choices in cmake-gui.
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
    Debug
    Release
    MinSizeRel
    RelWithDebInfo
  )
endif()

########################################
# CUDA Setup
########################################
# Locate the CUDA toolkit; version 10.0 or newer is required.
find_package(CUDA 10.0 REQUIRED)

# Load the project's CUDAComputesList module by name so that it is resolved
# through CMAKE_MODULE_PATH. CMAKE_MODULE_PATH is a list, so splicing it into
# a file path only works while it holds exactly one directory.
include(CUDAComputesList)

# CUDA_GENERATE_CODE is presumably defined by CUDAComputesList - TODO confirm.
list(APPEND CUDA_NVCC_FLAGS ${CUDA_GENERATE_CODE})
# NVCC flags must be separate list elements (semicolon-delimited), not one
# space-separated string.
list(APPEND CUDA_NVCC_FLAGS_DEBUG -g -G)
set(CUDA_VERBOSE_BUILD ON)

if(WIN32)
  # Make NVCC use the C++ compiler CMake selected as its host compiler.
  # FORCE overwrites any value already stored in the cache.
  set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH "Host side compiler used by NVCC" FORCE)
endif()
########################################

# OpenGL is required on every platform.
find_package(OpenGL REQUIRED)

if(NOT UNIX)
  # Non-UNIX platforms: GLFW and GLEW are looked up under the external/
  # directory and linked statically.
  set(EXTERNAL external)

  set(GLFW_ROOT_DIR ${EXTERNAL})
  set(GLFW_USE_STATIC_LIBS ON)
  find_package(GLFW REQUIRED)

  set(GLEW_ROOT_DIR ${EXTERNAL})
  set(GLEW_USE_STATIC_LIBS ON)
  find_package(GLEW REQUIRED)

  add_definitions(${GLEW_DEFINITIONS})
  include_directories(
    ${GLEW_INCLUDE_DIR}
    ${GLFW_INCLUDE_DIR}
  )
  set(LIBRARIES
    ${GLEW_LIBRARY}
    ${GLFW_LIBRARY}
    ${OPENGL_LIBRARY}
  )
else()
  # UNIX: GLFW and GLEW are found through find_package without a root hint.
  find_package(glfw3 REQUIRED)
  find_package(GLEW REQUIRED)
  set(LIBRARIES
    glfw
    ${GLEW_LIBRARIES}
    ${OPENGL_gl_LIBRARY}
  )
endif()

# GLM is looked up under external/ on all platforms.
set(GLM_ROOT_DIR external)
find_package(GLM REQUIRED)
include_directories(${GLM_INCLUDE_DIRS})

# Header files, listed so that they appear in IDE project views.
# cxxopts.hpp is the third-party command-line argument parser.
set(headers
  src/cudaMat4.hpp
  src/cxxopts.hpp
  src/glslUtility.hpp
  src/kernel.h
  src/main.hpp
  src/utilityCore.hpp
)

# Host (.cpp) and device (.cu) translation units.
set(sources
  src/glslUtility.cpp
  src/kernel.cu
  src/main.cpp
  src/utilityCore.cpp
)

list(SORT headers)
list(SORT sources)

# IDE grouping only; this does not affect what gets compiled.
source_group(Headers FILES ${headers})
source_group(Sources FILES ${sources})

# The executable is named after the project.
cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers})
# The keyword-less signature is kept on purpose: FindCUDA's
# cuda_add_executable links its own libraries with the plain signature unless
# CUDA_LINK_LIBRARIES_KEYWORD is set, and CMake does not allow mixing the
# plain and keyword signatures on one target.
target_link_libraries(${CMAKE_PROJECT_NAME} ${LIBRARIES})

# Copy the shaders/ directory from the source tree into the build tree.
# PRE_BUILD is only honoured by Visual Studio generators; other generators
# treat it as PRE_LINK. VERBATIM makes argument escaping platform-independent.
add_custom_command(
  TARGET ${CMAKE_PROJECT_NAME}
  PRE_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_directory
          "${CMAKE_SOURCE_DIR}/shaders"
          "${CMAKE_BINARY_DIR}/shaders"
  VERBATIM
)
59 changes: 48 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,48 @@
**University of Pennsylvania, CIS 565: GPU Programming and Architecture,
Project 1 - Flocking**

* (TODO) YOUR NAME HERE
* (TODO) [LinkedIn](), [personal website](), [twitter](), etc.
* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)

### (TODO: Your README)

Include screenshots, analysis, etc. (Remember, this is public, so don't put
anything here that you don't want to share with the world.)
**University of Pennsylvania, CIS 565: GPU Programming and Architecture,
Project 1 - Flocking**

* Gangzheng Tong
* www.gtong.me
* Tested on: Windows 10, Xeon E5-2687W v3 @ 3.1GHz 64GB, Titan V (Sig Lab)

###

![Screenshot](images/recording.gif)
![Screenshot](images/screentshot.png)

####
`cxxopts.hpp` was added to `CMakeLists.txt` so that command-line arguments can be parsed for testing.

## Part 2.2
Comparing the uniform grid velocity update to the naive velocity update, I found that as the number of objects increases, the uniform grid performs better than the naive approach. As the diagram suggests, once the number of objects exceeds 10000 in this test setting, the uniform grid velocity update is generally faster than the naive one.
Part of the reason is that the naive search goes linearly through all other particles to find the neighbours, and its O(n^2) cost makes it hard to scale.

## Part 3 Performance Analysis

![FPS Visualized](images/fps_vis.png)

![FPS Un-Visualized](images/fps_unvis.png)

![fps_block256](images/fps_block256.png)

![FPS BlockSize](images/fps_blocksize.png)


1. For each implementation, generally speaking, the performance drops as the number of boids increases. However, for uniform scatter and coherent search, the performance increases at the beginning, when the number of boids is small. I think it has something to do with the GPU latency and the initial setup required for these more complicated algorithms. But with these optimizations, they are also more scalable than the naive approach.

2. As the diagram illustrates, changing the block size has little impact on the naive algorithm, but has a positive impact on the uniform scatter and coherent search algorithms. I’m running the test on a Titan V GPU, which has a huge amount of memory and a large number of SMs. This implies that for the naive algorithm the computation becomes the bottleneck, whereas there is plenty of bandwidth headroom. With the coherent search, we need to do a lot of reshuffling and sorting, which is why the large bandwidth helps.

3. Changing the cell width and checking 27 vs 8 neighboring cells has an influence on the performance. The result depends on the current state of the boid layout. Generally speaking, if the number of boids is small and the boids are spread out, checking 27 cells will help improve the performance because we have a higher chance of eliminating the empty cells in the first place.

## Part 3 Feedback
It’s tedious to use #define macros to toggle different test settings because we need to re-compile the application each time, which is time-consuming. I introduced an AUTOMATION macro and use run-time arguments to control the settings. By doing this, I can use a single batch file (see test.bat for an example) to run all setting combinations (number of boids, blockSize, Naive/Scatter/Coherent, etc.).
The result file is saved to the “output” folder.
Each test runs for 20 s before exiting.

It saved me so much time, and I’d suggest using this approach for future assignments.
I use a third-party argument-parsing header, “cxxopts.hpp”. It’s added to CMakeLists.txt and should compile as usual.
To run the executable from the command line, copy the exe file to the same directory as the “shaders” folder so that it can find the glsl files.

To disable AUTOMATION, simply change it to #define AUTOMATION 0, and it will run just like the provided version.

Due to the limited time, I will optimize the testing procedure in the upcoming assignment.
Binary file added images/fps_block256.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/fps_blocksize.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/fps_unvis.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/fps_vis.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/recording.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/screentshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading