Skip to content

Hw04/simd #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ if (NOT CMAKE_BUILD_TYPE)
endif()

# Build the `main` executable from main.cpp.
add_executable(main main.cpp)
# -ffast-math lets the compiler relax strict IEEE floating-point rules, and
# -march=native targets the CPU of the build machine, so the resulting binary
# may not run on other CPUs and results may differ from a strict-IEEE build.
target_compile_options(main PUBLIC -ffast-math -march=native)
118 changes: 81 additions & 37 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,64 +4,108 @@
#include <chrono>
#include <cmath>

// Reduce the arithmetic done during initialization: RAND_MAX2 is the
// precomputed factor 2 / RAND_MAX, so frand() can scale rand() with a single
// multiplication.
constexpr float RAND_MAX2 = 1.f / RAND_MAX * 2;
// Returns rand() rescaled to a float in the range [-1, 1].
// NOTE(review): two return statements follow. They look like the removed and
// the added line of a diff rendered without +/- markers; as written only the
// first one can execute. Confirm which one is meant to be live.
float frand() {
return (float)rand() / RAND_MAX * 2 - 1;
return (float)rand() * RAND_MAX2 - 1;
}

// Element count of every per-field array in Star; also the loop bound used
// when iterating over the stars.
constexpr size_t length = 48;
// Star state: position (px, py, pz), velocity (vx, vy, vz) and mass.
// NOTE(review): each member name is listed twice, once as a scalar and once
// as a `length`-sized array. This looks like the removed layout (one struct
// per star) and the added layout (one array per field) of a diff rendered
// without +/- markers; the two sets cannot coexist in one struct. Confirm
// which layout is live.
struct Star {
float px, py, pz;
float vx, vy, vz;
float mass;
float px[length], py[length], pz[length];
float vx[length], vy[length], vz[length];
float mass[length];
};

// Global simulation state.
// NOTE(review): `stars` is declared twice, once as a vector of Star and once
// as a single Star. These look like the removed and added lines of a diff
// rendered without +/- markers; confirm which declaration is live.
std::vector<Star> stars;
Star stars;

// Fills the star data with random values: every position and velocity
// component is drawn from frand(), and every mass is frand() + 1.
// NOTE(review): the body mixes a push_back loop over 48 elements with an
// indexed loop over `length` elements, and the braces do not balance. This
// looks like removed and added diff lines shown together; confirm.
void init() {
for (int i = 0; i < 48; i++) {
stars.push_back({
frand(), frand(), frand(),
frand(), frand(), frand(),
frand() + 1,
});
// Ask GCC to unroll the loop that follows by a factor of 8.
#pragma GCC unroll 8
for (size_t i = 0; i < length; i++) {
stars.px[i] = frand();
stars.py[i] = frand();
stars.pz[i] = frand();
stars.vx[i] = frand();
stars.vy[i] = frand();
stars.vz[i] = frand();
stars.mass[i] = frand() + 1;
}
}

// Simulation parameters: G scales the pairwise interaction (presumably the
// gravitational constant), eps is a softening length whose square is added to
// every squared distance, and dt is the time step.
// NOTE(review): each parameter appears twice, once as a plain float and once
// as constexpr. These look like the removed and added lines of a diff rendered
// without +/- markers; confirm which set is live.
float G = 0.001;
float eps = 0.001;
float dt = 0.01;
constexpr float G = 0.001;
constexpr float eps = 0.001;
constexpr float dt = 0.01;
// Precomputed combinations, to avoid unnecessary multiplications at run time.
constexpr float Gdt = G * dt;
constexpr float eps2 = eps * eps;
constexpr float G2 = G / 2;

// Advances the simulation by one time step. First, every star's velocity is
// increased by G * dt * sum_j(mass_j * d / |d|^3), where d is the offset from
// the star to star j and |d|^2 includes the eps * eps softening term. Then
// every position is advanced by velocity * dt.
// NOTE(review): the body interleaves a range-for version (`star` / `other`)
// with an index-based version (`stars.px[i]`), and the braces do not balance.
// This looks like removed and added diff lines shown together; confirm which
// lines are live.
void step() {
for (auto &star: stars) {
for (auto &other: stars) {
float dx = other.px - star.px;
float dy = other.py - star.py;
float dz = other.pz - star.pz;
float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
d2 *= sqrt(d2);
star.vx += dx * other.mass * G * dt / d2;
star.vy += dy * other.mass * G * dt / d2;
star.vz += dz * other.mass * G * dt / d2;
for (size_t i = 0; i < length; i++) {
// Avoid unnecessary memory accesses: read star i's position once.
float spxi = stars.px[i];
float spyi = stars.py[i];
float spzi = stars.pz[i];

// Accumulate into local variables initialized to 0 first.
float tmp_vxi = 0;
float tmp_vyi = 0;
float tmp_vzi = 0;

#pragma GCC unroll 8
for (size_t j = 0; j < length; j++) {
float dx = stars.px[j] - spxi;
float dy = stars.py[j] - spyi;
float dz = stars.pz[j] - spzi;
// eps2 keeps d2 non-zero when j == i (where dx, dy and dz are all 0).
float d2 = dx * dx + dy * dy + dz * dz + eps2;
// d2 becomes the softened distance cubed.
d2 *= std::sqrt(d2);
// The factor Gdt = G * dt is applied outside this for loop.
d2 = 1.f / d2;
// Original note: "turn multiplication into addition". With the reciprocal
// computed once above, the three updates below need only multiplications.
tmp_vxi += dx * stars.mass[j] * d2;
tmp_vyi += dy * stars.mass[j] * d2;
tmp_vzi += dz * stars.mass[j] * d2;
}
// Write to the global variable only after the accumulation has finished.
stars.vx[i] += tmp_vxi * Gdt;
stars.vy[i] += tmp_vyi * Gdt;
stars.vz[i] += tmp_vzi * Gdt;
}
for (auto &star: stars) {
star.px += star.vx * dt;
star.py += star.vy * dt;
star.pz += star.vz * dt;

#pragma GCC unroll 8
for (size_t i = 0; i < length; i++) {
stars.px[i] += stars.vx[i] * dt;
stars.py[i] += stars.vy[i] * dt;
stars.pz[i] += stars.vz[i] * dt;
}
}

// Returns the total energy of the system: for every star, mass * v^2 / 2,
// minus mass_i * mass_j * G / sqrt(d2) / 2 summed over every j, where d2 is
// the squared distance plus the eps * eps softening term. The j == i term is
// included in that sum.
// NOTE(review): the body interleaves a range-for version (`star` / `other`)
// with an index-based version (`stars.px[i]`), and the braces do not balance.
// This looks like removed and added diff lines shown together; confirm which
// lines are live.
float calc() {
float energy = 0;
for (auto &star: stars) {
float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz;
energy += star.mass * v2 / 2;
for (auto &other: stars) {
float dx = other.px - star.px;
float dy = other.py - star.py;
float dz = other.pz - star.pz;
float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
energy -= other.mass * star.mass * G / sqrt(d2) / 2;
for (size_t i = 0; i < length; i++) {
// Avoid unnecessary memory accesses: read star i's position and mass once.
float pxi = stars.px[i];
float pyi = stars.py[i];
float pzi = stars.pz[i];
float massi = stars.mass[i];

float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
energy += massi * v2 / 2;

// Accumulate into a local variable initialized to 0 first.
float tmp = 0;
#pragma GCC unroll 8
for (size_t j = 0; j < length; j++) {
float dx = stars.px[j] - pxi;
float dy = stars.py[j] - pyi;
float dz = stars.pz[j] - pzi;
float d2 = dx * dx + dy * dy + dz * dz + eps2;
// The factors massi = stars.mass[i] and G2 = G / 2 are applied outside this
// for loop, which reduces the number of multiplications.
tmp += stars.mass[j] / std::sqrt(d2);
}
// Write to the running total once the accumulation has finished.
energy -= tmp * massi * G2;
}
return energy;
}
Expand Down