train.cpp
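// train.cpp: trains a reinforcement-learning agent (Agent.cpp) to play a
// terminal Pong game (Pong.cpp), using experience replay, an epsilon-greedy
// policy, and a periodically synced target network, and logs the average
// reward per episode to reward.txt.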
#include "Agent.cpp"
#include "Pong.cpp"
#include <iostream>
#include <stdio.h>
#include <stdlib.h>    // system()
#include <sys/ioctl.h>
#include <unistd.h>
#include <vector>
#include <fstream>
using namespace std;
// Move the terminal cursor to column x, row y using an ANSI escape sequence
void gotoxy(int x, int y)
{
    printf("%c[%d;%df", 0x1B, y, x);
}
// Parameters
int BATCHES = 32;                 // number of replay samples trained per frame
double LR = 0.001;                // learning rate for the neural networks
int MEM_CAP = 10000;              // replay memory capacity
int FRAME_REACH = 10000;          // frames until epsilon decays to 0.05
int TARGET_UPDATE = 5000;         // frames between target-network syncs; network parameters are also saved at this interval
vector<int> layout ({8, 50, 3});  // neural network layout: input, hidden, and output layer sizes
int gameWidth = 7;
int gameHeight = 12;              // if you change the width or height, also tune the reward system
int episodes = 0;
// main
int main () {
    Pong game = Pong(gameWidth, gameHeight);
    Agent agent = Agent(layout, LR, MEM_CAP, FRAME_REACH, TARGET_UPDATE, BATCHES);
    vector<double> current_state = game.return_state();
    int max_score = 0;
    int reward_count = 0;
    float avg_reward = 0;
    ofstream ofs;
    ofs.open("reward.txt", std::ofstream::out | std::ofstream::trunc); // clear the reward log
    ofs.close();
    system("clear");
    while (episodes < 20000) {
        // pick an action for the current state and step the game
        int action = agent.action(current_state);
        int reward = game.act(action);
        vector<double> next_state = game.return_state();
        if (reward != 0) {
            avg_reward += reward;
            reward_count += 1;
        }
        if (game.is_done) {
            episodes += 1;
            // append the episode's average reward to reward.txt
            if (reward_count != 0) {
                avg_reward /= reward_count;
                ofstream myfile;
                myfile.open("reward.txt", ios::app);
                myfile << avg_reward << endl;
                myfile.close();
            }
            // show progress
            gotoxy(0, 0);
            cout << "Episodes: " << episodes << " | Max Score: " << max_score << " | Avg Reward: " << avg_reward << "          " << endl;
            avg_reward = 0;
            reward_count = 0;
        }
        // store the transition and train on a replay batch
        if (game.score > max_score) {
            max_score = game.score;
        }
        agent.store_mem(current_state, action, reward, next_state, game.is_done);
        agent.train();
        current_state = next_state;
    }
}
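
// Build sketch: since this file includes Agent.cpp and Pong.cpp directly,
// compiling this single translation unit should be enough, assuming a g++
// toolchain with C++11 support; the project does not specify build flags,
// so these are illustrative only.
//   g++ -std=c++11 -O2 train.cpp -o train
//   ./train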