-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspeed-tests.cc
181 lines (156 loc) · 5.29 KB
/
speed-tests.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
// Kinetoscope: A Sega Genesis Video Player
//
// Copyright (c) 2024 Joey Parrish
//
// See MIT License in LICENSE.txt
// Microcontroller function speed tests.
#include <Arduino.h>
#include <HardwareSerial.h>
#include "fast-gpio.h"
#include "http.h"
#include "registers.h"
#include "segavideo_format.h"
#include "sram.h"
#include "video-server.h"
// URL-encoded file name of the RLE-compressed test video.  It is pasted onto
// VIDEO_SERVER_BASE_PATH by string-literal concatenation to form the request
// path.
#define RLE_VIDEO "Never%20Gonna%20Give%20You%20Up.segavideo.rle"

// 3s chunk of audio+video data, at default settings, without main headers
#define ABOUT_3S_VIDEO_AUDIO_BYTES 901376

// A safe buffer size for these tests.
// Parenthesized so that the macro behaves as a single value inside any larger
// expression (e.g. `x / BUFFER_SIZE` or `BUFFER_SIZE % n`).
#define BUFFER_SIZE (100 * 1024)

// Explicitly unrolled loop for 10 repeated statements.
#define X10(a) { a; a; a; a; a; a; a; a; a; a; }

// Explicitly unrolled loop for 100 repeated statements.
#define X100(a) { X10(X10(a)); }

// Explicitly unrolled loop for 1k repeated statements.
#define X1k(a) { X10(X10(X10(a))); }

// A partially unrolled loop that minimizes time spent on incrementing and
// checking, while not exploding the program size to the point that it slows
// down execution (10k/100k unrolled) or overruns the available program space
// (1M unrolled).
//
// The loop counter `i` (0..999) is deliberately visible to the repeated
// statement; test_register_read_speed() uses it to vary its argument.  Do not
// rename it without updating the callers.
#define X1M(a) { for (int i = 0; i < 1'000; ++i) { X1k(a); } }
// Benchmarks FAST_PULSE_ACTIVE_LOW(SYNC_PIN__CMD_CLEAR) by invoking it one
// million times via X1M.
//
// Returns the total elapsed time in milliseconds as reported by millis().
// With exactly 1M invocations, that figure is also the average cost of one
// invocation in nanoseconds.
//
// Reference measurement: ~75 ns per pulse.
static long test_fast_gpio_speed() {
  const long t0 = millis();
  X1M(FAST_PULSE_ACTIVE_LOW(SYNC_PIN__CMD_CLEAR));
  const long t1 = millis();
  return t1 - t0;
}
// Benchmarks is_cmd_set() by calling it one million times via X1M; the
// result of each call is discarded.
//
// Returns the total elapsed time in milliseconds as reported by millis().
// With exactly 1M calls, that figure is also the average cost of one call in
// nanoseconds.
//
// Reference measurement: ~86 ns per read.
static long test_sync_token_read_speed() {
  const long t0 = millis();
  X1M(is_cmd_set());
  const long t1 = millis();
  return t1 - t0;
}
// Benchmarks clear_cmd() by calling it one million times via X1M.
//
// Returns the total elapsed time in milliseconds as reported by millis().
// With exactly 1M calls, that figure is also the average cost of one call in
// nanoseconds.
//
// Reference measurement: ~122 ns per clear.
static long test_sync_token_clear_speed() {
  const long t0 = millis();
  X1M(clear_cmd());
  const long t1 = millis();
  return t1 - t0;
}
// Benchmarks read_register() by calling it one million times via X1M; the
// result of each call is discarded.
//
// The argument `i & 3` uses the loop counter that X1M itself declares, so the
// register index passed in is always in the range 0..3 and changes once per
// 1000 calls.
//
// Returns the total elapsed time in milliseconds as reported by millis().
// With exactly 1M calls, that figure is also the average cost of one call in
// nanoseconds.
//
// Reference measurement: ~1543 ns per read.
static long test_register_read_speed() {
  const long t0 = millis();
  X1M(read_register(i & 3));
  const long t1 = millis();
  return t1 - t0;
}
static long test_sram_speed() {
// 100kB: ~116ms
// 1MB: ~1160ms
// 3s video+audio: ~1020ms
// Rather than allocate a buffer, just write out 100kB of instructions.
uint8_t* buffer = (uint8_t*)main;
long start = millis();
sram_start_bank(0);
sram_write(buffer, BUFFER_SIZE);
sram_flush_and_release_bank();
long end = millis();
return end - start;
}
// Write cursor used by http_local_buffer_callback().  The caller must aim it
// at the start of a destination buffer before beginning a fetch.
static uint8_t* http_local_buffer = NULL;

// Data callback that appends each received piece to the memory at
// http_local_buffer and advances the cursor past the bytes just written.
//
// buffer: the bytes to append.
// bytes:  how many bytes to copy from buffer.
//
// Always returns true.
//
// NOTE(review): there is no bounds check here; the destination must be large
// enough for everything delivered across all calls -- confirm at call sites.
static bool http_local_buffer_callback(const uint8_t* buffer, int bytes) {
  uint8_t* const destination = http_local_buffer;
  http_local_buffer = destination + bytes;
  memcpy(destination, buffer, bytes);
  return true;
}
// Linked from firmware.ino:
extern bool http_sram_callback(const uint8_t* buffer, int bytes);
extern bool http_rle_sram_callback(const uint8_t* buffer, int bytes);
extern void http_rle_reset();
extern bool network_connected;
// Times one HTTP fetch of the RLE test video into SRAM.
//
// offset: offset within the remote file, passed straight to http_fetch().
// size:   size argument passed straight to http_fetch().
//
// http_rle_reset() is called before the clock starts.  The timed region
// covers sram_start_bank(0), the fetch itself (with http_rle_sram_callback
// receiving the data), and sram_flush_and_release_bank().
//
// A failed fetch is reported on Serial, but the elapsed time is still
// returned.
//
// Returns the total elapsed time in milliseconds as reported by millis().
//
// (Effective) 2.5Mbps minimum required
// (Effective) ~5.1 Mbps (after decompression)
static long test_rle_download_speed(int offset, int size) {
  http_rle_reset();

  const long t0 = millis();
  sram_start_bank(0);

  const bool fetched = http_fetch(VIDEO_SERVER,
                                  VIDEO_SERVER_PORT,
                                  VIDEO_SERVER_BASE_PATH RLE_VIDEO,
                                  offset,
                                  size,
                                  http_rle_sram_callback);
  if (!fetched) {
    Serial.println("Fetch failed!");
  }

  sram_flush_and_release_bank();
  const long t1 = millis();

  return t1 - t0;
}
void run_tests() {
long ms;
ms = test_fast_gpio_speed();
Serial.print(ms);
Serial.println(" ns avg per GPIO pulse."); // 1Mx pulses, ms => ns
ms = test_sync_token_read_speed();
Serial.print(ms);
Serial.println(" ns avg per sync token read."); // 1Mx reads, ms => ns
ms = test_sync_token_clear_speed();
Serial.print(ms);
Serial.println(" ns avg per sync token clear."); // 1Mx reads, ms => ns
ms = test_register_read_speed();
Serial.print(ms);
Serial.println(" ns avg per register read."); // 1Mx reads, ms => ns
ms = test_sram_speed();
Serial.print(ms);
Serial.print(" ms to write ");
Serial.print(BUFFER_SIZE);
Serial.println(" bytes to SRAM");
if (!network_connected) {
Serial.println("No network, skipping network tests.");
} else {
Serial.println("Beginning RLE network tests.");
uint32_t minimal_index[2];
http_local_buffer = (uint8_t*)minimal_index;
if (!http_fetch(VIDEO_SERVER,
VIDEO_SERVER_PORT,
VIDEO_SERVER_BASE_PATH RLE_VIDEO,
sizeof(SegaVideoHeader),
sizeof(minimal_index),
http_local_buffer_callback)) {
Serial.println("Index fetch failed!");
return;
}
int offset = sizeof(SegaVideoHeader) + sizeof(SegaVideoIndex);
int compressed_chunk_size =
ntohl(minimal_index[1]) - ntohl(minimal_index[0]);
Serial.print("Detected compressed chunk size: ");
Serial.println(compressed_chunk_size);
for (int i = 0; i < 10; i++) {
ms = test_rle_download_speed(offset, compressed_chunk_size);
float compressed_bits = compressed_chunk_size * 8.0;
float final_bits = ABOUT_3S_VIDEO_AUDIO_BYTES * 8.0;
float seconds = ms / 1000.0;
float mbps = compressed_bits / seconds / 1024.0 / 1024.0;
float effective_mbps = final_bits / seconds / 1024.0 / 1024.0;
Serial.print(ms);
Serial.print(" ms to stream ~3s RLE video to SRAM (");
Serial.print(mbps);
Serial.print(" Mbps, effectively ");
Serial.print(effective_mbps);
Serial.println(" Mbps vs 2.50 Mbps minimum)");
}
}
Serial.println("\n");
}