Skip to content

Commit 0c973ff

Browse files
committed
Add background refresh statistics and monitoring coordination
- Add background_successful_refreshes and background_failed_refreshes counters to IssuerStats for tracking per-issuer background refresh results - Add is_running() method to BackgroundRefreshManager to check thread state - Track statistics in refresh_loop() when JWKS refresh succeeds or fails - Add maybe_write_monitoring_file_from_verify() that skips file writes when background refresh thread is running (to avoid redundant writes) - Write monitoring file from background thread at end of each refresh cycle - Update get_json() to include new background refresh statistics - Update integration test to verify background refresh via monitoring API using keycache_set_jwks() to force cache entry with short update interval
1 parent 5b0b862 commit 0c973ff

File tree

4 files changed

+126
-14
lines changed

4 files changed

+126
-14
lines changed

src/scitokens_internal.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,19 +110,26 @@ void BackgroundRefreshManager::refresh_loop() {
110110

111111
// If next update is within threshold, try to refresh
112112
if (time_until_update <= threshold) {
113+
auto &stats =
114+
MonitoringStats::instance().get_issuer_stats(issuer);
113115
try {
114116
// Perform refresh (this will use the refresh_jwks method)
115117
scitokens::Validator::refresh_jwks(issuer);
118+
stats.inc_background_successful_refresh();
116119
} catch (std::exception &) {
120+
// Track failed refresh attempts
121+
stats.inc_background_failed_refresh();
117122
// Silently ignore errors in background refresh to avoid
118123
// disrupting the application. Background refresh is a
119124
// best-effort optimization. If it fails, the next token
120125
// verification will trigger a foreground refresh as usual.
121-
// TODO: In future work, track statistics (success/failure
122-
// counts) to monitor refresh health.
123126
}
124127
}
125128
}
129+
130+
// Write monitoring file from background thread if configured
131+
// This avoids writing from verify() when background thread is running
132+
MonitoringStats::instance().maybe_write_monitoring_file();
126133
}
127134
}
128135

src/scitokens_internal.h

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@ class BackgroundRefreshManager {
122122
// Stop the background refresh thread (can be called multiple times)
123123
void stop();
124124

125+
// Check if the background refresh thread is running
126+
bool is_running() const {
127+
return m_running.load(std::memory_order_acquire);
128+
}
129+
125130
private:
126131
BackgroundRefreshManager() = default;
127132
~BackgroundRefreshManager() { stop(); }
@@ -216,6 +221,10 @@ struct IssuerStats {
216221
std::atomic<uint64_t> failed_refreshes{0};
217222
std::atomic<uint64_t> stale_key_uses{0};
218223

224+
// Background refresh statistics (tracked by background thread)
225+
std::atomic<uint64_t> background_successful_refreshes{0};
226+
std::atomic<uint64_t> background_failed_refreshes{0};
227+
219228
// Increment methods for atomic counters
220229
void inc_successful_validation() { successful_validations++; }
221230
void inc_unsuccessful_validation() { unsuccessful_validations++; }
@@ -227,6 +236,10 @@ struct IssuerStats {
227236
void inc_expired_key() { expired_keys++; }
228237
void inc_successful_key_lookup() { successful_key_lookups++; }
229238
void inc_failed_key_lookup() { failed_key_lookups++; }
239+
void inc_background_successful_refresh() {
240+
background_successful_refreshes++;
241+
}
242+
void inc_background_failed_refresh() { background_failed_refreshes++; }
230243

231244
// Time setters that accept std::chrono::duration
232245
template <typename Rep, typename Period>
@@ -321,6 +334,13 @@ class MonitoringStats {
321334
*/
322335
void maybe_write_monitoring_file() noexcept;
323336

337+
/**
338+
* Same as maybe_write_monitoring_file(), but skips if background refresh
339+
* thread is running. This should be called from verify() routines to
340+
* avoid redundant writes when the background thread is handling them.
341+
*/
342+
void maybe_write_monitoring_file_from_verify() noexcept;
343+
324344
private:
325345
MonitoringStats() = default;
326346
~MonitoringStats() = default;
@@ -674,8 +694,9 @@ class Validator {
674694

675695
void verify(const SciToken &scitoken, time_t expiry_time) {
676696
// Check if monitoring file should be written (fast-path, relaxed
677-
// atomic)
678-
internal::MonitoringStats::instance().maybe_write_monitoring_file();
697+
// atomic). Skip if background thread is running.
698+
internal::MonitoringStats::instance()
699+
.maybe_write_monitoring_file_from_verify();
679700

680701
std::string issuer = "";
681702
auto start_time = std::chrono::steady_clock::now();
@@ -770,8 +791,9 @@ class Validator {
770791

771792
void verify(const jwt::decoded_jwt<jwt::traits::kazuho_picojson> &jwt) {
772793
// Check if monitoring file should be written (fast-path, relaxed
773-
// atomic)
774-
internal::MonitoringStats::instance().maybe_write_monitoring_file();
794+
// atomic). Skip if background thread is running.
795+
internal::MonitoringStats::instance()
796+
.maybe_write_monitoring_file_from_verify();
775797

776798
std::string issuer = "";
777799
auto start_time = std::chrono::steady_clock::now();

src/scitokens_monitoring.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ std::string MonitoringStats::get_json() const {
121121
issuer_obj["stale_key_uses"] =
122122
picojson::value(static_cast<double>(stats.stale_key_uses.load()));
123123

124+
// Background refresh statistics
125+
issuer_obj["background_successful_refreshes"] = picojson::value(
126+
static_cast<double>(stats.background_successful_refreshes.load()));
127+
issuer_obj["background_failed_refreshes"] = picojson::value(
128+
static_cast<double>(stats.background_failed_refreshes.load()));
129+
124130
std::string sanitized_issuer = sanitize_issuer_for_json(issuer);
125131
issuers_obj[sanitized_issuer] = picojson::value(issuer_obj);
126132
}
@@ -190,6 +196,15 @@ void MonitoringStats::maybe_write_monitoring_file() noexcept {
190196
}
191197
}
192198

199+
void MonitoringStats::maybe_write_monitoring_file_from_verify() noexcept {
200+
// If background refresh thread is running, it will handle the writes
201+
// This avoids redundant writes and potential contention
202+
if (BackgroundRefreshManager::get_instance().is_running()) {
203+
return;
204+
}
205+
maybe_write_monitoring_file();
206+
}
207+
193208
void MonitoringStats::write_monitoring_file_impl() noexcept {
194209
try {
195210
std::string monitoring_file =

test/integration_test.cpp

Lines changed: 76 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ class MonitoringStats {
3939
uint64_t expired_keys{0};
4040
uint64_t failed_refreshes{0};
4141
uint64_t stale_key_uses{0};
42+
// Background refresh statistics
43+
uint64_t background_successful_refreshes{0};
44+
uint64_t background_failed_refreshes{0};
4245
};
4346

4447
struct FailedIssuerLookup {
@@ -157,6 +160,19 @@ class MonitoringStats {
157160
static_cast<uint64_t>(it->second.get<double>());
158161
}
159162

163+
// Background refresh statistics
164+
it = stats_obj.find("background_successful_refreshes");
165+
if (it != stats_obj.end() && it->second.is<double>()) {
166+
stats.background_successful_refreshes =
167+
static_cast<uint64_t>(it->second.get<double>());
168+
}
169+
170+
it = stats_obj.find("background_failed_refreshes");
171+
if (it != stats_obj.end() && it->second.is<double>()) {
172+
stats.background_failed_refreshes =
173+
static_cast<uint64_t>(it->second.get<double>());
174+
}
175+
160176
issuers_[issuer_entry.first] = stats;
161177
}
162178
}
@@ -1113,6 +1129,13 @@ TEST_F(IntegrationTest, MonitoringFileOutput) {
11131129
TEST_F(IntegrationTest, BackgroundRefreshTest) {
11141130
char *err_msg = nullptr;
11151131

1132+
// Reset monitoring stats to get a clean baseline
1133+
scitoken_reset_monitoring_stats(&err_msg);
1134+
if (err_msg) {
1135+
free(err_msg);
1136+
err_msg = nullptr;
1137+
}
1138+
11161139
// Set smaller intervals for testing (1 second refresh interval, 2 seconds
11171140
// threshold)
11181141
int rv =
@@ -1133,6 +1156,16 @@ TEST_F(IntegrationTest, BackgroundRefreshTest) {
11331156
err_msg = nullptr;
11341157
}
11351158

1159+
// Set update interval to 1 second BEFORE first verification so the
1160+
// cache entry will have next_update just 1 second in the future.
1161+
// This ensures the background thread can refresh within the test window.
1162+
rv = scitoken_config_set_int("keycache.update_interval_s", 1, &err_msg);
1163+
ASSERT_EQ(rv, 0);
1164+
if (err_msg) {
1165+
free(err_msg);
1166+
err_msg = nullptr;
1167+
}
1168+
11361169
// Create a key and token
11371170
std::unique_ptr<void, decltype(&scitoken_key_destroy)> key(
11381171
scitoken_key_create("test-key-1", "ES256", public_key_.c_str(),
@@ -1203,22 +1236,37 @@ TEST_F(IntegrationTest, BackgroundRefreshTest) {
12031236
ASSERT_EQ(rv, 0) << "Failed to get cached JWKS: "
12041237
<< (err_msg ? err_msg : "unknown error");
12051238
ASSERT_TRUE(jwks_before != nullptr);
1206-
std::unique_ptr<char, decltype(&free)> jwks_before_ptr(jwks_before, free);
12071239
if (err_msg) {
12081240
free(err_msg);
12091241
err_msg = nullptr;
12101242
}
12111243

12121244
std::cout << "Initial JWKS fetched successfully" << std::endl;
12131245

1214-
// Set update interval to 1 second so keys will need refresh soon
1215-
rv = scitoken_config_set_int("keycache.update_interval_s", 1, &err_msg);
1216-
ASSERT_EQ(rv, 0);
1246+
// Re-set the JWKS to force a fresh cache entry with the current
1247+
// update_interval (1 second). This ensures next_update is just 1 second
1248+
// in the future so the background thread will refresh it.
1249+
rv = keycache_set_jwks(issuer_url_.c_str(), jwks_before, &err_msg);
1250+
ASSERT_EQ(rv, 0) << "Failed to set JWKS: "
1251+
<< (err_msg ? err_msg : "unknown error");
1252+
free(jwks_before);
12171253
if (err_msg) {
12181254
free(err_msg);
12191255
err_msg = nullptr;
12201256
}
12211257

1258+
std::cout << "JWKS re-set with 1-second update interval" << std::endl;
1259+
1260+
// Get monitoring stats before background refresh
1261+
auto before_stats = getCurrentMonitoringStats();
1262+
auto before_issuer_stats = before_stats.getIssuerStats(issuer_url_);
1263+
std::cout << "Before background refresh:" << std::endl;
1264+
std::cout << " background_successful_refreshes: "
1265+
<< before_issuer_stats.background_successful_refreshes
1266+
<< std::endl;
1267+
std::cout << " background_failed_refreshes: "
1268+
<< before_issuer_stats.background_failed_refreshes << std::endl;
1269+
12221270
// Enable background refresh
12231271
rv = keycache_set_background_refresh(1, &err_msg);
12241272
ASSERT_EQ(rv, 0) << "Failed to enable background refresh: "
@@ -1238,10 +1286,6 @@ TEST_F(IntegrationTest, BackgroundRefreshTest) {
12381286
std::cout << "Waiting 4 seconds for background refresh..." << std::endl;
12391287
sleep(4);
12401288

1241-
// The background refresh should have occurred
1242-
// We can't easily verify it refreshed without instrumenting the code more,
1243-
// but we can verify the thread is running and didn't crash
1244-
12451289
// Stop background refresh
12461290
rv = keycache_stop_background_refresh(&err_msg);
12471291
ASSERT_EQ(rv, 0) << "Failed to stop background refresh: "
@@ -1265,6 +1309,30 @@ TEST_F(IntegrationTest, BackgroundRefreshTest) {
12651309
err_msg = nullptr;
12661310
}
12671311

1312+
// Verify that background refresh statistics increased for our issuer
1313+
auto after_stats = getCurrentMonitoringStats();
1314+
auto after_issuer_stats = after_stats.getIssuerStats(issuer_url_);
1315+
1316+
std::cout << "After background refresh:" << std::endl;
1317+
std::cout << " background_successful_refreshes: "
1318+
<< after_issuer_stats.background_successful_refreshes
1319+
<< std::endl;
1320+
std::cout << " background_failed_refreshes: "
1321+
<< after_issuer_stats.background_failed_refreshes << std::endl;
1322+
1323+
// The background thread should have performed at least one refresh
1324+
// for our issuer (either successful or failed)
1325+
uint64_t total_background_refreshes =
1326+
after_issuer_stats.background_successful_refreshes +
1327+
after_issuer_stats.background_failed_refreshes;
1328+
uint64_t before_total =
1329+
before_issuer_stats.background_successful_refreshes +
1330+
before_issuer_stats.background_failed_refreshes;
1331+
1332+
EXPECT_GT(total_background_refreshes, before_total)
1333+
<< "Background refresh thread should have performed at least one "
1334+
"refresh attempt for our issuer";
1335+
12681336
std::cout << "Test completed successfully" << std::endl;
12691337
}
12701338

0 commit comments

Comments
 (0)