Skip to content

Commit 5b2386c

Browse files
AlnisMmeta-codesync[bot]
authored andcommitted
prevent memory monitor from advising away more than maxLimitPercent
Summary: Currently, it is possible for MemoryMonitor to advise away more than maxLimitPercent_ of the cache. MemoryMonitor just checks if the amount of currently advised away memory is higher than maxLimitPercent_. It does not take into account how much memory will be advised away in total after performing the slab releases. If MemoryMonitor is configured in a way where maxLimitPercent_ is set to 80% and in each iteration 10% of the memory is advised away, then after each 8 iterations, we will be at exactly 80%, but MemoryMonitor will still do one more iteration, resulting in 90% of the memory being advised away. I change this behavior such that when making the decision if and how much to advise away we also take into account how much is going to be advised away. This ensures that we never advise away more than maxLimitPercent_ of the cache. Reviewed By: pbhandar2, rlyerly Differential Revision: D87043177 fbshipit-source-id: d05becebd76fa094667249306d6410290ca16a82
1 parent b5dce69 commit 5b2386c

File tree

3 files changed

+142
-16
lines changed

3 files changed

+142
-16
lines changed

cachelib/allocator/MemoryMonitor.cpp

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -247,21 +247,32 @@ void MemoryMonitor::adviseAwaySlabs() {
247247
return;
248248
}
249249
const auto numAdvised = cache_.getCacheMemoryStats().numAdvisedSlabs();
250-
const auto advisedPercent = numAdvised * 100 / (numAdvised + totalSlabs);
251-
if (advisedPercent > maxLimitPercent_) {
252-
XLOGF(CRITICAL,
253-
"More than {} slabs of {} ({}"
254-
"%) in the item cache memory have been advised away. "
255-
"This exceeds the maximum limit of {}"
256-
"%. Disabling advising which may result in an OOM.",
257-
numAdvised, numAdvised + totalSlabs, advisedPercent,
258-
maxLimitPercent_);
259-
return;
260-
}
250+
// allSlabs represents the total number of slabs across all pools
251+
const auto allSlabs = numAdvised + totalSlabs;
252+
253+
// Check if we've already hit or exceeded the maximum limit
254+
const auto maxNumAdvisedSlabs = (maxLimitPercent_ * allSlabs / 100);
261255
// Advise percentAdvisePerIteration_% of upperLimit_ - lowerLimit_
262256
// every iteration
263-
const auto slabsToAdvise = bytesToSlabs(upperLimit_ - lowerLimit_) *
264-
percentAdvisePerIteration_ / 100;
257+
auto slabsToAdvise = bytesToSlabs(upperLimit_ - lowerLimit_) *
258+
percentAdvisePerIteration_ / 100;
259+
auto remainingSlabsAllowedToAdviseAway =
260+
numAdvised >= maxNumAdvisedSlabs ? 0 : maxNumAdvisedSlabs - numAdvised;
261+
// Clamp slabsToAdvise to stay within maxLimitPercent_
262+
slabsToAdvise = std::min(slabsToAdvise, remainingSlabsAllowedToAdviseAway);
263+
if (slabsToAdvise == 0) {
264+
// Only log CRITICAL if we hit the max limit, not if slabsToAdvise was
265+
// already 0 due to configuration (e.g., upperLimit_ == lowerLimit_)
266+
if (remainingSlabsAllowedToAdviseAway == 0) {
267+
XLOGF(CRITICAL,
268+
"Already advised {} slabs of {} ({}"
269+
"%) which is at or exceeds the maximum limit of {}"
270+
"%. Disabling advising which may result in an OOM.",
271+
numAdvised, allSlabs, numAdvised * 100 / allSlabs,
272+
maxLimitPercent_);
273+
}
274+
return;
275+
}
265276
XLOGF(DBG, "Advising away {} slabs to free {} bytes", slabsToAdvise,
266277
slabsToAdvise * Slab::kSize);
267278
updateNumSlabsToAdvise(slabsToAdvise);

cachelib/allocator/tests/AllocatorResizeTest.h

Lines changed: 106 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,7 @@ class AllocatorResizeTest : public AllocatorTest<AllocatorT> {
894894
}
895895

896896
// testMemoryMonitorPerIterationAdviseReclaim
897-
// Crate 5 pools of 50 slabs each
897+
// Create 5 pools of 50 slabs each
898898
// fillup all pools
899899
// since lower limit is set to 1GB and upper limit set to 2GB, and
900900
// advise reclaim percent per iter value of 2, per iteration, 5 slabs
@@ -967,7 +967,7 @@ class AllocatorResizeTest : public AllocatorTest<AllocatorT> {
967967

968968
unsigned int i;
969969
/* iterate for numItersToMaxAdviseAway times */
970-
for (i = 1; i <= numItersToMaxAdviseAway + 1; i++) {
970+
for (i = 1; i <= numItersToMaxAdviseAway; i++) {
971971
alloc.memMonitor_->adviseAwaySlabs();
972972
std::this_thread::sleep_for(std::chrono::seconds{2});
973973
ASSERT_EQ(alloc.allocator_->getAdvisedMemorySize(), i * perIterAdvSize);
@@ -980,7 +980,7 @@ class AllocatorResizeTest : public AllocatorTest<AllocatorT> {
980980
ASSERT_EQ(totalAdvisedAwayMemory, i * perIterAdvSize);
981981

982982
// Try to reclaim back
983-
for (i = 1; i <= numItersToMaxAdviseAway + 1; i++) {
983+
for (i = 1; i <= numItersToMaxAdviseAway; i++) {
984984
alloc.memMonitor_->reclaimSlabs();
985985
std::this_thread::sleep_for(std::chrono::seconds{2});
986986
ASSERT_EQ(alloc.allocator_->getAdvisedMemorySize(),
@@ -1172,6 +1172,109 @@ class AllocatorResizeTest : public AllocatorTest<AllocatorT> {
11721172
threads[i].join();
11731173
}
11741174
}
1175+
1176+
struct MemMonitorTestParams {
1177+
unsigned int totalSlabs;
1178+
unsigned int slabsToFill;
1179+
unsigned int maxAdvisePercent;
1180+
unsigned int expectedAdvisedAfterFirstIteration;
1181+
unsigned int expectedAdvisedAfterSecondIteration;
1182+
};
1183+
1184+
void setupMemMonitorTest(typename AllocatorT::Config& config) {
1185+
config.memMonitorConfig.mode = MemoryMonitor::TestMode;
1186+
config.memMonitorInterval = std::chrono::seconds(kMemoryMonitorInterval);
1187+
config.memMonitorConfig.maxAdvisePercentPerIter = 4;
1188+
config.memMonitorConfig.upperLimitGB = 2;
1189+
config.memMonitorConfig.lowerLimitGB = 1;
1190+
// With these settings, memory monitor will advise away 4% * (2GB-1GB) =
1191+
// 40MB = 10 slabs per iteration
1192+
}
1193+
1194+
void runMemMonitorTest(const MemMonitorTestParams& params) {
1195+
typename AllocatorT::Config config;
1196+
setupMemMonitorTest(config);
1197+
config.memMonitorConfig.maxAdvisePercent = params.maxAdvisePercent;
1198+
config.setCacheSize(params.totalSlabs * Slab::kSize);
1199+
1200+
AllocatorT alloc(config);
1201+
1202+
const auto numBytes = alloc.getCacheMemoryStats().ramCacheSize;
1203+
const std::set<uint32_t> acSizes = {512 * 1024};
1204+
auto poolId = alloc.addPool("test_pool", numBytes, acSizes);
1205+
ASSERT_NE(Slab::kInvalidPoolId, poolId);
1206+
1207+
const auto allocSizes = alloc.getPool(poolId).getAllocSizes();
1208+
const size_t itemsPerSlab = Slab::kSize / allocSizes[0];
1209+
const size_t numItems = itemsPerSlab * params.slabsToFill;
1210+
const uint32_t itemSize = 450 * 1024;
1211+
1212+
std::vector<typename AllocatorT::WriteHandle> handles;
1213+
for (size_t i = 0; i < numItems; ++i) {
1214+
auto handle = util::allocateAccessible(
1215+
alloc, poolId, folly::sformat("key_{}", i), itemSize);
1216+
ASSERT_NE(nullptr, handle);
1217+
handles.push_back(std::move(handle));
1218+
}
1219+
handles.clear();
1220+
1221+
ASSERT_EQ(alloc.getPoolStats(poolId).mpStats.allocatedSlabs(),
1222+
params.slabsToFill);
1223+
ASSERT_EQ(alloc.getPool(poolId).getNumSlabsAdvised(), 0);
1224+
1225+
alloc.memMonitor_->adviseAwaySlabs();
1226+
alloc.memMonitor_->checkPoolsAndAdviseReclaim();
1227+
ASSERT_EQ(alloc.getPool(poolId).getNumSlabsAdvised(),
1228+
params.expectedAdvisedAfterFirstIteration);
1229+
1230+
alloc.memMonitor_->adviseAwaySlabs();
1231+
alloc.memMonitor_->checkPoolsAndAdviseReclaim();
1232+
ASSERT_EQ(alloc.getPool(poolId).getNumSlabsAdvised(),
1233+
params.expectedAdvisedAfterSecondIteration);
1234+
}
1235+
1236+
void testMemMonitorAdvisesAwayOverLimit() {
1237+
// Test advising with maxAdvisePercent=51%
1238+
// Cache: 23 total slabs (1 for metadata, 22 usable)
1239+
// First iteration: advises 10 slabs (10/22 = 45%)
1240+
// Second iteration: advises 1 more slab (11/22 = 50% < 51%)
1241+
runMemMonitorTest({
1242+
.totalSlabs = 23,
1243+
.slabsToFill = 22,
1244+
.maxAdvisePercent = 51,
1245+
.expectedAdvisedAfterFirstIteration = 10,
1246+
.expectedAdvisedAfterSecondIteration = 11,
1247+
});
1248+
}
1249+
1250+
void testMemMonitorAdvisesAwayOverLimit2() {
1251+
// Test advising exactly at maxAdvisePercent boundary (50%)
1252+
// Cache: 21 total slabs (1 for metadata, 20 usable)
1253+
// First iteration: advises 10 slabs (10/20 = 50%)
1254+
// Second iteration: cannot advise more (would exceed 50% limit)
1255+
runMemMonitorTest({
1256+
.totalSlabs = 21,
1257+
.slabsToFill = 20,
1258+
.maxAdvisePercent = 50,
1259+
.expectedAdvisedAfterFirstIteration = 10,
1260+
.expectedAdvisedAfterSecondIteration = 10,
1261+
});
1262+
}
1263+
1264+
void testMemMonitorAdvisesAwayOverLimit3() {
1265+
// Test advising with maxAdvisePercent=51%
1266+
// Cache: 21 total slabs (1 for metadata, 20 usable)
1267+
// First iteration: advises 10 slabs (10/20 = 50%)
1268+
// Second iteration: cannot advise more because one more slab would exceed
1269+
// 51%
1270+
runMemMonitorTest({
1271+
.totalSlabs = 21,
1272+
.slabsToFill = 20,
1273+
.maxAdvisePercent = 51,
1274+
.expectedAdvisedAfterFirstIteration = 10,
1275+
.expectedAdvisedAfterSecondIteration = 10,
1276+
});
1277+
}
11751278
};
11761279
} // namespace tests
11771280
} // namespace cachelib

cachelib/allocator/tests/AllocatorResizeTypeTest.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,18 @@ TYPED_TEST(AllocatorResizeTest, MemoryMonitorPerIterationAdviseReclaim) {
6464
this->testMemoryMonitorPerIterationAdviseReclaim();
6565
}
6666

67+
TYPED_TEST(AllocatorResizeTest, MemMonitorAdvisesAwayOverLimit) {
68+
this->testMemMonitorAdvisesAwayOverLimit();
69+
}
70+
71+
TYPED_TEST(AllocatorResizeTest, MemMonitorAdvisesAwayOverLimit2) {
72+
this->testMemMonitorAdvisesAwayOverLimit2();
73+
}
74+
75+
TYPED_TEST(AllocatorResizeTest, MemMonitorAdvisesAwayOverLimit3) {
76+
this->testMemMonitorAdvisesAwayOverLimit3();
77+
}
78+
6779
TYPED_TEST(AllocatorResizeTest, ShrinkGrowthAdviseRaceCondition) {
6880
this->testShrinkGrowthAdviseRaceCondition();
6981
}

0 commit comments

Comments
 (0)