Skip to content

Commit 38b17a8

Browse files
committed
added per tier pool class rolling average latency
1 parent 519f664 commit 38b17a8

File tree

7 files changed

+222
-1
lines changed

7 files changed

+222
-1
lines changed

cachelib/allocator/Cache.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ class CacheBase {
8585
CacheBase(CacheBase&&) = default;
8686
CacheBase& operator=(CacheBase&&) = default;
8787

88+
// TODO: come up with some reasonable number
89+
static constexpr unsigned kMaxTiers = 2;
90+
8891
// Get a string referring to the cache name for this cache
8992
virtual const std::string getCacheName() const = 0;
9093

@@ -102,6 +105,9 @@ class CacheBase {
102105
// @param poolId the pool id
103106
virtual PoolStats getPoolStats(PoolId poolId) const = 0;
104107

108+
virtual AllocationClassBaseStat getAllocationClassStats(
109+
TierId, PoolId pid, ClassId cid) const = 0;
110+
105111
// @param poolId the pool id
106112
virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0;
107113

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,8 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
324324
const auto requiredSize = Item::getRequiredSize(key, size);
325325

326326
// the allocation class in our memory allocator.
327-
const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
327+
const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
328+
util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]};
328329

329330
(*stats_.allocAttempts)[pid][cid].inc();
330331

@@ -402,6 +403,10 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
402403
const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId;
403404
const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
404405

406+
util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]};
407+
408+
// TODO: per-tier? Right now stats_ are not used in any public periodic
409+
// worker
405410
(*stats_.allocAttempts)[pid][cid].inc();
406411

407412
void* memory = allocator_->allocate(pid, requiredSize);
@@ -2220,6 +2225,45 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
22202225
return ret;
22212226
}
22222227

2228+
template <typename CacheTrait>
2229+
double CacheAllocator<CacheTrait>::slabsApproxFreePercentage(TierId tid) const
2230+
{
2231+
return allocator_[tid]->approxFreeSlabsPercentage();
2232+
}
2233+
2234+
template <typename CacheTrait>
2235+
AllocationClassBaseStat CacheAllocator<CacheTrait>::getAllocationClassStats(
2236+
TierId tid, PoolId pid, ClassId cid) const {
2237+
const auto &ac = allocator_[tid]->getPool(pid).getAllocationClass(cid);
2238+
2239+
AllocationClassBaseStat stats{};
2240+
stats.allocSize = ac.getAllocSize();
2241+
stats.memorySize = ac.getNumSlabs() * Slab::kSize;
2242+
2243+
if (slabsApproxFreePercentage(tid) > 0.0) {
2244+
auto totalMemory = MemoryAllocator::getMemorySize(memoryTierSize(tid));
2245+
auto freeMemory = static_cast<double>(totalMemory) * slabsApproxFreePercentage(tid) / 100.0;
2246+
2247+
// amount of free memory which has the same ratio to entire free memory as
2248+
// this allocation class memory size has to used memory
2249+
auto scaledFreeMemory = static_cast<size_t>(freeMemory * stats.memorySize / totalMemory);
2250+
2251+
auto acAllocatedMemory = (100.0 - ac.approxFreePercentage()) / 100.0 * ac.getNumSlabs() * Slab::kSize;
2252+
auto acMaxAvailableMemory = ac.getNumSlabs() * Slab::kSize + scaledFreeMemory;
2253+
2254+
if (acMaxAvailableMemory == 0) {
2255+
stats.approxFreePercent = 100.0;
2256+
} else {
2257+
stats.approxFreePercent = 100.0 - 100.0 * acAllocatedMemory / acMaxAvailableMemory;
2258+
}
2259+
} else {
2260+
stats.approxFreePercent = ac.approxFreePercentage();
2261+
}
2262+
stats.allocLatencyNs = (*stats_.classAllocLatency)[tid][pid][cid];
2263+
2264+
return stats;
2265+
}
2266+
22232267
template <typename CacheTrait>
22242268
PoolEvictionAgeStats CacheAllocator<CacheTrait>::getPoolEvictionAgeStats(
22252269
PoolId pid, unsigned int slabProjectionLength) const {

cachelib/allocator/CacheStats.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ void Stats::init() {
4444
initToZero(*fragmentationSize);
4545
initToZero(*chainedItemEvictions);
4646
initToZero(*regularItemEvictions);
47+
48+
classAllocLatency = std::make_unique<PerTierPoolClassRollingStats>();
4749
}
4850

4951
template <int>

cachelib/allocator/CacheStats.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "cachelib/allocator/memory/Slab.h"
2626
#include "cachelib/common/FastStats.h"
2727
#include "cachelib/common/PercentileStats.h"
28+
#include "cachelib/common/RollingStats.h"
2829
#include "cachelib/common/Time.h"
2930

3031
namespace facebook {
@@ -95,6 +96,20 @@ struct MMContainerStat {
9596
uint64_t numTailAccesses;
9697
};
9798

99+
struct AllocationClassBaseStat {
100+
// size of allocation class
101+
size_t allocSize{0};
102+
103+
// size of memory assigned to this allocation class
104+
size_t memorySize{0};
105+
106+
// percent of free memory in this class
107+
double approxFreePercent{0.0};
108+
109+
// Rolling allocation latency (in ns)
110+
util::RollingStats allocLatencyNs;
111+
};
112+
98113
// cache related stats for a given allocation class.
99114
struct CacheStat {
100115
// allocation size for this container.

cachelib/allocator/CacheStatsInternal.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "cachelib/allocator/Cache.h"
2222
#include "cachelib/allocator/memory/MemoryAllocator.h"
2323
#include "cachelib/common/AtomicCounter.h"
24+
#include "cachelib/common/RollingStats.h"
2425

2526
namespace facebook {
2627
namespace cachelib {
@@ -229,6 +230,14 @@ struct Stats {
229230
std::unique_ptr<PerPoolClassAtomicCounters> chainedItemEvictions{};
230231
std::unique_ptr<PerPoolClassAtomicCounters> regularItemEvictions{};
231232

233+
using PerTierPoolClassRollingStats = std::array<
234+
std::array<std::array<util::RollingStats, MemoryAllocator::kMaxClasses>,
235+
MemoryPoolManager::kMaxPools>,
236+
CacheBase::kMaxTiers>;
237+
238+
// rolling allocation latency tracking for every alloc class in every pool of every tier
239+
std::unique_ptr<PerTierPoolClassRollingStats> classAllocLatency{};
240+
232241
// Eviction failures due to parent cannot be removed from access container
233242
AtomicCounter evictFailParentAC{0};
234243

cachelib/cachebench/cache/CacheStats.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ struct Stats {
100100
uint64_t invalidDestructorCount{0};
101101
int64_t unDestructedItemCount{0};
102102

103+
std::map<TierId, std::map<PoolId, std::map<ClassId, AllocationClassBaseStat>>>
104+
allocationClassStats;
105+
106+
std::vector<double> slabsApproxFreePercentages;
107+
103108
// populate the counters related to nvm usage. Cache implementation can decide
104109
// what to populate since not all of those are interesting when running
105110
// cachebench.
@@ -131,6 +136,56 @@ struct Stats {
131136
<< std::endl;
132137
}
133138

139+
if (FLAGS_report_memory_usage_stats != "") {
140+
for (TierId tid = 0; tid < slabsApproxFreePercentages.size(); tid++) {
141+
out << folly::sformat("tid{:2} free slabs : {:.2f}%", tid,
142+
slabsApproxFreePercentages[tid])
143+
<< std::endl;
144+
}
145+
146+
auto formatMemory = [&](size_t bytes) -> std::tuple<std::string, double> {
147+
if (FLAGS_report_memory_usage_stats == "raw") {
148+
return {"B", bytes};
149+
}
150+
151+
constexpr double KB = 1024.0;
152+
constexpr double MB = 1024.0 * 1024;
153+
constexpr double GB = 1024.0 * 1024 * 1024;
154+
155+
if (bytes >= GB) {
156+
return {"GB", static_cast<double>(bytes) / GB};
157+
} else if (bytes >= MB) {
158+
return {"MB", static_cast<double>(bytes) / MB};
159+
} else if (bytes >= KB) {
160+
return {"KB", static_cast<double>(bytes) / KB};
161+
} else {
162+
return {"B", bytes};
163+
}
164+
};
165+
166+
auto foreachAC = [&](auto cb) {
167+
for (auto& tidStats : allocationClassStats) {
168+
for (auto& pidStat : tidStats.second) {
169+
for (auto& cidStat : pidStat.second) {
170+
cb(tidStats.first, pidStat.first, cidStat.first, cidStat.second);
171+
}
172+
}
173+
}
174+
};
175+
176+
foreachAC([&](auto tid, auto pid, auto cid, auto stats) {
177+
auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize);
178+
auto [memorySizeSuffix, memorySize] = formatMemory(stats.memorySize);
179+
out << folly::sformat(
180+
"tid{:2} pid{:2} cid{:4} {:8.2f}{} memorySize:{:8.2f}{} "
181+
"free:{:4.2f}% rollingAvgAllocLatency:{:8.2f}ns",
182+
tid, pid, cid, allocSize, allocSizeSuffix, memorySize,
183+
memorySizeSuffix, stats.approxFreePercent,
184+
stats.allocLatencyNs.estimate())
185+
<< std::endl;
186+
});
187+
}
188+
134189
if (numCacheGets > 0) {
135190
out << folly::sformat("Cache Gets : {:,}", numCacheGets) << std::endl;
136191
out << folly::sformat("Hit Ratio : {:6.2f}%", overallHitRatio)

cachelib/common/RollingStats.h

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <folly/Range.h>
20+
#include <folly/logging/xlog.h>
21+
22+
#include "cachelib/common/Utils.h"
23+
24+
namespace facebook {
25+
namespace cachelib {
26+
namespace util {
27+
28+
// Maintains the running arithmetic mean of all values passed to trackValue().
//
// The class holds a plain double and a plain counter and performs no
// synchronization of its own.
class RollingStats {
 public:
  // Folds one sample into the running average.
  //
  // @param value  the sample to track (e.g. a duration expressed as a number)
  void trackValue(double value) {
    // Highly unlikely: the sample counter has reached the limit of uint64_t.
    // The current sample is dropped and the counter restarts from zero, which
    // means the next tracked sample fully replaces the current average.
    if (cnt_ == std::numeric_limits<uint64_t>::max()) {
      cnt_ = 0;
      return;
    }
    // Incremental mean: avg' = avg * n / (n + 1) + value / (n + 1).
    const auto ratio = static_cast<double>(cnt_) / (cnt_ + 1);
    avg_ *= ratio;
    ++cnt_;
    avg_ += value / cnt_;
  }

  // Returns the current rolling average (0 if nothing has been tracked yet).
  // This is a pure read, so it is const and can be called through const
  // references.
  double estimate() const noexcept { return avg_; }

 private:
  // Running average of the tracked samples.
  double avg_{0};
  // Number of samples folded into avg_.
  uint64_t cnt_{0};
};
52+
53+
class RollingLatencyTracker {
54+
public:
55+
explicit RollingLatencyTracker(RollingStats& stats)
56+
: stats_(&stats), begin_(std::chrono::steady_clock::now()) {}
57+
RollingLatencyTracker() {}
58+
~RollingLatencyTracker() {
59+
if (stats_) {
60+
auto tp = std::chrono::steady_clock::now();
61+
auto diffNanos =
62+
std::chrono::duration_cast<std::chrono::nanoseconds>(tp - begin_)
63+
.count();
64+
stats_->trackValue(static_cast<double>(diffNanos));
65+
}
66+
}
67+
68+
RollingLatencyTracker(const RollingLatencyTracker&) = delete;
69+
RollingLatencyTracker& operator=(const RollingLatencyTracker&) = delete;
70+
71+
RollingLatencyTracker(RollingLatencyTracker&& rhs) noexcept
72+
: stats_(rhs.stats_), begin_(rhs.begin_) {
73+
rhs.stats_ = nullptr;
74+
}
75+
76+
RollingLatencyTracker& operator=(RollingLatencyTracker&& rhs) noexcept {
77+
if (this != &rhs) {
78+
this->~RollingLatencyTracker();
79+
new (this) RollingLatencyTracker(std::move(rhs));
80+
}
81+
return *this;
82+
}
83+
84+
private:
85+
RollingStats* stats_{nullptr};
86+
std::chrono::time_point<std::chrono::steady_clock> begin_;
87+
};
88+
} // namespace util
89+
} // namespace cachelib
90+
} // namespace facebook

0 commit comments

Comments
 (0)