diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h index eab7d01184..266de0333c 100644 --- a/cachelib/allocator/Cache.h +++ b/cachelib/allocator/Cache.h @@ -102,6 +102,12 @@ class CacheBase { // @param poolId the pool id virtual PoolStats getPoolStats(PoolId poolId) const = 0; + // Get Allocation Class specific stats. + // + // @param poolId the pool id + // @param classId the class id + virtual ACStats getACStats(PoolId poolId, ClassId classId) const = 0; + // @param poolId the pool id virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0; diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index d14bcfa789..e4971eb233 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -325,6 +325,8 @@ CacheAllocator::allocateInternal(PoolId pid, // the allocation class in our memory allocator. const auto cid = allocator_->getAllocationClassId(pid, requiredSize); + util::RollingLatencyTracker rollTracker{ + (*stats_.classAllocLatency)[pid][cid]}; (*stats_.allocAttempts)[pid][cid].inc(); @@ -402,6 +404,9 @@ CacheAllocator::allocateChainedItemInternal( const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId; const auto cid = allocator_->getAllocationClassId(pid, requiredSize); + util::RollingLatencyTracker rollTracker{ + (*stats_.classAllocLatency)[pid][cid]}; + (*stats_.allocAttempts)[pid][cid].inc(); void* memory = allocator_->allocate(pid, requiredSize); @@ -2220,6 +2225,17 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { return ret; } +template +ACStats CacheAllocator::getACStats(PoolId poolId, + ClassId classId) const { + const auto& pool = allocator_->getPool(poolId); + const auto& ac = pool.getAllocationClass(classId); + + auto stats = ac.getStats(); + stats.allocLatencyNs = (*stats_.classAllocLatency)[poolId][classId]; + return stats; +} + template PoolEvictionAgeStats CacheAllocator::getPoolEvictionAgeStats( PoolId pid, unsigned int slabProjectionLength) const { diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 612f6d2185..307c72db81 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -1175,6 +1175,9 @@ class CacheAllocator : public CacheBase { // return cache's memory usage stats CacheMemoryStats getCacheMemoryStats() const override final; + // return stats for Allocation Class + ACStats getACStats(PoolId pid, ClassId cid) const override final; + // return the nvm cache stats map util::StatsMap getNvmCacheStatsMap() const override final; diff --git a/cachelib/allocator/CacheStats.cpp b/cachelib/allocator/CacheStats.cpp index 2e848e34a8..d9d202488e 100644 --- a/cachelib/allocator/CacheStats.cpp +++ b/cachelib/allocator/CacheStats.cpp @@ -44,6 +44,8 @@ void Stats::init() { initToZero(*fragmentationSize); initToZero(*chainedItemEvictions); initToZero(*regularItemEvictions); + + classAllocLatency = std::make_unique(); } template @@ -51,7 +53,7 @@ struct SizeVerify {}; void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const { #ifndef SKIP_SIZE_VERIFY - SizeVerify a = SizeVerify<16176>{}; + SizeVerify a = SizeVerify<16192>{}; std::ignore = a; #endif ret.numCacheGets = numCacheGets.get(); diff --git a/cachelib/allocator/CacheStatsInternal.h b/cachelib/allocator/CacheStatsInternal.h index b2a5f8c469..8f54cd6ecf 100644 --- a/cachelib/allocator/CacheStatsInternal.h +++ b/cachelib/allocator/CacheStatsInternal.h @@ -21,6 +21,7 @@ #include "cachelib/allocator/Cache.h" #include "cachelib/allocator/memory/MemoryAllocator.h" #include "cachelib/common/AtomicCounter.h" +#include "cachelib/common/RollingStats.h" namespace facebook { namespace cachelib { @@ -229,6 +230,13 @@ struct Stats { std::unique_ptr chainedItemEvictions{}; std::unique_ptr regularItemEvictions{}; + using PerPoolClassRollingStats = + std::array, + MemoryPoolManager::kMaxPools>; + + // rolling latency tracking for every alloc class in every pool + std::unique_ptr classAllocLatency{}; + // Eviction failures due to parent cannot be removed from access container AtomicCounter evictFailParentAC{0}; diff --git a/cachelib/allocator/memory/AllocationClass.h b/cachelib/allocator/memory/AllocationClass.h index b602e4d66d..d45a45c6cd 100644 --- a/cachelib/allocator/memory/AllocationClass.h +++ b/cachelib/allocator/memory/AllocationClass.h @@ -94,14 +94,6 @@ class AllocationClass { return static_cast(Slab::kSize / allocationSize_); } - // total number of slabs under this AllocationClass. - unsigned int getNumSlabs() const { - return lock_->lock_combine([this]() { - return static_cast(freeSlabs_.size() + - allocatedSlabs_.size()); - }); - } - // fetch stats about this allocation class. ACStats getStats() const; diff --git a/cachelib/allocator/memory/MemoryAllocatorStats.h b/cachelib/allocator/memory/MemoryAllocatorStats.h index 74ebbe64dd..acda9ee530 100644 --- a/cachelib/allocator/memory/MemoryAllocatorStats.h +++ b/cachelib/allocator/memory/MemoryAllocatorStats.h @@ -20,6 +20,7 @@ #include #include "cachelib/allocator/memory/Slab.h" +#include "cachelib/common/RollingStats.h" namespace facebook { namespace cachelib { @@ -47,6 +48,9 @@ struct ACStats { // true if the allocation class is full. bool full; + // Rolling allocation latency (in ns) + util::RollingStats allocLatencyNs; + constexpr unsigned long long totalSlabs() const noexcept { return freeSlabs + usedSlabs; } @@ -54,6 +58,17 @@ struct ACStats { constexpr size_t getTotalFreeMemory() const noexcept { return Slab::kSize * freeSlabs + freeAllocs * allocSize; } + + constexpr double usageFraction() const noexcept { + if (usedSlabs == 0) + return 0.0; + + return activeAllocs / (usedSlabs * allocsPerSlab); + } + + constexpr size_t totalAllocatedSize() const noexcept { + return activeAllocs * allocSize; + } }; // structure to query stats corresponding to a MemoryPool diff --git a/cachelib/allocator/tests/CacheBaseTest.cpp b/cachelib/allocator/tests/CacheBaseTest.cpp index 928fcc0c67..f249786743 100644 --- a/cachelib/allocator/tests/CacheBaseTest.cpp +++ b/cachelib/allocator/tests/CacheBaseTest.cpp @@ -34,6 +34,7 @@ class CacheBaseTest : public CacheBase, public SlabAllocatorTestBase { bool isObjectCache() const override { return false; } const MemoryPool& getPool(PoolId) const override { return memoryPool_; } PoolStats getPoolStats(PoolId) const override { return PoolStats(); } + ACStats getACStats(PoolId, ClassId) const { return ACStats(); }; AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId) const override { return AllSlabReleaseEvents{}; } diff --git a/cachelib/cachebench/cache/Cache-inl.h b/cachelib/cachebench/cache/Cache-inl.h index ed5369facf..30806a3161 100644 --- a/cachelib/cachebench/cache/Cache-inl.h +++ b/cachelib/cachebench/cache/Cache-inl.h @@ -609,10 +609,19 @@ Stats Cache::getStats() const { aggregate += poolStats; } + std::map> allocationClassStats{}; + + for (size_t pid = 0; pid < pools_.size(); pid++) { + auto cids = cache_->getPoolStats(static_cast(pid)).getClassIds(); + for (auto cid : cids) + allocationClassStats[pid][cid] = cache_->getACStats(pid, cid); + } + const auto cacheStats = cache_->getGlobalCacheStats(); const auto rebalanceStats = cache_->getSlabReleaseStats(); const auto navyStats = cache_->getNvmCacheStatsMap().toMap(); + ret.allocationClassStats = allocationClassStats; ret.numEvictions = aggregate.numEvictions(); ret.numItems = aggregate.numItems(); ret.evictAttempts = cacheStats.evictionAttempts; diff --git a/cachelib/cachebench/cache/Cache.cpp b/cachelib/cachebench/cache/Cache.cpp index 70feb0f764..ea9d6b106c 100644 --- a/cachelib/cachebench/cache/Cache.cpp +++ b/cachelib/cachebench/cache/Cache.cpp @@ -20,6 +20,12 @@ DEFINE_bool(report_api_latency, false, "Enable reporting cache API latency tracking"); +DEFINE_string( + report_ac_memory_usage_stats, + "", + "Enable reporting statistics for each allocation class. Set to" + "'human_readable' to print KB/MB/GB or to 'raw' to print in bytes."); + namespace facebook { namespace cachelib { namespace cachebench {} // namespace cachebench diff --git a/cachelib/cachebench/cache/Cache.h b/cachelib/cachebench/cache/Cache.h index c107b269e5..ac3ea26870 100644 --- a/cachelib/cachebench/cache/Cache.h +++ b/cachelib/cachebench/cache/Cache.h @@ -44,6 +44,7 @@ #include "cachelib/cachebench/util/NandWrites.h" DECLARE_bool(report_api_latency); +DECLARE_string(report_ac_memory_usage_stats); namespace facebook { namespace cachelib { @@ -324,6 +325,10 @@ class Cache { // return the stats for the pool. PoolStats getPoolStats(PoolId pid) const { return cache_->getPoolStats(pid); } + ACStats getACStats(PoolId pid, ClassId cid) const { + return cache_->getACStats(pid, cid); + } + // return the total number of inconsistent operations detected since start. unsigned int getInconsistencyCount() const { return inconsistencyCount_.load(std::memory_order_relaxed); diff --git a/cachelib/cachebench/cache/CacheStats.h b/cachelib/cachebench/cache/CacheStats.h index d6c9e53584..028ba96bde 100644 --- a/cachelib/cachebench/cache/CacheStats.h +++ b/cachelib/cachebench/cache/CacheStats.h @@ -21,6 +21,7 @@ #include "cachelib/common/PercentileStats.h" DECLARE_bool(report_api_latency); +DECLARE_string(report_ac_memory_usage_stats); namespace facebook { namespace cachelib { @@ -100,6 +101,8 @@ struct Stats { uint64_t invalidDestructorCount{0}; int64_t unDestructedItemCount{0}; + std::map> allocationClassStats; + // populate the counters related to nvm usage. Cache implementation can decide // what to populate since not all of those are interesting when running // cachebench. @@ -131,6 +134,63 @@ struct Stats { << std::endl; } + if (FLAGS_report_ac_memory_usage_stats != "") { + auto formatMemory = [&](size_t bytes) -> std::tuple { + if (FLAGS_report_ac_memory_usage_stats == "raw") { + return {"B", bytes}; + } + + constexpr double KB = 1024.0; + constexpr double MB = 1024.0 * 1024; + constexpr double GB = 1024.0 * 1024 * 1024; + + if (bytes >= GB) { + return {"GB", static_cast(bytes) / GB}; + } else if (bytes >= MB) { + return {"MB", static_cast(bytes) / MB}; + } else if (bytes >= KB) { + return {"KB", static_cast(bytes) / KB}; + } else { + return {"B", bytes}; + } + }; + + auto foreachAC = [&](auto cb) { + for (auto& pidStat : allocationClassStats) { + for (auto& cidStat : pidStat.second) { + cb(pidStat.first, cidStat.first, cidStat.second); + } + } + }; + + foreachAC([&](auto pid, auto cid, auto stats) { + auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize); + auto [memorySizeSuffix, memorySize] = + formatMemory(stats.totalAllocatedSize()); + out << folly::sformat("pid{:2} cid{:4} {:8.2f}{} memorySize: {:8.2f}{}", + pid, cid, allocSize, allocSizeSuffix, memorySize, + memorySizeSuffix) + << std::endl; + }); + + foreachAC([&](auto pid, auto cid, auto stats) { + auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize); + + // If the pool is not full, extrapolate usageFraction for AC assuming it + // will grow at the same rate. This value will be the same for all ACs. + auto acUsageFraction = (poolUsageFraction[pid] < 1.0) + ? poolUsageFraction[pid] + : stats.usageFraction(); + + out << folly::sformat( + "pid{:2} cid{:4} {:8.2f}{} usageFraction: {:4.2f} " + "rollingAvgAllocLatency: {:8.2f}ns", + pid, cid, allocSize, allocSizeSuffix, acUsageFraction, + stats.allocLatencyNs.estimate()) + << std::endl; + }); + } + if (numCacheGets > 0) { out << folly::sformat("Cache Gets : {:,}", numCacheGets) << std::endl; out << folly::sformat("Hit Ratio : {:6.2f}%", overallHitRatio) diff --git a/cachelib/common/RollingStats.h b/cachelib/common/RollingStats.h new file mode 100644 index 0000000000..4d179681ad --- /dev/null +++ b/cachelib/common/RollingStats.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "cachelib/common/Utils.h" + +namespace facebook { +namespace cachelib { +namespace util { + +class RollingStats { + public: + // track latency by taking the value of duration directly. + void trackValue(double value) { + // This is a highly unlikely scenario where + // cnt_ reaches numerical limits. Skip update + // of the rolling average anymore. + if (cnt_ == std::numeric_limits::max()) { + cnt_ = 0; + return; + } + auto ratio = static_cast(cnt_) / (cnt_ + 1); + avg_ *= ratio; + ++cnt_; + avg_ += value / cnt_; + } + + // Return the rolling average. + double estimate() { return avg_; } + + private: + double avg_{0}; + uint64_t cnt_{0}; +}; + +class RollingLatencyTracker { + public: + explicit RollingLatencyTracker(RollingStats& stats) + : stats_(&stats), begin_(std::chrono::steady_clock::now()) {} + RollingLatencyTracker() {} + ~RollingLatencyTracker() { + if (stats_) { + auto tp = std::chrono::steady_clock::now(); + auto diffNanos = + std::chrono::duration_cast(tp - begin_) + .count(); + stats_->trackValue(static_cast(diffNanos)); + } + } + + RollingLatencyTracker(const RollingLatencyTracker&) = delete; + RollingLatencyTracker& operator=(const RollingLatencyTracker&) = delete; + + RollingLatencyTracker(RollingLatencyTracker&& rhs) noexcept + : stats_(rhs.stats_), begin_(rhs.begin_) { + rhs.stats_ = nullptr; + } + + RollingLatencyTracker& operator=(RollingLatencyTracker&& rhs) noexcept { + if (this != &rhs) { + this->~RollingLatencyTracker(); + new (this) RollingLatencyTracker(std::move(rhs)); + } + return *this; + } + + private: + RollingStats* stats_{nullptr}; + std::chrono::time_point begin_; +}; +} // namespace util +} // namespace cachelib +} // namespace facebook