diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h
index eab7d01184..5d2e53fe22 100644
--- a/cachelib/allocator/Cache.h
+++ b/cachelib/allocator/Cache.h
@@ -85,6 +85,9 @@ class CacheBase {
   CacheBase(CacheBase&&) = default;
   CacheBase& operator=(CacheBase&&) = default;
 
+  // TODO: come up with some reasonable number
+  static constexpr unsigned kMaxTiers = 2;
+
   // Get a string referring to the cache name for this cache
   virtual const std::string getCacheName() const = 0;
 
@@ -102,6 +105,9 @@ class CacheBase {
   // @param poolId the pool id
   virtual PoolStats getPoolStats(PoolId poolId) const = 0;
 
+  virtual AllocationClassBaseStat getAllocationClassStats(
+      TierId, PoolId pid, ClassId cid) const = 0;
+
   // @param poolId the pool id
   virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0;
 
diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h
index d14bcfa789..5328e151aa 100644
--- a/cachelib/allocator/CacheAllocator-inl.h
+++ b/cachelib/allocator/CacheAllocator-inl.h
@@ -324,7 +324,9 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
   const auto requiredSize = Item::getRequiredSize(key, size);
 
   // the allocation class in our memory allocator.
-  const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+  const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
+  util::RollingLatencyTracker rollTracker{
+      (*stats_.classAllocLatency)[tid][pid][cid]};
 
   (*stats_.allocAttempts)[pid][cid].inc();
 
@@ -402,6 +404,11 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
   const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId;
   const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
 
+  util::RollingLatencyTracker rollTracker{
+      (*stats_.classAllocLatency)[tid][pid][cid]};
+
+  // TODO: per-tier? Right now stats_ are not used in any public periodic
+  // worker
   (*stats_.allocAttempts)[pid][cid].inc();
 
   void* memory = allocator_->allocate(pid, requiredSize);
@@ -2220,6 +2227,49 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
   return ret;
 }
 
+template <typename CacheTrait>
+double CacheAllocator<CacheTrait>::slabsApproxFreePercentage(TierId tid) const {
+  return allocator_[tid]->approxFreeSlabsPercentage();
+}
+
+template <typename CacheTrait>
+AllocationClassBaseStat CacheAllocator<CacheTrait>::getAllocationClassStats(
+    TierId tid, PoolId pid, ClassId cid) const {
+  const auto& ac = allocator_[tid]->getPool(pid).getAllocationClass(cid);
+
+  AllocationClassBaseStat stats{};
+  stats.allocSize = ac.getAllocSize();
+  stats.memorySize = ac.getNumSlabs() * Slab::kSize;
+
+  if (slabsApproxFreePercentage(tid) > 0.0) {
+    auto totalMemory = MemoryAllocator::getMemorySize(memoryTierSize(tid));
+    auto freeMemory = static_cast<double>(totalMemory) *
+                      slabsApproxFreePercentage(tid) / 100.0;
+
+    // amount of free memory which has the same ratio to entire free memory as
+    // this allocation class memory size has to used memory
+    auto scaledFreeMemory =
+        static_cast<size_t>(freeMemory * stats.memorySize / totalMemory);
+
+    auto acAllocatedMemory = (100.0 - ac.approxFreePercentage()) / 100.0 *
+                             ac.getNumSlabs() * Slab::kSize;
+    auto acMaxAvailableMemory =
+        ac.getNumSlabs() * Slab::kSize + scaledFreeMemory;
+
+    if (acMaxAvailableMemory == 0) {
+      stats.approxFreePercent = 100.0;
+    } else {
+      stats.approxFreePercent =
+          100.0 - 100.0 * acAllocatedMemory / acMaxAvailableMemory;
+    }
+  } else {
+    stats.approxFreePercent = ac.approxFreePercentage();
+  }
+  stats.allocLatencyNs = (*stats_.classAllocLatency)[tid][pid][cid];
+
+  return stats;
+}
+
 template <typename CacheTrait>
 PoolEvictionAgeStats CacheAllocator<CacheTrait>::getPoolEvictionAgeStats(
     PoolId pid, unsigned int slabProjectionLength) const {
diff --git a/cachelib/allocator/CacheStats.cpp b/cachelib/allocator/CacheStats.cpp
index 2e848e34a8..1dfa4283ce 100644
--- a/cachelib/allocator/CacheStats.cpp
+++ b/cachelib/allocator/CacheStats.cpp
@@ -44,6 +44,8 @@ void Stats::init() {
   initToZero(*fragmentationSize);
   initToZero(*chainedItemEvictions);
   initToZero(*regularItemEvictions);
+
+  classAllocLatency = std::make_unique<PerTierPoolClassRollingStats>();
 }
 
 template <typename T>
diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h
index be0a9da5cb..a98b61dd50 100644
--- a/cachelib/allocator/CacheStats.h
+++ b/cachelib/allocator/CacheStats.h
@@ -25,6 +25,7 @@
 #include "cachelib/allocator/memory/Slab.h"
 #include "cachelib/common/FastStats.h"
 #include "cachelib/common/PercentileStats.h"
+#include "cachelib/common/RollingStats.h"
 #include "cachelib/common/Time.h"
 
 namespace facebook {
 namespace cachelib {
@@ -95,6 +96,20 @@ struct MMContainerStat {
   uint64_t numTailAccesses;
 };
 
+struct AllocationClassBaseStat {
+  // size of allocation class
+  size_t allocSize{0};
+
+  // size of memory assigned to this allocation class
+  size_t memorySize{0};
+
+  // percent of free memory in this class
+  double approxFreePercent{0.0};
+
+  // Rolling allocation latency (in ns)
+  util::RollingStats allocLatencyNs;
+};
+
 // cache related stats for a given allocation class.
 struct CacheStat {
   // allocation size for this container.
diff --git a/cachelib/allocator/CacheStatsInternal.h b/cachelib/allocator/CacheStatsInternal.h
index b2a5f8c469..19a15fbbd4 100644
--- a/cachelib/allocator/CacheStatsInternal.h
+++ b/cachelib/allocator/CacheStatsInternal.h
@@ -21,6 +21,7 @@
 #include "cachelib/allocator/Cache.h"
 #include "cachelib/allocator/memory/MemoryAllocator.h"
 #include "cachelib/common/AtomicCounter.h"
+#include "cachelib/common/RollingStats.h"
 
 namespace facebook {
 namespace cachelib {
@@ -229,6 +230,14 @@ struct Stats {
   std::unique_ptr<PerPoolClassAtomicCounters> chainedItemEvictions{};
   std::unique_ptr<PerPoolClassAtomicCounters> regularItemEvictions{};
 
+  using PerTierPoolClassRollingStats = std::array<
+      std::array<std::array<util::RollingStats, MemoryAllocator::kMaxClasses>,
+                 MemoryPoolManager::kMaxPools>,
+      CacheBase::kMaxTiers>;
+
+  // rolling latency tracking for every alloc class in every pool
+  std::unique_ptr<PerTierPoolClassRollingStats> classAllocLatency{};
+
   // Eviction failures due to parent cannot be removed from access container
   AtomicCounter evictFailParentAC{0};
 
diff --git a/cachelib/cachebench/cache/CacheStats.h b/cachelib/cachebench/cache/CacheStats.h
index d6c9e53584..6e24d52498 100644
--- a/cachelib/cachebench/cache/CacheStats.h
+++ b/cachelib/cachebench/cache/CacheStats.h
@@ -100,6 +100,11 @@ struct Stats {
   uint64_t invalidDestructorCount{0};
   int64_t unDestructedItemCount{0};
 
+  std::map<TierId, std::map<PoolId, std::map<ClassId, AllocationClassBaseStat>>>
+      allocationClassStats;
+
+  std::vector<double> slabsApproxFreePercentages;
+
   // populate the counters related to nvm usage. Cache implementation can decide
   // what to populate since not all of those are interesting when running
   // cachebench.
@@ -131,6 +136,56 @@ struct Stats {
         << std::endl;
     }
 
+    if (FLAGS_report_memory_usage_stats != "") {
+      for (TierId tid = 0; tid < slabsApproxFreePercentages.size(); tid++) {
+        out << folly::sformat("tid{:2} free slabs : {:.2f}%", tid,
+                              slabsApproxFreePercentages[tid])
+            << std::endl;
+      }
+
+      auto formatMemory = [&](size_t bytes) -> std::tuple<std::string, double> {
+        if (FLAGS_report_memory_usage_stats == "raw") {
+          return {"B", bytes};
+        }
+
+        constexpr double KB = 1024.0;
+        constexpr double MB = 1024.0 * 1024;
+        constexpr double GB = 1024.0 * 1024 * 1024;
+
+        if (bytes >= GB) {
+          return {"GB", static_cast<double>(bytes) / GB};
+        } else if (bytes >= MB) {
+          return {"MB", static_cast<double>(bytes) / MB};
+        } else if (bytes >= KB) {
+          return {"KB", static_cast<double>(bytes) / KB};
+        } else {
+          return {"B", bytes};
+        }
+      };
+
+      auto foreachAC = [&](auto cb) {
+        for (auto& tidStats : allocationClassStats) {
+          for (auto& pidStat : tidStats.second) {
+            for (auto& cidStat : pidStat.second) {
+              cb(tidStats.first, pidStat.first, cidStat.first, cidStat.second);
+            }
+          }
+        }
+      };
+
+      foreachAC([&](auto tid, auto pid, auto cid, auto stats) {
+        auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize);
+        auto [memorySizeSuffix, memorySize] = formatMemory(stats.memorySize);
+        out << folly::sformat(
+                   "tid{:2} pid{:2} cid{:4} {:8.2f}{} memorySize:{:8.2f}{} "
+                   "free:{:4.2f}% rollingAvgAllocLatency:{:8.2f}ns",
+                   tid, pid, cid, allocSize, allocSizeSuffix, memorySize,
+                   memorySizeSuffix, stats.approxFreePercent,
+                   stats.allocLatencyNs.estimate())
+            << std::endl;
+      });
+    }
+
     if (numCacheGets > 0) {
       out << folly::sformat("Cache Gets : {:,}", numCacheGets) << std::endl;
       out << folly::sformat("Hit Ratio : {:6.2f}%", overallHitRatio)
diff --git a/cachelib/common/RollingStats.h b/cachelib/common/RollingStats.h
new file mode 100644
index 0000000000..4d179681ad
--- /dev/null
+++ b/cachelib/common/RollingStats.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <chrono>
+#include <limits>
+
+#include "cachelib/common/Utils.h"
+
+namespace facebook {
+namespace cachelib {
+namespace util {
+
+class RollingStats {
+ public:
+  // track latency by taking the value of duration directly.
+  void trackValue(double value) {
+    // This is a highly unlikely scenario where cnt_ reaches its numerical
+    // limit; skip updating the rolling average in that case.
+    if (cnt_ == std::numeric_limits<uint64_t>::max()) {
+      cnt_ = 0;
+      return;
+    }
+    auto ratio = static_cast<double>(cnt_) / (cnt_ + 1);
+    avg_ *= ratio;
+    ++cnt_;
+    avg_ += value / cnt_;
+  }
+
+  // Return the rolling average.
+  double estimate() { return avg_; }
+
+ private:
+  double avg_{0};
+  uint64_t cnt_{0};
+};
+
+class RollingLatencyTracker {
+ public:
+  explicit RollingLatencyTracker(RollingStats& stats)
+      : stats_(&stats), begin_(std::chrono::steady_clock::now()) {}
+  RollingLatencyTracker() {}
+  ~RollingLatencyTracker() {
+    if (stats_) {
+      auto tp = std::chrono::steady_clock::now();
+      auto diffNanos =
+          std::chrono::duration_cast<std::chrono::nanoseconds>(tp - begin_)
+              .count();
+      stats_->trackValue(static_cast<double>(diffNanos));
+    }
+  }
+
+  RollingLatencyTracker(const RollingLatencyTracker&) = delete;
+  RollingLatencyTracker& operator=(const RollingLatencyTracker&) = delete;
+
+  RollingLatencyTracker(RollingLatencyTracker&& rhs) noexcept
+      : stats_(rhs.stats_), begin_(rhs.begin_) {
+    rhs.stats_ = nullptr;
+  }
+
+  RollingLatencyTracker& operator=(RollingLatencyTracker&& rhs) noexcept {
+    if (this != &rhs) {
+      this->~RollingLatencyTracker();
+      new (this) RollingLatencyTracker(std::move(rhs));
+    }
+    return *this;
+  }
+
+ private:
+  RollingStats* stats_{nullptr};
+  std::chrono::time_point<std::chrono::steady_clock> begin_;
+};
+} // namespace util
+} // namespace cachelib
+} // namespace facebook
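
For reference, RollingStats::trackValue keeps a running arithmetic mean without storing individual samples: the current average is rescaled by n / (n + 1) and the new sample contributes value / (n + 1). Below is a minimal standalone sketch of that same update rule (plain C++, not part of the patch; all names are illustrative), verifying it matches the plain arithmetic mean:

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <vector>

// Illustrative re-implementation of the rolling-average update used by
// util::RollingStats::trackValue(): rescale the old mean by n / (n + 1),
// then add the new sample's share value / (n + 1).
int main() {
  double avg = 0.0;
  uint64_t cnt = 0;
  std::vector<double> samples{120.0, 80.0, 100.0, 300.0}; // e.g. latencies in ns

  double sum = 0.0;
  for (double v : samples) {
    double ratio = static_cast<double>(cnt) / (cnt + 1);
    avg *= ratio;   // old mean now carries weight n / (n + 1)
    ++cnt;
    avg += v / cnt; // new sample carries weight 1 / (n + 1)
    sum += v;
  }

  // The incremental estimate equals the arithmetic mean of all samples.
  assert(std::fabs(avg - sum / samples.size()) < 1e-9);
  return 0;
}
```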