diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h
index eab7d0118..266de0333 100644
--- a/cachelib/allocator/Cache.h
+++ b/cachelib/allocator/Cache.h
@@ -102,6 +102,12 @@ class CacheBase {
   // @param poolId   the pool id
   virtual PoolStats getPoolStats(PoolId poolId) const = 0;
 
+  // Get the stats of a single allocation class.
+  //
+  // @param poolId   the pool id
+  // @param classId  the id of the allocation class within that pool
+  virtual ACStats getACStats(PoolId poolId, ClassId classId) const = 0;
+
   // @param poolId   the pool id
   virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0;
 
diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h
index d14bcfa78..e4971eb23 100644
--- a/cachelib/allocator/CacheAllocator-inl.h
+++ b/cachelib/allocator/CacheAllocator-inl.h
@@ -325,6 +325,8 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
 
   // the allocation class in our memory allocator.
   const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+  util::RollingLatencyTracker rollTracker{
+      (*stats_.classAllocLatency)[pid][cid]};
 
   (*stats_.allocAttempts)[pid][cid].inc();
 
@@ -402,6 +404,9 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
   const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId;
   const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
 
+  util::RollingLatencyTracker rollTracker{
+      (*stats_.classAllocLatency)[pid][cid]};
+
   (*stats_.allocAttempts)[pid][cid].inc();
 
   void* memory = allocator_->allocate(pid, requiredSize);
@@ -2220,6 +2225,17 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
   return ret;
 }
 
+// Stats of one allocation class, including its rolling allocation latency.
+template <typename CacheTrait>
+ACStats CacheAllocator<CacheTrait>::getACStats(PoolId poolId,
+                                               ClassId classId) const {
+  auto ret = allocator_->getPool(poolId).getAllocationClass(classId).getStats();
+
+  // the latency is kept in this cache's stats_, so it is filled in here.
+  ret.allocLatencyNs = (*stats_.classAllocLatency)[poolId][classId];
+  return ret;
+}
+
 template <typename CacheTrait>
 PoolEvictionAgeStats CacheAllocator<CacheTrait>::getPoolEvictionAgeStats(
     PoolId pid, unsigned int slabProjectionLength) const {
diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h
index 612f6d218..307c72db8 100644
--- a/cachelib/allocator/CacheAllocator.h
+++ b/cachelib/allocator/CacheAllocator.h
@@ -1175,6 +1175,9 @@ class CacheAllocator : public CacheBase {
   // return cache's memory usage stats
   CacheMemoryStats getCacheMemoryStats() const override final;
 
+  // return the stats of allocation class `cid` in pool `pid`
+  ACStats getACStats(PoolId pid, ClassId cid) const override final;
+
   // return the nvm cache stats map
   util::StatsMap getNvmCacheStatsMap() const override final;
 
diff --git a/cachelib/allocator/CacheStats.cpp b/cachelib/allocator/CacheStats.cpp
index 2e848e34a..d9d202488 100644
--- a/cachelib/allocator/CacheStats.cpp
+++ b/cachelib/allocator/CacheStats.cpp
@@ -44,6 +44,8 @@ void Stats::init() {
   initToZero(*fragmentationSize);
   initToZero(*chainedItemEvictions);
   initToZero(*regularItemEvictions);
+
+  classAllocLatency = std::make_unique<PerPoolClassRollingStats>();
 }
 
 template <int>
@@ -51,7 +53,7 @@ struct SizeVerify {};
 
 void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const {
 #ifndef SKIP_SIZE_VERIFY
-  SizeVerify<sizeof(Stats)> a = SizeVerify<16176>{};
+  SizeVerify<sizeof(Stats)> a = SizeVerify<16192>{};
   std::ignore = a;
 #endif
   ret.numCacheGets = numCacheGets.get();
diff --git a/cachelib/allocator/CacheStatsInternal.h b/cachelib/allocator/CacheStatsInternal.h
index b2a5f8c46..8f54cd6ec 100644
--- a/cachelib/allocator/CacheStatsInternal.h
+++ b/cachelib/allocator/CacheStatsInternal.h
@@ -21,6 +21,7 @@
 #include "cachelib/allocator/Cache.h"
 #include "cachelib/allocator/memory/MemoryAllocator.h"
 #include "cachelib/common/AtomicCounter.h"
+#include "cachelib/common/RollingStats.h"
 
 namespace facebook {
 namespace cachelib {
@@ -229,6 +230,13 @@ struct Stats {
   std::unique_ptr<PerPoolClassAtomicCounters> chainedItemEvictions{};
   std::unique_ptr<PerPoolClassAtomicCounters> regularItemEvictions{};
 
+  using PerPoolClassRollingStats =
+      std::array<std::array<util::RollingStats, MemoryAllocator::kMaxClasses>,
+                 MemoryPoolManager::kMaxPools>;
+
+  // rolling latency tracking for every alloc class in every pool
+  std::unique_ptr<PerPoolClassRollingStats> classAllocLatency{};
+
   // Eviction failures due to parent cannot be removed from access container
   AtomicCounter evictFailParentAC{0};
 
diff --git a/cachelib/allocator/memory/AllocationClass.h b/cachelib/allocator/memory/AllocationClass.h
index b602e4d66..d45a45c6c 100644
--- a/cachelib/allocator/memory/AllocationClass.h
+++ b/cachelib/allocator/memory/AllocationClass.h
@@ -94,14 +94,6 @@ class AllocationClass {
     return static_cast<unsigned int>(Slab::kSize / allocationSize_);
   }
 
-  // total number of slabs under this AllocationClass.
-  unsigned int getNumSlabs() const {
-    return lock_->lock_combine([this]() {
-      return static_cast<unsigned int>(freeSlabs_.size() +
-                                       allocatedSlabs_.size());
-    });
-  }
-
   // fetch stats about this allocation class.
   ACStats getStats() const;
 
diff --git a/cachelib/allocator/memory/MemoryAllocatorStats.h b/cachelib/allocator/memory/MemoryAllocatorStats.h
index 74ebbe64d..acda9ee53 100644
--- a/cachelib/allocator/memory/MemoryAllocatorStats.h
+++ b/cachelib/allocator/memory/MemoryAllocatorStats.h
@@ -20,6 +20,7 @@
 #include <unordered_map>
 
 #include "cachelib/allocator/memory/Slab.h"
+#include "cachelib/common/RollingStats.h"
 
 namespace facebook {
 namespace cachelib {
@@ -47,6 +48,9 @@ struct ACStats {
   // true if the allocation class is full.
   bool full;
 
+  // Rolling allocation latency (in ns)
+  util::RollingStats allocLatencyNs;
+
   constexpr unsigned long long totalSlabs() const noexcept {
     return freeSlabs + usedSlabs;
   }
@@ -54,6 +58,17 @@ struct ACStats {
   constexpr size_t getTotalFreeMemory() const noexcept {
     return Slab::kSize * freeSlabs + freeAllocs * allocSize;
   }
+
+  // Fraction of the slots in used slabs that hold active allocations (0 when
+  // no slab is in use). Cast first: integer division would yield only 0 or 1.
+  constexpr double usageFraction() const noexcept {
+    const auto slots = static_cast<double>(usedSlabs * allocsPerSlab);
+    return usedSlabs == 0 ? 0.0 : static_cast<double>(activeAllocs) / slots;
+  }
+
+  constexpr size_t totalAllocatedSize() const noexcept {
+    return allocSize * activeAllocs; // size of one alloc x active allocs
+  }
 };
 
 // structure to query stats corresponding to a MemoryPool
diff --git a/cachelib/allocator/tests/CacheBaseTest.cpp b/cachelib/allocator/tests/CacheBaseTest.cpp
index 928fcc0c6..f24978674 100644
--- a/cachelib/allocator/tests/CacheBaseTest.cpp
+++ b/cachelib/allocator/tests/CacheBaseTest.cpp
@@ -34,6 +34,7 @@ class CacheBaseTest : public CacheBase, public SlabAllocatorTestBase {
   bool isObjectCache() const override { return false; }
   const MemoryPool& getPool(PoolId) const override { return memoryPool_; }
   PoolStats getPoolStats(PoolId) const override { return PoolStats(); }
+  ACStats getACStats(PoolId, ClassId) const override { return ACStats(); }
   AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId) const override {
     return AllSlabReleaseEvents{};
   }
diff --git a/cachelib/cachebench/cache/Cache-inl.h b/cachelib/cachebench/cache/Cache-inl.h
index ed5369fac..30806a316 100644
--- a/cachelib/cachebench/cache/Cache-inl.h
+++ b/cachelib/cachebench/cache/Cache-inl.h
@@ -609,10 +609,19 @@ Stats Cache<Allocator>::getStats() const {
     aggregate += poolStats;
   }
 
+  std::map<PoolId, std::map<ClassId, ACStats>> allocationClassStats{};
+
+  for (size_t pid = 0; pid < pools_.size(); pid++) {
+    auto cids = cache_->getPoolStats(static_cast<PoolId>(pid)).getClassIds();
+    for (auto cid : cids)
+      allocationClassStats[pid][cid] = cache_->getACStats(pid, cid);
+  }
+
   const auto cacheStats = cache_->getGlobalCacheStats();
   const auto rebalanceStats = cache_->getSlabReleaseStats();
   const auto navyStats = cache_->getNvmCacheStatsMap().toMap();
 
+  ret.allocationClassStats = allocationClassStats;
   ret.numEvictions = aggregate.numEvictions();
   ret.numItems = aggregate.numItems();
   ret.evictAttempts = cacheStats.evictionAttempts;
diff --git a/cachelib/cachebench/cache/Cache.cpp b/cachelib/cachebench/cache/Cache.cpp
index 70feb0f76..ea9d6b106 100644
--- a/cachelib/cachebench/cache/Cache.cpp
+++ b/cachelib/cachebench/cache/Cache.cpp
@@ -20,6 +20,12 @@ DEFINE_bool(report_api_latency,
             false,
             "Enable reporting cache API latency tracking");
 
+DEFINE_string(
+    report_ac_memory_usage_stats,
+    "",
+    "Enable reporting statistics for each allocation class. Set to "
+    "'human_readable' to print KB/MB/GB or to 'raw' to print in bytes.");
+
 namespace facebook {
 namespace cachelib {
 namespace cachebench {} // namespace cachebench
diff --git a/cachelib/cachebench/cache/Cache.h b/cachelib/cachebench/cache/Cache.h
index c107b269e..ac3ea2687 100644
--- a/cachelib/cachebench/cache/Cache.h
+++ b/cachelib/cachebench/cache/Cache.h
@@ -44,6 +44,7 @@
 #include "cachelib/cachebench/util/NandWrites.h"
 
 DECLARE_bool(report_api_latency);
+DECLARE_string(report_ac_memory_usage_stats);
 
 namespace facebook {
 namespace cachelib {
@@ -324,6 +325,10 @@ class Cache {
   // return the stats for the pool.
   PoolStats getPoolStats(PoolId pid) const { return cache_->getPoolStats(pid); }
 
+  ACStats getACStats(PoolId pid, ClassId cid) const {
+    return cache_->getACStats(pid, cid); // stats of class cid in pool pid
+  }
+
   // return the total number of inconsistent operations detected since start.
   unsigned int getInconsistencyCount() const {
     return inconsistencyCount_.load(std::memory_order_relaxed);
diff --git a/cachelib/cachebench/cache/CacheStats.h b/cachelib/cachebench/cache/CacheStats.h
index d6c9e5358..028ba96bd 100644
--- a/cachelib/cachebench/cache/CacheStats.h
+++ b/cachelib/cachebench/cache/CacheStats.h
@@ -21,6 +21,7 @@
 #include "cachelib/common/PercentileStats.h"
 
 DECLARE_bool(report_api_latency);
+DECLARE_string(report_ac_memory_usage_stats);
 
 namespace facebook {
 namespace cachelib {
@@ -100,6 +101,8 @@ struct Stats {
   uint64_t invalidDestructorCount{0};
   int64_t unDestructedItemCount{0};
 
+  std::map<PoolId, std::map<ClassId, ACStats>> allocationClassStats;
+
   // populate the counters related to nvm usage. Cache implementation can decide
   // what to populate since not all of those are interesting when running
   // cachebench.
@@ -131,6 +134,63 @@ struct Stats {
           << std::endl;
     }
 
+    if (FLAGS_report_ac_memory_usage_stats != "") {
+      auto formatMemory = [&](size_t bytes) -> std::tuple<std::string, double> {
+        if (FLAGS_report_ac_memory_usage_stats == "raw") {
+          return {"B", bytes};
+        }
+
+        constexpr double KB = 1024.0;
+        constexpr double MB = 1024.0 * 1024;
+        constexpr double GB = 1024.0 * 1024 * 1024;
+
+        if (bytes >= GB) {
+          return {"GB", static_cast<double>(bytes) / GB};
+        } else if (bytes >= MB) {
+          return {"MB", static_cast<double>(bytes) / MB};
+        } else if (bytes >= KB) {
+          return {"KB", static_cast<double>(bytes) / KB};
+        } else {
+          return {"B", bytes};
+        }
+      };
+
+      auto foreachAC = [&](auto cb) {
+        for (auto& pidStat : allocationClassStats) {
+          for (auto& cidStat : pidStat.second) {
+            cb(pidStat.first, cidStat.first, cidStat.second);
+          }
+        }
+      };
+
+      foreachAC([&](auto pid, auto cid, auto stats) {
+        auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize);
+        auto [memorySizeSuffix, memorySize] =
+            formatMemory(stats.totalAllocatedSize());
+        out << folly::sformat("pid{:2} cid{:4} {:8.2f}{} memorySize: {:8.2f}{}",
+                              pid, cid, allocSize, allocSizeSuffix, memorySize,
+                              memorySizeSuffix)
+            << std::endl;
+      });
+
+      foreachAC([&](auto pid, auto cid, auto stats) {
+        auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize);
+
+        // If the pool is not full, extrapolate usageFraction for AC assuming it
+        // will grow at the same rate. This value will be the same for all ACs.
+        auto acUsageFraction = (poolUsageFraction[pid] < 1.0)
+                                   ? poolUsageFraction[pid]
+                                   : stats.usageFraction();
+
+        out << folly::sformat(
+                   "pid{:2} cid{:4} {:8.2f}{} usageFraction: {:4.2f} "
+                   "rollingAvgAllocLatency: {:8.2f}ns",
+                   pid, cid, allocSize, allocSizeSuffix, acUsageFraction,
+                   stats.allocLatencyNs.estimate())
+            << std::endl;
+      });
+    }
+
     if (numCacheGets > 0) {
       out << folly::sformat("Cache Gets    : {:,}", numCacheGets) << std::endl;
       out << folly::sformat("Hit Ratio     : {:6.2f}%", overallHitRatio)
diff --git a/cachelib/common/RollingStats.h b/cachelib/common/RollingStats.h
new file mode 100644
index 000000000..4d179681a
--- /dev/null
+++ b/cachelib/common/RollingStats.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <folly/Range.h>
+#include <folly/logging/xlog.h>
+
+#include "cachelib/common/Utils.h"
+
+namespace facebook {
+namespace cachelib {
+namespace util {
+
+// Running average of every value passed to trackValue().
+class RollingStats {
+ public:
+  // Fold `value` into the average: avg' = (avg * cnt + value) / (cnt + 1).
+  void trackValue(double value) {
+    // Highly unlikely: cnt_ reached its numerical limit. Drop this sample and
+    // reset the count, so the next sample starts the average over.
+    if (cnt_ == std::numeric_limits<uint64_t>::max()) {
+      cnt_ = 0;
+      return;
+    }
+    const auto ratio = static_cast<double>(cnt_) / (cnt_ + 1);
+    avg_ *= ratio;
+    ++cnt_;
+    avg_ += value / cnt_;
+  }
+
+  // Return the rolling average (0 until a value has been tracked).
+  double estimate() const { return avg_; }
+
+ private:
+  double avg_{0};
+  uint64_t cnt_{0};
+};
+
+// Scoped timer that reports its lifetime, in ns, to a RollingStats.
+class RollingLatencyTracker {
+ public:
+  explicit RollingLatencyTracker(RollingStats& stats)
+      : stats_(&stats), begin_(std::chrono::steady_clock::now()) {}
+  RollingLatencyTracker() {} // detached: nothing is reported
+  ~RollingLatencyTracker() {
+    if (!stats_) {
+      return;
+    }
+    const auto elapsed = std::chrono::steady_clock::now() - begin_;
+    const auto nanos =
+        std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
+    stats_->trackValue(static_cast<double>(nanos));
+  }
+
+  RollingLatencyTracker(const RollingLatencyTracker&) = delete;
+  RollingLatencyTracker& operator=(const RollingLatencyTracker&) = delete;
+
+  // Moving hands the pending measurement over and detaches the source.
+  RollingLatencyTracker(RollingLatencyTracker&& rhs) noexcept
+      : stats_(rhs.stats_), begin_(rhs.begin_) { rhs.stats_ = nullptr; }
+
+  RollingLatencyTracker& operator=(RollingLatencyTracker&& rhs) noexcept {
+    if (this == &rhs) {
+      return *this;
+    }
+    this->~RollingLatencyTracker(); // reports what this tracker held so far
+    return *new (this) RollingLatencyTracker(std::move(rhs));
+  }
+
+ private:
+  RollingStats* stats_{nullptr};
+  std::chrono::time_point<std::chrono::steady_clock> begin_;
+};
+} // namespace util
+} // namespace cachelib
+} // namespace facebook