diff --git a/.github/workflows/build-cachelib-centos-8-5.yml b/.github/workflows/build-cachelib-centos-8-5.yml
index 14ab8cfa74..30247b2e94 100644
--- a/.github/workflows/build-cachelib-centos-8-5.yml
+++ b/.github/workflows/build-cachelib-centos-8-5.yml
@@ -13,8 +13,6 @@
# limitations under the License.
name: build-cachelib-centos-8.5
on:
-# push:
- pull_request:
schedule:
- cron: '0 9 * * *'
jobs:
diff --git a/.github/workflows/build-cachelib-centos-long.yml b/.github/workflows/build-cachelib-centos-long.yml
new file mode 100644
index 0000000000..92165f603b
--- /dev/null
+++ b/.github/workflows/build-cachelib-centos-long.yml
@@ -0,0 +1,39 @@
+name: build-cachelib-centos-latest
+on:
+ schedule:
+ - cron: '0 7 * * *'
+
+jobs:
+ build-cachelib-centos8-latest:
+ name: "CentOS/latest - Build CacheLib with all dependencies"
+ runs-on: ubuntu-latest
+ # Docker container image name
+ container: "centos:latest"
+ steps:
+ - name: "update packages"
+ run: dnf upgrade -y
+ - name: "install sudo,git"
+ run: dnf install -y sudo git cmake gcc
+ - name: "System Information"
+ run: |
+ echo === uname ===
+ uname -a
+ echo === /etc/os-release ===
+ cat /etc/os-release
+ echo === df -hl ===
+ df -hl
+ echo === free -h ===
+ free -h
+ echo === top ===
+ top -b -n1 -1 -Eg || timeout 1 top -b -n1
+ echo === env ===
+ env
+ echo === gcc -v ===
+ gcc -v
+ - name: "checkout sources"
+ uses: actions/checkout@v2
+ - name: "build CacheLib using build script"
+ run: ./contrib/build.sh -j -v -T
+ - name: "run tests"
+ timeout-minutes: 60
+ run: cd opt/cachelib/tests && ../../../run_tests.sh long
diff --git a/.github/workflows/build-cachelib-debian.yml b/.github/workflows/build-cachelib-debian.yml
new file mode 100644
index 0000000000..5bc3ad3c70
--- /dev/null
+++ b/.github/workflows/build-cachelib-debian.yml
@@ -0,0 +1,43 @@
+name: build-cachelib-debian-10
+on:
+ schedule:
+ - cron: '30 5 * * 0,3'
+
+jobs:
+ build-cachelib-debian-10:
+ name: "Debian/Buster - Build CacheLib with all dependencies"
+ runs-on: ubuntu-latest
+ # Docker container image name
+ container: "debian:buster-slim"
+ steps:
+ - name: "update packages"
+ run: apt-get update
+ - name: "upgrade packages"
+ run: apt-get -y upgrade
+ - name: "install sudo,git"
+ run: apt-get install -y sudo git procps
+ - name: "System Information"
+ run: |
+ echo === uname ===
+ uname -a
+ echo === /etc/os-release ===
+ cat /etc/os-release
+ echo === df -hl ===
+ df -hl
+ echo === free -h ===
+ free -h
+ echo === top ===
+ top -b -n1 -1 -Eg || timeout 1 top -b -n1 ; true
+ echo === env ===
+ env
+ echo === cc -v ===
+ cc -v || true
+ echo === g++ -v ===
+ g++ -v || true
+ - name: "checkout sources"
+ uses: actions/checkout@v2
+ - name: "build CacheLib using build script"
+ run: ./contrib/build.sh -j -v -T
+ - name: "run tests"
+ timeout-minutes: 60
+ run: cd opt/cachelib/tests && ../../../run_tests.sh
diff --git a/.github/workflows/build-cachelib-docker.yml b/.github/workflows/build-cachelib-docker.yml
new file mode 100644
index 0000000000..be28bc233c
--- /dev/null
+++ b/.github/workflows/build-cachelib-docker.yml
@@ -0,0 +1,49 @@
+name: build-cachelib-docker
+on:
+ push:
+ pull_request:
+
+jobs:
+ build-cachelib-docker:
+ name: "CentOS/latest - Build CacheLib with all dependencies"
+ runs-on: ubuntu-latest
+ env:
+ REPO: cachelib
+ GITHUB_REPO: intel/CacheLib
+ CONTAINER_REG: ghcr.io/pmem/cachelib
+ CONTAINER_REG_USER: ${{ secrets.GH_CR_USER }}
+ CONTAINER_REG_PASS: ${{ secrets.GH_CR_PAT }}
+ FORCE_IMAGE_ACTION: ${{ secrets.FORCE_IMAGE_ACTION }}
+ HOST_WORKDIR: ${{ github.workspace }}
+ WORKDIR: docker
+ IMG_VER: devel
+ strategy:
+ matrix:
+ CONFIG: ["OS=centos OS_VER=8streams PUSH_IMAGE=1"]
+ steps:
+ - name: "System Information"
+ run: |
+ echo === uname ===
+ uname -a
+ echo === /etc/os-release ===
+ cat /etc/os-release
+ echo === df -hl ===
+ df -hl
+ echo === free -h ===
+ free -h
+ echo === top ===
+ top -b -n1 -1 -Eg || timeout 1 top -b -n1
+ echo === env ===
+ env
+ echo === gcc -v ===
+ gcc -v
+ - name: "checkout sources"
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+
+ - name: Pull the image or rebuild and push it
+ run: cd $WORKDIR && ${{ matrix.CONFIG }} ./pull-or-rebuild-image.sh $FORCE_IMAGE_ACTION
+
+ - name: Run the build
+ run: cd $WORKDIR && ${{ matrix.CONFIG }} ./build.sh
diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml
index 26d942d182..54045f0a36 100644
--- a/.github/workflows/clang-format-check.yml
+++ b/.github/workflows/clang-format-check.yml
@@ -1,6 +1,6 @@
# From: https://github.com/marketplace/actions/clang-format-check#multiple-paths
name: clang-format Check
-on: [pull_request]
+on: []
jobs:
formatting-check:
name: Formatting Check
diff --git a/MultiTierDataMovement.md b/MultiTierDataMovement.md
new file mode 100644
index 0000000000..cccc14b947
--- /dev/null
+++ b/MultiTierDataMovement.md
@@ -0,0 +1,90 @@
+# Background Data Movement
+
+To reduce the number of online evictions and to support asynchronous
+promotion, we have added two periodic workers that handle eviction and promotion.
+
+The diagram below shows a simplified version of how the background evictor
+thread (green) is integrated into the CacheLib architecture.
+
+## Background Evictors
+
+The background evictors scan each class to see if there are objects to move to the
+next (lower) tier using a given strategy. Here we document the general parameters and
+the parameters for the different strategies.
+
+- `backgroundEvictorIntervalMilSec`: The interval at which this thread runs. By default
+the background evictor threads will wake up every 10 ms to scan the AllocationClasses. In
+addition, a background evictor thread will be woken up every time there is a failed allocation (from
+a request handling thread) and the current percentage of free memory for the
+AllocationClass is lower than `lowEvictionAcWatermark`. This may make the interval parameter
+less important when there are many allocations occurring from request handling threads.
+
+- `evictorThreads`: The number of background evictor threads to run. Each thread is assigned
+a set of AllocationClasses to scan and evict objects from (see the sharding sketch below). Currently, each thread gets
+an equal number of classes to scan, but since the object size distribution may be unequal, future
+versions will attempt to balance the classes among threads. The range is `1` to the number of AllocationClasses.
+The default is `1`.
+
+- `maxEvictionBatch`: The maximum number of objects to remove in a given eviction call. The
+default is `40`; the lower bound is `10` and the upper bound is `1000`. If this is too low, we might not
+remove objects at a reasonable rate; if too high, it might increase contention with user threads.
+
+- `minEvictionBatch`: Minimum number of items to evict at any time (if there are any
+candidates).
+
+- `maxEvictionPromotionHotness`: The maximum number of candidates to consider for eviction. This is similar to `maxEvictionBatch`,
+but it specifies how many candidates will be taken into consideration, not the actual number of items to evict.
+This option can be used to bound the duration of the critical section held under the LRU lock.
+
+
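+The patch shards AllocationClasses across worker threads with a simple modulo
+scheme (`backgroundWorkerId()` in `CacheAllocator-inl.h`); the standalone helper
+below is an illustrative sketch of that mapping, not part of the CacheLib API:
+
+```cpp
+#include <cstddef>
+
+// Sketch: map a (tier, pool, class) triple to one of numWorkers background
+// threads, mirroring backgroundWorkerId() from this patch.
+size_t backgroundWorkerId(unsigned tid, unsigned pid, unsigned cid,
+                          size_t numWorkers) {
+  return (tid + pid + cid) % numWorkers;
+}
+```
+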
+### FreeThresholdStrategy (default)
+
+- `lowEvictionAcWatermark`: Triggers the background eviction thread to run
+when this percentage of the AllocationClass is free.
+The default is `2.0`; to avoid wasting capacity we don't set this above `10.0`.
+
+- `highEvictionAcWatermark`: Stops evictions from an AllocationClass when this
+percentage of the AllocationClass is free. The default is `5.0`; to avoid wasting capacity we
+don't set this above `10.0`. The sketch below shows how the two watermarks interact.
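+
+The check below is a minimal sketch of how these two watermarks are evaluated,
+modeled on `shouldWakeupBgEvictor()` from this patch; the free-standing helper
+names are illustrative and not part of the CacheLib API:
+
+```cpp
+// usageFraction: fraction of the AllocationClass currently in use (0.0 to 1.0).
+// Wake the background evictor once free space falls to the low watermark.
+bool shouldWakeBackgroundEvictor(double usageFraction,
+                                 double lowEvictionAcWatermark) {
+  return (1.0 - usageFraction) * 100.0 <= lowEvictionAcWatermark;
+}
+
+// Keep evicting until free space reaches the high watermark.
+bool shouldStopEvicting(double usageFraction,
+                        double highEvictionAcWatermark) {
+  return (1.0 - usageFraction) * 100.0 >= highEvictionAcWatermark;
+}
+```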
+
+
+## Background Promoters
+
+The background promoters scan each class to see if there are objects to move to an
+upper (faster) tier using a given strategy. Here we document the general parameters and
+the parameters for the different strategies.
+
+- `backgroundPromoterIntervalMilSec`: The interval at which this thread runs. By default
+the background promoter threads will wake up every 10 ms to scan the AllocationClasses for
+objects to promote.
+
+- `promoterThreads`: The number of background promoter threads to run. Each thread is assigned
+a set of AllocationClasses to scan and promote objects from. Currently, each thread gets
+an equal number of classes to scan, but since the object size distribution may be unequal, future
+versions will attempt to balance the classes among threads. The range is `1` to the number of AllocationClasses. The default is `1`.
+
+- `maxPromotionBatch`: The maximum number of objects to promote in a given promotion call. The
+default is `40`; the lower bound is `10` and the upper bound is `1000`. If this is too low, we might not
+promote objects at a reasonable rate; if too high, it might increase contention with user threads.
+
+- `minPromotionBatch`: Minimum number of items to promote at any time (if there are any
+candidates).
+
+- `numDuplicateElements`: This allows us to promote items that have existing (read-only) handles, since
+we won't need to modify the data while a user still holds it. As a result, for a short time
+the data could reside in both tiers until it is evicted from its current tier. The default (`0`) is to
+not allow this; setting the value to `100` will enable duplicate elements in tiers.
+
+### Background Promotion Strategy (only one currently)
+
+- `promotionAcWatermark`: Promote items if at least this
+percentage of the AllocationClass is free. The promotion thread will attempt to move up to `maxPromotionBatch` objects
+to that tier. The objects are chosen from the head of the LRU. The default is `4.0`.
+This value should correlate with `lowEvictionAcWatermark`, `highEvictionAcWatermark`, `minAcAllocationWatermark`, and `maxAcAllocationWatermark`.
+- `maxPromotionBatch`: The maximum number of objects to promote in a batch during background promotion. Analogous to
+`maxEvictionBatch`. Its value should be lower than `maxEvictionBatch` to decrease contention on hot items (see the clamping sketch below).
+
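+A minimal sketch of how a strategy-computed promotion batch might be clamped to
+the bounds described above; `clampPromotionBatch` is an illustrative helper, not
+part of the CacheLib API:
+
+```cpp
+#include <algorithm>
+#include <cstddef>
+
+// Clamp a desired batch size to [minPromotionBatch, maxPromotionBatch].
+size_t clampPromotionBatch(size_t wanted, size_t minBatch, size_t maxBatch) {
+  return std::min(std::max(wanted, minBatch), maxBatch);
+}
+```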
diff --git a/cachelib/CMakeLists.txt b/cachelib/CMakeLists.txt
index 6be819974e..407342b581 100644
--- a/cachelib/CMakeLists.txt
+++ b/cachelib/CMakeLists.txt
@@ -85,6 +85,11 @@ set(CMAKE_MODULE_PATH
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)
+if(COVERAGE_ENABLED)
+ # Add code coverage
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage -fprofile-arcs -ftest-coverage")
+endif()
+
# include(fb_cxx_flags)
message(STATUS "Update CXXFLAGS: ${CMAKE_CXX_FLAGS}")
diff --git a/cachelib/allocator/BackgroundMover-inl.h b/cachelib/allocator/BackgroundMover-inl.h
new file mode 100644
index 0000000000..b77436635f
--- /dev/null
+++ b/cachelib/allocator/BackgroundMover-inl.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) Intel and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace facebook {
+namespace cachelib {
+
+template <typename CacheT>
+BackgroundMover<CacheT>::BackgroundMover(
+ Cache& cache,
+ std::shared_ptr<BackgroundMoverStrategy> strategy,
+ MoverDir direction)
+ : cache_(cache), strategy_(strategy), direction_(direction) {
+ if (direction_ == MoverDir::Evict) {
+ moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndEvictItems;
+
+ } else if (direction_ == MoverDir::Promote) {
+ moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndPromoteItems;
+ }
+}
+
+template <typename CacheT>
+BackgroundMover<CacheT>::~BackgroundMover() {
+ stop(std::chrono::seconds(0));
+}
+
+template <typename CacheT>
+void BackgroundMover<CacheT>::work() {
+ try {
+ checkAndRun();
+ } catch (const std::exception& ex) {
+ XLOGF(ERR, "BackgroundMover interrupted due to exception: {}", ex.what());
+ }
+}
+
+template <typename CacheT>
+void BackgroundMover<CacheT>::setAssignedMemory(
+ std::vector<MemoryDescriptorType>&& assignedMemory) {
+ XLOG(INFO, "Classes assigned to background worker:");
+ for (auto [tid, pid, cid] : assignedMemory) {
+ XLOGF(INFO, "Tid: {}, Pid: {}, Cid: {}", tid, pid, cid);
+ }
+
+ mutex.lock_combine([this, &assignedMemory] {
+ this->assignedMemory_ = std::move(assignedMemory);
+ });
+}
+
+// Look for classes that exceed the target memory capacity
+// and move batches of items out of them
+template <typename CacheT>
+void BackgroundMover<CacheT>::checkAndRun() {
+ auto assignedMemory = mutex.lock_combine([this] { return assignedMemory_; });
+
+ unsigned int moves = 0;
+ std::set<ClassId> classes{};
+ auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory);
+
+ for (size_t i = 0; i < batches.size(); i++) {
+ const auto [tid, pid, cid] = assignedMemory[i];
+ const auto batch = batches[i];
+
+ classes.insert(cid);
+ const auto& mpStats = cache_.getPoolByTid(pid, tid).getStats();
+
+ if (!batch) {
+ continue;
+ }
+
+ // try moving BATCH items from the class in order to reach free target
+ auto moved = moverFunc(cache_, tid, pid, cid, batch);
+ moves += moved;
+ moves_per_class_[tid][pid][cid] += moved;
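+ // Every allocation in an AllocationClass has the same allocSize, so the
+ // number of moved items directly yields the number of bytes moved.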
+ totalBytesMoved.add(moved * mpStats.acStats.at(cid).allocSize);
+ }
+
+ numTraversals.inc();
+ numMovedItems.add(moves);
+ totalClasses.add(classes.size());
+}
+
+template <typename CacheT>
+BackgroundMoverStats BackgroundMover<CacheT>::getStats() const noexcept {
+ BackgroundMoverStats stats;
+ stats.numMovedItems = numMovedItems.get();
+ stats.runCount = numTraversals.get();
+ stats.totalBytesMoved = totalBytesMoved.get();
+ stats.totalClasses = totalClasses.get();
+
+ return stats;
+}
+
+template <typename CacheT>
+std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
+BackgroundMover<CacheT>::getClassStats() const noexcept {
+ return moves_per_class_;
+}
+
+} // namespace cachelib
+} // namespace facebook
diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h
new file mode 100644
index 0000000000..1246676d6e
--- /dev/null
+++ b/cachelib/allocator/BackgroundMover.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Intel and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "cachelib/allocator/BackgroundMoverStrategy.h"
+#include "cachelib/allocator/CacheStats.h"
+#include "cachelib/common/AtomicCounter.h"
+#include "cachelib/common/PeriodicWorker.h"
+
+namespace facebook {
+namespace cachelib {
+
+// wrapper that exposes the private APIs of CacheType that are specifically
+// needed by the background mover
+template <typename C>
+struct BackgroundMoverAPIWrapper {
+ static size_t traverseAndEvictItems(C& cache,
+ unsigned int tid,
+ unsigned int pid,
+ unsigned int cid,
+ size_t batch) {
+ return cache.traverseAndEvictItems(tid, pid, cid, batch);
+ }
+
+ static size_t traverseAndPromoteItems(C& cache,
+ unsigned int tid,
+ unsigned int pid,
+ unsigned int cid,
+ size_t batch) {
+ return cache.traverseAndPromoteItems(tid, pid, cid, batch);
+ }
+};
+
+enum class MoverDir { Evict = 0, Promote };
+
+// Periodic worker that evicts items from tiers in batches
+// The primary aim is to reduce insertion times for new items in the
+// cache
+template <typename CacheT>
+class BackgroundMover : public PeriodicWorker {
+ public:
+ using Cache = CacheT;
+ // @param cache the cache interface
+ // @param strategy the strategy class that defines how objects are
+ // moved
+ // (promoted vs. evicted and how much)
+ BackgroundMover(Cache& cache,
+ std::shared_ptr<BackgroundMoverStrategy> strategy,
+ MoverDir direction);
+
+ ~BackgroundMover() override;
+
+ BackgroundMoverStats getStats() const noexcept;
+ std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
+ getClassStats() const noexcept;
+
+ void setAssignedMemory(
+ std::vector<MemoryDescriptorType>&& assignedMemory);
+
+ private:
+ std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
+ moves_per_class_;
+ // cache allocator's interface for evicting
+ using Item = typename Cache::Item;
+
+ Cache& cache_;
+ std::shared_ptr strategy_;
+ MoverDir direction_;
+
+ std::function<size_t(Cache&, unsigned int, unsigned int, unsigned int, size_t)>
+ moverFunc;
+
+ // implements the actual logic of running the background evictor
+ void work() override final;
+ void checkAndRun();
+
+ AtomicCounter numMovedItems{0};
+ AtomicCounter numTraversals{0};
+ AtomicCounter totalClasses{0};
+ AtomicCounter totalBytesMoved{0};
+
+ std::vector<MemoryDescriptorType> assignedMemory_;
+ folly::DistributedMutex mutex;
+};
+} // namespace cachelib
+} // namespace facebook
+
+#include "cachelib/allocator/BackgroundMover-inl.h"
diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h
new file mode 100644
index 0000000000..7706a625a5
--- /dev/null
+++ b/cachelib/allocator/BackgroundMoverStrategy.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "cachelib/allocator/Cache.h"
+
+
+namespace facebook {
+namespace cachelib {
+
+struct MemoryDescriptorType {
+ MemoryDescriptorType(TierId tid, PoolId pid, ClassId cid) :
+ tid_(tid), pid_(pid), cid_(cid) {}
+ TierId tid_;
+ PoolId pid_;
+ ClassId cid_;
+};
+
+// Base class for background eviction and promotion strategies.
+class BackgroundMoverStrategy {
+ public:
+ virtual std::vector<size_t> calculateBatchSizes(
+ const CacheBase& cache,
+ std::vector<MemoryDescriptorType> acVec) = 0;
+};
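+
+// A concrete strategy (e.g. FreeThresholdStrategy.cpp, added to the allocator
+// CMakeLists in this patch) returns one batch size per entry in acVec; a
+// batch of 0 means that class currently needs no background work (see
+// BackgroundMover::checkAndRun(), which skips empty batches).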
+
+} // namespace cachelib
+} // namespace facebook
diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt
index f94c8c90c7..0f96a0cd7f 100644
--- a/cachelib/allocator/CMakeLists.txt
+++ b/cachelib/allocator/CMakeLists.txt
@@ -35,6 +35,7 @@ add_library (cachelib_allocator
CCacheManager.cpp
ContainerTypes.cpp
FreeMemStrategy.cpp
+ FreeThresholdStrategy.cpp
HitsPerSlabStrategy.cpp
LruTailAgeStrategy.cpp
MarginalHitsOptimizeStrategy.cpp
@@ -54,6 +55,7 @@ add_library (cachelib_allocator
PoolOptimizeStrategy.cpp
PoolRebalancer.cpp
PoolResizer.cpp
+ PrivateMemoryManager.cpp
RebalanceStrategy.cpp
SlabReleaseStats.cpp
TempShmMapping.cpp
diff --git a/cachelib/allocator/Cache.cpp b/cachelib/allocator/Cache.cpp
index 058eb84501..37457cc3e9 100644
--- a/cachelib/allocator/Cache.cpp
+++ b/cachelib/allocator/Cache.cpp
@@ -245,6 +245,7 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
statPrefix + "cache.size.configured",
memStats.configuredRamCacheSize + memStats.nvmCacheSize);
+ // TODO: add specific per-tier counters
const auto stats = getGlobalCacheStats();
// Eviction Stats
@@ -254,7 +255,8 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
// from both ram and nvm, this is counted as a single eviction from cache.
// Ram Evictions: item evicted from ram but it can be inserted into nvm
const std::string ramEvictionKey = statPrefix + "ram.evictions";
- counters_.updateDelta(ramEvictionKey, stats.numEvictions);
+ counters_.updateDelta(ramEvictionKey,
+ std::accumulate(stats.numEvictions.begin(), stats.numEvictions.end(), 0ULL));
// Nvm Evictions: item evicted from nvm but it can be still in ram
const std::string nvmEvictionKey = statPrefix + "nvm.evictions";
counters_.updateDelta(nvmEvictionKey, stats.numNvmEvictions);
@@ -296,11 +298,11 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
}
counters_.updateDelta(statPrefix + "cache.alloc_attempts",
- stats.allocAttempts);
+ std::accumulate(stats.allocAttempts.begin(), stats.allocAttempts.end(), 0ULL));
counters_.updateDelta(statPrefix + "cache.eviction_attempts",
- stats.evictionAttempts);
+ std::accumulate(stats.evictionAttempts.begin(), stats.evictionAttempts.end(), 0ULL));
counters_.updateDelta(statPrefix + "cache.alloc_failures",
- stats.allocFailures);
+ std::accumulate(stats.allocFailures.begin(), stats.allocFailures.end(), 0ULL));
counters_.updateDelta(statPrefix + "cache.invalid_allocs",
stats.invalidAllocs);
@@ -476,6 +478,10 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
visitEstimates(uploadStatsNanoToMicro, stats.allocateLatencyNs,
statPrefix + "allocate.latency_us");
+ visitEstimates(uploadStatsNanoToMicro, stats.bgEvictLatencyNs,
+ statPrefix + "background.eviction.latency_us");
+ visitEstimates(uploadStatsNanoToMicro, stats.bgPromoteLatencyNs,
+ statPrefix + "background.promotion.latency_us");
visitEstimates(uploadStatsNanoToMicro, stats.moveChainedLatencyNs,
statPrefix + "move.chained.latency_us");
visitEstimates(uploadStatsNanoToMicro, stats.moveRegularLatencyNs,
diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h
index e225ba8a01..c871358189 100644
--- a/cachelib/allocator/Cache.h
+++ b/cachelib/allocator/Cache.h
@@ -85,6 +85,9 @@ class CacheBase {
CacheBase(CacheBase&&) = default;
CacheBase& operator=(CacheBase&&) = default;
+ // TODO: come up with some reasonable number
+ static constexpr unsigned kMaxTiers = 2;
+
// Get a string referring to the cache name for this cache
virtual const std::string getCacheName() const = 0;
@@ -95,6 +98,12 @@ class CacheBase {
//
// @param poolId The pool id to query
virtual const MemoryPool& getPool(PoolId poolId) const = 0;
+
+ // Get the reference to a memory pool using a tier id, for stats purposes
+ //
+ // @param poolId The pool id to query
+ // @param tierId The tier of the pool id
+ virtual const MemoryPool& getPoolByTid(PoolId poolId, TierId tid) const = 0;
// Get Pool specific stats (regular pools). This includes stats from the
// Memory Pool and also the cache.
@@ -102,6 +111,12 @@ class CacheBase {
// @param poolId the pool id
virtual PoolStats getPoolStats(PoolId poolId) const = 0;
+ // Get Allocation Class specific stats.
+ //
+ // @param poolId the pool id
+ // @param classId the class id
+ virtual ACStats getACStats(TierId tid, PoolId poolId, ClassId classId) const = 0;
+
// @param poolId the pool id
virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0;
diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h
index 92a04807a7..11e9058a34 100644
--- a/cachelib/allocator/CacheAllocator-inl.h
+++ b/cachelib/allocator/CacheAllocator-inl.h
@@ -16,6 +16,8 @@
#pragma once
+#include <numeric>
+
namespace facebook {
namespace cachelib {
@@ -35,6 +37,7 @@ CacheAllocator<CacheTrait>::CacheAllocator(SharedMemNewT, Config config)
template <typename CacheTrait>
CacheAllocator<CacheTrait>::CacheAllocator(SharedMemAttachT, Config config)
: CacheAllocator(InitMemType::kMemAttach, config) {
+ /* TODO - per tier? */
for (auto pid : *metadata_.compactCachePools()) {
isCompactCachePool_[pid] = true;
}
@@ -56,6 +59,9 @@ CacheAllocator<CacheTrait>::CacheAllocator(
tempShm_(type == InitMemType::kNone && isOnShm_
? std::make_unique<TempShmMapping>(config_.getCacheSize())
: nullptr),
+ privMemManager_(type == InitMemType::kNone && !isOnShm_
+ ? std::make_unique<PrivateMemoryManager>()
+ : nullptr),
shmManager_(type != InitMemType::kNone
? std::make_unique<ShmManager>(config_.cacheDir,
config_.isUsingPosixShm())
@@ -67,12 +73,12 @@ CacheAllocator::CacheAllocator(
: serialization::CacheAllocatorMetadata{}},
allocator_(initAllocator(type)),
compactCacheManager_(type != InitMemType::kMemAttach
- ? std::make_unique<CCacheManager>(*allocator_)
- : restoreCCacheManager()),
+ ? std::make_unique<CCacheManager>(*allocator_[0] /* TODO: per tier */)
+ : restoreCCacheManager(0/* TODO: per tier */)),
compressor_(createPtrCompressor()),
mmContainers_(type == InitMemType::kMemAttach
? deserializeMMContainers(*deserializer_, compressor_)
- : MMContainers{}),
+ : MMContainers{getNumTiers()}),
accessContainer_(initAccessContainer(
type, detail::kShmHashTableName, config.accessConfig)),
chainedItemAccessContainer_(
@@ -81,6 +87,8 @@ CacheAllocator::CacheAllocator(
config.chainedItemAccessConfig)),
chainedItemLocks_(config_.chainedItemsLockPower,
std::make_shared<MurmurHash2>()),
+ movesMap_(kShards),
+ moveLock_(kShards),
cacheCreationTime_{
type != InitMemType::kMemAttach
? util::getCurrentTimeSec()
@@ -105,48 +113,115 @@ CacheAllocator::~CacheAllocator() {
}
template <typename CacheTrait>
-ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts() {
+ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts(TierId tid) {
ShmSegmentOpts opts;
opts.alignment = sizeof(Slab);
// TODO: we support single tier so far
- if (config_.memoryTierConfigs.size() > 1) {
- throw std::invalid_argument("CacheLib only supports a single memory tier");
+ if (config_.memoryTierConfigs.size() > 2) {
+ throw std::invalid_argument("CacheLib only supports two memory tiers");
}
- opts.memBindNumaNodes = config_.memoryTierConfigs[0].getMemBind();
+ opts.memBindNumaNodes = config_.memoryTierConfigs[tid].getMemBind();
+ return opts;
+}
+
+template <typename CacheTrait>
+PrivateSegmentOpts CacheAllocator<CacheTrait>::createPrivateSegmentOpts(TierId tid) {
+ PrivateSegmentOpts opts;
+ opts.alignment = sizeof(Slab);
+ auto memoryTierConfigs = config_.getMemoryTierConfigs();
+ opts.memBindNumaNodes = memoryTierConfigs[tid].getMemBind();
+
return opts;
}
+template <typename CacheTrait>
+size_t CacheAllocator<CacheTrait>::memoryTierSize(TierId tid) const {
+ auto& memoryTierConfigs = config_.memoryTierConfigs;
+ auto partitions = std::accumulate(memoryTierConfigs.begin(), memoryTierConfigs.end(), 0UL,
+ [](const size_t i, const MemoryTierCacheConfig& config){
+ return i + config.getRatio();
+ });
+
+ return memoryTierConfigs[tid].calculateTierSize(config_.getCacheSize(), partitions);
+}
+
+template <typename CacheTrait>
+std::unique_ptr<MemoryAllocator>
+CacheAllocator<CacheTrait>::createPrivateAllocator(TierId tid) {
+ if (isOnShm_)
+ return std::make_unique<MemoryAllocator>(
+ getAllocatorConfig(config_),
+ tempShm_->getAddr(),
+ memoryTierSize(tid));
+ else
+ return std::make_unique<MemoryAllocator>(
+ getAllocatorConfig(config_),
+ privMemManager_->createMapping(config_.size, createPrivateSegmentOpts(tid)),
+ memoryTierSize(tid));
+}
+
template <typename CacheTrait>
std::unique_ptr<MemoryAllocator>
-CacheAllocator<CacheTrait>::createNewMemoryAllocator() {
+CacheAllocator<CacheTrait>::createNewMemoryAllocator(TierId tid) {
+ size_t tierSize = memoryTierSize(tid);
return std::make_unique<MemoryAllocator>(
getAllocatorConfig(config_),
shmManager_
- ->createShm(detail::kShmCacheName, config_.getCacheSize(),
- config_.slabMemoryBaseAddr, createShmCacheOpts())
+ ->createShm(detail::kShmCacheName + std::to_string(tid),
+ tierSize, config_.slabMemoryBaseAddr,
+ createShmCacheOpts(tid))
.addr,
- config_.getCacheSize());
+ tierSize);
}
template <typename CacheTrait>
std::unique_ptr<MemoryAllocator>
-CacheAllocator<CacheTrait>::restoreMemoryAllocator() {
+CacheAllocator<CacheTrait>::restoreMemoryAllocator(TierId tid) {
return std::make_unique<MemoryAllocator>(
deserializer_->deserialize<MemoryAllocator::SerializationType>(),
shmManager_
- ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr,
- createShmCacheOpts())
- .addr,
- config_.getCacheSize(),
+ ->attachShm(detail::kShmCacheName + std::to_string(tid),
+ config_.slabMemoryBaseAddr, createShmCacheOpts(tid)).addr,
+ memoryTierSize(tid),
config_.disableFullCoredump);
}
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::createPrivateAllocators() {
+ std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+ for (int tid = 0; tid < getNumTiers(); tid++) {
+ allocators.emplace_back(createPrivateAllocator(tid));
+ }
+ return allocators;
+}
+
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::createAllocators() {
+ std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+ for (int tid = 0; tid < getNumTiers(); tid++) {
+ allocators.emplace_back(createNewMemoryAllocator(tid));
+ }
+ return allocators;
+}
+
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::restoreAllocators() {
+ std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+ for (int tid = 0; tid < getNumTiers(); tid++) {
+ allocators.emplace_back(restoreMemoryAllocator(tid));
+ }
+ return allocators;
+}
+
template <typename CacheTrait>
std::unique_ptr<CCacheManager>
-CacheAllocator<CacheTrait>::restoreCCacheManager() {
+CacheAllocator<CacheTrait>::restoreCCacheManager(TierId tid) {
return std::make_unique<CCacheManager>(
deserializer_->deserialize<CCacheManager::SerializationType>(),
- *allocator_);
+ *allocator_[tid]);
}
template <typename CacheTrait>
@@ -235,24 +310,30 @@ void CacheAllocator<CacheTrait>::initWorkers() {
config_.poolOptimizeStrategy,
config_.ccacheOptimizeStepSizePercent);
}
+
+ if (config_.backgroundEvictorEnabled()) {
+ startNewBackgroundEvictor(config_.backgroundEvictorInterval,
+ config_.backgroundEvictorStrategy,
+ config_.backgroundEvictorThreads);
+ }
+
+ if (config_.backgroundPromoterEnabled()) {
+ startNewBackgroundPromoter(config_.backgroundPromoterInterval,
+ config_.backgroundPromoterStrategy,
+ config_.backgroundPromoterThreads);
+ }
}
template <typename CacheTrait>
-std::unique_ptr<MemoryAllocator> CacheAllocator<CacheTrait>::initAllocator(
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::initAllocator(
InitMemType type) {
if (type == InitMemType::kNone) {
- if (isOnShm_ == true) {
- return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
- tempShm_->getAddr(),
- config_.getCacheSize());
- } else {
- return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
- config_.getCacheSize());
- }
+ return createPrivateAllocators();
} else if (type == InitMemType::kMemNew) {
- return createNewMemoryAllocator();
+ return createAllocators();
} else if (type == InitMemType::kMemAttach) {
- return restoreMemoryAllocator();
+ return restoreAllocators();
}
// Invalid type
@@ -320,27 +401,54 @@ CacheAllocator<CacheTrait>::allocate(PoolId poolId,
}
template <typename CacheTrait>
-typename CacheAllocator<CacheTrait>::WriteHandle
-CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
- typename Item::Key key,
- uint32_t size,
- uint32_t creationTime,
- uint32_t expiryTime) {
- util::LatencyTracker tracker{stats().allocateLatency_};
+bool CacheAllocator<CacheTrait>::shouldWakeupBgEvictor(TierId tid, PoolId pid, ClassId cid) {
+ // TODO: should we also work on lower tiers? should we have separate set of params?
+ if (tid == 1) return false;
+ return (1 - getACStats(tid, pid, cid).usageFraction()) * 100 <= config_.lowEvictionAcWatermark;
+}
+
+template <typename CacheTrait>
+size_t CacheAllocator<CacheTrait>::backgroundWorkerId(TierId tid, PoolId pid, ClassId cid, size_t numWorkers) {
+ XDCHECK(numWorkers);
+
+ // TODO: come up with some better sharding (use some hashing)
+ return (tid + pid + cid) % numWorkers;
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
+ PoolId pid,
+ typename Item::Key key,
+ uint32_t size,
+ uint32_t creationTime,
+ uint32_t expiryTime,
+ bool fromBgThread,
+ bool evict) {
+ util::LatencyTracker tracker{stats().allocateLatency_, static_cast<bool>(!fromBgThread)};
SCOPE_FAIL { stats_.invalidAllocs.inc(); };
// number of bytes required for this item
const auto requiredSize = Item::getRequiredSize(key, size);
// the allocation class in our memory allocator.
- const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+ const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
- (*stats_.allocAttempts)[pid][cid].inc();
+ util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]};
- void* memory = allocator_->allocate(pid, requiredSize);
- if (memory == nullptr) {
- memory = findEviction(pid, cid);
+ (*stats_.allocAttempts)[tid][pid][cid].inc();
+
+ void* memory = allocator_[tid]->allocate(pid, requiredSize);
+
+ if (backgroundEvictor_.size() && !fromBgThread && (memory == nullptr || shouldWakeupBgEvictor(tid, pid, cid))) {
+ backgroundEvictor_[backgroundWorkerId(tid, pid, cid, backgroundEvictor_.size())]->wakeUp();
+ }
+
+ if (memory == nullptr && !evict) {
+ return {};
+ } else if (memory == nullptr) {
+ memory = findEviction(tid, pid, cid);
}
WriteHandle handle;
@@ -351,18 +459,18 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
// for example.
SCOPE_FAIL {
// free back the memory to the allocator since we failed.
- allocator_->free(memory);
+ allocator_[tid]->free(memory);
};
handle = acquire(new (memory) Item(key, size, creationTime, expiryTime));
if (handle) {
handle.markNascent();
- (*stats_.fragmentationSize)[pid][cid].add(
+ (*stats_.fragmentationSize)[tid][pid][cid].add(
util::getFragmentation(*this, *handle));
}
} else { // failed to allocate memory.
- (*stats_.allocFailures)[pid][cid].inc();
+ (*stats_.allocFailures)[tid][pid][cid].inc();
// wake up rebalancer
if (!config_.poolRebalancerDisableForcedWakeUp && poolRebalancer_) {
poolRebalancer_->wakeUp();
@@ -379,6 +487,23 @@ CacheAllocator::allocateInternal(PoolId pid,
return handle;
}
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
+ typename Item::Key key,
+ uint32_t size,
+ uint32_t creationTime,
+ uint32_t expiryTime,
+ bool fromBgThread) {
+ auto tid = 0; /* TODO: consult admission policy */
+ for(TierId tid = 0; tid < getNumTiers(); ++tid) {
+ bool evict = !config_.insertToFirstFreeTier || tid == getNumTiers() - 1;
+ auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime, fromBgThread, evict);
+ if (handle) return handle;
+ }
+ return {};
+}
+
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::WriteHandle
CacheAllocator<CacheTrait>::allocateChainedItem(const ReadHandle& parent,
@@ -402,6 +527,18 @@ template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::WriteHandle
CacheAllocator<CacheTrait>::allocateChainedItemInternal(
const ReadHandle& parent, uint32_t size) {
+ auto tid = 0; /* TODO: consult admission policy */
+ for(TierId tid = 0; tid < getNumTiers(); ++tid) {
+ auto handle = allocateChainedItemInternalTier(*parent, size, tid);
+ if (handle) return handle;
+ }
+ return {};
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::allocateChainedItemInternalTier(
+ const Item& parent, uint32_t size, TierId tid) {
util::LatencyTracker tracker{stats().allocateLatency_};
SCOPE_FAIL { stats_.invalidAllocs.inc(); };
@@ -409,29 +546,33 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
// number of bytes required for this item
const auto requiredSize = ChainedItem::getRequiredSize(size);
- const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId;
- const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+ const auto ptid = getTierId(parent); // okay because pools/classes are duplicated among the tiers
+ const auto pid = allocator_[ptid]->getAllocInfo(parent.getMemory()).poolId;
+ const auto cid = allocator_[ptid]->getAllocationClassId(pid, requiredSize);
- (*stats_.allocAttempts)[pid][cid].inc();
+ util::RollingLatencyTracker rollTracker{
+ (*stats_.classAllocLatency)[tid][pid][cid]};
+
+ (*stats_.allocAttempts)[tid][pid][cid].inc();
- void* memory = allocator_->allocate(pid, requiredSize);
+ void* memory = allocator_[tid]->allocate(pid, requiredSize);
if (memory == nullptr) {
- memory = findEviction(pid, cid);
+ memory = findEviction(tid, pid, cid);
}
if (memory == nullptr) {
- (*stats_.allocFailures)[pid][cid].inc();
+ (*stats_.allocFailures)[tid][pid][cid].inc();
return WriteHandle{};
}
- SCOPE_FAIL { allocator_->free(memory); };
+ SCOPE_FAIL { allocator_[tid]->free(memory); };
auto child = acquire(
- new (memory) ChainedItem(compressor_.compress(parent.getInternal()), size,
+ new (memory) ChainedItem(compressor_.compress(&parent), size,
util::getCurrentTimeSec()));
if (child) {
child.markNascent();
- (*stats_.fragmentationSize)[pid][cid].add(
+ (*stats_.fragmentationSize)[tid][pid][cid].add(
util::getFragmentation(*this, *child));
}
@@ -467,14 +608,15 @@ void CacheAllocator<CacheTrait>::addChainedItem(WriteHandle& parent,
// Count a new child
stats_.numChainedChildItems.inc();
- insertInMMContainer(*child);
-
// Increment refcount since this chained item is now owned by the parent
// Parent will decrement the refcount upon release. Since this is an
// internal refcount, we dont include it in active handle tracking.
- child->incRef();
+ auto ret = child->incRef();
+ XDCHECK(ret == RefcountWithFlags::incResult::incOk);
XDCHECK_EQ(2u, child->getRefCount());
+ insertInMMContainer(*child);
+
invalidateNvm(*parent);
if (auto eventTracker = getEventTracker()) {
eventTracker->record(AllocatorApiEvent::ADD_CHAINED, parent->getKey(),
@@ -538,22 +680,20 @@ CacheAllocator<CacheTrait>::getParentKey(const Item& chainedItem)
}
template <typename CacheTrait>
-void CacheAllocator<CacheTrait>::transferChainLocked(WriteHandle& parent,
+void CacheAllocator<CacheTrait>::transferChainLocked(Item& parent,
WriteHandle& newParent) {
// parent must be in a state to not have concurrent readers. Eviction code
- // paths rely on holding the last item handle. Since we hold on to an item
- // handle here, the chain will not be touched by any eviction code path.
- XDCHECK(parent);
+ // paths rely on holding the last item handle.
XDCHECK(newParent);
- XDCHECK_EQ(parent->getKey(), newParent->getKey());
- XDCHECK(parent->hasChainedItem());
+ XDCHECK_EQ(parent.getKey(), newParent->getKey());
+ XDCHECK(parent.hasChainedItem());
if (newParent->hasChainedItem()) {
throw std::invalid_argument(folly::sformat(
"New Parent {} has invalid state", newParent->toString()));
}
- auto headHandle = findChainedItem(*parent);
+ auto headHandle = findChainedItem(parent);
XDCHECK(headHandle);
// remove from the access container since we are changing the key
@@ -565,6 +705,7 @@ void CacheAllocator<CacheTrait>::transferChainLocked(WriteHandle& parent,
while (curr) {
XDCHECK_EQ(curr == headHandle.get() ? 2u : 1u, curr->getRefCount());
XDCHECK(curr->isInMMContainer());
+ XDCHECK(!newParent->isMoving());
curr->changeKey(newParentPtr);
curr = curr->getNext(compressor_);
}
@@ -576,7 +717,7 @@ void CacheAllocator<CacheTrait>::transferChainLocked(WriteHandle& parent,
folly::sformat("Did not expect to find an existing chain for {}",
newParent->toString(), oldHead->toString()));
}
- parent->unmarkHasChainedItem();
+ parent.unmarkHasChainedItem();
}
template <typename CacheTrait>
@@ -587,7 +728,7 @@ void CacheAllocator<CacheTrait>::transferChainAndReplace(
}
{ // scope for chained item lock
auto l = chainedItemLocks_.lockExclusive(parent->getKey());
- transferChainLocked(parent, newParent);
+ transferChainLocked(*parent, newParent);
}
if (replaceIfAccessible(*parent, *newParent)) {
@@ -654,33 +795,10 @@ CacheAllocator<CacheTrait>::replaceChainedItem(Item& oldItem,
}
template <typename CacheTrait>
-typename CacheAllocator<CacheTrait>::WriteHandle
-CacheAllocator<CacheTrait>::replaceChainedItemLocked(Item& oldItem,
- WriteHandle newItemHdl,
- const Item& parent) {
- XDCHECK(newItemHdl != nullptr);
- XDCHECK_GE(1u, oldItem.getRefCount());
-
- // grab the handle to the old item so that we can return this. Also, we need
- // to drop the refcount the parent holds on oldItem by manually calling
- // decRef. To do that safely we need to have a proper outstanding handle.
- auto oldItemHdl = acquire(&oldItem);
-
- // Replace the old chained item with new item in the MMContainer before we
- // actually replace the old item in the chain
-
- if (!replaceChainedItemInMMContainer(oldItem, *newItemHdl)) {
- // This should never happen since we currently hold an valid
- // parent handle. None of its chained items can be removed
- throw std::runtime_error(folly::sformat(
- "chained item cannot be replaced in MM container, oldItem={}, "
- "newItem={}, parent={}",
- oldItem.toString(), newItemHdl->toString(), parent.toString()));
- }
-
- XDCHECK(!oldItem.isInMMContainer());
- XDCHECK(newItemHdl->isInMMContainer());
-
+void CacheAllocator<CacheTrait>::replaceInChainLocked(Item& oldItem,
+ WriteHandle& newItemHdl,
+ const Item& parent,
+ bool fromMove) {
auto head = findChainedItem(parent);
XDCHECK(head != nullptr);
XDCHECK_EQ(reinterpret_cast<uintptr_t>(
@@ -709,16 +827,62 @@ CacheAllocator<CacheTrait>::replaceChainedItemLocked(Item& oldItem,
oldItem.asChainedItem().getNext(compressor_), compressor_);
oldItem.asChainedItem().setNext(nullptr, compressor_);
- // this should not result in 0 refcount. We are bumping down the internal
- // refcount. If it did, we would leak an item.
- oldItem.decRef();
- XDCHECK_LT(0u, oldItem.getRefCount()) << oldItem.toString();
+ // if called from moveChainedItem then ref will be zero, else
+ // greater than 0
+ if (fromMove) {
+ // if this is the head chained item, release the handle now
+ // while refCount > 1 so that the destructor does not
+ // call releaseBackToAllocator since we want to recycle oldItem
+ if (head) {
+ head.reset();
+ XDCHECK_EQ(1u, oldItem.getRefCount());
+ }
+ oldItem.decRef();
+ XDCHECK_EQ(0u, oldItem.getRefCount()) << oldItem.toString();
+ } else {
+ oldItem.decRef();
+ XDCHECK_LT(0u, oldItem.getRefCount()) << oldItem.toString();
+ }
// increment refcount to indicate parent owns this similar to addChainedItem
// Since this is an internal refcount, we dont include it in active handle
// tracking.
- newItemHdl->incRef();
+ auto ret = newItemHdl->incRef();
+ XDCHECK(ret == RefcountWithFlags::incResult::incOk);
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::replaceChainedItemLocked(Item& oldItem,
+ WriteHandle newItemHdl,
+ const Item& parent) {
+ XDCHECK(newItemHdl != nullptr);
+ XDCHECK_GE(1u, oldItem.getRefCount());
+
+ // grab the handle to the old item so that we can return this. Also, we need
+ // to drop the refcount the parent holds on oldItem by manually calling
+ // decRef. To do that safely we need to have a proper outstanding handle.
+ auto oldItemHdl = acquire(&oldItem);
+ XDCHECK_GE(2u, oldItem.getRefCount());
+
+ // Replace the old chained item with new item in the MMContainer before we
+ // actually replace the old item in the chain
+
+ if (!replaceChainedItemInMMContainer(oldItem, *newItemHdl)) {
+ // This should never happen since we currently hold an valid
+ // parent handle. None of its chained items can be removed
+ throw std::runtime_error(folly::sformat(
+ "chained item cannot be replaced in MM container, oldItem={}, "
+ "newItem={}, parent={}",
+ oldItem.toString(), newItemHdl->toString(), parent.toString()));
+ }
+
+ XDCHECK(!oldItem.isInMMContainer());
+ XDCHECK(newItemHdl->isInMMContainer());
+
+ replaceInChainLocked(oldItem, newItemHdl, parent, false);
+
return oldItemHdl;
}
@@ -732,8 +896,8 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
throw std::runtime_error(
folly::sformat("cannot release this item: {}", it.toString()));
}
-
- const auto allocInfo = allocator_->getAllocInfo(it.getMemory());
+ const auto tid = getTierId(it);
+ const auto allocInfo = allocator_[tid]->getAllocInfo(it.getMemory());
if (ctx == RemoveContext::kEviction) {
const auto timeNow = util::getCurrentTimeSec();
@@ -744,21 +908,23 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
stats_.perPoolEvictionAgeSecs_[allocInfo.poolId].trackValue(refreshTime);
}
- (*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub(
+ (*stats_.fragmentationSize)[tid][allocInfo.poolId][allocInfo.classId].sub(
util::getFragmentation(*this, it));
// Chained items can only end up in this place if the user has allocated
// memory for a chained item but has decided not to insert the chained item
// to a parent item and instead drop the chained item handle. In this case,
// we free the chained item directly without calling remove callback.
- if (it.isChainedItem()) {
+ //
+ // Except if we are moving a chained item between tiers -
+ // then it == toRecycle and we will want the normal recycle path
+ if (it.isChainedItem() && &it != toRecycle) {
if (toRecycle) {
throw std::runtime_error(
folly::sformat("Can not recycle a chained item {}, toRecyle",
it.toString(), toRecycle->toString()));
}
-
- allocator_->free(&it);
+ allocator_[tid]->free(&it);
return ReleaseRes::kReleased;
}
@@ -825,17 +991,19 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
while (head) {
auto next = head->getNext(compressor_);
-
+ const auto ctid = getTierId(head);
const auto childInfo =
- allocator_->getAllocInfo(static_cast<const void*>(head));
- (*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub(
+ allocator_[ctid]->getAllocInfo(static_cast<const void*>(head));
+ (*stats_.fragmentationSize)[ctid][childInfo.poolId][childInfo.classId].sub(
util::getFragmentation(*this, *head));
removeFromMMContainer(*head);
// If this chained item is marked as moving, we will not free it.
// We must capture the moving state before we do the decRef when
- // we know the item must still be valid
+ // we know the item must still be valid. Item cannot be marked as
+ // exclusive. Only the parent can be marked as such, and even the parent
+ // needs to be unmarked prior to calling releaseBackToAllocator.
const bool wasMoving = head->isMoving();
XDCHECK(!head->isMarkedForEviction());
@@ -847,22 +1015,21 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
// If the item is already moving and we already decremented the
// refcount, we don't need to free this item. We'll let the slab
// release thread take care of that
- if (!wasMoving) {
- if (childRef != 0) {
- throw std::runtime_error(folly::sformat(
- "chained item refcount is not zero. We cannot proceed! "
- "Ref: {}, Chained Item: {}",
- childRef, head->toString()));
- }
+ XDCHECK(!wasMoving);
+ if (childRef != 0) {
+ throw std::runtime_error(folly::sformat(
+ "chained item refcount is not zero. We cannot proceed! "
+ "Ref: {}, Chained Item: {}",
+ childRef, head->toString()));
+ }
- // Item is not moving and refcount is 0, we can proceed to
- // free it or recylce the memory
- if (head == toRecycle) {
- XDCHECK(ReleaseRes::kReleased != res);
- res = ReleaseRes::kRecycled;
- } else {
- allocator_->free(head);
- }
+ // Item is not moving and refcount is 0, we can proceed to
+ // free it or recycle the memory
+ if (head == toRecycle) {
+ XDCHECK(ReleaseRes::kReleased != res);
+ res = ReleaseRes::kRecycled;
+ } else {
+ allocator_[ctid]->free(head);
}
stats_.numChainedChildItems.dec();
@@ -872,23 +1039,24 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
}
if (&it == toRecycle) {
+ XDCHECK_EQ(it.getRefCount(),0u);
XDCHECK(ReleaseRes::kReleased != res);
res = ReleaseRes::kRecycled;
} else {
XDCHECK(it.isDrained());
- allocator_->free(&it);
+ allocator_[tid]->free(&it);
}
return res;
}
template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::incRef(Item& it) {
- if (it.incRef()) {
- ++handleCount_.tlStats();
- return true;
- }
- return false;
+RefcountWithFlags::incResult CacheAllocator<CacheTrait>::incRef(Item& it) {
+ auto ret = it.incRef();
+ if (ret == RefcountWithFlags::incResult::incOk) {
+ ++handleCount_.tlStats();
+ }
+ return ret;
}
template <typename CacheTrait>
@@ -908,11 +1076,19 @@ CacheAllocator<CacheTrait>::acquire(Item* it) {
SCOPE_FAIL { stats_.numRefcountOverflow.inc(); };
- if (LIKELY(incRef(*it))) {
- return WriteHandle{it, *this};
- } else {
- // item is being evicted
- return WriteHandle{};
+ while (true) {
+ auto incRes = incRef(*it);
+ if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
+ return WriteHandle{it, *this};
+ } else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
+ // item is being evicted
+ return WriteHandle{};
+ } else {
+ // item is being moved - wait for completion
+ WriteHandle handle;
+ if (tryGetHandleWithWaitContextForMovingItem(*it, handle))
+ return handle;
+ }
}
}
@@ -955,6 +1131,25 @@ bool CacheAllocator<CacheTrait>::replaceInMMContainer(Item& oldItem,
}
}
+template <typename CacheTrait>
+bool CacheAllocator<CacheTrait>::replaceInMMContainer(Item* oldItem,
+ Item& newItem) {
+ return replaceInMMContainer(*oldItem, newItem);
+}
+
+template <typename CacheTrait>
+bool CacheAllocator<CacheTrait>::replaceInMMContainer(EvictionIterator& oldItemIt,
+ Item& newItem) {
+ auto& oldContainer = getMMContainer(*oldItemIt);
+ auto& newContainer = getMMContainer(newItem);
+
+ // This function is used for eviction across tiers
+ XDCHECK(&oldContainer != &newContainer);
+ oldContainer.remove(oldItemIt);
+
+ return newContainer.add(newItem);
+}
+
template <typename CacheTrait>
bool CacheAllocator<CacheTrait>::replaceChainedItemInMMContainer(
Item& oldItem, Item& newItem) {
@@ -1054,7 +1249,6 @@ CacheAllocator<CacheTrait>::insertOrReplace(const WriteHandle& handle) {
: std::unique_lock();
replaced = accessContainer_->insertOrReplace(*(handle.getInternal()));
-
if (replaced && replaced->isNvmClean() && !replaced->isNvmEvicted()) {
// item is to be replaced and the destructor will be executed
// upon memory released, mark it in nvm to avoid destructor
@@ -1100,19 +1294,73 @@ CacheAllocator<CacheTrait>::insertOrReplace(const WriteHandle& handle) {
return replaced;
}
+/* Next two methods are used to asynchronously move Item between memory tiers.
+ *
+ * The thread, which moves Item, allocates new Item in the tier we are moving to
+ * and calls moveRegularItem() method. This method does the following:
+ * 1. Update the access container with the new item from the tier we are
+ * moving to. This Item has moving flag set.
+ * 2. Copy data from the old Item to the new one.
+ *
+ * Concurrent threads which are getting handle to the same key:
+ * 1. When a handle is created it checks if the moving flag is set
+ * 2. If so, Handle implementation creates waitContext and adds it to the
+ * MoveCtx by calling tryGetHandleWithWaitContextForMovingItem().
+ * 3. Waits until the moving thread completes its job.
+ */
template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
- WriteHandle& newItemHdl) {
- XDCHECK(config_.moveCb);
- util::LatencyTracker tracker{stats_.moveRegularLatency_};
+bool
+CacheAllocator<CacheTrait>::tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle) {
+ auto shard = getShardForKey(item.getKey());
+ auto& movesMap = getMoveMapForShard(shard);
+ {
+ auto lock = getMoveLockForShard(shard);
- if (!oldItem.isAccessible() || oldItem.isExpired()) {
- return false;
+ // item might have been evicted or moved before the lock was acquired
+ if (!item.isMoving())
+ return false;
+
+ WriteHandle hdl{*this};
+ auto waitContext = hdl.getItemWaitContext();
+
+ auto ret = movesMap.try_emplace(item.getKey(), std::make_unique<MoveCtx>());
+ ret.first->second->addWaiter(std::move(waitContext));
+
+ handle = std::move(hdl);
+ return true;
}
+}
+
+template <typename CacheTrait>
+size_t CacheAllocator<CacheTrait>::wakeUpWaitersLocked(folly::StringPiece key,
+ WriteHandle&& handle) {
+ std::unique_ptr<MoveCtx> ctx;
+ auto shard = getShardForKey(key);
+ auto& movesMap = getMoveMapForShard(shard);
+ {
+ auto lock = getMoveLockForShard(shard);
+ movesMap.eraseInto(key, [&](auto &&key, auto &&value) {
+ ctx = std::move(value);
+ });
+ }
+
+ if (ctx) {
+ ctx->setItemHandle(std::move(handle));
+ return ctx->numWaiters();
+ }
+
+ return 0;
+}
+
+template <typename CacheTrait>
+bool CacheAllocator<CacheTrait>::moveRegularItem(
+ Item& oldItem, WriteHandle& newItemHdl) {
+ XDCHECK(oldItem.isMoving());
+ XDCHECK(!oldItem.isExpired());
+ // TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
+ // ??? util::LatencyTracker tracker{stats_.evictRegularLatency_};
XDCHECK_EQ(newItemHdl->getSize(), oldItem.getSize());
- XDCHECK_EQ(reinterpret_cast<uintptr_t>(&getMMContainer(oldItem)),
- reinterpret_cast<uintptr_t>(&getMMContainer(*newItemHdl)));
// take care of the flags before we expose the item to be accessed. this
// will ensure that when another thread removes the item from RAM, we issue
@@ -1121,52 +1369,44 @@ bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
newItemHdl->markNvmClean();
}
- // Execute the move callback. We cannot make any guarantees about the
- // consistency of the old item beyond this point, because the callback can
- // do more than a simple memcpy() e.g. update external references. If there
- // are any remaining handles to the old item, it is the caller's
- // responsibility to invalidate them. The move can only fail after this
- // statement if the old item has been removed or replaced, in which case it
- // should be fine for it to be left in an inconsistent state.
- config_.moveCb(oldItem, *newItemHdl, nullptr);
- // Inside the access container's lock, this checks if the old item is
- // accessible and its refcount is zero. If the item is not accessible,
- // there is no point to replace it since it had already been removed
- // or in the process of being removed. If the item is in cache but the
- // refcount is non-zero, it means user could be attempting to remove
- // this item through an API such as remove(itemHandle). In this case,
- // it is unsafe to replace the old item with a new one, so we should
- // also abort.
- if (!accessContainer_->replaceIf(oldItem, *newItemHdl,
- itemExclusivePredicate)) {
- return false;
+ if (config_.moveCb) {
+ // Execute the move callback. We cannot make any guarantees about the
+ // consistency of the old item beyond this point, because the callback can
+ // do more than a simple memcpy() e.g. update external references. If there
+ // are any remaining handles to the old item, it is the caller's
+ // responsibility to invalidate them. The move can only fail after this
+ // statement if the old item has been removed or replaced, in which case it
+ // should be fine for it to be left in an inconsistent state.
+ config_.moveCb(oldItem, *newItemHdl, nullptr);
+ } else {
+ std::memcpy(newItemHdl->getMemory(), oldItem.getMemory(),
+ oldItem.getSize());
}
- // Inside the MM container's lock, this checks if the old item exists to
- // make sure that no other thread removed it, and only then replaces it.
- if (!replaceInMMContainer(oldItem, *newItemHdl)) {
- accessContainer_->remove(*newItemHdl);
- return false;
- }
+ // Adding the item to mmContainer has to succeed since no one can remove the item
+ auto& newContainer = getMMContainer(*newItemHdl);
+ auto mmContainerAdded = newContainer.add(*newItemHdl);
+ XDCHECK(mmContainerAdded);
- // Replacing into the MM container was successful, but someone could have
- // called insertOrReplace() or remove() before or after the
- // replaceInMMContainer() operation, which would invalidate newItemHdl.
- if (!newItemHdl->isAccessible()) {
- removeFromMMContainer(*newItemHdl);
- return false;
- }
- // no one can add or remove chained items at this point
+ auto predicate = [&](const Item& item){
+ // we rely on moving flag being set (it should block all readers)
+ XDCHECK_EQ(item.getRefCount(),0);
+ XDCHECK(item.isMoving());
+ return item.isMoving();
+ };
+
if (oldItem.hasChainedItem()) {
- // safe to acquire handle for a moving Item
- auto oldHandle = acquire(&oldItem);
- XDCHECK_EQ(1u, oldHandle->getRefCount()) << oldHandle->toString();
XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString();
try {
- auto l = chainedItemLocks_.lockExclusive(oldItem.getKey());
- transferChainLocked(oldHandle, newItemHdl);
+ auto l = chainedItemLocks_.tryLockExclusive(oldItem.getKey());
+ if (l) {
+ transferChainLocked(oldItem, newItemHdl);
+ } else {
+ newContainer.remove(*newItemHdl);
+ return false;
+ }
} catch (const std::exception& e) {
// this should never happen because we drained all the handles.
XLOGF(DFATAL, "{}", e.what());
@@ -1176,71 +1416,87 @@ bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
XDCHECK(!oldItem.hasChainedItem());
XDCHECK(newItemHdl->hasChainedItem());
}
+
+ if (!accessContainer_->replaceIf(oldItem, *newItemHdl, predicate)) {
+ newContainer.remove(*newItemHdl);
+ return false;
+ }
+
newItemHdl.unmarkNascent();
return true;
}
template <typename CacheTrait>
bool CacheAllocator<CacheTrait>::moveChainedItem(ChainedItem& oldItem,
- WriteHandle& newItemHdl) {
- XDCHECK(config_.moveCb);
+ WriteHandle& newItemHdl,
+ Item& parentItem) {
+ XDCHECK(parentItem.isMoving());
util::LatencyTracker tracker{stats_.moveChainedLatency_};
- // This item has been unlinked from its parent and we're the only
- // owner of it, so we're done here
- if (!oldItem.isInMMContainer() || oldItem.isOnlyMoving()) {
- return false;
- }
-
- const auto parentKey = oldItem.getParentItem(compressor_).getKey();
-
- // Grab lock to prevent anyone else from modifying the chain
- auto l = chainedItemLocks_.lockExclusive(parentKey);
-
- auto parentHandle =
- validateAndGetParentHandleForChainedMoveLocked(oldItem, parentKey);
- if (!parentHandle) {
- return false;
+ auto& expectedParent = oldItem.getParentItem(compressor_);
+ const auto parentKey = expectedParent.getKey();
+ auto l = chainedItemLocks_.tryLockExclusive(parentKey);
+ if (!l) {
+ return false;
}
+ XDCHECK_EQ(&expectedParent,&parentItem);
- // once we have the moving sync and valid parent for the old item, check if
+ // check if
// the original allocation was made correctly. If not, we destroy the
// allocation to indicate a retry to moving logic above.
if (reinterpret_cast<uintptr_t>(
&newItemHdl->asChainedItem().getParentItem(compressor_)) !=
- reinterpret_cast<uintptr_t>(&parentHandle->asChainedItem())) {
- newItemHdl.reset();
+ reinterpret_cast(&parentItem.asChainedItem())) {
+ XDCHECK(false);
return false;
}
XDCHECK_EQ(reinterpret_cast<uintptr_t>(
&newItemHdl->asChainedItem().getParentItem(compressor_)),
- reinterpret_cast<uintptr_t>(&parentHandle->asChainedItem()));
-
- // In case someone else had removed this chained item from its parent by now
- // So we check again to see if the it has been unlinked from its parent
- if (!oldItem.isInMMContainer() || oldItem.isOnlyMoving()) {
- return false;
- }
+ reinterpret_cast<uintptr_t>(&parentItem.asChainedItem()));
- auto parentPtr = parentHandle.getInternal();
+ auto parentPtr = &parentItem;
XDCHECK_EQ(reinterpret_cast<uintptr_t>(parentPtr),
reinterpret_cast<uintptr_t>(&oldItem.getParentItem(compressor_)));
- // Invoke the move callback to fix up any user data related to the chain
- config_.moveCb(oldItem, *newItemHdl, parentPtr);
+ if (config_.moveCb) {
+ // Execute the move callback. We cannot make any guarantees about the
+ // consistency of the old item beyond this point, because the callback can
+ // do more than a simple memcpy() e.g. update external references. If there
+ // are any remaining handles to the old item, it is the caller's
+ // responsibility to invalidate them. The move can only fail after this
+ // statement if the old item has been removed or replaced, in which case it
+ // should be fine for it to be left in an inconsistent state.
+ config_.moveCb(oldItem, *newItemHdl, parentPtr);
+ } else {
+ std::memcpy(newItemHdl->getMemory(), oldItem.getMemory(),
+ oldItem.getSize());
+ }
// Replace the old item with the new one, both in the parent's chain and
// in the MMContainer.
- auto oldItemHandle =
- replaceChainedItemLocked(oldItem, std::move(newItemHdl), *parentHandle);
- XDCHECK(oldItemHandle->isMoving());
- XDCHECK(!oldItemHandle->isInMMContainer());
+ XDCHECK_EQ(parentItem.getRefCount(), 0);
+ XDCHECK(parentItem.isMoving());
+ XDCHECK(l);
+
+ auto& newContainer = getMMContainer(*newItemHdl);
+ auto mmContainerAdded = newContainer.add(*newItemHdl);
+ XDCHECK(mmContainerAdded);
+
+ replaceInChainLocked(oldItem, newItemHdl, parentItem, true);
return true;
}
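+
+// helper that creates an NVM-cache put token only when the item is
+// admissible to the NVM cache; otherwise an empty (invalid) token is returned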
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::NvmCacheT::PutToken
+CacheAllocator<CacheTrait>::createPutToken(Item& item) {
+ const bool evictToNvmCache = shouldWriteToNvmCache(item);
+ return evictToNvmCache ? nvmCache_->createPutToken(item.getKey())
+ : typename NvmCacheT::PutToken{};
+}
+
template <typename CacheTrait>
void CacheAllocator<CacheTrait>::unlinkItemForEviction(Item& it) {
XDCHECK(it.isMarkedForEviction());
@@ -1257,20 +1513,28 @@ void CacheAllocator<CacheTrait>::unlinkItemForEviction(Item& it) {
template <typename CacheTrait>
std::pair<typename CacheAllocator<CacheTrait>::Item*,
typename CacheAllocator<CacheTrait>::Item*>
-CacheAllocator<CacheTrait>::getNextCandidate(PoolId pid,
+CacheAllocator<CacheTrait>::getNextCandidate(TierId tid,
+ PoolId pid,
ClassId cid,
unsigned int& searchTries) {
typename NvmCacheT::PutToken token;
Item* toRecycle = nullptr;
+ Item* toRecycleParent = nullptr;
Item* candidate = nullptr;
- auto& mmContainer = getMMContainer(pid, cid);
-
- mmContainer.withEvictionIterator([this, pid, cid, &candidate, &toRecycle,
- &searchTries, &mmContainer,
- &token](auto&& itr) {
+ bool isExpired = false;
+ Item* syncItem = nullptr;
+ bool chainedItem = false;
+ auto& mmContainer = getMMContainer(tid, pid, cid);
+ bool lastTier = tid + 1 >= getNumTiers();
+
+ mmContainer.withEvictionIterator([this, tid, pid, cid, &candidate,
+ &toRecycle, &toRecycleParent, &syncItem,
+ &chainedItem,
+ &searchTries, &mmContainer, &lastTier,
+ &isExpired, &token](auto&& itr) {
if (!itr) {
++searchTries;
- (*stats_.evictionAttempts)[pid][cid].inc();
+ (*stats_.evictionAttempts)[tid][pid][cid].inc();
return;
}
@@ -1278,50 +1542,82 @@ CacheAllocator<CacheTrait>::getNextCandidate(PoolId pid,
config_.evictionSearchTries > searchTries) &&
itr) {
++searchTries;
- (*stats_.evictionAttempts)[pid][cid].inc();
+ (*stats_.evictionAttempts)[tid][pid][cid].inc();
auto* toRecycle_ = itr.get();
- auto* candidate_ =
- toRecycle_->isChainedItem()
+ bool chainedItem_ = toRecycle_->isChainedItem();
+ Item* toRecycleParent_ = chainedItem_
? &toRecycle_->asChainedItem().getParentItem(compressor_)
- : toRecycle_;
-
- const bool evictToNvmCache = shouldWriteToNvmCache(*candidate_);
- auto putToken = evictToNvmCache
- ? nvmCache_->createPutToken(candidate_->getKey())
- : typename NvmCacheT::PutToken{};
-
- if (evictToNvmCache && !putToken.isValid()) {
+ : nullptr;
+ // in order to safely check if the expected parent (toRecycleParent_) matches
+ // the current parent on the chained item, we need to take the chained
+ // item lock so we are sure that nobody else will be editing the chain
+ auto l_ = chainedItem_
+ ? chainedItemLocks_.tryLockExclusive(toRecycleParent_->getKey())
+ : decltype(chainedItemLocks_.tryLockExclusive(toRecycle_->getKey()))();
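+ // (for non-chained items this value-initializes a lock object of the same
+ // type that holds no lock)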
+
+ if (chainedItem_ &&
+ (!l_ || &toRecycle_->asChainedItem().getParentItem(compressor_) !=
+ toRecycleParent_)) {
+ ++itr;
+ continue;
+ }
+ Item* candidate_;
+ Item* syncItem_;
+ // for chained items moving to the next tier, sync on the parent item;
+ // for chained items on the last tier, evict the entire chain via the parent
+ if (!lastTier && chainedItem_) {
+ syncItem_ = toRecycleParent_;
+ candidate_ = toRecycle_;
+ } else if (lastTier && chainedItem_) {
+ candidate_ = toRecycleParent_;
+ syncItem_ = toRecycleParent_;
+ } else {
+ candidate_ = toRecycle_;
+ syncItem_ = toRecycle_;
+ }
+ // if it's the last tier, the item will be evicted, so we need to create
+ // the put token before marking it exclusive
+ const bool evictToNvmCache = lastTier && shouldWriteToNvmCache(*candidate_);
+
+ auto token_ = evictToNvmCache
+ ? nvmCache_->createPutToken(candidate_->getKey())
+ : typename NvmCacheT::PutToken{};
+
+ if (evictToNvmCache && !token_.isValid()) {
stats_.evictFailConcurrentFill.inc();
++itr;
continue;
}
-
- auto markedForEviction = candidate_->markForEviction();
- if (!markedForEviction) {
+
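+ // evict outright if this is the last tier or the item has expired;
+ // otherwise only mark it moving so it can migrate to the next tier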
+ auto marked = (lastTier || candidate_->isExpired())
+ ? syncItem_->markForEviction()
+ : syncItem_->markMoving();
+ if (!marked) {
if (candidate_->hasChainedItem()) {
stats_.evictFailParentAC.inc();
} else {
stats_.evictFailAC.inc();
}
++itr;
+ XDCHECK_EQ(toRecycle, nullptr);
+ XDCHECK_EQ(candidate, nullptr);
continue;
}
-
+
+ XDCHECK(syncItem_->isMoving() || syncItem_->isMarkedForEviction());
+ toRecycleParent = toRecycleParent_;
+ chainedItem = chainedItem_;
// markForEviction to make sure no other thread is evicting the item
- // nor holding a handle to that item
+ // nor holding a handle to that item, if this is the last tier,
+ // since we won't be moving the item to the next tier
toRecycle = toRecycle_;
candidate = candidate_;
- token = std::move(putToken);
-
- // Check if parent changed for chained items - if yes, we cannot
- // remove the child from the mmContainer as we will not be evicting
- // it. We could abort right here, but we need to cleanup in case
- // unmarkForEviction() returns 0 - so just go through normal path.
- if (!toRecycle_->isChainedItem() ||
- &toRecycle->asChainedItem().getParentItem(compressor_) == candidate) {
- mmContainer.remove(itr);
+ isExpired = candidate_->isExpired();
+ token = std::move(token_);
+ if (chainedItem) {
+ XDCHECK(l_);
+ XDCHECK_EQ(toRecycleParent,
+ &toRecycle_->asChainedItem().getParentItem(compressor_));
}
+ mmContainer.remove(itr);
+
return;
}
});
@@ -1332,25 +1628,106 @@ CacheAllocator::getNextCandidate(PoolId pid,
XDCHECK(toRecycle);
XDCHECK(candidate);
- XDCHECK(candidate->isMarkedForEviction());
+ XDCHECK(candidate->isMoving() || candidate->isMarkedForEviction());
+
+ auto evictedToNext = (lastTier || isExpired) ? nullptr
+ : tryEvictToNextMemoryTier(*candidate, false);
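+ // a null handle means the item could not be moved to the next tier
+ // (last tier, expired, or the move failed) and must be evicted entirely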
+ if (!evictedToNext) {
+ // failed to move a chained item - so evict the entire chain
+ if (candidate->isChainedItem()) {
+ // candidate should be the parent now
+ XDCHECK(toRecycleParent->isMoving());
+ XDCHECK_EQ(candidate, toRecycle);
+ // we now evict the whole chain and, in doing so, recycle the child
+ candidate = toRecycleParent;
+ }
+ // if insertOrReplace was called during the move, the candidate will not
+ // be accessible (the replace failed during tryEvict) - that is why we
+ // failed to evict to the next tier, and insertOrReplace will remove the
+ // item from the NVM cache. However, if the candidate is still accessible,
+ // the allocation in the next tier failed, so we continue and evict the
+ // item to the NVM cache.
+ bool failedToReplace = !candidate->isAccessible();
+ if (!token.isValid() && !failedToReplace) {
+ token = createPutToken(*candidate);
+ }
+ // tryEvictToNextMemoryTier can fail if:
+ // a) allocation of the new item fails - in that case it should still be
+ // possible to mark the item for eviction.
+ // b) another thread calls insertOrReplace and the item is no longer
+ // accessible.
+ //
+ // if we are on the last tier, we would have already marked the item as
+ // exclusive since we will not be moving it to the next tier but rather
+ // evicting it altogether - no need to markForEvictionWhenMoving
+ auto ret = (lastTier || isExpired) ? true
+ : candidate->markForEvictionWhenMoving();
+ XDCHECK(ret);
+
+ unlinkItemForEviction(*candidate);
+
+ if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)
+ && !failedToReplace) {
+ nvmCache_->put(*candidate, std::move(token));
+ }
+ // wake up any readers that wait for the move to complete
+ // it's safe to do now, as we have the item marked exclusive and
+ // no other reader can be added to the waiters list
+ wakeUpWaiters(*candidate, {});
+ } else {
+ XDCHECK(!evictedToNext->isMarkedForEviction() && !evictedToNext->isMoving());
+ XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
+ XDCHECK(!candidate->isAccessible());
+ XDCHECK(candidate->getKey() == evictedToNext->getKey());
+
+ (*stats_.numWritebacks)[tid][pid][cid].inc();
+ if (chainedItem) {
+ XDCHECK(toRecycleParent->isMoving());
+ XDCHECK_EQ(evictedToNext->getRefCount(), 2u);
+ (*stats_.chainedItemEvictions)[tid][pid][cid].inc();
+ // check if by releasing the item we intend to, we actually
+ // recycle the candidate.
+ auto ret = releaseBackToAllocator(*candidate, RemoveContext::kEviction,
+ /* isNascent */ false, toRecycle);
+ XDCHECK_EQ(ret, ReleaseRes::kRecycled);
+ // once we unmark moving, other threads will try to allocate - drop the
+ // handle now so the refcount falls to 1
+ evictedToNext.reset();
+ auto ref = toRecycleParent->unmarkMoving();
+ if (UNLIKELY(ref == 0)) {
+ wakeUpWaiters(*toRecycleParent, {});
+ const auto res =
+ releaseBackToAllocator(*toRecycleParent, RemoveContext::kNormal, false);
+ XDCHECK(res == ReleaseRes::kReleased);
+ } else {
+ auto parentHandle = acquire(toRecycleParent);
+ if (parentHandle) {
+ wakeUpWaiters(*toRecycleParent, std::move(parentHandle));
+ } // if the parent handle is null, some other thread already called
+ // wakeUpWaiters with a null handle and released the parent back to
+ // the allocator
+ }
+ } else {
+ wakeUpWaiters(*candidate, std::move(evictedToNext));
+ }
+ }
- unlinkItemForEviction(*candidate);
+ XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
- if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)) {
- nvmCache_->put(*candidate, std::move(token));
- }
return {candidate, toRecycle};
}
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::Item*
-CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
+CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
// Keep searching for a candidate until we were able to evict it
// or until the search limit has been exhausted
unsigned int searchTries = 0;
while (config_.evictionSearchTries == 0 ||
config_.evictionSearchTries > searchTries) {
- auto [candidate, toRecycle] = getNextCandidate(pid, cid, searchTries);
+ auto [candidate, toRecycle] = getNextCandidate(tid, pid, cid, searchTries);
// Reached the end of the eviction queue but couldn't find a candidate,
// start again.
@@ -1361,9 +1738,9 @@ CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
// NULL. If `ref` == 0 then it means that we are the last holder of
// that item.
if (candidate->hasChainedItem()) {
- (*stats_.chainedItemEvictions)[pid][cid].inc();
+ (*stats_.chainedItemEvictions)[tid][pid][cid].inc();
} else {
- (*stats_.regularItemEvictions)[pid][cid].inc();
+ (*stats_.regularItemEvictions)[tid][pid][cid].inc();
}
if (auto eventTracker = getEventTracker()) {
@@ -1372,6 +1749,7 @@ CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
candidate->getConfiguredTTL().count());
}
+ XDCHECK(!candidate->isChainedItem());
// check if by releasing the item we intend to, we actually
// recycle the candidate.
auto ret = releaseBackToAllocator(*candidate, RemoveContext::kEviction,
@@ -1431,6 +1809,117 @@ bool CacheAllocator<CacheTrait>::shouldWriteToNvmCacheExclusive(
return true;
}
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
+ TierId tid, PoolId pid, Item& item, bool fromBgThread) {
+
+ TierId nextTier = tid; // TODO - calculate this based on some admission policy
+ while (++nextTier < getNumTiers()) { // try to evict down to the next memory tiers
+ // always evict item from the nextTier to make room for new item
+ bool evict = true;
+
+ // allocateInternal might trigger another eviction
+ WriteHandle newItemHdl{};
+ Item* parentItem;
+ bool chainedItem = false;
+ if (item.isChainedItem()) {
+ chainedItem = true;
+ parentItem = &item.asChainedItem().getParentItem(compressor_);
+ XDCHECK(parentItem->isMoving());
+ XDCHECK(item.isChainedItem() && item.getRefCount() == 1);
+ XDCHECK_EQ(0, parentItem->getRefCount());
+ newItemHdl = allocateChainedItemInternalTier(*parentItem,
+ item.getSize(),
+ nextTier);
+ } else {
+ // this assert can fail if parent changed
+ XDCHECK(item.isMoving());
+ XDCHECK(item.getRefCount() == 0);
+ newItemHdl = allocateInternalTier(nextTier, pid,
+ item.getKey(),
+ item.getSize(),
+ item.getCreationTime(),
+ item.getExpiryTime(),
+ fromBgThread,
+ evict);
+ }
+
+ if (newItemHdl) {
+ XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
+ bool moveSuccess = chainedItem
+ ? moveChainedItem(item.asChainedItem(),
+ newItemHdl, *parentItem)
+ : moveRegularItem(item, newItemHdl);
+ if (!moveSuccess) {
+ return WriteHandle{};
+ }
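+ // for chained items the moving mark is held by the parent and is cleared
+ // by the caller once the whole chain has been processed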
+ if (!chainedItem) {
+ XDCHECK_EQ(newItemHdl->getKey(), item.getKey());
+ item.unmarkMoving();
+ }
+ return newItemHdl;
+ } else {
+ return WriteHandle{};
+ }
+ }
+
+ return {};
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(Item& item, bool fromBgThread) {
+ auto tid = getTierId(item);
+ auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId;
+ return tryEvictToNextMemoryTier(tid, pid, item, fromBgThread);
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(
+ TierId tid, PoolId pid, Item& item, bool fromBgThread) {
+ if (item.isExpired()) { return {}; }
+ TierId nextTier = tid;
+ while (nextTier > 0) { // try to promote up to the next faster memory tier
+ auto toPromoteTier = nextTier - 1;
+ --nextTier;
+
+ // always allow eviction in toPromoteTier to make room for the promoted item
+ bool evict = true;
+
+ // allocateInternal might trigger another eviction
+ auto newItemHdl = allocateInternalTier(toPromoteTier, pid,
+ item.getKey(),
+ item.getSize(),
+ item.getCreationTime(),
+ item.getExpiryTime(),
+ fromBgThread,
+ true);
+
+ if (newItemHdl) {
+ XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
+ if (!moveRegularItem(item, newItemHdl)) {
+ return WriteHandle{};
+ }
+ item.unmarkMoving();
+ return newItemHdl;
+ } else {
+ return WriteHandle{};
+ }
+ }
+
+ return {};
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(Item& item, bool fromBgThread) {
+ auto tid = getTierId(item);
+ auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId;
+ return tryPromoteToNextMemoryTier(tid, pid, item, fromBgThread);
+}
+
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::RemoveRes
CacheAllocator<CacheTrait>::remove(typename Item::Key key) {
@@ -1631,21 +2120,57 @@ void CacheAllocator<CacheTrait>::invalidateNvm(Item& item) {
}
}
+template <typename CacheTrait>
+TierId
+CacheAllocator<CacheTrait>::getTierId(const Item& item) const {
+ return getTierId(item.getMemory());
+}
+
+template <typename CacheTrait>
+TierId
+CacheAllocator<CacheTrait>::getTierId(const void* ptr) const {
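+ // linear scan over the tiers; the tier count is expected to be small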
+ for (TierId tid = 0; tid < getNumTiers(); tid++) {
+ if (allocator_[tid]->isMemoryInAllocator(ptr))
+ return tid;
+ }
+
+ throw std::invalid_argument("Item does not belong to any tier!");
+}
+
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::MMContainer&
CacheAllocator<CacheTrait>::getMMContainer(const Item& item) const noexcept {
+ const auto tid = getTierId(item);
const auto allocInfo =
- allocator_->getAllocInfo(static_cast<const void*>(&item));
- return getMMContainer(allocInfo.poolId, allocInfo.classId);
+ allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
+ return getMMContainer(tid, allocInfo.poolId, allocInfo.classId);
}
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::MMContainer&
-CacheAllocator<CacheTrait>::getMMContainer(PoolId pid,
+CacheAllocator<CacheTrait>::getMMContainer(TierId tid,
+ PoolId pid,
ClassId cid) const noexcept {
- XDCHECK_LT(static_cast<size_t>(pid), mmContainers_.size());
- XDCHECK_LT(static_cast<size_t>(cid), mmContainers_[pid].size());
- return *mmContainers_[pid][cid];
+ XDCHECK_LT(static_cast<size_t>(tid), mmContainers_.size());
+ XDCHECK_LT(static_cast<size_t>(pid), mmContainers_[tid].size());
+ XDCHECK_LT(static_cast<size_t>(cid), mmContainers_[tid][pid].size());
+ return *mmContainers_[tid][pid][cid];
+}
+
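+// stats accessor that tolerates out-of-range tier/pool/class ids by
+// returning empty stats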
+template <typename CacheTrait>
+MMContainerStat CacheAllocator<CacheTrait>::getMMContainerStat(
+ TierId tid, PoolId pid, ClassId cid) const noexcept {
+ if (static_cast<size_t>(tid) >= mmContainers_.size()) {
+ return MMContainerStat{};
+ }
+ if (static_cast<size_t>(pid) >= mmContainers_[tid].size()) {
+ return MMContainerStat{};
+ }
+ if (static_cast<size_t>(cid) >= mmContainers_[tid][pid].size()) {
+ return MMContainerStat{};
+ }
+ return mmContainers_[tid][pid][cid] ? mmContainers_[tid][pid][cid]->getStats()
+ : MMContainerStat{};
}
template <typename CacheTrait>
@@ -1839,23 +2364,25 @@ void CacheAllocator<CacheTrait>::markUseful(const ReadHandle& handle,
template <typename CacheTrait>
bool CacheAllocator<CacheTrait>::recordAccessInMMContainer(Item& item,
AccessMode mode) {
+ const auto tid = getTierId(item);
const auto allocInfo =
- allocator_->getAllocInfo(static_cast<const void*>(&item));
- (*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc();
+ allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
+ (*stats_.cacheHits)[tid][allocInfo.poolId][allocInfo.classId].inc();
// track recently accessed items if needed
if (UNLIKELY(config_.trackRecentItemsForDump)) {
ring_->trackItem(reinterpret_cast<uintptr_t>(&item), item.getSize());
}
- auto& mmContainer = getMMContainer(allocInfo.poolId, allocInfo.classId);
+ auto& mmContainer = getMMContainer(tid, allocInfo.poolId, allocInfo.classId);
return mmContainer.recordAccess(item, mode);
}
template <typename CacheTrait>
uint32_t CacheAllocator<CacheTrait>::getUsableSize(const Item& item) const {
+ const auto tid = getTierId(item);
const auto allocSize =
- allocator_->getAllocInfo(static_cast