Skip to content

Commit 7390167

Browse files
committed
avoid meta/data shrink when dnode is over quota
Fix: #17487 When iterating over millions of files, the dnode cache grows almost unbounded, consuming nearly all available ARC memory and pushing out valuable metadata and data. This is probably due to the kernel not releasing its dentry and inode caches, keeping dnodes pinned and unable to be pruned. This patch avoids shrinking metadata and data when the dnode size is over quota, forcing the kernel to drop its caches and, in turn, enabling the ZFS shrinker thread to prune the now-unpinned dnodes. Signed-off-by: Gionatan Danti <[email protected]>
1 parent 4c2a7f8 commit 7390167

File tree

1 file changed

+10
-29
lines changed

1 file changed

+10
-29
lines changed

module/zfs/arc.c

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4426,18 +4426,6 @@ arc_evict_adj(uint64_t frac, uint64_t total, uint64_t up, uint64_t down,
44264426
return (frac + up - down);
44274427
}
44284428

4429-
/*
4430-
* Calculate (x * multiplier / divisor) without unnecessary overflows.
4431-
*/
4432-
static uint64_t
4433-
arc_mf(uint64_t x, uint64_t multiplier, uint64_t divisor)
4434-
{
4435-
uint64_t q = (x / divisor);
4436-
uint64_t r = (x % divisor);
4437-
4438-
return ((q * multiplier) + ((r * multiplier) / divisor));
4439-
}
4440-
44414429
/*
44424430
* Evict buffers from the cache, such that arcstat_size is capped by arc_c.
44434431
*/
@@ -4485,29 +4473,17 @@ arc_evict(void)
44854473
uint64_t ac = arc_c;
44864474
int64_t wt = t - (asize - ac);
44874475

4488-
/*
4489-
* Try to reduce pinned dnodes if more than 3/4 of wanted metadata
4490-
* target is not evictable or if they go over arc_dnode_limit.
4491-
*/
4476+
/* Avoid shrinking metadata/data to force dentry and inode reclaim. */
44924477
int64_t prune = 0;
44934478
int64_t dn = wmsum_value(&arc_sums.arcstat_dnode_size);
4494-
int64_t nem = zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA])
4495-
+ zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA])
4496-
- zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA])
4497-
- zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]);
4498-
w = wt * (int64_t)(arc_meta >> 16) >> 16;
4499-
if (nem > w * 3 / 4) {
4500-
prune = dn / sizeof (dnode_t) *
4501-
zfs_arc_dnode_reduce_percent / 100;
4502-
if (nem < w && w > 4)
4503-
prune = arc_mf(prune, nem - w * 3 / 4, w / 4);
4504-
}
45054479
if (dn > arc_dnode_limit) {
45064480
prune = MAX(prune, (dn - arc_dnode_limit) / sizeof (dnode_t) *
4507-
zfs_arc_dnode_reduce_percent / 100);
4481+
zfs_arc_dnode_reduce_percent / 100);
45084482
}
4509-
if (prune > 0)
4483+
if (prune > 0) {
45104484
arc_prune_async(prune);
4485+
return 0;
4486+
}
45114487

45124488
/* Evict MRU metadata. */
45134489
w = wt * (int64_t)(arc_meta * arc_pm >> 48) >> 16;
@@ -4698,6 +4674,11 @@ arc_async_flush_guid_inuse(uint64_t spa_guid)
46984674
uint64_t
46994675
arc_reduce_target_size(uint64_t to_free)
47004676
{
4677+
/* Avoid shrinking metadata/data to force dentry and inode reclaim. */
4678+
int64_t dn = wmsum_value(&arc_sums.arcstat_dnode_size);
4679+
if (dn > arc_dnode_limit)
4680+
return 0;
4681+
47014682
/*
47024683
* Get the actual arc size. Even if we don't need it, this updates
47034684
* the aggsum lower bound estimate for arc_is_overflowing().

0 commit comments

Comments
 (0)