From ba11d733c0d9cac9ddc8f05a818166d3ccf45c36 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Tue, 11 Mar 2025 14:46:40 -0400 Subject: [PATCH] readahead: flush the readahead cache based on time, not extent count If the extent wasn't read within the last few seconds, chances are high that it was evicted from the page cache. If the extents have been evicted from the cache by the time we grow or dedupe them, we'll take a serious performance hit as we read them back in, one page at a time. Use a 5-second delay to match the default writeback interval. Signed-off-by: Zygo Blaxell --- src/bees.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bees.cc b/src/bees.cc index a4b0b0c..99cd782 100644 --- a/src/bees.cc +++ b/src/bees.cc @@ -228,8 +228,10 @@ bees_readahead_check(int const fd, off_t const offset, size_t const size) auto tup = make_tuple(offset, size, stat_rv.st_dev, stat_rv.st_ino); static mutex s_recent_mutex; static set s_recent; + static Timer s_recent_timer; unique_lock lock(s_recent_mutex); - if (s_recent.size() > BEES_MAX_EXTENT_REF_COUNT) { + if (s_recent_timer.age() > 5.0) { + s_recent_timer.reset(); s_recent.clear(); BEESCOUNT(readahead_clear); }