From 5fe89d85c35ee788d9974a0442634f4be0ccd2e4 Mon Sep 17 00:00:00 2001
From: Zygo Blaxell
Date: Sun, 19 Jan 2025 21:13:21 -0500
Subject: [PATCH] extent scan: make sure we run every extent crawler once per transaction

There's a pathological case where all of the extent scan crawlers
except one are at the end of a crawl cycle, but the one crawler that
is still running is keeping the Task queue full. The result is that
bees never starts the other extent scan crawlers, because the queue
is always full at the instant a new transid triggers the start of a
new scan.

That's bad because it will result in bees falling behind when new
data from the inactive size tiers appears.

To fix this, check for throttling _after_ creating at least one scan
task in each crawler. That will keep the crawlers running, and
possibly allow them to claw back some space in the Task queue. It
slightly overcommits the Task queue, so there will be a few more
Tasks than nominally allowed.

Also (re)introduce some hysteresis in the queue size limit and reduce
it a little, so that bees isn't continually stopping and restarting
crawls every time one task is created or completed, and so that we
stay under the configured Task limit despite overcommitting.

Signed-off-by: Zygo Blaxell --- src/bees-roots.cc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 0c2dfb0..76a3357 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -538,8 +538,9 @@ should_throttle() // If there's not too many entries in the queue, restart the scan task const auto instance_count = Task::instance_count(); const auto instance_limit = BEES_MAX_EXTENT_REF_COUNT; - const bool queue_empty = s_throttled && instance_count < instance_limit; - const bool queue_full = !s_throttled && instance_count > instance_limit; + // Add some hysteresis so that we aren't constantly flipping throttle on and off + const bool queue_empty = s_throttled && instance_count < instance_limit * .90; + const bool queue_full = !s_throttled && instance_count > instance_limit * .99; if (queue_full) { BEESLOGDEBUG("Throttling crawl at " << instance_count << " tasks"); s_throttled = true; @@ -761,8 +762,6 @@ BeesScanModeExtent::scan() { BEESTRACE("bsm scan"); - if (should_throttle()) return; - unique_lock lock(m_mutex); const auto task_map_copy = m_task_map; lock.unlock(); @@ -778,8 +777,6 @@ BeesScanModeExtent::map_next_extent(uint64_t const subvol) { BEESTRACE("map_next_extent " << subvol); - if (should_throttle()) return; - size_t discard_count = 0; size_t gen_low_count = 0; size_t gen_high_count = 0; @@ -902,8 +899,10 @@ BeesScanModeExtent::map_next_extent(uint64_t const subvol) << " time " << crawl_time << " subvol " << subvol); } - // We did something! Get in line to run again - Task::current_task().idle(); + // We did something! Get in line to run again...unless we're throttled + if (!should_throttle()) { + Task::current_task().idle(); + } return; }