1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-17 13:25:45 +02:00

extent scan: make sure we run every extent crawler once per transaction

There's a pathological case where all of the extent scan crawlers except
one are at the end of a crawl cycle, but the one crawler that is still
running is keeping the Task queue full.  The result is that bees never
starts the other extent scan crawlers, because the queue is always
full at the instant a new transid triggers the start of a new scan.
That's bad because it will result in bees falling behind when new data
from the inactive size tiers appears.

To fix this, check for throttling _after_ creating at least one scan task
in each crawler.  That will keep the crawlers running, and possibly allow
them to claw back some space in the Task queue.  It slightly overcommits
the Task queue, so there will be a few more Tasks than nominally allowed.

Also (re)introduce some hysteresis in the queue size limit and lower the
limit slightly: crawls are now throttled once the queue exceeds 99% of
the limit, and resume only after it drops below 90% of the limit.  That
way bees isn't continually stopping and restarting crawls every time one
task is created or completed, and we stay under the configured Task
limit despite overcommitting.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2025-01-19 21:13:21 -05:00
parent a2b3e1e0c2
commit 5fe89d85c3

View File

@@ -538,8 +538,9 @@ should_throttle()
 // If there's not too many entries in the queue, restart the scan task
 const auto instance_count = Task::instance_count();
 const auto instance_limit = BEES_MAX_EXTENT_REF_COUNT;
-const bool queue_empty = s_throttled && instance_count < instance_limit;
-const bool queue_full = !s_throttled && instance_count > instance_limit;
+// Add some hysteresis so that we aren't constantly flipping throttle on and off
+const bool queue_empty = s_throttled && instance_count < instance_limit * .90;
+const bool queue_full = !s_throttled && instance_count > instance_limit * .99;
 if (queue_full) {
 BEESLOGDEBUG("Throttling crawl at " << instance_count << " tasks");
 s_throttled = true;
@@ -761,8 +762,6 @@ BeesScanModeExtent::scan()
 {
 BEESTRACE("bsm scan");
-if (should_throttle()) return;
 unique_lock<mutex> lock(m_mutex);
 const auto task_map_copy = m_task_map;
 lock.unlock();
@@ -778,8 +777,6 @@ BeesScanModeExtent::map_next_extent(uint64_t const subvol)
 {
 BEESTRACE("map_next_extent " << subvol);
-if (should_throttle()) return;
 size_t discard_count = 0;
 size_t gen_low_count = 0;
 size_t gen_high_count = 0;
@@ -902,8 +899,10 @@ BeesScanModeExtent::map_next_extent(uint64_t const subvol)
 << " time " << crawl_time << " subvol " << subvol);
 }
-// We did something! Get in line to run again
-Task::current_task().idle();
+// We did something! Get in line to run again...unless we're throttled
+if (!should_throttle()) {
+Task::current_task().idle();
+}
 return;
 }