1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-17 21:35:45 +02:00

extent scan: implement an experimental ordered scan mode

Parallel scan runs each extent size tier in a separate thread.  The
threads compete to process extents within the tier's size range.

Ordered scan processes each extent size tier completely before moving on
to the next.  In theory, this means large extents always get processed
quickly, especially when new ones appear, and the queue does not fill up
with small extents.

In practice, the multi-threaded scanner massively outperforms the
single-threaded scanner, unless the number of worker threads is very
small (i.e. one).

Disable most of the feature for now, but leave the code in place so it
can be easily reactivated for future testing.

Ordered scan introduces a parallelized extent mapper Task.  Keep that in
parallel scan mode, which further enhances the parallelism.  The extent
scan crawl threads now run at 'idle' priority while the map tasks run
at normal priority, so the map tasks don't flood the task queue.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2025-02-03 23:09:32 -05:00
parent 1aea2d2f96
commit aa39bddb2d

View File

@ -538,8 +538,8 @@ friend ostream& operator<<(ostream &os, const BeesScanModeExtent::ExtentRef& tod
SizeTier(const shared_ptr<BeesScanModeExtent> &bsme, const uint64_t subvol, const MagicCrawl &size_range); SizeTier(const shared_ptr<BeesScanModeExtent> &bsme, const uint64_t subvol, const MagicCrawl &size_range);
void set_crawl_and_task(const shared_ptr<BeesCrawl> &crawl); void set_crawl_and_task(const shared_ptr<BeesCrawl> &crawl);
void run_task(); void run_task();
void find_next_extent(); bool find_next_extent();
void create_extent_map(const uint64_t bytenr, const ProgressTracker<BeesCrawlState>::ProgressHolder& m_hold, uint64_t len); void create_extent_map(const uint64_t bytenr, const ProgressTracker<BeesCrawlState>::ProgressHolder& m_hold, uint64_t len, const Task &again);
bool scan_one_ref(const ExtentRef &bior); bool scan_one_ref(const ExtentRef &bior);
shared_ptr<BeesCrawl> crawl() const; shared_ptr<BeesCrawl> crawl() const;
}; };
@ -728,7 +728,7 @@ BeesScanModeExtent::SizeTier::scan_one_ref(const BeesScanModeExtent::ExtentRef &
} }
void void
BeesScanModeExtent::SizeTier::create_extent_map(const uint64_t bytenr, const ProgressTracker<BeesCrawlState>::ProgressHolder& hold, const uint64_t len) BeesScanModeExtent::SizeTier::create_extent_map(const uint64_t bytenr, const ProgressTracker<BeesCrawlState>::ProgressHolder& hold, const uint64_t len, const Task &next_task)
{ {
BEESNOTE("Creating extent map for " << to_hex(bytenr) << " with LOGICAL_INO"); BEESNOTE("Creating extent map for " << to_hex(bytenr) << " with LOGICAL_INO");
BEESTRACE("Creating extent map for " << to_hex(bytenr) << " with LOGICAL_INO"); BEESTRACE("Creating extent map for " << to_hex(bytenr) << " with LOGICAL_INO");
@ -838,8 +838,7 @@ BeesScanModeExtent::SizeTier::create_extent_map(const uint64_t bytenr, const Pro
ostringstream oss; ostringstream oss;
oss << "ref_" << hex << bytenr << "_" << pretty(len) << "_" << dec << refs_list->size(); oss << "ref_" << hex << bytenr << "_" << pretty(len) << "_" << dec << refs_list->size();
const auto bec = shared_from_this(); const auto bec = shared_from_this();
const auto map_task = Task::current_task(); Task crawl_one(oss.str(), [bec, refs_list, next_task]() {
Task crawl_one(oss.str(), [bec, refs_list, map_task]() {
if (!refs_list->empty()) { if (!refs_list->empty()) {
const auto extref = *(refs_list->begin()); const auto extref = *(refs_list->begin());
refs_list->pop_front(); refs_list->pop_front();
@ -854,9 +853,10 @@ BeesScanModeExtent::SizeTier::create_extent_map(const uint64_t bytenr, const Pro
if (refs_list->empty()) { if (refs_list->empty()) {
// We might be throttled and we're about to exit a task, so restart our map task // We might be throttled and we're about to exit a task, so restart our map task
if (!should_throttle()) { if (!should_throttle()) {
map_task.idle(); next_task.idle();
} }
} else { } else {
// Run the next ref
Task::current_task().run(); Task::current_task().run();
} }
}); });
@ -892,19 +892,31 @@ BeesScanModeExtent::init_tasks()
} }
} }
static bool bees_ordered_scan = false;
void void
BeesScanModeExtent::scan() BeesScanModeExtent::scan()
{ {
BEESTRACE("bsm scan"); BEESTRACE("bsm scan");
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
// Poke all the size tier scan tasks const auto size_tiers_copy = m_size_tiers;
for (const auto &i : m_size_tiers) { lock.unlock();
i.second->run_task();
if (bees_ordered_scan) {
// Ordered scan: Run size tier scan tasks in order until one of them finds a new extent
for (const auto &i : size_tiers_copy) {
if (i.second->find_next_extent()) break;
}
} else {
// Parallel scan: Run all the scan tasks at once
for (const auto &i : size_tiers_copy) {
i.second->run_task();
}
} }
} }
void bool
BeesScanModeExtent::SizeTier::find_next_extent() BeesScanModeExtent::SizeTier::find_next_extent()
{ {
BEESTRACE("find_next_extent " << m_subvol); BEESTRACE("find_next_extent " << m_subvol);
@ -1128,22 +1140,31 @@ BeesScanModeExtent::SizeTier::find_next_extent()
continue; continue;
} }
// Map this extent here to regulate task creation const auto find_next_task = Task::current_task();
// Note next_hold >= this_hold > current_hold after the swap above. // Note next_hold >= this_hold > current_hold after the swap above,
create_extent_map(this_bytenr, m_crawl->hold_state(this_state), this_length); // so the extent map may or may not advance the BeesCrawlState position.
BEESCOUNT(crawl_extent); const auto hold_state = m_crawl->hold_state(this_state);
const auto sft = shared_from_this();
ostringstream oss;
oss << "map_" << to_hex(this_bytenr) << "_" << pretty(this_length);
Task create_map_task(oss.str(), [sft, this_bytenr, hold_state, this_length, find_next_task]() {
sft->create_extent_map(this_bytenr, hold_state, this_length, find_next_task);
BEESCOUNT(crawl_extent);
});
create_map_task.run();
// We did something! Get in line to run again...unless we're throttled // We did something! Get in line to run again...unless we're throttled
if (!should_throttle()) { if (!should_throttle()) {
Task::current_task().idle(); Task::current_task().idle();
} }
return; return true;
} }
// Crawl state is updated by holder destructors // Crawl state is updated by holder destructors
// All crawls done // All crawls done
BEESCOUNT(crawl_done); BEESCOUNT(crawl_done);
return false;
} }
static static