1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-18 05:45:45 +02:00

extent scan: refactor BeesScanMode so derived classes decide their own scan scheduling

BeesScanModeExtent uses six scan Tasks instead of one, which leads
to awkwardness like the do_scan method, which exists only to tell
crawl_roots how to do something it shouldn't need to know about anyway.

Move the crawl_roots logic into the ::scan methods themselves.

This also deletes the very popular "crawl_more ran out of data" message.
Extent scan explicitly indicates when a scan is complete, so there's
no longer a need to fish this message out of the log.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2024-11-29 23:57:15 -05:00
parent 1e139d0ccc
commit 8080abac97
2 changed files with 54 additions and 110 deletions

View File

@ -99,12 +99,14 @@ class BeesScanMode : public enable_shared_from_this<BeesScanMode> {
protected: protected:
shared_ptr<BeesContext> m_ctx; shared_ptr<BeesContext> m_ctx;
shared_ptr<BeesRoots> m_roots; shared_ptr<BeesRoots> m_roots;
mutex m_scan_task_mutex;
Task m_scan_task;
bool crawl_batch(const shared_ptr<BeesCrawl>& crawl); bool crawl_batch(const shared_ptr<BeesCrawl>& crawl);
virtual void start_scan();
virtual void scan() = 0;
public: public:
virtual ~BeesScanMode() {} virtual ~BeesScanMode() {}
BeesScanMode(const shared_ptr<BeesRoots>& roots, const shared_ptr<BeesContext>& ctx); BeesScanMode(const shared_ptr<BeesRoots>& roots, const shared_ptr<BeesContext>& ctx);
virtual bool do_scan() = 0;
virtual bool scan() = 0;
using CrawlMap = decltype(BeesRoots::m_root_crawl_map); using CrawlMap = decltype(BeesRoots::m_root_crawl_map);
virtual void next_transid(const CrawlMap &crawl_map) = 0; virtual void next_transid(const CrawlMap &crawl_map) = 0;
virtual const char *ntoa() const = 0; virtual const char *ntoa() const = 0;
@ -116,6 +118,21 @@ BeesScanMode::BeesScanMode(const shared_ptr<BeesRoots>& roots, const shared_ptr<
{ {
} }
void
BeesScanMode::start_scan()
{
unique_lock<mutex> lock(m_scan_task_mutex);
if (!m_scan_task) {
const auto st = shared_from_this();
ostringstream oss;
oss << "scan_" << ntoa();
m_scan_task = Task(oss.str(), [st] {
st->scan();
});
}
m_scan_task.idle();
}
bool bool
BeesScanMode::crawl_batch(const shared_ptr<BeesCrawl>& crawl) BeesScanMode::crawl_batch(const shared_ptr<BeesCrawl>& crawl)
{ {
@ -129,11 +146,10 @@ class BeesScanModeLockstep : public BeesScanMode {
using Map = map<SortKey, CrawlMap::mapped_type>; using Map = map<SortKey, CrawlMap::mapped_type>;
mutex m_mutex; mutex m_mutex;
shared_ptr<Map> m_sorted; shared_ptr<Map> m_sorted;
void scan() override;
public: public:
using BeesScanMode::BeesScanMode; using BeesScanMode::BeesScanMode;
~BeesScanModeLockstep() override {} ~BeesScanModeLockstep() override {}
bool do_scan() override;
bool scan() override;
void next_transid(const CrawlMap &crawl_map) override; void next_transid(const CrawlMap &crawl_map) override;
const char *ntoa() const override; const char *ntoa() const override;
}; };
@ -144,13 +160,7 @@ BeesScanModeLockstep::ntoa() const
return "LOCKSTEP"; return "LOCKSTEP";
} }
bool void
BeesScanModeLockstep::do_scan()
{
return true;
}
bool
BeesScanModeLockstep::scan() BeesScanModeLockstep::scan()
{ {
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
@ -158,7 +168,7 @@ BeesScanModeLockstep::scan()
lock.unlock(); lock.unlock();
if (!hold_sorted) { if (!hold_sorted) {
BEESLOGINFO("called Lockstep scan without a sorted map"); BEESLOGINFO("called Lockstep scan without a sorted map");
return false; return;
} }
auto &sorted = *hold_sorted; auto &sorted = *hold_sorted;
while (!sorted.empty()) { while (!sorted.empty()) {
@ -173,10 +183,10 @@ BeesScanModeLockstep::scan()
const auto insert_rv = sorted.insert(new_value); const auto insert_rv = sorted.insert(new_value);
THROW_CHECK0(runtime_error, insert_rv.second); THROW_CHECK0(runtime_error, insert_rv.second);
} }
return true; Task::current_task().idle();
return;
} }
} }
return false;
} }
void void
@ -196,6 +206,8 @@ BeesScanModeLockstep::next_transid(const CrawlMap &crawl_map)
} }
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
swap(m_sorted, new_map); swap(m_sorted, new_map);
lock.unlock();
start_scan();
} }
/// Scan each subvol in round-robin with no synchronization. /// Scan each subvol in round-robin with no synchronization.
@ -204,11 +216,10 @@ class BeesScanModeIndependent : public BeesScanMode {
using List = list<CrawlMap::mapped_type>; using List = list<CrawlMap::mapped_type>;
mutex m_mutex; mutex m_mutex;
shared_ptr<List> m_subvols; shared_ptr<List> m_subvols;
void scan() override;
public: public:
using BeesScanMode::BeesScanMode; using BeesScanMode::BeesScanMode;
~BeesScanModeIndependent() override {} ~BeesScanModeIndependent() override {}
bool do_scan() override;
bool scan() override;
void next_transid(const CrawlMap &crawl_map) override; void next_transid(const CrawlMap &crawl_map) override;
const char *ntoa() const override; const char *ntoa() const override;
}; };
@ -219,13 +230,7 @@ BeesScanModeIndependent::ntoa() const
return "INDEPENDENT"; return "INDEPENDENT";
} }
bool void
BeesScanModeIndependent::do_scan()
{
return true;
}
bool
BeesScanModeIndependent::scan() BeesScanModeIndependent::scan()
{ {
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
@ -233,7 +238,7 @@ BeesScanModeIndependent::scan()
lock.unlock(); lock.unlock();
if (!hold_subvols) { if (!hold_subvols) {
BEESLOGINFO("called Independent scan without a subvol list"); BEESLOGINFO("called Independent scan without a subvol list");
return false; return;
} }
auto &subvols = *hold_subvols; auto &subvols = *hold_subvols;
while (!subvols.empty()) { while (!subvols.empty()) {
@ -242,10 +247,10 @@ BeesScanModeIndependent::scan()
const bool rv = crawl_batch(this_crawl); const bool rv = crawl_batch(this_crawl);
if (rv) { if (rv) {
subvols.push_back(this_crawl); subvols.push_back(this_crawl);
return true; Task::current_task().idle();
return;
} }
} }
return false;
} }
void void
@ -262,6 +267,8 @@ BeesScanModeIndependent::next_transid(const CrawlMap &crawl_map)
} }
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
swap(m_subvols, new_subvols); swap(m_subvols, new_subvols);
lock.unlock();
start_scan();
} }
/// Scan each subvol completely, in numerical order, before moving on to the next. /// Scan each subvol completely, in numerical order, before moving on to the next.
@ -272,11 +279,10 @@ class BeesScanModeSequential : public BeesScanMode {
using Map = map<SortKey, CrawlMap::mapped_type>; using Map = map<SortKey, CrawlMap::mapped_type>;
mutex m_mutex; mutex m_mutex;
shared_ptr<Map> m_sorted; shared_ptr<Map> m_sorted;
void scan() override;
public: public:
using BeesScanMode::BeesScanMode; using BeesScanMode::BeesScanMode;
~BeesScanModeSequential() override {} ~BeesScanModeSequential() override {}
bool do_scan() override;
bool scan() override;
void next_transid(const CrawlMap &crawl_map) override; void next_transid(const CrawlMap &crawl_map) override;
const char *ntoa() const override; const char *ntoa() const override;
}; };
@ -287,13 +293,7 @@ BeesScanModeSequential::ntoa() const
return "SEQUENTIAL"; return "SEQUENTIAL";
} }
bool void
BeesScanModeSequential::do_scan()
{
return true;
}
bool
BeesScanModeSequential::scan() BeesScanModeSequential::scan()
{ {
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
@ -301,19 +301,19 @@ BeesScanModeSequential::scan()
lock.unlock(); lock.unlock();
if (!hold_sorted) { if (!hold_sorted) {
BEESLOGINFO("called Sequential scan without a sorted map"); BEESLOGINFO("called Sequential scan without a sorted map");
return false; return;
} }
auto &sorted = *hold_sorted; auto &sorted = *hold_sorted;
while (!sorted.empty()) { while (!sorted.empty()) {
const auto this_crawl = sorted.begin()->second; const auto this_crawl = sorted.begin()->second;
const bool rv = crawl_batch(this_crawl); const bool rv = crawl_batch(this_crawl);
if (rv) { if (rv) {
return true; Task::current_task().idle();
return;
} else { } else {
sorted.erase(sorted.begin()); sorted.erase(sorted.begin());
} }
} }
return false;
} }
void void
@ -333,6 +333,8 @@ BeesScanModeSequential::next_transid(const CrawlMap &crawl_map)
} }
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
swap(m_sorted, new_map); swap(m_sorted, new_map);
lock.unlock();
start_scan();
} }
/// Scan the most recently completely scanned subvols first. Keeps recently added data /// Scan the most recently completely scanned subvols first. Keeps recently added data
@ -347,11 +349,10 @@ class BeesScanModeRecent : public BeesScanMode {
using Map = map<SortKey, list<CrawlMap::mapped_type>>; using Map = map<SortKey, list<CrawlMap::mapped_type>>;
mutex m_mutex; mutex m_mutex;
shared_ptr<Map> m_sorted; shared_ptr<Map> m_sorted;
void scan() override;
public: public:
using BeesScanMode::BeesScanMode; using BeesScanMode::BeesScanMode;
~BeesScanModeRecent() override {} ~BeesScanModeRecent() override {}
bool do_scan() override;
bool scan() override;
void next_transid(const CrawlMap &crawl_map) override; void next_transid(const CrawlMap &crawl_map) override;
const char *ntoa() const override; const char *ntoa() const override;
}; };
@ -362,13 +363,7 @@ BeesScanModeRecent::ntoa() const
return "RECENT"; return "RECENT";
} }
bool void
BeesScanModeRecent::do_scan()
{
return true;
}
bool
BeesScanModeRecent::scan() BeesScanModeRecent::scan()
{ {
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
@ -376,7 +371,7 @@ BeesScanModeRecent::scan()
lock.unlock(); lock.unlock();
if (!hold_sorted) { if (!hold_sorted) {
BEESLOGINFO("called Recent scan without a sorted map"); BEESLOGINFO("called Recent scan without a sorted map");
return false; return;
} }
auto &sorted = *hold_sorted; auto &sorted = *hold_sorted;
while (!sorted.empty()) { while (!sorted.empty()) {
@ -389,11 +384,11 @@ BeesScanModeRecent::scan()
const bool rv = crawl_batch(this_crawl); const bool rv = crawl_batch(this_crawl);
if (rv) { if (rv) {
this_list.push_back(this_crawl); this_list.push_back(this_crawl);
return true; Task::current_task().idle();
return;
} }
} }
} }
return false;
} }
void void
@ -419,6 +414,7 @@ BeesScanModeRecent::next_transid(const CrawlMap &crawl_map)
} }
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
swap(m_sorted, new_map); swap(m_sorted, new_map);
start_scan();
} }
/// Scan the extent tree and submit each extent's references in a single batch. /// Scan the extent tree and submit each extent's references in a single batch.
@ -445,7 +441,7 @@ friend ostream& operator<<(ostream &os, const BeesScanModeExtent::MagicCrawl& ma
friend ostream& operator<<(ostream &os, const BeesScanModeExtent::ExtentRef& todo); friend ostream& operator<<(ostream &os, const BeesScanModeExtent::ExtentRef& todo);
void init_tasks(); void init_tasks();
void run_tasks(); void scan() override;
void map_next_extent(uint64_t subvol); void map_next_extent(uint64_t subvol);
bool crawl_one_extent(const ExtentRef &bior); bool crawl_one_extent(const ExtentRef &bior);
void create_extent_map(const uint64_t bytenr, const ProgressTracker<BeesCrawlState>::ProgressHolder& m_hold, uint64_t len); void create_extent_map(const uint64_t bytenr, const ProgressTracker<BeesCrawlState>::ProgressHolder& m_hold, uint64_t len);
@ -453,8 +449,6 @@ friend ostream& operator<<(ostream &os, const BeesScanModeExtent::ExtentRef& tod
public: public:
BeesScanModeExtent(const shared_ptr<BeesRoots>& roots, const shared_ptr<BeesContext>& ctx); BeesScanModeExtent(const shared_ptr<BeesRoots>& roots, const shared_ptr<BeesContext>& ctx);
~BeesScanModeExtent() override {} ~BeesScanModeExtent() override {}
bool do_scan() override;
bool scan() override;
void next_transid(const CrawlMap &crawl_map) override; void next_transid(const CrawlMap &crawl_map) override;
const char *ntoa() const override; const char *ntoa() const override;
}; };
@ -747,13 +741,16 @@ BeesScanModeExtent::init_tasks()
} }
void void
BeesScanModeExtent::run_tasks() BeesScanModeExtent::scan()
{ {
if (should_throttle()) return; if (should_throttle()) return;
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
const auto task_map_copy = m_task_map;
lock.unlock();
// Good to go, start everything running // Good to go, start everything running
for (const auto &i : m_task_map) { for (const auto &i : task_map_copy) {
i.second.idle(); i.second.idle();
} }
} }
@ -883,19 +880,6 @@ BeesScanModeExtent::map_next_extent(uint64_t const subvol)
BEESCOUNT(crawl_done); BEESCOUNT(crawl_done);
} }
/// Always returns false.  BeesRoots::crawl_roots returns false immediately
/// when do_scan() is false, so it never calls scan() for this mode.
bool
BeesScanModeExtent::do_scan()
{
return false;
}
/// Stub: performs no work and always returns false.
bool
BeesScanModeExtent::scan()
{
// This is now driven directly from next_transid
return false;
}
void void
BeesScanModeExtent::next_transid(const CrawlMap &crawl_map) BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
{ {
@ -917,7 +901,7 @@ BeesScanModeExtent::next_transid(const CrawlMap &crawl_map)
} }
// Kick off tasks if they aren't already running // Kick off tasks if they aren't already running
run_tasks(); start_scan();
// Swap in the new crawl map with freshly undeferred crawlers // Swap in the new crawl map with freshly undeferred crawlers
auto crawl_map_copy = crawl_map; auto crawl_map_copy = crawl_map;
@ -1424,38 +1408,6 @@ BeesRoots::crawl_batch(shared_ptr<BeesCrawl> this_crawl)
return true; return true;
} }
/// Run one scan step using the currently installed scan mode.
/// Returns true when the scanner's scan() returned true; the crawl_more Task
/// in crawl_thread then calls Task::current_task().idle().
/// Returns false when the scanner's do_scan() returned false, or when scan()
/// returned false (after counting crawl_done and logging the elapsed time).
bool
BeesRoots::crawl_roots()
{
BEESNOTE("Crawling roots");
BEESTRACE("Crawling roots");
// Copy the scanner pointer while holding m_mutex, then drop the lock
// before calling into the scanner.
unique_lock<mutex> lock(m_mutex);
const auto hold_scanner = m_scanner;
lock.unlock();
// NOTE(review): presumably throws runtime_error when no scanner is set -- confirm against THROW_CHECK0
THROW_CHECK0(runtime_error, hold_scanner);
BEESNOTE("Scanning roots in " << hold_scanner->ntoa() << " mode");
BEESTRACE("scanning roots in " << hold_scanner->ntoa() << " mode");
// Clumsy adapter for legacy scan modes
if (!hold_scanner->do_scan()) {
return false;
}
// scan() returned true: report that to the caller
if (hold_scanner->scan()) {
return true;
}
// scan() returned false: count it and log the lap time from m_crawl_timer
BEESCOUNT(crawl_done);
const auto ran_out_time = m_crawl_timer.lap();
BEESLOGINFO("crawl_more ran out of data after " << ran_out_time << "s");
// Do not run again
return false;
}
void void
BeesRoots::clear_caches() BeesRoots::clear_caches()
{ {
@ -1470,18 +1422,11 @@ BeesRoots::crawl_thread()
// Create the Task that does the crawling // Create the Task that does the crawling
const auto shared_this = shared_from_this(); const auto shared_this = shared_from_this();
const auto crawl_task = Task("crawl_more", [shared_this]() { const auto crawl_new = Task("crawl_new", [shared_this]() {
BEESTRACE("crawl_more " << shared_this);
if (shared_this->crawl_roots()) {
Task::current_task().idle();
}
});
const auto crawl_new = Task("crawl_new", [shared_this, crawl_task]() {
BEESTRACE("crawl_new " << shared_this); BEESTRACE("crawl_new " << shared_this);
catch_all([&]() { catch_all([&]() {
shared_this->insert_new_crawl(); shared_this->insert_new_crawl();
}); });
crawl_task.run();
}); });
// Monitor transid_max and wake up roots when it changes // Monitor transid_max and wake up roots when it changes

View File

@ -567,7 +567,6 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
void state_load(); void state_load();
ostream &state_to_stream(ostream &os); ostream &state_to_stream(ostream &os);
void state_save(); void state_save();
bool crawl_roots();
string crawl_state_filename() const; string crawl_state_filename() const;
void crawl_state_set_dirty(); void crawl_state_set_dirty();
void crawl_state_erase(const BeesCrawlState &bcs); void crawl_state_erase(const BeesCrawlState &bcs);