1
0
mirror of https://github.com/Zygo/bees.git synced 2025-07-01 00:02:27 +02:00

roots: reimplement scan modes using virtual base and methods

Split each scan mode into two distinct phases:

    1.  A heavy discovery phase, where we search the entire filesystem
    for something (new items in subvol trees in this case).

    2.  A light consuming phase, where we fetch extents to dedupe
    from places that we found in the discovery phase.

Part 1 recomputes the subvol ordering every time there is a new transid.
For some scan modes this computation is quite expensive, far too costly
to pay for every extent, so we do it no more than once per transaction.

Part 2 is run every time a worker thread hits the crawl_more Task.
It simply pulls one extent from the first crawler off a sorted list,
removing the crawler from the list when the crawler runs out of data.

Part 1 creates a new structure and swaps it into place, while Part 2
continues to run using the previous structure.  Neither of these
needs to block the other, so they don't.

The separate class and base pointer also make it easier to add new scan
modes that are not based on subvol trees or that don't use BeesCrawl.

While we're here, fix up some method visibility in BeesRoots.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell
2022-11-14 13:01:41 -05:00
parent 0dca6f74b0
commit 03f809bf22
2 changed files with 338 additions and 142 deletions

View File

@ -528,6 +528,8 @@ public:
void deferred(bool def_setting);
};
class BeesScanMode;
class BeesRoots : public enable_shared_from_this<BeesRoots> {
shared_ptr<BeesContext> m_ctx;
@ -545,6 +547,8 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
Task m_crawl_task;
bool m_workaround_btrfs_send = false;
shared_ptr<BeesScanMode> m_scanner;
mutex m_tmpfiles_mutex;
map<BeesFileId, Fd> m_tmpfiles;
@ -573,12 +577,10 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
RateEstimator& transid_re();
bool crawl_batch(shared_ptr<BeesCrawl> crawl);
void clear_caches();
void insert_tmpfile(Fd fd);
void erase_tmpfile(Fd fd);
friend class BeesFdCache;
friend class BeesCrawl;
friend class BeesTempFile;
friend class BeesFdCache;
friend class BeesScanMode;
public:
BeesRoots(shared_ptr<BeesContext> ctx);
@ -586,6 +588,9 @@ public:
void stop_request();
void stop_wait();
void insert_tmpfile(Fd fd);
void erase_tmpfile(Fd fd);
Fd open_root(uint64_t root);
Fd open_root_ino(uint64_t root, uint64_t ino);
Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
@ -602,11 +607,6 @@ public:
void set_scan_mode(ScanMode new_mode);
void set_workaround_btrfs_send(bool do_avoid);
private:
ScanMode m_scan_mode = SCAN_MODE_COUNT; // must be set
static string scan_mode_ntoa(ScanMode new_mode);
};
struct BeesHash {