mirror of
https://github.com/Zygo/bees.git
synced 2025-07-01 00:02:27 +02:00
roots: organize scan workers by inode instead of extent
Split crawlers into two separate Tasks:

1. A Task which locates the next inode with a new data extent.
2. A Task which scans every new extent in that inode.

This simplifies some lock contention and execution ordering issues. Files are read sequentially. Workers dynamically scale up or down as needed, without creating thousands of deferred Task objects. Workers obtain inode locks for different inodes in btrfs, so they can work in parallel instead of waiting for each other.

This change in behavior comes with new names for the worker Tasks:

- "crawl_master" is now "crawl_more", the singular Task which creates inode-scanning Tasks.
- "crawl_<subvol>" is now "crawl_<subvol>_<inode>".

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
17
src/bees.h
17
src/bees.h
@@ -100,12 +100,6 @@ const size_t BEES_MAX_EXTENT_REF_COUNT = (16 * 1024 * 1024 / 24) - 1;
|
||||
// How long between hash table histograms
|
||||
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;
|
||||
|
||||
// Stop growing the work queue after we have this many tasks queued
|
||||
const size_t BEES_MAX_QUEUE_SIZE = 128;
|
||||
|
||||
// Insert this many items before switching to a new subvol
|
||||
const size_t BEES_MAX_CRAWL_BATCH = 128;
|
||||
|
||||
// Wait this many transids between crawls
|
||||
const size_t BEES_TRANSID_FACTOR = 10;
|
||||
|
||||
@@ -509,24 +503,27 @@ class BeesCrawl {
|
||||
shared_ptr<BeesContext> m_ctx;
|
||||
|
||||
mutex m_mutex;
|
||||
set<BeesFileRange> m_extents;
|
||||
BtrfsTreeItem m_next_extent_data;
|
||||
bool m_deferred = false;
|
||||
bool m_finished = false;
|
||||
|
||||
mutex m_state_mutex;
|
||||
ProgressTracker<BeesCrawlState> m_state;
|
||||
|
||||
BtrfsTreeObjectFetcher m_btof;
|
||||
|
||||
bool fetch_extents();
|
||||
void fetch_extents_harder();
|
||||
bool next_transid();
|
||||
BeesFileRange bti_to_bfr(const BtrfsTreeItem &bti) const;
|
||||
|
||||
public:
|
||||
BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state);
|
||||
BeesFileRange peek_front();
|
||||
BeesFileRange pop_front();
|
||||
ProgressTracker<BeesCrawlState>::ProgressHolder hold_state(const BeesFileRange &bfr);
|
||||
ProgressTracker<BeesCrawlState>::ProgressHolder hold_state(const BeesCrawlState &bcs);
|
||||
BeesCrawlState get_state_begin();
|
||||
BeesCrawlState get_state_end();
|
||||
BeesCrawlState get_state_end() const;
|
||||
void set_state(const BeesCrawlState &bcs);
|
||||
void deferred(bool def_setting);
|
||||
};
|
||||
@@ -574,7 +571,7 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
||||
uint64_t next_root(uint64_t root = 0);
|
||||
void current_state_set(const BeesCrawlState &bcs);
|
||||
RateEstimator& transid_re();
|
||||
size_t crawl_batch(shared_ptr<BeesCrawl> crawl);
|
||||
bool crawl_batch(shared_ptr<BeesCrawl> crawl);
|
||||
void clear_caches();
|
||||
void insert_tmpfile(Fd fd);
|
||||
void erase_tmpfile(Fd fd);
|
||||
|
Reference in New Issue
Block a user