From 0fdae3796220d2a32a2a02ade9fe06a6375211f3 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Wed, 24 Jan 2018 02:03:42 -0500 Subject: [PATCH] roots: use RateEstimator to track transids Make the crawl polling interval more closely track the commit interval on the btrfs filesystem. In the future this will provide opportunities to do things like clear FD caches and stop crawls on deleted subvols, but triggered by transaction commits instead of arbitrary time intervals. Rename the "crawl" thread so it no longer has the same name as the "crawl" task, and repurpose it for dedicated transid polling. Cancel the deletion of crawl_thread and repurpose it to trigger new crawls and wake up the main crawl Task when it runs out of data. Signed-off-by: Zygo Blaxell --- src/bees-roots.cc | 56 ++++++++++++++++++++++++++++++----------------- src/bees.h | 3 ++- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 14de5f2..ed2df25 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -92,9 +92,6 @@ BeesRoots::crawl_state_filename() const void BeesRoots::state_save() { - // Make sure we have a full complement of crawlers - insert_new_crawl(); - BEESNOTE("saving crawl state"); BEESLOGINFO("Saving crawl state"); BEESTOOLONG("Saving crawl state"); @@ -212,6 +209,8 @@ BeesRoots::transid_max() }); } } while (root); + m_transid_re.update(rv); + // BEESLOGDEBUG("RateEstimator transid_max update: " << m_transid_re); return rv; } @@ -304,11 +303,12 @@ BeesRoots::crawl_roots() case SCAN_MODE_COUNT: assert(false); break; } - BEESLOGINFO("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for more..."); BEESCOUNT(crawl_done); - BEESNOTE("idle, waiting for more data"); - lock.lock(); - m_condvar.wait(lock); + + auto want_transid = m_transid_re.count() + 1; + BEESLOGINFO("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for transid " << want_transid << "..."); + BEESNOTE("idle, waiting for transid " << want_transid << ": " << m_transid_re); + m_transid_re.wait_until(want_transid); // Don't count the time we were waiting as part of the crawl time m_crawl_timer.reset(); @@ -317,10 +317,9 @@ BeesRoots::crawl_roots() void BeesRoots::crawl_thread() { - // TODO: get rid of the thread. For now it is a convenient - // way to avoid the weird things that happen when you try to - // shared_from_this() in a constructor. - BEESNOTE("crawling"); + BEESNOTE("creating crawl task"); + + // Start the Task that does the crawling auto shared_this = shared_from_this(); Task("crawl", [shared_this]() { auto tqs = TaskMaster::get_queue_count(); @@ -333,12 +332,25 @@ BeesRoots::crawl_thread() } Task::current_task().run(); }).run(); + + // Monitor transid_max and wake up roots when it changes + BEESNOTE("tracking transids"); + while (true) { + // Make sure we have a full complement of crawlers + // Calls transid_max() which updates m_transid_re + insert_new_crawl(); + + BEESNOTE("waiting for next transid " << m_transid_re); + // We don't use wait_for here because somebody needs to + // be updating m_transid_re from time to time. + nanosleep(m_transid_re.eta_rel(1)); + } } void BeesRoots::writeback_thread() { - while (1) { + while (true) { BEESNOTE(m_crawl_current << (m_crawl_dirty ? " (dirty)" : "")); catch_all([&]() { @@ -347,7 +359,6 @@ BeesRoots::writeback_thread() }); nanosleep(BEES_WRITEBACK_INTERVAL); - } } @@ -393,9 +404,7 @@ BeesRoots::insert_new_crawl() crawl_state_erase(new_bcs); } - // Wake up crawl_roots if sleeping - lock.lock(); - m_condvar.notify_all(); + // transid_max updates m_transid_re which wakes up crawl_roots() } void @@ -437,7 +446,7 @@ BeesRoots::state_load() BeesRoots::BeesRoots(shared_ptr ctx) : m_ctx(ctx), m_crawl_state_file(ctx->home_fd(), crawl_state_filename()), - m_crawl_thread("crawl"), + m_crawl_thread("crawl_transid"), m_writeback_thread("crawl_writeback") { m_crawl_thread.exec([&]() { @@ -695,6 +704,12 @@ BeesRoots::open_root_ino(uint64_t root, uint64_t ino) return m_ctx->fd_cache()->open_root_ino(m_ctx, root, ino); } +RateEstimator & +BeesRoots::transid_re() +{ + return m_transid_re; +} + BeesCrawl::BeesCrawl(shared_ptr ctx, BeesCrawlState initial_state) : m_ctx(ctx), m_state(initial_state) @@ -705,12 +720,14 @@ bool BeesCrawl::next_transid() { // If this crawl is recently empty, quickly and _silently_ bail out + auto roots = m_ctx->roots(); auto current_time = time(NULL); auto crawl_state = get_state(); auto elapsed_time = current_time - crawl_state.m_started; - if (elapsed_time < BEES_COMMIT_INTERVAL) { + auto transid_delta = roots->transid_re().eta_abs(crawl_state.m_max_transid + 1); + if (elapsed_time < transid_delta) { if (!m_deferred) { - BEESLOGINFO("Deferring next transid in " << get_state()); + BEESLOGINFO("Deferring next transid " << transid_delta << "s in " << get_state()); } m_deferred = true; BEESCOUNT(crawl_defer); @@ -722,7 +739,6 @@ BeesCrawl::next_transid() // Start new crawl m_deferred = false; - auto roots = m_ctx->roots(); crawl_state.m_min_transid = crawl_state.m_max_transid; crawl_state.m_max_transid = roots->transid_max(); crawl_state.m_objectid = 0; diff --git a/src/bees.h b/src/bees.h index e405806..38d51c4 100644 --- a/src/bees.h +++ b/src/bees.h @@ -520,11 +520,11 @@ class BeesRoots : public enable_shared_from_this { BeesCrawlState m_crawl_current; map> m_root_crawl_map; mutex m_mutex; - condition_variable m_condvar; bool m_crawl_dirty = false; Timer m_crawl_timer; BeesThread m_crawl_thread; BeesThread m_writeback_thread; + RateEstimator m_transid_re; void insert_new_crawl(); void insert_root(const BeesCrawlState &bcs); @@ -543,6 +543,7 @@ class BeesRoots : public enable_shared_from_this { void writeback_thread(); uint64_t next_root(uint64_t root = 0); void current_state_set(const BeesCrawlState &bcs); + RateEstimator& transid_re(); friend class BeesFdCache; friend class BeesCrawl;