From 4f0bc78a4c9b83f1ca56e59e6b433e6fb74dda48 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sun, 28 Jan 2018 22:07:45 -0500 Subject: [PATCH] crawl: don't block a Task waiting for new transids Task should not block for extended periods of time. Remove the RateEstimator::wait_for() in crawl_roots. When crawl_roots runs out of data, let the last crawl_task end without rescheduling. Schedule crawl_task again on transid polls if it was not already running. Signed-off-by: Zygo Blaxell --- src/bees-roots.cc | 49 +++++++++++++++++++++++++++++------------------ src/bees.h | 7 +++++-- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/bees-roots.cc b/src/bees-roots.cc index f1f2e34..728a810 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -247,7 +247,7 @@ BeesRoots::crawl_batch(shared_ptr this_crawl) return batch_count; } -void +bool BeesRoots::crawl_roots() { BEESNOTE("Crawling roots"); @@ -280,13 +280,13 @@ BeesRoots::crawl_roots() } if (!first_crawl) { - return; + return false; } auto batch_count = crawl_batch(first_crawl); if (batch_count) { - return; + return true; } break; @@ -300,7 +300,7 @@ BeesRoots::crawl_roots() } if (batch_count) { - return; + return true; } break; @@ -322,7 +322,7 @@ BeesRoots::crawl_roots() for (auto i : crawl_vector) { batch_count += crawl_batch(i); if (batch_count) { - return; + return true; } } @@ -337,13 +337,9 @@ BeesRoots::crawl_roots() auto want_transid = m_transid_re.count() + m_transid_factor; auto ran_out_time = m_crawl_timer.lap(); BEESLOGINFO("Crawl master ran out of data after " << ran_out_time << "s, waiting about " << m_transid_re.seconds_until(want_transid) << "s for transid " << want_transid << "..."); - BEESNOTE("idle, waiting for transid " << want_transid << ": " << m_transid_re); - // FIXME: Tasks should not block arbitrarily - m_transid_re.wait_until(want_transid); - auto resumed_after_time = m_crawl_timer.lap(); - auto new_transid = m_transid_re.count(); - BEESLOGINFO("Crawl master resumed 
after " << resumed_after_time << "s at transid " << new_transid); + // Do not run again + return false; } void @@ -351,19 +347,25 @@ BeesRoots::crawl_thread() { BEESNOTE("creating crawl task"); - // Start the Task that does the crawling + // Create the Task that does the crawling auto shared_this = shared_from_this(); - Task("crawl_master", [shared_this]() { + m_crawl_task = Task("crawl_master", [shared_this]() { auto tqs = TaskMaster::get_queue_count(); BEESNOTE("queueing extents to scan, " << tqs << " of " << BEES_MAX_QUEUE_SIZE); + bool run_again = false; while (tqs < BEES_MAX_QUEUE_SIZE) { - catch_all([&]() { - shared_this->crawl_roots(); - }); + run_again = shared_this->crawl_roots(); tqs = TaskMaster::get_queue_count(); + if (!run_again) { + break; + } } - Task::current_task().run(); - }).run(); + if (run_again) { + shared_this->m_crawl_task.run(); + } else { + shared_this->m_task_running = false; + } + }); // Monitor transid_max and wake up roots when it changes BEESNOTE("tracking transid"); @@ -389,6 +391,14 @@ BeesRoots::crawl_thread() } last_count = new_count; + // If no crawl task is running, start a new one + bool already_running = m_task_running.exchange(true); + if (!already_running) { + auto resumed_after_time = m_crawl_timer.lap(); + BEESLOGINFO("Crawl master resumed after " << resumed_after_time << "s at transid " << new_count); + m_crawl_task.run(); + } + auto poll_time = m_transid_re.seconds_for(m_transid_factor); BEESLOGDEBUG("Polling " << poll_time << "s for next " << m_transid_factor << " transid " << m_transid_re); BEESNOTE("waiting " << poll_time << "s for next " << m_transid_factor << " transid " << m_transid_re); @@ -497,7 +507,8 @@ BeesRoots::BeesRoots(shared_ptr ctx) : m_ctx(ctx), m_crawl_state_file(ctx->home_fd(), crawl_state_filename()), m_crawl_thread("crawl_transid"), - m_writeback_thread("crawl_writeback") + m_writeback_thread("crawl_writeback"), + m_task_running(false) { m_crawl_thread.exec([&]() { // Measure current transid before 
creating any crawlers diff --git a/src/bees.h b/src/bees.h index 292bd56..e409119 100644 --- a/src/bees.h +++ b/src/bees.h @@ -9,8 +9,9 @@ #include "crucible/fs.h" #include "crucible/lockset.h" #include "crucible/time.h" +#include "crucible/task.h" -#include +#include #include #include #include @@ -528,6 +529,8 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> { BeesThread m_writeback_thread; RateEstimator m_transid_re; size_t m_transid_factor = BEES_TRANSID_FACTOR; + atomic<bool> m_task_running; + Task m_crawl_task; void insert_new_crawl(); void insert_root(const BeesCrawlState &bcs); @@ -538,7 +541,7 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> { uint64_t transid_max_nocache(); void state_load(); void state_save(); - void crawl_roots(); + bool crawl_roots(); string crawl_state_filename() const; BeesCrawlState crawl_state_get(uint64_t root); void crawl_state_set_dirty();