1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-18 05:45:45 +02:00

roots: use RateEstimator to track transids

Make the crawl polling interval more closely track the commit interval
on the btrfs filesystem.  In the future this will provide opportunities
to do things like clear FD caches and stop crawls on deleted subvols,
but triggered by transaction commits instead of arbitrary time intervals.

Rename the "crawl" thread so it no longer has the same name as the "crawl"
task, and repurpose it for dedicated transid polling.  Cancel the deletion
of crawl_thread and repurpose it to trigger new crawls and wake up the
main crawl Task when it runs out of data.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2018-01-24 02:03:42 -05:00
parent 4694c7d250
commit 0fdae37962
2 changed files with 38 additions and 21 deletions

View File

@ -92,9 +92,6 @@ BeesRoots::crawl_state_filename() const
void void
BeesRoots::state_save() BeesRoots::state_save()
{ {
// Make sure we have a full complement of crawlers
insert_new_crawl();
BEESNOTE("saving crawl state"); BEESNOTE("saving crawl state");
BEESLOGINFO("Saving crawl state"); BEESLOGINFO("Saving crawl state");
BEESTOOLONG("Saving crawl state"); BEESTOOLONG("Saving crawl state");
@ -212,6 +209,8 @@ BeesRoots::transid_max()
}); });
} }
} while (root); } while (root);
m_transid_re.update(rv);
// BEESLOGDEBUG("RateEstimator transid_max update: " << m_transid_re);
return rv; return rv;
} }
@ -304,11 +303,12 @@ BeesRoots::crawl_roots()
case SCAN_MODE_COUNT: assert(false); break; case SCAN_MODE_COUNT: assert(false); break;
} }
BEESLOGINFO("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for more...");
BEESCOUNT(crawl_done); BEESCOUNT(crawl_done);
BEESNOTE("idle, waiting for more data");
lock.lock(); auto want_transid = m_transid_re.count() + 1;
m_condvar.wait(lock); BEESLOGINFO("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for transid " << want_transid << "...");
BEESNOTE("idle, waiting for transid " << want_transid << ": " << m_transid_re);
m_transid_re.wait_until(want_transid);
// Don't count the time we were waiting as part of the crawl time // Don't count the time we were waiting as part of the crawl time
m_crawl_timer.reset(); m_crawl_timer.reset();
@ -317,10 +317,9 @@ BeesRoots::crawl_roots()
void void
BeesRoots::crawl_thread() BeesRoots::crawl_thread()
{ {
// TODO: get rid of the thread. For now it is a convenient BEESNOTE("creating crawl task");
// way to avoid the weird things that happen when you try to
// shared_from_this() in a constructor. // Start the Task that does the crawling
BEESNOTE("crawling");
auto shared_this = shared_from_this(); auto shared_this = shared_from_this();
Task("crawl", [shared_this]() { Task("crawl", [shared_this]() {
auto tqs = TaskMaster::get_queue_count(); auto tqs = TaskMaster::get_queue_count();
@ -333,12 +332,25 @@ BeesRoots::crawl_thread()
} }
Task::current_task().run(); Task::current_task().run();
}).run(); }).run();
// Monitor transid_max and wake up roots when it changes
BEESNOTE("tracking transids");
while (true) {
// Make sure we have a full complement of crawlers
// Calls transid_max() which updates m_transid_re
insert_new_crawl();
BEESNOTE("waiting for next transid " << m_transid_re);
// We don't use wait_for here because somebody needs to
// be updating m_transid_re from time to time.
nanosleep(m_transid_re.eta_rel(1));
}
} }
void void
BeesRoots::writeback_thread() BeesRoots::writeback_thread()
{ {
while (1) { while (true) {
BEESNOTE(m_crawl_current << (m_crawl_dirty ? " (dirty)" : "")); BEESNOTE(m_crawl_current << (m_crawl_dirty ? " (dirty)" : ""));
catch_all([&]() { catch_all([&]() {
@ -347,7 +359,6 @@ BeesRoots::writeback_thread()
}); });
nanosleep(BEES_WRITEBACK_INTERVAL); nanosleep(BEES_WRITEBACK_INTERVAL);
} }
} }
@ -393,9 +404,7 @@ BeesRoots::insert_new_crawl()
crawl_state_erase(new_bcs); crawl_state_erase(new_bcs);
} }
// Wake up crawl_roots if sleeping // transid_max updates m_transid_re which wakes up crawl_roots()
lock.lock();
m_condvar.notify_all();
} }
void void
@ -437,7 +446,7 @@ BeesRoots::state_load()
BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) : BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
m_ctx(ctx), m_ctx(ctx),
m_crawl_state_file(ctx->home_fd(), crawl_state_filename()), m_crawl_state_file(ctx->home_fd(), crawl_state_filename()),
m_crawl_thread("crawl"), m_crawl_thread("crawl_transid"),
m_writeback_thread("crawl_writeback") m_writeback_thread("crawl_writeback")
{ {
m_crawl_thread.exec([&]() { m_crawl_thread.exec([&]() {
@ -695,6 +704,12 @@ BeesRoots::open_root_ino(uint64_t root, uint64_t ino)
return m_ctx->fd_cache()->open_root_ino(m_ctx, root, ino); return m_ctx->fd_cache()->open_root_ino(m_ctx, root, ino);
} }
RateEstimator &
BeesRoots::transid_re()
{
return m_transid_re;
}
BeesCrawl::BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state) : BeesCrawl::BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state) :
m_ctx(ctx), m_ctx(ctx),
m_state(initial_state) m_state(initial_state)
@ -705,12 +720,14 @@ bool
BeesCrawl::next_transid() BeesCrawl::next_transid()
{ {
// If this crawl is recently empty, quickly and _silently_ bail out // If this crawl is recently empty, quickly and _silently_ bail out
auto roots = m_ctx->roots();
auto current_time = time(NULL); auto current_time = time(NULL);
auto crawl_state = get_state(); auto crawl_state = get_state();
auto elapsed_time = current_time - crawl_state.m_started; auto elapsed_time = current_time - crawl_state.m_started;
if (elapsed_time < BEES_COMMIT_INTERVAL) { auto transid_delta = roots->transid_re().eta_abs(crawl_state.m_max_transid + 1);
if (elapsed_time < transid_delta) {
if (!m_deferred) { if (!m_deferred) {
BEESLOGINFO("Deferring next transid in " << get_state()); BEESLOGINFO("Deferring next transid " << transid_delta << "s in " << get_state());
} }
m_deferred = true; m_deferred = true;
BEESCOUNT(crawl_defer); BEESCOUNT(crawl_defer);
@ -722,7 +739,6 @@ BeesCrawl::next_transid()
// Start new crawl // Start new crawl
m_deferred = false; m_deferred = false;
auto roots = m_ctx->roots();
crawl_state.m_min_transid = crawl_state.m_max_transid; crawl_state.m_min_transid = crawl_state.m_max_transid;
crawl_state.m_max_transid = roots->transid_max(); crawl_state.m_max_transid = roots->transid_max();
crawl_state.m_objectid = 0; crawl_state.m_objectid = 0;

View File

@ -520,11 +520,11 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
BeesCrawlState m_crawl_current; BeesCrawlState m_crawl_current;
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map; map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
mutex m_mutex; mutex m_mutex;
condition_variable m_condvar;
bool m_crawl_dirty = false; bool m_crawl_dirty = false;
Timer m_crawl_timer; Timer m_crawl_timer;
BeesThread m_crawl_thread; BeesThread m_crawl_thread;
BeesThread m_writeback_thread; BeesThread m_writeback_thread;
RateEstimator m_transid_re;
void insert_new_crawl(); void insert_new_crawl();
void insert_root(const BeesCrawlState &bcs); void insert_root(const BeesCrawlState &bcs);
@ -543,6 +543,7 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
void writeback_thread(); void writeback_thread();
uint64_t next_root(uint64_t root = 0); uint64_t next_root(uint64_t root = 0);
void current_state_set(const BeesCrawlState &bcs); void current_state_set(const BeesCrawlState &bcs);
RateEstimator& transid_re();
friend class BeesFdCache; friend class BeesFdCache;
friend class BeesCrawl; friend class BeesCrawl;