From be9321cdb347daeb5c49bab577b2b55623405b12 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 26 Nov 2022 23:16:13 -0500 Subject: [PATCH] roots: correctly track crawl dirty state If there's an error while writing the crawl state, the state should remain dirty. If the crawl state is successfully written, the state is only clean if there were no changes to crawl state since the write was committed. We need to release the lock while writing the state but correctly set the dirty flag when the state is written successfully. Replace the bool with a version number counter. Track the last version successfully saved and the current version of the crawl state. The state is dirty if these counters disagree and clean if they agree. Signed-off-by: Zygo Blaxell --- src/bees-roots.cc | 20 +++++++++++--------- src/bees.h | 3 ++- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 76f129f..7dc877a 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -120,27 +120,29 @@ BeesRoots::state_save() // We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls. ostringstream ofs; - if (!m_crawl_dirty) { + if (m_crawl_clean == m_crawl_dirty) { BEESLOGINFO("Nothing to save"); return; } state_to_stream(ofs); + const auto crawl_saved = m_crawl_dirty; if (ofs.str().empty()) { BEESLOGWARN("Crawl state empty!"); - m_crawl_dirty = false; + m_crawl_clean = crawl_saved; return; } lock.unlock(); + // This may throw an exception, so we didn't save the state we thought we did. m_crawl_state_file.write(ofs.str()); - BEESNOTE("relocking crawl state"); + BEESNOTE("relocking crawl state to update dirty/clean state"); lock.lock(); - // Not really correct but probably close enough - m_crawl_dirty = false; + // This records the version of the crawl state we saved, which is not necessarily the current state + m_crawl_clean = crawl_saved; BEESLOGINFO("Saved crawl state in " << save_time << "s"); } @@ -148,7 +150,7 @@ void BeesRoots::crawl_state_set_dirty() { unique_lock lock(m_mutex); - m_crawl_dirty = true; + ++m_crawl_dirty; } void @@ -164,7 +166,7 @@ BeesRoots::crawl_state_erase(const BeesCrawlState &bcs) if (m_root_crawl_map.count(bcs.m_root)) { m_root_crawl_map.erase(bcs.m_root); - m_crawl_dirty = true; + ++m_crawl_dirty; } } @@ -442,7 +444,7 @@ void BeesRoots::writeback_thread() { while (true) { - BEESNOTE("idle, " << (m_crawl_dirty ? "dirty" : "clean")); + BEESNOTE("idle, " << (m_crawl_clean != m_crawl_dirty ? "dirty" : "clean")); catch_all([&]() { BEESNOTE("saving crawler state"); @@ -470,7 +472,7 @@ BeesRoots::insert_root(const BeesCrawlState &new_bcs) auto new_bcp = make_shared(m_ctx, new_bcs); auto new_pair = make_pair(new_bcs.m_root, new_bcp); m_root_crawl_map.insert(new_pair); - m_crawl_dirty = true; + ++m_crawl_dirty; } auto found = m_root_crawl_map.find(new_bcs.m_root); THROW_CHECK0(runtime_error, found != m_root_crawl_map.end()); diff --git a/src/bees.h b/src/bees.h index 7dc7434..00762a6 100644 --- a/src/bees.h +++ b/src/bees.h @@ -544,7 +544,8 @@ class BeesRoots : public enable_shared_from_this { BeesStringFile m_crawl_state_file; map> m_root_crawl_map; mutex m_mutex; - bool m_crawl_dirty = false; + uint64_t m_crawl_dirty = 0; + uint64_t m_crawl_clean = 0; Timer m_crawl_timer; BeesThread m_crawl_thread; BeesThread m_writeback_thread;