diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 76f129f..7dc877a 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -120,27 +120,29 @@ BeesRoots::state_save() // We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls. ostringstream ofs; - if (!m_crawl_dirty) { + if (m_crawl_clean == m_crawl_dirty) { BEESLOGINFO("Nothing to save"); return; } state_to_stream(ofs); + const auto crawl_saved = m_crawl_dirty; if (ofs.str().empty()) { BEESLOGWARN("Crawl state empty!"); - m_crawl_dirty = false; + m_crawl_clean = crawl_saved; return; } lock.unlock(); + // This may throw an exception, so we didn't save the state we thought we did. m_crawl_state_file.write(ofs.str()); - BEESNOTE("relocking crawl state"); + BEESNOTE("relocking crawl state to update dirty/clean state"); lock.lock(); - // Not really correct but probably close enough - m_crawl_dirty = false; + // This records the version of the crawl state we saved, which is not necessarily the current state + m_crawl_clean = crawl_saved; BEESLOGINFO("Saved crawl state in " << save_time << "s"); } @@ -148,7 +150,7 @@ void BeesRoots::crawl_state_set_dirty() { unique_lock lock(m_mutex); - m_crawl_dirty = true; + ++m_crawl_dirty; } void @@ -164,7 +166,7 @@ BeesRoots::crawl_state_erase(const BeesCrawlState &bcs) if (m_root_crawl_map.count(bcs.m_root)) { m_root_crawl_map.erase(bcs.m_root); - m_crawl_dirty = true; + ++m_crawl_dirty; } } @@ -442,7 +444,7 @@ void BeesRoots::writeback_thread() { while (true) { - BEESNOTE("idle, " << (m_crawl_dirty ? "dirty" : "clean")); + BEESNOTE("idle, " << (m_crawl_clean != m_crawl_dirty ? "dirty" : "clean")); catch_all([&]() { BEESNOTE("saving crawler state"); @@ -470,7 +472,7 @@ BeesRoots::insert_root(const BeesCrawlState &new_bcs) auto new_bcp = make_shared(m_ctx, new_bcs); auto new_pair = make_pair(new_bcs.m_root, new_bcp); m_root_crawl_map.insert(new_pair); - m_crawl_dirty = true; + ++m_crawl_dirty; } auto found = m_root_crawl_map.find(new_bcs.m_root); THROW_CHECK0(runtime_error, found != m_root_crawl_map.end()); diff --git a/src/bees.h b/src/bees.h index 7dc7434..00762a6 100644 --- a/src/bees.h +++ b/src/bees.h @@ -544,7 +544,8 @@ class BeesRoots : public enable_shared_from_this { BeesStringFile m_crawl_state_file; map> m_root_crawl_map; mutex m_mutex; - bool m_crawl_dirty = false; + uint64_t m_crawl_dirty = 0; + uint64_t m_crawl_clean = 0; Timer m_crawl_timer; BeesThread m_crawl_thread; BeesThread m_writeback_thread;