1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-17 21:35:45 +02:00

roots: correctly track crawl dirty state

If there's an error while writing the crawl state, the state should
remain dirty.  If the crawl state is successfully written, the state
is clean only if there were no changes to the crawl state since the
version being written was captured.  We need to release the lock while
writing the state, but still set the dirty flag correctly once the
state has been written successfully.

Replace the bool with a version number counter.  Track the last version
successfully saved and the current version of the crawl state.  The state
is dirty if these counters disagree and clean if they agree.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2022-11-26 23:16:13 -05:00
parent a9c81e5531
commit be9321cdb3
2 changed files with 13 additions and 10 deletions

View File

@ -120,27 +120,29 @@ BeesRoots::state_save()
// We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls. // We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls.
ostringstream ofs; ostringstream ofs;
if (!m_crawl_dirty) { if (m_crawl_clean == m_crawl_dirty) {
BEESLOGINFO("Nothing to save"); BEESLOGINFO("Nothing to save");
return; return;
} }
state_to_stream(ofs); state_to_stream(ofs);
const auto crawl_saved = m_crawl_dirty;
if (ofs.str().empty()) { if (ofs.str().empty()) {
BEESLOGWARN("Crawl state empty!"); BEESLOGWARN("Crawl state empty!");
m_crawl_dirty = false; m_crawl_clean = crawl_saved;
return; return;
} }
lock.unlock(); lock.unlock();
// This may throw an exception, so we didn't save the state we thought we did.
m_crawl_state_file.write(ofs.str()); m_crawl_state_file.write(ofs.str());
BEESNOTE("relocking crawl state"); BEESNOTE("relocking crawl state to update dirty/clean state");
lock.lock(); lock.lock();
// Not really correct but probably close enough // This records the version of the crawl state we saved, which is not necessarily the current state
m_crawl_dirty = false; m_crawl_clean = crawl_saved;
BEESLOGINFO("Saved crawl state in " << save_time << "s"); BEESLOGINFO("Saved crawl state in " << save_time << "s");
} }
@ -148,7 +150,7 @@ void
BeesRoots::crawl_state_set_dirty() BeesRoots::crawl_state_set_dirty()
{ {
unique_lock<mutex> lock(m_mutex); unique_lock<mutex> lock(m_mutex);
m_crawl_dirty = true; ++m_crawl_dirty;
} }
void void
@ -164,7 +166,7 @@ BeesRoots::crawl_state_erase(const BeesCrawlState &bcs)
if (m_root_crawl_map.count(bcs.m_root)) { if (m_root_crawl_map.count(bcs.m_root)) {
m_root_crawl_map.erase(bcs.m_root); m_root_crawl_map.erase(bcs.m_root);
m_crawl_dirty = true; ++m_crawl_dirty;
} }
} }
@ -442,7 +444,7 @@ void
BeesRoots::writeback_thread() BeesRoots::writeback_thread()
{ {
while (true) { while (true) {
BEESNOTE("idle, " << (m_crawl_dirty ? "dirty" : "clean")); BEESNOTE("idle, " << (m_crawl_clean != m_crawl_dirty ? "dirty" : "clean"));
catch_all([&]() { catch_all([&]() {
BEESNOTE("saving crawler state"); BEESNOTE("saving crawler state");
@ -470,7 +472,7 @@ BeesRoots::insert_root(const BeesCrawlState &new_bcs)
auto new_bcp = make_shared<BeesCrawl>(m_ctx, new_bcs); auto new_bcp = make_shared<BeesCrawl>(m_ctx, new_bcs);
auto new_pair = make_pair(new_bcs.m_root, new_bcp); auto new_pair = make_pair(new_bcs.m_root, new_bcp);
m_root_crawl_map.insert(new_pair); m_root_crawl_map.insert(new_pair);
m_crawl_dirty = true; ++m_crawl_dirty;
} }
auto found = m_root_crawl_map.find(new_bcs.m_root); auto found = m_root_crawl_map.find(new_bcs.m_root);
THROW_CHECK0(runtime_error, found != m_root_crawl_map.end()); THROW_CHECK0(runtime_error, found != m_root_crawl_map.end());

View File

@ -544,7 +544,8 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
BeesStringFile m_crawl_state_file; BeesStringFile m_crawl_state_file;
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map; map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
mutex m_mutex; mutex m_mutex;
bool m_crawl_dirty = false; uint64_t m_crawl_dirty = 0;
uint64_t m_crawl_clean = 0;
Timer m_crawl_timer; Timer m_crawl_timer;
BeesThread m_crawl_thread; BeesThread m_crawl_thread;
BeesThread m_writeback_thread; BeesThread m_writeback_thread;