mirror of
https://github.com/Zygo/bees.git
synced 2025-05-17 13:25:45 +02:00
roots: correctly track crawl dirty state
If there's an error while writing the crawl state, the state should remain dirty. If the crawl state is successfully written, the state is only clean if there were no changes to crawl state since the write was committed. We need to release the lock while writing the state but correctly set the dirty flag when the state is written successfully. Replace the bool with a version number counter. Track the last version successfully saved and the current version of the crawl state. The state is dirty if these counters disagree and clean if they agree. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
parent
a9c81e5531
commit
be9321cdb3
@ -120,27 +120,29 @@ BeesRoots::state_save()
|
||||
// We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls.
|
||||
ostringstream ofs;
|
||||
|
||||
if (!m_crawl_dirty) {
|
||||
if (m_crawl_clean == m_crawl_dirty) {
|
||||
BEESLOGINFO("Nothing to save");
|
||||
return;
|
||||
}
|
||||
|
||||
state_to_stream(ofs);
|
||||
const auto crawl_saved = m_crawl_dirty;
|
||||
|
||||
if (ofs.str().empty()) {
|
||||
BEESLOGWARN("Crawl state empty!");
|
||||
m_crawl_dirty = false;
|
||||
m_crawl_clean = crawl_saved;
|
||||
return;
|
||||
}
|
||||
|
||||
lock.unlock();
|
||||
|
||||
// This may throw an exception, so we didn't save the state we thought we did.
|
||||
m_crawl_state_file.write(ofs.str());
|
||||
|
||||
BEESNOTE("relocking crawl state");
|
||||
BEESNOTE("relocking crawl state to update dirty/clean state");
|
||||
lock.lock();
|
||||
// Not really correct but probably close enough
|
||||
m_crawl_dirty = false;
|
||||
// This records the version of the crawl state we saved, which is not necessarily the current state
|
||||
m_crawl_clean = crawl_saved;
|
||||
BEESLOGINFO("Saved crawl state in " << save_time << "s");
|
||||
}
|
||||
|
||||
@ -148,7 +150,7 @@ void
|
||||
BeesRoots::crawl_state_set_dirty()
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
m_crawl_dirty = true;
|
||||
++m_crawl_dirty;
|
||||
}
|
||||
|
||||
void
|
||||
@ -164,7 +166,7 @@ BeesRoots::crawl_state_erase(const BeesCrawlState &bcs)
|
||||
|
||||
if (m_root_crawl_map.count(bcs.m_root)) {
|
||||
m_root_crawl_map.erase(bcs.m_root);
|
||||
m_crawl_dirty = true;
|
||||
++m_crawl_dirty;
|
||||
}
|
||||
}
|
||||
|
||||
@ -442,7 +444,7 @@ void
|
||||
BeesRoots::writeback_thread()
|
||||
{
|
||||
while (true) {
|
||||
BEESNOTE("idle, " << (m_crawl_dirty ? "dirty" : "clean"));
|
||||
BEESNOTE("idle, " << (m_crawl_clean != m_crawl_dirty ? "dirty" : "clean"));
|
||||
|
||||
catch_all([&]() {
|
||||
BEESNOTE("saving crawler state");
|
||||
@ -470,7 +472,7 @@ BeesRoots::insert_root(const BeesCrawlState &new_bcs)
|
||||
auto new_bcp = make_shared<BeesCrawl>(m_ctx, new_bcs);
|
||||
auto new_pair = make_pair(new_bcs.m_root, new_bcp);
|
||||
m_root_crawl_map.insert(new_pair);
|
||||
m_crawl_dirty = true;
|
||||
++m_crawl_dirty;
|
||||
}
|
||||
auto found = m_root_crawl_map.find(new_bcs.m_root);
|
||||
THROW_CHECK0(runtime_error, found != m_root_crawl_map.end());
|
||||
|
@ -544,7 +544,8 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
||||
BeesStringFile m_crawl_state_file;
|
||||
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
|
||||
mutex m_mutex;
|
||||
bool m_crawl_dirty = false;
|
||||
uint64_t m_crawl_dirty = 0;
|
||||
uint64_t m_crawl_clean = 0;
|
||||
Timer m_crawl_timer;
|
||||
BeesThread m_crawl_thread;
|
||||
BeesThread m_writeback_thread;
|
||||
|
Loading…
x
Reference in New Issue
Block a user