mirror of
https://github.com/Zygo/bees.git
synced 2025-07-12 13:12:26 +02:00
bees: handle SIGTERM and SIGINT, force immediate flush and exit
Capture SIGINT and SIGTERM and shut down, preserving current completed crawl and hash table state.

 * Executing tasks are completed, queued tasks are paused.
 * Crawl state is saved.
 * The crawl master and crawl writeback threads are terminated.
 * The task queue is flushed.
 * Dirty hash table extents are flushed.
 * Hash prefetch and writeback threads are terminated.
 * Hash table is deallocated.
 * FD caches and tmpfiles are destroyed.
 * Assuming the above didn't crash or deadlock, bees exits.

The above order isn't the fastest, but it does roughly follow the shared_ptr dependencies and avoids data races--especially those that might lead to bees reporting an extent scanned when it was only queued for future scanning that did not occur.

In case of a violation of expected shared_ptr dependency order, exceptions in BeesContext child object accessor methods (e.g. roots(), hash_table(), etc.) prevent any further progress in threads that somehow remain unexpectedly active.

Move some threads from main into BeesContext so they can be stopped via BeesContext. The main thread now runs a loop waiting for signals.

A slow FD leak was discovered in TempFile handling. This has not been fixed yet, but an implementation detail of the C++ runtime library makes the leak so slow it may never be important enough to fix.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
@ -414,7 +414,7 @@ BeesRoots::crawl_thread()
|
||||
// Monitor transid_max and wake up roots when it changes
|
||||
BEESNOTE("tracking transid");
|
||||
auto last_count = m_transid_re.count();
|
||||
while (true) {
|
||||
while (!m_stop_requested) {
|
||||
// Measure current transid
|
||||
catch_all([&]() {
|
||||
m_transid_re.update(transid_max_nocache());
|
||||
@ -441,7 +441,12 @@ BeesRoots::crawl_thread()
|
||||
auto poll_time = m_transid_re.seconds_for(m_transid_factor);
|
||||
BEESLOGDEBUG("Polling " << poll_time << "s for next " << m_transid_factor << " transid " << m_transid_re);
|
||||
BEESNOTE("waiting " << poll_time << "s for next " << m_transid_factor << " transid " << m_transid_re);
|
||||
nanosleep(poll_time);
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
if (m_stop_requested) {
|
||||
BEESLOGDEBUG("Stop requested in crawl thread");
|
||||
break;
|
||||
}
|
||||
m_stop_condvar.wait_for(lock, chrono::duration<double>(poll_time));
|
||||
}
|
||||
}
|
||||
|
||||
@ -456,7 +461,16 @@ BeesRoots::writeback_thread()
|
||||
state_save();
|
||||
});
|
||||
|
||||
nanosleep(BEES_WRITEBACK_INTERVAL);
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
if (m_stop_requested) {
|
||||
BEESLOGDEBUG("Stop requested in writeback thread");
|
||||
catch_all([&]() {
|
||||
BEESNOTE("flushing crawler state");
|
||||
state_save();
|
||||
});
|
||||
return;
|
||||
}
|
||||
m_stop_condvar.wait_for(lock, chrono::duration<double>(BEES_WRITEBACK_INTERVAL));
|
||||
}
|
||||
}
|
||||
|
||||
@ -574,6 +588,7 @@ BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
|
||||
catch_all([&]() {
|
||||
state_load();
|
||||
});
|
||||
|
||||
m_writeback_thread.exec([&]() {
|
||||
writeback_thread();
|
||||
});
|
||||
@ -581,6 +596,29 @@ BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::stop()
|
||||
{
|
||||
BEESLOGDEBUG("BeesRoots stop requested");
|
||||
BEESNOTE("stopping BeesRoots");
|
||||
unique_lock<mutex> lock(m_stop_mutex);
|
||||
m_stop_requested = true;
|
||||
m_stop_condvar.notify_all();
|
||||
lock.unlock();
|
||||
|
||||
// Stop crawl writeback first because we will break progress
|
||||
// state tracking when we cancel the TaskMaster queue
|
||||
BEESLOGDEBUG("Waiting for crawl writeback");
|
||||
BEESNOTE("waiting for crawl_writeback thread");
|
||||
m_writeback_thread.join();
|
||||
|
||||
BEESLOGDEBUG("Waiting for crawl thread");
|
||||
BEESNOTE("waiting for crawl_thread thread");
|
||||
m_crawl_thread.join();
|
||||
|
||||
BEESLOGDEBUG("BeesRoots stopped");
|
||||
}
|
||||
|
||||
Fd
|
||||
BeesRoots::open_root_nocache(uint64_t rootid)
|
||||
{
|
||||
|
Reference in New Issue
Block a user