mirror of https://github.com/Zygo/bees.git
bees: handle SIGTERM and SIGINT, force immediate flush and exit
Capture SIGINT and SIGTERM and shut down, preserving current completed crawl and hash table state:

* Executing tasks are completed; queued tasks are paused.
* Crawl state is saved.
* The crawl master and crawl writeback threads are terminated.
* The task queue is flushed.
* Dirty hash table extents are flushed.
* Hash prefetch and writeback threads are terminated.
* Hash table is deallocated.
* FD caches and tmpfiles are destroyed.
* Assuming the above didn't crash or deadlock, bees exits.

The above order isn't the fastest, but it roughly follows the shared_ptr dependencies and avoids data races--especially those that might lead to bees reporting an extent as scanned when it was only queued for a future scan that never occurred.

In case of a violation of the expected shared_ptr dependency order, exceptions in BeesContext child object accessor methods (e.g. roots(), hash_table(), etc.) prevent any further progress in threads that somehow remain unexpectedly active.

Move some threads from main into BeesContext so they can be stopped via BeesContext. The main thread now runs a loop waiting for signals.

A slow FD leak was discovered in TempFile handling. This has not been fixed yet, but an implementation detail of the C++ runtime library makes the leak so slow it may never be important enough to fix.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
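The signal-wait loop in main is described here but not shown in the hunks below. A minimal sketch of that pattern (illustrative only: the mask setup is standard POSIX, but BeesContext::stop() is a hypothetical stand-in for the ordered shutdown above):

#include <signal.h>
#include <iostream>

int main()
{
	sigset_t sigs;
	sigemptyset(&sigs);
	sigaddset(&sigs, SIGINT);
	sigaddset(&sigs, SIGTERM);

	// Block both signals before spawning workers so that only the
	// main thread ever takes delivery of them.
	pthread_sigmask(SIG_BLOCK, &sigs, nullptr);

	// ... construct BeesContext and start worker threads here ...

	// The main thread's job is now just to wait for a shutdown signal.
	int sig = 0;
	while (sigwait(&sigs, &sig) == 0) {
		if (sig == SIGINT || sig == SIGTERM) {
			std::cerr << "got signal " << sig << ", shutting down\n";
			break;
		}
	}

	// ctx->stop();  // hypothetical: run the ordered shutdown above
	return 0;
}

Blocking the signals before any worker thread starts means the workers inherit the mask, so delivery is serialized through the one thread that knows how to shut everything down.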
@@ -130,13 +130,11 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
 		wrote_extent = true;
 	});
 
-	BEESNOTE("flush rate limited after extent #" << extent_index << " of " << m_extents << " extents");
-	m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
 	return wrote_extent;
 }
 
-void
-BeesHashTable::flush_dirty_extents()
+size_t
+BeesHashTable::flush_dirty_extents(bool slowly)
 {
 	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
 
@@ -144,12 +142,22 @@ BeesHashTable::flush_dirty_extents()
 	for (size_t extent_index = 0; extent_index < m_extents; ++extent_index) {
 		if (flush_dirty_extent(extent_index)) {
 			++wrote_extents;
+			if (slowly) {
+				BEESNOTE("flush rate limited after extent #" << extent_index << " of " << m_extents << " extents");
+				chrono::duration<double> sleep_time(m_flush_rate_limit.sleep_time(BLOCK_SIZE_HASHTAB_EXTENT));
+				unique_lock<mutex> lock(m_stop_mutex);
+				if (m_stop_requested) {
+					BEESLOGDEBUG("Stop requested in hash table flush_dirty_extents");
+					break;
+				}
+				m_stop_condvar.wait_for(lock, sleep_time);
+			}
 		}
 	}
-
-	BEESNOTE("idle after writing " << wrote_extents << " of " << m_extents << " extents");
-	unique_lock<mutex> lock(m_dirty_mutex);
-	m_dirty_condvar.wait(lock);
+	if (!slowly) {
+		BEESLOGINFO("Flushed " << wrote_extents << " of " << m_extents << " extents");
+	}
+	return wrote_extents;
 }
 
 void
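The rewritten flush loop above swaps the unconditional m_flush_rate_limit.sleep_for() for a timed wait on m_stop_condvar, so stop() can cut a rate-limit sleep short instead of waiting out the full interval. The same idiom in isolation, as a minimal sketch with illustrative names (not the bees API):

#include <chrono>
#include <condition_variable>
#include <mutex>

struct StopToken {
	std::mutex m;
	std::condition_variable cv;
	bool stop_requested = false;

	// Sleep for d, but return early (false) if stop() is called.
	bool sleep_for(std::chrono::duration<double> d)
	{
		std::unique_lock<std::mutex> lock(m);
		// wait_for returns true when the predicate became true,
		// i.e. when a stop was requested during the sleep.
		return !cv.wait_for(lock, d, [this] { return stop_requested; });
	}

	void stop()
	{
		std::unique_lock<std::mutex> lock(m);
		stop_requested = true;
		cv.notify_all();
	}
};

The hunk above open-codes the same logic: it takes m_stop_mutex, tests m_stop_requested, and only then waits, which also closes the window where a stop request could slip in between the test and the wait.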
@@ -160,15 +168,29 @@ BeesHashTable::set_extent_dirty_locked(uint64_t extent_index)
 
 	// Signal writeback thread
 	unique_lock<mutex> dirty_lock(m_dirty_mutex);
 	m_dirty = true;
 	m_dirty_condvar.notify_one();
 }
 
 void
 BeesHashTable::writeback_loop()
 {
-	while (true) {
-		flush_dirty_extents();
+	while (!m_stop_requested) {
+		auto wrote_extents = flush_dirty_extents(true);
+
+		BEESNOTE("idle after writing " << wrote_extents << " of " << m_extents << " extents");
+
+		unique_lock<mutex> lock(m_dirty_mutex);
+		if (m_stop_requested) {
+			break;
+		}
+		if (m_dirty) {
+			m_dirty = false;
+		} else {
+			m_dirty_condvar.wait(lock);
+		}
 	}
+	BEESLOGDEBUG("Exited hash table writeback_loop");
 }
 
 static
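The m_dirty check in the rewritten loop is the standard guard against lost wakeups: notify_one() is not queued, so if set_extent_dirty_locked() fires while flush_dirty_extents() is still running, a loop that went straight to wait() would sleep through the notification. Checking and consuming the flag under the lock first means the loop flushes again instead. A stripped-down sketch of the pattern (illustrative names):

#include <condition_variable>
#include <mutex>

std::mutex dirty_mutex;
std::condition_variable dirty_condvar;
bool dirty = false;

void mark_dirty()  // producer side
{
	std::unique_lock<std::mutex> lock(dirty_mutex);
	dirty = true;
	dirty_condvar.notify_one();  // may fire while the consumer is busy
}

void writeback_iteration()  // consumer side
{
	// ... flush work happens here, outside the lock ...
	std::unique_lock<std::mutex> lock(dirty_mutex);
	if (dirty) {
		dirty = false;             // work arrived meanwhile: loop again
	} else {
		dirty_condvar.wait(lock);  // nothing pending: sleep until notified
	}
}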
@@ -186,7 +208,7 @@ void
 BeesHashTable::prefetch_loop()
 {
 	bool not_locked = true;
-	while (true) {
+	while (!m_stop_requested) {
 		size_t width = 64;
 		vector<size_t> occupancy(width, 0);
 		size_t occupied_count = 0;
@@ -196,7 +218,7 @@ BeesHashTable::prefetch_loop()
 		size_t toxic_count = 0;
 		size_t unaligned_eof_count = 0;
 
-		for (uint64_t ext = 0; ext < m_extents; ++ext) {
+		for (uint64_t ext = 0; ext < m_extents && !m_stop_requested; ++ext) {
 			BEESNOTE("prefetching hash table extent #" << ext << " of " << m_extents);
 			catch_all([&]() {
 				fetch_missing_extent_by_index(ext);
@@ -300,7 +322,7 @@ BeesHashTable::prefetch_loop()
 			m_stats_file.write(graph_blob.str());
 		});
 
-		if (not_locked) {
+		if (not_locked && !m_stop_requested) {
 			// Always do the mlock, whether shared or not
 			THROW_CHECK1(runtime_error, m_size, m_size > 0);
 			BEESLOGINFO("mlock(" << pretty(m_size) << ")...");
@@ -314,7 +336,12 @@ BeesHashTable::prefetch_loop()
 		}
 
 		BEESNOTE("idle " << BEES_HASH_TABLE_ANALYZE_INTERVAL << "s");
-		nanosleep(BEES_HASH_TABLE_ANALYZE_INTERVAL);
+		unique_lock<mutex> lock(m_stop_mutex);
+		if (m_stop_requested) {
+			BEESLOGDEBUG("Stop requested in hash table prefetch");
+			return;
+		}
+		m_stop_condvar.wait_for(lock, chrono::duration<double>(BEES_HASH_TABLE_ANALYZE_INTERVAL));
 	}
 }
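The prefetch hunks above also show the other half of prompt shutdown: long-running loops re-check the stop flag in their bound, so a stop request takes effect within one extent rather than one full pass over the table. The pattern in isolation (hypothetical names; this sketch uses an atomic for a race-free read, where bees reads a plain bool under its own conventions):

#include <atomic>
#include <cstdint>

std::atomic<bool> stop_requested{false};

void scan_extents(uint64_t n_extents)
{
	// Cooperative cancellation: the flag is re-checked every iteration.
	for (uint64_t ext = 0; ext < n_extents && !stop_requested; ++ext) {
		// ... per-extent work: prefetch, occupancy histogram, etc. ...
	}
}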
@@ -704,13 +731,50 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t
 
 BeesHashTable::~BeesHashTable()
 {
+	BEESLOGDEBUG("Destroy BeesHashTable");
 	if (m_cell_ptr && m_size) {
-		flush_dirty_extents();
+		// Dirty extents should have been flushed before now,
+		// e.g. in stop().  If that didn't happen, don't fall
+		// into the same trap (and maybe throw an exception) here.
+		// flush_dirty_extents(false);
 		catch_all([&]() {
 			DIE_IF_NON_ZERO(munmap(m_cell_ptr, m_size));
 			m_cell_ptr = nullptr;
 			m_size = 0;
 		});
 	}
+	BEESLOGDEBUG("BeesHashTable destroyed");
 }
 
+void
+BeesHashTable::stop()
+{
+	BEESNOTE("stopping BeesHashTable threads");
+	BEESLOGDEBUG("Stopping BeesHashTable threads");
+
+	unique_lock<mutex> lock(m_stop_mutex);
+	m_stop_requested = true;
+	m_stop_condvar.notify_all();
+	lock.unlock();
+
+	// Wake up hash writeback too
+	unique_lock<mutex> dirty_lock(m_dirty_mutex);
+	m_dirty_condvar.notify_all();
+	dirty_lock.unlock();
+
+	BEESNOTE("waiting for hash_prefetch thread");
+	BEESLOGDEBUG("Waiting for hash_prefetch thread");
+	m_prefetch_thread.join();
+
+	BEESNOTE("waiting for hash_writeback thread");
+	BEESLOGDEBUG("Waiting for hash_writeback thread");
+	m_writeback_thread.join();
+
+	if (m_cell_ptr && m_size) {
+		BEESLOGDEBUG("Flushing hash table");
+		BEESNOTE("flushing hash table");
+		flush_dirty_extents(false);
+	}
+
+	BEESLOGDEBUG("BeesHashTable stopped");
+}
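stop() above follows a fixed handshake: set the flag, notify every condition variable a worker might be blocked on, and only then join. Notifying before join matters because join() on a thread parked in wait() would deadlock. A condensed sketch of the same ordering (illustrative names, not the bees API; the worker is assumed to have been started and to exit once it sees the flag):

#include <condition_variable>
#include <mutex>
#include <thread>

struct StoppableWorker {
	std::mutex stop_mutex, work_mutex;
	std::condition_variable stop_condvar, work_condvar;
	bool stop_requested = false;
	std::thread worker;

	void stop()
	{
		{	// 1. publish the stop request and wake any timed sleeps
			std::unique_lock<std::mutex> lock(stop_mutex);
			stop_requested = true;
			stop_condvar.notify_all();
		}
		{	// 2. wake any thread waiting for new work
			std::unique_lock<std::mutex> lock(work_mutex);
			work_condvar.notify_all();
		}
		// 3. only now is it safe to wait for the thread to exit
		worker.join();
	}
};

After the joins, flush_dirty_extents(false) runs one final unthrottled pass, which is why the destructor no longer flushes (and deliberately avoids doing so).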