mirror of
https://github.com/Zygo/bees.git
synced 2025-06-17 01:56:16 +02:00
bees: handle SIGTERM and SIGINT, force immediate flush and exit
Capture SIGINT and SIGTERM and shut down, preserving current completed crawl and hash table state. * Executing tasks are completed, queued tasks are paused. * Crawl state is saved. * The crawl master and crawl writeback threads are terminated. * The task queue is flushed. * Dirty hash table extents are flushed. * Hash prefetch and writeback threads are terminated. * Hash table is deallocated. * FD caches and tmpfiles are destroyed. * Assuming the above didn't crash or deadlock, bees exits. The above order isn't the fastest, but it does roughly follow the shared_ptr dependencies and avoids data races--especially those that might lead to bees reporting an extent scanned when it was only queued for future scanning that did not occur. In case of a violation of expected shared_ptr dependency order, exceptions in BeesContext child object accessor methods (i.e. roots(), hash_table(), etc) prevent any further progress in threads that somehow remain unexpectedly active. Move some threads from main into BeesContext so they can be stopped via BeesContext. The main thread now runs a loop waiting for signals. A slow FD leak was discovered in TempFile handling. This has not been fixed yet, but an implementation detail of the C++ runtime library makes the leak so slow it may never be important enough to fix. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
91
src/bees.cc
91
src/bees.cc
@ -531,10 +531,16 @@ BeesTempFile::resize(off_t offset)
|
||||
BEESCOUNTADD(tmp_resize_ms, resize_timer.age() * 1000);
|
||||
}
|
||||
|
||||
/// Destructor: emits a debug log line carrying this object's address.
/// No other work is done in the destructor body itself.
BeesTempFile::~BeesTempFile() {
	BEESLOGDEBUG("Destructing BeesTempFile " << this);
}
|
||||
|
||||
/// Construct a temporary file object bound to the given context.
///
/// Stores @p ctx in m_ctx, starts with an end offset of zero, logs the
/// construction (with this object's address) and then calls create().
BeesTempFile::BeesTempFile(shared_ptr<BeesContext> ctx)
	: m_ctx(ctx)
	, m_end_offset(0)
{
	BEESLOGDEBUG("Constructing BeesTempFile " << this);
	create();
}
|
||||
|
||||
@ -640,6 +646,77 @@ BeesTempFile::make_copy(const BeesFileRange &src)
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
ostream &
|
||||
operator<<(ostream &os, const siginfo_t &si)
|
||||
{
|
||||
return os << "siginfo_t { "
|
||||
<< "signo = " << si.si_signo << " (" << signal_ntoa(si.si_signo) << "), "
|
||||
<< "errno = " << si.si_errno << ", "
|
||||
<< "code = " << si.si_code << ", "
|
||||
// << "trapno = " << si.si_trapno << ", "
|
||||
<< "pid = " << si.si_pid << ", "
|
||||
<< "uid = " << si.si_uid << ", "
|
||||
<< "status = " << si.si_status << ", "
|
||||
<< "utime = " << si.si_utime << ", "
|
||||
<< "stime = " << si.si_stime << ", "
|
||||
// << "value = " << si.si_value << ", "
|
||||
<< "int = " << si.si_int << ", "
|
||||
<< "ptr = " << si.si_ptr << ", "
|
||||
<< "overrun = " << si.si_overrun << ", "
|
||||
<< "timerid = " << si.si_timerid << ", "
|
||||
<< "addr = " << si.si_addr << ", "
|
||||
<< "band = " << si.si_band << ", "
|
||||
<< "fd = " << si.si_fd << ", "
|
||||
<< "addr_lsb = " << si.si_addr_lsb << ", "
|
||||
<< "lower = " << si.si_lower << ", "
|
||||
<< "upper = " << si.si_upper << ", "
|
||||
// << "pkey = " << si.si_pkey << ", "
|
||||
<< "call_addr = " << si.si_call_addr << ", "
|
||||
<< "syscall = " << si.si_syscall << ", "
|
||||
<< "arch = " << si.si_arch
|
||||
<< " }";
|
||||
}
|
||||
|
||||
// Signal masks shared by block_term_signal() and wait_for_term_signal().
// block_term_signal() fills new_sigset with SIGTERM and SIGINT and stores
// the mask that was in effect before blocking them in old_sigset.
static sigset_t new_sigset, old_sigset;
|
||||
|
||||
void
|
||||
block_term_signal()
|
||||
{
|
||||
BEESLOGDEBUG("Masking signals");
|
||||
|
||||
DIE_IF_NON_ZERO(sigemptyset(&new_sigset));
|
||||
DIE_IF_NON_ZERO(sigaddset(&new_sigset, SIGTERM));
|
||||
DIE_IF_NON_ZERO(sigaddset(&new_sigset, SIGINT));
|
||||
DIE_IF_NON_ZERO(sigprocmask(SIG_BLOCK, &new_sigset, &old_sigset));
|
||||
}
|
||||
|
||||
void
|
||||
wait_for_term_signal()
|
||||
{
|
||||
BEESNOTE("waiting for signals");
|
||||
BEESLOGDEBUG("Waiting for signals...");
|
||||
siginfo_t info;
|
||||
|
||||
// Ironically, sigwaitinfo can be interrupted by a signal.
|
||||
while (true) {
|
||||
const int rv = sigwaitinfo(&new_sigset, &info);
|
||||
if (rv == -1) {
|
||||
if (errno == EINTR) {
|
||||
BEESLOGDEBUG("Restarting sigwaitinfo");
|
||||
continue;
|
||||
}
|
||||
THROW_ERRNO("sigwaitinfo errno = " << errno);
|
||||
} else {
|
||||
BEESLOGNOTICE("Received signal " << rv << " info " << info);
|
||||
// Unblock so we die immediately if signalled again
|
||||
DIE_IF_NON_ZERO(sigprocmask(SIG_BLOCK, &old_sigset, &new_sigset));
|
||||
break;
|
||||
}
|
||||
}
|
||||
BEESLOGDEBUG("Signal catcher exiting");
|
||||
}
|
||||
|
||||
int
|
||||
bees_main(int argc, char *argv[])
|
||||
{
|
||||
@ -656,6 +733,10 @@ bees_main(int argc, char *argv[])
|
||||
|
||||
THROW_CHECK1(invalid_argument, argc, argc >= 0);
|
||||
|
||||
// Have to block signals now before we create a bunch of threads
|
||||
// so the threads will also have the signals blocked.
|
||||
block_term_signal();
|
||||
|
||||
// Create a context so we can apply configuration to it
|
||||
shared_ptr<BeesContext> bc = make_shared<BeesContext>();
|
||||
|
||||
@ -813,12 +894,14 @@ bees_main(int argc, char *argv[])
|
||||
// Create a context and start crawlers
|
||||
bc->set_root_path(argv[optind++]);
|
||||
|
||||
BeesThread status_thread("status", [&]() {
|
||||
bc->dump_status();
|
||||
});
|
||||
// Start crawlers
|
||||
bc->start();
|
||||
|
||||
// Now we just wait forever
|
||||
bc->show_progress();
|
||||
wait_for_term_signal();
|
||||
|
||||
// Shut it down
|
||||
bc->stop();
|
||||
|
||||
// That is all.
|
||||
return EXIT_SUCCESS;
|
||||
|
Reference in New Issue
Block a user