From 055c8d4c75941985d326a30513e501ac4fc399c3 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Mon, 15 Jan 2018 23:07:12 -0500 Subject: [PATCH] roots: scan in parallel using Tasks Distribute incoming extents across a thread pool for faster execution on multi-core, multi-disk environments. Switch extent enumeration model to scan extent refs consecutively(ish). Signed-off-by: Zygo Blaxell --- README.md | 22 ++++++++++++++++++ src/bees-context.cc | 17 ++++++++++++-- src/bees-roots.cc | 54 ++++++++++++++++++++++++++++++--------------- src/bees.cc | 36 +++++++++++++++++++++++++----- src/bees.h | 9 +++++--- 5 files changed, 109 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 240973c..a9295b1 100644 --- a/README.md +++ b/README.md @@ -488,6 +488,28 @@ of information about the contents of the filesystem through the log file. There are also some shell wrappers in the `scripts/` directory. + +Command Line Options +-------------------- + +* --thread-count (-c) COUNT + * Specify number of worker threads for scanning. Overrides --thread-factor (-C) + and default/autodetected values. +* --thread-factor (-C) FACTOR + * Specify ratio of worker threads to CPU cores. Overridden by --thread-count (-c). + Default is 1.0, i.e. 1 worker thread per detected CPU. Use values + below 1.0 to leave some cores idle, or above 1.0 if there are more + disks than CPUs in the filesystem. +* --timestamps (-t) + * Enable timestamps in log output. +* --notimestamps (-T) + * Disable timestamps in log output. +* --absolute-paths (-p) + * Paths in log output will be absolute. +* --relative-paths (-P) + * Paths in log output will be relative to the working directory at Bees startup. + + Bug Reports and Contributions ----------------------------- diff --git a/src/bees-context.cc b/src/bees-context.cc index 62f8f05..4c1c324 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -2,6 +2,7 @@ #include "crucible/limits.h" #include "crucible/string.h" +#include "crucible/task.h" #include #include @@ -94,11 +95,19 @@ BeesContext::dump_status() ofs << "RATES:\n"; ofs << "\t" << avg_rates << "\n"; - ofs << "THREADS:\n"; + ofs << "THREADS (work queue " << TaskMaster::get_queue_count() << " tasks):\n"; for (auto t : BeesNote::get_status()) { ofs << "\ttid " << t.first << ": " << t.second << "\n"; } +#if 0 + // Huge amount of data, not a lot of information (yet) + ofs << "WORKERS:\n"; + TaskMaster::print_workers(ofs); + ofs << "QUEUE:\n"; + TaskMaster::print_queue(ofs); +#endif + ofs.close(); BEESNOTE("renaming status file '" << status_file << "'"); @@ -719,6 +728,9 @@ BeesContext::scan_forward(const BeesFileRange &bfr) e = ew.current(); catch_all([&]() { + uint64_t extent_bytenr = e.bytenr(); + BEESNOTE("waiting for extent bytenr " << to_hex(extent_bytenr)); + auto extent_lock = m_extent_lock_set.make_lock(extent_bytenr); Timer one_extent_timer; return_bfr = scan_one_extent(bfr, e); BEESCOUNTADD(scanf_extent_ms, one_extent_timer.age() * 1000); @@ -810,7 +822,8 @@ BeesContext::set_root_fd(Fd fd) m_root_uuid = fsinfo.uuid(); BEESLOG("Filesystem UUID is " << m_root_uuid); - // 65536 is big enough for two max-sized extents + // 65536 is big enough for two max-sized extents. + // Need enough total space in the cache for the maximum number of active threads. m_resolve_cache.max_size(65536); m_resolve_cache.func([&](BeesAddress addr) -> BeesResolveAddrResult { return resolve_addr_uncached(addr); diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 65b5aeb..d793912 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -2,6 +2,7 @@ #include "crucible/cache.h" #include "crucible/string.h" +#include "crucible/task.h" #include #include @@ -197,17 +198,17 @@ BeesRoots::crawl_roots() BEESNOTE("Crawling roots"); unique_lock lock(m_mutex); - if (m_root_crawl_map.empty()) { - BEESNOTE("idle, crawl map is empty"); - m_condvar.wait(lock); - // Don't count the time we were waiting as part of the crawl time - m_crawl_timer.reset(); - } - // Work from a copy because BeesCrawl might change the world under us auto crawl_map_copy = m_root_crawl_map; lock.unlock(); + // Nothing to crawl? Seems suspicious... + if (m_root_crawl_map.empty()) { + BEESLOG("idle: crawl map is empty!"); + } + + auto ctx_copy = m_ctx; + #if 0 // Scan the same inode/offset tuple in each subvol (good for snapshots) BeesFileRange first_range; @@ -224,10 +225,13 @@ BeesRoots::crawl_roots() } if (first_range) { - catch_all([&]() { + Task([ctx_copy, first_range]() { // BEESINFO("scan_forward " << first_range); - m_ctx->scan_forward(first_range); - }); + ctx_copy->scan_forward(first_range); + }, + [first_range](ostream &os) -> ostream & { + return os << "scan_forward " << first_range; + }).run(); BEESCOUNT(crawl_scan); m_crawl_current = first_crawl->get_state(); auto first_range_popped = first_crawl->pop_front(); @@ -241,10 +245,13 @@ BeesRoots::crawl_roots() auto this_crawl = i.second; auto this_range = this_crawl->peek_front(); if (this_range) { - catch_all([&]() { + Task([ctx_copy, this_range]() { // BEESINFO("scan_forward " << this_range); - m_ctx->scan_forward(this_range); - }); + ctx_copy->scan_forward(this_range); + }, + [this_range](ostream &os) -> ostream & { + return os << "scan_forward " << this_range; + }).run(); crawled = true; BEESCOUNT(crawl_scan); m_crawl_current = this_crawl->get_state(); @@ -269,12 +276,23 @@ BeesRoots::crawl_roots() void BeesRoots::crawl_thread() { + // TODO: get rid of the thread. For now it is a convenient + // way to avoid the weird things that happen when you try to + // shared_from_this() in a constructor. BEESNOTE("crawling"); - while (1) { - catch_all([&]() { - crawl_roots(); - }); - } + auto shared_this = shared_from_this(); + Task([shared_this]() { + auto tqs = TaskMaster::get_queue_count(); + while (tqs < BEES_MAX_QUEUE_SIZE) { + // BEESLOG("Task queue size " << tqs << ", crawling..."); + catch_all([&]() { + shared_this->crawl_roots(); + }); + tqs = TaskMaster::get_queue_count(); + } + BEESLOG("Task queue size " << tqs << ", paused"); + Task::current_task().run(); + }, [](ostream &os) -> ostream& { return os << "crawl task"; }).run(); } void diff --git a/src/bees.cc b/src/bees.cc index aa47609..36ed4e4 100644 --- a/src/bees.cc +++ b/src/bees.cc @@ -3,6 +3,7 @@ #include "crucible/limits.h" #include "crucible/process.h" #include "crucible/string.h" +#include "crucible/task.h" #include #include @@ -39,6 +40,8 @@ do_cmd_help(char *argv[]) "\n" "Options:\n" "\t-h, --help\t\tShow this help\n" + "\t-c, --thread-count\tWorker thread count (default CPU count * factor)\n" + "\t-C, --thread-factor\tWorker thread factor (default " << BEES_DEFAULT_THREAD_FACTOR << ")\n" "\t-t, --timestamps\tShow timestamps in log output (default)\n" "\t-T, --notimestamps\tOmit timestamps in log output\n" "\t-p, --absolute-paths\tShow absolute paths (default)\n" @@ -613,25 +616,35 @@ bees_main(int argc, char *argv[]) // Defaults bool chatter_prefix_timestamp = true; + double thread_factor = 0; + unsigned thread_count = 0; // Parse options int c; while (1) { int option_index = 0; static struct option long_options[] = { - { "timestamps", no_argument, NULL, 't' }, - { "notimestamps", no_argument, NULL, 'T' }, - { "absolute-paths", no_argument, NULL, 'p' }, - { "relative-paths", no_argument, NULL, 'P' }, - { "help", no_argument, NULL, 'h' } + { "thread-count", required_argument, NULL, 'c' }, + { "thread-factor", required_argument, NULL, 'C' }, + { "timestamps", no_argument, NULL, 't' }, + { "notimestamps", no_argument, NULL, 'T' }, + { "absolute-paths", no_argument, NULL, 'p' }, + { "relative-paths", no_argument, NULL, 'P' }, + { "help", no_argument, NULL, 'h' } }; - c = getopt_long(argc, argv, "TtPph", long_options, &option_index); + c = getopt_long(argc, argv, "c:C:TtPph", long_options, &option_index); if (-1 == c) { break; } switch (c) { + case 'c': + thread_count = stoul(optarg); + break; + case 'C': + thread_factor = stod(optarg); + break; case 'T': chatter_prefix_timestamp = false; break; @@ -668,6 +681,17 @@ bees_main(int argc, char *argv[]) BEESLOG("setrlimit(RLIMIT_NOFILE, { " << lim.rlim_cur << " }): " << strerror(errno)); }; + // Set up worker thread pool + THROW_CHECK1(out_of_range, thread_factor, thread_factor >= 0); + if (thread_count < 1) { + if (thread_factor == 0) { + thread_factor = BEES_DEFAULT_THREAD_FACTOR; + } + thread_count = max(1U, static_cast(ceil(thread::hardware_concurrency() * thread_factor))); + } + + TaskMaster::set_thread_count(thread_count); + // Create a context and start crawlers bool did_subscription = false; while (optind < argc) { diff --git a/src/bees.h b/src/bees.h index cdbc34e..2850ba2 100644 --- a/src/bees.h +++ b/src/bees.h @@ -82,8 +82,8 @@ const size_t BEES_ROOT_FD_CACHE_SIZE = 1024; // Number of FDs to open (rlimit) const size_t BEES_OPEN_FILE_LIMIT = (BEES_FILE_FD_CACHE_SIZE + BEES_ROOT_FD_CACHE_SIZE) * 2 + 100; -// Worker thread limit (more threads may be created, but only this number will be active concurrently) -const size_t BEES_WORKER_THREAD_LIMIT = 128; +// Worker thread factor (multiplied by detected number of CPU cores) +const double BEES_DEFAULT_THREAD_FACTOR = 1.0; // Log warnings when an operation takes too long const double BEES_TOO_LONG = 2.5; @@ -516,7 +516,7 @@ public: void set_state(const BeesCrawlState &bcs); }; -class BeesRoots { +class BeesRoots : public enable_shared_from_this { shared_ptr m_ctx; BeesStringFile m_crawl_state_file; @@ -677,6 +677,8 @@ class BeesContext : public enable_shared_from_this { Timer m_total_timer; + LockSet m_extent_lock_set; + void set_root_fd(Fd fd); BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr); @@ -714,6 +716,7 @@ public: shared_ptr tmpfile(); const Timer &total_timer() const { return m_total_timer; } + LockSet &extent_lock_set() { return m_extent_lock_set; } // TODO: move the rest of the FD cache methods here void insert_root_ino(Fd fd);