// Patch summary (leading text before the first "diff --git" header; not part of any hunk).
//
// Purpose: make the crawl strategy in BeesRoots::crawl_roots() selectable at run time
// instead of at compile time, and expose it as a command-line option.
//
// src/bees-roots.cc
//   - Includes "crucible/ntoa.h" and defines the static BeesRoots::s_scan_mode,
//     initialised to SCAN_MODE_ZERO.
//   - Adds BeesRoots::scan_mode_ntoa(), which formats a ScanMode through a bits_ntoa table.
//   - Adds BeesRoots::set_scan_mode(), which checks `mode < SCAN_MODE_COUNT` with
//     THROW_CHECK1(invalid_argument, ...), stores the mode and logs it via BEESLOG.
//   - crawl_roots(): the former `#if 0 / #else / #endif` alternatives become
//     `switch (s_scan_mode)`:
//       SCAN_MODE_ZERO  - pick the crawl whose peek_front() range compares lowest, then
//                         scan up to BEES_MAX_CRAWL_BATCH ranges from that same crawl;
//                         return if anything was scanned or a range is still pending.
//       SCAN_MODE_ONE   - for every crawl, scan up to BEES_MAX_CRAWL_BATCH ranges;
//                         return if anything was scanned.
//       SCAN_MODE_COUNT - assert(false).
//     Falling out of the switch reaches the existing "Crawl ran out of data" log.
//     The commented-out BEESINFO lines inside the scan tasks become BEESNOTE calls.
//   - crawl_thread(): adds a BEESNOTE with the queue count and removes the
//     "Task queue size ... paused" BEESLOG and the commented-out "crawling..." log.
//
// src/bees.cc
//   - Adds `-m` / `--scan-mode` (required argument) to the help text, long_options and
//     the getopt_long option string; the argument is parsed with stoul() and passed to
//     BeesRoots::set_scan_mode().
//
// src/bees.h
//   - BEES_TOO_LONG 2.5 -> 5.0, BEES_MAX_QUEUE_SIZE 1024 -> 128,
//     BEES_MAX_CRAWL_SIZE 4096 -> 1024, new BEES_MAX_CRAWL_BATCH = 128.
//   - Declares enum BeesRoots::ScanMode, public static set_scan_mode(), and private
//     static s_scan_mode / scan_mode_ntoa().
//
// NOTE(review): this copy of the patch has its newlines and indentation collapsed, so
//   blank context lines are not visible and the hunk line counts cannot be checked here.
// NOTE(review): `shared_ptr first_crawl;` and `static_cast(stoul(optarg))` appear without
//   template arguments - presumably angle-bracket text was lost in extraction; confirm
//   against the original commit before applying.
// NOTE(review): the --scan-mode help line has a single tab before its description while
//   neighbouring options use two - confirm the intended column alignment.
diff --git a/src/bees-roots.cc b/src/bees-roots.cc index d793912..40f90eb 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -1,6 +1,7 @@ #include "bees.h" #include "crucible/cache.h" +#include "crucible/ntoa.h" #include "crucible/string.h" #include "crucible/task.h" @@ -10,6 +11,8 @@ using namespace crucible; using namespace std; +BeesRoots::ScanMode BeesRoots::s_scan_mode = BeesRoots::SCAN_MODE_ZERO; + string format_time(time_t t) { @@ -47,6 +50,26 @@ BeesCrawlState::operator<(const BeesCrawlState &that) const < tie(that.m_objectid, that.m_offset, that.m_root, that.m_min_transid, that.m_max_transid); } +string +BeesRoots::scan_mode_ntoa(BeesRoots::ScanMode mode) +{ + static const bits_ntoa_table table[] = { + NTOA_TABLE_ENTRY_ENUM(SCAN_MODE_ZERO), + NTOA_TABLE_ENTRY_ENUM(SCAN_MODE_ONE), + NTOA_TABLE_ENTRY_ENUM(SCAN_MODE_COUNT), + NTOA_TABLE_ENTRY_END() + }; + return bits_ntoa(mode, table); +} + +void +BeesRoots::set_scan_mode(ScanMode mode) +{ + THROW_CHECK1(invalid_argument, mode, mode < SCAN_MODE_COUNT); + s_scan_mode = mode; + BEESLOG("Scan mode set to " << mode << " (" << scan_mode_ntoa(mode) << ")"); +} + string BeesRoots::crawl_state_filename() const { @@ -209,59 +232,75 @@ BeesRoots::crawl_roots() auto ctx_copy = m_ctx; -#if 0 - // Scan the same inode/offset tuple in each subvol (good for snapshots) - BeesFileRange first_range; - shared_ptr first_crawl; - for (auto i : crawl_map_copy) { - auto this_crawl = i.second; - auto this_range = this_crawl->peek_front(); - if (this_range) { - if (!first_range || this_range < first_range) { - first_crawl = this_crawl; - first_range = this_range; + switch (s_scan_mode) { + case SCAN_MODE_ZERO: { + // Scan the same inode/offset tuple in each subvol (good for snapshots) + BeesFileRange first_range; + shared_ptr first_crawl; + for (auto i : crawl_map_copy) { + auto this_crawl = i.second; + auto this_range = this_crawl->peek_front(); + if (this_range) { + if (!first_range || this_range < first_range) { + 
first_crawl = this_crawl; + first_range = this_range; + } + } + } } - } - } - if (first_range) { - Task([ctx_copy, first_range]() { - // BEESINFO("scan_forward " << first_range); - ctx_copy->scan_forward(first_range); - }, - [first_range](ostream &os) -> ostream & { - return os << "scan_forward " << first_range; - }).run(); - BEESCOUNT(crawl_scan); - m_crawl_current = first_crawl->get_state(); - auto first_range_popped = first_crawl->pop_front(); - THROW_CHECK2(runtime_error, first_range, first_range_popped, first_range == first_range_popped); - return; - } -#else - // Scan each subvol one extent at a time (good for continuous forward progress) - bool crawled = false; - for (auto i : crawl_map_copy) { - auto this_crawl = i.second; - auto this_range = this_crawl->peek_front(); - if (this_range) { - Task([ctx_copy, this_range]() { - // BEESINFO("scan_forward " << this_range); - ctx_copy->scan_forward(this_range); - }, - [this_range](ostream &os) -> ostream & { - return os << "scan_forward " << this_range; - }).run(); - crawled = true; - BEESCOUNT(crawl_scan); - m_crawl_current = this_crawl->get_state(); - auto this_range_popped = this_crawl->pop_front(); - THROW_CHECK2(runtime_error, this_range, this_range_popped, this_range == this_range_popped); - } - } + size_t batch_count = 0; + while (first_range && batch_count < BEES_MAX_CRAWL_BATCH) { + Task([ctx_copy, first_range]() { + BEESNOTE("scan_forward " << first_range); + ctx_copy->scan_forward(first_range); + }, + [first_range](ostream &os) -> ostream & { + return os << "scan_forward " << first_range; + }).run(); + BEESCOUNT(crawl_scan); + m_crawl_current = first_crawl->get_state(); + auto first_range_popped = first_crawl->pop_front(); + THROW_CHECK2(runtime_error, first_range, first_range_popped, first_range == first_range_popped); + first_range = first_crawl->peek_front(); + ++batch_count; + } - if (crawled) return; -#endif + if (first_range || batch_count) { + return; + } + + break; + } + case SCAN_MODE_ONE: { + // 
Scan each subvol one extent at a time (good for continuous forward progress) + bool crawled = false; + for (auto i : crawl_map_copy) { + auto this_crawl = i.second; + auto this_range = this_crawl->peek_front(); + size_t batch_count = 0; + while (this_range && batch_count < BEES_MAX_CRAWL_BATCH) { + Task([ctx_copy, this_range]() { + BEESNOTE("scan_forward " << this_range); + ctx_copy->scan_forward(this_range); + }, + [this_range](ostream &os) -> ostream & { + return os << "scan_forward " << this_range; + }).run(); + crawled = true; + BEESCOUNT(crawl_scan); + m_crawl_current = this_crawl->get_state(); + auto this_range_popped = this_crawl->pop_front(); + THROW_CHECK2(runtime_error, this_range, this_range_popped, this_range == this_range_popped); + this_range = this_crawl->peek_front(); + ++batch_count; + } + } + + if (crawled) return; + break; + } + case SCAN_MODE_COUNT: assert(false); break; + } BEESLOG("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for more..."); BEESCOUNT(crawl_done); @@ -283,14 +322,13 @@ BeesRoots::crawl_thread() auto shared_this = shared_from_this(); Task([shared_this]() { auto tqs = TaskMaster::get_queue_count(); + BEESNOTE("queueing extents to scan, " << tqs << " of " << BEES_MAX_QUEUE_SIZE); while (tqs < BEES_MAX_QUEUE_SIZE) { - // BEESLOG("Task queue size " << tqs << ", crawling..."); catch_all([&]() { shared_this->crawl_roots(); }); tqs = TaskMaster::get_queue_count(); } - BEESLOG("Task queue size " << tqs << ", paused"); Task::current_task().run(); }, [](ostream &os) -> ostream& { return os << "crawl task"; }).run(); } diff --git a/src/bees.cc b/src/bees.cc index 36ed4e4..89085a5 100644 --- a/src/bees.cc +++ b/src/bees.cc @@ -42,6 +42,7 @@ do_cmd_help(char *argv[]) "\t-h, --help\t\tShow this help\n" "\t-c, --thread-count\tWorker thread count (default CPU count * factor)\n" "\t-C, --thread-factor\tWorker thread factor (default " << BEES_DEFAULT_THREAD_FACTOR << ")\n" + "\t-m, --scan-mode\tScanning mode (0..1, default 
0)\n" "\t-t, --timestamps\tShow timestamps in log output (default)\n" "\t-T, --notimestamps\tOmit timestamps in log output\n" "\t-p, --absolute-paths\tShow absolute paths (default)\n" @@ -626,6 +627,7 @@ bees_main(int argc, char *argv[]) static struct option long_options[] = { { "thread-count", required_argument, NULL, 'c' }, { "thread-factor", required_argument, NULL, 'C' }, + { "scan-mode", required_argument, NULL, 'm' }, { "timestamps", no_argument, NULL, 't' }, { "notimestamps", no_argument, NULL, 'T' }, { "absolute-paths", no_argument, NULL, 'p' }, @@ -633,7 +635,7 @@ bees_main(int argc, char *argv[]) { "help", no_argument, NULL, 'h' } }; - c = getopt_long(argc, argv, "c:C:TtPph", long_options, &option_index); + c = getopt_long(argc, argv, "c:C:m:TtPph", long_options, &option_index); if (-1 == c) { break; } @@ -645,6 +647,9 @@ bees_main(int argc, char *argv[]) case 'C': thread_factor = stod(optarg); break; + case 'm': + BeesRoots::set_scan_mode(static_cast(stoul(optarg))); + break; case 'T': chatter_prefix_timestamp = false; break; diff --git a/src/bees.h b/src/bees.h index 2850ba2..109872d 100644 --- a/src/bees.h +++ b/src/bees.h @@ -86,7 +86,7 @@ const size_t BEES_OPEN_FILE_LIMIT = (BEES_FILE_FD_CACHE_SIZE + BEES_ROOT_FD_CACH const double BEES_DEFAULT_THREAD_FACTOR = 1.0; // Log warnings when an operation takes too long -const double BEES_TOO_LONG = 2.5; +const double BEES_TOO_LONG = 5.0; // Avoid any extent where LOGICAL_INO takes this long // const double BEES_TOXIC_DURATION = 9.9; @@ -100,11 +100,14 @@ const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL; const double BEES_INFO_RATE = 10.0; const double BEES_INFO_BURST = 1.0; -// After we have this many events queued, wait -const size_t BEES_MAX_QUEUE_SIZE = 1024; +// Stop growing the work queue after we have this many tasks queued +const size_t BEES_MAX_QUEUE_SIZE = 128; // Read this many items at a time in SEARCHv2 -const size_t BEES_MAX_CRAWL_SIZE = 4096; +const size_t 
BEES_MAX_CRAWL_SIZE = 1024; + +// Insert this many items before switching to a new subvol +const size_t BEES_MAX_CRAWL_BATCH = 128; // If an extent has this many refs, pretend it does not exist // to avoid a crippling btrfs performance bug @@ -555,6 +558,21 @@ public: Fd open_root(uint64_t root); Fd open_root_ino(uint64_t root, uint64_t ino); Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); } + + // TODO: think of better names for these. + // or TODO: do extent-tree scans instead + enum ScanMode { + SCAN_MODE_ZERO, + SCAN_MODE_ONE, + SCAN_MODE_COUNT, // must be last + }; + + static void set_scan_mode(ScanMode new_mode); + +private: + static ScanMode s_scan_mode; + static string scan_mode_ntoa(ScanMode new_mode); + }; struct BeesHash {