diff --git a/docs/event-counters.md b/docs/event-counters.md index ab226d4..a81094b 100644 --- a/docs/event-counters.md +++ b/docs/event-counters.md @@ -363,6 +363,8 @@ scanf The `scanf` event group consists of operations related to `BeesContext::scan_forward`. This is the entry point where `crawl` schedules new data for scanning. + * `scanf_deferred_extent`: Two tasks attempted to scan the same extent at the same time, so one was deferred. + * `scanf_deferred_inode`: Two tasks attempted to scan the same inode at the same time, so one was deferred. * `scanf_extent`: A btrfs extent item was scanned. * `scanf_extent_ms`: Total thread-seconds spent scanning btrfs extent items. * `scanf_total`: A logical byte range of a file was scanned. diff --git a/src/bees-context.cc b/src/bees-context.cc index c49a903..370f0c2 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -89,7 +89,6 @@ BeesContext::dump_status() for (auto t : BeesNote::get_status()) { ofs << "\ttid " << t.first << ": " << t.second << "\n"; } - #if 0 // Huge amount of data, not a lot of information (yet) ofs << "WORKERS:\n"; @@ -678,7 +677,13 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e) return bfr; } -BeesFileRange +shared_ptr +BeesContext::get_inode_mutex(const uint64_t inode) +{ + return m_inode_locks(inode); +} + +void BeesContext::scan_forward(const BeesFileRange &bfr_in) { BEESTRACE("scan_forward " << bfr_in); @@ -689,7 +694,7 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in) // Silently filter out blacklisted files if (is_blacklisted(bfr_in.fid())) { BEESCOUNT(scan_blacklisted); - return bfr_in; + return; } // Reconstitute FD @@ -703,31 +708,36 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in) if (!bfr.fd()) { // BEESLOGINFO("No FD in " << root_path() << " for " << bfr); BEESCOUNT(scan_no_fd); - return bfr; + return; } // Sanity check if (bfr.begin() >= bfr.file_size()) { BEESLOGWARN("past EOF: " << bfr); BEESCOUNT(scan_eof); - return bfr; + return; } BtrfsExtentWalker ew(bfr.fd(), bfr.begin(), root_fd()); - BeesFileRange return_bfr(bfr); - Extent e; + bool start_over = false; catch_all([&]() { - while (!stop_requested()) { + while (!stop_requested() && !start_over) { e = ew.current(); catch_all([&]() { uint64_t extent_bytenr = e.bytenr(); - BEESNOTE("waiting for extent bytenr " << to_hex(extent_bytenr)); - auto extent_lock = m_extent_lock_set.make_lock(extent_bytenr); + auto extent_mutex = m_extent_locks(extent_bytenr); + const auto extent_lock = extent_mutex->try_lock(Task::current_task()); + if (!extent_lock) { + // BEESLOGDEBUG("Deferring extent bytenr " << to_hex(extent_bytenr) << " from " << bfr); + BEESCOUNT(scanf_deferred_extent); + start_over = true; + return; + } Timer one_extent_timer; - return_bfr = scan_one_extent(bfr, e); + scan_one_extent(bfr, e); BEESCOUNTADD(scanf_extent_ms, one_extent_timer.age() * 1000); BEESCOUNT(scanf_extent); }); @@ -745,7 +755,7 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in) BEESCOUNTADD(scanf_total_ms, scan_timer.age() * 1000); BEESCOUNT(scanf_total); - return return_bfr; + return; } BeesResolveAddrResult::BeesResolveAddrResult() @@ -875,6 +885,15 @@ BeesContext::start() BEESLOGNOTICE("Starting bees main loop..."); BEESNOTE("starting BeesContext"); + m_extent_locks.func([](uint64_t bytenr) { + return make_shared(); + (void)bytenr; + }); + m_inode_locks.func([](const uint64_t fid) { + return make_shared(); + (void)fid; + }); + m_progress_thread = make_shared("progress_report"); m_progress_thread = make_shared("progress_report"); m_status_thread = make_shared("status_report"); m_progress_thread->exec([=]() { diff --git a/src/bees-roots.cc b/src/bees-roots.cc index 10fb64a..1b6afb6 100644 --- a/src/bees-roots.cc +++ b/src/bees-roots.cc @@ -243,6 +243,18 @@ BeesFileCrawl::crawl_one_extent() { BEESNOTE("crawl_one_extent m_offset " << to_hex(m_offset) << " state " << m_state); BEESTRACE("crawl_one_extent m_offset " << to_hex(m_offset) << " state " << m_state); + + // Only one thread can dedupe a file. btrfs will lock others out. + // Inodes are usually full of shared extents, especially in the case of snapshots, + // so when we lock an inode, we'll lock the same inode number in all subvols at once. + auto inode_mutex = m_ctx->get_inode_mutex(m_bedf.objectid()); + auto inode_lock = inode_mutex->try_lock(Task::current_task()); + if (!inode_lock) { + BEESCOUNT(scanf_deferred_inode); + // Returning false here means we won't reschedule ourselves, but inode_mutex will do that + return false; + } + // If we hit an exception here we don't try to catch it. // It will mean the file or subvol was deleted or there's metadata corruption, // and we should stop trying to scan the inode in that case. diff --git a/src/bees.h b/src/bees.h index b91aaef..271cbae 100644 --- a/src/bees.h +++ b/src/bees.h @@ -726,7 +726,8 @@ class BeesContext : public enable_shared_from_this { Timer m_total_timer; - LockSet m_extent_lock_set; + NamedPtr m_extent_locks; + NamedPtr m_inode_locks; mutable mutex m_stop_mutex; condition_variable m_stop_condvar; @@ -751,7 +752,7 @@ public: Fd home_fd(); string root_path() const { return m_root_path; } - BeesFileRange scan_forward(const BeesFileRange &bfr); + void scan_forward(const BeesFileRange &bfr); bool is_root_ro(uint64_t root); BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr &tmpfile); @@ -761,6 +762,8 @@ public: void blacklist_erase(const BeesFileId &fid); bool is_blacklisted(const BeesFileId &fid) const; + shared_ptr get_inode_mutex(uint64_t inode); + BeesResolveAddrResult resolve_addr(BeesAddress addr); void invalidate_addr(BeesAddress addr); @@ -777,7 +780,6 @@ public: shared_ptr tmpfile(); const Timer &total_timer() const { return m_total_timer; } - LockSet &extent_lock_set() { return m_extent_lock_set; } }; class BeesResolver {