1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-17 21:35:45 +02:00

context: don't let multiple worker Tasks get stuck on a single extent or inode

When two Tasks attempt to lock the same extent, append the later Task
to the earlier Task's post-exec work queue.  This will guarantee that
all Tasks which attempt to manipulate the same extent will execute
sequentially, and free up threads to process other extents.

Similarly, if two scanner threads operate on the same inode, any dedupe
they perform will lock out other scanner threads in btrfs.  Avoid this
by serializing Task objects that reference the same file.

This does theoretically use an unbounded amount of memory, but in practice
a Task that encounters a contended extent or inode quickly stops spawning
new Tasks that might increase the queue size, and all Tasks that might
contend for the same lock(s) end up on a single FIFO queue.

Note that the scope of inode locks is intentionally global, i.e. when
an inode is locked, it locks every inode with the same number in every
subvol.  This avoids significant lock contention and task queue growth
when the same inode with the same file extents appear in snapshots.

Fixes: https://github.com/Zygo/bees/issues/158
Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2022-11-20 12:01:16 -05:00
parent 31d26bcfc6
commit 84f91af503
4 changed files with 50 additions and 15 deletions

View File

@ -363,6 +363,8 @@ scanf
The `scanf` event group consists of operations related to `BeesContext::scan_forward`. This is the entry point where `crawl` schedules new data for scanning.
* `scanf_deferred_extent`: Two tasks attempted to scan the same extent at the same time, so one was deferred.
* `scanf_deferred_inode`: Two tasks attempted to scan the same inode at the same time, so one was deferred.
* `scanf_extent`: A btrfs extent item was scanned.
* `scanf_extent_ms`: Total thread-seconds spent scanning btrfs extent items.
* `scanf_total`: A logical byte range of a file was scanned.

View File

@ -89,7 +89,6 @@ BeesContext::dump_status()
for (auto t : BeesNote::get_status()) {
ofs << "\ttid " << t.first << ": " << t.second << "\n";
}
#if 0
// Huge amount of data, not a lot of information (yet)
ofs << "WORKERS:\n";
@ -678,7 +677,13 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
return bfr;
}
BeesFileRange
shared_ptr<Exclusion>
BeesContext::get_inode_mutex(const uint64_t inode)
{
return m_inode_locks(inode);
}
void
BeesContext::scan_forward(const BeesFileRange &bfr_in)
{
BEESTRACE("scan_forward " << bfr_in);
@ -689,7 +694,7 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
// Silently filter out blacklisted files
if (is_blacklisted(bfr_in.fid())) {
BEESCOUNT(scan_blacklisted);
return bfr_in;
return;
}
// Reconstitute FD
@ -703,31 +708,36 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
if (!bfr.fd()) {
// BEESLOGINFO("No FD in " << root_path() << " for " << bfr);
BEESCOUNT(scan_no_fd);
return bfr;
return;
}
// Sanity check
if (bfr.begin() >= bfr.file_size()) {
BEESLOGWARN("past EOF: " << bfr);
BEESCOUNT(scan_eof);
return bfr;
return;
}
BtrfsExtentWalker ew(bfr.fd(), bfr.begin(), root_fd());
BeesFileRange return_bfr(bfr);
Extent e;
bool start_over = false;
catch_all([&]() {
while (!stop_requested()) {
while (!stop_requested() && !start_over) {
e = ew.current();
catch_all([&]() {
uint64_t extent_bytenr = e.bytenr();
BEESNOTE("waiting for extent bytenr " << to_hex(extent_bytenr));
auto extent_lock = m_extent_lock_set.make_lock(extent_bytenr);
auto extent_mutex = m_extent_locks(extent_bytenr);
const auto extent_lock = extent_mutex->try_lock(Task::current_task());
if (!extent_lock) {
// BEESLOGDEBUG("Deferring extent bytenr " << to_hex(extent_bytenr) << " from " << bfr);
BEESCOUNT(scanf_deferred_extent);
start_over = true;
return;
}
Timer one_extent_timer;
return_bfr = scan_one_extent(bfr, e);
scan_one_extent(bfr, e);
BEESCOUNTADD(scanf_extent_ms, one_extent_timer.age() * 1000);
BEESCOUNT(scanf_extent);
});
@ -745,7 +755,7 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
BEESCOUNTADD(scanf_total_ms, scan_timer.age() * 1000);
BEESCOUNT(scanf_total);
return return_bfr;
return;
}
BeesResolveAddrResult::BeesResolveAddrResult()
@ -875,6 +885,15 @@ BeesContext::start()
BEESLOGNOTICE("Starting bees main loop...");
BEESNOTE("starting BeesContext");
m_extent_locks.func([](uint64_t bytenr) {
return make_shared<Exclusion>();
(void)bytenr;
});
m_inode_locks.func([](const uint64_t fid) {
return make_shared<Exclusion>();
(void)fid;
});
m_progress_thread = make_shared<BeesThread>("progress_report");
m_progress_thread = make_shared<BeesThread>("progress_report");
m_status_thread = make_shared<BeesThread>("status_report");
m_progress_thread->exec([=]() {

View File

@ -243,6 +243,18 @@ BeesFileCrawl::crawl_one_extent()
{
BEESNOTE("crawl_one_extent m_offset " << to_hex(m_offset) << " state " << m_state);
BEESTRACE("crawl_one_extent m_offset " << to_hex(m_offset) << " state " << m_state);
// Only one thread can dedupe a file. btrfs will lock others out.
// Inodes are usually full of shared extents, especially in the case of snapshots,
// so when we lock an inode, we'll lock the same inode number in all subvols at once.
auto inode_mutex = m_ctx->get_inode_mutex(m_bedf.objectid());
auto inode_lock = inode_mutex->try_lock(Task::current_task());
if (!inode_lock) {
BEESCOUNT(scanf_deferred_inode);
// Returning false here means we won't reschedule ourselves, but inode_mutex will do that
return false;
}
// If we hit an exception here we don't try to catch it.
// It will mean the file or subvol was deleted or there's metadata corruption,
// and we should stop trying to scan the inode in that case.

View File

@ -726,7 +726,8 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
Timer m_total_timer;
LockSet<uint64_t> m_extent_lock_set;
NamedPtr<Exclusion, uint64_t> m_extent_locks;
NamedPtr<Exclusion, uint64_t> m_inode_locks;
mutable mutex m_stop_mutex;
condition_variable m_stop_condvar;
@ -751,7 +752,7 @@ public:
Fd home_fd();
string root_path() const { return m_root_path; }
BeesFileRange scan_forward(const BeesFileRange &bfr);
void scan_forward(const BeesFileRange &bfr);
bool is_root_ro(uint64_t root);
BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile);
@ -761,6 +762,8 @@ public:
void blacklist_erase(const BeesFileId &fid);
bool is_blacklisted(const BeesFileId &fid) const;
shared_ptr<Exclusion> get_inode_mutex(uint64_t inode);
BeesResolveAddrResult resolve_addr(BeesAddress addr);
void invalidate_addr(BeesAddress addr);
@ -777,7 +780,6 @@ public:
shared_ptr<BeesTempFile> tmpfile();
const Timer &total_timer() const { return m_total_timer; }
LockSet<uint64_t> &extent_lock_set() { return m_extent_lock_set; }
};
class BeesResolver {