mirror of
https://github.com/Zygo/bees.git
synced 2025-06-17 10:06:16 +02:00
context: don't let multiple worker Tasks get stuck on a single extent or inode
When two Tasks attempt to lock the same extent, append the later Task to the earlier Task's post-exec work queue. This will guarantee that all Tasks which attempt to manipulate the same extent will execute sequentially, and free up threads to process other extents. Similarly, if two scanner threads operate on the same inode, any dedupe they perform will lock out other scanner threads in btrfs. Avoid this by serializing Task objects that reference the same file. This does theoretically use an unbounded amount of memory, but in practice a Task that encounters a contended extent or inode quickly stops spawning new Tasks that might increase the queue size, and all Tasks that might contend for the same lock(s) end up on a single FIFO queue. Note that the scope of inode locks is intentionally global, i.e. when an inode is locked, it locks every inode with the same number in every subvol. This avoids significant lock contention and task queue growth when the same inode with the same file extents appears in snapshots. Fixes: https://github.com/Zygo/bees/issues/158 Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
@ -89,7 +89,6 @@ BeesContext::dump_status()
|
||||
for (auto t : BeesNote::get_status()) {
|
||||
ofs << "\ttid " << t.first << ": " << t.second << "\n";
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Huge amount of data, not a lot of information (yet)
|
||||
ofs << "WORKERS:\n";
|
||||
@ -678,7 +677,13 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
return bfr;
|
||||
}
|
||||
|
||||
BeesFileRange
|
||||
// Return the shared Exclusion object that m_inode_locks yields for this inode.
// The lookup key is the bare inode number only; no subvol/root identifier is
// passed, so callers presenting the same inode number receive the result of
// the same m_inode_locks(inode) lookup.
shared_ptr<Exclusion>
|
||||
BeesContext::get_inode_mutex(const uint64_t inode)
|
||||
{
|
||||
return m_inode_locks(inode);
|
||||
}
|
||||
|
||||
void
|
||||
BeesContext::scan_forward(const BeesFileRange &bfr_in)
|
||||
{
|
||||
BEESTRACE("scan_forward " << bfr_in);
|
||||
@ -689,7 +694,7 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
|
||||
// Silently filter out blacklisted files
|
||||
if (is_blacklisted(bfr_in.fid())) {
|
||||
BEESCOUNT(scan_blacklisted);
|
||||
return bfr_in;
|
||||
return;
|
||||
}
|
||||
|
||||
// Reconstitute FD
|
||||
@ -703,31 +708,36 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
|
||||
if (!bfr.fd()) {
|
||||
// BEESLOGINFO("No FD in " << root_path() << " for " << bfr);
|
||||
BEESCOUNT(scan_no_fd);
|
||||
return bfr;
|
||||
return;
|
||||
}
|
||||
|
||||
// Sanity check
|
||||
if (bfr.begin() >= bfr.file_size()) {
|
||||
BEESLOGWARN("past EOF: " << bfr);
|
||||
BEESCOUNT(scan_eof);
|
||||
return bfr;
|
||||
return;
|
||||
}
|
||||
|
||||
BtrfsExtentWalker ew(bfr.fd(), bfr.begin(), root_fd());
|
||||
|
||||
BeesFileRange return_bfr(bfr);
|
||||
|
||||
Extent e;
|
||||
bool start_over = false;
|
||||
catch_all([&]() {
|
||||
while (!stop_requested()) {
|
||||
while (!stop_requested() && !start_over) {
|
||||
e = ew.current();
|
||||
|
||||
catch_all([&]() {
|
||||
uint64_t extent_bytenr = e.bytenr();
|
||||
BEESNOTE("waiting for extent bytenr " << to_hex(extent_bytenr));
|
||||
auto extent_lock = m_extent_lock_set.make_lock(extent_bytenr);
|
||||
auto extent_mutex = m_extent_locks(extent_bytenr);
|
||||
const auto extent_lock = extent_mutex->try_lock(Task::current_task());
|
||||
if (!extent_lock) {
|
||||
// BEESLOGDEBUG("Deferring extent bytenr " << to_hex(extent_bytenr) << " from " << bfr);
|
||||
BEESCOUNT(scanf_deferred_extent);
|
||||
start_over = true;
|
||||
return;
|
||||
}
|
||||
Timer one_extent_timer;
|
||||
return_bfr = scan_one_extent(bfr, e);
|
||||
scan_one_extent(bfr, e);
|
||||
BEESCOUNTADD(scanf_extent_ms, one_extent_timer.age() * 1000);
|
||||
BEESCOUNT(scanf_extent);
|
||||
});
|
||||
@ -745,7 +755,7 @@ BeesContext::scan_forward(const BeesFileRange &bfr_in)
|
||||
BEESCOUNTADD(scanf_total_ms, scan_timer.age() * 1000);
|
||||
BEESCOUNT(scanf_total);
|
||||
|
||||
return return_bfr;
|
||||
return;
|
||||
}
|
||||
|
||||
BeesResolveAddrResult::BeesResolveAddrResult()
|
||||
@ -875,6 +885,15 @@ BeesContext::start()
|
||||
BEESLOGNOTICE("Starting bees main loop...");
|
||||
BEESNOTE("starting BeesContext");
|
||||
|
||||
m_extent_locks.func([](uint64_t bytenr) {
|
||||
return make_shared<Exclusion>();
|
||||
(void)bytenr;
|
||||
});
|
||||
m_inode_locks.func([](const uint64_t fid) {
|
||||
return make_shared<Exclusion>();
|
||||
(void)fid;
|
||||
});
|
||||
m_progress_thread = make_shared<BeesThread>("progress_report");
|
||||
m_progress_thread = make_shared<BeesThread>("progress_report");
|
||||
m_status_thread = make_shared<BeesThread>("status_report");
|
||||
m_progress_thread->exec([=]() {
|
||||
|
Reference in New Issue
Block a user