mirror of
https://github.com/Zygo/bees.git
synced 2025-05-17 21:35:45 +02:00
context: wait for btrfs send to finish, then try dedupe again
Dedupe is not possible on a subvol where a btrfs send is running. The kernel logs a warning such as: "BTRFS warning (device dm-22): cannot deduplicate to root 259417 while send operations are using it (1 in progress)". btrfs reports this condition to the calling process by failing the dedupe with EAGAIN, meaning the dedupe could not be performed due to a running send operation. It would be possible to save the crawler state at the affected point, fork a new crawler that avoids the subvol under send, and resume the crawler state after a successful dedupe is detected; however, this only helps the intersection of the set of users who have unrelated subvols that don't share extents, and the set of users who cannot simply delay dedupe until send is finished. The simplest approach is to stop and wait until the send goes away, and that is the approach taken here. When a dedupe fails with EAGAIN, affected Tasks will poll, approximately once per transaction, until the dedupe succeeds or fails with a different error. bees dedupe performance depends on the availability of subvols that can accept dedupe requests. While the dedupe is paused, no new Tasks can be performed by the worker thread. If subvols are small and isolated from the bulk of the filesystem data, the result will be a small, partial loss of dedupe performance during the send, as some worker threads get stuck on the sending subvol. If subvols heavily share extents with duplicate data in other subvols, worker threads will all become blocked, and the entire bees process will pause until at least some of the running sends terminate. During the polling for btrfs send, the dedupe Task will hold its dst file open. This open FD won't interfere with snapshot or file delete because send subvols are always read-only (it is not possible to delete a file on a RO subvol, open or otherwise) and send itself holds the affected subvol open, preventing its deletion. Once the send terminates, the dedupe will terminate soon after, and the normal FD release can occur.
This pausing during btrfs send is unrelated to the `--workaround-btrfs-send` option, although `--workaround-btrfs-send` will cause the pausing to trigger less often. The pausing applies to all scan modes. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
parent
bb09b1ab0e
commit
08fe145988
@ -243,25 +243,36 @@ BeesContext::dedup(const BeesRangePair &brp_in)
|
|||||||
BEESNOTE("waiting to dedup " << brp);
|
BEESNOTE("waiting to dedup " << brp);
|
||||||
const auto lock = MultiLocker::get_lock("dedupe");
|
const auto lock = MultiLocker::get_lock("dedupe");
|
||||||
|
|
||||||
Timer dedup_timer;
|
|
||||||
|
|
||||||
BEESLOGINFO("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
BEESLOGINFO("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
||||||
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
||||||
BEESNOTE("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
BEESNOTE("dedup: src " << pretty(brp.first.size()) << " [" << to_hex(brp.first.begin()) << ".." << to_hex(brp.first.end()) << "] {" << first_addr << "} " << name_fd(brp.first.fd()) << "\n"
|
||||||
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
<< " dst " << pretty(brp.second.size()) << " [" << to_hex(brp.second.begin()) << ".." << to_hex(brp.second.end()) << "] {" << second_addr << "} " << name_fd(brp.second.fd()));
|
||||||
|
|
||||||
const bool rv = btrfs_extent_same(brp.first.fd(), brp.first.begin(), brp.first.size(), brp.second.fd(), brp.second.begin());
|
while (true) {
|
||||||
BEESCOUNTADD(dedup_ms, dedup_timer.age() * 1000);
|
try {
|
||||||
|
Timer dedup_timer;
|
||||||
|
const bool rv = btrfs_extent_same(brp.first.fd(), brp.first.begin(), brp.first.size(), brp.second.fd(), brp.second.begin());
|
||||||
|
BEESCOUNTADD(dedup_ms, dedup_timer.age() * 1000);
|
||||||
|
|
||||||
if (rv) {
|
if (rv) {
|
||||||
BEESCOUNT(dedup_hit);
|
BEESCOUNT(dedup_hit);
|
||||||
BEESCOUNTADD(dedup_bytes, brp.first.size());
|
BEESCOUNTADD(dedup_bytes, brp.first.size());
|
||||||
} else {
|
} else {
|
||||||
BEESCOUNT(dedup_miss);
|
BEESCOUNT(dedup_miss);
|
||||||
BEESLOGWARN("NO Dedup! " << brp);
|
BEESLOGWARN("NO Dedup! " << brp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return rv;
|
||||||
|
} catch (const std::system_error &e) {
|
||||||
|
if (e.code().value() == EAGAIN) {
|
||||||
|
BEESNOTE("dedup waiting for btrfs send on " << brp.second);
|
||||||
|
BEESLOGDEBUG("dedup waiting for btrfs send on " << brp.second);
|
||||||
|
roots()->wait_for_transid(1);
|
||||||
|
} else {
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return rv;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BeesRangePair
|
BeesRangePair
|
||||||
|
@ -1479,6 +1479,15 @@ BeesRoots::clear_caches()
|
|||||||
m_ctx->resolve_cache_clear();
|
m_ctx->resolve_cache_clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BeesRoots::wait_for_transid(const uint64_t count)
|
||||||
|
{
|
||||||
|
const auto now_transid = transid_max_nocache();
|
||||||
|
const auto target_transid = now_transid + count;
|
||||||
|
BEESLOGDEBUG("Waiting for transid " << target_transid << ", current transid is " << now_transid);
|
||||||
|
m_transid_re.wait_until(target_transid);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BeesRoots::crawl_thread()
|
BeesRoots::crawl_thread()
|
||||||
{
|
{
|
||||||
|
@ -552,6 +552,8 @@ class BeesRoots : public enable_shared_from_this<BeesRoots> {
|
|||||||
BeesThread m_writeback_thread;
|
BeesThread m_writeback_thread;
|
||||||
bool m_workaround_btrfs_send = false;
|
bool m_workaround_btrfs_send = false;
|
||||||
|
|
||||||
|
RateEstimator m_transid_re;
|
||||||
|
|
||||||
shared_ptr<BeesScanMode> m_scanner;
|
shared_ptr<BeesScanMode> m_scanner;
|
||||||
|
|
||||||
mutex m_tmpfiles_mutex;
|
mutex m_tmpfiles_mutex;
|
||||||
@ -613,6 +615,8 @@ public:
|
|||||||
|
|
||||||
uint64_t transid_min();
|
uint64_t transid_min();
|
||||||
uint64_t transid_max();
|
uint64_t transid_max();
|
||||||
|
|
||||||
|
void wait_for_transid(const uint64_t count);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BeesHash {
|
struct BeesHash {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user