1
0
mirror of https://github.com/Zygo/bees.git synced 2025-08-03 06:13:29 +02:00

5 Commits

Author SHA1 Message Date
Zygo Blaxell
3430f16998 context: create a Pool of BtrfsIoctlLogicalInoArgs objects
Each object contains a 16 MiB buffer, which is very heavy for some
malloc implementations.

Keep the objects in a Pool so that their buffers are only allocated and
deallocated once in the process lifetime.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-23 22:45:31 -05:00
Zygo Blaxell
7c764a73c8 fs: allow BtrfsIoctlLogicalInoArgs to be reused, remove virtual methods
Some malloc implementations will try to mmap() and munmap() large buffers
every time they are used, causing a severe loss of performance.

Nothing ever overrode the virtual methods, and there was no virtual
destructor, so they cause compiler warnings at build time when used with
a template that tries to delete pointers to them.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-23 22:40:12 -05:00
Zygo Blaxell
a9a5cd03a5 ProgressTracker: reduce memory usage with long-running work items
ProgressTracker was only freeing memory for work items when they reach
the head of the work tracking queue.  If the first work item takes
hours to complete, and thousands of items are processed every second,
this leads to millions of completed items tracked in memory at a time,
wasting gigabytes of system RAM.

Rewrite ProgressHolderState methods to keep only incomplete work items
in memory, regardless of the order in which they are added or removed.

Also fix the unit tests which were relying on the memory leak to work,
and add test cases for code coverage.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-23 22:33:35 -05:00
Zygo Blaxell
299509ce32 seeker: fix the test for ILP32 platforms
Not sure what I was thinking, but the argument here should clearly
be uint64_t.

Fixes: https://github.com/Zygo/bees/issues/248
Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-20 11:30:56 -05:00
Zygo Blaxell
d5a99c2f5e roots: don't share a RootFetcher between threads
If the send workaround is enabled, it is possible for two threads (a
thread running the crawl_new task, and a thread attempting to apply the
send workaround) to access the same RootFetcher object at the same time.
That never ends well.

Give each function its own BtrfsRootFetcher object.

Fixes: https://github.com/Zygo/bees/issues/250
Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2023-02-20 11:14:34 -05:00
8 changed files with 108 additions and 34 deletions

View File

@@ -69,9 +69,11 @@ namespace crucible {
uint64_t get_flags() const; uint64_t get_flags() const;
void set_flags(uint64_t new_flags); void set_flags(uint64_t new_flags);
void set_logical(uint64_t new_logical);
void set_size(uint64_t new_size);
virtual void do_ioctl(int fd); void do_ioctl(int fd);
virtual bool do_ioctl_nothrow(int fd); bool do_ioctl_nothrow(int fd);
struct BtrfsInodeOffsetRootSpan { struct BtrfsInodeOffsetRootSpan {
using iterator = BtrfsInodeOffsetRoot*; using iterator = BtrfsInodeOffsetRoot*;

View File

@@ -4,13 +4,20 @@
#include "crucible/error.h" #include "crucible/error.h"
#include <functional> #include <functional>
#include <map>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <set>
#include <cassert>
namespace crucible { namespace crucible {
using namespace std; using namespace std;
/// A class to track progress of multiple workers using only two points:
/// the first and last incomplete state. The first incomplete
/// state can be recorded as a checkpoint to resume later on.
/// The last completed state is the starting point for workers that
/// need something to do.
template <class T> template <class T>
class ProgressTracker { class ProgressTracker {
struct ProgressTrackerState; struct ProgressTrackerState;
@@ -19,8 +26,16 @@ namespace crucible {
using value_type = T; using value_type = T;
using ProgressHolder = shared_ptr<ProgressHolderState>; using ProgressHolder = shared_ptr<ProgressHolderState>;
/// Create ProgressTracker with initial begin and end state 'v'.
ProgressTracker(const value_type &v); ProgressTracker(const value_type &v);
/// The first incomplete state. This is not "sticky",
/// it will revert to the end state if there are no
/// items in progress.
value_type begin() const; value_type begin() const;
/// The last incomplete state. This is "sticky",
/// it can only increase and never decrease.
value_type end() const; value_type end() const;
ProgressHolder hold(const value_type &v); ProgressHolder hold(const value_type &v);
@@ -31,7 +46,7 @@ namespace crucible {
struct ProgressTrackerState { struct ProgressTrackerState {
using key_type = pair<value_type, ProgressHolderState *>; using key_type = pair<value_type, ProgressHolderState *>;
mutex m_mutex; mutex m_mutex;
map<key_type, bool> m_in_progress; set<key_type> m_in_progress;
value_type m_begin; value_type m_begin;
value_type m_end; value_type m_end;
}; };
@@ -39,6 +54,7 @@ namespace crucible {
class ProgressHolderState { class ProgressHolderState {
shared_ptr<ProgressTrackerState> m_state; shared_ptr<ProgressTrackerState> m_state;
const value_type m_value; const value_type m_value;
using key_type = typename ProgressTrackerState::key_type;
public: public:
ProgressHolderState(shared_ptr<ProgressTrackerState> state, const value_type &v); ProgressHolderState(shared_ptr<ProgressTrackerState> state, const value_type &v);
~ProgressHolderState(); ~ProgressHolderState();
@@ -86,7 +102,11 @@ namespace crucible {
m_value(v) m_value(v)
{ {
unique_lock<mutex> lock(m_state->m_mutex); unique_lock<mutex> lock(m_state->m_mutex);
m_state->m_in_progress[make_pair(m_value, this)] = true; const auto rv = m_state->m_in_progress.insert(key_type(m_value, this));
THROW_CHECK1(runtime_error, m_value, rv.second);
// Set the beginning to the first existing in-progress item
m_state->m_begin = m_state->m_in_progress.begin()->first;
// If this value is past the end, move the end, but don't go backwards
if (m_state->m_end < m_value) { if (m_state->m_end < m_value) {
m_state->m_end = m_value; m_state->m_end = m_value;
} }
@@ -96,17 +116,15 @@ namespace crucible {
ProgressTracker<T>::ProgressHolderState::~ProgressHolderState() ProgressTracker<T>::ProgressHolderState::~ProgressHolderState()
{ {
unique_lock<mutex> lock(m_state->m_mutex); unique_lock<mutex> lock(m_state->m_mutex);
m_state->m_in_progress[make_pair(m_value, this)] = false; const auto rv = m_state->m_in_progress.erase(key_type(m_value, this));
auto p = m_state->m_in_progress.begin(); // THROW_CHECK2(runtime_error, m_value, rv, rv == 1);
while (p != m_state->m_in_progress.end()) { assert(rv == 1);
if (p->second) { if (m_state->m_in_progress.empty()) {
break; // If we made the list empty, then m_begin == m_end
} m_state->m_begin = m_state->m_end;
if (m_state->m_begin < p->first.first) { } else {
m_state->m_begin = p->first.first; // If we deleted the first element, then m_begin = current first element
} m_state->m_begin = m_state->m_in_progress.begin()->first;
m_state->m_in_progress.erase(p);
p = m_state->m_in_progress.begin();
} }
} }

View File

@@ -315,6 +315,18 @@ namespace crucible {
return m_flags; return m_flags;
} }
void
BtrfsIoctlLogicalInoArgs::set_logical(uint64_t new_logical)
{
m_logical = new_logical;
}
void
BtrfsIoctlLogicalInoArgs::set_size(uint64_t new_size)
{
m_container_size = new_size;
}
bool bool
BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd) BtrfsIoctlLogicalInoArgs::do_ioctl_nothrow(int fd)
{ {

View File

@@ -757,6 +757,15 @@ BeesResolveAddrResult::BeesResolveAddrResult()
{ {
} }
shared_ptr<BtrfsIoctlLogicalInoArgs>
BeesContext::logical_ino(const uint64_t logical, const bool all_refs)
{
const auto rv = m_logical_ino_pool();
rv->set_logical(logical);
rv->set_flags(all_refs ? BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET : 0);
return rv;
}
BeesResolveAddrResult BeesResolveAddrResult
BeesContext::resolve_addr_uncached(BeesAddress addr) BeesContext::resolve_addr_uncached(BeesAddress addr)
{ {
@@ -768,7 +777,8 @@ BeesContext::resolve_addr_uncached(BeesAddress addr)
// transaction latency, competing threads, and freeze/SIGSTOP // transaction latency, competing threads, and freeze/SIGSTOP
// pausing the bees process. // pausing the bees process.
BtrfsIoctlLogicalInoArgs log_ino(addr.get_physical_or_zero()); const auto log_ino_ptr = logical_ino(addr.get_physical_or_zero(), false);
auto &log_ino = *log_ino_ptr;
// Time how long this takes // Time how long this takes
Timer resolve_timer; Timer resolve_timer;
@@ -910,6 +920,9 @@ BeesContext::start()
m_tmpfile_pool.generator([=]() -> shared_ptr<BeesTempFile> { m_tmpfile_pool.generator([=]() -> shared_ptr<BeesTempFile> {
return make_shared<BeesTempFile>(shared_from_this()); return make_shared<BeesTempFile>(shared_from_this());
}); });
m_logical_ino_pool.generator([]() {
return make_shared<BtrfsIoctlLogicalInoArgs>(0);
});
m_tmpfile_pool.checkin([](const shared_ptr<BeesTempFile> &btf) { m_tmpfile_pool.checkin([](const shared_ptr<BeesTempFile> &btf) {
catch_all([&](){ catch_all([&](){
btf->reset(); btf->reset();

View File

@@ -500,7 +500,8 @@ BeesRoots::transid_max_nocache()
// We look for the root of the extent tree and read its transid. // We look for the root of the extent tree and read its transid.
// Should run in O(1) time and be fairly reliable. // Should run in O(1) time and be fairly reliable.
const auto bti = m_root_fetcher.root(BTRFS_EXTENT_TREE_OBJECTID); BtrfsRootFetcher root_fetcher(m_ctx->root_fd());
const auto bti = root_fetcher.root(BTRFS_EXTENT_TREE_OBJECTID);
BEESTRACE("extracting transid from " << bti); BEESTRACE("extracting transid from " << bti);
const auto rv = bti.transid(); const auto rv = bti.transid();
@@ -927,7 +928,6 @@ BeesRoots::state_load()
BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) : BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
m_ctx(ctx), m_ctx(ctx),
m_root_fetcher(ctx->root_fd()),
m_crawl_state_file(ctx->home_fd(), crawl_state_filename()), m_crawl_state_file(ctx->home_fd(), crawl_state_filename()),
m_crawl_thread("crawl_transid"), m_crawl_thread("crawl_transid"),
m_writeback_thread("crawl_writeback") m_writeback_thread("crawl_writeback")
@@ -1101,7 +1101,8 @@ BeesRoots::is_root_ro(uint64_t root)
BEESTRACE("checking subvol flags on root " << root); BEESTRACE("checking subvol flags on root " << root);
const auto item = m_root_fetcher.root(root); BtrfsRootFetcher root_fetcher(m_ctx->root_fd());
const auto item = root_fetcher.root(root);
// If we can't access the subvol's root item...guess it's ro? // If we can't access the subvol's root item...guess it's ro?
if (!item || item.root_flags() & BTRFS_ROOT_SUBVOL_RDONLY) { if (!item || item.root_flags() & BTRFS_ROOT_SUBVOL_RDONLY) {
return true; return true;

View File

@@ -534,7 +534,6 @@ class BeesScanMode;
class BeesRoots : public enable_shared_from_this<BeesRoots> { class BeesRoots : public enable_shared_from_this<BeesRoots> {
shared_ptr<BeesContext> m_ctx; shared_ptr<BeesContext> m_ctx;
BtrfsRootFetcher m_root_fetcher;
BeesStringFile m_crawl_state_file; BeesStringFile m_crawl_state_file;
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map; map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
mutex m_mutex; mutex m_mutex;
@@ -715,6 +714,7 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
shared_ptr<BeesHashTable> m_hash_table; shared_ptr<BeesHashTable> m_hash_table;
shared_ptr<BeesRoots> m_roots; shared_ptr<BeesRoots> m_roots;
Pool<BeesTempFile> m_tmpfile_pool; Pool<BeesTempFile> m_tmpfile_pool;
Pool<BtrfsIoctlLogicalInoArgs> m_logical_ino_pool;
LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache; LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache;
@@ -754,6 +754,8 @@ public:
bool scan_forward(const BeesFileRange &bfr); bool scan_forward(const BeesFileRange &bfr);
shared_ptr<BtrfsIoctlLogicalInoArgs> logical_ino(uint64_t bytenr, bool all_refs);
bool is_root_ro(uint64_t root); bool is_root_ro(uint64_t root);
BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile); BeesRangePair dup_extent(const BeesFileRange &src, const shared_ptr<BeesTempFile> &tmpfile);
bool dedup(const BeesRangePair &brp); bool dedup(const BeesRangePair &brp);

View File

@@ -12,23 +12,49 @@ using namespace std;
void void
test_progress() test_progress()
{ {
// On create, begin == end == constructor argument
ProgressTracker<uint64_t> pt(123); ProgressTracker<uint64_t> pt(123);
auto hold = pt.hold(234);
auto hold2 = pt.hold(345);
assert(pt.begin() == 123); assert(pt.begin() == 123);
assert(pt.end() == 345); assert(pt.end() == 123);
auto hold3 = pt.hold(456);
assert(pt.begin() == 123); // Holding a position past the end increases the end (and moves begin to match)
assert(pt.end() == 456); auto hold345 = pt.hold(345);
hold2.reset();
assert(pt.begin() == 123);
assert(pt.end() == 456);
hold.reset();
assert(pt.begin() == 345); assert(pt.begin() == 345);
assert(pt.end() == 345);
// Holding a position before begin reduces begin, without changing end
auto hold234 = pt.hold(234);
assert(pt.begin() == 234);
assert(pt.end() == 345);
// Holding a position past the end increases the end, without affecting begin
auto hold456 = pt.hold(456);
assert(pt.begin() == 234);
assert(pt.end() == 456); assert(pt.end() == 456);
hold3.reset();
// Releasing a position in the middle affects neither begin nor end
hold345.reset();
assert(pt.begin() == 234);
assert(pt.end() == 456);
// Hold another position in the middle to test begin moving forward
auto hold400 = pt.hold(400);
// Releasing a position at the beginning moves begin forward
hold234.reset();
assert(pt.begin() == 400);
assert(pt.end() == 456);
// Releasing a position at the end doesn't move end backward
hold456.reset();
assert(pt.begin() == 400);
assert(pt.end() == 456);
// Releasing a position in the middle doesn't move end backward but does move begin forward
hold400.reset();
assert(pt.begin() == 456); assert(pt.begin() == 456);
assert(pt.end() == 456); assert(pt.end() == 456);
} }
int int

View File

@@ -28,7 +28,7 @@ static bool test_fails = false;
static static
void void
seeker_test(const vector<uint64_t> &vec, size_t target) seeker_test(const vector<uint64_t> &vec, uint64_t const target)
{ {
cerr << "Find " << target << " in {"; cerr << "Find " << target << " in {";
for (auto i : vec) { for (auto i : vec) {
@@ -42,7 +42,7 @@ seeker_test(const vector<uint64_t> &vec, size_t target)
return seeker_finder(vec, lower, upper); return seeker_finder(vec, lower, upper);
}); });
cerr << found; cerr << found;
size_t my_found = 0; uint64_t my_found = 0;
for (auto i : vec) { for (auto i : vec) {
if (i <= target) { if (i <= target) {
my_found = i; my_found = i;