1
0
mirror of https://github.com/Zygo/bees.git synced 2025-08-02 13:53:28 +02:00

4 Commits

Author SHA1 Message Date
Zygo Blaxell
a92b122161 roots: reimplement transid_max_nocache using extent tree root
ROOT_TREE contains the ROOT_ITEM for EXTENT_TREE.  Every modification
(that we care about) to a btrfs must go through EXTENT_TREE, and must
modify the page in ROOT_TREE pointing to the root of EXTENT_TREE...
which makes that a very good source for the filesystem transid.

Remove the loop and the root lookups, and just look at one item for
max_transid.

Also note that every caller of transid_max_nocache() immediately
feeds the return value to m_transid_re.update(), so don't do that
inside transid_max_nocache().

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2020-12-23 13:41:49 -05:00
Zygo Blaxell
74d4a8fe01 roots: add a TRACE for transid_max search and crawl_transid thread
Users are hitting an exception somewhere in crawl_transid on 0.6.3, which
forces bees to return to the transid_max calculation over and over.
Also there are out-of-range transids.

Add some BEESTRACE so we can see what we were doing in the exception
handler.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2020-12-23 00:50:29 -05:00
Zygo Blaxell
7283126e5c bees: initialize context in the correct order
We cannot use BeesContext::roots() until after
BeesContext::set_root_path() has been called.
Save up the parameter settings until then.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2020-08-31 22:39:51 -04:00
Zygo Blaxell
ac53e50d3e context: workaround to prevent LOGICAL_INO and btrfs balance from running concurrently
This avoids some kernel bugs.  One of them is fixed in 5.3.4 and later:

	efad8a853a "Btrfs: fix use-after-free when using the tree modification log"

There are apparently others in current kernels, so for now just put bees
on pause until the balance is done.

At some point we may want to provide an option to disable this
workaround; however, running bees and balance at the same time makes
neither particularly fast, so maybe we'll just leave it this way.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2019-11-28 11:32:30 +01:00
4 changed files with 65 additions and 23 deletions

View File

@@ -773,11 +773,42 @@ BeesResolveAddrResult::BeesResolveAddrResult()
{
}
void
BeesContext::wait_for_balance()
{
Timer balance_timer;
BEESNOTE("WORKAROUND: waiting for balance to stop");
while (true) {
btrfs_ioctl_balance_args args;
memset_zero<btrfs_ioctl_balance_args>(&args);
const int ret = ioctl(root_fd(), BTRFS_IOC_BALANCE_PROGRESS, &args);
if (ret < 0) {
// Either can't get balance status or not running, exit either way
break;
}
if (!(args.state & BTRFS_BALANCE_STATE_RUNNING)) {
// Balance not running, doesn't matter if paused or cancelled
break;
}
BEESLOGDEBUG("WORKAROUND: Waiting " << balance_timer << "s for balance to stop");
sleep(BEES_BALANCE_POLL_INTERVAL);
}
}
BeesResolveAddrResult
BeesContext::resolve_addr_uncached(BeesAddress addr)
{
THROW_CHECK1(invalid_argument, addr, !addr.is_magic());
THROW_CHECK0(invalid_argument, !!root_fd());
// Is there a bug where resolve and balance cause a crash (BUG_ON at fs/btrfs/ctree.c:1227)?
// Apparently yes, and more than one.
// Wait for the balance to finish before we run LOGICAL_INO
wait_for_balance();
// Time how long this takes
Timer resolve_timer;
// There is no performance benefit if we restrict the buffer size.

View File

@@ -207,43 +207,37 @@ uint64_t
BeesRoots::transid_max_nocache()
{
// Purpose: compute a filesystem-wide maximum transid by searching
// BTRFS_ROOT_TREE_OBJECTID and keeping the largest `transid` field seen
// in the returned items, then return it.
//
// NOTE(review): this listing looks like a diff rendered without its +/-
// markers, so superseded and replacement statements appear side by side
// (two BEESNOTE/BEESTRACE pairs, two assignments to sk.min_type, two
// assignments to sk.min_objectid).  Confirm against the repository which
// statements are current before relying on the exact sequence below.
uint64_t rv = 0;
uint64_t root = BTRFS_FS_TREE_OBJECTID;
BEESNOTE("Calculating transid_max (" << rv << " as of root " << root << ")");
BEESTRACE("Calculating transid_max...");
// Seed rv with the transid reported for the FS tree root.
rv = btrfs_get_root_transid(root);
// XXX: Do we need any of this? Or is
// m_transid_re.update(btrfs_get_root_transid(BTRFS_FS_TREE_OBJECTID)) good enough?
BEESNOTE("Calculating transid_max");
BEESTRACE("Calculating transid_max");
// We look for the root of the extent tree and read its transid.
// Should run in O(1) time and be fairly reliable.
BtrfsIoctlSearchKey sk;
sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
sk.min_objectid = root;
// These assignments overwrite the type/objectid bounds set just above,
// narrowing the search to ROOT_ITEM keys of the extent tree.
sk.min_type = sk.max_type = BTRFS_ROOT_ITEM_KEY;
sk.min_objectid = sk.max_objectid = BTRFS_EXTENT_TREE_OBJECTID;
// Repeat the search until a call returns no items.
while (true) {
sk.nr_items = 1024;
BEESTRACE("transid_max search sk " << sk);
sk.do_ioctl(m_ctx->root_fd());
if (sk.m_result.empty()) {
break;
}
// We are just looking for the highest transid on the filesystem.
// We don't care which object it comes from.
for (auto i : sk.m_result) {
// Presumably advances the search key past item i so the next
// do_ioctl() continues after it -- confirm in BtrfsIoctlSearchKey.
sk.next_min(i);
if (i.type == BTRFS_ROOT_BACKREF_KEY) {
if (i.transid > rv) {
BEESLOGDEBUG("transid_max root " << i.objectid << " parent " << i.offset << " transid " << i.transid);
BEESCOUNT(transid_max_miss);
}
root = i.objectid;
}
// Track the running maximum.
if (i.transid > rv) {
rv = i.transid;
}
}
}
m_transid_re.update(rv);
// transid must be greater than zero, or we did something very wrong
// (THROW_CHECK1 presumably throws runtime_error when the condition is
// false -- confirm against the macro definition.)
THROW_CHECK1(runtime_error, rv, rv > 0);
return rv;
}
@@ -422,13 +416,15 @@ BeesRoots::crawl_thread()
BEESNOTE("tracking transid");
auto last_count = m_transid_re.count();
while (true) {
// Measure current transid
BEESTRACE("Measure current transid");
catch_all([&]() {
BEESTRACE("calling transid_max_nocache");
m_transid_re.update(transid_max_nocache());
});
// Make sure we have a full complement of crawlers
BEESTRACE("Make sure we have a full complement of crawlers");
catch_all([&]() {
BEESTRACE("calling insert_new_crawl");
insert_new_crawl();
});
@@ -496,19 +492,24 @@ BeesRoots::insert_new_crawl()
unique_lock<mutex> lock(m_mutex);
set<uint64_t> excess_roots;
for (auto i : m_root_crawl_map) {
BEESTRACE("excess_roots.insert(" << i.first << ")");
excess_roots.insert(i.first);
}
lock.unlock();
while (new_bcs.m_root) {
BEESTRACE("excess_roots.erase(" << new_bcs.m_root << ")");
excess_roots.erase(new_bcs.m_root);
BEESTRACE("insert_root(" << new_bcs << ")");
insert_root(new_bcs);
BEESCOUNT(crawl_create);
BEESTRACE("next_root(" << new_bcs.m_root << ")");
new_bcs.m_root = next_root(new_bcs.m_root);
}
for (auto i : excess_roots) {
new_bcs.m_root = i;
BEESTRACE("crawl_state_erase(" << new_bcs << ")");
crawl_state_erase(new_bcs);
}
}

View File

@@ -667,6 +667,7 @@ bees_main(int argc, char *argv[])
unsigned thread_min = 0;
double load_target = 0;
bool workaround_btrfs_send = false;
BeesRoots::ScanMode root_scan_mode = BeesRoots::SCAN_MODE_ZERO;
// Configure getopt_long
static const struct option long_options[] = {
@@ -735,7 +736,7 @@ bees_main(int argc, char *argv[])
load_target = stod(optarg);
break;
case 'm':
bc->roots()->set_scan_mode(static_cast<BeesRoots::ScanMode>(stoul(optarg)));
root_scan_mode = static_cast<BeesRoots::ScanMode>(stoul(optarg));
break;
case 'p':
crucible::set_relative_path("");
@@ -806,11 +807,16 @@ bees_main(int argc, char *argv[])
BEESLOGNOTICE("setting worker thread pool maximum size to " << thread_count);
TaskMaster::set_thread_count(thread_count);
// Set root path
string root_path = argv[optind++];
BEESLOGNOTICE("setting root path to '" << root_path << "'");
bc->set_root_path(root_path);
// Workaround for btrfs send
bc->roots()->set_workaround_btrfs_send(workaround_btrfs_send);
// Create a context and start crawlers
bc->set_root_path(argv[optind++]);
// Set root scan mode
bc->roots()->set_scan_mode(root_scan_mode);
BeesThread status_thread("status", [&]() {
bc->dump_status();

View File

@@ -117,6 +117,9 @@ const size_t BEES_TRANSID_FACTOR = 10;
// The actual limit in LOGICAL_INO seems to be 2730, but let's leave a little headroom
const size_t BEES_MAX_EXTENT_REF_COUNT = 2560;
// Seconds to sleep between checks while waiting for a running balance to stop
const double BEES_BALANCE_POLL_INTERVAL = 60.0;
// Flags
const int FLAGS_OPEN_COMMON = O_NOFOLLOW | O_NONBLOCK | O_CLOEXEC | O_NOATIME | O_LARGEFILE | O_NOCTTY;
const int FLAGS_OPEN_DIR = FLAGS_OPEN_COMMON | O_RDONLY | O_DIRECTORY;
@@ -716,6 +719,7 @@ class BeesContext : public enable_shared_from_this<BeesContext> {
void set_root_fd(Fd fd);
BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);
void wait_for_balance();
BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
void rewrite_file_range(const BeesFileRange &bfr);