mirror of
https://github.com/Zygo/bees.git
synced 2025-05-18 05:45:45 +02:00
Fix discussion of nodatasum files, clarifying what we can and cannot do. Get rid of some BEESNOTE and BEESTRACE calls which cannot be observed (well, BEESNOTE can, but you have to be quick!). Signed-off-by: Zygo Blaxell <bees@furryterror.org>
942 lines
26 KiB
C++
942 lines
26 KiB
C++
#include "bees.h"
|
|
|
|
#include "crucible/cache.h"
|
|
#include "crucible/ntoa.h"
|
|
#include "crucible/string.h"
|
|
#include "crucible/task.h"
|
|
|
|
#include <fstream>
|
|
#include <tuple>
|
|
|
|
using namespace crucible;
|
|
using namespace std;
|
|
|
|
BeesRoots::ScanMode BeesRoots::s_scan_mode = BeesRoots::SCAN_MODE_ZERO;
|
|
|
|
string
|
|
format_time(time_t t)
|
|
{
|
|
struct tm *tmp = localtime(&t);
|
|
char buf[1024];
|
|
strftime(buf, sizeof(buf), "%Y-%m-%d-%H-%M-%S", tmp);
|
|
return buf;
|
|
}
|
|
|
|
ostream &
|
|
operator<<(ostream &os, const BeesCrawlState &bcs)
|
|
{
|
|
time_t now = time(NULL);
|
|
auto age = now - bcs.m_started;
|
|
return os << "BeesCrawlState "
|
|
<< bcs.m_root << ":" << bcs.m_objectid << " offset " << to_hex(bcs.m_offset)
|
|
<< " transid " << bcs.m_min_transid << ".." << bcs.m_max_transid
|
|
<< " started " << format_time(bcs.m_started) << " (" << age << "s ago)";
|
|
}
|
|
|
|
BeesCrawlState::BeesCrawlState() :
|
|
m_root(0),
|
|
m_objectid(0),
|
|
m_offset(0),
|
|
m_min_transid(0),
|
|
m_max_transid(0),
|
|
m_started(time(NULL))
|
|
{
|
|
}
|
|
|
|
bool
|
|
BeesCrawlState::operator<(const BeesCrawlState &that) const
|
|
{
|
|
return tie(m_objectid, m_offset, m_root, m_min_transid, m_max_transid)
|
|
< tie(that.m_objectid, that.m_offset, that.m_root, that.m_min_transid, that.m_max_transid);
|
|
}
|
|
|
|
string
|
|
BeesRoots::scan_mode_ntoa(BeesRoots::ScanMode mode)
|
|
{
|
|
static const bits_ntoa_table table[] = {
|
|
NTOA_TABLE_ENTRY_ENUM(SCAN_MODE_ZERO),
|
|
NTOA_TABLE_ENTRY_ENUM(SCAN_MODE_ONE),
|
|
NTOA_TABLE_ENTRY_ENUM(SCAN_MODE_COUNT),
|
|
NTOA_TABLE_ENTRY_END()
|
|
};
|
|
return bits_ntoa(mode, table);
|
|
}
|
|
|
|
void
|
|
BeesRoots::set_scan_mode(ScanMode mode)
|
|
{
|
|
THROW_CHECK1(invalid_argument, mode, mode < SCAN_MODE_COUNT);
|
|
s_scan_mode = mode;
|
|
BEESLOGINFO("Scan mode set to " << mode << " (" << scan_mode_ntoa(mode) << ")");
|
|
}
|
|
|
|
string
|
|
BeesRoots::crawl_state_filename() const
|
|
{
|
|
string rv;
|
|
|
|
// Legacy filename included UUID
|
|
rv += "beescrawl.";
|
|
rv += m_ctx->root_uuid();
|
|
rv += ".dat";
|
|
|
|
struct stat buf;
|
|
if (fstatat(m_ctx->home_fd(), rv.c_str(), &buf, AT_SYMLINK_NOFOLLOW)) {
|
|
// Use new filename
|
|
rv = "beescrawl.dat";
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
void
|
|
BeesRoots::state_save()
|
|
{
|
|
// Make sure we have a full complement of crawlers
|
|
insert_new_crawl();
|
|
|
|
BEESNOTE("saving crawl state");
|
|
BEESLOGINFO("Saving crawl state");
|
|
BEESTOOLONG("Saving crawl state");
|
|
|
|
Timer save_time;
|
|
|
|
unique_lock<mutex> lock(m_mutex);
|
|
|
|
// We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls.
|
|
ostringstream ofs;
|
|
|
|
if (!m_crawl_dirty) {
|
|
BEESLOGINFO("Nothing to save");
|
|
return;
|
|
}
|
|
|
|
for (auto i : m_root_crawl_map) {
|
|
auto ibcs = i.second->get_state();
|
|
if (ibcs.m_max_transid) {
|
|
ofs << "root " << ibcs.m_root << " ";
|
|
ofs << "objectid " << ibcs.m_objectid << " ";
|
|
ofs << "offset " << ibcs.m_offset << " ";
|
|
ofs << "min_transid " << ibcs.m_min_transid << " ";
|
|
ofs << "max_transid " << ibcs.m_max_transid << " ";
|
|
ofs << "started " << ibcs.m_started << " ";
|
|
ofs << "start_ts " << format_time(ibcs.m_started) << "\n";
|
|
}
|
|
}
|
|
|
|
if (ofs.str().empty()) {
|
|
BEESLOGWARN("Crawl state empty!");
|
|
m_crawl_dirty = false;
|
|
return;
|
|
}
|
|
|
|
lock.unlock();
|
|
|
|
m_crawl_state_file.write(ofs.str());
|
|
|
|
// Renaming things is hard after release
|
|
if (m_crawl_state_file.name() != "beescrawl.dat") {
|
|
renameat(m_ctx->home_fd(), m_crawl_state_file.name().c_str(), m_ctx->home_fd(), "beescrawl.dat");
|
|
m_crawl_state_file.name("beescrawl.dat");
|
|
}
|
|
|
|
BEESNOTE("relocking crawl state");
|
|
lock.lock();
|
|
// Not really correct but probably close enough
|
|
m_crawl_dirty = false;
|
|
BEESLOGINFO("Saved crawl state in " << save_time << "s");
|
|
}
|
|
|
|
BeesCrawlState
|
|
BeesRoots::crawl_state_get(uint64_t rootid)
|
|
{
|
|
unique_lock<mutex> lock(m_mutex);
|
|
auto rv = m_root_crawl_map.at(rootid)->get_state();
|
|
THROW_CHECK2(runtime_error, rv.m_root, rootid, rv.m_root == rootid);
|
|
return rv;
|
|
}
|
|
|
|
void
|
|
BeesRoots::crawl_state_set_dirty()
|
|
{
|
|
unique_lock<mutex> lock(m_mutex);
|
|
m_crawl_dirty = true;
|
|
}
|
|
|
|
void
|
|
BeesRoots::crawl_state_erase(const BeesCrawlState &bcs)
|
|
{
|
|
unique_lock<mutex> lock(m_mutex);
|
|
|
|
// Do not delete the last entry, it holds our max_transid
|
|
if (m_root_crawl_map.size() < 2) {
|
|
BEESCOUNT(crawl_no_empty);
|
|
return;
|
|
}
|
|
|
|
if (m_root_crawl_map.count(bcs.m_root)) {
|
|
m_root_crawl_map.erase(bcs.m_root);
|
|
m_crawl_dirty = true;
|
|
}
|
|
}
|
|
|
|
uint64_t
|
|
BeesRoots::transid_min()
|
|
{
|
|
BEESNOTE("Calculating transid_min");
|
|
unique_lock<mutex> lock(m_mutex);
|
|
if (m_root_crawl_map.empty()) {
|
|
return 0;
|
|
}
|
|
uint64_t rv = numeric_limits<uint64_t>::max();
|
|
for (auto i : m_root_crawl_map) {
|
|
rv = min(rv, i.second->get_state().m_min_transid);
|
|
}
|
|
return rv;
|
|
}
|
|
|
|
uint64_t
|
|
BeesRoots::transid_max()
|
|
{
|
|
uint64_t rv = 0;
|
|
uint64_t root = 0;
|
|
BEESNOTE("Calculating transid_max (" << rv << " as of root " << root << ")");
|
|
BEESTRACE("Calculating transid_max...");
|
|
do {
|
|
root = next_root(root);
|
|
if (root) {
|
|
catch_all([&]() {
|
|
auto transid = btrfs_get_root_transid(open_root(root));
|
|
rv = max(rv, transid);
|
|
// BEESLOG("\troot " << root << " transid " << transid << " max " << rv);
|
|
});
|
|
}
|
|
} while (root);
|
|
return rv;
|
|
}
|
|
|
|
void
|
|
BeesRoots::crawl_roots()
|
|
{
|
|
BEESNOTE("Crawling roots");
|
|
|
|
unique_lock<mutex> lock(m_mutex);
|
|
// Work from a copy because BeesCrawl might change the world under us
|
|
auto crawl_map_copy = m_root_crawl_map;
|
|
lock.unlock();
|
|
|
|
// Nothing to crawl? Seems suspicious...
|
|
if (m_root_crawl_map.empty()) {
|
|
BEESLOGINFO("idle: crawl map is empty!");
|
|
}
|
|
|
|
auto ctx_copy = m_ctx;
|
|
|
|
switch (s_scan_mode) {
|
|
case SCAN_MODE_ZERO: {
|
|
// Scan the same inode/offset tuple in each subvol (good for snapshots)
|
|
BeesFileRange first_range;
|
|
shared_ptr<BeesCrawl> first_crawl;
|
|
for (auto i : crawl_map_copy) {
|
|
auto this_crawl = i.second;
|
|
auto this_range = this_crawl->peek_front();
|
|
if (this_range) {
|
|
if (!first_range || this_range < first_range) {
|
|
first_crawl = this_crawl;
|
|
first_range = this_range;
|
|
}
|
|
}
|
|
}
|
|
|
|
size_t batch_count = 0;
|
|
while (first_range && batch_count < BEES_MAX_CRAWL_BATCH) {
|
|
auto subvol = first_crawl->get_state().m_root;
|
|
ostringstream oss;
|
|
oss << "crawl_" << subvol;
|
|
auto task_title = oss.str();
|
|
Task(task_title, [ctx_copy, first_range]() {
|
|
BEESNOTE("scan_forward " << first_range);
|
|
ctx_copy->scan_forward(first_range);
|
|
}).run();
|
|
BEESCOUNT(crawl_scan);
|
|
m_crawl_current = first_crawl->get_state();
|
|
auto first_range_popped = first_crawl->pop_front();
|
|
THROW_CHECK2(runtime_error, first_range, first_range_popped, first_range == first_range_popped);
|
|
first_range = first_crawl->peek_front();
|
|
++batch_count;
|
|
}
|
|
|
|
if (first_range || batch_count) {
|
|
return;
|
|
}
|
|
|
|
break;
|
|
}
|
|
case SCAN_MODE_ONE: {
|
|
// Scan each subvol one extent at a time (good for continuous forward progress)
|
|
bool crawled = false;
|
|
for (auto i : crawl_map_copy) {
|
|
auto this_crawl = i.second;
|
|
auto this_range = this_crawl->peek_front();
|
|
size_t batch_count = 0;
|
|
while (this_range && batch_count < BEES_MAX_CRAWL_BATCH) {
|
|
auto subvol = this_crawl->get_state().m_root;
|
|
ostringstream oss;
|
|
oss << "crawl_" << subvol;
|
|
auto task_title = oss.str();
|
|
Task(task_title, [ctx_copy, this_range]() {
|
|
BEESNOTE("scan_forward " << this_range);
|
|
ctx_copy->scan_forward(this_range);
|
|
}).run();
|
|
crawled = true;
|
|
BEESCOUNT(crawl_scan);
|
|
m_crawl_current = this_crawl->get_state();
|
|
auto this_range_popped = this_crawl->pop_front();
|
|
THROW_CHECK2(runtime_error, this_range, this_range_popped, this_range == this_range_popped);
|
|
this_range = this_crawl->peek_front();
|
|
++batch_count;
|
|
}
|
|
}
|
|
|
|
if (crawled) return;
|
|
break;
|
|
}
|
|
case SCAN_MODE_COUNT: assert(false); break;
|
|
}
|
|
|
|
BEESLOGINFO("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for more...");
|
|
BEESCOUNT(crawl_done);
|
|
BEESNOTE("idle, waiting for more data");
|
|
lock.lock();
|
|
m_condvar.wait(lock);
|
|
|
|
// Don't count the time we were waiting as part of the crawl time
|
|
m_crawl_timer.reset();
|
|
}
|
|
|
|
void
|
|
BeesRoots::crawl_thread()
|
|
{
|
|
// TODO: get rid of the thread. For now it is a convenient
|
|
// way to avoid the weird things that happen when you try to
|
|
// shared_from_this() in a constructor.
|
|
BEESNOTE("crawling");
|
|
auto shared_this = shared_from_this();
|
|
Task("crawl", [shared_this]() {
|
|
auto tqs = TaskMaster::get_queue_count();
|
|
BEESNOTE("queueing extents to scan, " << tqs << " of " << BEES_MAX_QUEUE_SIZE);
|
|
while (tqs < BEES_MAX_QUEUE_SIZE) {
|
|
catch_all([&]() {
|
|
shared_this->crawl_roots();
|
|
});
|
|
tqs = TaskMaster::get_queue_count();
|
|
}
|
|
Task::current_task().run();
|
|
}).run();
|
|
}
|
|
|
|
void
|
|
BeesRoots::writeback_thread()
|
|
{
|
|
while (1) {
|
|
BEESNOTE(m_crawl_current << (m_crawl_dirty ? " (dirty)" : ""));
|
|
|
|
catch_all([&]() {
|
|
BEESNOTE("saving crawler state");
|
|
state_save();
|
|
});
|
|
|
|
nanosleep(BEES_WRITEBACK_INTERVAL);
|
|
|
|
}
|
|
}
|
|
|
|
void
|
|
BeesRoots::insert_root(const BeesCrawlState &new_bcs)
|
|
{
|
|
unique_lock<mutex> lock(m_mutex);
|
|
if (!m_root_crawl_map.count(new_bcs.m_root)) {
|
|
auto new_bcp = make_shared<BeesCrawl>(m_ctx, new_bcs);
|
|
auto new_pair = make_pair(new_bcs.m_root, new_bcp);
|
|
m_root_crawl_map.insert(new_pair);
|
|
m_crawl_dirty = true;
|
|
}
|
|
}
|
|
|
|
void
|
|
BeesRoots::insert_new_crawl()
|
|
{
|
|
BEESNOTE("adding crawlers for new subvols and removing crawlers for removed subvols");
|
|
|
|
BeesCrawlState new_bcs;
|
|
// Avoid a wasted loop iteration by starting from root 5
|
|
new_bcs.m_root = BTRFS_FS_TREE_OBJECTID;
|
|
new_bcs.m_min_transid = transid_min();
|
|
new_bcs.m_max_transid = transid_max();
|
|
|
|
unique_lock<mutex> lock(m_mutex);
|
|
set<uint64_t> excess_roots;
|
|
for (auto i : m_root_crawl_map) {
|
|
excess_roots.insert(i.first);
|
|
}
|
|
lock.unlock();
|
|
|
|
while (new_bcs.m_root) {
|
|
excess_roots.erase(new_bcs.m_root);
|
|
insert_root(new_bcs);
|
|
BEESCOUNT(crawl_create);
|
|
new_bcs.m_root = next_root(new_bcs.m_root);
|
|
}
|
|
|
|
for (auto i : excess_roots) {
|
|
new_bcs.m_root = i;
|
|
crawl_state_erase(new_bcs);
|
|
}
|
|
|
|
// Wake up crawl_roots if sleeping
|
|
lock.lock();
|
|
m_condvar.notify_all();
|
|
}
|
|
|
|
void
|
|
BeesRoots::state_load()
|
|
{
|
|
BEESNOTE("loading crawl state");
|
|
BEESLOGINFO("loading crawl state");
|
|
|
|
string crawl_data = m_crawl_state_file.read();
|
|
|
|
for (auto line : split("\n", crawl_data)) {
|
|
BEESLOGDEBUG("Read line: " << line);
|
|
map<string, uint64_t> d;
|
|
auto words = split(" ", line);
|
|
for (auto it = words.begin(); it < words.end(); ++it) {
|
|
auto it1 = it;
|
|
++it;
|
|
THROW_CHECK1(out_of_range, words.size(), it < words.end());
|
|
string key = *it1;
|
|
uint64_t val = from_hex(*it);
|
|
BEESTRACE("key " << key << " val " << val);
|
|
auto result = d.insert(make_pair(key, val));
|
|
THROW_CHECK0(runtime_error, result.second);
|
|
}
|
|
BeesCrawlState loaded_state;
|
|
loaded_state.m_root = d.at("root");
|
|
loaded_state.m_objectid = d.at("objectid");
|
|
loaded_state.m_offset = d.at("offset");
|
|
loaded_state.m_min_transid = d.count("gen_current") ? d.at("gen_current") : d.at("min_transid");
|
|
loaded_state.m_max_transid = d.count("gen_next") ? d.at("gen_next") : d.at("max_transid");
|
|
if (d.count("started")) {
|
|
loaded_state.m_started = d.at("started");
|
|
}
|
|
BEESLOGDEBUG("loaded_state " << loaded_state);
|
|
insert_root(loaded_state);
|
|
}
|
|
}
|
|
|
|
BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
|
|
m_ctx(ctx),
|
|
m_crawl_state_file(ctx->home_fd(), crawl_state_filename()),
|
|
m_crawl_thread("crawl"),
|
|
m_writeback_thread("crawl_writeback")
|
|
{
|
|
m_crawl_thread.exec([&]() {
|
|
catch_all([&]() {
|
|
state_load();
|
|
});
|
|
m_writeback_thread.exec([&]() {
|
|
writeback_thread();
|
|
});
|
|
crawl_thread();
|
|
});
|
|
}
|
|
|
|
Fd
|
|
BeesRoots::open_root_nocache(uint64_t rootid)
|
|
{
|
|
BEESTRACE("open_root_nocache " << rootid);
|
|
BEESNOTE("open_root_nocache " << rootid);
|
|
|
|
// Stop recursion at the root of the filesystem tree
|
|
if (rootid == BTRFS_FS_TREE_OBJECTID) {
|
|
return m_ctx->root_fd();
|
|
}
|
|
|
|
// Find backrefs for this rootid and follow up to root
|
|
BtrfsIoctlSearchKey sk;
|
|
sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
|
|
sk.min_objectid = sk.max_objectid = rootid;
|
|
sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
|
|
|
|
BEESTRACE("sk " << sk);
|
|
while (sk.min_objectid <= rootid) {
|
|
sk.nr_items = 1024;
|
|
sk.do_ioctl(m_ctx->root_fd());
|
|
|
|
if (sk.m_result.empty()) {
|
|
break;
|
|
}
|
|
|
|
for (auto i : sk.m_result) {
|
|
sk.next_min(i);
|
|
if (i.type == BTRFS_ROOT_BACKREF_KEY && i.objectid == rootid) {
|
|
auto dirid = call_btrfs_get(btrfs_stack_root_ref_dirid, i.m_data);
|
|
auto name_len = call_btrfs_get(btrfs_stack_root_ref_name_len, i.m_data);
|
|
auto name_start = sizeof(struct btrfs_root_ref);
|
|
auto name_end = name_len + name_start;
|
|
THROW_CHECK2(runtime_error, i.m_data.size(), name_end, i.m_data.size() >= name_end);
|
|
string name(i.m_data.data() + name_start, i.m_data.data() + name_end);
|
|
|
|
auto parent_rootid = i.offset;
|
|
// BEESLOG("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
|
|
BEESTRACE("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
|
|
Fd parent_fd = open_root(parent_rootid);
|
|
if (!parent_fd) {
|
|
BEESLOGTRACE("no parent_fd");
|
|
continue;
|
|
}
|
|
|
|
if (dirid != BTRFS_FIRST_FREE_OBJECTID) {
|
|
BEESTRACE("dirid " << dirid << " root " << rootid << " INO_PATH");
|
|
BtrfsIoctlInoPathArgs ino(dirid);
|
|
if (!ino.do_ioctl_nothrow(parent_fd)) {
|
|
BEESLOGINFO("dirid " << dirid << " inode path lookup failed in parent_fd " << name_fd(parent_fd));
|
|
continue;
|
|
}
|
|
if (ino.m_paths.empty()) {
|
|
BEESLOGINFO("dirid " << dirid << " inode has no paths in parent_fd " << name_fd(parent_fd));
|
|
continue;
|
|
}
|
|
BEESTRACE("dirid " << dirid << " path " << ino.m_paths.at(0));
|
|
parent_fd = openat(parent_fd, ino.m_paths.at(0).c_str(), FLAGS_OPEN_DIR);
|
|
if (!parent_fd) {
|
|
BEESLOGTRACE("no parent_fd from dirid");
|
|
continue;
|
|
}
|
|
}
|
|
// BEESLOG("openat(" << name_fd(parent_fd) << ", " << name << ")");
|
|
BEESTRACE("openat(" << name_fd(parent_fd) << ", " << name << ")");
|
|
Fd rv = openat(parent_fd, name.c_str(), FLAGS_OPEN_DIR);
|
|
if (!rv) {
|
|
BEESLOGTRACE("open failed for name " << name);
|
|
continue;
|
|
}
|
|
BEESCOUNT(root_found);
|
|
|
|
// Verify correct root ID
|
|
auto new_root_id = btrfs_get_root_id(rv);
|
|
THROW_CHECK2(runtime_error, new_root_id, rootid, new_root_id == rootid);
|
|
Stat st(rv);
|
|
THROW_CHECK1(runtime_error, st.st_ino, st.st_ino == BTRFS_FIRST_FREE_OBJECTID);
|
|
// BEESLOGDEBUG("open_root_nocache " << rootid << ": " << name_fd(rv));
|
|
return rv;
|
|
}
|
|
}
|
|
}
|
|
BEESLOGDEBUG("No path for rootid " << rootid);
|
|
BEESCOUNT(root_notfound);
|
|
return Fd();
|
|
}
|
|
|
|
Fd
|
|
BeesRoots::open_root(uint64_t rootid)
|
|
{
|
|
// Ignore some of the crap that comes out of LOGICAL_INO
|
|
if (rootid == BTRFS_ROOT_TREE_OBJECTID) {
|
|
return Fd();
|
|
}
|
|
|
|
return m_ctx->fd_cache()->open_root(m_ctx, rootid);
|
|
}
|
|
|
|
|
|
uint64_t
|
|
BeesRoots::next_root(uint64_t root)
|
|
{
|
|
BEESNOTE("Next root from " << root);
|
|
BEESTRACE("Next root from " << root);
|
|
|
|
// BTRFS_FS_TREE_OBJECTID has no backref keys so we can't find it that way
|
|
if (root < BTRFS_FS_TREE_OBJECTID) {
|
|
// BEESLOG("First root is BTRFS_FS_TREE_OBJECTID = " << BTRFS_FS_TREE_OBJECTID);
|
|
return BTRFS_FS_TREE_OBJECTID;
|
|
}
|
|
|
|
BtrfsIoctlSearchKey sk;
|
|
sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
|
|
sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
|
|
sk.min_objectid = root + 1;
|
|
|
|
while (true) {
|
|
sk.nr_items = 1024;
|
|
sk.do_ioctl(m_ctx->root_fd());
|
|
|
|
if (sk.m_result.empty()) {
|
|
return 0;
|
|
}
|
|
|
|
for (auto i : sk.m_result) {
|
|
sk.next_min(i);
|
|
if (i.type == BTRFS_ROOT_BACKREF_KEY) {
|
|
// BEESLOG("Found root " << i.objectid << " parent " << i.offset);
|
|
return i.objectid;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Fd
|
|
BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
|
|
{
|
|
BEESTRACE("opening root " << root << " ino " << ino);
|
|
|
|
Fd root_fd = open_root(root);
|
|
if (!root_fd) {
|
|
return root_fd;
|
|
}
|
|
|
|
BEESTOOLONG("open_root_ino(root " << root << ", ino " << ino << ")");
|
|
|
|
BEESTRACE("looking up ino " << ino);
|
|
BtrfsIoctlInoPathArgs ipa(ino);
|
|
if (!ipa.do_ioctl_nothrow(root_fd)) {
|
|
BEESLOGINFO("Lookup root " << root << " ino " << ino << " failed: " << strerror(errno));
|
|
return Fd();
|
|
}
|
|
|
|
BEESTRACE("searching paths for root " << root << " ino " << ino);
|
|
Fd rv;
|
|
if (ipa.m_paths.empty()) {
|
|
BEESLOGWARN("No paths for root " << root << " ino " << ino);
|
|
}
|
|
for (auto file_path : ipa.m_paths) {
|
|
BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
|
|
BEESCOUNT(open_file);
|
|
// Try to open file RW, fall back to RO
|
|
const char *fp_cstr = file_path.c_str();
|
|
rv = openat(root_fd, fp_cstr, FLAGS_OPEN_FILE);
|
|
if (!rv) {
|
|
BEESCOUNT(open_fail);
|
|
// errno == ENOENT is common during snapshot delete, ignore it
|
|
if (errno != ENOENT) {
|
|
BEESLOGWARN("Could not open path '" << file_path << "' at root " << root << " " << name_fd(root_fd) << ": " << strerror(errno));
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// Correct inode?
|
|
Stat file_stat(rv);
|
|
if (file_stat.st_ino != ino) {
|
|
BEESLOGWARN("Opening " << name_fd(root_fd) << "/" << file_path << " found wrong inode " << file_stat.st_ino << " instead of " << ino);
|
|
rv = Fd();
|
|
BEESCOUNT(open_wrong_ino);
|
|
break;
|
|
}
|
|
|
|
// Correct root?
|
|
auto file_root = btrfs_get_root_id(rv);
|
|
if (file_root != root) {
|
|
BEESLOGWARN("Opening " << name_fd(root_fd) << "/" << file_path << " found wrong root " << file_root << " instead of " << root);
|
|
rv = Fd();
|
|
BEESCOUNT(open_wrong_root);
|
|
break;
|
|
}
|
|
|
|
// Same filesystem?
|
|
Stat root_stat(root_fd);
|
|
if (root_stat.st_dev != file_stat.st_dev) {
|
|
BEESLOGWARN("Opening root " << name_fd(root_fd) << " path " << file_path << " found path st_dev " << file_stat.st_dev << " but root st_dev is " << root_stat.st_dev);
|
|
rv = Fd();
|
|
BEESCOUNT(open_wrong_dev);
|
|
break;
|
|
}
|
|
|
|
// The kernel rejects dedup requests with
|
|
// src and dst that have different datasum flags
|
|
// (datasum is a flag in the inode).
|
|
//
|
|
// We can detect the common case where a file is
|
|
// marked with nodatacow (which implies nodatasum).
|
|
// nodatacow files are arguably out of scope for dedup,
|
|
// since dedup would just make them datacow again.
|
|
// To handle these we pretend we couldn't open them.
|
|
//
|
|
// A less common case is nodatasum + datacow files.
|
|
// Those are availble for dedup but we have to solve
|
|
// some other problems before we can dedup them. They
|
|
// require a separate hash table namespace from datasum
|
|
// + datacow files, and we have to create nodatasum
|
|
// temporary files when we rewrite extents.
|
|
//
|
|
// FIXME: the datasum flag is scooped up by
|
|
// TREE_SEARCH_V2 during crawls. We throw the inode
|
|
// items away when we should be examining them for the
|
|
// nodatasum flag.
|
|
|
|
int attr = ioctl_iflags_get(rv);
|
|
if (attr & FS_NOCOW_FL) {
|
|
BEESLOGWARN("Opening " << name_fd(rv) << " found FS_NOCOW_FL flag in " << to_hex(attr));
|
|
rv = Fd();
|
|
BEESCOUNT(open_wrong_flags);
|
|
break;
|
|
}
|
|
|
|
BEESCOUNT(open_hit);
|
|
return rv;
|
|
}
|
|
|
|
// Odd, we didn't find a path.
|
|
return Fd();
|
|
}
|
|
|
|
Fd
|
|
BeesRoots::open_root_ino(uint64_t root, uint64_t ino)
|
|
{
|
|
return m_ctx->fd_cache()->open_root_ino(m_ctx, root, ino);
|
|
}
|
|
|
|
BeesCrawl::BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state) :
|
|
m_ctx(ctx),
|
|
m_state(initial_state)
|
|
{
|
|
}
|
|
|
|
bool
|
|
BeesCrawl::next_transid()
|
|
{
|
|
// If this crawl is recently empty, quickly and _silently_ bail out
|
|
auto current_time = time(NULL);
|
|
auto crawl_state = get_state();
|
|
auto elapsed_time = current_time - crawl_state.m_started;
|
|
if (elapsed_time < BEES_COMMIT_INTERVAL) {
|
|
if (!m_deferred) {
|
|
BEESLOGINFO("Deferring next transid in " << get_state());
|
|
}
|
|
m_deferred = true;
|
|
BEESCOUNT(crawl_defer);
|
|
return false;
|
|
}
|
|
|
|
// Log performance stats from the old crawl
|
|
BEESLOGINFO("Next transid in " << get_state());
|
|
|
|
// Start new crawl
|
|
m_deferred = false;
|
|
auto roots = m_ctx->roots();
|
|
crawl_state.m_min_transid = crawl_state.m_max_transid;
|
|
crawl_state.m_max_transid = roots->transid_max();
|
|
crawl_state.m_objectid = 0;
|
|
crawl_state.m_offset = 0;
|
|
crawl_state.m_started = current_time;
|
|
BEESCOUNT(crawl_restart);
|
|
set_state(crawl_state);
|
|
BEESLOGINFO("Restarted crawl " << get_state());
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
BeesCrawl::fetch_extents()
|
|
{
|
|
THROW_CHECK1(runtime_error, m_extents.size(), m_extents.empty());
|
|
|
|
auto old_state = get_state();
|
|
if (m_deferred || old_state.m_max_transid <= old_state.m_min_transid) {
|
|
BEESTRACE("Nothing to crawl in " << get_state());
|
|
return next_transid();
|
|
}
|
|
|
|
BEESNOTE("crawling " << get_state());
|
|
// BEESLOGINFO("Crawling " << get_state());
|
|
|
|
Timer crawl_timer;
|
|
|
|
BtrfsIoctlSearchKey sk(BEES_MAX_CRAWL_SIZE * (sizeof(btrfs_file_extent_item) + sizeof(btrfs_ioctl_search_header)));
|
|
sk.tree_id = old_state.m_root;
|
|
sk.min_objectid = old_state.m_objectid;
|
|
sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
|
|
sk.min_offset = old_state.m_offset;
|
|
sk.min_transid = old_state.m_min_transid;
|
|
sk.max_transid = old_state.m_max_transid;
|
|
sk.nr_items = BEES_MAX_CRAWL_SIZE;
|
|
|
|
// Lock in the old state
|
|
set_state(old_state);
|
|
|
|
BEESTRACE("Searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
|
|
bool ioctl_ok = false;
|
|
{
|
|
BEESNOTE("searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
|
|
BEESTOOLONG("Searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
|
|
Timer crawl_timer;
|
|
ioctl_ok = sk.do_ioctl_nothrow(m_ctx->root_fd());
|
|
BEESCOUNTADD(crawl_ms, crawl_timer.age() * 1000);
|
|
}
|
|
|
|
if (ioctl_ok) {
|
|
BEESCOUNT(crawl_search);
|
|
} else {
|
|
BEESLOGWARN("Search ioctl failed: " << strerror(errno));
|
|
BEESCOUNT(crawl_fail);
|
|
}
|
|
|
|
if (!ioctl_ok || sk.m_result.empty()) {
|
|
BEESCOUNT(crawl_empty);
|
|
// BEESLOGINFO("Crawl empty " << get_state());
|
|
return next_transid();
|
|
}
|
|
|
|
// BEESLOGINFO("Crawling " << sk.m_result.size() << " results from " << get_state());
|
|
auto results_left = sk.m_result.size();
|
|
BEESNOTE("crawling " << results_left << " results from " << get_state());
|
|
size_t count_other = 0;
|
|
size_t count_inline = 0;
|
|
size_t count_unknown = 0;
|
|
size_t count_data = 0;
|
|
size_t count_low = 0;
|
|
size_t count_high = 0;
|
|
BeesFileRange last_bfr;
|
|
for (auto i : sk.m_result) {
|
|
sk.next_min(i);
|
|
--results_left;
|
|
BEESCOUNT(crawl_items);
|
|
|
|
BEESTRACE("i = " << i);
|
|
|
|
// We need the "+ 1" and objectid rollover that next_min does.
|
|
auto new_state = get_state();
|
|
new_state.m_objectid = sk.min_objectid;
|
|
new_state.m_offset = sk.min_offset;
|
|
|
|
// Saving state here means we can skip a search result
|
|
// if we are interrupted. Not saving state here means we
|
|
// can fail to make forward progress in cases where there
|
|
// is a lot of metadata we can't process. Favor forward
|
|
// progress over losing search results.
|
|
set_state(new_state);
|
|
|
|
// Ignore things that aren't EXTENT_DATA_KEY
|
|
if (i.type != BTRFS_EXTENT_DATA_KEY) {
|
|
++count_other;
|
|
BEESCOUNT(crawl_nondata);
|
|
continue;
|
|
}
|
|
|
|
auto gen = call_btrfs_get(btrfs_stack_file_extent_generation, i.m_data);
|
|
if (gen < get_state().m_min_transid) {
|
|
BEESCOUNT(crawl_gen_low);
|
|
++count_low;
|
|
// We probably want (need?) to scan these anyway.
|
|
// continue;
|
|
}
|
|
if (gen > get_state().m_max_transid) {
|
|
BEESCOUNT(crawl_gen_high);
|
|
++count_high;
|
|
// This shouldn't ever happen
|
|
// continue;
|
|
}
|
|
|
|
auto type = call_btrfs_get(btrfs_stack_file_extent_type, i.m_data);
|
|
switch (type) {
|
|
default:
|
|
BEESLOGDEBUG("Unhandled file extent type " << type << " in root " << get_state().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset));
|
|
++count_unknown;
|
|
BEESCOUNT(crawl_unknown);
|
|
break;
|
|
case BTRFS_FILE_EXTENT_INLINE:
|
|
// Ignore these for now.
|
|
// BEESLOGDEBUG("Ignored file extent type INLINE in root " << get_state().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset));
|
|
++count_inline;
|
|
// TODO: replace with out-of-line dup extents
|
|
BEESCOUNT(crawl_inline);
|
|
break;
|
|
case BTRFS_FILE_EXTENT_PREALLOC:
|
|
BEESCOUNT(crawl_prealloc);
|
|
// fallthrough
|
|
case BTRFS_FILE_EXTENT_REG: {
|
|
auto physical = call_btrfs_get(btrfs_stack_file_extent_disk_bytenr, i.m_data);
|
|
auto ram = call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data);
|
|
auto len = call_btrfs_get(btrfs_stack_file_extent_num_bytes, i.m_data);
|
|
auto offset = call_btrfs_get(btrfs_stack_file_extent_offset, i.m_data);
|
|
BEESTRACE("Root " << get_state().m_root << " ino " << i.objectid << " physical " << to_hex(physical)
|
|
<< " logical " << to_hex(i.offset) << ".." << to_hex(i.offset + len)
|
|
<< " gen " << gen);
|
|
++count_data;
|
|
if (physical) {
|
|
THROW_CHECK1(runtime_error, ram, ram > 0);
|
|
THROW_CHECK1(runtime_error, len, len > 0);
|
|
THROW_CHECK2(runtime_error, offset, ram, offset < ram);
|
|
BeesFileId bfi(get_state().m_root, i.objectid);
|
|
if (m_ctx->is_blacklisted(bfi)) {
|
|
BEESCOUNT(crawl_blacklisted);
|
|
} else {
|
|
BeesFileRange bfr(bfi, i.offset, i.offset + len);
|
|
// BEESNOTE("pushing bfr " << bfr << " limit " << BEES_MAX_QUEUE_SIZE);
|
|
m_extents.insert(bfr);
|
|
BEESCOUNT(crawl_push);
|
|
}
|
|
} else {
|
|
BEESCOUNT(crawl_hole);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
// BEESLOGINFO("Crawled inline " << count_inline << " data " << count_data << " other " << count_other << " unknown " << count_unknown << " gen_low " << count_low << " gen_high " << count_high << " " << get_state() << " in " << crawl_timer << "s");
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
BeesCrawl::fetch_extents_harder()
|
|
{
|
|
BEESNOTE("fetch_extents_harder " << get_state() << " with " << m_extents.size() << " extents");
|
|
while (m_extents.empty()) {
|
|
bool progress_made = fetch_extents();
|
|
if (!progress_made) {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
BeesFileRange
|
|
BeesCrawl::peek_front()
|
|
{
|
|
unique_lock<mutex> lock(m_mutex);
|
|
fetch_extents_harder();
|
|
if (m_extents.empty()) {
|
|
return BeesFileRange();
|
|
}
|
|
auto rv = *m_extents.begin();
|
|
return rv;
|
|
}
|
|
|
|
BeesFileRange
|
|
BeesCrawl::pop_front()
|
|
{
|
|
unique_lock<mutex> lock(m_mutex);
|
|
fetch_extents_harder();
|
|
if (m_extents.empty()) {
|
|
return BeesFileRange();
|
|
}
|
|
auto rv = *m_extents.begin();
|
|
m_extents.erase(m_extents.begin());
|
|
return rv;
|
|
}
|
|
|
|
BeesCrawlState
|
|
BeesCrawl::get_state()
|
|
{
|
|
unique_lock<mutex> lock(m_state_mutex);
|
|
auto rv = m_state;
|
|
return rv;
|
|
}
|
|
|
|
void
|
|
BeesCrawl::set_state(const BeesCrawlState &bcs)
|
|
{
|
|
unique_lock<mutex> lock(m_state_mutex);
|
|
m_state = bcs;
|
|
lock.unlock();
|
|
m_ctx->roots()->crawl_state_set_dirty();
|
|
}
|