bees: remove local cruft, throw at github
src/Makefile (new file, 39 lines)

PROGRAMS = \
	../bin/bees \
	../bin/fiemap \
	../bin/fiewalk \

all: $(PROGRAMS) depends.mk

include ../makeflags

LIBS = -lcrucible -lpthread
LDFLAGS = -L../lib -Wl,-rpath=$(shell realpath ../lib)

depends.mk: Makefile *.cc
	for x in *.cc; do $(CXX) $(CXXFLAGS) -M "$$x"; done > depends.mk.new
	mv -fv depends.mk.new depends.mk

-include depends.mk

%.o: %.cc %.h
	$(CXX) $(CXXFLAGS) -o "$@" -c "$<"

../bin/%: %.o
	@echo Implicit bin rule "$<" '->' "$@"
	$(CXX) $(CXXFLAGS) -o "$@" "$<" $(LDFLAGS) $(LIBS)

BEES_OBJS = \
	bees.o \
	bees-context.o \
	bees-hash.o \
	bees-resolve.o \
	bees-roots.o \
	bees-thread.o \
	bees-types.o \

../bin/bees: $(BEES_OBJS)
	$(CXX) $(CXXFLAGS) -o "$@" $(BEES_OBJS) $(LDFLAGS) $(LIBS)

clean:
	-rm -fv *.o
src/bees-context.cc (new file, 1009 lines): diff suppressed because it is too large.
src/bees-hash.cc (new file, 682 lines)

#include "bees.h"

#include "crucible/crc64.h"
#include "crucible/string.h"

#include <algorithm>
#include <random>

#include <sys/mman.h>

using namespace crucible;
using namespace std;

static inline
bool
using_any_madvise()
{
	return true;
}

ostream &
operator<<(ostream &os, const BeesHash &bh)
{
	return os << to_hex(BeesHash::Type(bh));
}

ostream &
operator<<(ostream &os, const BeesHashTable::Cell &bhte)
{
	return os << "BeesHashTable::Cell { hash = " << BeesHash(bhte.e_hash) << ", addr = "
		<< BeesAddress(bhte.e_addr) << " }";
}

void
dump_bucket(BeesHashTable::Cell *p, BeesHashTable::Cell *q)
{
	// Must be called while holding m_bucket_mutex
	for (auto i = p; i < q; ++i) {
		BEESLOG("Entry " << i - p << " " << *i);
	}
}

const bool VERIFY_CLEARS_BUGS = false;

bool
verify_cell_range(BeesHashTable::Cell *p, BeesHashTable::Cell *q, bool clear_bugs = VERIFY_CLEARS_BUGS)
{
	// Must be called while holding m_bucket_mutex
	bool bugs_found = false;
	set<BeesHashTable::Cell> seen_it;
	for (BeesHashTable::Cell *cell = p; cell < q; ++cell) {
		if (cell->e_addr && cell->e_addr < 0x1000) {
			BEESCOUNT(bug_hash_magic_addr);
			BEESINFO("Bad hash table address hash " << to_hex(cell->e_hash) << " addr " << to_hex(cell->e_addr));
			if (clear_bugs) {
				cell->e_addr = 0;
				cell->e_hash = 0;
			}
			bugs_found = true;
		}
		if (cell->e_addr && !seen_it.insert(*cell).second) {
			BEESCOUNT(bug_hash_duplicate_cell);
			// BEESLOG("Duplicate hash table entry:\nthis = " << *cell << "\nold = " << *seen_it.find(*cell));
			BEESINFO("Duplicate hash table entry: " << *cell);
			if (clear_bugs) {
				cell->e_addr = 0;
				cell->e_hash = 0;
			}
			bugs_found = true;
		}
	}
	return bugs_found;
}

pair<BeesHashTable::Cell *, BeesHashTable::Cell *>
BeesHashTable::get_cell_range(HashType hash)
{
	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
	THROW_CHECK1(runtime_error, m_bucket_ptr, m_bucket_ptr != nullptr);
	Bucket *pp = &m_bucket_ptr[hash % m_buckets];
	Cell *bp = pp[0].p_cells;
	Cell *ep = pp[1].p_cells;
	THROW_CHECK2(out_of_range, m_cell_ptr, bp, bp >= m_cell_ptr);
	THROW_CHECK2(out_of_range, m_cell_ptr_end, ep, ep <= m_cell_ptr_end);
	return make_pair(bp, ep);
}

pair<uint8_t *, uint8_t *>
BeesHashTable::get_extent_range(HashType hash)
{
	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
	THROW_CHECK1(runtime_error, m_bucket_ptr, m_bucket_ptr != nullptr);
	Extent *iop = &m_extent_ptr[ (hash % m_buckets) / c_buckets_per_extent ];
	uint8_t *bp = iop[0].p_byte;
	uint8_t *ep = iop[1].p_byte;
	THROW_CHECK2(out_of_range, m_byte_ptr, bp, bp >= m_byte_ptr);
	THROW_CHECK2(out_of_range, m_byte_ptr_end, ep, ep <= m_byte_ptr_end);
	return make_pair(bp, ep);
}

void
BeesHashTable::flush_dirty_extents()
{
	if (using_shared_map()) return;

	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);

	unique_lock<mutex> lock(m_extent_mutex);
	auto dirty_extent_copy = m_buckets_dirty;
	m_buckets_dirty.clear();
	if (dirty_extent_copy.empty()) {
		BEESNOTE("idle");
		m_condvar.wait(lock);
		return; // please call later, i.e. immediately
	}
	lock.unlock();

	size_t extent_counter = 0;
	for (auto extent_number : dirty_extent_copy) {
		++extent_counter;
		BEESNOTE("flush extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
		catch_all([&]() {
			uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
			uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;
			THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
			THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
			if (using_shared_map()) {
				BEESTOOLONG("flush extent " << extent_number);
				copy(dirty_extent, dirty_extent_end, dirty_extent);
			} else {
				BEESTOOLONG("pwrite(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
				// Page locks slow us down more than copying the data does
				vector<uint8_t> extent_copy(dirty_extent, dirty_extent_end);
				pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
				BEESCOUNT(hash_extent_out);
			}
		});
		BEESNOTE("flush rate limited at extent #" << extent_number << " (" << extent_counter << " of " << dirty_extent_copy.size() << ")");
		m_flush_rate_limit.sleep_for(BLOCK_SIZE_HASHTAB_EXTENT);
	}
}

void
BeesHashTable::set_extent_dirty(HashType hash)
{
	if (using_shared_map()) return;
	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
	auto pr = get_extent_range(hash);
	uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
	THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);
	unique_lock<mutex> lock(m_extent_mutex);
	m_buckets_dirty.insert(extent_number);
	m_condvar.notify_one();
}

void
BeesHashTable::writeback_loop()
{
	if (!using_shared_map()) {
		while (1) {
			flush_dirty_extents();
		}
	}
}

static
string
percent(size_t num, size_t den)
{
	if (den) {
		return astringprintf("%u%%", num * 100 / den);
	} else {
		return "--%";
	}
}

void
BeesHashTable::prefetch_loop()
{
	// Always do the mlock, whether shared or not
	THROW_CHECK1(runtime_error, m_size, m_size > 0);
	catch_all([&]() {
		BEESNOTE("mlock " << pretty(m_size));
		DIE_IF_NON_ZERO(mlock(m_byte_ptr, m_size));
	});

	while (1) {
		size_t width = 64;
		vector<size_t> occupancy(width, 0);
		size_t occupied_count = 0;
		size_t total_count = 0;
		size_t compressed_count = 0;
		size_t compressed_offset_count = 0;
		size_t toxic_count = 0;
		size_t unaligned_eof_count = 0;

		for (uint64_t ext = 0; ext < m_extents; ++ext) {
			BEESNOTE("prefetching hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
			catch_all([&]() {
				fetch_missing_extent(ext * c_buckets_per_extent);

				BEESNOTE("analyzing hash table extent " << ext << " of " << m_extent_ptr_end - m_extent_ptr);
				bool duplicate_bugs_found = false;
				unique_lock<mutex> lock(m_bucket_mutex);
				for (Bucket *bucket = m_extent_ptr[ext].p_buckets; bucket < m_extent_ptr[ext + 1].p_buckets; ++bucket) {
					if (verify_cell_range(bucket[0].p_cells, bucket[1].p_cells)) {
						duplicate_bugs_found = true;
					}
					size_t this_bucket_occupied_count = 0;
					for (Cell *cell = bucket[0].p_cells; cell < bucket[1].p_cells; ++cell) {
						if (cell->e_addr) {
							++this_bucket_occupied_count;
							BeesAddress a(cell->e_addr);
							if (a.is_compressed()) {
								++compressed_count;
								if (a.has_compressed_offset()) {
									++compressed_offset_count;
								}
							}
							if (a.is_toxic()) {
								++toxic_count;
							}
							if (a.is_unaligned_eof()) {
								++unaligned_eof_count;
							}
						}
						++total_count;
					}
					++occupancy.at(this_bucket_occupied_count * width / (1 + c_cells_per_bucket) );
					// Count these instead of calculating the number so we get better stats in case of exceptions
					occupied_count += this_bucket_occupied_count;
				}
				lock.unlock();
				if (duplicate_bugs_found) {
					set_extent_dirty(ext);
				}
			});
		}

		BEESNOTE("calculating hash table statistics");

		vector<string> histogram;
		vector<size_t> thresholds;
		size_t threshold = 1;
		bool threshold_exceeded = false;
		do {
			threshold_exceeded = false;
			histogram.push_back(string(width, ' '));
			thresholds.push_back(threshold);
			for (size_t x = 0; x < width; ++x) {
				if (occupancy.at(x) >= threshold) {
					histogram.back().at(x) = '#';
					threshold_exceeded = true;
				}
			}
			threshold *= 2;
		} while (threshold_exceeded);

		ostringstream out;
		size_t count = histogram.size();
		bool first_line = true;
		for (auto it = histogram.rbegin(); it != histogram.rend(); ++it) {
			out << *it << " " << thresholds.at(--count);
			if (first_line) {
				first_line = false;
				out << " pages";
			}
			out << "\n";
		}

		size_t uncompressed_count = occupied_count - compressed_count;
		size_t legacy_count = compressed_count - compressed_offset_count;

		ostringstream graph_blob;

		graph_blob << "Now: " << format_time(time(NULL)) << "\n";
		graph_blob << "Uptime: " << m_ctx->total_timer().age() << " seconds\n";

		graph_blob
			<< "\nHash table page occupancy histogram (" << occupied_count << "/" << total_count << " cells occupied, " << (occupied_count * 100 / total_count) << "%)\n"
			<< out.str() << "0% | 25% | 50% | 75% | 100% page fill\n"
			<< "compressed " << compressed_count << " (" << percent(compressed_count, occupied_count) << ")"
			<< " new-style " << compressed_offset_count << " (" << percent(compressed_offset_count, occupied_count) << ")"
			<< " old-style " << legacy_count << " (" << percent(legacy_count, occupied_count) << ")\n"
			<< "uncompressed " << uncompressed_count << " (" << percent(uncompressed_count, occupied_count) << ")"
			<< " unaligned_eof " << unaligned_eof_count << " (" << percent(unaligned_eof_count, occupied_count) << ")"
			<< " toxic " << toxic_count << " (" << percent(toxic_count, occupied_count) << ")";

		graph_blob << "\n\n";

		graph_blob << "TOTAL:\n";
		auto thisStats = BeesStats::s_global;
		graph_blob << "\t" << thisStats << "\n";

		graph_blob << "\nRATES:\n";
		auto avg_rates = thisStats / m_ctx->total_timer().age();
		graph_blob << "\t" << avg_rates << "\n";

		BEESLOG(graph_blob.str());
		catch_all([&]() {
			m_stats_file.write(graph_blob.str());
		});

		BEESNOTE("idle " << BEES_HASH_TABLE_ANALYZE_INTERVAL << "s");
		nanosleep(BEES_HASH_TABLE_ANALYZE_INTERVAL);
	}
}

void
BeesHashTable::fetch_missing_extent(HashType hash)
{
	BEESTOOLONG("fetch_missing_extent for hash " << to_hex(hash));
	if (using_shared_map()) return;
	THROW_CHECK1(runtime_error, m_buckets, m_buckets > 0);
	auto pr = get_extent_range(hash);
	uint64_t extent_number = reinterpret_cast<Extent *>(pr.first) - m_extent_ptr;
	THROW_CHECK1(runtime_error, extent_number, extent_number < m_extents);

	unique_lock<mutex> lock(m_extent_mutex);
	if (!m_buckets_missing.count(extent_number)) {
		return;
	}

	size_t missing_buckets = m_buckets_missing.size();
	lock.unlock();

	BEESNOTE("fetch waiting for hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");

	// Acquire blocking lock on this extent only
	LockSet<uint64_t>::Lock extent_lock(m_extent_lock_set, extent_number);

	// Check missing again because someone else might have fetched this
	// extent for us while we didn't hold any locks
	lock.lock();
	if (!m_buckets_missing.count(extent_number)) {
		BEESCOUNT(hash_extent_in_twice);
		return;
	}
	lock.unlock();

	// OK we have to read this extent
	BEESNOTE("fetching hash extent #" << extent_number << ", " << missing_buckets << " left to fetch");

	BEESTRACE("Fetching missing hash extent " << extent_number);
	uint8_t *dirty_extent = m_extent_ptr[extent_number].p_byte;
	uint8_t *dirty_extent_end = m_extent_ptr[extent_number + 1].p_byte;

	{
		BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
		pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr);
	}

	BEESCOUNT(hash_extent_in);
	// We don't block when fetching an extent but we do slow down the
	// prefetch thread.
	m_prefetch_rate_limit.borrow(BLOCK_SIZE_HASHTAB_EXTENT);
	lock.lock();
	m_buckets_missing.erase(extent_number);
}

bool
BeesHashTable::is_toxic_hash(BeesHashTable::HashType hash) const
{
	return m_toxic_hashes.find(hash) != m_toxic_hashes.end();
}

vector<BeesHashTable::Cell>
BeesHashTable::find_cell(HashType hash)
{
	// This saves a lot of time prefilling the hash table, and there's no risk of eviction
	if (is_toxic_hash(hash)) {
		BEESCOUNT(hash_toxic);
		BeesAddress toxic_addr(0x1000);
		toxic_addr.set_toxic();
		Cell toxic_cell(hash, toxic_addr);
		vector<Cell> rv;
		rv.push_back(toxic_cell);
		return rv;
	}
	fetch_missing_extent(hash);
	BEESTOOLONG("find_cell hash " << BeesHash(hash));
	vector<Cell> rv;
	unique_lock<mutex> lock(m_bucket_mutex);
	auto er = get_cell_range(hash);
	// FIXME: Weed out zero addresses in the table due to earlier bugs
	copy_if(er.first, er.second, back_inserter(rv), [=](const Cell &ip) { return ip.e_hash == hash && ip.e_addr >= 0x1000; });
	BEESCOUNT(hash_lookup);
	return rv;
}

// Move an entry to the end of the list. Used after an attempt to resolve
// an address in the hash table fails. Probably more correctly called
// push_back_hash_addr, except it never inserts. Shared hash tables
// never erase anything, since there is no way to tell if an entry is
// out of date or just belonging to the wrong filesystem.
void
BeesHashTable::erase_hash_addr(HashType hash, AddrType addr)
{
	// if (m_shared) return;
	fetch_missing_extent(hash);
	BEESTOOLONG("erase hash " << to_hex(hash) << " addr " << addr);
	unique_lock<mutex> lock(m_bucket_mutex);
	auto er = get_cell_range(hash);
	Cell mv(hash, addr);
	Cell *ip = find(er.first, er.second, mv);
	bool found = (ip < er.second);
	if (found) {
		// Lookups on invalid addresses really hurt us. Kill it with fire!
		*ip = Cell(0, 0);
		set_extent_dirty(hash);
		BEESCOUNT(hash_erase);
#if 0
		if (verify_cell_range(er.first, er.second)) {
			BEESINFO("while erasing hash " << hash << " addr " << addr);
		}
#endif
	}
}

// If entry is already present in list, move it to the front of the
// list without dropping any entries, and return true. If entry is not
// present in list, insert it at the front of the list, possibly dropping
// the last entry in the list, and return false. Used to move duplicate
// hash blocks to the front of the list.
bool
BeesHashTable::push_front_hash_addr(HashType hash, AddrType addr)
{
	fetch_missing_extent(hash);
	BEESTOOLONG("push_front_hash_addr hash " << BeesHash(hash) <<" addr " << BeesAddress(addr));
	unique_lock<mutex> lock(m_bucket_mutex);
	auto er = get_cell_range(hash);
	Cell mv(hash, addr);
	Cell *ip = find(er.first, er.second, mv);
	bool found = (ip < er.second);
	if (!found) {
		// If no match found, get rid of an empty space instead
		// If no empty spaces, ip will point to end
		ip = find(er.first, er.second, Cell(0, 0));
	}
	if (ip > er.first) {
		// Delete matching entry, first empty entry,
		// or last entry whether empty or not
		// move_backward(er.first, ip - 1, ip);
		auto sp = ip;
		auto dp = ip;
		--sp;
		// If we are deleting the last entry then don't copy it
		if (ip == er.second) {
			--sp;
			--dp;
			BEESCOUNT(hash_evict);
		}
		while (dp > er.first) {
			*dp-- = *sp--;
		}
	}
	// There is now a space at the front, insert there if different
	if (er.first[0] != mv) {
		er.first[0] = mv;
		set_extent_dirty(hash);
		BEESCOUNT(hash_front);
	}
#if 0
	if (verify_cell_range(er.first, er.second)) {
		BEESINFO("while push_fronting hash " << hash << " addr " << addr);
	}
#endif
	return found;
}

// If entry is already present in list, returns true and does not
// modify list. If entry is not present in list, returns false and
// inserts at a random position in the list, possibly evicting the entry
// at the end of the list. Used to insert new unique (not-yet-duplicate)
// blocks in random order.
bool
BeesHashTable::push_random_hash_addr(HashType hash, AddrType addr)
{
	fetch_missing_extent(hash);
	BEESTOOLONG("push_random_hash_addr hash " << BeesHash(hash) << " addr " << BeesAddress(addr));
	unique_lock<mutex> lock(m_bucket_mutex);
	auto er = get_cell_range(hash);
	Cell mv(hash, addr);
	Cell *ip = find(er.first, er.second, mv);
	bool found = (ip < er.second);

	thread_local default_random_engine generator;
	thread_local uniform_int_distribution<int> distribution(0, c_cells_per_bucket - 1);
	auto pos = distribution(generator);

	int case_cond = 0;
	vector<Cell> saved(er.first, er.second);

	if (found) {
		// If hash already exists after pos, swap with pos
		if (ip > er.first + pos) {

			// move_backward(er.first + pos, ip - 1, ip);
			auto sp = ip;
			auto dp = ip;
			--sp;
			while (dp > er.first + pos) {
				*dp-- = *sp--;
			}
			*dp = mv;
			BEESCOUNT(hash_bump);
			case_cond = 1;
			goto ret_dirty;
		}
		// Hash already exists before (or at) pos, leave it there
		BEESCOUNT(hash_already);
		case_cond = 2;
		goto ret;
	}

	// Find an empty space to back of pos
	for (ip = er.first + pos; ip < er.second; ++ip) {
		if (*ip == Cell(0, 0)) {
			*ip = mv;
			case_cond = 3;
			goto ret_dirty;
		}
	}

	// Find an empty space to front of pos
	// if there is anything to front of pos
	if (pos > 0) {
		for (ip = er.first + pos - 1; ip >= er.first; --ip) {
			if (*ip == Cell(0, 0)) {
				*ip = mv;
				case_cond = 4;
				goto ret_dirty;
			}
		}
	}

	// Evict something and insert at pos
	move_backward(er.first + pos, er.second - 1, er.second);
	er.first[pos] = mv;
	BEESCOUNT(hash_evict);
	case_cond = 5;
ret_dirty:
	BEESCOUNT(hash_insert);
	set_extent_dirty(hash);
ret:
#if 0
	if (verify_cell_range(er.first, er.second, false)) {
		BEESLOG("while push_randoming (case " << case_cond << ") pos " << pos
			<< " ip " << (ip - er.first) << " " << mv);
		// dump_bucket(saved.data(), saved.data() + saved.size());
		// dump_bucket(er.first, er.second);
	}
#else
	(void)case_cond;
#endif
	return found;
}

void
BeesHashTable::try_mmap_flags(int flags)
{
	if (!m_cell_ptr) {
		THROW_CHECK1(out_of_range, m_size, m_size > 0);
		Timer map_time;
		catch_all([&]() {
			BEESLOG("mapping hash table size " << m_size << " with flags " << mmap_flags_ntoa(flags));
			void *ptr = mmap_or_die(nullptr, m_size, PROT_READ | PROT_WRITE, flags, flags & MAP_ANONYMOUS ? -1 : int(m_fd), 0);
			BEESLOG("mmap done in " << map_time << " sec");
			m_cell_ptr = static_cast<Cell *>(ptr);
			void *ptr_end = static_cast<uint8_t *>(ptr) + m_size;
			m_cell_ptr_end = static_cast<Cell *>(ptr_end);
		});
	}
}

void
BeesHashTable::set_shared(bool shared)
{
	m_shared = shared;
}

BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename) :
	m_ctx(ctx),
	m_size(0),
	m_void_ptr(nullptr),
	m_void_ptr_end(nullptr),
	m_buckets(0),
	m_cells(0),
	m_writeback_thread("hash_writeback"),
	m_prefetch_thread("hash_prefetch " + m_ctx->root_path()),
	m_flush_rate_limit(BEES_FLUSH_RATE),
	m_prefetch_rate_limit(BEES_FLUSH_RATE),
	m_stats_file(m_ctx->home_fd(), "beesstats.txt")
{
	BEESNOTE("opening hash table " << filename);

	m_fd = openat_or_die(m_ctx->home_fd(), filename, FLAGS_OPEN_FILE_RW, 0700);
	Stat st(m_fd);
	m_size = st.st_size;

	BEESTRACE("hash table size " << m_size);
	BEESTRACE("hash table bucket size " << BLOCK_SIZE_HASHTAB_BUCKET);
	BEESTRACE("hash table extent size " << BLOCK_SIZE_HASHTAB_EXTENT);

	THROW_CHECK2(invalid_argument, BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_EXTENT, (BLOCK_SIZE_HASHTAB_EXTENT % BLOCK_SIZE_HASHTAB_BUCKET) == 0);

	// Does the union work?
	THROW_CHECK2(runtime_error, m_void_ptr, m_cell_ptr, m_void_ptr == m_cell_ptr);
	THROW_CHECK2(runtime_error, m_void_ptr, m_byte_ptr, m_void_ptr == m_byte_ptr);
	THROW_CHECK2(runtime_error, m_void_ptr, m_bucket_ptr, m_void_ptr == m_bucket_ptr);
	THROW_CHECK2(runtime_error, m_void_ptr, m_extent_ptr, m_void_ptr == m_extent_ptr);

	// There's more than one union
	THROW_CHECK2(runtime_error, sizeof(Bucket), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket));
	THROW_CHECK2(runtime_error, sizeof(Bucket::p_byte), BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_BUCKET == sizeof(Bucket::p_byte));
	THROW_CHECK2(runtime_error, sizeof(Extent), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent));
	THROW_CHECK2(runtime_error, sizeof(Extent::p_byte), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent::p_byte));

	BEESLOG("opened hash table filename '" << filename << "' length " << m_size);
	m_buckets = m_size / BLOCK_SIZE_HASHTAB_BUCKET;
	m_cells = m_buckets * c_cells_per_bucket;
	m_extents = (m_size + BLOCK_SIZE_HASHTAB_EXTENT - 1) / BLOCK_SIZE_HASHTAB_EXTENT;
	BEESLOG("\tcells " << m_cells << ", buckets " << m_buckets << ", extents " << m_extents);

	BEESLOG("\tflush rate limit " << BEES_FLUSH_RATE);

	if (using_shared_map()) {
		try_mmap_flags(MAP_SHARED);
	} else {
		try_mmap_flags(MAP_PRIVATE | MAP_ANONYMOUS);
	}

	if (!m_cell_ptr) {
		THROW_ERROR(runtime_error, "unable to mmap " << filename);
	}

	if (!using_shared_map()) {
		// madvise fails if MAP_SHARED
		if (using_any_madvise()) {
			// DONTFORK because we sometimes do fork,
			// but the child doesn't touch any of the many, many pages
			BEESTOOLONG("madvise(MADV_HUGEPAGE | MADV_DONTFORK)");
			DIE_IF_NON_ZERO(madvise(m_byte_ptr, m_size, MADV_HUGEPAGE | MADV_DONTFORK));
		}
		for (uint64_t i = 0; i < m_size / sizeof(Extent); ++i) {
			m_buckets_missing.insert(i);
		}
	}

	m_writeback_thread.exec([&]() {
		writeback_loop();
	});

	m_prefetch_thread.exec([&]() {
		prefetch_loop();
	});

	// Blacklist might fail if the hash table is not stored on a btrfs
	catch_all([&]() {
		m_ctx->blacklist_add(BeesFileId(m_fd));
	});

	// Skip zero because we already weed that out before it gets near a hash function
	for (unsigned i = 1; i < 256; ++i) {
		vector<uint8_t> v(BLOCK_SIZE_SUMS, i);
		HashType hash = Digest::CRC::crc64(v.data(), v.size());
		m_toxic_hashes.insert(hash);
	}
}

BeesHashTable::~BeesHashTable()
{
	if (m_cell_ptr && m_size) {
		flush_dirty_extents();
		catch_all([&]() {
			DIE_IF_NON_ZERO(munmap(m_cell_ptr, m_size));
			m_cell_ptr = nullptr;
			m_size = 0;
		});
	}
}
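The bucket policy that push_front_hash_addr() and push_random_hash_addr() implement above is the core of the table: duplicate hashes are pushed to the front of their bucket, new unique blocks enter at random positions, and eviction always drops from the end, so the cells most likely to be reused tend to survive. The following is a minimal, self-contained sketch of the move-to-front case only; MiniCell, the bucket size, and push_front_cell are illustrative stand-ins, not the real BeesHashTable types or layout.

	// Illustrative sketch only: a fixed-size bucket of (hash, addr) cells with
	// the move-to-front / evict-from-the-end policy described in the comments above.
	#include <algorithm>
	#include <array>
	#include <cstddef>
	#include <cstdint>
	#include <iostream>

	struct MiniCell {
		uint64_t hash = 0;
		uint64_t addr = 0;
		bool operator==(const MiniCell &o) const { return hash == o.hash && addr == o.addr; }
	};

	template <std::size_t N>
	bool
	push_front_cell(std::array<MiniCell, N> &bucket, const MiniCell &mv)
	{
		auto ip = std::find(bucket.begin(), bucket.end(), mv);
		const bool found = (ip != bucket.end());
		if (!found) {
			// No match: reuse an empty cell if present, otherwise evict the last cell.
			ip = std::find(bucket.begin(), bucket.end(), MiniCell{});
			if (ip == bucket.end()) {
				ip = bucket.end() - 1;
			}
		}
		// Shift everything in front of ip down one slot, then insert at the front.
		std::move_backward(bucket.begin(), ip, ip + 1);
		bucket.front() = mv;
		return found;
	}

	int main()
	{
		std::array<MiniCell, 4> bucket{};
		push_front_cell(bucket, {0x1111, 0x2000});
		push_front_cell(bucket, {0x2222, 0x3000});
		// Re-inserting an existing entry moves it back to the front and returns true.
		bool was_known = push_front_cell(bucket, {0x1111, 0x2000});
		std::cout << "front hash " << std::hex << bucket.front().hash
			<< " already present: " << std::boolalpha << was_known << "\n";
		return 0;
	}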
src/bees-resolve.cc (new file, 487 lines)

#include "bees.h"

#include "crucible/limits.h"
#include "crucible/string.h"

using namespace crucible;
using namespace std;

BeesAddress
BeesResolver::addr(BeesAddress new_addr)
{
	THROW_CHECK1(invalid_argument, new_addr, !new_addr.is_magic());

	m_found_data = false;
	m_found_dup = false;
	m_found_hash = false;
	m_wrong_data = false;
	m_biors.clear();
	m_ranges.clear();
	m_addr = new_addr;
	m_bior_count = 0;

	auto rv = m_ctx->resolve_addr(m_addr);
	m_biors = rv.m_biors;
	m_is_toxic = rv.m_is_toxic;
	m_bior_count = m_biors.size();

	return m_addr;
}

BeesResolver::BeesResolver(shared_ptr<BeesContext> ctx, BeesAddress new_addr) :
	m_ctx(ctx),
	m_bior_count(0)
{
	addr(new_addr);
}

BeesBlockData
BeesResolver::adjust_offset(const BeesFileRange &haystack, const BeesBlockData &needle)
{
	BEESTRACE("Searching for needle " << needle << "\n\tin haystack " << haystack);

	BEESCOUNT(adjust_try);

	// Constraint checks
	THROW_CHECK1(invalid_argument, needle.begin(), (needle.begin() & BLOCK_MASK_CLONE) == 0);
	THROW_CHECK1(invalid_argument, haystack.begin(), (haystack.begin() & BLOCK_MASK_CLONE) == 0);

	// Need to know the precise dimensions of the haystack and needle
	off_t haystack_size = haystack.file_size();

	// If the needle is not a full block then it can only match at EOF
	off_t needle_len = needle.size();
	bool is_unaligned_eof = needle_len & BLOCK_MASK_CLONE;
	BEESTRACE("is_unaligned_eof = " << is_unaligned_eof << ", needle_len = " << to_hex(needle_len) << ", haystack_size = " << to_hex(haystack_size));

	// Unaligned EOF can only match at EOF, so only check there
	if (is_unaligned_eof) {
		BEESTRACE("Construct needle_bfr from " << needle);
		BeesFileRange needle_bfr(needle);

		// Census
		if (haystack_size & BLOCK_MASK_CLONE) {
			BEESCOUNT(adjust_eof_haystack);
		}
		if (needle_bfr.end() & BLOCK_MASK_CLONE) {
			BEESCOUNT(adjust_eof_needle);
		}

		// Non-aligned part of the lengths must be the same
		if ( (haystack_size & BLOCK_MASK_CLONE) != (needle_bfr.end() & BLOCK_MASK_CLONE) ) {
			BEESCOUNT(adjust_eof_fail);
			return BeesBlockData();
		}

		// Read the haystack block
		BEESTRACE("Reading haystack (haystack_size = " << to_hex(haystack_size) << ")");
		BeesBlockData straw(haystack.fd(), haystack_size & ~BLOCK_MASK_CLONE, haystack_size & BLOCK_MASK_CLONE);

		// It either matches or it doesn't
		BEESTRACE("Verifying haystack " << straw);
		if (straw.is_data_equal(needle)) {
			BEESCOUNT(adjust_eof_hit);
			m_found_data = true;
			m_found_hash = true;
			return straw;
		}

		// Check for matching hash
		BEESTRACE("Verifying haystack hash");
		if (straw.hash() == needle.hash()) {
			// OK at least the hash is still valid
			m_found_hash = true;
		}

		BEESCOUNT(adjust_eof_miss);
		// BEESLOG("adjust_eof_miss " << straw);
		return BeesBlockData();
	}

	off_t lower_offset = haystack.begin();
	off_t upper_offset = haystack.end();
	bool is_compressed_offset = false;
	bool is_exact = false;
	bool is_legacy = false;
	if (m_addr.is_compressed()) {
		BtrfsExtentWalker ew(haystack.fd(), haystack.begin(), m_ctx->root_fd());
		BEESTRACE("haystack extent data " << ew);
		Extent e = ew.current();
		if (m_addr.has_compressed_offset()) {
			off_t coff = m_addr.get_compressed_offset();
			if (e.offset() > coff) {
				// this extent begins after the target block
				BEESCOUNT(adjust_offset_low);
				return BeesBlockData();
			}
			coff -= e.offset();
			if (e.size() <= coff) {
				// this extent ends before the target block
				BEESCOUNT(adjust_offset_high);
				return BeesBlockData();
			}
			lower_offset = e.begin() + coff;
			upper_offset = lower_offset + BLOCK_SIZE_CLONE;
			BEESCOUNT(adjust_offset_hit);
			is_compressed_offset = true;
		} else {
			lower_offset = e.begin();
			upper_offset = e.end();
			BEESCOUNT(adjust_legacy);
			is_legacy = true;
		}
	} else {
		BEESCOUNT(adjust_exact);
		is_exact = true;
	}

	BEESTRACE("Checking haystack " << haystack << " offsets " << to_hex(lower_offset) << ".." << to_hex(upper_offset));

	// Check all the blocks in the list
	for (off_t haystack_offset = lower_offset; haystack_offset < upper_offset; haystack_offset += BLOCK_SIZE_CLONE) {
		THROW_CHECK1(out_of_range, haystack_offset, (haystack_offset & BLOCK_MASK_CLONE) == 0);

		// Straw cannot extend beyond end of haystack
		if (haystack_offset + needle.size() > haystack_size) {
			BEESCOUNT(adjust_needle_too_long);
			break;
		}

		// Read the haystack
		BEESTRACE("straw " << name_fd(haystack.fd()) << ", offset " << to_hex(haystack_offset) << ", length " << needle.size());
		BeesBlockData straw(haystack.fd(), haystack_offset, needle.size());

		BEESTRACE("straw = " << straw);

		// Stop if we find a match
		if (straw.is_data_equal(needle)) {
			BEESCOUNT(adjust_hit);
			m_found_data = true;
			m_found_hash = true;
			if (is_compressed_offset) BEESCOUNT(adjust_compressed_offset_correct);
			if (is_legacy) BEESCOUNT(adjust_legacy_correct);
			if (is_exact) BEESCOUNT(adjust_exact_correct);
			return straw;
		}

		if (straw.hash() != needle.hash()) {
			// Not the same hash or data, try next block
			BEESCOUNT(adjust_miss);
			continue;
		}

		// Found the hash but not the data. Yay!
		m_found_hash = true;
		BEESLOG("HASH COLLISION\n"
			<< "\tneedle " << needle << "\n"
			<< "\tstraw " << straw);
		BEESCOUNT(hash_collision);
	}

	// Ran out of offsets to try
	BEESCOUNT(adjust_no_match);
	if (is_compressed_offset) BEESCOUNT(adjust_compressed_offset_wrong);
	if (is_legacy) BEESCOUNT(adjust_legacy_wrong);
	if (is_exact) BEESCOUNT(adjust_exact_wrong);
	m_wrong_data = true;
	return BeesBlockData();
}

BeesFileRange
BeesResolver::chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesBlockData &needle_bbd)
{
	BEESTRACE("chase_extent_ref bior " << bior << " needle_bbd " << needle_bbd);
	BEESNOTE("chase_extent_ref bior " << bior << " needle_bbd " << needle_bbd);
	BEESCOUNT(chase_try);

	Fd file_fd = m_ctx->roots()->open_root_ino(bior.m_root, bior.m_inum);
	if (!file_fd) {
		// Delete snapshots generate craptons of these
		// BEESINFO("No FD in chase_extent_ref " << bior);
		BEESCOUNT(chase_no_fd);
		return BeesFileRange();
	}

	BEESNOTE("searching at offset " << to_hex(bior.m_offset) << " in file " << name_fd(file_fd) << "\n\tfor " << needle_bbd);

	BEESTRACE("bior file " << name_fd(file_fd));
	BEESTRACE("get file_addr " << bior);
	BeesAddress file_addr(file_fd, bior.m_offset, m_ctx);
	BEESTRACE("file_addr " << file_addr);

	// ...or are we?
	if (file_addr.is_magic()) {
		BEESINFO("file_addr is magic: file_addr = " << file_addr << " bior = " << bior << " needle_bbd = " << needle_bbd);
		BEESCOUNT(chase_wrong_magic);
		return BeesFileRange();
	}
	THROW_CHECK1(invalid_argument, m_addr, !m_addr.is_magic());

	// Did we get the physical block we asked for?  The magic bits have to match too,
	// but the compressed offset bits do not.
	if (file_addr.get_physical_or_zero() != m_addr.get_physical_or_zero()) {
		// BEESINFO("found addr " << file_addr << " at " << name_fd(file_fd) << " offset " << to_hex(bior.m_offset) << " but looking for " << m_addr);
		// FIEMAP/resolve are working, but the data is old.
		BEESCOUNT(chase_wrong_addr);
		return BeesFileRange();
	}

	// Calculate end of range, which is a sum block or less
	// It's a sum block because we have to compare content now
	off_t file_size = Stat(file_fd).st_size;
	off_t bior_offset = ranged_cast<off_t>(bior.m_offset);
	off_t end_offset = min(file_size, bior_offset + needle_bbd.size());
	BeesBlockData haystack_bbd(file_fd, bior_offset, end_offset - bior_offset);

	BEESTRACE("matched haystack_bbd " << haystack_bbd << " file_addr " << file_addr);

	// If the data was compressed and no offset was captured then
	// we won't get an exact address from resolve.
	// Search near the resolved address for a matching data block.
	// ...even if it's not compressed, we should do this sanity
	// check before considering the block as a duplicate candidate.
	auto new_bbd = adjust_offset(haystack_bbd, needle_bbd);
	if (new_bbd.empty()) {
		// matching offset search failed
		BEESCOUNT(chase_wrong_data);
		return BeesFileRange();
	}
	if (new_bbd.begin() == haystack_bbd.begin()) {
		BEESCOUNT(chase_uncorrected);
	} else {
		// corrected the bfr
		BEESCOUNT(chase_corrected);
		haystack_bbd = new_bbd;
	}

	// We have found at least one duplicate block, so resolve was a success
	BEESCOUNT(chase_hit);

	// Matching block
	BEESTRACE("Constructing dst_bfr { " << BeesFileId(haystack_bbd.fd()) << ", " << to_hex(haystack_bbd.begin()) << ".." << to_hex(haystack_bbd.end()) << " }");
	BeesFileRange dst_bfr(BeesFileId(haystack_bbd.fd()), haystack_bbd.begin(), haystack_bbd.end());

	return dst_bfr;
}

void
BeesResolver::replace_src(const BeesFileRange &src_bfr)
{
	BEESTRACE("replace_src src_bfr " << src_bfr);
	THROW_CHECK0(runtime_error, !m_is_toxic);
	BEESCOUNT(replacesrc_try);

	// Open src, reuse it for all dst
	auto i_bfr = src_bfr;
	BEESNOTE("Opening src bfr " << i_bfr);
	BEESTRACE("Opening src bfr " << i_bfr);
	i_bfr.fd(m_ctx);

	BeesBlockData bbd(i_bfr);

	for_each_extent_ref(bbd, [&](const BeesFileRange &j) -> bool {
		// Open dst
		auto j_bfr = j;
		BEESNOTE("Opening dst bfr " << j_bfr);
		BEESTRACE("Opening dst bfr " << j_bfr);
		j_bfr.fd(m_ctx);

		if (i_bfr.overlaps(j_bfr)) {
			BEESCOUNT(replacesrc_overlaps);
			return false; // i.e. continue
		}

		// Make pair(src, dst)
		BEESTRACE("creating brp (" << i_bfr << ", " << j_bfr << ")");
		BeesRangePair brp(i_bfr, j_bfr);
		BEESTRACE("Found matching range: " << brp);

		// Extend range at beginning
		BEESNOTE("Extending matching range: " << brp);
		// No particular reason to be constrained?
		if (brp.grow(m_ctx, true)) {
			BEESCOUNT(replacesrc_grown);
		}

		// Dedup
		BEESNOTE("dedup " << brp);
		if (m_ctx->dedup(brp)) {
			BEESCOUNT(replacesrc_dedup_hit);
			m_found_dup = true;
		} else {
			BEESCOUNT(replacesrc_dedup_miss);
		}
		return false; // i.e. continue
	});
}

void
BeesResolver::find_matches(bool just_one, BeesBlockData &bbd)
{
	// Walk through the (ino, offset, root) tuples until we find a match.
	BEESTRACE("finding all matches for " << bbd << " at " << m_addr << ": " << m_biors.size() << " found");
	THROW_CHECK0(runtime_error, !m_is_toxic);
	bool stop_now = false;
	for (auto ino_off_root : m_biors) {
		if (m_wrong_data) {
			return;
		}

		BEESTRACE("ino_off_root " << ino_off_root);
		BeesFileId this_fid(ino_off_root.m_root, ino_off_root.m_inum);

		// Silently ignore blacklisted files, e.g. BeesTempFile files
		if (m_ctx->is_blacklisted(this_fid)) {
			continue;
		}

		// Look at the old data
		catch_all([&]() {
			BEESTRACE("chase_extent_ref ino " << ino_off_root << " bbd " << bbd);
			auto new_range = chase_extent_ref(ino_off_root, bbd);
			if (new_range) {
				m_ranges.insert(new_range.copy_closed());
				stop_now = true;
			}
		});

		if (just_one && stop_now) {
			break;
		}
	}
}

bool
BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFileRange &bfr)> visitor)
{
	// Walk through the (ino, offset, root) tuples until we are told to stop
	BEESTRACE("for_each_extent_ref " << bbd << " at " << m_addr << ": " << m_biors.size() << " found");
	THROW_CHECK0(runtime_error, !m_is_toxic);
	bool stop_now = false;
	for (auto ino_off_root : m_biors) {
		BEESTRACE("ino_off_root " << ino_off_root);
		BeesFileId this_fid(ino_off_root.m_root, ino_off_root.m_inum);

		// Silently ignore blacklisted files, e.g. BeesTempFile files
		if (m_ctx->is_blacklisted(this_fid)) {
			continue;
		}

		// Look at the old data
		catch_all([&]() {
			BEESTRACE("chase_extent_ref ino " << ino_off_root << " bbd " << bbd);
			auto new_range = chase_extent_ref(ino_off_root, bbd);
			// XXX: should we catch visitor's exceptions here?
			if (new_range) {
				stop_now = visitor(new_range);
			} else {
				// We have reliable block addresses now, so we guarantee we can hit the desired block.
				// Failure in chase_extent_ref means we are done, and don't need to look up all the
				// other references.
				stop_now = true;
			}
		});

		if (stop_now) {
			break;
		}
	}
	return stop_now;
}

BeesFileRange
BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
{
	BEESTRACE("replace_dst dst_bfr " << dst_bfr);
	BEESCOUNT(replacedst_try);

	// Open dst, reuse it for all src
	BEESNOTE("Opening dst bfr " << dst_bfr);
	BEESTRACE("Opening dst bfr " << dst_bfr);
	dst_bfr.fd(m_ctx);

	BeesFileRange overlap_bfr;
	BEESTRACE("overlap_bfr " << overlap_bfr);

	BeesBlockData bbd(dst_bfr);

	for_each_extent_ref(bbd, [&](const BeesFileRange &src_bfr) -> bool {
		// Open src
		BEESNOTE("Opening src bfr " << src_bfr);
		BEESTRACE("Opening src bfr " << src_bfr);
		src_bfr.fd(m_ctx);

		if (dst_bfr.overlaps(src_bfr)) {
			BEESCOUNT(replacedst_overlaps);
			return false; // i.e. continue
		}

		// If dst is already occupying src, skip.
		// FIXME: BeesContext::scan_one_extent should be weeding these out, but does not.
		BeesBlockData src_bbd(src_bfr.fd(), src_bfr.begin(), min(BLOCK_SIZE_SUMS, src_bfr.size()));
		if (bbd.addr().get_physical_or_zero() == src_bbd.addr().get_physical_or_zero()) {
			BEESCOUNT(replacedst_same);
			return false; // i.e. continue
		}

		// Make pair(src, dst)
		BEESTRACE("creating brp (" << src_bfr << ", " << dst_bfr << ")");
		BeesRangePair brp(src_bfr, dst_bfr);
		BEESTRACE("Found matching range: " << brp);

		// Extend range at beginning
		BEESNOTE("Extending matching range: " << brp);
		// 'false' Has nasty loops, and may not be faster.
		// 'true' At best, keeps fragmentation constant...but can also make it worse
		if (brp.grow(m_ctx, true)) {
			BEESCOUNT(replacedst_grown);
		}

		// Dedup
		BEESNOTE("dedup " << brp);
		if (m_ctx->dedup(brp)) {
			BEESCOUNT(replacedst_dedup_hit);
			m_found_dup = true;
			overlap_bfr = brp.second;
			// FIXME: find best range first, then dedup that
			return true; // i.e. break
		} else {
			BEESCOUNT(replacedst_dedup_miss);
			return false; // i.e. continue
		}
	});
	// BEESLOG("overlap_bfr after " << overlap_bfr);
	return overlap_bfr.copy_closed();
}

BeesFileRange
BeesResolver::find_one_match(BeesBlockData &bbd)
{
	THROW_CHECK0(runtime_error, !m_is_toxic);
	find_matches(true, bbd);
	if (m_ranges.empty()) {
		return BeesFileRange();
	} else {
		return *m_ranges.begin();
	}
}

set<BeesFileRange>
BeesResolver::find_all_matches(BeesBlockData &bbd)
{
	THROW_CHECK0(runtime_error, !m_is_toxic);
	find_matches(false, bbd);
	return m_ranges;
}

bool
BeesResolver::operator<(const BeesResolver &that) const
{
	if (that.m_bior_count < m_bior_count) {
		return true;
	} else if (m_bior_count < that.m_bior_count) {
		return false;
	}
	return m_addr < that.m_addr;
}
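adjust_offset() above is essentially a bounded linear scan: it picks a candidate offset range inside the haystack extent, then compares the needle block against each clone-aligned offset, accepting only a byte-for-byte match and counting hash-only matches as collisions. Below is a minimal sketch of that scan over plain byte buffers; the function names, the stand-in hash, and the std::string buffers are illustrative assumptions, not the bees API (the real code uses BeesBlockData::is_data_equal() and CRC64 block hashes, and feeds the adjust_* and hash_collision counters).

	// Illustrative sketch only: scan block-aligned offsets in a "haystack"
	// buffer for a byte-identical copy of the "needle" block.
	#include <cstddef>
	#include <cstdint>
	#include <functional>
	#include <optional>
	#include <string>

	static uint64_t block_hash(const std::string &block)
	{
		// Stand-in hash; bees hashes BLOCK_SIZE_SUMS bytes with CRC64.
		return std::hash<std::string>{}(block);
	}

	std::optional<std::size_t>
	find_needle(const std::string &haystack, const std::string &needle,
	            std::size_t block_size, std::size_t &hash_collisions)
	{
		const uint64_t needle_hash = block_hash(needle);
		for (std::size_t off = 0; off + needle.size() <= haystack.size(); off += block_size) {
			const std::string straw = haystack.substr(off, needle.size());
			if (straw == needle) {
				return off;		// data match ("adjust_hit")
			}
			if (block_hash(straw) == needle_hash) {
				++hash_collisions;	// hash matched but data did not
			}
		}
		return std::nullopt;			// ran out of offsets ("adjust_no_match")
	}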
823
src/bees-roots.cc
Normal file
823
src/bees-roots.cc
Normal file
@@ -0,0 +1,823 @@
|
||||
#include "bees.h"
|
||||
|
||||
#include "crucible/cache.h"
|
||||
#include "crucible/string.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <tuple>
|
||||
|
||||
using namespace crucible;
|
||||
using namespace std;
|
||||
|
||||
string
|
||||
format_time(time_t t)
|
||||
{
|
||||
struct tm *tmp = localtime(&t);
|
||||
char buf[1024];
|
||||
strftime(buf, sizeof(buf), "%Y-%m-%d-%H-%M-%S", tmp);
|
||||
return buf;
|
||||
}
|
||||
|
||||
ostream &
|
||||
operator<<(ostream &os, const BeesCrawlState &bcs)
|
||||
{
|
||||
time_t now = time(NULL);
|
||||
auto age = now - bcs.m_started;
|
||||
return os << "BeesCrawlState "
|
||||
<< bcs.m_root << ":" << bcs.m_objectid << " offset " << to_hex(bcs.m_offset)
|
||||
<< " transid " << bcs.m_min_transid << ".." << bcs.m_max_transid
|
||||
<< " started " << format_time(bcs.m_started) << " (" << age << "s ago)";
|
||||
}
|
||||
|
||||
BeesCrawlState::BeesCrawlState() :
|
||||
m_root(0),
|
||||
m_objectid(0),
|
||||
m_offset(0),
|
||||
m_min_transid(0),
|
||||
m_max_transid(0),
|
||||
m_started(time(NULL))
|
||||
{
|
||||
}
|
||||
|
||||
bool
|
||||
BeesCrawlState::operator<(const BeesCrawlState &that) const
|
||||
{
|
||||
return tie(m_root, m_objectid, m_offset, m_min_transid, m_max_transid)
|
||||
< tie(that.m_root, that.m_objectid, that.m_offset, that.m_min_transid, that.m_max_transid);
|
||||
}
|
||||
|
||||
string
|
||||
BeesRoots::crawl_state_filename() const
|
||||
{
|
||||
string rv;
|
||||
rv += "beescrawl.";
|
||||
rv += m_ctx->root_uuid();
|
||||
rv += ".dat";
|
||||
return rv;
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::state_save()
|
||||
{
|
||||
// Make sure we have a full complement of crawlers
|
||||
insert_new_crawl();
|
||||
|
||||
BEESNOTE("saving crawl state");
|
||||
BEESLOG("Saving crawl state");
|
||||
BEESTOOLONG("Saving crawl state");
|
||||
|
||||
Timer save_time;
|
||||
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
|
||||
// We don't have ofstreamat or ofdstream in C++11, so we're building a string and writing it with raw syscalls.
|
||||
ostringstream ofs;
|
||||
|
||||
if (!m_crawl_dirty) {
|
||||
BEESLOG("Nothing to save");
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto i : m_root_crawl_map) {
|
||||
auto ibcs = i.second->get_state();
|
||||
if (ibcs.m_max_transid) {
|
||||
ofs << "root " << ibcs.m_root << " ";
|
||||
ofs << "objectid " << ibcs.m_objectid << " ";
|
||||
ofs << "offset " << ibcs.m_offset << " ";
|
||||
ofs << "min_transid " << ibcs.m_min_transid << " ";
|
||||
ofs << "max_transid " << ibcs.m_max_transid << " ";
|
||||
ofs << "started " << ibcs.m_started << " ";
|
||||
ofs << "start_ts " << format_time(ibcs.m_started) << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (ofs.str().empty()) {
|
||||
BEESLOG("Crawl state empty!");
|
||||
m_crawl_dirty = false;
|
||||
return;
|
||||
}
|
||||
|
||||
lock.unlock();
|
||||
|
||||
m_crawl_state_file.write(ofs.str());
|
||||
|
||||
BEESNOTE("relocking crawl state");
|
||||
lock.lock();
|
||||
// Not really correct but probably close enough
|
||||
m_crawl_dirty = false;
|
||||
BEESLOG("Saved crawl state in " << save_time << "s");
|
||||
}
|
||||
|
||||
BeesCrawlState
|
||||
BeesRoots::crawl_state_get(uint64_t rootid)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
auto rv = m_root_crawl_map.at(rootid)->get_state();
|
||||
THROW_CHECK2(runtime_error, rv.m_root, rootid, rv.m_root == rootid);
|
||||
return rv;
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::crawl_state_set_dirty()
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
m_crawl_dirty = true;
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::crawl_state_erase(const BeesCrawlState &bcs)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
|
||||
// Do not delete the last entry, it holds our max_transid
|
||||
if (m_root_crawl_map.size() < 2) {
|
||||
BEESCOUNT(crawl_no_empty);
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_root_crawl_map.count(bcs.m_root)) {
|
||||
m_root_crawl_map.erase(bcs.m_root);
|
||||
m_crawl_dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BeesRoots::transid_min()
|
||||
{
|
||||
BEESNOTE("Calculating transid_min");
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
if (m_root_crawl_map.empty()) {
|
||||
return 0;
|
||||
}
|
||||
uint64_t rv = numeric_limits<uint64_t>::max();
|
||||
for (auto i : m_root_crawl_map) {
|
||||
rv = min(rv, i.second->get_state().m_min_transid);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
BeesRoots::transid_max()
|
||||
{
|
||||
BEESNOTE("Calculating transid_max");
|
||||
uint64_t rv = 0;
|
||||
uint64_t root = 0;
|
||||
BEESTRACE("Calculating transid_max...");
|
||||
do {
|
||||
root = next_root(root);
|
||||
if (root) {
|
||||
catch_all([&]() {
|
||||
auto transid = btrfs_get_root_transid(open_root(root));
|
||||
rv = max(rv, transid);
|
||||
// BEESLOG("\troot " << root << " transid " << transid << " max " << rv);
|
||||
});
|
||||
}
|
||||
} while (root);
|
||||
return rv;
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::crawl_roots()
|
||||
{
|
||||
BEESNOTE("Crawling roots");
|
||||
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
if (m_root_crawl_map.empty()) {
|
||||
BEESNOTE("idle, crawl map is empty");
|
||||
m_condvar.wait(lock);
|
||||
// Don't count the time we were waiting as part of the crawl time
|
||||
m_crawl_timer.reset();
|
||||
}
|
||||
|
||||
// Work from a copy because BeesCrawl might change the world under us
|
||||
auto crawl_map_copy = m_root_crawl_map;
|
||||
lock.unlock();
|
||||
|
||||
BeesFileRange first_range;
|
||||
shared_ptr<BeesCrawl> first_crawl;
|
||||
for (auto i : crawl_map_copy) {
|
||||
auto this_crawl = i.second;
|
||||
auto this_range = this_crawl->peek_front();
|
||||
if (this_range) {
|
||||
auto tuple_this = make_tuple(this_range.fid().ino(), this_range.fid().root(), this_range.begin());
|
||||
auto tuple_first = make_tuple(first_range.fid().ino(), first_range.fid().root(), first_range.begin());
|
||||
if (!first_range || tuple_this < tuple_first) {
|
||||
first_crawl = this_crawl;
|
||||
first_range = this_range;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (first_range) {
|
||||
catch_all([&]() {
|
||||
// BEESINFO("scan_forward " << first_range);
|
||||
m_ctx->scan_forward(first_range);
|
||||
});
|
||||
BEESCOUNT(crawl_scan);
|
||||
m_crawl_current = first_crawl->get_state();
|
||||
auto first_range_popped = first_crawl->pop_front();
|
||||
THROW_CHECK2(runtime_error, first_range, first_range_popped, first_range == first_range_popped);
|
||||
return;
|
||||
}
|
||||
|
||||
BEESLOG("Crawl ran out of data after " << m_crawl_timer.lap() << "s, waiting for more...");
|
||||
BEESCOUNT(crawl_done);
|
||||
BEESNOTE("idle, waiting for more data");
|
||||
lock.lock();
|
||||
m_condvar.wait(lock);
|
||||
|
||||
// Don't count the time we were waiting as part of the crawl time
|
||||
m_crawl_timer.reset();
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::crawl_thread()
|
||||
{
|
||||
BEESNOTE("crawling");
|
||||
while (1) {
|
||||
catch_all([&]() {
|
||||
crawl_roots();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::writeback_thread()
|
||||
{
|
||||
while (1) {
|
||||
BEESNOTE(m_crawl_current << (m_crawl_dirty ? " (dirty)" : ""));
|
||||
|
||||
catch_all([&]() {
|
||||
BEESNOTE("saving crawler state");
|
||||
state_save();
|
||||
});
|
||||
|
||||
nanosleep(BEES_WRITEBACK_INTERVAL);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::insert_root(const BeesCrawlState &new_bcs)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
if (!m_root_crawl_map.count(new_bcs.m_root)) {
|
||||
auto new_bcp = make_shared<BeesCrawl>(m_ctx, new_bcs);
|
||||
auto new_pair = make_pair(new_bcs.m_root, new_bcp);
|
||||
m_root_crawl_map.insert(new_pair);
|
||||
m_crawl_dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::insert_new_crawl()
|
||||
{
|
||||
BEESNOTE("adding crawlers for new subvols and removing crawlers for removed subvols");
|
||||
|
||||
BeesCrawlState new_bcs;
|
||||
// Avoid a wasted loop iteration by starting from root 5
|
||||
new_bcs.m_root = BTRFS_FS_TREE_OBJECTID;
|
||||
new_bcs.m_min_transid = transid_min();
|
||||
new_bcs.m_max_transid = transid_max();
|
||||
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
set<uint64_t> excess_roots;
|
||||
for (auto i : m_root_crawl_map) {
|
||||
excess_roots.insert(i.first);
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
while (new_bcs.m_root) {
|
||||
excess_roots.erase(new_bcs.m_root);
|
||||
insert_root(new_bcs);
|
||||
BEESCOUNT(crawl_create);
|
||||
new_bcs.m_root = next_root(new_bcs.m_root);
|
||||
}
|
||||
|
||||
for (auto i : excess_roots) {
|
||||
new_bcs.m_root = i;
|
||||
crawl_state_erase(new_bcs);
|
||||
}
|
||||
|
||||
// Wake up crawl_roots if sleeping
|
||||
lock.lock();
|
||||
m_condvar.notify_all();
|
||||
}
|
||||
|
||||
void
|
||||
BeesRoots::state_load()
|
||||
{
|
||||
BEESNOTE("loading crawl state");
|
||||
BEESLOG("loading crawl state");
|
||||
|
||||
string crawl_data = m_crawl_state_file.read();
|
||||
|
||||
for (auto line : split("\n", crawl_data)) {
|
||||
BEESLOG("Read line: " << line);
|
||||
map<string, uint64_t> d;
|
||||
auto words = split(" ", line);
|
||||
for (auto it = words.begin(); it < words.end(); ++it) {
|
||||
auto it1 = it;
|
||||
++it;
|
||||
THROW_CHECK1(out_of_range, words.size(), it < words.end());
|
||||
string key = *it1;
|
||||
uint64_t val = from_hex(*it);
|
||||
BEESTRACE("key " << key << " val " << val);
|
||||
auto result = d.insert(make_pair(key, val));
|
||||
THROW_CHECK0(runtime_error, result.second);
|
||||
}
|
||||
BeesCrawlState loaded_state;
|
||||
loaded_state.m_root = d.at("root");
|
||||
loaded_state.m_objectid = d.at("objectid");
|
||||
loaded_state.m_offset = d.at("offset");
|
||||
loaded_state.m_min_transid = d.count("gen_current") ? d.at("gen_current") : d.at("min_transid");
|
||||
loaded_state.m_max_transid = d.count("gen_next") ? d.at("gen_next") : d.at("max_transid");
|
||||
if (d.count("started")) {
|
||||
loaded_state.m_started = d.at("started");
|
||||
}
|
||||
BEESLOG("loaded_state " << loaded_state);
|
||||
insert_root(loaded_state);
|
||||
}
|
||||
}
|
||||
|
||||
BeesRoots::BeesRoots(shared_ptr<BeesContext> ctx) :
|
||||
m_ctx(ctx),
|
||||
m_crawl_state_file(ctx->home_fd(), crawl_state_filename()),
|
||||
m_crawl_thread("crawl " + ctx->root_path()),
|
||||
m_writeback_thread("crawl_writeback " + ctx->root_path())
|
||||
{
|
||||
m_crawl_thread.exec([&]() {
|
||||
catch_all([&]() {
|
||||
state_load();
|
||||
});
|
||||
m_writeback_thread.exec([&]() {
|
||||
writeback_thread();
|
||||
});
|
||||
crawl_thread();
|
||||
});
|
||||
}
|
||||
|
||||
Fd
|
||||
BeesRoots::open_root_nocache(uint64_t rootid)
|
||||
{
|
||||
BEESTRACE("open_root_nocache " << rootid);
|
||||
BEESNOTE("open_root_nocache " << rootid);
|
||||
|
||||
// Stop recursion at the root of the filesystem tree
|
||||
if (rootid == BTRFS_FS_TREE_OBJECTID) {
|
||||
return m_ctx->root_fd();
|
||||
}
|
||||
|
||||
// Find backrefs for this rootid and follow up to root
|
||||
BtrfsIoctlSearchKey sk;
|
||||
sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
|
||||
sk.min_objectid = sk.max_objectid = rootid;
|
||||
sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
|
||||
|
||||
BEESTRACE("sk " << sk);
|
||||
while (sk.min_objectid <= rootid) {
|
||||
sk.nr_items = 1024;
|
||||
sk.do_ioctl(m_ctx->root_fd());
|
||||
|
||||
if (sk.m_result.empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
for (auto i : sk.m_result) {
|
||||
sk.next_min(i);
|
||||
if (i.type == BTRFS_ROOT_BACKREF_KEY && i.objectid == rootid) {
|
||||
auto dirid = call_btrfs_get(btrfs_stack_root_ref_dirid, i.m_data);
|
||||
auto name_len = call_btrfs_get(btrfs_stack_root_ref_name_len, i.m_data);
|
||||
auto name_start = sizeof(struct btrfs_root_ref);
|
||||
auto name_end = name_len + name_start;
|
||||
THROW_CHECK2(runtime_error, i.m_data.size(), name_end, i.m_data.size() >= name_end);
|
||||
string name(i.m_data.data() + name_start, i.m_data.data() + name_end);
|
||||
|
||||
auto parent_rootid = i.offset;
|
||||
// BEESLOG("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
|
||||
BEESTRACE("parent_rootid " << parent_rootid << " dirid " << dirid << " name " << name);
|
||||
Fd parent_fd = open_root(parent_rootid);
|
||||
if (!parent_fd) {
|
||||
BEESLOGTRACE("no parent_fd");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dirid != BTRFS_FIRST_FREE_OBJECTID) {
|
||||
BEESTRACE("dirid " << dirid << " root " << rootid << " INO_PATH");
|
||||
BtrfsIoctlInoPathArgs ino(dirid);
|
||||
if (!ino.do_ioctl_nothrow(parent_fd)) {
|
||||
BEESINFO("dirid " << dirid << " inode path lookup failed in parent_fd " << name_fd(parent_fd));
|
||||
continue;
|
||||
}
|
||||
if (ino.m_paths.empty()) {
|
||||
BEESINFO("dirid " << dirid << " inode has no paths in parent_fd " << name_fd(parent_fd));
|
||||
continue;
|
||||
}
|
||||
BEESTRACE("dirid " << dirid << " path " << ino.m_paths.at(0));
|
||||
parent_fd = openat(parent_fd, ino.m_paths.at(0).c_str(), FLAGS_OPEN_DIR);
|
||||
if (!parent_fd) {
|
||||
BEESLOGTRACE("no parent_fd from dirid");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// BEESLOG("openat(" << name_fd(parent_fd) << ", " << name << ")");
|
||||
BEESTRACE("openat(" << name_fd(parent_fd) << ", " << name << ")");
|
||||
Fd rv = openat(parent_fd, name.c_str(), FLAGS_OPEN_DIR);
|
||||
if (!rv) {
|
||||
BEESLOGTRACE("open failed for name " << name);
|
||||
continue;
|
||||
}
|
||||
BEESCOUNT(root_found);
|
||||
|
||||
// Verify correct root ID
|
||||
auto new_root_id = btrfs_get_root_id(rv);
|
||||
THROW_CHECK2(runtime_error, new_root_id, rootid, new_root_id == rootid);
|
||||
Stat st(rv);
|
||||
THROW_CHECK1(runtime_error, st.st_ino, st.st_ino == BTRFS_FIRST_FREE_OBJECTID);
|
||||
BEESINFO("open_root_nocache " << rootid << ": " << name_fd(rv));
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
}
|
||||
BEESINFO("No path for rootid " << rootid);
|
||||
BEESCOUNT(root_notfound);
|
||||
return Fd();
|
||||
}
|
||||
|
||||
Fd
|
||||
BeesRoots::open_root(uint64_t rootid)
|
||||
{
|
||||
// Ignore some of the crap that comes out of LOGICAL_INO
|
||||
if (rootid == BTRFS_ROOT_TREE_OBJECTID) {
|
||||
return Fd();
|
||||
}
|
||||
|
||||
return m_ctx->fd_cache()->open_root(m_ctx, rootid);
|
||||
}
|
||||
|
||||
|
||||
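// Enumerate subvolume ids in ascending order by scanning ROOT_BACKREF
// items starting at min_objectid = root + 1.  For example (hypothetical
// layout):  next_root(0) returns BTRFS_FS_TREE_OBJECTID (5), next_root(5)
// returns the lowest subvolume id at or above BTRFS_FIRST_FREE_OBJECTID
// (256), and 0 is returned when no further roots exist.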
uint64_t
|
||||
BeesRoots::next_root(uint64_t root)
|
||||
{
|
||||
BEESNOTE("Next root from " << root);
|
||||
BEESTRACE("Next root from " << root);
|
||||
|
||||
// BTRFS_FS_TREE_OBJECTID has no backref keys so we can't find it that way
|
||||
if (root < BTRFS_FS_TREE_OBJECTID) {
|
||||
// BEESLOG("First root is BTRFS_FS_TREE_OBJECTID = " << BTRFS_FS_TREE_OBJECTID);
|
||||
return BTRFS_FS_TREE_OBJECTID;
|
||||
}
|
||||
|
||||
BtrfsIoctlSearchKey sk;
|
||||
sk.tree_id = BTRFS_ROOT_TREE_OBJECTID;
|
||||
sk.min_type = sk.max_type = BTRFS_ROOT_BACKREF_KEY;
|
||||
sk.min_objectid = root + 1;
|
||||
|
||||
while (true) {
|
||||
sk.nr_items = 1024;
|
||||
sk.do_ioctl(m_ctx->root_fd());
|
||||
|
||||
if (sk.m_result.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (auto i : sk.m_result) {
|
||||
sk.next_min(i);
|
||||
if (i.type == BTRFS_ROOT_BACKREF_KEY) {
|
||||
// BEESLOG("Found root " << i.objectid << " parent " << i.offset);
|
||||
return i.objectid;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
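// Translate a (root, inode) pair into an open fd:  open the subvolume,
// list the inode's paths with the INO_PATHS ioctl, and try each path until
// one opens with the expected inode.  A candidate fd is discarded if it
// has the wrong inode, the wrong root id, or a different st_dev than the
// subvolume fd -- each of these usually means the path raced with a rename
// or snapshot delete, or leads into a different mount.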
Fd
|
||||
BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
|
||||
{
|
||||
BEESTRACE("opening root " << root << " ino " << ino);
|
||||
|
||||
Fd root_fd = open_root(root);
|
||||
if (!root_fd) {
|
||||
return root_fd;
|
||||
}
|
||||
|
||||
BEESTOOLONG("open_root_ino(root " << root << ", ino " << ino << ")");
|
||||
|
||||
BEESTRACE("looking up ino " << ino);
|
||||
BtrfsIoctlInoPathArgs ipa(ino);
|
||||
if (!ipa.do_ioctl_nothrow(root_fd)) {
|
||||
BEESINFO("Lookup root " << root << " ino " << ino << " failed: " << strerror(errno));
|
||||
return Fd();
|
||||
}
|
||||
|
||||
BEESTRACE("searching paths for root " << root << " ino " << ino);
|
||||
Fd rv;
|
||||
if (ipa.m_paths.empty()) {
|
||||
BEESLOG("No paths for root " << root << " ino " << ino);
|
||||
}
|
||||
for (auto file_path : ipa.m_paths) {
|
||||
BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
|
||||
BEESCOUNT(open_file);
|
||||
// Open the file read-only (FLAGS_OPEN_FILE)
|
||||
const char *fp_cstr = file_path.c_str();
|
||||
rv = openat(root_fd, fp_cstr, FLAGS_OPEN_FILE);
|
||||
if (!rv) {
|
||||
BEESCOUNT(open_fail);
|
||||
// errno == ENOENT is common during snapshot delete, ignore it
|
||||
if (errno != ENOENT) {
|
||||
BEESLOG("Could not open path '" << file_path << "' at root " << root << " " << name_fd(root_fd) << ": " << strerror(errno));
|
||||
BEESNOTE("ipa" << ipa);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Correct inode?
|
||||
Stat file_stat(rv);
|
||||
if (file_stat.st_ino != ino) {
|
||||
BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found wrong inode " << file_stat.st_ino << " instead of " << ino);
|
||||
rv = Fd();
|
||||
BEESCOUNT(open_wrong_ino);
|
||||
break;
|
||||
}
|
||||
|
||||
// Correct root?
|
||||
auto file_root = btrfs_get_root_id(rv);
|
||||
if (file_root != root) {
|
||||
BEESLOG("Opening " << name_fd(root_fd) << "/" << file_path << " found wrong root " << file_root << " instead of " << root);
|
||||
rv = Fd();
|
||||
BEESCOUNT(open_wrong_root);
|
||||
break;
|
||||
}
|
||||
|
||||
// Same filesystem?
|
||||
Stat root_stat(root_fd);
|
||||
if (root_stat.st_dev != file_stat.st_dev) {
|
||||
BEESLOG("Opening root " << name_fd(root_fd) << " path " << file_path << " found path st_dev " << file_stat.st_dev << " but root st_dev is " << root_stat.st_dev);
|
||||
rv = Fd();
|
||||
BEESCOUNT(open_wrong_dev);
|
||||
break;
|
||||
}
|
||||
|
||||
BEESTRACE("mapped " << BeesFileId(root, ino));
|
||||
BEESTRACE("\tto " << name_fd(rv));
|
||||
BEESCOUNT(open_hit);
|
||||
return rv;
|
||||
}
|
||||
|
||||
// Odd, we didn't find a path.
|
||||
return Fd();
|
||||
}
|
||||
|
||||
Fd
|
||||
BeesRoots::open_root_ino(uint64_t root, uint64_t ino)
|
||||
{
|
||||
return m_ctx->fd_cache()->open_root_ino(m_ctx, root, ino);
|
||||
}
|
||||
|
||||
BeesCrawl::BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state) :
|
||||
m_ctx(ctx),
|
||||
m_state(initial_state)
|
||||
{
|
||||
}
|
||||
|
||||
bool
|
||||
BeesCrawl::next_transid()
|
||||
{
|
||||
// If this crawl is recently empty, quickly and _silently_ bail out
|
||||
auto current_time = time(NULL);
|
||||
auto crawl_state = get_state();
|
||||
auto elapsed_time = current_time - crawl_state.m_started;
|
||||
if (elapsed_time < BEES_COMMIT_INTERVAL) {
|
||||
if (!m_deferred) {
|
||||
BEESLOG("Deferring next transid in " << get_state());
|
||||
}
|
||||
m_deferred = true;
|
||||
BEESCOUNT(crawl_defer);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Log performance stats from the old crawl
|
||||
BEESLOG("Next transid in " << get_state());
|
||||
|
||||
// Start new crawl
|
||||
m_deferred = false;
|
||||
auto roots = m_ctx->roots();
|
||||
crawl_state.m_min_transid = crawl_state.m_max_transid;
|
||||
crawl_state.m_max_transid = roots->transid_max();
|
||||
crawl_state.m_objectid = 0;
|
||||
crawl_state.m_offset = 0;
|
||||
crawl_state.m_started = current_time;
|
||||
BEESLOG("Restarting crawl " << get_state());
|
||||
BEESCOUNT(crawl_restart);
|
||||
set_state(crawl_state);
|
||||
return true;
|
||||
}
|
||||
|
||||
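// One crawl pass reads EXTENT_DATA items from the subvolume tree with
// TREE_SEARCH_V2, filtered by the search key's transid window
// [m_min_transid, m_max_transid].  When a pass is exhausted, next_transid()
// slides the window forward:  with hypothetical transids, a pass that
// covered [100, 150] is followed by one starting at min_transid = 150 and
// ending at the filesystem's current highest transid, so only extents
// written since the previous pass are visited again.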
bool
|
||||
BeesCrawl::fetch_extents()
|
||||
{
|
||||
THROW_CHECK1(runtime_error, m_extents.size(), m_extents.empty());
|
||||
|
||||
auto old_state = get_state();
|
||||
if (m_deferred || old_state.m_max_transid <= old_state.m_min_transid) {
|
||||
BEESTRACE("Nothing to crawl in " << get_state());
|
||||
return next_transid();
|
||||
}
|
||||
|
||||
BEESNOTE("crawling " << get_state());
|
||||
BEESLOG("Crawling " << get_state());
|
||||
|
||||
Timer crawl_timer;
|
||||
|
||||
BtrfsIoctlSearchKey sk;
|
||||
sk.tree_id = old_state.m_root;
|
||||
sk.min_objectid = old_state.m_objectid;
|
||||
sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
|
||||
sk.min_offset = old_state.m_offset;
|
||||
sk.min_transid = old_state.m_min_transid;
|
||||
sk.max_transid = old_state.m_max_transid;
|
||||
sk.nr_items = BEES_MAX_CRAWL_SIZE;
|
||||
|
||||
// Lock in the old state
|
||||
set_state(old_state);
|
||||
|
||||
BEESTRACE("Searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
|
||||
bool ioctl_ok = false;
|
||||
{
|
||||
BEESNOTE("searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
|
||||
BEESTOOLONG("Searching crawl sk " << static_cast<btrfs_ioctl_search_key&>(sk));
|
||||
ioctl_ok = sk.do_ioctl_nothrow(m_ctx->root_fd());
|
||||
}
|
||||
|
||||
if (ioctl_ok) {
|
||||
BEESCOUNT(crawl_search);
|
||||
} else {
|
||||
BEESLOG("Search ioctl failed: " << strerror(errno));
|
||||
BEESCOUNT(crawl_fail);
|
||||
}
|
||||
|
||||
if (!ioctl_ok || sk.m_result.empty()) {
|
||||
BEESCOUNT(crawl_empty);
|
||||
BEESLOG("Crawl empty " << get_state());
|
||||
return next_transid();
|
||||
}
|
||||
|
||||
BEESLOG("Crawling " << sk.m_result.size() << " results from " << get_state());
|
||||
auto results_left = sk.m_result.size();
|
||||
BEESNOTE("crawling " << results_left << " results from " << get_state());
|
||||
size_t count_other = 0;
|
||||
size_t count_inline = 0;
|
||||
size_t count_unknown = 0;
|
||||
size_t count_data = 0;
|
||||
size_t count_low = 0;
|
||||
size_t count_high = 0;
|
||||
BeesFileRange last_bfr;
|
||||
for (auto i : sk.m_result) {
|
||||
sk.next_min(i);
|
||||
--results_left;
|
||||
BEESCOUNT(crawl_items);
|
||||
|
||||
BEESTRACE("i = " << i);
|
||||
|
||||
#if 1
|
||||
// We need the "+ 1" and objectid rollover that next_min does.
|
||||
auto new_state = get_state();
|
||||
new_state.m_objectid = sk.min_objectid;
|
||||
new_state.m_offset = sk.min_offset;
|
||||
|
||||
// Saving state here means we can skip a search result
|
||||
// if we are interrupted. Not saving state here means we
|
||||
// can fail to make forward progress in cases where there
|
||||
// is a lot of metadata we can't process. Favor forward
|
||||
// progress over losing search results.
|
||||
set_state(new_state);
|
||||
#endif
|
||||
|
||||
// Ignore things that aren't EXTENT_DATA_KEY
|
||||
if (i.type != BTRFS_EXTENT_DATA_KEY) {
|
||||
++count_other;
|
||||
BEESCOUNT(crawl_nondata);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto gen = call_btrfs_get(btrfs_stack_file_extent_generation, i.m_data);
|
||||
if (gen < get_state().m_min_transid) {
|
||||
BEESCOUNT(crawl_gen_low);
|
||||
++count_low;
|
||||
// We probably want (need?) to scan these anyway.
|
||||
// continue;
|
||||
}
|
||||
if (gen > get_state().m_max_transid) {
|
||||
BEESCOUNT(crawl_gen_high);
|
||||
++count_high;
|
||||
// This shouldn't ever happen
|
||||
// continue;
|
||||
}
|
||||
|
||||
auto type = call_btrfs_get(btrfs_stack_file_extent_type, i.m_data);
|
||||
switch (type) {
|
||||
default:
|
||||
BEESINFO("Unhandled file extent type " << type << " in root " << get_state().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset));
|
||||
++count_unknown;
|
||||
BEESCOUNT(crawl_unknown);
|
||||
break;
|
||||
case BTRFS_FILE_EXTENT_INLINE:
|
||||
// Ignore these for now.
|
||||
// BEESINFO("Ignored file extent type INLINE in root " << get_state().m_root << " ino " << i.objectid << " offset " << to_hex(i.offset));
|
||||
++count_inline;
|
||||
// TODO: replace with out-of-line dup extents
|
||||
BEESCOUNT(crawl_inline);
|
||||
break;
|
||||
case BTRFS_FILE_EXTENT_PREALLOC:
|
||||
BEESCOUNT(crawl_prealloc);
|
||||
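// fall through: PREALLOC extents get the same physical-extent handling as REG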
case BTRFS_FILE_EXTENT_REG: {
|
||||
auto physical = call_btrfs_get(btrfs_stack_file_extent_disk_bytenr, i.m_data);
|
||||
auto ram = call_btrfs_get(btrfs_stack_file_extent_ram_bytes, i.m_data);
|
||||
auto len = call_btrfs_get(btrfs_stack_file_extent_num_bytes, i.m_data);
|
||||
auto offset = call_btrfs_get(btrfs_stack_file_extent_offset, i.m_data);
|
||||
BEESTRACE("Root " << get_state().m_root << " ino " << i.objectid << " physical " << to_hex(physical)
|
||||
<< " logical " << to_hex(i.offset) << ".." << to_hex(i.offset + len)
|
||||
<< " gen " << gen);
|
||||
++count_data;
|
||||
if (physical) {
|
||||
THROW_CHECK1(runtime_error, ram, ram > 0);
|
||||
THROW_CHECK1(runtime_error, len, len > 0);
|
||||
THROW_CHECK2(runtime_error, offset, ram, offset < ram);
|
||||
BeesFileId bfi(get_state().m_root, i.objectid);
|
||||
if (m_ctx->is_blacklisted(bfi)) {
|
||||
BEESCOUNT(crawl_blacklisted);
|
||||
} else {
|
||||
BeesFileRange bfr(bfi, i.offset, i.offset + len);
|
||||
// BEESNOTE("pushing bfr " << bfr << " limit " << BEES_MAX_QUEUE_SIZE);
|
||||
m_extents.insert(bfr);
|
||||
BEESCOUNT(crawl_push);
|
||||
}
|
||||
} else {
|
||||
BEESCOUNT(crawl_hole);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
BEESLOG("Crawled inline " << count_inline << " data " << count_data << " other " << count_other << " unknown " << count_unknown << " gen_low " << count_low << " gen_high " << count_high << " " << get_state() << " in " << crawl_timer << "s");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
BeesCrawl::fetch_extents_harder()
|
||||
{
|
||||
BEESNOTE("fetch_extents_harder " << get_state() << " with " << m_extents.size() << " extents");
|
||||
while (m_extents.empty()) {
|
||||
bool progress_made = fetch_extents();
|
||||
if (!progress_made) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BeesFileRange
|
||||
BeesCrawl::peek_front()
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
fetch_extents_harder();
|
||||
if (m_extents.empty()) {
|
||||
return BeesFileRange();
|
||||
}
|
||||
return *m_extents.begin();
|
||||
}
|
||||
|
||||
BeesFileRange
|
||||
BeesCrawl::pop_front()
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
fetch_extents_harder();
|
||||
if (m_extents.empty()) {
|
||||
return BeesFileRange();
|
||||
}
|
||||
auto rv = *m_extents.begin();
|
||||
m_extents.erase(m_extents.begin());
|
||||
#if 0
|
||||
auto state = get_state();
|
||||
state.m_objectid = rv.fid().ino();
|
||||
state.m_offset = rv.begin();
|
||||
set_state(state);
|
||||
#endif
|
||||
return rv;
|
||||
}
|
||||
|
||||
BeesCrawlState
|
||||
BeesCrawl::get_state()
|
||||
{
|
||||
unique_lock<mutex> lock(m_state_mutex);
|
||||
return m_state;
|
||||
}
|
||||
|
||||
void
|
||||
BeesCrawl::set_state(const BeesCrawlState &bcs)
|
||||
{
|
||||
unique_lock<mutex> lock(m_state_mutex);
|
||||
m_state = bcs;
|
||||
lock.unlock();
|
||||
m_ctx->roots()->crawl_state_set_dirty();
|
||||
}
|
91
src/bees-thread.cc
Normal file
@@ -0,0 +1,91 @@
|
||||
#include "bees.h"
|
||||
|
||||
using namespace crucible;
|
||||
using namespace std;
|
||||
|
||||
BeesThread::BeesThread(string name) :
|
||||
m_name(name)
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, name, !name.empty());
|
||||
}
|
||||
|
||||
void
|
||||
BeesThread::exec(function<void()> func)
|
||||
{
|
||||
m_timer.reset();
|
||||
BEESLOG("BeesThread exec " << m_name);
|
||||
m_thread_ptr = make_shared<thread>([=]() {
|
||||
BEESLOG("Starting thread " << m_name);
|
||||
BeesNote::set_name(m_name);
|
||||
BEESNOTE("thread function");
|
||||
Timer thread_time;
|
||||
catch_all([&]() {
|
||||
DIE_IF_MINUS_ERRNO(pthread_setname_np(pthread_self(), m_name.c_str()));
|
||||
});
|
||||
catch_all([&]() {
|
||||
func();
|
||||
});
|
||||
BEESLOG("Exiting thread " << m_name << ", " << thread_time << " sec");
|
||||
});
|
||||
}
|
||||
|
||||
BeesThread::BeesThread(string name, function<void()> func) :
|
||||
m_name(name)
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, name, !name.empty());
|
||||
BEESLOG("BeesThread construct " << m_name);
|
||||
exec(func);
|
||||
}
|
||||
|
||||
void
|
||||
BeesThread::join()
|
||||
{
|
||||
if (!m_thread_ptr) {
|
||||
BEESLOG("Thread " << m_name << " no thread ptr");
|
||||
return;
|
||||
}
|
||||
|
||||
BEESLOG("BeesThread::join " << m_name);
|
||||
if (m_thread_ptr->joinable()) {
|
||||
BEESLOG("Joining thread " << m_name);
|
||||
Timer thread_time;
|
||||
m_thread_ptr->join();
|
||||
BEESLOG("Waited for " << m_name << ", " << thread_time << " sec");
|
||||
} else if (!m_name.empty()) {
|
||||
BEESLOG("BeesThread " << m_name << " not joinable");
|
||||
} else {
|
||||
BEESLOG("BeesThread else " << m_name);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BeesThread::set_name(const string &name)
|
||||
{
|
||||
m_name = name;
|
||||
}
|
||||
|
||||
BeesThread::~BeesThread()
|
||||
{
|
||||
if (!m_thread_ptr) {
|
||||
BEESLOG("Thread " << m_name << " no thread ptr");
|
||||
return;
|
||||
}
|
||||
|
||||
BEESLOG("BeesThread destructor " << m_name);
|
||||
if (m_thread_ptr->joinable()) {
|
||||
BEESLOG("Cancelling thread " << m_name);
|
||||
int rv = pthread_cancel(m_thread_ptr->native_handle());
|
||||
if (rv) {
|
||||
BEESLOG("pthread_cancel returned " << strerror(-rv));
|
||||
}
|
||||
BEESLOG("Waiting for thread " << m_name);
|
||||
Timer thread_time;
|
||||
m_thread_ptr->join();
|
||||
BEESLOG("Waited for " << m_name << ", " << thread_time << " sec");
|
||||
} else if (!m_name.empty()) {
|
||||
BEESLOG("Thread " << m_name << " not joinable");
|
||||
} else {
|
||||
BEESLOG("Thread destroy else " << m_name);
|
||||
}
|
||||
}
|
||||
|
1006
src/bees-types.cc
Normal file
File diff suppressed because it is too large
599
src/bees.cc
Normal file
@@ -0,0 +1,599 @@
|
||||
#include "bees.h"
|
||||
|
||||
#include "crucible/interp.h"
|
||||
#include "crucible/limits.h"
|
||||
#include "crucible/process.h"
|
||||
#include "crucible/string.h"
|
||||
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
// PRIx64
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <sched.h>
|
||||
#include <sys/fanotify.h>
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
using namespace crucible;
|
||||
using namespace std;
|
||||
|
||||
int
|
||||
do_cmd_help(const ArgList &argv)
|
||||
{
|
||||
cerr << "Usage: " << argv[0] << " fs-root-path [fs-root-path-2...]\n"
|
||||
"Performs best-effort extent-same deduplication on btrfs.\n"
|
||||
"\n"
|
||||
"fs-root-path MUST be the root of a btrfs filesystem tree (id 5).\n"
|
||||
"Other directories will be rejected.\n"
|
||||
"\n"
|
||||
"Multiple filesystems can share a single hash table (BEESHOME)\n"
|
||||
"but this only works well if the content of each filesystem\n"
|
||||
"is distinct from all the others.\n"
|
||||
"\n"
|
||||
"Required environment variables:\n"
|
||||
"\tBEESHOME\tPath to hash table and configuration files\n"
|
||||
"\n"
|
||||
"Optional environment variables:\n"
|
||||
"\tBEESSTATUS\tFile to write status to (tmpfs recommended, e.g. /run)\n"
|
||||
"\n"
|
||||
<< endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// tracing ----------------------------------------
|
||||
|
||||
RateLimiter bees_info_rate_limit(BEES_INFO_RATE, BEES_INFO_BURST);
|
||||
|
||||
thread_local BeesTracer *BeesTracer::s_next_tracer = nullptr;
|
||||
|
||||
BeesTracer::~BeesTracer()
|
||||
{
|
||||
if (uncaught_exception()) {
|
||||
m_func();
|
||||
if (!m_next_tracer) {
|
||||
BEESLOG("--- END TRACE --- exception ---");
|
||||
}
|
||||
}
|
||||
s_next_tracer = m_next_tracer;
|
||||
}
|
||||
|
||||
BeesTracer::BeesTracer(function<void()> f) :
|
||||
m_func(f)
|
||||
{
|
||||
m_next_tracer = s_next_tracer;
|
||||
s_next_tracer = this;
|
||||
}
|
||||
|
||||
void
|
||||
BeesTracer::trace_now()
|
||||
{
|
||||
BeesTracer *tp = s_next_tracer;
|
||||
BEESLOG("--- BEGIN TRACE ---");
|
||||
while (tp) {
|
||||
tp->m_func();
|
||||
tp = tp->m_next_tracer;
|
||||
}
|
||||
BEESLOG("--- END TRACE ---");
|
||||
}
|
||||
|
||||
thread_local BeesNote *BeesNote::s_next = nullptr;
|
||||
mutex BeesNote::s_mutex;
|
||||
map<pid_t, BeesNote*> BeesNote::s_status;
|
||||
thread_local string BeesNote::s_name;
|
||||
|
||||
BeesNote::~BeesNote()
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
s_next = m_prev;
|
||||
if (s_next) {
|
||||
s_status[gettid()] = s_next;
|
||||
} else {
|
||||
s_status.erase(gettid());
|
||||
}
|
||||
}
|
||||
|
||||
BeesNote::BeesNote(function<void(ostream &os)> f) :
|
||||
m_func(f)
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
m_name = s_name;
|
||||
m_prev = s_next;
|
||||
s_next = this;
|
||||
s_status[gettid()] = s_next;
|
||||
}
|
||||
|
||||
void
|
||||
BeesNote::set_name(const string &name)
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
s_name = name;
|
||||
}
|
||||
|
||||
string
|
||||
BeesNote::get_name()
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
if (s_name.empty()) {
|
||||
return "bees";
|
||||
} else {
|
||||
return s_name;
|
||||
}
|
||||
}
|
||||
|
||||
BeesNote::ThreadStatusMap
|
||||
BeesNote::get_status()
|
||||
{
|
||||
unique_lock<mutex> lock(s_mutex);
|
||||
ThreadStatusMap rv;
|
||||
for (auto t : s_status) {
|
||||
ostringstream oss;
|
||||
if (!t.second->m_name.empty()) {
|
||||
oss << t.second->m_name << ": ";
|
||||
}
|
||||
if (t.second->m_timer.age() > BEES_TOO_LONG) {
|
||||
oss << "[" << t.second->m_timer << "s] ";
|
||||
}
|
||||
t.second->m_func(oss);
|
||||
rv[t.first] = oss.str();
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
// static inline helpers ----------------------------------------
|
||||
|
||||
static inline
|
||||
bool
|
||||
bees_addr_check(uint64_t v)
|
||||
{
|
||||
return !(v & (1ULL << 63));
|
||||
}
|
||||
|
||||
static inline
|
||||
bool
|
||||
bees_addr_check(int64_t v)
|
||||
{
|
||||
return !(v & (1ULL << 63));
|
||||
}
|
||||
|
||||
string
|
||||
pretty(double d)
|
||||
{
|
||||
static const char * units[] = { "", "K", "M", "G", "T", "P", "E" };
|
||||
static const char * *units_stop = units + sizeof(units) / sizeof(units[0]) - 1;
|
||||
const char * *unit = units;
|
||||
while (d >= 1024 && unit < units_stop) {
|
||||
d /= 1024;
|
||||
++unit;
|
||||
}
|
||||
ostringstream oss;
|
||||
oss << (round(d * 1000.0) / 1000.0) << *unit;
|
||||
return oss.str();
|
||||
}
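// For example, pretty(16384) yields "16K" and pretty(1536) yields "1.5K";
// values below 1024 are printed without a unit suffix.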
|
||||
|
||||
// ostream operators ----------------------------------------
|
||||
|
||||
template <class T>
|
||||
ostream &
|
||||
operator<<(ostream &os, const BeesStatTmpl<T> &bs)
|
||||
{
|
||||
unique_lock<mutex> lock(bs.m_mutex);
|
||||
bool first = true;
|
||||
string last_tag;
|
||||
for (auto i : bs.m_stats_map) {
|
||||
if (i.second == 0) {
|
||||
continue;
|
||||
}
|
||||
string tag = i.first.substr(0, i.first.find_first_of("_"));
|
||||
if (!last_tag.empty() && tag != last_tag) {
|
||||
os << "\n\t";
|
||||
} else if (!first) {
|
||||
os << " ";
|
||||
}
|
||||
last_tag = tag;
|
||||
first = false;
|
||||
os << i.first << "=" << i.second;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
// other ----------------------------------------
|
||||
|
||||
template <class T>
|
||||
T&
|
||||
BeesStatTmpl<T>::at(string idx)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
if (!m_stats_map.count(idx)) {
|
||||
m_stats_map[idx] = 0;
|
||||
}
|
||||
return m_stats_map[idx];
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T
|
||||
BeesStatTmpl<T>::at(string idx) const
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
return m_stats_map.at(idx);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void
|
||||
BeesStatTmpl<T>::add_count(string idx, size_t amount)
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
if (!m_stats_map.count(idx)) {
|
||||
m_stats_map[idx] = 0;
|
||||
}
|
||||
m_stats_map.at(idx) += amount;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
BeesStatTmpl<T>::BeesStatTmpl(const BeesStatTmpl &that)
|
||||
{
|
||||
if (&that == this) return;
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
unique_lock<mutex> lock2(that.m_mutex);
|
||||
m_stats_map = that.m_stats_map;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
BeesStatTmpl<T> &
|
||||
BeesStatTmpl<T>::operator=(const BeesStatTmpl<T> &that)
|
||||
{
|
||||
if (&that == this) return *this;
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
unique_lock<mutex> lock2(that.m_mutex);
|
||||
m_stats_map = that.m_stats_map;
|
||||
return *this;
|
||||
}
|
||||
|
||||
BeesStats BeesStats::s_global;
|
||||
|
||||
BeesStats
|
||||
BeesStats::operator-(const BeesStats &that) const
|
||||
{
|
||||
if (&that == this) return BeesStats();
|
||||
unique_lock<mutex> this_lock(m_mutex);
|
||||
BeesStats this_copy;
|
||||
this_copy.m_stats_map = m_stats_map;
|
||||
unique_lock<mutex> that_lock(that.m_mutex);
|
||||
BeesStats that_copy;
|
||||
that_copy.m_stats_map = that.m_stats_map;
|
||||
this_lock.unlock();
|
||||
that_lock.unlock();
|
||||
for (auto i : that.m_stats_map) {
|
||||
if (i.second != 0) {
|
||||
this_copy.at(i.first) -= i.second;
|
||||
}
|
||||
}
|
||||
return this_copy;
|
||||
}
|
||||
|
||||
BeesRates
|
||||
BeesStats::operator/(double d) const
|
||||
{
|
||||
BeesRates rv;
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
for (auto i : m_stats_map) {
|
||||
rv.m_stats_map[i.first] = ceil(i.second / d * 1000) / 1000;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
BeesStats::operator bool() const
|
||||
{
|
||||
unique_lock<mutex> lock(m_mutex);
|
||||
for (auto i : m_stats_map) {
|
||||
if (i.second != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
BeesTooLong::BeesTooLong(const string &s, double limit) :
|
||||
m_limit(limit),
|
||||
m_func([s](ostream &os) { os << s; })
|
||||
{
|
||||
}
|
||||
|
||||
BeesTooLong::BeesTooLong(const func_type &func, double limit) :
|
||||
m_limit(limit),
|
||||
m_func(func)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
BeesTooLong::check() const
|
||||
{
|
||||
if (age() > m_limit) {
|
||||
ostringstream oss;
|
||||
m_func(oss);
|
||||
BEESLOG("PERFORMANCE: " << *this << " sec: " << oss.str());
|
||||
}
|
||||
}
|
||||
|
||||
BeesTooLong::~BeesTooLong()
|
||||
{
|
||||
check();
|
||||
}
|
||||
|
||||
BeesTooLong &
|
||||
BeesTooLong::operator=(const func_type &f)
|
||||
{
|
||||
m_func = f;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void
|
||||
bees_sync(int fd)
|
||||
{
|
||||
Timer sync_timer;
|
||||
BEESNOTE("syncing " << name_fd(fd));
|
||||
BEESTOOLONG("syncing " << name_fd(fd));
|
||||
DIE_IF_NON_ZERO(fsync(fd));
|
||||
BEESCOUNT(sync_count);
|
||||
BEESCOUNTADD(sync_ms, sync_timer.age() * 1000);
|
||||
}
|
||||
|
||||
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
|
||||
m_dir_fd(dir_fd),
|
||||
m_name(name),
|
||||
m_limit(limit)
|
||||
{
|
||||
BEESLOG("BeesStringFile " << name_fd(m_dir_fd) << "/" << m_name << " max size " << pretty(m_limit));
|
||||
}
|
||||
|
||||
string
|
||||
BeesStringFile::read()
|
||||
{
|
||||
BEESNOTE("opening " << m_name << " in " << name_fd(m_dir_fd));
|
||||
Fd fd(openat(m_dir_fd, m_name.c_str(), FLAGS_OPEN_FILE));
|
||||
if (!fd) {
|
||||
return string();
|
||||
}
|
||||
|
||||
BEESNOTE("sizing " << m_name << " in " << name_fd(m_dir_fd));
|
||||
Stat st(fd);
|
||||
THROW_CHECK1(out_of_range, st.st_size, st.st_size > 0);
|
||||
THROW_CHECK1(out_of_range, st.st_size, st.st_size < ranged_cast<off_t>(m_limit));
|
||||
|
||||
BEESNOTE("reading " << m_name << " in " << name_fd(m_dir_fd));
|
||||
return read_string(fd, st.st_size);
|
||||
}
|
||||
|
||||
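// Atomic-replace write:  the new contents go to "<name>.tmp" in the same
// directory, are fsync()ed, then renamed over the old file, so a crash
// should leave either the old or the new contents but never a torn file.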
void
|
||||
BeesStringFile::write(string contents)
|
||||
{
|
||||
THROW_CHECK2(out_of_range, contents.size(), m_limit, contents.size() < m_limit);
|
||||
auto tmpname = m_name + ".tmp";
|
||||
|
||||
BEESNOTE("unlinking " << tmpname << " in " << name_fd(m_dir_fd));
|
||||
unlinkat(m_dir_fd, tmpname.c_str(), 0);
|
||||
// ignore error
|
||||
|
||||
BEESNOTE("closing " << tmpname << " in " << name_fd(m_dir_fd));
|
||||
{
|
||||
Fd ofd = openat_or_die(m_dir_fd, tmpname, FLAGS_CREATE_FILE, S_IRUSR | S_IWUSR);
|
||||
BEESNOTE("writing " << tmpname << " in " << name_fd(m_dir_fd));
|
||||
write_or_die(ofd, contents);
|
||||
BEESNOTE("fsyncing " << tmpname << " in " << name_fd(m_dir_fd));
|
||||
DIE_IF_NON_ZERO(fsync(ofd));
|
||||
}
|
||||
BEESNOTE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
|
||||
BEESTRACE("renaming " << tmpname << " to " << m_name << " in FD " << name_fd(m_dir_fd));
|
||||
renameat_or_die(m_dir_fd, tmpname, m_dir_fd, m_name);
|
||||
}
|
||||
|
||||
void
|
||||
BeesTempFile::create()
|
||||
{
|
||||
// BEESLOG("creating temporary file in " << m_ctx->root_path());
|
||||
BEESNOTE("creating temporary file in " << m_ctx->root_path());
|
||||
BEESTOOLONG("creating temporary file in " << m_ctx->root_path());
|
||||
|
||||
DIE_IF_MINUS_ONE(m_fd = openat(m_ctx->root_fd(), ".", FLAGS_OPEN_TMPFILE, S_IRUSR | S_IWUSR));
|
||||
BEESCOUNT(tmp_create);
|
||||
|
||||
// Can't reopen this file, so don't allow any resolves there
|
||||
// Resolves won't work there anyway. There are lots of tempfiles
|
||||
// and they're short-lived, so this ends up being just a memory leak
|
||||
// m_ctx->blacklist_add(BeesFileId(m_fd));
|
||||
m_ctx->insert_root_ino(m_fd);
|
||||
|
||||
// Set compression attribute
|
||||
int flags = 0;
|
||||
BEESTRACE("Getting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
|
||||
DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_GETFLAGS, &flags));
|
||||
flags |= FS_COMPR_FL;
|
||||
BEESTRACE("Setting FS_COMPR_FL on m_fd " << name_fd(m_fd) << " flags " << to_hex(flags));
|
||||
DIE_IF_MINUS_ONE(ioctl(m_fd, FS_IOC_SETFLAGS, &flags));
|
||||
|
||||
// Always leave first block empty to avoid creating a file with an inline extent
|
||||
m_end_offset = BLOCK_SIZE_CLONE;
|
||||
}
|
||||
|
||||
void
|
||||
BeesTempFile::resize(off_t offset)
|
||||
{
|
||||
BEESTOOLONG("Resizing temporary file to " << to_hex(offset));
|
||||
BEESNOTE("Resizing temporary file " << name_fd(m_fd) << " to " << to_hex(offset));
|
||||
BEESTRACE("Resizing temporary file " << name_fd(m_fd) << " to " << to_hex(offset));
|
||||
|
||||
// Ensure that file covers m_end_offset..offset
|
||||
THROW_CHECK2(invalid_argument, m_end_offset, offset, m_end_offset < offset);
|
||||
|
||||
// Truncate
|
||||
DIE_IF_NON_ZERO(ftruncate(m_fd, offset));
|
||||
BEESCOUNT(tmp_resize);
|
||||
|
||||
// Success
|
||||
m_end_offset = offset;
|
||||
}
|
||||
|
||||
BeesTempFile::BeesTempFile(shared_ptr<BeesContext> ctx) :
|
||||
m_ctx(ctx),
|
||||
m_end_offset(0)
|
||||
{
|
||||
create();
|
||||
}
|
||||
|
||||
void
|
||||
BeesTempFile::realign()
|
||||
{
|
||||
if (m_end_offset > BLOCK_SIZE_MAX_TEMP_FILE) {
|
||||
BEESLOG("temporary file size " << to_hex(m_end_offset) << " > max " << BLOCK_SIZE_MAX_TEMP_FILE);
|
||||
BEESCOUNT(tmp_trunc);
|
||||
return create();
|
||||
}
|
||||
if (m_end_offset & BLOCK_MASK_CLONE) {
|
||||
// BEESTRACE("temporary file size " << to_hex(m_end_offset) << " not aligned");
|
||||
BEESCOUNT(tmp_realign);
|
||||
return create();
|
||||
}
|
||||
// OK as is
|
||||
BEESCOUNT(tmp_aligned);
|
||||
}
|
||||
|
||||
BeesFileRange
|
||||
BeesTempFile::make_hole(off_t count)
|
||||
{
|
||||
THROW_CHECK1(invalid_argument, count, count > 0);
|
||||
realign();
|
||||
|
||||
BEESTRACE("make hole at " << m_end_offset);
|
||||
|
||||
auto end = m_end_offset + count;
|
||||
BeesFileRange rv(m_fd, m_end_offset, end);
|
||||
|
||||
resize(end);
|
||||
|
||||
BEESTRACE("created temporary hole " << rv);
|
||||
BEESCOUNT(tmp_hole);
|
||||
return rv;
|
||||
}
|
||||
|
||||
BeesFileRange
|
||||
BeesTempFile::make_copy(const BeesFileRange &src)
|
||||
{
|
||||
BEESLOG("copy: " << src);
|
||||
BEESNOTE("Copying " << src);
|
||||
BEESTRACE("Copying " << src);
|
||||
|
||||
THROW_CHECK1(invalid_argument, src, src.size() > 0);
|
||||
|
||||
// FIXME: don't know where these come from, but we can't handle them.
|
||||
// Grab a trace for the log.
|
||||
THROW_CHECK1(invalid_argument, src, src.size() < BLOCK_SIZE_MAX_TEMP_FILE);
|
||||
|
||||
realign();
|
||||
|
||||
auto begin = m_end_offset;
|
||||
auto end = m_end_offset + src.size();
|
||||
resize(end);
|
||||
|
||||
BeesFileRange rv(m_fd, begin, end);
|
||||
BEESTRACE("copying to: " << rv);
|
||||
BEESNOTE("copying " << src << " to " << rv);
|
||||
|
||||
auto src_p = src.begin();
|
||||
auto dst_p = begin;
|
||||
|
||||
bool did_block_write = false;
|
||||
while (dst_p < end) {
|
||||
auto len = min(BLOCK_SIZE_CLONE, end - dst_p);
|
||||
BeesBlockData bbd(src.fd(), src_p, len);
|
||||
// Don't fill in holes
|
||||
if (bbd.is_data_zero()) {
|
||||
BEESCOUNT(tmp_block_zero);
|
||||
} else {
|
||||
BEESNOTE("copying " << src << " to " << rv << "\n"
|
||||
"\tpwrite " << bbd << " to " << name_fd(m_fd) << " offset " << to_hex(dst_p) << " len " << len);
|
||||
pwrite_or_die(m_fd, bbd.data().data(), len, dst_p);
|
||||
did_block_write = true;
|
||||
BEESCOUNT(tmp_block);
|
||||
BEESCOUNTADD(tmp_bytes, len);
|
||||
}
|
||||
src_p += len;
|
||||
dst_p += len;
|
||||
}
|
||||
|
||||
// We seem to get lockups without this!
|
||||
if (did_block_write) {
|
||||
bees_sync(m_fd);
|
||||
}
|
||||
|
||||
BEESCOUNT(tmp_copy);
|
||||
return rv;
|
||||
}
|
||||
|
||||
int
|
||||
bees_main(ArgList args)
|
||||
{
|
||||
set_catch_explainer([&](string s) {
|
||||
BEESLOG("\n\n*** EXCEPTION ***\n\t" << s << "\n***\n");
|
||||
BEESCOUNT(exception_caught);
|
||||
});
|
||||
|
||||
BEESNOTE("main");
|
||||
BeesNote::set_name("main");
|
||||
|
||||
list<shared_ptr<BeesContext>> all_contexts;
|
||||
shared_ptr<BeesContext> bc;
|
||||
|
||||
// Subscribe to fanotify events
|
||||
bool did_subscription = false;
|
||||
for (string arg : args) {
|
||||
catch_all([&]() {
|
||||
bc = make_shared<BeesContext>(bc);
|
||||
bc->set_root_path(arg);
|
||||
did_subscription = true;
|
||||
});
|
||||
}
|
||||
|
||||
if (!did_subscription) {
|
||||
BEESLOG("WARNING: no filesystems added");
|
||||
}
|
||||
|
||||
BeesThread status_thread("status", [&]() {
|
||||
bc->dump_status();
|
||||
});
|
||||
|
||||
// Now we just wait forever
|
||||
bc->show_progress();
|
||||
|
||||
// That is all.
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char **argv)
|
||||
{
|
||||
if (argc < 2) {
|
||||
do_cmd_help(argv);
|
||||
return 2;
|
||||
}
|
||||
|
||||
ArgList args(argv + 1);
|
||||
|
||||
int rv = 1;
|
||||
catch_and_explain([&]() {
|
||||
rv = bees_main(args);
|
||||
});
|
||||
return rv;
|
||||
}
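// Typical invocation (paths here are illustrative only):
//
//	BEESHOME=/mnt/fs/.beeshome BEESSTATUS=/run/bees.status bees /mnt/fs
//
// where /mnt/fs is the mount point of the filesystem's root subvolume
// (id 5), BEESHOME holds the hash table and crawl state, and BEESSTATUS
// is an optional status file (tmpfs recommended).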
|
||||
|
||||
// instantiate templates for linkage ----------------------------------------
|
||||
|
||||
template class BeesStatTmpl<uint64_t>;
|
||||
template ostream & operator<<(ostream &os, const BeesStatTmpl<uint64_t> &bs);
|
||||
|
||||
template class BeesStatTmpl<double>;
|
||||
template ostream & operator<<(ostream &os, const BeesStatTmpl<double> &bs);
|
828
src/bees.h
Normal file
@@ -0,0 +1,828 @@
|
||||
#ifndef BEES_H
|
||||
#define BEES_H
|
||||
|
||||
#include "crucible/bool.h"
|
||||
#include "crucible/cache.h"
|
||||
#include "crucible/chatter.h"
|
||||
#include "crucible/error.h"
|
||||
#include "crucible/extentwalker.h"
|
||||
#include "crucible/fd.h"
|
||||
#include "crucible/fs.h"
|
||||
#include "crucible/lockset.h"
|
||||
#include "crucible/time.h"
|
||||
#include "crucible/timequeue.h"
|
||||
#include "crucible/workqueue.h"
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include <endian.h>
|
||||
|
||||
using namespace crucible;
|
||||
using namespace std;
|
||||
|
||||
// Block size for clone alignment (FIXME: should read this from /sys/fs/btrfs/<FS-UUID>/clone_alignment)
|
||||
const off_t BLOCK_SIZE_CLONE = 4096;
|
||||
|
||||
// Block size for dedup checksums (arbitrary, but must be a multiple of clone alignment)
|
||||
const off_t BLOCK_SIZE_SUMS = 4096;
|
||||
|
||||
// Block size for memory allocations and file mappings (FIXME: should be CPU page size)
|
||||
const off_t BLOCK_SIZE_MMAP = 4096;
|
||||
|
||||
// Maximum length parameter to extent-same ioctl (FIXME: hardcoded in kernel)
|
||||
const off_t BLOCK_SIZE_MAX_EXTENT_SAME = 4096 * 4096;
|
||||
|
||||
// Maximum length of a compressed extent in bytes
|
||||
const off_t BLOCK_SIZE_MAX_COMPRESSED_EXTENT = 128 * 1024;
|
||||
|
||||
// Try to combine smaller extents into larger ones
|
||||
const off_t BLOCK_SIZE_MIN_EXTENT_DEFRAG = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
|
||||
|
||||
// Avoid splitting extents that are already too small
|
||||
const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT;
|
||||
// const off_t BLOCK_SIZE_MIN_EXTENT_SPLIT = 1024LL * 1024 * 1024 * 1024;
|
||||
|
||||
// Maximum length of any extent in bytes
|
||||
// except we've seen 1.03G extents...
|
||||
// ...FIEMAP is slow and full of lies
|
||||
const off_t BLOCK_SIZE_MAX_EXTENT = 128 * 1024 * 1024;
|
||||
|
||||
// Masks, so we don't have to write "(BLOCK_SIZE_CLONE - 1)" everywhere
|
||||
const off_t BLOCK_MASK_CLONE = BLOCK_SIZE_CLONE - 1;
|
||||
const off_t BLOCK_MASK_SUMS = BLOCK_SIZE_SUMS - 1;
|
||||
const off_t BLOCK_MASK_MMAP = BLOCK_SIZE_MMAP - 1;
|
||||
const off_t BLOCK_MASK_MAX_COMPRESSED_EXTENT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT * 2 - 1;
|
||||
|
||||
// Maximum temporary file size
|
||||
const off_t BLOCK_SIZE_MAX_TEMP_FILE = 1024 * 1024 * 1024;
|
||||
|
||||
// Bucket size for hash table (size of one hash bucket)
|
||||
const off_t BLOCK_SIZE_HASHTAB_BUCKET = BLOCK_SIZE_MMAP;
|
||||
|
||||
// Extent size for hash table (since the nocow file attribute does not seem to be working today)
|
||||
const off_t BLOCK_SIZE_HASHTAB_EXTENT = 16 * 1024 * 1024;
|
||||
|
||||
// Bytes per second we want to flush (8GB every two hours)
|
||||
const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0;
|
||||
|
||||
// Interval between writing non-hash-table things to disk (15 minutes)
|
||||
const int BEES_WRITEBACK_INTERVAL = 900;
|
||||
|
||||
// Statistics reports while scanning
|
||||
const int BEES_STATS_INTERVAL = 3600;
|
||||
|
||||
// Progress shows instantaneous rates and thread status
|
||||
const int BEES_PROGRESS_INTERVAL = 3600;
|
||||
|
||||
// Status is output every freakin second. Use a ramdisk.
|
||||
const int BEES_STATUS_INTERVAL = 1;
|
||||
|
||||
// Log warnings when an operation takes too long
|
||||
const double BEES_TOO_LONG = 2.5;
|
||||
|
||||
// Avoid any extent where LOGICAL_INO takes this long
|
||||
const double BEES_TOXIC_DURATION = 9.9;
|
||||
|
||||
// How long we should wait for new btrfs transactions
|
||||
const double BEES_COMMIT_INTERVAL = 900;
|
||||
|
||||
// How long between hash table histograms
|
||||
const double BEES_HASH_TABLE_ANALYZE_INTERVAL = 3600;
|
||||
|
||||
// Rate limiting of informational messages
|
||||
const double BEES_INFO_RATE = 10.0;
|
||||
const double BEES_INFO_BURST = 1.0;
|
||||
|
||||
// After we have this many events queued, wait
|
||||
const size_t BEES_MAX_QUEUE_SIZE = 1024;
|
||||
|
||||
// Read this many items at a time in SEARCHv2
|
||||
const size_t BEES_MAX_CRAWL_SIZE = 4096;
|
||||
|
||||
// If an extent has this many refs, pretend it does not exist
|
||||
// to avoid a crippling btrfs performance bug
|
||||
// The actual limit in LOGICAL_INO seems to be 2730, but let's leave a little headroom
|
||||
const size_t BEES_MAX_EXTENT_REF_COUNT = 2560;
|
||||
|
||||
// Flags
|
||||
const int FLAGS_OPEN_COMMON = O_NOFOLLOW | O_NONBLOCK | O_CLOEXEC | O_NOATIME | O_LARGEFILE | O_NOCTTY;
|
||||
const int FLAGS_OPEN_DIR = FLAGS_OPEN_COMMON | O_RDONLY | O_DIRECTORY;
|
||||
const int FLAGS_OPEN_FILE = FLAGS_OPEN_COMMON | O_RDONLY;
|
||||
const int FLAGS_OPEN_FILE_RW = FLAGS_OPEN_COMMON | O_RDWR;
|
||||
const int FLAGS_OPEN_TMPFILE = FLAGS_OPEN_FILE_RW | O_TMPFILE | O_TRUNC | O_EXCL;
|
||||
const int FLAGS_CREATE_FILE = FLAGS_OPEN_COMMON | O_WRONLY | O_CREAT | O_EXCL;
|
||||
|
||||
// Fanotify allows O_APPEND, O_DSYNC, O_NOATIME, O_NONBLOCK, O_CLOEXEC, O_LARGEFILE
|
||||
const int FLAGS_OPEN_FANOTIFY = O_RDWR | O_NOATIME | O_CLOEXEC | O_LARGEFILE;
|
||||
|
||||
// macros ----------------------------------------
|
||||
|
||||
#define BEESLOG(x) do { Chatter c(BeesNote::get_name()); c << x; } while (0)
|
||||
#define BEESLOGTRACE(x) do { BEESLOG(x); BeesTracer::trace_now(); } while (0)
|
||||
|
||||
#define BEESTRACE(x) BeesTracer SRSLY_WTF_C(beesTracer_, __LINE__) ([&]() { BEESLOG(x); })
|
||||
#define BEESTOOLONG(x) BeesTooLong SRSLY_WTF_C(beesTooLong_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
|
||||
#define BEESNOTE(x) BeesNote SRSLY_WTF_C(beesNote_, __LINE__) ([&](ostream &_btl_os) { _btl_os << x; })
|
||||
#define BEESINFO(x) do { \
|
||||
if (bees_info_rate_limit.is_ready()) { \
|
||||
bees_info_rate_limit.borrow(1); \
|
||||
Chatter c(BeesNote::get_name()); \
|
||||
c << x; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define BEESCOUNT(stat) do { \
|
||||
BeesStats::s_global.add_count(#stat); \
|
||||
} while (0)
|
||||
|
||||
#define BEESCOUNTADD(stat, amount) do { \
|
||||
BeesStats::s_global.add_count(#stat, (amount)); \
|
||||
} while (0)
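// Minimal usage sketch of the macros above (scan_one_file is a
// hypothetical function, shown only to illustrate how the pieces compose):
//
//	void scan_one_file(Fd fd)
//	{
//		BEESNOTE("scanning " << name_fd(fd));         // shown in BEESSTATUS
//		BEESTRACE("scan_one_file " << name_fd(fd));   // replayed if an exception unwinds
//		BEESTOOLONG("scan_one_file " << name_fd(fd)); // warns after BEES_TOO_LONG sec
//		BEESCOUNT(scan_file);                         // bumps a global counter
//		// ... do the work ...
//	}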
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
template <class T> class BeesStatTmpl;
|
||||
template <class T> ostream& operator<<(ostream &os, const BeesStatTmpl<T> &bs);
|
||||
|
||||
template <class T>
|
||||
class BeesStatTmpl {
|
||||
map<string, T> m_stats_map;
|
||||
mutable mutex m_mutex;
|
||||
|
||||
public:
|
||||
BeesStatTmpl() = default;
|
||||
BeesStatTmpl(const BeesStatTmpl &that);
|
||||
BeesStatTmpl &operator=(const BeesStatTmpl &that);
|
||||
void add_count(string idx, size_t amount = 1);
|
||||
T& at(string idx);
|
||||
T at(string idx) const;
|
||||
|
||||
friend ostream& operator<< <>(ostream &os, const BeesStatTmpl<T> &bs);
|
||||
friend class BeesStats;
|
||||
};
|
||||
|
||||
using BeesRates = BeesStatTmpl<double>;
|
||||
|
||||
struct BeesStats : public BeesStatTmpl<uint64_t> {
|
||||
static BeesStats s_global;
|
||||
|
||||
BeesStats operator-(const BeesStats &that) const;
|
||||
BeesRates operator/(double d) const;
|
||||
explicit operator bool() const;
|
||||
};
|
||||
|
||||
class BeesContext;
|
||||
class BeesBlockData;
|
||||
|
||||
class BeesTracer {
|
||||
function<void()> m_func;
|
||||
BeesTracer *m_next_tracer = 0;
|
||||
|
||||
thread_local static BeesTracer *s_next_tracer;
|
||||
public:
|
||||
BeesTracer(function<void()> f);
|
||||
~BeesTracer();
|
||||
static void trace_now();
|
||||
};
|
||||
|
||||
class BeesNote {
|
||||
function<void(ostream &)> m_func;
|
||||
BeesNote *m_prev;
|
||||
Timer m_timer;
|
||||
string m_name;
|
||||
|
||||
static mutex s_mutex;
|
||||
static map<pid_t, BeesNote*> s_status;
|
||||
|
||||
thread_local static BeesNote *s_next;
|
||||
thread_local static string s_name;
|
||||
|
||||
public:
|
||||
BeesNote(function<void(ostream &)> f);
|
||||
~BeesNote();
|
||||
|
||||
using ThreadStatusMap = map<pid_t, string>;
|
||||
|
||||
static ThreadStatusMap get_status();
|
||||
|
||||
static void set_name(const string &name);
|
||||
static string get_name();
|
||||
};
|
||||
|
||||
// C++ threads dumbed down even further
|
||||
class BeesThread {
|
||||
string m_name;
|
||||
Timer m_timer;
|
||||
shared_ptr<thread> m_thread_ptr;
|
||||
|
||||
public:
|
||||
~BeesThread();
|
||||
BeesThread(string name);
|
||||
BeesThread(string name, function<void()> args);
|
||||
void exec(function<void()> args);
|
||||
void join();
|
||||
void set_name(const string &name);
|
||||
};
|
||||
|
||||
class BeesFileId {
|
||||
uint64_t m_root;
|
||||
uint64_t m_ino;
|
||||
|
||||
public:
|
||||
uint64_t root() const { return m_root; }
|
||||
uint64_t ino() const { return m_ino; }
|
||||
bool operator<(const BeesFileId &that) const;
|
||||
bool operator!=(const BeesFileId &that) const;
|
||||
bool operator==(const BeesFileId &that) const;
|
||||
operator bool() const;
|
||||
BeesFileId(const BtrfsInodeOffsetRoot &bior);
|
||||
BeesFileId(int fd);
|
||||
BeesFileId(uint64_t root, uint64_t ino);
|
||||
BeesFileId();
|
||||
};
|
||||
|
||||
ostream& operator<<(ostream &os, const BeesFileId &bfi);
|
||||
|
||||
class BeesFileRange {
|
||||
protected:
|
||||
static mutex s_mutex;
|
||||
mutable Fd m_fd;
|
||||
mutable BeesFileId m_fid;
|
||||
off_t m_begin, m_end;
|
||||
mutable off_t m_file_size;
|
||||
|
||||
public:
|
||||
|
||||
BeesFileRange();
|
||||
BeesFileRange(Fd fd, off_t begin, off_t end);
|
||||
BeesFileRange(const BeesFileId &fid, off_t begin, off_t end);
|
||||
BeesFileRange(const BeesBlockData &bbd);
|
||||
|
||||
operator BeesBlockData() const;
|
||||
|
||||
bool operator<(const BeesFileRange &that) const;
|
||||
bool operator==(const BeesFileRange &that) const;
|
||||
bool operator!=(const BeesFileRange &that) const;
|
||||
|
||||
bool empty() const;
|
||||
bool is_same_file(const BeesFileRange &that) const;
|
||||
bool overlaps(const BeesFileRange &that) const;
|
||||
|
||||
// If file ranges overlap, extends this to include that.
|
||||
// Coalesce with empty bfr = non-empty bfr
|
||||
bool coalesce(const BeesFileRange &that);
|
||||
|
||||
// Remove that from this, creating 0, 1, or 2 new objects
|
||||
pair<BeesFileRange, BeesFileRange> subtract(const BeesFileRange &that) const;
|
||||
|
||||
off_t begin() const { return m_begin; }
|
||||
off_t end() const { return m_end; }
|
||||
off_t size() const;
|
||||
|
||||
// Lazy accessors
|
||||
off_t file_size() const;
|
||||
BeesFileId fid() const;
|
||||
|
||||
// Get the fd if there is one
|
||||
Fd fd() const;
|
||||
|
||||
// Get the fd, opening it if necessary
|
||||
Fd fd(const shared_ptr<BeesContext> &ctx) const;
|
||||
|
||||
BeesFileRange copy_closed() const;
|
||||
|
||||
// Is it defined?
|
||||
operator bool() const { return !!m_fd || m_fid; }
|
||||
|
||||
// Make range larger
|
||||
off_t grow_end(off_t delta);
|
||||
off_t grow_begin(off_t delta);
|
||||
|
||||
friend ostream & operator<<(ostream &os, const BeesFileRange &bfr);
|
||||
};
|
||||
|
||||
class BeesAddress {
|
||||
public:
|
||||
using Type = uint64_t;
|
||||
private:
|
||||
Type m_addr = ZERO;
|
||||
bool magic_check(uint64_t flags);
|
||||
public:
|
||||
|
||||
// Blocks with no physical address (not yet allocated, hole, or "other").
|
||||
// PREALLOC blocks have a physical address so they're not magic enough to be handled here.
|
||||
// Compressed blocks have a physical address but it's two-dimensional.
|
||||
enum MagicValue {
|
||||
ZERO, // BeesAddress uninitialized
|
||||
DELALLOC, // delayed allocation
|
||||
HOLE, // no extent present, no space allocated
|
||||
UNUSABLE, // inline extent or unrecognized FIEMAP flags
|
||||
LAST, // all further values are non-magic
|
||||
};
|
||||
|
||||
BeesAddress(Type addr = ZERO) : m_addr(addr) {}
|
||||
BeesAddress(MagicValue addr) : m_addr(addr) {}
|
||||
BeesAddress& operator=(const BeesAddress &that) = default;
|
||||
operator Type() const { return m_addr; }
|
||||
bool operator==(const BeesAddress &that) const;
|
||||
bool operator==(const MagicValue that) const { return *this == BeesAddress(that); }
|
||||
bool operator!=(const BeesAddress &that) const { return !(*this == that); }
|
||||
bool operator!=(const MagicValue that) const { return *this != BeesAddress(that); }
|
||||
bool operator<(const BeesAddress &that) const;
|
||||
|
||||
static const Type c_offset_min = 1;
|
||||
static const Type c_offset_max = BLOCK_SIZE_MAX_COMPRESSED_EXTENT / BLOCK_SIZE_CLONE;
|
||||
|
||||
// if this isn't 0x3f we will have problems
|
||||
static const Type c_offset_mask = (c_offset_max - 1) | (c_offset_max);
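// With the sizes above, c_offset_max = 128 KiB / 4 KiB = 32, so
// c_offset_mask = 31 | 32 = 0x3f, exactly as required.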
|
||||
|
||||
static const Type c_compressed_mask = 1 << 11;
|
||||
static const Type c_eof_mask = 1 << 10;
|
||||
static const Type c_toxic_mask = 1 << 9;
|
||||
|
||||
static const Type c_all_mask = c_compressed_mask | c_eof_mask | c_offset_mask | c_toxic_mask;
|
||||
|
||||
bool is_compressed() const { return m_addr >= MagicValue::LAST && (m_addr & c_compressed_mask); }
|
||||
bool has_compressed_offset() const { return m_addr >= MagicValue::LAST && (m_addr & c_compressed_mask) && (m_addr & c_offset_mask); }
|
||||
bool is_toxic() const { return m_addr >= MagicValue::LAST && (m_addr & c_toxic_mask); }
|
||||
bool is_unaligned_eof() const { return m_addr >= MagicValue::LAST && (m_addr & c_eof_mask); }
|
||||
bool is_magic() const { return m_addr < MagicValue::LAST; }
|
||||
|
||||
Type get_compressed_offset() const;
|
||||
Type get_physical_or_zero() const;
|
||||
|
||||
void set_toxic();
|
||||
|
||||
BeesAddress(int fd, off_t offset);
|
||||
BeesAddress(int fd, off_t offset, shared_ptr<BeesContext> ctx);
|
||||
BeesAddress(const Extent &e, off_t offset);
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream &os, const BeesAddress &ba);
|
||||
|
||||
class BeesStringFile {
|
||||
Fd m_dir_fd;
|
||||
string m_name;
|
||||
size_t m_limit;
|
||||
|
||||
public:
|
||||
BeesStringFile(Fd dir_fd, string name, size_t limit = 1024 * 1024);
|
||||
string read();
|
||||
void write(string contents);
|
||||
};
|
||||
|
||||
class BeesHashTable {
|
||||
shared_ptr<BeesContext> m_ctx;
|
||||
public:
|
||||
using HashType = uint64_t;
|
||||
using AddrType = uint64_t;
|
||||
|
||||
struct Cell {
|
||||
HashType e_hash;
|
||||
AddrType e_addr;
|
||||
Cell(const Cell &) = default;
|
||||
Cell(HashType hash, AddrType addr) : e_hash(hash), e_addr(addr) { }
|
||||
bool operator==(const Cell &e) const { return tie(e_hash, e_addr) == tie(e.e_hash, e.e_addr); }
|
||||
bool operator!=(const Cell &e) const { return tie(e_hash, e_addr) != tie(e.e_hash, e.e_addr); }
|
||||
bool operator<(const Cell &e) const { return tie(e_hash, e_addr) < tie(e.e_hash, e.e_addr); }
|
||||
} __attribute__((packed));
|
||||
|
||||
private:
|
||||
static const uint64_t c_cells_per_bucket = BLOCK_SIZE_HASHTAB_BUCKET / sizeof(Cell);
|
||||
static const uint64_t c_buckets_per_extent = BLOCK_SIZE_HASHTAB_EXTENT / BLOCK_SIZE_HASHTAB_BUCKET;
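// With the default sizes (16-byte packed Cell, 4 KiB bucket, 16 MiB extent)
// this works out to 256 cells per bucket and 4096 buckets per extent.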
|
||||
|
||||
public:
|
||||
union Bucket {
|
||||
Cell p_cells[c_cells_per_bucket];
|
||||
uint8_t p_byte[BLOCK_SIZE_HASHTAB_BUCKET];
|
||||
} __attribute__((packed));
|
||||
|
||||
union Extent {
|
||||
Bucket p_buckets[BLOCK_SIZE_HASHTAB_EXTENT / BLOCK_SIZE_HASHTAB_BUCKET];
|
||||
uint8_t p_byte[BLOCK_SIZE_HASHTAB_EXTENT];
|
||||
} __attribute__((packed));
|
||||
|
||||
BeesHashTable(shared_ptr<BeesContext> ctx, string filename);
|
||||
~BeesHashTable();
|
||||
|
||||
vector<Cell> find_cell(HashType hash);
|
||||
bool push_random_hash_addr(HashType hash, AddrType addr);
|
||||
void erase_hash_addr(HashType hash, AddrType addr);
|
||||
bool push_front_hash_addr(HashType hash, AddrType addr);
|
||||
|
||||
void set_shared(bool shared);
|
||||
|
||||
private:
|
||||
string m_filename;
|
||||
Fd m_fd;
|
||||
uint64_t m_size;
|
||||
union {
|
||||
void *m_void_ptr; // Save some casting
|
||||
uint8_t *m_byte_ptr; // for pointer arithmetic
|
||||
Cell *m_cell_ptr; // pointer to one table cell (entry)
|
||||
Bucket *m_bucket_ptr; // all cells in one LRU unit
|
||||
Extent *m_extent_ptr; // all buckets in one I/O unit
|
||||
};
|
||||
union {
|
||||
void *m_void_ptr_end;
|
||||
uint8_t *m_byte_ptr_end;
|
||||
Cell *m_cell_ptr_end;
|
||||
Bucket *m_bucket_ptr_end;
|
||||
Extent *m_extent_ptr_end;
|
||||
};
|
||||
uint64_t m_buckets;
|
||||
uint64_t m_extents;
|
||||
uint64_t m_cells;
|
||||
set<uint64_t> m_buckets_dirty;
|
||||
set<uint64_t> m_buckets_missing;
|
||||
BeesThread m_writeback_thread;
|
||||
BeesThread m_prefetch_thread;
|
||||
RateLimiter m_flush_rate_limit;
|
||||
RateLimiter m_prefetch_rate_limit;
|
||||
mutex m_extent_mutex;
|
||||
mutex m_bucket_mutex;
|
||||
condition_variable m_condvar;
|
||||
set<HashType> m_toxic_hashes;
|
||||
BeesStringFile m_stats_file;
|
||||
|
||||
LockSet<uint64_t> m_extent_lock_set;
|
||||
|
||||
DefaultBool m_shared;
|
||||
|
||||
void writeback_loop();
|
||||
void prefetch_loop();
|
||||
void try_mmap_flags(int flags);
|
||||
pair<Cell *, Cell *> get_cell_range(HashType hash);
|
||||
pair<uint8_t *, uint8_t *> get_extent_range(HashType hash);
|
||||
void fetch_missing_extent(HashType hash);
|
||||
void set_extent_dirty(HashType hash);
|
||||
void flush_dirty_extents();
|
||||
bool is_toxic_hash(HashType h) const;
|
||||
|
||||
bool using_shared_map() const { return false; }
|
||||
|
||||
BeesHashTable(const BeesHashTable &) = delete;
|
||||
BeesHashTable &operator=(const BeesHashTable &) = delete;
|
||||
};
|
||||
|
||||
ostream &operator<<(ostream &os, const BeesHashTable::Cell &bhte);
|
||||
|
||||
struct BeesCrawlState {
|
||||
uint64_t m_root;
|
||||
uint64_t m_objectid;
|
||||
uint64_t m_offset;
|
||||
uint64_t m_min_transid;
|
||||
uint64_t m_max_transid;
|
||||
time_t m_started;
|
||||
BeesCrawlState();
|
||||
bool operator<(const BeesCrawlState &that) const;
|
||||
};
|
||||
|
||||
class BeesCrawl {
|
||||
shared_ptr<BeesContext> m_ctx;
|
||||
|
||||
mutex m_mutex;
|
||||
set<BeesFileRange> m_extents;
|
||||
DefaultBool m_deferred;
|
||||
|
||||
mutex m_state_mutex;
|
||||
BeesCrawlState m_state;
|
||||
|
||||
bool fetch_extents();
|
||||
void fetch_extents_harder();
|
||||
bool next_transid();
|
||||
|
||||
public:
|
||||
BeesCrawl(shared_ptr<BeesContext> ctx, BeesCrawlState initial_state);
|
||||
BeesFileRange peek_front();
|
||||
BeesFileRange pop_front();
|
||||
BeesCrawlState get_state();
|
||||
void set_state(const BeesCrawlState &bcs);
|
||||
};
|
||||
|
||||
class BeesRoots {
|
||||
shared_ptr<BeesContext> m_ctx;
|
||||
|
||||
BeesStringFile m_crawl_state_file;
|
||||
BeesCrawlState m_crawl_current;
|
||||
map<uint64_t, shared_ptr<BeesCrawl>> m_root_crawl_map;
|
||||
mutex m_mutex;
|
||||
condition_variable m_condvar;
|
||||
DefaultBool m_crawl_dirty;
|
||||
Timer m_crawl_timer;
|
||||
BeesThread m_crawl_thread;
|
||||
BeesThread m_writeback_thread;
|
||||
|
||||
void insert_new_crawl();
|
||||
void insert_root(const BeesCrawlState &bcs);
|
||||
Fd open_root_nocache(uint64_t root);
|
||||
Fd open_root_ino_nocache(uint64_t root, uint64_t ino);
|
||||
uint64_t transid_min();
|
||||
uint64_t transid_max();
|
||||
void state_load();
|
||||
void state_save();
|
||||
void crawl_roots();
|
||||
string crawl_state_filename() const;
|
||||
BeesCrawlState crawl_state_get(uint64_t root);
|
||||
void crawl_state_set_dirty();
|
||||
void crawl_state_erase(const BeesCrawlState &bcs);
|
||||
void crawl_thread();
|
||||
void writeback_thread();
|
||||
uint64_t next_root(uint64_t root = 0);
|
||||
void current_state_set(const BeesCrawlState &bcs);
|
||||
|
||||
friend class BeesFdCache;
|
||||
friend class BeesCrawl;
|
||||
|
||||
public:
|
||||
BeesRoots(shared_ptr<BeesContext> ctx);
|
||||
Fd open_root(uint64_t root);
|
||||
Fd open_root_ino(uint64_t root, uint64_t ino);
|
||||
Fd open_root_ino(const BeesFileId &bfi) { return open_root_ino(bfi.root(), bfi.ino()); }
|
||||
};
|
||||
|
||||
struct BeesHash {
|
||||
using Type = uint64_t;
|
||||
|
||||
BeesHash() : m_hash(0) { }
|
||||
BeesHash(Type that) : m_hash(that) { }
|
||||
operator Type() const { return m_hash; }
|
||||
BeesHash& operator=(const Type that) { m_hash = that; return *this; }
|
||||
private:
|
||||
Type m_hash;
|
||||
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream &os, const BeesHash &bh);
|
||||
|
||||
class BeesBlockData {
|
||||
using Blob = vector<char>;
|
||||
|
||||
mutable Fd m_fd;
|
||||
off_t m_offset;
|
||||
off_t m_length;
|
||||
mutable BeesAddress m_addr;
|
||||
mutable Blob m_data;
|
||||
mutable BeesHash m_hash;
|
||||
mutable DefaultBool m_hash_done;
|
||||
|
||||
public:
|
||||
// Constructor with the immutable fields
|
||||
BeesBlockData(Fd fd, off_t offset, size_t read_length = BLOCK_SIZE_SUMS);
|
||||
BeesBlockData();
|
||||
|
||||
// Non-lazy accessors
|
||||
Fd fd() const { return m_fd; }
|
||||
|
||||
// Renaming
|
||||
off_t begin() const { return m_offset; }
|
||||
off_t end() const { return m_offset + m_length; }
|
||||
off_t size() const { return m_length; }
|
||||
bool empty() const { return !m_length; }
|
||||
|
||||
// Lazy accessors may modify const things
|
||||
const Blob &data() const;
|
||||
BeesHash hash() const;
|
||||
BeesAddress addr() const;
|
||||
bool is_data_zero() const;
|
||||
bool is_data_equal(const BeesBlockData &that) const;
|
||||
|
||||
// Setters
|
||||
BeesBlockData &addr(const BeesAddress &a);
|
||||
|
||||
friend ostream &operator<<(ostream &, const BeesBlockData &);
|
||||
};
|
||||
|
||||
class BeesRangePair : public pair<BeesFileRange, BeesFileRange> {
|
||||
public:
|
||||
BeesRangePair(const BeesFileRange &src, const BeesFileRange &dst);
|
||||
bool grow(shared_ptr<BeesContext> ctx, bool constrained);
|
||||
BeesRangePair copy_closed() const;
|
||||
bool operator<(const BeesRangePair &that) const;
|
||||
friend ostream & operator<<(ostream &os, const BeesRangePair &brp);
|
||||
};
|
||||
|
||||
class BeesWorkQueueBase {
|
||||
string m_name;
|
||||
|
||||
protected:
|
||||
static mutex s_mutex;
|
||||
static set<BeesWorkQueueBase *> s_all_workers;
|
||||
|
||||
public:
|
||||
virtual ~BeesWorkQueueBase();
|
||||
BeesWorkQueueBase(const string &name);
|
||||
|
||||
string name() const;
|
||||
void name(const string &new_name);
|
||||
|
||||
virtual size_t active_size() const = 0;
|
||||
virtual list<string> peek_active(size_t count) const = 0;
|
||||
|
||||
static void for_each_work_queue(function<void(BeesWorkQueueBase *)> f);
|
||||
};
|
||||
|
||||
template <class Task>
|
||||
class BeesWorkQueue : public BeesWorkQueueBase {
|
||||
WorkQueue<Task> m_active_queue;
|
||||
|
||||
public:
|
||||
BeesWorkQueue(const string &name);
|
||||
~BeesWorkQueue();
|
||||
void push_active(const Task &task, size_t limit);
|
||||
void push_active(const Task &task);
|
||||
|
||||
size_t active_size() const override;
|
||||
list<string> peek_active(size_t count) const override;
|
||||
|
||||
Task pop();
|
||||
};
|
||||
|
||||
class BeesTempFile {
	shared_ptr<BeesContext> m_ctx;
	Fd m_fd;
	off_t m_end_offset;

	void create();
	void realign();
	void resize(off_t new_end_offset);

public:
	BeesTempFile(shared_ptr<BeesContext> ctx);
	BeesFileRange make_hole(off_t count);
	BeesFileRange make_copy(const BeesFileRange &src);
};

class BeesFdCache {
	LRUCache<Fd, shared_ptr<BeesContext>, uint64_t> m_root_cache;
	LRUCache<Fd, shared_ptr<BeesContext>, uint64_t, uint64_t> m_file_cache;
	Timer m_root_cache_timer;

public:
	BeesFdCache();
	Fd open_root(shared_ptr<BeesContext> ctx, uint64_t root);
	Fd open_root_ino(shared_ptr<BeesContext> ctx, uint64_t root, uint64_t ino);
	void insert_root_ino(shared_ptr<BeesContext> ctx, Fd fd);
};

struct BeesResolveAddrResult {
	BeesResolveAddrResult();
	vector<BtrfsInodeOffsetRoot> m_biors;
	DefaultBool m_is_toxic;
	bool is_toxic() const { return m_is_toxic; }
};

class BeesContext : public enable_shared_from_this<BeesContext> {
	shared_ptr<BeesContext> m_parent_ctx;

	Fd m_home_fd;

	shared_ptr<BeesFdCache> m_fd_cache;
	shared_ptr<BeesHashTable> m_hash_table;
	shared_ptr<BeesRoots> m_roots;

	map<thread::id, shared_ptr<BeesTempFile>> m_tmpfiles;

	LRUCache<BeesResolveAddrResult, BeesAddress> m_resolve_cache;

	string m_root_path;
	Fd m_root_fd;
	string m_root_uuid;

	mutable mutex m_blacklist_mutex;
	set<BeesFileId> m_blacklist;

	string m_uuid;

	Timer m_total_timer;

	void set_root_fd(Fd fd);

	BeesResolveAddrResult resolve_addr_uncached(BeesAddress addr);

	BeesFileRange scan_one_extent(const BeesFileRange &bfr, const Extent &e);
	void rewrite_file_range(const BeesFileRange &bfr);

public:
	BeesContext(shared_ptr<BeesContext> parent_ctx = nullptr);

	void set_root_path(string path);

	Fd root_fd() const { return m_root_fd; }
	Fd home_fd() const { return m_home_fd; }
	string root_path() const { return m_root_path; }
	string root_uuid() const { return m_root_uuid; }

	BeesFileRange scan_forward(const BeesFileRange &bfr);

	BeesRangePair dup_extent(const BeesFileRange &src);
	bool dedup(const BeesRangePair &brp);

	void blacklist_add(const BeesFileId &fid);
	bool is_blacklisted(const BeesFileId &fid) const;

	BeesResolveAddrResult resolve_addr(BeesAddress addr);
	void invalidate_addr(BeesAddress addr);

	void dump_status();
	void show_progress();

	shared_ptr<BeesFdCache> fd_cache();
	shared_ptr<BeesHashTable> hash_table();
	shared_ptr<BeesRoots> roots();
	shared_ptr<BeesTempFile> tmpfile();

	const Timer &total_timer() const { return m_total_timer; }

	// TODO: move the rest of the FD cache methods here
	void insert_root_ino(Fd fd);
};

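BeesContext is the hub the other classes hang off: it owns the filesystem root fd, the hash table, the fd cache, and the per-thread temporary files, and hands them out as shared_ptrs. A hypothetical setup sketch using only the methods declared above (the mount path and call ordering are illustrative assumptions, not taken from bees.cc):

shared_ptr<BeesContext> ctx = make_shared<BeesContext>();
ctx->set_root_path("/mnt/btrfs");    // presumably opens and stores the root fd
auto hash_table = ctx->hash_table(); // helper objects handed out as shared_ptrs
auto roots = ctx->roots();
auto fd_cache = ctx->fd_cache();
ctx->dump_status();                  // status/progress reporting entry points
ctx->show_progress();
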
class BeesResolver {
	shared_ptr<BeesContext> m_ctx;
	BeesAddress m_addr;
	vector<BtrfsInodeOffsetRoot> m_biors;
	set<BeesFileRange> m_ranges;
	unsigned m_bior_count;

	// We found matching data, so we can dedup
	DefaultBool m_found_data;

	// We found matching data, so we *did* dedup
	DefaultBool m_found_dup;

	// We found matching hash, so the hash table is still correct
	DefaultBool m_found_hash;

	// We found matching physical address, so the hash table isn't totally wrong
	DefaultBool m_found_addr;

	// We found matching physical address, but data did not match
	DefaultBool m_wrong_data;

	// The whole thing is a placebo to avoid crippling btrfs performance bugs
	DefaultBool m_is_toxic;

	BeesFileRange chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesBlockData &needle_bbd);
	BeesBlockData adjust_offset(const BeesFileRange &haystack, const BeesBlockData &needle);
	void find_matches(bool just_one, BeesBlockData &bbd);

	// FIXME: Do we need these? We probably always have at least one BBD
	BeesFileRange chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesHash hash);
	BeesBlockData adjust_offset(const BeesFileRange &haystack, bool inexact, BeesHash needle);
	void find_matches(bool just_one, BeesHash hash);

public:
	BeesResolver(shared_ptr<BeesContext> ctx, BeesAddress addr);
	BeesAddress addr(BeesAddress new_addr);

	// visitor returns true to stop loop, false to continue
	bool for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFileRange &bfr)> visitor);

	set<BeesFileRange> find_all_matches(BeesBlockData &bbd);
	set<BeesFileRange> find_all_matches(BeesHash hash);

	// TODO: Replace these with "for_each_extent_ref"
	BeesFileRange find_one_match(BeesBlockData &bbd);
	BeesFileRange find_one_match(BeesHash hash);

	void replace_src(const BeesFileRange &src_bfr);
	BeesFileRange replace_dst(const BeesFileRange &dst_bfr);

	bool found_addr() const { return m_found_addr; }
	bool found_data() const { return m_found_data; }
	bool found_dup() const { return m_found_dup; }
	bool found_hash() const { return m_found_hash; }
	bool is_toxic() const { return m_is_toxic; }
	size_t count() const { return m_bior_count; }
	BeesAddress addr() const { return m_addr; }

	bool operator<(const BeesResolver &that) const;
};

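The comment on for_each_extent_ref above defines the visitor contract: the callback returns true to stop the loop and false to continue. A hypothetical caller sketch (ctx, addr, fd and offset stand in for values a scanner would already hold; looks_like_a_match is a made-up predicate):

BeesResolver resolver(ctx, addr);   // enumerate references to the extent at addr
BeesBlockData needle(fd, offset);   // the block we are trying to match
resolver.for_each_extent_ref(needle, [&](const BeesFileRange &bfr) -> bool {
	// Inspect one file range that references the same extent.
	// Return true to stop early (e.g. a usable duplicate was found),
	// false to keep iterating.
	return looks_like_a_match(bfr);
});
if (resolver.is_toxic()) {
	// the address is known to trigger pathological btrfs behaviour;
	// callers would presumably skip or blacklist it
}
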
class BeesTooLong : public Timer {
	using func_type = function<void(ostream &)>;
	double m_limit;
	func_type m_func;

public:
	BeesTooLong(const func_type &func = [](ostream &os) { os << __PRETTY_FUNCTION__; }, double limit = BEES_TOO_LONG);
	BeesTooLong(const string &s, double limit = BEES_TOO_LONG);
	BeesTooLong &operator=(const func_type &s);
	~BeesTooLong();
	void check() const;
};

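BeesTooLong extends Timer into a scope watchdog: construction starts the clock, and check() reports the operation described by m_func once more than m_limit seconds have elapsed. A hypothetical usage sketch consistent with the declarations above (the function name and message are illustrative):

void scan_one_file(const string &filename)
{
	// warn if this scope takes longer than BEES_TOO_LONG seconds
	BeesTooLong too_long("scan_one_file");
	// refine the message once more context is available
	too_long = [&](ostream &os) { os << "scanning " << filename; };
	// ... slow work here ...
	too_long.check(); // presumably also invoked from ~BeesTooLong()
}
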
// And now, a giant pile of extern declarations
string pretty(double d);
extern RateLimiter bees_info_rate_limit;
void bees_sync(int fd);
string format_time(time_t t);

#endif
52
src/fiemap.cc
Normal file
@@ -0,0 +1,52 @@
// fiemap: print the FIEMAP extent map of a file.
// Usage: fiemap <file> [start [length [flags]]] (numeric arguments accept 0x prefixes)
#include "crucible/fd.h"
#include "crucible/fs.h"
#include "crucible/error.h"
#include "crucible/string.h"

#include <iostream>

#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

using namespace crucible;
using namespace std;

int
main(int argc, char **argv)
{
	catch_all([&]() {
		THROW_CHECK1(invalid_argument, argc, argc > 1);
		string filename = argv[1];

		cout << "File: " << filename << endl;

		Fd fd = open_or_die(filename, O_RDONLY);
		Fiemap fm;
		fm.m_max_count = 100;
		if (argc > 2) { fm.fm_start = stoull(argv[2], nullptr, 0); }
		if (argc > 3) { fm.fm_length = stoull(argv[3], nullptr, 0); }
		if (argc > 4) { fm.fm_flags = stoull(argv[4], nullptr, 0); }
		fm.fm_length = min(fm.fm_length, FIEMAP_MAX_OFFSET - fm.fm_start);
		uint64_t stop_at = fm.fm_start + fm.fm_length;
		uint64_t last_byte = fm.fm_start;
		do {
			fm.do_ioctl(fd);
			// cerr << fm;
			uint64_t last_logical = FIEMAP_MAX_OFFSET;
			for (auto &extent : fm.m_extents) {
				// Report any gap between the previous extent and this one as a hole
				if (extent.fe_logical > last_byte) {
					cout << "Log " << to_hex(last_byte) << ".." << to_hex(extent.fe_logical) << " Hole" << endl;
				}
				cout << "Log " << to_hex(extent.fe_logical) << ".." << to_hex(extent.fe_logical + extent.fe_length)
					<< " Phy " << to_hex(extent.fe_physical) << ".." << to_hex(extent.fe_physical + extent.fe_length)
					<< " Flags " << fiemap_extent_flags_ntoa(extent.fe_flags) << endl;
				last_logical = extent.fe_logical + extent.fe_length;
				last_byte = last_logical;
			}
			// Continue the next ioctl from the end of the last extent returned
			fm.fm_start = last_logical;
		} while (fm.fm_start < stop_at);
	});
	exit(EXIT_SUCCESS);
}
40
src/fiewalk.cc
Normal file
@@ -0,0 +1,40 @@
// fiewalk: walk a file's extents with BtrfsExtentWalker and print each one.
// Usage: fiewalk <file> [start-offset]
#include "crucible/extentwalker.h"
#include "crucible/error.h"
#include "crucible/string.h"

#include <iostream>

#include <fcntl.h>
#include <unistd.h>

using namespace crucible;
using namespace std;

int
main(int argc, char **argv)
{
	catch_all([&]() {
		THROW_CHECK1(invalid_argument, argc, argc > 1);
		string filename = argv[1];

		cout << "File: " << filename << endl;

		Fd fd = open_or_die(filename, O_RDONLY);
		BtrfsExtentWalker ew(fd);
		off_t pos = 0;
		if (argc > 2) { pos = stoull(argv[2], nullptr, 0); }
		ew.seek(pos);
		do {
			// cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
			cout << ew.current() << endl;
		} while (ew.next());
#if 0
		// Disabled: walk the same extents in reverse as a sanity check
		cout << "\n\n\nAnd now, backwards...\n\n\n" << endl;
		do {
			cout << "\n\n>>>" << ew.current() << "<<<\n\n" << endl;
		} while (ew.prev());
		cout << "\n\n\nDone!\n\n\n" << endl;
#endif
	});
	exit(EXIT_SUCCESS);
}