1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-18 05:45:45 +02:00
bees/src/bees-types.cc
Zygo Blaxell 96eb100ded bees: use readahead instead of posix_fadvise
Other btrfs utils use readahead() not posix_fadvise().

There does not appear to be a performance or correctness difference
between the three (none, posix_fadvise, or readahead()).

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
2018-09-14 23:50:00 -04:00

1013 lines
27 KiB
C++

#include "bees.h"
#include "crucible/crc64.h"
#include "crucible/limits.h"
#include "crucible/ntoa.h"
#include "crucible/string.h"
#include <fstream>
#include <inttypes.h>
using namespace crucible;
using namespace std;
// Write a file ID to a stream in the form "<root>:<ino>".
ostream &
operator<<(ostream &os, const BeesFileId &bfi)
{
	os << bfi.root() << ":" << bfi.ino();
	return os;
}
// Strict ordering of file IDs: inode number is the major key and root is
// the minor key, which gives good locality when scanning across snapshots.
bool
BeesFileId::operator<(const BeesFileId &that) const
{
	if (m_ino != that.m_ino) {
		return m_ino < that.m_ino;
	}
	return m_root < that.m_root;
}
// Two file IDs are equal when both the root and the inode number match.
bool
BeesFileId::operator==(const BeesFileId &that) const
{
	return tie(m_root, m_ino) == tie(that.m_root, that.m_ino);
}
// Two file IDs differ when the root or the inode number (or both) differ.
bool
BeesFileId::operator!=(const BeesFileId &that) const
{
	return !(m_root == that.m_root && m_ino == that.m_ino);
}
// A file ID is "set" only when both the root and the inode number are non-zero.
BeesFileId::operator bool() const
{
	const bool has_root = (m_root != 0);
	const bool has_ino = (m_ino != 0);
	return has_root && has_ino;
}
// Build a file ID from the m_root and m_inum fields of a BtrfsInodeOffsetRoot.
BeesFileId::BeesFileId(const BtrfsInodeOffsetRoot &bior) :
m_root(bior.m_root),
m_ino(bior.m_inum)
{
}
// Build a file ID directly from a root number and an inode number.
BeesFileId::BeesFileId(uint64_t root, uint64_t ino) :
m_root(root),
m_ino(ino)
{
}
// Build a file ID from an open file descriptor: the root comes from
// btrfs_get_root_id(fd) and the inode number from Stat(fd).st_ino.
BeesFileId::BeesFileId(int fd) :
m_root(btrfs_get_root_id(fd)),
m_ino(Stat(fd).st_ino)
{
}
// Build an unset file ID: root and inode are both zero, so operator bool is false.
BeesFileId::BeesFileId() :
m_root(0),
m_ino(0)
{
}
// Write a description of a file range to a stream.
// A range whose end is the maximum off_t value is printed as "- [begin..eof]".
// Otherwise the size is printed followed by the bounds; "(" replaces "[begin"
// when the range starts at offset zero, and ")" replaces "]" when the range
// has an open fd and reaches (or passes) the file size.
// The file ID and the fd (with its name) are appended when present.
ostream &
operator<<(ostream &os, const BeesFileRange &bfr)
{
	const bool has_fd = !!bfr.m_fd;
	const bool open_ended = (bfr.end() == numeric_limits<off_t>::max());
	if (open_ended) {
		os << "- [" << to_hex(bfr.begin()) << "..eof]";
	} else {
		os << pretty(bfr.size()) << " ";
		if (bfr.begin() == 0) {
			os << "(";
		} else {
			os << "[" << to_hex(bfr.begin());
		}
		os << ".." << to_hex(bfr.end());
		// file_size() is only consulted when there is an fd to stat
		const bool reaches_eof = has_fd && bfr.end() >= bfr.file_size();
		os << (reaches_eof ? ")" : "]");
	}
	if (bfr.m_fid) {
		os << " fid = " << bfr.m_fid;
	}
	if (has_fd) {
		os << " fd = " << bfr.m_fd << " '" << name_fd(bfr.m_fd) << "'";
	}
	return os;
}
// Write a range pair to a stream: the size of the first (src) range, the
// byte bounds of both ranges, then the fd and fd name of each on its own line.
ostream &
operator<<(ostream &os, const BeesRangePair &brp)
{
	const BeesFileRange &src = brp.first;
	const BeesFileRange &dst = brp.second;
	os << "BeesRangePair: " << pretty(src.size());
	os << " src[" << to_hex(src.begin()) << ".." << to_hex(src.end()) << "]";
	os << " dst[" << to_hex(dst.begin()) << ".." << to_hex(dst.end()) << "]";
	os << "\nsrc = " << src.fd() << " " << name_fd(src.fd());
	os << "\ndst = " << dst.fd() << " " << name_fd(dst.fd());
	return os;
}
// Order ranges by file ID, then begin offset, then end offset, so that
// file blocks are read in order.
bool
BeesFileRange::operator<(const BeesFileRange &that) const
{
	const BeesFileId this_fid = fid();
	const BeesFileId that_fid = that.fid();
	if (this_fid != that_fid) {
		return this_fid < that_fid;
	}
	if (m_begin != that.m_begin) {
		return m_begin < that.m_begin;
	}
	return m_end < that.m_end;
	// Faster to read big chunks first?  Probably confuses the hell
	// out of crawl state, so let's only keep this if there's a clear
	// performance win.
	// return make_tuple(that.size(), fid(), m_begin, m_end) < make_tuple(size(), that.fid(), that.m_begin, that.m_end);
}
// Two ranges are equal when they cover the same byte span of the same file.
bool
BeesFileRange::operator==(const BeesFileRange &that) const
{
	// The offsets are cheap to compare and have the most variety, so test them first
	const bool same_span = (m_begin == that.m_begin) && (m_end == that.m_end);
	if (!same_span) {
		return false;
	}
	// Identical open fds prove it is the same file,
	// but different fds may still refer to the same file
	const bool both_open = !!m_fd && !!that.m_fd;
	if (both_open && m_fd == that.m_fd) {
		return true;
	}
	// Fall back to comparing the file IDs
	return fid() == that.fid();
}
// Negation of operator==.
bool
BeesFileRange::operator!=(const BeesFileRange &that) const
{
	return !operator==(that);
}
// True when the range covers no bytes.
// Throws invalid_argument if begin is greater than end.
bool
BeesFileRange::empty() const
{
	THROW_CHECK2(invalid_argument, m_begin, m_end, m_begin <= m_end);
	return !(m_begin < m_end);
}
// Number of bytes covered by the range (end minus begin).
// Throws invalid_argument if begin is greater than end.
off_t
BeesFileRange::size() const
{
	THROW_CHECK2(invalid_argument, m_begin, m_end, m_begin <= m_end);
	const off_t length = m_end - m_begin;
	return length;
}
// Size of the underlying file, obtained by stat'ing fd() and cached in m_file_size.
// A cached value that is zero or negative is not trusted and triggers a new stat.
// Throws invalid_argument if the stat reports a negative size.
off_t
BeesFileRange::file_size() const
{
	if (m_file_size > 0) {
		return m_file_size;
	}
	m_file_size = Stat(fd()).st_size;
	// These checks could trigger on valid input, but that would mean we have
	// lost a race (e.g. a file was truncated while we were building a
	// matching range pair with it).  In such cases we should probably stop
	// whatever we were doing and backtrack to some higher level anyway.
	// Well, OK, but we call this function from exception handlers...
	THROW_CHECK1(invalid_argument, m_file_size, m_file_size >= 0);
	// THROW_CHECK2(invalid_argument, m_file_size, m_end, m_end <= m_file_size || m_end == numeric_limits<off_t>::max());
	return m_file_size;
}
off_t
BeesFileRange::grow_end(off_t delta)
{
THROW_CHECK1(invalid_argument, delta, delta > 0);
m_end = min(m_end + delta, file_size());
THROW_CHECK2(runtime_error, m_file_size, m_end, m_end <= m_file_size);
return m_end;
}
// Move the beginning of the range backward by delta bytes, but by no more
// than the current begin offset.  Returns the new begin offset.
// Throws invalid_argument if delta is not positive.
off_t
BeesFileRange::grow_begin(off_t delta)
{
	THROW_CHECK1(invalid_argument, delta, delta > 0);
	const off_t step = min(delta, m_begin);
	m_begin = m_begin - step;
	return m_begin;
}
// Build a range covering the same fd and byte span (begin..end) as a BeesBlockData.
// m_fid is not set here; fid() derives it from the fd on demand.
BeesFileRange::BeesFileRange(const BeesBlockData &bbd) :
m_fd(bbd.fd()),
m_begin(bbd.begin()),
m_end(bbd.end())
{
}
// Build a range over bytes begin..end of an open file.
// m_fid is not set here; fid() derives it from the fd on demand.
BeesFileRange::BeesFileRange(Fd fd, off_t begin, off_t end) :
m_fd(fd),
m_begin(begin),
m_end(end)
{
}
// Build a range over bytes begin..end of a file identified only by file ID.
// No fd is set here; fd(ctx) can open one from the file ID later.
BeesFileRange::BeesFileRange(const BeesFileId &fid, off_t begin, off_t end) :
m_fid(fid),
m_begin(begin),
m_end(end)
{
}
// True when both ranges refer to the same file.
bool
BeesFileRange::is_same_file(const BeesFileRange &that) const
{
	// Matching open fds settle the question without looking up file IDs
	const bool both_open = !!m_fd && !!that.m_fd;
	if (both_open && m_fd == that.m_fd) {
		return true;
	}
	// Otherwise fetch (or reuse) the file IDs of both ranges and compare those
	return fid() == that.fid();
}
bool
BeesFileRange::overlaps(const BeesFileRange &that) const
{
// Determine whether the byte ranges overlap before doing syscalls on file descriptors
pair<uint64_t, uint64_t> a(m_begin, m_end);
pair<uint64_t, uint64_t> b(that.m_begin, that.m_end);
// range a starts lower than or equal b
if (b.first < a.first) {
swap(a, b);
}
// if b starts within a, they overlap
// (and the intersecting region is b.first..min(a.second, b.second))
// (and the union region is a.first..max(a.second, b.second))
if (b.first >= a.first && b.first < a.second) {
return is_same_file(that);
}
return false;
}
bool
BeesFileRange::coalesce(const BeesFileRange &that)
{
// Let's define coalesce-with-null as identity,
// and coalesce-null-with-null as coalesced
if (!*this) {
operator=(that);
return true;
}
if (!that) {
return true;
}
// Can't coalesce different files
if (!is_same_file(that)) return false;
pair<uint64_t, uint64_t> a(m_begin, m_end);
pair<uint64_t, uint64_t> b(that.m_begin, that.m_end);
// range a starts lower than or equal b
if (b.first < a.first) {
swap(a, b);
}
// if b starts within a, they overlap
// (and the intersecting region is b.first..min(a.second, b.second))
// (and the union region is a.first..max(a.second, b.second))
if (b.first >= a.first && b.first < a.second) {
m_begin = a.first;
m_end = max(a.second, b.second);
return true;
}
return false;
}
// Convert the range to a BeesBlockData with the same fd, starting offset and length.
BeesFileRange::operator BeesBlockData() const
{
	BEESTRACE("operator BeesBlockData " << *this);
	const off_t length = m_end - m_begin;
	return BeesBlockData(m_fd, m_begin, length);
}
// Return the stored fd as-is; unlike fd(ctx), this never opens a file.
Fd
BeesFileRange::fd() const
{
return m_fd;
}
// Return an fd for this range, opening the file by file ID if necessary.
// With no file ID, the stored fd is returned unchanged.
// With a file ID but no fd, the file is opened through ctx (when ctx is not
// null) and the result is stored in m_fd.
// With both, the fd's file ID must match the stored one, otherwise
// invalid_argument is thrown.
Fd
BeesFileRange::fd(const shared_ptr<BeesContext> &ctx) const
{
	// Without a fid there is nothing to open or verify
	if (!m_fid) {
		return m_fd;
	}
	if (!!m_fd) {
		// Both fid and fd are present, so make sure they agree
		BeesFileId fd_fid(m_fd);
		THROW_CHECK2(invalid_argument, fd_fid, m_fid, fd_fid == m_fid);
		return m_fd;
	}
	// No fd yet: open by fid if we were given a context to do it with
	if (ctx) {
		m_fd = ctx->roots()->open_root_ino(m_fid);
	}
	return m_fd;
}
// Return a copy of this range that is built from the file ID and the same
// byte span, and so carries no fd.
BeesFileRange
BeesFileRange::copy_closed() const
{
	const BeesFileId file_id = fid();
	return BeesFileRange(file_id, m_begin, m_end);
}
// Return the file ID of this range.  If none is stored yet and an fd is
// available, the ID is derived from the fd and remembered in m_fid.
BeesFileId
BeesFileRange::fid() const
{
	const bool need_lookup = !m_fid && !!m_fd;
	if (need_lookup) {
		m_fid = BeesFileId(m_fd);
	}
	return m_fid;
}
// Build a (src, dst) pair of ranges and verify its invariants.
// Throws invalid_argument if the ranges overlap, differ in size, or (when
// both ranges have an fd) any corresponding blocks differ in content.
BeesRangePair::BeesRangePair(const BeesFileRange &src, const BeesFileRange &dst) :
	pair<BeesFileRange, BeesFileRange>(src, dst)
{
	BEESTRACE("checking constraints on " << *this);
	// The two ranges must not overlap to begin with
	THROW_CHECK2(invalid_argument, first, second, !first.overlaps(second));
	// The two ranges must be the same length to begin with
	THROW_CHECK2(invalid_argument, first, second, first.size() == second.size());
	// Content can only be compared if both files are open
	if (!first.fd() || !second.fd()) {
		return;
	}
	// Compare the data one block (of at most BLOCK_SIZE_SUMS bytes) at a time
	off_t src_pos = first.begin();
	off_t dst_pos = second.begin();
	for (off_t remaining = first.size(); remaining; ) {
		const off_t chunk = min(BLOCK_SIZE_SUMS, remaining);
		BeesBlockData first_bbd(first.fd(), src_pos, chunk);
		BeesBlockData second_bbd(second.fd(), dst_pos, chunk);
		THROW_CHECK2(invalid_argument, first_bbd, second_bbd, first_bbd.is_data_equal(second_bbd));
		src_pos += chunk;
		dst_pos += chunk;
		remaining -= chunk;
	}
}
// Order pairs by destination range first, then by source range.
bool
BeesRangePair::operator<(const BeesRangePair &that) const
{
	if (second < that.second) {
		return true;
	}
	if (that.second < second) {
		return false;
	}
	return first < that.first;
}
// Try to extend this matching pair of ranges, first backward and then forward,
// in steps of up to BLOCK_SIZE_CLONE bytes, for as long as the newly covered
// blocks of `first` (source) and `second` (destination) hold identical data
// and pass the other checks below.  Growth stops once first.size() reaches
// BLOCK_SIZE_MAX_EXTENT.
//
// ctx supplies the hash table and the address resolver used for the toxic checks.
// constrained, when true, stops forward growth at the end of the extent of
// `second` found on entry.  Backward growth always stops at the beginning of
// that extent, because the code that would cross it is disabled with #if 0.
//
// Returns true if the pair was extended at least once in either direction.
// Throws invalid_argument if either begin offset is not aligned to
// BLOCK_MASK_CLONE or if the ranges already overlap on entry.
bool
BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
{
BEESTOOLONG("grow constrained = " << constrained << " *this = " << *this);
BEESTRACE("grow constrained = " << constrained << " *this = " << *this);
bool rv = false;
Timer grow_backward_timer;
THROW_CHECK1(invalid_argument, first.begin(), (first.begin() & BLOCK_MASK_CLONE) == 0);
THROW_CHECK1(invalid_argument, second.begin(), (second.begin() & BLOCK_MASK_CLONE) == 0);
// We should not be overlapping already
THROW_CHECK2(invalid_argument, first, second, !first.overlaps(second));
BtrfsExtentWalker ew_second(second.fd());
// Stop on aligned extent boundary
ew_second.seek(second.begin());
Extent e_second = ew_second.current();
BEESTRACE("e_second " << e_second);
// Preread entire extent
// (for `first`, the offset is shifted by the distance between the two range starts)
readahead(second.fd(), e_second.begin(), e_second.size());
readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
auto hash_table = ctx->hash_table();
// Look backward
BEESTRACE("grow_backward " << *this);
while (first.size() < BLOCK_SIZE_MAX_EXTENT) {
if (second.begin() <= e_second.begin()) {
#if 0
if (constrained) {
break;
}
BEESCOUNT(pairbackward_extent);
ew_second.seek(second.begin() - min(BLOCK_SIZE_CLONE, second.begin()));
e_second = ew_second.current();
if (e_second.flags() & Extent::HOLE) {
BEESCOUNT(pairbackward_hole);
break;
}
readahead(second.fd(), e_second.begin(), e_second.size());
#else
// This tends to repeatedly process extents that were recently processed.
// We tend to catch duplicate blocks early since we scan them forwards.
// Also, reading backwards is slow so we probably don't want to do it much.
break;
#endif
}
BEESCOUNT(pairbackward_try);
// Extend first range. If we hit BOF we can go no further.
BeesFileRange new_first = first;
BEESTRACE("new_first = " << new_first);
new_first.grow_begin(BLOCK_SIZE_CLONE);
if (new_first.begin() == first.begin()) {
BEESCOUNT(pairbackward_bof_first);
break;
}
// Source extent cannot be toxic
// (the address looked up here is that of the newly added block at new_first.begin())
BeesAddress first_addr(first.fd(), new_first.begin());
if (!first_addr.is_magic()) {
auto first_resolved = ctx->resolve_addr(first_addr);
if (first_resolved.is_toxic()) {
BEESLOGWARN("WORKAROUND: not growing matching pair backward because src addr is toxic:\n" << *this);
BEESCOUNT(pairbackward_toxic_addr);
break;
}
}
// Extend second range. If we hit BOF we can go no further.
BeesFileRange new_second = second;
BEESTRACE("new_second = " << new_second);
new_second.grow_begin(BLOCK_SIZE_CLONE);
if (new_second.begin() == second.begin()) {
BEESCOUNT(pairbackward_bof_second);
break;
}
// If the ranges now overlap we went too far
if (new_first.overlaps(new_second)) {
BEESCOUNT(pairbackward_overlap);
break;
}
// Load just the newly added block of each range (new begin .. old begin)
BEESTRACE("first " << first << " new_first " << new_first);
BeesBlockData first_bbd(first.fd(), new_first.begin(), first.begin() - new_first.begin());
BEESTRACE("first_bbd " << first_bbd);
BEESTRACE("second " << second << " new_second " << new_second);
BeesBlockData second_bbd(second.fd(), new_second.begin(), second.begin() - new_second.begin());
BEESTRACE("second_bbd " << second_bbd);
// Both blocks must have identical content
if (!first_bbd.is_data_equal(second_bbd)) {
BEESCOUNT(pairbackward_miss);
break;
}
// Physical blocks must be distinct
if (first_bbd.addr().get_physical_or_zero() == second_bbd.addr().get_physical_or_zero()) {
BEESCOUNT(pairbackward_same);
break;
}
// Source block cannot be zero in a non-compressed non-magic extent
if (first_bbd.is_data_zero() && !first_addr.is_magic() && !first_addr.is_compressed()) {
BEESCOUNT(pairbackward_zero);
break;
}
// Source block cannot have a toxic hash
auto found_hashes = hash_table->find_cell(first_bbd.hash());
bool found_toxic = false;
for (auto i : found_hashes) {
if (BeesAddress(i.e_addr).is_toxic()) {
found_toxic = true;
break;
}
}
if (found_toxic) {
BEESLOGWARN("WORKAROUND: found toxic hash in " << first_bbd << " while extending backward:\n" << *this);
BEESCOUNT(pairbackward_toxic_hash);
break;
}
// All checks passed: commit the extended ranges and try another step
THROW_CHECK2(invalid_argument, new_first.size(), new_second.size(), new_first.size() == new_second.size());
first = new_first;
second = new_second;
rv = true;
BEESCOUNT(pairbackward_hit);
}
BEESCOUNT(pairbackward_stop);
BEESCOUNTADD(pairbackward_ms, grow_backward_timer.age() * 1000);
// Look forward
BEESTRACE("grow_forward " << *this);
Timer grow_forward_timer;
while (first.size() < BLOCK_SIZE_MAX_EXTENT) {
// When `second` has reached the end of the current extent, either stop
// (constrained) or move on to the next extent, unless it is a hole
if (second.end() >= e_second.end()) {
if (constrained) {
break;
}
BEESCOUNT(pairforward_extent);
ew_second.seek(second.end());
e_second = ew_second.current();
if (e_second.flags() & Extent::HOLE) {
BEESCOUNT(pairforward_hole);
break;
}
readahead(second.fd(), e_second.begin(), e_second.size());
}
BEESCOUNT(pairforward_try);
// Extend first range. If we hit EOF we can go no further.
BeesFileRange new_first = first;
BEESTRACE("new_first = " << new_first);
new_first.grow_end(BLOCK_SIZE_CLONE);
if (new_first.end() == first.end()) {
BEESCOUNT(pairforward_eof_first);
break;
}
// Source extent cannot be toxic
// NOTE(review): grow_end() only moves the end, so new_first.begin() equals
// first.begin() here and this looks up the address of the first block of the
// range rather than the newly added block at first.end().  The same address
// also feeds the zero-block check below.  Confirm whether this is intended.
BeesAddress first_addr(first.fd(), new_first.begin());
if (!first_addr.is_magic()) {
auto first_resolved = ctx->resolve_addr(first_addr);
if (first_resolved.is_toxic()) {
BEESLOGWARN("WORKAROUND: not growing matching pair forward because src is toxic:\n" << *this);
BEESCOUNT(pairforward_toxic);
break;
}
}
// Extend second range. If we hit EOF we can go no further.
BeesFileRange new_second = second;
BEESTRACE("new_second = " << new_second);
new_second.grow_end(BLOCK_SIZE_CLONE);
if (new_second.end() == second.end()) {
BEESCOUNT(pairforward_eof_second);
break;
}
// If we have hit an unaligned EOF then it has to be the same unaligned EOF.
// If we haven't hit EOF then the ends of the ranges are still aligned,
// so the misalignment (zero) will be equal.
if ((new_second.end() & BLOCK_MASK_CLONE) != (new_first.end() & BLOCK_MASK_CLONE)) {
BEESCOUNT(pairforward_eof_malign);
break;
}
// If the ranges now overlap we went too far
if (new_first.overlaps(new_second)) {
BEESCOUNT(pairforward_overlap);
break;
}
// Load just the newly added block of each range (old end .. new end)
BEESTRACE("first " << first << " new_first " << new_first);
BeesBlockData first_bbd(first.fd(), first.end(), new_first.end() - first.end());
BEESTRACE("first_bbd " << first_bbd);
BEESTRACE("second " << second << " new_second " << new_second);
BeesBlockData second_bbd(second.fd(), second.end(), new_second.end() - second.end());
BEESTRACE("second_bbd " << second_bbd);
// Both blocks must have identical content
if (!first_bbd.is_data_equal(second_bbd)) {
BEESCOUNT(pairforward_miss);
break;
}
// Physical blocks must be distinct
if (first_bbd.addr().get_physical_or_zero() == second_bbd.addr().get_physical_or_zero()) {
BEESCOUNT(pairforward_same);
break;
}
// Source block cannot be zero in a non-compressed non-magic extent
if (first_bbd.is_data_zero() && !first_addr.is_magic() && !first_addr.is_compressed()) {
BEESCOUNT(pairforward_zero);
break;
}
// Source block cannot have a toxic hash
auto found_hashes = hash_table->find_cell(first_bbd.hash());
bool found_toxic = false;
for (auto i : found_hashes) {
if (BeesAddress(i.e_addr).is_toxic()) {
found_toxic = true;
break;
}
}
if (found_toxic) {
BEESLOGWARN("WORKAROUND: found toxic hash in " << first_bbd << " while extending forward:\n" << *this);
BEESCOUNT(pairforward_toxic_hash);
break;
}
// OK, next block
THROW_CHECK2(invalid_argument, new_first.size(), new_second.size(), new_first.size() == new_second.size());
first = new_first;
second = new_second;
rv = true;
BEESCOUNT(pairforward_hit);
}
// An overlap at this point is only logged and counted, not treated as an error
if (first.overlaps(second)) {
BEESLOGTRACE("after grow, first " << first << "\n\toverlaps " << second);
BEESCOUNT(bug_grow_pair_overlaps);
}
BEESCOUNT(pairforward_stop);
BEESCOUNTADD(pairforward_ms, grow_forward_timer.age() * 1000);
return rv;
}
// Return a copy of this pair built from the copy_closed() form of each range.
// The copy goes through the BeesRangePair constructor, so its checks apply.
BeesRangePair
BeesRangePair::copy_closed() const
{
	const BeesFileRange closed_src = first.copy_closed();
	const BeesFileRange closed_dst = second.copy_closed();
	return BeesRangePair(closed_src, closed_dst);
}
// Write a block address to a stream.
// Magic values are printed by name (ZERO, DELALLOC, HOLE, UNUSABLE).
// Other addresses are printed as the physical address in hex ("NIL" when it
// is 0x1000), followed by flag letters: "t" for toxic, "u" for unaligned EOF,
// and "z" for compressed, with the compressed offset in hex when present.
ostream &
operator<<(ostream &os, const BeesAddress &ba)
{
	if (!ba.is_magic()) {
		const auto physical = ba.get_physical_or_zero();
		if (physical == 0x1000) {
			os << "NIL";
		} else {
			os << to_hex(physical);
		}
		if (ba.is_toxic()) {
			os << "t";
		}
		if (ba.is_unaligned_eof()) {
			os << "u";
		}
		if (ba.is_compressed()) {
			os << "z";
			if (ba.has_compressed_offset()) {
				os << astringprintf("%" PRIx64, ba.get_compressed_offset());
			}
		}
		return os;
	}
	// Local aliases so the name table entries carry the short names
	enum {
		ZERO = BeesAddress::MagicValue::ZERO,
		DELALLOC = BeesAddress::MagicValue::DELALLOC,
		HOLE = BeesAddress::MagicValue::HOLE,
		UNUSABLE = BeesAddress::MagicValue::UNUSABLE,
	};
	static const bits_ntoa_table table[] = {
		NTOA_TABLE_ENTRY_ENUM(ZERO),
		NTOA_TABLE_ENTRY_ENUM(DELALLOC),
		NTOA_TABLE_ENTRY_ENUM(HOLE),
		NTOA_TABLE_ENTRY_ENUM(UNUSABLE),
		NTOA_TABLE_ENTRY_END()
	};
	return os << bits_ntoa(static_cast<BeesAddress::Type>(ba), table);
}
// Classify extent flags that cannot be represented as a physical address.
// If the flags indicate a hole, an unrecognized or unusable extent, or a
// delayed allocation, m_addr is set to the corresponding magic value
// (HOLE, UNUSABLE or DELALLOC), a counter is bumped, and true is returned.
// Otherwise m_addr is left untouched and false is returned.
// The checks run in priority order: hole, unrecognized, unusable, delalloc.
bool
BeesAddress::magic_check(uint64_t flags)
{
// This one isn't FIEMAP
if (flags & Extent::HOLE) {
m_addr = HOLE;
BEESCOUNT(addr_hole);
return true;
}
// These trigger extra processing steps for compressed extents
static const unsigned compressed_flags = FIEMAP_EXTENT_ENCODED;
// These indicate the extent is not yet on disk (try again with sync)
static const unsigned delalloc_flags = FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC;
// These flags are irrelevant to extent-same
static const unsigned ignore_flags = FIEMAP_EXTENT_LAST | FIEMAP_EXTENT_SHARED;
// These flags mean we can't use extent-same
static const unsigned unusable_flags = FIEMAP_EXTENT_NOT_ALIGNED | FIEMAP_EXTENT_DATA_INLINE;
// All of the above (any other flag is a new feature we maybe can't cope with)
static const unsigned recognized_flags = compressed_flags | delalloc_flags | ignore_flags | unusable_flags;
// NOTE(review): recognized_flags is `unsigned`, so ~recognized_flags is
// zero-extended when combined with the 64-bit `flags`; bits of `flags` above
// the width of `unsigned` are therefore never reported as unrecognized here.
// Confirm whether that is intended before widening the masks.
if (flags & ~recognized_flags) {
BEESLOGTRACE("Unrecognized flags in " << fiemap_extent_flags_ntoa(flags));
m_addr = UNUSABLE;
// maybe we throw here?
BEESCOUNT(addr_unrecognized);
return true;
}
if (flags & unusable_flags) {
// we know these, but can't touch them
BEESCOUNT(addr_unusable);
m_addr = UNUSABLE;
return true;
}
if (flags & delalloc_flags) {
// delayed allocation, try again with force
BEESCOUNT(addr_delalloc);
m_addr = DELALLOC;
return true;
}
return false;
}
// Build the address of the block at file position `offset`, given the
// extent `e` that describes that part of the file.
//
// If magic_check() recognizes the extent flags, the address becomes the
// corresponding magic value.  Otherwise the address is physical:
//  - for an encoded (compressed) extent, the extent's physical address
//    combined with the compressed flag and a 1-based block index of
//    `offset` within the extent;
//  - for any other extent, the physical address plus the distance of
//    `offset` from the beginning of the extent.
// The EOF flag is added when `offset` falls in the final, unaligned block of
// an extent flagged as the last one in the file.
//
// Throws invalid_argument if the extent or the offset is not aligned to
// BLOCK_MASK_CLONE, the extent is empty, the physical address is not positive,
// or the compressed offset is out of range.
BeesAddress::BeesAddress(const Extent &e, off_t offset) :
	m_addr(ZERO)
{
	BEESTRACE("BeesAddress " << e << " offset " << to_hex(offset));
	Type new_addr = 0;
	THROW_CHECK1(invalid_argument, e, (e.physical() & BLOCK_MASK_CLONE) == 0);
	THROW_CHECK1(invalid_argument, e, (e.begin() & BLOCK_MASK_CLONE) == 0);
	THROW_CHECK1(invalid_argument, e, (offset & BLOCK_MASK_CLONE) == 0);
	THROW_CHECK1(invalid_argument, e, e.end() > e.begin());
	if (magic_check(e.flags())) {
		BEESCOUNT(addr_magic);
		return;
	}
	// All addresses from here on are physical
	THROW_CHECK1(invalid_argument, e, e.physical() > 0);
	if (e.flags() & FIEMAP_EXTENT_ENCODED) {
		THROW_CHECK1(invalid_argument, e, (e.offset() & BLOCK_MASK_CLONE) == 0);
		THROW_CHECK1(invalid_argument, e, e.offset() >= 0 && e.offset() < BLOCK_SIZE_MAX_COMPRESSED_EXTENT);
		// Keep the full off_t width: storing this in an int would truncate
		// a large value before the range check below gets to see it
		const off_t extent_offset = offset - e.begin() + e.offset();
		BEESTRACE("extent_offset = " << to_hex(extent_offset));
		THROW_CHECK1(invalid_argument, extent_offset, extent_offset >= 0 && extent_offset < BLOCK_SIZE_MAX_COMPRESSED_EXTENT);
		THROW_CHECK1(invalid_argument, extent_offset, (extent_offset & BLOCK_MASK_CLONE) == 0);
		// The block index is stored 1-based in the offset bits
		unsigned offset_bits = (extent_offset / BLOCK_SIZE_CLONE) + 1;
		BEESTRACE("offset_bits = " << offset_bits);
		THROW_CHECK1(invalid_argument, offset_bits, offset_bits >= c_offset_min && offset_bits <= c_offset_max);
		THROW_CHECK1(invalid_argument, offset_bits, (offset_bits & ~c_offset_mask) == 0);
#if 1
		new_addr = e.physical() | c_compressed_mask | offset_bits;
		BEESCOUNT(addr_compressed_offset);
#else
		new_addr = e.physical() | c_compressed_mask;
		BEESCOUNT(addr_compressed);
#endif
	} else {
		new_addr = e.physical() + (offset - e.begin());
		BEESCOUNT(addr_uncompressed);
	}
	// Flag the final block of the file when that block is not a full clone block
	if ((e.flags() & FIEMAP_EXTENT_LAST) && (e.end() & BLOCK_MASK_CLONE) != 0 && (offset & ~BLOCK_MASK_CLONE) == (e.end() & ~BLOCK_MASK_CLONE)) {
		new_addr |= c_eof_mask;
		BEESCOUNT(addr_eof_e);
	}
	m_addr = new_addr;
	BEESCOUNT(addr_block);
}
// Build the address of the block at `offset` in the file open on `fd` by
// looking up the extent at that offset with a BtrfsExtentWalker.
// Throws invalid_argument if the offset has any address flag bits set or is
// not aligned to BLOCK_MASK_CLONE.
BeesAddress::BeesAddress(int fd, off_t offset) :
	m_addr(ZERO)
{
	BEESTOOLONG("BeesAddress(fd " << fd << " " << name_fd(fd) << " offset " << to_hex(offset) << ")");
	BEESTRACE("BeesAddress(fd " << fd << " " << name_fd(fd) << " offset " << to_hex(offset) << ")");
	Type uoffset = ranged_cast<Type>(offset);
	THROW_CHECK1(invalid_argument, uoffset, (uoffset & c_all_mask) == 0);
	THROW_CHECK1(invalid_argument, uoffset, (uoffset & BLOCK_MASK_CLONE) == 0);
	// Time the extent lookup so it can be added to the addr_ms counter
	Timer lookup_timer;
	BtrfsExtentWalker walker(fd, uoffset);
	const Extent found = walker.current();
	BEESCOUNT(addr_from_fd);
	BEESCOUNTADD(addr_ms, lookup_timer.age() * 1000);
	// The extent-based constructor does the real work
	operator=(BeesAddress(found, offset));
}
// Build the address of the block at `offset` in the file open on `fd`.
// Same as the (fd, offset) constructor, except that the BtrfsExtentWalker is
// also given the root fd of the context.
// Throws invalid_argument if the offset has any address flag bits set or is
// not aligned to BLOCK_MASK_CLONE.
BeesAddress::BeesAddress(int fd, off_t offset, shared_ptr<BeesContext> ctx) :
	m_addr(ZERO)
{
	BEESTOOLONG("BeesAddress(fd " << fd << " " << name_fd(fd) << " offset " << to_hex(offset) << " ctx " << ctx->root_path() << ")");
	BEESTRACE("BeesAddress(fd " << fd << " " << name_fd(fd) << " offset " << to_hex(offset) << " ctx " << ctx->root_path() << ")");
	Type uoffset = ranged_cast<Type>(offset);
	THROW_CHECK1(invalid_argument, uoffset, (uoffset & c_all_mask) == 0);
	THROW_CHECK1(invalid_argument, uoffset, (uoffset & BLOCK_MASK_CLONE) == 0);
	// Time the extent lookup so it can be added to the addr_ms counter
	Timer lookup_timer;
	BtrfsExtentWalker walker(fd, uoffset, ctx->root_fd());
	const Extent found = walker.current();
	BEESCOUNT(addr_from_root_fd);
	BEESCOUNTADD(addr_ms, lookup_timer.age() * 1000);
	// The extent-based constructor does the real work
	operator=(BeesAddress(found, offset));
}
// Return only the physical address, with every bit covered by c_all_mask
// (extra flags and compressed block offset) cleared.
// Magic values are reported as zero.
BeesAddress::Type
BeesAddress::get_physical_or_zero() const
{
	if (!is_magic()) {
		return m_addr & ~c_all_mask;
	}
	return 0;
}
// A compressed block address has two fields: the beginning of the physical
// extent, and the position of the current block within that extent.
// The position is stored in the offset bits as a 1-based count of CLONE
// blocks; this returns it as a byte distance from the start of the extent.
// Throws invalid_argument if has_compressed_offset() is false.
BeesAddress::Type
BeesAddress::get_compressed_offset() const
{
	THROW_CHECK1(invalid_argument, *this, has_compressed_offset());
	const auto zero_based_block = (m_addr & c_offset_mask) - 1;
	return zero_based_block * BLOCK_SIZE_CLONE;
}
// Mark this address as toxic by setting the toxic flag bit.
// Throws invalid_argument if the address is a magic value.
void
BeesAddress::set_toxic()
{
	THROW_CHECK1(invalid_argument, *this, !is_magic());
	m_addr = m_addr | c_toxic_mask;
}
// Compare two addresses for equality.
// When both sides agree on whether they carry a compressed offset, the raw
// values are compared.  When only one side has an offset, the offset bits are
// ignored on both sides.  This returns the right result for comparisons
// between magic and non-magic values, even though the math is all wrong.
bool
BeesAddress::operator==(const BeesAddress &that) const
{
	if (has_compressed_offset() == that.has_compressed_offset()) {
		return m_addr == that.m_addr;
	}
	const auto lhs = m_addr & ~c_offset_mask;
	const auto rhs = that.m_addr & ~c_offset_mask;
	return lhs == rhs;
}
// Order two addresses by raw value.  When only one side carries a compressed
// offset, the offset bits are ignored on both sides, matching operator==.
bool
BeesAddress::operator<(const BeesAddress &that) const
{
	if (has_compressed_offset() == that.has_compressed_offset()) {
		return m_addr < that.m_addr;
	}
	const auto lhs = m_addr & ~c_offset_mask;
	const auto rhs = that.m_addr & ~c_offset_mask;
	return lhs < rhs;
}
// Write a description of a block to a stream: length, offset, fd and fd name,
// then the address (unless it is still ZERO), the hash (if computed), and
// the size of the data buffer (if any data has been read).
ostream &
operator<<(ostream &os, const BeesBlockData &bbd)
{
	os << "BeesBlockData { " << pretty(bbd.m_length) << " " << to_hex(bbd.m_offset);
	os << " fd = " << bbd.m_fd << " '" << name_fd(bbd.m_fd) << "'";
	if (bbd.m_addr != BeesAddress::ZERO) {
		os << ", address = " << bbd.m_addr;
	}
	if (bbd.m_hash_done) {
		os << ", hash = " << bbd.m_hash;
	}
	const bool have_data = !bbd.m_data.empty();
	if (have_data) {
		// Turn this on to debug BeesBlockData, but leave it off otherwise.
		// It's a massive data leak that is only interesting to developers.
#if 0
		os << ", data[" << bbd.m_data.size() << "] = '";
		size_t max_print = 12;
		size_t to_print = min(bbd.m_data.size(), max_print);
		for (size_t i = 0; i < to_print; ++i) {
			uint8_t c = bbd.m_data[i];
			// We are ASCII heathens here
			if (c >= 32 && c < 127 && c != '\\') {
				os << c;
			} else {
				char buf[8];
				sprintf(buf, "\\x%02x", c);
				os << buf;
			}
		}
		os << "...'";
#else
		os << ", data[" << bbd.m_data.size() << "]";
#endif
	}
	os << " }";
	return os;
}
// Describe a block of read_length bytes at `offset` in the file open on `fd`.
// Nothing is read here; data() reads the bytes when they are first needed.
// Throws invalid_argument if the length is zero or larger than
// BLOCK_SIZE_SUMS, or if the offset is not a multiple of BLOCK_SIZE_SUMS.
BeesBlockData::BeesBlockData(Fd fd, off_t offset, size_t read_length) :
m_fd(fd),
m_offset(offset),
m_length(read_length)
{
BEESTRACE("Constructing " << *this);
THROW_CHECK1(invalid_argument, m_length, m_length > 0);
THROW_CHECK1(invalid_argument, m_length, m_length <= BLOCK_SIZE_SUMS);
THROW_CHECK1(invalid_argument, m_offset, (m_offset % BLOCK_SIZE_SUMS) == 0);
}
// Build an empty block description with zero offset and zero length.
BeesBlockData::BeesBlockData() :
m_offset(0),
m_length(0)
{
}
// Return the address of this block.  While the stored address is still ZERO
// it is looked up from the fd and offset, and the result is remembered.
BeesAddress
BeesBlockData::addr() const
{
	if (!(m_addr == BeesAddress::ZERO)) {
		return m_addr;
	}
	m_addr = BeesAddress(fd(), m_offset);
	return m_addr;
}
// Set the address of this block explicitly.  Returns *this so calls can be chained.
BeesBlockData &
BeesBlockData::addr(const BeesAddress &a)
{
m_addr = a;
return *this;
}
// Return the bytes of this block, reading them from the file on first use
// and keeping them in m_data for later calls.
// Throws invalid_argument if the block size is not positive, and
// runtime_error if the buffer size after the read differs from the block size.
const BeesBlockData::Blob &
BeesBlockData::data() const
{
	// Already read: hand back the stored bytes
	if (!m_data.empty()) {
		return m_data;
	}
	THROW_CHECK1(invalid_argument, size(), size() > 0);
	BEESNOTE("Reading BeesBlockData " << *this);
	BEESTOOLONG("Reading BeesBlockData " << *this);
	Timer io_timer;
	Blob rv(size());
	pread_or_die(m_fd, rv, m_offset);
	THROW_CHECK2(runtime_error, rv.size(), size(), ranged_cast<off_t>(rv.size()) == size());
	m_data = rv;
	BEESCOUNT(block_read);
	BEESCOUNTADD(block_bytes, rv.size());
	BEESCOUNTADD(block_ms, io_timer.age() * 1000);
	return m_data;
}
// Return the CRC64 of the block's data, computing it on first use and
// remembering the result.
BeesHash
BeesBlockData::hash() const
{
	if (m_hash_done) {
		return m_hash;
	}
	// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
	// so the hash covers exactly the bytes read and is NOT rounded up to a
	// full sum block size.
	const Blob &bytes = data();
	// TODO:  It turns out that file formats with 4K block
	// alignment and embedded CRC64 do exist, and every block
	// of such files has the same hash.  Could use a subset
	// of SHA1 here instead.
	m_hash = Digest::CRC::crc64(bytes.data(), bytes.size());
	m_hash_done = true;
	BEESCOUNT(block_hash);
	return m_hash;
}
bool
BeesBlockData::is_data_zero() const
{
// The CRC64 of zero is zero, so skip some work if we already know the CRC
if (m_hash_done && m_hash != 0) {
return false;
}
// OK read block (maybe) and check every byte
for (auto c : data()) {
if (c != '\0') {
return false;
}
}
BEESCOUNT(block_zero);
return true;
}
// True when this block and `that` hold byte-for-byte identical data.
// Throws invalid_argument if this block is empty or the two sizes differ.
bool
BeesBlockData::is_data_equal(const BeesBlockData &that) const
{
	BEESTRACE("is_data_equal this = " << *this << ", that = " << that);
	THROW_CHECK1(invalid_argument, size(), size() > 0);
	THROW_CHECK2(invalid_argument, size(), that.size(), size() == that.size());
	// When both CRCs are already known and differ, the data must differ too,
	// so the blocks need not be read
	const bool both_hashed = m_hash_done && that.m_hash_done;
	if (both_hashed && m_hash != that.m_hash) {
		return false;
	}
	return data() == that.data();
}