1
0
mirror of https://github.com/Zygo/bees.git synced 2025-05-17 13:25:45 +02:00

extentwalker: fix the binary search and add some debug infrastructure

Add some conditionally-compiled debug code, including an in-memory log
of what ExtentWalker does.  Dump that log on exceptions.

If we loop too many times in a debug build, kill the process so we can
stack trace.  In non-debug builds just throw a normal exception.

Grow the step size instead of shrinking it, to reduce the number of
binary search iterations.

Prevent a bug where the step size bottoms out before positioning the
target extent in the middle of the result vector.

Use the first extent for "first_extent", instead of the 3rd.

Get rid of some redundant checks.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell 2021-06-01 20:06:54 -04:00
parent 54f03a0297
commit 6adaedeecd
2 changed files with 82 additions and 55 deletions

View File

@ -58,10 +58,6 @@ namespace crucible {
virtual Vec get_extent_map(off_t pos); virtual Vec get_extent_map(off_t pos);
static const unsigned sc_extent_fetch_max = 16;
static const unsigned sc_extent_fetch_min = 4;
static const off_t sc_step_size = 0x1000 * (sc_extent_fetch_max / 2);
private: private:
Vec m_extents; Vec m_extents;
Itr m_current; Itr m_current;
@ -69,6 +65,10 @@ namespace crucible {
Itr find_in_cache(off_t pos); Itr find_in_cache(off_t pos);
void run_fiemap(off_t pos); void run_fiemap(off_t pos);
#ifdef EXTENTWALKER_DEBUG
ostringstream m_log;
#endif
public: public:
ExtentWalker(Fd fd = Fd()); ExtentWalker(Fd fd = Fd());
ExtentWalker(Fd fd, off_t initial_pos); ExtentWalker(Fd fd, off_t initial_pos);

View File

@ -9,21 +9,37 @@
namespace crucible { namespace crucible {
using namespace std; using namespace std;
const off_t ExtentWalker::sc_step_size;
// fm_start, fm_length, fm_flags, m_extents // fm_start, fm_length, fm_flags, m_extents
// fe_logical, fe_physical, fe_length, fe_flags // fe_logical, fe_physical, fe_length, fe_flags
static const off_t FIEMAP_BLOCK_SIZE = 4096; static const off_t FIEMAP_BLOCK_SIZE = 4096;
static bool __ew_do_log = getenv("EXTENTWALKER_DEBUG"); // Maximum number of extents from TREE_SEARCH.
static const unsigned sc_extent_fetch_max = 16;
// Minimum number of extents from TREE_SEARCH.
// If we don't get this number, we'll binary search backward
// until we reach the beginning of the file or find at least this
// number of extents.
static const unsigned sc_extent_fetch_min = 4;
// This is a guess that tries to land at least one extent
// before the target extent, so we don't have to search backward as often.
static const off_t sc_back_step_size = 64 * 1024;
#ifdef EXTENTWALKER_DEBUG
#define EWLOG(x) do { \ #define EWLOG(x) do { \
if (__ew_do_log) { \ m_log << x << endl; \
CHATTER(x); \
} \
} while (0) } while (0)
#define EWTRACE(x) do { \
CHATTER_UNWIND(x); \
} while (0)
#else
#define EWLOG(x) do {} while (0)
#define EWTRACE(x) do {} while (0)
#endif
ostream & ostream &
operator<<(ostream &os, const Extent &e) operator<<(ostream &os, const Extent &e)
{ {
@ -67,12 +83,16 @@ namespace crucible {
ostream & ostream &
operator<<(ostream &os, const ExtentWalker &ew) operator<<(ostream &os, const ExtentWalker &ew)
{ {
return os << "ExtentWalker {" os << "ExtentWalker {"
<< " fd = " << name_fd(ew.m_fd) << " fd = " << name_fd(ew.m_fd)
<< ", stat.st_size = " << to_hex(ew.m_stat.st_size) << ", stat.st_size = " << to_hex(ew.m_stat.st_size)
<< ", extents = " << ew.m_extents << ", extents = " << ew.m_extents
<< ", current = [" << ew.m_current - ew.m_extents.begin() << ", current = [" << ew.m_current - ew.m_extents.begin()
<< "] }"; << "] ";
#ifdef EXTENTWALKER_DEBUG
os << "\nLog:\n" << ew.m_log.str() << "\nEnd log";
#endif
return os << "}";
} }
Extent::operator bool() const Extent::operator bool() const
@ -158,8 +178,7 @@ namespace crucible {
void void
ExtentWalker::run_fiemap(off_t pos) ExtentWalker::run_fiemap(off_t pos)
{ {
ostringstream log; EWTRACE("Log of run_fiemap: " << m_log.str());
CHATTER_UNWIND("Log of run_fiemap: " << log.str());
EWLOG("pos = " << to_hex(pos)); EWLOG("pos = " << to_hex(pos));
@ -167,18 +186,24 @@ namespace crucible {
Vec fm; Vec fm;
off_t step_size = pos; // Start backward search by dropping lowest bit
off_t begin = pos - min(pos, sc_step_size); off_t step_size = (pos > 0) ? (pos ^ (pos & (pos - 1))) * 2 : 0;
// Start first pass through loop just a little before the target extent,
// because the first iteration will be wasted if we have an exact match.
off_t begin = pos - min(pos, sc_back_step_size);
// This loop should not run forever // This loop should not run forever
int loop_count = 0; int loop_count = 0;
int loop_limit = 99; const int loop_limit = 99;
while (true) { while (true) {
if (loop_count == 90) { #ifdef EXTENTWALKER_DEBUG
EWLOG(log.str()); if (loop_count >= loop_limit) {
cerr << "Too many loops!" << endl << m_log.str() << endl;
abort();
} }
#endif
THROW_CHECK1(runtime_error, loop_count, loop_count < loop_limit); THROW_CHECK2(runtime_error, *this, loop_count, loop_count < loop_limit);
++loop_count; ++loop_count;
// Get file size every time in case it changes under us // Get file size every time in case it changes under us
@ -186,7 +211,16 @@ namespace crucible {
// Get fiemap begin..EOF // Get fiemap begin..EOF
fm = get_extent_map(begin); fm = get_extent_map(begin);
EWLOG("fiemap result loop count #" << loop_count << ":" << fm); EWLOG("fiemap result loop count #" << loop_count << " begin " << to_hex(begin) << " pos "
<< to_hex(pos) << " step_size " << to_hex(step_size) << ":\n" << fm);
// Sanity check on the data: in order, not overlapping, not empty, not before pos
off_t sanity_pos = begin;
for (auto const &i : fm) {
THROW_CHECK1(runtime_error, fm, i.begin() >= sanity_pos);
THROW_CHECK1(runtime_error, fm, i.end() > i.begin());
sanity_pos = i.end();
}
// This algorithm seeks at least three extents: one before, // This algorithm seeks at least three extents: one before,
// one after, and one containing pos. Files which contain // one after, and one containing pos. Files which contain
@ -194,15 +228,15 @@ namespace crucible {
// so handle those cases separately. // so handle those cases separately.
// FIEMAP lies, and we catch it in a lie about the size of the // FIEMAP lies, and we catch it in a lie about the size of the
// second extent. To work around this, try getting more than 3. // second extent. To work around this, sc_extent_fetch_min is at least 4.
// 0..2(ish) extents // 0..2(ish) extents
if (fm.size() < sc_extent_fetch_min) { if (fm.size() < sc_extent_fetch_min) {
// If we are not at beginning of file, move backward // If we are not at beginning of file, move backward by zeroing the lowest bit
if (begin > 0) { if (begin > 0) {
step_size /= 2; step_size = (begin > 0) ? (begin ^ (begin & (begin - 1))) : 0;
auto next_begin = (begin - min(step_size, begin)) & ~(FIEMAP_BLOCK_SIZE - 1); auto next_begin = (begin - min(step_size, begin)) & ~(FIEMAP_BLOCK_SIZE - 1);
EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin) << " extents size " << fm.size()); EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin));
if (begin == next_begin) { if (begin == next_begin) {
EWLOG("step backward stopped"); EWLOG("step backward stopped");
break; break;
@ -230,18 +264,18 @@ namespace crucible {
// We have at least three extents, so there is now a first and last. // We have at least three extents, so there is now a first and last.
// We want pos to be between first and last. There doesn't have // We want pos to be between first and last. There doesn't have
// to be an extent between these (it could be a hole). // to be an extent between these (it could be a hole).
auto &first_extent = fm.at(sc_extent_fetch_min - 2); auto &first_extent = *fm.begin();
auto &last_extent = *fm.rbegin(); auto &last_extent = *fm.rbegin();
EWLOG("first_extent = " << first_extent); EWLOG("first_extent = " << first_extent);
EWLOG("last_extent = " << last_extent); EWLOG("last_extent = " << last_extent);
// First extent must end on or before pos // First extent must end on or before pos; otherwise, go further back
if (first_extent.end() > pos) { if (first_extent.end() > pos) {
// Can we move backward? // Can we move backward?
if (begin > 0) { if (begin > 0) {
step_size /= 2; step_size = (begin > 0) ? (begin ^ (begin & (begin - 1))) : 0;
auto next_begin = (begin - min(step_size, begin)) & ~(FIEMAP_BLOCK_SIZE - 1); auto next_begin = (begin - min(step_size, begin)) & ~(FIEMAP_BLOCK_SIZE - 1);
EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin) << " extents size " << fm.size()); EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin));
if (begin == next_begin) { if (begin == next_begin) {
EWLOG("step backward stopped"); EWLOG("step backward stopped");
break; break;
@ -251,38 +285,29 @@ namespace crucible {
} }
// We are as far back as we can go, so there must be no // We are as far back as we can go, so there must be no
// extent before pos (i.e. file starts with a hole). // extent before pos (i.e. file starts with a hole
// or first extent starts at pos 0).
EWLOG("no extent before pos"); EWLOG("no extent before pos");
break; break;
} }
// First extent ends on or before pos. // If last extent is EOF then we cannot more any further forward.
// If last extent is EOF then we have the entire file in the buffer.
// pos could be in last extent, so skip the later checks that // pos could be in last extent, so skip the later checks that
// insist pos be located prior to the last extent. // insist pos be located prior to the last extent.
if (last_extent.flags() & FIEMAP_EXTENT_LAST) { if (last_extent.flags() & FIEMAP_EXTENT_LAST) {
break; break;
} }
// Don't have EOF, must have an extent after pos. // Don't have EOF, must have an extent after pos; otherwise, go forward
if (last_extent.begin() <= pos) { if (last_extent.begin() <= pos) {
// Set the bit just below the one we last cleared
step_size /= 2; step_size /= 2;
auto new_begin = (begin + step_size) & ~(FIEMAP_BLOCK_SIZE - 1); auto new_begin = (begin + max(FIEMAP_BLOCK_SIZE, step_size)) & ~(FIEMAP_BLOCK_SIZE - 1);
EWLOG("step forward " << to_hex(begin) << " -> " << to_hex(new_begin)); EWLOG("step forward " << to_hex(begin) << " -> " << to_hex(new_begin));
if (begin == new_begin) {
EWLOG("step forward stopped");
break;
}
begin = new_begin; begin = new_begin;
continue; continue;
} }
// Last extent begins after pos, first extent ends on or before pos.
// All other cases should have been handled before here.
THROW_CHECK2(runtime_error, pos, first_extent, first_extent.end() <= pos);
THROW_CHECK2(runtime_error, pos, last_extent, last_extent.begin() > pos);
// We should probably stop now // We should probably stop now
break; break;
} }
@ -372,7 +397,7 @@ namespace crucible {
void void
ExtentWalker::seek(off_t pos) ExtentWalker::seek(off_t pos)
{ {
CHATTER_UNWIND("seek " << to_hex(pos)); EWTRACE("seek " << to_hex(pos));
THROW_CHECK1(out_of_range, pos, pos >= 0); THROW_CHECK1(out_of_range, pos, pos >= 0);
Itr rv = find_in_cache(pos); Itr rv = find_in_cache(pos);
if (rv != m_extents.end()) { if (rv != m_extents.end()) {
@ -381,29 +406,28 @@ namespace crucible {
} }
run_fiemap(pos); run_fiemap(pos);
m_current = find_in_cache(pos); m_current = find_in_cache(pos);
THROW_CHECK2(runtime_error, *this, to_hex(pos), m_current != m_extents.end());
} }
Extent Extent
ExtentWalker::current() ExtentWalker::current()
{ {
THROW_CHECK2(invalid_argument, *this, m_extents.size(), m_current != m_extents.end()); THROW_CHECK2(invalid_argument, *this, m_extents.size(), m_current != m_extents.end());
CHATTER_UNWIND("current " << *m_current);
return *m_current; return *m_current;
} }
bool bool
ExtentWalker::next() ExtentWalker::next()
{ {
CHATTER_UNWIND("next"); EWTRACE("next");
THROW_CHECK1(invalid_argument, (m_current != m_extents.end()), m_current != m_extents.end()); THROW_CHECK1(invalid_argument, (m_current != m_extents.end()), m_current != m_extents.end());
if (current().m_end >= m_stat.st_size) { if (current().m_end >= m_stat.st_size) {
CHATTER_UNWIND("next EOF"); EWTRACE("next EOF");
return false; return false;
} }
auto next_pos = current().m_end; auto next_pos = current().m_end;
if (next_pos >= m_stat.st_size) { if (next_pos >= m_stat.st_size) {
CHATTER_UNWIND("next next_pos = " << next_pos << " m_stat.st_size = " << m_stat.st_size); EWTRACE("next next_pos = " << next_pos << " m_stat.st_size = " << m_stat.st_size);
return false; return false;
} }
seek(next_pos); seek(next_pos);
@ -421,16 +445,16 @@ namespace crucible {
bool bool
ExtentWalker::prev() ExtentWalker::prev()
{ {
CHATTER_UNWIND("prev"); EWTRACE("prev");
THROW_CHECK1(invalid_argument, (m_current != m_extents.end()), m_current != m_extents.end()); THROW_CHECK1(invalid_argument, (m_current != m_extents.end()), m_current != m_extents.end());
auto prev_iter = m_current; auto prev_iter = m_current;
if (prev_iter->m_begin == 0) { if (prev_iter->m_begin == 0) {
CHATTER_UNWIND("prev BOF"); EWTRACE("prev BOF");
return false; return false;
} }
THROW_CHECK1(invalid_argument, (prev_iter != m_extents.begin()), prev_iter != m_extents.begin()); THROW_CHECK1(invalid_argument, (prev_iter != m_extents.begin()), prev_iter != m_extents.begin());
--prev_iter; --prev_iter;
CHATTER_UNWIND("prev seeking to " << *prev_iter << "->m_begin"); EWTRACE("prev seeking to " << *prev_iter << "->m_begin");
auto prev_end = current().m_begin; auto prev_end = current().m_begin;
seek(prev_iter->m_begin); seek(prev_iter->m_begin);
THROW_CHECK1(runtime_error, (m_current != m_extents.end()), m_current != m_extents.end()); THROW_CHECK1(runtime_error, (m_current != m_extents.end()), m_current != m_extents.end());
@ -489,7 +513,7 @@ namespace crucible {
sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY; sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY;
sk.nr_items = sc_extent_fetch_max; sk.nr_items = sc_extent_fetch_max;
CHATTER_UNWIND("sk " << sk << " root_fd " << name_fd(m_root_fd)); EWTRACE("sk " << sk << " root_fd " << name_fd(m_root_fd));
sk.do_ioctl(m_root_fd); sk.do_ioctl(m_root_fd);
Vec rv; Vec rv;
@ -615,6 +639,7 @@ namespace crucible {
ExtentWalker::Vec ExtentWalker::Vec
ExtentWalker::get_extent_map(off_t pos) ExtentWalker::get_extent_map(off_t pos)
{ {
EWLOG("get_extent_map(" << to_hex(pos) << ")");
Fiemap fm; Fiemap fm;
fm.fm_start = ranged_cast<uint64_t>(pos); fm.fm_start = ranged_cast<uint64_t>(pos);
fm.fm_length = ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos); fm.fm_length = ranged_cast<uint64_t>(numeric_limits<off_t>::max() - pos);
@ -628,7 +653,9 @@ namespace crucible {
e.m_physical = i.fe_physical; e.m_physical = i.fe_physical;
e.m_flags = i.fe_flags; e.m_flags = i.fe_flags;
rv.push_back(e); rv.push_back(e);
EWLOG("push_back(" << e << ")");
} }
EWLOG("get_extent_map(" << to_hex(pos) << ") returning " << rv.size() << " extents");
return rv; return rv;
} }