From 6adaedeecd7bf11eba0eb6f8e56330beade083fc Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Tue, 1 Jun 2021 20:06:54 -0400 Subject: [PATCH] extentwalker: fix the binary search and add some debug infrastructure Add some conditionally-compiled debug code, including an in-memory log of what ExtentWalker does. Dump that log on exceptions. If we loop too many times in a debug build, kill the process so we can stack trace. In non-debug builds just throw a normal exception. Grow the step size instead of shrinking it, to reduce the number of binary search iterations. Prevent a bug where the step size bottoms out before positioning the target extent in the middle of the result vector. Use the first extent for "first_extent", instead of the 3rd. Get rid of some redundant checks. Signed-off-by: Zygo Blaxell --- include/crucible/extentwalker.h | 8 +- lib/extentwalker.cc | 129 +++++++++++++++++++------------- 2 files changed, 82 insertions(+), 55 deletions(-) diff --git a/include/crucible/extentwalker.h b/include/crucible/extentwalker.h index 1d93894..d8c4ea9 100644 --- a/include/crucible/extentwalker.h +++ b/include/crucible/extentwalker.h @@ -58,10 +58,6 @@ namespace crucible { virtual Vec get_extent_map(off_t pos); - static const unsigned sc_extent_fetch_max = 16; - static const unsigned sc_extent_fetch_min = 4; - static const off_t sc_step_size = 0x1000 * (sc_extent_fetch_max / 2); - private: Vec m_extents; Itr m_current; @@ -69,6 +65,10 @@ namespace crucible { Itr find_in_cache(off_t pos); void run_fiemap(off_t pos); +#ifdef EXTENTWALKER_DEBUG + ostringstream m_log; +#endif + public: ExtentWalker(Fd fd = Fd()); ExtentWalker(Fd fd, off_t initial_pos); diff --git a/lib/extentwalker.cc b/lib/extentwalker.cc index 12f7a4f..8fa883d 100644 --- a/lib/extentwalker.cc +++ b/lib/extentwalker.cc @@ -9,21 +9,37 @@ namespace crucible { using namespace std; - const off_t ExtentWalker::sc_step_size; - // fm_start, fm_length, fm_flags, m_extents // fe_logical, fe_physical, fe_length, fe_flags static const off_t FIEMAP_BLOCK_SIZE = 4096; - static bool __ew_do_log = getenv("EXTENTWALKER_DEBUG"); + // Maximum number of extents from TREE_SEARCH. + static const unsigned sc_extent_fetch_max = 16; + // Minimum number of extents from TREE_SEARCH. + // If we don't get this number, we'll binary search backward + // until we reach the beginning of the file or find at least this + // number of extents. + static const unsigned sc_extent_fetch_min = 4; + + // This is a guess that tries to land at least one extent + // before the target extent, so we don't have to search backward as often. + static const off_t sc_back_step_size = 64 * 1024; + +#ifdef EXTENTWALKER_DEBUG #define EWLOG(x) do { \ - if (__ew_do_log) { \ - CHATTER(x); \ - } \ + m_log << x << endl; \ } while (0) +#define EWTRACE(x) do { \ + CHATTER_UNWIND(x); \ +} while (0) +#else +#define EWLOG(x) do {} while (0) +#define EWTRACE(x) do {} while (0) +#endif + ostream & operator<<(ostream &os, const Extent &e) { @@ -67,12 +83,16 @@ namespace crucible { ostream & operator<<(ostream &os, const ExtentWalker &ew) { - return os << "ExtentWalker {" + os << "ExtentWalker {" << " fd = " << name_fd(ew.m_fd) << ", stat.st_size = " << to_hex(ew.m_stat.st_size) << ", extents = " << ew.m_extents << ", current = [" << ew.m_current - ew.m_extents.begin() - << "] }"; + << "] "; +#ifdef EXTENTWALKER_DEBUG + os << "\nLog:\n" << ew.m_log.str() << "\nEnd log"; +#endif + return os << "}"; } Extent::operator bool() const @@ -158,8 +178,7 @@ namespace crucible { void ExtentWalker::run_fiemap(off_t pos) { - ostringstream log; - CHATTER_UNWIND("Log of run_fiemap: " << log.str()); + EWTRACE("Log of run_fiemap: " << m_log.str()); EWLOG("pos = " << to_hex(pos)); @@ -167,18 +186,24 @@ namespace crucible { Vec fm; - off_t step_size = pos; - off_t begin = pos - min(pos, sc_step_size); + // Start backward search by dropping lowest bit + off_t step_size = (pos > 0) ? (pos ^ (pos & (pos - 1))) * 2 : 0; + + // Start first pass through loop just a little before the target extent, + // because the first iteration will be wasted if we have an exact match. + off_t begin = pos - min(pos, sc_back_step_size); // This loop should not run forever int loop_count = 0; - int loop_limit = 99; + const int loop_limit = 99; while (true) { - if (loop_count == 90) { - EWLOG(log.str()); +#ifdef EXTENTWALKER_DEBUG + if (loop_count >= loop_limit) { + cerr << "Too many loops!" << endl << m_log.str() << endl; + abort(); } - - THROW_CHECK1(runtime_error, loop_count, loop_count < loop_limit); +#endif + THROW_CHECK2(runtime_error, *this, loop_count, loop_count < loop_limit); ++loop_count; // Get file size every time in case it changes under us @@ -186,7 +211,16 @@ namespace crucible { // Get fiemap begin..EOF fm = get_extent_map(begin); - EWLOG("fiemap result loop count #" << loop_count << ":" << fm); + EWLOG("fiemap result loop count #" << loop_count << " begin " << to_hex(begin) << " pos " + << to_hex(pos) << " step_size " << to_hex(step_size) << ":\n" << fm); + + // Sanity check on the data: in order, not overlapping, not empty, not before pos + off_t sanity_pos = begin; + for (auto const &i : fm) { + THROW_CHECK1(runtime_error, fm, i.begin() >= sanity_pos); + THROW_CHECK1(runtime_error, fm, i.end() > i.begin()); + sanity_pos = i.end(); + } // This algorithm seeks at least three extents: one before, // one after, and one containing pos. Files which contain @@ -194,15 +228,15 @@ namespace crucible { // so handle those cases separately. // FIEMAP lies, and we catch it in a lie about the size of the - // second extent. To work around this, try getting more than 3. + // second extent. To work around this, sc_extent_fetch_min is at least 4. // 0..2(ish) extents if (fm.size() < sc_extent_fetch_min) { - // If we are not at beginning of file, move backward + // If we are not at beginning of file, move backward by zeroing the lowest bit if (begin > 0) { - step_size /= 2; + step_size = (begin > 0) ? (begin ^ (begin & (begin - 1))) : 0; auto next_begin = (begin - min(step_size, begin)) & ~(FIEMAP_BLOCK_SIZE - 1); - EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin) << " extents size " << fm.size()); + EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin)); if (begin == next_begin) { EWLOG("step backward stopped"); break; @@ -230,18 +264,18 @@ namespace crucible { // We have at least three extents, so there is now a first and last. // We want pos to be between first and last. There doesn't have // to be an extent between these (it could be a hole). - auto &first_extent = fm.at(sc_extent_fetch_min - 2); + auto &first_extent = *fm.begin(); auto &last_extent = *fm.rbegin(); EWLOG("first_extent = " << first_extent); EWLOG("last_extent = " << last_extent); - // First extent must end on or before pos + // First extent must end on or before pos; otherwise, go further back if (first_extent.end() > pos) { // Can we move backward? if (begin > 0) { - step_size /= 2; + step_size = (begin > 0) ? (begin ^ (begin & (begin - 1))) : 0; auto next_begin = (begin - min(step_size, begin)) & ~(FIEMAP_BLOCK_SIZE - 1); - EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin) << " extents size " << fm.size()); + EWLOG("step backward " << to_hex(begin) << " -> " << to_hex(next_begin)); if (begin == next_begin) { EWLOG("step backward stopped"); break; @@ -251,38 +285,29 @@ namespace crucible { } // We are as far back as we can go, so there must be no - // extent before pos (i.e. file starts with a hole). + // extent before pos (i.e. file starts with a hole + // or first extent starts at pos 0). EWLOG("no extent before pos"); break; } - // First extent ends on or before pos. - - // If last extent is EOF then we have the entire file in the buffer. + // If last extent is EOF then we cannot more any further forward. // pos could be in last extent, so skip the later checks that // insist pos be located prior to the last extent. if (last_extent.flags() & FIEMAP_EXTENT_LAST) { break; } - // Don't have EOF, must have an extent after pos. + // Don't have EOF, must have an extent after pos; otherwise, go forward if (last_extent.begin() <= pos) { + // Set the bit just below the one we last cleared step_size /= 2; - auto new_begin = (begin + step_size) & ~(FIEMAP_BLOCK_SIZE - 1); + auto new_begin = (begin + max(FIEMAP_BLOCK_SIZE, step_size)) & ~(FIEMAP_BLOCK_SIZE - 1); EWLOG("step forward " << to_hex(begin) << " -> " << to_hex(new_begin)); - if (begin == new_begin) { - EWLOG("step forward stopped"); - break; - } begin = new_begin; continue; } - // Last extent begins after pos, first extent ends on or before pos. - // All other cases should have been handled before here. - THROW_CHECK2(runtime_error, pos, first_extent, first_extent.end() <= pos); - THROW_CHECK2(runtime_error, pos, last_extent, last_extent.begin() > pos); - // We should probably stop now break; } @@ -372,7 +397,7 @@ namespace crucible { void ExtentWalker::seek(off_t pos) { - CHATTER_UNWIND("seek " << to_hex(pos)); + EWTRACE("seek " << to_hex(pos)); THROW_CHECK1(out_of_range, pos, pos >= 0); Itr rv = find_in_cache(pos); if (rv != m_extents.end()) { @@ -381,29 +406,28 @@ namespace crucible { } run_fiemap(pos); m_current = find_in_cache(pos); + THROW_CHECK2(runtime_error, *this, to_hex(pos), m_current != m_extents.end()); } Extent ExtentWalker::current() { THROW_CHECK2(invalid_argument, *this, m_extents.size(), m_current != m_extents.end()); - CHATTER_UNWIND("current " << *m_current); return *m_current; } - bool ExtentWalker::next() { - CHATTER_UNWIND("next"); + EWTRACE("next"); THROW_CHECK1(invalid_argument, (m_current != m_extents.end()), m_current != m_extents.end()); if (current().m_end >= m_stat.st_size) { - CHATTER_UNWIND("next EOF"); + EWTRACE("next EOF"); return false; } auto next_pos = current().m_end; if (next_pos >= m_stat.st_size) { - CHATTER_UNWIND("next next_pos = " << next_pos << " m_stat.st_size = " << m_stat.st_size); + EWTRACE("next next_pos = " << next_pos << " m_stat.st_size = " << m_stat.st_size); return false; } seek(next_pos); @@ -421,16 +445,16 @@ namespace crucible { bool ExtentWalker::prev() { - CHATTER_UNWIND("prev"); + EWTRACE("prev"); THROW_CHECK1(invalid_argument, (m_current != m_extents.end()), m_current != m_extents.end()); auto prev_iter = m_current; if (prev_iter->m_begin == 0) { - CHATTER_UNWIND("prev BOF"); + EWTRACE("prev BOF"); return false; } THROW_CHECK1(invalid_argument, (prev_iter != m_extents.begin()), prev_iter != m_extents.begin()); --prev_iter; - CHATTER_UNWIND("prev seeking to " << *prev_iter << "->m_begin"); + EWTRACE("prev seeking to " << *prev_iter << "->m_begin"); auto prev_end = current().m_begin; seek(prev_iter->m_begin); THROW_CHECK1(runtime_error, (m_current != m_extents.end()), m_current != m_extents.end()); @@ -489,7 +513,7 @@ namespace crucible { sk.min_type = sk.max_type = BTRFS_EXTENT_DATA_KEY; sk.nr_items = sc_extent_fetch_max; - CHATTER_UNWIND("sk " << sk << " root_fd " << name_fd(m_root_fd)); + EWTRACE("sk " << sk << " root_fd " << name_fd(m_root_fd)); sk.do_ioctl(m_root_fd); Vec rv; @@ -615,6 +639,7 @@ namespace crucible { ExtentWalker::Vec ExtentWalker::get_extent_map(off_t pos) { + EWLOG("get_extent_map(" << to_hex(pos) << ")"); Fiemap fm; fm.fm_start = ranged_cast(pos); fm.fm_length = ranged_cast(numeric_limits::max() - pos); @@ -628,7 +653,9 @@ namespace crucible { e.m_physical = i.fe_physical; e.m_flags = i.fe_flags; rv.push_back(e); + EWLOG("push_back(" << e << ")"); } + EWLOG("get_extent_map(" << to_hex(pos) << ") returning " << rv.size() << " extents"); return rv; }