From 50e012ad6ddec9a2233309f88225e0f10348fbfd Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Mon, 10 Feb 2025 23:25:03 -0500 Subject: [PATCH] seeker: add a runtime debug stream This allows detailed but selective debugging when using the library, particularly when something goes wrong. Signed-off-by: Zygo Blaxell --- include/crucible/seeker.h | 222 +++++++++++++++++++------------------- lib/Makefile | 1 + lib/seeker.cc | 7 ++ test/seeker.cc | 8 +- 4 files changed, 122 insertions(+), 116 deletions(-) create mode 100644 lib/seeker.cc diff --git a/include/crucible/seeker.h b/include/crucible/seeker.h index b4832f5..69d0336 100644 --- a/include/crucible/seeker.h +++ b/include/crucible/seeker.h @@ -6,23 +6,23 @@ #include #include -#include - -#if 0 +// Debug stream +#include #include #include -#define DINIT(__x) __x -#define DLOG(__x) do { logs << __x << std::endl; } while (false) -#define DOUT(__err) do { __err << logs.str(); } while (false) -#else -#define DINIT(__x) do {} while (false) -#define DLOG(__x) do {} while (false) -#define DOUT(__x) do {} while (false) -#endif + +#include namespace crucible { using namespace std; + extern thread_local shared_ptr tl_seeker_debug_str; + #define SEEKER_DEBUG_LOG(__x) do { \ + if (tl_seeker_debug_str) { \ + (*tl_seeker_debug_str) << __x << "\n"; \ + } \ + } while (false) + // Requirements for Container Fetch(Pos lower, Pos upper): // - fetches objects in Pos order, starting from lower (must be >= lower) // - must return upper if present, may or may not return objects after that @@ -49,113 +49,107 @@ namespace crucible { Pos seek_backward(Pos const target_pos, Fetch fetch, Pos min_step = 1, size_t max_loops = numeric_limits::max()) { - DINIT(ostringstream logs); - try { - static const Pos end_pos = numeric_limits::max(); - // TBH this probably won't work if begin_pos != 0, i.e. any signed type - static const Pos begin_pos = numeric_limits::min(); - // Run a binary search looking for the highest key below target_pos. - // Initial upper bound of the search is target_pos. - // Find initial lower bound by doubling the size of the range until a key below target_pos - // is found, or the lower bound reaches the beginning of the search space. - // If the lower bound search reaches the beginning of the search space without finding a key, - // return the beginning of the search space; otherwise, perform a binary search between - // the bounds now established. - Pos lower_bound = 0; - Pos upper_bound = target_pos; - bool found_low = false; - Pos probe_pos = target_pos; - // We need one loop for each bit of the search space to find the lower bound, - // one loop for each bit of the search space to find the upper bound, - // and one extra loop to confirm the boundary is correct. - for (size_t loop_count = min(numeric_limits::digits * size_t(2) + 1, max_loops); loop_count; --loop_count) { - DLOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")"); - auto result = fetch(probe_pos, target_pos); - const Pos low_pos = result.empty() ? end_pos : *result.begin(); - const Pos high_pos = result.empty() ? end_pos : *result.rbegin(); - DLOG(" = " << low_pos << ".." << high_pos); - // check for correct behavior of the fetch function - THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos); - THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos); - THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos); - if (!found_low) { - // if target_pos == end_pos then we will find it in every empty result set, - // so in that case we force the lower bound to be lower than end_pos - if ((target_pos == end_pos) ? (low_pos < target_pos) : (low_pos <= target_pos)) { - // found a lower bound, set the low bound there and switch to binary search - found_low = true; - lower_bound = low_pos; - DLOG("found_low = true, lower_bound = " << lower_bound); - } else { - // still looking for lower bound - // if probe_pos was begin_pos then we can stop with no result - if (probe_pos == begin_pos) { - DLOG("return: probe_pos == begin_pos " << begin_pos); - return begin_pos; - } - // double the range size, or use the distance between objects found so far - THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound); - // already checked low_pos <= high_pos above - const Pos want_delta = max(upper_bound - probe_pos, min_step); - // avoid underflowing the beginning of the search space - const Pos have_delta = min(want_delta, probe_pos - begin_pos); - THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta); - // move probe and try again - probe_pos = probe_pos - have_delta; - DLOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")"); - continue; + static const Pos end_pos = numeric_limits::max(); + // TBH this probably won't work if begin_pos != 0, i.e. any signed type + static const Pos begin_pos = numeric_limits::min(); + // Run a binary search looking for the highest key below target_pos. + // Initial upper bound of the search is target_pos. + // Find initial lower bound by doubling the size of the range until a key below target_pos + // is found, or the lower bound reaches the beginning of the search space. + // If the lower bound search reaches the beginning of the search space without finding a key, + // return the beginning of the search space; otherwise, perform a binary search between + // the bounds now established. + Pos lower_bound = 0; + Pos upper_bound = target_pos; + bool found_low = false; + Pos probe_pos = target_pos; + // We need one loop for each bit of the search space to find the lower bound, + // one loop for each bit of the search space to find the upper bound, + // and one extra loop to confirm the boundary is correct. + for (size_t loop_count = min(numeric_limits::digits * size_t(2) + 1, max_loops); loop_count; --loop_count) { + SEEKER_DEBUG_LOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")"); + auto result = fetch(probe_pos, target_pos); + const Pos low_pos = result.empty() ? end_pos : *result.begin(); + const Pos high_pos = result.empty() ? end_pos : *result.rbegin(); + SEEKER_DEBUG_LOG(" = " << low_pos << ".." << high_pos); + // check for correct behavior of the fetch function + THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos); + THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos); + THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos); + if (!found_low) { + // if target_pos == end_pos then we will find it in every empty result set, + // so in that case we force the lower bound to be lower than end_pos + if ((target_pos == end_pos) ? (low_pos < target_pos) : (low_pos <= target_pos)) { + // found a lower bound, set the low bound there and switch to binary search + found_low = true; + lower_bound = low_pos; + SEEKER_DEBUG_LOG("found_low = true, lower_bound = " << lower_bound); + } else { + // still looking for lower bound + // if probe_pos was begin_pos then we can stop with no result + if (probe_pos == begin_pos) { + SEEKER_DEBUG_LOG("return: probe_pos == begin_pos " << begin_pos); + return begin_pos; } + // double the range size, or use the distance between objects found so far + THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound); + // already checked low_pos <= high_pos above + const Pos want_delta = max(upper_bound - probe_pos, min_step); + // avoid underflowing the beginning of the search space + const Pos have_delta = min(want_delta, probe_pos - begin_pos); + THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta); + // move probe and try again + probe_pos = probe_pos - have_delta; + SEEKER_DEBUG_LOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")"); + continue; } - if (low_pos <= target_pos && target_pos <= high_pos) { - // have keys on either side of target_pos in result - // search from the high end until we find the highest key below target - for (auto i = result.rbegin(); i != result.rend(); ++i) { - // more correctness checking for fetch - THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i); - if (*i <= target_pos) { - DLOG("return: *i " << *i << " <= target_pos " << target_pos); - return *i; - } - } - // if the list is empty then low_pos = high_pos = end_pos - // if target_pos = end_pos also, then we will execute the loop - // above but not find any matching entries. - THROW_CHECK0(runtime_error, result.empty()); - } - if (target_pos <= low_pos) { - // results are all too high, so probe_pos..low_pos is too high - // lower the high bound to the probe pos - upper_bound = probe_pos; - DLOG("upper_bound = probe_pos " << probe_pos); - } - if (high_pos < target_pos) { - // results are all too low, so probe_pos..high_pos is too low - // raise the low bound to the high_pos - DLOG("lower_bound = high_pos " << high_pos); - lower_bound = high_pos; - } - // compute a new probe pos at the middle of the range and try again - // we can't have a zero-size range here because we would not have set found_low yet - THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound); - const Pos delta = (upper_bound - lower_bound) / 2; - probe_pos = lower_bound + delta; - if (delta < 1) { - // nothing can exist in the range (lower_bound, upper_bound) - // and an object is known to exist at lower_bound - DLOG("return: probe_pos == lower_bound " << lower_bound); - return lower_bound; - } - THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos); - THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound); - DLOG("loop: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound); } - THROW_ERROR(runtime_error, "FIXME: should not reach this line: " - "lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", " - "found_low " << found_low); - } catch (...) { - DOUT(cerr); - throw; + if (low_pos <= target_pos && target_pos <= high_pos) { + // have keys on either side of target_pos in result + // search from the high end until we find the highest key below target + for (auto i = result.rbegin(); i != result.rend(); ++i) { + // more correctness checking for fetch + THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i); + if (*i <= target_pos) { + SEEKER_DEBUG_LOG("return: *i " << *i << " <= target_pos " << target_pos); + return *i; + } + } + // if the list is empty then low_pos = high_pos = end_pos + // if target_pos = end_pos also, then we will execute the loop + // above but not find any matching entries. + THROW_CHECK0(runtime_error, result.empty()); + } + if (target_pos <= low_pos) { + // results are all too high, so probe_pos..low_pos is too high + // lower the high bound to the probe pos, low_pos cannot be lower + SEEKER_DEBUG_LOG("upper_bound = probe_pos " << probe_pos); + upper_bound = probe_pos; + } + if (high_pos < target_pos) { + // results are all too low, so probe_pos..high_pos is too low + // raise the low bound to high_pos since it's above probe_pos + SEEKER_DEBUG_LOG("lower_bound = high_pos " << high_pos); + lower_bound = high_pos; + } + // compute a new probe pos at the middle of the range and try again + // we can't have a zero-size range here because we would not have set found_low yet + THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound); + const Pos delta = (upper_bound - lower_bound) / 2; + probe_pos = lower_bound + delta; + if (delta < 1) { + // nothing can exist in the range (lower_bound, upper_bound) + // and an object is known to exist at lower_bound + SEEKER_DEBUG_LOG("return: probe_pos == lower_bound " << lower_bound); + return lower_bound; + } + THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos); + THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound); + SEEKER_DEBUG_LOG("loop bottom: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound); } + THROW_ERROR(runtime_error, "FIXME: should not reach this line: " + "lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", " + "found_low " << found_low); } } diff --git a/lib/Makefile b/lib/Makefile index 939f844..e890960 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -17,6 +17,7 @@ CRUCIBLE_OBJS = \ openat2.o \ path.o \ process.o \ + seeker.o \ string.o \ table.o \ task.o \ diff --git a/lib/seeker.cc b/lib/seeker.cc new file mode 100644 index 0000000..75a40b4 --- /dev/null +++ b/lib/seeker.cc @@ -0,0 +1,7 @@ +#include "crucible/seeker.h" + +namespace crucible { + + thread_local shared_ptr tl_seeker_debug_str; + +}; diff --git a/test/seeker.cc b/test/seeker.cc index b4f8121..5474a11 100644 --- a/test/seeker.cc +++ b/test/seeker.cc @@ -36,6 +36,8 @@ seeker_test(const vector &vec, uint64_t const target) } cerr << " } = "; size_t loops = 0; + tl_seeker_debug_str = make_shared(); + bool local_test_fails = false; bool excepted = catch_all([&]() { auto found = seek_backward(target, [&](uint64_t lower, uint64_t upper) { ++loops; @@ -52,13 +54,15 @@ seeker_test(const vector &vec, uint64_t const target) cerr << " (correct)"; } else { cerr << " (INCORRECT - right answer is " << my_found << ")"; - test_fails = true; + local_test_fails = true; } }); cerr << " (" << loops << " loops)" << endl; - if (excepted) { + if (excepted || local_test_fails) { + cerr << dynamic_pointer_cast(tl_seeker_debug_str)->str(); test_fails = true; } + tl_seeker_debug_str.reset(); } static