mirror of
https://github.com/Zygo/bees.git
synced 2025-05-17 21:35:45 +02:00
seeker: backward searching template function
This template turns a forward search primitive (e.g. lower_bound, FIEMAP, TREE_SEARCH_V2) into a backward search primitive. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
parent
23c16aa978
commit
24b904f002
163
include/crucible/seeker.h
Normal file
163
include/crucible/seeker.h
Normal file
@ -0,0 +1,163 @@
|
||||
#ifndef _CRUCIBLE_SEEKER_H_
|
||||
#define _CRUCIBLE_SEEKER_H_
|
||||
|
||||
#include "crucible/error.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#if 1
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#define DINIT(__x) __x
|
||||
#define DLOG(__x) do { logs << __x << std::endl; } while (false)
|
||||
#define DOUT(__err) do { __err << logs.str(); } while (false)
|
||||
#else
|
||||
#define DINIT(__x) do {} while (false)
|
||||
#define DLOG(__x) do {} while (false)
|
||||
#define DOUT(__x) do {} while (false)
|
||||
#endif
|
||||
|
||||
namespace crucible {
|
||||
using namespace std;
|
||||
|
||||
// Requirements for Container<Pos> Fetch(Pos lower, Pos upper):
|
||||
// - fetches objects in Pos order, starting from lower (must be >= lower)
|
||||
// - must return upper if present, may or may not return objects after that
|
||||
// - returns a container of Pos objects with begin(), end(), rbegin(), rend()
|
||||
// - container must iterate over objects in Pos order
|
||||
// - uniqueness of Pos objects not required
|
||||
// - should store the underlying data as a side effect
|
||||
//
|
||||
// Requirements for Pos:
|
||||
// - should behave like an unsigned integer type
|
||||
// - must have specializations in numeric_limits<T> for digits, max(), min()
|
||||
// - must support +, -, -=, and related operators
|
||||
// - must support <, <=, ==, and related operators
|
||||
// - must support Pos / 2 (only)
|
||||
//
|
||||
// Requirements for seek_backward:
|
||||
// - calls Fetch to search Pos space near target_pos
|
||||
// - if no key exists with value <= target_pos, returns the minimum Pos value
|
||||
// - returns the highest key value <= target_pos
|
||||
// - returned key value may not be part of most recent Fetch result
|
||||
// - 1 loop iteration when target_pos exists
|
||||
|
||||
template <class Fetch, class Pos = uint64_t>
|
||||
Pos
|
||||
seek_backward(Pos const target_pos, Fetch fetch, Pos min_step = 1, size_t max_loops = numeric_limits<size_t>::max())
|
||||
{
|
||||
DINIT(ostringstream logs);
|
||||
try {
|
||||
static const Pos end_pos = numeric_limits<Pos>::max();
|
||||
// TBH this probably won't work if begin_pos != 0, i.e. any signed type
|
||||
static const Pos begin_pos = numeric_limits<Pos>::min();
|
||||
// Run a binary search looking for the highest key below target_pos.
|
||||
// Initial upper bound of the search is target_pos.
|
||||
// Find initial lower bound by doubling the size of the range until a key below target_pos
|
||||
// is found, or the lower bound reaches the beginning of the search space.
|
||||
// If the lower bound search reaches the beginning of the search space without finding a key,
|
||||
// return the beginning of the search space; otherwise, perform a binary search between
|
||||
// the bounds now established.
|
||||
Pos lower_bound = 0;
|
||||
Pos upper_bound = target_pos;
|
||||
bool found_low = false;
|
||||
Pos probe_pos = target_pos;
|
||||
// We need one loop for each bit of the search space to find the lower bound,
|
||||
// one loop for each bit of the search space to find the upper bound,
|
||||
// and one extra loop to confirm the boundary is correct.
|
||||
for (size_t loop_count = min(numeric_limits<Pos>::digits * size_t(2) + 1, max_loops); loop_count; --loop_count) {
|
||||
DLOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")");
|
||||
auto result = fetch(probe_pos, target_pos);
|
||||
const Pos low_pos = result.empty() ? end_pos : *result.begin();
|
||||
const Pos high_pos = result.empty() ? end_pos : *result.rbegin();
|
||||
DLOG(" = " << low_pos << ".." << high_pos);
|
||||
// check for correct behavior of the fetch function
|
||||
THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos);
|
||||
THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos);
|
||||
THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos);
|
||||
if (!found_low) {
|
||||
// if target_pos == end_pos then we will find it in every empty result set,
|
||||
// so in that case we force the lower bound to be lower than end_pos
|
||||
if ((target_pos == end_pos) ? (low_pos < target_pos) : (low_pos <= target_pos)) {
|
||||
// found a lower bound, set the low bound there and switch to binary search
|
||||
found_low = true;
|
||||
lower_bound = low_pos;
|
||||
DLOG("found_low = true, lower_bound = " << lower_bound);
|
||||
} else {
|
||||
// still looking for lower bound
|
||||
// if probe_pos was begin_pos then we can stop with no result
|
||||
if (probe_pos == begin_pos) {
|
||||
DLOG("return: probe_pos == begin_pos " << begin_pos);
|
||||
return begin_pos;
|
||||
}
|
||||
// double the range size, or use the distance between objects found so far
|
||||
THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
|
||||
// already checked low_pos <= high_pos above
|
||||
const Pos want_delta = max(upper_bound - probe_pos, min_step);
|
||||
// avoid underflowing the beginning of the search space
|
||||
const Pos have_delta = min(want_delta, probe_pos - begin_pos);
|
||||
THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta);
|
||||
// move probe and try again
|
||||
probe_pos = probe_pos - have_delta;
|
||||
DLOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (low_pos <= target_pos && target_pos <= high_pos) {
|
||||
// have keys on either side of target_pos in result
|
||||
// search from the high end until we find the highest key below target
|
||||
for (auto i = result.rbegin(); i != result.rend(); ++i) {
|
||||
// more correctness checking for fetch
|
||||
THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i);
|
||||
if (*i <= target_pos) {
|
||||
DLOG("return: *i " << *i << " <= target_pos " << target_pos);
|
||||
return *i;
|
||||
}
|
||||
}
|
||||
// if the list is empty then low_pos = high_pos = end_pos
|
||||
// if target_pos = end_pos also, then we will execute the loop
|
||||
// above but not find any matching entries.
|
||||
THROW_CHECK0(runtime_error, result.empty());
|
||||
}
|
||||
if (target_pos <= low_pos) {
|
||||
// results are all too high, so probe_pos..low_pos is too high
|
||||
// lower the high bound to the probe pos
|
||||
upper_bound = probe_pos;
|
||||
DLOG("upper_bound = probe_pos " << probe_pos);
|
||||
}
|
||||
if (high_pos < target_pos) {
|
||||
// results are all too low, so probe_pos..high_pos is too low
|
||||
// raise the low bound to the high_pos
|
||||
DLOG("lower_bound = high_pos " << high_pos);
|
||||
lower_bound = high_pos;
|
||||
}
|
||||
// compute a new probe pos at the middle of the range and try again
|
||||
// we can't have a zero-size range here because we would not have set found_low yet
|
||||
THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound);
|
||||
const Pos delta = (upper_bound - lower_bound) / 2;
|
||||
probe_pos = lower_bound + delta;
|
||||
if (delta < 1) {
|
||||
// nothing can exist in the range (lower_bound, upper_bound)
|
||||
// and an object is known to exist at lower_bound
|
||||
DLOG("return: probe_pos == lower_bound " << lower_bound);
|
||||
return lower_bound;
|
||||
}
|
||||
THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos);
|
||||
THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound);
|
||||
DLOG("loop: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound);
|
||||
}
|
||||
THROW_ERROR(runtime_error, "FIXME: should not reach this line: "
|
||||
"lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", "
|
||||
"found_low " << found_low);
|
||||
} catch (...) {
|
||||
DOUT(cerr);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // _CRUCIBLE_SEEKER_H_
|
||||
|
@ -7,6 +7,7 @@ PROGRAMS = \
|
||||
path \
|
||||
process \
|
||||
progress \
|
||||
seeker \
|
||||
task \
|
||||
|
||||
all: test
|
||||
|
101
test/seeker.cc
Normal file
101
test/seeker.cc
Normal file
@ -0,0 +1,101 @@
|
||||
#include "tests.h"
|
||||
|
||||
#include "crucible/seeker.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace crucible;
|
||||
|
||||
static
|
||||
set<uint64_t>
|
||||
seeker_finder(const vector<uint64_t> &vec, uint64_t lower, uint64_t upper)
|
||||
{
|
||||
set<uint64_t> s(vec.begin(), vec.end());
|
||||
auto lb = s.lower_bound(lower);
|
||||
auto ub = lb;
|
||||
if (ub != s.end()) ++ub;
|
||||
if (ub != s.end()) ++ub;
|
||||
for (; ub != s.end(); ++ub) {
|
||||
if (*ub > upper) break;
|
||||
}
|
||||
return set<uint64_t>(lb, ub);
|
||||
}
|
||||
|
||||
static bool test_fails = false;
|
||||
|
||||
static
|
||||
void
|
||||
seeker_test(const vector<uint64_t> &vec, size_t target)
|
||||
{
|
||||
cerr << "Find " << target << " in {";
|
||||
for (auto i : vec) {
|
||||
cerr << " " << i;
|
||||
}
|
||||
cerr << " } = ";
|
||||
size_t loops = 0;
|
||||
bool excepted = catch_all([&]() {
|
||||
auto found = seek_backward(target, [&](uint64_t lower, uint64_t upper) {
|
||||
++loops;
|
||||
return seeker_finder(vec, lower, upper);
|
||||
});
|
||||
cerr << found;
|
||||
size_t my_found = 0;
|
||||
for (auto i : vec) {
|
||||
if (i <= target) {
|
||||
my_found = i;
|
||||
}
|
||||
}
|
||||
if (found == my_found) {
|
||||
cerr << " (correct)";
|
||||
} else {
|
||||
cerr << " (INCORRECT - right answer is " << my_found << ")";
|
||||
test_fails = true;
|
||||
}
|
||||
});
|
||||
cerr << " (" << loops << " loops)" << endl;
|
||||
if (excepted) {
|
||||
test_fails = true;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
test_seeker()
|
||||
{
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 3);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 5);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 0);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 1);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 4);
|
||||
seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 2);
|
||||
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 2);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 25);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 52);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 99);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55, 56 }, 99);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 1);
|
||||
seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 55);
|
||||
seeker_test(vector<uint64_t> { 11 }, 55);
|
||||
seeker_test(vector<uint64_t> { 11 }, 10);
|
||||
seeker_test(vector<uint64_t> { 55 }, 55);
|
||||
seeker_test(vector<uint64_t> { }, 55);
|
||||
seeker_test(vector<uint64_t> { 55 }, numeric_limits<uint64_t>::max());
|
||||
seeker_test(vector<uint64_t> { 55 }, numeric_limits<uint64_t>::max() - 1);
|
||||
seeker_test(vector<uint64_t> { }, numeric_limits<uint64_t>::max());
|
||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max());
|
||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max() - 1);
|
||||
seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() - 1 }, numeric_limits<uint64_t>::max());
|
||||
}
|
||||
|
||||
|
||||
int main(int, const char **)
|
||||
{
|
||||
|
||||
RUN_A_TEST(test_seeker());
|
||||
|
||||
return test_fails ? EXIT_FAILURE : EXIT_SUCCESS;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user