mirror of
				https://github.com/Zygo/bees.git
				synced 2025-10-31 10:10:34 +01:00 
			
		
		
		
	seeker: backward searching template function
This template turns a forward search primitive (e.g. lower_bound, FIEMAP, TREE_SEARCH_V2) into a backward search primitive. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
		
							
								
								
									
										163
									
								
								include/crucible/seeker.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								include/crucible/seeker.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,163 @@ | |||||||
|  | #ifndef _CRUCIBLE_SEEKER_H_ | ||||||
|  | #define _CRUCIBLE_SEEKER_H_ | ||||||
|  |  | ||||||
|  | #include "crucible/error.h" | ||||||
|  |  | ||||||
|  | #include <algorithm> | ||||||
|  | #include <limits> | ||||||
|  |  | ||||||
|  | #include <cstdint> | ||||||
|  |  | ||||||
|  | #if 1 | ||||||
|  | #include <iostream> | ||||||
|  | #include <sstream> | ||||||
|  | #define DINIT(__x) __x | ||||||
|  | #define DLOG(__x) do { logs << __x << std::endl; } while (false) | ||||||
|  | #define DOUT(__err) do { __err << logs.str(); } while (false) | ||||||
|  | #else | ||||||
|  | #define DINIT(__x) do {} while (false) | ||||||
|  | #define DLOG(__x) do {} while (false) | ||||||
|  | #define DOUT(__x) do {} while (false) | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | namespace crucible { | ||||||
|  | 	using namespace std; | ||||||
|  |  | ||||||
|  | 	// Requirements for Container<Pos> Fetch(Pos lower, Pos upper): | ||||||
|  | 	// - fetches objects in Pos order, starting from lower (must be >= lower) | ||||||
|  | 	// - must return upper if present, may or may not return objects after that | ||||||
|  | 	// - returns a container of Pos objects with begin(), end(), rbegin(), rend() | ||||||
|  | 	// - container must iterate over objects in Pos order | ||||||
|  | 	// - uniqueness of Pos objects not required | ||||||
|  | 	// - should store the underlying data as a side effect | ||||||
|  | 	// | ||||||
|  | 	// Requirements for Pos: | ||||||
|  | 	// - should behave like an unsigned integer type | ||||||
|  | 	// - must have specializations in numeric_limits<T> for digits, max(), min() | ||||||
|  | 	// - must support +, -, -=, and related operators | ||||||
|  | 	// - must support <, <=, ==, and related operators | ||||||
|  | 	// - must support Pos / 2 (only) | ||||||
|  | 	// | ||||||
|  | 	// Requirements for seek_backward: | ||||||
|  | 	// - calls Fetch to search Pos space near target_pos | ||||||
|  | 	// - if no key exists with value <= target_pos, returns the minimum Pos value | ||||||
|  | 	// - returns the highest key value <= target_pos | ||||||
|  | 	// - returned key value may not be part of most recent Fetch result | ||||||
|  | 	// - 1 loop iteration when target_pos exists | ||||||
|  |  | ||||||
|  | 	template <class Fetch, class Pos = uint64_t> | ||||||
|  | 	Pos | ||||||
|  | 	seek_backward(Pos const target_pos, Fetch fetch, Pos min_step = 1, size_t max_loops = numeric_limits<size_t>::max()) | ||||||
|  | 	{ | ||||||
|  | 		DINIT(ostringstream logs); | ||||||
|  | 		try { | ||||||
|  | 			static const Pos end_pos = numeric_limits<Pos>::max(); | ||||||
|  | 			// TBH this probably won't work if begin_pos != 0, i.e. any signed type | ||||||
|  | 			static const Pos begin_pos = numeric_limits<Pos>::min(); | ||||||
|  | 			// Run a binary search looking for the highest key below target_pos. | ||||||
|  | 			// Initial upper bound of the search is target_pos. | ||||||
|  | 			// Find initial lower bound by doubling the size of the range until a key below target_pos | ||||||
|  | 			// is found, or the lower bound reaches the beginning of the search space. | ||||||
|  | 			// If the lower bound search reaches the beginning of the search space without finding a key, | ||||||
|  | 			// return the beginning of the search space; otherwise, perform a binary search between | ||||||
|  | 			// the bounds now established. | ||||||
|  | 			Pos lower_bound = 0; | ||||||
|  | 			Pos upper_bound = target_pos; | ||||||
|  | 			bool found_low = false; | ||||||
|  | 			Pos probe_pos = target_pos; | ||||||
|  | 			// We need one loop for each bit of the search space to find the lower bound, | ||||||
|  | 			// one loop for each bit of the search space to find the upper bound, | ||||||
|  | 			// and one extra loop to confirm the boundary is correct. | ||||||
|  | 			for (size_t loop_count = min(numeric_limits<Pos>::digits * size_t(2) + 1, max_loops); loop_count; --loop_count) { | ||||||
|  | 				DLOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")"); | ||||||
|  | 				auto result = fetch(probe_pos, target_pos); | ||||||
|  | 				const Pos low_pos = result.empty() ? end_pos : *result.begin(); | ||||||
|  | 				const Pos high_pos = result.empty() ? end_pos : *result.rbegin(); | ||||||
|  | 				DLOG(" = " << low_pos << ".." << high_pos); | ||||||
|  | 				// check for correct behavior of the fetch function | ||||||
|  | 				THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos); | ||||||
|  | 				THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos); | ||||||
|  | 				THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos); | ||||||
|  | 				if (!found_low) { | ||||||
|  | 					// if target_pos == end_pos then we will find it in every empty result set, | ||||||
|  | 					// so in that case we force the lower bound to be lower than end_pos | ||||||
|  | 					if ((target_pos == end_pos) ? (low_pos < target_pos) : (low_pos <= target_pos)) { | ||||||
|  | 						// found a lower bound, set the low bound there and switch to binary search | ||||||
|  | 						found_low = true; | ||||||
|  | 						lower_bound = low_pos; | ||||||
|  | 						DLOG("found_low = true, lower_bound = " << lower_bound); | ||||||
|  | 					} else { | ||||||
|  | 						// still looking for lower bound | ||||||
|  | 						// if probe_pos was begin_pos then we can stop with no result | ||||||
|  | 						if (probe_pos == begin_pos) { | ||||||
|  | 							DLOG("return: probe_pos == begin_pos " << begin_pos); | ||||||
|  | 							return begin_pos; | ||||||
|  | 						} | ||||||
|  | 						// double the range size, or use the distance between objects found so far | ||||||
|  | 						THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound); | ||||||
|  | 						// already checked low_pos <= high_pos above | ||||||
|  | 						const Pos want_delta = max(upper_bound - probe_pos, min_step); | ||||||
|  | 						// avoid underflowing the beginning of the search space | ||||||
|  | 						const Pos have_delta = min(want_delta, probe_pos - begin_pos); | ||||||
|  | 						THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta); | ||||||
|  | 						// move probe and try again | ||||||
|  | 						probe_pos = probe_pos - have_delta; | ||||||
|  | 						DLOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")"); | ||||||
|  | 						continue; | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 				if (low_pos <= target_pos && target_pos <= high_pos) { | ||||||
|  | 					// have keys on either side of target_pos in result | ||||||
|  | 					// search from the high end until we find the highest key below target | ||||||
|  | 					for (auto i = result.rbegin(); i != result.rend(); ++i) { | ||||||
|  | 						// more correctness checking for fetch | ||||||
|  | 						THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i); | ||||||
|  | 						if (*i <= target_pos) { | ||||||
|  | 							DLOG("return: *i " << *i << " <= target_pos " << target_pos); | ||||||
|  | 							return *i; | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  | 					// if the list is empty then low_pos = high_pos = end_pos | ||||||
|  | 					// if target_pos = end_pos also, then we will execute the loop | ||||||
|  | 					// above but not find any matching entries. | ||||||
|  | 					THROW_CHECK0(runtime_error, result.empty()); | ||||||
|  | 				} | ||||||
|  | 				if (target_pos <= low_pos) { | ||||||
|  | 					// results are all too high, so probe_pos..low_pos is too high | ||||||
|  | 					// lower the high bound to the probe pos | ||||||
|  | 					upper_bound = probe_pos; | ||||||
|  | 					DLOG("upper_bound = probe_pos " << probe_pos); | ||||||
|  | 				} | ||||||
|  | 				if (high_pos < target_pos) { | ||||||
|  | 					// results are all too low, so probe_pos..high_pos is too low | ||||||
|  | 					// raise the low bound to the high_pos | ||||||
|  | 					DLOG("lower_bound = high_pos " << high_pos); | ||||||
|  | 					lower_bound = high_pos; | ||||||
|  | 				} | ||||||
|  | 				// compute a new probe pos at the middle of the range and try again | ||||||
|  | 				// we can't have a zero-size range here because we would not have set found_low yet | ||||||
|  | 				THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound); | ||||||
|  | 				const Pos delta = (upper_bound - lower_bound) / 2; | ||||||
|  | 				probe_pos = lower_bound + delta; | ||||||
|  | 				if (delta < 1) { | ||||||
|  | 					// nothing can exist in the range (lower_bound, upper_bound) | ||||||
|  | 					// and an object is known to exist at lower_bound | ||||||
|  | 					DLOG("return: probe_pos == lower_bound " << lower_bound); | ||||||
|  | 					return lower_bound; | ||||||
|  | 				} | ||||||
|  | 				THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos); | ||||||
|  | 				THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound); | ||||||
|  | 				DLOG("loop: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound); | ||||||
|  | 			} | ||||||
|  | 			THROW_ERROR(runtime_error, "FIXME: should not reach this line: " | ||||||
|  | 				"lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", " | ||||||
|  | 				"found_low " << found_low); | ||||||
|  | 		} catch (...) { | ||||||
|  | 			DOUT(cerr); | ||||||
|  | 			throw; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #endif // _CRUCIBLE_SEEKER_H_ | ||||||
|  |  | ||||||
| @@ -7,6 +7,7 @@ PROGRAMS = \ | |||||||
| 	path \ | 	path \ | ||||||
| 	process \ | 	process \ | ||||||
| 	progress \ | 	progress \ | ||||||
|  | 	seeker \ | ||||||
| 	task \ | 	task \ | ||||||
|  |  | ||||||
| all: test | all: test | ||||||
|   | |||||||
							
								
								
									
										101
									
								
								test/seeker.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								test/seeker.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,101 @@ | |||||||
|  | #include "tests.h" | ||||||
|  |  | ||||||
|  | #include "crucible/seeker.h" | ||||||
|  |  | ||||||
|  | #include <set> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
|  | #include <unistd.h> | ||||||
|  |  | ||||||
|  | using namespace crucible; | ||||||
|  |  | ||||||
|  | static | ||||||
|  | set<uint64_t> | ||||||
|  | seeker_finder(const vector<uint64_t> &vec, uint64_t lower, uint64_t upper) | ||||||
|  | { | ||||||
|  | 	set<uint64_t> s(vec.begin(), vec.end()); | ||||||
|  | 	auto lb = s.lower_bound(lower); | ||||||
|  | 	auto ub = lb; | ||||||
|  | 	if (ub != s.end()) ++ub; | ||||||
|  | 	if (ub != s.end()) ++ub; | ||||||
|  | 	for (; ub != s.end(); ++ub) { | ||||||
|  | 		if (*ub > upper) break; | ||||||
|  | 	} | ||||||
|  | 	return set<uint64_t>(lb, ub); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static bool test_fails = false; | ||||||
|  |  | ||||||
|  | static | ||||||
|  | void | ||||||
|  | seeker_test(const vector<uint64_t> &vec, size_t target) | ||||||
|  | { | ||||||
|  | 	cerr << "Find " << target << " in {"; | ||||||
|  | 	for (auto i : vec) { | ||||||
|  | 		cerr << " " << i; | ||||||
|  | 	} | ||||||
|  | 	cerr << " } = "; | ||||||
|  | 	size_t loops = 0; | ||||||
|  | 	bool excepted = catch_all([&]() { | ||||||
|  | 		auto found = seek_backward(target, [&](uint64_t lower, uint64_t upper) { | ||||||
|  | 			++loops; | ||||||
|  | 			return seeker_finder(vec, lower, upper); | ||||||
|  | 		}); | ||||||
|  | 		cerr << found; | ||||||
|  | 		size_t my_found = 0; | ||||||
|  | 		for (auto i : vec) { | ||||||
|  | 			if (i <= target) { | ||||||
|  | 				my_found = i; | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		if (found == my_found) { | ||||||
|  | 			cerr << " (correct)"; | ||||||
|  | 		} else { | ||||||
|  | 			cerr << " (INCORRECT - right answer is " << my_found << ")"; | ||||||
|  | 			test_fails = true; | ||||||
|  | 		} | ||||||
|  | 	}); | ||||||
|  | 	cerr << " (" << loops << " loops)" << endl; | ||||||
|  | 	if (excepted) { | ||||||
|  | 		test_fails = true; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static | ||||||
|  | void | ||||||
|  | test_seeker() | ||||||
|  | { | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 3); | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 5); | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 0); | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 1); | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 4); | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, 1, 2, 3, 4, 5 }, 2); | ||||||
|  |  | ||||||
|  | 	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 2); | ||||||
|  | 	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 25); | ||||||
|  | 	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 52); | ||||||
|  | 	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 99); | ||||||
|  | 	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55, 56 }, 99); | ||||||
|  | 	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 1); | ||||||
|  | 	seeker_test(vector<uint64_t> { 11, 22, 33, 44, 55 }, 55); | ||||||
|  | 	seeker_test(vector<uint64_t> { 11 }, 55); | ||||||
|  | 	seeker_test(vector<uint64_t> { 11 }, 10); | ||||||
|  | 	seeker_test(vector<uint64_t> { 55 }, 55); | ||||||
|  | 	seeker_test(vector<uint64_t> { }, 55); | ||||||
|  | 	seeker_test(vector<uint64_t> { 55 }, numeric_limits<uint64_t>::max()); | ||||||
|  | 	seeker_test(vector<uint64_t> { 55 }, numeric_limits<uint64_t>::max() - 1); | ||||||
|  | 	seeker_test(vector<uint64_t> { }, numeric_limits<uint64_t>::max()); | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max()); | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() }, numeric_limits<uint64_t>::max() - 1); | ||||||
|  | 	seeker_test(vector<uint64_t> { 0, numeric_limits<uint64_t>::max() - 1 }, numeric_limits<uint64_t>::max()); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | int main(int, const char **) | ||||||
|  | { | ||||||
|  |  | ||||||
|  | 	RUN_A_TEST(test_seeker()); | ||||||
|  |  | ||||||
|  | 	return test_fails ? EXIT_FAILURE : EXIT_SUCCESS; | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user