mirror of
https://github.com/Zygo/bees.git
synced 2025-05-17 21:35:45 +02:00
readahead: inject some sanity at the foundation of an insane architecture
This solves some of the worst problems with bees reads: 1. The kernel readahead doesn't work. More precisely, it's much better adapted for a very different use case: a single thread alternating between reading a file sequentially and processing the data that was read. bees has multiple threads which compete for access to IO and then issue reads in random order immediately after the call to readahead. The kernel uses idle ioprio scheduling for the readaheads, so the readaheads get preempted by the random reads, or the kernel cancels the readaheads because the data access pattern isn't sequential after the readahead was issued. 2. Seeking drives perform terribly with multiple competing readers, especially with btrfs striped profiles where the iops are broken into tiny stripe-sized pieces. At one point I intended to read the btrfs device map and figure out which devices can be read in parallel, but to make that useful, the user needs to have an array with multiple drives in single profile, or 4+ drives in raid1 profile. In all other cases, the elaborate calculations always return the same result: there can be only one reader at a time. This commit fixes both problems: 1. Don't use the kernel readahead. Use normal reads into a dummy buffer instead. 2. Allow only one thread to readahead at any time. Once the read is completed, the data is in the page cache, and all the random-order small reads that bees does will hit the page cache, not a spinning disk. In some cases we need to read two things close together, so add a `bees_readahead_pair` which holds one lock across both reads. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
parent
cdcdf8e218
commit
8d08a3c06f
@ -349,8 +349,8 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
|
||||
BEESTRACE("e_second " << e_second);
|
||||
|
||||
// Preread entire extent
|
||||
bees_readahead(second.fd(), e_second.begin(), e_second.size());
|
||||
bees_readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
|
||||
bees_readahead_pair(second.fd(), e_second.begin(), e_second.size(),
|
||||
first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
|
||||
|
||||
auto hash_table = ctx->hash_table();
|
||||
|
||||
|
29
src/bees.cc
29
src/bees.cc
@ -214,8 +214,9 @@ BeesTooLong::operator=(const func_type &f)
|
||||
return *this;
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
bees_readahead(int const fd, const off_t offset, const size_t size)
|
||||
bees_readahead_nolock(int const fd, const off_t offset, const size_t size)
|
||||
{
|
||||
Timer readahead_timer;
|
||||
BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||
@ -225,10 +226,8 @@ bees_readahead(int const fd, const off_t offset, const size_t size)
|
||||
DIE_IF_NON_ZERO(readahead(fd, offset, size));
|
||||
#else
|
||||
// Make sure this data is in page cache by brute force
|
||||
// This isn't necessary and it might even be slower,
|
||||
// but the btrfs kernel code does readahead with lower ioprio
|
||||
// and might discard the readahead request entirely,
|
||||
// so it's maybe, *maybe*, worth doing both.
|
||||
// The btrfs kernel code does readahead with lower ioprio
|
||||
// and might discard the readahead request entirely.
|
||||
BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||
auto working_size = size;
|
||||
auto working_offset = offset;
|
||||
@ -249,6 +248,26 @@ bees_readahead(int const fd, const off_t offset, const size_t size)
|
||||
BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
|
||||
}
|
||||
|
||||
void
|
||||
bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2)
|
||||
{
|
||||
BEESNOTE("waiting to readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size)
|
||||
<< ", " << name_fd(fd2) << " offset " << to_hex(offset2) << " len " << pretty(size2));
|
||||
static mutex only_one;
|
||||
unique_lock<mutex> m_lock(only_one);
|
||||
bees_readahead_nolock(fd, offset, size);
|
||||
bees_readahead_nolock(fd2, offset2, size2);
|
||||
}
|
||||
|
||||
void
|
||||
bees_readahead(int const fd, const off_t offset, const size_t size)
|
||||
{
|
||||
BEESNOTE("waiting to readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||
static mutex only_one;
|
||||
unique_lock<mutex> m_lock(only_one);
|
||||
bees_readahead_nolock(fd, offset, size);
|
||||
}
|
||||
|
||||
void
|
||||
bees_unreadahead(int const fd, off_t offset, size_t size)
|
||||
{
|
||||
|
@ -868,6 +868,7 @@ extern const char *BEES_VERSION;
|
||||
extern thread_local default_random_engine bees_generator;
|
||||
string pretty(double d);
|
||||
void bees_readahead(int fd, off_t offset, size_t size);
|
||||
void bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2);
|
||||
void bees_unreadahead(int fd, off_t offset, size_t size);
|
||||
string format_time(time_t t);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user