1
0
mirror of https://github.com/Zygo/bees.git synced 2025-10-25 00:07:37 +02:00

readahead: inject some sanity at the foundation of an insane architecture

This solves some of the worst problems with bees reads:

1.  The kernel readahead doesn't work.  More precisely, it's much better
adapted for a very different use case:  a single thread alternating
between reading a file sequentially and processing the data that was read.
bees has multiple threads which compete for access to IO and then issue
reads in random order immediately after the call to readahead.  The kernel
uses idle ioprio scheduling for the readaheads, so the readaheads get
preempted by the random reads, or the kernel cancels the readaheads because
the data access pattern isn't sequential after the readahead was issued.

2.  Seeking drives perform terribly with multiple competing readers,
especially with btrfs striped profiles where the iops are broken into
tiny stripe-sized pieces.  At one point I intended to read the btrfs
device map and figure out which devices can be read in parallel, but to
make that useful, the user needs to have an array with multiple drives
in single profile, or 4+ drives in raid1 profile.  In all other cases,
the elaborate calculations always return the same result:  there can be
only one reader at a time.

This commit fixes both problems:

1.  Don't use the kernel readahead.  Use normal reads into a dummy
buffer instead.

2.  Allow only one thread to readahead at any time.  Once the read is
completed, the data is in the page cache, and all the random-order small
reads that bees does will hit the page cache, not a spinning disk.
In some cases we need to read two things close together, so add a
`bees_readahead_pair` which holds one lock across both reads.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
Zygo Blaxell
2024-11-15 14:35:01 -05:00
parent cdcdf8e218
commit 8d08a3c06f
3 changed files with 27 additions and 7 deletions

View File

@@ -349,8 +349,8 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
BEESTRACE("e_second " << e_second);
// Preread entire extent
bees_readahead(second.fd(), e_second.begin(), e_second.size());
bees_readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
bees_readahead_pair(second.fd(), e_second.begin(), e_second.size(),
first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
auto hash_table = ctx->hash_table();

View File

@@ -214,8 +214,9 @@ BeesTooLong::operator=(const func_type &f)
return *this;
}
static
void
bees_readahead(int const fd, const off_t offset, const size_t size)
bees_readahead_nolock(int const fd, const off_t offset, const size_t size)
{
Timer readahead_timer;
BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
@@ -225,10 +226,8 @@ bees_readahead(int const fd, const off_t offset, const size_t size)
DIE_IF_NON_ZERO(readahead(fd, offset, size));
#else
// Make sure this data is in page cache by brute force
// This isn't necessary and it might even be slower,
// but the btrfs kernel code does readahead with lower ioprio
// and might discard the readahead request entirely,
// so it's maybe, *maybe*, worth doing both.
// The btrfs kernel code does readahead with lower ioprio
// and might discard the readahead request entirely.
BEESNOTE("emulating readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
auto working_size = size;
auto working_offset = offset;
@@ -249,6 +248,26 @@ bees_readahead(int const fd, const off_t offset, const size_t size)
BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
}
// One lock shared by every locking readahead entry point in this file.
// It must live at file scope: a function-local `static mutex` is a separate
// object per function, which would let bees_readahead() and
// bees_readahead_pair() run concurrently and defeat the goal of having
// only one reader at a time.
static std::mutex s_readahead_only_one;

// Read two ranges back to back while holding the readahead lock once,
// so no other locked readahead can run between the two reads.
//
// fd, offset, size:    first range, read first.
// fd2, offset2, size2: second range, read second.
//
// The "waiting to readahead" note is set before the lock is acquired, so
// it is the current note for as long as this thread is blocked on the lock.
void
bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2)
{
	BEESNOTE("waiting to readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size)
		<< ", " << name_fd(fd2) << " offset " << to_hex(offset2) << " len " << pretty(size2));
	std::unique_lock<std::mutex> m_lock(s_readahead_only_one);
	bees_readahead_nolock(fd, offset, size);
	bees_readahead_nolock(fd2, offset2, size2);
}

// Read a single range while holding the readahead lock.
//
// fd, offset, size: the range passed through to bees_readahead_nolock().
//
// Uses the same lock as bees_readahead_pair(), so single and paired
// readaheads are serialized against each other as well as among themselves.
void
bees_readahead(int const fd, const off_t offset, const size_t size)
{
	BEESNOTE("waiting to readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
	std::unique_lock<std::mutex> m_lock(s_readahead_only_one);
	bees_readahead_nolock(fd, offset, size);
}
void
bees_unreadahead(int const fd, off_t offset, size_t size)
{

View File

@@ -868,6 +868,7 @@ extern const char *BEES_VERSION;
extern thread_local default_random_engine bees_generator;
string pretty(double d);
// Read the range [offset, offset + size) of fd so the data ends up in the
// page cache.  Takes an internal lock, so calls do not overlap each other.
void bees_readahead(int fd, off_t offset, size_t size);
// Read two ranges (fd/offset/size first, then fd2/offset2/size2) while
// holding one lock across both reads.
void bees_readahead_pair(int fd, off_t offset, size_t size, int fd2, off_t offset2, size_t size2);
// NOTE(review): definition is not visible in this view; presumably the
// counterpart of bees_readahead for the same kind of range -- confirm.
void bees_unreadahead(int fd, off_t offset, size_t size);
string format_time(time_t t);