mirror of
https://github.com/Zygo/bees.git
synced 2025-05-17 21:35:45 +02:00
hash: use POSIX_FADV_WILLNEED and POSIX_FADV_DONTNEED
The hash table is one of the few cases in bees where a non-trivial amount of page cache memory will be used in a predictable way, so we can advise the kernel about our IO demands in advance.

Use WILLNEED to prefetch hash table pages at startup. Use DONTNEED to trigger writeback on hash table pages at shutdown.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
parent
97d70ef4c5
commit
a353d8cc6e
@ -115,8 +115,9 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
|
|||||||
bool wrote_extent = false;
|
bool wrote_extent = false;
|
||||||
|
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte;
|
uint8_t *const dirty_extent = m_extent_ptr[extent_index].p_byte;
|
||||||
uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte;
|
uint8_t *const dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte;
|
||||||
|
const size_t dirty_extent_offset = dirty_extent - m_byte_ptr;
|
||||||
THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
|
THROW_CHECK1(out_of_range, dirty_extent, dirty_extent >= m_byte_ptr);
|
||||||
THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
|
THROW_CHECK1(out_of_range, dirty_extent_end, dirty_extent_end <= m_byte_ptr_end);
|
||||||
THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
|
THROW_CHECK2(out_of_range, dirty_extent_end, dirty_extent, dirty_extent_end - dirty_extent == BLOCK_SIZE_HASHTAB_EXTENT);
|
||||||
@ -131,9 +132,13 @@ BeesHashTable::flush_dirty_extent(uint64_t extent_index)
|
|||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
// Write the extent (or not)
|
// Write the extent (or not)
|
||||||
pwrite_or_die(m_fd, extent_copy, dirty_extent - m_byte_ptr);
|
pwrite_or_die(m_fd, extent_copy, dirty_extent_offset);
|
||||||
BEESCOUNT(hash_extent_out);
|
BEESCOUNT(hash_extent_out);
|
||||||
|
|
||||||
|
// Nope, this causes a _dramatic_ loss of performance.
|
||||||
|
// const size_t dirty_extent_size = dirty_extent_end - dirty_extent;
|
||||||
|
// bees_unreadahead(m_fd, dirty_extent_offset, dirty_extent_size);
|
||||||
|
|
||||||
wrote_extent = true;
|
wrote_extent = true;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -155,6 +160,8 @@ BeesHashTable::flush_dirty_extents(bool slowly)
|
|||||||
unique_lock<mutex> lock(m_stop_mutex);
|
unique_lock<mutex> lock(m_stop_mutex);
|
||||||
if (m_stop_requested) {
|
if (m_stop_requested) {
|
||||||
BEESLOGDEBUG("Stop requested in hash table flush_dirty_extents");
|
BEESLOGDEBUG("Stop requested in hash table flush_dirty_extents");
|
||||||
|
// This function is called by another thread with !slowly,
|
||||||
|
// so we just get out of the way here.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
m_stop_condvar.wait_for(lock, sleep_time);
|
m_stop_condvar.wait_for(lock, sleep_time);
|
||||||
@ -197,6 +204,11 @@ BeesHashTable::writeback_loop()
|
|||||||
m_dirty_condvar.wait(lock);
|
m_dirty_condvar.wait(lock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
catch_all([&]() {
|
||||||
|
// trigger writeback on our way out
|
||||||
|
BEESTOOLONG("unreadahead hash table size " << pretty(m_size));
|
||||||
|
bees_unreadahead(m_fd, 0, m_size);
|
||||||
|
});
|
||||||
BEESLOGDEBUG("Exited hash table writeback_loop");
|
BEESLOGDEBUG("Exited hash table writeback_loop");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -225,6 +237,7 @@ BeesHashTable::prefetch_loop()
|
|||||||
size_t toxic_count = 0;
|
size_t toxic_count = 0;
|
||||||
size_t unaligned_eof_count = 0;
|
size_t unaligned_eof_count = 0;
|
||||||
|
|
||||||
|
m_prefetch_running = true;
|
||||||
for (uint64_t ext = 0; ext < m_extents && !m_stop_requested; ++ext) {
|
for (uint64_t ext = 0; ext < m_extents && !m_stop_requested; ++ext) {
|
||||||
BEESNOTE("prefetching hash table extent #" << ext << " of " << m_extents);
|
BEESNOTE("prefetching hash table extent #" << ext << " of " << m_extents);
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
@ -266,6 +279,7 @@ BeesHashTable::prefetch_loop()
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
m_prefetch_running = false;
|
||||||
|
|
||||||
BEESNOTE("calculating hash table statistics");
|
BEESNOTE("calculating hash table statistics");
|
||||||
|
|
||||||
@ -394,18 +408,29 @@ BeesHashTable::fetch_missing_extent_by_index(uint64_t extent_index)
|
|||||||
BEESTRACE("Fetching hash extent #" << extent_index << " of " << m_extents << " extents");
|
BEESTRACE("Fetching hash extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
BEESTOOLONG("Fetching hash extent #" << extent_index << " of " << m_extents << " extents");
|
BEESTOOLONG("Fetching hash extent #" << extent_index << " of " << m_extents << " extents");
|
||||||
|
|
||||||
uint8_t *dirty_extent = m_extent_ptr[extent_index].p_byte;
|
uint8_t *const dirty_extent = m_extent_ptr[extent_index].p_byte;
|
||||||
uint8_t *dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte;
|
uint8_t *const dirty_extent_end = m_extent_ptr[extent_index + 1].p_byte;
|
||||||
|
const size_t dirty_extent_size = dirty_extent_end - dirty_extent;
|
||||||
|
const size_t dirty_extent_offset = dirty_extent - m_byte_ptr;
|
||||||
|
|
||||||
// If the read fails don't retry, just go with whatever data we have
|
// If the read fails don't retry, just go with whatever data we have
|
||||||
m_extent_metadata.at(extent_index).m_missing = false;
|
m_extent_metadata.at(extent_index).m_missing = false;
|
||||||
|
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
BEESTOOLONG("pread(fd " << m_fd << " '" << name_fd(m_fd)<< "', length " << to_hex(dirty_extent_end - dirty_extent) << ", offset " << to_hex(dirty_extent - m_byte_ptr) << ")");
|
||||||
pread_or_die(m_fd, dirty_extent, dirty_extent_end - dirty_extent, dirty_extent - m_byte_ptr);
|
pread_or_die(m_fd, dirty_extent, dirty_extent_size, dirty_extent_offset);
|
||||||
|
|
||||||
// Only count extents successfully read
|
// Only count extents successfully read
|
||||||
BEESCOUNT(hash_extent_in);
|
BEESCOUNT(hash_extent_in);
|
||||||
|
|
||||||
|
// Won't need that again
|
||||||
|
bees_unreadahead(m_fd, dirty_extent_offset, dirty_extent_size);
|
||||||
|
|
||||||
|
// If we are in prefetch, give the kernel a hint about the next extent
|
||||||
|
if (m_prefetch_running) {
|
||||||
|
// XXX: don't call this if bees_readahead is implemented by pread()
|
||||||
|
bees_readahead(m_fd, dirty_extent_offset + dirty_extent_size, dirty_extent_size);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -753,10 +778,12 @@ BeesHashTable::~BeesHashTable()
|
|||||||
// into the same trap (and maybe throw an exception) here.
|
// into the same trap (and maybe throw an exception) here.
|
||||||
// flush_dirty_extents(false);
|
// flush_dirty_extents(false);
|
||||||
catch_all([&]() {
|
catch_all([&]() {
|
||||||
|
// drop the memory mapping
|
||||||
|
BEESTOOLONG("unmap handle table size " << pretty(m_size));
|
||||||
DIE_IF_NON_ZERO(munmap(m_cell_ptr, m_size));
|
DIE_IF_NON_ZERO(munmap(m_cell_ptr, m_size));
|
||||||
m_cell_ptr = nullptr;
|
|
||||||
m_size = 0;
|
|
||||||
});
|
});
|
||||||
|
m_cell_ptr = nullptr;
|
||||||
|
m_size = 0;
|
||||||
}
|
}
|
||||||
BEESLOGDEBUG("BeesHashTable destroyed");
|
BEESLOGDEBUG("BeesHashTable destroyed");
|
||||||
}
|
}
|
||||||
|
10
src/bees.cc
10
src/bees.cc
@ -252,6 +252,16 @@ bees_readahead(int const fd, off_t offset, size_t size)
|
|||||||
BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
|
BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
bees_unreadahead(int const fd, off_t offset, size_t size)
|
||||||
|
{
|
||||||
|
Timer unreadahead_timer;
|
||||||
|
BEESNOTE("unreadahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
|
BEESTOOLONG("unreadahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
|
||||||
|
DIE_IF_NON_ZERO(posix_fadvise(fd, offset, size, POSIX_FADV_DONTNEED));
|
||||||
|
BEESCOUNTADD(readahead_unread_ms, unreadahead_timer.age() * 1000);
|
||||||
|
}
|
||||||
|
|
||||||
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
|
BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
|
||||||
m_dir_fd(dir_fd),
|
m_dir_fd(dir_fd),
|
||||||
m_name(name),
|
m_name(name),
|
||||||
|
@ -462,6 +462,9 @@ private:
|
|||||||
RateLimiter m_flush_rate_limit;
|
RateLimiter m_flush_rate_limit;
|
||||||
BeesStringFile m_stats_file;
|
BeesStringFile m_stats_file;
|
||||||
|
|
||||||
|
// Prefetch readahead hint
|
||||||
|
bool m_prefetch_running = false;
|
||||||
|
|
||||||
// Mutex/condvar for the writeback thread
|
// Mutex/condvar for the writeback thread
|
||||||
mutex m_dirty_mutex;
|
mutex m_dirty_mutex;
|
||||||
condition_variable m_dirty_condvar;
|
condition_variable m_dirty_condvar;
|
||||||
@ -887,6 +890,7 @@ extern const char *BEES_VERSION;
|
|||||||
string pretty(double d);
|
string pretty(double d);
|
||||||
void bees_sync(int fd);
|
void bees_sync(int fd);
|
||||||
void bees_readahead(int fd, off_t offset, size_t size);
|
void bees_readahead(int fd, off_t offset, size_t size);
|
||||||
|
void bees_unreadahead(int fd, off_t offset, size_t size);
|
||||||
string format_time(time_t t);
|
string format_time(time_t t);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user