Mirror of https://github.com/Zygo/bees.git (synced 2025-05-17 21:35:45 +02:00)
hash: create beeshash.dat if it does not exist
BeesHashTable can now create a beeshash.dat if the file does not already exist. Currently the default size is one hash table extent (16MB) and there is no way to change that (yet), so users should still create their own hash tables for now.

The opening of the hash table is deferred (slightly) in preparation for hash table resizing.

No documentation yet, as the feature is currently unfinished.
parent d58de9b76d
commit 6fa8de660b
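The new open_file() in the diff below follows an open-or-create pattern: try to open the existing file, and if that fails, build the table under a temporary name, size it with ftruncate, and rename it into place so an interrupted run never leaves a half-initialized beeshash.dat. Here is a minimal standalone sketch of the same pattern, using plain POSIX calls instead of the bees wrappers (Fd, openat_or_die, ftruncate_or_die, renameat_or_die); the function name make_table_fd and its error handling are illustrative only, not code from this commit:

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdexcept>
#include <string>

// Open an existing table, or atomically create one of the requested size.
int make_table_fd(int dir_fd, const std::string &name, off_t size)
{
        // Try to open the existing hash table first.
        int fd = openat(dir_fd, name.c_str(), O_RDWR);
        if (fd >= 0) {
                return fd;
        }

        // Otherwise build it under a temporary name, size it, then rename it
        // into place so the final name only ever refers to a fully sized table.
        std::string tmp = name + ".tmp";
        unlinkat(dir_fd, tmp.c_str(), 0);
        fd = openat(dir_fd, tmp.c_str(), O_RDWR | O_CREAT | O_EXCL, 0700);
        if (fd < 0) {
                throw std::runtime_error("cannot create " + tmp);
        }
        if (ftruncate(fd, size) != 0 || renameat(dir_fd, tmp.c_str(), dir_fd, name.c_str()) != 0) {
                close(fd);
                throw std::runtime_error("cannot initialize " + tmp);
        }
        return fd;
}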
@@ -579,7 +579,37 @@ BeesHashTable::set_shared(bool shared)
         m_shared = shared;
 }
 
-BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename) :
+void
+BeesHashTable::open_file()
+{
+        // OK open hash table
+        BEESNOTE("opening hash table '" << m_filename << "' target size " << m_size << " (" << pretty(m_size) << ")");
+
+        // Try to open existing hash table
+        Fd new_fd = openat(m_ctx->home_fd(), m_filename.c_str(), FLAGS_OPEN_FILE_RW, 0700);
+
+        // If that doesn't work, try to make a new one
+        if (!new_fd) {
+                string tmp_filename = m_filename + ".tmp";
+                BEESLOGNOTE("creating new hash table '" << tmp_filename << "'");
+                unlinkat(m_ctx->home_fd(), tmp_filename.c_str(), 0);
+                new_fd = openat_or_die(m_ctx->home_fd(), tmp_filename, FLAGS_CREATE_FILE, 0700);
+                BEESLOGNOTE("truncating new hash table '" << tmp_filename << "' size " << m_size << " (" << pretty(m_size) << ")");
+                ftruncate_or_die(new_fd, m_size);
+                BEESLOGNOTE("truncating new hash table '" << tmp_filename << "' -> '" << m_filename << "'");
+                renameat_or_die(m_ctx->home_fd(), tmp_filename, m_ctx->home_fd(), m_filename);
+        }
+
+        Stat st(new_fd);
+        off_t new_size = st.st_size;
+
+        THROW_CHECK1(invalid_argument, new_size, new_size > 0);
+        THROW_CHECK1(invalid_argument, new_size, (new_size % BLOCK_SIZE_HASHTAB_EXTENT) == 0);
+        m_size = new_size;
+        m_fd = new_fd;
+}
+
+BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t size) :
         m_ctx(ctx),
         m_size(0),
         m_void_ptr(nullptr),
@@ -592,16 +622,7 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename) :
         m_prefetch_rate_limit(BEES_FLUSH_RATE),
         m_stats_file(m_ctx->home_fd(), "beesstats.txt")
 {
-        BEESNOTE("opening hash table " << filename);
-
-        m_fd = openat_or_die(m_ctx->home_fd(), filename, FLAGS_OPEN_FILE_RW, 0700);
-        Stat st(m_fd);
-        m_size = st.st_size;
-
-        BEESTRACE("hash table size " << m_size);
-        BEESTRACE("hash table bucket size " << BLOCK_SIZE_HASHTAB_BUCKET);
-        BEESTRACE("hash table extent size " << BLOCK_SIZE_HASHTAB_EXTENT);
-
+        // Sanity checks to protect the implementation from its weaknesses
         THROW_CHECK2(invalid_argument, BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_EXTENT, (BLOCK_SIZE_HASHTAB_EXTENT % BLOCK_SIZE_HASHTAB_BUCKET) == 0);
 
         // Does the union work?
@@ -616,6 +637,16 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename) :
         THROW_CHECK2(runtime_error, sizeof(Extent), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent));
         THROW_CHECK2(runtime_error, sizeof(Extent::p_byte), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent::p_byte));
 
+        m_filename = filename;
+        m_size = size;
+        open_file();
+
+        // Now we know size we can compute stuff
+
+        BEESTRACE("hash table size " << m_size);
+        BEESTRACE("hash table bucket size " << BLOCK_SIZE_HASHTAB_BUCKET);
+        BEESTRACE("hash table extent size " << BLOCK_SIZE_HASHTAB_EXTENT);
+
         BEESLOG("opened hash table filename '" << filename << "' length " << m_size);
         m_buckets = m_size / BLOCK_SIZE_HASHTAB_BUCKET;
         m_cells = m_buckets * c_cells_per_bucket;
@@ -631,7 +662,7 @@ BeesHashTable::BeesHashTable(shared_ptr<BeesContext> ctx, string filename) :
         }
 
         if (!m_cell_ptr) {
-                THROW_ERROR(runtime_error, "unable to mmap " << filename);
+                THROW_ERRNO("unable to mmap " << filename);
         }
 
         if (!using_shared_map()) {
@@ -411,7 +411,7 @@ public:
                 uint8_t p_byte[BLOCK_SIZE_HASHTAB_EXTENT];
         } __attribute__((packed));
 
-        BeesHashTable(shared_ptr<BeesContext> ctx, string filename);
+        BeesHashTable(shared_ptr<BeesContext> ctx, string filename, off_t size = BLOCK_SIZE_HASHTAB_EXTENT);
         ~BeesHashTable();
 
         vector<Cell> find_cell(HashType hash);
@@ -458,6 +458,7 @@ private:
 
         DefaultBool m_shared;
 
+        void open_file();
         void writeback_loop();
         void prefetch_loop();
         void try_mmap_flags(int flags);
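For reference, the bees.h hunk above is what gives callers the 16MB default mentioned in the commit message, and open_file() only accepts a file whose size is a positive multiple of one extent. Below is a self-contained sketch of that size policy; the constant and names are stand-ins, since the real BLOCK_SIZE_HASHTAB_EXTENT is defined elsewhere in the bees sources:

#include <sys/types.h>
#include <iostream>
#include <stdexcept>

// Stand-in for BLOCK_SIZE_HASHTAB_EXTENT; the commit message puts it at 16MB.
static const off_t HASHTAB_EXTENT = off_t(16) * 1024 * 1024;

// Mirrors the checks open_file() applies: the table size must be positive
// and a whole number of extents; callers that pass nothing get one extent.
off_t check_table_size(off_t size = HASHTAB_EXTENT)
{
        if (size <= 0 || size % HASHTAB_EXTENT != 0) {
                throw std::invalid_argument("hash table size must be a positive multiple of one extent");
        }
        return size;
}

int main()
{
        std::cout << check_table_size() << std::endl;                   // default: one 16MB extent
        std::cout << check_table_size(4 * HASHTAB_EXTENT) << std::endl; // explicit larger table
        return 0;
}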