From 6fa8de660b9850640e1213791020e82a9d170af9 Mon Sep 17 00:00:00 2001
From: Zygo Blaxell
Date: Thu, 1 Dec 2016 23:22:01 -0500
Subject: [PATCH] hash: create beeshash.dat if it does not exist

BeesHashTable can now create a beeshash.dat if the file does not
already exist.

Currently the default size is one hash table extent (16MB) and there's
no way to change that (yet), so users should still create their own
hash tables for now.

The opening of the hash table is deferred (slightly) in preparation
for hash table resizing.

No doc as the feature is currently unfinished.
---
Review notes (text in this section is not part of the commit message):
 - open_file() creates a new table only when openat() fails with ENOENT.
   Previously any open failure led to the rename step, which would
   replace an existing table with a newly truncated empty one.
 - The log message before renameat_or_die() said "truncating"; it now
   says "renaming".
 - Added a header comment to open_file().
 - The From and index hashes are those of the original commit.

 src/bees-hash.cc | 78 ++++++++++++++++++++++++++++++++++++++++-------
 src/bees.h       |  3 ++-
 2 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/src/bees-hash.cc b/src/bees-hash.cc
index 2fa302b..cec1d3c 100644
--- a/src/bees-hash.cc
+++ b/src/bees-hash.cc
@@ -579,7 +579,60 @@ BeesHashTable::set_shared(bool shared)
 	m_shared = shared;
 }
 
-BeesHashTable::BeesHashTable(shared_ptr ctx, string filename) :
+/**
+ * Open the hash table file, creating it if it does not exist.
+ *
+ * The file is m_filename, resolved relative to m_ctx->home_fd().  When it
+ * is missing, a new file is built as m_filename + ".tmp", sized to m_size
+ * with ftruncate_or_die(), and renamed into place.  Afterwards m_fd holds
+ * the open descriptor and m_size is overwritten with the size from Stat.
+ *
+ * Throws via THROW_ERRNO if the existing file cannot be opened for a
+ * reason other than ENOENT, and via THROW_CHECK1 (invalid_argument) if the
+ * resulting size is not a positive multiple of BLOCK_SIZE_HASHTAB_EXTENT.
+ */
+void
+BeesHashTable::open_file()
+{
+	// OK open hash table
+	BEESNOTE("opening hash table '" << m_filename << "' target size " << m_size << " (" << pretty(m_size) << ")");
+
+	// Try to open existing hash table.  errno is saved right away so that
+	// nothing done while wrapping the descriptor in an Fd can disturb it.
+	const int raw_fd = openat(m_ctx->home_fd(), m_filename.c_str(), FLAGS_OPEN_FILE_RW, 0700);
+	const int open_errno = errno;
+	Fd new_fd = raw_fd;
+
+	// If that doesn't work, try to make a new one
+	if (!new_fd) {
+		// Only a missing file may be replaced.  After any other failure
+		// (EACCES, EMFILE, ...) a valid table may still be on disk, and the
+		// rename below would overwrite it with an empty one.
+		if (open_errno != ENOENT) {
+			errno = open_errno;
+			THROW_ERRNO("unable to open hash table '" << m_filename << "'");
+		}
+
+		string tmp_filename = m_filename + ".tmp";
+		BEESLOGNOTE("creating new hash table '" << tmp_filename << "'");
+		unlinkat(m_ctx->home_fd(), tmp_filename.c_str(), 0);
+		new_fd = openat_or_die(m_ctx->home_fd(), tmp_filename, FLAGS_CREATE_FILE, 0700);
+		BEESLOGNOTE("truncating new hash table '" << tmp_filename << "' size " << m_size << " (" << pretty(m_size) << ")");
+		ftruncate_or_die(new_fd, m_size);
+		BEESLOGNOTE("renaming new hash table '" << tmp_filename << "' -> '" << m_filename << "'");
+		renameat_or_die(m_ctx->home_fd(), tmp_filename, m_ctx->home_fd(), m_filename);
+	}
+
+	Stat st(new_fd);
+	off_t new_size = st.st_size;
+
+	THROW_CHECK1(invalid_argument, new_size, new_size > 0);
+	THROW_CHECK1(invalid_argument, new_size, (new_size % BLOCK_SIZE_HASHTAB_EXTENT) == 0);
+	m_size = new_size;
+	m_fd = new_fd;
+}
+
+BeesHashTable::BeesHashTable(shared_ptr ctx, string filename, off_t size) :
 	m_ctx(ctx),
 	m_size(0),
 	m_void_ptr(nullptr),
@@ -592,16 +645,7 @@ BeesHashTable::BeesHashTable(shared_ptr ctx, string filename) :
 	m_prefetch_rate_limit(BEES_FLUSH_RATE),
 	m_stats_file(m_ctx->home_fd(), "beesstats.txt")
 {
-	BEESNOTE("opening hash table " << filename);
-
-	m_fd = openat_or_die(m_ctx->home_fd(), filename, FLAGS_OPEN_FILE_RW, 0700);
-	Stat st(m_fd);
-	m_size = st.st_size;
-
-	BEESTRACE("hash table size " << m_size);
-	BEESTRACE("hash table bucket size " << BLOCK_SIZE_HASHTAB_BUCKET);
-	BEESTRACE("hash table extent size " << BLOCK_SIZE_HASHTAB_EXTENT);
-
+	// Sanity checks to protect the implementation from its weaknesses
 	THROW_CHECK2(invalid_argument, BLOCK_SIZE_HASHTAB_BUCKET, BLOCK_SIZE_HASHTAB_EXTENT, (BLOCK_SIZE_HASHTAB_EXTENT % BLOCK_SIZE_HASHTAB_BUCKET) == 0);
 
 	// Does the union work?
@@ -616,6 +660,16 @@ BeesHashTable::BeesHashTable(shared_ptr ctx, string filename) :
 	THROW_CHECK2(runtime_error, sizeof(Extent), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent));
 	THROW_CHECK2(runtime_error, sizeof(Extent::p_byte), BLOCK_SIZE_HASHTAB_EXTENT, BLOCK_SIZE_HASHTAB_EXTENT == sizeof(Extent::p_byte));
 
+	m_filename = filename;
+	m_size = size;
+	open_file();
+
+	// Now we know size we can compute stuff
+
+	BEESTRACE("hash table size " << m_size);
+	BEESTRACE("hash table bucket size " << BLOCK_SIZE_HASHTAB_BUCKET);
+	BEESTRACE("hash table extent size " << BLOCK_SIZE_HASHTAB_EXTENT);
+
 	BEESLOG("opened hash table filename '" << filename << "' length " << m_size);
 	m_buckets = m_size / BLOCK_SIZE_HASHTAB_BUCKET;
 	m_cells = m_buckets * c_cells_per_bucket;
@@ -631,7 +685,7 @@ BeesHashTable::BeesHashTable(shared_ptr ctx, string filename) :
 	}
 
 	if (!m_cell_ptr) {
-		THROW_ERROR(runtime_error, "unable to mmap " << filename);
+		THROW_ERRNO("unable to mmap " << filename);
 	}
 
 	if (!using_shared_map()) {
diff --git a/src/bees.h b/src/bees.h
index c823bea..154b695 100644
--- a/src/bees.h
+++ b/src/bees.h
@@ -411,7 +411,7 @@ public:
 		uint8_t p_byte[BLOCK_SIZE_HASHTAB_EXTENT];
 	} __attribute__((packed));
 
-	BeesHashTable(shared_ptr ctx, string filename);
+	BeesHashTable(shared_ptr ctx, string filename, off_t size = BLOCK_SIZE_HASHTAB_EXTENT);
 	~BeesHashTable();
 
 	vector find_cell(HashType hash);
@@ -458,6 +458,7 @@ private:
 
 	DefaultBool m_shared;
 
+	void open_file();
 	void writeback_loop();
 	void prefetch_loop();
 	void try_mmap_flags(int flags);