From 924008603e398fc39717ab2e18c9df7336e0bd19 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Mon, 8 Oct 2018 23:31:16 -0400 Subject: [PATCH] hash: reduce hash table extent size to 128KB The 16MB hash table extent size did not serve any useful defragmentation or compression purpose, and for very small filesystems (under 100GB), 16MB is much larger than necessary. Signed-off-by: Zygo Blaxell --- docs/how-it-works.md | 2 +- docs/running.md | 4 ++-- src/bees.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/how-it-works.md b/docs/how-it-works.md index b3fa62c..78cc421 100644 --- a/docs/how-it-works.md +++ b/docs/how-it-works.md @@ -16,7 +16,7 @@ be stored on a different btrfs filesystem, ext4, or even CIFS. bees uses a persistent dedupe hash table with a fixed size configured by the user. Any size of hash table can be dedicated to dedupe. If a fast dedupe with low hit rate is desired, bees can use a hash table as -small as 16MB. +small as 128KB. The bees hash table is loaded into RAM at startup and `mlock`ed so it will not be swapped out by the kernel (if swap is permitted, performance diff --git a/docs/running.md b/docs/running.md index d604891..3ac1be3 100644 --- a/docs/running.md +++ b/docs/running.md @@ -13,7 +13,7 @@ Create a directory for bees state files: mkdir -p "$BEESHOME" Create an empty hash table ([your choice of size](config.md), but it -must be a multiple of 16MB). This example creates a 1GB hash table: +must be a multiple of 128KB). This example creates a 1GB hash table: truncate -s 1g "$BEESHOME/beeshash.dat" chmod 700 "$BEESHOME/beeshash.dat" @@ -50,7 +50,7 @@ Configuration There are some runtime configurable options using environment variables: * BEESHOME: Directory containing bees state files: - * beeshash.dat | persistent hash table. Must be a multiple of 16MB, and must be created before bees starts. + * beeshash.dat | persistent hash table. Must be a multiple of 128KB, and must be created before bees starts. * beescrawl.dat | state of SEARCH_V2 crawlers. ASCII text. bees will create this. * beesstats.txt | statistics and performance counters. ASCII text. bees will create this. * BEESSTATUS: File containing a snapshot of current bees state: performance diff --git a/src/bees.h b/src/bees.h index da87d88..83231ee 100644 --- a/src/bees.h +++ b/src/bees.h @@ -56,7 +56,7 @@ const off_t BLOCK_SIZE_MAX_TEMP_FILE = 1024 * 1024 * 1024; const off_t BLOCK_SIZE_HASHTAB_BUCKET = BLOCK_SIZE_MMAP; // Extent size for hash table (since the nocow file attribute does not seem to be working today) -const off_t BLOCK_SIZE_HASHTAB_EXTENT = 16 * 1024 * 1024; +const off_t BLOCK_SIZE_HASHTAB_EXTENT = BLOCK_SIZE_MAX_COMPRESSED_EXTENT; // Bytes per second we want to flush (8GB every two hours) const double BEES_FLUSH_RATE = 8.0 * 1024 * 1024 * 1024 / 7200.0;