From 0bbaddd54ce34d6fbf80365cdffd215d65e45699 Mon Sep 17 00:00:00 2001
From: Zygo Blaxell
Date: Wed, 28 Apr 2021 22:14:56 -0400
Subject: [PATCH] docs: finally concede that the consensus spelling is "dedupe"

Change documentation and comments to use the word "dedupe," not "dedup"
as found in circa-3.15 kernel sources.

No changes in code or program output--if they used "dedup" before, they
will continue to be spelled "dedup" now.

Signed-off-by: Zygo Blaxell
---
 docs/event-counters.md    |  2 +-
 include/crucible/btrfs.h  |  2 +-
 scripts/beesd.conf.sample |  2 +-
 src/bees-context.cc       |  6 +++---
 src/bees-resolve.cc       |  2 +-
 src/bees-roots.cc         | 14 +++++++-------
 src/bees-types.cc         |  2 +-
 src/bees.cc               |  2 +-
 src/bees.h                |  6 +++---
 9 files changed, 19 insertions(+), 19 deletions(-)
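
Editor's note on the include/crucible/btrfs.h hunk below: it documents the
dedupe status convention (0 on success, < 0 for an error, "data differs"
otherwise). As an illustrative sketch only -- not bees code -- this is how a
caller might drive the generic FIDEDUPERANGE ioctl (the modern name for
BTRFS_IOC_FILE_EXTENT_SAME) and check that per-destination status. The helper
name dedupe_one is hypothetical.

    #include <linux/fs.h>
    #include <sys/ioctl.h>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Hypothetical helper, not part of bees: dedupe `length` bytes at
    // src_off in src_fd against dst_off in dst_fd, one destination only.
    bool dedupe_one(int src_fd, uint64_t src_off, int dst_fd, uint64_t dst_off, uint64_t length)
    {
            // file_dedupe_range ends in a flexible array member, so allocate
            // room for the header plus one file_dedupe_range_info slot.
            alignas(file_dedupe_range) char buf[sizeof(file_dedupe_range) + sizeof(file_dedupe_range_info)] = {};
            auto *args = reinterpret_cast<file_dedupe_range *>(buf);
            args->src_offset = src_off;
            args->src_length = length;
            args->dest_count = 1;
            args->info[0].dest_fd = dst_fd;
            args->info[0].dest_offset = dst_off;
            if (ioctl(src_fd, FIDEDUPERANGE, args) < 0) {
                    perror("FIDEDUPERANGE"); // the ioctl itself failed
                    return false;
            }
            // Per-destination status follows the convention in the hunk below:
            // 0 (FILE_DEDUPE_RANGE_SAME) on success, FILE_DEDUPE_RANGE_DIFFERS
            // if the data was not identical, or a negative errno.
            if (args->info[0].status == FILE_DEDUPE_RANGE_DIFFERS) {
                    fprintf(stderr, "data differs\n");
                    return false;
            }
            if (args->info[0].status < 0) {
                    fprintf(stderr, "dedupe: %s\n", strerror(-args->info[0].status));
                    return false;
            }
            return args->info[0].bytes_deduped == length;
    }
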
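The sizing table in the scripts/beesd.conf.sample hunk below follows from two
facts stated in the sample config: entries are 16 bytes each, and each entry
covers one average-sized dedupe block. A throwaway check of the arithmetic
(editor's sketch, not shipped code):

    #include <cstdint>
    #include <cstdio>
    #include <initializer_list>

    int main()
    {
            const uint64_t entry_bytes = 16;         // 64-bit hash, 52-bit block, metadata
            const uint64_t unique_data = 1ULL << 40; // 1TB of unique data
            for (unsigned block : {4096u, 16384u, 65536u}) {
                    uint64_t entries = unique_data / block; // one entry per average dedupe block
                    uint64_t table_mb = entries * entry_bytes >> 20;
                    printf("%2uK block -> %4lluMB table\n", block / 1024,
                           (unsigned long long)table_mb);
            }
    }
    // Prints 4096MB, 1024MB, 256MB: the 4GB/1GB/256MB rows of the table.
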
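The src/bees-roots.cc hunk below explains that nodatacow files (which imply
nodatasum) are skipped by pretending the open failed. One plausible way to
detect the flag, sketched with a hypothetical helper; bees' actual detection
code may differ:

    #include <linux/fs.h>
    #include <sys/ioctl.h>

    // Hypothetical helper: true if the open file has the NOCOW inode flag,
    // i.e. it was created nodatacow and therefore carries no data checksums.
    bool is_nodatacow(int fd)
    {
            long flags = 0;
            if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
                    return false; // can't tell; let the dedupe ioctl reject it later
            }
            return flags & FS_NOCOW_FL;
    }
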
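The src/bees.cc hunk below sits inside BeesStringFile::write, which writes a
temporary file and renames it over the old one, with the fsync deliberately
compiled out because of the btrfs bugs the comment describes. For contrast,
the conventional pattern looks like this (a generic sketch under that
assumption, not the bees implementation):

    #include <fcntl.h>
    #include <unistd.h>
    #include <cstdio>
    #include <string>

    // Generic write-temp-then-rename: readers see either the old contents
    // or the complete new contents, never a partial write.
    bool replace_file(const std::string &path, const std::string &contents)
    {
            std::string tmp = path + ".tmp";
            int fd = open(tmp.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
            if (fd < 0) return false;
            bool ok = write(fd, contents.data(), contents.size()) == (ssize_t)contents.size();
            ok = fsync(fd) == 0 && ok; // the step bees disables in the #if 0 below
            close(fd);
            return ok && rename(tmp.c_str(), path.c_str()) == 0;
    }
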
diff --git a/docs/event-counters.md b/docs/event-counters.md
index 24f7708..1471a6a 100644
--- a/docs/event-counters.md
+++ b/docs/event-counters.md
@@ -140,7 +140,7 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
 dedup
 -----
 
-The `dedup` event group consists of operations that deduplicate data.
+The `dedup` (sic) event group consists of operations that deduplicate data.
 
 * `dedup_bytes`: Total bytes in extent references deduplicated.
 * `dedup_copy`: Total bytes copied to eliminate unique data in extents containing a mix of unique and duplicate data.
diff --git a/include/crucible/btrfs.h b/include/crucible/btrfs.h
index 63c67d6..798c4b7 100644
--- a/include/crucible/btrfs.h
+++ b/include/crucible/btrfs.h
@@ -162,7 +162,7 @@ enum btrfs_compression_type {
 	__u64 bytes_deduped;	/* out - total # of bytes we were able
 				 * to dedupe from this file */
 	/* status of this dedupe operation:
-	 * 0 if dedup succeeds
+	 * 0 if dedupe succeeds
 	 * < 0 for error
 	 * == BTRFS_SAME_DATA_DIFFERS if data differs
 	 */
diff --git a/scripts/beesd.conf.sample b/scripts/beesd.conf.sample
index f9cc9e9..845be86 100644
--- a/scripts/beesd.conf.sample
+++ b/scripts/beesd.conf.sample
@@ -23,7 +23,7 @@ UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
 # sHash table entries are 16 bytes each
 # (64-bit hash, 52-bit block number, and some metadata bits)
 # Each entry represents a minimum of 4K on disk.
-# unique data size    hash table size    average dedup block size
+# unique data size    hash table size    average dedupe block size
 #     1TB                  4GB                  4K
 #     1TB                  1GB                 16K
 #     1TB                256MB                 64K
diff --git a/src/bees-context.cc b/src/bees-context.cc
index b15cc1f..426525a 100644
--- a/src/bees-context.cc
+++ b/src/bees-context.cc
@@ -328,7 +328,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 			// Apparently they can both extend past EOF
 			BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
 			BeesRangePair brp(prealloc_bfr, copy_bfr);
-			// Raw dedup here - nothing else to do with this extent, nothing to merge with
+			// Raw dedupe here - nothing else to do with this extent, nothing to merge with
 			if (m_ctx->dedup(brp)) {
 				BEESCOUNT(dedup_prealloc_hit);
 				BEESCOUNTADD(dedup_prealloc_bytes, e.size());
@@ -338,7 +338,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 			}
 		}
 	).run();
-	return bfr; // if dedup success, which we now blindly assume
+	return bfr; // if dedupe success, which we now blindly assume
 }
 
 // OK we need to read extent now
@@ -596,7 +596,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 	// If the extent contains obscured blocks, and we can find no
 	// other refs to the extent that reveal those blocks, nuke the incoming extent.
 	// Don't rewrite extents that are bigger than the maximum FILE_EXTENT_SAME size
-	// because we can't make extents that large with dedup.
+	// because we can't make extents that large with dedupe.
 	// Don't rewrite small extents because it is a waste of time without being
 	// able to combine them into bigger extents.
 	if (!rewrite_extent && (e.flags() & Extent::OBSCURED) && (e.physical_len() > BLOCK_SIZE_MAX_COMPRESSED_EXTENT) && (e.physical_len() < BLOCK_SIZE_MAX_EXTENT_SAME)) {
diff --git a/src/bees-resolve.cc b/src/bees-resolve.cc
index 5a877cc..ae80c2c 100644
--- a/src/bees-resolve.cc
+++ b/src/bees-resolve.cc
@@ -438,7 +438,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
 			BEESCOUNT(replacedst_dedup_hit);
 			m_found_dup = true;
 			overlap_bfr = brp.second;
-			// FIXME: find best range first, then dedup that
+			// FIXME: find best range first, then dedupe that
 			return true; // i.e. break
 		} else {
 			BEESCOUNT(replacedst_dedup_miss);
diff --git a/src/bees-roots.cc b/src/bees-roots.cc
index 92c6a1c..01f47a1 100644
--- a/src/bees-roots.cc
+++ b/src/bees-roots.cc
@@ -820,7 +820,7 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
 	for (auto file_path : ipa.m_paths) {
 		BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
 		BEESCOUNT(open_file);
-		// Just open file RO. root can do the dedup ioctl without
+		// Just open file RO. root can do the dedupe ioctl without
 		// opening in write mode, and if we do open in write mode,
 		// we can't exec the file while we have it open.
 		const char *fp_cstr = file_path.c_str();
@@ -864,19 +864,19 @@
 			break;
 		}
 
-		// The kernel rejects dedup requests with
+		// The kernel rejects dedupe requests with
 		// src and dst that have different datasum flags
 		// (datasum is a flag in the inode).
 		//
 		// We can detect the common case where a file is
 		// marked with nodatacow (which implies nodatasum).
-		// nodatacow files are arguably out of scope for dedup,
-		// since dedup would just make them datacow again.
+		// nodatacow files are arguably out of scope for dedupe,
+		// since dedupe would just make them datacow again.
 		// To handle these we pretend we couldn't open them.
 		//
 		// A less common case is nodatasum + datacow files.
-		// Those are availble for dedup but we have to solve
-		// some other problems before we can dedup them. They
+		// Those are available for dedupe but we have to solve
+		// some other problems before we can dedupe them. They
 		// require a separate hash table namespace from datasum
 		// + datacow files, and we have to create nodatasum
 		// temporary files when we rewrite extents.
@@ -992,7 +992,7 @@
 	// Check for btrfs send workaround: don't scan RO roots at all, pretend
 	// they are just empty. We can't free any space there, and we
 	// don't have the necessary analysis logic to be able to use
-	// them as dedup src extents (yet).
+	// them as dedupe src extents (yet).
 	//
 	// This will keep the max_transid up to date so if the root
 	// is ever switched back to read-write, it won't trigger big
diff --git a/src/bees-types.cc b/src/bees-types.cc
index 5960892..1bc9380 100644
--- a/src/bees-types.cc
+++ b/src/bees-types.cc
@@ -960,7 +960,7 @@ BeesHash
 BeesBlockData::hash() const
 {
 	if (!m_hash_done) {
-		// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
+		// We can only dedupe unaligned EOF blocks against other unaligned EOF blocks,
 		// so we do NOT round up to a full sum block size.
 		const Blob &blob = data();
 		m_hash = BeesHash(blob.data(), blob.size());
diff --git a/src/bees.cc b/src/bees.cc
index c51d830..03b8215 100644
--- a/src/bees.cc
+++ b/src/bees.cc
@@ -426,7 +426,7 @@ BeesStringFile::write(string contents)
 		write_or_die(ofd, contents);
 #if 0
 		// This triggers too many btrfs bugs. I wish I was kidding.
-		// Forget snapshots, balance, compression, and dedup:
+		// Forget snapshots, balance, compression, and dedupe:
 		// the system call you have to fear on btrfs is fsync().
 		// Also note that when bees renames a temporary over an
 		// existing file, it flushes the temporary, so we get
diff --git a/src/bees.h b/src/bees.h
index 257daeb..19b53d8 100644
--- a/src/bees.h
+++ b/src/bees.h
@@ -29,7 +29,7 @@ using namespace std;
 // Block size for clone alignment (FIXME: should read this from /sys/fs/btrfs/<FS-UUID>/clone_alignment)
 const off_t BLOCK_SIZE_CLONE = 4096;
 
-// Block size for dedup checksums (arbitrary, but must be a multiple of clone alignment)
+// Block size for dedupe checksums (arbitrary, but must be a multiple of clone alignment)
 const off_t BLOCK_SIZE_SUMS = 4096;
 
 // Block size for memory allocations and file mappings (FIXME: should be CPU page size)
@@ -805,10 +805,10 @@ class BeesResolver {
 	set<BeesFileRange>	m_ranges;
 	unsigned		m_bior_count;
 
-	// We found matching data, so we can dedup
+	// We found matching data, so we can dedupe
 	bool			m_found_data = false;
 
-	// We found matching data, so we *did* dedup
+	// We found matching data, so we *did* dedupe
 	bool			m_found_dup = false;
 
 	// We found matching hash, so the hash table is still correct