mirror of https://github.com/Zygo/bees.git
synced 2025-05-17 05:15:45 +02:00

docs: finally concede that the consensus spelling is "dedupe"

Change documentation and comments to use the word "dedupe," not "dedup"
as found in circa-3.15 kernel sources.  No changes in code or program
output--if they used "dedup" before, they will continue to be spelled
"dedup" now.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>

parent 06a46e2736
commit 0bbaddd54c
@@ -140,7 +140,7 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
 dedup
 -----
 
-The `dedup` event group consists of operations that deduplicate data.
+The `dedup` (sic) event group consists of operations that deduplicate data.
 
 * `dedup_bytes`: Total bytes in extent references deduplicated.
 * `dedup_copy`: Total bytes copied to eliminate unique data in extents containing a mix of unique and duplicate data.
@@ -162,7 +162,7 @@ enum btrfs_compression_type {
 	__u64 bytes_deduped;	/* out - total # of bytes we were able
 				 * to dedupe from this file */
 	/* status of this dedupe operation:
-	 * 0 if dedup succeeds
+	 * 0 if dedupe succeeds
 	 * < 0 for error
 	 * == BTRFS_SAME_DATA_DIFFERS if data differs
 	 */
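(The struct fields above are the out-parameters of the btrfs same-extent ioctl. As a minimal sketch of how a caller drives them: the helper name dedupe_one_range is hypothetical, but the struct, ioctl, and status constants are the ones in linux/btrfs.h; error handling is abbreviated.)

    // Sketch: submit one dedupe request and return the per-range status
    // described in the comment above (0, -errno, or BTRFS_SAME_DATA_DIFFERS).
    #include <linux/btrfs.h>   // btrfs_ioctl_same_args, BTRFS_IOC_FILE_EXTENT_SAME
    #include <sys/ioctl.h>
    #include <cerrno>
    #include <cstdint>
    #include <cstdlib>

    int dedupe_one_range(int src_fd, uint64_t src_off,
                         int dst_fd, uint64_t dst_off, uint64_t length)
    {
        size_t sz = sizeof(btrfs_ioctl_same_args) + sizeof(btrfs_ioctl_same_extent_info);
        auto *args = static_cast<btrfs_ioctl_same_args *>(calloc(1, sz));
        args->logical_offset = src_off;          // in - start of extent in source
        args->length = length;                   // in - length of extent
        args->dest_count = 1;                    // one destination range
        args->info[0].fd = dst_fd;
        args->info[0].logical_offset = dst_off;

        int rv = ioctl(src_fd, BTRFS_IOC_FILE_EXTENT_SAME, args);
        // args->info[0].bytes_deduped reports how much of `length` was deduplicated.
        int status = (rv < 0) ? -errno : args->info[0].status;
        free(args);
        return status;
    }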
@@ -23,7 +23,7 @@ UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
 # Hash table entries are 16 bytes each
 # (64-bit hash, 52-bit block number, and some metadata bits)
 # Each entry represents a minimum of 4K on disk.
-# unique data size    hash table size    average dedup block size
+# unique data size    hash table size    average dedupe block size
 #     1TB                  4GB                  4K
 #     1TB                  1GB                 16K
 #     1TB                256MB                 64K
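(The sizing table is straightforward arithmetic: 16 bytes per entry, one entry per average dedupe block of unique data. A sketch of the calculation; hash_table_bytes is a hypothetical helper, not part of beesd:)

    #include <cstdint>

    // Hypothetical helper reproducing the table above:
    // one 16-byte entry per average-sized dedupe block of unique data.
    constexpr uint64_t hash_table_bytes(uint64_t unique_data, uint64_t avg_block)
    {
        return unique_data / avg_block * 16;
    }

    // hash_table_bytes(1ULL << 40, 4096)  == 4 GiB   (first row)
    // hash_table_bytes(1ULL << 40, 65536) == 256 MiB (last row)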
@@ -328,7 +328,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 		// Apparently they can both extend past EOF
 		BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
 		BeesRangePair brp(prealloc_bfr, copy_bfr);
-		// Raw dedup here - nothing else to do with this extent, nothing to merge with
+		// Raw dedupe here - nothing else to do with this extent, nothing to merge with
 		if (m_ctx->dedup(brp)) {
 			BEESCOUNT(dedup_prealloc_hit);
 			BEESCOUNTADD(dedup_prealloc_bytes, e.size());
@@ -338,7 +338,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 			}
 		}
 	).run();
-	return bfr; // if dedup success, which we now blindly assume
+	return bfr; // if dedupe success, which we now blindly assume
 }
 
 // OK we need to read extent now
@@ -596,7 +596,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 	// If the extent contains obscured blocks, and we can find no
 	// other refs to the extent that reveal those blocks, nuke the incoming extent.
 	// Don't rewrite extents that are bigger than the maximum FILE_EXTENT_SAME size
-	// because we can't make extents that large with dedup.
+	// because we can't make extents that large with dedupe.
 	// Don't rewrite small extents because it is a waste of time without being
 	// able to combine them into bigger extents.
 	if (!rewrite_extent && (e.flags() & Extent::OBSCURED) && (e.physical_len() > BLOCK_SIZE_MAX_COMPRESSED_EXTENT) && (e.physical_len() < BLOCK_SIZE_MAX_EXTENT_SAME)) {
@@ -438,7 +438,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
 			BEESCOUNT(replacedst_dedup_hit);
 			m_found_dup = true;
 			overlap_bfr = brp.second;
-			// FIXME: find best range first, then dedup that
+			// FIXME: find best range first, then dedupe that
 			return true; // i.e. break
 		} else {
 			BEESCOUNT(replacedst_dedup_miss);
@@ -820,7 +820,7 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
 		for (auto file_path : ipa.m_paths) {
 			BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
 			BEESCOUNT(open_file);
-			// Just open file RO. root can do the dedup ioctl without
+			// Just open file RO. root can do the dedupe ioctl without
 			// opening in write mode, and if we do open in write mode,
 			// we can't exec the file while we have it open.
 			const char *fp_cstr = file_path.c_str();
@@ -864,19 +864,19 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
 				break;
 			}
 
-			// The kernel rejects dedup requests with
+			// The kernel rejects dedupe requests with
 			// src and dst that have different datasum flags
 			// (datasum is a flag in the inode).
 			//
 			// We can detect the common case where a file is
 			// marked with nodatacow (which implies nodatasum).
-			// nodatacow files are arguably out of scope for dedup,
-			// since dedup would just make them datacow again.
+			// nodatacow files are arguably out of scope for dedupe,
+			// since dedupe would just make them datacow again.
 			// To handle these we pretend we couldn't open them.
 			//
 			// A less common case is nodatasum + datacow files.
-			// Those are availble for dedup but we have to solve
-			// some other problems before we can dedup them. They
+			// Those are availble for dedupe but we have to solve
+			// some other problems before we can dedupe them. They
 			// require a separate hash table namespace from datasum
 			// + datacow files, and we have to create nodatasum
 			// temporary files when we rewrite extents.
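(The nodatacow detection mentioned above can be done from userspace with the inode-flags ioctl. An illustrative sketch under that assumption, not bees's actual code; is_nodatacow is a hypothetical name:)

    #include <linux/fs.h>    // FS_IOC_GETFLAGS, FS_NOCOW_FL
    #include <sys/ioctl.h>

    // Treat FS_NOCOW_FL files as unopenable for dedupe: nodatacow implies
    // nodatasum, and the kernel rejects dedupe across mismatched datasum flags.
    bool is_nodatacow(int fd)
    {
        int flags = 0;
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
            return false;    // can't tell; let the dedupe ioctl decide
        }
        return flags & FS_NOCOW_FL;
    }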
@@ -992,7 +992,7 @@ BeesCrawl::fetch_extents()
 	// Check for btrfs send workaround: don't scan RO roots at all, pretend
 	// they are just empty. We can't free any space there, and we
 	// don't have the necessary analysis logic to be able to use
-	// them as dedup src extents (yet).
+	// them as dedupe src extents (yet).
 	//
 	// This will keep the max_transid up to date so if the root
 	// is ever switched back to read-write, it won't trigger big
@@ -960,7 +960,7 @@ BeesHash
 BeesBlockData::hash() const
 {
 	if (!m_hash_done) {
-		// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
+		// We can only dedupe unaligned EOF blocks against other unaligned EOF blocks,
 		// so we do NOT round up to a full sum block size.
 		const Blob &blob = data();
 		m_hash = BeesHash(blob.data(), blob.size());
@@ -426,7 +426,7 @@ BeesStringFile::write(string contents)
 		write_or_die(ofd, contents);
 #if 0
 		// This triggers too many btrfs bugs. I wish I was kidding.
-		// Forget snapshots, balance, compression, and dedup:
+		// Forget snapshots, balance, compression, and dedupe:
 		// the system call you have to fear on btrfs is fsync().
 		// Also note that when bees renames a temporary over an
 		// existing file, it flushes the temporary, so we get
@@ -29,7 +29,7 @@ using namespace std;
 // Block size for clone alignment (FIXME: should read this from /sys/fs/btrfs/<FS-UUID>/clone_alignment)
 const off_t BLOCK_SIZE_CLONE = 4096;
 
-// Block size for dedup checksums (arbitrary, but must be a multiple of clone alignment)
+// Block size for dedupe checksums (arbitrary, but must be a multiple of clone alignment)
 const off_t BLOCK_SIZE_SUMS = 4096;
 
 // Block size for memory allocations and file mappings (FIXME: should be CPU page size)
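(Since BLOCK_SIZE_SUMS must be a multiple of the clone alignment, the constraint could be pinned down at compile time; an illustrative guard that is not in the source:)

    static_assert(BLOCK_SIZE_SUMS % BLOCK_SIZE_CLONE == 0,
                  "dedupe checksum block size must be a multiple of clone alignment");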
@@ -805,10 +805,10 @@ class BeesResolver {
 	set<BeesFileRange> m_ranges;
 	unsigned m_bior_count;
 
-	// We found matching data, so we can dedup
+	// We found matching data, so we can dedupe
 	bool m_found_data = false;
 
-	// We found matching data, so we *did* dedup
+	// We found matching data, so we *did* dedupe
 	bool m_found_dup = false;
 
 	// We found matching hash, so the hash table is still correct