mirror of
				https://github.com/Zygo/bees.git
				synced 2025-11-04 04:00:36 +01:00 
			
		
		
		
	docs: finally concede that the consensus spelling is "dedupe"
Change documentation and comments to use the word "dedupe," not "dedup" as found in circa-3.15 kernel sources. No changes in code or program output--if they used "dedup" before, they will continue to be spelled "dedup" now. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
		@@ -328,7 +328,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 | 
			
		||||
				// Apparently they can both extend past EOF
 | 
			
		||||
				BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
 | 
			
		||||
				BeesRangePair brp(prealloc_bfr, copy_bfr);
 | 
			
		||||
				// Raw dedup here - nothing else to do with this extent, nothing to merge with
 | 
			
		||||
				// Raw dedupe here - nothing else to do with this extent, nothing to merge with
 | 
			
		||||
				if (m_ctx->dedup(brp)) {
 | 
			
		||||
					BEESCOUNT(dedup_prealloc_hit);
 | 
			
		||||
					BEESCOUNTADD(dedup_prealloc_bytes, e.size());
 | 
			
		||||
@@ -338,7 +338,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		).run();
 | 
			
		||||
		return bfr; // if dedup success, which we now blindly assume
 | 
			
		||||
		return bfr; // if dedupe success, which we now blindly assume
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// OK we need to read extent now
 | 
			
		||||
@@ -596,7 +596,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 | 
			
		||||
	// If the extent contains obscured blocks, and we can find no
 | 
			
		||||
	// other refs to the extent that reveal those blocks, nuke the incoming extent.
 | 
			
		||||
	// Don't rewrite extents that are bigger than the maximum FILE_EXTENT_SAME size
 | 
			
		||||
	// because we can't make extents that large with dedup.
 | 
			
		||||
	// because we can't make extents that large with dedupe.
 | 
			
		||||
	// Don't rewrite small extents because it is a waste of time without being
 | 
			
		||||
	// able to combine them into bigger extents.
 | 
			
		||||
	if (!rewrite_extent && (e.flags() & Extent::OBSCURED) && (e.physical_len() > BLOCK_SIZE_MAX_COMPRESSED_EXTENT) && (e.physical_len() < BLOCK_SIZE_MAX_EXTENT_SAME)) {
 | 
			
		||||
 
 | 
			
		||||
@@ -438,7 +438,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
 | 
			
		||||
			BEESCOUNT(replacedst_dedup_hit);
 | 
			
		||||
			m_found_dup = true;
 | 
			
		||||
			overlap_bfr = brp.second;
 | 
			
		||||
			// FIXME:  find best range first, then dedup that
 | 
			
		||||
			// FIXME:  find best range first, then dedupe that
 | 
			
		||||
			return true; // i.e. break
 | 
			
		||||
		} else {
 | 
			
		||||
			BEESCOUNT(replacedst_dedup_miss);
 | 
			
		||||
 
 | 
			
		||||
@@ -820,7 +820,7 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
 | 
			
		||||
	for (auto file_path : ipa.m_paths) {
 | 
			
		||||
		BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
 | 
			
		||||
		BEESCOUNT(open_file);
 | 
			
		||||
		// Just open file RO.  root can do the dedup ioctl without
 | 
			
		||||
		// Just open file RO.  root can do the dedupe ioctl without
 | 
			
		||||
		// opening in write mode, and if we do open in write mode,
 | 
			
		||||
		// we can't exec the file while we have it open.
 | 
			
		||||
		const char *fp_cstr = file_path.c_str();
 | 
			
		||||
@@ -864,19 +864,19 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// The kernel rejects dedup requests with
 | 
			
		||||
		// The kernel rejects dedupe requests with
 | 
			
		||||
		// src and dst that have different datasum flags
 | 
			
		||||
		// (datasum is a flag in the inode).
 | 
			
		||||
		//
 | 
			
		||||
		// We can detect the common case where a file is
 | 
			
		||||
		// marked with nodatacow (which implies nodatasum).
 | 
			
		||||
		// nodatacow files are arguably out of scope for dedup,
 | 
			
		||||
		// since dedup would just make them datacow again.
 | 
			
		||||
		// nodatacow files are arguably out of scope for dedupe,
 | 
			
		||||
		// since dedupe would just make them datacow again.
 | 
			
		||||
		// To handle these we pretend we couldn't open them.
 | 
			
		||||
		//
 | 
			
		||||
		// A less common case is nodatasum + datacow files.
 | 
			
		||||
		// Those are available for dedup but we have to solve
 | 
			
		||||
		// some other problems before we can dedup them.  They
 | 
			
		||||
		// Those are available for dedupe but we have to solve
 | 
			
		||||
		// some other problems before we can dedupe them.  They
 | 
			
		||||
		// require a separate hash table namespace from datasum
 | 
			
		||||
		// + datacow files, and we have to create nodatasum
 | 
			
		||||
		// temporary files when we rewrite extents.
 | 
			
		||||
@@ -992,7 +992,7 @@ BeesCrawl::fetch_extents()
 | 
			
		||||
	// Check for btrfs send workaround: don't scan RO roots at all, pretend
 | 
			
		||||
	// they are just empty.  We can't free any space there, and we
 | 
			
		||||
	// don't have the necessary analysis logic to be able to use
 | 
			
		||||
	// them as dedup src extents (yet).
 | 
			
		||||
	// them as dedupe src extents (yet).
 | 
			
		||||
	//
 | 
			
		||||
	// This will keep the max_transid up to date so if the root
 | 
			
		||||
	// is ever switched back to read-write, it won't trigger big
 | 
			
		||||
 
 | 
			
		||||
@@ -960,7 +960,7 @@ BeesHash
 | 
			
		||||
BeesBlockData::hash() const
 | 
			
		||||
{
 | 
			
		||||
	if (!m_hash_done) {
 | 
			
		||||
		// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
 | 
			
		||||
		// We can only dedupe unaligned EOF blocks against other unaligned EOF blocks,
 | 
			
		||||
		// so we do NOT round up to a full sum block size.
 | 
			
		||||
		const Blob &blob = data();
 | 
			
		||||
		m_hash = BeesHash(blob.data(), blob.size());
 | 
			
		||||
 
 | 
			
		||||
@@ -426,7 +426,7 @@ BeesStringFile::write(string contents)
 | 
			
		||||
		write_or_die(ofd, contents);
 | 
			
		||||
#if 0
 | 
			
		||||
		// This triggers too many btrfs bugs.  I wish I was kidding.
 | 
			
		||||
		// Forget snapshots, balance, compression, and dedup:
 | 
			
		||||
		// Forget snapshots, balance, compression, and dedupe:
 | 
			
		||||
		// the system call you have to fear on btrfs is fsync().
 | 
			
		||||
		// Also note that when bees renames a temporary over an
 | 
			
		||||
		// existing file, it flushes the temporary, so we get
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ using namespace std;
 | 
			
		||||
// Block size for clone alignment (FIXME: should read this from /sys/fs/btrfs/<FS-UUID>/clone_alignment)
 | 
			
		||||
const off_t BLOCK_SIZE_CLONE = 4096;
 | 
			
		||||
 | 
			
		||||
// Block size for dedup checksums (arbitrary, but must be a multiple of clone alignment)
 | 
			
		||||
// Block size for dedupe checksums (arbitrary, but must be a multiple of clone alignment)
 | 
			
		||||
const off_t BLOCK_SIZE_SUMS = 4096;
 | 
			
		||||
 | 
			
		||||
// Block size for memory allocations and file mappings  (FIXME: should be CPU page size)
 | 
			
		||||
@@ -805,10 +805,10 @@ class BeesResolver {
 | 
			
		||||
	set<BeesFileRange>			m_ranges;
 | 
			
		||||
	unsigned				m_bior_count;
 | 
			
		||||
 | 
			
		||||
	// We found matching data, so we can dedup
 | 
			
		||||
	// We found matching data, so we can dedupe
 | 
			
		||||
	bool					m_found_data = false;
 | 
			
		||||
 | 
			
		||||
	// We found matching data, so we *did* dedup
 | 
			
		||||
	// We found matching data, so we *did* dedupe
 | 
			
		||||
	bool					m_found_dup = false;
 | 
			
		||||
 | 
			
		||||
	// We found matching hash, so the hash table is still correct
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user