mirror of
https://github.com/Zygo/bees.git
synced 2025-10-24 07:47:36 +02:00
docs: finally concede that the consensus spelling is "dedupe"
Change documentation and comments to use the word "dedupe," not "dedup" as found in circa-3.15 kernel sources. No changes in code or program output--identifiers and output strings that were spelled "dedup" before are still spelled "dedup" now. Signed-off-by: Zygo Blaxell <bees@furryterror.org>
This commit is contained in:
@@ -140,7 +140,7 @@ The `crawl` event group consists of operations related to scanning btrfs trees t
|
||||
dedup
|
||||
-----
|
||||
|
||||
The `dedup` event group consists of operations that deduplicate data.
|
||||
The `dedup` (sic) event group consists of operations that deduplicate data.
|
||||
|
||||
* `dedup_bytes`: Total bytes in extent references deduplicated.
|
||||
* `dedup_copy`: Total bytes copied to eliminate unique data in extents containing a mix of unique and duplicate data.
|
||||
|
@@ -162,7 +162,7 @@ enum btrfs_compression_type {
|
||||
__u64 bytes_deduped; /* out - total # of bytes we were able
|
||||
* to dedupe from this file */
|
||||
/* status of this dedupe operation:
|
||||
* 0 if dedup succeeds
|
||||
* 0 if dedupe succeeds
|
||||
* < 0 for error
|
||||
* == BTRFS_SAME_DATA_DIFFERS if data differs
|
||||
*/
|
||||
|
@@ -23,7 +23,7 @@ UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
||||
# Hash table entries are 16 bytes each
|
||||
# (64-bit hash, 52-bit block number, and some metadata bits)
|
||||
# Each entry represents a minimum of 4K on disk.
|
||||
# unique data size hash table size average dedup block size
|
||||
# unique data size hash table size average dedupe block size
|
||||
# 1TB 4GB 4K
|
||||
# 1TB 1GB 16K
|
||||
# 1TB 256MB 64K
|
||||
|
@@ -328,7 +328,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
// Apparently they can both extend past EOF
|
||||
BeesFileRange copy_bfr(bfr.fd(), e.begin(), e.begin() + extent_size);
|
||||
BeesRangePair brp(prealloc_bfr, copy_bfr);
|
||||
// Raw dedup here - nothing else to do with this extent, nothing to merge with
|
||||
// Raw dedupe here - nothing else to do with this extent, nothing to merge with
|
||||
if (m_ctx->dedup(brp)) {
|
||||
BEESCOUNT(dedup_prealloc_hit);
|
||||
BEESCOUNTADD(dedup_prealloc_bytes, e.size());
|
||||
@@ -338,7 +338,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
}
|
||||
}
|
||||
).run();
|
||||
return bfr; // if dedup success, which we now blindly assume
|
||||
return bfr; // if dedupe success, which we now blindly assume
|
||||
}
|
||||
|
||||
// OK we need to read extent now
|
||||
@@ -596,7 +596,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
|
||||
// If the extent contains obscured blocks, and we can find no
|
||||
// other refs to the extent that reveal those blocks, nuke the incoming extent.
|
||||
// Don't rewrite extents that are bigger than the maximum FILE_EXTENT_SAME size
|
||||
// because we can't make extents that large with dedup.
|
||||
// because we can't make extents that large with dedupe.
|
||||
// Don't rewrite small extents because it is a waste of time without being
|
||||
// able to combine them into bigger extents.
|
||||
if (!rewrite_extent && (e.flags() & Extent::OBSCURED) && (e.physical_len() > BLOCK_SIZE_MAX_COMPRESSED_EXTENT) && (e.physical_len() < BLOCK_SIZE_MAX_EXTENT_SAME)) {
|
||||
|
@@ -438,7 +438,7 @@ BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
|
||||
BEESCOUNT(replacedst_dedup_hit);
|
||||
m_found_dup = true;
|
||||
overlap_bfr = brp.second;
|
||||
// FIXME: find best range first, then dedup that
|
||||
// FIXME: find best range first, then dedupe that
|
||||
return true; // i.e. break
|
||||
} else {
|
||||
BEESCOUNT(replacedst_dedup_miss);
|
||||
|
@@ -820,7 +820,7 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
|
||||
for (auto file_path : ipa.m_paths) {
|
||||
BEESTRACE("Looking up root " << root << " ino " << ino << " in dir " << name_fd(root_fd) << " path " << file_path);
|
||||
BEESCOUNT(open_file);
|
||||
// Just open file RO. root can do the dedup ioctl without
|
||||
// Just open file RO. root can do the dedupe ioctl without
|
||||
// opening in write mode, and if we do open in write mode,
|
||||
// we can't exec the file while we have it open.
|
||||
const char *fp_cstr = file_path.c_str();
|
||||
@@ -864,19 +864,19 @@ BeesRoots::open_root_ino_nocache(uint64_t root, uint64_t ino)
|
||||
break;
|
||||
}
|
||||
|
||||
// The kernel rejects dedup requests with
|
||||
// The kernel rejects dedupe requests with
|
||||
// src and dst that have different datasum flags
|
||||
// (datasum is a flag in the inode).
|
||||
//
|
||||
// We can detect the common case where a file is
|
||||
// marked with nodatacow (which implies nodatasum).
|
||||
// nodatacow files are arguably out of scope for dedup,
|
||||
// since dedup would just make them datacow again.
|
||||
// nodatacow files are arguably out of scope for dedupe,
|
||||
// since dedupe would just make them datacow again.
|
||||
// To handle these we pretend we couldn't open them.
|
||||
//
|
||||
// A less common case is nodatasum + datacow files.
|
||||
// Those are available for dedup but we have to solve
|
||||
// some other problems before we can dedup them. They
|
||||
// Those are available for dedupe but we have to solve
|
||||
// some other problems before we can dedupe them. They
|
||||
// require a separate hash table namespace from datasum
|
||||
// + datacow files, and we have to create nodatasum
|
||||
// temporary files when we rewrite extents.
|
||||
@@ -992,7 +992,7 @@ BeesCrawl::fetch_extents()
|
||||
// Check for btrfs send workaround: don't scan RO roots at all, pretend
|
||||
// they are just empty. We can't free any space there, and we
|
||||
// don't have the necessary analysis logic to be able to use
|
||||
// them as dedup src extents (yet).
|
||||
// them as dedupe src extents (yet).
|
||||
//
|
||||
// This will keep the max_transid up to date so if the root
|
||||
// is ever switched back to read-write, it won't trigger big
|
||||
|
@@ -960,7 +960,7 @@ BeesHash
|
||||
BeesBlockData::hash() const
|
||||
{
|
||||
if (!m_hash_done) {
|
||||
// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
|
||||
// We can only dedupe unaligned EOF blocks against other unaligned EOF blocks,
|
||||
// so we do NOT round up to a full sum block size.
|
||||
const Blob &blob = data();
|
||||
m_hash = BeesHash(blob.data(), blob.size());
|
||||
|
@@ -426,7 +426,7 @@ BeesStringFile::write(string contents)
|
||||
write_or_die(ofd, contents);
|
||||
#if 0
|
||||
// This triggers too many btrfs bugs. I wish I was kidding.
|
||||
// Forget snapshots, balance, compression, and dedup:
|
||||
// Forget snapshots, balance, compression, and dedupe:
|
||||
// the system call you have to fear on btrfs is fsync().
|
||||
// Also note that when bees renames a temporary over an
|
||||
// existing file, it flushes the temporary, so we get
|
||||
|
@@ -29,7 +29,7 @@ using namespace std;
|
||||
// Block size for clone alignment (FIXME: should read this from /sys/fs/btrfs/<FS-UUID>/clone_alignment)
|
||||
const off_t BLOCK_SIZE_CLONE = 4096;
|
||||
|
||||
// Block size for dedup checksums (arbitrary, but must be a multiple of clone alignment)
|
||||
// Block size for dedupe checksums (arbitrary, but must be a multiple of clone alignment)
|
||||
const off_t BLOCK_SIZE_SUMS = 4096;
|
||||
|
||||
// Block size for memory allocations and file mappings (FIXME: should be CPU page size)
|
||||
@@ -805,10 +805,10 @@ class BeesResolver {
|
||||
set<BeesFileRange> m_ranges;
|
||||
unsigned m_bior_count;
|
||||
|
||||
// We found matching data, so we can dedup
|
||||
// We found matching data, so we can dedupe
|
||||
bool m_found_data = false;
|
||||
|
||||
// We found matching data, so we *did* dedup
|
||||
// We found matching data, so we *did* dedupe
|
||||
bool m_found_dup = false;
|
||||
|
||||
// We found matching hash, so the hash table is still correct
|
||||
|
Reference in New Issue
Block a user