From d9e3c0070b8e6b382b7956d286e43e0e6643f360 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Thu, 10 Jun 2021 09:20:26 -0400 Subject: [PATCH] context: stop creating new refs when there are too many already LOGICAL_INO_V2 has a maximum limit of 699050 references per extent. Although it no longer has a crippling performance problem, at roughly two seconds to process such an extent, it's too slow to be useful. When an extent gains an absurd number of references, stop making any more. Returning zero extent refs will make bees believe the extent was deleted, and it will remove the block from the hash table. This helps speed processing of highly duplicated large files like VM images, at the cost of a slightly lower dedupe hit rate. Signed-off-by: Zygo Blaxell --- src/bees-context.cc | 15 +++++++++++---- src/bees.h | 3 +++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/bees-context.cc b/src/bees-context.cc index f279a2c..45c1d08 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -892,20 +892,27 @@ BeesContext::resolve_addr_uncached(BeesAddress addr) auto rt_age = resolve_timer.age(); BeesResolveAddrResult rv; - rv.m_biors = log_ino.m_iors; - // Avoid performance bug + // Avoid performance problems - pretend resolve failed if there are too many refs + const size_t rv_count = log_ino.m_iors.size(); + if (rv_count < BEES_MAX_EXTENT_REF_COUNT) { + rv.m_biors = log_ino.m_iors; + } else { + BEESLOGINFO("addr " << addr << " refs " << rv_count << " overflows configured ref limit " << BEES_MAX_EXTENT_REF_COUNT); + BEESCOUNT(resolve_overflow); + } + + // Avoid crippling performance bug if (sys_usage_delta < BEES_TOXIC_SYS_DURATION) { rv.m_is_toxic = false; } else { - BEESLOGNOTICE("WORKAROUND: toxic address: addr = " << addr << ", sys_usage_delta = " << round(sys_usage_delta* 1000.0) / 1000.0 << ", user_usage_delta = " << round(user_usage_delta * 1000.0) / 1000.0 << ", rt_age = " << rt_age << ", refs " << log_ino.m_iors.size()); + 
BEESLOGNOTICE("WORKAROUND: toxic address: addr = " << addr << ", sys_usage_delta = " << round(sys_usage_delta* 1000.0) / 1000.0 << ", user_usage_delta = " << round(user_usage_delta * 1000.0) / 1000.0 << ", rt_age = " << rt_age << ", refs " << rv_count); BEESCOUNT(resolve_toxic); rv.m_is_toxic = true; } // Count how many times this happens so we can figure out how // important this case is - size_t rv_count = rv.m_biors.size(); static size_t most_refs_ever = 2730; if (rv_count > most_refs_ever) { BEESLOGINFO("addr " << addr << " refs " << rv_count << " beats previous record " << most_refs_ever); diff --git a/src/bees.h b/src/bees.h index 52f4031..d0380d9 100644 --- a/src/bees.h +++ b/src/bees.h @@ -92,6 +92,9 @@ const double BEES_TOO_LONG = 5.0; // Avoid any extent where LOGICAL_INO takes this much kernel CPU time const double BEES_TOXIC_SYS_DURATION = 0.1; +// Maximum number of refs to a single extent +const size_t BEES_MAX_EXTENT_REF_COUNT = (16 * 1024 * 1024 / 24) - 1; + // How long between hash table histograms const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;