diff --git a/src/bees-context.cc b/src/bees-context.cc index f279a2c..45c1d08 100644 --- a/src/bees-context.cc +++ b/src/bees-context.cc @@ -892,20 +892,27 @@ BeesContext::resolve_addr_uncached(BeesAddress addr) auto rt_age = resolve_timer.age(); BeesResolveAddrResult rv; - rv.m_biors = log_ino.m_iors; - // Avoid performance bug + // Avoid performance problems - pretend resolve failed if there are too many refs + const size_t rv_count = log_ino.m_iors.size(); + if (rv_count < BEES_MAX_EXTENT_REF_COUNT) { + rv.m_biors = log_ino.m_iors; + } else { + BEESLOGINFO("addr " << addr << " refs " << rv_count << " overflows configured ref limit " << BEES_MAX_EXTENT_REF_COUNT); + BEESCOUNT(resolve_overflow); + } + + // Avoid crippling performance bug if (sys_usage_delta < BEES_TOXIC_SYS_DURATION) { rv.m_is_toxic = false; } else { - BEESLOGNOTICE("WORKAROUND: toxic address: addr = " << addr << ", sys_usage_delta = " << round(sys_usage_delta* 1000.0) / 1000.0 << ", user_usage_delta = " << round(user_usage_delta * 1000.0) / 1000.0 << ", rt_age = " << rt_age << ", refs " << log_ino.m_iors.size()); + BEESLOGNOTICE("WORKAROUND: toxic address: addr = " << addr << ", sys_usage_delta = " << round(sys_usage_delta* 1000.0) / 1000.0 << ", user_usage_delta = " << round(user_usage_delta * 1000.0) / 1000.0 << ", rt_age = " << rt_age << ", refs " << rv_count); BEESCOUNT(resolve_toxic); rv.m_is_toxic = true; } // Count how many times this happens so we can figure out how // important this case is - size_t rv_count = rv.m_biors.size(); static size_t most_refs_ever = 2730; if (rv_count > most_refs_ever) { BEESLOGINFO("addr " << addr << " refs " << rv_count << " beats previous record " << most_refs_ever); diff --git a/src/bees.h b/src/bees.h index 52f4031..d0380d9 100644 --- a/src/bees.h +++ b/src/bees.h @@ -92,6 +92,9 @@ const double BEES_TOO_LONG = 5.0; // Avoid any extent where LOGICAL_INO takes this much kernel CPU time const double BEES_TOXIC_SYS_DURATION = 0.1; +// Maximum number of refs to a single extent +const size_t BEES_MAX_EXTENT_REF_COUNT = (16 * 1024 * 1024 / 24) - 1; + // How long between hash table histograms const double BEES_HASH_TABLE_ANALYZE_INTERVAL = BEES_STATS_INTERVAL;