From 4943a07cce25d1e9b6347e8d1578c19b0f80c7f4 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Wed, 17 Jan 2018 22:47:38 -0500 Subject: [PATCH] crucible: cache: linked-list LRU implementation We need a better cache expiration algorithm than "make a copy of the entire thing, sort it while holding a lock, and delete half the items in a single burst." Replace the Lamport clock with a double-linked list. Each insert or lookup operation moves the affected item to the head of the list. Each erase operation deletes one single item at the tail of the list. Also sort out some iterator invalidation nonsense by doing erases before inserts instead of "insert, erase, find the inserted item again because we invalidated the found iterator during the erase." The new implementation adds a second word-sized member to each Value as well as a copy of the Key. Hopefully the enlarged size is not a deal-breaker. Signed-off-by: Zygo Blaxell --- include/crucible/cache.h | 162 +++++++++++++++++++++++++++------------ 1 file changed, 111 insertions(+), 51 deletions(-) diff --git a/include/crucible/cache.h b/include/crucible/cache.h index d7fa41d..beff086 100644 --- a/include/crucible/cache.h +++ b/include/crucible/cache.h @@ -18,17 +18,27 @@ namespace crucible { public: using Key = tuple; using Func = function; - using Time = size_t; - using Value = pair; private: + struct Value { + Value *fp = nullptr; + Value *bp = nullptr; + Key key; + Return ret; + Value(Key k, Return r) : key(k), ret(r) { } + // Crash early! + ~Value() { fp = bp = nullptr; }; + }; + Func m_fn; - Time m_ctr; map m_map; LockSet m_lockset; size_t m_max_size; mutex m_mutex; + Value *m_last = nullptr; - bool check_overflow(); + void check_overflow(); + void move_to_front(Value *vp); + void erase_one(Value *vp); public: LRUCache(Func f = Func(), size_t max_size = 100); @@ -46,30 +56,82 @@ namespace crucible { template LRUCache::LRUCache(Func f, size_t max_size) : m_fn(f), - m_ctr(0), m_max_size(max_size) { } template - bool + void + LRUCache::erase_one(Value *vp) + { + THROW_CHECK0(invalid_argument, vp); + Value *vp_bp = vp->bp; + THROW_CHECK0(runtime_error, vp_bp); + Value *vp_fp = vp->fp; + THROW_CHECK0(runtime_error, vp_fp); + vp_fp->bp = vp_bp; + vp_bp->fp = vp_fp; + // If we delete the head of the list then advance the head by one + if (vp == m_last) { + // If the head of the list is also the tail of the list then clear m_last + if (vp_fp == m_last) { + m_last = nullptr; + } else { + m_last = vp_fp; + } + } + m_map.erase(vp->key); + if (!m_last) { + THROW_CHECK0(runtime_error, m_map.empty()); + } else { + THROW_CHECK0(runtime_error, !m_map.empty()); + } + } + + template + void LRUCache::check_overflow() { - if (m_map.size() <= m_max_size) { - return false; + while (m_map.size() >= m_max_size) { + THROW_CHECK0(runtime_error, m_last); + THROW_CHECK0(runtime_error, m_last->bp); + erase_one(m_last->bp); } - vector> key_times; - key_times.reserve(m_map.size()); - for (auto i : m_map) { - key_times.push_back(make_pair(i.first, i.second.first)); + } + + template + void + LRUCache::move_to_front(Value *vp) + { + if (!m_last) { + // Create new LRU list + m_last = vp->fp = vp->bp = vp; + } else if (m_last != vp) { + Value *vp_fp = vp->fp; + Value *vp_bp = vp->bp; + if (vp_fp && vp_bp) { + // There are at least two and we are removing one that isn't m_last + // Connect adjacent nodes to each other (has no effect if vp is new), removing vp from list + vp_fp->bp = vp_bp; + vp_bp->fp = vp_fp; + } else { + // New insertion, both must be null + THROW_CHECK0(runtime_error, !vp_fp); + THROW_CHECK0(runtime_error, !vp_bp); + } + // Splice new node into list + Value *last_bp = m_last->bp; + THROW_CHECK0(runtime_error, last_bp); + // New elemnt points to both ends of list + vp->fp = m_last; + vp->bp = last_bp; + // Insert vp as fp from the end of the list + last_bp->fp = vp; + // Insert vp as bp from the second from the start of the list + m_last->bp = vp; + // Update start of list + m_last = vp; } - sort(key_times.begin(), key_times.end(), [](const pair &a, const pair &b) { - return a.second < b.second; - }); - for (size_t i = 0; i < key_times.size() / 2; ++i) { - m_map.erase(key_times[i].first); - } - return true; } template @@ -78,6 +140,9 @@ namespace crucible { { unique_lock lock(m_mutex); m_max_size = new_max_size; + // FIXME: this really reduces the cache size to new_max_size - 1 + // because every other time we call this method, it is immediately + // followed by insert. check_overflow(); } @@ -95,6 +160,7 @@ namespace crucible { { unique_lock lock(m_mutex); m_map.clear(); + m_last = nullptr; } template @@ -104,8 +170,8 @@ namespace crucible { unique_lock lock(m_mutex); for (auto it = m_map.begin(); it != m_map.end(); ) { auto next_it = ++it; - if (pred(it.second.second)) { - m_map.erase(it); + if (pred(it.second.ret)) { + erase_one(&it.second); } it = next_it; } @@ -133,12 +199,18 @@ namespace crucible { // No, we hold key and cache locks, but item not in cache. // Release cache lock and call function - auto ctr_copy = m_ctr++; lock.unlock(); - Value v(ctr_copy, m_fn(args...)); + + // Create new value + Value v(k, m_fn(args...)); + + // Reacquire cache lock + lock.lock(); + + // Make room + check_overflow(); // Reacquire cache lock and insert return value - lock.lock(); tie(found, inserted) = m_map.insert(make_pair(k, v)); // We hold a lock on this key so we are the ones to insert it @@ -147,23 +219,17 @@ namespace crucible { // Release key lock, keep the cache lock key_lock.unlock(); - // Check to see if we have too many items and reduce if so. - if (check_overflow()) { - // Reset iterator - found = m_map.find(k); - } } } // Item should be in cache now THROW_CHECK0(runtime_error, found != m_map.end()); - // We are using this object so update the timestamp - if (!inserted) { - found->second.first = m_ctr++; - } + // (Re)insert at head of LRU + move_to_front(&(found->second)); + // Make copy before releasing lock - auto rv = found->second.second; + auto rv = found->second.ret; return rv; } @@ -173,7 +239,10 @@ namespace crucible { { Key k(args...); unique_lock lock(m_mutex); - m_map.erase(k); + auto found = m_map.find(k); + if (found != m_map.end()) { + erase_one(&found->second); + } } template @@ -204,33 +273,24 @@ namespace crucible { found = m_map.find(k); if (found == m_map.end()) { + // Make room + check_overflow(); + // No, we hold key and cache locks, but item not in cache. - // Release cache lock and insert the provided return value - auto ctr_copy = m_ctr++; - Value v(ctr_copy, r); + // Insert the provided return value (no need to unlock here) + Value v(k, r); tie(found, inserted) = m_map.insert(make_pair(k, v)); // We hold a lock on this key so we are the ones to insert it THROW_CHECK0(runtime_error, inserted); - - // Release key lock and clean out overflow - key_lock.unlock(); - - // Check to see if we have too many items and reduce if so. - if (check_overflow()) { - // Reset iterator - found = m_map.find(k); - } } } // Item should be in cache now THROW_CHECK0(runtime_error, found != m_map.end()); - // We are using this object so update the timestamp - if (!inserted) { - found->second.first = m_ctr++; - } + // (Re)insert at head of LRU + move_to_front(&(found->second)); } }