Bitcoin ABC  0.28.12
P2P Digital Currency
cuckoocache.h
Go to the documentation of this file.
1 // Copyright (c) 2016 Jeremy Rubin
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 
5 #ifndef BITCOIN_CUCKOOCACHE_H
6 #define BITCOIN_CUCKOOCACHE_H
7 
8 #include <util/fastrange.h>
9 
10 #include <algorithm> // std::find
11 #include <array>
12 #include <atomic>
13 #include <cmath>
14 #include <cstring>
15 #include <limits>
16 #include <memory>
17 #include <optional>
18 #include <utility>
19 #include <vector>
20 
33 namespace CuckooCache {
48  std::unique_ptr<std::atomic<uint8_t>[]> mem;
49 
50 public:
53 
65  explicit bit_packed_atomic_flags(uint32_t size) {
66  // pad out the size if needed
67  size = (size + 7) / 8;
68  mem.reset(new std::atomic<uint8_t>[size]);
69  for (uint32_t i = 0; i < size; ++i) {
70  mem[i].store(0xFF);
71  }
72  };
73 
84  inline void setup(uint32_t b) {
86  std::swap(mem, d.mem);
87  }
88 
96  inline void bit_set(uint32_t s) {
97  mem[s >> 3].fetch_or(uint8_t(1 << (s & 7)), std::memory_order_relaxed);
98  }
99 
107  inline void bit_unset(uint32_t s) {
108  mem[s >> 3].fetch_and(uint8_t(~(1 << (s & 7))),
109  std::memory_order_relaxed);
110  }
111 
118  inline bool bit_is_set(uint32_t s) const {
119  return (1 << (s & 7)) & mem[s >> 3].load(std::memory_order_relaxed);
120  }
121 };
122 
167 template <typename Element, typename Hash> class cache {
168 private:
170  std::vector<Element> table;
171 
173  uint32_t size;
174 
180 
186  mutable std::vector<bool> epoch_flags;
187 
195 
205  uint32_t epoch_size;
206 
211  uint8_t depth_limit;
212 
219 
223  using Key = typename Element::KeyType;
224 
262  inline std::array<uint32_t, 8> compute_hashes(const Key &k) const {
263  return {{FastRange32(hash_function.template operator()<0>(k), size),
264  FastRange32(hash_function.template operator()<1>(k), size),
265  FastRange32(hash_function.template operator()<2>(k), size),
266  FastRange32(hash_function.template operator()<3>(k), size),
267  FastRange32(hash_function.template operator()<4>(k), size),
268  FastRange32(hash_function.template operator()<5>(k), size),
269  FastRange32(hash_function.template operator()<6>(k), size),
270  FastRange32(hash_function.template operator()<7>(k), size)}};
271  }
272 
277  constexpr uint32_t invalid() const { return ~uint32_t(0); }
278 
284  inline void allow_erase(uint32_t n) const { collection_flags.bit_set(n); }
285 
291  inline void please_keep(uint32_t n) const { collection_flags.bit_unset(n); }
292 
303  void epoch_check() {
304  if (epoch_heuristic_counter != 0) {
306  return;
307  }
308  // count the number of elements from the latest epoch which have not
309  // been erased.
310  uint32_t epoch_unused_count = 0;
311  for (uint32_t i = 0; i < size; ++i) {
312  epoch_unused_count +=
314  }
315  // If there are more non-deleted entries in the current epoch than the
316  // epoch size, then allow_erase on all elements in the old epoch (marked
317  // false) and move all elements in the current epoch to the old epoch
318  // but do not call allow_erase on their indices.
319  if (epoch_unused_count >= epoch_size) {
320  for (uint32_t i = 0; i < size; ++i) {
321  if (epoch_flags[i]) {
322  epoch_flags[i] = false;
323  } else {
324  allow_erase(i);
325  }
326  }
328  } else {
329  // reset the epoch_heuristic_counter to next do a scan when worst
330  // case behavior (no intermittent erases) would exceed epoch size,
331  // with a reasonable minimum scan size. Ordinarily, we would have to
332  // sanity check std::min(epoch_size, epoch_unused_count), but we
333  // already know that `epoch_unused_count < epoch_size` in this
334  // branch
335  epoch_heuristic_counter = std::max(
336  1u, std::max(epoch_size / 16, epoch_size - epoch_unused_count));
337  }
338  }
339 
340 public:
346  : table(), size(), collection_flags(0), epoch_flags(),
348  hash_function() {}
349 
359  uint32_t setup(uint32_t new_size) {
360  // depth_limit must be at least one otherwise errors can occur.
361  size = std::max<uint32_t>(2, new_size);
362  depth_limit = static_cast<uint8_t>(std::log2(static_cast<float>(size)));
363  table.resize(size);
365  epoch_flags.resize(size);
366  // Set to 45% as described above
367  epoch_size = std::max((uint32_t)1, (45 * size) / 100);
368  // Initially set to wait for a whole epoch
370  return size;
371  }
372 
387  std::optional<std::pair<uint32_t, size_t>> setup_bytes(size_t bytes) {
388  size_t requested_num_elems = bytes / sizeof(Element);
389  if (std::numeric_limits<uint32_t>::max() < requested_num_elems) {
390  return std::nullopt;
391  }
392 
393  auto num_elems = setup(bytes / sizeof(Element));
394 
395  size_t approx_size_bytes = num_elems * sizeof(Element);
396  return std::make_pair(num_elems, approx_size_bytes);
397  }
398 
424  inline void insert(Element e, bool replace = false) {
425  epoch_check();
426  uint32_t last_loc = invalid();
427  bool last_epoch = true;
428  std::array<uint32_t, 8> locs = compute_hashes(e.getKey());
429  // Make sure we have not already inserted this element.
430  // If we have, make sure that it does not get deleted.
431  for (const uint32_t loc : locs) {
432  if (table[loc].getKey() == e.getKey()) {
433  if (replace) {
434  table[loc] = std::move(e);
435  }
436  please_keep(loc);
437  epoch_flags[loc] = last_epoch;
438  return;
439  }
440  }
441  for (uint8_t depth = 0; depth < depth_limit; ++depth) {
442  // First try to insert to an empty slot, if one exists
443  for (const uint32_t loc : locs) {
444  if (!collection_flags.bit_is_set(loc)) {
445  continue;
446  }
447  table[loc] = std::move(e);
448  please_keep(loc);
449  epoch_flags[loc] = last_epoch;
450  return;
451  }
467  last_loc =
468  locs[(1 + (std::find(locs.begin(), locs.end(), last_loc) -
469  locs.begin())) &
470  7];
471  std::swap(table[last_loc], e);
472  // Can't std::swap a std::vector<bool>::reference and a bool&.
473  bool epoch = last_epoch;
474  last_epoch = epoch_flags[last_loc];
475  epoch_flags[last_loc] = epoch;
476 
477  // Recompute the locs -- unfortunately happens one too many times!
478  locs = compute_hashes(e.getKey());
479  }
480  }
481 
510  bool contains(const Key &k, const bool erase) const {
511  return find(k, erase) != nullptr;
512  }
513 
526  bool get(Element &e, const bool erase) const {
527  if (const Element *eptr = find(e.getKey(), erase)) {
528  e = *eptr;
529  return true;
530  }
531 
532  return false;
533  }
534 
535 private:
536  const Element *find(const Key &k, const bool erase) const {
537  std::array<uint32_t, 8> locs = compute_hashes(k);
538  for (const uint32_t loc : locs) {
539  if (table[loc].getKey() == k) {
540  if (erase) {
541  allow_erase(loc);
542  }
543  return &table[loc];
544  }
545  }
546  return nullptr;
547  }
548 };
549 
553 template <typename T> struct KeyOnly : public T {
554  // For contains.
555  using KeyType = T;
556 
557  // Ensure implicit conversion from T.
558  KeyOnly() = default;
559  KeyOnly(const T &x) : T(x) {}
560 
561  // Implement required features.
562  const T &getKey() const { return *this; }
563 };
564 
565 } // namespace CuckooCache
566 
567 #endif // BITCOIN_CUCKOOCACHE_H
bit_packed_atomic_flags implements a container for garbage collection flags that is only thread unsaf...
Definition: cuckoocache.h:47
void bit_set(uint32_t s)
bit_set sets an entry as discardable.
Definition: cuckoocache.h:96
void setup(uint32_t b)
setup marks all entries and ensures that bit_packed_atomic_flags can store at least b entries.
Definition: cuckoocache.h:84
bit_packed_atomic_flags()=delete
No default constructor, as there must be some size.
bool bit_is_set(uint32_t s) const
bit_is_set queries the table for discardability at s.
Definition: cuckoocache.h:118
void bit_unset(uint32_t s)
bit_unset marks an entry as something that should not be overwritten.
Definition: cuckoocache.h:107
bit_packed_atomic_flags(uint32_t size)
bit_packed_atomic_flags constructor creates memory to sufficiently keep track of garbage collection i...
Definition: cuckoocache.h:65
std::unique_ptr< std::atomic< uint8_t >[]> mem
Definition: cuckoocache.h:48
cache implements a cache with properties similar to a cuckoo-set.
Definition: cuckoocache.h:167
uint32_t size
size stores the total available slots in the hash table
Definition: cuckoocache.h:173
uint8_t depth_limit
depth_limit determines how many elements insert should try to replace.
Definition: cuckoocache.h:211
std::vector< Element > table
table stores all the elements
Definition: cuckoocache.h:170
typename Element::KeyType Key
Key is the key type for this map or set.
Definition: cuckoocache.h:223
uint32_t epoch_heuristic_counter
epoch_heuristic_counter is used to determine when an epoch might be aged & an expensive scan should b...
Definition: cuckoocache.h:194
uint32_t setup(uint32_t new_size)
setup initializes the container to store no more than new_size elements and no less than 2 elements.
Definition: cuckoocache.h:359
void epoch_check()
epoch_check handles the changing of epochs for elements stored in the cache.
Definition: cuckoocache.h:303
bool get(Element &e, const bool erase) const
get is almost identical to contains(), with the difference that it obtains the found element (for Ele...
Definition: cuckoocache.h:526
uint32_t epoch_size
epoch_size is set to be the number of elements supposed to be in a epoch.
Definition: cuckoocache.h:205
std::vector< bool > epoch_flags
epoch_flags tracks how recently an element was inserted into the cache.
Definition: cuckoocache.h:186
bit_packed_atomic_flags collection_flags
The bit_packed_atomic_flags array is marked mutable because we want garbage collection to be allowed ...
Definition: cuckoocache.h:179
std::array< uint32_t, 8 > compute_hashes(const Key &k) const
compute_hashes is convenience for not having to write out this expression everywhere we use the hash ...
Definition: cuckoocache.h:262
std::optional< std::pair< uint32_t, size_t > > setup_bytes(size_t bytes)
setup_bytes is a convenience function which accounts for internal memory usage when deciding how many...
Definition: cuckoocache.h:387
bool contains(const Key &k, const bool erase) const
contains iterates through the hash locations for a given element and checks to see if it is present.
Definition: cuckoocache.h:510
constexpr uint32_t invalid() const
invalid returns a special index that can never be inserted to
Definition: cuckoocache.h:277
const Hash hash_function
hash_function is a const instance of the hash function.
Definition: cuckoocache.h:218
const Element * find(const Key &k, const bool erase) const
Definition: cuckoocache.h:536
void allow_erase(uint32_t n) const
allow_erase marks the element at index n as discardable.
Definition: cuckoocache.h:284
void insert(Element e, bool replace=false)
insert loops at most depth_limit times trying to insert a hash at various locations in the table via ...
Definition: cuckoocache.h:424
void please_keep(uint32_t n) const
please_keep marks the element at index n as an entry that should be kept.
Definition: cuckoocache.h:291
cache()
You must always construct a cache with some elements via a subsequent call to setup or setup_bytes,...
Definition: cuckoocache.h:345
static uint32_t FastRange32(uint32_t x, uint32_t n)
This file offers implementations of the fast range reduction technique described in https://lemire....
Definition: fastrange.h:22
uint256 Hash(const T &in1)
Compute the 256-bit hash of an object.
Definition: hash.h:74
High-performance cache primitives.
Definition: cuckoocache.h:33
Helper class used when we only want the cache to be a set rather than a map.
Definition: cuckoocache.h:553
KeyOnly(const T &x)
Definition: cuckoocache.h:559
const T & getKey() const
Definition: cuckoocache.h:562