LRU缓存与跳表:原理剖析与C++实现
LRU缓存机制
LRU(Least Recently Used)是一种经典的缓存淘汰策略,核心思想是当缓存空间不足时,优先移除最长时间未被访问的数据。这种策略基于局部性原理,认为最近被频繁访问的数据在未来仍有较大概率被再次使用。
实现LRU缓存通常需要两种数据结构的配合:哈希表用于O(1)的查找效率,双向链表用于维护数据的访问时序。C++标准库中的std::list提供了splice方法,能够在不移动元素的前提下调整节点位置,非常适合LRU的场景。
std::list::splice 核心用法
// 将源链表x的全部元素拼接到当前链表的position位置
void splice(const_iterator position, list& x);
// 将源链表x的单个元素i移动到当前链表的position位置
void splice(const_iterator position, list& x, const_iterator i);
// 将源链表x的[first, last)范围内的元素拼接到当前链表
void splice(const_iterator position, list& x, const_iterator first, const_iterator last);上述操作均通过调整指针完成,时间复杂度为O(1),无需复制任何元素。
LRU缓存完整实现
class LRUCache {
using Iter = list<pair<int, int>>::iterator;
int _cap;
unordered_map<int, Iter> _idx;
list<pair<int, int>> _items;
public:
explicit LRUCache(int capacity) : _cap(capacity) {}
int get(int key) {
auto pos = _idx.find(key);
if (pos == _idx.end()) return -1;
// 移至队首表示最近使用
_items.splice(_items.begin(), _items, pos->second);
return pos->second->second;
}
void put(int key, int value) {
auto pos = _idx.find(key);
if (pos != _idx.end()) {
// 更新值并提升优先级
pos->second->second = value;
_items.splice(_items.begin(), _items, pos->second);
return;
}
// 容量检查,淘汰尾部最久未用元素
if (_items.size() == static_cast<size_t>(_cap)) {
_idx.erase(_items.back().first);
_items.pop_back();
}
// 新元素插入队首
_items.emplace_front(key, value);
_idx[key] = _items.begin();
}
};跳表(Skip List)
跳表是由William Pugh提出的一种概率性数据结构,通过在有序链表上建立多层索引来实现快速查找。与平衡树相比,跳表的实现更为简洁,同时能保证平均O(log n)的查询效率,被广泛应用于Redis等系统中。
跳表的核心设计在于:不再严格维护每层节点数的固定比例,而是通过随机算法决定新节点的层数。这种折中方案使得插入和删除操作无需大规模调整结构,仅需局部修改指针即可。
节点结构设计
struct Node {
int key;
vector<Node*> forward; // 各层前向指针
Node(int k, int level) : key(k), forward(level, nullptr) {}
};跳表完整实现
class SkipList {
static constexpr int MAX_LEVEL = 32;
static constexpr double PROB = 0.25;
Node* _head;
int _level;
int randomLevel() const {
int lvl = 1;
static std::mt19937 gen(static_cast<unsigned>(
chrono::steady_clock::now().time_since_epoch().count()));
static std::uniform_real_distribution<double> dist(0.0, 1.0);
while (dist(gen) < PROB && lvl < MAX_LEVEL) {
++lvl;
}
return lvl;
}
public:
SkipList() : _level(1) {
_head = new Node(numeric_limits<int>::min(), MAX_LEVEL);
}
~SkipList() {
Node* cur = _head;
while (cur) {
Node* nxt = cur->forward[0];
delete cur;
cur = nxt;
}
}
// 查找目标值是否存在
bool contains(int target) const {
Node* cur = _head;
for (int i = _level - 1; i >= 0; --i) {
while (cur->forward[i] && cur->forward[i]->key < target) {
cur = cur->forward[i];
}
}
cur = cur->forward[0];
return cur != nullptr && cur->key == target;
}
// 插入新元素
void insert(int key) {
vector<Node*> update(_level);
Node* cur = _head;
for (int i = _level - 1; i >= 0; --i) {
while (cur->forward[i] && cur->forward[i]->key < key) {
cur = cur->forward[i];
}
update[i] = cur;
}
// 已存在则不重复插入
if (cur->forward[0] && cur->forward[0]->key == key) return;
int newLevel = randomLevel();
if (newLevel > _level) {
for (int i = _level; i < newLevel; ++i) {
update.push_back(_head);
}
_level = newLevel;
}
Node* newbie = new Node(key, newLevel);
for (int i = 0; i < newLevel; ++i) {
newbie->forward[i] = update[i]->forward[i];
update[i]->forward[i] = newbie;
}
}
// 删除指定元素
bool remove(int key) {
vector<Node*> update(_level);
Node* cur = _head;
for (int i = _level - 1; i >= 0; --i) {
while (cur->forward[i] && cur->forward[i]->key < key) {
cur = cur->forward[i];
}
update[i] = cur;
}
Node* target = cur->forward[0];
if (!target || target->key != key) return false;
int targetLevel = static_cast<int>(target->forward.size());
for (int i = 0; i < targetLevel; ++i) {
update[i]->forward[i] = target->forward[i];
}
delete target;
// 清理空层
while (_level > 1 && !_head->forward[_level - 1]) {
--_level;
}
return true;
}
// 可视化输出
void display() const {
for (int i = _level - 1; i >= 0; --i) {
Node* cur = _head->forward[i];
cout << "Level " << i << ": ";
while (cur) {
cout << cur->key << " ";
cur = cur->forward[i];
}
cout << "\n";
}
}
};功能测试
int main() {
SkipList sl;
int data[] = {3, 6, 7, 9, 12, 19, 17, 26, 21, 25};
for (int x : data) sl.insert(x);
sl.display();
cout << "查找 19: " << (sl.contains(19) ? "命中" : "未命中") << "\n";
cout << "查找 100: " << (sl.contains(100) ? "命中" : "未命中") << "\n";
sl.remove(19);
cout << "删除 19 后再次查找: "
<< (sl.contains(19) ? "命中" : "未命中") << "\n";
return 0;
}跳表的优势在于实现简单且并发性能优异,通过细粒度锁甚至可以实现无锁并发。相比红黑树等平衡结构,跳表在范围查询和顺序遍历场景下同样表现良好,是内存数据库中索引结构的理想选择。