本文主要是介绍leveldb阅读-Skiplist,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
Skiplist是一种随机化的链表,通过并联链表,可以实现数据的快速插入和查找,同时能够取得比较好的时间开销和空间开销。详细的实现原理可以参照http://blog.csdn.net/haidao2009/article/details/8206856。leveldb采用skiplist来实现k-value的处理应该也是综合考虑到空间开销和时间开销的成本。
在介绍leveldb中的Skiplist之前,首先需要看一下其Node的定义,也就是Skiplist中存储信息的节点是如何定义的;
template<typename Key, class Comparator>
struct SkipList<Key,Comparator>::Node {explicit Node(const Key& k) : key(k) { }//key值不可修改Key const key;//获得下一个节点的地址,为了防止冲突,因此Release_Store版本中采用了内存屏障,//这样可以有效的确保线程获得数据都是完整的,但不一定是同步的;// Accessors/mutators for links. Wrapped in methods so we can// add the appropriate barriers as necessary.Node* Next(int n) {assert(n >= 0);// Use an 'acquire load' so that we observe a fully initialized// version of the returned Node.return reinterpret_cast<Node*>(next_[n].Acquire_Load());}//设置下一个节点的地址,通过内存屏障或者系统提供的原语,这样确保任何线程读的数据都是完整的,当然不一定是同步的;void SetNext(int n, Node* x) {assert(n >= 0);// Use a 'release store' so that anybody who reads through this// pointer observes a fully initialized version of the inserted node.next_[n].Release_Store(x);}// No-barrier variants that can be safely used in a few locations.Node* NoBarrier_Next(int n) {assert(n >= 0);return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());}void NoBarrier_SetNext(int n, Node* x) {assert(n >= 0);next_[n].NoBarrier_Store(x);}private:// Array of length equal to the node height. next_[0] is lowest level link.//弹性指针,并且采用原子性的指针来确保数据进行读写的时候的完整性;port::AtomicPointer next_[1];
};
其中,Atomicpointer的定义如下,由于leveldb需要适应不同的平台,因此,AtomicPointer有不同平台版本的定义,为了理解,只看一下比较简单版本的,也就是支持atomic的平台上的代码;
// AtomicPointer based on <cstdatomic>
#elif defined(LEVELDB_ATOMIC_PRESENT)
class AtomicPointer {private:std::atomic<void*> rep_;public:AtomicPointer() { }explicit AtomicPointer(void* v) : rep_(v) { }inline void* Acquire_Load() const {return rep_.load(std::memory_order_acquire);}inline void Release_Store(void* v) {rep_.store(v, std::memory_order_release);}inline void* NoBarrier_Load() const {return rep_.load(std::memory_order_relaxed);}inline void NoBarrier_Store(void* v) {rep_.store(v, std::memory_order_relaxed);}
};
可以从上面看出,所有的操作都是原子性的,因此就可以确保不同线程对next_数据读取和写入的时候,可以保证原子性;
SKiplist的定义如下:
//需要提供compare仿函数
template<typename Key, class Comparator>
class SkipList {private:struct Node;public://利用arena进行内存管理,cmp用于对key进行比较// Create a new SkipList object that will use "cmp" for comparing keys,// and will allocate memory using "*arena". Objects allocated in the arena// must remain allocated for the lifetime of the skiplist object.explicit SkipList(Comparator cmp, Arena* arena);//插入数据,要求不能有相同键值的数据存在列表中;// Insert key into the list.// REQUIRES: nothing that compares equal to key is currently in the list.void Insert(const Key& key);//判断指定的键值是否已经存在list中;// Returns true iff an entry that compares equal to key is in the list.bool Contains(const Key& key) const;// Iteration over the contents of a skip listclass Iterator {public:// Initialize an iterator over the specified list.// The returned iterator is not valid.explicit Iterator(const SkipList* list);// Returns true iff the iterator is positioned at a valid node.bool Valid() const;// Returns the key at the current position.// REQUIRES: Valid()const Key& key() const;// Advances to the next position.// REQUIRES: Valid()void Next();// Advances to the previous position.// REQUIRES: Valid()void Prev();// Advance to the first entry with a key >= targetvoid Seek(const Key& target);// Position at the first entry in list.// Final state of iterator is Valid() iff list is not empty.void SeekToFirst();// Position at the last entry in list.// Final state of iterator is Valid() iff list is not empty.void SeekToLast();private:const SkipList* list_;Node* node_;//head节点// Intentionally copyable};private://skiplist最大并行列表的数量;enum { kMaxHeight = 12 };//用于比较键值对大小;// Immutable after constructionComparator const compare_;//用于分配和管理node内存;Arena* const arena_; // Arena used for allocations of nodes//skiplist的heaed指针;Node* const head_;// Modified only by Insert(). Read racily by readers, but stale// values are ok.//链表的并行度,在insert的时候可能会被修改;port::AtomicPointer max_height_; // Height of the entire listinline int GetMaxHeight() const {return static_cast<int>(reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()));}//产生随机数;// Read/written only by Insert().Random rnd_;//产生新的节点;Node* NewNode(const Key& key, int height);//随机产生并行高度;int RandomHeight();//判断两个键值对是否相等;bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }//判断key值是否比n中存储的值要大;// Return true if key is greater than the data stored in "n"bool KeyIsAfterNode(const Key& key, Node* n) const;// Return the earliest node that comes at or after key.// Return NULL if there is no such node.//// If prev is non-NULL, fills prev[level] with pointer to previous// node at "level" for every level in [0..max_height_-1].Node* FindGreaterOrEqual(const Key& key, Node** prev) const;// Return the latest node with a key < key.// Return head_ if there is no such node.Node* FindLessThan(const Key& key) const;// Return the last node in the list.// Return head_ if list is empty.Node* FindLast() const;//禁止复制// No copying allowedSkipList(const SkipList&);void operator=(const SkipList&);
};
其中随机数产生器Radom的定义如下:
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.#ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_
#define STORAGE_LEVELDB_UTIL_RANDOM_H_#include <stdint.h>namespace leveldb {//
//简单的随机数产生器,不能产生真正的随机数,但是在leveldb中足够使用
// A very simple random number generator. Not especially good at
// generating truly random bits, but good enough for our needs in this
// package.
class Random {private:uint32_t seed_;public:explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {// Avoid bad seeds.if (seed_ == 0 || seed_ == 2147483647L) {seed_ = 1;}}//获得下一个随机数;uint32_t Next() {static const uint32_t M = 2147483647L; // 2^31-1static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0// We are computing// seed_ = (seed_ * A) % M, where M = 2^31-1//// seed_ must not be zero or M, or else all subsequent computed values// will be zero or M respectively. For all other values, seed_ will end// up cycling through every number in [1,M-1]uint64_t product = seed_ * A;// Compute (product % M) using the fact that ((x << 31) % M) == x.seed_ = static_cast<uint32_t>((product >> 31) + (product & M));// The first reduction may overflow by 1 bit, so we may need to// repeat. mod == M is not possible; using > allows the faster// sign-bit-based test.if (seed_ > M) {seed_ -= M;}return seed_;}// Returns a uniformly distributed value in the range [0..n-1]// REQUIRES: n > 0uint32_t Uniform(int n) { return Next() % n; }// Randomly returns true ~"1/n" of the time, and false otherwise.// REQUIRES: n > 0bool OneIn(int n) { return (Next() % n) == 0; }// Skewed: pick "base" uniformly from range [0,max_log] and then// return "base" random bits. The effect is to pick a number in the// range [0,2^max_log-1] with exponential bias towards smaller numbers.uint32_t Skewed(int max_log) {return Uniform(1 << Uniform(max_log + 1));}
};} // namespace leveldb#endif // STORAGE_LEVELDB_UTIL_RANDOM_H_
其中,next中求mod的值的原理如下:
Skiplist的具体实现如下;
//产生新的节点
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node*
SkipList<Key,Comparator>::NewNode(const Key& key, int height) {char* mem = arena_->AllocateAligned(sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1));return new (mem) Node(key);
}template<typename Key, class Comparator>
inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) {list_ = list;node_ = NULL;
}template<typename Key, class Comparator>
inline bool SkipList<Key,Comparator>::Iterator::Valid() const {return node_ != NULL;
}template<typename Key, class Comparator>
inline const Key& SkipList<Key,Comparator>::Iterator::key() const {assert(Valid());return node_->key;
}template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::Next() {assert(Valid());node_ = node_->Next(0);
}template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::Prev() {// Instead of using explicit "prev" links, we just search for the// last node that falls before key.assert(Valid());node_ = list_->FindLessThan(node_->key);if (node_ == list_->head_) {node_ = NULL;}
}template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) {node_ = list_->FindGreaterOrEqual(target, NULL);
}template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() {node_ = list_->head_->Next(0);
}template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {node_ = list_->FindLast();if (node_ == list_->head_) {node_ = NULL;}
}//随机random出node节点的高度;
template<typename Key, class Comparator>
int SkipList<Key,Comparator>::RandomHeight() {// Increase height with probability 1 in kBranchingstatic const unsigned int kBranching = 4;int height = 1;//利用<span style="font-family: Arial, Helvetica, sans-serif;">kBranching,可以控制不同点出现的概率,如1出现的概率为3/4,2出现的概率为3/4 *1/4 ,3出现的概率为 1/4*1/4*3/4;</span>
while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {height++;}assert(height > 0);assert(height <= kMaxHeight);return height;
}template<typename Key, class Comparator>
bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {// NULL n is considered infinitereturn (n != NULL) && (compare_(n->key, key) < 0);
}//找到第一个大于或者等于key值的节点
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev)const {Node* x = head_;int level = GetMaxHeight() - 1;//快速查找并填充prev,这样就可以快速的查找到大于或者等于key的第一个值//prev[1-level]填充的是每一层大于key的值的第一个node节点;while (true) {Node* next = x->Next(level);if (KeyIsAfterNode(key, next)) {// Keep searching in this listx = next;} else {if (prev != NULL) prev[level] = x;if (level == 0) {return next;} else {// Switch to next listlevel--;}}}
}//找到小于key的最大值
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node*
SkipList<Key,Comparator>::FindLessThan(const Key& key) const {Node* x = head_;int level = GetMaxHeight() - 1;while (true) {assert(x == head_ || compare_(x->key, key) < 0);Node* next = x->Next(level);if (next == NULL || compare_(next->key, key) >= 0) {if (level == 0) {return x;} else {// Switch to next listlevel--;}} else {x = next;}}
}//快速的找到最后一个值
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()const {Node* x = head_;int level = GetMaxHeight() - 1;while (true) {Node* next = x->Next(level);if (next == NULL) {if (level == 0) {return x;} else {// Switch to next listlevel--;}} else {x = next;}}
}//初始化;
//最大高度= kMaxHeight = 12
//random初始化种子为 0xdeadbeef
//初始高度为1
template<typename Key, class Comparator>
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena): compare_(cmp),arena_(arena),head_(NewNode(0 /* any key will do */, kMaxHeight)),max_height_(reinterpret_cast<void*>(1)),rnd_(0xdeadbeef) {for (int i = 0; i < kMaxHeight; i++) {head_->SetNext(i, NULL);}
}//插入节点,插入的时候会改变maxheght;
template<typename Key, class Comparator>
void SkipList<Key,Comparator>::Insert(const Key& key) {// TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()// here since Insert() is externally synchronized.Node* prev[kMaxHeight];Node* x = FindGreaterOrEqual(key, prev);// Our data structure does not allow duplicate insertionassert(x == NULL || !Equal(key, x->key));int height = RandomHeight();if (height > GetMaxHeight()) {for (int i = GetMaxHeight(); i < height; i++) {prev[i] = head_;}//fprintf(stderr, "Change height from %d to %d\n", max_height_, height);// It is ok to mutate max_height_ without any synchronization// with concurrent readers. A concurrent reader that observes// the new value of max_height_ will see either the old value of// new level pointers from head_ (NULL), or a new value set in// the loop below. In the former case the reader will// immediately drop to the next level since NULL sorts after all// keys. In the latter case the reader will use the new node.//更新最大高度max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));}x = NewNode(key, height);for (int i = 0; i < height; i++) {// NoBarrier_SetNext() suffices since we will add a barrier when// we publish a pointer to "x" in prev[i].x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));//增加了memory barrierprev[i]->SetNext(i, x);}
}//判断skiplist中是否存在目标键值
template<typename Key, class Comparator>
bool SkipList<Key,Comparator>::Contains(const Key& key) const {Node* x = FindGreaterOrEqual(key, NULL);if (x != NULL && Equal(key, x->key)) {return true;} else {return false;}
}
可以看到,在leveldb中,skiplist没有删除节点的接口,主要是在leveldb中,skiplist中没有节点删除这一说,利用arena进行内存管理,可以有效的防止内存泄漏,并避免内存管理的复杂性;
这篇关于leveldb阅读-Skiplist的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!