当前位置：首页 > article >正文

【C++STL(十四)】一个哈希桶简单模拟实现unordered_map/set

article 2025/2/21 3:42:19

前言

一、改造哈希桶

改造结点类

改造主体

模板参数改造

迭代器(重点)

改造完整哈希桶

unordered_map模拟实现

unordered_set模拟实现

前言

前面的文章都有说unordered_map/set的底层结构就是哈希表，严格来说是哈希桶，那么接下来我们就尝试使用同一个哈希桶来模拟实现一下。整体的逻辑和一棵红黑树封装map/set类似，所以这里为了方便给出前面所实现的哈希桶

template<class K>
struct  HashFuc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};
template<>
struct  HashFuc<string>
{
	size_t operator()(const string& s)
	{
		size_t hash = 0;
		for (auto a : s)
		{
			hash *= 131;
			hash += a;
		}

		return hash;
	}
};

template <class K, class V>
struct HashNode
{
	pair<K, V> _kv;
	HashNode<K, V>* _next;

	HashNode(const pair<K, V>& kv)
		:_kv(kv)
		, _next(nullptr)
	{}
};

template<class K, class V, class Hash = HashFuc<K>>
class HashTable
{
	typedef HashNode<K, V> Node;
public:
	HashTable()
	{
		_h.resize(10, nullptr);
		_n = 0;
	}
	// 哈希桶的销毁
	~HashTable()
	{
		for (size_t i = 0; i < _h.size(); i++)
		{
			Node* cur = _h[i];
			while (cur)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_h[i] = nullptr;
		}
		_n = 0;
	}
	// 插入值为data的元素，如果data存在则不插入
	bool Insert(const pair<K, V>& kv)
	{
		//不能存在重复
		if (Find(kv.first))
			return false;
		//负载因子为1时开始扩容
		if (_n == _h.size())
		{
			vector<Node*> newHash(_h.size() * 2, nullptr);
			for (size_t i = 0; i < _h.size(); i++)
			{
				//将旧表的结点一个一个取下来重新映射至新表，节省空间
				Node* cur = _h[i];
				while (cur)
				{
					Node* next = cur->_next;
					//映射至新表中
					Hash hs;
					size_t newi = hs(cur->_kv.first) % newHash.size();//哈希函数又称散列函数
					cur->_next = newHash[newi];
					newHash[newi] = cur;

					cur = next;
				}
				//旧表置空
				_h[i] = nullptr;
			}
			//交换两个表的内容即可
			_h.swap(newHash);
		}

		Hash hs;
		size_t hashi = hs(kv.first) % _h.size();
		//头插
		Node* newnode = new Node(kv);
		newnode->_next = _h[hashi];
		_h[hashi] = newnode;
		++_n;
		return true;
	}
	// 在哈希桶中查找值为key的元素，存在返回true否则返回false?
	Node* Find(const K& key)
	{
		Hash hs;
		size_t hashi = hs(key) % _h.size();
		Node* cur = _h[hashi];
		while (cur)
		{
			if (cur->_kv.first == key)
			{
				return cur;
			}
			cur = cur->_next;
		}
		return nullptr;
	}
	// 哈希桶中删除key的元素，删除成功返回true，否则返回false
	bool Erase(const K& key)
	{
		Hash hs;
		size_t hashi = hs(key) % _h.size();
		Node* prev = nullptr;
		Node* cur = _h[hashi];
		while (cur)
		{
			if (cur->_kv.first == key)
			{
				//删除的是第一个
				if (prev == nullptr)
					_h[hashi] = cur->_next;
				else
					prev->_next = cur->_next;
				delete cur;
				return true;
			}
			else
			{
				prev = cur;
				cur = cur->_next;
			}
		}
		--_n;
		return false;
	}
private:
	vector<Node*> _h;
	size_t _n = 0;
};

可以看到。上述的哈希桶也是仅仅支持的类型是pair<K,V>这样的键值对，也就是仅可以简单模拟实现一下unordered_map,但不支持unordered_set，也就是还不够泛，所以必须先改造改造这个哈希桶，那么，来吧，开整！

一、改造哈希桶

改造结点类

这里和红黑树封装map/set如出一辙！

template <class T>
struct HashNode
{
	T _data;
	HashNode<T>* _next;

	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

T可以是key，也可以pair<K，V>,会根据上层传来的类型去识别出实际的类型而去实例化出一个合适的模板！就很泛型！

改造主体

模板参数改造

template<class K, class T, class KeyOfT,class Hash>
class HashTable
{
	typedef HashNode<T> Node;
public:
	
private:
	vector<Node*> _h;
	size_t _n = 0;
};

实际上和红黑树封装map/set类似，也是要带上KeyOfT这个仿函数去取对应的类型的Key来比较！

uordered_map的仿函数

	template<class K, class V, class Hash=HashFuc<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	private:
		HashTable<K, pair<K, V>, MapKeyOfT, Hash> _ht;
	};

uordered_set的仿函数

	template<class K,class Hash = HashFuc<K>>
	class unordered_map
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	private:
		HashTable<K,K, SetKeyOfT, Hash> _ht;
	};

这里看到，这个改造的哈希桶参数部分就存在了四个，看官别晕！

迭代器(重点)

这里的迭代器很有说法，首先就是底层依旧是结点的指针，其次就是++操作，因为是桶结构，当遍历完当前桶的所有数据时需要寻找下一个桶进行遍历，所以需要传对象的哈希表地址！

基本结构如下：

template<class K, class T, class KeyOfT, class Hash>
struct __HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HTIterator<K, T, KeyOfT, Hash> Self;//为了方便而进行迭代器的重命名

	Node* _node;//底层结构依旧是结点指针
	HT* _pt;//对象哈希表地址
    //构造函数
	__HTIterator(Node* node,HT* pht)
		:_node(node)
		,_pt(pht)
	{}
};

++操作的逻辑：

先遍历当前桶的所有结点，直到空为止。
当前桶遍历结束，寻找下一个不为空的桶的第一个结点。（具体做法：先通过哈希函数算出遍历结束的桶的位置，在循环寻找下一个不为空的桶即可）

	Self& operator++()
	{
		//先遍历当前桶
		if (_node->_next)
		{
			//当前桶没走完，就遍历当前桶的下一个结点
			_node = _node->_next;
		}
		else
		{
			//当前桶走完，找下一个不为空的桶的第一个结点
			KeyOfT kot;
			Hash hs;
			size_t i = hs(kot(_node->_data)) % _pt->_h.size();
			++i;
			for (; _pt->_h.size(); i++)
			{
				if (_pt->_h[i])
					break;
			}
			//遍历到最后，直接返回空
			if (i == _pt->_h.size())
			{
				_node = nullptr;
			}
			else
				_node = _pt->_h[i];
		}
		return *this;
	}

注意：这里仅有单向迭代器，因为桶里面的结构是单向链表，所以没有重载--操作。若需要重载可以将桶中的结构改为双向链表。这里就不这样做了，因为库里没有实现。。。

大家会发现一个问题，就是这个迭代器的参数是不是变得十分的多了？若再加上封装const迭代器的实现，那么上述的迭代器的参数就变为如下

template<class K, class T, class KeyOfT, class Hash,class Ptr,class Ref>
struct __HTIterator
{}

实际上大家会发现前面四个参数和哈希桶主体的模板参数类似，同时呢我们迭代器的实现又需要使用到哈希桶，因为要访问哈希桶的私有的内部成员！类外的函数要访问一个类的私有成员变量，可以使用友元声明，还以采用内部类的方式，这样的方式会简便很多。所以对于上述迭代器，更加简便的方式就是将迭代器封装成哈希桶的内部类。内部类天生是外部类的友元

放置类外

//前置声明，因为程序是向上查找函数的，迭代器是在哈希桶的上面
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

template<class K, class T, class KeyOfT, class Hash，class Ptr,class Ref>
struct __HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HTIterator<K, T, KeyOfT, Hash,Ptr,Ref> Self;//为了方便而进行迭代器的重命名

	Node* _node;//底层结构依旧是结点指针
	const HT* _pt;//对象哈希表地址
	__HTIterator(Node* node,const HT* pht)
		:_node(node)
		,_pt(pht)
	{}

	Ref operator*()
	{
		return _node->_data;
	}
	Ptr operator->()
	{
		return &_node->_data;
	}

	Self& operator++()
	{
		//先遍历当前桶
		if (_node->_next)
		{
			//当前桶没走完，就遍历当前桶的下一个结点
			_node = _node->_next;
		}
		else
		{
			//当前桶走完，找下一个不为空的桶的第一个结点
			KeyOfT kot;
			Hash hs;
			size_t i = hs(kot(_node->_data)) % _pt->_h.size();
			++i;
			for (; i<_pt->_h.size(); i++)
			{
				if (_pt->_h[i])
					break;
			}
			//遍历到最后，直接放回空
			if (i == _pt->_h.size())
			{
				_node = nullptr;
			}
			else
				_node = _pt->_h[i];
		}
		return *this;
	}

	bool operator!=(const Self& s)
	{
		return _node != s._node;
	}

	bool operator==(const Self& s)
	{
		return _node == s._node;
	}
};
//哈希桶
template<class K, class T, class KeyOfT,class Hash>
class HashTable
{
	typedef HashNode<T> Node;
public:
	typedef __HTIterator<K, T, KeyOfT, Hash,Ptr,Ref> Iterator;
    //友元声明
	template<class K, class T, class KeyOfT, class Hash，class Ptr,class Ref>
	friend struct __HTIterator;

    //………………
}

内部类

template<class K, class T, class KeyOfT,class Hash>
class HashTable
{
	typedef HashNode<T> Node;
public:
	//内部类
	template<class Ptr, class Ref>
	struct __HTIterator
	{
		typedef HashNode<T> Node;
		typedef HashTable HT;
		typedef __HTIterator<Ptr, Ref> Self;//为了方便而进行迭代器的重命名

		Node* _node;//底层结构依旧是结点指针
		const HT* _pt;//对象哈希表地址
		__HTIterator(Node* node,const HT* pht)
			:_node(node)
			, _pt(pht)
		{}

		Ref operator*()
		{
			return _node->_data;
		}
		Ptr operator->()
		{
			return &_node->_data;
		}

		Self& operator++()
		{
			//先遍历当前桶
			if (_node->_next)
			{
				//当前桶没走完，就遍历当前桶的下一个结点
				_node = _node->_next;
			}
			else
			{
				//当前桶走完，找下一个不为空的桶的第一个结点
				KeyOfT kot;
				Hash hs;
				size_t i = hs(kot(_node->_data)) % _pt->_h.size();
				++i;
				for (; i < _pt->_h.size(); i++)
				{
					if (_pt->_h[i])
						break;
				}
				//遍历到最后，直接放回空
				if (i == _pt->_h.size())
				{
					_node = nullptr;
				}
				else
					_node = _pt->_h[i];
			}
			return *this;
		}

		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

		bool operator==(const Self& s)
		{
			return _node == s._node;
		}
	};

	typedef __HTIterator<T*,T&> Iterator;//普通迭代器
	typedef __HTIterator<const T*, const T&> const_Iterator;//const迭代器
    Iterator Begin()
	{
		for (size_t i = 0; i < _h.size(); i++)
		{
			Node* cur = _h[i];
			if (cur)
			{
				//this->HashTable*
				return Iterator(cur, this);
			}
		}
		return End();
	}
	Iterator End()
	{
		return Iterator(nullptr,this);
	}
	const_Iterator Begin()const
	{
		for (size_t i = 0; i < _h.size(); i++)
		{
			Node* cur = _h[i];
			if (cur)
			{
				//const this->const HashTable*
				return const_Iterator(cur, this);
			}
		}
		return End();
	}
	const_Iterator End()const
	{
		return const_Iterator(nullptr, this);
	}
    //………………
}

可以看出当封装成内部类时，前面的四个参数可以完全省略了，共用的是哈希桶的！这样实现起来就方便很多了，但是要注意底层可以省略，但是上层一定要传四个参数，也就是模拟实现unordered_map那层。

还需要注意的一个细节，就是因为要传入对于哈希表的地址，即this指针，而const迭代器修饰的就是const this，那么底层的哈希地址就需要使用const指针修饰，因为如果不用const的话，当传入const this时会存在权限放大的问题。

typedef HashTable HT;
const HT* _pt;//对象哈希表地址

改造完整哈希桶

#include <iostream>
#include <vector>
using namespace std;

template<class K>
struct  HashFuc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};
template<>
struct  HashFuc<string>
{
	size_t operator()(const string& s)
	{
		size_t hash = 0;
		for (auto a : s)
		{
			hash *= 131;
			hash += a;
		}

		return hash;
	}
};

template <class T>
struct HashNode
{
	T _data;
	HashNode<T>* _next;

	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

template<class K, class T, class KeyOfT,class Hash>
class HashTable
{
	typedef HashNode<T> Node;
public:
	//内部类
	template<class Ptr, class Ref>
	struct __HTIterator
	{
		typedef HashNode<T> Node;
		typedef HashTable HT;
		typedef __HTIterator<Ptr, Ref> Self;//为了方便而进行迭代器的重命名

		Node* _node;//底层结构依旧是结点指针
		const HT* _pt;//对象哈希表地址
		__HTIterator(Node* node, const HT* pht)
			:_node(node)
			, _pt(pht)
		{}

		Ref operator*()
		{
			return _node->_data;
		}
		Ptr operator->()
		{
			return &_node->_data;
		}

		Self& operator++()
		{
			//先遍历当前桶
			if (_node->_next)
			{
				//当前桶没走完，就遍历当前桶的下一个结点
				_node = _node->_next;
			}
			else
			{
				//当前桶走完，找下一个不为空的桶的第一个结点
				KeyOfT kot;
				Hash hs;
				size_t i = hs(kot(_node->_data)) % _pt->_h.size();
				++i;
				for (; i < _pt->_h.size(); i++)
				{
					if (_pt->_h[i])
						break;
				}
				//遍历到最后，直接放回空
				if (i == _pt->_h.size())
				{
					_node = nullptr;
				}
				else
					_node = _pt->_h[i];
			}
			return *this;
		}

		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

		bool operator==(const Self& s)
		{
			return _node == s._node;
		}
	};
	typedef __HTIterator<T*,T&> Iterator;//普通迭代器
	typedef __HTIterator<const T*, const T&> const_Iterator;//const迭代器
	Iterator Begin()
	{
		for (size_t i = 0; i < _h.size(); i++)
		{
			Node* cur = _h[i];
			if (cur)
			{
				//this->HashTable*
				return Iterator(cur, this);
			}
		}
		return End();
	}
	Iterator End()
	{
		return Iterator(nullptr,this);
	}
	const_Iterator Begin()const
	{
		for (size_t i = 0; i < _h.size(); i++)
		{
			Node* cur = _h[i];
			if (cur)
			{
				//const this->HashTable*
				return const_Iterator(cur, this);
			}
		}
		return End();
	}
	const_Iterator End()const
	{
		return const_Iterator(nullptr, this);
	}

	HashTable()
	{
		_h.resize(10, nullptr);
		_n = 0;
	}
	// 哈希桶的销毁
	~HashTable()
	{
		for (size_t i = 0; i < _h.size(); i++)
		{
			Node* cur = _h[i];
			while (cur)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_h[i] = nullptr;
		}
		_n = 0;
	}
	// 插入值为data的元素，如果data存在则不插入
	pair<Iterator,bool> Insert(const T& data)
	{
		KeyOfT kot;//取对应类型的key
		//不能存在重复
		Iterator it = Find(kot(data));
		if (it!=End())
			return make_pair(it,false);
		//负载因子为1时开始扩容
		if (_n == _h.size())
		{
			vector<Node*> newHash(_h.size() * 2, nullptr);
			for (size_t i = 0; i < _h.size(); i++)
			{
				//将旧表的结点一个一个取下来重新映射至新表，节省空间
				Node* cur = _h[i];
				while (cur)
				{
					Node* next = cur->_next;
					//映射至新表中
					Hash hs;
					size_t newi = hs(kot(cur->_data)) % newHash.size();//哈希函数又称散列函数
					cur->_next = newHash[newi];
					newHash[newi] = cur;

					cur = next;
				}
				//旧表置空
				_h[i] = nullptr;
			}
			//交换两个表的内容即可
			_h.swap(newHash);
		}

		Hash hs;
		size_t hashi = hs(kot(data)) % _h.size();
		//头插
		Node* newnode = new Node(data);
		newnode->_next = _h[hashi];
		_h[hashi] = newnode;
		++_n;
		return make_pair(Iterator(newnode,this), true);
	}
	 //在哈希桶中查找值为key的元素，存在返回true否则返回false
	Iterator Find(const K& key)
	{
		KeyOfT kot;
		Hash hs;
		size_t hashi = hs(key) % _h.size();
		Node* cur = _h[hashi];
		while (cur)
		{
			if (kot(cur->_data) == key)
			{
				return Iterator(cur, this);
			}
			cur = cur->_next;
		}
		return Iterator(nullptr, this);
	}
	// 哈希桶中删除key的元素，删除成功返回true，否则返回false
	bool Erase(const K& key)
	{
		KeyOfT kot;
		Hash hs;
		size_t hashi = hs(key) % _h.size();
		Node* prev = nullptr;
		Node* cur = _h[hashi];
		while (cur)
		{
			if (kot(cur->_data) == key)
			{
				//删除的是第一个
				if (prev == nullptr)
					_h[hashi] = cur->_next;
				else
					prev->_next = cur->_next;
				delete cur;
				return true;
			}
			else
			{
				prev = cur;
				cur = cur->_next;
			}
		}
		--_n;
		return false;
	}
	size_t Size()
	{
		return _n;
	}
	bool empty()const
	{
		return _n == 0;
	}
private:
	vector<Node*> _h;
	size_t _n = 0;
};

unordered_map模拟实现

实际上就是封装哈希桶，没啥的，重点在底层哈希桶结构，这里直接给出完整代码

#include "HashTable.h"
namespace um
{
	template<class K, class V, class Hash=HashFuc<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename HashTable<K, pair<K, V>, MapKeyOfT, Hash>::Iterator iterator;
		typedef typename HashTable<K, pair<K, V>, MapKeyOfT, Hash>::const_Iterator const_iterator;
		pair<iterator,bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}
		iterator begin()
		{
			return _ht.Begin();
		}
		iterator end()
		{
			return _ht.End();
		}
		const_iterator begin()const
		{
			return _ht.Begin();
		}
		const_iterator end()const
		{
			return _ht.End();
		}

		iterator Find(const K& key)
		{
			return _ht.Find(key);
		}

		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}

		size_t size()
		{
			return _ht.Size();
		}
		bool empty()
		{
			return _ht.empty();
		}
        //重载[]
		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
			iterator it = ret.first;
			return it->second;
		}
	private:
		HashTable<K, pair<K, V>, MapKeyOfT, Hash> _ht;//底层对哈希桶的封装
	};

unordered_set模拟实现

namespace us
{
	template<class K,class Hash = HashFuc<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename HashTable<K, K, SetKeyOfT, Hash>::Iterator iterator;
		typedef typename HashTable<K, K, SetKeyOfT, Hash>::const_Iterator const_iterator;

		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}
		const_iterator begin()const
		{
			return _ht.Begin();
		}

		const_iterator end()const
		{
			return _ht.End();
		}

		pair<iterator,bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}

		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}

		size_t size()
		{
			return _ht.Size();
		}

		bool empty()
		{
			return _ht.empty();
		}
	private:
		HashTable<K,K, SetKeyOfT, Hash> _ht;
	};
}

好了，今天的分享就到这里，如果对你有所帮助，欢迎点赞+关注!

查看全文

http://www.kler.cn/a/294791.html