当前位置：首页 > article >正文

【C++】用哈希桶模拟实现unordered_set和unordered_map

article 2024/10/26 0:52:10

一、unordered_set的基本框架：

二、unordered_map的基本框架

三、哈希表的封装：

1、哈希节点结构：

2、哈希表的封装：

3、迭代器的实现：

1、解引用 ->与!=：

2、operator++

3、在HashTable中的迭代器定义：

4、const迭代器的完善：

四、unordered_set和unordered_map的完善：

五、注意：

1、前置声明：

2.友元：

首先，需要将原哈希表进行封装，这里通过修改哈希桶的哈希表来进行封装

一、unordered_set的基本框架：

template<class K>
class unordered_set
{
	struct SetKeyofT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:

private:
	hash_bucket::HashTable<K, K, SetKeyofT> _ht;
};

如上所示，这个内部类是对应map的，map的成员键值对需要仿函数来进行取first，所以set就和map对应，这只是走个过场，实现泛型编程。下面的hash_bucket是自定义域名。

二、unordered_map的基本框架

template<class K, class V>
class unordered_map
{
	struct MapKeyofT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	
private:
	hash_bucket::HashTable<K, pair<K, V>, MapKeyofT> _ht;
};

可以看到，unordered_map和unordered_set基本相似，区别就是一个是K结构一个是pair<K,V>结构。所以就需要仿函数来进行取pair<K,V>中的first。

三、哈希表的封装：

1、哈希节点结构：

template<class T>
struct HashNode
{
	T _data;
	HashNode<T>* _next;
	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

如上所示：

将原先的pair<K,V>结果更新为T结构，这个T类型是下面在哈希表中进行传的，

如下所示：

在set里面传K就是K结构，在map里面传pair<K,V>就是K,V结构。

2、哈希表的封装：

template<class K, class T, class KeyofT, class HashFunc = HashFunctest<K>>
class HashTable
{
	typedef HashNode<T> Node;
public:
	HashTable();
	~HashTable();
	bool Insert(const T& data);
	Node* Find(const K& key);
	bool Erase(const K& key);
private:
	vector<Node*> _table;
	size_t _n = 0;
};

如上所示：

这里第一个参数是方便下面的erase和find传参的，毕竟keyoft只能把对象里面的k取出来，无法把类型里面的k取出来

这里第二个参数就是决定这是K结构还是pair<K,V>结构的

keyoft是将键值对中的first取出来的

HashFunc是能够进行字符串类的操作的。

关于其他的函数修改主要就是将取first的地方修改为通过仿函数来取。

修改前：

修改后：

如上所示：

这里的kot是将键值对里面的first取出来，hf是保证字符串类也可以进行操作

修改后完整代码：

template<class K>
struct HashFunctest
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct HashFunctest<string>
{
	size_t operator()(const string& str)
	{
		size_t n = 0;
		for (auto& e : str)
		{
			n *= 131;
			n += e;
		}
		return n;
	}
};

template<class T>
struct HashNode
{
	T _data;
	HashNode<T>* _next;
	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

    template<class K, class T, class KeyofT, class HashFunc = HashFunctest<K>>
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
		HashTable()
		{
			_table.resize(10, nullptr);
		}
		~HashTable()
		{
			for (size_t i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
		}
		bool Insert(const T& data)
		{
			KeyofT kot;
			if (Find(kot(data)))
			{
				return false;
			}
			HashFunc hf;
			if (_table.size() == _n)
			{
				size_t newSize = _table.size() * 2;
				vector<Node*> newTable;
				newTable.resize(newSize, nullptr);

				for (size_t i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t hashi = hf(kot(cur->_data)) / newTable.size();
						//将cur的_next指向newTable所在位置的第一个节点
						cur->_next = newTable[hashi];
						//newTable[hashi]处的指针指向cur
						newTable[hashi] = cur;

						cur = next;
					}
					_table[i] = nullptr;
				}
				_table.swap(newTable);
			}
			size_t hashi = hf(kot(data)) % _table.size();
			Node* newnode = new Node(data);
			newnode->_next = _table[hashi];
			_table[hashi] = newnode;
			++_n;
			return true;
		}

		Node* Find(const K& key)
		{
			KeyofT kot;
			HashFunc hf;
			size_t hashi = hf(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return cur;
				}
				cur = cur->_next;
			}
			return nullptr;
		}

		bool Erase(const K& key)
		{
			HashFunc hf;
			KeyofT kot;
			size_t hashi = hf(key) % _table.size();
			Node* cur = _table[hashi];
			Node* prev = nullptr;
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						_table[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;
					return true;
				}
				prev = cur;
				cur = cur->_next;
			}
			return false;
		}
	private:
		vector<Node*> _table;
		size_t _n = 0;
	};

3、迭代器的实现：

基本框架：

template<class K, class T, class KeyofT, class HashFunc>
struct HTIterator
{
	typedef HashNode<T> Node;
	typedef HTIterator<K, T, KeyofT, HashFunc> Self;
	
	Node* _node;
	HashTable<K, T, KeyofT, HashFunc>* _pht;

	HTIterator(Node* node, HashTable<K, T, KeyofT, HashFunc>* pht)
		:_node(node)
		,_pht(pht)
	{}

	T& operator*();
	T* operator->();
	Self& operator++();
	bool operator!=(const Self& s);
};

上述中这个HashFunc不需要缺省值，因为这个迭代器是在HashTable里面定义的，HashFunc在HashTable就已经给了所以就不需要了.

其中，一个是节点的指针，一个是哈希表的指针。哈希表的指针作用是为了保证当前桶走完了之后找到下一个桶

1、解引用 ->与!=：

解引用和 -> 直接返回_node的_data和&_node的_data即可，!=就是判断_node和待判断的_node是否相等即可

T& operator*()
{
	return _node->_data;
}

T* operator->()
{
	return &_node->_data;
}
bool operator!=(const Self& s)
{
	return _node != s._node;
}

2、operator++

思路：

首先有两种情况：

如下图的it1和it2，一个是当前哈希桶还有元素，这个时候++的话直接走向next位置

另一种情况是it2，当前哈希桶走完后，要走到另外一个哈希桶中，这个时候：

1、找到当前位置的哈希地址hashi

2、从hashi的下一个位置开始遍历，直到找到下一个不为空的桶

3、如果找到了就将此时hashi所在的位置的第一个数据给_node，在返回*this即可

4、如果hashi向后走，直到比这个哈希表还要大，这个时候将空给_node返回*this即可

Self& operator++()
{
	if (_node->_next)
	{
		//当前桶还没走完继续往下走
		_node = _node->_next;
	}
	else
	{
		HashFunc hf;
		KeyofT kot;
		size_t hashi = hf(kot(_node->_data)) % _pht->_table.size();
		//从下一个位置查找下一个不为空的桶
		++hashi;
		while (hashi < _pht->_table.size())
		{
			if (_pht->_table[hashi])
			{
				_node = _pht->_table[hashi];
				return *this;
			}
			++hashi;
		}
		_node = nullptr;
	}
	return *this;
}

3、在HashTable中的迭代器定义：

下面的begin和end一个直接遍历直到找到第一个存在的数据，另一个直接返回nullptr。

template<class K, class T, class KeyofT, class HashFunc = HashFunctest<K>>
class HashTable
{
	typedef HashNode<T> Node;
public:
	typedef HTIterator<K, T, KeyofT, HashFunc> iterator;
	iterator begin()
	{
		for (size_t i = 0; i < _table.size(); i++)
		{
			Node* cur = _table[i];
			iterator it(cur, this);
			if (cur)
			{
				return it;
			}
		}
		return iterator(nullptr, this);
	}

	iterator end()
	{
		return iterator(nullptr, this);
	}
    //......
private:
    //......
};

4、const迭代器的完善：

这里像上次实现set和map差不多：

在iterator类里面加上两个类模版作为解引用和->的返回值

Ref operator*()
{
	return _node->_data;
}

Ptr operator->()
{
	return &_node->_data;
}

在哈希表类里面实现const begin和const end

但是会像上图那样报错，这是因为如下：

这个时候this是指向pht的，这样的话，权限就被放大了，

解决方法：

将pht这个指针用const修饰

但是这样的话pht就又会存在权限的放大了，所以将_pht也用const修饰即可

最后对set和map进行const封装即可：

四、unordered_set和unordered_map的完善：

这里就直接封装HashTable里面的即可

template<class K>
class unordered_set
{
	struct SetKeyofT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	typedef typename hash_bucket::HashTable<K, K, SetKeyofT>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}
	iterator end()
	{
		return _ht.end();
	}
	bool Insert(const K& key)
	{
		return _ht.Insert(key);
	}
private:
	hash_bucket::HashTable<K, K, SetKeyofT> _ht;
};

template<class K, class V>
class unordered_map
{
	struct MapKeyofT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	typedef typename hash_bucket::HashTable<K, pair<K, V>, MapKeyofT>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}
	iterator end()
	{
		return _ht.end();
	}
	bool Insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}
private:
	hash_bucket::HashTable<K, pair<K, V>, MapKeyofT> _ht;
};