当前位置：首页 > article >正文

C++：unordered_set、unordered_map类

article 2024/11/6 4:23:11

unordered_set和set的差异/unordered_map和map的差异

unordered_set和unordered_map的模拟实现

哈希节点

迭代器

operator++

哈希表类

begin和end

Insert和Find

HashTable.h

unordered_set.h

unordered_map.h

map支持operator[]

unordered_set和unordered_map的底层是哈希桶实现的，使用方法和set/map都差不多

所以它们的增删查平均效率是O(1)

unordered_set和set的差异/unordered_map和map的差异

对key的要求不同，set/map要求key支持小于比较（红黑树的要求），而unodered_set/map要求key支持转成整型且支持等于比较（哈希表的要求）
迭代器的差异，set/map是双向迭代器，unordered_set/map是单向迭代器，set/map迭代器遍历作用是有序+去重，unordered_set/map迭代器遍历的作用是无序+去重
性能的差异，set/map红黑树的增删查改效率是O(logN)，unordered_set/map哈希表的增删查改效率是O(1)

unordered_set和unordered_map的模拟实现

哈希节点

template<class T>
struct HashNode
{
	T _data;
	HashNode<T>* _next;

	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

数据类型需要是模板T，因为unordered_set的T是key，unordered_map的T是pair<key,value>

迭代器

// 前置声明
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

	Node* _node;
	const HT* _ht;

	HTIterator(Node* node, const HT* ht)
		:_node(node)
		, _ht(ht)
	{}

	Ref operator*()
	{
		return _node->_data;
	}

	Ptr operator->()
	{
		return &_node->_data;
	}

	bool operator!=(const Self& s)
	{
		return _node != s._node;
	}

	Self& operator++()
	{
		if (_node->_next)
		{
			_node = _node->_next;
		}
		else
		{
			KeyOfT kot;
			Hash hash;
			size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
			hashi++;
			while (hashi < _ht->_tables.size())
			{
				_node = _ht->_tables[hashi];
				
				if (_node)
					break;
				else
					hashi++;
			}

			if (hashi == _ht->_tables.size())
			{
				_node = nullptr;
			}
		}

		return *this;
	}
};

因为迭代器的类需要使用哈希表类作为成员对象

而哈希表类也需要使用迭代器类作为返回值返回

所以这里迭代器需要一个前置声明告诉编译器我的代码下面有一个哈希表类，待会实现，否则会报错

operator++

哈希表是用哈希桶实现的，所以如果想要找到下一个节点无非三种情况

1. 当前节点桶下面还有节点，直接走到next下一个节点并返回即可

2. 当前节点桶下面没有节点，则先需要通过哈希映射关系找到下一个桶，若该桶不为空，则++后的位置即为当前桶的第一个节点

3. 若当前节点桶下面没有节点，并且桶一个个往后找都没有数据时，则为nullptr即可

哈希表类

template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
	// 友元声明
	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	friend struct HTIterator;

	typedef HashNode<T> Node;

public:
	typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
	typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;

private:
	vector<Node*> _tables; // 指针数组
	size_t _n = 0;
};

KeyOfT和前面map和set的一样，我们需要从KeyOfT这个类中创建一个对象，通过这个对象调用类中的仿函数来取到key

这个key可能是unordered_set的key，也有可能是unordered_map中pair<key, value>中的key

begin和end

Iterator Begin()
{
	if (_n == 0)
		return End();

	for (size_t i = 0; i < _tables.size(); i++)
	{
		Node* cur = _tables[i];
		if (cur)
		{
			return Iterator(cur, this);
		}
	}

	return End();
}

Iterator End()
{
	return Iterator(nullptr, this);
}

ConstIterator Begin() const
{
	if (_n == 0)
		return End();

	for (size_t i = 0; i < _tables.size(); i++)
	{
		Node* cur = _tables[i];
		if (cur)
		{
			return Iterator(cur, this);
		}
	}

	return End();
}

ConstIterator End() const
{
	return Iterator(nullptr, this);
}

它们都需要返回一个迭代器

End()直接返回nullptr节点的迭代器即可

而Begin需要从第一个桶开始往后找，直到找到不为空的桶即可返回当前节点构造的迭代器

Insert和Find

pair<Iterator, bool> Insert(const T& data)
{
	KeyOfT kot;
	Iterator it = Find(kot(data));
	if (it != End())
		return { it, false };

	Hash hash;
	// 负载因子为1时扩容
	if (_n == _tables.size())
	{
		vector<Node*> newTable(__stl_next_prime(_tables.size() + 1));
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;
				// 头插到新表
				size_t hashi = hash(kot(cur->_data)) % newTable.size();
				cur->_next = newTable[hashi];
				newTable[hashi] = cur;

				cur = next;
			}
			_tables[i] = nullptr;
		}

		_tables.swap(newTable);
	}

	size_t hashi = hash(kot(data)) % _tables.size();
	Node* newnode = new Node(data);
	newnode->_next = _tables[hashi];
	_tables[hashi] = newnode;
	_n++;

	return { Iterator(newnode, this), false };
}

Iterator Find(const K& key)
{
	Hash hash;
	KeyOfT kot;
	size_t hashi = hash(key) % _tables.size();
	Node* cur = _tables[hashi];
	while (cur)
	{
		if (kot(cur->_data) == key)
		{
			return Iterator(cur, this);
		}

		cur = cur->_next;
	}

	return End();
}

和前面哈希表几乎没什么区别

现在需要把原来的key用KeyOfT创建的kot对象使用仿函数将key取出来，因为不确定T的类型是什么

返回值也需要改成Iterator，为了后面unordered_map的operator[]的实现

HashTable.h

#pragma once
#include <iostream>
#include <vector>
#include <string>
using namespace std;

enum State
{
	EXIST,
	EMPTY,
	DELETE
};

template<class K, class V>
struct HashData
{
	pair<K, V> _kv;
	State _state = EMPTY;
};

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct HashFunc<string>
{
	size_t operator()(const string& s)
	{
		// BKDR
		size_t hash = 0;
		for (auto ch : s)
		{
			hash += ch;
			hash *= 131;
		}

		return hash;
	}
};

inline unsigned long __stl_next_prime(unsigned long n)
{
	// Note: assumes long is at least 32 bits.
	static const int __stl_num_primes = 28;
	static const unsigned long __stl_prime_list[__stl_num_primes] = {
		53, 97, 193, 389, 769,
		1543, 3079, 6151, 12289, 24593,
		49157, 98317, 196613, 393241, 786433,
		1572869, 3145739, 6291469, 12582917, 25165843,
		50331653, 100663319, 201326611, 402653189, 805306457,
		1610612741, 3221225473, 4294967291
	};
	const unsigned long* first = __stl_prime_list;
	const unsigned long* last = __stl_prime_list + __stl_num_primes;
	const unsigned long* pos = lower_bound(first, last, n);
	return pos == last ? *(last - 1) : *pos;
}

namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	// 前置声明
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	struct HTIterator
	{
		typedef HashNode<T> Node;
		typedef HashTable<K, T, KeyOfT, Hash> HT;
		typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

		Node* _node;
		const HT* _ht;

		HTIterator(Node* node, const HT* ht)
			:_node(node)
			, _ht(ht)
		{}

		Ref operator*()
		{
			return _node->_data;
		}

		Ptr operator->()
		{
			return &_node->_data;
		}

		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

		Self& operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				KeyOfT kot;
				Hash hash;
				size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
				hashi++;
				while (hashi < _ht->_tables.size())
				{
					_node = _ht->_tables[hashi];
					
					if (_node)
						break;
					else
						hashi++;
				}

				if (hashi == _ht->_tables.size())
				{
					_node = nullptr;
				}
			}

			return *this;
		}
	};

	template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
	class HashTable
	{
		// 友元声明
		template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
		friend struct HTIterator;

		typedef HashNode<T> Node;
	public:
		typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
		typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;
	
		Iterator Begin()
		{
			if (_n == 0)
				return End();

			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					return Iterator(cur, this);
				}
			}

			return End();
		}

		Iterator End()
		{
			return Iterator(nullptr, this);
		}

		ConstIterator Begin() const
		{
			if (_n == 0)
				return End();

			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					return Iterator(cur, this);
				}
			}

			return End();
		}

		ConstIterator End() const
		{
			return Iterator(nullptr, this);
		}
		
		HashTable()
			:_tables(__stl_next_prime(0))
			, _n(0)
		{}

		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;

					cur = next;
				}

				_tables[i] = nullptr;
			}
		}

		pair<Iterator, bool> Insert(const T& data)
		{
			KeyOfT kot;
			Iterator it = Find(kot(data));
			if (it != End())
				return { it, false };

			Hash hash;
			// 负载因子为1时扩容
			if (_n == _tables.size())
			{
				vector<Node*> newTable(__stl_next_prime(_tables.size() + 1));
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;
						// 头插到新表
						size_t hashi = hash(kot(cur->_data)) % newTable.size();
						cur->_next = newTable[hashi];
						newTable[hashi] = cur;

						cur = next;
					}
					_tables[i] = nullptr;
				}

				_tables.swap(newTable);
			}

			size_t hashi = hash(kot(data)) % _tables.size();
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			_n++;

			return { Iterator(newnode, this), false };
		}

		Iterator Find(const K& key)
		{
			Hash hash;
			KeyOfT kot;
			size_t hashi = hash(key) % _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return Iterator(cur, this);
				}

				cur = cur->_next;
			}

			return End();
		}

		bool Erase(const K& key)
		{
			KeyOfT kot;
			size_t hashi = key % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						// 头结点
						_tables[hashi] = cur->_next;
					}
					else
					{
						// 中间节点
						prev->_next = cur->_next;
					}

					delete cur;
					--_n;

					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}

			return false;
		}
	private:
		vector<Node*> _tables; // 指针数组
		size_t _n = 0;
	};
}

unordered_set.h

#pragma once

#include "HashTable.h"

namespace lyw
{
	template<class K, class Hash = HashFunc<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator iterator;
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator const_iterator;

		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}

		const_iterator begin() const
		{
			return _ht.Begin();
		}

		const_iterator end() const
		{
			return _ht.End();
		}

		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}

		iterator find(const K& key)
		{
			return _ht.Find(key);
		}

		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
	};
}

unordered_map.h

#pragma once

#include "HashTable.h"

namespace lyw
{
	template<class K, class V, class Hash = HashFunc<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIterator const_iterator;
	
		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}

		const_iterator begin() const
		{
			return _ht.Begin();
		}

		const_iterator end() const
		{
			return _ht.End();
		}

		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = insert({ key, V() });
			return ret.first->second;
		}

		pair<iterator, bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}

		iterator find(const K& key)
		{
			return _ht.Find(key);
		}

		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;
	};
}