当前位置：首页 > article >正文

C++ 哈希封装详解

article 2025/2/3 9:04:01

文章目录

1.buckets
- 1.1 load_factor和max_load_factor
- 1.2 reserve和rehash
- 1.3 bucket_count
- 1.4 bucket_size
2.哈希表的底层
- 2.1 iterator的实现
- 2.2 operator++
- 2.3 HashTable.h
- 2.4 Unorderedmap.h
- 2.5 Uorderedset.h

1.buckets

1.1 load_factor和max_load_factor

load_factor负载因子,max_load_factor最大负载因子，负载因子 = size(数据个数)/bucket_count(空间大小或者桶的个数)，最大负载因子为1，负载因子为1时要扩容

1.2 reserve和rehash

在这里插入图片描述

reserve和rehash都是给哈希表预留空间

1.3 bucket_count

在这里插入图片描述

返回有多少个桶(多少个空间大小)，bucket_count()

1.4 bucket_size

在这里插入图片描述

获取第n号桶的长度，可以用于算最大桶的长度

size_t max = 0;
for(int i = 0;i < us.bucket_count();i++)
{
  if(us.bucket_size(i) > max)
  max = us.bucket_size(i);
}
cout << max << endl;

2.哈希表的底层

2.1 iterator的实现

iterator实现的大框架跟list的iterator思路是一致的，⽤一个类型封装结点的指针，再通过重载运算符实现，迭代器像指针一样访问的行为，要注意的是哈希表的迭代器是单向迭代器


	// 前置声明
    // 因为HTTable向前找,找不到
	// HTIterator和HTTable相互依赖
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	template<class K,class T,class Ref,class Ptr,class KeyOfT, class Hash>
	struct HTIterator
	{
		// 节点
		typedef HashNode<T> Node;
		// 哈希表
		typedef HashTable<K, T,KeyOfT, Hash> HT;
		typedef HTIterator<K, T,Ref, Ptr, KeyOfT, Hash > Self;

		Node* _node;
		const HT* _ht;

		HTIterator(Node* node,const HT* ht)
			:_node(node),
			_ht(ht)
		{}

		// T数据的类型
		// Ref数据的引用
		Ref operator*()
		{
			return _node->_data;
		}

		// Ptr数据的指针
		Ptr operator->()
		{
			return &_node->_data;
		}

		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

		Self& operator++()
		{
			if (_node->_next)
			{
				// 当前桶还有数据,走到当前桶的下一个节点
				_node = _node->_next;
			}
			else
			{
				// 当前桶走完了,找下一个不为空的桶
				KeyOfT kot;
				Hash hash;
				// 把data取出来,转为整数 % M
				size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
				++hashi;
				while (hashi < _ht->_tables.size())
				{
					_node = _ht->_tables[hashi];

					if (_node)
						break;
					else
						++hashi;
				}

				// 1.所有桶都走完了,end()给空表示的_node
		        // 2.break出来的不为空的桶
				if (hashi == _ht->_tables.size())
				{
					_node = nullptr;
				}
			}

			return *this;
		}
	};

2.2 operator++

Self& operator++()
{
	if (_node->_next)
	{
		// 当前桶还有数据,走到当前桶的下一个节点
		_node = _node->_next;
	}
	else
	{
		// 当前桶走完了,找下一个不为空的桶
		KeyOfT kot;
		Hash hash;
		// 把data取出来,转为整数 % M
		size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
		// hashi当前桶，++到下一个桶
		++hashi;
		// 要找不为空的桶
		while (hashi < _ht->_tables.size())
		{
			_node = _ht->_tables[hashi];

			if (_node)
				break;
			else
				++hashi;
		}

		// 1.所有桶都走完了,end()给空表示的_node
		// 2.break出来的不为空的桶
		if (hashi == _ht->_tables.size())
		{
			_node = nullptr;
		}
	}

	return *this;
}

2.3 HashTable.h

namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

        HashNode(const T& data)
			:_data(data),
			_next(nullptr)
		{}
	};

	// 前置声明
    // 因为HTTable向前找,找不到
	// HTIterator和HTTable相互依赖
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	template<class K,class T,class Ref,class Ptr,class KeyOfT, class Hash>
	struct HTIterator
	{
		// 节点
		typedef HashNode<T> Node;
		// 哈希表
		typedef HashTable<K, T,KeyOfT, Hash> HT;
		typedef HTIterator<K, T,Ref, Ptr, KeyOfT, Hash > Self;

		Node* _node;
		const HT* _ht;

		HTIterator(Node* node,const HT* ht)
			:_node(node),
			_ht(ht)
		{}

		// T数据的类型
		// Ref数据的引用
		Ref operator*()
		{
			return _node->_data;
		}

		// Ptr数据的指针
		Ptr operator->()
		{
			return &_node->_data;
		}

		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

		Self& operator++()
		{
			if (_node->_next)
			{
				// 当前桶还有数据,走到当前桶的下一个节点
				_node = _node->_next;
			}
			else
			{
				// 当前桶走完了,找下一个不为空的桶
				KeyOfT kot;
				Hash hash;
				// 把data取出来,转为整数 % M
				size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
				++hashi;
				while (hashi < _ht->_tables.size())
				{
					_node = _ht->_tables[hashi];

					if (_node)
						break;
					else
						++hashi;
				}

				// 1.所有桶都走完了,end()给空表示的_node
		        // 2.break出来的不为空的桶
				if (hashi == _ht->_tables.size())
				{
					_node = nullptr;
				}
			}

			return *this;
		}
	};

	template<class K,class T,class KeyOfT,class Hash>
	class HashTable
	{
		// 类模版定义友元声明
		template<class K, class T,  class Ref, class Ptr, class KeyOfT, class Hash>
		friend struct HTIterator;
		
		typedef HashNode<T> Node;
	public:
		typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
		typedef HTIterator<K, T,const T&,const T*, KeyOfT, Hash> ConstIterator;


		// begin返回第一个不为空的桶中的节点
		Iterator Begin()
		{
			// 如果没有数据返回End(),因为桶是空的
			if (_n == 0)
				return End();

			for (int i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if(cur)
				{
					return Iterator(cur, this);
				}
			}
			// 找不到不为空的节点,返回End()
			return End();
		}

		Iterator End()
		{
			return Iterator(nullptr, this);
		}

		// begin返回第一个不为空的桶中的节点
		ConstIterator Begin() const
		{
			// 如果没有数据返回End(),因为桶是空的
			if (_n == 0)
				return End();

			for (int i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					// this是const HashTable
					return ConstIterator(cur, this);
				}
			}
			// 找不到不为空的节点,返回End()
			return End();
		}

		ConstIterator End() const
		{
			return ConstIterator(nullptr, this);
		}


		// 构造
		HashTable()
			:_tables(__stl_next_prime(0)),
			_n(0)
		{}

		// 析构
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;

					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

		// 插入
		pair<Iterator,bool> Insert(const T& data)
		{
			KeyOfT kot;
			// kot转为key
			Iterator it = Find(kot(data));
			// 如果插入的值存在冗余了返回false
			if (it != End())
			{
				return {it,false};
			}

			Hash hash;

			// 负载因子 == 1时扩容
			if (_n == _tables.size())
			{
				// 这种方法每个节点都要拷贝,影响效率
				// 并且原数组释放完后,不会自动析构每个节点,因为是内置类型
				// 还要自己写析构函数,比较麻烦
			    
				//HashTable<K, V> newht;
				//newht._tables.resize(_stl_next_prime(tables.size() + 1));
				//
				//for(size_t i = 0;i < _tables.size();i++)
				//{
				//	// 旧表的数据扩容后可能不冲突,必须一个一个取
				//	Node* cur = _tables[i];
				//	while (cur)
				//	{
				//		newht.Insert(cur->_kv);
				//		cur = cur->_next;
				//	}
				//}
				//_tables.swap(newht._tables);

				
				vector<Node*> newTable(__stl_next_prime(_tables.size() + 1));
				for(size_t i = 0;i < _tables.size();i++)
				{
					// 表旧表中的数据插入到新表中
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;
						// 算cur在新表中的位置
						size_t hashi = hash(kot(cur->_data)) % newTable.size();
						cur->_next = newTable[hashi];
						newTable[hashi] = cur;

						cur = next;
					}
					_tables[i] = nullptr;
				}
				_tables.swap(newTable);
			}

			size_t hashi = hash(kot(data)) % _tables.size();
			
			// 头插
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return {Iterator(newnode,this),true};
		}

		// 查找
		Iterator Find(const K& key)
		{
			KeyOfT kot;
			Hash hash;

			size_t hashi = hash(key) % _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return Iterator(cur,this);
				}
				cur = cur->_next;
			}

			return End();
		}

		// 删除
		bool Erase(const K& key)
		{
			KeyOfT kot;

			size_t hashi = key % _tables.size();
			Node* cur = _tables[hashi];
			Node* prev = nullptr;
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						// 头删
						_tables[hashi] = cur->_next;
					}
					else
					{
						// 删除中间的节点
						prev->_next = cur->_next;
					}
					delete cur;

					--_n;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}

			return false;
		}

	private:
		vector<Node*> _tables;// 指针数组
		size_t _n;// 表示存了多少个数据
	};
}

2.4 Unorderedmap.h

#pragma once

#include"HashTable.h"

namespace wbc
{
	template<class K,class V,class Hash = HashFunc<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT,Hash>::Iterator iterator;
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT,Hash>::ConstIterator const_iterator;


		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}

		const_iterator begin() const
		{
			return _ht.Begin();
		}

		const_iterator end() const
		{
			return _ht.End();
		}

		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = insert({ key,V() });

			return ret.first->second;
		}

		pair<iterator, bool> insert(const pair<K,V>& kv)
		{
			return _ht.Insert(kv);
		}

		iterator Find(const K& key)
		{
			return _ht.Insert(key);
		}

		iterator Erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		hash_bucket::HashTable<K, pair<const K,V>, MapKeyOfT,Hash> _ht;
	};

	void test_map1()
	{
		unordered_map<string, string> dict;
		dict.insert({ "insert","排序" });
		dict.insert({ "sort","排序" });

		for (auto& kv : dict)
		{
			cout << kv.first << ":" << kv.second << endl;
		}

		unordered_map<string, string>::iterator it = dict.begin();
		while (it != dict.end())
		{
			// 不能修改first，可以修改second
			//it->first += 'x';
			it->second += 'x';
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;
	}
}

2.5 Uorderedset.h

#pragma once

#include"HashTable.h"

namespace wbc
{
	template<class K,class Hash = HashFunc<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT,Hash>::Iterator iterator;
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator const_iterator;


		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}

		const_iterator begin() const
		{
			return _ht.Begin();
		}

		const_iterator end() const
		{
			return _ht.End();
		}

		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}

		iterator Find(const K& key)
		{
			return _ht.Insert(key);
		}

		iterator Erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		hash_bucket::HashTable<K,const K, SetKeyOfT,Hash> _ht;
	};

	void print(const unordered_set<int>& s)
	{
		unordered_set<int>::const_iterator it = s.begin();
		while (it != s.end())
		{
			//*it = 1;
			cout << *it << " ";
			++it;
		}
		cout << endl;

		for (auto e : s)
		{
			cout << e << " ";
		}
		cout << endl;
	}

	void test_set1()
	{
		int a[] = { 3,11,86,7,88,82,1,881,5,6,7,6 };
		unordered_set<int> s;
		for (auto e : a)
		{
			s.insert(e);
		}

		unordered_set<int>::iterator it = s.begin();
		while (it != s.end())
		{
			//*it = 1;
			cout << *it << " ";
			++it;
		}
		cout << endl;

		for (auto e : s)
		{
			cout << e << " ";
		}
		cout << endl;

		print(s);
	}
}