当前位置：首页 > article >正文

【C++】哈希表封装 unordered_map 和 unordered_set 的实现过程

article 2025/2/21 23:52:05

在这里插入图片描述

C++语法	相关知识点	可以通过点击	以下链接进行学习	一起加油！
命名空间	缺省参数与函数重载	C++相关特性	类和对象-上篇	类和对象-中篇
类和对象-下篇	日期类	C/C++内存管理	模板初阶	String使用
String模拟实现	Vector使用及其模拟实现	List使用及其模拟实现	容器适配器Stack与Queue	Priority Queue与仿函数
模板进阶-模板特化	面向对象三大特性-继承机制	面向对象三大特性-多态机制	STL 树形结构容器	二叉搜索树
AVL树	红黑树	红黑树封装map/set	哈希-开篇	闭散列-模拟实现哈希
哈希桶-模拟实现哈希

本篇将讲述如何通过哈希表封装 unordered_map 和 unordered_set 容器。在开始本章内容之前，建议先阅读红黑树封装 map 和 set一文，以便更好地理解编译器如何区分 unordered_map 和 unordered_set 并调用底层哈希表。

请添加图片描述
Alt
🌈个人主页：是店小二呀
🌈C语言专栏：C语言
🌈C++专栏： C++
🌈初阶数据结构专栏：初阶数据结构
🌈高阶数据结构专栏：高阶数据结构
🌈Linux专栏： Linux

🌈喜欢的诗句:无人扶我青云志我自踏雪至山巅请添加图片描述

文章目录

一、节点类型
二、区分unordered_map/set容器
三、相关文件修改后
- 3.1 HashTable.h
- 3.2 unoderded_set.h
- 3.3 unoderded_map.h
四、实现迭代器
- 4.1 迭代器常见功能
- 4.2 operator++实现
- 4.3 编译器扫描顺序问题
- - 4.3.1 【第一个办法】:声明及其友元
  - 4.3.2 【第二办法】:内部类
- 4.4 operator++内部逻辑
- 4.5 获得begin和end迭代器位置
- 4.6 迭代器调用流程图
五、unoderded_map/set相关接口实现
- 5.1 unoderded_map相关接口
- 5.2 unoderded_set相关接口
- 5.3 HashTable相关接口调整
HashTable.h

这里哈希表将采用哈希桶的方式解决哈希冲突且实现哈希表结构，那么我们需要对哈希表节点类型进行处理，同时设计编译器去识别unordered_map/set的逻辑。

一、节点类型

如果我们想使用哈希表封装unordered_map/set容器，就需要考虑泛型编程，将节点类型统一成T类型

template<class T>
    struct HashNode
    {
        T _data;
        HashNode<T>* _next;

        HashNode(const T& data)
            :_data(data)
                ,_next(nullptr)
            {}
    };

二、区分unordered_map/set容器

编译器是无非确定你需要啥容器**，这里我们可以通过外部输入数据，使得编译器明确你需要啥容器，该容器都有一套属于返回K类型的仿函数**，只要unordered_set是否会多余，这里可以参考红黑树封装map/set文章，这里是为了跟map构成模板，陪太子读书。

在这里插入图片描述

既然我们希望可以通过手动输入数据，得到预计效果，那么关于HashFunc函数也是期望从外部调用，这里可以写到外面来。（图片写错了，这里看下面的）

template<class K,class Hash = HashFunc > and template<class K,class T,class Hash = HashFunc>

三、相关文件修改后

3.1 HashTable.h

#pragma once
#include <iostream>
using namespace std;
#include <string>
#include <vector>

template<class K>
    struct HashFunc
    {
        size_t operator()(const K& key)
        {
            return (size_t)key;
        }
    };

template<>
struct HashFunc<string>
{
    size_t operator()(const string& key)
    {
        size_t hash = 0;
        for (auto ch : key)
        {
            hash *= 131;
            hash += ch;
        }
        return hash;
    }
};

namespace hash_bucket
{


    template<class T>
        struct HashNode
        {
            T _data;
            HashNode<T>* _next;

            HashNode(const T& data)
                :_data(data)
                    ,_next(nullptr)
                {}
        };

    template<class K, class T, class KeyOfT, class Hash>
        class HashTable
        {
            public:
            typedef HashNode<T> Node;
            HashTable()
            {
                _tables.resize(10, nullptr);

            }

            ~HashTable()
            {
                for (size_t i = 0; i < _tables.size(); i++)
                {
                    Node* cur = _tables[i];
                    while (cur)
                    {
                        Node* next = cur->_next;
                        delete cur;
                        cur = next;
                    }

                }
            }

            bool Insert(const T& data)
            {
                Hash hs;
                KeyOfT kot;

                if (Find(kot(data))
                    {
                        return false;
                    }


                    if (_n == _tables.size())
                    {
                        vector<Node*> NewTable(n * 10, nullptr);

                        for (size_t i = 0; i < _tables.size(); i++)
                        {
                            Node* cur = _tables[i];
                            while (cur)
                            {
                                Node* next = cur->_next;
                                // 头插新表的位置
                                size_t hashi = hs(kot(cur->_data)) % NewTable.size();
                                cur->_next = NewTable[hashi];
                                NewTable[hashi] = cur;

                                cur = cur->_next;
                            }
                            _tables[i] = nullptr;
                        }
                        _tables.swap(NewTable);
                    }


                    size_t hashi = hs(kot(data)) % _tables.size();
                    //进行头插操作
                    Node* newnode = new Node(data);
                    _tables[hashi]->_next = newnode;
                    _tables[hashi] = newnode;
                    ++_n;
                    return true;
                    }

                    Node* Find(const K& key)
                    {
                        Hash hs;
                        KeyOfT kot;
                        size_t hashi = hs(key) % _tables.size();
                        Node* cur = _tables[hashi];
                        while (cur)
                        {
                            if (kot(cur->_data) == key)
                            {
                                return &kot(cur->_data);
                            }
                            else
                            {
                                cur = cur->_next;
                            }
                        }
                        return nullptr;
                    }

                    bool Erase(const K& key)
                    {
                        Hash hs;
                        size_t hashi = hs(key) % _tables.size();

                        Node* cur = _tables[hashi];
                        Node* prev = nullptr;

                        while (cur)
                        {
                            if (kot(cur->_data) == key)
                            {
                                if (prev == nullptr)
                                {
                                    _tables[hashi] = cur->_next;
                                }
                                else
                                {
                                    prev->_next = cur->_next;
                                }
                                delete cur;

                                return true;
                            }
                            else
                            {
                                prev = cur;
                                cur = cur->_next;
                            }
                        }
                        return false;
                    }
                    private:
                    vector<Node*> _tables;
                    size_t _n = 0;
                    };
                    }

这里关于HashFunc()需要放在哈希命名空间外部，因为在其他头文件需要使用到该HashFunc()

3.2 unoderded_set.h

namespace unoderded_set
{
    template<class K,class Hash = HashFunc<K>>
        class unoderded_set
        {
            struct SetKeyOfT
            {
                const K& operator()(const K& key)
                {
                    return key;
                }
            };
            private:
            hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
        };
}

3.3 unoderded_map.h

namespace unoderded_map
{
    template<class K,class V,class Hash = HashFunc<K>>
        class unoderded_map
        {
            struct SetKeyOfT
            {
                const K& operator()(const pair<K,V>& key)
                {
                    return key.first;
                }
            };
            public:

            private:
            hash_bucket::HashTable<K, const pair<K,V>, MapKeyOfT, Hash> _ht;
        };
}

四、实现迭代器

实现哈希表中迭代器跟之前其他容器实现迭代器需要考虑的差不多，主要是对于operator++和operator–需要考虑不同容器的结构进行行为重定义。同时需要设计为模板template<class T,class Ptr, class Ref>去匹配const类型和非const类型调用同一个迭代器。

typedef __HTIterator<T*, T&> iterator;
typedef __HTIterator<const T*, const T&> const_iterator;

这里得到当前_node的位置，是浅拷贝。这不希望改变实参，而是希望通过该实参位置进行操作，得到预期数据，在下一次进行操作时还是之前位置。

4.1 迭代器常见功能

template<class T,class Ptr, class Ref>
    struct _HTIterator
    {
        //得到一个节点
        typedef HashNode<T>	Node;
        typedef _HTIterator Self;

        Node* _node;

        //这里希望是浅拷贝
        _HTIterator(const Node* node)
            :_node(node)
            {}

        Ptr operator->()
        {
            return &_node->_data;
        }

        Ref operator*()
        {
            return _node->_data;
        }

        bool operator!=(const Self& s)
        {
            return _node != s._node;
        }
    };

4.2 operator++实现

【大体思路】:在第一个不为空位置，向下遍历++直到_node-> _next ==nullptr说明该位置迭代器达到悬挂着哈希桶最后一个不为空的桶，那么需要通过当前节点数值，进行除留余数法得到当前表位置下标，以该下标为基准重新在表中找到下一个不为空的位置，直到i == tables.size()说明遍历完成。

4.3 编译器扫描顺序问题

目前关于迭代器实现某方面功能需要借助HashTable类的类型及其成员，但是问题在于HashTable需要将迭代器设为"武器"，将迭代器类功能具体实现放在HashTabl类上方，编译器扫描是上到下扫描，这就导致了这两个类总有一方无法得到另外一份的东西。在这里插入图片描述

4.3.1 【第一个办法】:声明及其友元

我们可以在迭代器类中声明HashTable类作为类型定义一个哈希表指针，同时在HashTable类型设置迭代器友元，使之迭代器可以通过定义的哈希表指针访问到哈希表。

template<class K,class T,class Ptr, class Ref,class KeyOfT, class Hash>
    struct _HTIterator
    {	//前置声明
        template<class K, class T, class KeyOfT, class Hash>
            class HashTable;	
    }


template<class K, class T, class KeyOfT, class Hash>
    class HashTable
    {
        public:
        template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash>
            friend struct _HTIterator;
    }

【前置声明】

前置声明仅仅告诉编译器有一个 HashTable 类，但由于没有给出该类的完整定义和实现，编译器并不知道如何去使用它。也就是说，你不能在代码中使用 HashTable 类的功能，比如创建对象、调用其成员函数、访问数据成员等，除非你提供了类的完整定义。

4.3.2 【第二办法】:内部类

template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
		// 友元声明
		/*template<class K, class T, class KeyOfT, class Hash>
		friend struct __HTIterator;*/

		// 内部类
		template<class Ptr, class Ref>
		struct __HTIterator
		{
			typedef HashNode<T> Node;
			typedef __HTIterator Self;
        }
    }

这里就采用第一种方法(选择哪个方法都是差不多的)

4.4 operator++内部逻辑

Self& operator++()
{
    KeyOfT kot;
    Hash hs;

    if( _node->_next )
    {
        _node = _node->_next;
    }
    else
    {
        //访问该成员
        size_t i = hs(kot(_node->_data)) % _pht->_tables.size();
        i++;
        for (; i < _pht->_tables.size(); i++)
        {
            if (_pht->_tables[i])
            {
                break;
            }
        }

        if (i == _pht->tables.size())
        {
            _node = nullptr;
        }
        else
        {
            _node = _pht->_tables[i];
        }
    }
}

由于在库中unoderded_map和unoderded_set只有单向迭代器，就没有operator–。如果有兴趣可以将单链表设计为双向链表，然后根据operator++逻辑，设计出一个operator–接口

4.5 获得begin和end迭代器位置

在这里插入图片描述

iterator begin()
{
    for (size_t i = 0; i < _tables.size(); i++)
    {
        Node* cur = _tables[i];
        if (cur)
        {
            return iterator(cur, this);
        }

    }

}

iterator end()
{
    return iterator(nullptr, this);
}

const_iterator begin() const
{

    for (size_t i = 0; i < _tables.size(); i++)
    {
        Node* cur = _tables[i];
        if (cur)
        {
            return iterator(cur, this);
        }

    }
}

const_iterator end() const
{
    return iterator(nullptr, this);
}

这里就是通过哈希表begin和end接口，构造相关需求的迭代器。这里很好体现了每个不同对象的迭代器指向表是自己对象的哈希表。

4.6 迭代器调用流程图

在这里插入图片描述

unoderded_map及其unoderded_set迭代器配置，由于unoderded_map支持修改元素，那么没有必要写const系列。

外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传

五、unoderded_map/set相关接口实现

5.1 unoderded_map相关接口

namespace unoderded_map
{
    template<class K, class V, class Hash = HashFunc<K>>
        class unoderded_map
        {
            struct MapKeyOfT
            {
                const K& operator()(const pair<K, V>& key)
                {
                    return key.first;
                }
            };

            typedef typename hash_bucket::HashTable<K, const pair<K, V>, MapKeyOfT, Hash>::iterator iterator;


            iterator begin()
            {
                return _ht.begin();
            }

            iterator end()
            {
                return _ht.end();
            }

            V& operator[](const K& key)
            {
                pair<iterator, bool> ret = insert(make_pair(key, V()));
                return ret.first->second;
            }


            pair<iterator, bool> insert(const pair<K, V>& kv)
            {
                return _ht.Insert(kv);
            }

            private:
            hash_bucket::HashTable<K, const pair<K, V>, MapKeyOfT, Hash> _ht;
        };

}

【unoderded_map增添接口】:V& operator[](const K& key)和pair<iterator, bool> insert(const pair<K, V>& kv)

5.2 unoderded_set相关接口

#pragma once
#include "HashTable.h"

namespace unoderded_set
{
    template<class K, class Hash = HashFunc<K>>
        class unoderded_set
        {
            struct SetKeyOfT
            {
                const K& operator()(const K& key)
                {
                    return key;
                }
            };
            typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT,Hash>::iterator iterator;
            typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::const_iterator  const_iterator;


            iterator begin()
            {
                return _ht.begin();
            }

            iterator end()
            {
                return _ht.end();
            }

            const_iterator begin() const
            {
                return _ht.begin();
            }

            const_iterator end() const
            {
                return _ht.end();
            }
            pair<iterator, bool> insert(const K& key)
            {
                return _ht.Insert(key);
            }

            iterator find(const K& key)
            {
                return _ht.Find(key);
            }

            bool erase(const K& key)
            {
                return _ht.Erase(key);
            }
            private:
            hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
        };
}

【unoderded_set增添接口】 pair<iterator, bool> insert(const K& key)和 iterator find(const K& key)及其bool erase(const K& key)

5.3 HashTable相关接口调整

iterator Find(const K& key)
{
    Hash hs;
    KeyOfT kot;
    size_t hashi = hs(key) % _tables.size();
    Node* cur = _tables[hashi];
    while (cur)
    {
        if (kot(cur->_data) == key)
        {
            return iterator(cur, this);
        }
        else
        {
            cur = cur->_next;
        }
    }
    return end();
}

pair<iterator, bool> Insert(const T& data)
{
    Hash hs;
    KeyOfT kot;

    iterator it = Find(kot(data));
    if (it != end())
        return make_pair(it, false);

    if (_n == _tables.size())
    {
        vector<Node*> NewTable(_n * 10, nullptr);

        for (size_t i = 0; i < _tables.size(); i++)
        {
            Node* cur = _tables[i];
            while (cur)
            {
                Node* next = cur->_next;
                // 头插新表的位置
                size_t hashi = hs(kot(cur->_data)) % NewTable.size();
                cur->_next = NewTable[hashi];
                NewTable[hashi] = cur;

                cur = cur->_next;
            }
            _tables[i] = nullptr;
        }
        _tables.swap(NewTable);
    }


    size_t hashi = hs(kot(data)) % _tables.size();
    //进行头插操作
    Node* newnode = new Node(data);
    _tables[hashi]->_next = newnode;
    _tables[hashi] = newnode;
    ++_n;
    return make_pair(iterator(newnode, this), true);
}

unodered_map当中operator[]可以看作insert和find融合版本，不需要单独设计Find()，但是内部还是有调用Find()逻辑

HashTable.h

#pragma once
#include <iostream>
using namespace std;
#include <string>
#include <vector>

template<class K>
    struct HashFunc
    {
        size_t operator()(const K& key)
        {
            return (size_t)key;
        }
    };

template<>
struct HashFunc<string>
{
    size_t operator()(const string& key)
    {
        size_t hash = 0;
        for (auto ch : key)
        {
            hash *= 131;
            hash += ch;
        }
        return hash;
    }
};




namespace hash_bucket
{

    template<class T>
        struct HashNode
        {
            T _data;
            HashNode<T>* _next;

            HashNode(const T& data)
                :_data(data)
                    , _next(nullptr)
                {}
        };

    template<class K,class T,class Ptr, class Ref,class KeyOfT, class Hash>
        struct _HTIterator
        {
            //前置声明
            template<class K, class T, class KeyOfT, class Hash>
                class HashTable;

            //得到一个节点
            typedef HashNode<T>	Node;
            typedef _HTIterator Self;

            Node* _node;
            const HashTable* _pht;

            //这里希望是浅拷贝
            //这里对象-->指向一个哈希表-->cur及其this。
            _HTIterator(const Node* node,const HashTable* pht)
                :_node(node)
                    ,_pht(pht)
                {}

            Ptr operator->()
            {
                return &_node->_data;
            }

            Ref operator*()
            {
                return _node->_data;
            }


            bool operator!=(const Self& s)
            {
                return _node != s._node;
            }

            Self& operator++()
            {
                KeyOfT kot;
                Hash hs;

                if( _node->_next )
                {
                    _node = _node->_next;
                }
                else
                {
                    //访问该成员
                    size_t i = hs(kot(_node->_data)) % _pht->_tables.size();
                    i++;
                    for (; i < _pht->_tables.size(); i++)
                    {
                        if (_pht->_tables[i])
                        {
                            break;
                        }
                    }

                    if (i == _pht->tables.size())
                    {
                        _node = nullptr;
                    }
                    else
                    {
                        _node = _pht->_tables[i];
                    }
                }
            }
        };

    template<class K, class T, class KeyOfT, class Hash>
        class HashTable
        {
            public:

            template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash>
                friend struct _HTIterator;


            typedef _HTIterator< K, T, T*,  T&, KeyOfT, Hash> iterator;
            typedef _HTIterator<K,  T,const T*, const T&, KeyOfT, Hash> const_iterator;

            typedef HashNode<T> Node;
            HashTable()
            {
                _tables.resize(10, nullptr);
            }

            ~HashTable()
            {
                for (size_t i = 0; i < _tables.size(); i++)
                {
                    Node* cur = _tables[i];
                    while (cur)
                    {
                        Node* next = cur->_next;
                        delete cur;
                        cur = next;
                    }

                }
            }

            iterator Find(const K& key)
            {
                Hash hs;
                KeyOfT kot;
                size_t hashi = hs(key) % _tables.size();
                Node* cur = _tables[hashi];
                while (cur)
                {
                    if (kot(cur->_data) == key)
                    {
                        return iterator(cur, this);
                    }
                    else
                    {
                        cur = cur->_next;
                    }
                }
                return end();
            }

            pair<iterator, bool> Insert(const T& data)
            {
                Hash hs;
                KeyOfT kot;

                iterator it = Find(kot(data));
                if (it != end())
                    return make_pair(it, false);

                if (_n == _tables.size())
                {
                    vector<Node*> NewTable(_n * 10, nullptr);

                    for (size_t i = 0; i < _tables.size(); i++)
                    {
                        Node* cur = _tables[i];
                        while (cur)
                        {
                            Node* next = cur->_next;
                            // 头插新表的位置
                            size_t hashi = hs(kot(cur->_data)) % NewTable.size();
                            cur->_next = NewTable[hashi];
                            NewTable[hashi] = cur;

                            cur = cur->_next;
                        }
                        _tables[i] = nullptr;
                    }
                    _tables.swap(NewTable);
                }


                size_t hashi = hs(kot(data)) % _tables.size();
                //进行头插操作
                Node* newnode = new Node(data);
                _tables[hashi]->_next = newnode;
                _tables[hashi] = newnode;
                ++_n;

                return make_pair(iterator(newnode, this), true);
            }

            iterator begin()
            {
                for (size_t i = 0; i < _tables.size(); i++)
                {
                    Node* cur = _tables[i];
                    if (cur)
                    {
                        return iterator(cur, this);
                    }

                }

            }

            iterator end()
            {
                return iterator(nullptr, this);
            }

            const_iterator begin() const
            {

                for (size_t i = 0; i < _tables.size(); i++)
                {
                    Node* cur = _tables[i];
                    if (cur)
                    {
                        return iterator(cur, this);
                    }

                }
            }

            const_iterator end() const
            {
                return iterator(nullptr, this);
            }

            bool Erase(const K& key)
            {
                Hash hs;
                size_t hashi = hs(key) % _tables.size();

                Node* cur = _tables[hashi];
                Node* prev = nullptr;

                while (cur)
                {
                    if (kot(cur->_data) == key)
                    {
                        if (prev == nullptr)
                        {
                            _tables[hashi] = cur->_next;
                        }
                        else
                        {
                            prev->_next = cur->_next;
                        }
                        delete cur;

                        return true;
                    }
                    else
                    {
                        prev = cur;
                        cur = cur->_next;
                    }
                }
                return false;
            }
            private:
            vector<Node*> _tables;
            size_t _n = 0;
        };

};