当前位置：首页 > article >正文

【面经】CPP经典面试手撕{LRUCache、字典树、布隆过滤器}

article 2025/3/4 1:22:09

文章目录

LRUCache
字典树
布隆过滤器

LRUCache

在这里插入图片描述

class LRUCache {
    using ListIt = list<pair<int, int>>::iterator;

    list<pair<int, int>> _LRUlist;
    int _capacity;

    unordered_map<int, ListIt> _hashmap;

public:
    LRUCache(int capacity) : _capacity(capacity) {}

    int get(int key) {
        auto it = _hashmap.find(key);
        if (it != _hashmap.end()) {
            // 将该关键字移动到LRU队列头
            ListIt listit = it->second;
            _LRUlist.splice(_LRUlist.begin(), _LRUlist, listit);
            return listit->second;
        } else
            return -1;
    }

    void put(int key, int value) {
        auto it = _hashmap.find(key);
        // 不存在插入
        if (it == _hashmap.end()) {
            // 满了逐出最久未使用的关键字
            if (_hashmap.size() == _capacity) {
                pair<int, int> back = _LRUlist.back();
                _hashmap.erase(back.first);
                _LRUlist.pop_back();
            }
            // 将关键字插入到LRU队列头
            _LRUlist.push_front(make_pair(key, value));
            _hashmap.insert(make_pair(key, _LRUlist.begin()));

        }
        // 存在更新
        else {
            ListIt listit = it->second;
            listit->second = value;
            _LRUlist.splice(_LRUlist.begin(), _LRUlist, listit);
        }
    }
};

字典树

在这里插入图片描述

class Trie {
private:
    struct TreeNode {
        vector<TreeNode*> next;
        bool isEnd;

        TreeNode() {
            isEnd = false;
            next.resize(26, nullptr);
        }
    };

    // 递归释放树的内存
    void deleteTree(TreeNode* node) {
        if (node == nullptr)
            return;
        for (TreeNode* nextNode : node->next)
            deleteTree(nextNode);

        delete node;
    }

public:
    TreeNode* root;
    Trie() { root = new TreeNode(); }
    ~Trie() { deleteTree(root); }

    void insert(const std::string& word) {
        TreeNode* cur = root;
        for (char ch : word) {
            int index = ch - 'a';
            if (cur->next[index] == nullptr)
                cur->next[index] = new TreeNode();

            cur = cur->next[index];
        }
        cur->isEnd = true;
    }

    // 查找单词
    bool search(const std::string& word) {
        TreeNode* cur = root;
        for (char ch : word) {
            int index = ch - 'a';
            if (cur->next[index] == nullptr)
                return false;

            cur = cur->next[index];
        }
        return cur->isEnd;
    }

    // 查找前缀
    bool startsWith(const std::string& prefix) {
        TreeNode* cur = root;
        for (char ch : prefix) {
            int index = ch - 'a';
            if (cur->next[index] == nullptr)
                return false;

            cur = cur->next[index];
        }
        return true;
    }
    bool query(const string& s) {
        TreeNode* cur = root;
        for (char c : s) {
            int u = c - 'a';
            cur = cur->next[u];  // 移动到下一个字符
            if (!cur->isEnd)      // 如果某个前缀不是完整单词，返回 false
                return false;
        }
        return true;  // 如果所有前缀都是完整单词，则返回 true
    }
};

布隆过滤器

#pragma once

#include <iostream>
#include <list>
#include <vector>
#include <algorithm>
#include <array>
#include <time.h>
#include <queue>
#include <stack>
#include <string>
#include <set>
#include <map>
#include <unordered_set>
#include <unordered_map>
#include <functional>
#include <assert.h>
using namespace std;

// 一个比特位变标识两种状态 0 1
template <size_t N>
class bitmap
{
private:
    vector<char> _bits;

public:
    // 构造函数
    bitmap()
    {
        // 开空间 初始化成0
        // 传N 表示需要N个bit 开N/8+1个字节
        // cout << "N/8+1:" << N / 8 + 1 << endl;
        _bits.resize(N / 8 + 1, 0);
    }

    // 插入: 将数x映射的位 置成1
    void insert_setone(size_t x)
    {
        // 第i个字节  0 1 2 3 ...
        size_t i = x / 8;
        // 第i个字节的第j个位
        size_t j = x % 8;

        // 利用或等 第j位-置1 其余位-不变
        _bits[i] |= (1 << j); // 左移:并不是向左移而是向高位移
    }

    // 删除: 将数x映射的位 置成0
    void erase_setzero(size_t x)
    {
        // 第i个字节  0 1 2 3 ...
        size_t i = x / 8;
        // 第i个字节的第j个位
        size_t j = x % 8;

        // 利用与等 第j位-置0 其余位-不变
        _bits[i] &= ~(1 << j);
    }

    // 判断: 判断数x是否存在
    bool judge(size_t x)
    {
        // 第i个字节  0 1 2 3 ...
        size_t i = x / 8;
        // 第i个字节的第j个位
        size_t j = x % 8;

        // 假定数x存在 那么第j位应为1
        //_bits[i]访问到的是 数x所在第i个字节的整体数
        return _bits[i] & (1 << j);
    }
};

// 测试函数 ///

void test_bitmap1()
{
    bitmap<100> bm;
    bm.insert_setone(10);
    bm.insert_setone(11);
    bm.insert_setone(15);

    cout << bm.judge(10) << endl;
    cout << bm.judge(15) << endl;

    bm.erase_setzero(10);

    cout << bm.judge(10) << endl;
    cout << bm.judge(15) << endl;

    bm.erase_setzero(10);
    bm.erase_setzero(15);

    cout << bm.judge(10) << endl;
    cout << bm.judge(15) << endl;
}

void test_bitmap2()
{
    // 4294967295
    // bitset<-1> bm;
    bitmap<0xFFFFFFFF> bm;
}

// Brian Kernighan与Dennis Ritchie《The C Programming Language》
struct BKDR_Hash
{
    size_t operator()(const string &s)
    {
        size_t value = 0;
        for (auto &ch : s)
        {
            value = value * 31 + ch;
        }
        return value;
    }
};

// Arash Partow
struct AP_Hash
{
    size_t operator()(const string &s)
    {
        size_t value = 0;
        for (long i = 0; i < s.size(); i++)
        {
            size_t ch = s[i];
            if ((i & 1) == 0)
                value ^= ((value << 7) ^ ch ^ (value >> 3));
            else
                value ^= (~((value << 11) ^ ch ^ (value >> 5)));
        }
        return value;
    }
};

// Daniel J. Bernstein教授
struct DJB_Hash
{
    size_t operator()(const string &s)
    {
        size_t value = 5381;
        for (auto ch : s)
        {
            value += (value << 5) + ch;
        }
        return value;
    }
};

template <
    size_t N,         // 插入数据个数
    class K = string, // 数据类型
    class Hash1 = BKDR_Hash,
    class Hash2 = AP_Hash,
    class Hash3 = DJB_Hash>
class BloomFilter
{

private:
    static const size_t num = 4; // 倍数
    bitmap<num * N> _bm;         // 布隆过滤器长度(bit位个数) ≈ 4.33 * 数据个数
public:
    // 插入
    void insert_setone(const K &key)
    {
        size_t len = num * N;
        size_t hash1 = Hash1()(key) % len;
        _bm.insert_setone(hash1);

        size_t hash2 = Hash2()(key) % len;
        _bm.insert_setone(hash2);

        size_t hash3 = Hash3()(key) % len;
        _bm.insert_setone(hash3);

        // cout << hash1 << " " << hash2 << " " << hash3 << " " << endl << endl;
    }

    // 判断是否存在
    bool judge(const K &key)
    {
        // 但凡一个位置没有 一定不存在

        size_t len = N * num;

        size_t hash1 = Hash1()(key) % len;
        if (!_bm.judge(hash1))
        {
            return false;
        }

        size_t hash2 = Hash2()(key) % len;
        if (!_bm.judge(hash2))
        {
            return false;
        }

        size_t hash3 = Hash3()(key) % len;
        if (!_bm.judge(hash3))
        {
            return false;
        }

        return true;
    }
};

// 测试插入、判断函数
void test_bloomfilter1()
{
    BloomFilter<100> bf;
    // 插入
    bf.insert_setone("sort");
    bf.insert_setone("bloom");
    bf.insert_setone("hello world hello bit");
    bf.insert_setone("test");
    bf.insert_setone("etst");
    bf.insert_setone("estt");
    // 判断存在[有误判]
    cout << bf.judge("sort") << endl;
    cout << bf.judge("bloom") << endl;
    cout << bf.judge("hello world hello bit") << endl;
    cout << bf.judge("etst") << endl;
    cout << bf.judge("test") << endl;
    cout << bf.judge("estt") << endl;
    // 判断不存在[精确]
    cout << bf.judge("ssort") << endl;
    cout << bf.judge("tors") << endl;
    cout << bf.judge("ttes") << endl;
}

// 测试误判率
void test_bloomfilter2()
{
    srand(time(0));
    const size_t N = 10000;
    BloomFilter<N> bf; // 4w个比特位

    // v1: url1 url2 url3... url9999 
    vector<string> v1;
    string url = "https://www.gitee.com/Ape-LHR/apes-warehouse/547993.html";
    // v1存储内容:url1 url2 url3....url9999共N个
    for (size_t i = 0; i < N; ++i)
    {
        v1.push_back(url + to_string(i));
    }
    // 把v1里面的每个字符串插入到布隆过滤器
    for (auto &str : v1)
    {
        bf.insert_setone(str);
    }

    // v2 : url10000 url10001 url10002... url19999
    // v2存储和v1前缀相同 后缀不同的字符串
    vector<string> v2;
    for (size_t i = N; i < 2 * N; ++i)
    {
        v2.push_back(url + to_string(i));
    }
    // 判断v2中每个字符串是否在布隆过滤器中
    size_t count1 = 0;
    for (auto &str : v2)
    {
        if (bf.judge(str))
            ++count1;
    }
    double rate1 = (double)count1 / (double)N;
    cout << "相似字符串误判率  :" << rate1 * 100 << "%" << endl;

    // v3存储和v1前缀和后缀均有较大差异
    vector<string> v3;
    string url2 = "https://www.csdn.net/?spm=1001.2014.3001.4476";
    for (size_t i = 0; i < N; ++i)
    {
        v3.push_back(url2 + to_string(i + rand()));
    }
    // 判断v3中每个字符串是否在布隆过滤器中
    size_t count2 = 0;
    for (auto &str : v3)
    {
        if (bf.judge(str))
            ++count2;
    }
    double rate2 = (double)count2 / (double)N;
    cout << "不相似字符串误判率:" << rate2 * 100 << "%" << endl;
}