当前位置：首页 > article >正文

串匹配问题的三种算法

article 2024/11/20 0:47:18

BF算法

#include <iostream>  
#include <string>  
  
using namespace std;  
  
// 暴力匹配算法  
int bruteForceMatch(const string& s, const string& p) {  
    int n = s.length();  
    int m = p.length();  
    for (int i = 0; i <= n - m; ++i) {  
        int j;  
        for (j = 0; j < m; ++j) {  
            if (s[i + j] != p[j])  
                break;  
        }  
        if (j == m)  
            return i; // 匹配成功  
    }  
    return -1; // 匹配失败  
}  
  
int main() {  
    string s = "ABABDABACDABABCABAB";  
    string p = "ABABCABAB";  
    cout << "Brute-Force Match Result: " << bruteForceMatch(s, p) << endl;  
    return 0;  
}

KMP算法

#include <iostream>  
#include <string>  
#include <vector>  
  
using namespace std;  
  
// 计算部分匹配表  
vector<int> computeLPSArray(const string& p, int M) {  
    vector<int> lps(M, 0);  
    int len = 0;  
    int i = 1;  
    while (i < M) {  
        if (p[i] == p[len]) {  
            len++;  
            lps[i] = len;  
            i++;  
        } else { // (p[i] != p[len])  
            if (len != 0) {  
                len = lps[len - 1];  
            } else { // if (len == 0)  
                lps[i] = len;  
                i++;  
            }  
        }  
    }  
    return lps;  
}  
  
// KMP算法  
int KMPSearch(const string& s, const string& p) {  
    int n = s.length();  
    int m = p.length();  
    vector<int> lps = computeLPSArray(p, m);  
    int i = 0; // s的索引  
    int j = 0; // p的索引  
    while (i < n) {  
        if (p[j] == s[i]) {  
            j++;  
            i++;  
        }  
        if (j == m) {  
            return i - j; // 匹配成功，返回模式串在主串中的起始位置  
            j = lps[j - 1];  
        }  
        // 不匹配时  
        else if (i < n && p[j] != s[i]) {  
            // 不为0则回到之前某个位置  
            if (j != 0)  
                j = lps[j - 1];  
            else // j == 0  
                i = i + 1;  
        }  
    }  
    return -1; // 匹配失败  
}  
  
int main() {  
    string s = "ABABDABACDABABCABAB";  
    string p = "ABABCABAB";  
    cout << "KMP Match Result: " << KMPSearch(s, p) << endl;  
    return 0;  
}

BM算法

#include <iostream>
#include <vector>
#include <string>

using namespace std;

class BoyerMoore {
public:
    BoyerMoore(const string& pattern) {
        this->pattern = pattern; // 存储模式串
        this->m = pattern.length(); // 模式串长度
        buildBadChar(); // 构建坏字符表
        buildGoodSuffix(); // 构建好后缀表
    }

    // 在文本中搜索模式串
    void search(const string& text) {
        int n = text.length(); // 文本长度
        int s = 0; // 当前文本的起始位置

        while (s <= n - m) { // 直到文本末尾
            int j = m - 1; // 从模式串末尾开始比较

            // 比较模式串与文本
            while (j >= 0 && pattern[j] == text[s + j]) {
                j--;
            }

            // 如果找到匹配
            if (j < 0) {
                cout << "Pattern found at index " << s << endl;
                // 移动模式串
                s += (s + m < n) ? m - badChar[text[s + m]] : 1;
            } else {
                // 移动模式串
                s += max(1, j - badChar[text[s + j]]);
            }
        }
    }

private:
    string pattern; // 模式串
    int m; // 模式串长度
    vector<int> badChar; // 坏字符表
    vector<int> goodSuffix; // 好后缀表

    // 构建坏字符表
    void buildBadChar() {
        badChar.assign(256, -1); // 初始化为-1
        for (int i = 0; i < m; i++) {
            badChar[(int)pattern[i]] = i; // 更新坏字符的位置
        }
    }

    // 构建好后缀表
    void buildGoodSuffix() {
        goodSuffix.assign(m + 1, m); // 初始化
        vector<int> z(m); // z数组
        int last = m - 1;

        for (int i = m - 1; i >= 0; i--) {
            if (i == last) {
                while (last >= 0 && pattern[last] == pattern[last - (m - 1 - i)]) {
                    last--;
                }
                goodSuffix[m - 1 - i] = last + 1; // 更新好后缀
            } else {
                goodSuffix[m - 1 - i] = last - i; // 更新
            }
        }
    }
};

int main() {
    string text = "ABAAABCDABCDE"; // 文本
    string pattern = "ABC"; // 模式串
    BoyerMoore bm(pattern); // 创建BM对象
    bm.search(text); // 搜索模式串
    return 0;
}

暴力匹配算法（BF算法）：最坏情况时间复杂度：O(m * n)，其中m是模式串长度，n是文本长度。最坏情况下，需要逐个字符比较。
KMP算法：最坏情况时间复杂度：O(m + n)。KMP通过预处理模式串生成部分匹配表，避免了不必要的回溯，从而提高了效率。
Boyer-Moore算法（BM算法）：最坏情况时间复杂度：O(m * n)，但在大多数实际情况中，平均时间复杂度是O(n/m)。BM算法通过坏字符和好后缀启发式规则有效减少比较次数，通常在长模式串和长文本中表现优越。

总结：

BF：简单但效率低，尤其在长文本和模式串时。

KMP：高效，适合任何情况。

BM：在实际应用中通常更快，尤其是长模式串时。

查看全文

http://www.kler.cn/a/322495.html