python 字符串算法
字符串算法是处理字符串相关问题的核心工具。以下是使用Python实现几个经典字符串算法的示例:
1. KMP 算法(Knuth-Morris-Pratt Algorithm)
KMP 算法用于高效地解决字符串匹配问题,通过预处理模式串来避免不必要的比较。
def kmp_search(text, pattern):
def compute_lps(pattern):
lps = [0] * len(pattern)
length = 0
i = 1
while i < len(pattern):
if pattern[i] == pattern[length]:
length += 1
lps[i] = length
i += 1
else:
if length != 0:
length = lps[length - 1]
else:
lps[i] = 0
i += 1
return lps
lps = compute_lps(pattern)
i = j = 0
while i < len(text):
if pattern[j] == text[i]:
i += 1
j += 1
if j == len(pattern):
return i - j # 匹配成功
else:
if j != 0:
j = lps[j - 1]
else:
i += 1
return -1 # 未找到
# 示例
text = "ABABDABACDABABCABAB"
pattern = "ABABCABAB"
print(kmp_search(text, pattern)) # 输出: 10
2. Rabin-Karp 算法
Rabin-Karp 算法通过哈希值来快速匹配字符串。
def rabin_karp_search(text, pattern):
d = 256 # 字符集大小
q = 101 # 素数
m = len(pattern)
n = len(text)
h = pow(d, m - 1) % q
p = 0 # 模式串的哈希值
t = 0 # 文本串的哈希值
# 计算模式串和第一个窗口的哈希值
for i in range(m):
p = (d * p + ord(pattern[i])) % q
t = (d * t + ord(text[i])) % q
for i in range(n - m + 1):
if p == t: # 哈希值匹配
if text[i:i + m] == pattern:
return i # 匹配成功
if i < n - m: # 更新哈希值
t = (d * (t - ord(text[i]) * h) + ord(text[i + m])) % q
if t < 0:
t += q
return -1 # 未找到
# 示例
text = "ABABDABACDABABCABAB"
pattern = "ABABCABAB"
print(rabin_karp_search(text, pattern)) # 输出: 10
3. 最长回文子串(Longest Palindromic Substring)
使用动态规划解决最长回文子串问题。
def longest_palindromic_substring(s):
n = len(s)
dp = [[False] * n for _ in range(n)]
start = 0
max_len = 1
# 单个字符是回文
for i in range(n):
dp[i][i] = True
# 检查长度为 2 的子串
for i in range(n - 1):
if s[i] == s[i + 1]:
dp[i][i + 1] = True
start = i
max_len = 2
# 检查长度大于 2 的子串
for length in range(3, n + 1):
for i in range(n - length + 1):
j = i + length - 1
if s[i] == s[j] and dp[i + 1][j - 1]:
dp[i][j] = True
if length > max_len:
start = i
max_len = length
return s[start:start + max_len]
# 示例
s = "babad"
print(longest_palindromic_substring(s)) # 输出: "bab" 或 "aba"
4. 字符串编辑距离(Edit Distance)
编辑距离是衡量两个字符串之间相似度的指标,通过插入、删除、替换操作将一个字符串转换为另一个字符串所需的最少操作次数。
def edit_distance(word1, word2):
m, n = len(word1), len(word2)
dp = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(m + 1):
for j in range(n + 1):
if i == 0:
dp[i][j] = j
elif j == 0:
dp[i][j] = i
elif word1[i - 1] == word2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
return dp[m][n]
# 示例
word1 = "kitten"
word2 = "sitting"
print(edit_distance(word1, word2)) # 输出: 3
5. Trie 树(前缀树)
Trie 树是一种用于高效存储和检索字符串的数据结构。
class TrieNode:
def __init__(self):
self.children = {}
self.is_end_of_word = False
class Trie:
def __init__(self):
self.root = TrieNode()
def insert(self, word):
node = self.root
for char in word:
if char not in node.children:
node.children[char] = TrieNode()
node = node.children[char]
node.is_end_of_word = True
def search(self, word):
node = self.root
for char in word:
if char not in node.children:
return False
node = node.children[char]
return node.is_end_of_word
def starts_with(self, prefix):
node = self.root
for char in prefix:
if char not in node.children:
return False
node = node.children[char]
return True
# 示例
trie = Trie()
trie.insert("apple")
print(trie.search("apple")) # 输出: True
print(trie.search("app")) # 输出: False
print(trie.starts_with("app")) # 输出: True
trie.insert("app")
print(trie.search("app")) # 输出: True
总结
字符串算法是解决文本处理问题的核心工具。通过 KMP、Rabin-Karp、动态规划、Trie 树等算法,可以高效地解决字符串匹配、回文子串、编辑距离等问题。