C/C++ 查找算法
一.二分算法
调整:
如果arr[mid] < x, min = mid + 1;
如果arr[mid] > x, max = mid - 1;
如果arr[mid] == x, 找到结果
终止条件:min >= max
二分查找--泛型情况
找第一个1:
找最后一个1:
注意:mid = (min + max + 1) /2
代码演示:
#define _CRT_SECURE_NO_WARNINGS
#include<iostream>
#include<algorithm>
using namespace std;
void output(int* arr, int n, int ind = -1) {
int len = 0;
for (int i = 0; i < n; i++) {
len += printf("%4d", i);
}
printf("\n");
for (int i = 0; i < len; i++) {
printf("-");
}
printf("\n");
for (int i = 0; i < n; i++) {
if (i == ind) printf("\033[1;32m");
printf("%4d", arr[i]);
if (i == ind) printf("\033[0m");
}
printf("\n");
return;
}
int binary_search(int* arr, int n, int x) {
int head = 0, tail = n - 1, mid;
while (head <= tail) {
mid = (head + tail) / 2;
if (arr[mid] == x) return mid;
if (arr[mid] < x) head = mid + 1;
else tail = mid - 1;
}
return -1;
}
void test_binary_search(int n) {
int* arr = (int*)malloc(sizeof(int) * n);
arr[0] = rand() % 10;
for (int i = 1; i < n; i++) arr[i] = arr[i - 1] + rand() % 10;
output(arr, n);
int x;
while (~scanf("%d", &x)) {
if (x == -1) break;
int ind = binary_search(arr, n, x);
output(arr, n, ind);
}
}
#define EXP 1e-4
double f(double x) {
if (x >= 0) x -= min(x, 3000.0) * 0.03;
if (x >= 3000) x -= (min(x, 12000.0) - 3000) * 0.1;
if (x >= 12000) x -= (min(x, 25000.0) - 12000) * 0.2;
if (x >= 25000) x -= (min(x, 35000.0) - 25000) * 0.25;
if (x >= 35000) x -= (min(x, 55000.0) - 35000) * 0.3;
if (x >= 55000) x -= (min(x, 80000.0) - 55000) * 0.35;
if (x >= 80000) x -= (x - 80000) * 0.45;
return x;
}
double binary_algorithm(double y) {
double head = 0, tail = 1000000, mid;
while (tail - head >= EXP) {
mid = (head + tail) / 2;
if (f(mid) < y) head = mid;
else tail = mid;
}
return head;
}
void test_binary_algorithm() {
double y;
while (~scanf("%lf", &y)) {
if (y < 0) break;
double x = binary_algorithm(y);
printf("f(%.2lf) = %.2lf\n", x, y);
}
}
int main() {
#define MAX_N 10
test_binary_search(MAX_N);
test_binary_algorithm();
return 0;
}
二.跳跃表
对于原有链表中,对每个节点都设置了一个层高;
头节点的值为一个极小值,尾节点的值为一个极大值.
跳跃表本身,会维护节点中值的有序性
查找操作:
从左上角的节点开始查找:
若下一个节点的值比要查找的值大,则向下走一层;
若下一个节点的值比要查找的值小,则走向下一个节点;
插入操作:
跳跃表的每一个节点会随机一个层高
#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<string.h>
#include<inttypes.h>
typedef struct Node {
int key, level;
struct Node* next, * down, * up;
}Node;
#define min(a,b) ((a) < (b) ? (a) : (b))
#define MAX_LEVEL 32
typedef struct Skiplist {
Node* head, * tail;
int max_level;
}Skiplist;
Node* getNewNode(int key, int n) {
Node* nodes = (Node*)malloc(sizeof(Node) * n);
for (int i = 0; i < n; i++) {
nodes[i].key = key;
nodes[i].level = i;
nodes[i].next = NULL;
nodes[i].down = ((i != 0)? nodes + i - 1 : NULL);
nodes[i].up = ((i + 1 < n)? nodes + i + 1 : NULL);
}
return nodes + n - 1;
}
Skiplist* getNewSkiplist(int n) {
Skiplist* s = (Skiplist*)malloc(sizeof(Skiplist));
s->head = getNewNode(INT32_MIN, n);
s->tail = getNewNode(INT32_MAX, n);
s->max_level = n;
Node* p = s->head, *q = s->tail;
while (p) {
p->next = q;
p = p->down;
q = q->down;
}
while (s->head->level != 0) s->head = s->head->down;
return s;
}
Node* find(Skiplist* s, int x) {
Node* p = s->head;
while (p && p->key != x) {
if (p->next->key <= x) p = p->next;
else p = p->down;
}
return p;
}
#define MAX_RAND_N 10000
double randDouble() {
int n = (rand() % MAX_RAND_N);
return (n * 1.0 / (double)MAX_RAND_N);
}
int randLevel(Skiplist* s) {
int level = 1;
double p = 1.0 / 2.0;
double temp = randDouble();
while (temp < p) {
level += 1;
temp = randDouble();
}
return min(s->max_level, level);
}
void insert(Skiplist* s, int x) {
int level = randLevel(s);
while (s->head->level + 1 < level) {
s->head = s->head->up;
}
Node* node = getNewNode(x, level);
Node* p = s->head;
printf("insert begin\n");
fflush(stdout);
while(p->level != node->level) p = p->down;
while (p) {
while (p->next->key < node->key) p = p->next;
node->next = p->next;
p->next = node;
p = p->down;
node = node->down;
}
return;
}
void clearNode(Node* p) {
if (p == NULL) return;
free(p);
return;
}
void clearSkiplist(Skiplist* s) {
Node* p = s->head, *q;
while (p->level != 0) p = p->down;
while (p) {
q = p->next;
clearNode(p);
p = q;
}
free(s);
}
void output(Skiplist* s) {
Node* p = s->head;
int len = 0;
for (int i = 0; i < s->head->level; i++) {
len += printf("%4d", i);
}
printf("\n");
for (int i = 0; i < len; i++) printf("-");
printf("\n");
while (p->level > 0) p = p->down;
while (p) {
bool flag = (p->key != INT32_MIN && p->key != INT32_MAX);
for (Node* q = p; q && flag; q = q->up) {
printf("%4d", q->key);
}
if(flag) printf("\n");
p = p->next;
}
}
int main() {
srand(time(0));
int x;
Skiplist* s = getNewSkiplist(MAX_LEVEL);
while (~scanf("%d", &x)) {
if (x == -1)break;
insert(s, x);
}
output(s);
while (~scanf("%d", &x)) {
Node* p = find(s, x);
if (p) {
printf("key: %d, level: %d", p->key, p->level);
}else {
printf("NULL\n");
}
}
clearSkiplist(s);
return 0;
}
三.哈希表与布隆过滤器
哈希表:
哈希冲突:不同元素经过哈希函数被映射到了同一个位置.
哈希冲突的解决办法:
1.开放定址法
如果发生冲突,将新的元素按照一定规则,去重新计算得出一个新的下标;
2.再哈希法
如果发生冲突,用其他的哈希函数进行计算得出新的下标;
3.建立公共溢出区法
用另一种数据结构来存储多余的元素;
4.链式地址法(推荐)
每个下标中维护一个链表;
布隆过滤器:
传统哈希表,储存空间和元素数量强相关;
布隆过滤器,储存空间和元素数量弱相关;
若经过多个哈希函数运算,出现的结果相同,则布隆过滤器就认为此元素出现过;
注意,并不是一定出现过,而是大概率出现过;
如何减少误判:增加布隆过滤器的底层存储空间,降低重复的概率;
#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<time.h>
#include<stdlib.h>
#include<string.h>
#include<iostream>
using namespace std;
typedef struct Node {
char* s;
struct Node* next;
}Node;
typedef struct HashTable {
Node* data;
int cnt, size;
}HashTable;
Node* getNewNode(const char* s) {
Node* p = (Node*)malloc(sizeof(Node));
p->s = _strdup(s);
p->next = NULL;
return p;
}
HashTable* getNewHashTable(int n) {
HashTable* h = (HashTable*)malloc(sizeof(HashTable));
h->data = (Node*)malloc(sizeof(Node) * n);
for (int i = 0; i < n; i++) {
h->data[i].next = NULL;
}
h->size = n;
h->cnt = 0;
return h;
}
int hash_func(const char* s) {
int seed = 131, h = 0;
for (int i = 0; s[i]; i++) {
h = h * seed + s[i];
}
return h & 0x7fffffff;
}
bool find(HashTable* h, const char* s) {
int hcode = hash_func(s), ind = hcode % h->size;
Node* p = h->data[ind].next;
while (p) {
if (strcmp(p->s, s) == 0)return true;
p = p->next;
}
return false;
}
void swapHashTable(HashTable* h1, HashTable* h2) {
swap(h1->data, h2->data);
swap(h1->cnt, h2->cnt);
swap(h1->size, h2->size);
return;
}
void expand(HashTable* h);
bool insert(HashTable* h, const char* s) {
if (h->cnt >= h->size * 2) {
expand(h);
}
int hcode = hash_func(s), ind = hcode% h->size;
Node* p = getNewNode(s);
p->next = h->data[ind].next;
h->data[ind].next = p;
h->cnt += 1;
return true;
}
void clearNode(Node* p) {
if (p == NULL) return;
if (p->s) free(p->s);
free(p);
return;
}
void clearHashTable(HashTable* h) {
if (h == NULL) return;
for (int i = 0; i < h->size; i++) {
Node* p = h->data[i].next, * q;
while (p) {
q = p->next;
clearNode(p);
p = q;
}
}
free(h->data);
free(h);
return;
}
void expand(HashTable* h) {
HashTable* new_h = getNewHashTable(h->size * 2);
for (int i = 0; i < h->size; i++) {
Node* p = h->data[i].next;
while (p) {
insert(new_h, p->s);
p = p->next;
}
}
swapHashTable(h, new_h);
clearHashTable(new_h);
}
void output(HashTable* h) {
printf("\n\nHash Table(%d/%d): \n", h->cnt, h->size);
for (int i = 0; i < h->size; i++) {
printf("%d:", i);
Node* p = h->data[i].next;
while (p) {
printf("%s -> ", p->s);
p = p->next;
}
printf("\n");
}
return;
}
int main() {
srand(time(0));
char s[100];
#define MAX_N 2
HashTable* h = getNewHashTable(MAX_N);
while (~scanf("%s", s)) {
if (strcmp(s, "end") == 0) break;
insert(h, s);
}
output(h);
while (~scanf("%s", s)) {
printf("find(%s) = %d\n", s, find(h, s));
}
return 0;
}
代码练习
1. 两数之和 - 力扣(LeetCode)
哈希表做法
class Solution {
public:
vector<int> twoSum(vector<int>& nums, int target) {
unordered_map<int, int> h;
vector<int> ret(2);
for(int i = 0; i < nums.size();i++) {
if(h.find(target -nums[i]) != h.end()) {
ret[0] = h[target - nums[i]];
ret[1] = i;
break;
}
h[nums[i]] = i;
}
return ret;
}
};
二分查找做法
class Solution {
public:
int binary_search(vector<int>&nums, vector<int>&ind, int b, int x) {
int head = b, tail = nums.size() - 1, mid;
while(head <= tail) {
mid = (head + tail) /2;
if(nums[ind[mid]] == x) return mid;
if(nums[ind[mid]] < x) head = mid + 1;
else tail = mid - 1;
}
return -1;
}
vector<int> twoSum(vector<int>& nums, int target) {
int n = nums.size();
vector<int> ind(n, 0);
for(int i = 0; i < n; i++) ind[i] = i;
sort(ind.begin(), ind.end(), [&](int i , int j) {
return nums[i] < nums[j];
});
vector<int> ret(2);
for(int i = 0; i < n; i++) {
int j = binary_search(nums, ind, i + 1, target - nums[ind[i]]);
if(j == -1) continue;
ret[0] = ind[j];
ret[1] = ind[i];
}
//if(ret[0] >ret[1]) swap(ret[0], ret[1]);
return ret;
}
};
35. 搜索插入位置 - 力扣(LeetCode)
class Solution {
public:
int searchInsert(vector<int>& nums, int target) {
int head = 0, tail = nums.size(), mid;
while(head < tail) {
mid = (head + tail) /2;
if(nums[mid] < target) head = mid + 1;
else tail = mid;
}
return head;
}
};
217. 存在重复元素 - 力扣(LeetCode)
class Solution {
public:
bool containsDuplicate(vector<int>& nums) {
unordered_set<int> h;
for(auto x: nums) {
if(h.find(x) != h.end()) return true;
h.insert(x);
}
return false;
}
};
349. 两个数组的交集 - 力扣(LeetCode)
class Solution {
public:
vector<int> intersection(vector<int>& nums1, vector<int>& nums2) {
unordered_set<int> h;
vector<int> ret;
for(auto x: nums1) h.insert(x);
for(auto x: nums2) {
if(h.find(x) == h.end()) continue;
ret.push_back(x);
h.erase(h.find(x));
}
return ret;
}
};
3. 无重复字符的最长子串 - 力扣(LeetCode)
class Solution {
public:
bool check(string &s, int l) {
int cnt[256] = {0}, k = 0;
for(int i = 0; s[i]; i++) {
cnt[s[i]] += 1;
if(cnt[s[i]] == 1) k += 1;
if(i >= l) {
cnt[s[i - l]] -= 1;
if(cnt[s[i - l]] == 0) k -= 1;
}
if (l == k) return true;
}
return false;;
}
int lengthOfLongestSubstring(string s) {
int head = 1, tail = s.size(), mid;
//111100000
while(head < tail) {
mid = (head +tail + 1) /2;
if(check(s,mid)) head = mid;
else tail = mid - 1;
}
return head;
}
};
4. 寻找两个正序数组的中位数 - 力扣(LeetCode)
class Solution {
public:
int findK(vector<int> &n1, int ind1, vector<int> &n2, int ind2, int k) {
int n = n1.size(), m = n2.size();
if(k == 1) {
int a = (n1.size() == ind1 ? INT32_MAX: n1[ind1]);
int b = (n2.size() == ind2 ? INT32_MAX: n2[ind2]);
return min(a,b);
}
if(n == ind1) return n2[k - 1];
if(m == ind2) return n1[k - 1];
int cnt1 = min(k / 2 , n - ind1);
int cnt2 = min(k - cnt1, m - ind2);
cnt1 = k - cnt2;
if(n1[ind1 + cnt1 - 1] <= n2[ind2 + cnt2 - 1]) {
return findK(n1,ind1 + cnt1, n2, ind2, k - cnt1);
}
return findK(n1, ind1, n2 ,ind2 + cnt2, k - cnt2);
}
double findMedianSortedArrays(vector<int>& nums1, vector<int>& nums2) {
int n = nums1.size(), m = nums2.size();
if((n + m) % 2 == 1) return findK(nums1, 0, nums2, 0, (n + m) / 2 + 1);
double a = findK(nums1, 0, nums2, 0, (n + m) / 2);
double b = findK(nums1, 0, nums2, 0, (n + m) / 2 + 1);
return (a + b) /2.0;
}
};