数据结构——哈夫曼编码
目录
- 1、哈夫曼编码定义
- 2、问题描述
- 3、逐步分析
- 1)涉及操作
- 2)代码实现
- 4、代码整合
1、哈夫曼编码定义
哈夫曼编码(Huffman Coding)是一种用于无损数据压缩的熵编码算法。它是由大卫・哈夫曼(David A. Huffman)在 1952 年发明的。其基本思想是根据字符在数据中出现的频率来分配不同长度的编码,频率高的字符分配较短的编码,频率低的字符分配较长的编码,从而达到数据压缩的目的。
2、问题描述
1、问题描述
从键盘接收一串电文字符,输入对应的Huffman编码。同时,能翻译由Huffman编码生成的代码串,输出对应的电文字符串。
2、设计要求
(1)构造一棵 Huffman树。
(2)实现Huffman编码,并用Huffman编码生成的代码串进行译码。
(3)程序中字符和权值是可变的,实现程序的灵活性。
3、逐步分析
1)涉及操作
根据定义可知哈夫曼编码首先会使用字符的频率创建一棵树,然后通过这个树的结构为每个字符生成一个特定的编码,出现频率高的字符使用较短的编码,出现频率低的则使用较长的编码,这样就会使编码之后的字符串平均长度降低,从而达到数据无损压缩的目的。
①基础操作
- 哈夫曼树构造
计算字符串中每个字符的频率
按照字符出现的频率进行排序
把这些字符作为叶子节点开始构建一颗哈夫曼树 - 哈夫曼树打印输出
②要求操作
- 哈夫曼编码
根据哈夫曼树对输入的字符进行编码 - 哈夫曼译码
根据哈夫曼树对输入的电文进行译码
2)代码实现
1、结构体定义
①哈夫曼叶子节点
typedef struct node {
char letter;
int weight;//结点的权值
int parent;//结点的双亲
int lchild;//结点的左孩子
int rchild;//结点的右孩子
}HNodeType;
②哈夫曼编码节点
typedef struct {
char letter;
int bit[MAXBIT];
int start;
}HCodeType;
③输入的字符
typedef struct {
char s;
int num;
}Message;
2、基础操作
①哈夫曼树构造
void HuffmanTree(HNodeType HuffNode[], int n, Message a[])
{
int i, j, m1, m2, x1, x2, temp1; char temp2;
for (i = 0; i < 2 * n - 1; i++)//HuffNode[]初始化
{
HuffNode[i].letter = NULL;
HuffNode[i].weight = 0;
HuffNode[i].parent = -1;
HuffNode[i].lchild = -1;
HuffNode[i].rchild = -1;
}
for (i = 0; i < n - 1; i++)
for (j = i + 1; j < n - 1; j++)
if (a[j].num > a[i].num)
{
temp1 = a[i].num; a[i].num = a[j].num; a[j].num = temp1;
temp2 = a[i].s; a[i].s = a[j].s; a[j].s = temp2;
}
for (i = 0; i < n; i++)
{
HuffNode[i].weight = a[i].num;
HuffNode[i].letter = a[i].s;
}
for (i = 0; i < n - 1; i++)//构造哈夫曼树
{
m1 = m2 = MAXVALUE;
x1 = x2 = 0;
for (j = 0; j < n + i; j++)//找出的两棵权值最小的子树
{
if (HuffNode[j].parent == -1 && HuffNode[j].weight < m1)
{
m2 = m1; x2 = x1;
m1 = HuffNode[j].weight; x1 = j;
}
else if (HuffNode[j].parent == -1 && HuffNode[j].weight < m2)
{
m2 = HuffNode[j].weight;
x2 = j;
}
}
//将找出的两棵子树合并为一棵子树
HuffNode[x1].parent = n + i; HuffNode[x2].parent = n + i;
HuffNode[n + i].weight = HuffNode[x1].weight + HuffNode[x2].weight;
HuffNode[n + i].lchild = x1; HuffNode[n + i].rchild = x2;
}
}
②哈夫曼树打印输出
void PrintHaffmanTree(int n, Message a[], HCodeType *HuffCode)
{
HNodeType HuffNode[MAXNODE];
HCodeType cd;
int i, j, c, p;
HuffmanTree(HuffNode, n, a); //建立哈夫曼树
for (i = 0; i < n; i++)
{
cd.start = n - 1;
c = i;
p = HuffNode[c].parent;
while (p != -1)//由叶结点向上直到树根
{
if (HuffNode[p].lchild == c)
cd.bit[cd.start] = 0;
else
cd.bit[cd.start] = 1;
cd.start--;
c = p;
p = HuffNode[c].parent;
}
for (j = cd.start + 1; j < n; j++)//保存求出的每个结点的哈夫曼编码和编码的起始位
HuffCode[i].bit[j] = cd.bit[j];
HuffCode[i].start = cd.start;
}
printf(" 输出每个叶子的哈夫曼编码:\n");
for (i = 0; i < n; i++)//输出每个叶子结点的哈夫曼编码
{
HuffCode[i].letter = HuffNode[i].letter;
printf(" %c:", HuffCode[i].letter);
for (j = HuffCode[i].start + 1; j < n; j++)
printf(" %d", HuffCode[i].bit[j]);
printf("\n");
}
}
3、要求操作
①哈夫曼编码
void Encoding(int n, Message a[], HCodeType *HuffCode)
{
HNodeType HuffNode[MAXNODE];
int i, j;
char * m;
HuffmanTree(HuffNode, n, a);//建立哈夫曼树
printf("请输入需要编码的字符: ");
for (i = 0; i < 30; i++)code[i] = NULL;
scanf(" %s", &m);
printf("输出的电文为: ");
while (*m != NULL)
{
for (int i = 0; i < n; i++)
{
if (*m == HuffNode[i].letter)
{
for (j = HuffCode[i].start + 1; j < n; j++)
printf(" %d", HuffCode[i].bit[j]);
}
}
m++;
}
printf("\n");
}
②哈夫曼译码
void Decoding(int n, Message a[])
{
HNodeType HuffNode[MAXNODE];
int i, c;
char * m;
HuffmanTree(HuffNode, n, a);//建立哈夫曼树
printf(" 请输入电文(1/0):\n");
for (i = 0; i < 30; i++)code[i] = NULL;
scanf(" %s", &m);
c = 2 * n - 2;
printf(" 输出哈夫曼译码:\n");
while (*m != NULL)
{
if (*m == '0')
{
c = i = HuffNode[c].lchild;
if (HuffNode[c].lchild == -1 && HuffNode[c].rchild == -1)
{
printf("%c", HuffNode[i].letter);
c = 2 * n - 2;
}
}
if (*m == '1')
{
c = i = HuffNode[c].rchild;
if (HuffNode[c].lchild == -1 && HuffNode[c].rchild == -1)
{
printf("%c", HuffNode[i].letter);
c = 2 * n - 2;
}
}
m++;
}
printf("\n");
}
4、主函数
int main()
{
Message data[30];
char s[100], * p;
int i, count = 0, select;
HCodeType HuffCode[MAXLEAF];
printf("\n 请输入一些字符:");
scanf("%s", &s);
for (i = 0; i < 30; i++)
{
data[i].s = NULL;
data[i].num = 0;
}
p = s;
while (*p)
{
for (i = 0; i <= count + 1; i++)
{
if (data[i].s == NULL)
{
data[i].s = *p; data[i].num++; count++; break;
}
else if (data[i].s == *p)
{
data[i].num++; break;
}
}
p++;
}
printf("\n");
printf(" 不同的字符数:%d\n", count);
for (i = 0; i < count; i++)
{
printf(" %c ", data[i].s);
printf(" 权值:%d", data[i].num);
printf("\n");
}
PrintHaffmanTree(count, data, HuffCode);
do {
printf("请输入指令(1:编码 2:译码 0:退出系统): ");
scanf("%d", &select);
switch (select)
{
case 1:
Encoding(count, data, HuffCode);
break;
case 2:
Decoding(count, data);
break;
case 0:
printf("已退出系统!\n");
break;
default:
printf("请重新输入\n");
getchar();
}
} while (select);
return 0;
}
4、代码整合
#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<conio.h>
#define MAXVALUE 10000
#define MAXLEAF 30
#define MAXNODE MAXLEAF*2-1
#define MAXBIT 50
#define NULL 0
typedef struct node {
char letter;
int weight;//结点的权值
int parent;//结点的双亲
int lchild;//结点的左孩子
int rchild;//结点的右孩子
}HNodeType;
typedef struct {
char letter;
int bit[MAXBIT];
int start;
}HCodeType;
typedef struct {
char s;
int num;
}Message;
//哈夫曼树的构造
void HuffmanTree(HNodeType HuffNode[], int n, Message a[])
{
int i, j, m1, m2, x1, x2, temp1; char temp2;
for (i = 0; i < 2 * n - 1; i++)//HuffNode[]初始化
{
HuffNode[i].letter = NULL;
HuffNode[i].weight = 0;
HuffNode[i].parent = -1;
HuffNode[i].lchild = -1;
HuffNode[i].rchild = -1;
}
for (i = 0; i < n - 1; i++)
for (j = i + 1; j < n - 1; j++)
if (a[j].num > a[i].num)
{
temp1 = a[i].num; a[i].num = a[j].num; a[j].num = temp1;
temp2 = a[i].s; a[i].s = a[j].s; a[j].s = temp2;
}
for (i = 0; i < n; i++)
{
HuffNode[i].weight = a[i].num;
HuffNode[i].letter = a[i].s;
}
for (i = 0; i < n - 1; i++)//构造哈夫曼树
{
m1 = m2 = MAXVALUE;
x1 = x2 = 0;
for (j = 0; j < n + i; j++)//找出的两棵权值最小的子树
{
if (HuffNode[j].parent == -1 && HuffNode[j].weight < m1)
{
m2 = m1; x2 = x1;
m1 = HuffNode[j].weight; x1 = j;
}
else if (HuffNode[j].parent == -1 && HuffNode[j].weight < m2)
{
m2 = HuffNode[j].weight;
x2 = j;
}
}
//将找出的两棵子树合并为一棵子树
HuffNode[x1].parent = n + i; HuffNode[x2].parent = n + i;
HuffNode[n + i].weight = HuffNode[x1].weight + HuffNode[x2].weight;
HuffNode[n + i].lchild = x1; HuffNode[n + i].rchild = x2;
}
}
//输出哈夫曼树
void PrintHaffmanTree(int n, Message a[], HCodeType *HuffCode)
{
HNodeType HuffNode[MAXNODE];
HCodeType cd;
int i, j, c, p;
HuffmanTree(HuffNode, n, a); //建立哈夫曼树
for (i = 0; i < n; i++)
{
cd.start = n - 1;
c = i;
p = HuffNode[c].parent;
while (p != -1)//由叶结点向上直到树根
{
if (HuffNode[p].lchild == c)
cd.bit[cd.start] = 0;
else
cd.bit[cd.start] = 1;
cd.start--;
c = p;
p = HuffNode[c].parent;
}
for (j = cd.start + 1; j < n; j++)//保存求出的每个结点的哈夫曼编码和编码的起始位
HuffCode[i].bit[j] = cd.bit[j];
HuffCode[i].start = cd.start;
}
printf(" 输出每个叶子的哈夫曼编码:\n");
for (i = 0; i < n; i++)//输出每个叶子结点的哈夫曼编码
{
HuffCode[i].letter = HuffNode[i].letter;
printf(" %c:", HuffCode[i].letter);
for (j = HuffCode[i].start + 1; j < n; j++)
printf(" %d", HuffCode[i].bit[j]);
printf("\n");
}
}
//生成哈夫曼编码
void Encoding(int n, Message a[], HCodeType *HuffCode)
{
HNodeType HuffNode[MAXNODE];
int i, j;
char * m;
HuffmanTree(HuffNode, n, a);//建立哈夫曼树
printf("请输入需要编码的字符: ");
for (i = 0; i < 30; i++)code[i] = NULL;
scanf(" %s", &m);
printf("输出的电文为: ");
while (*m != NULL)
{
for (int i = 0; i < n; i++)
{
if (*m == HuffNode[i].letter)
{
for (j = HuffCode[i].start + 1; j < n; j++)
printf(" %d", HuffCode[i].bit[j]);
}
}
m++;
}
printf("\n");
}
//生成哈夫曼译码
void Decoding(int n, Message a[])
{
HNodeType HuffNode[MAXNODE];
int i, c;
char * m;
HuffmanTree(HuffNode, n, a);//建立哈夫曼树
printf(" 请输入电文(1/0):\n");
for (i = 0; i < 30; i++)code[i] = NULL;
scanf(" %s", &m);
c = 2 * n - 2;
printf(" 输出哈夫曼译码:\n");
while (*m != NULL)
{
if (*m == '0')
{
c = i = HuffNode[c].lchild;
if (HuffNode[c].lchild == -1 && HuffNode[c].rchild == -1)
{
printf("%c", HuffNode[i].letter);
c = 2 * n - 2;
}
}
if (*m == '1')
{
c = i = HuffNode[c].rchild;
if (HuffNode[c].lchild == -1 && HuffNode[c].rchild == -1)
{
printf("%c", HuffNode[i].letter);
c = 2 * n - 2;
}
}
m++;
}
printf("\n");
}
int main()
{
Message data[30];
char s[100], * p;
int i, count = 0, select;
HCodeType HuffCode[MAXLEAF];
printf("\n 请输入一些字符:");
scanf("%s", &s);
for (i = 0; i < 30; i++)
{
data[i].s = NULL;
data[i].num = 0;
}
p = s;
while (*p)
{
for (i = 0; i <= count + 1; i++)
{
if (data[i].s == NULL)
{
data[i].s = *p; data[i].num++; count++; break;
}
else if (data[i].s == *p)
{
data[i].num++; break;
}
}
p++;
}
printf("\n");
printf(" 不同的字符数:%d\n", count);
for (i = 0; i < count; i++)
{
printf(" %c ", data[i].s);
printf(" 权值:%d", data[i].num);
printf("\n");
}
PrintHaffmanTree(count, data, HuffCode);
do {
printf("请输入指令(1:编码 2:译码 0:退出系统): ");
scanf("%d", &select);
switch (select)
{
case 1:
Encoding(count, data, HuffCode);
break;
case 2:
Decoding(count, data);
break;
case 0:
printf("已退出系统!\n");
break;
default:
printf("请重新输入\n");
getchar();
}
} while (select);
return 0;
}