哈夫曼文件压缩

xiaoxiao2021-03-26 47

用法：压缩./ tar -y filename 解压缩 ./tar -j filename ==============编程思想================== 一、分析文件filename 按字符来分析得出的结果，有哪些字符，每个字符出现的次数。二、根据字符出现的次数，构建哈弗曼树。得出字符的哈夫曼编码。三、根据字符的哈夫曼编码，进行转换、压缩。创建压缩文件。（压缩文件里，要包含哈夫曼编码。）四、需要的时候，读取压缩文件，从中读出哈夫曼编码和字符的对照表。然后解压缩。数据结构：保存字符次数和字符的数据结构 struct _symbol{ char character; unsigned int number; }; 用一个数组保存所有字符的信息。 struct _symbol symbol_list[128]; 哈夫曼树节点 struct node{ struct _symbol symbol; struct node *left; struct node *right; }; 哈夫曼编码 struct code{ char character; char codebit[100];

};

==================================================源代码======================================================

#include "header.h" #include <string.h> //单个字符的结构体 struct _symbol{ char character;//字符 unsigned int number;//字符的次数 char huffcode[20]; //编码 }; //总结构体结构定义 struct _filestate{ char symbol_count;//多少种字符 struct _symbol symbol_array[128]; //结构体数组保存各种字符 }; //文件字符总结构体的变量定义 struct _filestate filestate; //树节点 struct _node{ struct _node *parent; struct _symbol symbol; struct _node *left; struct _node *right; int idx; //在filestate中的索引 }; //创建一个数据类型 typedef struct _node node; //创建节点 node *creatnode(struct _symbol symbol,int idx) { node *temp=malloc(sizeof(node)); temp->symbol=symbol; temp->left=NULL; temp->right=NULL; temp->idx=idx; temp->parent=NULL; return temp; } //构建一棵哈弗曼树 node *createhaffman(struct _filestate *pfilestate) { //创建一维数组，里面保存指针 node **array=malloc(pfilestate->symbol_count*sizeof(node *)); node *temp=NULL; int i; for(i=0;i<pfilestate->symbol_count;i++) { array[i]=creatnode(pfilestate->symbol_array[i],i); } int j; int minidx,secminidx; for(i=0;i<pfilestate->symbol_count-1;i++) { j=0; //找出第一个非空节点 while(array[j]==NULL) { j++; } minidx=j; //拿第一个非空节点和后面的对比 for(j=0;j<pfilestate->symbol_count;j++) { if(array[j]&&array[j]->symbol.number<array[minidx]->symbol.number) { minidx=j; } } j=0; while(array[j]==NULL||j==minidx) { j++; } secminidx=j; for(j=0;j<pfilestate->symbol_count;j++) { if(array[j]&&j!=minidx&&array[j]->symbol.number<array[secminidx]->symbol.number) { secminidx=j; } } struct _symbol symbol={0,array[minidx]->symbol.number+array[secminidx]->symbol.number}; temp=creatnode(symbol,-1); temp->left=array[minidx]; temp->right=array[secminidx]; temp->parent=NULL; array[minidx]->parent=temp; array[secminidx]->parent=temp; array[minidx]=temp; array[secminidx]=NULL; } free(array); array=NULL; return temp; } //判断是否叶子节点 int isleaf(node *cur) { if(cur->left==NULL&&cur->right==NULL) return 1; else { return 0; } } //取得哈夫曼编码 void huffmancode(node *leaf,char code[20]) { char a[20]={0}; node *temp=leaf; int i=0; while(temp->parent) { if(temp->parent->left==temp) { a[i]=2; } else { a[i]=1; } temp=temp->parent; i++; } //以下将数组元素反转 int length=i; char tmp=0; for(i=0;i<length/2;i++) { tmp=a[i]; a[i]=a[length-i-1]; a[length-i-1]=tmp; } strcpy(code,a); } //取得叶子的哈夫曼码并填充到文件结构体里 void fillhuffmancode(node *root,struct _filestate *pfilestate) { int idx; if(root) { if(isleaf(root)) { idx=root->idx; huffmancode(root,pfilestate->symbol_array[idx].huffcode); } else { fillhuffmancode(root->left,pfilestate); fillhuffmancode(root->right,pfilestate); } } } //根据频率创建树且获取编码 void createandcoding(struct _filestate *pfilestate) { node *root=createhaffman(pfilestate); fillhuffmancode(root,pfilestate); int i,j; for(i=0;i<pfilestate->symbol_count;i++) { printf("%c ",pfilestate->symbol_array[i].character); for(j=0;j<20;j++) { printf("%d",pfilestate->symbol_array[i].huffcode[j]); } puts("\n"); } } //查字典，获得编码 void lookupdict(char character,struct _filestate *pfilestate,char bitcode[]) { int i=0; for(i=0;i<pfilestate->symbol_count;i++) { if(character==pfilestate->symbol_array[i].character) { strcpy(bitcode,pfilestate->symbol_array[i].huffcode); break; } } } //写文件头 void writeheader(int fd,struct _filestate *pfilestate) { write(fd,pfilestate,sizeof(struct _filestate)); } //读文件头 void readheader(int fd,struct _filestate *pfilestate) { read(fd,pfilestate,sizeof(struct _filestate)); } //写编码 void writecode(int fd_r,int fd_w,struct _filestate *pfilestate) { int flgend=0; //临时变量，用来保存取出来哈夫曼编码 char bitcode[20]; //临时变量 char temp; char buffer; //写字节的索引 int buf_idx=0; //读到文件尾 int bitcode_idx=0; //从源文件读一个字符 read(fd_r,&temp,1); //从字典里查出字符的哈夫曼编码 lookupdict(temp,pfilestate,bitcode); //一直读字符，直到文件的末尾 while(!flgend) { //用来写的字符，需要根据哈夫曼编码构建 buffer=0; //现在指向buffer的第0位 buf_idx=0; while(buf_idx<8) { if(bitcode[bitcode_idx]==2) { //把对应的位清零 buffer&=~(1<<buf_idx); bitcode_idx++; buf_idx++; } else if(bitcode[bitcode_idx]==1) { //给对应的位置1 buffer|=(1<<buf_idx); bitcode_idx++; buf_idx++; } else//如果编码数组里的编码读完了 { //再取一个字符 if((read(fd_r,&temp,1))==1) { //根据字符取得哈夫曼码 lookupdict(temp,pfilestate,bitcode); bitcode_idx=0; } else { flgend=1; break; } } } write(fd_w,&buffer,1); } } //写到文件中 void writetofile(char *filename,struct _filestate *pfilestate) { int fd1=open(filename,O_RDONLY); if(fd1<0) { perror("open"); exit(EXIT_FAILURE); } char *newfilename=strcat(filename,".aar"); int fd2=open(newfilename,O_CREAT|O_TRUNC|O_WRONLY,0666); if(fd2<0) { perror("open"); exit(EXIT_FAILURE); } //将字典结构体先写到文件里面，方便解压时读取 writeheader(fd2,pfilestate); //写编码 writecode(fd1,fd2,pfilestate); close(fd1); close(fd2); } //计算字符的个数 void coutsymbol(struct _filestate *pfilestate,char ch) { int i; int flagexist=0; for(i=0;i<pfilestate->symbol_count;i++) { //如果字符已被保存过 if(ch==pfilestate->symbol_array[i].character) { //则让该字符的数量+1 pfilestate->symbol_array[i].number++; flagexist=1; break; } } //如果字符没有被保存过 if(!flagexist) { pfilestate->symbol_array[pfilestate->symbol_count].character=ch; pfilestate->symbol_array[pfilestate->symbol_count].number=1; pfilestate->symbol_count++; } } //分析文件，得到各字符的次数 void analyse(char *filename) { //打开文件 int fd=open(filename,O_RDONLY); if(fd<0) { return; } char temp; //读取一个字符，直到文件结束 while((read(fd,&temp,1))==1) { //把单个字符交给更细的分析函数, //该函数将字符和现有的数据进行对比，然后计算数量 coutsymbol(&filestate,temp); } close(fd); } //压缩 void compress(char *filename) { //分析文件 analyse(filename); //创建哈夫曼树且取得每个字母的编码 createandcoding(&filestate); //将文件的每个字母翻译成编码，写入压缩文件 writetofile(filename,&filestate); } //--------------------------------------------------- //写解压缩文件 void writedecompressfile(int fd, char *filename,node *root) { char *newfilename=strcat(filename,".un"); node *tmpnode=root; int fd_w=open(newfilename,O_CREAT|O_WRONLY|O_TRUNC,0600); if(fd_w<0) { perror("open"); return; } char temp; int bit_idx; while(read(fd,&temp,1)==1) { bit_idx=0; while(bit_idx<8) { if(temp&(1<<bit_idx)) { tmpnode=tmpnode->right; } else { tmpnode=tmpnode->left; } if(isleaf(tmpnode)) { write(fd_w,&(tmpnode->symbol.character),1); tmpnode=root; } bit_idx++; } } close(fd_w); } //解压 void decompress(char *filename) { int fd=open(filename,O_RDWR); if(fd<0) { perror("open"); return; } memset(&filestate,0,sizeof(filestate)); //读文件头 readheader(fd,&filestate); //创建哈弗曼树 node *root=NULL; root=createhaffman(&filestate); writedecompressfile(fd,filename,root); close(fd); } //------------------------------------------------ //主函数 int main(int argc,char const *argv[]) { char *filename=(char *)argv[2]; if(argc<3) { puts("用法：./tar -y[-j] filename"); exit(EXIT_FAILURE); } //选项为-y,压缩 if(strcmp(argv[1],"-y")==0) { compress(filename); } //选项为-j,解压 else if(strcmp(argv[1],"-j")==0) { decompress(filename); } else { puts("参数错误！"); exit(EXIT_FAILURE); } }

转载请注明原文地址: https://ju.6miu.com/read-599997.html

技术

最新回复(0)