C++:利用哈夫曼编码压缩文件

    xiaoxiao2025-02-14  11

    首先需要借助一个堆 Heap.hpp

    #pragma once #include<iostream> #include<vector> #include<assert.h> template<class T> class Greater { public: bool operator()(const T&left, const T&right) const { return left > right; } }; template<class T> class Less { public: bool operator()(const T&left, const T&right) const { return left < right; } }; template<class T, class Compare = Less<T> > class Heap { public: Heap(){} Heap(const T* arr, size_t size) { assert(arr); for (size_t i = 0; i < size; ++i) { _heap.push_back(arr[i]); } size_t root = _heap.size() / 2 - 1; for (int i = root; i >= 0; --i) { _adjustDown(i); } } size_t Size() { return _heap.size(); } bool Empty() { if (_heap.size()) return 1; return 0; } const T& Top()const { assert(_heap.size()); return _heap[0]; } void Insert(const T&data) { _heap.push_back(data); if (_heap.size() > 1) { size_t child = _heap.size() - 1; _adjustUp(child); } } void Print() { assert(!_heap.empty()); for (size_t i = 0; i < _heap.size(); ++i) { std::cout << _heap[i] << " "; } std::cout << std::endl; } void Pop() { assert(!_heap.empty()); if (_heap.size() > 1) { _heap[0] = _heap[_heap.size() - 1]; _heap.pop_back(); _adjustDown(0); } else { _heap.pop_back(); } } private: void _adjustUp(size_t child) { size_t root = (child - 1) / 2; while ((child>0) && (Compare()(_heap[child], _heap[root]))) { std::swap(_heap[child], _heap[root]); child = root; root = (root - 1) / 2; } } void _adjustDown(size_t root) { size_t child = root * 2 + 1; while (child<_heap.size()) { if (((child + 1)<_heap.size()) && (Compare()(_heap[child + 1], _heap[child]))) { child = child + 1; } if (Compare()(_heap[child], _heap[root])) { std::swap(_heap[root], _heap[child]); } root = child; child = child * 2 + 1; } } private: std::vector< T > _heap; };

    其次需要借助哈夫曼树 HuffmanTree.hpp

    #pragma once #include"Heap.hpp" #include<queue> template<class T> class Compare { public: bool operator()(T left, T right) { return left->_weight < right->_weight; } }; template<class T> struct HuffmanTreeNode { HuffmanTreeNode(const T &weight) :_weight(weight) , pleft(NULL) , pright(NULL) , parent(NULL) {} T _weight; HuffmanTreeNode* pleft; HuffmanTreeNode* pright; HuffmanTreeNode* parent; }; template<class T> class HuffmanTree { public: HuffmanTree(){} HuffmanTree(T* WeightList, int size, T invalid) { assert(WeightList); Heap< HuffmanTreeNode<T>*, Compare<HuffmanTreeNode<T>* > > heap; for (int i = 0; i < size; ++i) { if (WeightList[i] != invalid) { HuffmanTreeNode<T>* node = new HuffmanTreeNode<T>(WeightList[i]); heap.Insert(node); } } HuffmanTreeNode<T>* parent = NULL; while (heap.Size()>1) { HuffmanTreeNode<T>* left = heap.Top(); heap.Pop(); HuffmanTreeNode<T>* right = heap.Top(); heap.Pop(); parent = new HuffmanTreeNode<T>(left->_weight + right->_weight); parent->pleft = left; parent->pright = right; left->parent = parent; right->parent = parent; heap.Insert(parent); } proot = parent; } void showLeavelOrder() { assert(proot); showLeavelOrder(proot); } ~HuffmanTree() { DestroyNodes(proot); } HuffmanTreeNode<T>* GetRoot() { return proot; } private: void showLeavelOrder(HuffmanTreeNode<T>* root) { std::queue<HuffmanTreeNode<T>*> Q; Q.push(root); while (!Q.empty()) { HuffmanTreeNode<T>* TMP = Q.front(); std::cout << TMP->_weight << " "; Q.pop(); if (TMP->pleft) Q.push(TMP->pleft); if (TMP->pright) Q.push(TMP->pright); } std::cout << std::endl; } void DestroyNodes(HuffmanTreeNode<T>* root) { if (root) { if (root->pleft) { DestroyNodes(root->pleft); delete root->pleft; } if (root->pright) { DestroyNodes(root->pright); delete root->pright; } } else return; } private: HuffmanTreeNode<T>* proot; };

    FileCompress.h

    #pragma once #define _CRT_SECURE_NO_WARNINGS 1 #include"HuffmanTree.hpp" #include<stdlib.h> struct FileInfo { FileInfo(size_t count):_count(count){} FileInfo(){} unsigned char _ch; //字符 std::string _strcode; //编码 size_t _count; //出现次数 FileInfo operator+(const FileInfo& fileinfo) { FileInfo ret(*this); ret._count += fileinfo._count; return ret; } bool operator<(const FileInfo& fileinfo) { return _count < fileinfo._count; } bool operator!=(const FileInfo& fileinfo) const { return fileinfo._count != _count; } }; class FileCompress { public: FileCompress(); void CompressFile(const std::string&filename); void UnCompressFile(const std::string&filename); private: void GetUnCompressCode(FILE* fr); void WriteContent(const std::string&dest, const std::string src); void FileCompress::GetHead(std::string&StrHead, const std::string&name, const std::string src); void WriteHead(const std::string&name, const std::string src); void _GentHuffmancode(HuffmanTreeNode<FileInfo>* pRoot);//获取霍夫曼编码 void ReadFrom(const std::string&filename, size_t size); std::string GetFilename(const std::string& strFilePath); //获取文件名 std::string GetFilePostFix(const std::string& strFilePath); //获取后缀名 FileInfo _fileInfo[256]; };

    FileCompress.cpp

    #include"FileCompress.h" FileCompress::FileCompress() { for (int idx = 0; idx < 256; idx++) { _fileInfo[idx]._ch = idx; _fileInfo[idx]._count = 0; _fileInfo[idx]._strcode = ""; } } void FileCompress::ReadFrom(const std::string&filename, size_t size)//读文件并且添加fileinfo { FILE *fr = fopen(filename.c_str(), "rb"); assert(fr); unsigned char* buf = new unsigned char[1024]; while (1) { size_t _s= fread(buf, 1, 1024, fr); for (size_t idx = 0; idx < _s; idx++) { _fileInfo[buf[idx]]._count++; } if (_s < 1024) break; } delete[] buf; fclose(fr); } void FileCompress::_GentHuffmancode(HuffmanTreeNode<FileInfo>* pRoot) { if (pRoot) { _GentHuffmancode(pRoot->pleft); _GentHuffmancode(pRoot->pright); if (NULL == pRoot->pleft && NULL == pRoot->pright) { HuffmanTreeNode<FileInfo>* pParent = pRoot->parent; std::string& code = _fileInfo[pRoot->_weight._ch]._strcode; while (pParent) { if (pParent->pleft == pRoot) { code += '0'; } if (pParent->pright == pRoot) { code += '1'; } pParent = pParent->parent; pRoot = pRoot->parent; } std::reverse(code.begin(), code.end()); } } } std::string FileCompress::GetFilename(const std::string& strFilePath) //获取文件名 { size_t pos = strFilePath.find_last_of('\\'); return strFilePath.substr(pos + 1, strFilePath.find_last_of('.')); } std::string FileCompress::GetFilePostFix(const std::string& strFilePath) //获取后缀名 { size_t pos = strFilePath.find_last_of('.'); return strFilePath.substr(pos, strFilePath.length() - pos); } char* ITOA(size_t size,char*str) { int i = 0; while (size != 0) { char c = size % 10 + '0'; str[i++] = c; size /= 10; } str[i] = '\0'; int len = strlen(str); for (int j = 0; j < len / 2; j++) { std::swap(str[j], str[len - j - 1]); } return str; } bool ReadLine(FILE* fr, std::string& codeInfo) { assert(fr); char ch = fgetc(fr); if (EOF == ch) return false; // while ('\n' != ch) { codeInfo += ch; ch = fgetc(fr); if (EOF == ch) return false; } return true; } void FileCompress::GetHead(std::string&StrHead, const std::string&name, const std::string src) { std::string CodeInfo; char count[32] = {0}; int LineCount = 0; for (int idx = 0; idx < 256; idx++) { if (_fileInfo[idx]._count) { CodeInfo += _fileInfo[idx]._ch; CodeInfo += ','; ITOA(_fileInfo[idx]._count,count); CodeInfo += count; CodeInfo += '\n'; LineCount++; } } StrHead += GetFilePostFix(src); StrHead += '\n'; ITOA(LineCount, count); StrHead += count; StrHead += '\n'; StrHead += CodeInfo; } void FileCompress::WriteHead(const std::string&name, const std::string src) { FILE *fw = fopen(name.c_str(), "wb"); assert(fw); std::string StrHead; GetHead(StrHead,name,src); fwrite(StrHead.c_str(), 1, StrHead.length(), fw);//写入头部信息 fclose(fw); } void FileCompress::WriteContent(const std::string&dest,const std::string src) { FILE *fw = fopen(dest.c_str(), "ab"); FILE *fr = fopen(src.c_str(), "rb"); assert(fw); assert(fr); int pos = 0; char ch = 0; unsigned char* Wbuf = new unsigned char[1024]; unsigned char *Rbuf = new unsigned char[1024]; int wrIdx = 0; while (true) { int _rsize = fread(Rbuf, 1, 1024, fr); if (_rsize == 0) break; // 1024 for (int i = 0; i < _rsize; ++i) { std::string code = _fileInfo[Rbuf[i]]._strcode; for (size_t idx = 0; idx < code.size(); ++idx) { ch <<= 1; pos++; if (code[idx] == '1') { ch |= 1; } if (pos == 8) { Wbuf[wrIdx++] = ch; if (wrIdx == 1024) { // 写入文件 fwrite(Wbuf, 1, 1024, fw); wrIdx = 0; } ch = 0; pos = 0; } } } } if (wrIdx!=0&&pos < 8) { ch <<= (8 - pos); Wbuf[wrIdx++] = ch; Wbuf[wrIdx] = '\0'; fwrite(Wbuf, 1, wrIdx, fw); } delete[] Rbuf; delete[] Wbuf; fclose(fr); fclose(fw); } void FileCompress::GetUnCompressCode(FILE* fr) { std::string strLineCount; ReadLine(fr, strLineCount); int lineCount = atoi(strLineCount.c_str()); // 读取编码信息 std::string strCodeInfo; while (lineCount) { ReadLine(fr, strCodeInfo);// if ("" != strCodeInfo) { lineCount--; unsigned char ch = strCodeInfo[0]; _fileInfo[ch]._ch = ch; strCodeInfo = strCodeInfo.substr(2); _fileInfo[ch]._count = atoi(strCodeInfo.c_str()); strCodeInfo = ""; } else { strCodeInfo += '\n'; } } } void FileCompress::CompressFile(const std::string&filename) { ReadFrom(filename, 1024); FileInfo invalid; invalid._count = 0; HuffmanTree<FileInfo> ht(_fileInfo, 256, invalid); _GentHuffmancode(ht.GetRoot()); std::string CompressName = GetFilename(filename); std::string Postname = GetFilePostFix(filename); CompressName += ".hzp"; WriteHead(CompressName,filename);//写入头部信息 WriteContent(CompressName,filename);//追加压缩后的编码 } void FileCompress::UnCompressFile(const std::string& FilePath) { FILE* fr = fopen(FilePath.c_str(), "rb"); assert(fr); //获取后缀 std::string strPostFix; ReadLine(fr, strPostFix); //得到fileinfo GetUnCompressCode(fr); //用fileinfo构建Huffman树 HuffmanTree<FileInfo> ht(_fileInfo, 256, FileInfo(0)); HuffmanTreeNode<FileInfo>* PRoot = ht.GetRoot(); std::string filename = GetFilename(FilePath); filename += strPostFix;//得到要解压的文件名及后缀 unsigned char* Rbuf = new unsigned char[1024]; size_t lenth = PRoot->_weight._count; size_t _wsize = 0; int pos = 7; FILE* fw = fopen(filename.c_str(), "wb"); unsigned char* Wbuf = new unsigned char[1024]; int _rsize = 0; while (true) { _rsize = fread(Rbuf, 1, 1024, fr); if ( _rsize == 0) break; size_t idx = 0; while (idx < _rsize) { if (Rbuf[idx] & 1 << pos--) { PRoot = PRoot->pright; } else { PRoot = PRoot->pleft; } if (NULL == PRoot->pleft && NULL == PRoot->pright) //0就往左走,1就往右走,一直走到叶子节点就是编码,然后找到编码对应的符号 { Wbuf[_wsize++] = PRoot->_weight._ch;//把这个符号写进输出缓冲区 if (1024 == _wsize&& _wsize<lenth ) { //写文件 fwrite(Wbuf, 1, 1024, fw); lenth-=1024; _wsize = 0; } PRoot = ht.GetRoot(); } if (0 > pos) { ++idx; pos = 7; } } } if (_wsize < 1024 && lenth>0) { fwrite(Wbuf, 1, lenth, fw); } delete[] Wbuf; delete[] Rbuf; fclose(fr); fclose(fw); }

    main.cpp

    #include"FileCompress.h" void test() { FileCompress fl; fl.CompressFile("Text.txt"); fl.UnCompressFile("Text.hzp"); } int main() { test(); system("pause"); return 0; }
    转载请注明原文地址: https://ju.6miu.com/read-1296443.html
    最新回复(0)