哈夫曼编码—数据压缩与解压(Java)
<!– more –>
博客阐明
文章所波及的材料来自互联网整顿和集体总结,意在于集体学习和教训汇总,如有什么中央侵权,请分割自己删除,谢谢!
介绍
- 赫夫曼编码也翻译为 哈夫曼编码(Huffman Coding),又称霍夫曼编码,是一种编码方式, 属于一种程序算法
- 赫夫曼编码是赫哈夫曼树在电讯通信中的经典的利用之一。
- 赫夫曼编码宽泛地用于数据文件压缩。其压缩率通常在 20%~90% 之间
- 赫夫曼码是可变字长编码 (VLC) 的一种。Huffman 于 1952 年提出一种编码方法,称之为最佳编码
通信畛域中信息的解决形式
定长编码
数据传输太长
i like like like java do you like a java // 共 40 个字符(包含空格)
105 32 108 105 107 101 32 108 105 107 101 32 108 105 107 101 32 106 97 118 97 32 100 111 32 121 111 117 32 108 105 107 101 32 97 32 106 97 118 97 // 对应 Ascii 码
01101001 00100000 01101100 01101001 01101011 01100101 00100000 01101100 01101001 01101011 01100101 00100000 01101100 01101001 01101011 01100101 00100000 01101010 01100001 01110110 01100001 00100000 01100100 01101111 00100000 01111001 01101111 01110101 00100000 01101100 01101001 01101011 01100101 00100000 01100001 00100000 01101010 01100001 01110110 01100001 // 对应的二进制
变长编码
存在多义性
i like like like java do you like a java // 共 40 个字符(包含空格)
d:1 y:1 u:1 j:2 v:2 o:2 l:4 k:4 e:4 i:5 a:5 :9 // 各个字符对应的个数
0= , 1=a, 10=i, 11=e, 100=k, 101=l, 110=o, 111=v, 1000=j, 1001=u, 1010=y, 1011=d
阐明:依照各个字符呈现的次数进行编码,准则是呈现次数越多的,则编码越小,比方 空格呈现了 9 次,编码为 0 , 其它顺次类推.
依照下面给各个字符规定的编码,则咱们在传输 "i like like like java do you like a java" 数据时,编码就是 10010110100...
哈夫曼编码(前缀编码)
i like like like java do you like a java // 共 40 个字符(包含空格)
d:1 y:1 u:1 j:2 v:2 o:2 l:4 k:4 e:4 i:5 a:5 :9 // 各个字符对应的个数
依照下面字符呈现的次数构建一颗赫夫曼树, 次数作为权值
// 依据赫夫曼树,给各个字符
// 规定编码,向左的门路为 0
// 向右的门路为 1,编码如下:
o: 1000 u: 10010 d: 100110 y: 100111 i: 101
a : 110 k: 1110 e: 1111 j: 0000 v: 0001
l: 001 : 01
依照下面的赫夫曼编码,咱们的 "i like like like java do you like a java" 字符串对应的编码为 (留神这里咱们应用的无损压缩)
1010100110111101111010011011110111101001101111011110100001100001110011001111000011001111000100100100110111101111011100100001100001110
长度为:133
阐明:
原来长度是 359 , 压缩了 (359-133) / 359 = 62.9%
此编码满足前缀编码, 即字符的编码都不能是其余字符编码的前缀。不会造成匹配的多义性
留神
这个哈夫曼树依据排序办法不同,也可能不太一样,这样对应的赫夫曼编码也不齐全一样,然而 wpl 是一样的,都是最小的
压缩思路
- 首先将字符串转化为字节数组
- 创立哈夫曼树,将值和权重写入
- 依据叶子结点的权重来计算哈夫曼编码表
- 依据哈夫曼编码表来计算哈夫曼编码
- 最初再转化为字节数组
代码
package cn.guizimo.huffmancode;
import java.util.*;
/**
* @author guizimo
* @date 2020/8/8 11:55 上午
*/
public class HuffmanCode {public static void main(String[] args) {
String content = "i like like like java do you like a java";
byte[] contentBytes = content.getBytes();
// 哈夫曼编码
byte[] zip = huffmanZip(contentBytes);
System.out.println("哈夫曼编码:" + Arrays.toString(zip));
}
private static byte[] huffmanZip(byte[] bytes){List<Node> nodes = getNodes(bytes);
// 哈夫曼树
Node huffmanTree = createHuffmanTree(nodes);
// 哈夫曼编码表
Map<Byte, String> huffmanCodes = getCodes(huffmanTree);
// 哈夫曼编码
byte[] zip = zip(bytes, huffmanCodes);
return zip;
}
// 压缩
private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {StringBuilder stringBuilder = new StringBuilder();
for (byte b : bytes) {stringBuilder.append(huffmanCodes.get(b));
}
int len;
if (stringBuilder.length() % 8 == 0) {len = stringBuilder.length() / 8;
} else {len = stringBuilder.length() / 8 + 1;
}
byte[] by = new byte[len];
int index = 0;
for (int i = 0; i < stringBuilder.length(); i += 8) {
String strByte;
if (i + 8 > stringBuilder.length()) {strByte = stringBuilder.substring(i);
by[index] = (byte) Integer.parseInt(strByte, 2);
index++;
} else {strByte = stringBuilder.substring(i, i + 8);
by[index] = (byte) Integer.parseInt(strByte, 2);
index++;
}
}
return by;
}
static Map<Byte, String> huffmanCodes = new HashMap<Byte, String>();
static StringBuilder stringBuilder = new StringBuilder();
// 重载
private static Map<Byte, String> getCodes(Node root) {if (root == null) {return null;}
getCodes(root.left, "0", stringBuilder);
getCodes(root.right, "1", stringBuilder);
return huffmanCodes;
}
// 获取哈夫曼编码
private static void getCodes(Node node, String code, StringBuilder stringBuilder) {StringBuilder builder = new StringBuilder(stringBuilder);
builder.append(code);
if (node != null) {if (node.data == null) { // 递归
getCodes(node.left, "0", builder);
getCodes(node.right, "1", builder);
} else {huffmanCodes.put(node.data, builder.toString());
}
}
}
// 前序遍历
private static void preOrder(Node root) {if (root != null) {root.preOrder();
} else {System.out.println("哈夫曼树为空");
}
}
// 生成哈夫曼树
private static Node createHuffmanTree(List<Node> nodes) {while (nodes.size() > 1) {Collections.sort(nodes);
Node leftNode = nodes.get(0);
Node rightNode = nodes.get(1);
Node parent = new Node(null, leftNode.weight + rightNode.weight);
parent.left = leftNode;
parent.right = rightNode;
nodes.remove(leftNode);
nodes.remove(rightNode);
nodes.add(parent);
}
return nodes.get(0);
}
// 接管字节数组
private static List<Node> getNodes(byte[] bytes) {List<Node> nodes = new ArrayList<>();
Map<Byte, Integer> counts = new HashMap<>();
for (byte b : bytes) {Integer count = counts.get(b);
if (count == null) {counts.put(b, 1);
} else {counts.put(b, count + 1);
}
}
// 遍历 map
for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {nodes.add(new Node(entry.getKey(), entry.getValue()));
}
return nodes;
}
}
class Node implements Comparable<Node> {
Byte data;
int weight; // 字符呈现的次数
Node left;
Node right;
// 前序遍历
public void preOrder() {System.out.println(this);
if (this.left != null) {this.left.preOrder();
}
if (this.right != null) {this.right.preOrder();
}
}
public Node(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
@Override
public int compareTo(Node o) {
// 从小到大排序
return this.weight - o.weight;
}
@Override
public String toString() {
return "Node{" +
"data=" + data +
", weight=" + weight +
'}';
}
}
解压思路
- 将字节数组转化为二进制
- 依据反转的哈夫曼编码表生成 ASCLL 汇合
代码
package cn.guizimo.huffmancode;
import java.util.*;
/**
* @author guizimo
* @date 2020/8/8 11:55 上午
*/
public class HuffmanCode {public static void main(String[] args) {
String content = "i like like like java do you like a java";
byte[] contentBytes = content.getBytes();
// 哈夫曼压缩
byte[] zip = huffmanZip(contentBytes);
System.out.println("哈夫曼压缩:" + Arrays.toString(zip));
// 哈夫曼解压
byte[] unzip = huffmanUnzip(huffmanCodes, zip);
System.out.println("哈夫曼解压:" + new String(unzip));
}
// 哈夫曼解压
private static byte[] huffmanUnzip(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < huffmanBytes.length; i++) {byte b = huffmanBytes[i];
boolean flag = (i == huffmanBytes.length - 1);
stringBuilder.append(byteToBitString(!flag, b));
}
// 解码, 反向编码表
HashMap<String, Byte> map = new HashMap<>();
for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {map.put(entry.getValue(), entry.getKey());
}
// 依据编码扫描到对应的 ASCLL 码对应的字符
List<Byte> list = new ArrayList<>();
for (int i = 0; i < stringBuilder.length(); ) {
int count = 1;
boolean flag = true;
Byte b = null;
while (flag) {String key = stringBuilder.substring(i, i + count);
b = map.get(key);
if (b == null) {count++;} else {flag = false;}
}
list.add(b);
i += count;
}
byte b[] = new byte[list.size()];
for (int i = 0; i < b.length; i++) {b[i] = list.get(i);
}
return b;
}
// 转化二进制
private static String byteToBitString(boolean flag, byte b) {
int temp = b;
if (flag) {temp |= 256;}
String str = Integer.toBinaryString(temp);
if (flag) {return str.substring(str.length() - 8);
} else {return str;}
}
// 哈夫曼编码压缩
private static byte[] huffmanZip(byte[] bytes) {List<Node> nodes = getNodes(bytes);
// 哈夫曼树
Node huffmanTree = createHuffmanTree(nodes);
// 哈夫曼编码表
Map<Byte, String> huffmanCodes = getCodes(huffmanTree);
// 哈夫曼编码
byte[] zip = zip(bytes, huffmanCodes);
return zip;
}
// 压缩
private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {StringBuilder stringBuilder = new StringBuilder();
for (byte b : bytes) {stringBuilder.append(huffmanCodes.get(b));
}
int len;
if (stringBuilder.length() % 8 == 0) {len = stringBuilder.length() / 8;
} else {len = stringBuilder.length() / 8 + 1;
}
byte[] by = new byte[len];
int index = 0;
for (int i = 0; i < stringBuilder.length(); i += 8) {
String strByte;
if (i + 8 > stringBuilder.length()) {strByte = stringBuilder.substring(i);
by[index] = (byte) Integer.parseInt(strByte, 2);
index++;
} else {strByte = stringBuilder.substring(i, i + 8);
by[index] = (byte) Integer.parseInt(strByte, 2);
index++;
}
}
return by;
}
static Map<Byte, String> huffmanCodes = new HashMap<Byte, String>();
static StringBuilder stringBuilder = new StringBuilder();
// 重载
private static Map<Byte, String> getCodes(Node root) {if (root == null) {return null;}
getCodes(root.left, "0", stringBuilder);
getCodes(root.right, "1", stringBuilder);
return huffmanCodes;
}
// 获取哈夫曼编码
private static void getCodes(Node node, String code, StringBuilder stringBuilder) {StringBuilder builder = new StringBuilder(stringBuilder);
builder.append(code);
if (node != null) {if (node.data == null) { // 递归
getCodes(node.left, "0", builder);
getCodes(node.right, "1", builder);
} else {huffmanCodes.put(node.data, builder.toString());
}
}
}
// 前序遍历
private static void preOrder(Node root) {if (root != null) {root.preOrder();
} else {System.out.println("哈夫曼树为空");
}
}
// 生成哈夫曼树
private static Node createHuffmanTree(List<Node> nodes) {while (nodes.size() > 1) {Collections.sort(nodes);
Node leftNode = nodes.get(0);
Node rightNode = nodes.get(1);
Node parent = new Node(null, leftNode.weight + rightNode.weight);
parent.left = leftNode;
parent.right = rightNode;
nodes.remove(leftNode);
nodes.remove(rightNode);
nodes.add(parent);
}
return nodes.get(0);
}
// 接管字节数组
private static List<Node> getNodes(byte[] bytes) {List<Node> nodes = new ArrayList<>();
Map<Byte, Integer> counts = new HashMap<>();
for (byte b : bytes) {Integer count = counts.get(b);
if (count == null) {counts.put(b, 1);
} else {counts.put(b, count + 1);
}
}
// 遍历 map
for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {nodes.add(new Node(entry.getKey(), entry.getValue()));
}
return nodes;
}
}
class Node implements Comparable<Node> {
Byte data;
int weight; // 字符呈现的次数
Node left;
Node right;
// 前序遍历
public void preOrder() {System.out.println(this);
if (this.left != null) {this.left.preOrder();
}
if (this.right != null) {this.right.preOrder();
}
}
public Node(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
@Override
public int compareTo(Node o) {
// 从小到大排序
return this.weight - o.weight;
}
@Override
public String toString() {
return "Node{" +
"data=" + data +
", weight=" + weight +
'}';
}
}
测试
感激
尚硅谷
以及勤奋的本人