-
-
Notifications
You must be signed in to change notification settings - Fork 46.5k
/
Copy pathhuffman.py
100 lines (75 loc) · 2.48 KB
/
huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import heapq
from collections import defaultdict
import sys
class HuffmanNode:
    """A node of the Huffman tree.

    Leaves carry a character in ``char``; nodes created without a
    character keep ``char`` as None. Nodes are ordered by ``freq`` alone so
    they can be stored directly in a ``heapq`` priority queue.
    """

    def __init__(self, char=None, freq=0):
        # Children start unset; the tree builder attaches them when merging.
        self.left = self.right = None
        self.char, self.freq = char, freq

    def __lt__(self, other):
        """Return True when this node's frequency is lower than ``other``'s."""
        return self.freq < other.freq
def calculate_frequencies(file_path):
    """
    Count the occurrences of every character in a text file.

    The file at ``file_path`` is opened in text mode and consumed in
    fixed-size chunks. Returns a ``defaultdict(int)`` that maps each
    character found in the file to the number of times it appears.
    """
    counts = defaultdict(int)
    with open(file_path, "r") as source:
        # read() returns "" at end of file, which stops the iterator.
        for chunk in iter(lambda: source.read(65536), ""):
            for symbol in chunk:
                counts[symbol] += 1
    return counts
def build_huffman_tree(freq_dict):
    """
    Builds the Huffman tree using a priority queue.

    Args:
        freq_dict: Mapping of character -> frequency.

    Returns:
        The root node of the tree, or None when ``freq_dict`` is empty
        (there is nothing to encode, so there is no tree).
    """
    # An empty mapping would leave the queue empty and make the final
    # ``priority_queue[0]`` raise IndexError; report "no tree" instead.
    if not freq_dict:
        return None

    priority_queue = [HuffmanNode(char, freq) for char, freq in freq_dict.items()]
    heapq.heapify(priority_queue)

    # Repeatedly merge the two lowest-frequency nodes under a new parent
    # until a single root remains.
    while len(priority_queue) > 1:
        left = heapq.heappop(priority_queue)
        right = heapq.heappop(priority_queue)
        merged = HuffmanNode(freq=left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(priority_queue, merged)

    return priority_queue[0]
def generate_codes(node, current_code="", code_map=None):
    """
    Generates the Huffman codes by traversing the tree recursively.

    Args:
        node: Current tree node; None stands for an empty tree or a
            missing child.
        current_code: Bit string accumulated on the path down to ``node``
            ("0" is appended for a left step, "1" for a right step).
        code_map: Dictionary being filled in; a fresh one is created when
            omitted.

    Returns:
        The dictionary mapping each character to its bit-string code.
    """
    if code_map is None:
        code_map = {}
    if node is None:
        return code_map

    if node.char is not None:
        # A leaf reached with an empty path means the tree is a single
        # node (only one distinct character). An empty code would encode
        # the whole input to "", so give that character the 1-bit code "0".
        code_map[node.char] = current_code or "0"

    generate_codes(node.left, current_code + "0", code_map)
    generate_codes(node.right, current_code + "1", code_map)
    return code_map
def encode_file(file_path, code_map):
    """
    Translate a text file into its Huffman bit string.

    Each character read from ``file_path`` is looked up in ``code_map`` and
    the resulting codes are concatenated in file order. A character that is
    missing from ``code_map`` raises ``KeyError``.
    """
    with open(file_path, "r") as source:
        return "".join(code_map[symbol] for row in source for symbol in row)
def huffman(file_path):
    """
    Run the complete Huffman pipeline on ``file_path`` and print the result.

    Counts character frequencies, builds the tree, derives the code table,
    prints one line per character code, then prints the encoded bit string.
    """
    tree_root = build_huffman_tree(calculate_frequencies(file_path))
    code_map = generate_codes(tree_root)

    print(f"Huffman Codes for characters in {file_path}:")
    for symbol in code_map:
        print(f"'{symbol}': {code_map[symbol]}")

    # Encode before printing the header so a failure here emits no header.
    bits = encode_file(file_path, code_map)
    print("\nEncoded Data:")
    print(bits)
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the input file.
    if len(sys.argv) >= 2:
        huffman(sys.argv[1])
    else:
        print("Usage: python huffman.py <file_path>")