Skip to content

Commit d1200cb

Browse files
sushanth
1 parent 0bcdfbd commit d1200cb

File tree

1 file changed

+77
-70
lines changed

1 file changed

+77
-70
lines changed

compression/huffman.py

+77-70
Original file line number | Diff line number | Diff line change
@@ -1,92 +1,99 @@
1-
from __future__ import annotations
2-
1+
import heapq
2+
from collections import defaultdict
33
import sys
44

5+
class HuffmanNode:
    """A node of the Huffman tree.

    A node built from a (character, frequency) pair carries that character
    in ``char``. A node built without a character keeps ``char`` as ``None``
    and is meant to be linked to two children through ``left`` and ``right``.
    """

    def __init__(self, char=None, freq=0):
        """Create a node with no children.

        Args:
            char: The character stored in the node, or None.
            freq: The frequency (weight) attached to the node.
        """
        self.char = char
        self.freq = freq
        self.left = None
        self.right = None

    def __repr__(self):
        """Return a short, unambiguous description for debugging."""
        return f"{type(self).__name__}(char={self.char!r}, freq={self.freq!r})"

    def __lt__(self, other):
        """Order nodes by frequency so ``heapq`` can keep them in a min-heap."""
        if not isinstance(other, HuffmanNode):
            # Defer to Python's comparison protocol instead of failing with
            # an AttributeError on an operand that has no ``freq``.
            return NotImplemented
        return self.freq < other.freq
1414

1515

16-
class TreeNode:
17-
def __init__(self, freq: int, left: Letter | TreeNode, right: Letter | TreeNode):
18-
self.freq: int = freq
19-
self.left: Letter | TreeNode = left
20-
self.right: Letter | TreeNode = right
16+
def calculate_frequencies(file_path):
    """
    Count how often each character occurs in a text file.

    The file is opened in text mode and read line by line, so the line
    terminators yielded by the file object are counted as characters too.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``defaultdict(int)`` mapping each character to its number of
        occurrences, with keys in order of first appearance.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Imported locally so the function carries its own dependencies.
    from collections import Counter, defaultdict

    counts = Counter()
    with open(file_path, 'r') as file:
        for line in file:
            # Counter.update counts every character of the line in one call.
            counts.update(line)
    # Hand back the same mapping type the function has always returned.
    return defaultdict(int, counts)
2126

2227

23-
def parse_file(file_path: str) -> list[Letter]:
28+
def build_huffman_tree(freq_dict):
    """
    Builds the Huffman tree using a priority queue.

    The two lowest-frequency nodes are repeatedly popped from a min-heap and
    merged under a new parent whose frequency is their sum, until a single
    node remains.

    Args:
        freq_dict: Mapping of character -> frequency.

    Returns:
        The root node of the tree, or None when ``freq_dict`` is empty.
        With a single entry the root is that entry's own node.
    """
    if not freq_dict:
        # Nothing to merge: indexing the empty heap below would raise
        # IndexError, so report "no tree" explicitly.
        return None

    priority_queue = [HuffmanNode(char, freq) for char, freq in freq_dict.items()]
    heapq.heapify(priority_queue)

    while len(priority_queue) > 1:
        left = heapq.heappop(priority_queue)
        right = heapq.heappop(priority_queue)

        merged = HuffmanNode(freq=left.freq + right.freq)
        merged.left = left
        merged.right = right

        heapq.heappush(priority_queue, merged)

    return priority_queue[0]
3646

3747

38-
def build_tree(letters: list[Letter]) -> Letter | TreeNode:
48+
def generate_codes(node, current_code="", code_map=None):
    """
    Generates the Huffman codes by traversing the tree recursively.

    Going to a left child appends "0" to the code and going to a right
    child appends "1". Every node whose ``char`` is not None is recorded
    with the code accumulated on the way down.

    Args:
        node: Root of the (sub)tree to traverse; None is accepted and
            contributes nothing.
        current_code: Code accumulated from the root down to ``node``.
        code_map: Dictionary to fill in; a new one is created when omitted.

    Returns:
        The dictionary mapping each character to its code string.
    """
    if code_map is None:
        code_map = {}

    if node is None:
        return code_map

    if node.char is not None:
        # A tree consisting of a single leaf is reached with an empty path;
        # give that lone symbol a one-bit code so the encoded output is not
        # an empty string.
        code_map[node.char] = current_code or "0"

    generate_codes(node.left, current_code + "0", code_map)
    generate_codes(node.right, current_code + "1", code_map)

    return code_map
63+
64+
65+
def encode_file(file_path, code_map):
    """
    Encodes the file contents using the Huffman codes.

    Args:
        file_path: Path of the text file to encode.
        code_map: Mapping of character -> code string.

    Returns:
        The codes of every character in the file, concatenated in reading
        order into one string.

    Raises:
        KeyError: If the file contains a character missing from ``code_map``.
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'r') as file:
        # Stream the codes straight into join instead of building a list.
        return ''.join(code_map[char] for line in file for char in line)
6776

6877

69-
def huffman(file_path: str) -> None:
78+
def huffman(file_path):
    """
    Main function to perform Huffman encoding on a given file.

    Counts the characters of the file, builds the Huffman tree, derives the
    code of each character, then prints the code table followed by the
    encoded contents of the file.

    Args:
        file_path: Path of the text file to encode.
    """
    freq_dict = calculate_frequencies(file_path)
    if not freq_dict:
        # An empty file has no symbols, so there is no tree to build.
        print(f"{file_path} is empty; nothing to encode.")
        return

    huffman_tree_root = build_huffman_tree(freq_dict)
    code_map = generate_codes(huffman_tree_root)

    print(f"Huffman Codes for characters in {file_path}:")
    for char, code in code_map.items():
        # repr keeps newlines, tabs and other control characters visible
        # instead of letting them break the table layout.
        print(f"{char!r}: {code}")

    encoded_data = encode_file(file_path, code_map)
    print("\nEncoded Data:")
    print(encoded_data)
8893

8994

9095
if __name__ == "__main__":
    # Expect the path of the file to encode as the first argument.
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so a missing argument is reported as a failure to the caller.
        sys.exit("Usage: python huffman.py <file_path>")
    huffman(sys.argv[1])

0 commit comments

Comments
 (0)