-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhuffman.py
More file actions
123 lines (107 loc) · 4.05 KB
/
huffman.py
File metadata and controls
123 lines (107 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from collections import deque
import pickle
class CharacterNode:
def __init__(self, weight, value = None, child_left = None, child_right = None):
self.value = value
self.weight = weight
self.child_left = child_left
self.child_right = child_right
def sort_array(array):
for i in range(len(character_count)):
min_value = character_count[i].weight
index = i
for j in range(i, len(character_count)):
if(character_count[j].weight < min_value):
min_value = character_count[j].weight
index = j
temp = character_count[i]
character_count[i] = character_count[index]
character_count[index] = temp
return character_count
def find_element(root, elem):
stack = deque()
stack.append(root)
while(stack):
element = stack.pop()
if(element.value is None):
stack.append(element.child_left)
stack.append(element.child_right)
elif(element.value == elem):
return element
print("DNE")
return "DNE"
def Create_Dict(root, code, dict):
if(root.value is None):
Create_Dict(root.child_left, code + "0", dict)
Create_Dict(root.child_right, code + "1", dict)
else:
dict[root.value] = code
return dict
def Write_To_File(rawFile, encodedFile, charDict):
with open(encodedFile, "w") as outfile:
outfile.write("")
with open(rawFile) as infile:
encodedLine = ""
for line in infile:
#print(line)
for letter in line:
code = charDict[letter]
for oneorzero in code:
encodedLine += oneorzero
if(len(encodedLine) == 8):
binary = int(encodedLine, 2)
#print(binary)
with open(encodedFile, "ab") as outfile:
s = chr(binary).encode("UTF-8")
try:
outfile.write(bytes(s))
except:
#print(binary,chr(binary))
outfile.write(s)
encodedLine = ""
while(len(encodedLine) != 8 and len(encodedLine) != 0):
encodedLine+="0"
binary = int(encodedLine, 2)
s = chr(binary).encode("UTF-8")
with open(encodedFile, "ab") as outfile:
outfile.write(bytes(s))
##Go through the file in an efficient way and create a Node for each one, and
#Get the frequency of every character. Only ASCII values supported, if they do not appear, they are a None in the array.
character_count = [None] * 127
with open("TextFile1.txt") as infile: ##memory kept only as large as the line is
for line in infile:
for letter in line:
if(character_count[ord(letter)] is None):
character_count[ord(letter)] = CharacterNode(1, letter)
else:
character_count[ord(letter)].weight += 1
#Remove all of the unused characters
for element in character_count[:]:
if(element is None):
character_count.remove(element)
#print('None')
else:
pass
#print(element.value, element.weight)
#Sorted All elements
character_count = sort_array(character_count)
for element in character_count[:]:
if(element is None):
print('None')
else:
print(element.value, element.weight)
##build the Huffman Tree
while(len(character_count) > 1):
parent = CharacterNode(character_count[0].weight + character_count[1].weight, None, character_count[0], character_count[1])
character_count.remove(character_count[0])
character_count.remove(character_count[0])
character_count.append(parent)
character_count = sort_array(character_count)
root = character_count[0]
Node = find_element(root, 'b')
print(Node.value)
##build dictionary
charDict = dict()
charDict = Create_Dict(root, "", charDict)
##Writes and encodes the file
Write_To_File("TextFile1.txt", "EncodedData.bnr", charDict)