Skip to content

Commit d353193

Browse files
committed
Added move to front algorithm
1 parent 6c04620 commit d353193

1 file changed

Lines changed: 118 additions & 0 deletions

File tree

data_compression/move_to_front.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""
2+
Move to Front (MTF) is a data compression technique that reorders
3+
symbols based on how recently they occurred (the latest symbol moves to the front).
4+
- type of context based methods
5+
- it lies somewhere between coding and transforming data
6+
- it is a core post-processing step for the Burrows-Wheeler Transform (BWT)
7+
- for simplicity we do not distinguish between upper and lower case
8+
9+
10+
Sources:
11+
- https://en.wikipedia.org/wiki/Move-to-front_transform
12+
- https://www.mbit.edu.in/wp-content/uploads/2020/05/data_compression.pdf
13+
(chapter 6.4.1)
14+
"""
15+
16+
# Default source alphabet: the 26 lower-case English letters in alphabetical
# order. The initial order matters because it determines the indices that
# move-to-front emits before any symbol has been moved.
english_alphabet = list("abcdefghijklmnopqrstuvwxyz")
44+
45+
46+
class MoveToFront:
    """
    Move-to-front (MTF) encoder and decoder over a fixed source alphabet.

    Each symbol is replaced by its current position in a working copy of the
    alphabet, after which that symbol is moved to the front of the copy.
    Every call to ``encode_text`` / ``decode_text`` starts again from the
    stored alphabet order, so a single instance can be reused.

    >>> mtf = MoveToFront()
    >>> list(mtf.encode_text("algorithm"))
    [0, 11, 7, 14, 17, 11, 19, 12, 15]
    >>> mtf.decode_text([0, 11, 7, 14, 17, 11, 19, 12, 15])
    'algorithm'
    """

    def __init__(self, source_alphabet: "list[str] | None" = None) -> None:
        """
        Store a private copy of the source alphabet.

        :param source_alphabet: symbols in their initial order. When omitted,
            the module-level ``english_alphabet`` is used.

        Text passed to ``encode_text`` is lower-cased before the lookup, so
        UPPER and lower case input produce the same codes.
        """
        # ``None`` is used as the default instead of the module-level list so
        # that no mutable object is bound into the function signature.
        if source_alphabet is None:
            source_alphabet = english_alphabet
        # Copy so that later changes to the caller's list cannot affect us.
        self.source_alphabet = list(source_alphabet)

    def encode_text(self, plain_text: str) -> list[int]:
        """
        Encode text into a list of move-to-front positions.

        :param plain_text: text to encode; it is lower-cased first.
        :return: for every character, its position in the working alphabet
            at the moment it was read.
        :raises TypeError: if ``plain_text`` is not a ``str``.
        :raises ValueError: if a character is not part of the source alphabet.

        >>> mtf = MoveToFront()
        >>> list(mtf.encode_text("algorithm"))
        [0, 11, 7, 14, 17, 11, 19, 12, 15]
        >>> mtf.encode_text("a b")
        Traceback (most recent call last):
            ...
        ValueError: Character ' ' at position 1 is not in the source alphabet.
        """
        if not isinstance(plain_text, str):
            raise TypeError("The parameter plain_text type must be str.")

        # Work on a copy so the stored alphabet keeps its original order.
        alphabet = list(self.source_alphabet)
        encoded_text = []

        # ``position`` refers to the lower-cased text and is only used to
        # produce a helpful error message.
        for position, char in enumerate(plain_text.lower()):
            try:
                index = alphabet.index(char)
            except ValueError:
                raise ValueError(
                    f"Character {char!r} at position {position} "
                    "is not in the source alphabet."
                ) from None
            encoded_text.append(index)

            # Move the symbol that was just seen to the front.
            alphabet.insert(0, alphabet.pop(index))

        return encoded_text

    def decode_text(self, compressed_text: list[int]) -> str:
        """
        Decode a list of move-to-front positions back into text.

        :param compressed_text: positions as produced by ``encode_text``.
        :return: the decoded (lower-case, for the default alphabet) text.
        :raises IndexError: if a position is negative or not smaller than the
            alphabet size.

        >>> mtf = MoveToFront()
        >>> mtf.decode_text([0, 11, 7, 14, 17, 11, 19, 12, 15])
        'algorithm'
        >>> mtf.decode_text([26])
        Traceback (most recent call last):
            ...
        IndexError: Index 26 is outside the alphabet range 0..25.
        """
        # Work on a copy so the stored alphabet keeps its original order.
        alphabet = list(self.source_alphabet)
        decoded_text = []

        for idx in compressed_text:
            # Python would accept a negative index and count from the end of
            # the list; the encoder never emits one, so reject it explicitly
            # instead of decoding to an unintended symbol.
            if not 0 <= idx < len(alphabet):
                raise IndexError(
                    f"Index {idx} is outside the alphabet range "
                    f"0..{len(alphabet) - 1}."
                )

            # Take the symbol at the given position and move it to the front.
            char = alphabet.pop(idx)
            alphabet.insert(0, char)
            decoded_text.append(char)

        return "".join(decoded_text)
113+
114+
115+
if __name__ == "__main__":
    # Small demonstration: encode a word, then decode the encoder's own
    # output to show that the round trip restores the original text.
    mtf = MoveToFront()
    encoded = mtf.encode_text("algorithm")
    print(encoded)
    print(mtf.decode_text(encoded))

0 commit comments

Comments
 (0)