-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathregex_parser.py
More file actions
305 lines (260 loc) · 10.4 KB
/
regex_parser.py
File metadata and controls
305 lines (260 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#!/usr/bin/python3
# RegEx Parser
# John Shields
# References
# Functions
# https://www.w3schools.com/python/python_functions.asp
# Shunting Yard Algorithm
# https://www.tldp.org/LDP/Bash-Beginners-Guide/html/sect_04_01.html
# https://web.microsoftstream.com/video/cfc9f4a2-d34f-4cde-afba-063797493a90
# http://www.martinbroadhurst.com/shunting-yard-algorithm-in-python.html
# Thompson's Construction
# https://web.microsoftstream.com/video/5e2a482a-b1c9-48a3-b183-19eb8362abc9
# https://xysun.github.io/posts/regex-parsing-thompsons-algorithm.html
# Matching
# https://web.microsoftstream.com/video/6b4ba6a4-01b7-4bde-8f85-b4b96abc902a
# Prompt
# https://stackoverflow.com/questions/70797/user-input-and-command-line-arguments
# https://stackabuse.com/getting-user-input-in-python/
# https://stackoverflow.com/questions/3754620/a-basic-question-about-while-true
import cmd
# Shunting Yard Algorithm
def shunt(infix):
# specials dictionary
specials = {'*': 50, '.': 40, '|': 30, '+': 40, '?': 35}
pofix = ""
stack = ""
for c in infix:
if c == '(':
stack = stack + c
elif c == ')':
while stack[-1] != '(':
# putting statements together
pofix, stack = pofix + stack[-1], stack[:-1]
stack = stack[:-1]
elif c in specials:
# looks for c in specials dictionary, if not found, give value 0
# looks for top of stack in specials dictionary, if not found, give value 0
# n/ compares 1 against the other
while stack and specials.get(c, 0) <= specials.get(stack[-1], 0):
pofix, stack = pofix + stack[-1], stack[:-1]
stack = stack + c
else:
pofix = pofix + c
while stack:
# putting statements together
pofix, stack = pofix + stack[-1], stack[:-1]
return pofix
# Thompson's Construction
# represents a state with two arrows, labelled by label
# use 'None' for a label representing 'e' arrows
class state:
label = None
edge1 = None
edge2 = None
# an NFA is represented by its initial and accepts states
class nfa:
initial = None
accept = None
# mandatory to have 'self' args in constructor
def __init__(self, initial, accept):
self.initial = initial
self.accept = accept
# Algorithm
# function has a stack of NFAs
# pofix allows to loop one character at a time
# until the regular expression is complete
def compile_tom(pofix):
nfa_stack = []
for c in pofix:
if c == '.':
# stack works as last in first out
# method to go to stack
# pop 2 NFAs off the stack.
nfa2 = nfa_stack.pop()
nfa1 = nfa_stack.pop()
# merges them together
# connect 1st NFAs accept state to the 2nd's initial
nfa1.accept.edge1 = nfa2.initial
# push NFA to stack
# one way to do it ¬ nfa_stack.append(nfa(initial, accept))
# second way 'easier way'
new_nfa = nfa(nfa1.initial, nfa2.accept)
# push new_nfa
nfa_stack.append(new_nfa)
elif c == '|':
# pop 2 NFAs off the stack.
nfa2 = nfa_stack.pop()
nfa1 = nfa_stack.pop()
# create a new initial state, connect it to
# initial states of the two NFAs popped from the stack
initial = state()
initial.edge1 = nfa1.initial
initial.edge2 = nfa2.initial
# create new accept state, connecting the accept states
# of the 2 NFAs popped from the stack, to the new state
accept = state()
nfa1.accept.edge1 = accept
nfa2.accept.edge1 = accept
# push NFA to stack
# one way to do it ¬ nfa_stack.append(nfa(initial, accept))
# second way 'easier way'
new_nfa = nfa(initial, accept)
# push new_nfa
nfa_stack.append(new_nfa)
elif c == '*':
# pop a single NFA from the stack
nfa1 = nfa_stack.pop()
# create a new initial and accept states
initial = state()
accept = state()
# join the new initial state to nfa1's initial state and the new accept state.
initial.edge1 = nfa1.initial
initial.edge2 = accept
# join the old accept state to the new accept state and nfa1's initial state.
nfa1.accept.edge1 = nfa.initial
nfa1.accept.edge2 = accept
# push new NFA to the stack
# one way to do it ¬ nfa_stack.append(nfa(initial, accept))
# second way 'easier way'
new_nfa = nfa(initial, accept)
# push new_nfa
nfa_stack.append(new_nfa)
elif c == '+':
# pop nfa from stack
nfa1 = nfa_stack.pop()
# create a new initial and accept states
initial = state()
accept = state()
# join the new initial stse to nfa1's initial state and the new accept state.
initial.edge1 = nfa1.initial
# join the old accept state to the new accept state and nfa1's initial state.
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
# push new NFA to the stack
new_nfa = nfa(initial, accept)
# pushes new_nfa
nfa_stack.append(new_nfa)
elif c == '?':
# pop nfa from stack
nfa1 = nfa_stack.pop()
# create a new initial and accept states
initial = state()
accept = state()
# join the new initial stse to nfa1's initial state and the new accept state.
initial.edge1 = nfa1.initial
initial.edge2 = accept
# join the old accept state to the new accept state and nfa1's initial state.
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
# push new NFA to the stack
new_nfa = nfa(initial, accept)
# pushes new_nfa
nfa_stack.append(new_nfa)
else:
# create new initial and accept states
# creates new nfa
accept = state()
# creates new nfa
initial = state()
# string character
# joins the initial states to the accept state
# using an arrow labelled c.
initial.label = c
initial.edge1 = accept
# going to create a new instance of the NFA class.
# set its initial state to the
# initial state
# push new NFA to the stack # ¬ returns an instance of the nfa class ¬
# one way to do it ¬ nfa_stack.append(nfa(initial, accept))
# second way 'easier way'
new_nfa = nfa(initial, accept)
# push new_nfa
nfa_stack.append(new_nfa)
# nfa_stack should only have a single nfa on it at the point.
return nfa_stack.pop()
#
# Return the set of states that can be reached from state following e arrows
def follow_es(following_state):
# create a new set, with state as its only member
states = set()
states.add(following_state)
# check if state has arrows labeled e from it
if following_state.label is None:
# Check if edge1 is a state
if following_state.edge1 is not None:
# if there's an edge1, follow it
states |= follow_es(following_state.edge1)
# check if edge2 is a state
if following_state.edge2 is not None:
# if there's an edge2, follow it
states |= follow_es(following_state.edge2)
# return the set of states
return states
# matching
def match(infix, string_match):
# Shunt and compile the regular expression
# calls the shunt function
postfix = shunt(infix)
# calls the compile_tom function
non_fin_auto = compile_tom(postfix)
# current set of states and the next set of the states
current = set()
upcoming = set()
# add the initial state to the current set
current |= follow_es(non_fin_auto.initial)
# Loop through each character in the string
for s in string_match:
# Loop through the current set of states
for c in current:
# check of that state is labelled s
if c.label == s:
# Add edge1 state to the upcoming set
upcoming |= follow_es(c.edge1)
# set current to upcoming, and clear out next
current = upcoming
upcoming = set()
# check if the accept state is in the set of current states
return non_fin_auto.accept in current
# testing code
def executioner():
print('\nTest Shunting Yard Algorithm \n')
# test Shunting Yard Algorithm
print(shunt("(a.b)|(c*.d)"))
print("\nTest Thompson's Construction \n")
# test Thompson's Construction
print(compile_tom("ab.cd.|"))
print(compile_tom("aa.*"))
print(compile_tom("(0|(1(01*(00)*0)*1)*)*"))
print(compile_tom("(0|(1(01*(00)*0)*1)*)*"))
# test Matching
# creates list of infix regex
infixes = ["a.b.c", "a.b.c*, a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"]
# strings to match against the regex infixes
strings = ["abc", "abbc", "abcc", "abad", "abbbc"]
print('\n Test Matching \n')
# double for loop - loops through the regex and strings
for exp in infixes:
for res in strings:
# function prints out True/False if the infix regex matches the string
print(match(exp, res), exp, res)
# input prompt to match Regular Expressions
print('\nInput a regular expression to be tested \n')
# prints out True/False if the infix regex matches the string
while True:
try:
# writes an infix regex to match with string
infixes = (input("Input infix regular expression: "))
# writes a string to match with infix regex
string = (input("Input string to match: "))
print('\nInfix and String match \n')
# function prints out True/False if the infix regex matches the string
print(match(infixes, string), infixes, string, '\n')
a_file = open("bash_tests/output.txt", "w")
text = match(infixes, string), infixes, string
print(text, file=a_file)
a_file.close()
except (KeyboardInterrupt, EOFError, SystemExit):
break
if __name__ == "__main__":
executioner()