-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtestre.py
More file actions
56 lines (49 loc) · 1.67 KB
/
testre.py
File metadata and controls
56 lines (49 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#coding=utf-8
import collections
import re
# Record for one lexical token: its type, the matched text, the 1-based line
# number and the 0-based column of the token within that line.
Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column'])


def tokenize(s):
    """Lazily split the string *s* into ``Token`` records.

    Recognised lexemes are numbers, ``:=``, ``;``, alphabetic identifiers,
    the arithmetic operators ``+ * / -``, newlines, and spaces/tabs.
    Spaces, tabs and newlines are consumed but never yielded.  An
    identifier whose text is one of the upper-case keywords (``IF``,
    ``THEN``, ...) is reported with the keyword itself as its type.

    Yields:
        Token(typ, value, line, column) for every significant lexeme, where
        ``line`` starts at 1 and ``column`` starts at 0 on every line.

    Raises:
        RuntimeError: when scanning stops before the end of *s* because no
            pattern matches at the current position.  As this is a
            generator, the error surfaces while iterating, not at call time.
    """
    keywords = {'IF', 'THEN', 'ENDIF', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
    token_specification = [
        ('NUMBER', r'\d+(\.\d*)?'),  # Integer or decimal number
        ('ASSIGN', r':='),           # Assignment operator
        ('END', r';'),               # Statement terminator
        ('ID', r'[A-Za-z]+'),        # Identifiers
        ('OP', r'[+*\/\-]'),         # Arithmetic operators
        ('NEWLINE', r'\n'),          # Line endings
        ('SKIP', r'[ \t]'),          # Skip over spaces and tabs
    ]
    # One alternation of named groups; ``lastgroup`` then tells us which
    # kind of lexeme matched.
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    get_token = re.compile(tok_regex).match
    line = 1
    pos = line_start = 0
    mo = get_token(s)
    # ``mo`` is None as soon as nothing matches at ``pos`` (including for an
    # empty input), so it must not be touched outside this loop.
    while mo is not None:
        typ = mo.lastgroup
        if typ == 'NEWLINE':
            # The next line begins right after the newline character, which
            # keeps columns 0-based on every line, not just the first one.
            line_start = mo.end()
            line += 1
        elif typ != 'SKIP':
            val = mo.group(typ)
            if typ == 'ID' and val in keywords:
                typ = val
            yield Token(typ, val, line, mo.start() - line_start)
        pos = mo.end()
        mo = get_token(s, pos)
    if pos != len(s):
        raise RuntimeError('Unexpected character %r on line %d' % (s[pos], line))
# Sample input for the token() demo loop at the bottom of the file.
statements = " abss x *\n"
def getWordSign(d):
    """Return ``(value, key)`` for the first entry of *d* whose value is set.

    Args:
        d: expected to be a dict; any other type is ignored.

    Returns:
        A ``(value, key)`` tuple for the first item, in the dict's iteration
        order, whose value is not ``None``; ``None`` when *d* is not a dict
        or holds no such item.
    """
    if not isinstance(d, dict):
        return None
    for key, val in d.items():
        # Identity test: only a genuine None counts as "unset"; this also
        # avoids invoking a custom __ne__ on the value.
        if val is not None:
            return (val, key)
    return None
def token(words):
    """Scan *words* and yield one classified result per regex match.

    Each match is either a run of lower-case letters (group ``TOKEN``) or a
    run of the characters ``. + * =`` (group ``SIGN``).  For every match the
    generator yields what ``getWordSign`` returns for the match's
    ``groupdict()``.
    """
    word_or_sign = r"(?P<TOKEN>[a-z]+)|(?P<SIGN>[\.\+\*=]+)"
    for found in re.finditer(word_or_sign, words):
        groups = found.groupdict()
        yield getWordSign(groups)
# Demo: print every result token() produces for the sample input.  The
# parenthesised call form of print works on both Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
for i in token(statements):
    print(i)