-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjson_parser.py
More file actions
144 lines (121 loc) · 4.27 KB
/
json_parser.py
File metadata and controls
144 lines (121 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import argparse
import sys
import re
# Token types, tried in declaration order by the lexer; first match wins.
# Note: the duplicate 'TRUE'/'FALSE' entries were removed — they were
# unreachable because 'BOOLEAN' (r'true|false') always matched first.
# The zero-width 'EOF': r'$' entry was removed too: it could emit a
# spurious Token('EOF', '') on trailing whitespace; the lexer appends
# the real EOF token itself.
TOKEN_TYPES = {
    'LBRACE': r'\{',
    'RBRACE': r'\}',
    # Double-quoted string: runs of non-quote/non-backslash characters,
    # optionally interleaved with backslash escapes (\" \\ \n \uXXXX ...).
    'STRING': r'"([^"\\]*(\\.[^"\\]*)*)"',
    # JSON number grammar: optional sign, integer part, optional fraction
    # and exponent.
    'NUMBER': r'-?\d+(\.\d+)?([eE][+-]?\d+)?',
    'BOOLEAN': r'true|false',
    'NULL': r'null',
    'COLON': r':',
    'COMMA': r',',
    'LBRACKET': r'\[',
    'RBRACKET': r'\]',
}
class Token:
    """A single lexical token: a type tag (key of TOKEN_TYPES or 'EOF')
    and the matched source text (None for EOF)."""

    def __init__(self, type, value):
        self.type, self.value = type, value

    def __repr__(self):
        return "Token({}, {})".format(self.type, self.value)
def lexer(input):
    """Tokenize a JSON document string into a list of Token objects.

    The returned list always ends with a single Token('EOF', None).
    String tokens carry their *decoded* value: surrounding quotes are
    stripped and JSON backslash escapes (\\" \\\\ \\/ \\b \\f \\n \\r \\t
    \\uXXXX) are resolved — previously escapes were left raw in the value.

    Raises ValueError on an unexpected character or a bad escape.
    """
    def _unescape(raw):
        # Decode JSON escape sequences in the body of a string literal.
        simple = {'"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f',
                  'n': '\n', 'r': '\r', 't': '\t'}
        out = []
        i = 0
        while i < len(raw):
            ch = raw[i]
            if ch == '\\' and i + 1 < len(raw):
                esc = raw[i + 1]
                if esc == 'u' and i + 6 <= len(raw):
                    out.append(chr(int(raw[i + 2:i + 6], 16)))
                    i += 6
                    continue
                if esc in simple:
                    out.append(simple[esc])
                    i += 2
                    continue
                raise ValueError(f"Invalid escape sequence: \\{esc}")
            out.append(ch)
            i += 1
        return ''.join(out)

    # Compile every pattern once, outside the scan loop (they were being
    # recompiled for each candidate on every token previously).
    compiled = [(type, re.compile(pattern)) for type, pattern in TOKEN_TYPES.items()]
    whitespace_pattern = re.compile(r'\s+')
    tokens = []
    text = input  # local alias; 'input' shadows the builtin
    while text:
        ws = whitespace_pattern.match(text)
        if ws:
            text = text[ws.end():]
            if not text:
                # Trailing whitespace: stop before the zero-width legacy
                # EOF pattern can emit a spurious Token('EOF', '').
                break
        match = None
        for type, regex in compiled:
            match = regex.match(text)
            if match:
                value = match.group(0)
                if type == 'STRING':
                    value = _unescape(value[1:-1])
                tokens.append(Token(type, value))
                text = text[match.end():]
                break
        if not match:
            raise ValueError(f"Unexpected character: {text[0]}")
    tokens.append(Token('EOF', None))
    return tokens
def parse(tokens):
    """Parse a token list (as produced by lexer) into Python objects.

    Accepts any JSON value at the top level — object, array, string,
    number, boolean or null — not only objects (generalized; objects
    still parse exactly as before). Consumes tokens destructively via
    list.pop(0).

    Raises ValueError on malformed input, including trailing commas
    ("[1,]", '{"a":1,}') and missing separators ("[1 2]"), which the
    previous implementation silently accepted.
    """
    def parse_value(tokens):
        # Dispatch on the next token; composites consume their own brackets.
        next_token_type = tokens[0].type
        if next_token_type == 'LBRACE':
            return parse_object(tokens)
        if next_token_type == 'LBRACKET':
            return parse_array(tokens)
        token = tokens.pop(0)
        if token.type == 'STRING':
            return token.value
        if token.type == 'NUMBER':
            try:
                return int(token.value)
            except ValueError:
                # Fractions and exponents fall through to float.
                return float(token.value)
        if token.type == 'BOOLEAN':
            return token.value == 'true'
        if token.type == 'NULL':
            return None
        raise ValueError(f"Unsupported value type: {token.type}")

    def parse_array(tokens):
        array = []
        tokens.pop(0)  # consume the opening '['
        if tokens[0].type == 'RBRACKET':
            tokens.pop(0)  # empty array
            return array
        while True:
            array.append(parse_value(tokens))
            sep = tokens[0].type
            if sep == 'COMMA':
                tokens.pop(0)
                if tokens[0].type == 'RBRACKET':
                    raise ValueError("Trailing comma in array")
            elif sep == 'RBRACKET':
                tokens.pop(0)  # consume the closing ']'
                return array
            else:
                # Covers both missing commas ("[1 2]") and unterminated
                # arrays (EOF reached before ']').
                raise ValueError("Expected ',' or ']' after array element")

    def parse_object(tokens):
        obj = {}
        tokens.pop(0)  # consume the opening '{'
        if tokens[0].type == 'RBRACE':
            tokens.pop(0)  # empty object
            return obj
        while True:
            key_token = tokens.pop(0)
            if key_token.type != 'STRING':
                raise ValueError("Expected a string key")
            if tokens.pop(0).type != 'COLON':
                raise ValueError("Expected ':' after key")
            obj[key_token.value] = parse_value(tokens)
            sep = tokens[0].type
            if sep == 'COMMA':
                tokens.pop(0)
                if tokens[0].type == 'RBRACE':
                    raise ValueError("Trailing comma in object")
            elif sep == 'RBRACE':
                tokens.pop(0)  # consume the closing '}'
                return obj
            else:
                raise ValueError("Expected ',' or '}' after a key-value pair")

    if not tokens:
        raise ValueError("Empty token list")
    ast = parse_value(tokens)
    if tokens[0].type != 'EOF':
        raise ValueError("Expected end of file after JSON value")
    return ast
def main(file_path):
    """Read the file at file_path, tokenize and parse it, print the result.

    Exits with status 0 on success. Exits with status 1 (after printing a
    diagnostic to stderr) on malformed JSON or on file errors — previously
    a missing/unreadable file escaped as an uncaught traceback because
    only ValueError was handled.
    """
    try:
        # JSON is defined over Unicode text; pin UTF-8 rather than relying
        # on the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            json_input = file.read()
        tokens = lexer(json_input)
        parsed = parse(tokens)
    except (ValueError, OSError) as e:
        # ValueError: lexer/parser errors (UnicodeDecodeError is a subclass).
        # OSError: missing file, permission denied, etc.
        print(f"Error parsing JSON: {e}", file=sys.stderr)
        sys.exit(1)
    print(parsed)
    sys.exit(0)
if __name__ == '__main__':
    # Command-line entry point: parse the single JSON file named in argv.
    cli = argparse.ArgumentParser(description='Simple JSON Parser')
    cli.add_argument('file', help='Path to the JSON file to parse')
    args = cli.parse_args()
    main(args.file)