-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathJsonParser.py
More file actions
326 lines (273 loc) · 13.7 KB
/
JsonParser.py
File metadata and controls
326 lines (273 loc) · 13.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" JSON Parser to both validate and process the JSON String to a Python-understandable structure
Spaces, tabs and line-breaks are allowed in input and will be ignored
Validate and Parse JSON String by importing Parser to continue with processed output
from JsonParser import JSONParser
output = JSONParser( '{ "Name": "Reinier", "Age": 30.5 }' ).parse()
print( output[ 'Name'] )
Validate JSON String via command line - as return is impossible
$ python JsonParser.py "{ \"Name\": \"Reinier\", \"Age\": 30.5 }"
Author: Reinier van den Assum (reinier@foxy-solutions.com)
Date: January 2022
"""
import sys
import re
# Structural characters of JSON.
# Kept as a module-level name, because the original class body published it via ``global CHARS``.
CHARS = { 'object': { 'open': '{', 'close': '}' },
          'list':   { 'open': '[', 'close': ']' },
          'attr':   { 'open': '"', 'close': '"' },
          'sep':    { 'attr': ',', 'decimal': '.' } }


class JSONParser:
    """ Validate a JSON String and convert it to Python types.

    Objects become dict, lists become list, strings become str, numbers become int or float,
    and true / false / null become True / False / None.

    The input is read through an index cursor (no characters are removed from a list), so parsing
    is linear in the length of the input. Whitespace between tokens is skipped; whitespace inside
    String values is kept exactly as provided.

    Usage:
        output = JSONParser( '{ "Name": "Reinier", "Age": 30.5 }' ).parse()
    """

    # Single-character escapes allowed after a backslash inside a String, mapped to the character they represent
    __ESCAPES = { '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t' }
    # Characters accepted in the four hex digits of a \uXXXX escape (int( x, 16 ) alone is too lenient)
    __HEX_DIGITS = frozenset( '0123456789abcdefABCDEF' )
    # Bare-word values defined by JSON and their Python counterparts
    __LITERALS = ( ( 'true', True ), ( 'false', False ), ( 'null', None ) )
    # Characters which can start a number; ASCII only, str.isnumeric() would also accept e.g. Arabic-Indic digits
    __NUMBER_START = frozenset( '-0123456789' )
    # Greedy token of number-like characters, so a malformed number (30.5.5) is reported as one invalid number
    __NUMBER_TOKEN = re.compile( r'[-+.0-9eE]+' )
    # Accepted number format; leading zeros and a trailing '.' stay accepted, as they were before
    __NUMBER_FORMAT = re.compile( r'-?[0-9]+(?:\.[0-9]*)?(?:[eE][+-]?[0-9]+)?' )

    def __init__( self, jsonString ):
        """ Store the JSON String to parse.

        Only surrounding whitespace is stripped; the content itself is left untouched,
        so whitespace inside String values is not altered.

        jsonString -- the JSON text (str) to validate and parse
        """
        self.__originalString = jsonString.strip()
        self.__position = 0  # index of the next character to read

    def parse( self ):
        """ Parse the JSON String and return it in Python types (dict or list).

        Returns the parsed structure, or None when the JSON is invalid. In both cases the outcome
        is printed; for invalid JSON the message, the input and a marker of the position are printed.
        The method can be called repeatedly, each call parses from the start.
        """
        structure = None
        self.__position = 0
        try:
            # JSON is only valid when it starts with an object or a list, thus { or [
            expectedStart = CHARS[ 'object' ][ 'open' ] + " or " + CHARS[ 'list' ][ 'open' ]
            firstChar = self.__getNextChar( "JSON is empty, but should start with " + expectedStart )
            if firstChar == CHARS[ 'object' ][ 'open' ]:
                structure = self.__parseObject()
            elif firstChar == CHARS[ 'list' ][ 'open' ]:
                structure = self.__parseList()
            else:
                raise ValueError( "JSON should start with " + expectedStart + ", not with " + firstChar )

            # Nothing but whitespace is allowed once the root structure was closed
            if self.__skipWhitespace():
                raise ValueError( "Unexpected " + self.__getNextChar( "" ) + " after the JSON structure was closed" )
        # RecursionError is included, so extremely deep nesting is reported as invalid instead of crashing the caller
        except ( ValueError, RecursionError ) as ex:
            currIndex = self.__position
            print( "Invalid char at " + str( currIndex ) + ": " + str( ex ) )
            # Replace each whitespace character by one space: single-line display, while indexes stay aligned
            print( re.sub( r'\s', ' ', self.__originalString ) )
            print( '^'.rjust( currIndex, ' ' ) )
            return None

        print( "JSON successfully parsed: ", structure )
        return structure

    def __parseObject( self ):
        """ Parse an Object (the opening { was already read) and return it as dict.

        Called recursively for nested Objects; the separator state is local to each Object.
        Raises ValueError on a missing/rogue comma, an unexpected character or when the input ends.
        """
        obj = dict()
        requiresSeparator = False  # True directly after an attribute: only , or } may follow
        lastWasComma = False       # True directly after a comma: } is not allowed
        while True:
            nextChar = self.__getNextChar( "JSON ended, but expected Object to be ended" )

            if nextChar == CHARS[ 'object' ][ 'close' ]:
                if lastWasComma:
                    raise ValueError( "Can't close an Object, while separator-comma implied additional attribute" )
                return obj

            if nextChar == CHARS[ 'sep' ][ 'attr' ]:
                if not requiresSeparator:
                    raise ValueError( "No comma was expected" )
                requiresSeparator = False
                lastWasComma = True
                continue

            if requiresSeparator:
                raise ValueError( "Comma-separator is missing between attributes" )
            if nextChar != CHARS[ 'attr' ][ 'open' ]:
                raise ValueError( "Unexpected " + nextChar )

            # Attribute assignment: "key": value
            key = self.__parseString()
            self.__processColon()
            obj[ key ] = self.__parseValue( self.__getNextChar( "JSON ended, but expected value assignment completion" ) )
            requiresSeparator = True
            lastWasComma = False

    def __parseList( self ):
        """ Parse a List (the opening [ was already read) and return it as list.

        Called recursively for nested Lists; the separator state is local to each List.
        Raises ValueError on a missing/rogue comma, an invalid value or when the input ends.
        """
        lst = []
        requiresSeparator = False  # True directly after a value: only , or ] may follow
        lastWasComma = False       # True directly after a comma: ] is not allowed
        while True:
            nextChar = self.__getNextChar( "JSON ended, but expected List to be ended" )

            if nextChar == CHARS[ 'list' ][ 'close' ]:
                if lastWasComma:
                    raise ValueError( "Can't close a List, while separator-comma implied additional attribute" )
                return lst

            if nextChar == CHARS[ 'sep' ][ 'attr' ]:
                if not requiresSeparator:
                    raise ValueError( "No comma was expected" )
                requiresSeparator = False
                lastWasComma = True
                continue

            if requiresSeparator:
                raise ValueError( "Comma-separator is missing between attributes" )

            lst.append( self.__parseValue( nextChar ) )
            requiresSeparator = True
            lastWasComma = False

    def __processColon( self ):
        """ Enforce the colon between the key and the value of an attribute assignment. """
        nextChar = self.__getNextChar( "JSON ended, but expected ':'" )
        if nextChar != ":":
            raise ValueError( "Expected ':' but found " + nextChar )

    def __parseValue( self, nextChar ):
        """ Parse the value which starts with nextChar (already read) and return it.

        Values of attribute assignments and values in Lists allow the same data types,
        therefore this method is shared: String, Number, Object, List, true, false and null.
        """
        if nextChar == CHARS[ 'attr' ][ 'open' ]:
            return self.__parseString()
        if nextChar == CHARS[ 'object' ][ 'open' ]:
            return self.__parseObject()
        if nextChar == CHARS[ 'list' ][ 'open' ]:
            return self.__parseList()
        if nextChar in self.__NUMBER_START:
            return self.__parseNumber()
        return self.__parseLiteral( nextChar )

    def __parseLiteral( self, firstChar ):
        """ Parse true, false or null, of which firstChar was already read; raise ValueError for anything else. """
        start = self.__position - 1
        for word, value in self.__LITERALS:
            if self.__originalString.startswith( word, start ):
                self.__position = start + len( word )
                return value
        raise ValueError( "A value is allowed to be a string, number, true, false, null, list or object, but found " + firstChar )

    def __parseString( self ):
        """ Parse a String (the opening quote was already read) and return its decoded value.

        Used for attribute keys and for String values. Escape sequences are decoded, so an
        escaped quote does not end the String. The cursor only moves once the String is complete,
        so errors are reported at the start of the String.
        """
        text = self.__originalString
        closingQuote = CHARS[ 'attr' ][ 'close' ]
        pieces = []
        index = self.__position
        while True:
            quoteIndex = text.find( closingQuote, index )
            if quoteIndex == -1:
                raise ValueError( "No closing-" + closingQuote + " was found for this text" )

            slashIndex = text.find( '\\', index, quoteIndex )
            if slashIndex == -1:
                # No escape before this quote, so it really closes the String
                pieces.append( text[ index:quoteIndex ] )
                self.__position = quoteIndex + 1
                return ''.join( pieces )

            # Keep the plain text before the backslash, decode the escape and continue right after it
            pieces.append( text[ index:slashIndex ] )
            decoded, index = self.__parseEscape( slashIndex )
            pieces.append( decoded )

    def __parseEscape( self, slashIndex ):
        """ Decode the escape sequence of which the backslash is at slashIndex.

        Returns a tuple of the decoded character and the index directly after the sequence.
        A \\uXXXX high surrogate directly followed by a \\uXXXX low surrogate is combined to one character.
        """
        text = self.__originalString
        marker = text[ slashIndex + 1 : slashIndex + 2 ]
        if marker in self.__ESCAPES:
            return self.__ESCAPES[ marker ], slashIndex + 2

        codeUnit = self.__readCodeUnit( slashIndex + 2 ) if marker == 'u' else None
        if codeUnit is None:
            self.__position = slashIndex + 1  # report the error at the backslash
            raise ValueError( "Invalid escape sequence \\" + marker )

        nextIndex = slashIndex + 6
        if 0xD800 <= codeUnit <= 0xDBFF and text.startswith( '\\u', nextIndex ):
            lowUnit = self.__readCodeUnit( nextIndex + 2 )
            if lowUnit is not None and 0xDC00 <= lowUnit <= 0xDFFF:
                codeUnit = 0x10000 + ( ( codeUnit - 0xD800 ) << 10 ) + ( lowUnit - 0xDC00 )
                nextIndex += 6
        return chr( codeUnit ), nextIndex

    def __readCodeUnit( self, index ):
        """ Return the integer value of the 4 hex digits starting at index, or None when these are not 4 hex digits. """
        hexDigits = self.__originalString[ index : index + 4 ]
        if len( hexDigits ) == 4 and all( char in self.__HEX_DIGITS for char in hexDigits ):
            return int( hexDigits, 16 )
        return None

    def __parseNumber( self ):
        """ Parse a Number, of which the first character (digit or minus) was already read.

        Returns an int when the number has no decimal part and no exponent, otherwise a float.
        The cursor only moves once the number is valid, so errors are reported at the start of the number.
        """
        start = self.__position - 1
        numberString = self.__NUMBER_TOKEN.match( self.__originalString, start ).group()
        if not self.__NUMBER_FORMAT.fullmatch( numberString ):
            raise ValueError( "Invalid number " + numberString )

        isInteger = not any( marker in numberString for marker in '.eE' )
        numberValue = int( numberString ) if isInteger else float( numberString )
        self.__position = start + len( numberString )
        return numberValue

    def __skipWhitespace( self ):
        """ Move the cursor to the next character which isn't whitespace; return False when the input ended. """
        text = self.__originalString
        index = self.__position
        while index < len( text ) and text[ index ].isspace():
            index += 1
        self.__position = index
        return index < len( text )

    def __getNextChar( self, endOfInputMessage ):
        """ Return (and consume) the next character which isn't a space, tab or line-break.

        Raises ValueError with endOfInputMessage when the input ended, so each caller
        can describe what was still expected.
        """
        if not self.__skipWhitespace():
            raise ValueError( endOfInputMessage )
        nextChar = self.__originalString[ self.__position ]
        self.__position += 1
        return nextChar
"""
The magic!
"""
# Logic to allow direct validation of Command-line argument jsonString
if len( sys.argv ) == 2: # [ fileReference jsonString ]
JSONParser( sys.argv[ 1 ] ).parse()
quit()
# Test scripts:
print( ">> Starting test scripts" )
# Asserting JSON starting character validation
assert dict() == JSONParser( '{}' ).parse(), "Expected valid JSON of empty object"
assert [] == JSONParser( '[]' ).parse(), "Expected valid JSON of empty list"
assert None == JSONParser( '"Naam": "Reinier"' ).parse(), "Should have Parsing exception, since JSON should be list or object"
# Successful complex parsing
inputString = '''{
"users": [
{ "Name": "Person1 Surname",
"LastOrders": [ 350.27, 13, 14.5 ],
"Details": {
"LastLogin": "2021-01-07T11:00:00.000Z",
"Gender": "Unknown"
}
},
{ "Name": "Person2",
"LastOrders": [],
"Details": {
"LastLogin": "Never",
"Gender": "Neutral"
}
}
]
}'''
output = JSONParser( inputString ).parse()
assert None != output, "Expected a successfully parsed JSON object"
usersList = output[ "users" ]
assert 2 == len( usersList ), "Expected 2 users to be provided in the output users list"
assert "Person1 Surname" == usersList[ 0 ][ "Name" ], "Expected name of person 1 to be in correct order and still containing space"
assert "Person2" == usersList[ 1 ][ "Name" ], "Expected name of person 2 to be in correct order"
assert 3 == len( usersList[ 0 ][ "LastOrders" ] ), "Expected 3 last Orders for person 1"
assert 0 == len( usersList[ 1 ][ "LastOrders" ] ), "Expected 0 last Orders for person 1"
assert "Neutral" == usersList[ 1 ][ "Details" ][ "Gender" ], "Expected nested Objects to be parsed correctly"
# Validation errors
assert None == JSONParser( '{ "Name": "Reinier" ').parse(), "Missing last closing object-character"
assert None == JSONParser( '{ "Name": "Reinier }').parse(), "Missing closing quote"
assert None == JSONParser( '{ "Age": 30.5.5 }').parse(), "Invalid number"
assert None == JSONParser( '{ "Age": 30, }').parse(), "Rogue comma"
assert None == JSONParser( '{ "Age": 30 "Name": "Reinier" }').parse(), "Missing comma between attributes"
print( ">> All test cases were completed successfully!" )