-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathParserGen.hs
More file actions
109 lines (78 loc) · 5.12 KB
/
ParserGen.hs
File metadata and controls
109 lines (78 loc) · 5.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
module ParserGen where
-- ===========================================================================
-- Parser Generator
-- ===========================================================================
import BasicFunctions
import ParseBasis
import Data.List
-- ==========================================================================================================
-- endSkip for dealing with end of input during parsing
endSkip nt = case nt of
Opt _ -> True
Rep0 _ -> True
Alt nts mts -> all endSkip nts || all endSkip mts
Rep1 nts -> all endSkip nts
_ -> False
-- ==========================================================================================================
-- Parser Generator
-- ----------------
-- NOTE:
-- - Grammar gr is *function*
-- - nt is non-terminal; nt:rule is the rule under consideration
-- - nt0 is the father node
-- - ts is the list of subtrees under nt0 produced so far
-- - tokens here is the list of *indexed* input tokens
-- - recCheck is used for checking left-recursiveness of the grammar
-- ==========================================================================================================
parserGen :: Grammar -> [Alphabet] -> ParseState -> [(ParseTree,[(Int,Token)])]
parserGen gr [] (nt0,ts,tokens,recCheck) = [(PNode nt0 ts, tokens)]
parserGen gr (nt:rule) (nt0,ts,[],recCheck) | endSkip nt = parserGen gr rule (nt0,ts,[],recCheck)
| otherwise = [(PError (PNode nt0 ts) (nt:rule) nt "end of input" 0, [])]
parserGen gr (nt:rule) (nt0,ts, allTokens@((k,(cat,str)):remTokens), recCheck)
| nt ∈ recCheck = error ("grammar is left-recursive. Chain: " ++ show (recCheck ++ [nt]))
| otherwise = case nt of
-- ============================================================================================================
-- Backus-Naur constructions
Alt nts mts -> parserGen gr (nts++rule) (nt0,ts,allTokens,recCheck)
++ parserGen gr (mts++rule) (nt0,ts,allTokens,recCheck)
Opt nts -> parserGen gr (nts++rule) (nt0,ts,allTokens,recCheck)
++ parserGen gr rule (nt0,ts,allTokens,recCheck)
Rep0 nts -> parserGen gr (nts ++ (Rep0 nts : rule)) (nt0,ts,allTokens,recCheck)
++ parserGen gr rule (nt0,ts,allTokens,recCheck)
Rep1 nts -> parserGen gr (nts ++ (Rep0 nts : rule)) (nt0,ts,allTokens,recCheck)
-- ============================================================================================================
-- Terminal Symbols
TermSymb str' | str==str' -> parserGen gr rule (nt0, ts++[PLeaf (cat,str)], remTokens, [])
| otherwise -> [(PError (PNode nt0 ts) (nt:rule) nt str k, [])]
SyntCat cat' | cat==cat' -> parserGen gr rule (nt0, ts++[PLeaf (cat,str)], remTokens, [])
| otherwise -> [(PError (PNode nt0 ts) (nt:rule) nt str k, [])]
-- ============================================================================================================
-- Non-terminals
_ -> concat [ nextParses
| r <- gr nt
, let parses = parserGen gr r (nt,[],allTokens, recCheck++[nt])
, let correctParses = filter (not.isPError.fst) parses
, let nextParses | null correctParses = [ (finalPError (nt0,ts) $ maximum $ map fst parses , []) ]
| otherwise = concat $ map (parserGen gr rule) nextParseStates
where
nextParseStates = [ (nt0,ts++[t],remTokens,[])
| (t,remTokens) <- correctParses ]
]
-- ==================================================
-- Additional functions
isPError (PError _ _ _ _ _) = True
isPError _ = False
finalPError (nt0,ts) (PError t rule nt str k) = PError (PNode nt0 (ts++[t])) rule nt str k
-- ==================================================
-- Top-level parse function
parse :: Grammar -> Alphabet -> [Token] -> ParseTree
parse gr s tokens | null correctParses = maximum $ map fst parses
| not $ null rest = error ("tokenList not fully parsed. Still left: " ++ (show $ map snd rest))
| otherwise = final
where
parses = [ (t,rem) | r <- gr s
, (t,rem) <- parserGen gr r (s,[],tokens',[])
]
tokens' = zip [0..] tokens -- indexed tokens
correctParses = filter (not.isPError.fst) parses
(final,rest) = head correctParses