-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathExpressionParser.py
More file actions
151 lines (127 loc) · 5.45 KB
/
ExpressionParser.py
File metadata and controls
151 lines (127 loc) · 5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Simple expression parser in pure Python
# Authors: Andrey Popelo <andrey@popelo.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
class ExpressionParser(object):
    """Very simple but configurable parser for parsing different expressions.

    The syntax definition should be provided by a subclass via the
    ``syntax`` class attribute.  Call :meth:`parse` with an input string
    and it returns a tree (a list of token dicts) containing the parsed
    data; "group" tokens nest their children under their ``'value'`` key.
    """

    # syntax definition for the parser
    # (!) order is important
    syntax = [
        # This is a token definition, it can contain the following
        # fields:
        #
        # {
        #   'start': <regex>,  # regexp which matches the start of a token
        #
        #   'end': <regex>,    # (optional) regexp which matches the end of
        #                      # a token
        #
        #   'type': <string>,  # type (or name) of a token
        #
        #   'create': <fnc>    # (optional) function which will be
        # }                    # called after text which matches this
        #                      # type of token was found.
        #                      # Function should accept one parameter
        #                      # which is a MatchObject object and
        #                      # return a result which will be stored
        #                      # in the resulting tree
        #
        #
        # Example syntax definition is listed below. A parser with this
        # syntax definition can parse simple expressions, which look
        # like:
        #   "prop1:val1"
        #   "prop1:val1 AND prop2:val2 OR prop3:val3"
        #   "(prop1:val1 AND NOT (prop2:val2 OR prop3:val3)) AND prop4:val4"
        #   ...
        #
        # # group, eg: "(...)"
        # { 'start': '\\s*\(',  # cut opening bracket and leading white space
        #   'end': '\)\\s*',    # cut closing bracket and following white space
        #   'type': 'group'
        # },
        #
        # # propval, eg: "prop1:val1"
        # # with optional NOT modifier
        # { 'start': '\\s*(NOT)?\\s*([a-zA-Z0-9]+):\\s*([^ ()]+)\\s*',
        #   'type': 'propval',
        #   'create': lambda m: {"modifier":m.group(1), "prop":m.group(2), "val":m.group(3)}
        # },
        #
        # # operator, eg: "AND", "OR"
        # { 'start': '\\s*(AND|OR)\\s*',
        #   'type': 'operator',
        #   'create': lambda m: m.group(1)
        # },
        #
        # # text - all other text
        # { 'start': '[ ()]?[^ ()]*',  # match all characters until special character found
        #   'type': 'text'
        # }
    ]

    def parse(self, text):
        """Parse *text* and return the resulting token tree.

        :param text: input string; leading/trailing whitespace is stripped.
        :returns: list of token dicts (the top level of the tree).
        :raises ValueError: if no syntax rule matches the remaining input,
            or if a rule matched the empty string.  Either situation would
            previously leave the loop spinning forever, since no input was
            ever consumed.
        """
        self.text = text.strip()
        self.tokens_stack = []
        self.parent_stack = []
        while self.text:
            remaining = self.text
            # End patterns are tried first so a group's closing token is
            # not swallowed by a more generic start rule.
            if not (self._search_end_patterns() or
                    self._search_start_patterns()):
                raise ValueError(
                    "no syntax rule matches input at: %r" % (self.text,))
            if self.text == remaining:
                # A zero-width match never consumes input; fail loudly
                # instead of looping forever.
                raise ValueError(
                    "syntax rule matched empty string at: %r" % (self.text,))
        return self.tokens_stack

    def _search_start_patterns(self):
        """Try every 'start' regexp in order; consume and store the first
        match as a token.  Returns True if a rule matched."""
        if self.parent_stack:
            parent_item = self.parent_stack[-1]
        else:
            parent_item = None
        for syntax_def in self.syntax:
            match = self._match_and_cut(syntax_def['start'])
            if match:
                token_type = syntax_def['type']  # avoid shadowing builtin `type`
                if "end" in syntax_def:
                    # Container token: children collect into 'value' until
                    # its 'end' regexp matches.
                    item = {'type': token_type, 'end': syntax_def['end'],
                            'value': []}
                    self.parent_stack.append(item)
                else:
                    # Leaf token: value comes from the optional 'create'
                    # callback, or defaults to the raw matched text.
                    if 'create' in syntax_def:
                        value = syntax_def['create'](match)
                    else:
                        value = match.group(0)
                    item = {'type': token_type, 'value': value}
                # if it has a parent, add it to the parent's children,
                # otherwise to the top-level token list
                if parent_item:
                    stack = parent_item['value']
                else:
                    stack = self.tokens_stack
                stack.append(item)
                return True
        return False

    def _search_end_patterns(self):
        """Check whether one of the open containers' 'end' regexps matches;
        if so, consume it and close the innermost container.

        Returns True if an end pattern matched.

        NOTE(review): the innermost container is checked first, but when an
        OUTER container's end pattern is the one that matches, the innermost
        item is still the one popped -- presumably all open groups share the
        same end pattern in practice; verify if mixed group types are used.
        """
        for item in self.parent_stack[::-1]:
            if self._match_and_cut(item['end']):
                self.parent_stack.pop()
                return True
        return False

    def _match_and_cut(self, pattern):
        """Match *pattern* at the start of the remaining text; on success,
        cut the matched prefix off ``self.text`` and return the match
        object, otherwise return None."""
        match = re.match(pattern, self.text)
        if match:
            # cut matched text
            self.text = self.text[len(match.group(0)):]
            return match
        return None