-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathASF_RegexEngine.cls
More file actions
2681 lines (2445 loc) · 107 KB
/
Copy pathASF_RegexEngine.cls
File metadata and controls
2681 lines (2445 loc) · 107 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
VERSION 1.0 CLASS
BEGIN
MultiUse = -1 'True
Persistable = 0 'NotPersistable
DataBindingBehavior = 0 'vbNone
DataSourceBehavior = 0 'vbNone
MTSTransactionMode = 0 'NotAnMTSObject
END
Attribute VB_Name = "Regex"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = True
Attribute VB_PredeclaredId = False
Attribute VB_Exposed = True
' change name RegeExe
' https://ecp-solutions.github.io/ from mr W. Garcia
' ASF_RegexEngine.cls - Full-featured regex engine for ASF (single class module)
' Features implemented:
' - Precompile pattern into an AST (no reparsing on Exec)
' - Literals, . (any char)
' - Character classes: [abc], [a-z], negation [^...]
' - Escapes: \d \w \s and common escapes (\n, \r, \t)
' - Quantifiers: *, +, ?, {m,n} with {m,} and exact {n}
' - Greedy and lazy quantifiers (use trailing ? after quantifier to mark lazy)
' - Grouping: ( ... ) with precise capture start/end tracking
' - Non-capturing groups: (?:pattern)
' - Alternation: | via `alt` AST node
' - Anchors: ^ and $
' - IgnoreCase (i) flag
' - DotAll (s) controlling whether '.' matches newlines
' - Multiline (m) flag
' - Public API: Init, Test, Exec, Replace, Split
'
' Design notes:
' - AST nodes are represented as VBA Collections with string keys; helper
' accessors (NodeAdd/NodeGet/NodeExists) are provided.
' - Matching is backtracking-based; defensive limits and optimizations are
' implemented to reduce pathological backtracking where practical.
'
' Notes & Limitations:
' + Lookbehind is fixed-width only.
' + Atomic groups protection is capture-slot based. When an atomic group matches it locks
' the capture slots that changed during the atomic group's internal match. Those locked
' slots will not be restored by RestoreCaps. This is correct semantics: atomic group
' prevents backtracking into the group.
' + Possessive quantifiers are supported. In the quantifier attempt loop your possessive
' flag prevents trying smaller k values.
' + Performance: atomic groups and lookarounds use snapshots. Possessive quantifiers will
' often be faster (no backtracking), but atomic groups may make some matches irreversible.
Option Explicit
Option Base 0
' ---------------- Public config -----------------------------------------
Private Const DEFAULT_MAX_STEPS As Long = 200000
' --- debugging toggle: set True to print detailed trace during matching
'Private Const DEBUG_TRACE As Boolean = False
' ---------------- Public state ------------------------------------------
Private pPattern As String
Private pIgnoreCase As Boolean
Private pMaxMatchSteps As Long
Private pAST As Collection
Private pGroupCount As Long
Private pMultiline As Boolean
Private pDotAll As Boolean
' Current subject & captures
Private CurSubject As String
Private capsStart() As Long
Private capsEnd() As Long
Private atomicLocks() As Boolean
Private MatchSteps As Long
Private pNamedCaptures As Collection
Public Property Get LastNamedCaptures() As Collection
Set LastNamedCaptures = pNamedCaptures
End Property
' ---------------- Node helpers ------------------------------------------
Private Function newNode(ByVal typ As String) As Collection
Dim c As New Collection
c.Add typ, "type"
Set newNode = c
End Function
Private Sub NodeAdd(ByRef node As Collection, ByVal Key As String, ByVal Value As Variant)
If NodeExists(node, Key) Then
On Error Resume Next
node.Remove Key
On Error GoTo 0
End If
node.Add Value, Key
End Sub
Private Sub vAssignVar(ByRef dest As Variant, ByVal Src As Variant)
If IsObject(Src) Then Set dest = Src Else dest = Src
End Sub
Private Function NodeGet(ByVal node As Collection, ByVal Key As String) As Variant
On Error GoTo notfound
vAssignVar NodeGet, node(Key)
Exit Function
notfound:
Err.Clear
NodeGet = Empty
End Function
Private Function NodeExists(ByVal node As Collection, ByVal Key As String) As Boolean
On Error Resume Next
Dim tmp As Variant: vAssignVar tmp, node(Key)
If Err.Number = 0 Then NodeExists = True Else NodeExists = False
Err.Clear
On Error GoTo 0
End Function
' ---------------- Public API -------------------------------------------
Public Sub Init(ByVal Pattern As String, Optional ByVal ignoreCase As Boolean = False, Optional ByVal MaxSteps As Long = DEFAULT_MAX_STEPS, Optional ByVal multiline As Boolean = False, Optional ByVal dotAll As Boolean = False)
pPattern = Pattern
pIgnoreCase = ignoreCase
pMultiline = multiline
pDotAll = dotAll
If MaxSteps <= 0 Then pMaxMatchSteps = DEFAULT_MAX_STEPS Else pMaxMatchSteps = MaxSteps
' Build AST
Set pAST = CompilePattern(Pattern)
If Not pAST Is Nothing Then
If Not NodeExists(pAST, "groupNodes") Then BuildGroupNodesFromAST pAST
End If
' Run annotate/validation pass (will raise vbObjectError + 10 on invalid lookbehinds)
AnnotateLookbehinds pAST
' compute group count after validation
pGroupCount = GetGroupCount(pAST)
End Sub
Public Property Get Pattern() As String: Pattern = pPattern: End Property
Public Property Let Pattern(ByVal v As String): Init v, pIgnoreCase, pMaxMatchSteps, pMultiline, pDotAll: End Property
Public Property Get ignoreCase() As Boolean: ignoreCase = pIgnoreCase: End Property
Public Property Let ignoreCase(ByVal v As Boolean): pIgnoreCase = v: End Property
Public Property Get MaxMatchSteps() As Long: MaxMatchSteps = pMaxMatchSteps: End Property
Public Property Let MaxMatchSteps(ByVal v As Long): pMaxMatchSteps = v: End Property
Public Property Get multiline() As Boolean: multiline = pMultiline: End Property
Public Property Let multiline(ByVal v As Boolean): pMultiline = v: End Property
Public Property Get dotAll() As Boolean: dotAll = pDotAll: End Property
Public Property Let dotAll(ByVal v As Boolean): pDotAll = v: End Property
Public Function test(ByVal subj As String) As Boolean
Dim res As Collection
Set res = Exec(subj)
test = Not (res Is Nothing)
End Function
' ---------------- Parser & AST precomputation ----------------------------
Private Function CompilePattern(ByVal pat As String) As Collection
' Reject unsupported inline-comment syntax early
DetectUnsupportedCommentSyntax pat
Dim ast As New Collection
NodeAdd ast, "raw", pat
Dim anchoredStart As Boolean: anchoredStart = False
Dim anchoredEnd As Boolean: anchoredEnd = False
If Len(pat) > 0 And SafeMid(pat, 1, 1) = "^" Then anchoredStart = True: pat = SafeMid(pat, 2)
If Len(pat) > 0 And Right$(pat, 1) = "$" Then anchoredEnd = True: pat = Left$(pat, Len(pat) - 1)
NodeAdd ast, "anchoredStart", anchoredStart
NodeAdd ast, "anchoredEnd", anchoredEnd
Dim Pos As Long: Pos = 1
Dim groupCounter As Long: groupCounter = 0
Dim Body As Collection: Set Body = ParseAlternation(pat, Pos, Len(pat), groupCounter)
NodeAdd ast, "body", Body
' enforce lookbehind fixed-width constraint (raise at Init/Compile time)
EnforceFixedWidthLookbehinds ast
' Precompute minLen
Dim mlen As Long: mlen = ComputeMinLen(Body)
NodeAdd ast, "minLen", mlen
' Validate/lookbehind annotate
AnnotateLookbehinds ast
Set CompilePattern = ast
End Function
' ParseAlternation: sequences separated by '|'
Private Function ParseAlternation(ByVal pat As String, ByRef p As Long, ByVal pEnd As Long, ByRef groupCounter As Long) As Collection
Dim firstSeq As Collection
Set firstSeq = ParseSequence(pat, p, pEnd, groupCounter)
If Not (p <= pEnd And SafeMid(pat, p, 1) = "|") Then
Set ParseAlternation = firstSeq
Exit Function
End If
Dim altNode As Collection: Set altNode = newNode("alt")
Dim alts As New Collection
alts.Add firstSeq
Do While p <= pEnd And SafeMid(pat, p, 1) = "|"
p = p + 1 ' consume '|'
Dim nextSeq As Collection
Set nextSeq = ParseSequence(pat, p, pEnd, groupCounter)
alts.Add nextSeq
Loop
NodeAdd altNode, "child", alts
Set ParseAlternation = altNode
End Function
Private Function ParseSequence(pat As String, ByRef p As Long, pEnd As Long, _
ByRef groupCounter As Long, Optional stopAtPipeOrParen As Boolean = False) As Collection
Dim seq As Collection: Set seq = newNode("seq")
Dim children As New Collection
Dim thisIdx As Long
Dim ch As String
Dim node As Collection
Do While p <= pEnd
ch = SafeMid(pat, p, 1)
If ch = ")" Or ch = "|" Then Exit Do
Set node = Nothing
If ch = "\" Then
p = p + 1
If p > pEnd Then Exit Do
Set node = ParseEscapeNode(SafeMid(pat, p, 1))
p = p + 1
ElseIf ch = "." Then
Set node = newNode("dot")
p = p + 1
ElseIf ch = "[" Then
Dim clsEnd As Long: clsEnd = FindClassEnd(pat, p, pEnd)
If clsEnd = -1 Then
Set node = newNode("lit"): NodeAdd node, "ch", "[": p = p + 1
Else
Set node = ParseClass(pat, p + 1, clsEnd - 1)
p = clsEnd + 1
End If
ElseIf ch = "(" Then
' locate matching ')' deterministically
Dim foundClose As Long
foundClose = FindGroupEnd(pat, p, pEnd)
If foundClose = -1 Then
' malformed -> treat '(' as literal
Set node = newNode("lit"): NodeAdd node, "ch", "("
p = p + 1
Else
' consume '('
p = p + 1
If p <= pEnd And SafeMid(pat, p, 1) = "?" Then
p = p + 1
If p > pEnd Then
Set node = newNode("lit"): NodeAdd node, "ch", "("
Else
Dim nextCh As String: nextCh = SafeMid(pat, p, 1)
Select Case nextCh
Case "="
p = p + 1
Dim innerLA As Collection
Set innerLA = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim laNode As Collection: Set laNode = newNode("lookahead_pos")
NodeAdd laNode, "child", innerLA
Set node = laNode
Case "!"
p = p + 1
Dim innerNL As Collection
Set innerNL = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim nlaNode As Collection: Set nlaNode = newNode("lookahead_neg")
NodeAdd nlaNode, "child", innerNL
Set node = nlaNode
Case "<"
p = p + 1
If p <= pEnd Then
Dim sbch As String: sbch = SafeMid(pat, p, 1)
If sbch = "=" Then
p = p + 1
Dim innerLB As Collection
Set innerLB = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim lbNode As Collection: Set lbNode = newNode("lookbehind_pos")
NodeAdd lbNode, "child", innerLB
Set node = lbNode
ElseIf sbch = "!" Then
p = p + 1
Dim innerLBN As Collection
Set innerLBN = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim lbnNode As Collection: Set lbnNode = newNode("lookbehind_neg")
NodeAdd lbnNode, "child", innerLBN
Set node = lbnNode
Else
' named capture form (?<name>...)
Dim nameBuf As String: nameBuf = vbNullString
Do While p <= pEnd
Dim Nc As String: Nc = SafeMid(pat, p, 1)
If Nc = ">" Then Exit Do
If (Nc >= "a" And Nc <= "z") Or (Nc >= "A" And Nc <= "Z") Or (Nc >= "0" And Nc <= "9") Or Nc = "_" Or Nc = "-" Then
nameBuf = nameBuf & Nc
p = p + 1
Else
Exit Do
End If
Loop
If p <= pEnd And SafeMid(pat, p, 1) = ">" And Len(nameBuf) > 0 Then
p = p + 1 ' consume '>'
groupCounter = groupCounter + 1
thisIdx = groupCounter
Dim innerNamed As Collection
Set innerNamed = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim gnodeNamed As Collection: Set gnodeNamed = newNode("group")
NodeAdd gnodeNamed, "idx", thisIdx
NodeAdd gnodeNamed, "child", innerNamed
NodeAdd gnodeNamed, "name", nameBuf
Set node = gnodeNamed
' If DEBUG_TRACE Then Debug.Print "TRACE PARSE named-group -> name=" & nameBuf & " idx=" & thisIdx
Else
Set node = newNode("lit"): NodeAdd node, "ch", "("
End If
End If
Else
Set node = newNode("lit"): NodeAdd node, "ch", "("
End If
Case ">"
p = p + 1
Dim innerAtomic As Collection
Set innerAtomic = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim atomicNode As Collection: Set atomicNode = newNode("atomic")
NodeAdd atomicNode, "child", innerAtomic
Set node = atomicNode
Case ":"
p = p + 1
Dim innerNC As Collection
Set innerNC = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim ncNode As Collection: Set ncNode = newNode("group")
NodeAdd ncNode, "child", innerNC
NodeAdd ncNode, "noncap_syntax", True
Set node = ncNode
Case "R"
p = p + 1
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim rnode As Collection: Set rnode = newNode("recurse")
NodeAdd rnode, "target", 0
Set node = rnode
Case "("
' conditional (?(cond)yes|no)
p = p + 1
Dim condStart As Long: condStart = p
Dim condEnd As Long: condEnd = -1
Do While p <= pEnd
If SafeMid(pat, p, 1) = ")" Then
condEnd = p - 1
Exit Do
End If
p = p + 1
Loop
If condEnd = -1 Then
Set node = newNode("lit"): NodeAdd node, "ch", "("
Else
Dim condText As String: condText = Mid$(pat, condStart, condEnd - condStart + 1)
p = p + 1 ' consume ')'
Dim yesNode As Collection: Set yesNode = ParseSequence(pat, p, foundClose - 1, groupCounter, True)
Dim noNode As Collection: Set noNode = Nothing
If p <= pEnd And SafeMid(pat, p, 1) = "|" Then
p = p + 1
Set noNode = ParseSequence(pat, p, foundClose - 1, groupCounter, True)
End If
If foundClose <= pEnd Then p = foundClose + 1
Dim condNode As Collection: Set condNode = newNode("cond")
NodeAdd condNode, "cond", condText
NodeAdd condNode, "yes", yesNode
If Not (noNode Is Nothing) Then NodeAdd condNode, "no", noNode
Set node = condNode
End If
Case Else
' recursion by number or inline flags
If SafeMid(pat, p, 1) >= "0" And SafeMid(pat, p, 1) <= "9" Then
Dim ds As String: ds = vbNullString
Do While p <= pEnd
Dim dc As String: dc = SafeMid(pat, p, 1)
If dc >= "0" And dc <= "9" Then
ds = ds & dc
p = p + 1
Else
Exit Do
End If
Loop
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim rn As Collection: Set rn = newNode("recurse")
NodeAdd rn, "target", CLng(ds)
Set node = rn
Else
Dim flagsStr As String: flagsStr = vbNullString
Do While p <= pEnd
Dim fch As String: fch = SafeMid(pat, p, 1)
If (fch >= "a" And fch <= "z") Or (fch >= "A" And fch <= "Z") Or fch = "-" Then
flagsStr = flagsStr & fch
p = p + 1
Else
Exit Do
End If
Loop
If p <= pEnd And SafeMid(pat, p, 1) = ":" Then
p = p + 1
Dim innerFlagged As Collection
Set innerFlagged = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim gnodeF As Collection: Set gnodeF = newNode("group")
NodeAdd gnodeF, "noncap_syntax", True
Dim normFlags As String: normFlags = vbNullString
Dim kch As Long
For kch = 1 To Len(flagsStr)
Dim fc As String: fc = LCase$(Mid$(flagsStr, kch, 1))
If fc = "i" Or fc = "m" Or fc = "s" Or fc = "-" Then normFlags = normFlags & fc
Next kch
If Len(normFlags) > 0 Then
NodeAdd gnodeF, "flags_on", normFlags
PropagateFlags innerFlagged, normFlags
End If
NodeAdd gnodeF, "child", innerFlagged
Set node = gnodeF
Else
Err.Raise vbObjectError + 101, "RegexEngine", "Inline flags without ':' are not supported at compile time (use (?i:...) for group-scoped flags)."
End If
End If
End Select
End If
Else
' normal capturing group
groupCounter = groupCounter + 1
thisIdx = groupCounter
Dim inner As Collection
Set inner = ParseSequence(pat, p, foundClose - 1, groupCounter)
If p <= pEnd And SafeMid(pat, p, 1) = ")" Then p = p + 1
Dim gnode As Collection: Set gnode = newNode("group")
NodeAdd gnode, "idx", thisIdx
NodeAdd gnode, "child", inner
Set node = gnode
End If
' defensive: if inner parsing didn't advance to foundClose, jump past it
If foundClose <> -1 Then
If p <= foundClose And SafeMid(pat, p, 1) <> ")" Then p = foundClose + 1
End If
End If
Else
Set node = newNode("lit"): NodeAdd node, "ch", ch
p = p + 1
End If
' quantifier (same semantics as original)
If Not node Is Nothing And p <= pEnd Then
Dim qch As String: qch = SafeMid(pat, p, 1)
If qch = "*" Or qch = "+" Or qch = "?" Or qch = "{" Then
Dim qnode As Collection: Set qnode = newNode("quant")
NodeAdd qnode, "child", node
NodeAdd qnode, "possessive", False
If qch = "*" Then
NodeAdd qnode, "min", 0: NodeAdd qnode, "max", -1
p = p + 1
ElseIf qch = "+" Then
NodeAdd qnode, "min", 1: NodeAdd qnode, "max", -1
p = p + 1
ElseIf qch = "?" Then
NodeAdd qnode, "min", 0: NodeAdd qnode, "max", 1
p = p + 1
ElseIf qch = "{" Then
Dim closePos As Long: closePos = InStr(p, pat, "}")
If closePos > 0 Then
Dim content As String: content = SafeMid(pat, p + 1, closePos - p - 1)
Dim parts() As String: parts = VBA.Split(content, ",")
Dim mi As Long, ma As Long
If UBound(parts) = 0 Then
If Len(Trim$(parts(0))) = 0 Then mi = 0: ma = -1 Else mi = CLng(Trim$(parts(0))): ma = mi
Else
mi = CLng(Trim$(parts(0)))
If Len(Trim$(parts(1))) = 0 Then ma = -1 Else ma = CLng(Trim$(parts(1)))
End If
NodeAdd qnode, "min", mi: NodeAdd qnode, "max", ma
p = closePos + 1
Else
NodeAdd qnode, "min", 0: NodeAdd qnode, "max", 1
p = p + 1
End If
End If
If p <= pEnd Then
Dim nextQ As String: nextQ = SafeMid(pat, p, 1)
If nextQ = "?" Then
NodeAdd qnode, "lazy", True
p = p + 1
ElseIf nextQ = "+" Then
NodeAdd qnode, "possessive", True
NodeAdd qnode, "lazy", False
p = p + 1
Else
NodeAdd qnode, "lazy", False
End If
Else
NodeAdd qnode, "lazy", False
End If
Set node = qnode
End If
End If
If Not node Is Nothing Then children.Add node
Loop
NodeAdd seq, "child", children
If Not stopAtPipeOrParen Then
If p <= pEnd And SafeMid(pat, p, 1) = "|" Then
Dim altNode As Collection: Set altNode = newNode("alt")
Dim altChildren As New Collection
altChildren.Add seq
Do While p <= pEnd And SafeMid(pat, p, 1) = "|"
p = p + 1
Dim nextSeq As Collection
Set nextSeq = ParseSequence(pat, p, pEnd, groupCounter)
altChildren.Add nextSeq
Loop
NodeAdd altNode, "child", altChildren
Set ParseSequence = altNode
Exit Function
End If
End If
Set ParseSequence = seq
End Function
Private Sub PropagateFlags(ByVal node As Variant, ByRef flags As String)
If node Is Nothing Then Exit Sub
On Error GoTo err_
If Typename(node) = "Collection" Then
Dim t As String
Dim elm As Variant
For Each elm In node
If Typename(elm) = "Collection" Then
If NodeExists(elm, "type") Then t = NodeGet(elm, "type")
Select Case t
Case "lit", "dot"
Dim tmpNode As Collection
Set tmpNode = elm
NodeAdd tmpNode, "flags_on", flags
End Select
If NodeExists(elm, "child") Then
PropagateFlags NodeGet(elm, "child"), flags
Else
PropagateFlags elm, flags
End If
End If
Next elm
End If
exit_:
Exit Sub
err_:
' Be defensive in production: clear error and continue (or rethrow in debug).
Debug.Print "PropagateFlags error: "; Err.Number; Err.Description
Err.Clear
Resume exit_
End Sub
' ---------- Helper: Find matching ] for a class starting at startPos (where pat[startPos] = "[") ----------
Private Function FindClassEnd(ByVal pat As String, ByVal startPos As Long, ByVal pEnd As Long) As Long
Dim Pos As Long: Pos = startPos
Dim escaped As Boolean: escaped = False
Dim ch As String
' We expect pat(startPos) = "["
Pos = Pos + 1
Do While Pos <= pEnd
ch = SafeMid(pat, Pos, 1)
If escaped Then
escaped = False
Pos = Pos + 1
GoTo ContinueDo
End If
If ch = "\" Then
escaped = True
Pos = Pos + 1
GoTo ContinueDo
End If
If ch = "]" Then
FindClassEnd = Pos
Exit Function
End If
Pos = Pos + 1
ContinueDo:
Loop
FindClassEnd = -1
End Function
' ---------- Helper: Find matching ) for a group starting at startPos (where pat[startPos] = "(") ----------
Private Function FindGroupEnd(ByVal pat As String, ByVal startPos As Long, ByVal pEnd As Long) As Long
Dim Pos As Long: Pos = startPos
Dim depth As Long: depth = 0
Dim inClass As Boolean: inClass = False
Dim escaped As Boolean: escaped = False
Dim ch As String
' start scanning after the '('
Pos = Pos + 1
Do While Pos <= pEnd
ch = SafeMid(pat, Pos, 1)
If escaped Then
escaped = False
Pos = Pos + 1
GoTo ContinueDo
End If
If ch = "\" Then
escaped = True
Pos = Pos + 1
GoTo ContinueDo
End If
If inClass Then
If ch = "]" Then inClass = False
Pos = Pos + 1
GoTo ContinueDo
End If
If ch = "[" Then
inClass = True
Pos = Pos + 1
GoTo ContinueDo
End If
If ch = "(" Then
depth = depth + 1
Pos = Pos + 1
GoTo ContinueDo
End If
If ch = ")" Then
If depth = 0 Then
FindGroupEnd = Pos
Exit Function
Else
depth = depth - 1
Pos = Pos + 1
GoTo ContinueDo
End If
End If
Pos = Pos + 1
ContinueDo:
Loop
FindGroupEnd = -1
End Function
Private Function ParseEscapeNode(ByVal ch As String) As Collection
Dim node As Collection
Select Case ch
Case "d": Set node = ParseClassFromSimple("[0-9]")
Case "D"
' Non-digit escape: reuse the simple class and mark it negated
Set node = ParseClassFromSimple("[0-9]")
NodeAdd node, "neg", True
Case "w": Set node = ParseClassFromSimple("[A-Za-z0-9_]")
Case "W"
' Non-word
Set node = ParseClassFromSimple("[A-Za-z0-9_]")
NodeAdd node, "neg", True
Case "s": Set node = ParseClassFromSimple("[ " & vbTab & vbLf & vbCr & "]")
Case "S"
' Non-space
Set node = ParseClassFromSimple("[ " & vbTab & vbLf & vbCr & "]")
NodeAdd node, "neg", True
Case "n": Set node = newNode("lit"): NodeAdd node, "ch", vbLf
Case "r": Set node = newNode("lit"): NodeAdd node, "ch", vbCr
Case "t": Set node = newNode("lit"): NodeAdd node, "ch", vbTab
Case Else: Set node = newNode("lit"): NodeAdd node, "ch", ch
End Select
Set ParseEscapeNode = node
End Function
' Parse class and build a 256-byte boolean map for quick matching (ASCII fast-path)
Private Function ParseClass(ByVal pat As String, ByVal pStart As Long, ByVal pEnd As Long) As Collection
Dim node As Collection: Set node = newNode("class")
Dim Chars As New Collection
Dim Neg As Boolean: Neg = False
Dim i As Long: i = pStart
If i <= pEnd And SafeMid(pat, i, 1) = "^" Then Neg = True: i = i + 1
Do While i <= pEnd
Dim ch As String: ch = SafeMid(pat, i, 1)
If ch = "\" Then
i = i + 1
If i <= pEnd Then
Dim esc As String: esc = SafeMid(pat, i, 1)
Select Case esc
Case "d"
' digits
Chars.Add Array("range", "0", "9")
Case "w"
' word chars: A-Z, a-z, 0-9, '_'
Chars.Add Array("range", "A", "Z")
Chars.Add Array("range", "a", "z")
Chars.Add Array("range", "0", "9")
Chars.Add "_"
Case "s"
' whitespace
Chars.Add " "
Chars.Add vbTab
Chars.Add vbLf
Chars.Add vbCr
Case Else
' fallback (n, r, t, or literal)
Chars.Add ParseEscapeChar(esc)
End Select
End If
i = i + 1
ElseIf i + 2 <= pEnd And SafeMid(pat, i + 1, 1) = "-" Then
Chars.Add Array("range", SafeMid(pat, i, 1), SafeMid(pat, i + 2, 1))
i = i + 3
Else
Chars.Add ch
i = i + 1
End If
Loop
NodeAdd node, "chars", Chars
NodeAdd node, "neg", Neg
' Precompute ASCII map
Dim mapArr() As Boolean: ReDim mapArr(0 To 255) As Boolean
Dim it As Variant
For Each it In Chars
If IsArray(it) Then
If it(0) = "range" Then
Dim aCode As Long: aCode = AscW(it(1))
Dim bCode As Long: bCode = AscW(it(2))
Dim cc As Long
For cc = aCode To bCode
If cc >= 0 And cc <= 255 Then mapArr(cc) = True
Next cc
End If
Else
Dim Code As Long: Code = AscW(it)
If Code >= 0 And Code <= 255 Then mapArr(Code) = True
End If
Next it
NodeAdd node, "map", mapArr
Set ParseClass = node
End Function
Private Function ParseClassFromSimple(ByVal spec As String) As Collection
Dim node As Collection: Set node = newNode("class")
Dim Chars As New Collection
Dim i As Long: i = 2
Do While i <= Len(spec) - 1
Dim ch As String: ch = SafeMid(spec, i, 1)
If i + 2 <= Len(spec) - 1 And SafeMid(spec, i + 1, 1) = "-" Then
Chars.Add Array("range", ch, SafeMid(spec, i + 2, 1))
i = i + 3
Else
Chars.Add ch
i = i + 1
End If
Loop
NodeAdd node, "chars", Chars
NodeAdd node, "neg", False
' build simple map
Dim mapArr() As Boolean: ReDim mapArr(0 To 255) As Boolean
Dim it As Variant
For Each it In Chars
If IsArray(it) Then
Dim aCode As Long: aCode = AscW(it(1))
Dim bCode As Long: bCode = AscW(it(2))
Dim cc As Long
For cc = aCode To bCode
If cc >= 0 And cc <= 255 Then mapArr(cc) = True
Next cc
Else
Dim ccode As Long: ccode = AscW(it)
If ccode >= 0 And ccode <= 255 Then mapArr(ccode) = True
End If
Next it
NodeAdd node, "map", mapArr
Set ParseClassFromSimple = node
End Function
Private Function ParseEscapeChar(ByVal ch As String) As String
Select Case ch
Case "n": ParseEscapeChar = vbLf
Case "r": ParseEscapeChar = vbCr
Case "t": ParseEscapeChar = vbTab
Case Else: ParseEscapeChar = ch
End Select
End Function
' ------------------ Matching engine (returns explicit end positions) --------
Private Function GetGroupCount(ByVal ast As Collection) As Long
Dim maxIdx As Long: maxIdx = 0
TraverseForGroups NodeGet(ast, "body"), maxIdx
GetGroupCount = maxIdx
End Function
' Deterministic traversal for capturing maximum group index in an AST node tree.
Private Sub TraverseForGroups(ByVal node As Variant, ByRef maxIdx As Long)
If node Is Nothing Then Exit Sub
On Error GoTo err_
If Typename(node) = "Collection" Then
Dim t As String
Dim elm As Variant
Dim tmpIdx As Long
If NodeExists(node, "type") Then t = NodeGet(node, "type")
If t = "group" Then
If NodeExists(node, "idx") Then
tmpIdx = NodeGet(node, "idx")
If tmpIdx > maxIdx Then maxIdx = tmpIdx
End If
End If
For Each elm In node
If Typename(elm) = "Collection" Then
If NodeExists(elm, "type") Then t = NodeGet(elm, "type")
If t = "group" Then
If NodeExists(elm, "idx") Then
tmpIdx = NodeGet(elm, "idx")
If tmpIdx > maxIdx Then maxIdx = tmpIdx
End If
End If
If NodeExists(elm, "child") Then
TraverseForGroups NodeGet(elm, "child"), maxIdx
Else
TraverseForGroups elm, maxIdx
End If
End If
Next elm
End If
exit_:
Exit Sub
err_:
' Be defensive in production: clear error and continue (or rethrow in debug).
' If DEBUG_TRACE Then Debug.Print "TraverseForGroups error: "; Err.Number; Err.Description
Err.Clear
Resume exit_
End Sub
Private Function MatchNode(ByVal node As Collection, ByVal subj As String, ByVal Pos As Long, Optional ByRef outEnd As Variant) As Boolean
' If DEBUG_TRACE Then Debug.Print "TRACE MatchNode ENTER type=" & CStr(NodeGet(node, "type")) & " pos=" & Pos & " caps0=[" & capsStart(0) & "," & capsEnd(0) & "]"
MatchSteps = MatchSteps + 1
If MatchSteps > pMaxMatchSteps Then Err.Raise vbObjectError + 2, "RegexEngine", "Match aborted: too many steps"
Dim t As String: t = CStr(NodeGet(node, "type"))
Dim res As Boolean: res = False
Dim localEnd As Variant: localEnd = Empty
Dim inlineFlags As String
Select Case t
Case "seq"
res = MatchSequence(NodeGet(node, "child"), subj, Pos, localEnd)
Case "lit"
If Pos > Len(subj) Then
res = False
Else
Dim ch As String: ch = NodeGet(node, "ch")
Dim sc As String: sc = SafeMid(subj, Pos, 1)
Dim iCase As Boolean
Dim tOffCase As Boolean
If pIgnoreCase Then
'Inline flags
If NodeExists(node, "flags_on") Then
inlineFlags = NodeGet(node, "flags_on")
tOffCase = InStrB(1, inlineFlags, "-i")
End If
If Not tOffCase Then
If LCase$(sc) = LCase$(ch) Then localEnd = Pos: res = True
Else
' Case sentitive
If sc = ch Then localEnd = Pos: res = True
End If
Else
If NodeExists(node, "flags_on") Then
inlineFlags = NodeGet(node, "flags_on")
iCase = InStrB(1, inlineFlags, "i") And (InStrB(1, inlineFlags, "-i") = 0)
If iCase Then
' Case insensitive
If LCase$(sc) = LCase$(ch) Then localEnd = Pos: res = True
Else
If sc = ch Then localEnd = Pos: res = True
End If
Else
If sc = ch Then localEnd = Pos: res = True
End If
End If
End If
Case "dot"
If Pos > Len(subj) Then
res = False
Else
Dim dAll As Boolean
If NodeExists(node, "flags_on") Then
inlineFlags = NodeGet(node, "flags_on")
tOffCase = (InStrB(1, inlineFlags, "-s") > 0)
dAll = InStrB(1, inlineFlags, "s") And (InStrB(1, inlineFlags, "-s") = 0)
End If
If pDotAll And tOffCase Then
dAll = False
Else
If Not dAll Then dAll = pDotAll
End If
Dim dch As String: dch = SafeMid(subj, Pos, 1)
If (dch = vbLf Or dch = vbCr) And Not dAll Then
res = False
Else
localEnd = Pos: res = True
End If
End If
Case "class"
If Pos > Len(subj) Then
res = False
Else
If MatchCharClassNode(node, SafeMid(subj, Pos, 1)) Then localEnd = Pos: res = True
End If
Case "group"
' Handle both capturing and non-capturing groups.
Dim hasIdx As Boolean: hasIdx = NodeExists(node, "idx")
Dim gidx As Long
If hasIdx Then gidx = CLng(NodeGet(node, "idx")) Else gidx = 0
' Support group-scoped inline flags (flags_on) during the child match.
Dim flagsApplied As Boolean: flagsApplied = False
Dim savedIgnore As Boolean, savedMultiline As Boolean, savedDotAll As Boolean
If NodeExists(node, "flags_on") Then
flagsApplied = True
savedIgnore = pIgnoreCase
savedMultiline = IIf(NodeExists(Me, "pMultiline"), pMultiline, False)
savedDotAll = IIf(NodeExists(Me, "pDotAll"), pDotAll, False)
Dim fstr As String: fstr = CStr(NodeGet(node, "flags_on"))
If InStr(fstr, "i") > 0 Then pIgnoreCase = True
If InStr(fstr, "m") > 0 Then pMultiline = True
If InStr(fstr, "s") > 0 Then pDotAll = True
End If
' --- FULL snapshot before touching ANY capture slots ---
Dim snapGroup() As Long: snapGroup = CopyCaps()
' set group's start (only after we took full snapshot)
If gidx >= 1 Then
capsStart(gidx) = Pos
End If
Dim childEnd As Variant
If MatchNode(NodeGet(node, "child"), subj, Pos, childEnd) Then
If gidx >= 1 Then capsEnd(gidx) = CLng(childEnd)
localEnd = childEnd
res = True
Else
' restore entire capture state (safer than just restoring this group's two slots)
RestoreCaps snapGroup
res = False
End If
' restore flags if applied (always do this after child path)
If flagsApplied Then
pIgnoreCase = savedIgnore
pMultiline = savedMultiline
pDotAll = savedDotAll
End If
Case "alt"
Dim altChild As Variant
For Each altChild In NodeGet(node, "child")
Dim snapAlt() As Long: snapAlt = CopyCaps()
Dim altEnd As Variant
If MatchNode(altChild, subj, Pos, altEnd) Then
localEnd = altEnd
res = True
Exit For
Else
RestoreCaps snapAlt
End If
Next altChild
Case "quant"
' This path is rarely used now since we handle quantifiers inline in MatchSequence.
Dim mn As Long: mn = NodeGet(node, "min")
Dim mX As Long: mX = NodeGet(node, "max")
If mX = -1 Then mX = Len(subj) - Pos + 1
' Attempt greedy expansion and accept largest
Dim occPos() As Long: ReDim occPos(0 To 0)
Dim occCount As Long: occCount = 0
Dim curPos As Long: curPos = Pos
Dim nodeEnd As Variant