#!/usr/bin/env python3
"""
Test regex patterns for answer extraction.
"""
import re
def test_regex_patterns():
    """Print what each candidate number-extraction regex finds in sample text.

    Three patterns are run with ``findall`` over one long worked answer and
    then over a handful of short strings:

    * ``Current``  -- a digit followed by any run of digits and commas,
      with an optional sign and decimal part.
    * ``Better``   -- comma-grouped thousands *or* a plain run of digits.
    * ``Currency`` -- the ``Better`` number with an optional leading ``$``.

    For the long text the full match list and the last match are printed;
    for the short strings only the match list is printed.  The function
    takes no arguments, returns ``None`` and only writes to standard output.
    """
    # Same sample as a single string: a short first line, a newline, then
    # the explanation paragraph (split here only to keep lines readable).
    text = (
        "$ 50,000\n"
        "Explanation: Josh bought the house for $80,000. "
        "He put in $50,000 in repairs. "
        "This increased the value of the house by 150%. "
        "This means that the value of the house is now 2.5 times its original value. "
        "So the value of the house is now 2.5 × $80,000 = $200,000. "
        "Josh made a profit of $200,000 - $80,000 - $50,000 = $70,000. "
        "But this is not the answer. "
        "The question asks for the profit Josh made, not the amount of money he has. "
        "Josh still has to pay back the $80,000 he borrowed to buy the house. "
        "So he has $200,000 - $80,000 = $120,000. "
        "He also has to pay back the $50,000 he borrowed to do the repairs. "
        "So he has $120,000 - $50,000 = $70,000. "
        "The answer is $70,000."
    )

    # Either properly comma-grouped thousands ("70,000") or a plain digit
    # run ("70000").  Without the plain-run alternative an ungrouped number
    # of four or more digits is chopped up ("70000" -> "700", "00").
    number = r"[-+]?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?"

    # Only non-capturing groups are used, so findall() yields whole matches.
    patterns = [
        ("Current", re.compile(r"[-+]?\d[\d,]*(?:\.\d+)?")),
        ("Better", re.compile(number)),
        # Whitespace is consumed only when it follows a "$"; an unanchored
        # "\s*" would return bare numbers with a leading space (" 150").
        ("Currency", re.compile(r"(?:\$\s*)?" + number)),
    ]

    print("🔍 Testing regex patterns:")
    print("=" * 60)
    for label, pattern in patterns:
        matches = pattern.findall(text)
        print(f"{label} pattern: {pattern.pattern}")
        print(f"Matches: {matches}")
        print(f"Last match: {matches[-1] if matches else 'None'}")
        print()

    # Short inputs that isolate the final-answer extraction.
    test_cases = [
        "$70,000",
        "The answer is $70,000.",
        "So he has $120,000 - $50,000 = $70,000.",
        "The answer is $70,000",
        # Regression case: no thousands separators at all.
        "The answer is 70000.",
    ]
    print("🔍 Testing specific cases:")
    for case in test_cases:
        print(f"Text: '{case}'")
        for label, pattern in patterns:
            print(f" {label}: {pattern.findall(case)}")
        print()
# Run the pattern comparison only when this file is executed as a script;
# importing the module does not trigger any output.
if __name__ == "__main__":
    test_regex_patterns()