-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_parser.py
More file actions
70 lines (59 loc) · 2.39 KB
/
test_parser.py
File metadata and controls
70 lines (59 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import json
import sys
def test_message_extraction(path="пограничный_контроль_эксопрт.json"):
    """Print a short preview of the ``"messages"`` array found in a JSON file.

    Only the beginning of the file is read and it is never parsed as JSON;
    the array is located by a plain substring search for ``"messages": [``.

    Args:
        path: File to inspect. Defaults to the export file name this script
            was written against.

    Returns:
        None. The outcome is reported on stdout: either the preview, a
        "Could not find messages array" notice, or an "Error: ..." line when
        the file cannot be opened or decoded.
    """
    # The I/O is the only part that can realistically fail, so it is the only
    # part guarded. A missing or unreadable file is reported instead of
    # aborting the whole script.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            # In text mode the size argument counts characters, not bytes,
            # so this reads at most 50,000 characters from the start.
            content = f.read(50000)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error: {e}")
        return

    # Find the start of the messages array
    start_idx = content.find('"messages": [')
    if start_idx == -1:
        print("Could not find messages array")
        return

    # Show up to 2000 characters starting at the array key for manual review
    print("Sample content from JSON:")
    print(content[start_idx:start_idx + 2000])
def extract_text_from_message(message):
    """Flatten a message's ``text`` field into a single string.

    Args:
        message: Mapping with an optional ``'text'`` entry. The entry may be
            a list of fragments or any other single value.

    Returns:
        For a list, the concatenation of its fragments: a dict fragment
        contributes its own ``'text'`` value (empty string if absent), any
        other fragment contributes ``str(fragment)``. For a non-list value,
        ``str(value)``. A missing ``'text'`` entry yields ``''``.
    """
    raw = message.get('text', '')

    # Plain (non-list) payloads are simply stringified.
    if not isinstance(raw, list):
        return str(raw)

    return ''.join(
        piece.get('text', '') if isinstance(piece, dict) else str(piece)
        for piece in raw
    )
def test_with_sample_message():
    """Check text extraction and keyword filtering on one hand-built message.

    The sample mixes dict fragments (hashtags) with plain strings in its
    ``text`` list. The extracted text and the relevance flag are printed for
    manual inspection and also asserted, so a regression in
    ``extract_text_from_message`` or in the keyword check fails the test
    instead of passing silently.

    Raises:
        AssertionError: If the extracted text does not start with the two
            hashtags or ends unexpectedly, or if no keyword matches.
    """
    sample_message = {
        "id": 72,
        "type": "message",
        "date": "2022-09-22T16:16:34",
        "text": [
            {"type": "hashtag", "text": "#жд"},
            " ",
            {"type": "hashtag", "text": "#омск"},
            " М 36 лет, IT, один, не служил (нет военника, только приписное). Билеты покупал в 2 часа ночи 22.09. Сел на поезд из Омска."
        ]
    }

    extracted_text = extract_text_from_message(sample_message)
    print(f"Extracted text: {extracted_text}")
    # Fragments must be joined in order with nothing inserted between them.
    assert extracted_text.startswith("#жд #омск "), extracted_text
    assert extracted_text.endswith("Сел на поезд из Омска."), extracted_text

    # Test filtering: keywords are matched as lowercase substrings.
    keywords = ['военник', 'служил', 'повестка', 'границ', 'пересеч']
    text_lower = extracted_text.lower()
    relevant = any(keyword in text_lower for keyword in keywords)
    print(f"Is relevant: {relevant}")
    assert relevant, "sample message should match at least one keyword"
# Script entry point: run both checks in order, separated by a ruled line.
if __name__ == "__main__":
    print("Testing Telegram parser components...")
    test_message_extraction()

    separator = "=" * 50
    print("\n" + separator + "\n")

    test_with_sample_message()