-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathspell_checker.py
More file actions
117 lines (91 loc) · 3.75 KB
/
spell_checker.py
File metadata and controls
117 lines (91 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
Spell Checker Service for Recipe Names
Uses Python's built-in difflib for fuzzy matching - NO external dependencies!
Optimized for Vercel serverless deployment (lightweight).
"""
import os
import logging
from difflib import SequenceMatcher, get_close_matches
# Configure logging: set the root logger to INFO as an import-time side effect,
# then create this module's own logger, named after the module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class SpellChecker:
    """Lightweight recipe-name spell checker using difflib fuzzy matching.

    Recipe names are read lazily, on first use, from ``models/recipes.csv``
    located next to this module. The checker fails open: if the list cannot
    be loaded, or the input cannot be checked, the input is reported as
    correct with no suggestions.
    """

    def __init__(self):
        """Create an empty checker; no file I/O happens here."""
        self.recipes = []           # recipe names as read from the file
        self.recipes_lower = []     # lower-cased names, index-aligned with recipes
        self._loaded = False        # True once the file was read successfully
        self._load_attempted = False  # True after the first (and only) load attempt

    def _ensure_loaded(self):
        """Load recipes on first use and report whether they are available.

        The load is attempted at most once; a failed attempt is not retried.

        Returns:
            bool: True if recipe names were loaded successfully.
        """
        if not self._load_attempted:
            self._load_attempted = True
            self._load_recipes()
        return self._loaded

    def _load_recipes(self):
        """Load recipe names from the CSV file next to this module.

        The first line is treated as a header and skipped; every other
        non-blank line, stripped of surrounding whitespace, becomes one name.
        Any failure is logged and leaves the checker in the "not loaded" state.
        """
        try:
            base_path = os.path.dirname(os.path.abspath(__file__))
            recipes_path = os.path.join(base_path, 'models', 'recipes.csv')
            if not os.path.exists(recipes_path):
                logger.warning("Recipes file not found: %s", recipes_path)
                return

            # NOTE(review): each line is taken verbatim as a recipe name; a
            # quoted or multi-column CSV would need the csv module - confirm
            # the file format.
            with open(recipes_path, 'r', encoding='utf-8') as f:
                next(f, None)  # skip the header line (if any)
                stripped = (line.strip() for line in f)
                names = [name for name in stripped if name]

            # Assign only after a complete, successful read.
            self.recipes = names
            self.recipes_lower = [name.lower() for name in names]
            logger.info("Loaded %d recipe names", len(self.recipes))
            self._loaded = True
        except Exception as e:
            # Best-effort by design: the service keeps running without
            # spell checking when the list cannot be read.
            logger.error("Error loading recipes: %s", e)
            self._loaded = False

    def _names_by_lower(self):
        """Map each lower-cased name to the first original spelling seen.

        Collapsing duplicates here keeps the same suggestion from being
        returned several times when the list contains repeated names.
        """
        lookup = {}
        for original, lowered in zip(self.recipes, self.recipes_lower):
            lookup.setdefault(lowered, original)
        return lookup

    def check_spelling(self, text, threshold=0.6, top_n=3):
        """Check if input matches a recipe; return suggestions if not.

        Matching is case-insensitive and ignores surrounding whitespace.

        Args:
            text: User input recipe name.
            threshold: Minimum similarity score (0.0 to 1.0) for a suggestion.
            top_n: Maximum number of suggestions.

        Returns:
            dict: ``{"is_correct": bool, "suggestions": list}``. ``is_correct``
            is True for an exact match, when the best suggestion is more than
            90% similar, when nothing similar was found, and whenever the
            check cannot be performed (recipes unavailable, empty or
            non-string input, or an internal error). ``suggestions`` holds
            distinct recipe names in their original spelling, best first.
        """
        # Fail open on anything that cannot be checked, including non-string
        # input that would otherwise raise on .strip().
        if not self._ensure_loaded() or not isinstance(text, str):
            return {"is_correct": True, "suggestions": []}

        text_lower = text.strip().lower()
        if not text_lower:
            return {"is_correct": True, "suggestions": []}

        originals = self._names_by_lower()

        # Exact match check
        if text_lower in originals:
            return {"is_correct": True, "suggestions": []}

        try:
            # Use difflib's get_close_matches for fuzzy matching
            matches = get_close_matches(
                text_lower,
                list(originals),
                n=top_n,
                cutoff=threshold,
            )
            # Map back to original case
            suggestions = [originals[match] for match in matches]

            if suggestions:
                # A top match that is very close (>90% similar) counts as correct.
                ratio = SequenceMatcher(None, text_lower, matches[0]).ratio()
                is_correct = ratio > 0.9
            else:
                # Nothing to suggest, so the input is not flagged.
                is_correct = True

            return {
                "is_correct": bool(is_correct),
                "suggestions": suggestions,
            }
        except Exception as e:
            # e.g. get_close_matches rejects an out-of-range threshold or top_n.
            logger.error("Spell check error: %s", e)
            return {"is_correct": True, "suggestions": []}

    def get_all_recipes(self):
        """Return all loaded recipe names, or an empty list if loading failed."""
        self._ensure_loaded()
        return self.recipes if self._loaded else []
# Global singleton: one shared checker created at import time. Construction
# only initializes empty state; recipes are loaded on the first method call.
spell_checker = SpellChecker()