-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidators.py
More file actions
381 lines (303 loc) · 11.8 KB
/
validators.py
File metadata and controls
381 lines (303 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
"""Core validation logic for Indian Identity and Financial documents."""
import re
from typing import Iterable, Optional
from .utils import validate_verhoeff, validate_luhn
# Pre-compiled format patterns, keyed by document type.
#
# Every pattern is compiled with re.ASCII so that ``\d`` and ``\w`` only match
# ASCII characters. Without the flag, decimal digits from other scripts
# (Devanagari, Arabic-Indic, ...) would satisfy the numeric formats and
# non-ASCII letters would satisfy the UPI format.
#
# NOTE: ``$`` also matches just before a single trailing newline. Use
# ``.fullmatch()`` instead of ``.match()`` when a trailing "\n" must be rejected.
PATTERNS = {
    "pan": re.compile(r"^[A-Z]{5}[0-9]{4}[A-Z]{1}$", re.ASCII),
    "tan": re.compile(r"^[A-Z]{4}[0-9]{5}[A-Z]$", re.ASCII),
    "dl": re.compile(r"^[A-Z]{2}[0-9]{2}[0-9]{4}[0-9]{7}$", re.ASCII),
    "uan": re.compile(r"^[1-9]\d{11}$", re.ASCII),
    "abha": re.compile(r"^[1-9]\d{13}$", re.ASCII),
    "fssai": re.compile(r"^[12]\d{13}$", re.ASCII),
    "pran": re.compile(r"^[1-9]\d{11}$", re.ASCII),
    "mobile": re.compile(r"^[6-9]\d{9}$", re.ASCII),
    "ifsc": re.compile(r"^[A-Z]{4}0[A-Z0-9]{6}$", re.ASCII),
    "credit_card": re.compile(r"^\d{13,19}$", re.ASCII),
    "vehicle": re.compile(r"^[A-Z]{2}[0-9]{1,2}[A-Z]{0,3}[0-9]{4}$", re.ASCII),
    "upi": re.compile(r"^[\w\.\-]+@[\w\.\-]+$", re.ASCII),
    "voterid": re.compile(r"^[A-Z]{3}[0-9]{7}$", re.ASCII),
    "passport": re.compile(r"^[A-Z][0-9]{7}$", re.ASCII),
    "cin": re.compile(r"^[LU]\d{5}[A-Z]{2}\d{4}[A-Z]{3}\d{6}$", re.ASCII),
    "pincode": re.compile(r"^[1-9]\d{5}$", re.ASCII),
}
def is_pan(pan_number: str) -> bool:
    """
    Validates Permanent Account Number (PAN).

    Format: 5 letters + 4 digits + 1 letter (10 chars total).
    Regex: ^[A-Z]{5}[0-9]{4}[A-Z]{1}$
    The input is upper-cased before matching, so lower-case letters are
    accepted. Surrounding whitespace is NOT removed.

    Args:
        pan_number: Candidate PAN. Anything that is not a ``str`` is rejected.

    Returns:
        True if the upper-cased value matches the PAN format exactly.

    Example: ABCDE1234F
    Usage: is_pan("ABCDE1234F") -> True
    """
    if not isinstance(pan_number, str):
        return False
    # fullmatch, not match: with match() the pattern's "$" would also accept
    # a value that ends in a newline ("ABCDE1234F\n").
    return PATTERNS["pan"].fullmatch(pan_number.upper()) is not None
def is_tan(tan_number: str) -> bool:
    """
    Validates Tax Deduction and Collection Account Number (TAN).

    Format: 4 letters + 5 digits + 1 letter (10 chars total).
    Regex: ^[A-Z]{4}[0-9]{5}[A-Z]$
    Leading/trailing whitespace is stripped and the value is upper-cased
    before matching.

    Args:
        tan_number: Candidate TAN; falsy values are rejected, anything else
            is converted with ``str()``.

    Returns:
        True if the normalised value matches the TAN format.

    Example: DELM12345L
    """
    if tan_number:
        normalized = str(tan_number).strip().upper()
        return PATTERNS["tan"].match(normalized) is not None
    return False
def is_dl(dl_number: str) -> bool:
    """
    Validates Indian Driving License Number (Standard Sarathi Format).

    Format: State(2 letters) + RTO(2 digits) + Year(4 digits) + Number(7 digits),
    15 chars total.
    Regex: ^[A-Z]{2}[0-9]{2}[0-9]{4}[0-9]{7}$
    Spaces and dashes are removed and the value is upper-cased before matching.

    Args:
        dl_number: Candidate licence number; falsy values are rejected,
            anything else is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: MH1220140001234
    """
    if not dl_number:
        return False
    cleaned = str(dl_number).replace(" ", "").replace("-", "").upper()
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["dl"].fullmatch(cleaned) is not None
def is_uan(uan_number: str) -> bool:
    """
    Validates EPFO Universal Account Number (UAN).

    Format: exactly 12 digits, the first of which cannot be 0.
    Spaces and dashes are removed before matching.

    Args:
        uan_number: Candidate UAN; falsy values are rejected, anything else
            is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: 100012345678
    """
    if not uan_number:
        return False
    cleaned = str(uan_number).replace(" ", "").replace("-", "")
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["uan"].fullmatch(cleaned) is not None
def is_abha(abha_id: str) -> bool:
    """
    Validates Ayushman Bharat Health Account (ABHA) Number.

    Format: exactly 14 digits, the first of which cannot be 0.
    Spaces and dashes are removed before matching.

    Args:
        abha_id: Candidate ABHA number; falsy values are rejected, anything
            else is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: 91-1234-5678-9012 or 91123456789012
    """
    if not abha_id:
        return False
    cleaned = str(abha_id).replace(" ", "").replace("-", "")
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["abha"].fullmatch(cleaned) is not None
def is_fssai(fssai_number: str) -> bool:
    """
    Validates FSSAI (Food Safety and Standards Authority of India) License Number.

    Format: exactly 14 digits, the first of which must be 1 or 2.
    Spaces and dashes are removed before matching.

    Args:
        fssai_number: Candidate licence number; falsy values are rejected,
            anything else is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: 10012011000001
    """
    if not fssai_number:
        return False
    cleaned = str(fssai_number).replace(" ", "").replace("-", "")
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["fssai"].fullmatch(cleaned) is not None
def is_pran(pran_number: str) -> bool:
    """
    Validates Permanent Retirement Account Number (PRAN) for NPS.

    Format: exactly 12 digits, the first of which cannot be 0.
    Spaces and dashes are removed before matching.

    Args:
        pran_number: Candidate PRAN; falsy values are rejected, anything else
            is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: 110012345678
    """
    if not pran_number:
        return False
    cleaned = str(pran_number).replace(" ", "").replace("-", "")
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["pran"].fullmatch(cleaned) is not None
def is_mobile(number: str) -> bool:
    """
    Validates 10-digit Indian mobile number.

    Format: 10 digits, the first of which is 6, 7, 8 or 9.
    Spaces and dashes are removed, then a single leading "+91" country
    prefix is dropped before matching.

    Args:
        number: Candidate mobile number; falsy values are rejected, anything
            else is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: 9876543210 or +91-9876543210 or 9876-543210
    Usage: is_mobile("9876543210") -> True
    """
    if not number:
        return False
    cleaned = str(number).replace(" ", "").replace("-", "")
    # Only strip "+91" when it is a prefix; removing it from the middle or the
    # end would turn malformed input such as "9876+91543210" into a valid number.
    if cleaned.startswith("+91"):
        cleaned = cleaned[3:]
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["mobile"].fullmatch(cleaned) is not None
def is_ifsc(code: str, valid_bank_codes: Optional[Iterable[str]] = None) -> bool:
    """
    Validates Indian Financial System Code (IFSC).

    Format: AAAA0XXXXXX (11 characters total)
    - 4 uppercase letters (bank code)
    - literal '0' as the 5th character
    - 6 alphanumeric characters (branch code)
    Regex: ^[A-Z]{4}0[A-Z0-9]{6}$
    Spaces are removed and the value is upper-cased before matching.

    Args:
        code: Candidate IFSC; falsy values are rejected, anything else is
            converted with ``str()``.
        valid_bank_codes: Optional collection of accepted 4-letter bank codes
            (compared case-insensitively). A single string is treated as one
            bank code. When None, any bank code is accepted.

    Returns:
        True if the format matches and, when ``valid_bank_codes`` is given,
        the bank code is one of them.

    Example: SBIN0004321
    Usage: is_ifsc("SBIN0004321") -> True
    """
    if not code:
        return False
    candidate = str(code).replace(" ", "").upper()
    # fullmatch so that a trailing newline is not silently accepted by "$".
    if PATTERNS["ifsc"].fullmatch(candidate) is None:
        return False
    if valid_bank_codes is None:
        return True
    # A bare string would otherwise be iterated character by character and
    # could never equal a 4-letter bank code.
    if isinstance(valid_bank_codes, str):
        valid_bank_codes = (valid_bank_codes,)
    bank_code = candidate[:4]
    return any(str(known).upper() == bank_code for known in valid_bank_codes)
def is_vehicle(number: str) -> bool:
    """
    Validates RC (Registration Certificate) number.

    Format:
    - 2 uppercase letters (state code)
    - 1 or 2 digits (district code)
    - 0 to 3 uppercase letters (series, optional)
    - exactly 4 digits (registration number)
    Regex: ^[A-Z]{2}[0-9]{1,2}[A-Z]{0,3}[0-9]{4}$
    Spaces and dashes are removed and the value is upper-cased before matching.

    Args:
        number: Candidate registration number; falsy values are rejected,
            anything else is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: DL01CA1234 or UP-16-Z-5555
    Usage: is_vehicle("DL01CA1234") -> True
    """
    # Explicit guard, matching the other validators in this module.
    if not number:
        return False
    cleaned = str(number).replace(" ", "").replace("-", "").upper()
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["vehicle"].fullmatch(cleaned) is not None
def is_upi(upi_id: str) -> bool:
    """
    Validates UPI ID format.

    Format: username@bankcode
    - one or more word characters, dots or dashes
    - a literal '@'
    - one or more word characters, dots or dashes
    The value is matched as given: no whitespace removal and no case change.

    Args:
        upi_id: Candidate UPI ID; falsy values are rejected, anything else is
            converted with ``str()``.

    Returns:
        True if the value matches the format exactly.

    Example: user@paytm or john.doe@ybl
    Usage: is_upi("user@paytm") -> True
    """
    # Explicit guard, matching the other validators in this module.
    if not upi_id:
        return False
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["upi"].fullmatch(str(upi_id)) is not None
def is_gstin(gstin: str) -> bool:
    """
    Validates GSTIN with Modulo-36 checksum.

    Format (15 chars): 2 digits (state code) + 5 letters + 4 digits + 1 letter
    (together the 10-character PAN) + 1 char from 1-9/A-Z + literal 'Z'
    + 1 alphanumeric check character.
    The value is upper-cased and stripped of surrounding whitespace first.

    Checksum: each of the first 14 characters is mapped to its index in
    "0-9A-Z", multiplied by an alternating weight (1, 2, 1, 2, ...), and the
    base-36 digits of each product are summed. The check character is the
    symbol at index ``(36 - total % 36) % 36``.

    Returns:
        True if the format matches and the 15th character equals the computed
        check character; False otherwise.

    Example: 27AAPFU0939F1ZV
    Usage: is_gstin("27AAPFU0939F1ZV") -> True
    """
    candidate = str(gstin).upper().strip()
    if re.match(r"^\d{2}[A-Z]{5}\d{4}[A-Z]{1}[1-9A-Z]{1}Z[0-9A-Z]{1}$", candidate) is None:
        return False

    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    checksum = 0
    for position, symbol in enumerate(candidate[:14]):
        value = alphabet.find(symbol)
        if value < 0:
            # The format check lets through digits outside "0-9" (non-ASCII
            # decimal digits); those have no code point in the alphabet.
            return False
        weight = 2 if position % 2 else 1
        high, low = divmod(weight * value, 36)
        checksum += high + low

    expected = alphabet[-checksum % 36]
    return candidate[14] == expected
def is_aadhaar(aadhaar: str) -> bool:
    """
    Validates Aadhaar number with Verhoeff checksum.

    Format: exactly 12 ASCII digits, the first of which is 2-9
    (cannot start with 0 or 1).
    Spaces and dashes are removed before matching.
    Checksum: delegated to ``validate_verhoeff`` on all 12 digits.

    Args:
        aadhaar: Candidate Aadhaar number; falsy values are rejected, anything
            else is converted with ``str()``.

    Returns:
        False if the format does not match; otherwise the result of
        ``validate_verhoeff`` for the cleaned digits.

    Format example: 2341-2345-1234 (a real number must also pass the checksum)
    """
    if not aadhaar:
        return False
    digits = str(aadhaar).replace(" ", "").replace("-", "")
    # fullmatch with an explicit [0-9] class: match() + "$" would accept a
    # trailing newline, and "\d" would accept non-ASCII digits, either of which
    # would then be handed to the checksum routine.
    if re.fullmatch(r"[2-9][0-9]{11}", digits) is None:
        return False
    return validate_verhoeff(digits)
def is_voterid(voter_id: str) -> bool:
    """
    Validates Voter ID (EPIC) number.

    Format: 3 letters + 7 digits (10 chars total).
    Regex: ^[A-Z]{3}[0-9]{7}$
    Spaces are removed and the value is upper-cased before matching.

    Args:
        voter_id: Candidate EPIC number; falsy values are rejected, anything
            else is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: ABC1234567 or XYZ9876543
    Usage: is_voterid("ABC1234567") -> True
    """
    if not voter_id:
        return False
    cleaned = str(voter_id).replace(" ", "").upper()
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["voterid"].fullmatch(cleaned) is not None
def is_passport(passport: str) -> bool:
    """
    Validates Passport number.

    Format: 1 letter + 7 digits (8 chars total).
    Regex: ^[A-Z][0-9]{7}$
    Spaces are removed and the value is upper-cased before matching.

    Args:
        passport: Candidate passport number; falsy values are rejected,
            anything else is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: A1234567 or Z9876543
    Usage: is_passport("A1234567") -> True
    """
    if not passport:
        return False
    cleaned = str(passport).replace(" ", "").upper()
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["passport"].fullmatch(cleaned) is not None
def is_cin(cin: str) -> bool:
    """
    Validates Corporate Identity Number (CIN).

    Format (21 chars):
    - L or U (listed / unlisted company)
    - 5 digits (industry classification code)
    - 2 uppercase letters (state code)
    - 4 digits (year of incorporation)
    - 3 uppercase letters (ownership code: PTC, PLC, OPC, GOI, etc.)
    - 6 digits (registration number)
    Spaces are removed and the value is upper-cased before matching.

    Args:
        cin: Candidate CIN; falsy values are rejected, anything else is
            converted with ``str()``.

    Returns:
        True if the cleaned value is 21 characters long and matches the format.

    Example: U12345MH2024PTC123456
    Usage: is_cin("U12345MH2024PTC123456") -> True
    """
    if not cin:
        return False
    compact = str(cin).replace(" ", "").upper()
    # The length test runs first, so anything that is not exactly 21
    # characters is rejected before the pattern is consulted.
    return len(compact) == 21 and PATTERNS["cin"].match(compact) is not None
def is_pincode(pincode: str) -> bool:
    """
    Validates Indian postal pincode.

    Format: 6 digits with no leading zero (100000 to 999999).
    Spaces are removed before matching.

    Args:
        pincode: Candidate pincode; falsy values are rejected, anything else
            is converted with ``str()``.

    Returns:
        True if the cleaned value matches the format exactly.

    Example: 110001 or 560034
    Usage: is_pincode("110001") -> True
    """
    if not pincode:
        return False
    cleaned = str(pincode).replace(" ", "")
    # fullmatch so that a trailing newline is not silently accepted by "$".
    return PATTERNS["pincode"].fullmatch(cleaned) is not None
def is_credit_card(card_number: str) -> bool:
    """
    Validates Credit/Debit Card numbers (Visa, MasterCard, RuPay, Amex).

    Format: 13 to 19 digits after spaces and dashes are removed.
    Checksum: delegated to ``validate_luhn`` (Luhn / Mod 10).

    Args:
        card_number: Candidate card number; falsy values are rejected,
            anything else is converted with ``str()``.

    Returns:
        False if the format does not match; otherwise the result of
        ``validate_luhn`` for the cleaned digits.

    Example: 4111111111111111
    Usage: is_credit_card("4111 1111 1111 1111") -> True
        (a Luhn-valid number; assumes validate_luhn implements standard Luhn)
    """
    if not card_number:
        return False
    digits = str(card_number).replace(" ", "").replace("-", "")
    # fullmatch so that a value ending in a newline is rejected here instead
    # of being passed on to the checksum routine.
    if PATTERNS["credit_card"].fullmatch(digits) is None:
        return False
    return validate_luhn(digits)