diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/date_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/date_recognizer.py index 08bc4aa1d..94a609155 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/date_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/date_recognizer.py @@ -16,7 +16,7 @@ class DateRecognizer(PatternRecognizer): PATTERNS = [ Pattern( "ISO 8601 datetime", - r"\b(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))\b", + r"\b(\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])T[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])T[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])T[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))\b", 0.8, ), Pattern( diff --git a/presidio-analyzer/tests/test_date_recognizer.py b/presidio-analyzer/tests/test_date_recognizer.py index 62a6c700d..b9b458ad7 100644 --- a/presidio-analyzer/tests/test_date_recognizer.py +++ b/presidio-analyzer/tests/test_date_recognizer.py @@ -49,6 +49,11 @@ def entities(): ("Today is 2024-03-15T14:30:00Z\r or not?", 1, ((9, 29),), ((0.6, 0.81),),), ("Today is 2024-03-15T14:30Z\n or not?", 1, ((9, 26),), ((0.6, 0.81),),), ("2024-03-15T14:30Z", 1, ((0, 17),), ((0.6, 1),),), + # Invalid ISO 8601 month/day values must not be detected as a date + ("2024-13-15T14:30:00Z", 0, (), (),), + ("2024-00-15T14:30:00Z", 0, (), (),), + ("2024-12-32T14:30Z", 0, (), (),), + ("2024-12-00T14:30Z", 0, (), (),), ("Today is2024-06-05T09:15:30.500-07:00", 0, (), (),), # Word boundary tests ("Today is5/21", 0, (), (),),