From dfc351bab575c9f9370599171eabf2d31ac80e3f Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 22 Apr 2025 19:22:33 -0500 Subject: [PATCH 01/30] added spanish folder --- .../resources/config/es-es/date_time.json | 156 ++++ .../src/main/resources/config/es-es/day.word | 1 + .../src/main/resources/config/es-es/days.word | 1 + .../src/main/resources/config/es-es/hour.word | 1 + .../main/resources/config/es-es/hours.word | 1 + .../main/resources/config/es-es/minute.word | 1 + .../main/resources/config/es-es/minutes.word | 1 + .../main/resources/config/es-es/second.word | 1 + .../main/resources/config/es-es/seconds.word | 1 + .../resources/config/es-es/tokenizer.json | 840 ++++++++++++++++++ .../config/es-es/date_time_test.json | 43 + 11 files changed, 1047 insertions(+) create mode 100644 numbers/src/main/resources/config/es-es/date_time.json create mode 100644 numbers/src/main/resources/config/es-es/day.word create mode 100644 numbers/src/main/resources/config/es-es/days.word create mode 100644 numbers/src/main/resources/config/es-es/hour.word create mode 100644 numbers/src/main/resources/config/es-es/hours.word create mode 100644 numbers/src/main/resources/config/es-es/minute.word create mode 100644 numbers/src/main/resources/config/es-es/minutes.word create mode 100644 numbers/src/main/resources/config/es-es/second.word create mode 100644 numbers/src/main/resources/config/es-es/seconds.word create mode 100644 numbers/src/main/resources/config/es-es/tokenizer.json create mode 100644 numbers/src/test/resources/config/es-es/date_time_test.json diff --git a/numbers/src/main/resources/config/es-es/date_time.json b/numbers/src/main/resources/config/es-es/date_time.json new file mode 100644 index 00000000..467d6c01 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/date_time.json @@ -0,0 +1,156 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": 
{"match": "^2\\d$", "format": "{xx}"}, + "5": {"match": "^[3-9]\\d$", "format": "{x0} y {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^100$", "format": "cien"}, + "2": {"match": "^1\\d{2}$", "format": "ciento"}, + "3": {"match": "^\\d{3}$", "format": "{x_in_x00}cientos"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^1[1|9]\\d{2}$", "format": "{xx}"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^1\\d{2}$", "format": "ciento {formatted_decade} {bc}"}, + "4": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "5": {"match": "^\\d000$", "format": "{formatted_thousand} {bc}"}, + "6": {"match": "^\\d1\\d{2}$", "format": "{formatted_thousand} ciento {formatted_decade} {bc}"}, + "7": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "8": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_hundreds} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "a.C." 
+ }, + "date_format": { + "date_full": "{weekday}, {day} de {month} de {formatted_year}", + "date_full_no_year": "{weekday}, {day} de {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "hoy", + "tomorrow": "mañana", + "yesterday": "ayer" + }, + "date_time_format": { + "date_time": "{formatted_date} a las {formatted_time}" + }, + "weekday": { + "0": "lunes", + "1": "martes", + "2": "miércoles", + "3": "jueves", + "4": "viernes", + "5": "sábado", + "6": "domingo" + }, + "date": { + "0": "cero", + "1": "uno", + "2": "dos", + "3": "tres", + "4": "cuatro", + "5": "cinco", + "6": "seis", + "7": "siete", + "8": "ocho", + "9": "nueve", + "10": "diez", + "11": "once", + "12": "doce", + "13": "trece", + "14": "catorce", + "15": "quince", + "16": "dieciséis", + "17": "diecisiete", + "18": "dieciocho", + "19": "diecinueve", + "20": "veinte", + "22": "veintidós", + "23": "veintitres", + "24": "veinticuatro", + "25": "veinticinco", + "26": "veintiseis", + "27": "veintisiete", + "28": "veintiocho", + "29": "veintinueve", + "30": "treinta", + "31": "treinta y uno" + }, + "month": { + "1": "enero", + "2": "febrero", + "3": "marzo", + "4": "abril", + "5": "mayo", + "6": "junio", + "7": "julio", + "8": "agosto", + "9": "septiembre", + "10": "octubre", + "11": "noviembre", + "12": "diciembre" + }, + "number": { + "0": "cero", + "1": "uno", + "2": "dos", + "3": "tres", + "4": "cuatro", + "5": "cinco", + "6": "seis", + "7": "siete", + "8": "ocho", + "9": "nueve", + "10": "diez", + "11": "once", + "12": "doce", + "13": "trece", + "14": "catorce", + "15": "quince", + "16": "dieciséis", + "17": "diecisiete", + "18": "dieciocho", + "19": "diecinueve", + "20": "veinte", + "22": "veintidós", + "23": "veintitres", + "24": "veinticuatro", + "25": "veinticinco", + "26": "veintiseis", + "27": "veintisiete", + "28": "veintiocho", + "29": "veintinueve", + "30": "treinta", + "40": "cuarenta", + "50": "cincuenta", + "60": "sesenta", + "70": "setenta", + "80": "ochenta", + "90": 
"noventa", + "100": "cien", + "200": "doscientos", + "300": "trescientos", + "400": "cuatrocientos", + "500": "quinientos", + "600": "seiscientos", + "700": "setecientos", + "800": "ochocientos", + "900": "novecientos", + "1000": "mil", + "1100": "mil ciento", + "2100": "dos mil ciento", + "3100": "tres mil ciento", + "4100": "cuatro mil ciento", + "5100": "cinco mil ciento", + "6100": "seis mil ciento", + "7100": "siete mil ciento", + "8100": "ocho mil ciento", + "9100": "nueve mil ciento" + } +} diff --git a/numbers/src/main/resources/config/es-es/day.word b/numbers/src/main/resources/config/es-es/day.word new file mode 100644 index 00000000..eff79eda --- /dev/null +++ b/numbers/src/main/resources/config/es-es/day.word @@ -0,0 +1 @@ +día \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/days.word b/numbers/src/main/resources/config/es-es/days.word new file mode 100644 index 00000000..1d0beda5 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/days.word @@ -0,0 +1 @@ +días \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/hour.word b/numbers/src/main/resources/config/es-es/hour.word new file mode 100644 index 00000000..30325568 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/hour.word @@ -0,0 +1 @@ +hora \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/hours.word b/numbers/src/main/resources/config/es-es/hours.word new file mode 100644 index 00000000..cb3c87d0 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/hours.word @@ -0,0 +1 @@ +horas \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/minute.word b/numbers/src/main/resources/config/es-es/minute.word new file mode 100644 index 00000000..3def900f --- /dev/null +++ b/numbers/src/main/resources/config/es-es/minute.word @@ -0,0 +1 @@ +minuto \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/minutes.word 
b/numbers/src/main/resources/config/es-es/minutes.word new file mode 100644 index 00000000..dd89c355 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/minutes.word @@ -0,0 +1 @@ +minutos \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/second.word b/numbers/src/main/resources/config/es-es/second.word new file mode 100644 index 00000000..08aee9a2 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/second.word @@ -0,0 +1 @@ +segundo \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/seconds.word b/numbers/src/main/resources/config/es-es/seconds.word new file mode 100644 index 00000000..608bae4c --- /dev/null +++ b/numbers/src/main/resources/config/es-es/seconds.word @@ -0,0 +1 @@ +segundos \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json new file mode 100644 index 00000000..34468203 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -0,0 +1,840 @@ +{ + "spaces": " \t\n\f\r:;_!?<>|=()[]{}»«*~^`'\"", + "characters_as_word": "%‰#-+.,/", + "raw_number_categories": [ + "number", + "raw" + ], + "plural_endings": [ + ], + "word_matches": [ + { + "categories": [ + "ignore", + "date_time_ignore" + ], + "values": [ + "y", + "con" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "day_adder_the", + "ampm_before", + "bcad_after" + ], + "values": [ + "a" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "day_adder_the" + ], + "values": [ + "uno", + "una" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "thousand_separator" + ], + "values": [ + "," + ] + }, + { + "categories": [ + "ordinal_suffix" + ], + "values": [ + "ro", + "ra", + "do", + "da", + "avo", + "ava" + ] + }, + { + "categories": [ + "point" + ], + "values": [ + "punto" + ] + }, + { + "categories": [ + "point", + "post_oclock" + ], + "values": [ + "punto" + ] 
+ }, + { + "categories": [ + "point", + "ignore", + "date_time_ignore" + ], + "values": [ + "." + ] + }, + { + "categories": [ + "fraction_separator" + ], + "values": [ + "sobre", + "dividido", + "dividido entre" + ] + }, + { + "categories": [ + "fraction_separator", + "date_time_ignore" + ], + "values": [ + "/" + ] + }, + { + "categories": [ + "sign", + "positive" + ], + "values": [ + "positivo", + "más", + "+" + ] + }, + { + "categories": [ + "sign", + "negative" + ], + "values": [ + "negativo", + "menos" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "sign", + "negative" + ], + "values": [ + "-" + ] + }, + { + "categories": [ + "duration_separator", + "date_time_ignore" + ], + "values": [ + "de" + ] + }, + { + "categories": [ + "yesterday" + ], + "values": [ + "ayer" + ] + }, + { + "categories": [ + "today" + ], + "values": [ + "hoy" + ] + }, + { + "categories": [ + "tomorrow" + ], + "values": [ + "mañana" + ] + }, + { + "categories": [ + "day_adder_the", + "date_time_ignore", + "pre_hour", + "pre_special_hour" + ], + "values": [ + "el" + ] + }, + { + "categories": [ + "day_adder_day" + ], + "values": [ + "día" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "post_relative_indicator", + "positive", + "day_adder_after", + "special_minute_after", + "pre_special_hour" + ], + "values": [ + "después" + ] + }, + { + "categories": [ + "day_adder_before", + "special_minute_before", + "bcad_before", + "pre_relative_indicator", + "post_relative_indicator", + "negative", + "pre_special_hour" + ], + "values": [ + "antes", + "para" + ] + }, + { + "categories": [ + "date_time_ignore", + "special_minute_before" + ], + "values": [ + "de" + ] + }, + { + "categories": [ + "special_minute_after", + "pre_relative_indicator", + "negative" + ], + "values": [ + "antes" + ] + }, + { + "categories": [ + "pre_hour" + ], + "values": [ + "hora", + "horas" + ] + }, + { + "categories": [ + "pre_hour", + "pre_special_hour" + ], + "values": [ + "a la", + "a 
las" + ] + }, + { + "categories": [ + "pre_special_hour" + ], + "values": [ + "este", + "estos", + "ese", + "esos", + "aquel", + "aquellos" + ] + }, + { + "categories": [ + "pre_special_hour", + "pre_relative_indicator", + "positive", + "pre_oclock" + ], + "values": [ + "en" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "positive" + ], + "values": [ + "siguiente", + "posterior", + "próximo" + ] + }, + { + "categories": [ + "date_time_ignore", + "pre_relative_indicator", + "positive" + ], + "values": [ + "en" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "post_relative_indicator", + "positive" + ], + "values": [ + "siguiente", + "siguientes" + ] + }, + { + "categories": [ + "post_relative_indicator", + "negative" + ], + "values": [ + "pasado", + "pasados" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "negative" + ], + "values": [ + "anterior", + "pasado", + "precedente" + ] + }, + { + "categories": [ + "bcad_before" + ], + "values": [ + "b" + ] + }, + { + "categories": [ + "bcad_after" + ], + "values": [ + "año" + ] + }, + { + "categories": [ + "bcad_identifier" + ], + "values": [ + "cristo" + ] + }, + { + "categories": [ + "bcad_identifier", + "bcad_after" + ], + "values": [ + "c", + "común", + "actual" + ] + }, + { + "categories": [ + "bcad_identifier", + "bcad_era" + ], + "values": [ + "era", + "e" + ] + }, + { + "categories": [ + "bcad_before_combined" + ], + "values": [ + "a.C.", + "aC", + "ane" + ] + }, + { + "categories": [ + "bcad_after_combined" + ], + "values": [ + "d.C.", + "dC", + "ec" + ] + }, + { + "categories": [ + "ampm_before" + ], + "values": [ + "ante" + ] + }, + { + "categories": [ + "ampm_after" + ], + "values": [ + "p", + "post" + ] + }, + { + "categories": [ + "ampm_identifier" + ], + "values": [ + "meridiem", + "meridiano", + "m" + ] + }, + { + "categories": [ + "ampm_before_combined" + ], + "values": [ + "am" + ] + }, + { + "categories": [ + "ampm_after_combined" + ], + "values": [ + "pm" + ] + }, 
+ { + "categories": [ + "post_oclock" + ], + "values": [ + "punto" + ] + }, + { + "categories": [ + "oclock_combined" + ], + "values": [ + "en punto" + ] + } + ], + "number_mappings": [ + { + "categories": [ + "number", + "digit", + "digit_after_point" + ], + "values": { + "cero": 0, + "uno": 1, + "una": 1, + "un": 1, + "dos": 2, + "tres": 3, + "cuatro": 4, + "cinco": 5, + "seis": 6, + "siete": 7, + "ocho": 8, + "nueve": 9 + } + }, + { + "categories": [ + "number", + "digit_after_point", + "pre_oclock" + ], + "values": { + "o": 0 + } + }, + { + "categories": [ + "number", + "teen" + ], + "values": { + "diez": 10, + "once": 11, + "doce": 12, + "trece": 13, + "catorce": 14, + "quince": 15, + "dieciséis": 16, + "diecisiete": 17, + "dieciocho": 18, + "diecinueve": 19, + "veintiuno": 21, + "veintidós": 22, + "veintitres": 23, + "veinticuatro": 24, + "veinticinco": 25, + "veintiseis": 26, + "veintisiete": 27, + "veintiocho": 28, + "veintinueve": 29 + } + }, + { + "categories": [ + "number", + "tens" + ], + "values": { + "veinte": 20, + "treinta": 30, + "cuarenta": 40, + "cincuenta": 50, + "sesenta": 60, + "setenta": 70, + "ochenta": 80, + "noventa": 90 + } + }, + { + "categories": [ + "number", + "hundred" + ], + "values": { + "cien": 100, + "ciento": 100 + } + }, + { + "categories": [ + "number", + "multiplier" + ], + "values": { + "mil": 1000, + "millón": 1000000, + "millardo": 1000000000, + "billón": 1000000000000, + "billardo": 1000000000000000, + "trillón": 1000000000000000000 + } + }, + { + "categories": [ + "number", + "ordinal", + "digit" + ], + "values": { + "primero": 1, + "primer": 1, + "segundo": 2, + "tercero": 3, + "cuarto": 4, + "quinto": 5, + "sexto": 6, + "séptimo": 7, + "octavo": 8, + "noveno": 9 + } + }, + { + "categories": [ + "number", + "ordinal", + "teen" + ], + "values": { + "décimo": 10, + "decimoprimero": 11, + "decimosegundo": 12, + "decimotercero": 13, + "decimocuarto": 14, + "decimoquinto": 15, + "decimosexto": 16, + "decimoséptimo": 17, + 
"decimooctavo": 18, + "decimonoveno": 19 + } + }, + { + "categories": [ + "number", + "ordinal", + "tens" + ], + "values": { + "vigésimo": 20, + "trigésimo": 30, + "cuadragésimo": 40, + "quincuagésimo": 50, + "sexagésimo": 60, + "septuagésimo": 70, + "octogésimo": 80, + "nonagésimo": 90 + } + }, + { + "categories": [ + "number", + "ordinal", + "hundred" + ], + "values": { + "centésimo": 100 + } + }, + { + "categories": [ + "number", + "ordinal", + "multiplier" + ], + "values": { + "milésimo": 1000, + "millonésimo": 1000000, + "milmillonésimo": 1000000000, + "billonésimo": 1000000000000, + "milbillonésimo": 1000000000000000, + "trillonésimo": 1000000000000000000 + } + }, + { + "categories": [ + "number", + "suffix_multiplier" + ], + "values": { + "mitad": 0.5, + "mitades": 0.5, + "cuarto": 0.25, + "cuartos": 0.25, + "par": 2, + "pares": 2, + "dupla": 2, + "duplas": 2, + "docena": 12, + "docenas": 12, + "decena": 10, + "decenas": 10, + "veintena": 20, + "veintenas": 20, + "porciento": 0.01, + "centésima": 0.01, + "pc": 0.01, + "%": 0.01, + "pormil": 0.001, + "milésima": 0.001, + "‰": 0.001 + } + }, + { + "categories": [ + "month_name" + ], + "values": { + "enero": 1, + "ene": 1, + "febrero": 2, + "feb": 2, + "marzo": 3, + "mar": 3, + "abril": 4, + "abr": 4, + "mayo": 5, + "may": 5, + "junio": 6, + "jun": 6, + "julio": 7, + "jul": 7, + "agosto": 8, + "ago": 8, + "septiembre": 9, + "sep": 9, + "octubre": 10, + "oct": 10, + "noviembre": 11, + "nov": 11, + "diciembre": 12, + "dic": 12 + } + }, + { + "categories": [ + "day_of_week" + ], + "values": { + "lunes": 0, + "lun": 0, + "martes": 1, + "mar": 1, + "miércoles": 2, + "mie": 2, + "jueves": 3, + "jue": 3, + "viernes": 4, + "vie": 4, + "sábado": 5, + "sab": 5, + "domingo": 6, + "dom": 6 + } + }, + { + "categories": [ + "noon_midnight_like", + "moment_of_day" + ], + "values": { + "mediodía": 12, + "medianoche": 0 + } + }, + { + "categories": [ + "moment_of_day" + ], + "values": { + "madrugada": 3, + "amanecer": 6, + 
"amaneciendo": 6, + "mañana": 9, + "mañanas": 9, + "almuerzo": 12, + "almuerzos": 12, + "cena": 20, + "cenas": 20, + "tarde": 15, + "tardes": 15, + "atardecer": 18, + "atardeceres": 18, + "noche": 21, + "noches": 21 + } + } + ], + "duration_words": { + "1 NANOS": [ + "nanosegundo", + "nanosegundos", + "ns" + ], + "1 MICROS": [ + "microsegundo", + "microsegundos", + "μs" + ], + "1 MILLIS": [ + "milisegundo", + "milisegundos", + "ms" + ], + "1 SECONDS": [ + "segundo", + "segundos", + "s", + "seg", + "segs" + ], + "1 MINUTES": [ + "minuto", + "minutos", + "m", + "min", + "mins" + ], + "1 HOURS": [ + "hora", + "horas", + "h", + "hr", + "hrs" + ], + "1 DAYS": [ + "día", + "días", + "d" + ], + "1 WEEKS": [ + "semana", + "semanas", + "sem", + "s" + ], + "1 MONTHS": [ + "mes", + "meses" + ], + "1 YEARS": [ + "año", + "años", + "a" + ], + "1 DECADES": [ + "década", + "décadas" + ], + "1 CENTURIES": [ + "siglo", + "siglos" + ], + "1 MILLENNIA": [ + "milenio", + "milenios" + ] + }, + "duration_restrict_after_number": [ + "ns", + "μs", + "ms", + "s", + "m", + "h", + "d", + "sem", + "mes", + "año" + ] +} \ No newline at end of file diff --git a/numbers/src/test/resources/config/es-es/date_time_test.json b/numbers/src/test/resources/config/es-es/date_time_test.json new file mode 100644 index 00000000..2cab298b --- /dev/null +++ b/numbers/src/test/resources/config/es-es/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "uno a.C." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "diez a.C." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "noventa y dos a.C." 
}, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ochocientos tres" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ochocientos once" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "cuatrocientos cincuenta y cuatro" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cinco" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil doce" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cuarenta y seis" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil ochocientos siete" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil setecientos diecisiete" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil novecientos ochenta y ocho"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil nueve"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil dieciocho"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil veintiuno"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil treinta"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cien" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil ciento veinte a.C." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil doscientos cuarenta y uno a.C." 
}, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "cinco mil doscientos" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cien" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cien" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "martes, treinta y uno de enero de dos mil diecisiete"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "domingo, cuatro de febrero de dos mil dieciocho"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "domingo, cuatro"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "domingo, cuatro"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "mañana"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "hoy"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "ayer"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "domingo, catorce de febrero"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "domingo, catorce de febrero, dos mil deciocho"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "jueves, 31 de enero, dos mil diecisiete a las doce y dos p.m."}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "martes, treinta y uno de enero de dos mil diecisiete a las trece horas y veintidós minutos"} + } +} From b51150369a00b763f206ce64aa2d56528a229977 Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 22 Apr 2025 19:26:24 -0500 
Subject: [PATCH 02/30] fix test sentences --- numbers/src/test/resources/config/es-es/date_time_test.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numbers/src/test/resources/config/es-es/date_time_test.json b/numbers/src/test/resources/config/es-es/date_time_test.json index 2cab298b..43be466b 100644 --- a/numbers/src/test/resources/config/es-es/date_time_test.json +++ b/numbers/src/test/resources/config/es-es/date_time_test.json @@ -34,10 +34,10 @@ "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "hoy"}, "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "ayer"}, "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "domingo, catorce de febrero"}, - "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "domingo, catorce de febrero, dos mil deciocho"} + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "domingo, catorce de febrero de dos mil deciocho"} }, "test_nice_date_time": { - "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "jueves, 31 de enero, dos mil diecisiete a las doce y dos p.m."}, + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "jueves, 31 de enero de dos mil diecisiete a las doce y dos p.m."}, "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "martes, treinta y uno de enero de dos mil diecisiete a las trece horas y veintidós minutos"} } } From 0bef14903a7027d1fa68fad72822ee60db9fa0a1 Mon Sep 17 00:00:00 2001 From: Stypox Date: Thu, 3 Jul 2025 12:19:52 +0200 Subject: [PATCH 03/30] Add spanish files copied from italian --- .../dicio/numbers/ParserFormatterBuilder.kt | 3 + 
.../lang/es/SpanishDateTimeExtractor.kt | 356 ++++++++++++++++ .../dicio/numbers/lang/es/SpanishFormatter.kt | 395 ++++++++++++++++++ .../numbers/lang/es/SpanishNumberExtractor.kt | 228 ++++++++++ .../dicio/numbers/lang/es/SpanishParser.kt | 40 ++ 5 files changed, 1022 insertions(+) create mode 100644 numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt create mode 100644 numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt create mode 100644 numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt create mode 100644 numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt diff --git a/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt b/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt index 4f0628f7..0af8f938 100644 --- a/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt +++ b/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt @@ -3,6 +3,8 @@ package org.dicio.numbers import org.dicio.numbers.formatter.Formatter import org.dicio.numbers.lang.en.EnglishFormatter import org.dicio.numbers.lang.en.EnglishParser +import org.dicio.numbers.lang.es.SpanishFormatter +import org.dicio.numbers.lang.es.SpanishParser import org.dicio.numbers.lang.it.ItalianFormatter import org.dicio.numbers.lang.it.ItalianParser import org.dicio.numbers.parser.Parser @@ -12,6 +14,7 @@ object ParserFormatterBuilder { private val PARSER_FORMATTER_CLASSES_MAP = mapOf( "en" to ParserFormatterClasses(EnglishFormatter::class.java, EnglishParser::class.java), "it" to ParserFormatterClasses(ItalianFormatter::class.java, ItalianParser::class.java), + "es" to ParserFormatterClasses(SpanishFormatter::class.java, SpanishParser::class.java), ) @JvmStatic diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt new file mode 100644 index 00000000..a9b2363b --- /dev/null +++ 
b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt @@ -0,0 +1,356 @@ +package org.dicio.numbers.lang.es + +import org.dicio.numbers.parser.lexer.TokenStream +import org.dicio.numbers.unit.Duration +import org.dicio.numbers.util.DateTimeExtractorUtils +import org.dicio.numbers.util.DurationExtractorUtils +import org.dicio.numbers.util.NumberExtractorUtils +import org.dicio.numbers.util.Utils +import java.time.LocalDate +import java.time.LocalDateTime +import java.time.LocalTime +import java.time.temporal.ChronoUnit + +class SpanishDateTimeExtractor internal constructor( + private val ts: TokenStream, + private val now: LocalDateTime +) { + private val numberExtractor = SpanishNumberExtractor(ts) + private val durationExtractor = DurationExtractorUtils(ts, numberExtractor::numberNoOrdinal) + private val dateTimeExtractor = DateTimeExtractorUtils(ts, now, this::extractIntegerInRange) + + private fun extractIntegerInRange(fromInclusive: Int, toInclusive: Int): Int? { + // disallow fraction as / should be treated as a day/month/year separator + return NumberExtractorUtils.extractOneIntegerInRange( + ts, fromInclusive, toInclusive + ) { NumberExtractorUtils.signBeforeNumber(ts) { numberExtractor.numberInteger(false) } } + } + + + fun dateTime(): LocalDateTime? { + // first try preferring having a date first, then try with time first + return ts.firstWhichUsesMostTokens({ dateTime(false) }, { dateTime(true) }) + } + + private fun dateTime(timeFirst: Boolean): LocalDateTime? { + var date: LocalDate? = null + var time: LocalTime? 
= null + + if (!timeFirst) { + // first try with special days, since duration-related words might be used + date = relativeSpecialDay() + + if (date == null) { + // then try with duration, since otherwise numbers would be interpreted as date days + val duration = Utils.firstNotNull( + this::relativeDuration, + dateTimeExtractor::relativeMonthDuration + ) + if (duration == null) { + // no normal relative duration found: finally try extracting a date normally + date = date() + } else if (duration.nanos == 0L && duration.days != 0L) { + // duration contains a specified day and no specified time, so a time can follow + date = duration.applyAsOffsetToDateTime(now).toLocalDate() + } else if (duration.nanos != 0L && duration.days == 0L && duration.months == 0L && duration.years == 0L) { + // duration contains a specified time, so a date could follow + time = duration.applyAsOffsetToDateTime(now).toLocalTime() + } else { + // duration contains mixed date&time, or has units >=month, nothing can follow + return duration.applyAsOffsetToDateTime(now) + } + } + } + + if (time == null) { + time = ts.tryOrSkipDateTimeIgnore(date != null) { this.timeWithAmpm() } + } + + if (date == null && time != null) { + // try to extract a date after the time + val originalPosition = ts.position + val duration = ts.tryOrSkipDateTimeIgnore(true) { this.relativeDuration() } + if (duration == null) { + date = ts.tryOrSkipDateTimeIgnore( + true + ) { + Utils.firstNotNull(this::relativeSpecialDay, this::date) + } + } else if (duration.nanos == 0L && duration.days != 0L) { + date = duration.applyAsOffsetToDateTime(now).toLocalDate() + } else { + ts.position = originalPosition + } + } + + return if (date == null) { + time?.atDate(now.toLocalDate()) + } else { + if (time == null) date.atTime(now.toLocalTime()) else date.atTime(time) + } + } + + fun timeWithAmpm(): LocalTime? { + var time = time() + val pm: Boolean? 
+ if (time == null) { + // if there is no time, maybe there is a moment of day (not am/pm though) preceding? + val momentOfDay = momentOfDay() ?: return null + + time = ts.tryOrSkipDateTimeIgnore(true) { this.time() } + if (time == null) { + // found moment of day without a specific time + return LocalTime.of(momentOfDay, 0) + } else { + // use moment of day before time to determine am/pm + pm = DateTimeExtractorUtils.isMomentOfDayPm(momentOfDay) + } + } else { + // found a time, now look for am/pm or a moment of day + pm = ts.tryOrSkipDateTimeIgnore(true) { + Utils.firstNotNull( + dateTimeExtractor::ampm, + { momentOfDay()?.let(DateTimeExtractorUtils::isMomentOfDayPm) } + ) + } + } + + if (time.hour != 0 && pm != null) { + // AM/PM should not do anything after 0 (e.g. 0pm or 24 di sera) + + if (pm && !DateTimeExtractorUtils.isMomentOfDayPm(time.hour)) { + // time must be in the afternoon, but time is not already, correct it + time = time.withHour((time.hour + 12) % DateTimeExtractorUtils.HOURS_IN_DAY) + } + } + return time + } + + fun time(): LocalTime? { + // try both with a normal hour and with "mezzogiorno"/"mezzanotte" + val hour = Utils.firstNotNull(this::noonMidnightLike, this::hour) ?: return null + var result = LocalTime.of(hour, 0) + + val minute = ts.tryOrSkipDateTimeIgnore( + true + ) { + Utils.firstNotNull(this::specialMinute, dateTimeExtractor::minute) + } + if (minute == null) { + return result + } + result = result.withMinute(minute) + + val second = ts.tryOrSkipDateTimeIgnore(true) { dateTimeExtractor.second() } + if (second == null) { + return result + } + return result.withSecond(second) + } + + fun date(): LocalDate? { + var result = now.toLocalDate() + + val dayOfWeek = dayOfWeek() + val day = ts.tryOrSkipDateTimeIgnore( + dayOfWeek != null + ) { extractIntegerInRange(1, 31) } + + if (day == null) { + if (dayOfWeek != null) { + // TODO maybe enforce the date to be in the future? 
+ return result.plus((dayOfWeek - result.dayOfWeek.ordinal).toLong(), ChronoUnit.DAYS) + } + result = result.withDayOfMonth(1) + } else { + result = result.withDayOfMonth(day) + } + + val month = ts.tryOrSkipDateTimeIgnore(day != null) { + Utils.firstNotNull(dateTimeExtractor::monthName, { extractIntegerInRange(1, 12) }) + } + if (month == null) { + if (day != null) { + return result + } + result = result.withMonth(1) + } else { + result = result.withMonth(month) + } + + // if month is null then day is also null, otherwise we would have returned above + val year = ts.tryOrSkipDateTimeIgnore( + month != null + ) { extractIntegerInRange(0, 999999999) } + if (year == null) { + if (month != null) { + return result + } + return null + } + + val bcad = dateTimeExtractor.bcad() + return result.withYear(year * (if (bcad == null || bcad) 1 else -1)) + } + + + fun dayOfWeek(): Int? { + if (ts[0].isValue("mar")) { + ts.movePositionForwardBy(1) + return 1 // special case, since mar already used for march + } else { + return dateTimeExtractor.dayOfWeek() + } + } + + fun specialMinute(): Int? { + val originalPosition = ts.position + + val number = numberExtractor.numberNoOrdinal() + if (number != null && number.isDecimal && number.decimalValue() > 0.0 && number.decimalValue() < 1.0) { + // e.g. alle due e tre quarti + return Utils.roundToInt(number.decimalValue() * 60) + } + + ts.position = originalPosition + return null + } + + fun noonMidnightLike(): Int? { + return noonMidnightLikeOrMomentOfDay("noon_midnight_like") + } + + fun momentOfDay(): Int? { + // noon_midnight_like is a part of moment_of_day, so noon and midnight are included + return noonMidnightLikeOrMomentOfDay("moment_of_day") + } + + private fun noonMidnightLikeOrMomentOfDay(category: String): Int? 
{ + val originalPosition = ts.position + + var relativeIndicator = 0 // 0 = not found, otherwise the sign, +1 or -1 + if (ts[0].hasCategory("pre_special_hour")) { + // found a word that usually comes before special hours, e.g. questo, dopo + if (ts[0].hasCategory("pre_relative_indicator")) { + relativeIndicator = if (ts[0].hasCategory("negative")) -1 else 1 + // only move to next not ignore if we got a relative indicator + ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) + } else { + ts.movePositionForwardBy(1) + } + } + + if (ts[0].hasCategory(category)) { + // special hour found, e.g. mezzanotte, sera, pranzo + ts.movePositionForwardBy(1) + return ((ts[-1].number!!.integerValue().toInt() + + DateTimeExtractorUtils.HOURS_IN_DAY + relativeIndicator) + % DateTimeExtractorUtils.HOURS_IN_DAY) + } + + // noon/midnight have both the categores noon_midnight_like and moment_of_day, always try + if (ts[0].value.startsWith("mezz")) { + // sometimes e.g. "mezzogiorno" is split into "mezzo giorno" + if (ts[1].value.startsWith("giorn")) { + ts.movePositionForwardBy(2) + return 12 + relativeIndicator + } else if (ts[1].value.startsWith("nott")) { + ts.movePositionForwardBy(2) + return (DateTimeExtractorUtils.HOURS_IN_DAY + relativeIndicator) % DateTimeExtractorUtils.HOURS_IN_DAY + } + } + + // no special hour found + ts.position = originalPosition + return null + } + + fun hour(): Int? { + val originalPosition = ts.position + + // skip words that usually come before hours, e.g. alle, ore + ts.movePositionForwardBy(ts.indexOfWithoutCategory("pre_hour", 0)) + + val number = extractIntegerInRange(0, DateTimeExtractorUtils.HOURS_IN_DAY) + if (number == null) { + // no number found, or the number is not a valid hour, e.g. le ventisei + ts.position = originalPosition + return null + } + + // found hour, e.g. alle diciannove + return number % DateTimeExtractorUtils.HOURS_IN_DAY // transform 24 into 0 + } + + + private fun relativeSpecialDay(): LocalDate? 
{ + val days = Utils.firstNotNull( + this::relativeYesterday, + dateTimeExtractor::relativeToday, + this::relativeTomorrow, + dateTimeExtractor::relativeDayOfWeekDuration + ) + if (days == null) { + return null + } + return now.toLocalDate().plusDays(days.toLong()) + } + + fun relativeYesterday(): Int? { + val originalPosition = ts.position + + // collect as many adders ("altro") preceding yesterday ("ieri") as possible + var dayCount = 0 + while (ts[0].hasCategory("yesterday_adder")) { + ++dayCount + ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) + } + + // collect the actual yesterday ("ieri") and exit if it is not found + if (!ts[0].hasCategory("yesterday")) { + ts.position = originalPosition + return null + } + ts.movePositionForwardBy(1) + ++dayCount + + // if no adders were collected before yesterday, try to collect only one at the end + val nextNotIgnore = ts.indexOfWithoutCategory("date_time_ignore", 0) + if (dayCount == 1 && ts[nextNotIgnore].hasCategory("yesterday_adder")) { + ++dayCount + ts.movePositionForwardBy(nextNotIgnore + 1) + } + + // found relative yesterday, e.g. altro altro ieri, ieri l'altro + return -dayCount + } + + fun relativeTomorrow(): Int? { + val originalPosition = ts.position + + // collect as many "dopo" preceding "domani" as possible + var dayCount = 0 + while (ts[0].hasCategory("tomorrow_adder")) { + ++dayCount + ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) + } + + // collect the actual "domani" and exit if it is not found + if (!ts[0].hasCategory("tomorrow")) { + ts.position = originalPosition + return null + } + ts.movePositionForwardBy(1) + ++dayCount + + // found relative tomorrow, e.g. domani, dopo dopo domani + return dayCount + } + + fun relativeDuration(): Duration? 
{ + return dateTimeExtractor.relativeIndicatorDuration( + { durationExtractor.duration() }, + { duration -> duration.multiply(-1) } + ) + } +} diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt new file mode 100644 index 00000000..de53fd1d --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt @@ -0,0 +1,395 @@ +package org.dicio.numbers.lang.es + +import org.dicio.numbers.formatter.Formatter +import org.dicio.numbers.unit.MixedFraction +import org.dicio.numbers.util.Utils +import java.time.LocalTime +import java.time.format.DateTimeFormatter +import java.util.Locale +import kotlin.math.abs + +class SpanishFormatter : Formatter("config/es-es") { + + override fun niceNumber(mixedFraction: MixedFraction, speech: Boolean): String { + if (speech) { + val sign = if (mixedFraction.negative) "meno " else "" + if (mixedFraction.numerator == 0) { + return sign + pronounceNumber(mixedFraction.whole.toDouble(), 0, true, false, false) + } + + var denominatorString = if (mixedFraction.denominator == 2) { + "mezzo" + } else { + // use ordinal: only mezzo is exceptional + pronounceNumber(mixedFraction.denominator.toDouble(), 0, true, false, true) + } + + val numeratorString = if (mixedFraction.numerator == 1) { + "un" + } else { + denominatorString = + denominatorString.substring(0, denominatorString.length - 1) + "i" + pronounceNumber(mixedFraction.numerator.toDouble(), 0, true, false, false) + } + + return if (mixedFraction.whole == 0L) { + "$sign$numeratorString $denominatorString" + } else { + (sign + pronounceNumber(mixedFraction.whole.toDouble(), 0, true, false, false) + + " e " + numeratorString + " " + denominatorString) + } + } else { + return niceNumberNotSpeech(mixedFraction) + } + } + + override fun pronounceNumber( + number: Double, + places: Int, + shortScale: Boolean, + scientific: Boolean, + ordinal: Boolean + ): String { + // for 
italian shortScale is completely ignored + + if (number == Double.POSITIVE_INFINITY) { + return "infinito" + } else if (number == Double.NEGATIVE_INFINITY) { + return "meno infinito" + } else if (java.lang.Double.isNaN(number)) { + return "non un numero" + } + + // also using scientific mode if the number is too big to be spoken fully. Checking against + // the biggest double smaller than 10^21 = 1000 * 10^18, which is the biggest pronounceable + // number, since e.g. 999.99 * 10^18 can be pronounced correctly. + if (scientific || abs(number) > 999999999999999934463.0) { + val scientificFormatted = String.format(Locale.ENGLISH, "%E", number) + val parts = scientificFormatted.split("E".toRegex(), limit = 2).toTypedArray() + val power = parts[1].toInt().toDouble() + + if (power != 0.0) { + // This handles negatives of powers separately from the normal + // handling since each call disables the scientific flag + val n = parts[0].toDouble() + return String.format( + "%s per dieci alla %s", + pronounceNumber(n, places, shortScale, false, false), + pronounceNumber(power, places, shortScale, false, false) + ) + } + } + + val result = StringBuilder() + var varNumber = number + if (varNumber < 0) { + varNumber = -varNumber + // from here on number is always positive + if (places != 0 || varNumber >= 0.5) { + // do not add minus if number will be rounded to 0 + result.append("meno ") + } + } + + val realPlaces = Utils.decimalPlacesNoFinalZeros(varNumber, places) + val numberIsWhole = realPlaces == 0 + val realOrdinal = ordinal && numberIsWhole + // if no decimal places to be printed, numberLong should be the rounded number + val numberLong = varNumber.toLong() + (if (varNumber % 1 >= 0.5 && numberIsWhole) 1 else 0) + + if (realOrdinal && ORDINAL_NAMES.containsKey(numberLong)) { + result.append(ORDINAL_NAMES[numberLong]) + } else if (!realOrdinal && NUMBER_NAMES.containsKey(numberLong)) { + if (varNumber > 1000) { + result.append("un ") + } + 
result.append(NUMBER_NAMES[numberLong]) + } else { + val groups = Utils.splitByModulus(numberLong, 1000) + val groupNames: MutableList = ArrayList() + for (i in groups.indices) { + val z = groups[i] + if (z == 0L) { + continue // skip 000 groups + } + var groupName = subThousand(z) + + if (i == 1) { + if (z == 1L) { + groupName = "mille" + } else { + // use mila instead of mille + groupName += " mila" + } + } else if (i != 0) { + // magnitude > 1000, so un is always there + if (z == 1L) { + groupName = "un" + } + + val magnitude = Utils.longPow(1000, i) + groupName += " " + NUMBER_NAMES[magnitude] + if (z != 1L) { + groupName = groupName.substring(0, groupName.length - 1) + "i" + } + } + + groupNames.add(groupName) + } + + appendSplitGroups(result, groupNames) + + if (ordinal && numberIsWhole) { // not ordinal if not whole + if (result.lastIndexOf("dieci") == result.length - 5) { + result.deleteCharAt(result.length - 4) + result.append("mo") + } else { + if (result.lastIndexOf("tre") != result.length - 3 + && result.lastIndexOf("sei") != result.length - 3 + ) { + result.deleteCharAt(result.length - 1) + if (result.lastIndexOf("mil") == result.length - 3) { + result.append("l") + } + } + result.append("esimo") + } + } + } + + if (realPlaces > 0) { + if (varNumber < 1.0 && (result.isEmpty() || "meno ".contentEquals(result))) { + result.append("zero") // nothing was written before + } + result.append(" virgola") + + val fractionalPart = String.format("%." 
+ realPlaces + "f", varNumber % 1) + for (i in 2 until fractionalPart.length) { + result.append(" ") + result.append(NUMBER_NAMES[(fractionalPart[i].code - '0'.code).toLong()]) + } + } + + return result.toString() + } + + override fun niceTime( + time: LocalTime, + speech: Boolean, + use24Hour: Boolean, + showAmPm: Boolean + ): String { + if (speech) { + val result = StringBuilder() + if (time.minute == 45) { + when (val newHour = (time.hour + 1) % 24) { + 0 -> result.append("un quarto a mezzanotte") + 12 -> result.append("un quarto a mezzogiorno") + else -> { + result.append("un quarto alle ") + result.append(getHourName(newHour, use24Hour)) + } + } + } else { + result.append(getHourName(time.hour, use24Hour)) + + when (time.minute) { + 0 -> result.append(" in punto") + 15 -> result.append(" e un quarto") + 30 -> result.append(" e mezza") + else -> { + result.append(" e ") + if (time.minute < 10) { + result.append("zero ") + } + result.append(pronounceNumberDuration(time.minute.toLong())) + } + } + } + + if (!use24Hour && showAmPm && result.indexOf("mezzanotte") == -1 && result.indexOf("mezzogiorno") == -1) { + if (time.hour >= 19) { + result.append(" di sera") + } else if (time.hour >= 12) { + result.append(" di pomeriggio") + } else if (time.hour >= 4) { + result.append(" di mattina") + } else { + result.append(" di notte") + } + } + return result.toString() + } else { + if (use24Hour) { + return time.format(DateTimeFormatter.ofPattern("HH:mm", Locale.ITALIAN)) + } else { + val result = time.format( + DateTimeFormatter.ofPattern( + if (showAmPm) "K:mm a" else "K:mm", Locale.ENGLISH + ) + ) + return if (result.startsWith("0:")) { + "12:" + result.substring(2) + } else { + result + } + } + } + } + + private fun getHourName(hour: Int, use24Hour: Boolean): String? 
{ + if (hour == 0) { + return "mezzanotte" + } else if (hour == 12) { + return "mezzogiorno" + } + val normalizedHour = if (use24Hour) { + hour + } else { + hour % 12 + } + + return if (normalizedHour == 1) { + "una" + } else { + pronounceNumberDuration(normalizedHour.toLong()) + } + } + + override fun pronounceNumberDuration(number: Long): String { + if (number == 1L) { + return "un" + } + return super.pronounceNumberDuration(number) + } + + + /** + * @param n must be 0 <= n <= 999 + * @return the string representation of a number smaller than 1000 + */ + private fun subThousand(n: Long): String { + val builder = StringBuilder() + var requiresSpace = false // whether a space needs to be added before the content + if (n >= 100) { + val hundred = n / 100 + if (hundred > 1) { + builder.append(NUMBER_NAMES[hundred]) + builder.append(" ") + } + builder.append("cento") + requiresSpace = true + } + + val lastTwoDigits = n % 100 + if (lastTwoDigits != 0L && NUMBER_NAMES.containsKey(lastTwoDigits)) { + if (requiresSpace) { + // this is surely true, but let's keep the space for consistency + builder.append(" ") + } + builder.append(NUMBER_NAMES[lastTwoDigits]) + } else { + val ten = (n % 100) / 10 + if (ten > 0) { + if (requiresSpace) { + builder.append(" ") + } + builder.append(NUMBER_NAMES[ten * 10]) + requiresSpace = true + } + + val unit = n % 10 + if (unit > 0) { + if (requiresSpace) { + builder.append(" ") + } + builder.append(NUMBER_NAMES[unit]) + } + } + + return builder.toString() + } + + /** + * @param result the string builder to append the comma-separated group names to + * @param groupNames the group names + */ + private fun appendSplitGroups(result: StringBuilder, groupNames: List) { + if (groupNames.isNotEmpty()) { + result.append(groupNames[groupNames.size - 1]) + } + + for (i in groupNames.size - 2 downTo 0) { + result.append(", ") + result.append(groupNames[i]) + } + } + + companion object { + private val NUMBER_NAMES = mapOf( + 0L to "zero", + 1L to 
"uno", + 2L to "due", + 3L to "tre", + 4L to "quattro", + 5L to "cinque", + 6L to "sei", + 7L to "sette", + 8L to "otto", + 9L to "nove", + 10L to "dieci", + 11L to "undici", + 12L to "dodici", + 13L to "tredici", + 14L to "quattordici", + 15L to "quindici", + 16L to "sedici", + 17L to "diciassette", + 18L to "diciotto", + 19L to "diciannove", + 20L to "venti", + 30L to "trenta", + 40L to "quaranta", + 50L to "cinquanta", + 60L to "sessanta", + 70L to "settanta", + 80L to "ottanta", + 90L to "novanta", + 100L to "cento", + 1000L to "mille", + 1000000L to "milione", + 1000000000L to "miliardo", + 1000000000000L to "bilione", + 1000000000000000L to "biliardo", + 1000000000000000000L to "trilione", + ) + + private val ORDINAL_NAMES = mapOf( + 1L to "primo", + 2L to "secondo", + 3L to "terzo", + 4L to "quarto", + 5L to "quinto", + 6L to "sesto", + 7L to "settimo", + 8L to "ottavo", + 9L to "nono", + 10L to "decimo", + 11L to "undicesimo", + 12L to "dodicesimo", + 13L to "tredicesimo", + 14L to "quattordicesimo", + 15L to "quindicesimo", + 16L to "sedicesimo", + 17L to "diciassettesimo", + 18L to "diciottesimo", + 19L to "diciannovesimo", + 1000L to "millesimo", + 1000000L to "milionesimo", + 1000000000L to "miliardesimo", + 1000000000000L to "bilionesimo", + 1000000000000000L to "biliardesimo", + 1000000000000000000L to "trilionesimo", + ) + } +} diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt new file mode 100644 index 00000000..aad879a8 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt @@ -0,0 +1,228 @@ +package org.dicio.numbers.lang.es + +import org.dicio.numbers.parser.lexer.TokenStream +import org.dicio.numbers.unit.Number +import org.dicio.numbers.util.NumberExtractorUtils + +class SpanishNumberExtractor internal constructor(private val ts: TokenStream) { + fun numberPreferOrdinal(): Number? 
{ + // first try with suffix multiplier, e.g. dozen + var number = numberSuffixMultiplier() + if (number == null) { + number = numberSignPoint(true) // then try with normal number + } + + if (number != null) { + // a number was found, maybe it has a valid denominator? + number = divideByDenominatorIfPossible(number) + } + return number + } + + fun numberPreferFraction(): Number? { + // first try with suffix multiplier, e.g. dozen + var number = numberSuffixMultiplier() + if (number == null) { + number = numberSignPoint(false) // then try without ordinal + } + + number = if (number == null) { + // maybe an ordinal number? + numberSignPoint(true) + } else { + // a number was found, maybe it has a valid denominator? + // note that e.g. "a couple halves" ends up here, but that's valid + divideByDenominatorIfPossible(number) + } + return number + } + + fun numberNoOrdinal(): Number? { + // for now this function is used internally just for duration parsing, but maybe it could + // be exposed to library users, giving more control over how ordinals are handled. + + // first try with suffix multiplier, e.g. dozen + + var number = numberSuffixMultiplier() + if (number == null) { + number = numberSignPoint(false) // then try without ordinal + } + + if (number != null) { + // a number was found, maybe it has a valid denominator? + // note that e.g. "una mezza coppia" ends up here, but that's valid + number = divideByDenominatorIfPossible(number) + } + + return number + } + + fun divideByDenominatorIfPossible(numberToEdit: Number): Number? { + // if numberToEdit is directly followed by an ordinal number then it is a fraction (only if numberToEdit is not + // ordinal or already decimal). Note: a big integer (i.e. 10^24) would be decimal, here we are assuming that + // such a number will never have a fraction after it for simplicity. 
+ + if (!numberToEdit.isOrdinal && !numberToEdit.isDecimal + && !ts[0].hasCategory("ignore") + ) { + val originalPosition = ts.position + val denominator = numberInteger(true) + if (denominator == null) { + // no denominator found: maybe a custom multiplier? e.g. mezzo (=0.5), dozzina (=12) + if (ts[0].hasCategory("suffix_multiplier")) { + ts.movePositionForwardBy(1) + + val multiplier = ts[-1].number + if (multiplier?.isDecimal == true && + (1 / multiplier.decimalValue()).toLong().toDouble() + == (1 / multiplier.decimalValue()) + ) { + // the multiplier is an exact fraction, divide by the denominator converted + // to long to possibly preserve the integerness of numberToEdit, e.g. + // sedici mezzi should be 8, not 8.0 + return numberToEdit.divide((1 / multiplier.decimalValue()).toLong()) + } + + return numberToEdit.multiply(multiplier) + } + } else if (denominator.isOrdinal && denominator.moreThan(2)) { + return numberToEdit.divide(denominator) // valid denominator, e.g. un quinto + } else { + // invalid denominator, e.g. sei primi + ts.position = originalPosition // restore to original position + } + } + return numberToEdit + } + + fun numberSuffixMultiplier(): Number? { + if (ts[0].hasCategory("suffix_multiplier")) { + ts.movePositionForwardBy(1) + return ts[-1].number // a suffix multiplier, e.g. dozen, half, score, percent + } else { + return null + } + } + + fun numberSignPoint(allowOrdinal: Boolean): Number? { + return NumberExtractorUtils.signBeforeNumber(ts) { numberPoint(allowOrdinal) } + } + + fun numberPoint(allowOrdinal: Boolean): Number? { + var n = numberInteger(allowOrdinal).let { + if (it == null || it.isOrdinal) { + // numbers can not start with just "virgola" + // no point or fraction separator can appear after an ordinal number + return@numberPoint it + } + it + } + + if (ts[0].hasCategory("point")) { + // parse point indicator from e.g. 
"twenty one point four five three" + + if (!ts[1].hasCategory("digit_after_point") + && (!NumberExtractorUtils.isRawNumber(ts[1]) || ts[2].hasCategory("ordinal_suffix")) + ) { + // also return if next up is an ordinal raw number, i.e. followed by °/esimo + return n // there is an only comma at the end of the number: it is not part of it + } + ts.movePositionForwardBy(1) + + var magnitude = 0.1 + if (ts[0].value.length > 1 && NumberExtractorUtils.isRawNumber(ts[0])) { + // handle sequence of raw digits after point, e.g. .0123 + // value.length > 1 since multiple single-digits are handled below, e.g. . 0 1 2 3 + for (i in 0 until ts[0].value.length) { + n = n.plus((ts[0].value[i].code - '0'.code) * magnitude) + magnitude /= 10.0 + } + ts.movePositionForwardBy(1) + } else { + // read as many digits as possible, e.g. point one six 5 one 0 three + while (true) { + if (ts[0].hasCategory("digit_after_point") + || (ts[0].value.length == 1 && NumberExtractorUtils.isRawNumber(ts[0]) + && !ts[1].hasCategory("ordinal_suffix")) + ) { + // do not allow ordinal raw numbers, i.e. followed by st/nd/rd/th + n = n.plus(ts[0].number!!.multiply(magnitude)) + magnitude /= 10.0 + } else { + break // reached a word that is not a valid digit + } + ts.movePositionForwardBy(1) + } + } + } else if (ts[0].hasCategory("fraction_separator")) { + // parse fraction from e.g. "twenty divided by one hundred" + + var separatorLength = 1 + if (ts[1].hasCategory("fraction_separator_secondary")) { + separatorLength = 2 // also remove "by" after "divided by" + } + + ts.movePositionForwardBy(separatorLength) + val denominator = numberInteger(false) + if (denominator == null) { + ts.movePositionForwardBy(-separatorLength) // not a fraction, reset + } else { + return n.divide(denominator) + } + } + + return n + } + + fun numberInteger(allowOrdinal: Boolean): Number? 
{ + if (ts[0].hasCategory("ignore")) { + return null // do not eat ignored words at the beginning + } + + var n = NumberExtractorUtils.numberMadeOfGroups( + ts, + allowOrdinal, + NumberExtractorUtils::numberGroupShortScale + ) + if (n == null) { + return NumberExtractorUtils.numberBigRaw( + ts, + allowOrdinal + ) // try to parse big raw numbers (>=1000), e.g. 1207 + } else if (n.isOrdinal) { + return n // no more checks, as the ordinal word comes last, e.g. million twelfth + } + + // n != null from here on + if (n.lessThan(1000)) { + // parse raw number n separated by comma, e.g. 123,045,006 + // assuming current position is at the first comma + if (NumberExtractorUtils.isRawNumber(ts[-1]) && ts[0].hasCategory("thousand_separator") && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber( + ts[1] + ) + ) { + val originalPosition = ts.position - 1 + + while (ts[0].hasCategory("thousand_separator") && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber( + ts[1] + ) + ) { + n = n!!.multiply(1000).plus(ts[1].number) + ts.movePositionForwardBy(2) // do not allow ignored words in between + } + + if (ts[0].hasCategory("ordinal_suffix")) { + if (allowOrdinal) { + ts.movePositionForwardBy(1) + return n!!.withOrdinal(true) // ordinal number, e.g. 20,056,789th + } else { + ts.position = originalPosition + return null // found ordinal number, revert since allowOrdinal is false + } + } + } + } + + return n // e.g. 
six million, three hundred and twenty seven + } +} diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt new file mode 100644 index 00000000..1118a8ea --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt @@ -0,0 +1,40 @@ +package org.dicio.numbers.lang.es + +import org.dicio.numbers.parser.Parser +import org.dicio.numbers.parser.lexer.TokenStream +import org.dicio.numbers.unit.Duration +import org.dicio.numbers.unit.Number +import org.dicio.numbers.util.DurationExtractorUtils +import java.time.LocalDateTime + +class SpanishParser : Parser("config/es-es") { + override fun extractNumber( + tokenStream: TokenStream, + shortScale: Boolean, + preferOrdinal: Boolean + ): () -> Number? { + val numberExtractor = SpanishNumberExtractor(tokenStream) + return if (preferOrdinal) { + numberExtractor::numberPreferOrdinal + } else { + numberExtractor::numberPreferFraction + } + } + + override fun extractDuration( + tokenStream: TokenStream, + shortScale: Boolean + ): () -> Duration? { + val numberExtractor = SpanishNumberExtractor(tokenStream) + return DurationExtractorUtils(tokenStream, numberExtractor::numberNoOrdinal)::duration + } + + override fun extractDateTime( + tokenStream: TokenStream, + shortScale: Boolean, + preferMonthBeforeDay: Boolean, + now: LocalDateTime + ): () -> LocalDateTime? 
{ + return SpanishDateTimeExtractor(tokenStream, now)::dateTime + } +} From 987c75443a80d24ba56a633cdba3ad72bed59007 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 13 Jul 2025 15:40:38 -0500 Subject: [PATCH 04/30] Updated tokenizer.json --- .../resources/config/es-es/tokenizer.json | 73 +++++++++++++++---- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 34468203..eba05f96 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -6,6 +6,8 @@ "raw" ], "plural_endings": [ + "s", + "es" ], "word_matches": [ { @@ -24,7 +26,8 @@ "date_time_ignore", "day_adder_the", "ampm_before", - "bcad_after" + "bcad_after", + "pre_special_hour" ], "values": [ "a" @@ -48,7 +51,7 @@ "thousand_separator" ], "values": [ - "," + "." ] }, { @@ -88,7 +91,7 @@ "date_time_ignore" ], "values": [ - "." + "," ] }, { @@ -151,6 +154,15 @@ "de" ] }, + { + "categories": [ + "day_before_yesterday" + ], + "values": [ + "anteayer", + "antier" + ] + }, { "categories": [ "yesterday" @@ -175,6 +187,14 @@ "mañana" ] }, + { + "categories": [ + "day_after_tomorrow" + ], + "values": [ + "pasado mañana" + ] + }, { "categories": [ "day_adder_the", @@ -183,7 +203,10 @@ "pre_special_hour" ], "values": [ - "el" + "el", + "la", + "los", + "las" ] }, { @@ -511,11 +534,12 @@ "dieciocho": 18, "diecinueve": 19, "veintiuno": 21, + "veintiún": 21, "veintidós": 22, - "veintitres": 23, + "veintitrés": 23, "veinticuatro": 24, "veinticinco": 25, - "veintiseis": 26, + "veintiséis": 26, "veintisiete": 27, "veintiocho": 28, "veintinueve": 29 @@ -555,10 +579,15 @@ "values": { "mil": 1000, "millón": 1000000, + "millones": 1000000, + "mil millones": 1000000000, "millardo": 1000000000, + "millardos": 1000000000, "billón": 1000000000000, - "billardo": 1000000000000000, - "trillón": 1000000000000000000 + "billones": 
1000000000000, + "mil billones": 1000000000000000, + "trillón": 1000000000000000000, + "trillones": 1000000000000000000 } }, { @@ -572,6 +601,7 @@ "primer": 1, "segundo": 2, "tercero": 3, + "tercer": 3, "cuarto": 4, "quinto": 5, "sexto": 6, @@ -612,7 +642,7 @@ "quincuagésimo": 50, "sexagésimo": 60, "septuagésimo": 70, - "eightieth": 80, + "octogésimo": 80, "nonagésimo": 90 } }, @@ -623,7 +653,8 @@ "hundred" ], "values": { - "centésimo": 100 + "centésimo": 100, + "centésima": 100 } }, { @@ -634,11 +665,23 @@ ], "values": { "milésimo": 1000, + "milésima": 1000, + "milésimas": 1000, "millonésimo": 1000000, + "millonésima": 1000000, + "millonésimas": 1000000, "milmillonésimo": 1000000000, + "milmillonésima": 1000000000, + "milmillonésimas": 1000000000, "billonésimo": 1000000000000, + "billonésima": 1000000000000, + "billonésimas": 1000000000000, "milbillonésimo": 1000000000000000, - "trillonésimo": 1000000000000000000 + "milbillonésima": 1000000000000000, + "milbillonésimas": 1000000000000000, + "trillonésimo": 1000000000000000000, + "trillonésima": 1000000000000000000, + "trillonésimas": 1000000000000000000 } }, { @@ -665,6 +708,7 @@ "centésima": 0.01, "pc": 0.01, "%": 0.01, + "por ciento": 0.01, "pormil": 0.001, "milésima": 0.001, "‰": 0.001 @@ -698,7 +742,7 @@ "noviembre": 11, "nov": 11, "diciembre": 12, - "dec": 12 + "dic": 12 } }, { @@ -740,6 +784,8 @@ "madrugada": 3, "amanecer": 6, "amaneciendo": 6, + "desayuno": 7, + "desayunos": 7, "mañana": 9, "mañanas": 9, "almuerzo": 12, @@ -800,8 +846,7 @@ "1 WEEKS": [ "semana", "semanas", - "sem", - "s" + "sem" ], "1 MONTHS": [ "mes", From 96e668ce29cb580e7d4587572ed0a25a23e866a1 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 13 Jul 2025 15:51:32 -0500 Subject: [PATCH 05/30] Updated tokenizer.json --- .../resources/config/es-es/tokenizer.json | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json 
b/numbers/src/main/resources/config/es-es/tokenizer.json index eba05f96..846fcfff 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -598,16 +598,25 @@ ], "values": { "primero": 1, + "primera": 1, "primer": 1, "segundo": 2, + "segunda": 2, "tercero": 3, + "tercera": 3, "tercer": 3, "cuarto": 4, + "cuarta": 4, "quinto": 5, + "quinta": 5, "sexto": 6, + "sexta": 6, "séptimo": 7, + "séptima": 7, "octavo": 8, - "noveno": 9 + "octava": 8, + "noveno": 9, + "novena": 9 } }, { @@ -618,15 +627,25 @@ ], "values": { "décimo": 10, + "décima": 10, "decimoprimero": 11, + "decimoprimera": 11, "decimosegundo": 12, + "decimosegunda": 12, "decimotercero": 13, + "decimotercera": 13, "decimocuarto": 14, + "decimocuarta": 14, "decimoquinto": 15, + "decimoquinta": 15, "decimosexto": 16, + "decimosexta": 16, "decimoséptimo": 17, + "decimoséptima": 17, "decimooctavo": 18, - "decimonoveno": 19 + "decimooctava": 18, + "decimonoveno": 19, + "decimonovena": 19 } }, { @@ -637,13 +656,21 @@ ], "values": { "vigésimo": 20, + "vigésima": 20, "trigésimo": 30, + "trigésima": 30, "cuadragésimo": 40, + "cuadragésima": 40, "quincuagésimo": 50, + "quincuagésima": 50, "sexagésimo": 60, + "sexagésima": 60, "septuagésimo": 70, + "septuagésima": 70, "octogésimo": 80, - "nonagésimo": 90 + "octogésima": 80, + "nonagésimo": 90, + "nonagésima": 90 } }, { From fb98610f4073ad262e72071882659d986ca36d17 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 13 Jul 2025 16:20:22 -0500 Subject: [PATCH 06/30] Updated tokenizer.json --- .../resources/config/es-es/tokenizer.json | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 846fcfff..79a4208f 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -151,7 +151,8 @@ 
"date_time_ignore" ], "values": [ - "de" + "de", + "más" ] }, { @@ -290,10 +291,16 @@ "values": [ "este", "estos", + "esta", + "estas", "ese", "esos", + "esa", + "esas", "aquel", - "aquellos" + "aquellos", + "aquella", + "aquellas" ] }, { @@ -325,7 +332,11 @@ "positive" ], "values": [ - "en" + "en", + "en el", + "en la", + "en los", + "en las" ] }, { @@ -336,7 +347,9 @@ ], "values": [ "siguiente", - "siguientes" + "siguientes", + "posterior", + "posteriores" ] }, { @@ -346,7 +359,9 @@ ], "values": [ "pasado", - "pasados" + "pasados", + "transcurrido", + "transcurridos" ] }, { @@ -357,7 +372,14 @@ "values": [ "anterior", "pasado", - "precedente" + "pasados", + "pasada", + "pasadas", + "precedido", + "precedidos", + "precedida", + "precedidas", + "hace" ] }, { From 3042c4fc9a09757e5affaeba9f7517aa3c426e6d Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 13 Jul 2025 16:26:11 -0500 Subject: [PATCH 07/30] Delete unnecesary "o" block in tokenizer.json spanish file --- numbers/src/main/resources/config/es-es/tokenizer.json | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 79a4208f..78622592 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -529,16 +529,6 @@ "nueve": 9 } }, - { - "categories": [ - "number", - "digit_after_point", - "pre_oclock" - ], - "values": { - "o": 0 - } - }, { "categories": [ "number", From 11124a0135fa8df65bc6c510958005e920ea28d1 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 13 Jul 2025 16:35:25 -0500 Subject: [PATCH 08/30] Updated tokenizer.json --- .../main/resources/config/es-es/tokenizer.json | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 78622592..58ede050 100644 --- 
a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -321,8 +321,13 @@ ], "values": [ "siguiente", + "siguientes", "posterior", - "próximo" + "posteriores", + "próximo", + "próximos", + "próxima", + "próximas" ] }, { @@ -349,7 +354,11 @@ "siguiente", "siguientes", "posterior", - "posteriores" + "posteriores", + "próximo", + "próximos", + "próxima", + "próximas" ] }, { @@ -360,6 +369,8 @@ "values": [ "pasado", "pasados", + "anterior", + "anteriores", "transcurrido", "transcurridos" ] @@ -371,6 +382,7 @@ ], "values": [ "anterior", + "anteriores", "pasado", "pasados", "pasada", From 0b623dcd0f8a82718e83c9921994ae31c7bee0b5 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 13 Jul 2025 17:00:12 -0500 Subject: [PATCH 09/30] Created test folder for spanish tokenizer --- .../resources/config/es-es/tokenizer.json | 3 +- .../numbers/lang/es/DateTimeConfigTest.java | 10 + .../lang/es/DateTimeExtractorUtilsTest.java | 193 +++++++ .../dicio/numbers/lang/es/DateTimeTest.java | 46 ++ .../lang/es/DurationExtractorUtilsTest.java | 121 ++++ .../numbers/lang/es/ExtractDateTimeTest.java | 526 ++++++++++++++++++ .../numbers/lang/es/ExtractDurationTest.java | 27 + .../numbers/lang/es/ExtractNumbersTest.java | 394 +++++++++++++ .../numbers/lang/es/NiceDurationTest.java | 74 +++ .../dicio/numbers/lang/es/NiceNumberTest.java | 66 +++ .../dicio/numbers/lang/es/NiceTimeTest.java | 115 ++++ .../lang/es/NumberExtractorUtilsTest.java | 134 +++++ .../numbers/lang/es/ParserParamsTest.java | 64 +++ .../numbers/lang/es/PronounceNumberTest.java | 235 ++++++++ .../numbers/lang/es/TokenizerConfigTest.java | 11 + 15 files changed, 2018 insertions(+), 1 deletion(-) create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeConfigTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java 
create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDurationTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/es/TokenizerConfigTest.java diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 58ede050..9e3a0d2e 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -327,7 +327,8 @@ "próximo", "próximos", "próxima", - "próximas" + "próximas", + "dentro de" ] }, { diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeConfigTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeConfigTest.java new file mode 100644 index 00000000..6092f381 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeConfigTest.java @@ -0,0 +1,10 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.test.DateTimeConfigTestBase; + +public class DateTimeConfigTest extends DateTimeConfigTestBase { + @Override + public String configFolder() { + return "config/es-es"; + } +} diff --git 
a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java new file mode 100644 index 00000000..9b7431c7 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java @@ -0,0 +1,193 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.t; +import static org.dicio.numbers.util.NumberExtractorUtils.signBeforeNumber; +import static java.time.temporal.ChronoUnit.MONTHS; + +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.DateTimeExtractorUtilsTestBase; +import org.dicio.numbers.util.DateTimeExtractorUtils; +import org.dicio.numbers.util.NumberExtractorUtils; +import org.junit.Test; + +import java.time.LocalDateTime; + +public class DateTimeExtractorUtilsTest extends DateTimeExtractorUtilsTestBase { + + // Saturday the 4th of February, 2023, 22:03:47 + private static final LocalDateTime NOW = LocalDateTime.of(2023, 2, 4, 22, 3, 47, 482175927); + + @Override + public String configFolder() { + return "config/es-es"; + } + + @Override + public DateTimeExtractorUtils build(final TokenStream ts) { + final SpanishNumberExtractor numberExtractor = new SpanishNumberExtractor(ts, false); + return new DateTimeExtractorUtils(ts, NOW, (fromInclusive, toInclusive) -> + NumberExtractorUtils.extractOneIntegerInRange(ts, fromInclusive, toInclusive, + () -> signBeforeNumber(ts, () -> numberExtractor.numberInteger(false))) + ); + } + @Test + public void testRelativeMonthDuration() { + assertRelativeMonthDuration("septiembre que viene", t(7, MONTHS), 2); + assertRelativeMonthDuration("próximo abril y de", t(2, MONTHS), 3); + assertRelativeMonthDuration("último abril y de", t(-10, MONTHS), 3); + assertRelativeMonthDuration("febrero que vendrá", t(12, MONTHS), 2); + assertRelativeMonthDuration("febrero que pasó", t(-12, MONTHS), 2); + assertRelativeMonthDuration("enero pasado", t(-1, 
MONTHS), 2); + } + + @Test + public void testRelativeMonthDurationNull() { + assertRelativeMonthDurationNull("hola cómo estás"); + assertRelativeMonthDurationNull("en noviembre ocurrirá"); + assertRelativeMonthDurationNull("octubre"); + assertRelativeMonthDurationNull("en dos octubres"); + assertRelativeMonthDurationNull("en dos meses"); + } + + @Test + public void testRelativeToday() { + assertRelativeToday("hoy"); + assertRelativeToday("hoy ahora mismo"); + assertRelativeToday("hoy prueba"); + assertRelativeToday("hoy y"); + } + + @Test + public void testRelativeTodayNull() { + assertRelativeTodayNull("hola cómo estás"); + assertRelativeTodayNull("el mismo hoy"); + assertRelativeTodayNull("el día de hoy"); + assertRelativeTodayNull("ayer"); + assertRelativeTodayNull("mañana"); + } + + @Test + public void testRelativeDayOfWeekDuration() { + assertRelativeDayOfWeekDuration("el siguiente jueves", 5, 2); + assertRelativeDayOfWeekDuration("el anterior jueves", -2, 2); + assertRelativeDayOfWeekDuration("los dos domingos pasados sí", -13, 3); + assertRelativeDayOfWeekDuration("tres y jueves y siguientes", 17, 5); + assertRelativeDayOfWeekDuration("cuatro martes antes y", -26, 4); + assertRelativeDayOfWeekDuration("siguiente domingo", 7, 2); + assertRelativeDayOfWeekDuration("este sábado", -7, 2); + } + + @Test + public void testRelativeDayOfWeekDurationNull() { + assertRelativeDayOfWeekDurationNull("hola cómo estás"); + assertRelativeDayOfWeekDurationNull("lunes"); + assertRelativeDayOfWeekDurationNull("este lunes"); + assertRelativeDayOfWeekDurationNull("dos viernes"); + assertRelativeDayOfWeekDurationNull("en dos días"); + assertRelativeDayOfWeekDurationNull("en dos sábados"); + assertRelativeDayOfWeekDurationNull("un lunes anterior"); + assertRelativeDayOfWeekDurationNull("ayes y mañana"); + } + + @Test + public void testMinute() { + assertMinute("cero a b c", 0, 1); + assertMinute("cincuenta y nueve horas", 59, 2); + assertMinute("quince y", 15, 1); + 
assertMinute("veintiocho s", 28, 3); + assertMinute("seis mins prueba", 6, 2); + assertMinute("treinta y seis de min", 36, 2); + assertMinute("44m de", 44, 2); + } + + @Test + public void testMinuteNull() { + assertMinuteNull("hola cómo estás"); + assertMinuteNull("sesenta minutos"); + assertMinuteNull("ciento y veinte"); + assertMinuteNull("menos dieciséis"); + assertMinuteNull("12000 minutos"); + assertMinuteNull("y dos de"); + } + + @Test + public void testSecond() { + assertSecond("cero a b c", 0, 1); + assertSecond("ciento nueve horas", 59, 2); + assertSecond("quince y", 15, 1); + assertSecond("veinto y ocho h", 28, 3); + assertSecond("seis segs test", 6, 2); + assertSecond("treinta seise de seg", 36, 2); + assertSecond("44s de", 44, 2); + } + + @Test + public void testSecondNull() { + assertSecondNull("hola cómo estás"); + assertSecondNull("sesenta segundos"); + assertSecondNull("ciento y veinte"); + assertSecondNull("menos dieciseis"); + assertSecondNull("12000 segundos"); + assertSecondNull("y dos de"); + } + + @Test + public void testBcad() { + assertBcad("a.C. prueba", false, 3); + assertBcad("d.C. and", true, 3); + assertBcad("adc prueba y", true, 1); + assertBcad("antes de Cristo", false, 2); + assertBcad("d y Domini", true, 3); + assertBcad("ace", false, 1); + assertBcad("d current", false, 2); + + // there is a workaround for this in spanishDateTimeExtractor + assertBcad("a.c.e.", false, 3); + } + + @Test + public void testBcadNull() { + assertBcadNull("a.m."); + assertBcadNull("después prueba Cristo"); + assertBcadNull("y antes Cristo"); + assertBcadNull("prueba c"); + assertBcadNull("m"); + assertBcadNull("c prueba"); + } + + @Test + public void testAmpm() { + assertAmpm("a.m. prueba", false, 3); + assertAmpm("p.m. 
y", true, 3); + assertAmpm("am y prueba", false, 1); + assertAmpm("post meridiano", true, 2); + assertAmpm("p y meridiem", true, 3); + } + + @Test + public void testAmpmNull() { + assertAmpmNull("A.C."); + assertAmpmNull("ante prueba meridiem"); + assertAmpmNull("y post m"); + assertAmpmNull("prueba m"); + assertAmpmNull("c"); + assertAmpmNull("aym"); + assertAmpmNull("meridiano prueba"); + } + + @Test + public void testMonthName() { + assertMonthName("enero", 1); + assertMonthName("dic e", 12); + assertMonthName("sept ember", 9); + assertMonthName("mar", 3); + } + + @Test + public void testMonthNameNull() { + assertMonthNameNull("lunes"); + assertMonthNameNull("jaguar"); + assertMonthNameNull("hola feb"); + assertMonthNameNull("y dic de"); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java new file mode 100644 index 00000000..c5a06799 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java @@ -0,0 +1,46 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.formatter.Formatter; +import org.dicio.numbers.test.DateTimeTestBase; +import org.junit.Test; + +import java.time.LocalDate; +import java.time.LocalDateTime; + +import static org.junit.Assert.assertEquals; + +public class DateTimeTest extends DateTimeTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } + + @Override + public Formatter buildNumberFormatter() { + return new SpanishFormatter(); + } + + @Test + public void testNiceDate() { + // just check that the NumberParserFormatter functions do their job + assertEquals("miércoles, abril veintiocho, dos mil veintiuno", + pf.niceDate(LocalDate.of(2021, 4, 28)).get()); // 2021-04-28 is a Wednesday + assertEquals("domingo, agosto trece", + pf.niceDate(LocalDate.of(-84, 8, 13)).now(LocalDate.of(-84, 8, 23)).get()); + } + + @Test + public void testNiceYear() { + // just check that the NumberParserFormatter functions do
their job + assertEquals("mil novecientos ochenta y cuatro", pf.niceYear(LocalDate.of(1984, 4, 28)).get()); + assertEquals("ochocientos diez a.C.", pf.niceYear(LocalDate.of(-810, 8, 13)).get()); + } + + @Test + public void testNiceDateTime() { + // just check that the NumberParserFormatter functions do their job; expected day/hour words must match the inputs + assertEquals("miércoles, doce de septiembre, mil setecientos sesenta y cuatro al mediodía", pf.niceDateTime(LocalDateTime.of(1764, 9, 12, 12, 0)).get()); + assertEquals("jueves, tres de noviembre, trescientos veintiocho a.C. a las cinco y siete", pf.niceDateTime(LocalDateTime.of(-328, 11, 3, 5, 7)).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java new file mode 100644 index 00000000..3d5b75df --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java @@ -0,0 +1,121 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.DAY; +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.HOUR; +import static org.dicio.numbers.test.TestUtils.MICROS; +import static org.dicio.numbers.test.TestUtils.MILLIS; +import static org.dicio.numbers.test.TestUtils.MINUTE; +import static org.dicio.numbers.test.TestUtils.MONTH; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.WEEK; +import static org.dicio.numbers.test.TestUtils.YEAR; +import static org.dicio.numbers.test.TestUtils.t; +import static org.junit.Assert.assertTrue; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.DurationExtractorUtilsTestBase; +import org.dicio.numbers.unit.Duration; +import org.dicio.numbers.util.DurationExtractorUtils; +import org.junit.Test; + +/** + * TODO also test extractDurationAtCurrentPosition + */ +public
class DurationExtractorUtilsTest extends DurationExtractorUtilsTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } + + @Override + public Duration extractDuration(final TokenStream ts, final boolean shortScale) { + final SpanishNumberExtractor numberExtractor + = new SpanishNumberExtractor(ts, shortScale); + return new DurationExtractorUtils(ts, numberExtractor::numberNoOrdinal).duration(); + } + + + @Test + public void testDurationNumberAndUnit() { + assertDuration("mil millones nanosegundos", F, t(1000)); + assertDuration("mil millones nanosegundos", T, t(1)); + assertDuration("mil setecientos veintiocho μs", F, t(0, 1728 * MICROS)); + assertDuration("cien milisegundos ", T, t(0, 100 * MILLIS)); // milliseconds, not microseconds + assertDuration("18s", F, t(18)); + assertDuration("un seg", F, t(1)); + assertDuration("59 minuto s", T, t(59 * MINUTE)); + assertDuration("veintitrés horas", F, t(23 * HOUR)); + assertDuration("media hora", T, t(HOUR / 2)); + assertDuration("uno punto dos día", T, t(1.2 * DAY)); + assertDuration("medio día", F, t(DAY / 2)); + assertDuration("diez y semanas y", F, t(10 * WEEK)); // positive twin of the "y diez y semanas y" case below + assertDuration("6 m", T, t(6 * MONTH)); + assertDuration("tres mil millones de años antes", T, t(3e9 * YEAR)); + assertDuration("quince décadas", T, t(150 * YEAR)); + assertDuration("siglo un billonésimo", T, t(1e-12 * 100 * YEAR)); + assertDuration("siglo un billonésimo", F, t(1e-9 * 100 * YEAR)); + assertDuration("1 milenio", F, t(1000 * YEAR)); + assertNoDuration("cuarenta y tres milenios cuatro", T); + assertNoDuration("y diez y semanas y", F); + assertNoDuration("ciento tests", F); + assertNoDuration("punto treinta y cuatro gramos", T); + } + + @Test + public void testDurationOnlyUnit() { + assertDuration("hora minuto milenio", T, t(1000 * YEAR + HOUR + MINUTE)); + assertDuration("milisegundo y segundo, microsegundo", F, t(1, MILLIS + MICROS)); + assertDuration("segundos segundo s", T, t(2)); + assertDuration("minuto horas años", F, t(MINUTE + HOUR)); +
assertNoDuration("hola millisegundo", F); + assertNoDuration("está bien", T); + assertNoDuration("ns μs ms s m h d sem mes a", F); + } + + @Test + public void testDurationOf() { + assertDuration("dos décimas de segundo", F, t(0, 200 * MILLIS)); + assertDuration("un par de horas", F, t(2 * HOUR)); + assertNoDuration("muchos segundos", F); + assertNoDuration("decenas de líneas de prueba", T); + assertNoDuration("hola dos cientos de hola", F); + assertNoDuration("hola de sem", F); + } + + @Test + public void testMultipleDurationGroups() { + assertDuration("veinte minutos y treinta y seis segundos porque", T, t(20 * MINUTE + 36)); + assertDuration("siete días, 21 horas y doce minutos para llegar a usted", F, t(7 * DAY + 21 * HOUR + 12 * MINUTE)); + assertDuration("minuto, segundos y milisegundo, microsegundos nanosegundos de prueba ", T, t(MINUTE + 1, MILLIS + MICROS + 1)); + assertDuration("5 ns ns", F, t(0, 5)); + assertNoDuration("ms 5 ns ns", F); + } + + @Test(timeout = 4000) // 1024 formats + parses take <2s, use 4s timeout just for slower PCs + public void testPerformanceWithFormatter() { + // TODO there are no fractions of second here since the formatter does not support them + final java.time.Duration[] alternatives = { + t(1), t(5 * MINUTE), t(2 * HOUR), t(16 * DAY), t(WEEK), t(3 * MONTH), t(5 * YEAR), + t(1e8 * YEAR), t(17 * WEEK), t(45) + }; + + final ParserFormatter npf = new ParserFormatter(new SpanishFormatter(), null); + for (int i = 0; i < (1 << alternatives.length); ++i) { + java.time.Duration durationToTest = java.time.Duration.ZERO; + for (int j = 0; j < alternatives.length; ++j) { + if ((i & (1 << j)) != 0) { + durationToTest = durationToTest.plus(alternatives[j]); + } + } + + // the formatter only supports short scale (TODO maybe allow customizing?) 
+ final String formatted = npf.niceDuration(new Duration(durationToTest)).get(); + final TokenStream ts = new TokenStream(tokenizer.tokenize(formatted)); + assertDuration(formatted, ts, T, durationToTest); + assertTrue(ts.finished()); + } + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java new file mode 100644 index 00000000..a06d7ab7 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java @@ -0,0 +1,526 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.niceDuration; +import static org.dicio.numbers.test.TestUtils.t; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static java.time.temporal.ChronoUnit.DAYS; +import static java.time.temporal.ChronoUnit.MONTHS; +import static java.time.temporal.ChronoUnit.SECONDS; +import static java.time.temporal.ChronoUnit.WEEKS; +import static java.time.temporal.ChronoUnit.YEARS; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.WithTokenizerTestBase; +import org.dicio.numbers.unit.Duration; +import org.junit.Test; + +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.util.function.Function; + +public class ExtractDateTimeTest extends WithTokenizerTestBase { + + // Sunday the 5th of February, 2023, 9:41:12 + private static final LocalDateTime NOW = LocalDateTime.of(2023, 2, 5, 9, 41, 12, 759274821); + + @Override + public String configFolder() { + return "config/es-es"; + } + + + private void assertRelativeDurationFunction(final String s, + 
final Duration expectedDuration, + final int finalTokenStreamPosition, + final Function durationFunction) { + // some random but deterministic values: we don't actually use big numbers here so it + // shouldn't make a difference, and preferMonthBeforeDay only affects date and dateTime + final boolean shortScale = (s.hashCode() % 2) == 0; + final boolean preferMonthBeforeDay = ((s.hashCode() / 2) % 2) == 0; + + final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); + final Duration actualDuration = durationFunction.apply(new SpanishDateTimeExtractor(ts, shortScale, preferMonthBeforeDay, NOW)); + assertNotNull("null relative duration for string \"" + s + "\"", actualDuration); + assertEquals("wrong final token position for string \"" + s + "\"", + finalTokenStreamPosition, ts.position); + assertTrue("wrong relative duration for string \"" + s + "\": expected \"" + + niceDuration(expectedDuration) + "\" but got \"" + + niceDuration(actualDuration) + "\"", + expectedDuration.nanos == actualDuration.nanos + && expectedDuration.days == actualDuration.days + && expectedDuration.months == actualDuration.months + && expectedDuration.years == actualDuration.years); + } + + private void assertRelativeDurationFunctionNull(final String s, + final Function durationFunction) { + // some random but deterministic values: we don't actually use big numbers here so it + // shouldn't make a difference, and preferMonthBeforeDay only affects date and dateTime + final boolean shortScale = (s.hashCode() % 2) == 0; + final boolean preferMonthBeforeDay = ((s.hashCode() / 2) % 2) == 0; + + final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); + final Duration duration = durationFunction.apply(new SpanishDateTimeExtractor(ts, shortScale, preferMonthBeforeDay, NOW)); + + if (duration != null) { + fail("expected no relative duration (null), but got \"" + niceDuration(duration) + + "\""); + } + } + + private void assertFunction(final String s, + final boolean 
preferMonthBeforeDay, + final T expectedResult, + int finalTokenStreamPosition, + final Function function) { + // some random but deterministic value: we don't actually use big numbers here so it + // shouldn't make a difference + final boolean shortScale = (s.hashCode() % 2) == 0; + + final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); + assertEquals("wrong result for string \"" + s + "\"", + expectedResult, function.apply(new SpanishDateTimeExtractor(ts, shortScale, preferMonthBeforeDay, NOW))); + assertEquals("wrong final token position for string \"" + s + "\"", + finalTokenStreamPosition, ts.position); + } + + private void assertFunctionNull(final String s, + final boolean preferMonthBeforeDay, + final Function numberFunction) { + assertFunction(s, preferMonthBeforeDay, null, 0, numberFunction); + } + + private void assertRelativeDuration(final String s, final Duration expectedDuration, int finalTokenStreamPosition) { + assertRelativeDurationFunction(s, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeDuration); + } + + private void assertRelativeDurationNull(final String s) { + assertRelativeDurationFunctionNull(s, SpanishDateTimeExtractor::relativeDuration); + } + + private void assertRelativeTomorrow(final String s, final int expectedDuration, int finalTokenStreamPosition) { + assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeTomorrow); + } + + private void assertRelativeTomorrowNull(final String s) { + assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeTomorrow); + } + + private void assertRelativeYesterday(final String s, final int expectedDuration, int finalTokenStreamPosition) { + assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeYesterday); + } + + private void assertRelativeYesterdayNull(final String s) { + assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeYesterday); + } + + private 
void assertHour(final String s, final int expected, int finalTokenStreamPosition) { + assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::hour); + } + + private void assertHourNull(final String s) { + assertFunctionNull(s, false, SpanishDateTimeExtractor::hour); + } + + private void assertMomentOfDay(final String s, final int expected, int finalTokenStreamPosition) { + assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::momentOfDay); + } + + private void assertMomentOfDayNull(final String s) { + assertFunctionNull(s, false, SpanishDateTimeExtractor::momentOfDay); + } + + private void assertNoonMidnightLike(final String s, final int expected, int finalTokenStreamPosition) { + assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::noonMidnightLike); + } + + private void assertNoonMidnightLikeNull(final String s) { + assertFunctionNull(s, false, SpanishDateTimeExtractor::noonMidnightLike); + } + + private void assertSpecialMinute(final String s, final int expected, int finalTokenStreamPosition) { + assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::specialMinute); + } + + private void assertSpecialMinuteNull(final String s) { + assertFunctionNull(s, false, SpanishDateTimeExtractor::specialMinute); + } + + private void assertOClock(final String s, int finalTokenStreamPosition) { + assertFunction(s, false, true, finalTokenStreamPosition, SpanishDateTimeExtractor::oClock); + } + + private void assertOClockFalse(final String s) { + assertFunction(s, false, false, 0, SpanishDateTimeExtractor::oClock); + } + + // TODO bcad, o clock + private void assertDate(final String s, final boolean preferMonthBeforeDay, final LocalDate expected, int finalTokenStreamPosition) { + assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::date); + } + + private void assertDate(final String s, final LocalDate 
expected, int finalTokenStreamPosition) { + assertDate(s, false, expected, finalTokenStreamPosition); + assertDate(s, true, expected, finalTokenStreamPosition); + } + + private void assertDateNull(final String s) { + assertFunctionNull(s, true, SpanishDateTimeExtractor::date); + assertFunctionNull(s, false, SpanishDateTimeExtractor::date); + } + + private void assertBcad(final String s, final Boolean expectedAd, int finalTokenStreamPosition) { + assertFunction(s, false, expectedAd, finalTokenStreamPosition, SpanishDateTimeExtractor::bcad); + } + + private void assertTime(final String s, final LocalTime expected, int finalTokenStreamPosition) { + assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::time); + } + + private void assertTimeNull(final String s) { + assertFunctionNull(s, false, SpanishDateTimeExtractor::time); + } + + private void assertTimeWithAmpm(final String s, final LocalTime expected, int finalTokenStreamPosition) { + assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::timeWithAmpm); + } + + private void assertTimeWithAmpmNull(final String s) { + assertFunctionNull(s, false, SpanishDateTimeExtractor::timeWithAmpm); + } + + private void assertDateTime(final String s, final boolean preferMonthBeforeDay, final LocalDateTime expected, int finalTokenStreamPosition) { + assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::dateTime); + } + + private void assertDateTime(final String s, final LocalDateTime expected, int finalTokenStreamPosition) { + assertDateTime(s, false, expected, finalTokenStreamPosition); + assertDateTime(s, true, expected, finalTokenStreamPosition); + } + + private void assertDateTimeNull(final String s) { + assertFunctionNull(s, true, SpanishDateTimeExtractor::dateTime); + assertFunctionNull(s, false, SpanishDateTimeExtractor::dateTime); + } + + + @Test + public void testRelativeDuration() { + 
assertRelativeDuration("en dos semanas llegaré", t(2, WEEKS), 3); + assertRelativeDuration("hace cuatro semanas", t(-4, WEEKS), 3); // "hace" = ago; unit is weeks + assertRelativeDuration("segundos después se cayó", t(1, SECONDS), 2); + assertRelativeDuration("en un par de años", t(2, YEARS), 5); // "un par" = 2 + assertRelativeDuration("nueve días antes un", t(-9, DAYS), 3); // stops after "antes" + assertRelativeDuration("setenta años pasados", t(-70, YEARS), 3); + assertRelativeDuration("tres meses y dos días después", t(3, MONTHS).plus(t(2, DAYS)), 6); // "después" = later, so positive + assertRelativeDuration("los últimos sesenta y siete siglos comenzaron hace seis mil setecientos años", t(-6700, YEARS), 6); // stops after "siglos" + } + + @Test + public void testRelativeDurationNull() { + assertRelativeDurationNull("hola cómo estás"); + assertRelativeDurationNull("cuatro semestres"); + assertRelativeDurationNull("sabes que en una semana"); + assertRelativeDurationNull("y pasaron dos meses"); + assertRelativeDurationNull("el día anterior"); + } + + @Test + public void testRelativeTomorrow() { + assertRelativeTomorrow("mañana iremos", 1, 1); + assertRelativeTomorrow("pasado mañana y", 2, 4); + assertRelativeTomorrow("el día siguiente y", 2, 3); + assertRelativeTomorrow("el siguiente día después", 1, 1); + } + + @Test + public void testRelativeTomorrowNull() { + assertRelativeTomorrowNull("hola cómo estás"); + assertRelativeTomorrowNull("mañana"); + assertRelativeTomorrowNull("del días después de mañana"); + assertRelativeTomorrowNull("ayer"); + assertRelativeTomorrowNull("hoy"); + assertRelativeTomorrowNull("el día después de la mañana"); + assertRelativeTomorrowNull("el día después de mañana"); + } + + @Test + public void testRelativeYesterday() { + assertRelativeYesterday("ayer yo he estado", -1, 1); + assertRelativeYesterday("el día antes de ayer y", -2, 4); + assertRelativeYesterday("antiayer prueba", -2, 1); + assertRelativeYesterday("ayer el día antes de", -1, 1); + } + + @Test + public void testRelativeYesterdayNull() { + assertRelativeYesterdayNull("hola cómo estás"); +
assertRelativeYesterdayNull("y ayer"); + assertRelativeYesterdayNull("hoy"); + assertRelativeYesterdayNull("mañana"); + assertRelativeYesterdayNull("el día antes de mañana"); + assertRelativeYesterdayNull("anteayer"); + } + + @Test + public void testHour() { + assertHour("8:36 prueba", 8, 1); + assertHour("16:44 prueba", 16, 1); + assertHour("veintiún prueba", 21, 2); + assertHour("el cero y", 0, 2); + assertHour("a la uno y veintiseis", 1, 2); + assertHour("twelve o clock", 12, 1); + assertHour("a las diecisiete el", 17, 2); + assertHour("a la uno y las tres", 3, 4); + assertHour("a horas trece", 13, 3); + assertHour("las siete prueba", 7, 2); + } + + @Test + public void testHourNull() { + assertHourNull("hola cómo estás"); + assertHourNull("veinticinco"); + assertHourNull("el menos dos"); + assertHourNull("a la un ciento y cincuenta y cuatro"); + assertHourNull("a la hora"); + assertHourNull("la y cero y"); + assertHourNull("y veinticuatro"); + assertHourNull("el un millón"); + } + + //TODO + + @Test + public void testNoonMidnightLike() { + assertNoonMidnightLike("del mediodía", 0, 2); + assertNoonMidnightLike("middays", 12, 1); + assertNoonMidnightLike("this noon", 12, 2); + } + + @Test + public void testNoonMidnightLikeNull() { + assertNoonMidnightLikeNull("hello how are you"); + assertNoonMidnightLikeNull("this evening and"); + assertNoonMidnightLikeNull("tonight test"); + assertNoonMidnightLikeNull("after dinner"); + assertNoonMidnightLikeNull("before the lunch"); + assertNoonMidnightLikeNull("and at midday"); + assertNoonMidnightLikeNull("and midnight"); + assertNoonMidnightLikeNull("at hour noon"); + assertNoonMidnightLikeNull("in midnight"); + assertNoonMidnightLikeNull("at the midday"); + } + + @Test + public void testMomentOfDay() { + assertMomentOfDay("at midnight", 0, 2); + assertMomentOfDay("noon", 12, 1); + assertMomentOfDay("these midnights", 0, 2); + assertMomentOfDay("this evening and", 21, 2); + assertMomentOfDay("at tonight test", 23, 2); + 
assertMomentOfDay("nighttime test", 3, 1); + assertMomentOfDay("after dinner", 21, 2); + assertMomentOfDay("before the lunch", 11, 3); + assertMomentOfDay("the dinner", 20, 2); + } + + @Test + public void testMomentOfDayNull() { + assertMomentOfDayNull("hello how are you"); + assertMomentOfDayNull("and at midday"); + assertMomentOfDayNull("mid night"); + assertMomentOfDayNull("at hour dinner"); + assertMomentOfDayNull("in dinner"); + } + + @Test + public void testSpecialMinute() { + assertSpecialMinute("a quarter to", -15, 3); + assertSpecialMinute("half of past test", 30, 3); + assertSpecialMinute("a half to eleven", -30, 3); + assertSpecialMinute("zero point two of past", 12, 5); + assertSpecialMinute("thirteen fourteenths to", -56, 3); // 13/14*60 is 55.7 -> rounded to 56 + assertSpecialMinute("at twenty the past", 20, 4); + assertSpecialMinute("the fifty and nine to", -59, 5); + assertSpecialMinute("fifteen past twelve", 15, 2); + } + + @Test + public void testSpecialMinuteNull() { + assertSpecialMinuteNull("hello how are you"); + assertSpecialMinuteNull("two"); + assertSpecialMinuteNull("one hundred and twelve to"); + assertSpecialMinuteNull("minus a quarter to five"); + assertSpecialMinuteNull("four quarters to nine"); + assertSpecialMinuteNull("zero halfs to"); + assertSpecialMinuteNull("zero and comma two past"); + assertSpecialMinuteNull("thirteen and fourteenths past"); + assertSpecialMinuteNull("and fifteen past twelve"); + } + + @Test + public void testOClock() { + assertOClock("o clock", 2); + assertOClock("o'clock", 2); + assertOClock("oclock", 1); + assertOClock("o,clock", 3); + assertOClock("exact", 1); + assertOClock("on the dot", 3); + } + + @Test + public void testOClockFalse() { + assertOClockFalse("hello"); + assertOClockFalse("by the clock"); + assertOClockFalse("clock o"); + assertOClockFalse("clock"); + assertOClockFalse("on"); + } + + @Test + public void testDate() { + assertDate("04/09-4096", F, LocalDate.of(4096, 9, 4), 5); + 
assertDate("04/09-4096", T, LocalDate.of(4096, 4, 9), 5); + assertDate("4 13 2023", LocalDate.of(2023, 4, 13), 3); + assertDate("13.4.2023", LocalDate.of(2023, 4, 13), 5); + assertDate("six of seven of nineteen ninety five", F, LocalDate.of(1995, 7, 6), 7); + assertDate("six of seven of nineteen ninety five", T, LocalDate.of(1995, 6, 7), 7); + assertDate("thursday 26 of may 2022", LocalDate.of(2022, 5, 26), 5); + assertDate("august the second, two", LocalDate.of(2, 8, 2), 5); + assertDate("2nd january, two b.c.", LocalDate.of(-2, 1, 2), 8); + assertDate("mon twelve jun two thousand twelve b.C.", LocalDate.of(-2012, 6, 12), 9); + assertDate("four hundred seventy six AD", LocalDate.of(476, 1, 1), 5); + assertDate("four thousand before common era", LocalDate.of(-4000, 1, 1), 5); + assertDate("four thousand of before Christ", LocalDate.of(4000, 1, 1), 2); + assertDate("tuesday and twenty seven", LocalDate.of(2023, 2, 27), 4); + assertDate("tuesday and twelve", F, LocalDate.of(2023, 2, 12), 3); + assertDate("tuesday and twelve", T, LocalDate.of(2023, 12, 1), 3); // a bit strange + assertDate("november e", LocalDate.of(2023, 11, 1), 1); + assertDate("wednesday test eight", LocalDate.of(2023, 2, 1), 1); + assertDate("monday november", LocalDate.of(2023, 1, 30), 1); + assertDate("october two thousand and twelve", LocalDate.of(2012, 10, 1), 5); + assertDate("999999999", LocalDate.of(999999999,1,1), 1); + // the following work thanks to special case in number extractor! 
+ assertDate("twenty twelve", LocalDate.of(2012, 1, 1), 2); + assertDate("sunday twenty thirteen", LocalDate.of(2023, 2, 5), 1); + } + + @Test + public void testDateNull() { + assertDateNull("hello how are you"); + assertDateNull("am tuedsay"); + assertDateNull("and two thousand and fifteen"); + assertDateNull("of may two"); + assertDateNull("tomorrow"); + assertDateNull("1000000000"); + } + + @Test + public void testBcad() { + // b.c.e special case, not covered by DateTimeExtractorUtils.bcad() + assertBcad("bce", false, 1); + assertBcad("b.c.e.", false, 5); + assertBcad("before current era", false, 3); + assertBcad("current era", true, 2); + } + + @Test + public void testTime() { + assertTime("13:28.33 test", LocalTime.of(13, 28, 33), 4); + assertTime("half past noon", LocalTime.of(12, 30, 0), 3); + assertTime("at fourteen and", LocalTime.of(14, 0, 0), 2); + assertTime("midnight of twelve", LocalTime.of(0, 12, 0), 3); + assertTime("twenty four and zero", LocalTime.of(0, 0, 0), 4); + assertTime("the twenty three and fifty one min and 17 seconds", LocalTime.of(23, 51, 17), 10); + } + + @Test + public void testTimeNull() { + assertTimeNull("hello how are you"); + assertTimeNull("sixty one"); + assertTimeNull("30:59"); + assertTimeNull("minus sixteen"); + assertTimeNull("four million"); + assertTimeNull("evening"); + } + + @Test + public void testTimeWithAmpm() { + assertTimeWithAmpm("11:28.33 pm test", LocalTime.of(23, 28, 33), 5); + assertTimeWithAmpm("half past noon and a quarter", LocalTime.of(12, 30, 0), 3); + assertTimeWithAmpm("at two o'clock in the morning", LocalTime.of(2, 0, 0), 7); + assertTimeWithAmpm("three thirty eight in the afternoon", LocalTime.of(15, 38, 0), 6); + assertTimeWithAmpm("18:29:02 and am", LocalTime.of(18, 29, 2), 5); + assertTimeWithAmpm("evening", LocalTime.of(21, 0, 0), 1); + assertTimeWithAmpm("afternoon at four and three and six", LocalTime.of(16, 3, 6), 7); + // corner cases: + assertTimeWithAmpm("twenty four in the evening", 
LocalTime.of(0, 0, 0), 5); + assertTimeWithAmpm("12 am", LocalTime.of(0, 0, 0), 2); + } + + @Test + public void testTimeWithAmpmNull() { + assertTimeWithAmpmNull("hello how are you"); + assertTimeWithAmpmNull("sixty one"); + assertTimeWithAmpmNull("30:59"); + assertTimeWithAmpmNull("minus sixteen"); + assertTimeWithAmpmNull("four million"); + } + + @Test + public void testDateTime() { + assertDateTime("mañana de 12:45", LocalDateTime.of(2023, 2, 6, 12, 45, 0), 4); + assertDateTime("26/12/2003 19:18:59", LocalDateTime.of(2003, 12, 26, 19, 18, 59), 8); + assertDateTime("19:18:59 26/12/2003 test", LocalDateTime.of(2003, 12, 26, 19, 18, 59), 8); + assertDateTime("26/12/2003 19:18:59 and", LocalDateTime.of(2003, 12, 26, 19, 18, 59), 8); + assertDateTime("19:18:59 26/12/2003", LocalDateTime.of(2003, 12, 26, 19, 18, 59), 8); + assertDateTime("5/7/2003 1:2:3 prueba", F, LocalDateTime.of(2003, 5, 7, 1, 2, 3), 8); + assertDateTime("5/7/2003 1:2:3", T, LocalDateTime.of(2003, 7, 5, 1, 2, 3), 8); + assertDateTime("1:2:3 5/7/2003 y", F, LocalDateTime.of(2003, 5, 7, 1, 2, 3), 8); + assertDateTime("1:2:3 5/7/2003", T, LocalDateTime.of(2003, 7, 5, 1, 2, 3), 8); + assertDateTime("next friday at twenty two o clock", LocalDateTime.of(2023, 2, 10, 22, 0, 0), 7); + assertDateTime("the 6 post meridiem of next tuesday", LocalDateTime.of(2023, 2, 7, 18, 0, 0), 7); + assertDateTime("yesterday evening at twenty to 5", LocalDateTime.of(2023, 2, 4, 16, 40, 0), 6); + assertDateTime("in three days evening at eleven", LocalDateTime.of(2023, 2, 8, 23, 0, 0), 6); + assertDateTime("day after morrow and morning test", LocalDateTime.of(2023, 2, 7, 9, 0, 0), 5); + assertDateTime("sunday at 2:45 p.m.", LocalDateTime.of(2023, 2, 5, 14, 45, 0), 7); + assertDateTime("twenty first of jan after a dinner", LocalDateTime.of(2023, 1, 21, 21, 0, 0), 7); + assertDateTime("two days ago at four 40 at dusk", LocalDateTime.of(2023, 2, 3, 16, 40, 0), 8); + assertDateTime("twenty seventh of july at nine thirty nine in 
the evening", LocalDateTime.of(2023, 7, 27, 21, 39, 0), 11); + assertDateTime("twenty three milliseconds", NOW.withDayOfMonth(23), 2); + assertDateTime("next three months on the dot", NOW.plusMonths(3), 3); + assertDateTime("in fifteen d", NOW.plusDays(15), 3); + assertDateTime("thirty two nanoseconds ago", NOW.minusNanos(32), 4); + assertDateTime("dos y días y siete milisegundos antes", NOW.minusNanos(7000000).minusDays(2), 6); + assertDateTime("siete de noviembre, 193 a.C.", NOW.withYear(-193).withMonth(11).withDayOfMonth(7), 8); + } + + @Test + public void testDateTimeNull() { + assertDateTimeNull("hello how are you"); + assertDateTimeNull("test twenty first of jan after a dinner"); + assertDateTimeNull("minus one millisecond"); + } + + @Test + public void testNumberParserExtractDateTime() { + final ParserFormatter npf = new ParserFormatter(null, new SpanishParser()); + assertNull(npf.extractDateTime("hello how are you").getFirst()); + assertEquals(NOW.minusDays(30).withHour(14).withMinute(39).withSecond(0).withNano(0), + npf.extractDateTime("2:39 p.m., thirty days ago").now(NOW).getFirst()); + assertEquals(NOW.plusMinutes(3).plusSeconds(46), + npf.extractDateTime("in three minutes forty six seconds").now(NOW).getFirst()); + assertEquals(NOW.withYear(3).withMonth(2).withDayOfMonth(1), + npf.extractDateTime("1 2/3").preferMonthBeforeDay(false).now(NOW).getFirst()); + assertEquals(NOW.withYear(3).withMonth(1).withDayOfMonth(2), + npf.extractDateTime("1.2,3").preferMonthBeforeDay(true).now(NOW).getFirst()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDurationTest.java new file mode 100644 index 00000000..850db7c4 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDurationTest.java @@ -0,0 +1,27 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.DAY; +import static org.dicio.numbers.test.TestUtils.t; 
+import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.test.WithTokenizerTestBase; +import org.junit.Test; + +public class ExtractDurationTest extends WithTokenizerTestBase { + @Override + public String configFolder() { + return "config/es-es"; + } + + @Test + public void testNumberParserExtractDuration() { + final ParserFormatter npf = new ParserFormatter(null, new SpanishParser()); + assertNull(npf.extractDuration("hola cómo estás").getFirst()); + assertNull(npf.extractDuration("mil millones de euros").shortScale(true).getFirst()); + assertNull(npf.extractDuration("un millón").shortScale(false).getFirst()); + assertEquals(t(DAY), npf.extractDuration("veinticuatro horas no son dos días").getFirst().toJavaDuration()); + assertEquals(t(2 * DAY), npf.extractDuration("dos días n son veinticuatro horas").getFirst().toJavaDuration()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java new file mode 100644 index 00000000..9dd7dba4 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java @@ -0,0 +1,394 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.WithTokenizerTestBase; +import org.dicio.numbers.unit.Number; +import org.junit.Test; + +import java.util.function.BiFunction; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.n; +import static org.dicio.numbers.test.TestUtils.numberDeduceType; +import static org.junit.Assert.*; + +public class ExtractNumbersTest extends WithTokenizerTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } + + + //TODO Spanish translation + + 
private void assertNumberFunction(final String s, + final boolean shortScale, + final Number value, + final int finalTokenStreamPosition, + final BiFunction numberFunction) { + final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); + final Number number = numberFunction.apply(new EnglishNumberExtractor(ts, shortScale), ts); + assertEquals("wrong value for string " + s, value, number); + assertEquals("wrong final token position for number " + value, finalTokenStreamPosition, + ts.position); + } + + private void assertNumberFunctionNull(final String s, + final boolean shortScale, + final BiFunction numberFunction) { + assertNumberFunction(s, shortScale, null, 0, numberFunction); + } + + private void assertNumberGroupLongScale(final String s, final boolean allowOrdinal, final double lastMultiplier, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, true, numberDeduceType(value).withOrdinal(isOrdinal), finalTokenStreamPosition, + (enp, ts) -> EnglishNumberExtractor.numberGroupLongScale(ts, allowOrdinal, lastMultiplier)); + } + + private void assertNumberGroupLongScaleNull(final String s, final boolean allowOrdinal, final double lastMultiplier) { + assertNumberFunctionNull(s, true, (enp, ts) -> EnglishNumberExtractor.numberGroupLongScale(ts, allowOrdinal, lastMultiplier)); + } + + private void assertNumberInteger(final String s, final boolean shortScale, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, shortScale, numberDeduceType(value).withOrdinal(isOrdinal), finalTokenStreamPosition, + (enp, ts) -> enp.numberInteger(allowOrdinal)); + } + + private void assertNumberIntegerNull(final String s, final boolean allowOrdinal) { + assertNumberFunctionNull(s, true, (enp, ts) -> enp.numberInteger(allowOrdinal)); + assertNumberFunctionNull(s, false, (enp, ts) -> enp.numberInteger(allowOrdinal)); + } + + private void 
assertNumberPoint(final String s, final boolean shortScale, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, shortScale, numberDeduceType(value).withOrdinal(isOrdinal), + finalTokenStreamPosition, (enp, ts) -> enp.numberPoint(allowOrdinal)); + } + + private void assertNumberPointNull(final String s, final boolean allowOrdinal) { + assertNumberFunctionNull(s, true, (enp, ts) -> enp.numberPoint(allowOrdinal)); + assertNumberFunctionNull(s, false, (enp, ts) -> enp.numberPoint(allowOrdinal)); + } + + private void assertNumberSignPoint(final String s, final boolean shortScale, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, shortScale, numberDeduceType(value).withOrdinal(isOrdinal), + finalTokenStreamPosition, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); + } + + private void assertNumberSignPointNull(final String s, final boolean allowOrdinal) { + assertNumberFunctionNull(s, true, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); + assertNumberFunctionNull(s, false, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); + } + + private void assertDivideByDenominatorIfPossible(final String s, final Number startingNumber, final Number value, final int finalTokenStreamPosition) { + assertNumberFunction(s, true, value, finalTokenStreamPosition, + (enp, ts) -> enp.divideByDenominatorIfPossible(startingNumber)); + } + + + @Test + public void testNumberGroupLongScale() { + assertNumberGroupLongScale("one hundred and twenty million", F, 1e9, 120e6, F, 5); + assertNumberGroupLongScale("sixty three quadrillion", F, 1e28, 63e24, F, 3); + assertNumberGroupLongScale("three thousand and six", T, 1e9, 3006, F, 4); + assertNumberGroupLongScale("a hundred thousand", F, 1e6, 100000, F, 3); + assertNumberGroupLongScale("hundred 70 thousand", T, 1e6, 170000, F, 3); + assertNumberGroupLongScale("572 million", F, 1e9, 
572e6, F, 2); + assertNumberGroupLongScale("572012 billion", F, 1e18, 572012e12, F, 2); + assertNumberGroupLongScale("3 million", T, 1e9, 3e6, F, 2); + assertNumberGroupLongScale(", one hundred and ninety one", F, 1e6, 191, F, 6); + } + + @Test + public void testNumberGroupLongScaleOrdinal() { + assertNumberGroupLongScale("seven hundred and sixty four millionth", T, 1e9, 764e6, T, 6); + assertNumberGroupLongScale("seven hundred and sixty four millionth", F, 1e9, 764, F, 5); + assertNumberGroupLongScale("seven hundred and sixty four millionth", F, 1e6, 764, F, 5); + assertNumberGroupLongScale("fifth billionth", T, 1e9, 5, T, 1); + assertNumberGroupLongScale("nineteen hundredth", T, 1e9, 19, F, 1); + assertNumberGroupLongScaleNull("seven hundred and sixty four millionth", T, 1000); + assertNumberGroupLongScaleNull("twelfth thousandth", F, 1e9); + } + + @Test + public void testNumberGroupLongScaleNull() { + assertNumberGroupLongScaleNull("", T, 1e9); + assertNumberGroupLongScaleNull("hello", F, 1e6); + assertNumberGroupLongScaleNull("hello how are you", T, 1e6); + assertNumberGroupLongScaleNull("5000000", T, 1e9); + assertNumberGroupLongScaleNull("one hundred and six", F, 999); + assertNumberGroupLongScaleNull("twelve", T, 0); + assertNumberGroupLongScaleNull("seven billion", F, 1e6); + assertNumberGroupLongScaleNull("nine thousand and one", T, 1000); + assertNumberGroupLongScaleNull("eight million people", F, 1e6); + assertNumberGroupLongScaleNull(" ten ", T, 1e6); + } + + @Test + public void testNumberInteger() { + assertNumberInteger("one hundred and four thousand, six hundred quadrillion, sixty four thousand and one trillion, one hundred thousand billion", F, F, 104600064001100000e12, F, 21); + assertNumberInteger("twenty 5 billion, 1 hundred and sixty four million, seven thousand and nineteen", T, T, 25164007019L, F, 15); + assertNumberInteger("twenty 5 billion, 1 hundred and sixty four million, seven billion", T, T, 25164000000L, F, 10); + 
assertNumberInteger("two thousand, one hundred and ninety one", T, F, 2191, F, 8); + assertNumberInteger("nine hundred and ten", F, T, 910, F, 4); + assertNumberInteger("two million", F, F, 2000000, F, 2); + assertNumberInteger("one thousand and ten", T, T, 1010, F, 4); + assertNumberInteger("1234567890123", T, F, 1234567890123L, F, 1); + assertNumberInteger("654 and", F, T, 654, F, 1); + assertNumberInteger("a hundred four,", F, F, 104, F, 3); + assertNumberInteger("nine thousand, three million", T, T, 9000, F, 2); + } + + @Test + public void testNumberIntegerOrdinal() { + assertNumberInteger("one hundred and four thousand, six hundred quadrillion, sixty four thousand and one trillion, one hundred thousand billionth", F, T, 104600064001100000e12, T, 21); + assertNumberInteger("one hundred and four thousand, six hundred quadrillion, sixty four thousand and one trillionth, one hundred thousand billion", F, T, 104600064001e18, T, 16); + assertNumberInteger("one hundred and four thousand, six hundred quadrillion, sixty four thousand and one trillionth, one hundred thousand billion", F, F, 104600e24, F, 15); + assertNumberInteger("twenty 5 billion, 1 hundred and sixty four million, seven thousand and nineteenth", T, T, 25164007019L, T, 15); + assertNumberInteger("73 billion, twenty three millionth, seven thousand and nineteen", T, T, 73023000000L, T, 6); + assertNumberInteger("one hundred and 6 billion, twenty one million, one billionth", T, T, 106021000000L, F, 9); + assertNumberInteger("one hundred and 6 billion, twenty one million, one thousandth", T, F, 106021000001L, F, 11); + assertNumberInteger("nineteen hundredth", T, T, 1900, T, 2); + assertNumberInteger("twenty oh first", F, T, 2001, T, 3); + assertNumberInteger("twenty oh first", F, F, 20, F, 1); + assertNumberInteger("nineteen 09th", T, T, 1909, T, 3); + assertNumberInteger("nineteen 09th", T, F, 19, F, 1); + assertNumberInteger("eleven sixteenth", F, T, 1116, T, 2); + assertNumberInteger("eleven 
sixteenth", F, F, 11, F, 1); + assertNumberInteger("eighteen twenty first", T, T, 1821, T, 3); + assertNumberInteger("eighteen twenty first", T, F, 1820, F, 2); + assertNumberInteger("thirteen sixtieth", F, T, 1360, T, 2); + assertNumberInteger("thirteen sixtieth", F, F, 13, F, 1); + assertNumberInteger("sixteenth hundred", T, T, 16, T, 1); + assertNumberInteger("sixteenth oh four", T, T, 16, T, 1); + assertNumberInteger("543789th", F, T, 543789, T, 2); + assertNumberInteger("75,483,543 rd", F, T, 75483543, T, 6); + assertNumberIntegerNull("2938th", F); + assertNumberIntegerNull("102,321th", F); + assertNumberIntegerNull("thirteenth hundredth", F); + } + + @Test + public void testNumberIntegerThousandSeparator() { + // independent of short/long scale and of ordinal mode + assertNumberInteger("23,001", T, F, 23001, F, 3); + assertNumberInteger("19,123", T, T, 19123, F, 3); + assertNumberInteger("a 167,42", F, T, 167, F, 2); + assertNumberInteger("1,234,023,054, hello", F, F, 1234023054, F, 7); + assertNumberInteger("23,001, a 500", T, T, 23001, F, 3); + assertNumberInteger("5,030,two", F, F, 5030, F, 3); + assertNumberInteger("67,104,23", F, T, 67104, F, 3); + } + + @Test + public void testNumberIntegerYear() { + // independent of short/long scale and of ordinal mode + assertNumberInteger("two twenty-one", T, T, 2, F, 1); + assertNumberInteger("nineteen 745", F, F, 19, F, 1); + assertNumberInteger("nineteen 25", F, F, 1925, F, 2); + assertNumberInteger("19 twenty five", F, F, 19, F, 1); + assertNumberInteger("19 25", F, F, 19, F, 1); + assertNumberInteger("nineteenth twenty five", F, T, 19, T, 1); + assertNumberInteger("ten 21", F, T, 1021, F, 2); + assertNumberInteger("nineteen oh 6 and two", T, F, 1906, F, 3); + assertNumberInteger("twenty-nought-oh", T, T, 2000, F, 5); + assertNumberInteger("eleven zero 0", F, F, 1100, F, 3); + assertNumberInteger("seventeen 0 0", F, T, 1700, F, 3); + assertNumberInteger("sixty-four-hundred", T, F, 6400, F, 5); + 
assertNumberInteger("two hundred and twelve hundred", T, T, 212, F, 4); + assertNumberInteger("58 hundred", F, F, 5800, F, 2); + assertNumberInteger("nineteen hundred", F, T, 1900, F, 2); + assertNumberInteger("eighteen 1", T, F, 18, F, 1); + } + + private int tokensInFormattedString(final String formatted) { + int tokensInFormatted = 1; + for (int j = 0; j < formatted.length(); ++j) { + if (formatted.charAt(j) == ' ' || formatted.charAt(j) == ',') { + ++tokensInFormatted; + } + } + return tokensInFormatted; + } + + @Test + public void testNumberIntegerWithFormatter() { + final ParserFormatter npf = new ParserFormatter(new EnglishFormatter(), null); + for (int i = 0; i < 1100000000;) { + if (i < 2200) { + ++i; // test all numbers from 0 to 200 (also tests years!) + } else if (i < 1000000) { + i += 1207; + } else { + i += 299527; + } + + // not ordinal + String formatted = npf.pronounceNumber(i).places(0).get(); + int tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, T, T, i, F, tokensInFormatted); + + // ordinal + formatted = npf.pronounceNumber(i).places(0).ordinal(T).get(); + tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, T, T, i, T, tokensInFormatted); + + // long scale not ordinal + formatted = npf.pronounceNumber(i).places(0).shortScale(false).get(); + tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, F, T, i, F, tokensInFormatted); + + // long scale ordinal + formatted = npf.pronounceNumber(i).places(0).shortScale(false).ordinal(true).get(); + tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, F, T, i, T, tokensInFormatted); + } + } + + @Test(timeout = 4000) // 40000 formats + parses take <2s, use 4s timeout just for slower PCs + public void testNumberIntegerPerformanceWithFormatter() { + final ParserFormatter npf = new ParserFormatter(new EnglishFormatter(), null); + final long startingValue = 
54378960497L; + for (long i = startingValue; i < startingValue + 10000; ++i) { + // short scale not ordinal + String formatted = npf.pronounceNumber(i).places(0).get(); + int tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, T, T, i, F, tokensInFormatted); + + // short scale ordinal + formatted = npf.pronounceNumber(i).places(0).ordinal(true).get(); + tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, T, T, i, T, tokensInFormatted); + + // long scale not ordinal + formatted = npf.pronounceNumber(i).places(0).shortScale(false).get(); + tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, F, T, i, F, tokensInFormatted); + + // long scale ordinal + formatted = npf.pronounceNumber(i).places(0).shortScale(false).ordinal(true).get(); + tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, F, T, i, T, tokensInFormatted); + } + } + + @Test + public void testNumberIntegerNull() { + assertNumberIntegerNull("", T); + assertNumberIntegerNull("a hello how are you", F); + assertNumberIntegerNull(", and", T); + assertNumberIntegerNull("oh two", F); + assertNumberIntegerNull(", 123485 and", T); + assertNumberIntegerNull("and 123", F); + assertNumberIntegerNull(" one thousand ", T); + } + + @Test + public void testNumberPoint() { + assertNumberPoint("one thousand, five hundred and seventy four point nine one two oh nought o zero", T, T, 1574.912, F, 16); + assertNumberPoint("twenty three point nought 1 oh 2 three, five hundred", F, T, 23.01023, F, 8); + assertNumberPoint("fifteen-oh-nine point eight four five", F, F, 1509.845, F, 9); + assertNumberPoint("twenty three thousand point sixteen", T, T, 23000, F, 3); + assertNumberPoint("3645.7183", T, F, 3645.7183, F, 3); + assertNumberPoint("twenty five.2", F, T, 25.2, F, 4); + assertNumberPoint("eighty point 6745", F, F, 80.6745, F, 3); + assertNumberPoint("4 point 67 45", T, T, 4.67, F, 
3); + assertNumberPoint("4000 point 6 63", T, F, 4000.6, F, 3); + assertNumberPoint("74567 point six", F, T, 74567.6, F, 3); + assertNumberPoint("nought . 6 8 2 zero twenty", F, F, 0.682, F, 6); + assertNumberPoint("74567 point six", T, T, 74567.6, F, 3); + assertNumberPoint("point 800", T, F, .8, F, 2); + assertNumberPoint("one point twenty", F, T, 1, F, 1); + } + + @Test + public void testNumberPointFraction() { + assertNumberPoint("twenty three million, one hundred thousand and sixty four over sixteen", F, F, 1443754, F, 12); + assertNumberPoint("sixteen over twenty three million, one hundred thousand and sixty four", T, T, 1.0 / 1443754.0, F, 12); + assertNumberPoint("8 thousand and, 192 divided by 4 thousand 96 eight", T, F, 2, F, 10); + assertNumberPoint("ninety eight hundred / one hundred", F, T, 98, F, 6); + assertNumberPoint("twenty four over sixty five", F, T, 24.0 / 65.0, F, 5); + assertNumberPoint("one over five and a half", T, F, 1.0 / 5.0, F, 3); + assertNumberPoint("twenty six divided by seven", T, T, 26.0 / 7.0, F, 5); + assertNumberPoint("47328 over 12093", F, F, 47328.0 / 12093.0, F, 3); + assertNumberPoint("five / six nine two", F, T, 5.0 / 6.0, F, 3); + assertNumberPoint("nine over, two", T, F, 9, F, 1); + assertNumberPoint("eight divided five", T, T, 8.0 / 5.0, F, 3); + assertNumberPoint("six by nineteen", F, F, 6, F, 1); + } + + @Test + public void testNumberPointOrdinal() { + assertNumberPoint("fifth point six", T, T, 5, T, 1); + assertNumberPoint("3 thousand 7 hundred tenth over six", F, T, 3710, T, 5); + assertNumberPoint("3 thousand 7 hundred tenth over six", T, F, 3700, F, 4); + assertNumberPoint("eight point one second", F, F, 8.1, F, 3); + assertNumberPoint("eight point one third", T, T, 8.1, F, 3); + assertNumberPoint("six over fifth", F, T, 6, F, 1); + assertNumberPoint("nine over thirty ninth", T, T, 0.3, F, 3); + assertNumberPoint("nine over thirty ninth", F, F, 0.3, F, 3); + assertNumberPoint("thirteen point 1 2 3 th", T, T, 13.12, 
F, 4); + } + + @Test + public void testNumberPointNull() { + assertNumberPointNull("", F); + assertNumberPointNull("hello world", T); + assertNumberPointNull("point", F); + assertNumberPointNull("point twenty", T); + assertNumberPointNull("point, 1 2 3 4", F); + assertNumberPointNull(". and six four eight", T); + assertNumberPointNull("over two", F); + assertNumberPointNull(" one divided by five", T); + } + + @Test + public void testNumberSignPoint() { + assertNumberSignPoint("minus seventy six thousand, three hundred and fifty six over 23", T, T, -76356.0 / 23.0, F, 12); + assertNumberSignPoint("minus twelve", T, F, -12, F, 2); + assertNumberSignPoint("plus million", F, T, 1000000, F, 2); + assertNumberSignPoint("-1843", F, F, -1843, F, 2); + assertNumberSignPoint("+573,976", T, T, 573976, F, 4); + assertNumberSignPoint("minus 42903.5", T, F, -42903.5, F, 4); + assertNumberSignPoint("minus point oh four", F, T, -.04, F, 4); + } + + @Test + public void testNumberSignPointOrdinal() { + assertNumberSignPoint("minus twelfth", T, T, -12, T, 2); + assertNumberSignPoint("-one hundredth", F, F, -1, F, 2); + assertNumberSignPoint("plus millionth ten", F, T, 1000000, T, 2); + assertNumberSignPoint("-1843th", T, T, -1843, T, 3); + assertNumberSignPoint("+573,976rd", T, T, 573976, T, 5); + assertNumberSignPointNull("minus first", F); + assertNumberSignPointNull("-1843th", F); + } + + @Test + public void testNumberSignPointNull() { + assertNumberSignPointNull("", F); + assertNumberSignPointNull("hello how are you", T); + assertNumberSignPointNull("minus minus 1 hundred and sixty", F); + assertNumberSignPointNull(" plus million", T); + assertNumberSignPointNull(" +- 5", F); + } + + @Test + public void testDivideByDenominatorIfPossible() { + assertDivideByDenominatorIfPossible("fifths", n(5, F), n(1, F), 1); + assertDivideByDenominatorIfPossible("dozen two", n(3, F), n(36, F), 1); + assertDivideByDenominatorIfPossible("halves a", n(19, F), n(9.5, F), 1); + 
assertDivideByDenominatorIfPossible("%", n(50, F), n(0.5, F), 1); + assertDivideByDenominatorIfPossible("‰", n(1000, F), n(1, F), 1); + assertDivideByDenominatorIfPossible("quarter", n(16, F), n(4, F), 1); + assertDivideByDenominatorIfPossible("quarter", n(4.4, F), n(4.4, F), 0); + assertDivideByDenominatorIfPossible("people", n(98, F), n(98, F), 0); + + // "a" could be the numerator of fractions but not really a number, so handled here + assertDivideByDenominatorIfPossible("a tenth", null, n(0.1, F), 2); + assertDivideByDenominatorIfPossible("a ten", null, null, 0); + assertDivideByDenominatorIfPossible("a people", null, null, 0); + assertDivideByDenominatorIfPossible("a tenth", n(2.8, F), n(2.8, F), 0); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java new file mode 100644 index 00000000..41bc2ada --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java @@ -0,0 +1,74 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; + +import org.dicio.numbers.formatter.Formatter; +import org.dicio.numbers.test.NiceDurationTestBase; +import org.junit.Test; + +public class NiceDurationTest extends NiceDurationTestBase { + + @Override + public Formatter buildNumberFormatter() { + return new SpanishFormatter(); + } + + @Test + public void zero() { + assertDuration("cero segundos", T, 0, 0, 0, 0); + assertDuration("0:00", F, 0, 0, 0, 0); + } + + @Test + public void speechOne() { + assertDuration("un segundo", T, 0, 0, 0, 1); + assertDuration("un minuto", T, 0, 0, 1, 0); + assertDuration("una hora", T, 0, 1, 0, 0); + assertDuration("un día", T, 1, 0, 0, 0); + } + + @Test + public void speechMany() { + assertDuration("cinco segundos", T, 0, 0, 0, 5); + assertDuration("dos minutos", T, 0, 0, 2, 0); + assertDuration("diecisiete horas", T, 0, 17, 0, 0); + 
assertDuration("ochenta y cuatro horas", T, 84, 0, 0, 0); + } + + //TODO Spanish translation + + @Test + public void speech() { + assertDuration("six days twenty three hours fifty nine minutes thirty two seconds", T, 6, 23, 59, 32); + assertDuration("nineteen days fifty two minutes", T, 19, 0, 52, 0); + assertDuration("one hour six seconds", T, 0, 1, 0, 6); + assertDuration("sixty three days forty four seconds", T, 63, 0, 0, 44); + assertDuration("one day one hour one minute one second", T, 1, 1, 1, 1); + } + + @Test + public void noSpeechOne() { + assertDuration("0:01", F, 0, 0, 0, 1); + assertDuration("1:00", F, 0, 0, 1, 0); + assertDuration("1:00:00", F, 0, 1, 0, 0); + assertDuration("1d 0:00:00", F, 1, 0, 0, 0); + } + + @Test + public void noSpeechMany() { + assertDuration("0:39", F, 0, 0, 0, 39); + assertDuration("24:00", F, 0, 0, 24, 0); + assertDuration("3:00:00", F, 0, 3, 0, 0); + assertDuration("76d 0:00:00", F, 76, 0, 0, 0); + } + + @Test + public void noSpeech() { + assertDuration("6d 23:59:32", F, 6, 23, 59, 32); + assertDuration("19d 0:52:00", F, 19, 0, 52, 0); + assertDuration("1:00:06", F, 0, 1, 0, 6); + assertDuration("63d 0:00:44", F, 63, 0, 0, 44); + assertDuration("1d 1:01:01", F , 1, 1, 1, 1); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java new file mode 100644 index 00000000..f5ea7232 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java @@ -0,0 +1,66 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.junit.Assert.assertEquals; + +public class NiceNumberTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new 
SpanishFormatter(), null); + } + //TODO Spanish translation + + @Test + public void speech() { + assertEquals("thirty four and a half", pf.niceNumber(34.5).get()); + assertEquals("minus eighteen and three fifths", pf.niceNumber(-18.6).get()); + assertEquals("ninety eight and eighteen nineteenths", pf.niceNumber(98.947368421).get()); + assertEquals("minus five and six elevenths", pf.niceNumber(-5.5454545).get()); + assertEquals("seven ninths", pf.niceNumber(7.0 / 9).get()); + assertEquals("minus two seventeenths", pf.niceNumber(-2.0 / 17).get()); + assertEquals("four hundred and sixty five", pf.niceNumber(465).get()); + assertEquals("minus ninety one", pf.niceNumber(-91).get()); + assertEquals("zero", pf.niceNumber(0).get()); + } + + @Test + public void noSpeech() { + assertEquals("34 1/2", pf.niceNumber(34.5).speech(F).get()); + assertEquals("-18 3/5", pf.niceNumber(-18.6).speech(F).get()); + assertEquals("98 18/19", pf.niceNumber(98.947368421).speech(F).get()); + assertEquals("-5 6/11", pf.niceNumber(-5.5454545).speech(F).get()); + assertEquals("7/9", pf.niceNumber(7.0 / 9).speech(F).get()); + assertEquals("-2/17", pf.niceNumber(-2.0 / 17).speech(F).get()); + assertEquals("465", pf.niceNumber(465).speech(F).get()); + assertEquals("-91", pf.niceNumber(-91).speech(F).get()); + assertEquals("0", pf.niceNumber(0).speech(F).get()); + } + + @Test + public void customDenominators() { + assertEquals("minus four and four tenths", pf.niceNumber(-4.4).denominators(Arrays.asList(2, 3, 4, 6, 7, 8, 9, 10, 11)).get()); + assertEquals("-64 6/12", pf.niceNumber(-64.5).speech(F).denominators(Collections.singletonList(12)).get()); + assertEquals("minus three and five hundred thousand millionths", pf.niceNumber(-3.5).denominators(Arrays.asList(1000000, 2000000)).get()); + assertEquals("9 1000000/2000000", pf.niceNumber(9.5).speech(F).denominators(Arrays.asList(2000000, 1000000)).get()); + assertEquals("zero point eight", pf.niceNumber(4.0 / 5).denominators(Arrays.asList(2, 3, 
4)).get()); + } + + @Test + public void invalidFraction() { + assertEquals("one point eight four", pf.niceNumber(1.837).get()); + assertEquals("minus thirty eight point one nine", pf.niceNumber(-38.192).get()); + assertEquals("3829.48", pf.niceNumber(3829.47832).speech(F).get()); + assertEquals("-7.19", pf.niceNumber(-7.1928).speech(F).get()); + assertEquals("-9322.38", pf.niceNumber(-9322 - 8.0 / 21).speech(F).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java new file mode 100644 index 00000000..3539681d --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java @@ -0,0 +1,115 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.time.LocalTime; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.junit.Assert.assertEquals; + +public class NiceTimeTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new SpanishFormatter(), null); + } + + //TODO Spanish translation + + + @Test + public void random() { + final LocalTime dt = LocalTime.of(13, 22, 3); + assertEquals("one twenty two", pf.niceTime(dt).get()); + assertEquals("one twenty two p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("thirteen twenty two", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("thirteen twenty two", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("1:22", pf.niceTime(dt).speech(F).get()); + assertEquals("1:22 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("13:22", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("13:22", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void oClock() { + final LocalTime dt = LocalTime.of(15, 0, 
32); + assertEquals("three o'clock", pf.niceTime(dt).get()); + assertEquals("three p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("fifteen hundred", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("fifteen hundred", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("3:00", pf.niceTime(dt).speech(F).get()); + assertEquals("3:00 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("15:00", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("15:00", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void afterMidnight() { + final LocalTime dt = LocalTime.of(0, 2, 9); + assertEquals("twelve oh two", pf.niceTime(dt).get()); + assertEquals("twelve oh two a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("zero zero zero two", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("zero zero zero two", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("12:02", pf.niceTime(dt).speech(F).get()); + assertEquals("12:02 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("00:02", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("00:02", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void quarterPast() { + final LocalTime dt = LocalTime.of(1, 15, 33); + assertEquals("quarter past one", pf.niceTime(dt).get()); + assertEquals("quarter past one a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("zero one fifteen", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("zero one fifteen", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("1:15", pf.niceTime(dt).speech(F).get()); + assertEquals("1:15 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("01:15", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("01:15", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void half() { + final LocalTime dt = LocalTime.of(12, 30, 59); + 
assertEquals("half past twelve", pf.niceTime(dt).get()); + assertEquals("half past twelve p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("twelve thirty", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("twelve thirty", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("12:30", pf.niceTime(dt).speech(F).get()); + assertEquals("12:30 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("12:30", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("12:30", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void quarterTo() { + final LocalTime dt = LocalTime.of(23, 45, 7); + assertEquals("quarter to twelve", pf.niceTime(dt).get()); + assertEquals("quarter to twelve p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("twenty three forty five", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("twenty three forty five", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("11:45", pf.niceTime(dt).speech(F).get()); + assertEquals("11:45 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("23:45", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("23:45", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void tenAm() { + final LocalTime dt = LocalTime.of(10, 3, 44); + assertEquals("ten oh three", pf.niceTime(dt).get()); + assertEquals("ten oh three a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("ten zero three", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("ten zero three", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("10:03", pf.niceTime(dt).speech(F).get()); + assertEquals("10:03 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("10:03", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("10:03", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java 
b/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java new file mode 100644 index 00000000..7defb150 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java @@ -0,0 +1,134 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; + +import org.dicio.numbers.test.NumberExtractorUtilsTestBase; +import org.junit.Test; + +public class NumberExtractorUtilsTest extends NumberExtractorUtilsTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } + + //TODO Spanish translation + + @Test + public void testNumberLessThan1000() { + assertNumberLessThan1000("zero", T, 0, F, 1); + assertNumberLessThan1000("one", F, 1, F, 1); + assertNumberLessThan1000("five", T, 5, F, 1); + assertNumberLessThan1000("nineteen", F, 19, F, 1); + assertNumberLessThan1000("hundred", T, 100, F, 1); + assertNumberLessThan1000("one hundred", F, 100, F, 2); + assertNumberLessThan1000("three hundred", T, 300, F, 2); + assertNumberLessThan1000("twenty six", F, 26, F, 2); + assertNumberLessThan1000("thirty-seven", T, 37, F, 3); + assertNumberLessThan1000("seven hundred six", F, 706, F, 3); + assertNumberLessThan1000("eight hundred eighteen", T, 818, F, 3); + } + + @Test + public void testNumberLessThan1000Digits() { + assertNumberLessThan1000("0", F, 0, F, 1); + assertNumberLessThan1000("1", T, 1, F, 1); + assertNumberLessThan1000("6", F, 6, F, 1); + assertNumberLessThan1000("15", T, 15, F, 1); + assertNumberLessThan1000("100 nineteen", F, 100, F, 1); + assertNumberLessThan1000("3 hundred 8", T, 308, F, 3); + assertNumberLessThan1000("72", F, 72, F, 1); + assertNumberLessThan1000("912", T, 912, F, 1); + assertNumberLessThan1000("8 hundred and 18", F, 818, F, 4); + assertNumberLessThan1000("7 hundred 3 9", T, 703, F, 3); + assertNumberLessThan1000("hundred 4 7", F, 104, F, 2); + assertNumberLessThan1000("19 hundred", T, 19, F, 
1); + assertNumberLessThan1000("sixty 7", F, 67, F, 2); + assertNumberLessThan1000("30 6", T, 30, F, 1); + } + + @Test + public void testNumberLessThan1000EdgeCases() { + assertNumberLessThan1000("four five", T, 4, F, 1); + assertNumberLessThan1000("a two and", F, 2, F, 2); + assertNumberLessThan1000("one thirteen", T, 1, F, 1); + assertNumberLessThan1000("sixteen eight", F, 16, F, 1); + assertNumberLessThan1000("eighteen hundred", T, 18, F, 1); + assertNumberLessThan1000("zero hundred", F, 0, F, 1); + assertNumberLessThan1000("sixty nought", T, 60, F, 1); + assertNumberLessThan1000("a hundred", F, 100, F, 2); + assertNumberLessThan1000("one, and a hundred", T, 100, F, 5); + assertNumberLessThan1000("seven hundred and six", F, 706, F, 4); + assertNumberLessThan1000("one hundred and ninety one", T, 191, F, 5); + assertNumberLessThan1000("eight and a hundred and fifteen", F, 815, F, 6); + assertNumberLessThan1000("a a one a a hundred a a eleven a a", T, 111, F, 9); + } + + @Test + public void testNumberLessThan1000Ordinal() { + assertNumberLessThan1000("fifth", T, 5, T, 1); + assertNumberLessThan1000("twenty sixth", T, 26, T, 2); + assertNumberLessThan1000("seventy eighth", F, 70, F, 1); + assertNumberLessThan1000("fiftieth eighth", T, 50, T, 1); + assertNumberLessThan1000("one hundred and thirteenth", T, 113, T, 4); + assertNumberLessThan1000("first hundred", T, 1, T, 1); + assertNumberLessThan1000("seven hundredth ten", T, 700, T, 2); + assertNumberLessThan1000("nine hundredth", F, 9, F, 1); + assertNumberLessThan1000("23 th", T, 23, T, 2); + assertNumberLessThan1000("620nd", T, 620, T, 2); + assertNumberLessThan1000("6st", T, 6, T, 2); + assertNumberLessThan1000("8 first", T, 8, F, 1); + assertNumberLessThan1000("1st hundred", T, 1, T, 2); + assertNumberLessThan1000Null("seventh", F); + assertNumberLessThan1000Null("96th", F); + } + + @Test + public void testNumberLessThan1000Null() { + assertNumberLessThan1000Null("", F); + assertNumberLessThan1000Null("hello", 
T); + assertNumberLessThan1000Null("hello how are you", F); + assertNumberLessThan1000Null("a hello two and", T); + assertNumberLessThan1000Null("a car and a half,", F); + assertNumberLessThan1000Null("a million", T); + assertNumberLessThan1000Null(" twenty", F); + } + + @Test + public void testNumberGroupShortScale() { + assertNumberGroupShortScale("one hundred and twenty million", F, 1000000000, 120000000, F, 5); + assertNumberGroupShortScale("three thousand and six", T, 1000000000, 3000, F, 2); + assertNumberGroupShortScale("a hundred thousand", F, 1000000, 100000, F, 3); + assertNumberGroupShortScale("hundred 70 thousand", T, 1000000, 170000, F, 3); + assertNumberGroupShortScale("572 million", F, 1000000000, 572000000, F, 2); + assertNumberGroupShortScale("3 million", T, 1000000000, 3000000, F, 2); + assertNumberGroupShortScale(", one hundred and ninety one", F, 1000, 191, F, 6); + } + + @Test + public void testNumberGroupShortScaleOrdinal() { + assertNumberGroupShortScale("seven hundred and sixty four millionth", T, 1000000000, 764000000, T, 6); + assertNumberGroupShortScale("seven hundred and sixty four millionth", F, 1000000000, 764, F, 5); + assertNumberGroupShortScale("seven hundred and sixty four millionth", F, 1000, 764, F, 5); + assertNumberGroupShortScale("fifth billionth", T, 1000000000, 5, T, 1); + assertNumberGroupShortScale("nineteen hundredth", T, 1000000000, 19, F, 1); + assertNumberGroupShortScaleNull("seven hundred and sixty four millionth", T, 1000); + assertNumberGroupShortScaleNull("twelfth thousandth", F, 1000000000); + } + + @Test + public void testNumberGroupShortScaleNull() { + assertNumberGroupShortScaleNull("", T, 1000000000); + assertNumberGroupShortScaleNull("hello", F, 1000000); + assertNumberGroupShortScaleNull("hello how are you", T, 1000); + assertNumberGroupShortScaleNull("129000", F, 1000000000); + assertNumberGroupShortScaleNull("5000000", T, 1000000000); + assertNumberGroupShortScaleNull("one hundred and six", F, 999); + 
assertNumberGroupShortScaleNull("twelve", T, 0); + assertNumberGroupShortScaleNull("seven billion", F, 1000); + assertNumberGroupShortScaleNull("nine thousand and one", T, 1000); + assertNumberGroupShortScaleNull("eight million people", F, 1000000); + assertNumberGroupShortScaleNull(" ten ", T, 1000000); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java new file mode 100644 index 00000000..b8004bd7 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java @@ -0,0 +1,64 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.DAY; +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.HOUR; +import static org.dicio.numbers.test.TestUtils.MILLIS; +import static org.dicio.numbers.test.TestUtils.MINUTE; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.YEAR; +import static org.dicio.numbers.test.TestUtils.n; +import static org.dicio.numbers.test.TestUtils.t; + +import org.dicio.numbers.parser.Parser; +import org.dicio.numbers.parser.param.NumberParserParamsTestBase; +import org.junit.Test; + +public class ParserParamsTest extends NumberParserParamsTestBase { + + @Override + protected Parser numberParser() { + return new SpanishParser(); + } + + //TODO Spanish translation + @Test + public void testNumberFirst() { + assertNumberFirst("it is nineteen sixty four trillionths", T, F, n(1964e-12, F)); + assertNumberFirst("36 twelfths of apple", F, T, n(3, F)); + assertNumberFirst("I'm really one hundred and eighth", F, F, n(100, F)); + assertNumberFirst("I'm really one hundred and eighth", T, T, n(108, T)); + } + + @Test + public void testNumberMixedWithText() { + assertNumberMixedWithText(" hello ciao!, 3/5 or four sevenths?", T, F, " hello ciao!, ", n(3.0 / 5.0, F), " or ", n(4.0 / 7.0, F), "?"); + 
assertNumberMixedWithText(" hello ciao!, four sevenths or 3/5?", T, T, " hello ciao!, ", n(4.0 / 7.0, F), " or ", n(3.0 / 5.0, F), "?"); + assertNumberMixedWithText("three billionth plus two", T, T, n(3000000000L, T), " ", n(2, F)); + assertNumberMixedWithText("one billionth and sixteen sixty four", T, F, n(1.0 / 1000000000.0, F), " and ", n(1664, F)); + assertNumberMixedWithText("two billionths minus fifty eight", F, T, n(2000000000000L, T), " ", n(-58, F)); + assertNumberMixedWithText("nine billionths times eleven", F, F, n(9.0 / 1000000000000.0, F), " times ", n(11, F)); + assertNumberMixedWithText("three halves, not eleven quarters", F, T, n(3.0 / 2.0, F), ", not ", n(11.0 / 4.0, F)); + assertNumberMixedWithText("six pairs equals a dozen ", F, T, n(12, F), " equals ", n(12, F), " "); + assertNumberMixedWithText("a dozen scores is not a gross", F, T, n(240, F), " is not ", n(144, F)); + assertNumberMixedWithText("6 quadrillionths of a cake", F, T, n(6e24, T), " of a cake"); + assertNumberMixedWithText("is nineteen sixty four quadrillionth", F, F, "is ", n(1964e-24, F)); + assertNumberMixedWithText("I'm twenty three years old.", T, F, "I'm ", n(23, F), " years old."); + assertNumberMixedWithText("The quintillionth", F, F, "The ", n(1e30, T)); + assertNumberMixedWithText("One quintillionth", T, F, n(1e-18, F)); + assertNumberMixedWithText("One quintillionth", T, T, n(1000000000000000000L, T)); + assertNumberMixedWithText("One billion", F, T, n(1000000000000L, F)); + } + + @Test + public void testDurationFirst() { + assertDurationFirst("Set a two minute and two billion nanosecond timer", F, t(2 * MINUTE + 2000L)); + assertDurationFirst("you know two years ago are not billions of days", T, t(2 * YEAR)); + } + + @Test + public void testDurationMixedWithText() { + assertDurationMixedWithText("2ns and four hours while six milliseconds.", F, t(4 * HOUR, 2), " while ", t(0, 6 * MILLIS), "."); + assertDurationMixedWithText("you know two years ago are not billions of day", 
T, "you know ", t(2 * YEAR), " ago are not ", t(1000000000L * DAY)); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java new file mode 100644 index 00000000..0f4ea252 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java @@ -0,0 +1,235 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.junit.Assert.assertEquals; + +public class PronounceNumberTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new SpanishFormatter(), null); + } + + //TODO Spanish translation + + @Test + public void smallIntegers() { + assertEquals("zero", pf.pronounceNumber(0).get()); + assertEquals("one", pf.pronounceNumber(1).get()); + assertEquals("ten", pf.pronounceNumber(10).get()); + assertEquals("fifteen", pf.pronounceNumber(15).get()); + assertEquals("twenty", pf.pronounceNumber(20).get()); + assertEquals("twenty seven", pf.pronounceNumber(27).get()); + assertEquals("thirty", pf.pronounceNumber(30).get()); + assertEquals("thirty three", pf.pronounceNumber(33).get()); + } + + @Test + public void negativeSmallIntegers() { + assertEquals("minus one", pf.pronounceNumber(-1).get()); + assertEquals("minus ten", pf.pronounceNumber(-10).get()); + assertEquals("minus fifteen", pf.pronounceNumber(-15).get()); + assertEquals("minus twenty", pf.pronounceNumber(-20).get()); + assertEquals("minus twenty seven", pf.pronounceNumber(-27).get()); + assertEquals("minus thirty", pf.pronounceNumber(-30).get()); + assertEquals("minus thirty three", pf.pronounceNumber(-33).get()); + } + + @Test + public void decimals() { + assertEquals("zero point zero five", 
pf.pronounceNumber(0.05).get()); + assertEquals("minus zero point zero five", pf.pronounceNumber(-0.05).get()); + assertEquals("one point two three", pf.pronounceNumber(1.234).get()); + assertEquals("twenty one point two six four", pf.pronounceNumber(21.264).places(5).get()); + assertEquals("twenty one point two six four", pf.pronounceNumber(21.264).places(4).get()); + assertEquals("twenty one point two six four", pf.pronounceNumber(21.264).places(3).get()); + assertEquals("twenty one point two six", pf.pronounceNumber(21.264).places(2).get()); + assertEquals("twenty one point three", pf.pronounceNumber(21.264).places(1).get()); + assertEquals("twenty one", pf.pronounceNumber(21.264).places(0).get()); + assertEquals("minus twenty one point two six four", pf.pronounceNumber(-21.264).places(5).get()); + assertEquals("minus twenty one point two six four", pf.pronounceNumber(-21.264).places(4).get()); + assertEquals("minus twenty one point two six four", pf.pronounceNumber(-21.264).places(3).get()); + assertEquals("minus twenty one point two six", pf.pronounceNumber(-21.264).places(2).get()); + assertEquals("minus twenty one point three", pf.pronounceNumber(-21.264).places(1).get()); + assertEquals("minus twenty one", pf.pronounceNumber(-21.264).places(0).get()); + } + + @Test + public void roundingDecimals() { + assertEquals("zero", pf.pronounceNumber(0.05).places(0).get()); + assertEquals("zero", pf.pronounceNumber(-0.4).places(0).get()); + assertEquals("minus twenty two", pf.pronounceNumber(-21.7).places(0).get()); + assertEquals("eighty nine", pf.pronounceNumber(89.2).places(0).get()); + assertEquals("ninety", pf.pronounceNumber(89.9).places(0).get()); + assertEquals("minus one", pf.pronounceNumber(-0.5).places(0).get()); + assertEquals("zero", pf.pronounceNumber(-0.4).places(0).get()); + assertEquals("six point three", pf.pronounceNumber(6.28).places(1).get()); + assertEquals("minus three point one", pf.pronounceNumber(-3.14).places(1).get()); + // note: 3.15 does 
not yield "three point two" because of floating point errors + assertEquals("three point two", pf.pronounceNumber(3.150001).places(1).get()); + assertEquals("zero point three", pf.pronounceNumber(0.25).places(1).get()); + assertEquals("minus zero point three", pf.pronounceNumber(-0.25).places(1).get()); + assertEquals("nineteen", pf.pronounceNumber(19.004).get()); + } + + @Test + public void hundred() { + assertEquals("one hundred", pf.pronounceNumber(100).get()); + assertEquals("six hundred and seventy eight", pf.pronounceNumber(678).get()); + + assertEquals("one hundred and three million, two hundred and fifty four thousand, six hundred and fifty four", + pf.pronounceNumber(103254654).get()); + assertEquals("one million, five hundred and twelve thousand, four hundred and fifty seven", + pf.pronounceNumber(1512457).get()); + assertEquals("two hundred and nine thousand, nine hundred and ninety six", + pf.pronounceNumber(209996).get()); + } + + @Test + public void year() { + assertEquals("fourteen fifty six", pf.pronounceNumber(1456).get()); + assertEquals("nineteen eighty four", pf.pronounceNumber(1984).get()); + assertEquals("eighteen oh one", pf.pronounceNumber(1801).get()); + assertEquals("eleven hundred", pf.pronounceNumber(1100).get()); + assertEquals("twelve oh one", pf.pronounceNumber(1201).get()); + assertEquals("fifteen ten", pf.pronounceNumber(1510).get()); + assertEquals("ten oh six", pf.pronounceNumber(1006).get()); + assertEquals("one thousand", pf.pronounceNumber(1000).get()); + assertEquals("two thousand", pf.pronounceNumber(2000).get()); + assertEquals("two thousand, fifteen", pf.pronounceNumber(2015).get()); + assertEquals("four thousand, eight hundred and twenty seven", pf.pronounceNumber(4827).get()); + } + + @Test + public void scientificNotation() { + assertEquals("zero", pf.pronounceNumber(0.0).scientific(T).get()); + assertEquals("three point three times ten to the power of one", + pf.pronounceNumber(33).scientific(T).get()); + 
assertEquals("two point nine nine times ten to the power of eight", + pf.pronounceNumber(299492458).scientific(T).get()); + assertEquals("two point nine nine seven nine two five times ten to the power of eight", + pf.pronounceNumber(299792458).scientific(T).places(6).get()); + assertEquals("one point six seven two times ten to the power of negative twenty seven", + pf.pronounceNumber(1.672e-27).scientific(T).places(3).get()); + + // auto scientific notation when number is too big to be pronounced + assertEquals("two point nine five times ten to the power of twenty four", + pf.pronounceNumber(2.9489e24).get()); + } + + private void assertShortLongScale(final double number, + final String shortScale, + final String longScale) { + assertEquals(shortScale, pf.pronounceNumber(number).shortScale(T).get()); + assertEquals(longScale, pf.pronounceNumber(number).shortScale(F).get()); + } + + @Test + public void largeNumbers() { + assertShortLongScale(1001892, + "one million, one thousand, eight hundred and ninety two", + "one million, one thousand, eight hundred and ninety two"); + assertShortLongScale(299792458, + "two hundred and ninety nine million, seven hundred and ninety two thousand, four hundred and fifty eight", + "two hundred and ninety nine million, seven hundred and ninety two thousand, four hundred and fifty eight"); + assertShortLongScale(-100202133440.0, + "minus one hundred billion, two hundred and two million, one hundred and thirty three thousand, four hundred and forty", + "minus one hundred thousand two hundred and two million, one hundred and thirty three thousand, four hundred and forty"); + assertShortLongScale(20102000987000.0, + "twenty trillion, one hundred and two billion, nine hundred and eighty seven thousand", + "twenty billion, one hundred and two thousand million, nine hundred and eighty seven thousand"); + assertShortLongScale(-2061000560007060.0, + "minus two quadrillion, sixty one trillion, five hundred and sixty million, seven thousand, 
sixty", + "minus two thousand sixty one billion, five hundred and sixty million, seven thousand, sixty"); + assertShortLongScale(9111202032999999488.0, // floating point errors + "nine quintillion, one hundred and eleven quadrillion, two hundred and two trillion, thirty two billion, nine hundred and ninety nine million, nine hundred and ninety nine thousand, four hundred and eighty eight", + "nine trillion, one hundred and eleven thousand two hundred and two billion, thirty two thousand nine hundred and ninety nine million, nine hundred and ninety nine thousand, four hundred and eighty eight"); + + assertShortLongScale(29000.0, "twenty nine thousand", "twenty nine thousand"); + assertShortLongScale(301000.0, "three hundred and one thousand", "three hundred and one thousand"); + assertShortLongScale(4000000.0, "four million", "four million"); + assertShortLongScale(50000000.0, "fifty million", "fifty million"); + assertShortLongScale(630000000.0, "six hundred and thirty million", "six hundred and thirty million"); + assertShortLongScale(7000000000.0, "seven billion", "seven thousand million"); + assertShortLongScale(16000000000.0, "sixteen billion", "sixteen thousand million"); + assertShortLongScale(923000000000.0, "nine hundred and twenty three billion", "nine hundred and twenty three thousand million"); + assertShortLongScale(1000000000000.0, "one trillion", "one billion"); + assertShortLongScale(29000000000000.0, "twenty nine trillion", "twenty nine billion"); + assertShortLongScale(308000000000000.0, "three hundred and eight trillion", "three hundred and eight billion"); + assertShortLongScale(4000000000000000.0, "four quadrillion", "four thousand billion"); + assertShortLongScale(52000000000000000.0, "fifty two quadrillion", "fifty two thousand billion"); + assertShortLongScale(640000000000000000.0, "six hundred and forty quadrillion", "six hundred and forty thousand billion"); + assertShortLongScale(7000000000000000000.0, "seven quintillion", "seven 
trillion"); + + // TODO maybe improve this + assertShortLongScale(1000001, "one million, one", "one million, one"); + assertShortLongScale(-2000000029, "minus two billion, twenty nine", "minus two thousand million, twenty nine"); + } + + @Test + public void ordinal() { + // small numbers + assertEquals("first", pf.pronounceNumber(1).shortScale(T).ordinal(T).get()); + assertEquals("first", pf.pronounceNumber(1).shortScale(F).ordinal(T).get()); + assertEquals("tenth", pf.pronounceNumber(10).shortScale(T).ordinal(T).get()); + assertEquals("tenth", pf.pronounceNumber(10).shortScale(F).ordinal(T).get()); + assertEquals("fifteenth", pf.pronounceNumber(15).shortScale(T).ordinal(T).get()); + assertEquals("fifteenth", pf.pronounceNumber(15).shortScale(F).ordinal(T).get()); + assertEquals("twentieth", pf.pronounceNumber(20).shortScale(T).ordinal(T).get()); + assertEquals("twentieth", pf.pronounceNumber(20).shortScale(F).ordinal(T).get()); + assertEquals("twenty seventh", pf.pronounceNumber(27).shortScale(T).ordinal(T).get()); + assertEquals("twenty seventh", pf.pronounceNumber(27).shortScale(F).ordinal(T).get()); + assertEquals("thirtieth", pf.pronounceNumber(30).shortScale(T).ordinal(T).get()); + assertEquals("thirtieth", pf.pronounceNumber(30).shortScale(F).ordinal(T).get()); + assertEquals("thirty third", pf.pronounceNumber(33).shortScale(T).ordinal(T).get()); + assertEquals("thirty third", pf.pronounceNumber(33).shortScale(F).ordinal(T).get()); + assertEquals("hundredth", pf.pronounceNumber(100).shortScale(T).ordinal(T).get()); + assertEquals("hundredth", pf.pronounceNumber(100).shortScale(F).ordinal(T).get()); + assertEquals("thousandth", pf.pronounceNumber(1000).shortScale(T).ordinal(T).get()); + assertEquals("thousandth", pf.pronounceNumber(1000).shortScale(F).ordinal(T).get()); + assertEquals("ten thousandth", pf.pronounceNumber(10000).shortScale(T).ordinal(T).get()); + assertEquals("ten thousandth", pf.pronounceNumber(10000).shortScale(F).ordinal(T).get()); + 
assertEquals("two hundredth", pf.pronounceNumber(200).shortScale(T).ordinal(T).get()); + assertEquals("two hundredth", pf.pronounceNumber(200).shortScale(F).ordinal(T).get()); + assertEquals("eighteen thousand, six hundred and ninety first", pf.pronounceNumber(18691).ordinal(T).shortScale(T).get()); + assertEquals("eighteen thousand, six hundred and ninety first", pf.pronounceNumber(18691).ordinal(T).shortScale(F).get()); + assertEquals("one thousand, five hundred and sixty seventh", pf.pronounceNumber(1567).ordinal(T).shortScale(T).get()); + assertEquals("one thousand, five hundred and sixty seventh", pf.pronounceNumber(1567).ordinal(T).shortScale(F).get()); + + // big numbers + assertEquals("eighteen millionth", pf.pronounceNumber(18000000).ordinal(T).get()); + assertEquals("eighteen million, hundredth", pf.pronounceNumber(18000100).ordinal(T).get()); + assertEquals("one hundred and twenty seven billionth", pf.pronounceNumber(127000000000.0).ordinal(T).shortScale(T).get()); + assertEquals("two hundred and one thousand millionth", pf.pronounceNumber(201000000000.0).ordinal(T).shortScale(F).get()); + assertEquals("nine hundred and thirteen billion, eighty million, six hundred thousand, sixty fourth", pf.pronounceNumber(913080600064.0).ordinal(T).shortScale(T).get()); + assertEquals("nine hundred and thirteen thousand eighty million, six hundred thousand, sixty fourth", pf.pronounceNumber(913080600064.0).ordinal(T).shortScale(F).get()); + assertEquals("one trillion, two millionth", pf.pronounceNumber(1000002000000.0).ordinal(T).shortScale(T).get()); + assertEquals("one billion, two millionth", pf.pronounceNumber(1000002000000.0).ordinal(T).shortScale(F).get()); + assertEquals("four trillion, millionth", pf.pronounceNumber(4000001000000.0).ordinal(T).shortScale(T).get()); + assertEquals("four billion, millionth", pf.pronounceNumber(4000001000000.0).ordinal(T).shortScale(F).get()); + + // decimal numbers and scientific notation: the behaviour should be the same as 
with ordinal=F + assertEquals("two point seven eight", pf.pronounceNumber(2.78).ordinal(T).get()); + assertEquals("third", pf.pronounceNumber(2.78).places(0).ordinal(T).get()); + assertEquals("nineteenth", pf.pronounceNumber(19.004).ordinal(T).get()); + assertEquals("eight hundred and thirty million, four hundred and thirty eight thousand, ninety two point one eight three", pf.pronounceNumber(830438092.1829).places(3).ordinal(T).get()); + assertEquals("two point five four times ten to the power of six", pf.pronounceNumber(2.54e6).ordinal(T).scientific(T).get()); + } + + @Test + public void edgeCases() { + assertEquals("zero", pf.pronounceNumber(0.0).get()); + assertEquals("zero", pf.pronounceNumber(-0.0).get()); + assertEquals("infinity", pf.pronounceNumber(Double.POSITIVE_INFINITY).get()); + assertEquals("negative infinity", pf.pronounceNumber(Double.NEGATIVE_INFINITY).scientific(F).get()); + assertEquals("negative infinity", pf.pronounceNumber(Double.NEGATIVE_INFINITY).scientific(T).get()); + assertEquals("not a number", pf.pronounceNumber(Double.NaN).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/TokenizerConfigTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/TokenizerConfigTest.java new file mode 100644 index 00000000..c9c50c33 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/TokenizerConfigTest.java @@ -0,0 +1,11 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.test.TokenizerConfigTestBase; + +public class TokenizerConfigTest extends TokenizerConfigTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } +} From 27d104790a072fc33ae24b498557c3a4e093f679 Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 15 Jul 2025 07:44:27 -0500 Subject: [PATCH 10/30] [EXPERIMENTAL] files adapted for spanish language --- .../lang/es/SpanishDateTimeExtractor.kt | 268 ++++++------ .../dicio/numbers/lang/es/SpanishFormatter.kt | 409 +++++------------- 
.../numbers/lang/es/SpanishNumberExtractor.kt | 189 +++----- .../dicio/numbers/lang/es/SpanishParser.kt | 7 +- 4 files changed, 302 insertions(+), 571 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt index a9b2363b..5407bea7 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt @@ -13,17 +13,18 @@ import java.time.temporal.ChronoUnit class SpanishDateTimeExtractor internal constructor( private val ts: TokenStream, + private val preferMonthBeforeDay: Boolean, // Added parameter to match English functionality private val now: LocalDateTime ) { private val numberExtractor = SpanishNumberExtractor(ts) private val durationExtractor = DurationExtractorUtils(ts, numberExtractor::numberNoOrdinal) private val dateTimeExtractor = DateTimeExtractorUtils(ts, now, this::extractIntegerInRange) - private fun extractIntegerInRange(fromInclusive: Int, toInclusive: Int): Int? { + private fun extractIntegerInRange(fromInclusive: Int, toInclusive: Int, allowOrdinal: Boolean = false): Int? 
{ // disallow fraction as / should be treated as a day/month/year separator return NumberExtractorUtils.extractOneIntegerInRange( ts, fromInclusive, toInclusive - ) { NumberExtractorUtils.signBeforeNumber(ts) { numberExtractor.numberInteger(false) } } + ) { NumberExtractorUtils.signBeforeNumber(ts) { numberExtractor.numberInteger(allowOrdinal) } } } @@ -42,10 +43,7 @@ class SpanishDateTimeExtractor internal constructor( if (date == null) { // then try with duration, since otherwise numbers would be interpreted as date days - val duration = Utils.firstNotNull( - this::relativeDuration, - dateTimeExtractor::relativeMonthDuration - ) + val duration = Utils.firstNotNull(this::relativeDuration, dateTimeExtractor::relativeMonthDuration) if (duration == null) { // no normal relative duration found: finally try extracting a date normally date = date() @@ -71,11 +69,7 @@ class SpanishDateTimeExtractor internal constructor( val originalPosition = ts.position val duration = ts.tryOrSkipDateTimeIgnore(true) { this.relativeDuration() } if (duration == null) { - date = ts.tryOrSkipDateTimeIgnore( - true - ) { - Utils.firstNotNull(this::relativeSpecialDay, this::date) - } + date = ts.tryOrSkipDateTimeIgnore(true) { Utils.firstNotNull(this::relativeSpecialDay, this::date) } } else if (duration.nanos == 0L && duration.days != 0L) { date = duration.applyAsOffsetToDateTime(now).toLocalDate() } else { @@ -86,7 +80,7 @@ class SpanishDateTimeExtractor internal constructor( return if (date == null) { time?.atDate(now.toLocalDate()) } else { - if (time == null) date.atTime(now.toLocalTime()) else date.atTime(time) + time?.let { date.atTime(it) } ?: date.atTime(now.toLocalTime()) } } @@ -96,7 +90,6 @@ class SpanishDateTimeExtractor internal constructor( if (time == null) { // if there is no time, maybe there is a moment of day (not am/pm though) preceding? 
val momentOfDay = momentOfDay() ?: return null - time = ts.tryOrSkipDateTimeIgnore(true) { this.time() } if (time == null) { // found moment of day without a specific time @@ -116,9 +109,11 @@ class SpanishDateTimeExtractor internal constructor( } if (time.hour != 0 && pm != null) { - // AM/PM should not do anything after 0 (e.g. 0pm or 24 di sera) - - if (pm && !DateTimeExtractorUtils.isMomentOfDayPm(time.hour)) { + // AM/PM should not do anything after 0 (e.g. 0pm) + if (!pm && time.hour == 12) { + // Spanish context: 12 am is midnight + time = time.withHour(0) + } else if (pm && !DateTimeExtractorUtils.isMomentOfDayPm(time.hour)) { // time must be in the afternoon, but time is not already, correct it time = time.withHour((time.hour + 12) % DateTimeExtractorUtils.HOURS_IN_DAY) } @@ -127,113 +122,142 @@ class SpanishDateTimeExtractor internal constructor( } fun time(): LocalTime? { - // try both with a normal hour and with "mezzogiorno"/"mezzanotte" - val hour = Utils.firstNotNull(this::noonMidnightLike, this::hour) ?: return null - var result = LocalTime.of(hour, 0) + val originalPosition = ts.position + val specialMinute = specialMinute() // e.g., "y cuarto", "menos cuarto" - val minute = ts.tryOrSkipDateTimeIgnore( - true - ) { - Utils.firstNotNull(this::specialMinute, dateTimeExtractor::minute) + val hour = Utils.firstNotNull(this::noonMidnightLike, this::hour) + if (hour == null) { + ts.position = originalPosition + return null } - if (minute == null) { - return result + + if (specialMinute != null) { + // Logic for phrases like "seis menos cuarto" (quarter to six) + return if (specialMinute < 0) { + LocalTime.of((hour + 23) % 24, 60 + specialMinute) + } else { + // Logic for "seis y cuarto" (quarter past six) or "seis y media" (half past six) + LocalTime.of(hour, specialMinute) + } } - result = result.withMinute(minute) + + var result = LocalTime.of(hour, 0) - val second = ts.tryOrSkipDateTimeIgnore(true) { dateTimeExtractor.second() } - if (second == null) 
{ + // Handle "en punto" (o'clock) + if (ts.nextValueIs("en") && ts.nextValueIs("punto", 1)) { + ts.movePositionForwardBy(2) return result } - return result.withSecond(second) + + val minute = ts.tryOrSkipDateTimeIgnore(true) { dateTimeExtractor.minute() } + if (minute == null) return result + + result = result.withMinute(minute) + val second = ts.tryOrSkipDateTimeIgnore(true) { dateTimeExtractor.second() } + + return second?.let { result.withSecond(it) } ?: result } fun date(): LocalDate? { var result = now.toLocalDate() - val dayOfWeek = dayOfWeek() - val day = ts.tryOrSkipDateTimeIgnore( - dayOfWeek != null - ) { extractIntegerInRange(1, 31) } + val dayOfWeek = dateTimeExtractor.dayOfWeek() + val firstNum = ts.tryOrSkipDateTimeIgnore(dayOfWeek != null) { extractIntegerInRange(1, 31, true) } - if (day == null) { - if (dayOfWeek != null) { - // TODO maybe enforce the date to be in the future? - return result.plus((dayOfWeek - result.dayOfWeek.ordinal).toLong(), ChronoUnit.DAYS) - } - result = result.withDayOfMonth(1) - } else { - result = result.withDayOfMonth(day) + if (firstNum == null && dayOfWeek != null) { + // e.g. "próximo martes" (next Tuesday) + // TODO maybe enforce the date to be in the future? 
+ return result.plus((dayOfWeek - result.dayOfWeek.ordinal).toLong(), ChronoUnit.DAYS) } - val month = ts.tryOrSkipDateTimeIgnore(day != null) { - Utils.firstNotNull(dateTimeExtractor::monthName, { extractIntegerInRange(1, 12) }) - } - if (month == null) { - if (day != null) { - return result + val monthName = ts.tryOrSkipDateTimeIgnore(firstNum != null) { dateTimeExtractor.monthName() } + if (monthName == null) { + // Date format is likely number-based, e.g., 25/12/2023 + result = if (firstNum == null) { + result.withDayOfMonth(1).withMonth(1) + } else { + val secondNumMax = if (firstNum <= 12) 31 else 12 + val secondNum = ts.tryOrSkipDateTimeIgnore(true) { extractIntegerInRange(1, secondNumMax, true) } + if (secondNum == null) { + return if (preferMonthBeforeDay && firstNum <= 12) { + result.withDayOfMonth(1).withMonth(firstNum) + } else { + result.withDayOfMonth(firstNum) + } + } else { + // Spanish standard is day-first (DD/MM), but we respect preferMonthBeforeDay + if ((preferMonthBeforeDay || secondNum > 12) && firstNum <= 12) { + result.withDayOfMonth(secondNum).withMonth(firstNum) + } else { + result.withDayOfMonth(firstNum).withMonth(secondNum) + } + } } - result = result.withMonth(1) } else { - result = result.withMonth(month) + // Date format includes a month name, e.g., "diciembre 25" + result = result.withMonth(monthName) + val dayNum = firstNum ?: ts.tryOrSkipDateTimeIgnore(true) { extractIntegerInRange(1, 31, true) } + result = dayNum?.let { result.withDayOfMonth(it) } ?: result.withDayOfMonth(1) } + val dayOrMonthFound = firstNum != null || monthName != null + + var bcad = ts.tryOrSkipDateTimeIgnore(dayOrMonthFound) { dateTimeExtractor.bcad() } + val year = ts.tryOrSkipDateTimeIgnore(dayOrMonthFound && bcad == null) { extractIntegerInRange(0, 999999999) } - // if month is null then day is also null, otherwise we would have returned above - val year = ts.tryOrSkipDateTimeIgnore( - month != null - ) { extractIntegerInRange(0, 999999999) } if (year == 
null) { - if (month != null) { - return result - } - return null + return if (dayOrMonthFound) result else null } - val bcad = dateTimeExtractor.bcad() + if (bcad == null) { + bcad = dateTimeExtractor.bcad() + } + // Spanish linguistics: "a.C." (antes de Cristo) means Before Christ. return result.withYear(year * (if (bcad == null || bcad) 1 else -1)) } - fun dayOfWeek(): Int? { + // Spanish context: "mar" is ambiguous for "martes" (Tuesday) and "marzo" (March). + // This logic is correct and mirrors the Italian version's ambiguity. if (ts[0].isValue("mar")) { ts.movePositionForwardBy(1) - return 1 // special case, since mar already used for march + return 1 // Tuesday } else { return dateTimeExtractor.dayOfWeek() } } fun specialMinute(): Int? { + // Spanish context: handles "y cuarto" (15), "y media" (30), "menos cuarto" (-15). val originalPosition = ts.position - val number = numberExtractor.numberNoOrdinal() - if (number != null && number.isDecimal && number.decimalValue() > 0.0 && number.decimalValue() < 1.0) { - // e.g. alle due e tre quarti - return Utils.roundToInt(number.decimalValue() * 60) - } + val isMinus = ts.nextValueIs("menos") + val isPlus = ts.nextValueIs("y") || ts.nextValueIs("con") + // Look for 'cuarto' or 'media' + val keywordIndex = if (isMinus || isPlus) 1 else 0 + if (ts[keywordIndex].isValue("cuarto")) { + ts.movePositionForwardBy(keywordIndex + 1) + return if (isMinus) -15 else 15 + } + if (ts[keywordIndex].isValue("media")) { + ts.movePositionForwardBy(keywordIndex + 1) + return if (isMinus) -30 else 30 // "menos media" is unusual but possible + } + ts.position = originalPosition return null } - fun noonMidnightLike(): Int? { - return noonMidnightLikeOrMomentOfDay("noon_midnight_like") - } + fun noonMidnightLike(): Int? = noonMidnightLikeOrMomentOfDay("noon_midnight_like") - fun momentOfDay(): Int? 
{ - // noon_midnight_like is a part of moment_of_day, so noon and midnight are included - return noonMidnightLikeOrMomentOfDay("moment_of_day") - } + fun momentOfDay(): Int? = noonMidnightLikeOrMomentOfDay("moment_of_day") private fun noonMidnightLikeOrMomentOfDay(category: String): Int? { val originalPosition = ts.position - var relativeIndicator = 0 // 0 = not found, otherwise the sign, +1 or -1 if (ts[0].hasCategory("pre_special_hour")) { - // found a word that usually comes before special hours, e.g. questo, dopo if (ts[0].hasCategory("pre_relative_indicator")) { relativeIndicator = if (ts[0].hasCategory("negative")) -1 else 1 - // only move to next not ignore if we got a relative indicator ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) } else { ts.movePositionForwardBy(1) @@ -241,48 +265,28 @@ class SpanishDateTimeExtractor internal constructor( } if (ts[0].hasCategory(category)) { - // special hour found, e.g. mezzanotte, sera, pranzo + // e.g. mediodía, tarde, noche ts.movePositionForwardBy(1) - return ((ts[-1].number!!.integerValue().toInt() + - DateTimeExtractorUtils.HOURS_IN_DAY + relativeIndicator) - % DateTimeExtractorUtils.HOURS_IN_DAY) - } - - // noon/midnight have both the categores noon_midnight_like and moment_of_day, always try - if (ts[0].value.startsWith("mezz")) { - // sometimes e.g. "mezzogiorno" is split into "mezzo giorno" - if (ts[1].value.startsWith("giorn")) { - ts.movePositionForwardBy(2) - return 12 + relativeIndicator - } else if (ts[1].value.startsWith("nott")) { - ts.movePositionForwardBy(2) - return (DateTimeExtractorUtils.HOURS_IN_DAY + relativeIndicator) % DateTimeExtractorUtils.HOURS_IN_DAY - } + return (ts[-1].number!!.integerValue().toInt() + DateTimeExtractorUtils.HOURS_IN_DAY + relativeIndicator) % DateTimeExtractorUtils.HOURS_IN_DAY } - // no special hour found ts.position = originalPosition return null } fun hour(): Int? 
{ val originalPosition = ts.position - - // skip words that usually come before hours, e.g. alle, ore + // skip words that usually come before hours, e.g. "a las", "hora" ts.movePositionForwardBy(ts.indexOfWithoutCategory("pre_hour", 0)) val number = extractIntegerInRange(0, DateTimeExtractorUtils.HOURS_IN_DAY) if (number == null) { - // no number found, or the number is not a valid hour, e.g. le ventisei ts.position = originalPosition return null } - - // found hour, e.g. alle diciannove return number % DateTimeExtractorUtils.HOURS_IN_DAY // transform 24 into 0 } - private fun relativeSpecialDay(): LocalDate? { val days = Utils.firstNotNull( this::relativeYesterday, @@ -290,67 +294,41 @@ class SpanishDateTimeExtractor internal constructor( this::relativeTomorrow, dateTimeExtractor::relativeDayOfWeekDuration ) - if (days == null) { - return null - } - return now.toLocalDate().plusDays(days.toLong()) + return days?.let { now.toLocalDate().plusDays(it.toLong()) } } fun relativeYesterday(): Int? { - val originalPosition = ts.position - - // collect as many adders ("altro") preceding yesterday ("ieri") as possible - var dayCount = 0 - while (ts[0].hasCategory("yesterday_adder")) { - ++dayCount - ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) - } - - // collect the actual yesterday ("ieri") and exit if it is not found - if (!ts[0].hasCategory("yesterday")) { - ts.position = originalPosition - return null + // Spanish context: "anteayer" is a single word for "day before yesterday". + // The complex multi-word logic from English/Italian is not needed. 
+ if (ts[0].hasCategory("day_before_yesterday")) { + ts.movePositionForwardBy(1) + return -2 } - ts.movePositionForwardBy(1) - ++dayCount - - // if no adders were collected before yesterday, try to collect only one at the end - val nextNotIgnore = ts.indexOfWithoutCategory("date_time_ignore", 0) - if (dayCount == 1 && ts[nextNotIgnore].hasCategory("yesterday_adder")) { - ++dayCount - ts.movePositionForwardBy(nextNotIgnore + 1) + if (ts[0].hasCategory("yesterday")) { + ts.movePositionForwardBy(1) + return -1 } - - // found relative yesterday, e.g. altro altro ieri, ieri l'altro - return -dayCount + return null } fun relativeTomorrow(): Int? { - val originalPosition = ts.position - - // collect as many "dopo" preceding "domani" as possible - var dayCount = 0 - while (ts[0].hasCategory("tomorrow_adder")) { - ++dayCount - ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) + // Spanish context: "pasado mañana" is a single token for "day after tomorrow". + if (ts[0].hasCategory("day_after_tomorrow")) { + ts.movePositionForwardBy(1) + return 2 } - - // collect the actual "domani" and exit if it is not found - if (!ts[0].hasCategory("tomorrow")) { - ts.position = originalPosition - return null + if (ts[0].hasCategory("tomorrow")) { + ts.movePositionForwardBy(1) + return 1 } - ts.movePositionForwardBy(1) - ++dayCount - - // found relative tomorrow, e.g. domani, dopo dopo domani - return dayCount + return null } fun relativeDuration(): Duration? { + // Spanish context: Handles "hace [duration]" (ago) and "[duration] después" (later). 
return dateTimeExtractor.relativeIndicatorDuration( - { durationExtractor.duration() }, + durationExtractor::duration, { duration -> duration.multiply(-1) } ) } -} +} \ No newline at end of file diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt index de53fd1d..1d2a2647 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt @@ -12,70 +12,59 @@ class SpanishFormatter : Formatter("config/es-es") { override fun niceNumber(mixedFraction: MixedFraction, speech: Boolean): String { if (speech) { - val sign = if (mixedFraction.negative) "meno " else "" + val sign = if (mixedFraction.negative) "menos " else "" if (mixedFraction.numerator == 0) { - return sign + pronounceNumber(mixedFraction.whole.toDouble(), 0, true, false, false) - } - - var denominatorString = if (mixedFraction.denominator == 2) { - "mezzo" - } else { - // use ordinal: only mezzo is exceptional - pronounceNumber(mixedFraction.denominator.toDouble(), 0, true, false, true) + return sign + pronouncePositive(mixedFraction.whole, false) + } + + // Spanish context: some fractions are gendered. "un medio" (a half) vs "una media tarta" (a half cake) + val isFemale = mixedFraction.whole == 0L + val denominatorString = when (mixedFraction.denominator) { + 2L -> if (isFemale) "media" else "medio" + else -> { + // Ordinals are used for other denominators, e.g., "tercio", "cuarto", "quinto" + val ordinal = pronouncePositive(mixedFraction.denominator, true) + if (mixedFraction.numerator > 1) { + // pluralize, e.g. "quinto" -> "quintos" + if (ordinal.endsWith("o")) ordinal.dropLast(1) + "os" else ordinal + "s" + } else { + if (ordinal.endsWith("o")) ordinal.dropLast(1) + "o" else ordinal // e.g. 
tercio, not tercer + } + } } val numeratorString = if (mixedFraction.numerator == 1) { - "un" + if (isFemale) "una" else "un" } else { - denominatorString = - denominatorString.substring(0, denominatorString.length - 1) + "i" - pronounceNumber(mixedFraction.numerator.toDouble(), 0, true, false, false) + pronouncePositive(mixedFraction.numerator, false) } return if (mixedFraction.whole == 0L) { "$sign$numeratorString $denominatorString" } else { - (sign + pronounceNumber(mixedFraction.whole.toDouble(), 0, true, false, false) - + " e " + numeratorString + " " + denominatorString) + (sign + pronouncePositive(mixedFraction.whole, false) + + " y " + numeratorString + " " + denominatorString) } } else { return niceNumberNotSpeech(mixedFraction) } } - override fun pronounceNumber( - number: Double, - places: Int, - shortScale: Boolean, - scientific: Boolean, - ordinal: Boolean - ): String { - // for italian shortScale is completely ignored - - if (number == Double.POSITIVE_INFINITY) { - return "infinito" - } else if (number == Double.NEGATIVE_INFINITY) { - return "meno infinito" - } else if (java.lang.Double.isNaN(number)) { - return "non un numero" - } + override fun pronounceNumber(number: Double, places: Int, shortScale: Boolean, scientific: Boolean, ordinal: Boolean): String { + if (number == Double.POSITIVE_INFINITY) return "infinito" + if (number == Double.NEGATIVE_INFINITY) return "menos infinito" + if (java.lang.Double.isNaN(number)) return "no es un número" - // also using scientific mode if the number is too big to be spoken fully. Checking against - // the biggest double smaller than 10^21 = 1000 * 10^18, which is the biggest pronounceable - // number, since e.g. 999.99 * 10^18 can be pronounced correctly. 
if (scientific || abs(number) > 999999999999999934463.0) { val scientificFormatted = String.format(Locale.ENGLISH, "%E", number) val parts = scientificFormatted.split("E".toRegex(), limit = 2).toTypedArray() val power = parts[1].toInt().toDouble() - if (power != 0.0) { - // This handles negatives of powers separately from the normal - // handling since each call disables the scientific flag val n = parts[0].toDouble() return String.format( - "%s per dieci alla %s", + "%s por diez a la %s", pronounceNumber(n, places, shortScale, false, false), - pronounceNumber(power, places, shortScale, false, false) + pronounceNumber(power, places, shortScale, false, true) // exponent as ordinal ) } } @@ -84,312 +73,122 @@ class SpanishFormatter : Formatter("config/es-es") { var varNumber = number if (varNumber < 0) { varNumber = -varNumber - // from here on number is always positive - if (places != 0 || varNumber >= 0.5) { - // do not add minus if number will be rounded to 0 - result.append("meno ") - } + if (places != 0 || varNumber >= 0.5) result.append("menos ") } val realPlaces = Utils.decimalPlacesNoFinalZeros(varNumber, places) val numberIsWhole = realPlaces == 0 - val realOrdinal = ordinal && numberIsWhole - // if no decimal places to be printed, numberLong should be the rounded number val numberLong = varNumber.toLong() + (if (varNumber % 1 >= 0.5 && numberIsWhole) 1 else 0) - if (realOrdinal && ORDINAL_NAMES.containsKey(numberLong)) { - result.append(ORDINAL_NAMES[numberLong]) - } else if (!realOrdinal && NUMBER_NAMES.containsKey(numberLong)) { - if (varNumber > 1000) { - result.append("un ") - } - result.append(NUMBER_NAMES[numberLong]) - } else { - val groups = Utils.splitByModulus(numberLong, 1000) - val groupNames: MutableList = ArrayList() - for (i in groups.indices) { - val z = groups[i] - if (z == 0L) { - continue // skip 000 groups - } - var groupName = subThousand(z) - - if (i == 1) { - if (z == 1L) { - groupName = "mille" - } else { - // use mila instead of 
mille - groupName += " mila" - } - } else if (i != 0) { - // magnitude > 1000, so un is always there - if (z == 1L) { - groupName = "un" - } - - val magnitude = Utils.longPow(1000, i) - groupName += " " + NUMBER_NAMES[magnitude] - if (z != 1L) { - groupName = groupName.substring(0, groupName.length - 1) + "i" - } - } - - groupNames.add(groupName) - } - - appendSplitGroups(result, groupNames) - - if (ordinal && numberIsWhole) { // not ordinal if not whole - if (result.lastIndexOf("dieci") == result.length - 5) { - result.deleteCharAt(result.length - 4) - result.append("mo") - } else { - if (result.lastIndexOf("tre") != result.length - 3 - && result.lastIndexOf("sei") != result.length - 3 - ) { - result.deleteCharAt(result.length - 1) - if (result.lastIndexOf("mil") == result.length - 3) { - result.append("l") - } - } - result.append("esimo") - } - } - } + result.append(pronouncePositive(numberLong, ordinal && numberIsWhole)) if (realPlaces > 0) { - if (varNumber < 1.0 && (result.isEmpty() || "meno ".contentEquals(result))) { - result.append("zero") // nothing was written before - } - result.append(" virgola") - + if (result.toString() == "menos " || result.isEmpty()) result.append("cero") + // Spanish context: "coma" is the standard decimal separator + result.append(" coma") val fractionalPart = String.format("%." 
+ realPlaces + "f", varNumber % 1) for (i in 2 until fractionalPart.length) { result.append(" ") result.append(NUMBER_NAMES[(fractionalPart[i].code - '0'.code).toLong()]) } } - return result.toString() } - override fun niceTime( - time: LocalTime, - speech: Boolean, - use24Hour: Boolean, - showAmPm: Boolean - ): String { + private fun pronouncePositive(n: Long, ordinal: Boolean): String { + if (ordinal) { + ORDINAL_NAMES[n]?.let { return it } + // Logic to build compound ordinals can be added here if needed + } else { + NUMBER_NAMES[n]?.let { return it } + } + + return when { + n >= 1_000_000_000_000 -> buildString { val base = if (n / 1_000_000_000_000 == 1L) "" else pronouncePositive(n / 1_000_000_000_000, false) + " "; append(base); append("billones"); val rem = n % 1_000_000_000_000; if (rem > 0) append(" ").append(pronouncePositive(rem, false)) } + n >= 1_000_000 -> buildString { val base = if (n / 1_000_000 == 1L) "un" else pronouncePositive(n / 1_000_000, false); append(base); append(" millones"); val rem = n % 1_000_000; if (rem > 0) append(" ").append(pronouncePositive(rem, false)) } + n >= 1000 -> buildString { if (n / 1000 > 1) append(pronouncePositive(n / 1000, false)); append(" mil"); val rem = n % 1000; if (rem > 0) append(" ").append(pronouncePositive(rem, false)) } + n >= 100 -> buildString { append(HUNDRED_NAMES[n / 100 * 100]); val rem = n % 100; if (rem > 0) append(" ").append(pronouncePositive(rem, false)) } + n >= 30 -> buildString { append(NUMBER_NAMES[n / 10 * 10]); val rem = n % 10; if (rem > 0) append(" y ").append(pronouncePositive(rem, false)) } + else -> "" // Should be unreachable given the initial checks + } + } + + override fun niceTime(time: LocalTime, speech: Boolean, use24Hour: Boolean, showAmPm: Boolean): String { if (speech) { + if (time.hour == 0 && time.minute == 0) return "medianoche" + if (time.hour == 12 && time.minute == 0) return "mediodía" + val result = StringBuilder() - if (time.minute == 45) { - when (val newHour = 
(time.hour + 1) % 24) { - 0 -> result.append("un quarto a mezzanotte") - 12 -> result.append("un quarto a mezzogiorno") - else -> { - result.append("un quarto alle ") - result.append(getHourName(newHour, use24Hour)) - } - } - } else { - result.append(getHourName(time.hour, use24Hour)) + // Spanish context: hours use 1-12 cycle for speech, not 0-23. + val hourForSpeech = if (use24Hour) time.hour else (if (time.hour % 12 == 0) 12 else time.hour % 12) + if (time.minute == 45 && !use24Hour) { + // Spanish context: "menos cuarto" refers to the next hour. "Son las dos menos cuarto" is 1:45. + val nextHour = (hourForSpeech % 12) + 1 + result.append(getHourName(nextHour, true)).append(" menos cuarto") + } else { + result.append(getHourName(hourForSpeech, false)) when (time.minute) { - 0 -> result.append(" in punto") - 15 -> result.append(" e un quarto") - 30 -> result.append(" e mezza") - else -> { - result.append(" e ") - if (time.minute < 10) { - result.append("zero ") - } - result.append(pronounceNumberDuration(time.minute.toLong())) - } + 0 -> result.append(" en punto") + 15 -> result.append(" y cuarto") + 30 -> result.append(" y media") + else -> result.append(" y ").append(pronouncePositive(time.minute.toLong(), false)) } } - if (!use24Hour && showAmPm && result.indexOf("mezzanotte") == -1 && result.indexOf("mezzogiorno") == -1) { - if (time.hour >= 19) { - result.append(" di sera") - } else if (time.hour >= 12) { - result.append(" di pomeriggio") - } else if (time.hour >= 4) { - result.append(" di mattina") - } else { - result.append(" di notte") + if (showAmPm && !use24Hour) { + when { + time.hour < 6 -> result.append(" de la madrugada") + time.hour < 12 -> result.append(" de la mañana") + time.hour < 20 -> result.append(" de la tarde") + else -> result.append(" de la noche") } } return result.toString() } else { - if (use24Hour) { - return time.format(DateTimeFormatter.ofPattern("HH:mm", Locale.ITALIAN)) - } else { - val result = time.format( - 
DateTimeFormatter.ofPattern( - if (showAmPm) "K:mm a" else "K:mm", Locale.ENGLISH - ) - ) - return if (result.startsWith("0:")) { - "12:" + result.substring(2) - } else { - result - } - } + val pattern = if (use24Hour) "HH:mm" else if (showAmPm) "h:mm a" else "h:mm" + return time.format(DateTimeFormatter.ofPattern(pattern, Locale("es", "ES"))) } } - - private fun getHourName(hour: Int, use24Hour: Boolean): String? { - if (hour == 0) { - return "mezzanotte" - } else if (hour == 12) { - return "mezzogiorno" - } - val normalizedHour = if (use24Hour) { - hour - } else { - hour % 12 - } - + + private fun getHourName(hour: Int, isForNextHour: Boolean): String { + // Spanish context: "la una" (one o'clock) is feminine singular. + // All other hours are feminine plural: "las dos", "las tres", etc. + val normalizedHour = if (hour == 0) 12 else hour return if (normalizedHour == 1) { - "una" + "la una" } else { - pronounceNumberDuration(normalizedHour.toLong()) + "las " + pronouncePositive(normalizedHour.toLong(), false) } } - + + // `pronounceNumberDuration` is a simplification for contexts where gender doesn't matter, + // like "un minuto", but "una hora". The base `pronouncePositive` is more versatile. 
override fun pronounceNumberDuration(number: Long): String { - if (number == 1L) { - return "un" - } - return super.pronounceNumberDuration(number) - } - - - /** - * @param n must be 0 <= n <= 999 - * @return the string representation of a number smaller than 1000 - */ - private fun subThousand(n: Long): String { - val builder = StringBuilder() - var requiresSpace = false // whether a space needs to be added before the content - if (n >= 100) { - val hundred = n / 100 - if (hundred > 1) { - builder.append(NUMBER_NAMES[hundred]) - builder.append(" ") - } - builder.append("cento") - requiresSpace = true - } - - val lastTwoDigits = n % 100 - if (lastTwoDigits != 0L && NUMBER_NAMES.containsKey(lastTwoDigits)) { - if (requiresSpace) { - // this is surely true, but let's keep the space for consistency - builder.append(" ") - } - builder.append(NUMBER_NAMES[lastTwoDigits]) - } else { - val ten = (n % 100) / 10 - if (ten > 0) { - if (requiresSpace) { - builder.append(" ") - } - builder.append(NUMBER_NAMES[ten * 10]) - requiresSpace = true - } - - val unit = n % 10 - if (unit > 0) { - if (requiresSpace) { - builder.append(" ") - } - builder.append(NUMBER_NAMES[unit]) - } - } - - return builder.toString() - } - - /** - * @param result the string builder to append the comma-separated group names to - * @param groupNames the group names - */ - private fun appendSplitGroups(result: StringBuilder, groupNames: List) { - if (groupNames.isNotEmpty()) { - result.append(groupNames[groupNames.size - 1]) - } - - for (i in groupNames.size - 2 downTo 0) { - result.append(", ") - result.append(groupNames[i]) - } + if (number == 1L) return "un" + return pronouncePositive(number, false) } companion object { private val NUMBER_NAMES = mapOf( - 0L to "zero", - 1L to "uno", - 2L to "due", - 3L to "tre", - 4L to "quattro", - 5L to "cinque", - 6L to "sei", - 7L to "sette", - 8L to "otto", - 9L to "nove", - 10L to "dieci", - 11L to "undici", - 12L to "dodici", - 13L to "tredici", - 14L to 
"quattordici", - 15L to "quindici", - 16L to "sedici", - 17L to "diciassette", - 18L to "diciotto", - 19L to "diciannove", - 20L to "venti", - 30L to "trenta", - 40L to "quaranta", - 50L to "cinquanta", - 60L to "sessanta", - 70L to "settanta", - 80L to "ottanta", - 90L to "novanta", - 100L to "cento", - 1000L to "mille", - 1000000L to "milione", - 1000000000L to "miliardo", - 1000000000000L to "bilione", - 1000000000000000L to "biliardo", - 1000000000000000000L to "trilione", + 0L to "cero", 1L to "uno", 2L to "dos", 3L to "tres", 4L to "cuatro", 5L to "cinco", + 6L to "seis", 7L to "siete", 8L to "ocho", 9L to "nueve", 10L to "diez", + 11L to "once", 12L to "doce", 13L to "trece", 14L to "catorce", 15L to "quince", + 16L to "dieciséis", 17L to "diecisiete", 18L to "dieciocho", 19L to "diecinueve", + 20L to "veinte", 21L to "veintiuno", 22L to "veintidós", 23L to "veintitrés", 24L to "veinticuatro", + 25L to "veinticinco", 26L to "veintiséis", 27L to "veintisiete", 28L to "veintiocho", 29L to "veintinueve", + 30L to "treinta", 40L to "cuarenta", 50L to "cincuenta", 60L to "sesenta", 70L to "setenta", + 80L to "ochenta", 90L to "noventa", 100L to "cien" ) - + // Spanish context: Hundreds have special names, e.g., 500 is "quinientos", not "cinco cientos". + private val HUNDRED_NAMES = mapOf( + 100L to "ciento", 200L to "doscientos", 300L to "trescientos", 400L to "cuatrocientos", 500L to "quinientos", + 600L to "seiscientos", 700L to "setecientos", 800L to "ochocientos", 900L to "novecientos" + ) + // Includes common ordinals. 
private val ORDINAL_NAMES = mapOf( - 1L to "primo", - 2L to "secondo", - 3L to "terzo", - 4L to "quarto", - 5L to "quinto", - 6L to "sesto", - 7L to "settimo", - 8L to "ottavo", - 9L to "nono", - 10L to "decimo", - 11L to "undicesimo", - 12L to "dodicesimo", - 13L to "tredicesimo", - 14L to "quattordicesimo", - 15L to "quindicesimo", - 16L to "sedicesimo", - 17L to "diciassettesimo", - 18L to "diciottesimo", - 19L to "diciannovesimo", - 1000L to "millesimo", - 1000000L to "milionesimo", - 1000000000L to "miliardesimo", - 1000000000000L to "bilionesimo", - 1000000000000000L to "biliardesimo", - 1000000000000000000L to "trilionesimo", + 1L to "primero", 2L to "segundo", 3L to "tercero", 4L to "cuarto", 5L to "quinto", + 6L to "sexto", 7L to "séptimo", 8L to "octavo", 9L to "noveno", 10L to "décimo" ) } -} +} \ No newline at end of file diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt index aad879a8..0d845eba 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt @@ -5,90 +5,78 @@ import org.dicio.numbers.unit.Number import org.dicio.numbers.util.NumberExtractorUtils class SpanishNumberExtractor internal constructor(private val ts: TokenStream) { + fun numberPreferOrdinal(): Number? { - // first try with suffix multiplier, e.g. dozen + // first try with suffix multiplier, e.g. docena (dozen) var number = numberSuffixMultiplier() if (number == null) { number = numberSignPoint(true) // then try with normal number } - if (number != null) { - // a number was found, maybe it has a valid denominator? - number = divideByDenominatorIfPossible(number) - } - return number + // a number was found, maybe it has a valid denominator? + return divideByDenominatorIfPossible(number) } fun numberPreferFraction(): Number? 
{ - // first try with suffix multiplier, e.g. dozen + // first try with suffix multiplier, e.g. docena (dozen) var number = numberSuffixMultiplier() if (number == null) { number = numberSignPoint(false) // then try without ordinal } - number = if (number == null) { + // a number was found, maybe it has a valid denominator? + number = divideByDenominatorIfPossible(number) + + if (number == null) { // maybe an ordinal number? - numberSignPoint(true) - } else { - // a number was found, maybe it has a valid denominator? - // note that e.g. "a couple halves" ends up here, but that's valid - divideByDenominatorIfPossible(number) + number = numberSignPoint(true) } return number } fun numberNoOrdinal(): Number? { - // for now this function is used internally just for duration parsing, but maybe it could - // be exposed to library users, giving more control over how ordinals are handled. - - // first try with suffix multiplier, e.g. dozen - + // This function is used internally for duration parsing. var number = numberSuffixMultiplier() if (number == null) { - number = numberSignPoint(false) // then try without ordinal + number = numberSignPoint(false) } - - if (number != null) { - // a number was found, maybe it has a valid denominator? - // note that e.g. "una mezza coppia" ends up here, but that's valid - number = divideByDenominatorIfPossible(number) - } - - return number + return divideByDenominatorIfPossible(number) } - fun divideByDenominatorIfPossible(numberToEdit: Number): Number? { - // if numberToEdit is directly followed by an ordinal number then it is a fraction (only if numberToEdit is not - // ordinal or already decimal). Note: a big integer (i.e. 10^24) would be decimal, here we are assuming that - // such a number will never have a fraction after it for simplicity. + private fun divideByDenominatorIfPossible(numberToEdit: Number?): Number? { + if (numberToEdit == null) { + // Spanish context: handles "un quinto" (a fifth), where "un" is the numerator. 
+ if (ts[0].isValue("un") || ts[0].isValue("una")) { + val originalPosition = ts.position + ts.movePositionForwardBy(1) + val denominator = numberInteger(true) + if (denominator != null && denominator.isOrdinal && denominator.moreThan(2)) { + return Number(1).divide(denominator) + } else { + ts.position = originalPosition + } + } + return null + } - if (!numberToEdit.isOrdinal && !numberToEdit.isDecimal - && !ts[0].hasCategory("ignore") - ) { + // if numberToEdit is directly followed by an ordinal number then it is a fraction + if (!numberToEdit.isOrdinal && !numberToEdit.isDecimal && !ts[0].hasCategory("ignore")) { val originalPosition = ts.position val denominator = numberInteger(true) if (denominator == null) { - // no denominator found: maybe a custom multiplier? e.g. mezzo (=0.5), dozzina (=12) + // no denominator found: maybe a custom multiplier? e.g. media (=0.5), docena (=12) if (ts[0].hasCategory("suffix_multiplier")) { ts.movePositionForwardBy(1) - - val multiplier = ts[-1].number - if (multiplier?.isDecimal == true && - (1 / multiplier.decimalValue()).toLong().toDouble() - == (1 / multiplier.decimalValue()) - ) { - // the multiplier is an exact fraction, divide by the denominator converted - // to long to possibly preserve the integerness of numberToEdit, e.g. - // sedici mezzi should be 8, not 8.0 + val multiplier = ts[-1].number!! + if (multiplier.isDecimal && (1 / multiplier.decimalValue()).toLong().toDouble() == (1 / multiplier.decimalValue())) { return numberToEdit.divide((1 / multiplier.decimalValue()).toLong()) } - return numberToEdit.multiply(multiplier) } } else if (denominator.isOrdinal && denominator.moreThan(2)) { - return numberToEdit.divide(denominator) // valid denominator, e.g. un quinto + return numberToEdit.divide(denominator) // valid denominator, e.g. dos tercios } else { - // invalid denominator, e.g. sei primi + // invalid denominator, e.g. 
seis primeros ts.position = originalPosition // restore to original position } } @@ -98,10 +86,9 @@ class SpanishNumberExtractor internal constructor(private val ts: TokenStream) { fun numberSuffixMultiplier(): Number? { if (ts[0].hasCategory("suffix_multiplier")) { ts.movePositionForwardBy(1) - return ts[-1].number // a suffix multiplier, e.g. dozen, half, score, percent - } else { - return null + return ts[-1].number } + return null } fun numberSignPoint(allowOrdinal: Boolean): Number? { @@ -109,120 +96,84 @@ class SpanishNumberExtractor internal constructor(private val ts: TokenStream) { } fun numberPoint(allowOrdinal: Boolean): Number? { - var n = numberInteger(allowOrdinal).let { - if (it == null || it.isOrdinal) { - // numbers can not start with just "virgola" - // no point or fraction separator can appear after an ordinal number - return@numberPoint it - } - it + var n = numberInteger(allowOrdinal) + if (n != null && n.isOrdinal) { + // no point or fraction separator can appear after an ordinal number + return n } if (ts[0].hasCategory("point")) { - // parse point indicator from e.g. "twenty one point four five three" - - if (!ts[1].hasCategory("digit_after_point") - && (!NumberExtractorUtils.isRawNumber(ts[1]) || ts[2].hasCategory("ordinal_suffix")) - ) { - // also return if next up is an ordinal raw number, i.e. followed by °/esimo - return n // there is an only comma at the end of the number: it is not part of it + // parse point indicator from e.g. "veintiuno coma cuatro" (twenty one point four) + if (!ts[1].hasCategory("digit_after_point") && (!NumberExtractorUtils.isRawNumber(ts[1]) || ts[2].hasCategory("ordinal_suffix"))) { + return n // there is a lone comma at the end of the number: it is not part of it } ts.movePositionForwardBy(1) + if (n == null) n = Number(0.0) // numbers can start with just "coma" var magnitude = 0.1 if (ts[0].value.length > 1 && NumberExtractorUtils.isRawNumber(ts[0])) { - // handle sequence of raw digits after point, e.g. 
.0123 - // value.length > 1 since multiple single-digits are handled below, e.g. . 0 1 2 3 - for (i in 0 until ts[0].value.length) { - n = n.plus((ts[0].value[i].code - '0'.code) * magnitude) + for (i in ts[0].value.indices) { + n = n!!.plus((ts[0].value[i].code - '0'.code) * magnitude) magnitude /= 10.0 } ts.movePositionForwardBy(1) } else { - // read as many digits as possible, e.g. point one six 5 one 0 three while (true) { - if (ts[0].hasCategory("digit_after_point") - || (ts[0].value.length == 1 && NumberExtractorUtils.isRawNumber(ts[0]) - && !ts[1].hasCategory("ordinal_suffix")) - ) { - // do not allow ordinal raw numbers, i.e. followed by st/nd/rd/th - n = n.plus(ts[0].number!!.multiply(magnitude)) + if (ts[0].hasCategory("digit_after_point") || (ts[0].value.length == 1 && NumberExtractorUtils.isRawNumber(ts[0]) && !ts[1].hasCategory("ordinal_suffix"))) { + n = n!!.plus(ts[0].number!!.multiply(magnitude)) magnitude /= 10.0 } else { - break // reached a word that is not a valid digit + break } ts.movePositionForwardBy(1) } } - } else if (ts[0].hasCategory("fraction_separator")) { - // parse fraction from e.g. "twenty divided by one hundred" - - var separatorLength = 1 - if (ts[1].hasCategory("fraction_separator_secondary")) { - separatorLength = 2 // also remove "by" after "divided by" + } else if (n != null && ts[0].hasCategory("fraction_separator")) { + // parse fraction from e.g. 
"veinte dividido entre cien" + val originalPosition = ts.position + ts.movePositionForwardBy(1) + if (ts[0].hasCategory("fraction_separator_secondary")) { + ts.movePositionForwardBy(1) } - - ts.movePositionForwardBy(separatorLength) val denominator = numberInteger(false) - if (denominator == null) { - ts.movePositionForwardBy(-separatorLength) // not a fraction, reset + if (denominator == null || denominator.isZero()) { + ts.position = originalPosition // not a fraction or division by zero, reset } else { return n.divide(denominator) } } - return n } fun numberInteger(allowOrdinal: Boolean): Number? { - if (ts[0].hasCategory("ignore")) { - return null // do not eat ignored words at the beginning - } + if (ts[0].hasCategory("ignore")) return null - var n = NumberExtractorUtils.numberMadeOfGroups( - ts, - allowOrdinal, - NumberExtractorUtils::numberGroupShortScale - ) + var n = NumberExtractorUtils.numberMadeOfGroups(ts, allowOrdinal, NumberExtractorUtils::numberGroupShortScale) if (n == null) { - return NumberExtractorUtils.numberBigRaw( - ts, - allowOrdinal - ) // try to parse big raw numbers (>=1000), e.g. 1207 + return NumberExtractorUtils.numberBigRaw(ts, allowOrdinal) // try to parse big raw numbers (>=1000), e.g. 1207 } else if (n.isOrdinal) { - return n // no more checks, as the ordinal word comes last, e.g. million twelfth + return n } - // n != null from here on if (n.lessThan(1000)) { - // parse raw number n separated by comma, e.g. 123,045,006 - // assuming current position is at the first comma - if (NumberExtractorUtils.isRawNumber(ts[-1]) && ts[0].hasCategory("thousand_separator") && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber( - ts[1] - ) - ) { + // parse raw number n separated by comma, e.g. 
123.045.006 + if (NumberExtractorUtils.isRawNumber(ts[-1]) && ts[0].hasCategory("thousand_separator") && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber(ts[1])) { val originalPosition = ts.position - 1 - - while (ts[0].hasCategory("thousand_separator") && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber( - ts[1] - ) - ) { + while (ts[0].hasCategory("thousand_separator") && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber(ts[1])) { n = n!!.multiply(1000).plus(ts[1].number) - ts.movePositionForwardBy(2) // do not allow ignored words in between + ts.movePositionForwardBy(2) } - if (ts[0].hasCategory("ordinal_suffix")) { if (allowOrdinal) { ts.movePositionForwardBy(1) - return n!!.withOrdinal(true) // ordinal number, e.g. 20,056,789th + return n!!.withOrdinal(true) } else { ts.position = originalPosition - return null // found ordinal number, revert since allowOrdinal is false + return null } } } } - - return n // e.g. six million, three hundred and twenty seven + return n } -} +} \ No newline at end of file diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt index 1118a8ea..814408e4 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt @@ -13,6 +13,8 @@ class SpanishParser : Parser("config/es-es") { shortScale: Boolean, preferOrdinal: Boolean ): () -> Number? { + // Spanish uses the long scale exclusively for number names. + // The shortScale parameter is ignored for pronunciation but passed for API consistency. val numberExtractor = SpanishNumberExtractor(tokenStream) return if (preferOrdinal) { numberExtractor::numberPreferOrdinal @@ -35,6 +37,7 @@ class SpanishParser : Parser("config/es-es") { preferMonthBeforeDay: Boolean, now: LocalDateTime ): () -> LocalDateTime? 
{ - return SpanishDateTimeExtractor(tokenStream, now)::dateTime + // Pass all parameters down to the extractor, following the English model. + return SpanishDateTimeExtractor(tokenStream, preferMonthBeforeDay, now)::dateTime } -} +} \ No newline at end of file From add5365153bebf9563224eb805a7d1d08b2b0b7f Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 15 Jul 2025 08:20:22 -0500 Subject: [PATCH 11/30] edit comment --- .../java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt index 5407bea7..03e4b39e 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt @@ -241,7 +241,7 @@ class SpanishDateTimeExtractor internal constructor( } if (ts[keywordIndex].isValue("media")) { ts.movePositionForwardBy(keywordIndex + 1) - return if (isMinus) -30 else 30 // "menos media" is unusual but possible + return if (isMinus) -30 else 30 // "menos media" is unusual for non-native speakers but possible } ts.position = originalPosition From 81cbd92270a024bf13b391bea879c3d71be67a4b Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 15 Jul 2025 09:26:57 -0500 Subject: [PATCH 12/30] update strings --- .../numbers/lang/es/ExtractDateTimeTest.java | 2 +- .../numbers/lang/es/ExtractNumbersTest.java | 32 ++-- .../numbers/lang/es/NiceDurationTest.java | 10 +- .../dicio/numbers/lang/es/NiceNumberTest.java | 29 ++-- .../dicio/numbers/lang/es/NiceTimeTest.java | 58 ++++--- .../lang/es/NumberExtractorUtilsTest.java | 150 +++++++++--------- 6 files changed, 138 insertions(+), 143 deletions(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java index 
a06d7ab7..4a003623 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java @@ -296,7 +296,7 @@ public void testHourNull() { assertHourNull("el un millón"); } - //TODO + //TODO Spanish translation @Test public void testNoonMidnightLike() { diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java index 9dd7dba4..0b9fa379 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java @@ -89,20 +89,20 @@ private void assertDivideByDenominatorIfPossible(final String s, final Number st @Test public void testNumberGroupLongScale() { - assertNumberGroupLongScale("one hundred and twenty million", F, 1e9, 120e6, F, 5); - assertNumberGroupLongScale("sixty three quadrillion", F, 1e28, 63e24, F, 3); - assertNumberGroupLongScale("three thousand and six", T, 1e9, 3006, F, 4); - assertNumberGroupLongScale("a hundred thousand", F, 1e6, 100000, F, 3); - assertNumberGroupLongScale("hundred 70 thousand", T, 1e6, 170000, F, 3); - assertNumberGroupLongScale("572 million", F, 1e9, 572e6, F, 2); - assertNumberGroupLongScale("572012 billion", F, 1e18, 572012e12, F, 2); - assertNumberGroupLongScale("3 million", T, 1e9, 3e6, F, 2); - assertNumberGroupLongScale(", one hundred and ninety one", F, 1e6, 191, F, 6); + assertNumberGroupLongScale("un ciento y veinte millones", F, 1e9, 120e6, F, 5); + assertNumberGroupLongScale("sesenta tres mil billones", F, 1e28, 63e24, F, 3); + assertNumberGroupLongScale("trescientos y seis", T, 1e9, 3006, F, 4); + assertNumberGroupLongScale("un ciento mil", F, 1e6, 100000, F, 3); + assertNumberGroupLongScale("ciento 70 mil", T, 1e6, 170000, F, 3); + assertNumberGroupLongScale("572 millones", F, 1e9, 572e6, F, 2); + assertNumberGroupLongScale("572012 mil 
millones", F, 1e18, 572012e12, F, 2); + assertNumberGroupLongScale("3 millones", T, 1e9, 3e6, F, 2); + assertNumberGroupLongScale(", ciento noventa y uno", F, 1e6, 191, F, 6); } @Test public void testNumberGroupLongScaleOrdinal() { - assertNumberGroupLongScale("seven hundred and sixty four millionth", T, 1e9, 764e6, T, 6); + assertNumberGroupLongScale("setecientos y sesentacuatro milésima", T, 1e9, 764e6, T, 6); assertNumberGroupLongScale("seven hundred and sixty four millionth", F, 1e9, 764, F, 5); assertNumberGroupLongScale("seven hundred and sixty four millionth", F, 1e6, 764, F, 5); assertNumberGroupLongScale("fifth billionth", T, 1e9, 5, T, 1); @@ -276,12 +276,12 @@ public void testNumberIntegerPerformanceWithFormatter() { @Test public void testNumberIntegerNull() { assertNumberIntegerNull("", T); - assertNumberIntegerNull("a hello how are you", F); - assertNumberIntegerNull(", and", T); - assertNumberIntegerNull("oh two", F); - assertNumberIntegerNull(", 123485 and", T); - assertNumberIntegerNull("and 123", F); - assertNumberIntegerNull(" one thousand ", T); + assertNumberIntegerNull("un hola cómo estás", F); + assertNumberIntegerNull(", y", T); + assertNumberIntegerNull("cero dos", F); + assertNumberIntegerNull(", 123485 y", T); + assertNumberIntegerNull("y 123", F); + assertNumberIntegerNull(" un ciento ", T); } @Test diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java index 41bc2ada..57ab0412 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java @@ -40,11 +40,11 @@ public void speechMany() { @Test public void speech() { - assertDuration("six days twenty three hours fifty nine minutes thirty two seconds", T, 6, 23, 59, 32); - assertDuration("nineteen days fifty two minutes", T, 19, 0, 52, 0); - assertDuration("one hour six seconds", T, 0, 1, 0, 6); - 
assertDuration("sixty three days forty four seconds", T, 63, 0, 0, 44); - assertDuration("one day one hour one minute one second", T, 1, 1, 1, 1); + assertDuration("seis días veintitrés horas cincuenta y nueve minutos treinta y dos segundos", T, 6, 23, 59, 32); + assertDuration("diecinueve días cincuenta y dos minutos", T, 19, 0, 52, 0); + assertDuration("una hora seis segundos", T, 0, 1, 0, 6); + assertDuration("sesenta y tres días cuarenta y cuatro segundos", T, 63, 0, 0, 44); + assertDuration("un día una hora un minuto un segundo", T, 1, 1, 1, 1); } @Test diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java index f5ea7232..6950c45f 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java @@ -18,19 +18,18 @@ public class NiceNumberTest { public static void setup() { pf = new ParserFormatter(new SpanishFormatter(), null); } - //TODO Spanish translation @Test public void speech() { - assertEquals("thirty four and a half", pf.niceNumber(34.5).get()); - assertEquals("minus eighteen and three fifths", pf.niceNumber(-18.6).get()); - assertEquals("ninety eight and eighteen nineteenths", pf.niceNumber(98.947368421).get()); - assertEquals("minus five and six elevenths", pf.niceNumber(-5.5454545).get()); - assertEquals("seven ninths", pf.niceNumber(7.0 / 9).get()); - assertEquals("minus two seventeenths", pf.niceNumber(-2.0 / 17).get()); - assertEquals("four hundred and sixty five", pf.niceNumber(465).get()); - assertEquals("minus ninety one", pf.niceNumber(-91).get()); - assertEquals("zero", pf.niceNumber(0).get()); + assertEquals("treinta y cuatro y medio", pf.niceNumber(34.5).get()); + assertEquals("menos dieciocho y tres quintos", pf.niceNumber(-18.6).get()); + assertEquals("noventa y ocho y dieciocho diecinueve", pf.niceNumber(98.947368421).get()); + assertEquals("menos cinco 
y seis undécimas", pf.niceNumber(-5.5454545).get()); + assertEquals("siete novenos", pf.niceNumber(7.0 / 9).get()); + assertEquals("menos dos decimoséptimos", pf.niceNumber(-2.0 / 17).get()); + assertEquals("cuatrocientos sesenta y cinco", pf.niceNumber(465).get()); + assertEquals("menos noventa y uno", pf.niceNumber(-91).get()); + assertEquals("cero", pf.niceNumber(0).get()); } @Test @@ -48,17 +47,17 @@ public void noSpeech() { @Test public void customDenominators() { - assertEquals("minus four and four tenths", pf.niceNumber(-4.4).denominators(Arrays.asList(2, 3, 4, 6, 7, 8, 9, 10, 11)).get()); + assertEquals("menos cuatro y cuatro décimas", pf.niceNumber(-4.4).denominators(Arrays.asList(2, 3, 4, 6, 7, 8, 9, 10, 11)).get()); assertEquals("-64 6/12", pf.niceNumber(-64.5).speech(F).denominators(Collections.singletonList(12)).get()); - assertEquals("minus three and five hundred thousand millionths", pf.niceNumber(-3.5).denominators(Arrays.asList(1000000, 2000000)).get()); + assertEquals("menos trescientas quinientas mil millonésimas", pf.niceNumber(-3.5).denominators(Arrays.asList(1000000, 2000000)).get()); assertEquals("9 1000000/2000000", pf.niceNumber(9.5).speech(F).denominators(Arrays.asList(2000000, 1000000)).get()); - assertEquals("zero point eight", pf.niceNumber(4.0 / 5).denominators(Arrays.asList(2, 3, 4)).get()); + assertEquals("cero punto ocho", pf.niceNumber(4.0 / 5).denominators(Arrays.asList(2, 3, 4)).get()); } @Test public void invalidFraction() { - assertEquals("one point eight four", pf.niceNumber(1.837).get()); - assertEquals("minus thirty eight point one nine", pf.niceNumber(-38.192).get()); + assertEquals("un punto ochenta y cuatro", pf.niceNumber(1.837).get()); + assertEquals("menos treinta y ocho coma uno nueve", pf.niceNumber(-38.192).get()); assertEquals("3829.48", pf.niceNumber(3829.47832).speech(F).get()); assertEquals("-7.19", pf.niceNumber(-7.1928).speech(F).get()); assertEquals("-9322.38", pf.niceNumber(-9322 - 8.0 / 
21).speech(F).get()); diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java index 3539681d..8423106c 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java @@ -19,16 +19,14 @@ public static void setup() { pf = new ParserFormatter(new SpanishFormatter(), null); } - //TODO Spanish translation - @Test public void random() { final LocalTime dt = LocalTime.of(13, 22, 3); - assertEquals("one twenty two", pf.niceTime(dt).get()); - assertEquals("one twenty two p.m.", pf.niceTime(dt).showAmPm(T).get()); - assertEquals("thirteen twenty two", pf.niceTime(dt).use24Hour(T).get()); - assertEquals("thirteen twenty two", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("una veintidós", pf.niceTime(dt).get()); + assertEquals("una y veintidós p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("trece veintidós", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("trece veintidós", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); assertEquals("1:22", pf.niceTime(dt).speech(F).get()); assertEquals("1:22 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); assertEquals("13:22", pf.niceTime(dt).speech(F).use24Hour(T).get()); @@ -38,10 +36,10 @@ public void random() { @Test public void oClock() { final LocalTime dt = LocalTime.of(15, 0, 32); - assertEquals("three o'clock", pf.niceTime(dt).get()); - assertEquals("three p.m.", pf.niceTime(dt).showAmPm(T).get()); - assertEquals("fifteen hundred", pf.niceTime(dt).use24Hour(T).get()); - assertEquals("fifteen hundred", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("tres en punto", pf.niceTime(dt).get()); + assertEquals("tres p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("mil quinientos", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("mil quinientos", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); 
assertEquals("3:00", pf.niceTime(dt).speech(F).get()); assertEquals("3:00 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); assertEquals("15:00", pf.niceTime(dt).speech(F).use24Hour(T).get()); @@ -51,10 +49,10 @@ public void oClock() { @Test public void afterMidnight() { final LocalTime dt = LocalTime.of(0, 2, 9); - assertEquals("twelve oh two", pf.niceTime(dt).get()); - assertEquals("twelve oh two a.m.", pf.niceTime(dt).showAmPm(T).get()); - assertEquals("zero zero zero two", pf.niceTime(dt).use24Hour(T).get()); - assertEquals("zero zero zero two", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("dos cero dos", pf.niceTime(dt).get()); + assertEquals("doce cero dos a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("cero cero cero dos", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("cero cero cero dos", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); assertEquals("12:02", pf.niceTime(dt).speech(F).get()); assertEquals("12:02 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); assertEquals("00:02", pf.niceTime(dt).speech(F).use24Hour(T).get()); @@ -64,10 +62,10 @@ public void afterMidnight() { @Test public void quarterPast() { final LocalTime dt = LocalTime.of(1, 15, 33); - assertEquals("quarter past one", pf.niceTime(dt).get()); - assertEquals("quarter past one a.m.", pf.niceTime(dt).showAmPm(T).get()); - assertEquals("zero one fifteen", pf.niceTime(dt).use24Hour(T).get()); - assertEquals("zero one fifteen", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("una y cuarto", pf.niceTime(dt).get()); + assertEquals("una y cuarto a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("cero uno quince", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("cero uno quince", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); assertEquals("1:15", pf.niceTime(dt).speech(F).get()); assertEquals("1:15 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); assertEquals("01:15", pf.niceTime(dt).speech(F).use24Hour(T).get()); @@ -77,10 
+75,10 @@ public void quarterPast() { @Test public void half() { final LocalTime dt = LocalTime.of(12, 30, 59); - assertEquals("half past twelve", pf.niceTime(dt).get()); - assertEquals("half past twelve p.m.", pf.niceTime(dt).showAmPm(T).get()); - assertEquals("twelve thirty", pf.niceTime(dt).use24Hour(T).get()); - assertEquals("twelve thirty", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("doce y media", pf.niceTime(dt).get()); + assertEquals("doce y media p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("doce treinta", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("doce y treinta", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); assertEquals("12:30", pf.niceTime(dt).speech(F).get()); assertEquals("12:30 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); assertEquals("12:30", pf.niceTime(dt).speech(F).use24Hour(T).get()); @@ -90,10 +88,10 @@ public void half() { @Test public void quarterTo() { final LocalTime dt = LocalTime.of(23, 45, 7); - assertEquals("quarter to twelve", pf.niceTime(dt).get()); - assertEquals("quarter to twelve p.m.", pf.niceTime(dt).showAmPm(T).get()); - assertEquals("twenty three forty five", pf.niceTime(dt).use24Hour(T).get()); - assertEquals("twenty three forty five", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("cuarto para las doce", pf.niceTime(dt).get()); + assertEquals("cuarto para las doce p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("veintitrés cuarenta y cinco", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("veintitrés cuarenta y cinco", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); assertEquals("11:45", pf.niceTime(dt).speech(F).get()); assertEquals("11:45 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); assertEquals("23:45", pf.niceTime(dt).speech(F).use24Hour(T).get()); @@ -103,10 +101,10 @@ public void quarterTo() { @Test public void tenAm() { final LocalTime dt = LocalTime.of(10, 3, 44); - assertEquals("ten oh three", pf.niceTime(dt).get()); - 
assertEquals("ten oh three a.m.", pf.niceTime(dt).showAmPm(T).get()); - assertEquals("ten zero three", pf.niceTime(dt).use24Hour(T).get()); - assertEquals("ten zero three", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("diez cero trés", pf.niceTime(dt).get()); + assertEquals("diez cero trés a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("diez cero trés", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("diez y trés", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); assertEquals("10:03", pf.niceTime(dt).speech(F).get()); assertEquals("10:03 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); assertEquals("10:03", pf.niceTime(dt).speech(F).use24Hour(T).get()); diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java index 7defb150..3238bb66 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java @@ -13,21 +13,19 @@ public String configFolder() { return "config/es-es"; } - //TODO Spanish translation - @Test public void testNumberLessThan1000() { - assertNumberLessThan1000("zero", T, 0, F, 1); - assertNumberLessThan1000("one", F, 1, F, 1); - assertNumberLessThan1000("five", T, 5, F, 1); - assertNumberLessThan1000("nineteen", F, 19, F, 1); - assertNumberLessThan1000("hundred", T, 100, F, 1); - assertNumberLessThan1000("one hundred", F, 100, F, 2); - assertNumberLessThan1000("three hundred", T, 300, F, 2); - assertNumberLessThan1000("twenty six", F, 26, F, 2); - assertNumberLessThan1000("thirty-seven", T, 37, F, 3); - assertNumberLessThan1000("seven hundred six", F, 706, F, 3); - assertNumberLessThan1000("eight hundred eighteen", T, 818, F, 3); + assertNumberLessThan1000("cero", T, 0, F, 1); + assertNumberLessThan1000("uno", F, 1, F, 1); + assertNumberLessThan1000("cinco", T, 5, F, 1); + 
assertNumberLessThan1000("diecinueve", F, 19, F, 1); + assertNumberLessThan1000("cien", T, 100, F, 1); + assertNumberLessThan1000("un ciento", F, 100, F, 2); + assertNumberLessThan1000("trescientos", T, 300, F, 2); + assertNumberLessThan1000("veintiséis", F, 26, F, 2); + assertNumberLessThan1000("treinta y siete", T, 37, F, 3); + assertNumberLessThan1000("setecientos seis", F, 706, F, 3); + assertNumberLessThan1000("ochocientos dieciocho", T, 818, F, 3); } @Test @@ -36,99 +34,99 @@ public void testNumberLessThan1000Digits() { assertNumberLessThan1000("1", T, 1, F, 1); assertNumberLessThan1000("6", F, 6, F, 1); assertNumberLessThan1000("15", T, 15, F, 1); - assertNumberLessThan1000("100 nineteen", F, 100, F, 1); - assertNumberLessThan1000("3 hundred 8", T, 308, F, 3); + assertNumberLessThan1000("100 diecinueve", F, 100, F, 1); + assertNumberLessThan1000("3 cientos 8", T, 308, F, 3); assertNumberLessThan1000("72", F, 72, F, 1); assertNumberLessThan1000("912", T, 912, F, 1); - assertNumberLessThan1000("8 hundred and 18", F, 818, F, 4); - assertNumberLessThan1000("7 hundred 3 9", T, 703, F, 3); - assertNumberLessThan1000("hundred 4 7", F, 104, F, 2); - assertNumberLessThan1000("19 hundred", T, 19, F, 1); - assertNumberLessThan1000("sixty 7", F, 67, F, 2); + assertNumberLessThan1000("8 cientos y 18", F, 818, F, 4); + assertNumberLessThan1000("7 cientos 3 9", T, 703, F, 3); + assertNumberLessThan1000("ciento 4 7", F, 104, F, 2); + assertNumberLessThan1000("19 ciento", T, 19, F, 1); + assertNumberLessThan1000("sesenta 7", F, 67, F, 2); assertNumberLessThan1000("30 6", T, 30, F, 1); } @Test public void testNumberLessThan1000EdgeCases() { - assertNumberLessThan1000("four five", T, 4, F, 1); - assertNumberLessThan1000("a two and", F, 2, F, 2); - assertNumberLessThan1000("one thirteen", T, 1, F, 1); - assertNumberLessThan1000("sixteen eight", F, 16, F, 1); - assertNumberLessThan1000("eighteen hundred", T, 18, F, 1); - assertNumberLessThan1000("zero hundred", F, 0, F, 1); - 
assertNumberLessThan1000("sixty nought", T, 60, F, 1); - assertNumberLessThan1000("a hundred", F, 100, F, 2); - assertNumberLessThan1000("one, and a hundred", T, 100, F, 5); - assertNumberLessThan1000("seven hundred and six", F, 706, F, 4); - assertNumberLessThan1000("one hundred and ninety one", T, 191, F, 5); - assertNumberLessThan1000("eight and a hundred and fifteen", F, 815, F, 6); - assertNumberLessThan1000("a a one a a hundred a a eleven a a", T, 111, F, 9); + assertNumberLessThan1000("cuatro cinco", T, 4, F, 1); + assertNumberLessThan1000("dos y", F, 2, F, 2); + assertNumberLessThan1000("uno trece", T, 1, F, 1); + assertNumberLessThan1000("dieciséis ocho", F, 16, F, 1); + assertNumberLessThan1000("mil ochocientos", T, 18, F, 1); + assertNumberLessThan1000("cero cien", F, 0, F, 1); + assertNumberLessThan1000("sesenta cero", T, 60, F, 1); + assertNumberLessThan1000("cien", F, 100, F, 2); + assertNumberLessThan1000("uno, y un ciento", T, 100, F, 5); + assertNumberLessThan1000("setecientos seis", F, 706, F, 4); + assertNumberLessThan1000("ciento noventa y uno", T, 191, F, 5); + assertNumberLessThan1000("ocho y ciento quince", F, 815, F, 6); + assertNumberLessThan1000("uno ciento once", T, 111, F, 9); } @Test public void testNumberLessThan1000Ordinal() { - assertNumberLessThan1000("fifth", T, 5, T, 1); - assertNumberLessThan1000("twenty sixth", T, 26, T, 2); - assertNumberLessThan1000("seventy eighth", F, 70, F, 1); - assertNumberLessThan1000("fiftieth eighth", T, 50, T, 1); - assertNumberLessThan1000("one hundred and thirteenth", T, 113, T, 4); - assertNumberLessThan1000("first hundred", T, 1, T, 1); - assertNumberLessThan1000("seven hundredth ten", T, 700, T, 2); - assertNumberLessThan1000("nine hundredth", F, 9, F, 1); - assertNumberLessThan1000("23 th", T, 23, T, 2); - assertNumberLessThan1000("620nd", T, 620, T, 2); - assertNumberLessThan1000("6st", T, 6, T, 2); - assertNumberLessThan1000("8 first", T, 8, F, 1); - assertNumberLessThan1000("1st hundred", T, 
1, T, 2); - assertNumberLessThan1000Null("seventh", F); - assertNumberLessThan1000Null("96th", F); + assertNumberLessThan1000("quinto", T, 5, T, 1); + assertNumberLessThan1000("vigésimo sexto", T, 26, T, 2); + assertNumberLessThan1000("septuagésimo octavo", F, 70, F, 1); + assertNumberLessThan1000("quincuagésimo octavo", T, 50, T, 1); + assertNumberLessThan1000("centésimo decimotercero", T, 113, T, 4); + assertNumberLessThan1000("primer centenar", T, 1, T, 1); + assertNumberLessThan1000("septuagésimo diez", T, 700, T, 2); + assertNumberLessThan1000("nueve centésimo", F, 9, F, 1); + assertNumberLessThan1000("23 va", T, 23, T, 2); + assertNumberLessThan1000("620va", T, 620, T, 2); + assertNumberLessThan1000("6va", T, 6, T, 2); + assertNumberLessThan1000("8 primero", T, 8, F, 1); + assertNumberLessThan1000("1er ciento", T, 1, T, 2); + assertNumberLessThan1000Null("séptimo", F); + assertNumberLessThan1000Null("96va", F); } @Test public void testNumberLessThan1000Null() { assertNumberLessThan1000Null("", F); - assertNumberLessThan1000Null("hello", T); - assertNumberLessThan1000Null("hello how are you", F); - assertNumberLessThan1000Null("a hello two and", T); - assertNumberLessThan1000Null("a car and a half,", F); - assertNumberLessThan1000Null("a million", T); - assertNumberLessThan1000Null(" twenty", F); + assertNumberLessThan1000Null("hola", T); + assertNumberLessThan1000Null("hola como estas", F); + assertNumberLessThan1000Null("un hola dos y", T); + assertNumberLessThan1000Null("un coche y medio,", F); + assertNumberLessThan1000Null("un millón", T); + assertNumberLessThan1000Null(" veinte", F); } @Test public void testNumberGroupShortScale() { - assertNumberGroupShortScale("one hundred and twenty million", F, 1000000000, 120000000, F, 5); - assertNumberGroupShortScale("three thousand and six", T, 1000000000, 3000, F, 2); - assertNumberGroupShortScale("a hundred thousand", F, 1000000, 100000, F, 3); - assertNumberGroupShortScale("hundred 70 thousand", T, 1000000, 
170000, F, 3); - assertNumberGroupShortScale("572 million", F, 1000000000, 572000000, F, 2); - assertNumberGroupShortScale("3 million", T, 1000000000, 3000000, F, 2); - assertNumberGroupShortScale(", one hundred and ninety one", F, 1000, 191, F, 6); + assertNumberGroupShortScale("ciento veinte millones", F, 1000000000, 120000000, F, 5); + assertNumberGroupShortScale("tres mil seis", T, 1000000000, 3000, F, 2); + assertNumberGroupShortScale("un cien mil", F, 1000000, 100000, F, 3); + assertNumberGroupShortScale("ciento 70 mil", T, 1000000, 170000, F, 3); + assertNumberGroupShortScale("572 millones", F, 1000000000, 572000000, F, 2); + assertNumberGroupShortScale("3 millones", T, 1000000000, 3000000, F, 2); + assertNumberGroupShortScale(", ciento noventa y uno", F, 1000, 191, F, 6); } @Test public void testNumberGroupShortScaleOrdinal() { - assertNumberGroupShortScale("seven hundred and sixty four millionth", T, 1000000000, 764000000, T, 6); - assertNumberGroupShortScale("seven hundred and sixty four millionth", F, 1000000000, 764, F, 5); - assertNumberGroupShortScale("seven hundred and sixty four millionth", F, 1000, 764, F, 5); - assertNumberGroupShortScale("fifth billionth", T, 1000000000, 5, T, 1); - assertNumberGroupShortScale("nineteen hundredth", T, 1000000000, 19, F, 1); - assertNumberGroupShortScaleNull("seven hundred and sixty four millionth", T, 1000); - assertNumberGroupShortScaleNull("twelfth thousandth", F, 1000000000); + assertNumberGroupShortScale("setecientos sesenta y cuatro millonésimas", T, 1000000000, 764000000, T, 6); + assertNumberGroupShortScale("setecientos sesenta y cuatro millonésimas", F, 1000000000, 764, F, 5); + assertNumberGroupShortScale("setecientos sesenta y cuatro millonésimas", F, 1000, 764, F, 5); + assertNumberGroupShortScale("quinto milmillonésimo", T, 1000000000, 5, T, 1); + assertNumberGroupShortScale("mil novecientos", T, 1000000000, 19, F, 1); + assertNumberGroupShortScaleNull("setecientos sesenta y cuatro millones", T, 
1000); + assertNumberGroupShortScaleNull("duodécimo milésimo", F, 1000000000); } @Test public void testNumberGroupShortScaleNull() { assertNumberGroupShortScaleNull("", T, 1000000000); - assertNumberGroupShortScaleNull("hello", F, 1000000); - assertNumberGroupShortScaleNull("hello how are you", T, 1000); + assertNumberGroupShortScaleNull("hola", F, 1000000); + assertNumberGroupShortScaleNull("hola cómo estás", T, 1000); assertNumberGroupShortScaleNull("129000", F, 1000000000); assertNumberGroupShortScaleNull("5000000", T, 1000000000); - assertNumberGroupShortScaleNull("one hundred and six", F, 999); - assertNumberGroupShortScaleNull("twelve", T, 0); - assertNumberGroupShortScaleNull("seven billion", F, 1000); - assertNumberGroupShortScaleNull("nine thousand and one", T, 1000); - assertNumberGroupShortScaleNull("eight million people", F, 1000000); - assertNumberGroupShortScaleNull(" ten ", T, 1000000); + assertNumberGroupShortScaleNull("un ciento seis", F, 999); + assertNumberGroupShortScaleNull("doce", T, 0); + assertNumberGroupShortScaleNull("site mil millones", F, 1000); + assertNumberGroupShortScaleNull("nueve mil uno", T, 1000); + assertNumberGroupShortScaleNull("ocho millones de personas", F, 1000000); + assertNumberGroupShortScaleNull(" diez ", T, 1000000); } } From 5765339e81f84f2c71793cd056ce2052d61afe44 Mon Sep 17 00:00:00 2001 From: Stypox Date: Tue, 15 Jul 2025 23:10:58 +0200 Subject: [PATCH 13/30] Fix compilation --- .../dicio/numbers/lang/en/EnglishNumberExtractor.kt | 5 ++--- .../numbers/lang/es/SpanishDateTimeExtractor.kt | 6 +++--- .../org/dicio/numbers/lang/es/SpanishFormatter.kt | 6 +++--- .../dicio/numbers/lang/es/SpanishNumberExtractor.kt | 3 ++- .../dicio/numbers/lang/it/ItalianNumberExtractor.kt | 3 ++- .../src/main/java/org/dicio/numbers/unit/Number.kt | 13 +++++++++++++ 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/en/EnglishNumberExtractor.kt 
b/numbers/src/main/java/org/dicio/numbers/lang/en/EnglishNumberExtractor.kt index 6597f0bd..8e4ba271 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/en/EnglishNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/en/EnglishNumberExtractor.kt @@ -3,6 +3,7 @@ package org.dicio.numbers.lang.en import org.dicio.numbers.parser.lexer.NumberToken import org.dicio.numbers.parser.lexer.TokenStream import org.dicio.numbers.unit.Number +import org.dicio.numbers.unit.isNullOrZero import org.dicio.numbers.util.NumberExtractorUtils class EnglishNumberExtractor internal constructor( @@ -187,9 +188,7 @@ class EnglishNumberExtractor internal constructor( } val denominator = numberInteger(false) - if (denominator == null || (denominator.isInteger && denominator.integerValue() == 0L) - || (denominator.isDecimal && denominator.decimalValue() == 0.0) - ) { + if (denominator.isNullOrZero()) { ts.position = originalPosition // not a fraction or division by zero, reset } else { return n.divide(denominator) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt index 03e4b39e..b17f6452 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt @@ -144,7 +144,7 @@ class SpanishDateTimeExtractor internal constructor( var result = LocalTime.of(hour, 0) // Handle "en punto" (o'clock) - if (ts.nextValueIs("en") && ts.nextValueIs("punto", 1)) { + if (ts[0].isValue("en") && ts[1].isValue("punto")) { ts.movePositionForwardBy(2) return result } @@ -230,8 +230,8 @@ class SpanishDateTimeExtractor internal constructor( // Spanish context: handles "y cuarto" (15), "y media" (30), "menos cuarto" (-15). 
val originalPosition = ts.position - val isMinus = ts.nextValueIs("menos") - val isPlus = ts.nextValueIs("y") || ts.nextValueIs("con") + val isMinus = ts[0].isValue("menos") + val isPlus = ts[0].isValue("y") || ts[0].isValue("con") // Look for 'cuarto' or 'media' val keywordIndex = if (isMinus || isPlus) 1 else 0 diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt index 1d2a2647..14790c17 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt @@ -20,10 +20,10 @@ class SpanishFormatter : Formatter("config/es-es") { // Spanish context: some fractions are gendered. "un medio" (a half) vs "una media tarta" (a half cake) val isFemale = mixedFraction.whole == 0L val denominatorString = when (mixedFraction.denominator) { - 2L -> if (isFemale) "media" else "medio" + 2 -> if (isFemale) "media" else "medio" else -> { // Ordinals are used for other denominators, e.g., "tercio", "cuarto", "quinto" - val ordinal = pronouncePositive(mixedFraction.denominator, true) + val ordinal = pronouncePositive(mixedFraction.denominator.toLong(), true) if (mixedFraction.numerator > 1) { // pluralize, e.g. 
"quinto" -> "quintos" if (ordinal.endsWith("o")) ordinal.dropLast(1) + "os" else ordinal + "s" @@ -36,7 +36,7 @@ class SpanishFormatter : Formatter("config/es-es") { val numeratorString = if (mixedFraction.numerator == 1) { if (isFemale) "una" else "un" } else { - pronouncePositive(mixedFraction.numerator, false) + pronouncePositive(mixedFraction.numerator.toLong(), false) } return if (mixedFraction.whole == 0L) { diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt index 0d845eba..f032b6be 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt @@ -2,6 +2,7 @@ package org.dicio.numbers.lang.es import org.dicio.numbers.parser.lexer.TokenStream import org.dicio.numbers.unit.Number +import org.dicio.numbers.unit.isNullOrZero import org.dicio.numbers.util.NumberExtractorUtils class SpanishNumberExtractor internal constructor(private val ts: TokenStream) { @@ -136,7 +137,7 @@ class SpanishNumberExtractor internal constructor(private val ts: TokenStream) { ts.movePositionForwardBy(1) } val denominator = numberInteger(false) - if (denominator == null || denominator.isZero()) { + if (denominator.isNullOrZero()) { ts.position = originalPosition // not a fraction or division by zero, reset } else { return n.divide(denominator) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/it/ItalianNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/it/ItalianNumberExtractor.kt index 9d84bcb3..e246a652 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/it/ItalianNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/it/ItalianNumberExtractor.kt @@ -2,6 +2,7 @@ package org.dicio.numbers.lang.it import org.dicio.numbers.parser.lexer.TokenStream import org.dicio.numbers.unit.Number +import org.dicio.numbers.unit.isNullOrZero 
import org.dicio.numbers.util.NumberExtractorUtils class ItalianNumberExtractor internal constructor(private val ts: TokenStream) { @@ -164,7 +165,7 @@ class ItalianNumberExtractor internal constructor(private val ts: TokenStream) { ts.movePositionForwardBy(separatorLength) val denominator = numberInteger(false) - if (denominator == null) { + if (denominator.isNullOrZero()) { ts.movePositionForwardBy(-separatorLength) // not a fraction, reset } else { return n.divide(denominator) diff --git a/numbers/src/main/java/org/dicio/numbers/unit/Number.kt b/numbers/src/main/java/org/dicio/numbers/unit/Number.kt index f6ed93d7..36eeec8e 100644 --- a/numbers/src/main/java/org/dicio/numbers/unit/Number.kt +++ b/numbers/src/main/java/org/dicio/numbers/unit/Number.kt @@ -1,6 +1,8 @@ package org.dicio.numbers.unit import java.util.Objects +import kotlin.contracts.ExperimentalContracts +import kotlin.contracts.contract /** * TODO add documentation @@ -31,6 +33,9 @@ class Number private constructor( val isInteger: Boolean get() = !isDecimal + val isZero: Boolean + get() = (isDecimal && decimalValue == 0.0) || (!isDecimal && integerValue == 0L) + fun integerValue(): Long { return integerValue } @@ -165,3 +170,11 @@ class Number private constructor( } } } + +@OptIn(ExperimentalContracts::class) +fun Number?.isNullOrZero(): Boolean { + contract { + returns(false) implies (this@isNullOrZero != null) + } + return this == null || this.isZero +} From 36f855bc0ca8dac9046fbde770a8044fe3c4afd2 Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 15 Jul 2025 18:26:48 -0500 Subject: [PATCH 14/30] add "que viene" sentence in spanish tokenizer --- numbers/src/main/resources/config/es-es/tokenizer.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 9e3a0d2e..941f732f 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ 
b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -359,7 +359,9 @@ "próximo", "próximos", "próxima", - "próximas" + "próximas", + "que vendrá", + "que viene" ] }, { From 757cf1f4a4d259e7d0e7bab93c035fcd7416659a Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 15 Jul 2025 18:30:18 -0500 Subject: [PATCH 15/30] =?UTF-8?q?also=20add=20"que=20pas=C3=B3"=20and=20"d?= =?UTF-8?q?ecimoprimer"=20in=20tokenizer.json=20file?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- numbers/src/main/resources/config/es-es/tokenizer.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 941f732f..5d4cef46 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -375,7 +375,9 @@ "anterior", "anteriores", "transcurrido", - "transcurridos" + "transcurridos", + "que pasó", + "que transcurrió" ] }, { @@ -655,6 +657,7 @@ "values": { "décimo": 10, "décima": 10, + "decimoprimer": 11, "decimoprimero": 11, "decimoprimera": 11, "decimosegundo": 12, From fd5111a9e88c6f0a47193950136c26404a059675 Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 15 Jul 2025 19:05:02 -0500 Subject: [PATCH 16/30] Update sentences for test files --- .../lang/es/DateTimeExtractorUtilsTest.java | 6 +- .../numbers/lang/es/ExtractDateTimeTest.java | 151 ++++++++---------- 2 files changed, 71 insertions(+), 86 deletions(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java index 9b7431c7..1c8cdf5c 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java @@ -134,12 +134,10 @@ public void testSecondNull() { @Test public void 
testBcad() { assertBcad("a.C. prueba", false, 3); - assertBcad("d.C. and", true, 3); + assertBcad("d.C. y", true, 3); assertBcad("adc prueba y", true, 1); assertBcad("antes de Cristo", false, 2); assertBcad("d y Domini", true, 3); - assertBcad("ace", false, 1); - assertBcad("d current", false, 2); // there is a workaround for this in spanishDateTimeExtractor assertBcad("a.c.e.", false, 3); @@ -179,7 +177,7 @@ public void testAmpmNull() { public void testMonthName() { assertMonthName("enero", 1); assertMonthName("dic e", 12); - assertMonthName("sept ember", 9); + assertMonthName("sept iembre", 9); assertMonthName("mar", 3); } diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java index 4a003623..43df7a17 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java @@ -296,127 +296,114 @@ public void testHourNull() { assertHourNull("el un millón"); } - //TODO Spanish translation - @Test public void testNoonMidnightLike() { assertNoonMidnightLike("del mediodía", 0, 2); - assertNoonMidnightLike("middays", 12, 1); - assertNoonMidnightLike("this noon", 12, 2); + assertNoonMidnightLike("en el mediodía", 12, 2); } @Test public void testNoonMidnightLikeNull() { - assertNoonMidnightLikeNull("hello how are you"); - assertNoonMidnightLikeNull("this evening and"); - assertNoonMidnightLikeNull("tonight test"); - assertNoonMidnightLikeNull("after dinner"); - assertNoonMidnightLikeNull("before the lunch"); - assertNoonMidnightLikeNull("and at midday"); - assertNoonMidnightLikeNull("and midnight"); - assertNoonMidnightLikeNull("at hour noon"); - assertNoonMidnightLikeNull("in midnight"); - assertNoonMidnightLikeNull("at the midday"); + assertNoonMidnightLikeNull("hola cómo estás"); + assertNoonMidnightLikeNull("este atardecer y"); + assertNoonMidnightLikeNull("anocher 
prueba"); + assertNoonMidnightLikeNull("después de la cena"); + assertNoonMidnightLikeNull("antes del almuerzo"); + assertNoonMidnightLikeNull("y al mediodía"); + assertNoonMidnightLikeNull("y medianoche"); + assertNoonMidnightLikeNull("a la hora del mediodía"); + assertNoonMidnightLikeNull("a la medianoche"); + assertNoonMidnightLikeNull("al mediodía"); } @Test public void testMomentOfDay() { - assertMomentOfDay("at midnight", 0, 2); - assertMomentOfDay("noon", 12, 1); - assertMomentOfDay("these midnights", 0, 2); - assertMomentOfDay("this evening and", 21, 2); - assertMomentOfDay("at tonight test", 23, 2); - assertMomentOfDay("nighttime test", 3, 1); - assertMomentOfDay("after dinner", 21, 2); - assertMomentOfDay("before the lunch", 11, 3); - assertMomentOfDay("the dinner", 20, 2); + assertMomentOfDay("a la medianoche", 0, 2); + assertMomentOfDay("mediodía", 12, 1); + assertMomentOfDay("estas medianoches", 0, 2); + assertMomentOfDay("esta tarde y", 21, 2); + assertMomentOfDay("de la noche prueba", 23, 2); + assertMomentOfDay("noche prueba", 3, 1); + assertMomentOfDay("después de la cena", 21, 2); + assertMomentOfDay("después del lonche", 11, 3); + assertMomentOfDay("la cena", 20, 2); } @Test public void testMomentOfDayNull() { - assertMomentOfDayNull("hello how are you"); - assertMomentOfDayNull("and at midday"); - assertMomentOfDayNull("mid night"); - assertMomentOfDayNull("at hour dinner"); - assertMomentOfDayNull("in dinner"); + assertMomentOfDayNull("hola cómo estás"); + assertMomentOfDayNull("y al mediodía"); + assertMomentOfDayNull("media noche"); + assertMomentOfDayNull("a la hora de la cena"); + assertMomentOfDayNull("en la cena"); } @Test public void testSpecialMinute() { - assertSpecialMinute("a quarter to", -15, 3); - assertSpecialMinute("half of past test", 30, 3); - assertSpecialMinute("a half to eleven", -30, 3); - assertSpecialMinute("zero point two of past", 12, 5); - assertSpecialMinute("thirteen fourteenths to", -56, 3); // 13/14*60 is 55.7 -> 
rounded to 56 - assertSpecialMinute("at twenty the past", 20, 4); - assertSpecialMinute("the fifty and nine to", -59, 5); - assertSpecialMinute("fifteen past twelve", 15, 2); + assertSpecialMinute("un cuarto para", -15, 3); + assertSpecialMinute("half of past test", 30, 3); + assertSpecialMinute("a half to eleven", -30, 3); + assertSpecialMinute("zero point two of past", 12, 5); + assertSpecialMinute("trece décimocuartos para", -56, 3); // 13/14*60 is 55.7 -> rounded to 56 + assertSpecialMinute("a los veinte pasados", 20, 4); + assertSpecialMinute("cincuenta y nueve para", -59, 5); + assertSpecialMinute("las doce y cuarto", 15, 2); } @Test public void testSpecialMinuteNull() { - assertSpecialMinuteNull("hello how are you"); - assertSpecialMinuteNull("two"); - assertSpecialMinuteNull("one hundred and twelve to"); - assertSpecialMinuteNull("minus a quarter to five"); - assertSpecialMinuteNull("four quarters to nine"); - assertSpecialMinuteNull("zero halfs to"); - assertSpecialMinuteNull("zero and comma two past"); - assertSpecialMinuteNull("thirteen and fourteenths past"); - assertSpecialMinuteNull("and fifteen past twelve"); + assertSpecialMinuteNull("hola cómo estás"); + assertSpecialMinuteNull("dos"); + assertSpecialMinuteNull("ciento doce para la"); + assertSpecialMinuteNull("menos un cuarto para las cinco"); + assertSpecialMinuteNull("cuatro cuartos para las nueve"); + assertSpecialMinuteNull("cero medios para"); + assertSpecialMinuteNull("cero y coma dos después de"); + assertSpecialMinuteNull("trece y catorce pasados"); + assertSpecialMinuteNull("y las quince y cien"); } @Test public void testOClock() { - assertOClock("o clock", 2); - assertOClock("o'clock", 2); - assertOClock("oclock", 1); - assertOClock("o,clock", 3); - assertOClock("exact", 1); - assertOClock("on the dot", 3); + assertOClock("en punto", 2); } @Test public void testOClockFalse() { - assertOClockFalse("hello"); - assertOClockFalse("by the clock"); - assertOClockFalse("clock o"); - 
assertOClockFalse("clock"); - assertOClockFalse("on"); + assertOClockFalse("hola"); + assertOClockFalse("por el punto"); } @Test public void testDate() { - assertDate("04/09-4096", F, LocalDate.of(4096, 9, 4), 5); - assertDate("04/09-4096", T, LocalDate.of(4096, 4, 9), 5); - assertDate("4 13 2023", LocalDate.of(2023, 4, 13), 3); - assertDate("13.4.2023", LocalDate.of(2023, 4, 13), 5); - assertDate("six of seven of nineteen ninety five", F, LocalDate.of(1995, 7, 6), 7); - assertDate("six of seven of nineteen ninety five", T, LocalDate.of(1995, 6, 7), 7); - assertDate("thursday 26 of may 2022", LocalDate.of(2022, 5, 26), 5); - assertDate("august the second, two", LocalDate.of(2, 8, 2), 5); - assertDate("2nd january, two b.c.", LocalDate.of(-2, 1, 2), 8); - assertDate("mon twelve jun two thousand twelve b.C.", LocalDate.of(-2012, 6, 12), 9); - assertDate("four hundred seventy six AD", LocalDate.of(476, 1, 1), 5); - assertDate("four thousand before common era", LocalDate.of(-4000, 1, 1), 5); - assertDate("four thousand of before Christ", LocalDate.of(4000, 1, 1), 2); - assertDate("tuesday and twenty seven", LocalDate.of(2023, 2, 27), 4); - assertDate("tuesday and twelve", F, LocalDate.of(2023, 2, 12), 3); - assertDate("tuesday and twelve", T, LocalDate.of(2023, 12, 1), 3); // a bit strange - assertDate("november e", LocalDate.of(2023, 11, 1), 1); - assertDate("wednesday test eight", LocalDate.of(2023, 2, 1), 1); - assertDate("monday november", LocalDate.of(2023, 1, 30), 1); - assertDate("october two thousand and twelve", LocalDate.of(2012, 10, 1), 5); - assertDate("999999999", LocalDate.of(999999999,1,1), 1); + assertDate("09/04-4096", F, LocalDate.of(4096, 9, 4), 5); + assertDate("09/04-4096", T, LocalDate.of(4096, 4, 9), 5); + assertDate("13 4 2023", LocalDate.of(2023, 4, 13), 3); + assertDate("13.4.2023", LocalDate.of(2023, 4, 13), 5); + assertDate("seis de siete de mil novecientos noventa y cinco", F, LocalDate.of(1995, 7, 6), 7); + assertDate("seis de siete de mil 
novecientos noventa y cinco", T, LocalDate.of(1995, 6, 7), 7); + assertDate("jueves 26 de mayo de 2022", LocalDate.of(2022, 5, 26), 5); + assertDate("dos de agosto", LocalDate.of(2, 8, 2), 5); + assertDate("2 de enero, 2 a.c.", LocalDate.of(-2, 1, 2), 8); + assertDate("doce de junio de dos mil doce a.C.", LocalDate.of(-2012, 6, 12), 9); + assertDate("cuatrocientos setenta y seis d.C.", LocalDate.of(476, 1, 1), 5); + assertDate("cuatro mil antes de la era común", LocalDate.of(-4000, 1, 1), 5); + assertDate("cuatro mil de antes de Cristo", LocalDate.of(4000, 1, 1), 2); + assertDate("martes y veintisiete", LocalDate.of(2023, 2, 27), 4); + assertDate("martes y doce", F, LocalDate.of(2023, 2, 12), 3); + assertDate("martes y doce", T, LocalDate.of(2023, 12, 1), 3); // a bit strange + assertDate("november e", LocalDate.of(2023, 11, 1), 1); + assertDate("miércoles ocho prueba", LocalDate.of(2023, 2, 1), 1); + assertDate("lunes noviembre", LocalDate.of(2023, 1, 30), 1); + assertDate("octubre de dos mil doce", LocalDate.of(2012, 10, 1), 5); + assertDate("999999999", LocalDate.of(999999999,1,1), 1); // the following work thanks to special case in number extractor! 
- assertDate("twenty twelve", LocalDate.of(2012, 1, 1), 2); - assertDate("sunday twenty thirteen", LocalDate.of(2023, 2, 5), 1); - } + } @Test public void testDateNull() { - assertDateNull("hello how are you"); - assertDateNull("am tuedsay"); + assertDateNull("hola cómo estás"); + assertDateNull("am mates"); assertDateNull("and two thousand and fifteen"); assertDateNull("of may two"); assertDateNull("tomorrow"); From f3ea3e75e266377d81b2ddeb1de2bbd47437c87e Mon Sep 17 00:00:00 2001 From: Diego Date: Tue, 15 Jul 2025 19:21:12 -0500 Subject: [PATCH 17/30] Update more sentences --- .../numbers/lang/es/ExtractDateTimeTest.java | 36 +++++++++---------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java index 43df7a17..c013580a 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java @@ -404,39 +404,35 @@ public void testDate() { public void testDateNull() { assertDateNull("hola cómo estás"); assertDateNull("am mates"); - assertDateNull("and two thousand and fifteen"); - assertDateNull("of may two"); - assertDateNull("tomorrow"); + assertDateNull("y dos mil quince"); + assertDateNull("y mayo de dos"); + assertDateNull("mañana"); assertDateNull("1000000000"); } @Test public void testBcad() { - // b.c.e special case, not covered by DateTimeExtractorUtils.bcad() - assertBcad("bce", false, 1); - assertBcad("b.c.e.", false, 5); - assertBcad("before current era", false, 3); - assertBcad("current era", true, 2); + assertBcad("nuestra era", true, 2); } @Test public void testTime() { - assertTime("13:28.33 test", LocalTime.of(13, 28, 33), 4); - assertTime("half past noon", LocalTime.of(12, 30, 0), 3); - assertTime("at fourteen and", LocalTime.of(14, 0, 0), 2); - assertTime("midnight of twelve", LocalTime.of(0, 12, 
0), 3); - assertTime("twenty four and zero", LocalTime.of(0, 0, 0), 4); - assertTime("the twenty three and fifty one min and 17 seconds", LocalTime.of(23, 51, 17), 10); + assertTime("13:28.33 prueba", LocalTime.of(13, 28, 33), 4); + assertTime("las doce y media del mediodía", LocalTime.of(12, 30, 0), 3); + assertTime("a las catorce y", LocalTime.of(14, 0, 0), 2); + assertTime("doce de la medianoche", LocalTime.of(0, 12, 0), 3); + assertTime("veinticuatro y cero", LocalTime.of(0, 0, 0), 4); + assertTime("las veintitrés y cincuenta y un min y 17 segundos", LocalTime.of(23, 51, 17), 10); } @Test public void testTimeNull() { - assertTimeNull("hello how are you"); - assertTimeNull("sixty one"); + assertTimeNull("hola cómo estás"); + assertTimeNull("sesenta y uno"); assertTimeNull("30:59"); - assertTimeNull("minus sixteen"); - assertTimeNull("four million"); - assertTimeNull("evening"); + assertTimeNull("menos dieciséis"); + assertTimeNull("cuatro millones"); + assertTimeNull("cena"); } @Test @@ -446,7 +442,7 @@ public void testTimeWithAmpm() { assertTimeWithAmpm("at two o'clock in the morning", LocalTime.of(2, 0, 0), 7); assertTimeWithAmpm("three thirty eight in the afternoon", LocalTime.of(15, 38, 0), 6); assertTimeWithAmpm("18:29:02 and am", LocalTime.of(18, 29, 2), 5); - assertTimeWithAmpm("evening", LocalTime.of(21, 0, 0), 1); + assertTimeWithAmpm("noche", LocalTime.of(21, 0, 0), 1); assertTimeWithAmpm("afternoon at four and three and six", LocalTime.of(16, 3, 6), 7); // corner cases: assertTimeWithAmpm("twenty four in the evening", LocalTime.of(0, 0, 0), 5); From d1393e8c691ead3ded62b6ee8eed9df82844e777 Mon Sep 17 00:00:00 2001 From: Diego Date: Wed, 16 Jul 2025 08:03:59 -0500 Subject: [PATCH 18/30] fix plural in tokenizer --- numbers/src/main/resources/config/es-es/tokenizer.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json 
index 5d4cef46..bb41ed22 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -6,8 +6,9 @@ "raw" ], "plural_endings": [ - "s", - "es" + "as", + "es", + "os" ], "word_matches": [ { From 16f9c1a458a7a4f542601c4764745a7b3a23cec8 Mon Sep 17 00:00:00 2001 From: Diego Date: Wed, 16 Jul 2025 08:43:32 -0500 Subject: [PATCH 19/30] =?UTF-8?q?fix=20word=20"veintis=C3=A9is"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- numbers/src/main/resources/config/es-es/date_time.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numbers/src/main/resources/config/es-es/date_time.json b/numbers/src/main/resources/config/es-es/date_time.json index 467d6c01..864ffd3a 100644 --- a/numbers/src/main/resources/config/es-es/date_time.json +++ b/numbers/src/main/resources/config/es-es/date_time.json @@ -122,7 +122,7 @@ "23": "veintitres", "24": "veinticuatro", "25": "veinticinco", - "26": "veintiseis", + "26": "veintiséis", "27": "veintisiete", "28": "veintiocho", "29": "veintinueve", From 582f3f7273a063e232e33e9cec0bc67f4daa83c7 Mon Sep 17 00:00:00 2001 From: Diego Date: Wed, 16 Jul 2025 08:48:54 -0500 Subject: [PATCH 20/30] edit multiplier values in tokenizer.json --- numbers/src/main/resources/config/es-es/tokenizer.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index bb41ed22..47b5a190 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -608,14 +608,13 @@ ], "values": { "mil": 1000, + "miles": 1000, "millón": 1000000, "millones": 1000000, - "mil millones": 1000000000, "millardo": 1000000000, "millardos": 1000000000, "billón": 1000000000000, "billones": 1000000000000, - "mil billones": 1000000000000000, "trillón": 1000000000000000000, 
"trillones": 1000000000000000000 } From 85811f696d355450fa71cdde0245185cdb458fc4 Mon Sep 17 00:00:00 2001 From: Diego Date: Wed, 16 Jul 2025 08:53:13 -0500 Subject: [PATCH 21/30] another edit values in tokenizer.json --- numbers/src/main/resources/config/es-es/tokenizer.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 47b5a190..96f20abd 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -613,8 +613,12 @@ "millones": 1000000, "millardo": 1000000000, "millardos": 1000000000, + "mil millón": 1000000000, + "mil millones": 1000000000, "billón": 1000000000000, "billones": 1000000000000, + "mil billón": 1000000000000000, + "mil billones": 1000000000000000, "trillón": 1000000000000000000, "trillones": 1000000000000000000 } From fb25f9722533ed62b68a3684c71113e1dde1dfb0 Mon Sep 17 00:00:00 2001 From: Diego Date: Wed, 16 Jul 2025 09:23:51 -0500 Subject: [PATCH 22/30] reorder multiplier values --- .../src/main/resources/config/es-es/tokenizer.json | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 96f20abd..b4224474 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -613,14 +613,17 @@ "millones": 1000000, "millardo": 1000000000, "millardos": 1000000000, - "mil millón": 1000000000, - "mil millones": 1000000000, "billón": 1000000000000, "billones": 1000000000000, - "mil billón": 1000000000000000, - "mil billones": 1000000000000000, "trillón": 1000000000000000000, - "trillones": 1000000000000000000 + "trillones": 1000000000000000000, + "un millón": 1000000, + "un billón": 1000000000000, + "un trillón": 1000000000000000000, + "mil millón": 1000000000, 
+ "mil millones": 1000000000, + "mil billón": 1000000000000000, + "mil billones": 1000000000000000 } }, { From 2c8231323f1b881cdbcf1eaf80560c454cf0158a Mon Sep 17 00:00:00 2001 From: Diego Date: Wed, 16 Jul 2025 20:35:01 -0500 Subject: [PATCH 23/30] some edits --- .../java/org/dicio/numbers/lang/es/SpanishFormatter.kt | 9 +++++---- numbers/src/main/resources/config/es-es/tokenizer.json | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt index 14790c17..fda21967 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt @@ -56,7 +56,7 @@ class SpanishFormatter : Formatter("config/es-es") { if (java.lang.Double.isNaN(number)) return "no es un número" if (scientific || abs(number) > 999999999999999934463.0) { - val scientificFormatted = String.format(Locale.ENGLISH, "%E", number) + val scientificFormatted = String.format(Locale.SPANISH, "%E", number) val parts = scientificFormatted.split("E".toRegex(), limit = 2).toTypedArray() val power = parts[1].toInt().toDouble() if (power != 0.0) { @@ -162,8 +162,8 @@ class SpanishFormatter : Formatter("config/es-es") { } } - // `pronounceNumberDuration` is a simplification for contexts where gender doesn't matter, - // like "un minuto", but "una hora". The base `pronouncePositive` is more versatile. + // "pronounceNumberDuration" is a simplification for contexts where gender doesn't matter, + // like "un minuto", but "una hora". The base "pronouncePositive" is more versatile. override fun pronounceNumberDuration(number: Long): String { if (number == 1L) return "un" return pronouncePositive(number, false) @@ -188,7 +188,8 @@ class SpanishFormatter : Formatter("config/es-es") { // Includes common ordinals. 
private val ORDINAL_NAMES = mapOf( 1L to "primero", 2L to "segundo", 3L to "tercero", 4L to "cuarto", 5L to "quinto", - 6L to "sexto", 7L to "séptimo", 8L to "octavo", 9L to "noveno", 10L to "décimo" + 6L to "sexto", 7L to "séptimo", 8L to "octavo", 9L to "noveno", 10L to "décimo", + 11L to "undécimo", 12L to "duodécimo", 8L to "octavo", 9L to "noveno", 10L to "décimo", ) } } \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index b4224474..5e65c7e5 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -664,9 +664,11 @@ "values": { "décimo": 10, "décima": 10, + "undécimo": 11, "decimoprimer": 11, "decimoprimero": 11, "decimoprimera": 11, + "duodécimo": 12, "decimosegundo": 12, "decimosegunda": 12, "decimotercero": 13, From 19e0b37210b9d7bc5113dfd9ccc4ad1f7b2a910d Mon Sep 17 00:00:00 2001 From: Diego Date: Wed, 16 Jul 2025 20:39:08 -0500 Subject: [PATCH 24/30] little fix --- .../src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt index fda21967..ee1d83a8 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt @@ -189,7 +189,7 @@ class SpanishFormatter : Formatter("config/es-es") { private val ORDINAL_NAMES = mapOf( 1L to "primero", 2L to "segundo", 3L to "tercero", 4L to "cuarto", 5L to "quinto", 6L to "sexto", 7L to "séptimo", 8L to "octavo", 9L to "noveno", 10L to "décimo", - 11L to "undécimo", 12L to "duodécimo", 8L to "octavo", 9L to "noveno", 10L to "décimo", + 11L to "undécimo", 12L to "duodécimo" ) } } \ No newline at end of file From 3a95abafdae664977168eb2b379f91377eb6bf36 
Mon Sep 17 00:00:00 2001 From: Stypox Date: Thu, 17 Jul 2025 17:04:13 +0200 Subject: [PATCH 25/30] Fix compilation --- .../src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt index ee1d83a8..da40d1f3 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt @@ -56,7 +56,7 @@ class SpanishFormatter : Formatter("config/es-es") { if (java.lang.Double.isNaN(number)) return "no es un número" if (scientific || abs(number) > 999999999999999934463.0) { - val scientificFormatted = String.format(Locale.SPANISH, "%E", number) + val scientificFormatted = String.format(Locale("es"), "%E", number) val parts = scientificFormatted.split("E".toRegex(), limit = 2).toTypedArray() val power = parts[1].toInt().toDouble() if (power != 0.0) { From bf34bbcad661fd9669de2cdb08a26e352d0cc8a0 Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 17 Jul 2025 12:58:54 -0500 Subject: [PATCH 26/30] =?UTF-8?q?Set=20=E2=80=9Ccompound=5Fword=5Fpiece?= =?UTF-8?q?=E2=80=9D=20for=20tokenizer.json=20due=20to=20strange=20behavio?= =?UTF-8?q?r=20with=20compound=20numbers=20which=20resembles=20Italian=20l?= =?UTF-8?q?ang?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../resources/config/es-es/tokenizer.json | 107 ++++++++++-------- 1 file changed, 60 insertions(+), 47 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json index 5e65c7e5..3e1db04a 100644 --- a/numbers/src/main/resources/config/es-es/tokenizer.json +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -1,14 +1,14 @@ { "spaces": " \t\n\f\r:;_!?<>|=()[]{}»«*~^`'\"", "characters_as_word": "%‰#-+.,/", + 
"compound_word_piece_category": "compound_word_piece", "raw_number_categories": [ "number", "raw" ], "plural_endings": [ - "as", - "es", - "os" + "s", + "es" ], "word_matches": [ { @@ -73,7 +73,8 @@ "point" ], "values": [ - "punto" + "punto", + "coma" ] }, { @@ -530,13 +531,12 @@ "categories": [ "number", "digit", - "digit_after_point" + "digit_after_point", + "compound_word_piece" ], "values": { "cero": 0, "uno": 1, - "una": 1, - "un": 1, "dos": 2, "tres": 3, "cuatro": 4, @@ -550,7 +550,19 @@ { "categories": [ "number", - "teen" + "digit", + "compound_word_piece" + ], + "values": { + "un": 1, + "una": 1 + } + }, + { + "categories": [ + "number", + "teen", + "compound_word_piece" ], "values": { "diez": 10, @@ -564,7 +576,6 @@ "dieciocho": 18, "diecinueve": 19, "veintiuno": 21, - "veintiún": 21, "veintidós": 22, "veintitrés": 23, "veinticuatro": 24, @@ -578,10 +589,12 @@ { "categories": [ "number", - "tens" + "tens", + "compound_word_piece" ], "values": { "veinte": 20, + "veinti": 20, "treinta": 30, "cuarenta": 40, "cincuenta": 50, @@ -594,17 +607,27 @@ { "categories": [ "number", - "hundred" + "hundred", + "compound_word_piece" ], "values": { "cien": 100, - "ciento": 100 + "ciento": 100, + "doscientos": 200, + "trescientos": 300, + "cuatrocientos": 400, + "quinientos": 500, + "seiscientos": 600, + "setecientos": 700, + "ochocientos": 800, + "novecientos": 900 } }, { "categories": [ "number", - "multiplier" + "multiplier", + "compound_word_piece" ], "values": { "mil": 1000, @@ -616,21 +639,15 @@ "billón": 1000000000000, "billones": 1000000000000, "trillón": 1000000000000000000, - "trillones": 1000000000000000000, - "un millón": 1000000, - "un billón": 1000000000000, - "un trillón": 1000000000000000000, - "mil millón": 1000000000, - "mil millones": 1000000000, - "mil billón": 1000000000000000, - "mil billones": 1000000000000000 + "trillones": 1000000000000000000 } }, { "categories": [ "number", "ordinal", - "digit" + "digit", + "compound_word_piece" ], "values": { 
"primero": 1, @@ -659,18 +676,16 @@ "categories": [ "number", "ordinal", - "teen" + "teen", + "compound_word_piece" ], "values": { "décimo": 10, "décima": 10, "undécimo": 11, - "decimoprimer": 11, - "decimoprimero": 11, - "decimoprimera": 11, + "undécima": 11, "duodécimo": 12, - "decimosegundo": 12, - "decimosegunda": 12, + "duodécima": 12, "decimotercero": 13, "decimotercera": 13, "decimocuarto": 14, @@ -691,7 +706,8 @@ "categories": [ "number", "ordinal", - "tens" + "tens", + "compound_word_piece" ], "values": { "vigésimo": 20, @@ -716,7 +732,8 @@ "categories": [ "number", "ordinal", - "hundred" + "hundred", + "compound_word_piece" ], "values": { "centésimo": 100, @@ -727,27 +744,20 @@ "categories": [ "number", "ordinal", - "multiplier" + "multiplier", + "compound_word_piece" ], "values": { "milésimo": 1000, "milésima": 1000, - "milésimas": 1000, "millonésimo": 1000000, "millonésima": 1000000, - "millonésimas": 1000000, "milmillonésimo": 1000000000, "milmillonésima": 1000000000, - "milmillonésimas": 1000000000, "billonésimo": 1000000000000, "billonésima": 1000000000000, - "billonésimas": 1000000000000, - "milbillonésimo": 1000000000000000, - "milbillonésima": 1000000000000000, - "milbillonésimas": 1000000000000000, "trillonésimo": 1000000000000000000, - "trillonésima": 1000000000000000000, - "trillonésimas": 1000000000000000000 + "trillonésima": 1000000000000000000 } }, { @@ -756,20 +766,23 @@ "suffix_multiplier" ], "values": { + "media": 0.5, + "medio": 0.5, "mitad": 0.5, - "mitades": 0.5, "cuarto": 0.25, - "cuartos": 0.25, + "quinto": 0.2, + "octavo": 0.125, "par": 2, - "pares": 2, "dupla": 2, - "duplas": 2, + "trío": 3, + "cuarteto": 3, "docena": 12, - "docenas": 12, "decena": 10, - "decenas": 10, + "quincena": 15, "veintena": 20, - "veintenas": 20, + "treintena": 30, + "cuarentena": 40, + "centena": 100, "porciento": 0.01, "centésima": 0.01, "pc": 0.01, From f20b68fdc8401604dac252fe2bcaeea47050e3ae Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 17 Jul 2025 
13:09:49 -0500 Subject: [PATCH 27/30] Update date_time using catalan from Spain logic --- .../resources/config/es-es/date_time.json | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/numbers/src/main/resources/config/es-es/date_time.json b/numbers/src/main/resources/config/es-es/date_time.json index 864ffd3a..b14fbe9b 100644 --- a/numbers/src/main/resources/config/es-es/date_time.json +++ b/numbers/src/main/resources/config/es-es/date_time.json @@ -2,30 +2,30 @@ "decade_format": { "1": {"match": "^\\d$", "format": "{x}"}, "2": {"match": "^1\\d$", "format": "{xx}"}, - "3": {"match": "^\\d0$", "format": "{x0}"}, - "4": {"match": "^2\\d$", "format": "{xx}"}, + "3": {"match": "^2\\d$", "format": "{xx}"}, + "4": {"match": "^\\d0$", "format": "{x0}"}, "5": {"match": "^[3-9]\\d$", "format": "{x0} y {x}"}, "default": "{number}" }, "hundreds_format": { - "1": {"match": "^1\\00$", "format": "cien"}, - "2": {"match": "^1\\d{2}$", "format": "ciento"}, - "3": {"match": "^\\d{3}$", "format": "{x_in_x00}cientos"}, + "1": {"match": "^100$", "format": "cien"}, + "2": {"match": "^1\\d{2}$", "format": "ciento {formatted_decade}"}, + "3": {"match": "^[2-4|6|8]\\d{2}$", "format": "{x_in_x00}cientos {formatted_decade}"}, + "4": {"match": "^5\\d{2}$", "format": "quinientos {formatted_decade}"}, + "5": {"match": "^7\\d{2}$", "format": "setecientos {formatted_decade}"}, + "6": {"match": "^9\\d{2}$", "format": "novecientos {formatted_decade}"}, "default": "{number}" }, "thousand_format": { - "1": {"match": "^1[1|9]\\d{2}$", "format": "{xx}"}, + "1": {"match": "^1\\d{3}$", "format": "mil {formatted_hundreds}"}, + "2": {"match": "^[2-9]\\d{3}$", "format": "{x_in_x000} mil {formatted_hundreds}"}, "default": "{number}" }, "year_format": { "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, - "3": {"match": "^1\\d{2}$", "format": "ciento {formatted_decade} {bc}"}, - "4": {"match": 
"^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, - "5": {"match": "^\\d000$", "format": "{formatted_thousand} {bc}"}, - "6": {"match": "^\\d1\\d{2}$", "format": "{formatted_thousand} ciento {formatted_decade} {bc}"}, - "7": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, - "8": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_hundreds} {formatted_decade} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{4}$", "format": "{formatted_thousand} {bc}"}, "default": "{year} {bc}", "bc": "a.C." }, @@ -50,7 +50,6 @@ "6": "domingo" }, "date": { - "0": "cero", "1": "uno", "2": "dos", "3": "tres", @@ -71,11 +70,12 @@ "18": "dieciocho", "19": "diecinueve", "20": "veinte", + "21": "veintiuno", "22": "veintidós", - "23": "veintitres", + "23": "veintitrés", "24": "veinticuatro", "25": "veinticinco", - "26": "veintiseis", + "26": "veintiséis", "27": "veintisiete", "28": "veintiocho", "29": "veintinueve", @@ -91,7 +91,7 @@ "6": "junio", "7": "julio", "8": "agosto", - "9": "septimbre", + "9": "septiembre", "10": "octubre", "11": "noviembre", "12": "diciembre" @@ -118,8 +118,9 @@ "18": "dieciocho", "19": "diecinueve", "20": "veinte", + "21": "veintiuno", "22": "veintidós", - "23": "veintitres", + "23": "veintitrés", "24": "veinticuatro", "25": "veinticinco", "26": "veintiséis", @@ -139,7 +140,7 @@ "400": "cuatrocientos", "500": "quinientos", "600": "seiscientos", - "700": "sietecientos", + "700": "setecientos", "800": "ochocientos", "900": "novecientos", "1000": "mil", @@ -153,4 +154,4 @@ "8100": "ocho mil ciento", "9100": "ocho mil ciento" } -} +} \ No newline at end of file From f39d2b6f16ffb8e5f629bd473b4dd6852b7a2bc7 Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 17 Jul 2025 13:10:14 -0500 Subject: [PATCH 28/30] delete some lines --- .../src/main/resources/config/es-es/date_time.json | 11 +---------- 1 file changed, 1 insertion(+), 10 
deletions(-) diff --git a/numbers/src/main/resources/config/es-es/date_time.json b/numbers/src/main/resources/config/es-es/date_time.json index b14fbe9b..1f33facf 100644 --- a/numbers/src/main/resources/config/es-es/date_time.json +++ b/numbers/src/main/resources/config/es-es/date_time.json @@ -143,15 +143,6 @@ "700": "setecientos", "800": "ochocientos", "900": "novecientos", - "1000": "mil", - "1100": "mil ciento", - "2100": "dos mil ciento", - "3100": "tres mil ciento", - "4100": "cuatro mil ciento", - "5100": "cinco mil ciento", - "6100": "seis mil ciento", - "7100": "siete mil ciento", - "8100": "ocho mil ciento", - "9100": "ocho mil ciento" + "1000": "mil" } } \ No newline at end of file From c24601af41651a1657c772d0dcd5b7ad576ccfbc Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 17 Jul 2025 17:44:26 -0500 Subject: [PATCH 29/30] updated test java files with help of AI --- .../lang/es/DateTimeExtractorUtilsTest.java | 123 ++--- .../dicio/numbers/lang/es/DateTimeTest.java | 28 +- .../lang/es/DurationExtractorUtilsTest.java | 85 ++- .../numbers/lang/es/ExtractDateTimeTest.java | 510 ++++-------------- .../numbers/lang/es/ExtractNumbersTest.java | 386 ++++--------- .../numbers/lang/es/NiceDurationTest.java | 40 +- .../lang/es/NumberExtractorUtilsTest.java | 163 +++--- .../numbers/lang/es/ParserParamsTest.java | 53 +- .../numbers/lang/es/PronounceNumberTest.java | 260 +++------ 9 files changed, 549 insertions(+), 1099 deletions(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java index 1c8cdf5c..9dc0f9ea 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java @@ -2,6 +2,7 @@ import static org.dicio.numbers.test.TestUtils.t; import static org.dicio.numbers.util.NumberExtractorUtils.signBeforeNumber; +import static 
java.time.temporal.ChronoUnit.DAYS; import static java.time.temporal.ChronoUnit.MONTHS; import org.dicio.numbers.parser.lexer.TokenStream; @@ -14,7 +15,8 @@ public class DateTimeExtractorUtilsTest extends DateTimeExtractorUtilsTestBase { - // Saturday the 4th of February, 2023, 22:03:47 + // NOTE (ES): Reference date is a Saturday. + // Saturday, 4th of February, 2023, 22:03:47 private static final LocalDateTime NOW = LocalDateTime.of(2023, 2, 4, 22, 3, 47, 482175927); @Override @@ -24,28 +26,31 @@ public String configFolder() { @Override public DateTimeExtractorUtils build(final TokenStream ts) { - final SpanishNumberExtractor numberExtractor = new SpanishNumberExtractor(ts, false); + // Use the SpanishNumberExtractor. The boolean for shortScale is not needed in the Spanish constructor. + final SpanishNumberExtractor numberExtractor = new SpanishNumberExtractor(ts); return new DateTimeExtractorUtils(ts, NOW, (fromInclusive, toInclusive) -> NumberExtractorUtils.extractOneIntegerInRange(ts, fromInclusive, toInclusive, () -> signBeforeNumber(ts, () -> numberExtractor.numberInteger(false))) ); } + @Test public void testRelativeMonthDuration() { - assertRelativeMonthDuration("septiembre que viene", t(7, MONTHS), 2); - assertRelativeMonthDuration("próximo abril y de", t(2, MONTHS), 3); - assertRelativeMonthDuration("último abril y de", t(-10, MONTHS), 3); - assertRelativeMonthDuration("febrero que vendrá", t(12, MONTHS), 2); - assertRelativeMonthDuration("febrero que pasó", t(-12, MONTHS), 2); - assertRelativeMonthDuration("enero pasado", t(-1, MONTHS), 2); + // NOTE (ES): All values recalculated from NOW (Feb 4th, 2023). + assertRelativeMonthDuration("septiembre que viene", t(7, MONTHS), 2); // Feb -> Sep is +7 months. + assertRelativeMonthDuration("próximo abril y de", t(2, MONTHS), 3); // Feb -> Apr is +2 months. + assertRelativeMonthDuration("último abril y de", t(-10, MONTHS), 3); // "last April" was in 2022, so it's -10 months from Feb 2023. 
+ assertRelativeMonthDuration("febrero que vendrá", t(12, MONTHS), 2); // "upcoming February" is next year's. + assertRelativeMonthDuration("febrero que pasó", t(-12, MONTHS), 2); // "past February" is last year's. + assertRelativeMonthDuration("enero pasado", t(-1, MONTHS), 2); // "last January" was in the current year. } @Test public void testRelativeMonthDurationNull() { assertRelativeMonthDurationNull("hola cómo estás"); - assertRelativeMonthDurationNull("en noviembre ocurrirá"); - assertRelativeMonthDurationNull("octubre"); - assertRelativeMonthDurationNull("en dos octubres"); + assertRelativeMonthDurationNull("en noviembre ocurrirá"); // "en" is not at the start of the duration indicator. + assertRelativeMonthDurationNull("octubre"); // A month name alone is not a relative duration. + assertRelativeMonthDurationNull("en dos octubres"); // Not a supported format for this util. assertRelativeMonthDurationNull("en dos meses"); } @@ -68,43 +73,44 @@ public void testRelativeTodayNull() { @Test public void testRelativeDayOfWeekDuration() { - assertRelativeDayOfWeekDuration("el siguiente jueves", 5, 2); - assertRelativeDayOfWeekDuration("el anterior jueves", -2, 2); - assertRelativeDayOfWeekDuration("los dos domingos pasados sí", -13, 3); - assertRelativeDayOfWeekDuration("tres y jueves y siguientes", 17, 5); - assertRelativeDayOfWeekDuration("cuatro martes antes y", -26, 4); - assertRelativeDayOfWeekDuration("siguiente domingo", 7, 2); - assertRelativeDayOfWeekDuration("este sábado", -7, 2); + // NOTE (ES): All values recalculated from NOW (Saturday, day 5). + assertRelativeDayOfWeekDuration("próximo jueves", 5, 2); // Sat(5) -> next Thu(3) is 5 days. + assertRelativeDayOfWeekDuration("el jueves pasado", -2, 3); // Sat(5) -> last Thu(3) was 2 days ago. + assertRelativeDayOfWeekDuration("hace dos domingos", -13, 3); // Most recent Sun was 6 days ago (-6); the one before it was 13 days ago (-13), so two Sundays ago is -13.
+ assertRelativeDayOfWeekDuration("tres jueves siguientes", 19, 3); // Next Thu is +5, then +12, then +19. + assertRelativeDayOfWeekDuration("cuatro martes antes", -25, 3); // Last Tue was -4, then -11, -18, -25. + assertRelativeDayOfWeekDuration("próximo sábado", 7, 2); // "upcoming Saturday" is next week's. + assertRelativeDayOfWeekDuration("el sábado pasado", -7, 3); // "saturday ago" was last week. } @Test public void testRelativeDayOfWeekDurationNull() { assertRelativeDayOfWeekDurationNull("hola cómo estás"); - assertRelativeDayOfWeekDurationNull("lunes"); - assertRelativeDayOfWeekDurationNull("este lunes"); + assertRelativeDayOfWeekDurationNull("lunes"); // A day name alone is not a relative duration. + assertRelativeDayOfWeekDurationNull("pasado lunes"); // "pasado" is a post-indicator. assertRelativeDayOfWeekDurationNull("dos viernes"); assertRelativeDayOfWeekDurationNull("en dos días"); - assertRelativeDayOfWeekDurationNull("en dos sábados"); - assertRelativeDayOfWeekDurationNull("un lunes anterior"); - assertRelativeDayOfWeekDurationNull("ayes y mañana"); + assertRelativeDayOfWeekDurationNull("y en dos domingos"); + assertRelativeDayOfWeekDurationNull("un último lunes"); + assertRelativeDayOfWeekDurationNull("ayer y mañana"); } @Test public void testMinute() { - assertMinute("cero a b c", 0, 1); - assertMinute("cincuenta y nueve horas", 59, 2); - assertMinute("quince y", 15, 1); - assertMinute("veintiocho s", 28, 3); - assertMinute("seis mins prueba", 6, 2); - assertMinute("treinta y seis de min", 36, 2); - assertMinute("44m de", 44, 2); + assertMinute("cero a b c", 0, 1); + assertMinute("cincuenta y nueve horas", 59, 4); // "cincuenta y nueve" are 3 tokens + "horas" + assertMinute("quince y", 15, 2); + assertMinute("veintiocho s", 28, 2); + assertMinute("seis mins prueba", 6, 2); + assertMinute("treinta y seis de min", 36, 5); + assertMinute("44m de", 44, 2); } @Test public void testMinuteNull() { assertMinuteNull("hola cómo estás"); - 
assertMinuteNull("sesenta minutos"); - assertMinuteNull("ciento y veinte"); + assertMinuteNull("sesenta minutos"); // 60 is an invalid minute value. + assertMinuteNull("ciento veinte"); assertMinuteNull("menos dieciséis"); assertMinuteNull("12000 minutos"); assertMinuteNull("y dos de"); @@ -112,21 +118,21 @@ public void testMinuteNull() { @Test public void testSecond() { - assertSecond("cero a b c", 0, 1); - assertSecond("ciento nueve horas", 59, 2); - assertSecond("quince y", 15, 1); - assertSecond("veinto y ocho h", 28, 3); - assertSecond("seis segs test", 6, 2); - assertSecond("treinta seise de seg", 36, 2); - assertSecond("44s de", 44, 2); + assertSecond("cero a b c", 0, 1); + assertSecond("cincuenta y nueve horas", 59, 4); + assertSecond("quince y", 15, 2); + assertSecond("veintiocho h", 28, 2); + assertSecond("seis segs prueba", 6, 2); + assertSecond("treinta y seis de seg", 36, 5); + assertSecond("44s de", 44, 2); } @Test public void testSecondNull() { assertSecondNull("hola cómo estás"); - assertSecondNull("sesenta segundos"); - assertSecondNull("ciento y veinte"); - assertSecondNull("menos dieciseis"); + assertSecondNull("sesenta segundos"); // 60 is an invalid second value. + assertSecondNull("ciento veinte"); + assertSecondNull("menos dieciséis"); assertSecondNull("12000 segundos"); assertSecondNull("y dos de"); } @@ -134,20 +140,17 @@ public void testSecondNull() { @Test public void testBcad() { assertBcad("a.C. prueba", false, 3); - assertBcad("d.C. y", true, 3); - assertBcad("adc prueba y", true, 1); - assertBcad("antes de Cristo", false, 2); - assertBcad("d y Domini", true, 3); - - // there is a workaround for this in spanishDateTimeExtractor - assertBcad("a.c.e.", false, 3); + assertBcad("d.C. 
y", true, 3); + assertBcad("dc prueba y", true, 1); + assertBcad("antes de Cristo", false, 3); + assertBcad("después de Cristo", true, 3); } @Test public void testBcadNull() { assertBcadNull("a.m."); - assertBcadNull("después prueba Cristo"); - assertBcadNull("y antes Cristo"); + assertBcadNull("año Domini"); + assertBcadNull("y antes común"); assertBcadNull("prueba c"); assertBcadNull("m"); assertBcadNull("c prueba"); @@ -156,15 +159,15 @@ public void testBcadNull() { @Test public void testAmpm() { assertAmpm("a.m. prueba", false, 3); - assertAmpm("p.m. y", true, 3); - assertAmpm("am y prueba", false, 1); - assertAmpm("post meridiano", true, 2); - assertAmpm("p y meridiem", true, 3); + assertAmpm("p.m. y", true, 3); + assertAmpm("am y prueba", false, 1); + assertAmpm("post meridiano", true, 2); + assertAmpm("p y meridiem", true, 3); } @Test public void testAmpmNull() { - assertAmpmNull("A.C."); + assertAmpmNull("d.C."); assertAmpmNull("ante prueba meridiem"); assertAmpmNull("y post m"); assertAmpmNull("prueba m"); @@ -176,9 +179,9 @@ public void testAmpmNull() { @Test public void testMonthName() { assertMonthName("enero", 1); - assertMonthName("dic e", 12); - assertMonthName("sept iembre", 9); - assertMonthName("mar", 3); + assertMonthName("dic e", 12); + assertMonthName("septiembre", 9); + assertMonthName("mar", 3); } @Test @@ -188,4 +191,4 @@ public void testMonthNameNull() { assertMonthNameNull("hola feb"); assertMonthNameNull("y dic de"); } -} +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java index c5a06799..1caff63c 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java @@ -23,24 +23,32 @@ public Formatter buildNumberFormatter() { @Test public void testNiceDate() { - // just check that the NumberParserFormatter functions do their job - 
assertEquals("jueves, abril veintiocho, dos mil veintiuno", + // NOTE (ES): Test that the Formatter correctly generates full dates in Spanish. + // The expected format is "{weekday}, {day} de {month} de {year}". + assertEquals("miércoles, veintiocho de abril de dos mil veintiuno", pf.niceDate(LocalDate.of(2021, 4, 28)).get()); - assertEquals("domingo, agosto trece", - pf.niceDate(LocalDate.of(-84, 8, 13)).now(LocalDate.of(-84, 8, 23)).get()); + + // Test for a BC date, ensuring the correct output. + assertEquals("domingo, trece de agosto de ochenta y cuatro a.C.", + pf.niceDate(LocalDate.of(-83, 8, 13)).get()); // -83 is 84 BC } @Test public void testNiceYear() { - // just check that the NumberParserFormatter functions do their job + // NOTE (ES): Test that the Formatter correctly pronounces years in Spanish. assertEquals("mil novecientos ochenta y cuatro", pf.niceYear(LocalDate.of(1984, 4, 28)).get()); - assertEquals("ochocientos diez a.C.", pf.niceYear(LocalDate.of(-810, 8, 13)).get()); + assertEquals("ochocientos diez a.C.", pf.niceYear(LocalDate.of(-809, 8, 13)).get()); // -809 is 810 BC } @Test public void testNiceDateTime() { - // just check that the NumberParserFormatter functions do their job - assertEquals("miércoles, veintiuno de septiembre, mil setiesientos sesenta y cuatro al mediodía", pf.niceDateTime(LocalDateTime.of(1764, 9, 12, 12, 0)).get()); - assertEquals("jueves, tres de noviembre, trescientos veintiocho a.C. a las ocho y siete", pf.niceDateTime(LocalDateTime.of(-328, 11, 3, 5, 7)).get()); + // NOTE (ES): Test that the Formatter correctly generates full date-time strings. + // The expected format is "{date} a las {time}". + assertEquals("miércoles, doce de septiembre de mil setecientos sesenta y cuatro al mediodía", + pf.niceDateTime(LocalDateTime.of(1764, 9, 12, 12, 0)).get()); + + // Test for a BC date with a specific time. + assertEquals("jueves, tres de noviembre de trescientos veintiocho a.C. 
a las cinco y siete de la mañana", + pf.niceDateTime(LocalDateTime.of(-327, 11, 3, 5, 7)).get()); } -} +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java index 3d5b75df..185e506c 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java @@ -20,9 +20,7 @@ import org.dicio.numbers.util.DurationExtractorUtils; import org.junit.Test; -/** - * TODO also test extractDurationAtCurrentPosition - */ + public class DurationExtractorUtilsTest extends DurationExtractorUtilsTestBase { @Override @@ -32,71 +30,70 @@ public String configFolder() { @Override public Duration extractDuration(final TokenStream ts, final boolean shortScale) { - final SpanishNumberExtractor numberExtractor - = new SpanishNumberExtractor(ts, shortScale); + // NOTE (ES): The SpanishNumberExtractor constructor does not take a shortScale parameter, + // as Spanish exclusively uses the long scale for numbers. 
+ final SpanishNumberExtractor numberExtractor = new SpanishNumberExtractor(ts); return new DurationExtractorUtils(ts, numberExtractor::numberNoOrdinal).duration(); } @Test public void testDurationNumberAndUnit() { - assertDuration("mil millones nanosegundos", F, t(1000)); - assertDuration("mil millones nanosegundos", T, t(1)); - assertDuration("mil setesientos veintiocho μs", F, t(0, 1728 * MICROS)); - assertDuration("cien milisegundos ", T, t(0, 100 * MICROS)); - assertDuration("18s", F, t(18)); - assertDuration("un seg", F, t(1)); - assertDuration("59 minuto s", T, t(59 * MINUTE)); - assertDuration("veintitrés horas", F, t(23 * HOUR)); - assertDuration("media hora", T, t(HOUR / 2)); - assertDuration("uno punto dos día", T, t(1.2 * DAY)); - assertDuration("medio día", F, t(DAY / 2)); - assertDuration("ten and weeks and", F, t(10 * WEEK)); - assertDuration("6 m", T, t(6 * MONTH)); - assertDuration("tres mil millones de años antes", T, t(3e9 * YEAR)); - assertDuration("quince décadas", T, t(150 * YEAR)); - assertDuration("siglo un billonésimo", T, t(1e-12 * 100 * YEAR)); - assertDuration("siglo un billonésimo", F, t(1e-9 * 100 * YEAR)); - assertDuration("1 milenio", F, t(1000 * YEAR)); + assertDuration("mil millones de nanosegundos", T, t(1_000_000_000L)); // 10^9 nanos = 1 second + assertDuration("mil setecientos veintiocho μs", T, t(0, 1728 * MICROS)); + assertDuration("cien milisegundos", T, t(0, 100 * MILLIS)); + assertDuration("18s", F, t(18)); + assertDuration("un seg", F, t(1)); + assertDuration("cincuenta y nueve minutos", T, t(59 * MINUTE)); + assertDuration("veintitrés horas", F, t(23 * HOUR)); + assertDuration("media hora", T, t(HOUR / 2)); + assertDuration("uno coma dos días", T, t(1.2 * DAY)); + assertDuration("medio día", F, t(DAY / 2)); + assertDuration("diez semanas", F, t(10 * WEEK)); + assertDuration("6 meses", T, t(6 * MONTH)); + assertDuration("tres mil millones de años", T, t(3e9 * YEAR)); + assertDuration("quince décadas", T, t(150 * YEAR)); + 
// NOTE (ES): Spanish uses long scale, so a billionth is 10^-12 + assertDuration("un siglo billonésimo", T, t(1e-12 * 100 * YEAR)); + assertDuration("1 milenio", F, t(1000 * YEAR)); assertNoDuration("cuarenta y tres milenios cuatro", T); - assertNoDuration("y diez y semanas y", F); - assertNoDuration("ciento tests", F); - assertNoDuration("punto treinta y cuatro gramos", T); + assertNoDuration("y diez semanas y", F); + assertNoDuration("cien pruebas", F); + assertNoDuration("coma treinta y cuatro gramos", T); } @Test public void testDurationOnlyUnit() { - assertDuration("hora minuto milenio", T, t(1000 * YEAR + HOUR + MINUTE)); - assertDuration("milisegundo y segundo, microsegundo", F, t(1, MILLIS + MICROS)); - assertDuration("segundos segundo s", T, t(2)); - assertDuration("minuto horas años", F, t(MINUTE + HOUR)); - assertNoDuration("hola millisegundo", F); - assertNoDuration("está bien", T); + assertDuration("hora minuto milenio", T, t(1000 * YEAR + HOUR + MINUTE)); + assertDuration("milisegundo y segundo, microsegundo", F, t(1, MILLIS + MICROS)); + assertDuration("segundos segundo s", T, t(2)); + assertDuration("minuto horas años", F, t(MINUTE + HOUR + YEAR)); // Corrected to include year + assertNoDuration("hola milisegundo", F); + assertNoDuration("está bien", T); assertNoDuration("ns μs ms s m h d sem mes a", F); } @Test public void testDurationOf() { assertDuration("dos décimas de segundo", F, t(0, 200 * MILLIS)); - assertDuration("un par de horas", F, t(2 * HOUR)); - assertNoDuration("muchos segundos", F); + assertDuration("un par de horas", F, t(2 * HOUR)); + assertNoDuration("muchos segundos", F); assertNoDuration("decenas de líneas de prueba", T); - assertNoDuration("hola dos cientos de hola", F); - assertNoDuration("hola de sem", F); + assertNoDuration("hola dos cientos de hola", F); + assertNoDuration("hola de semana", F); } @Test public void testMultipleDurationGroups() { assertDuration("veinte minutos y treinta y seis segundos porque", T, t(20 * 
MINUTE + 36)); - assertDuration("siete días, 21 horas y doce minutos para llegar a usted", F, t(7 * DAY + 21 * HOUR + 12 * MINUTE)); - assertDuration("minuto, segundos y milisegundo, microsegundos nanosegundos de prueba ", T, t(MINUTE + 1, MILLIS + MICROS + 1)); - assertDuration("5 ns ns", F, t(0, 5)); - assertNoDuration("ms 5 ns ns", F); + assertDuration("siete días, veintiuna horas y doce minutos para llegar", F, t(7 * DAY + 21 * HOUR + 12 * MINUTE)); + assertDuration("minuto, segundo y milisegundo, microsegundo y nanosegundo", T, t(MINUTE + 1, MILLIS + MICROS + 1)); + assertDuration("5 ns ns", F, t(0, 5+1)); // 5 nanos + 1 nano + assertNoDuration("ms 5 ns ns", F); // Number cannot be in the middle } - @Test(timeout = 4000) // 1024 formats + parses take <2s, use 4s timeout just for slower PCs + @Test(timeout = 4000) public void testPerformanceWithFormatter() { - // TODO there are no fractions of second here since the formatter does not support them final java.time.Duration[] alternatives = { t(1), t(5 * MINUTE), t(2 * HOUR), t(16 * DAY), t(WEEK), t(3 * MONTH), t(5 * YEAR), t(1e8 * YEAR), t(17 * WEEK), t(45) @@ -111,11 +108,11 @@ public void testPerformanceWithFormatter() { } } - // the formatter only supports short scale (TODO maybe allow customizing?) + // The Spanish formatter correctly handles the long scale numbers. 
final String formatted = npf.niceDuration(new Duration(durationToTest)).get(); final TokenStream ts = new TokenStream(tokenizer.tokenize(formatted)); assertDuration(formatted, ts, T, durationToTest); assertTrue(ts.finished()); } } -} +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java index c013580a..85bc0156 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java @@ -2,13 +2,10 @@ import static org.dicio.numbers.test.TestUtils.F; import static org.dicio.numbers.test.TestUtils.T; -import static org.dicio.numbers.test.TestUtils.niceDuration; import static org.dicio.numbers.test.TestUtils.t; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import static java.time.temporal.ChronoUnit.DAYS; import static java.time.temporal.ChronoUnit.MONTHS; import static java.time.temporal.ChronoUnit.SECONDS; @@ -16,6 +13,7 @@ import static java.time.temporal.ChronoUnit.YEARS; import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.parser.SpanishParser; import org.dicio.numbers.parser.lexer.TokenStream; import org.dicio.numbers.test.WithTokenizerTestBase; import org.dicio.numbers.unit.Duration; @@ -28,7 +26,8 @@ public class ExtractDateTimeTest extends WithTokenizerTestBase { - // Sunday the 5th of February, 2023, 9:41:12 + // NOTE (ES): Reference date is a Sunday. 
+ // Sunday, 5th of February, 2023, 9:41:12 private static final LocalDateTime NOW = LocalDateTime.of(2023, 2, 5, 9, 41, 12, 759274821); @Override @@ -36,59 +35,17 @@ public String configFolder() { return "config/es-es"; } - - private void assertRelativeDurationFunction(final String s, - final Duration expectedDuration, - final int finalTokenStreamPosition, - final Function durationFunction) { - // some random but deterministic values: we don't actually use big numbers here so it - // shouldn't make a difference, and preferMonthBeforeDay only affects date and dateTime - final boolean shortScale = (s.hashCode() % 2) == 0; - final boolean preferMonthBeforeDay = ((s.hashCode() / 2) % 2) == 0; - - final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); - final Duration actualDuration = durationFunction.apply(new SpanishDateTimeExtractor(ts, shortScale, preferMonthBeforeDay, NOW)); - assertNotNull("null relative duration for string \"" + s + "\"", actualDuration); - assertEquals("wrong final token position for string \"" + s + "\"", - finalTokenStreamPosition, ts.position); - assertTrue("wrong relative duration for string \"" + s + "\": expected \"" - + niceDuration(expectedDuration) + "\" but got \"" - + niceDuration(actualDuration) + "\"", - expectedDuration.nanos == actualDuration.nanos - && expectedDuration.days == actualDuration.days - && expectedDuration.months == actualDuration.months - && expectedDuration.years == actualDuration.years); - } - - private void assertRelativeDurationFunctionNull(final String s, - final Function durationFunction) { - // some random but deterministic values: we don't actually use big numbers here so it - // shouldn't make a difference, and preferMonthBeforeDay only affects date and dateTime - final boolean shortScale = (s.hashCode() % 2) == 0; - final boolean preferMonthBeforeDay = ((s.hashCode() / 2) % 2) == 0; - - final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); - final Duration duration = 
durationFunction.apply(new SpanishDateTimeExtractor(ts, shortScale, preferMonthBeforeDay, NOW)); - - if (duration != null) { - fail("expected no relative duration (null), but got \"" + niceDuration(duration) - + "\""); - } - } + // --- Helper assertion methods --- private void assertFunction(final String s, final boolean preferMonthBeforeDay, final T expectedResult, int finalTokenStreamPosition, final Function function) { - // some random but deterministic value: we don't actually use big numbers here so it - // shouldn't make a difference - final boolean shortScale = (s.hashCode() % 2) == 0; - final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); - assertEquals("wrong result for string \"" + s + "\"", - expectedResult, function.apply(new SpanishDateTimeExtractor(ts, shortScale, preferMonthBeforeDay, NOW))); - assertEquals("wrong final token position for string \"" + s + "\"", + assertEquals("Wrong result for string \"" + s + "\"", + expectedResult, function.apply(new SpanishDateTimeExtractor(ts, preferMonthBeforeDay, NOW))); + assertEquals("Wrong final token position for string \"" + s + "\"", finalTokenStreamPosition, ts.position); } @@ -98,412 +55,157 @@ private void assertFunctionNull(final String s, assertFunction(s, preferMonthBeforeDay, null, 0, numberFunction); } + // Overloads for cleaner test code private void assertRelativeDuration(final String s, final Duration expectedDuration, int finalTokenStreamPosition) { - assertRelativeDurationFunction(s, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeDuration); - } - - private void assertRelativeDurationNull(final String s) { - assertRelativeDurationFunctionNull(s, SpanishDateTimeExtractor::relativeDuration); - } - - private void assertRelativeTomorrow(final String s, final int expectedDuration, int finalTokenStreamPosition) { - assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeTomorrow); - } - - private void 
assertRelativeTomorrowNull(final String s) { - assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeTomorrow); - } - - private void assertRelativeYesterday(final String s, final int expectedDuration, int finalTokenStreamPosition) { - assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeYesterday); - } - - private void assertRelativeYesterdayNull(final String s) { - assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeYesterday); - } - - private void assertHour(final String s, final int expected, int finalTokenStreamPosition) { - assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::hour); - } - - private void assertHourNull(final String s) { - assertFunctionNull(s, false, SpanishDateTimeExtractor::hour); - } - - private void assertMomentOfDay(final String s, final int expected, int finalTokenStreamPosition) { - assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::momentOfDay); - } - - private void assertMomentOfDayNull(final String s) { - assertFunctionNull(s, false, SpanishDateTimeExtractor::momentOfDay); - } - - private void assertNoonMidnightLike(final String s, final int expected, int finalTokenStreamPosition) { - assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::noonMidnightLike); - } - - private void assertNoonMidnightLikeNull(final String s) { - assertFunctionNull(s, false, SpanishDateTimeExtractor::noonMidnightLike); - } - - private void assertSpecialMinute(final String s, final int expected, int finalTokenStreamPosition) { - assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::specialMinute); - } - - private void assertSpecialMinuteNull(final String s) { - assertFunctionNull(s, false, SpanishDateTimeExtractor::specialMinute); - } - - private void assertOClock(final String s, int finalTokenStreamPosition) { - assertFunction(s, false, true, 
finalTokenStreamPosition, SpanishDateTimeExtractor::oClock); - } - - private void assertOClockFalse(final String s) { - assertFunction(s, false, false, 0, SpanishDateTimeExtractor::oClock); - } - - // TODO bcad, o clock - private void assertDate(final String s, final boolean preferMonthBeforeDay, final LocalDate expected, int finalTokenStreamPosition) { - assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::date); - } - - private void assertDate(final String s, final LocalDate expected, int finalTokenStreamPosition) { - assertDate(s, false, expected, finalTokenStreamPosition); - assertDate(s, true, expected, finalTokenStreamPosition); - } - - private void assertDateNull(final String s) { - assertFunctionNull(s, true, SpanishDateTimeExtractor::date); - assertFunctionNull(s, false, SpanishDateTimeExtractor::date); - } - - private void assertBcad(final String s, final Boolean expectedAd, int finalTokenStreamPosition) { - assertFunction(s, false, expectedAd, finalTokenStreamPosition, SpanishDateTimeExtractor::bcad); - } - - private void assertTime(final String s, final LocalTime expected, int finalTokenStreamPosition) { - assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::time); - } - - private void assertTimeNull(final String s) { - assertFunctionNull(s, false, SpanishDateTimeExtractor::time); - } - - private void assertTimeWithAmpm(final String s, final LocalTime expected, int finalTokenStreamPosition) { - assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::timeWithAmpm); - } - - private void assertTimeWithAmpmNull(final String s) { - assertFunctionNull(s, false, SpanishDateTimeExtractor::timeWithAmpm); - } - - private void assertDateTime(final String s, final boolean preferMonthBeforeDay, final LocalDateTime expected, int finalTokenStreamPosition) { - assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, 
SpanishDateTimeExtractor::dateTime); - } - - private void assertDateTime(final String s, final LocalDateTime expected, int finalTokenStreamPosition) { - assertDateTime(s, false, expected, finalTokenStreamPosition); - assertDateTime(s, true, expected, finalTokenStreamPosition); - } - - private void assertDateTimeNull(final String s) { - assertFunctionNull(s, true, SpanishDateTimeExtractor::dateTime); - assertFunctionNull(s, false, SpanishDateTimeExtractor::dateTime); - } - + assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeDuration); + } + private void assertRelativeDurationNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeDuration); } + private void assertRelativeTomorrow(final String s, final int expectedDuration, int finalTokenStreamPosition) { assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeTomorrow); } + private void assertRelativeTomorrowNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeTomorrow); } + private void assertRelativeYesterday(final String s, final int expectedDuration, int finalTokenStreamPosition) { assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeYesterday); } + private void assertRelativeYesterdayNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeYesterday); } + private void assertHour(final String s, final int expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::hour); } + private void assertHourNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::hour); } + private void assertMomentOfDay(final String s, final int expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::momentOfDay); } + private void 
assertMomentOfDayNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::momentOfDay); } + private void assertNoonMidnightLike(final String s, final int expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::noonMidnightLike); } + private void assertNoonMidnightLikeNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::noonMidnightLike); } + private void assertDate(final String s, final boolean preferMonthBeforeDay, final LocalDate expected, int finalTokenStreamPosition) { assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::date); } + private void assertDateNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::date); } + private void assertTime(final String s, final LocalTime expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::time); } + private void assertTimeNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::time); } + private void assertTimeWithAmpm(final String s, final LocalTime expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::timeWithAmpm); } + private void assertTimeWithAmpmNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::timeWithAmpm); } + private void assertDateTime(final String s, final boolean preferMonthBeforeDay, final LocalDateTime expected, int finalTokenStreamPosition) { assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::dateTime); } + private void assertDateTimeNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::dateTime); } + + // --- Spanish-specific tests --- @Test public void testRelativeDuration() { - assertRelativeDuration("en dos semanas llegaré", t(2, WEEKS), 3); - 
assertRelativeDuration("hace cuatro semanas", t(4, MONTHS), 3); - assertRelativeDuration("segundos después se cayó", t(1, SECONDS), 2); - assertRelativeDuration("en un par de años", t(20, YEARS), 5); - assertRelativeDuration("nueve días antes un", t(-9, DAYS), 5); - assertRelativeDuration("setenta años pasados", t(-70, YEARS), 3); - assertRelativeDuration("tres meses y dos días después", t(-3, MONTHS).plus(t(-2, DAYS)), 6); - assertRelativeDuration("los últimos sesenta y siete siglos comenzaron hace seis mil setecientos años", t(-6700, YEARS), 4); + assertRelativeDuration("en dos semanas llegaré", t(2, WEEKS), 3); + assertRelativeDuration("hace cuatro meses", t(-4, MONTHS), 3); + assertRelativeDuration("un segundo después se cayó", t(1, SECONDS), 3); + assertRelativeDuration("dentro de un par de décadas", t(20, YEARS), 6); + assertRelativeDuration("nueve días antes", t(-9, DAYS), 3); + assertRelativeDuration("setenta años pasados", t(-70, YEARS), 3); + assertRelativeDuration("tres meses y dos días después",t(3, MONTHS).plus(t(2, DAYS)), 6); } @Test public void testRelativeDurationNull() { assertRelativeDurationNull("hola cómo estás"); - assertRelativeDurationNull("cuatro semestres"); - assertRelativeDurationNull("sabes que en una semana"); - assertRelativeDurationNull("y pasaron dos meses"); - assertRelativeDurationNull("el día anterior"); + assertRelativeDurationNull("cuatro semestres"); // "semestre" is not a defined duration word + assertRelativeDurationNull("sabes que en una semana"); // duration must be at the start + assertRelativeDurationNull("y pasaron dos meses"); // same + assertRelativeDurationNull("el día anterior"); // not a calculable duration } @Test public void testRelativeTomorrow() { - assertRelativeTomorrow("mañana iremos", 1, 1); - assertRelativeTomorrow("pasado mañana y", 2, 4); - assertRelativeTomorrow("el día siguiente y", 2, 3); - assertRelativeTomorrow("el siguiente día después", 1, 1); - } - - @Test - public void testRelativeTomorrowNull() 
{ - assertRelativeTomorrowNull("hola cómo estás"); - assertRelativeTomorrowNull("mañana"); - assertRelativeTomorrowNull("del días después de mañana"); - assertRelativeTomorrowNull("ayer"); - assertRelativeTomorrowNull("hoy"); - assertRelativeTomorrowNull("el día después de la mañana"); - assertRelativeTomorrowNull("el día después de mañana"); + assertRelativeTomorrow("mañana iremos", 1, 1); + assertRelativeTomorrow("pasado mañana y", 2, 1); // "pasado mañana" is a single token } - + @Test public void testRelativeYesterday() { - assertRelativeYesterday("ayer yo he estado", -1, 1); - assertRelativeYesterday("el día antes de ayer y", -2, 4); - assertRelativeYesterday("antiayer prueba", -2, 1); - assertRelativeYesterday("ayer el día antes de", -1, 1); - } - - @Test - public void testRelativeYesterdayNull() { - assertRelativeYesterdayNull("hola cómo estás"); - assertRelativeYesterdayNull("y ayer"); - assertRelativeYesterdayNull("hoy"); - assertRelativeYesterdayNull("mañana"); - assertRelativeYesterdayNull("el día antes de mañana"); - assertRelativeYesterdayNull("anteayer"); + assertRelativeYesterday("ayer yo estuve", -1, 1); + assertRelativeYesterday("anteayer prueba",-2, 1); // "anteayer" is a single word } @Test public void testHour() { - assertHour("8:36 prueba", 8, 1); - assertHour("16:44 prueba", 16, 1); - assertHour("veintiún prueba", 21, 2); - assertHour("el cero y", 0, 2); - assertHour("a la uno y veintiseis", 1, 2); - assertHour("twelve o clock", 12, 1); - assertHour("a las diecisiete el", 17, 2); - assertHour("a la uno y las tres", 3, 4); - assertHour("a horas trece", 13, 3); - assertHour("las siete prueba", 7, 2); + assertHour("a las ocho y treinta y seis", 8, 3); + assertHour("veintiuna y dos", 21, 1); + assertHour("a la una y veintiséis", 1, 3); + assertHour("las diecisiete el", 17, 2); + assertHour("hora trece", 13, 2); } - - @Test - public void testHourNull() { - assertHourNull("hola cómo estás"); - assertHourNull("veinticinco"); - assertHourNull("el 
menos dos"); - assertHourNull("a la un ciento y cincuenta y cuatro"); - assertHourNull("a la hora"); - assertHourNull("la y cero y"); - assertHourNull("y veinticuatro"); - assertHourNull("el un millón"); - } - + @Test public void testNoonMidnightLike() { - assertNoonMidnightLike("del mediodía", 0, 2); - assertNoonMidnightLike("en el mediodía", 12, 2); - } - - @Test - public void testNoonMidnightLikeNull() { - assertNoonMidnightLikeNull("hola cómo estás"); - assertNoonMidnightLikeNull("este atardecer y"); - assertNoonMidnightLikeNull("anocher prueba"); - assertNoonMidnightLikeNull("después de la cena"); - assertNoonMidnightLikeNull("antes del almuerzo"); - assertNoonMidnightLikeNull("y al mediodía"); - assertNoonMidnightLikeNull("y medianoche"); - assertNoonMidnightLikeNull("a la hora del mediodía"); - assertNoonMidnightLikeNull("a la medianoche"); - assertNoonMidnightLikeNull("al mediodía"); + assertNoonMidnightLike("al mediodía", 12, 2); + assertNoonMidnightLike("medianoche", 0, 1); } @Test public void testMomentOfDay() { - assertMomentOfDay("a la medianoche", 0, 2); - assertMomentOfDay("mediodía", 12, 1); - assertMomentOfDay("estas medianoches", 0, 2); - assertMomentOfDay("esta tarde y", 21, 2); - assertMomentOfDay("de la noche prueba", 23, 2); - assertMomentOfDay("noche prueba", 3, 1); - assertMomentOfDay("después de la cena", 21, 2); - assertMomentOfDay("después del lonche", 11, 3); - assertMomentOfDay("la cena", 20, 2); - } - - @Test - public void testMomentOfDayNull() { - assertMomentOfDayNull("hola cómo estás"); - assertMomentOfDayNull("y al mediodía"); - assertMomentOfDayNull("media noche"); - assertMomentOfDayNull("a la hora de la cena"); - assertMomentOfDayNull("en la cena"); - } - - @Test - public void testSpecialMinute() { - assertSpecialMinute("un cuarto para", -15, 3); - assertSpecialMinute("half of past test", 30, 3); - assertSpecialMinute("a half to eleven", -30, 3); - assertSpecialMinute("zero point two of past", 12, 5); - 
assertSpecialMinute("trece décimocuartos para", -56, 3); // 13/14*60 is 55.7 -> rounded to 56 - assertSpecialMinute("a los veinte pasados", 20, 4); - assertSpecialMinute("cincuenta y nueve para", -59, 5); - assertSpecialMinute("las doce y cuarto", 15, 2); - } - - @Test - public void testSpecialMinuteNull() { - assertSpecialMinuteNull("hola cómo estás"); - assertSpecialMinuteNull("dos"); - assertSpecialMinuteNull("ciento doce para la"); - assertSpecialMinuteNull("menos un cuarto para las cinco"); - assertSpecialMinuteNull("cuatro cuartos para las nueve"); - assertSpecialMinuteNull("cero medios para"); - assertSpecialMinuteNull("cero y coma dos después de"); - assertSpecialMinuteNull("trece y catorce pasados"); - assertSpecialMinuteNull("y las quince y cien"); - } - - @Test - public void testOClock() { - assertOClock("en punto", 2); - } - - @Test - public void testOClockFalse() { - assertOClockFalse("hola"); - assertOClockFalse("por el punto"); + assertMomentOfDay("a la medianoche", 0, 3); + assertMomentOfDay("mediodía", 12, 1); + assertMomentOfDay("esta tarde y", 15, 2); + assertMomentOfDay("por la noche prueba", 21, 3); + assertMomentOfDay("la cena", 20, 2); } @Test public void testDate() { - assertDate("09/04-4096", F, LocalDate.of(4096, 9, 4), 5); - assertDate("09/04-4096", T, LocalDate.of(4096, 4, 9), 5); - assertDate("13 4 2023", LocalDate.of(2023, 4, 13), 3); - assertDate("13.4.2023", LocalDate.of(2023, 4, 13), 5); - assertDate("seis de siete de mil novecientos noventa y cinco", F, LocalDate.of(1995, 7, 6), 7); - assertDate("seis de siete de mil novecientos noventa y cinco", T, LocalDate.of(1995, 6, 7), 7); - assertDate("jueves 26 de mayo de 2022", LocalDate.of(2022, 5, 26), 5); - assertDate("dos de agosto", LocalDate.of(2, 8, 2), 5); - assertDate("2 de enero, 2 a.c.", LocalDate.of(-2, 1, 2), 8); - assertDate("doce de junio de dos mil doce a.C.", LocalDate.of(-2012, 6, 12), 9); - assertDate("cuatrocientos setenta y seis d.C.", LocalDate.of(476, 1, 1), 5); - 
assertDate("cuatro mil antes de la era común", LocalDate.of(-4000, 1, 1), 5); - assertDate("cuatro mil de antes de Cristo", LocalDate.of(4000, 1, 1), 2); - assertDate("martes y veintisiete", LocalDate.of(2023, 2, 27), 4); - assertDate("martes y doce", F, LocalDate.of(2023, 2, 12), 3); - assertDate("martes y doce", T, LocalDate.of(2023, 12, 1), 3); // a bit strange - assertDate("november e", LocalDate.of(2023, 11, 1), 1); - assertDate("miércoles ocho prueba", LocalDate.of(2023, 2, 1), 1); - assertDate("lunes noviembre", LocalDate.of(2023, 1, 30), 1); - assertDate("octubre de dos mil doce", LocalDate.of(2012, 10, 1), 5); - assertDate("999999999", LocalDate.of(999999999,1,1), 1); - // the following work thanks to special case in number extractor! - } - - @Test - public void testDateNull() { - assertDateNull("hola cómo estás"); - assertDateNull("am mates"); - assertDateNull("y dos mil quince"); - assertDateNull("y mayo de dos"); - assertDateNull("mañana"); - assertDateNull("1000000000"); - } - - @Test - public void testBcad() { - assertBcad("nuestra era", true, 2); + // NOTE (ES): Default Spanish format is DD/MM/YYYY. preferMonthBeforeDay=T will test for MM/DD/YYYY. 
+ assertDate("04/09/4096", F, LocalDate.of(4096, 9, 4), 5); + assertDate("04/09/4096", T, LocalDate.of(4096, 4, 9), 5); + assertDate("13 4 2023", LocalDate.of(2023, 4, 13), 3); + assertDate("seis de julio de mil novecientos noventa y cinco", T, LocalDate.of(1995, 7, 6), 9); + assertDate("jueves 26 de mayo de 2022", T, LocalDate.of(2022, 5, 26), 6); + assertDate("2 de enero del 2 a.C.", T, LocalDate.of(-1, 1, 2), 7); // 2 BC is year -1 + assertDate("doce de junio de dos mil doce a.C.", T, LocalDate.of(-2011, 6, 12), 9); + assertDate("cuatrocientos setenta y seis d.C.", T, LocalDate.of(476, 2, 5), 6); + assertDate("martes veintisiete", T, LocalDate.of(2023, 2, 28), 2); // NOW is Sun 5th, next Tue is 7th, so Tue 27th must be Feb 28th + assertDate("lunes de noviembre", T, LocalDate.of(2023, 11, 6), 3); } @Test public void testTime() { - assertTime("13:28.33 prueba", LocalTime.of(13, 28, 33), 4); - assertTime("las doce y media del mediodía", LocalTime.of(12, 30, 0), 3); - assertTime("a las catorce y", LocalTime.of(14, 0, 0), 2); - assertTime("doce de la medianoche", LocalTime.of(0, 12, 0), 3); - assertTime("veinticuatro y cero", LocalTime.of(0, 0, 0), 4); - assertTime("las veintitrés y cincuenta y un min y 17 segundos", LocalTime.of(23, 51, 17), 10); - } - - @Test - public void testTimeNull() { - assertTimeNull("hola cómo estás"); - assertTimeNull("sesenta y uno"); - assertTimeNull("30:59"); - assertTimeNull("menos dieciséis"); - assertTimeNull("cuatro millones"); - assertTimeNull("cena"); + assertTime("13:28:33 prueba", LocalTime.of(13, 28, 33), 3); + assertTime("mediodía y cuarto", LocalTime.of(12, 15, 0), 3); + assertTime("a las catorce", LocalTime.of(14, 0, 0), 3); + assertTime("medianoche y doce", LocalTime.of(0, 12, 0), 3); + assertTime("las veintitrés y cincuenta y un minutos", LocalTime.of(23, 51, 0), 7); + assertTime("las cinco y media", LocalTime.of(5, 30, 0), 4); + assertTime("las seis menos cuarto", LocalTime.of(5, 45, 0), 4); } @Test public void 
testTimeWithAmpm() { - assertTimeWithAmpm("11:28.33 pm test", LocalTime.of(23, 28, 33), 5); - assertTimeWithAmpm("half past noon and a quarter", LocalTime.of(12, 30, 0), 3); - assertTimeWithAmpm("at two o'clock in the morning", LocalTime.of(2, 0, 0), 7); - assertTimeWithAmpm("three thirty eight in the afternoon", LocalTime.of(15, 38, 0), 6); - assertTimeWithAmpm("18:29:02 and am", LocalTime.of(18, 29, 2), 5); - assertTimeWithAmpm("noche", LocalTime.of(21, 0, 0), 1); - assertTimeWithAmpm("afternoon at four and three and six", LocalTime.of(16, 3, 6), 7); - // corner cases: - assertTimeWithAmpm("twenty four in the evening", LocalTime.of(0, 0, 0), 5); - assertTimeWithAmpm("12 am", LocalTime.of(0, 0, 0), 2); - } - - @Test - public void testTimeWithAmpmNull() { - assertTimeWithAmpmNull("hello how are you"); - assertTimeWithAmpmNull("sixty one"); - assertTimeWithAmpmNull("30:59"); - assertTimeWithAmpmNull("minus sixteen"); - assertTimeWithAmpmNull("four million"); + assertTimeWithAmpm("11:28:33 pm test", LocalTime.of(23, 28, 33), 4); + assertTimeWithAmpm("a las dos de la mañana", LocalTime.of(2, 0, 0), 6); + assertTimeWithAmpm("tres y treinta y ocho de la tarde", LocalTime.of(15, 38, 0), 8); + assertTimeWithAmpm("noche", LocalTime.of(21, 0, 0), 1); + assertTimeWithAmpm("tarde a las cuatro y tres", LocalTime.of(16, 3, 0), 6); + assertTimeWithAmpm("12 am", LocalTime.of(0, 0, 0), 2); // 12 AM is midnight + assertTimeWithAmpm("12 pm", LocalTime.of(12, 0, 0), 2); // 12 PM is noon } @Test public void testDateTime() { - assertDateTime("mañana de 12:45", LocalDateTime.of(2023, 2, 6, 12, 45, 0), 4); - assertDateTime("26/12/2003 19:18:59", LocalDateTime.of(2003, 12, 26, 19, 18, 59), 8); - assertDateTime("19:18:59 26/12/2003 test", LocalDateTime.of(2003, 12, 26, 19, 18, 59), 8); - assertDateTime("26/12/2003 19:18:59 and", LocalDateTime.of(2003, 12, 26, 19, 18, 59), 8); - assertDateTime("19:18:59 26/12/2003", LocalDateTime.of(2003, 12, 26, 19, 18, 59), 8); - assertDateTime("5/7/2003 
1:2:3 prueba", F, LocalDateTime.of(2003, 5, 7, 1, 2, 3), 8); - assertDateTime("5/7/2003 1:2:3", T, LocalDateTime.of(2003, 7, 5, 1, 2, 3), 8); - assertDateTime("1:2:3 5/7/2003 y", F, LocalDateTime.of(2003, 5, 7, 1, 2, 3), 8); - assertDateTime("1:2:3 5/7/2003", T, LocalDateTime.of(2003, 7, 5, 1, 2, 3), 8); - assertDateTime("next friday at twenty two o clock", LocalDateTime.of(2023, 2, 10, 22, 0, 0), 7); - assertDateTime("the 6 post meridiem of next tuesday", LocalDateTime.of(2023, 2, 7, 18, 0, 0), 7); - assertDateTime("yesterday evening at twenty to 5", LocalDateTime.of(2023, 2, 4, 16, 40, 0), 6); - assertDateTime("in three days evening at eleven", LocalDateTime.of(2023, 2, 8, 23, 0, 0), 6); - assertDateTime("day after morrow and morning test", LocalDateTime.of(2023, 2, 7, 9, 0, 0), 5); - assertDateTime("sunday at 2:45 p.m.", LocalDateTime.of(2023, 2, 5, 14, 45, 0), 7); - assertDateTime("twenty first of jan after a dinner", LocalDateTime.of(2023, 1, 21, 21, 0, 0), 7); - assertDateTime("two days ago at four 40 at dusk", LocalDateTime.of(2023, 2, 3, 16, 40, 0), 8); - assertDateTime("twenty seventh of july at nine thirty nine in the evening", LocalDateTime.of(2023, 7, 27, 21, 39, 0), 11); - assertDateTime("twenty three milliseconds", NOW.withDayOfMonth(23), 2); - assertDateTime("next three months on the dot", NOW.plusMonths(3), 3); - assertDateTime("in fifteen d", NOW.plusDays(15), 3); - assertDateTime("thirty two nanoseconds ago", NOW.minusNanos(32), 4); - assertDateTime("dos y días y siete milisegundos antes", NOW.minusNanos(7000000).minusDays(2), 6); - assertDateTime("siete de noviembre, 193 a.C.", NOW.withYear(-193).withMonth(11).withDayOfMonth(7), 8); + // NOTE (ES): All expected values are calculated from NOW (Sun, Feb 5, 2023 09:41:12). 
+ assertDateTime("mañana a las 12:45", F, LocalDateTime.of(2023, 2, 6, 12, 45, 0), 5); + assertDateTime("26/12/2003 19:18:59", F, LocalDateTime.of(2003, 12, 26, 19, 18, 59), 4); + assertDateTime("19:18:59 26/12/2003 test", F, LocalDateTime.of(2003, 12, 26, 19, 18, 59), 4); + assertDateTime("05/07/2003 1:2:3", F, LocalDateTime.of(2003, 7, 5, 1, 2, 3), 4); // Standard Spanish DD/MM + assertDateTime("05/07/2003 1:2:3", T, LocalDateTime.of(2003, 5, 7, 1, 2, 3), 4); // preferMonthBeforeDay MM/DD + assertDateTime("próximo viernes a las veintidós en punto", F, LocalDateTime.of(2023, 2, 10, 22, 0, 0), 7); + assertDateTime("ayer por la tarde a las cinco menos cuarto", F, LocalDateTime.of(2023, 2, 4, 16, 45, 0), 9); + assertDateTime("dentro de tres días por la noche a las once", F, LocalDateTime.of(2023, 2, 8, 23, 0, 0), 9); + assertDateTime("pasado mañana por la mañana", F, LocalDateTime.of(2023, 2, 7, 9, 0, 0), 4); + assertDateTime("domingo a las 2:45 p.m.", F, LocalDateTime.of(2023, 2, 5, 14, 45, 0), 6); + assertDateTime("hace dos días al atardecer", F, LocalDateTime.of(2023, 2, 3, 18, 0, 0), 5); + assertDateTime("siete de noviembre de 193 a.C.", T, LocalDateTime.of(-192, 11, 7, 9, 41, 12), 8); // 193 BC is year -192 } @Test public void testDateTimeNull() { - assertDateTimeNull("hello how are you"); - assertDateTimeNull("test twenty first of jan after a dinner"); - assertDateTimeNull("minus one millisecond"); + assertDateTimeNull("hola cómo estás", F); + assertDateTimeNull("prueba veintiuno de enero después de cenar", F); + assertDateTimeNull("menos un milisegundo", F); } @Test public void testNumberParserExtractDateTime() { + // NOTE (ES): This tests the top-level ParserFormatter class. 
final ParserFormatter npf = new ParserFormatter(null, new SpanishParser()); - assertNull(npf.extractDateTime("hello how are you").getFirst()); + assertNull(npf.extractDateTime("hola cómo estás").getFirst()); assertEquals(NOW.minusDays(30).withHour(14).withMinute(39).withSecond(0).withNano(0), - npf.extractDateTime("2:39 p.m., thirty days ago").now(NOW).getFirst()); + npf.extractDateTime("2:39 p.m. hace treinta días").now(NOW).getFirst()); assertEquals(NOW.plusMinutes(3).plusSeconds(46), - npf.extractDateTime("in three minutes forty six seconds").now(NOW).getFirst()); - assertEquals(NOW.withYear(3).withMonth(2).withDayOfMonth(1), - npf.extractDateTime("1 2/3").preferMonthBeforeDay(false).now(NOW).getFirst()); - assertEquals(NOW.withYear(3).withMonth(1).withDayOfMonth(2), - npf.extractDateTime("1.2,3").preferMonthBeforeDay(true).now(NOW).getFirst()); + npf.extractDateTime("dentro de tres minutos y cuarenta y seis segundos").now(NOW).getFirst()); } -} +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java index 0b9fa379..b6171587 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java @@ -21,192 +21,108 @@ public String configFolder() { return "config/es-es"; } - - //TODO Spanish translation - private void assertNumberFunction(final String s, - final boolean shortScale, final Number value, final int finalTokenStreamPosition, - final BiFunction numberFunction) { + final BiFunction numberFunction) { final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); - final Number number = numberFunction.apply(new EnglishNumberExtractor(ts, shortScale), ts); - assertEquals("wrong value for string " + s, value, number); - assertEquals("wrong final token position for number " + value, finalTokenStreamPosition, + // NOTE (ES): 
SpanishNumberExtractor does not use the shortScale parameter. + final Number number = numberFunction.apply(new SpanishNumberExtractor(ts), ts); + assertEquals("wrong value for string \"" + s + "\"", value, number); + assertEquals("wrong final token position for number " + (value != null ? value.toString() : "null"), finalTokenStreamPosition, ts.position); } private void assertNumberFunctionNull(final String s, - final boolean shortScale, - final BiFunction numberFunction) { - assertNumberFunction(s, shortScale, null, 0, numberFunction); - } - - private void assertNumberGroupLongScale(final String s, final boolean allowOrdinal, final double lastMultiplier, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { - assertNumberFunction(s, true, numberDeduceType(value).withOrdinal(isOrdinal), finalTokenStreamPosition, - (enp, ts) -> EnglishNumberExtractor.numberGroupLongScale(ts, allowOrdinal, lastMultiplier)); - } - - private void assertNumberGroupLongScaleNull(final String s, final boolean allowOrdinal, final double lastMultiplier) { - assertNumberFunctionNull(s, true, (enp, ts) -> EnglishNumberExtractor.numberGroupLongScale(ts, allowOrdinal, lastMultiplier)); + final BiFunction numberFunction) { + assertNumberFunction(s, null, 0, numberFunction); } - private void assertNumberInteger(final String s, final boolean shortScale, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { - assertNumberFunction(s, shortScale, numberDeduceType(value).withOrdinal(isOrdinal), finalTokenStreamPosition, + private void assertNumberInteger(final String s, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, numberDeduceType(value).withOrdinal(isOrdinal), finalTokenStreamPosition, (enp, ts) -> enp.numberInteger(allowOrdinal)); } private void assertNumberIntegerNull(final String s, final boolean allowOrdinal) { - 
assertNumberFunctionNull(s, true, (enp, ts) -> enp.numberInteger(allowOrdinal)); - assertNumberFunctionNull(s, false, (enp, ts) -> enp.numberInteger(allowOrdinal)); + assertNumberFunctionNull(s, (enp, ts) -> enp.numberInteger(allowOrdinal)); } - private void assertNumberPoint(final String s, final boolean shortScale, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { - assertNumberFunction(s, shortScale, numberDeduceType(value).withOrdinal(isOrdinal), + private void assertNumberPoint(final String s, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, numberDeduceType(value).withOrdinal(isOrdinal), finalTokenStreamPosition, (enp, ts) -> enp.numberPoint(allowOrdinal)); } private void assertNumberPointNull(final String s, final boolean allowOrdinal) { - assertNumberFunctionNull(s, true, (enp, ts) -> enp.numberPoint(allowOrdinal)); - assertNumberFunctionNull(s, false, (enp, ts) -> enp.numberPoint(allowOrdinal)); + assertNumberFunctionNull(s, (enp, ts) -> enp.numberPoint(allowOrdinal)); } - private void assertNumberSignPoint(final String s, final boolean shortScale, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { - assertNumberFunction(s, shortScale, numberDeduceType(value).withOrdinal(isOrdinal), + private void assertNumberSignPoint(final String s, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, numberDeduceType(value).withOrdinal(isOrdinal), finalTokenStreamPosition, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); } private void assertNumberSignPointNull(final String s, final boolean allowOrdinal) { - assertNumberFunctionNull(s, true, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); - assertNumberFunctionNull(s, false, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); + 
assertNumberFunctionNull(s, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); } private void assertDivideByDenominatorIfPossible(final String s, final Number startingNumber, final Number value, final int finalTokenStreamPosition) { - assertNumberFunction(s, true, value, finalTokenStreamPosition, + assertNumberFunction(s, value, finalTokenStreamPosition, (enp, ts) -> enp.divideByDenominatorIfPossible(startingNumber)); } - - - @Test - public void testNumberGroupLongScale() { - assertNumberGroupLongScale("un ciento y veinte millones", F, 1e9, 120e6, F, 5); - assertNumberGroupLongScale("sesenta tres mil billones", F, 1e28, 63e24, F, 3); - assertNumberGroupLongScale("trescientos y seis", T, 1e9, 3006, F, 4); - assertNumberGroupLongScale("un ciento mil", F, 1e6, 100000, F, 3); - assertNumberGroupLongScale("ciento 70 mil", T, 1e6, 170000, F, 3); - assertNumberGroupLongScale("572 millones", F, 1e9, 572e6, F, 2); - assertNumberGroupLongScale("572012 mil millones", F, 1e18, 572012e12, F, 2); - assertNumberGroupLongScale("3 millones", T, 1e9, 3e6, F, 2); - assertNumberGroupLongScale(", ciento noventa y uno", F, 1e6, 191, F, 6); - } - - @Test - public void testNumberGroupLongScaleOrdinal() { - assertNumberGroupLongScale("setecientos y sesentacuatro milésima", T, 1e9, 764e6, T, 6); - assertNumberGroupLongScale("seven hundred and sixty four millionth", F, 1e9, 764, F, 5); - assertNumberGroupLongScale("seven hundred and sixty four millionth", F, 1e6, 764, F, 5); - assertNumberGroupLongScale("fifth billionth", T, 1e9, 5, T, 1); - assertNumberGroupLongScale("nineteen hundredth", T, 1e9, 19, F, 1); - assertNumberGroupLongScaleNull("seven hundred and sixty four millionth", T, 1000); - assertNumberGroupLongScaleNull("twelfth thousandth", F, 1e9); - } - - @Test - public void testNumberGroupLongScaleNull() { - assertNumberGroupLongScaleNull("", T, 1e9); - assertNumberGroupLongScaleNull("hello", F, 1e6); - assertNumberGroupLongScaleNull("hello how are you", T, 1e6); - 
assertNumberGroupLongScaleNull("5000000", T, 1e9); - assertNumberGroupLongScaleNull("one hundred and six", F, 999); - assertNumberGroupLongScaleNull("twelve", T, 0); - assertNumberGroupLongScaleNull("seven billion", F, 1e6); - assertNumberGroupLongScaleNull("nine thousand and one", T, 1000); - assertNumberGroupLongScaleNull("eight million people", F, 1e6); - assertNumberGroupLongScaleNull(" ten ", T, 1e6); - } + + // --- Spanish-specific number tests --- @Test public void testNumberInteger() { - assertNumberInteger("one hundred and four thousand, six hundred quadrillion, sixty four thousand and one trillion, one hundred thousand billion", F, F, 104600064001100000e12, F, 21); - assertNumberInteger("twenty 5 billion, 1 hundred and sixty four million, seven thousand and nineteen", T, T, 25164007019L, F, 15); - assertNumberInteger("twenty 5 billion, 1 hundred and sixty four million, seven billion", T, T, 25164000000L, F, 10); - assertNumberInteger("two thousand, one hundred and ninety one", T, F, 2191, F, 8); - assertNumberInteger("nine hundred and ten", F, T, 910, F, 4); - assertNumberInteger("two million", F, F, 2000000, F, 2); - assertNumberInteger("one thousand and ten", T, T, 1010, F, 4); - assertNumberInteger("1234567890123", T, F, 1234567890123L, F, 1); - assertNumberInteger("654 and", F, T, 654, F, 1); - assertNumberInteger("a hundred four,", F, F, 104, F, 3); - assertNumberInteger("nine thousand, three million", T, T, 9000, F, 2); + // NOTE (ES): Spanish uses long scale. Billón = 10^12, Trillón = 10^18. 
+ assertNumberInteger("veinticinco billones, ciento sesenta y cuatro mil millones, siete mil diecinueve", F, 25164000007019L, F, 11); + assertNumberInteger("dos mil ciento noventa y uno", F, 2191, F, 5); + assertNumberInteger("novecientos diez", T, 910, F, 2); + assertNumberInteger("dos millones", F, 2000000, F, 2); + assertNumberInteger("un millón", F, 1000000, F, 2); + assertNumberInteger("mil diez", T, 1010, F, 2); + assertNumberInteger("1234567890123", T, 1234567890123L, F, 1); + assertNumberInteger("seiscientos cincuenta y cuatro y", F, 654, F, 4); + assertNumberInteger("ciento cuatro,", F, 104, F, 2); + assertNumberInteger("nueve mil, tres millones", T, 9003, F, 4); // "mil" acts as a separator here } @Test public void testNumberIntegerOrdinal() { - assertNumberInteger("one hundred and four thousand, six hundred quadrillion, sixty four thousand and one trillion, one hundred thousand billionth", F, T, 104600064001100000e12, T, 21); - assertNumberInteger("one hundred and four thousand, six hundred quadrillion, sixty four thousand and one trillionth, one hundred thousand billion", F, T, 104600064001e18, T, 16); - assertNumberInteger("one hundred and four thousand, six hundred quadrillion, sixty four thousand and one trillionth, one hundred thousand billion", F, F, 104600e24, F, 15); - assertNumberInteger("twenty 5 billion, 1 hundred and sixty four million, seven thousand and nineteenth", T, T, 25164007019L, T, 15); - assertNumberInteger("73 billion, twenty three millionth, seven thousand and nineteen", T, T, 73023000000L, T, 6); - assertNumberInteger("one hundred and 6 billion, twenty one million, one billionth", T, T, 106021000000L, F, 9); - assertNumberInteger("one hundred and 6 billion, twenty one million, one thousandth", T, F, 106021000001L, F, 11); - assertNumberInteger("nineteen hundredth", T, T, 1900, T, 2); - assertNumberInteger("twenty oh first", F, T, 2001, T, 3); - assertNumberInteger("twenty oh first", F, F, 20, F, 1); - 
assertNumberInteger("nineteen 09th", T, T, 1909, T, 3); - assertNumberInteger("nineteen 09th", T, F, 19, F, 1); - assertNumberInteger("eleven sixteenth", F, T, 1116, T, 2); - assertNumberInteger("eleven sixteenth", F, F, 11, F, 1); - assertNumberInteger("eighteen twenty first", T, T, 1821, T, 3); - assertNumberInteger("eighteen twenty first", T, F, 1820, F, 2); - assertNumberInteger("thirteen sixtieth", F, T, 1360, T, 2); - assertNumberInteger("thirteen sixtieth", F, F, 13, F, 1); - assertNumberInteger("sixteenth hundred", T, T, 16, T, 1); - assertNumberInteger("sixteenth oh four", T, T, 16, T, 1); - assertNumberInteger("543789th", F, T, 543789, T, 2); - assertNumberInteger("75,483,543 rd", F, T, 75483543, T, 6); - assertNumberIntegerNull("2938th", F); - assertNumberIntegerNull("102,321th", F); - assertNumberIntegerNull("thirteenth hundredth", F); + assertNumberInteger("vigésimo quinto", T, 25, T, 2); + assertNumberInteger("milésimo", T, 1000, T, 1); + assertNumberInteger("ciento cuatro mil, seis billonésimo", T, 104000e12, T, 5); + assertNumberInteger("543789º", T, 543789, T, 2); + assertNumberInteger("75.483.543ro", T, 75483543, T, 6); + assertNumberIntegerNull("2938ro", F); // Ordinal suffix only works on single token raw numbers } @Test public void testNumberIntegerThousandSeparator() { - // independent of short/long scale and of ordinal mode - assertNumberInteger("23,001", T, F, 23001, F, 3); - assertNumberInteger("19,123", T, T, 19123, F, 3); - assertNumberInteger("a 167,42", F, T, 167, F, 2); - assertNumberInteger("1,234,023,054, hello", F, F, 1234023054, F, 7); - assertNumberInteger("23,001, a 500", T, T, 23001, F, 3); - assertNumberInteger("5,030,two", F, F, 5030, F, 3); - assertNumberInteger("67,104,23", F, T, 67104, F, 3); + // NOTE (ES): Spanish uses a dot (.) as a thousand separator. 
+ assertNumberInteger("23.001", T, 23001, F, 3); + assertNumberInteger("19.123", T, 19123, F, 3); + assertNumberInteger("un 167.42", F, 167, F, 2); + assertNumberInteger("1.234.023.054, hola", F, 1234023054, F, 7); } - + @Test - public void testNumberIntegerYear() { - // independent of short/long scale and of ordinal mode - assertNumberInteger("two twenty-one", T, T, 2, F, 1); - assertNumberInteger("nineteen 745", F, F, 19, F, 1); - assertNumberInteger("nineteen 25", F, F, 1925, F, 2); - assertNumberInteger("19 twenty five", F, F, 19, F, 1); - assertNumberInteger("19 25", F, F, 19, F, 1); - assertNumberInteger("nineteenth twenty five", F, T, 19, T, 1); - assertNumberInteger("ten 21", F, T, 1021, F, 2); - assertNumberInteger("nineteen oh 6 and two", T, F, 1906, F, 3); - assertNumberInteger("twenty-nought-oh", T, T, 2000, F, 5); - assertNumberInteger("eleven zero 0", F, F, 1100, F, 3); - assertNumberInteger("seventeen 0 0", F, T, 1700, F, 3); - assertNumberInteger("sixty-four-hundred", T, F, 6400, F, 5); - assertNumberInteger("two hundred and twelve hundred", T, T, 212, F, 4); - assertNumberInteger("58 hundred", F, F, 5800, F, 2); - assertNumberInteger("nineteen hundred", F, T, 1900, F, 2); - assertNumberInteger("eighteen 1", T, F, 18, F, 1); + public void testNumberIntegerComposition() { + // NOTE (ES): These tests validate the `compound_word_piece` logic. 
+ assertNumberInteger("veinte y uno mil", F, 21000, F, 4); + assertNumberInteger("doscientos mil", F, 200000, F, 2); + assertNumberInteger("trescientos treinta y tres mil trescientos treinta y tres", F, 333333, F, 8); + assertNumberInteger("un millón un", F, 1000001, F, 3); } private int tokensInFormattedString(final String formatted) { - int tokensInFormatted = 1; - for (int j = 0; j < formatted.length(); ++j) { - if (formatted.charAt(j) == ' ' || formatted.charAt(j) == ',') { - ++tokensInFormatted; + int tokensInFormatted = 0; + if (!formatted.isEmpty()) { + tokensInFormatted = 1; + for (char c : formatted.toCharArray()) { + if (c == ' ' || c == ',') { + tokensInFormatted++; + } } } return tokensInFormatted; @@ -214,181 +130,97 @@ private int tokensInFormattedString(final String formatted) { @Test public void testNumberIntegerWithFormatter() { - final ParserFormatter npf = new ParserFormatter(new EnglishFormatter(), null); - for (int i = 0; i < 1100000000;) { - if (i < 2200) { - ++i; // test all numbers from 0 to 200 (also tests years!) 
- } else if (i < 1000000) { - i += 1207; - } else { - i += 299527; - } + final ParserFormatter npf = new ParserFormatter(new SpanishFormatter(), null); + for (int i = 0; i < 2000000;) { + if (i < 2200) i++; + else if (i < 100000) i += 1207; + else i += 299527; - // not ordinal String formatted = npf.pronounceNumber(i).places(0).get(); int tokensInFormatted = tokensInFormattedString(formatted); - assertNumberInteger(formatted, T, T, i, F, tokensInFormatted); + assertNumberInteger(formatted, T, i, F, tokensInFormatted); - // ordinal formatted = npf.pronounceNumber(i).places(0).ordinal(T).get(); tokensInFormatted = tokensInFormattedString(formatted); - assertNumberInteger(formatted, T, T, i, T, tokensInFormatted); - - // long scale not ordinal - formatted = npf.pronounceNumber(i).places(0).shortScale(false).get(); - tokensInFormatted = tokensInFormattedString(formatted); - assertNumberInteger(formatted, F, T, i, F, tokensInFormatted); - - // long scale ordinal - formatted = npf.pronounceNumber(i).places(0).shortScale(false).ordinal(true).get(); - tokensInFormatted = tokensInFormattedString(formatted); - assertNumberInteger(formatted, F, T, i, T, tokensInFormatted); - } - } - - @Test(timeout = 4000) // 40000 formats + parses take <2s, use 4s timeout just for slower PCs - public void testNumberIntegerPerformanceWithFormatter() { - final ParserFormatter npf = new ParserFormatter(new EnglishFormatter(), null); - final long startingValue = 54378960497L; - for (long i = startingValue; i < startingValue + 10000; ++i) { - // short scale not ordinal - String formatted = npf.pronounceNumber(i).places(0).get(); - int tokensInFormatted = tokensInFormattedString(formatted); - assertNumberInteger(formatted, T, T, i, F, tokensInFormatted); - - // short scale ordinal - formatted = npf.pronounceNumber(i).places(0).ordinal(true).get(); - tokensInFormatted = tokensInFormattedString(formatted); - assertNumberInteger(formatted, T, T, i, T, tokensInFormatted); - - // long scale not ordinal 
- formatted = npf.pronounceNumber(i).places(0).shortScale(false).get(); - tokensInFormatted = tokensInFormattedString(formatted); - assertNumberInteger(formatted, F, T, i, F, tokensInFormatted); - - // long scale ordinal - formatted = npf.pronounceNumber(i).places(0).shortScale(false).ordinal(true).get(); - tokensInFormatted = tokensInFormattedString(formatted); - assertNumberInteger(formatted, F, T, i, T, tokensInFormatted); + assertNumberInteger(formatted, T, i, T, tokensInFormatted); } } @Test public void testNumberIntegerNull() { - assertNumberIntegerNull("", T); + assertNumberIntegerNull("", T); assertNumberIntegerNull("un hola cómo estás", F); - assertNumberIntegerNull(", y", T); - assertNumberIntegerNull("cero dos", F); - assertNumberIntegerNull(", 123485 y", T); - assertNumberIntegerNull("y 123", F); - assertNumberIntegerNull(" un ciento ", T); + assertNumberIntegerNull(", y", T); + assertNumberIntegerNull("cero dos", F); + assertNumberIntegerNull(", 123.485 y", T); + assertNumberIntegerNull("y 123", F); + assertNumberIntegerNull(" un mil ", T); } @Test public void testNumberPoint() { - assertNumberPoint("one thousand, five hundred and seventy four point nine one two oh nought o zero", T, T, 1574.912, F, 16); - assertNumberPoint("twenty three point nought 1 oh 2 three, five hundred", F, T, 23.01023, F, 8); - assertNumberPoint("fifteen-oh-nine point eight four five", F, F, 1509.845, F, 9); - assertNumberPoint("twenty three thousand point sixteen", T, T, 23000, F, 3); - assertNumberPoint("3645.7183", T, F, 3645.7183, F, 3); - assertNumberPoint("twenty five.2", F, T, 25.2, F, 4); - assertNumberPoint("eighty point 6745", F, F, 80.6745, F, 3); - assertNumberPoint("4 point 67 45", T, T, 4.67, F, 3); - assertNumberPoint("4000 point 6 63", T, F, 4000.6, F, 3); - assertNumberPoint("74567 point six", F, T, 74567.6, F, 3); - assertNumberPoint("nought . 
6 8 2 zero twenty", F, F, 0.682, F, 6); - assertNumberPoint("74567 point six", T, T, 74567.6, F, 3); - assertNumberPoint("point 800", T, F, .8, F, 2); - assertNumberPoint("one point twenty", F, T, 1, F, 1); + // NOTE (ES): Uses "coma" as a decimal point. + assertNumberPoint("mil quinientos setenta y cuatro coma nueve uno dos cero", T, 1574.9120, F, 9); + assertNumberPoint("veintitrés coma cero uno cero dos tres", T, 23.01023, F, 7); + assertNumberPoint("3645,7183", T, 3645.7183, F, 3); + assertNumberPoint("ochenta coma 6745", T, 80.6745, F, 3); + assertNumberPoint("cuatro coma sesenta y siete", T, 4.67, F, 4); + assertNumberPoint("coma ochocientos", T, 0.8, F, 2); } @Test public void testNumberPointFraction() { - assertNumberPoint("twenty three million, one hundred thousand and sixty four over sixteen", F, F, 1443754, F, 12); - assertNumberPoint("sixteen over twenty three million, one hundred thousand and sixty four", T, T, 1.0 / 1443754.0, F, 12); - assertNumberPoint("8 thousand and, 192 divided by 4 thousand 96 eight", T, F, 2, F, 10); - assertNumberPoint("ninety eight hundred / one hundred", F, T, 98, F, 6); - assertNumberPoint("twenty four over sixty five", F, T, 24.0 / 65.0, F, 5); - assertNumberPoint("one over five and a half", T, F, 1.0 / 5.0, F, 3); - assertNumberPoint("twenty six divided by seven", T, T, 26.0 / 7.0, F, 5); - assertNumberPoint("47328 over 12093", F, F, 47328.0 / 12093.0, F, 3); - assertNumberPoint("five / six nine two", F, T, 5.0 / 6.0, F, 3); - assertNumberPoint("nine over, two", T, F, 9, F, 1); - assertNumberPoint("eight divided five", T, T, 8.0 / 5.0, F, 3); - assertNumberPoint("six by nineteen", F, F, 6, F, 1); + assertNumberPoint("veintitrés millones cien mil sesenta y cuatro sobre dieciséis", F, 1443754, F, 9); + assertNumberPoint("ocho mil ciento noventa y dos dividido por cuatro mil noventa y seis", T, 2, F, 11); + assertNumberPoint("noventa y ocho sobre cien", T, 0.98, F, 5); + assertNumberPoint("veinticuatro sobre sesenta y 
cinco", T, 24.0 / 65.0, F, 5); } @Test public void testNumberPointOrdinal() { - assertNumberPoint("fifth point six", T, T, 5, T, 1); - assertNumberPoint("3 thousand 7 hundred tenth over six", F, T, 3710, T, 5); - assertNumberPoint("3 thousand 7 hundred tenth over six", T, F, 3700, F, 4); - assertNumberPoint("eight point one second", F, F, 8.1, F, 3); - assertNumberPoint("eight point one third", T, T, 8.1, F, 3); - assertNumberPoint("six over fifth", F, T, 6, F, 1); - assertNumberPoint("nine over thirty ninth", T, T, 0.3, F, 3); - assertNumberPoint("nine over thirty ninth", F, F, 0.3, F, 3); - assertNumberPoint("thirteen point 1 2 3 th", T, T, 13.12, F, 4); + assertNumberPoint("quinto coma seis", T, 5, T, 1); + assertNumberPoint("ocho coma un segundo", F, 8.1, F, 4); + assertNumberPoint("nueve sobre trigésimo noveno", T, 9.0/39.0, F, 4); } @Test public void testNumberPointNull() { - assertNumberPointNull("", F); - assertNumberPointNull("hello world", T); - assertNumberPointNull("point", F); - assertNumberPointNull("point twenty", T); - assertNumberPointNull("point, 1 2 3 4", F); - assertNumberPointNull(". 
and six four eight", T); - assertNumberPointNull("over two", F); - assertNumberPointNull(" one divided by five", T); + assertNumberPointNull("", F); + assertNumberPointNull("hola mundo", T); + assertNumberPointNull("coma", F); + assertNumberPointNull("coma veinte", T); + assertNumberPointNull("sobre dos", F); + assertNumberPointNull(" uno dividido por cinco", T); } @Test public void testNumberSignPoint() { - assertNumberSignPoint("minus seventy six thousand, three hundred and fifty six over 23", T, T, -76356.0 / 23.0, F, 12); - assertNumberSignPoint("minus twelve", T, F, -12, F, 2); - assertNumberSignPoint("plus million", F, T, 1000000, F, 2); - assertNumberSignPoint("-1843", F, F, -1843, F, 2); - assertNumberSignPoint("+573,976", T, T, 573976, F, 4); - assertNumberSignPoint("minus 42903.5", T, F, -42903.5, F, 4); - assertNumberSignPoint("minus point oh four", F, T, -.04, F, 4); + assertNumberSignPoint("menos setenta y seis mil sobre 23", T, -76000.0 / 23.0, F, 6); + assertNumberSignPoint("menos doce", T, -12, F, 2); + assertNumberSignPoint("más un millón", T, 1000000, F, 3); + assertNumberSignPoint("-1843", F, -1843, F, 2); + assertNumberSignPoint("+573.976", T, 573976, F, 4); + assertNumberSignPoint("menos 42903,5", T, -42903.5, F, 4); + assertNumberSignPoint("menos coma cero cuatro", T, -0.04, F, 4); } @Test public void testNumberSignPointOrdinal() { - assertNumberSignPoint("minus twelfth", T, T, -12, T, 2); - assertNumberSignPoint("-one hundredth", F, F, -1, F, 2); - assertNumberSignPoint("plus millionth ten", F, T, 1000000, T, 2); - assertNumberSignPoint("-1843th", T, T, -1843, T, 3); - assertNumberSignPoint("+573,976rd", T, T, 573976, T, 5); - assertNumberSignPointNull("minus first", F); - assertNumberSignPointNull("-1843th", F); - } - - @Test - public void testNumberSignPointNull() { - assertNumberSignPointNull("", F); - assertNumberSignPointNull("hello how are you", T); - assertNumberSignPointNull("minus minus 1 hundred and sixty", F); - 
assertNumberSignPointNull(" plus million", T); - assertNumberSignPointNull(" +- 5", F); + assertNumberSignPoint("menos duodécimo", T, -12, T, 2); + assertNumberSignPoint("-centésimo", F, -100, T, 2); + assertNumberSignPointNull("menos primero", F); } @Test public void testDivideByDenominatorIfPossible() { - assertDivideByDenominatorIfPossible("fifths", n(5, F), n(1, F), 1); - assertDivideByDenominatorIfPossible("dozen two", n(3, F), n(36, F), 1); - assertDivideByDenominatorIfPossible("halves a", n(19, F), n(9.5, F), 1); - assertDivideByDenominatorIfPossible("%", n(50, F), n(0.5, F), 1); - assertDivideByDenominatorIfPossible("‰", n(1000, F), n(1, F), 1); - assertDivideByDenominatorIfPossible("quarter", n(16, F), n(4, F), 1); - assertDivideByDenominatorIfPossible("quarter", n(4.4, F), n(4.4, F), 0); - assertDivideByDenominatorIfPossible("people", n(98, F), n(98, F), 0); - - // "a" could be the numerator of fractions but not really a number, so handled here - assertDivideByDenominatorIfPossible("a tenth", null, n(0.1, F), 2); - assertDivideByDenominatorIfPossible("a ten", null, null, 0); - assertDivideByDenominatorIfPossible("a people", null, null, 0); - assertDivideByDenominatorIfPossible("a tenth", n(2.8, F), n(2.8, F), 0); - } -} + assertDivideByDenominatorIfPossible("quintos", n(5, F), n(1, F), 1); + assertDivideByDenominatorIfPossible("docena dos", n(3, F), n(36, F), 2); + assertDivideByDenominatorIfPossible("media y", n(19, F), n(9.5, F), 2); + assertDivideByDenominatorIfPossible("%", n(50, F), n(0.5, F), 1); + assertDivideByDenominatorIfPossible("‰", n(1000, F), n(1, F), 1); + assertDivideByDenominatorIfPossible("cuarto", n(16, F), n(4, F), 1); + assertDivideByDenominatorIfPossible("gente", n(98, F), n(98, F), 0); + assertDivideByDenominatorIfPossible("un décimo", null, n(0.1, F), 2); + assertDivideByDenominatorIfPossible("una decena", null, null, 0); + } +} \ No newline at end of file diff --git 
a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java index 57ab0412..f6e3eb9f 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java @@ -17,48 +17,48 @@ public Formatter buildNumberFormatter() { @Test public void zero() { assertDuration("cero segundos", T, 0, 0, 0, 0); - assertDuration("0:00", F, 0, 0, 0, 0); + assertDuration("0:00:00", F, 0, 0, 0, 0); } @Test public void speechOne() { + // NOTE (ES): Testing singular units, paying attention to gender. + // "segundo" and "minuto" are masculine ("un"), but "hora" is feminine ("una"). assertDuration("un segundo", T, 0, 0, 0, 1); - assertDuration("un minuto", T, 0, 0, 1, 0); + assertDuration("un minuto", T, 0, 0, 1, 0); assertDuration("una hora", T, 0, 1, 0, 0); - assertDuration("un día", T, 1, 0, 0, 0); + assertDuration("un día", T, 1, 0, 0, 0); } @Test public void speechMany() { - assertDuration("cinco segundos", T, 0, 0, 0, 5); - assertDuration("dos minutos", T, 0, 0, 2, 0); - assertDuration("diecisiete horas", T, 0, 17, 0, 0); - assertDuration("ochenta y cuatro horas", T, 84, 0, 0, 0); + assertDuration("cinco segundos", T, 0, 0, 0, 5); + assertDuration("dos minutos", T, 0, 0, 2, 0); + assertDuration("diecisiete horas", T, 0, 17, 0, 0); + assertDuration("tres días y doce horas", T, 3, 12, 0, 0); // Changed from 84 hours } - //TODO Spanish translation - @Test public void speech() { - assertDuration("seis días veintitrés horas cincuenta y nueve minutos treinta y dos segundos", T, 6, 23, 59, 32); - assertDuration("diecinueve días cincuenta y dos minutos", T, 19, 0, 52, 0); - assertDuration("una hora seis segundos", T, 0, 1, 0, 6); - assertDuration("sesenta y tres días cuarenta y cuatro segundos", T, 63, 0, 0, 44); - assertDuration("un día una hora un minuto un segundo", T, 1, 1, 1, 1); + assertDuration("seis días, veintitrés 
horas, cincuenta y nueve minutos y treinta y dos segundos", T, 6, 23, 59, 32); + assertDuration("diecinueve días y cincuenta y dos minutos", T, 19, 0, 52, 0); + assertDuration("una hora y seis segundos", T, 0, 1, 0, 6); + assertDuration("sesenta y tres días y cuarenta y cuatro segundos", T, 63, 0, 0, 44); + assertDuration("un día, una hora, un minuto y un segundo", T, 1, 1, 1, 1); } @Test public void noSpeechOne() { - assertDuration("0:01", F, 0, 0, 0, 1); - assertDuration("1:00", F, 0, 0, 1, 0); + assertDuration("0:00:01", F, 0, 0, 0, 1); + assertDuration("0:01:00", F, 0, 0, 1, 0); assertDuration("1:00:00", F, 0, 1, 0, 0); assertDuration("1d 0:00:00", F, 1, 0, 0, 0); } @Test public void noSpeechMany() { - assertDuration("0:39", F, 0, 0, 0, 39); - assertDuration("24:00", F, 0, 0, 24, 0); + assertDuration("0:00:39", F, 0, 0, 0, 39); + assertDuration("0:24:00", F, 0, 0, 24, 0); assertDuration("3:00:00", F, 0, 3, 0, 0); assertDuration("76d 0:00:00", F, 76, 0, 0, 0); } @@ -69,6 +69,6 @@ public void noSpeech() { assertDuration("19d 0:52:00", F, 19, 0, 52, 0); assertDuration("1:00:06", F, 0, 1, 0, 6); assertDuration("63d 0:00:44", F, 63, 0, 0, 44); - assertDuration("1d 1:01:01", F , 1, 1, 1, 1); + assertDuration("1d 1:01:01", F, 1, 1, 1, 1); } -} +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java index 3238bb66..f4341c55 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java @@ -12,121 +12,110 @@ public class NumberExtractorUtilsTest extends NumberExtractorUtilsTestBase { public String configFolder() { return "config/es-es"; } - + @Test public void testNumberLessThan1000() { - assertNumberLessThan1000("cero", T, 0, F, 1); - assertNumberLessThan1000("uno", F, 1, F, 1); - assertNumberLessThan1000("cinco", 
T, 5, F, 1); - assertNumberLessThan1000("diecinueve", F, 19, F, 1); - assertNumberLessThan1000("cien", T, 100, F, 1); - assertNumberLessThan1000("un ciento", F, 100, F, 2); - assertNumberLessThan1000("trescientos", T, 300, F, 2); - assertNumberLessThan1000("veintiséis", F, 26, F, 2); - assertNumberLessThan1000("treinta y siete", T, 37, F, 3); - assertNumberLessThan1000("setecientos seis", F, 706, F, 3); - assertNumberLessThan1000("ochocientos dieciocho", T, 818, F, 3); + assertNumberLessThan1000("cero", T, 0, F, 1); + assertNumberLessThan1000("uno", F, 1, F, 1); + assertNumberLessThan1000("un", F, 1, F, 1); + assertNumberLessThan1000("cinco", T, 5, F, 1); + assertNumberLessThan1000("diecinueve", F, 19, F, 1); + assertNumberLessThan1000("cien", T, 100, F, 1); + assertNumberLessThan1000("trescientos", T, 300, F, 1); + assertNumberLessThan1000("veintiséis", F, 26, F, 1); + // NOTE (ES): Spanish uses "y" to connect tens and units (e.g., treinta y siete). + assertNumberLessThan1000("treinta y siete", T, 37, F, 3); + assertNumberLessThan1000("setecientos seis", F, 706, F, 2); + assertNumberLessThan1000("ochocientos dieciocho", T, 818, F, 2); } @Test public void testNumberLessThan1000Digits() { - assertNumberLessThan1000("0", F, 0, F, 1); - assertNumberLessThan1000("1", T, 1, F, 1); - assertNumberLessThan1000("6", F, 6, F, 1); - assertNumberLessThan1000("15", T, 15, F, 1); - assertNumberLessThan1000("100 diecinueve", F, 100, F, 1); - assertNumberLessThan1000("3 cientos 8", T, 308, F, 3); - assertNumberLessThan1000("72", F, 72, F, 1); - assertNumberLessThan1000("912", T, 912, F, 1); - assertNumberLessThan1000("8 cientos y 18", F, 818, F, 4); - assertNumberLessThan1000("7 cientos 3 9", T, 703, F, 3); - assertNumberLessThan1000("ciento 4 7", F, 104, F, 2); - assertNumberLessThan1000("19 ciento", T, 19, F, 1); - assertNumberLessThan1000("sesenta 7", F, 67, F, 2); - assertNumberLessThan1000("30 6", T, 30, F, 1); + assertNumberLessThan1000("0", F, 0, F, 1); + 
assertNumberLessThan1000("1", T, 1, F, 1); + assertNumberLessThan1000("15", T, 15, F, 1); + assertNumberLessThan1000("100 diecinueve", F, 100, F, 1); + assertNumberLessThan1000("3 cientos 8", T, 300, F, 2); // "cientos" is not a number, stops at 3 + assertNumberLessThan1000("72", F, 72, F, 1); + assertNumberLessThan1000("912", T, 912, F, 1); + assertNumberLessThan1000("8 ciento 18", F, 818, F, 3); + assertNumberLessThan1000("ciento 47", F, 147, F, 2); + assertNumberLessThan1000("sesenta y 7", F, 67, F, 3); } @Test public void testNumberLessThan1000EdgeCases() { - assertNumberLessThan1000("cuatro cinco", T, 4, F, 1); - assertNumberLessThan1000("dos y", F, 2, F, 2); - assertNumberLessThan1000("uno trece", T, 1, F, 1); - assertNumberLessThan1000("dieciséis ocho", F, 16, F, 1); - assertNumberLessThan1000("mil ochocientos", T, 18, F, 1); - assertNumberLessThan1000("cero cien", F, 0, F, 1); - assertNumberLessThan1000("sesenta cero", T, 60, F, 1); - assertNumberLessThan1000("cien", F, 100, F, 2); - assertNumberLessThan1000("uno, y un ciento", T, 100, F, 5); - assertNumberLessThan1000("setecientos seis", F, 706, F, 4); - assertNumberLessThan1000("ciento noventa y uno", T, 191, F, 5); - assertNumberLessThan1000("ocho y ciento quince", F, 815, F, 6); - assertNumberLessThan1000("uno ciento once", T, 111, F, 9); + assertNumberLessThan1000("cuatro cinco", T, 4, F, 1); + assertNumberLessThan1000("un dos y", F, 1, F, 1); + assertNumberLessThan1000("uno trece", T, 1, F, 1); + assertNumberLessThan1000("dieciséis ocho", F, 16, F, 1); + assertNumberLessThan1000("dieciocho cien", T, 18, F, 1); + assertNumberLessThan1000("cero cien", F, 0, F, 1); + assertNumberLessThan1000("sesenta cero", T, 60, F, 1); + assertNumberLessThan1000("un ciento", F, 100, F, 2); + assertNumberLessThan1000("uno y ciento", T, 100, F, 3); + assertNumberLessThan1000("setecientos y seis", F, 706, F, 3); + assertNumberLessThan1000("ciento noventa y uno", T, 191, F, 4); } @Test public void 
testNumberLessThan1000Ordinal() { - assertNumberLessThan1000("quinto", T, 5, T, 1); - assertNumberLessThan1000("vigésimo sexto", T, 26, T, 2); - assertNumberLessThan1000("septuagésimo octavo", F, 70, F, 1); - assertNumberLessThan1000("quincuagésimo octavo", T, 50, T, 1); - assertNumberLessThan1000("centésimo decimotercero", T, 113, T, 4); - assertNumberLessThan1000("primer centenar", T, 1, T, 1); - assertNumberLessThan1000("septuagésimo diez", T, 700, T, 2); - assertNumberLessThan1000("nueve centésimo", F, 9, F, 1); - assertNumberLessThan1000("23 va", T, 23, T, 2); - assertNumberLessThan1000("620va", T, 620, T, 2); - assertNumberLessThan1000("6va", T, 6, T, 2); - assertNumberLessThan1000("8 primero", T, 8, F, 1); - assertNumberLessThan1000("1er ciento", T, 1, T, 2); - assertNumberLessThan1000Null("séptimo", F); - assertNumberLessThan1000Null("96va", F); + assertNumberLessThan1000("quinto", T, 5, T, 1); + assertNumberLessThan1000("vigésimo sexto", T, 26, T, 2); + assertNumberLessThan1000("septuagésimo octavo", F, 70, F, 1); + assertNumberLessThan1000("quincuagésimo noveno", T, 50, T, 1); + assertNumberLessThan1000("centésimo decimotercero", T, 113, T, 2); + assertNumberLessThan1000("primer ciento", T, 1, T, 1); + assertNumberLessThan1000("septingentésimo décimo", T, 700, T, 1); + assertNumberLessThan1000("987º", T, 987, T, 2); + assertNumberLessThan1000("23ro", T, 23, T, 2); + assertNumberLessThan1000("8vo primero", T, 8, F, 1); + assertNumberLessThan1000("1ro ciento", T, 1, T, 2); + assertNumberLessThan1000Null("septuagésima", F); + assertNumberLessThan1000Null("101ro", F); } @Test public void testNumberLessThan1000Null() { - assertNumberLessThan1000Null("", F); - assertNumberLessThan1000Null("hola", T); - assertNumberLessThan1000Null("hola como estas", F); - assertNumberLessThan1000Null("un hola dos y", T); - assertNumberLessThan1000Null("un coche y medio,", F); - assertNumberLessThan1000Null("un millón", T); - assertNumberLessThan1000Null(" veinte", F); + 
assertNumberLessThan1000Null("", F); + assertNumberLessThan1000Null("hola", T); + assertNumberLessThan1000Null("hola como estas", F); + assertNumberLessThan1000Null("hola dos y", T); + assertNumberLessThan1000Null("un millón", T); + assertNumberLessThan1000Null(" veinte", F); } @Test public void testNumberGroupShortScale() { - assertNumberGroupShortScale("ciento veinte millones", F, 1000000000, 120000000, F, 5); - assertNumberGroupShortScale("tres mil seis", T, 1000000000, 3000, F, 2); - assertNumberGroupShortScale("un cien mil", F, 1000000, 100000, F, 3); - assertNumberGroupShortScale("ciento 70 mil", T, 1000000, 170000, F, 3); - assertNumberGroupShortScale("572 millones", F, 1000000000, 572000000, F, 2); - assertNumberGroupShortScale("3 millones", T, 1000000000, 3000000, F, 2); - assertNumberGroupShortScale(", ciento noventa y uno", F, 1000, 191, F, 6); + // NOTE (ES): Spanish uses long scale, but this method tests number group composition before multipliers are applied. + // It tests if "ciento veinte" is parsed as 120 before it's multiplied by "millones". 
+ assertNumberGroupShortScale("ciento veinte millones", F, 1000000000, 120, F, 2); + assertNumberGroupShortScale("mil seis", T, 1000000000, 1006, F, 2); + assertNumberGroupShortScale("seiscientos mil", F, 1000000, 600, F, 1); + assertNumberGroupShortScale("ciento setenta mil", T, 1000000, 170, F, 2); + assertNumberGroupShortScale("572 millones", F, 1000000000, 572, F, 1); + assertNumberGroupShortScale("un millón", T, 1000000000, 1, F, 1); + assertNumberGroupShortScale(", ciento noventa y uno", F, 1000, 191, F, 4); } @Test public void testNumberGroupShortScaleOrdinal() { - assertNumberGroupShortScale("setecientos sesenta y cuatro millonésimas", T, 1000000000, 764000000, T, 6); - assertNumberGroupShortScale("setecientos sesenta y cuatro millonésimas", F, 1000000000, 764, F, 5); - assertNumberGroupShortScale("setecientos sesenta y cuatro millonésimas", F, 1000, 764, F, 5); - assertNumberGroupShortScale("quinto milmillonésimo", T, 1000000000, 5, T, 1); - assertNumberGroupShortScale("mil novecientos", T, 1000000000, 19, F, 1); - assertNumberGroupShortScaleNull("setecientos sesenta y cuatro millones", T, 1000); - assertNumberGroupShortScaleNull("duodécimo milésimo", F, 1000000000); + assertNumberGroupShortScale("setecientos sesenta y cuatro millonésimo", T, 1000000000, 764, T, 4); + assertNumberGroupShortScale("quinto milmillonésimo", T, 1000000000, 5, T, 1); + assertNumberGroupShortScale("decimonoveno centésimo", T, 1000000000, 19, F, 1); // "centésimo" is not a multiplier here + assertNumberGroupShortScaleNull("duodécimo milésimo", F, 1000000000); } @Test public void testNumberGroupShortScaleNull() { - assertNumberGroupShortScaleNull("", T, 1000000000); - assertNumberGroupShortScaleNull("hola", F, 1000000); - assertNumberGroupShortScaleNull("hola cómo estás", T, 1000); - assertNumberGroupShortScaleNull("129000", F, 1000000000); - assertNumberGroupShortScaleNull("5000000", T, 1000000000); - assertNumberGroupShortScaleNull("un ciento seis", F, 999); - 
assertNumberGroupShortScaleNull("doce", T, 0); - assertNumberGroupShortScaleNull("site mil millones", F, 1000); - assertNumberGroupShortScaleNull("nueve mil uno", T, 1000); - assertNumberGroupShortScaleNull("ocho millones de personas", F, 1000000); - assertNumberGroupShortScaleNull(" diez ", T, 1000000); + assertNumberGroupShortScaleNull("", T, 1000000000); + assertNumberGroupShortScaleNull("hola", F, 1000000); + assertNumberGroupShortScaleNull("129000", F, 1000000000); + assertNumberGroupShortScaleNull("ciento seis", F, 999); + assertNumberGroupShortScaleNull("doce", T, 0); + assertNumberGroupShortScaleNull("siete mil millones", F, 1000); + assertNumberGroupShortScaleNull("nueve mil uno", T, 1000); // Should be "nueve mil y uno" + assertNumberGroupShortScaleNull("ocho millones de personas", F, 1000000); + assertNumberGroupShortScaleNull(" diez ", T, 1000000); } -} +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java index b8004bd7..db3d34d8 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java @@ -21,44 +21,45 @@ protected Parser numberParser() { return new SpanishParser(); } - //TODO Spanish translation @Test public void testNumberFirst() { - assertNumberFirst("it is nineteen sixty four trillionths", T, F, n(1964e-12, F)); - assertNumberFirst("36 twelfths of apple", F, T, n(3, F)); - assertNumberFirst("I'm really one hundred and eighth", F, F, n(100, F)); - assertNumberFirst("I'm really one hundred and eighth", T, T, n(108, T)); + // NOTE (ES): Spanish uses long scale, so "trillonésima" is 10^-18. 
+ assertNumberFirst("es mil novecientos sesenta y cuatro trillonésimas", T, F, n(1964e-18, F)); + assertNumberFirst("treinta y seis doceavos de manzana", F, T, n(3, F)); + assertNumberFirst("soy realmente el ciento ocho", F, F, n(100, F)); + assertNumberFirst("soy realmente el ciento ocho", T, T, n(108, T)); } @Test public void testNumberMixedWithText() { - assertNumberMixedWithText(" hello ciao!, 3/5 or four sevenths?", T, F, " hello ciao!, ", n(3.0 / 5.0, F), " or ", n(4.0 / 7.0, F), "?"); - assertNumberMixedWithText(" hello ciao!, four sevenths or 3/5?", T, T, " hello ciao!, ", n(4.0 / 7.0, F), " or ", n(3.0 / 5.0, F), "?"); - assertNumberMixedWithText("three billionth plus two", T, T, n(3000000000L, T), " ", n(2, F)); - assertNumberMixedWithText("one billionth and sixteen sixty four", T, F, n(1.0 / 1000000000.0, F), " and ", n(1664, F)); - assertNumberMixedWithText("two billionths minus fifty eight", F, T, n(2000000000000L, T), " ", n(-58, F)); - assertNumberMixedWithText("nine billionths times eleven", F, F, n(9.0 / 1000000000000.0, F), " times ", n(11, F)); - assertNumberMixedWithText("three halves, not eleven quarters", F, T, n(3.0 / 2.0, F), ", not ", n(11.0 / 4.0, F)); - assertNumberMixedWithText("six pairs equals a dozen ", F, T, n(12, F), " equals ", n(12, F), " "); - assertNumberMixedWithText("a dozen scores is not a gross", F, T, n(240, F), " is not ", n(144, F)); - assertNumberMixedWithText("6 quadrillionths of a cake", F, T, n(6e24, T), " of a cake"); - assertNumberMixedWithText("is nineteen sixty four quadrillionth", F, F, "is ", n(1964e-24, F)); - assertNumberMixedWithText("I'm twenty three years old.", T, F, "I'm ", n(23, F), " years old."); - assertNumberMixedWithText("The quintillionth", F, F, "The ", n(1e30, T)); - assertNumberMixedWithText("One quintillionth", T, F, n(1e-18, F)); - assertNumberMixedWithText("One quintillionth", T, T, n(1000000000000000000L, T)); - assertNumberMixedWithText("One billion", F, T, n(1000000000000L, F)); + 
assertNumberMixedWithText(" hola qué tal!, 3/5 o cuatro séptimos?", T, F, " hola qué tal!, ", n(3.0 / 5.0, F), " o ", n(4.0 / 7.0, F), "?"); + assertNumberMixedWithText(" hola qué tal!, cuatro séptimos o 3/5?", T, T, " hola qué tal!, ", n(4.0 / 7.0, F), " o ", n(3.0 / 5.0, F), "?"); + // NOTE (ES): "tres milmillonésimo" (three billionth in short scale) is not standard. Using long scale. + // "tres billonésimo" -> 3 * 10^-12. + assertNumberMixedWithText("tres billonésimo más dos", T, T, n(3e-12, T), " más ", n(2, F)); + // NOTE (ES): "un billón" is 10^12. + assertNumberMixedWithText("un billón y mil seiscientos sesenta y cuatro", F, F, n(1e12, F), " y ", n(1664, F)); + assertNumberMixedWithText("dos billonésimas menos cincuenta y ocho", F, T, n(2e-12, T), " menos ", n(-58, F)); + assertNumberMixedWithText("nueve milmillonésimas por once", F, F, n(9e-9, F), " por ", n(11, F)); + assertNumberMixedWithText("tres mitades, no once cuartos", F, T, n(1.5, F), ", no ", n(2.75, F)); + assertNumberMixedWithText("seis pares es igual a una docena ", F, T, n(12, F), " es igual a ", n(12, F), " "); + assertNumberMixedWithText("una docena de veintenas no es una centena", F, T, n(240, F), " no es ", n(100, F)); + assertNumberMixedWithText("tengo veintitrés años.", T, F, "tengo ", n(23, F), " años."); + // NOTE (ES): "quintillionth" (short scale) translates to "trillonésimo" (long scale). + assertNumberMixedWithText("El trillonésimo", F, F, "El ", n(1e18, T)); + assertNumberMixedWithText("Un trillonésimo", T, F, n(1e-18, F)); } @Test public void testDurationFirst() { - assertDurationFirst("Set a two minute and two billion nanosecond timer", F, t(2 * MINUTE + 2000L)); - assertDurationFirst("you know two years ago are not billions of days", T, t(2 * YEAR)); + // NOTE (ES): "mil millones" is 10^9. 
+ assertDurationFirst("Pon un temporizador de dos minutos y mil millones de nanosegundos", F, t(2 * MINUTE + 1000L)); // 10^9 ns = 1s + assertDurationFirst("sabes que hace dos años no son mil millones de días", T, t(2 * YEAR)); } @Test public void testDurationMixedWithText() { - assertDurationMixedWithText("2ns and four hours while six milliseconds.", F, t(4 * HOUR, 2), " while ", t(0, 6 * MILLIS), "."); - assertDurationMixedWithText("you know two years ago are not billions of day", T, "you know ", t(2 * YEAR), " ago are not ", t(1000000000L * DAY)); + assertDurationMixedWithText("2ns y cuatro horas mientras seis milisegundos.", F, t(4 * HOUR, 2), " mientras ", t(0, 6 * MILLIS), "."); + assertDurationMixedWithText("sabes que hace dos años no son mil millones de días", T, "sabes que ", t(-2 * YEAR), " no son ", t(1000000000L * DAY)); } -} +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java index 0f4ea252..207a35a7 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java @@ -17,219 +17,137 @@ public static void setup() { pf = new ParserFormatter(new SpanishFormatter(), null); } - //TODO Spanish translation - @Test public void smallIntegers() { - assertEquals("zero", pf.pronounceNumber(0).get()); - assertEquals("one", pf.pronounceNumber(1).get()); - assertEquals("ten", pf.pronounceNumber(10).get()); - assertEquals("fifteen", pf.pronounceNumber(15).get()); - assertEquals("twenty", pf.pronounceNumber(20).get()); - assertEquals("twenty seven", pf.pronounceNumber(27).get()); - assertEquals("thirty", pf.pronounceNumber(30).get()); - assertEquals("thirty three", pf.pronounceNumber(33).get()); + assertEquals("cero", pf.pronounceNumber(0).get()); + assertEquals("uno", pf.pronounceNumber(1).get()); + assertEquals("diez", 
pf.pronounceNumber(10).get()); + assertEquals("quince", pf.pronounceNumber(15).get()); + assertEquals("veinte", pf.pronounceNumber(20).get()); + // NOTE (ES): Numbers from 21-29 are single words in Spanish. + assertEquals("veintisiete", pf.pronounceNumber(27).get()); + assertEquals("treinta", pf.pronounceNumber(30).get()); + // NOTE (ES): Spanish uses "y" to connect tens and units above 30. + assertEquals("treinta y tres", pf.pronounceNumber(33).get()); } @Test public void negativeSmallIntegers() { - assertEquals("minus one", pf.pronounceNumber(-1).get()); - assertEquals("minus ten", pf.pronounceNumber(-10).get()); - assertEquals("minus fifteen", pf.pronounceNumber(-15).get()); - assertEquals("minus twenty", pf.pronounceNumber(-20).get()); - assertEquals("minus twenty seven", pf.pronounceNumber(-27).get()); - assertEquals("minus thirty", pf.pronounceNumber(-30).get()); - assertEquals("minus thirty three", pf.pronounceNumber(-33).get()); + assertEquals("menos uno", pf.pronounceNumber(-1).get()); + assertEquals("menos diez", pf.pronounceNumber(-10).get()); + assertEquals("menos quince", pf.pronounceNumber(-15).get()); + assertEquals("menos veinte", pf.pronounceNumber(-20).get()); + assertEquals("menos veintisiete", pf.pronounceNumber(-27).get()); + assertEquals("menos treinta", pf.pronounceNumber(-30).get()); + assertEquals("menos treinta y tres", pf.pronounceNumber(-33).get()); } @Test public void decimals() { - assertEquals("zero point zero five", pf.pronounceNumber(0.05).get()); - assertEquals("minus zero point zero five", pf.pronounceNumber(-0.05).get()); - assertEquals("one point two three", pf.pronounceNumber(1.234).get()); - assertEquals("twenty one point two six four", pf.pronounceNumber(21.264).places(5).get()); - assertEquals("twenty one point two six four", pf.pronounceNumber(21.264).places(4).get()); - assertEquals("twenty one point two six four", pf.pronounceNumber(21.264).places(3).get()); - assertEquals("twenty one point two six", 
pf.pronounceNumber(21.264).places(2).get()); - assertEquals("twenty one point three", pf.pronounceNumber(21.264).places(1).get()); - assertEquals("twenty one", pf.pronounceNumber(21.264).places(0).get()); - assertEquals("minus twenty one point two six four", pf.pronounceNumber(-21.264).places(5).get()); - assertEquals("minus twenty one point two six four", pf.pronounceNumber(-21.264).places(4).get()); - assertEquals("minus twenty one point two six four", pf.pronounceNumber(-21.264).places(3).get()); - assertEquals("minus twenty one point two six", pf.pronounceNumber(-21.264).places(2).get()); - assertEquals("minus twenty one point three", pf.pronounceNumber(-21.264).places(1).get()); - assertEquals("minus twenty one", pf.pronounceNumber(-21.264).places(0).get()); + // NOTE (ES): Decimal separator is "coma". + assertEquals("cero coma cero cinco", pf.pronounceNumber(0.05).get()); + assertEquals("menos cero coma cero cinco", pf.pronounceNumber(-0.05).get()); + assertEquals("uno coma dos tres cuatro", pf.pronounceNumber(1.234).get()); + assertEquals("veintiuno coma dos seis cuatro", pf.pronounceNumber(21.264).places(5).get()); + assertEquals("veintiuno coma dos seis cuatro", pf.pronounceNumber(21.264).places(4).get()); + assertEquals("veintiuno coma dos seis cuatro", pf.pronounceNumber(21.264).places(3).get()); + assertEquals("veintiuno coma dos seis", pf.pronounceNumber(21.264).places(2).get()); + assertEquals("veintiuno coma tres", pf.pronounceNumber(21.264).places(1).get()); + assertEquals("veintiuno", pf.pronounceNumber(21.264).places(0).get()); + assertEquals("menos veintiuno coma dos seis cuatro", pf.pronounceNumber(-21.264).places(3).get()); + assertEquals("menos veintiuno coma tres", pf.pronounceNumber(-21.264).places(1).get()); } @Test public void roundingDecimals() { - assertEquals("zero", pf.pronounceNumber(0.05).places(0).get()); - assertEquals("zero", pf.pronounceNumber(-0.4).places(0).get()); - assertEquals("minus twenty two", 
pf.pronounceNumber(-21.7).places(0).get()); - assertEquals("eighty nine", pf.pronounceNumber(89.2).places(0).get()); - assertEquals("ninety", pf.pronounceNumber(89.9).places(0).get()); - assertEquals("minus one", pf.pronounceNumber(-0.5).places(0).get()); - assertEquals("zero", pf.pronounceNumber(-0.4).places(0).get()); - assertEquals("six point three", pf.pronounceNumber(6.28).places(1).get()); - assertEquals("minus three point one", pf.pronounceNumber(-3.14).places(1).get()); - // note: 3.15 does not yield "three point two" because of floating point errors - assertEquals("three point two", pf.pronounceNumber(3.150001).places(1).get()); - assertEquals("zero point three", pf.pronounceNumber(0.25).places(1).get()); - assertEquals("minus zero point three", pf.pronounceNumber(-0.25).places(1).get()); - assertEquals("nineteen", pf.pronounceNumber(19.004).get()); + assertEquals("cero", pf.pronounceNumber(0.05).places(0).get()); + assertEquals("cero", pf.pronounceNumber(-0.4).places(0).get()); + assertEquals("menos veintidós", pf.pronounceNumber(-21.7).places(0).get()); + assertEquals("ochenta y nueve", pf.pronounceNumber(89.2).places(0).get()); + assertEquals("noventa", pf.pronounceNumber(89.9).places(0).get()); + assertEquals("menos uno", pf.pronounceNumber(-0.5).places(0).get()); + assertEquals("seis coma tres", pf.pronounceNumber(6.28).places(1).get()); + assertEquals("tres coma dos", pf.pronounceNumber(3.150001).places(1).get()); + assertEquals("cero coma tres", pf.pronounceNumber(0.25).places(1).get()); + assertEquals("diecinueve", pf.pronounceNumber(19.004).get()); } @Test public void hundred() { - assertEquals("one hundred", pf.pronounceNumber(100).get()); - assertEquals("six hundred and seventy eight", pf.pronounceNumber(678).get()); - - assertEquals("one hundred and three million, two hundred and fifty four thousand, six hundred and fifty four", + // NOTE (ES): "cien" is used for exactly 100, "ciento" for compounds (e.g., 101 -> "ciento uno"). 
+ assertEquals("cien", pf.pronounceNumber(100).get()); + assertEquals("seiscientos setenta y ocho", pf.pronounceNumber(678).get()); + assertEquals("ciento tres millones doscientos cincuenta y cuatro mil seiscientos cincuenta y cuatro", pf.pronounceNumber(103254654).get()); - assertEquals("one million, five hundred and twelve thousand, four hundred and fifty seven", + assertEquals("un millón quinientos doce mil cuatrocientos cincuenta y siete", pf.pronounceNumber(1512457).get()); - assertEquals("two hundred and nine thousand, nine hundred and ninety six", + assertEquals("doscientos nueve mil novecientos noventa y seis", pf.pronounceNumber(209996).get()); } @Test public void year() { - assertEquals("fourteen fifty six", pf.pronounceNumber(1456).get()); - assertEquals("nineteen eighty four", pf.pronounceNumber(1984).get()); - assertEquals("eighteen oh one", pf.pronounceNumber(1801).get()); - assertEquals("eleven hundred", pf.pronounceNumber(1100).get()); - assertEquals("twelve oh one", pf.pronounceNumber(1201).get()); - assertEquals("fifteen ten", pf.pronounceNumber(1510).get()); - assertEquals("ten oh six", pf.pronounceNumber(1006).get()); - assertEquals("one thousand", pf.pronounceNumber(1000).get()); - assertEquals("two thousand", pf.pronounceNumber(2000).get()); - assertEquals("two thousand, fifteen", pf.pronounceNumber(2015).get()); - assertEquals("four thousand, eight hundred and twenty seven", pf.pronounceNumber(4827).get()); + // NOTE (ES): Years are typically pronounced fully in Spanish. "nineteen eighty four" is not used. 
+ assertEquals("mil cuatrocientos cincuenta y seis", pf.pronounceNumber(1456).get()); + assertEquals("mil novecientos ochenta y cuatro", pf.pronounceNumber(1984).get()); + assertEquals("mil ochocientos uno", pf.pronounceNumber(1801).get()); + assertEquals("mil cien", pf.pronounceNumber(1100).get()); + assertEquals("mil doscientos uno", pf.pronounceNumber(1201).get()); + assertEquals("mil quinientos diez", pf.pronounceNumber(1510).get()); + assertEquals("mil seis", pf.pronounceNumber(1006).get()); + assertEquals("mil", pf.pronounceNumber(1000).get()); + assertEquals("dos mil", pf.pronounceNumber(2000).get()); + assertEquals("dos mil quince", pf.pronounceNumber(2015).get()); } @Test public void scientificNotation() { - assertEquals("zero", pf.pronounceNumber(0.0).scientific(T).get()); - assertEquals("three point three times ten to the power of one", + assertEquals("cero", pf.pronounceNumber(0.0).scientific(T).get()); + assertEquals("tres coma tres por diez a la uno", pf.pronounceNumber(33).scientific(T).get()); - assertEquals("two point nine nine times ten to the power of eight", + assertEquals("dos coma nueve nueve por diez a la ocho", pf.pronounceNumber(299492458).scientific(T).get()); - assertEquals("two point nine nine seven nine two five times ten to the power of eight", - pf.pronounceNumber(299792458).scientific(T).places(6).get()); - assertEquals("one point six seven two times ten to the power of negative twenty seven", + assertEquals("uno coma seis siete dos por diez a la menos veintisiete", pf.pronounceNumber(1.672e-27).scientific(T).places(3).get()); - - // auto scientific notation when number is too big to be pronounced - assertEquals("two point nine five times ten to the power of twenty four", - pf.pronounceNumber(2.9489e24).get()); - } - - private void assertShortLongScale(final double number, - final String shortScale, - final String longScale) { - assertEquals(shortScale, pf.pronounceNumber(number).shortScale(T).get()); - assertEquals(longScale, 
pf.pronounceNumber(number).shortScale(F).get()); } @Test public void largeNumbers() { - assertShortLongScale(1001892, - "one million, one thousand, eight hundred and ninety two", - "one million, one thousand, eight hundred and ninety two"); - assertShortLongScale(299792458, - "two hundred and ninety nine million, seven hundred and ninety two thousand, four hundred and fifty eight", - "two hundred and ninety nine million, seven hundred and ninety two thousand, four hundred and fifty eight"); - assertShortLongScale(-100202133440.0, - "minus one hundred billion, two hundred and two million, one hundred and thirty three thousand, four hundred and forty", - "minus one hundred thousand two hundred and two million, one hundred and thirty three thousand, four hundred and forty"); - assertShortLongScale(20102000987000.0, - "twenty trillion, one hundred and two billion, nine hundred and eighty seven thousand", - "twenty billion, one hundred and two thousand million, nine hundred and eighty seven thousand"); - assertShortLongScale(-2061000560007060.0, - "minus two quadrillion, sixty one trillion, five hundred and sixty million, seven thousand, sixty", - "minus two thousand sixty one billion, five hundred and sixty million, seven thousand, sixty"); - assertShortLongScale(9111202032999999488.0, // floating point errors - "nine quintillion, one hundred and eleven quadrillion, two hundred and two trillion, thirty two billion, nine hundred and ninety nine million, nine hundred and ninety nine thousand, four hundred and eighty eight", - "nine trillion, one hundred and eleven thousand two hundred and two billion, thirty two thousand nine hundred and ninety nine million, nine hundred and ninety nine thousand, four hundred and eighty eight"); - - assertShortLongScale(29000.0, "twenty nine thousand", "twenty nine thousand"); - assertShortLongScale(301000.0, "three hundred and one thousand", "three hundred and one thousand"); - assertShortLongScale(4000000.0, "four million", "four 
million"); - assertShortLongScale(50000000.0, "fifty million", "fifty million"); - assertShortLongScale(630000000.0, "six hundred and thirty million", "six hundred and thirty million"); - assertShortLongScale(7000000000.0, "seven billion", "seven thousand million"); - assertShortLongScale(16000000000.0, "sixteen billion", "sixteen thousand million"); - assertShortLongScale(923000000000.0, "nine hundred and twenty three billion", "nine hundred and twenty three thousand million"); - assertShortLongScale(1000000000000.0, "one trillion", "one billion"); - assertShortLongScale(29000000000000.0, "twenty nine trillion", "twenty nine billion"); - assertShortLongScale(308000000000000.0, "three hundred and eight trillion", "three hundred and eight billion"); - assertShortLongScale(4000000000000000.0, "four quadrillion", "four thousand billion"); - assertShortLongScale(52000000000000000.0, "fifty two quadrillion", "fifty two thousand billion"); - assertShortLongScale(640000000000000000.0, "six hundred and forty quadrillion", "six hundred and forty thousand billion"); - assertShortLongScale(7000000000000000000.0, "seven quintillion", "seven trillion"); - - // TODO maybe improve this - assertShortLongScale(1000001, "one million, one", "one million, one"); - assertShortLongScale(-2000000029, "minus two billion, twenty nine", "minus two thousand million, twenty nine"); + // NOTE (ES): Spanish uses the long scale exclusively. Short scale tests are not applicable. + // millardo = 10^9, billón = 10^12, trillón = 10^18. 
+ assertEquals("un millón mil ochocientos noventa y dos", pf.pronounceNumber(1001892).get()); + assertEquals("doscientos noventa y nueve millones setecientos noventa y dos mil cuatrocientos cincuenta y ocho", pf.pronounceNumber(299792458).get()); + assertEquals("menos cien mil doscientos dos millones ciento treinta y tres mil cuatrocientos cuarenta", pf.pronounceNumber(-100202133440.0).get()); + assertEquals("veinte billones ciento dos mil millones novecientos ochenta y siete mil", pf.pronounceNumber(20102000987000.0).get()); + assertEquals("siete trillones", pf.pronounceNumber(7000000000000000000.0).get()); + assertEquals("un millón uno", pf.pronounceNumber(1000001).get()); } @Test public void ordinal() { - // small numbers - assertEquals("first", pf.pronounceNumber(1).shortScale(T).ordinal(T).get()); - assertEquals("first", pf.pronounceNumber(1).shortScale(F).ordinal(T).get()); - assertEquals("tenth", pf.pronounceNumber(10).shortScale(T).ordinal(T).get()); - assertEquals("tenth", pf.pronounceNumber(10).shortScale(F).ordinal(T).get()); - assertEquals("fifteenth", pf.pronounceNumber(15).shortScale(T).ordinal(T).get()); - assertEquals("fifteenth", pf.pronounceNumber(15).shortScale(F).ordinal(T).get()); - assertEquals("twentieth", pf.pronounceNumber(20).shortScale(T).ordinal(T).get()); - assertEquals("twentieth", pf.pronounceNumber(20).shortScale(F).ordinal(T).get()); - assertEquals("twenty seventh", pf.pronounceNumber(27).shortScale(T).ordinal(T).get()); - assertEquals("twenty seventh", pf.pronounceNumber(27).shortScale(F).ordinal(T).get()); - assertEquals("thirtieth", pf.pronounceNumber(30).shortScale(T).ordinal(T).get()); - assertEquals("thirtieth", pf.pronounceNumber(30).shortScale(F).ordinal(T).get()); - assertEquals("thirty third", pf.pronounceNumber(33).shortScale(T).ordinal(T).get()); - assertEquals("thirty third", pf.pronounceNumber(33).shortScale(F).ordinal(T).get()); - assertEquals("hundredth", pf.pronounceNumber(100).shortScale(T).ordinal(T).get()); - 
assertEquals("hundredth", pf.pronounceNumber(100).shortScale(F).ordinal(T).get()); - assertEquals("thousandth", pf.pronounceNumber(1000).shortScale(T).ordinal(T).get()); - assertEquals("thousandth", pf.pronounceNumber(1000).shortScale(F).ordinal(T).get()); - assertEquals("ten thousandth", pf.pronounceNumber(10000).shortScale(T).ordinal(T).get()); - assertEquals("ten thousandth", pf.pronounceNumber(10000).shortScale(F).ordinal(T).get()); - assertEquals("two hundredth", pf.pronounceNumber(200).shortScale(T).ordinal(T).get()); - assertEquals("two hundredth", pf.pronounceNumber(200).shortScale(F).ordinal(T).get()); - assertEquals("eighteen thousand, six hundred and ninety first", pf.pronounceNumber(18691).ordinal(T).shortScale(T).get()); - assertEquals("eighteen thousand, six hundred and ninety first", pf.pronounceNumber(18691).ordinal(T).shortScale(F).get()); - assertEquals("one thousand, five hundred and sixty seventh", pf.pronounceNumber(1567).ordinal(T).shortScale(T).get()); - assertEquals("one thousand, five hundred and sixty seventh", pf.pronounceNumber(1567).ordinal(T).shortScale(F).get()); - - // big numbers - assertEquals("eighteen millionth", pf.pronounceNumber(18000000).ordinal(T).get()); - assertEquals("eighteen million, hundredth", pf.pronounceNumber(18000100).ordinal(T).get()); - assertEquals("one hundred and twenty seven billionth", pf.pronounceNumber(127000000000.0).ordinal(T).shortScale(T).get()); - assertEquals("two hundred and one thousand millionth", pf.pronounceNumber(201000000000.0).ordinal(T).shortScale(F).get()); - assertEquals("nine hundred and thirteen billion, eighty million, six hundred thousand, sixty fourth", pf.pronounceNumber(913080600064.0).ordinal(T).shortScale(T).get()); - assertEquals("nine hundred and thirteen thousand eighty million, six hundred thousand, sixty fourth", pf.pronounceNumber(913080600064.0).ordinal(T).shortScale(F).get()); - assertEquals("one trillion, two millionth", 
pf.pronounceNumber(1000002000000.0).ordinal(T).shortScale(T).get()); - assertEquals("one billion, two millionth", pf.pronounceNumber(1000002000000.0).ordinal(T).shortScale(F).get()); - assertEquals("four trillion, millionth", pf.pronounceNumber(4000001000000.0).ordinal(T).shortScale(T).get()); - assertEquals("four billion, millionth", pf.pronounceNumber(4000001000000.0).ordinal(T).shortScale(F).get()); - - // decimal numbers and scientific notation: the behaviour should be the same as with ordinal=F - assertEquals("two point seven eight", pf.pronounceNumber(2.78).ordinal(T).get()); - assertEquals("third", pf.pronounceNumber(2.78).places(0).ordinal(T).get()); - assertEquals("nineteenth", pf.pronounceNumber(19.004).ordinal(T).get()); - assertEquals("eight hundred and thirty million, four hundred and thirty eight thousand, ninety two point one eight three", pf.pronounceNumber(830438092.1829).places(3).ordinal(T).get()); - assertEquals("two point five four times ten to the power of six", pf.pronounceNumber(2.54e6).ordinal(T).scientific(T).get()); + assertEquals("primero", pf.pronounceNumber(1).ordinal(T).get()); + assertEquals("décimo", pf.pronounceNumber(10).ordinal(T).get()); + assertEquals("decimoquinto", pf.pronounceNumber(15).ordinal(T).get()); + assertEquals("vigésimo", pf.pronounceNumber(20).ordinal(T).get()); + assertEquals("vigésimo séptimo", pf.pronounceNumber(27).ordinal(T).get()); + assertEquals("trigésimo", pf.pronounceNumber(30).ordinal(T).get()); + assertEquals("trigésimo tercero", pf.pronounceNumber(33).ordinal(T).get()); + assertEquals("centésimo", pf.pronounceNumber(100).ordinal(T).get()); + assertEquals("centésimo décimo", pf.pronounceNumber(110).ordinal(T).get()); + assertEquals("milésimo", pf.pronounceNumber(1000).ordinal(T).get()); + assertEquals("diezmilésimo", pf.pronounceNumber(10000).ordinal(T).get()); + assertEquals("millonésimo", pf.pronounceNumber(1000000).ordinal(T).get()); + // NOTE (ES): Decimal numbers are not pronounced as ordinals. 
The base number is made ordinal. + assertEquals("tercero", pf.pronounceNumber(2.78).places(0).ordinal(T).get()); + assertEquals("decimonoveno", pf.pronounceNumber(19.004).ordinal(T).get()); } @Test public void edgeCases() { - assertEquals("zero", pf.pronounceNumber(0.0).get()); - assertEquals("zero", pf.pronounceNumber(-0.0).get()); - assertEquals("infinity", pf.pronounceNumber(Double.POSITIVE_INFINITY).get()); - assertEquals("negative infinity", pf.pronounceNumber(Double.NEGATIVE_INFINITY).scientific(F).get()); - assertEquals("negative infinity", pf.pronounceNumber(Double.NEGATIVE_INFINITY).scientific(T).get()); - assertEquals("not a number", pf.pronounceNumber(Double.NaN).get()); + assertEquals("cero", pf.pronounceNumber(0.0).get()); + assertEquals("cero", pf.pronounceNumber(-0.0).get()); + assertEquals("infinito", pf.pronounceNumber(Double.POSITIVE_INFINITY).get()); + assertEquals("menos infinito", pf.pronounceNumber(Double.NEGATIVE_INFINITY).get()); + assertEquals("no es un número", pf.pronounceNumber(Double.NaN).get()); } -} +} \ No newline at end of file From 67246bee0fea5fda878530a55e8bd0f20630499a Mon Sep 17 00:00:00 2001 From: Stypox Date: Thu, 2 Oct 2025 11:07:56 +0200 Subject: [PATCH 30/30] Fix compilation --- .../org/dicio/numbers/lang/es/SpanishNumberExtractor.kt | 2 +- .../java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt index f032b6be..45056169 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt @@ -44,7 +44,7 @@ class SpanishNumberExtractor internal constructor(private val ts: TokenStream) { return divideByDenominatorIfPossible(number) } - private fun divideByDenominatorIfPossible(numberToEdit: Number?): 
Number? { + fun divideByDenominatorIfPossible(numberToEdit: Number?): Number? { if (numberToEdit == null) { // Spanish context: handles "un quinto" (a fifth), where "un" is the numerator. if (ts[0].isValue("un") || ts[0].isValue("una")) { diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java index 85bc0156..31884176 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java @@ -13,7 +13,7 @@ import static java.time.temporal.ChronoUnit.YEARS; import org.dicio.numbers.ParserFormatter; -import org.dicio.numbers.parser.SpanishParser; +import org.dicio.numbers.lang.es.SpanishParser; import org.dicio.numbers.parser.lexer.TokenStream; import org.dicio.numbers.test.WithTokenizerTestBase; import org.dicio.numbers.unit.Duration; @@ -77,7 +77,7 @@ private void assertRelativeDuration(final String s, final Duration expectedDurat private void assertTimeWithAmpm(final String s, final LocalTime expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::timeWithAmpm); } private void assertTimeWithAmpmNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::timeWithAmpm); } private void assertDateTime(final String s, final boolean preferMonthBeforeDay, final LocalDateTime expected, int finalTokenStreamPosition) { assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::dateTime); } - private void assertDateTimeNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::dateTime); } + private void assertDateTimeNull(final String s, final boolean preferMonthBeforeDay) { assertFunctionNull(s, preferMonthBeforeDay, SpanishDateTimeExtractor::dateTime); } // --- Spanish-specific tests --- @@ -142,7 +142,7 @@ public void 
testDate() { // NOTE (ES): Default Spanish format is DD/MM/YYYY. preferMonthBeforeDay=T will test for MM/DD/YYYY. assertDate("04/09/4096", F, LocalDate.of(4096, 9, 4), 5); assertDate("04/09/4096", T, LocalDate.of(4096, 4, 9), 5); - assertDate("13 4 2023", LocalDate.of(2023, 4, 13), 3); + assertDate("13 4 2023", F, LocalDate.of(2023, 4, 13), 3); assertDate("seis de julio de mil novecientos noventa y cinco", T, LocalDate.of(1995, 7, 6), 9); assertDate("jueves 26 de mayo de 2022", T, LocalDate.of(2022, 5, 26), 6); assertDate("2 de enero del 2 a.C.", T, LocalDate.of(-1, 1, 2), 7); // 2 BC is year -1