diff --git a/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt b/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt index 4f0628f7..0af8f938 100644 --- a/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt +++ b/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt @@ -3,6 +3,8 @@ package org.dicio.numbers import org.dicio.numbers.formatter.Formatter import org.dicio.numbers.lang.en.EnglishFormatter import org.dicio.numbers.lang.en.EnglishParser +import org.dicio.numbers.lang.es.SpanishFormatter +import org.dicio.numbers.lang.es.SpanishParser import org.dicio.numbers.lang.it.ItalianFormatter import org.dicio.numbers.lang.it.ItalianParser import org.dicio.numbers.parser.Parser @@ -12,6 +14,7 @@ object ParserFormatterBuilder { private val PARSER_FORMATTER_CLASSES_MAP = mapOf( "en" to ParserFormatterClasses(EnglishFormatter::class.java, EnglishParser::class.java), "it" to ParserFormatterClasses(ItalianFormatter::class.java, ItalianParser::class.java), + "es" to ParserFormatterClasses(SpanishFormatter::class.java, SpanishParser::class.java), ) @JvmStatic diff --git a/numbers/src/main/java/org/dicio/numbers/lang/en/EnglishNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/en/EnglishNumberExtractor.kt index 6597f0bd..8e4ba271 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/en/EnglishNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/en/EnglishNumberExtractor.kt @@ -3,6 +3,7 @@ package org.dicio.numbers.lang.en import org.dicio.numbers.parser.lexer.NumberToken import org.dicio.numbers.parser.lexer.TokenStream import org.dicio.numbers.unit.Number +import org.dicio.numbers.unit.isNullOrZero import org.dicio.numbers.util.NumberExtractorUtils class EnglishNumberExtractor internal constructor( @@ -187,9 +188,7 @@ class EnglishNumberExtractor internal constructor( } val denominator = numberInteger(false) - if (denominator == null || (denominator.isInteger && 
denominator.integerValue() == 0L) - || (denominator.isDecimal && denominator.decimalValue() == 0.0) - ) { + if (denominator.isNullOrZero()) { ts.position = originalPosition // not a fraction or division by zero, reset } else { return n.divide(denominator) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt new file mode 100644 index 00000000..b17f6452 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishDateTimeExtractor.kt @@ -0,0 +1,334 @@ +package org.dicio.numbers.lang.es + +import org.dicio.numbers.parser.lexer.TokenStream +import org.dicio.numbers.unit.Duration +import org.dicio.numbers.util.DateTimeExtractorUtils +import org.dicio.numbers.util.DurationExtractorUtils +import org.dicio.numbers.util.NumberExtractorUtils +import org.dicio.numbers.util.Utils +import java.time.LocalDate +import java.time.LocalDateTime +import java.time.LocalTime +import java.time.temporal.ChronoUnit + +class SpanishDateTimeExtractor internal constructor( + private val ts: TokenStream, + private val preferMonthBeforeDay: Boolean, // Added parameter to match English functionality + private val now: LocalDateTime +) { + private val numberExtractor = SpanishNumberExtractor(ts) + private val durationExtractor = DurationExtractorUtils(ts, numberExtractor::numberNoOrdinal) + private val dateTimeExtractor = DateTimeExtractorUtils(ts, now, this::extractIntegerInRange) + + private fun extractIntegerInRange(fromInclusive: Int, toInclusive: Int, allowOrdinal: Boolean = false): Int? { + // disallow fraction as / should be treated as a day/month/year separator + return NumberExtractorUtils.extractOneIntegerInRange( + ts, fromInclusive, toInclusive + ) { NumberExtractorUtils.signBeforeNumber(ts) { numberExtractor.numberInteger(allowOrdinal) } } + } + + + fun dateTime(): LocalDateTime? 
{ + // first try preferring having a date first, then try with time first + return ts.firstWhichUsesMostTokens({ dateTime(false) }, { dateTime(true) }) + } + + private fun dateTime(timeFirst: Boolean): LocalDateTime? { + var date: LocalDate? = null + var time: LocalTime? = null + + if (!timeFirst) { + // first try with special days, since duration-related words might be used + date = relativeSpecialDay() + + if (date == null) { + // then try with duration, since otherwise numbers would be interpreted as date days + val duration = Utils.firstNotNull(this::relativeDuration, dateTimeExtractor::relativeMonthDuration) + if (duration == null) { + // no normal relative duration found: finally try extracting a date normally + date = date() + } else if (duration.nanos == 0L && duration.days != 0L) { + // duration contains a specified day and no specified time, so a time can follow + date = duration.applyAsOffsetToDateTime(now).toLocalDate() + } else if (duration.nanos != 0L && duration.days == 0L && duration.months == 0L && duration.years == 0L) { + // duration contains a specified time, so a date could follow + time = duration.applyAsOffsetToDateTime(now).toLocalTime() + } else { + // duration contains mixed date&time, or has units >=month, nothing can follow + return duration.applyAsOffsetToDateTime(now) + } + } + } + + if (time == null) { + time = ts.tryOrSkipDateTimeIgnore(date != null) { this.timeWithAmpm() } + } + + if (date == null && time != null) { + // try to extract a date after the time + val originalPosition = ts.position + val duration = ts.tryOrSkipDateTimeIgnore(true) { this.relativeDuration() } + if (duration == null) { + date = ts.tryOrSkipDateTimeIgnore(true) { Utils.firstNotNull(this::relativeSpecialDay, this::date) } + } else if (duration.nanos == 0L && duration.days != 0L) { + date = duration.applyAsOffsetToDateTime(now).toLocalDate() + } else { + ts.position = originalPosition + } + } + + return if (date == null) { + time?.atDate(now.toLocalDate()) + 
} else { + time?.let { date.atTime(it) } ?: date.atTime(now.toLocalTime()) + } + } + + fun timeWithAmpm(): LocalTime? { + var time = time() + val pm: Boolean? + if (time == null) { + // if there is no time, maybe there is a moment of day (not am/pm though) preceding? + val momentOfDay = momentOfDay() ?: return null + time = ts.tryOrSkipDateTimeIgnore(true) { this.time() } + if (time == null) { + // found moment of day without a specific time + return LocalTime.of(momentOfDay, 0) + } else { + // use moment of day before time to determine am/pm + pm = DateTimeExtractorUtils.isMomentOfDayPm(momentOfDay) + } + } else { + // found a time, now look for am/pm or a moment of day + pm = ts.tryOrSkipDateTimeIgnore(true) { + Utils.firstNotNull( + dateTimeExtractor::ampm, + { momentOfDay()?.let(DateTimeExtractorUtils::isMomentOfDayPm) } + ) + } + } + + if (time.hour != 0 && pm != null) { + // AM/PM should not do anything after 0 (e.g. 0pm) + if (!pm && time.hour == 12) { + // Spanish context: 12 am is midnight + time = time.withHour(0) + } else if (pm && !DateTimeExtractorUtils.isMomentOfDayPm(time.hour)) { + // time must be in the afternoon, but time is not already, correct it + time = time.withHour((time.hour + 12) % DateTimeExtractorUtils.HOURS_IN_DAY) + } + } + return time + } + + fun time(): LocalTime? 
{ + val originalPosition = ts.position + val leadingSpecialMinute = specialMinute() // special minute placed before the hour + // usual Spanish order puts the hour first ("seis menos cuarto", "seis y media"), so it is looked for again below + val hour = Utils.firstNotNull(this::noonMidnightLike, this::hour) + if (hour == null) { + ts.position = originalPosition + return null + } + val specialMinute = leadingSpecialMinute ?: if (ts[0].isValue("menos") || ts[0].isValue("y") || ts[0].isValue("con")) this.specialMinute() else null // after the hour a connector is required so a lone "cuarto"/"media" is left alone + if (specialMinute != null) { + // negative special minute, e.g. "seis menos cuarto" (quarter to six): count back from the hour + return if (specialMinute < 0) { + LocalTime.of((hour + 23) % 24, 60 + specialMinute) + } else { + // positive special minute, e.g. "seis y cuarto" (quarter past six) or "seis y media" (half past six) + LocalTime.of(hour, specialMinute) + } + } + + var result = LocalTime.of(hour, 0) + + // Handle "en punto" (o'clock) + if (ts[0].isValue("en") && ts[1].isValue("punto")) { + ts.movePositionForwardBy(2) + return result + } + + val minute = ts.tryOrSkipDateTimeIgnore(true) { dateTimeExtractor.minute() } + if (minute == null) return result + + result = result.withMinute(minute) + val second = ts.tryOrSkipDateTimeIgnore(true) { dateTimeExtractor.second() } + + return second?.let { result.withSecond(it) } ?: result + } + + fun date(): LocalDate? { + var result = now.toLocalDate() + + val dayOfWeek = dateTimeExtractor.dayOfWeek() + val firstNum = ts.tryOrSkipDateTimeIgnore(dayOfWeek != null) { extractIntegerInRange(1, 31, true) } + + if (firstNum == null && dayOfWeek != null) { + // e.g. "próximo martes" (next Tuesday) + // TODO maybe enforce the date to be in the future?
+ return result.plus((dayOfWeek - result.dayOfWeek.ordinal).toLong(), ChronoUnit.DAYS) + } + + val monthName = ts.tryOrSkipDateTimeIgnore(firstNum != null) { dateTimeExtractor.monthName() } + if (monthName == null) { + // Date format is likely number-based, e.g., 25/12/2023 + result = if (firstNum == null) { + result.withDayOfMonth(1).withMonth(1) + } else { + val secondNumMax = if (firstNum <= 12) 31 else 12 + val secondNum = ts.tryOrSkipDateTimeIgnore(true) { extractIntegerInRange(1, secondNumMax, true) } + if (secondNum == null) { + return if (preferMonthBeforeDay && firstNum <= 12) { + result.withDayOfMonth(1).withMonth(firstNum) + } else { + result.withDayOfMonth(firstNum) + } + } else { + // Spanish standard is day-first (DD/MM), but we respect preferMonthBeforeDay + if ((preferMonthBeforeDay || secondNum > 12) && firstNum <= 12) { + result.withDayOfMonth(secondNum).withMonth(firstNum) + } else { + result.withDayOfMonth(firstNum).withMonth(secondNum) + } + } + } + } else { + // Date format includes a month name, e.g., "diciembre 25" + result = result.withMonth(monthName) + val dayNum = firstNum ?: ts.tryOrSkipDateTimeIgnore(true) { extractIntegerInRange(1, 31, true) } + result = dayNum?.let { result.withDayOfMonth(it) } ?: result.withDayOfMonth(1) + } + val dayOrMonthFound = firstNum != null || monthName != null + + var bcad = ts.tryOrSkipDateTimeIgnore(dayOrMonthFound) { dateTimeExtractor.bcad() } + val year = ts.tryOrSkipDateTimeIgnore(dayOrMonthFound && bcad == null) { extractIntegerInRange(0, 999999999) } + + if (year == null) { + return if (dayOrMonthFound) result else null + } + + if (bcad == null) { + bcad = dateTimeExtractor.bcad() + } + // Spanish linguistics: "a.C." (antes de Cristo) means Before Christ. + return result.withYear(year * (if (bcad == null || bcad) 1 else -1)) + } + + fun dayOfWeek(): Int? { + // Spanish context: "mar" is ambiguous for "martes" (Tuesday) and "marzo" (March). 
+ // This logic is correct and mirrors the Italian version's ambiguity. + if (ts[0].isValue("mar")) { + ts.movePositionForwardBy(1) + return 1 // Tuesday + } else { + return dateTimeExtractor.dayOfWeek() + } + } + + fun specialMinute(): Int? { + // Spanish context: handles "y cuarto" (15), "y media" (30), "menos cuarto" (-15). + val originalPosition = ts.position + + val isMinus = ts[0].isValue("menos") + val isPlus = ts[0].isValue("y") || ts[0].isValue("con") + + // Look for 'cuarto' or 'media' + val keywordIndex = if (isMinus || isPlus) 1 else 0 + if (ts[keywordIndex].isValue("cuarto")) { + ts.movePositionForwardBy(keywordIndex + 1) + return if (isMinus) -15 else 15 + } + if (ts[keywordIndex].isValue("media")) { + ts.movePositionForwardBy(keywordIndex + 1) + return if (isMinus) -30 else 30 // "menos media" is unusual for non-native speakers but possible + } + + ts.position = originalPosition + return null + } + + fun noonMidnightLike(): Int? = noonMidnightLikeOrMomentOfDay("noon_midnight_like") + + fun momentOfDay(): Int? = noonMidnightLikeOrMomentOfDay("moment_of_day") + + private fun noonMidnightLikeOrMomentOfDay(category: String): Int? { + val originalPosition = ts.position + var relativeIndicator = 0 // 0 = not found, otherwise the sign, +1 or -1 + if (ts[0].hasCategory("pre_special_hour")) { + if (ts[0].hasCategory("pre_relative_indicator")) { + relativeIndicator = if (ts[0].hasCategory("negative")) -1 else 1 + ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) + } else { + ts.movePositionForwardBy(1) + } + } + + if (ts[0].hasCategory(category)) { + // e.g. mediodía, tarde, noche + ts.movePositionForwardBy(1) + return (ts[-1].number!!.integerValue().toInt() + DateTimeExtractorUtils.HOURS_IN_DAY + relativeIndicator) % DateTimeExtractorUtils.HOURS_IN_DAY + } + + ts.position = originalPosition + return null + } + + fun hour(): Int? { + val originalPosition = ts.position + // skip words that usually come before hours, e.g. 
"a las", "hora" + ts.movePositionForwardBy(ts.indexOfWithoutCategory("pre_hour", 0)) + + val number = extractIntegerInRange(0, DateTimeExtractorUtils.HOURS_IN_DAY) + if (number == null) { + ts.position = originalPosition + return null + } + return number % DateTimeExtractorUtils.HOURS_IN_DAY // transform 24 into 0 + } + + private fun relativeSpecialDay(): LocalDate? { + val days = Utils.firstNotNull( + this::relativeYesterday, + dateTimeExtractor::relativeToday, + this::relativeTomorrow, + dateTimeExtractor::relativeDayOfWeekDuration + ) + return days?.let { now.toLocalDate().plusDays(it.toLong()) } + } + + fun relativeYesterday(): Int? { + // Spanish context: "anteayer" is a single word for "day before yesterday". + // The complex multi-word logic from English/Italian is not needed. + if (ts[0].hasCategory("day_before_yesterday")) { + ts.movePositionForwardBy(1) + return -2 + } + if (ts[0].hasCategory("yesterday")) { + ts.movePositionForwardBy(1) + return -1 + } + return null + } + + fun relativeTomorrow(): Int? { + // Spanish context: "pasado mañana" is a single token for "day after tomorrow". + if (ts[0].hasCategory("day_after_tomorrow")) { + ts.movePositionForwardBy(1) + return 2 + } + if (ts[0].hasCategory("tomorrow")) { + ts.movePositionForwardBy(1) + return 1 + } + return null + } + + fun relativeDuration(): Duration? { + // Spanish context: Handles "hace [duration]" (ago) and "[duration] después" (later). 
+ return dateTimeExtractor.relativeIndicatorDuration( + durationExtractor::duration, + { duration -> duration.multiply(-1) } + ) + } +} \ No newline at end of file diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt new file mode 100644 index 00000000..da40d1f3 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishFormatter.kt @@ -0,0 +1,195 @@ +package org.dicio.numbers.lang.es + +import org.dicio.numbers.formatter.Formatter +import org.dicio.numbers.unit.MixedFraction +import org.dicio.numbers.util.Utils +import java.time.LocalTime +import java.time.format.DateTimeFormatter +import java.util.Locale +import kotlin.math.abs + +class SpanishFormatter : Formatter("config/es-es") { + /** Formats [mixedFraction]: spelled out in Spanish words when [speech] is true, otherwise delegated to niceNumberNotSpeech. */ + override fun niceNumber(mixedFraction: MixedFraction, speech: Boolean): String { + if (speech) { + val sign = if (mixedFraction.negative) "menos " else "" + if (mixedFraction.numerator == 0) { + return sign + pronouncePositive(mixedFraction.whole, false) + } + + // Spanish context: a fraction read out as a bare number is masculine ("un medio", "un tercio"); + // the feminine forms ("una media") only agree with a following feminine noun, which is never present here + val singularDenominator = when (mixedFraction.denominator) { + 2 -> "medio" + 3 -> "tercio" // fraction noun, not the ordinal "tercero" + // ordinal names are used for the other denominators, e.g. "cuarto", "quinto" + else -> pronouncePositive(mixedFraction.denominator.toLong(), true) + } + val denominatorString = if (mixedFraction.numerator > 1) { + // pluralize, e.g. "quinto" -> "quintos" + "${singularDenominator}s" + } else { + singularDenominator + } + + // a single part is announced with the masculine article "un" + val numeratorString = if (mixedFraction.numerator == 1) { + "un" + } else { + pronouncePositive(mixedFraction.numerator.toLong(), false) + }
+ + return if (mixedFraction.whole == 0L) { + "$sign$numeratorString $denominatorString" + } else { + (sign + pronouncePositive(mixedFraction.whole, false) + + " y " + numeratorString + " " + denominatorString) + } + } else { + return niceNumberNotSpeech(mixedFraction) + } + } + /** Spells out [number] in Spanish: infinities and NaN get fixed names, scientific notation is used when [scientific] is set or the magnitude is very large, and decimals are read digit by digit after "coma". */ + override fun pronounceNumber(number: Double, places: Int, shortScale: Boolean, scientific: Boolean, ordinal: Boolean): String { + if (number == Double.POSITIVE_INFINITY) return "infinito" + if (number == Double.NEGATIVE_INFINITY) return "menos infinito" + if (java.lang.Double.isNaN(number)) return "no es un número" + + if (scientific || abs(number) > 999999999999999934463.0) { + val scientificFormatted = String.format(Locale.ENGLISH, "%E", number) // not a Spanish locale: its "," decimal separator would make parts[0].toDouble() throw + val parts = scientificFormatted.split("E".toRegex(), limit = 2).toTypedArray() + val power = parts[1].toInt().toDouble() + if (power != 0.0) { + val n = parts[0].toDouble() + return String.format( + "%s por diez a la %s", + pronounceNumber(n, places, shortScale, false, false), + pronounceNumber(power, places, shortScale, false, true) // exponent as ordinal + ) + } + } + + val result = StringBuilder() + var varNumber = number + if (varNumber < 0) { + varNumber = -varNumber + if (places != 0 || varNumber >= 0.5) result.append("menos ") + } + + val realPlaces = Utils.decimalPlacesNoFinalZeros(varNumber, places) + val numberIsWhole = realPlaces == 0 + val numberLong = varNumber.toLong() + (if (varNumber % 1 >= 0.5 && numberIsWhole) 1 else 0) + + result.append(pronouncePositive(numberLong, ordinal && numberIsWhole)) + + if (realPlaces > 0) { + if (result.toString() == "menos " || result.isEmpty()) result.append("cero") + // Spanish context: "coma" is the standard decimal separator + result.append(" coma") + val fractionalPart = String.format("%."
+ realPlaces + "f", varNumber % 1) + for (i in 2 until fractionalPart.length) { + result.append(" ") + result.append(NUMBER_NAMES[(fractionalPart[i].code - '0'.code).toLong()]) + } + } + return result.toString() + } + /** Spells out the non-negative integer [n] in Spanish; when [ordinal] is set the name from ORDINAL_NAMES is used if there is one, otherwise the cardinal spelling is returned. */ + private fun pronouncePositive(n: Long, ordinal: Boolean): String { + if (ordinal) { + ORDINAL_NAMES[n]?.let { return it } + // no dedicated ordinal name: fall through to the cardinal spelling instead of + // ending up in the empty-string branch below (which happened for e.g. 13..29) + } + NUMBER_NAMES[n]?.let { return it } + + return when { + n >= 1_000_000_000_000 -> buildString { val count = n / 1_000_000_000_000; if (count == 1L) append("un billón") else append(pronouncePositive(count, false)).append(" billones"); val rem = n % 1_000_000_000_000; if (rem > 0) append(" ").append(pronouncePositive(rem, false)) } + n >= 1_000_000 -> buildString { val count = n / 1_000_000; if (count == 1L) append("un millón") else append(pronouncePositive(count, false)).append(" millones"); val rem = n % 1_000_000; if (rem > 0) append(" ").append(pronouncePositive(rem, false)) } + n >= 1000 -> buildString { val count = n / 1000; if (count > 1) append(pronouncePositive(count, false)).append(" "); append("mil"); val rem = n % 1000; if (rem > 0) append(" ").append(pronouncePositive(rem, false)) } + n >= 100 -> buildString { append(HUNDRED_NAMES[n / 100 * 100]); val rem = n % 100; if (rem > 0) append(" ").append(pronouncePositive(rem, false)) } + n >= 30 -> buildString { append(NUMBER_NAMES[n / 10 * 10]); val rem = n % 10; if (rem > 0) append(" y ").append(pronouncePositive(rem, false)) } + else -> "" // not reachable for n >= 0: every value below 30 has an entry in NUMBER_NAMES + } + } + + override fun niceTime(time: LocalTime, speech: Boolean, use24Hour: Boolean, showAmPm: Boolean): String { + if (speech) { + if (time.hour == 0 && time.minute == 0) return "medianoche" + if (time.hour == 12 && time.minute == 0) return "mediodía" + + val result = StringBuilder() + // Spanish context: hours use 1-12 cycle for speech, not 0-23.
+ val hourForSpeech = if (use24Hour) time.hour else (if (time.hour % 12 == 0) 12 else time.hour % 12) + + if (time.minute == 45 && !use24Hour) { + // Spanish context: "menos cuarto" refers to the next hour. "Son las dos menos cuarto" is 1:45. + val nextHour = (hourForSpeech % 12) + 1 + result.append(getHourName(nextHour, true)).append(" menos cuarto") + } else { + result.append(getHourName(hourForSpeech, false)) + when (time.minute) { + 0 -> result.append(" en punto") + 15 -> result.append(" y cuarto") + 30 -> result.append(" y media") + else -> result.append(" y ").append(pronouncePositive(time.minute.toLong(), false)) + } + } + + if (showAmPm && !use24Hour) { + when { + time.hour < 6 -> result.append(" de la madrugada") + time.hour < 12 -> result.append(" de la mañana") + time.hour < 20 -> result.append(" de la tarde") + else -> result.append(" de la noche") + } + } + return result.toString() + } else { + val pattern = if (use24Hour) "HH:mm" else if (showAmPm) "h:mm a" else "h:mm" + return time.format(DateTimeFormatter.ofPattern(pattern, Locale("es", "ES"))) + } + } + + private fun getHourName(hour: Int, isForNextHour: Boolean): String { + // Spanish context: "la una" (one o'clock) is feminine singular. + // All other hours are feminine plural: "las dos", "las tres", etc. + val normalizedHour = if (hour == 0) 12 else hour + return if (normalizedHour == 1) { + "la una" + } else { + "las " + pronouncePositive(normalizedHour.toLong(), false) + } + } + + // "pronounceNumberDuration" is a simplification for contexts where gender doesn't matter, + // like "un minuto", but "una hora". The base "pronouncePositive" is more versatile. 
+ override fun pronounceNumberDuration(number: Long): String { + if (number == 1L) return "un" + return pronouncePositive(number, false) + } + + companion object { + private val NUMBER_NAMES = mapOf( + 0L to "cero", 1L to "uno", 2L to "dos", 3L to "tres", 4L to "cuatro", 5L to "cinco", + 6L to "seis", 7L to "siete", 8L to "ocho", 9L to "nueve", 10L to "diez", + 11L to "once", 12L to "doce", 13L to "trece", 14L to "catorce", 15L to "quince", + 16L to "dieciséis", 17L to "diecisiete", 18L to "dieciocho", 19L to "diecinueve", + 20L to "veinte", 21L to "veintiuno", 22L to "veintidós", 23L to "veintitrés", 24L to "veinticuatro", + 25L to "veinticinco", 26L to "veintiséis", 27L to "veintisiete", 28L to "veintiocho", 29L to "veintinueve", + 30L to "treinta", 40L to "cuarenta", 50L to "cincuenta", 60L to "sesenta", 70L to "setenta", + 80L to "ochenta", 90L to "noventa", 100L to "cien" + ) + // Spanish context: Hundreds have special names, e.g., 500 is "quinientos", not "cinco cientos". + private val HUNDRED_NAMES = mapOf( + 100L to "ciento", 200L to "doscientos", 300L to "trescientos", 400L to "cuatrocientos", 500L to "quinientos", + 600L to "seiscientos", 700L to "setecientos", 800L to "ochocientos", 900L to "novecientos" + ) + // Includes common ordinals. 
+ private val ORDINAL_NAMES = mapOf( + 1L to "primero", 2L to "segundo", 3L to "tercero", 4L to "cuarto", 5L to "quinto", + 6L to "sexto", 7L to "séptimo", 8L to "octavo", 9L to "noveno", 10L to "décimo", + 11L to "undécimo", 12L to "duodécimo" + ) + } +} \ No newline at end of file diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt new file mode 100644 index 00000000..45056169 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishNumberExtractor.kt @@ -0,0 +1,180 @@ +package org.dicio.numbers.lang.es + +import org.dicio.numbers.parser.lexer.TokenStream +import org.dicio.numbers.unit.Number +import org.dicio.numbers.unit.isNullOrZero +import org.dicio.numbers.util.NumberExtractorUtils + +class SpanishNumberExtractor internal constructor(private val ts: TokenStream) { + + fun numberPreferOrdinal(): Number? { + // first try with suffix multiplier, e.g. docena (dozen) + var number = numberSuffixMultiplier() + if (number == null) { + number = numberSignPoint(true) // then try with normal number + } + + // a number was found, maybe it has a valid denominator? + return divideByDenominatorIfPossible(number) + } + + fun numberPreferFraction(): Number? { + // first try with suffix multiplier, e.g. docena (dozen) + var number = numberSuffixMultiplier() + if (number == null) { + number = numberSignPoint(false) // then try without ordinal + } + + // a number was found, maybe it has a valid denominator? + number = divideByDenominatorIfPossible(number) + + if (number == null) { + // maybe an ordinal number? + number = numberSignPoint(true) + } + return number + } + + fun numberNoOrdinal(): Number? { + // This function is used internally for duration parsing. 
+ var number = numberSuffixMultiplier() + if (number == null) { + number = numberSignPoint(false) + } + return divideByDenominatorIfPossible(number) + } + + fun divideByDenominatorIfPossible(numberToEdit: Number?): Number? { + if (numberToEdit == null) { + // Spanish context: handles "un quinto" (a fifth), where "un" is the numerator. + if (ts[0].isValue("un") || ts[0].isValue("una")) { + val originalPosition = ts.position + ts.movePositionForwardBy(1) + val denominator = numberInteger(true) + if (denominator != null && denominator.isOrdinal && denominator.moreThan(2)) { + return Number(1).divide(denominator) + } else { + ts.position = originalPosition + } + } + return null + } + + // if numberToEdit is directly followed by an ordinal number then it is a fraction + if (!numberToEdit.isOrdinal && !numberToEdit.isDecimal && !ts[0].hasCategory("ignore")) { + val originalPosition = ts.position + val denominator = numberInteger(true) + if (denominator == null) { + // no denominator found: maybe a custom multiplier? e.g. media (=0.5), docena (=12) + if (ts[0].hasCategory("suffix_multiplier")) { + ts.movePositionForwardBy(1) + val multiplier = ts[-1].number!! + if (multiplier.isDecimal && (1 / multiplier.decimalValue()).toLong().toDouble() == (1 / multiplier.decimalValue())) { + return numberToEdit.divide((1 / multiplier.decimalValue()).toLong()) + } + return numberToEdit.multiply(multiplier) + } + } else if (denominator.isOrdinal && denominator.moreThan(2)) { + return numberToEdit.divide(denominator) // valid denominator, e.g. dos tercios + } else { + // invalid denominator, e.g. seis primeros + ts.position = originalPosition // restore to original position + } + } + return numberToEdit + } + + fun numberSuffixMultiplier(): Number? { + if (ts[0].hasCategory("suffix_multiplier")) { + ts.movePositionForwardBy(1) + return ts[-1].number + } + return null + } + + fun numberSignPoint(allowOrdinal: Boolean): Number? 
{ + return NumberExtractorUtils.signBeforeNumber(ts) { numberPoint(allowOrdinal) } + } + + fun numberPoint(allowOrdinal: Boolean): Number? { + var n = numberInteger(allowOrdinal) + if (n != null && n.isOrdinal) { + // no point or fraction separator can appear after an ordinal number + return n + } + + if (ts[0].hasCategory("point")) { + // parse point indicator from e.g. "veintiuno coma cuatro" (twenty one point four) + if (!ts[1].hasCategory("digit_after_point") && (!NumberExtractorUtils.isRawNumber(ts[1]) || ts[2].hasCategory("ordinal_suffix"))) { + return n // there is a lone comma at the end of the number: it is not part of it + } + ts.movePositionForwardBy(1) + if (n == null) n = Number(0.0) // numbers can start with just "coma" + + var magnitude = 0.1 + if (ts[0].value.length > 1 && NumberExtractorUtils.isRawNumber(ts[0])) { + for (i in ts[0].value.indices) { + n = n!!.plus((ts[0].value[i].code - '0'.code) * magnitude) + magnitude /= 10.0 + } + ts.movePositionForwardBy(1) + } else { + while (true) { + if (ts[0].hasCategory("digit_after_point") || (ts[0].value.length == 1 && NumberExtractorUtils.isRawNumber(ts[0]) && !ts[1].hasCategory("ordinal_suffix"))) { + n = n!!.plus(ts[0].number!!.multiply(magnitude)) + magnitude /= 10.0 + } else { + break + } + ts.movePositionForwardBy(1) + } + } + } else if (n != null && ts[0].hasCategory("fraction_separator")) { + // parse fraction from e.g. "veinte dividido entre cien" + val originalPosition = ts.position + ts.movePositionForwardBy(1) + if (ts[0].hasCategory("fraction_separator_secondary")) { + ts.movePositionForwardBy(1) + } + val denominator = numberInteger(false) + if (denominator.isNullOrZero()) { + ts.position = originalPosition // not a fraction or division by zero, reset + } else { + return n.divide(denominator) + } + } + return n + } + + fun numberInteger(allowOrdinal: Boolean): Number? 
{ + if (ts[0].hasCategory("ignore")) return null + + var n = NumberExtractorUtils.numberMadeOfGroups(ts, allowOrdinal, NumberExtractorUtils::numberGroupShortScale) + if (n == null) { + return NumberExtractorUtils.numberBigRaw(ts, allowOrdinal) // try to parse big raw numbers (>=1000), e.g. 1207 + } else if (n.isOrdinal) { + return n + } + + if (n.lessThan(1000)) { + // parse raw number n separated by comma, e.g. 123.045.006 + if (NumberExtractorUtils.isRawNumber(ts[-1]) && ts[0].hasCategory("thousand_separator") && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber(ts[1])) { + val originalPosition = ts.position - 1 + while (ts[0].hasCategory("thousand_separator") && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber(ts[1])) { + n = n!!.multiply(1000).plus(ts[1].number) + ts.movePositionForwardBy(2) + } + if (ts[0].hasCategory("ordinal_suffix")) { + if (allowOrdinal) { + ts.movePositionForwardBy(1) + return n!!.withOrdinal(true) + } else { + ts.position = originalPosition + return null + } + } + } + } + return n + } +} \ No newline at end of file diff --git a/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt new file mode 100644 index 00000000..814408e4 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/es/SpanishParser.kt @@ -0,0 +1,43 @@ +package org.dicio.numbers.lang.es + +import org.dicio.numbers.parser.Parser +import org.dicio.numbers.parser.lexer.TokenStream +import org.dicio.numbers.unit.Duration +import org.dicio.numbers.unit.Number +import org.dicio.numbers.util.DurationExtractorUtils +import java.time.LocalDateTime + +class SpanishParser : Parser("config/es-es") { + override fun extractNumber( + tokenStream: TokenStream, + shortScale: Boolean, + preferOrdinal: Boolean + ): () -> Number? { + // Spanish uses the long scale exclusively for number names. + // The shortScale parameter is ignored for pronunciation but passed for API consistency. 
+ val numberExtractor = SpanishNumberExtractor(tokenStream) + return if (preferOrdinal) { + numberExtractor::numberPreferOrdinal + } else { + numberExtractor::numberPreferFraction + } + } + + override fun extractDuration( + tokenStream: TokenStream, + shortScale: Boolean + ): () -> Duration? { + val numberExtractor = SpanishNumberExtractor(tokenStream) + return DurationExtractorUtils(tokenStream, numberExtractor::numberNoOrdinal)::duration + } + + override fun extractDateTime( + tokenStream: TokenStream, + shortScale: Boolean, + preferMonthBeforeDay: Boolean, + now: LocalDateTime + ): () -> LocalDateTime? { + // Pass all parameters down to the extractor, following the English model. + return SpanishDateTimeExtractor(tokenStream, preferMonthBeforeDay, now)::dateTime + } +} \ No newline at end of file diff --git a/numbers/src/main/java/org/dicio/numbers/lang/it/ItalianNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/it/ItalianNumberExtractor.kt index 9d84bcb3..e246a652 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/it/ItalianNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/it/ItalianNumberExtractor.kt @@ -2,6 +2,7 @@ package org.dicio.numbers.lang.it import org.dicio.numbers.parser.lexer.TokenStream import org.dicio.numbers.unit.Number +import org.dicio.numbers.unit.isNullOrZero import org.dicio.numbers.util.NumberExtractorUtils class ItalianNumberExtractor internal constructor(private val ts: TokenStream) { @@ -164,7 +165,7 @@ class ItalianNumberExtractor internal constructor(private val ts: TokenStream) { ts.movePositionForwardBy(separatorLength) val denominator = numberInteger(false) - if (denominator == null) { + if (denominator.isNullOrZero()) { ts.movePositionForwardBy(-separatorLength) // not a fraction, reset } else { return n.divide(denominator) diff --git a/numbers/src/main/java/org/dicio/numbers/unit/Number.kt b/numbers/src/main/java/org/dicio/numbers/unit/Number.kt index f6ed93d7..36eeec8e 100644 --- 
a/numbers/src/main/java/org/dicio/numbers/unit/Number.kt +++ b/numbers/src/main/java/org/dicio/numbers/unit/Number.kt @@ -1,6 +1,8 @@ package org.dicio.numbers.unit import java.util.Objects +import kotlin.contracts.ExperimentalContracts +import kotlin.contracts.contract /** * TODO add documentation @@ -31,6 +33,9 @@ class Number private constructor( val isInteger: Boolean get() = !isDecimal + val isZero: Boolean + get() = (isDecimal && decimalValue == 0.0) || (!isDecimal && integerValue == 0L) + fun integerValue(): Long { return integerValue } @@ -165,3 +170,11 @@ class Number private constructor( } } } + +@OptIn(ExperimentalContracts::class) +fun Number?.isNullOrZero(): Boolean { + contract { + returns(false) implies (this@isNullOrZero != null) + } + return this == null || this.isZero +} diff --git a/numbers/src/main/resources/config/es-es/date_time.json b/numbers/src/main/resources/config/es-es/date_time.json new file mode 100644 index 00000000..1f33facf --- /dev/null +++ b/numbers/src/main/resources/config/es-es/date_time.json @@ -0,0 +1,148 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^2\\d$", "format": "{xx}"}, + "4": {"match": "^\\d0$", "format": "{x0}"}, + "5": {"match": "^[3-9]\\d$", "format": "{x0} y {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^100$", "format": "cien"}, + "2": {"match": "^1\\d{2}$", "format": "ciento {formatted_decade}"}, + "3": {"match": "^[2-4|6|8]\\d{2}$", "format": "{x_in_x00}cientos {formatted_decade}"}, + "4": {"match": "^5\\d{2}$", "format": "quinientos {formatted_decade}"}, + "5": {"match": "^7\\d{2}$", "format": "setecientos {formatted_decade}"}, + "6": {"match": "^9\\d{2}$", "format": "novecientos {formatted_decade}"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^1\\d{3}$", "format": "mil {formatted_hundreds}"}, + "2": {"match": "^[2-9]\\d{3}$", "format": "{x_in_x000} mil 
{formatted_hundreds}"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{4}$", "format": "{formatted_thousand} {bc}"}, + "default": "{year} {bc}", + "bc": "a.C." + }, + "date_format": { + "date_full": "{weekday}, {day} de {month} de {formatted_year}", + "date_full_no_year": "{weekday}, {day} de {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "hoy", + "tomorrow": "mañana", + "yesterday": "ayer" + }, + "date_time_format": { + "date_time": "{formatted_date} a las {formatted_time}" + }, + "weekday": { + "0": "lunes", + "1": "martes", + "2": "miércoles", + "3": "jueves", + "4": "viernes", + "5": "sábado", + "6": "domingo" + }, + "date": { + "1": "uno", + "2": "dos", + "3": "tres", + "4": "cuatro", + "5": "cinco", + "6": "seis", + "7": "siete", + "8": "ocho", + "9": "nueve", + "10": "diez", + "11": "once", + "12": "doce", + "13": "trece", + "14": "catorce", + "15": "quince", + "16": "dieciséis", + "17": "diecisiete", + "18": "dieciocho", + "19": "diecinueve", + "20": "veinte", + "21": "veintiuno", + "22": "veintidós", + "23": "veintitrés", + "24": "veinticuatro", + "25": "veinticinco", + "26": "veintiséis", + "27": "veintisiete", + "28": "veintiocho", + "29": "veintinueve", + "30": "treinta", + "31": "treinta y uno" + }, + "month": { + "1": "enero", + "2": "febrero", + "3": "marzo", + "4": "abril", + "5": "mayo", + "6": "junio", + "7": "julio", + "8": "agosto", + "9": "septiembre", + "10": "octubre", + "11": "noviembre", + "12": "diciembre" + }, + "number": { + "0": "cero", + "1": "uno", + "2": "dos", + "3": "tres", + "4": "cuatro", + "5": "cinco", + "6": "seis", + "7": "siete", + "8": "ocho", + "9": "nueve", + "10": "diez", + "11": "once", + "12": "doce", + "13": "trece", + "14": "catorce", + "15": 
"quince", + "16": "dieciséis", + "17": "diecisiete", + "18": "dieciocho", + "19": "diecinueve", + "20": "veinte", + "21": "veintiuno", + "22": "veintidós", + "23": "veintitrés", + "24": "veinticuatro", + "25": "veinticinco", + "26": "veintiséis", + "27": "veintisiete", + "28": "veintiocho", + "29": "veintinueve", + "30": "treinta", + "40": "cuarenta", + "50": "cincuenta", + "60": "sesenta", + "70": "setenta", + "80": "ochenta", + "90": "noventa", + "100": "cien", + "200": "doscientos", + "300": "trescientos", + "400": "cuatrocientos", + "500": "quinientos", + "600": "seiscientos", + "700": "setecientos", + "800": "ochocientos", + "900": "novecientos", + "1000": "mil" + } +} \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/day.word b/numbers/src/main/resources/config/es-es/day.word new file mode 100644 index 00000000..eff79eda --- /dev/null +++ b/numbers/src/main/resources/config/es-es/day.word @@ -0,0 +1 @@ +día \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/days.word b/numbers/src/main/resources/config/es-es/days.word new file mode 100644 index 00000000..1d0beda5 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/days.word @@ -0,0 +1 @@ +días \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/hour.word b/numbers/src/main/resources/config/es-es/hour.word new file mode 100644 index 00000000..30325568 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/hour.word @@ -0,0 +1 @@ +hora \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/hours.word b/numbers/src/main/resources/config/es-es/hours.word new file mode 100644 index 00000000..cb3c87d0 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/hours.word @@ -0,0 +1 @@ +horas \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/minute.word b/numbers/src/main/resources/config/es-es/minute.word new file mode 100644 index 00000000..3def900f --- 
/dev/null +++ b/numbers/src/main/resources/config/es-es/minute.word @@ -0,0 +1 @@ +minuto \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/minutes.word b/numbers/src/main/resources/config/es-es/minutes.word new file mode 100644 index 00000000..dd89c355 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/minutes.word @@ -0,0 +1 @@ +minutos \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/second.word b/numbers/src/main/resources/config/es-es/second.word new file mode 100644 index 00000000..08aee9a2 --- /dev/null +++ b/numbers/src/main/resources/config/es-es/second.word @@ -0,0 +1 @@ +segundo \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/seconds.word b/numbers/src/main/resources/config/es-es/seconds.word new file mode 100644 index 00000000..608bae4c --- /dev/null +++ b/numbers/src/main/resources/config/es-es/seconds.word @@ -0,0 +1 @@ +segundos \ No newline at end of file diff --git a/numbers/src/main/resources/config/es-es/tokenizer.json b/numbers/src/main/resources/config/es-es/tokenizer.json new file mode 100644 index 00000000..3e1db04a --- /dev/null +++ b/numbers/src/main/resources/config/es-es/tokenizer.json @@ -0,0 +1,964 @@ +{ + "spaces": " \t\n\f\r:;_!?<>|=()[]{}»«*~^`'\"", + "characters_as_word": "%‰#-+.,/", + "compound_word_piece_category": "compound_word_piece", + "raw_number_categories": [ + "number", + "raw" + ], + "plural_endings": [ + "s", + "es" + ], + "word_matches": [ + { + "categories": [ + "ignore", + "date_time_ignore" + ], + "values": [ + "y", + "con" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "day_adder_the", + "ampm_before", + "bcad_after", + "pre_special_hour" + ], + "values": [ + "a" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "day_adder_the" + ], + "values": [ + "uno", + "una" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "thousand_separator" + ], + "values": [ + "." 
+ ] + }, + { + "categories": [ + "ordinal_suffix" + ], + "values": [ + "ro", + "ra", + "do", + "da", + "avo", + "ava" + ] + }, + { + "categories": [ + "point" + ], + "values": [ + "punto", + "coma" + ] + }, + { + "categories": [ + "point", + "post_oclock" + ], + "values": [ + "punto" + ] + }, + { + "categories": [ + "point", + "ignore", + "date_time_ignore" + ], + "values": [ + "," + ] + }, + { + "categories": [ + "fraction_separator" + ], + "values": [ + "sobre", + "dividido", + "dividido entre" + ] + }, + { + "categories": [ + "fraction_separator", + "date_time_ignore" + ], + "values": [ + "/" + ] + }, + { + "categories": [ + "sign", + "positive" + ], + "values": [ + "positivo", + "más", + "+" + ] + }, + { + "categories": [ + "sign", + "negative" + ], + "values": [ + "negativo", + "menos" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "sign", + "negative" + ], + "values": [ + "-" + ] + }, + { + "categories": [ + "duration_separator", + "date_time_ignore" + ], + "values": [ + "de", + "más" + ] + }, + { + "categories": [ + "day_before_yesterday" + ], + "values": [ + "anteayer", + "antier" + ] + }, + { + "categories": [ + "yesterday" + ], + "values": [ + "ayer" + ] + }, + { + "categories": [ + "today" + ], + "values": [ + "hoy" + ] + }, + { + "categories": [ + "tomorrow" + ], + "values": [ + "mañana" + ] + }, + { + "categories": [ + "day_after_tomorrow" + ], + "values": [ + "pasado mañana" + ] + }, + { + "categories": [ + "day_adder_the", + "date_time_ignore", + "pre_hour", + "pre_special_hour" + ], + "values": [ + "el", + "la", + "los", + "las" + ] + }, + { + "categories": [ + "day_adder_day" + ], + "values": [ + "día" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "post_relative_indicator", + "positive", + "day_adder_after", + "special_minute_after", + "pre_special_hour" + ], + "values": [ + "después" + ] + }, + { + "categories": [ + "day_adder_before", + "special_minute_before", + "bcad_before", + "pre_relative_indicator", + 
"post_relative_indicator", + "negative", + "pre_special_hour" + ], + "values": [ + "antes", + "para" + ] + }, + { + "categories": [ + "date_time_ignore", + "special_minute_before" + ], + "values": [ + "de" + ] + }, + { + "categories": [ + "special_minute_after", + "pre_relative_indicator", + "negative" + ], + "values": [ + "antes" + ] + }, + { + "categories": [ + "pre_hour" + ], + "values": [ + "hora", + "horas" + ] + }, + { + "categories": [ + "pre_hour", + "pre_special_hour" + ], + "values": [ + "a la", + "a las" + ] + }, + { + "categories": [ + "pre_special_hour" + ], + "values": [ + "este", + "estos", + "esta", + "estas", + "ese", + "esos", + "esa", + "esas", + "aquel", + "aquellos", + "aquella", + "aquellas" + ] + }, + { + "categories": [ + "pre_special_hour", + "pre_relative_indicator", + "positive", + "pre_oclock" + ], + "values": [ + "en" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "positive" + ], + "values": [ + "siguiente", + "siguientes", + "posterior", + "posteriores", + "próximo", + "próximos", + "próxima", + "próximas", + "dentro de" + ] + }, + { + "categories": [ + "date_time_ignore", + "pre_relative_indicator", + "positive" + ], + "values": [ + "en", + "en el", + "en la", + "en los", + "en las" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "post_relative_indicator", + "positive" + ], + "values": [ + "siguiente", + "siguientes", + "posterior", + "posteriores", + "próximo", + "próximos", + "próxima", + "próximas", + "que vendrá", + "que viene" + ] + }, + { + "categories": [ + "post_relative_indicator", + "negative" + ], + "values": [ + "pasado", + "pasados", + "anterior", + "anteriores", + "transcurrido", + "transcurridos", + "que pasó", + "que transcurrió" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "negative" + ], + "values": [ + "anterior", + "anteriores", + "pasado", + "pasados", + "pasada", + "pasadas", + "precedido", + "precedidos", + "precedida", + "precedidas", + "hace" + ] + }, + { + 
"categories": [ + "bcad_before" + ], + "values": [ + "b" + ] + }, + { + "categories": [ + "bcad_after" + ], + "values": [ + "año" + ] + }, + { + "categories": [ + "bcad_identifier" + ], + "values": [ + "cristo" + ] + }, + { + "categories": [ + "bcad_identifier", + "bcad_after" + ], + "values": [ + "c", + "común", + "actual" + ] + }, + { + "categories": [ + "bcad_identifier", + "bcad_era" + ], + "values": [ + "era", + "e" + ] + }, + { + "categories": [ + "bcad_before_combined" + ], + "values": [ + "a.C.", + "aC", + "ane" + ] + }, + { + "categories": [ + "bcad_after_combined" + ], + "values": [ + "d.C.", + "dC", + "ec" + ] + }, + { + "categories": [ + "ampm_before" + ], + "values": [ + "ante" + ] + }, + { + "categories": [ + "ampm_after" + ], + "values": [ + "p", + "post" + ] + }, + { + "categories": [ + "ampm_identifier" + ], + "values": [ + "meridiem", + "meridiano", + "m" + ] + }, + { + "categories": [ + "ampm_before_combined" + ], + "values": [ + "am" + ] + }, + { + "categories": [ + "ampm_after_combined" + ], + "values": [ + "pm" + ] + }, + { + "categories": [ + "post_oclock" + ], + "values": [ + "punto" + ] + }, + { + "categories": [ + "oclock_combined" + ], + "values": [ + "en punto" + ] + } + ], + "number_mappings": [ + { + "categories": [ + "number", + "digit", + "digit_after_point", + "compound_word_piece" + ], + "values": { + "cero": 0, + "uno": 1, + "dos": 2, + "tres": 3, + "cuatro": 4, + "cinco": 5, + "seis": 6, + "siete": 7, + "ocho": 8, + "nueve": 9 + } + }, + { + "categories": [ + "number", + "digit", + "compound_word_piece" + ], + "values": { + "un": 1, + "una": 1 + } + }, + { + "categories": [ + "number", + "teen", + "compound_word_piece" + ], + "values": { + "diez": 10, + "once": 11, + "doce": 12, + "trece": 13, + "catorce": 14, + "quince": 15, + "dieciséis": 16, + "diecisiete": 17, + "dieciocho": 18, + "diecinueve": 19, + "veintiuno": 21, + "veintidós": 22, + "veintitrés": 23, + "veinticuatro": 24, + "veinticinco": 25, + "veintiséis": 26, + 
"veintisiete": 27, + "veintiocho": 28, + "veintinueve": 29 + } + }, + { + "categories": [ + "number", + "tens", + "compound_word_piece" + ], + "values": { + "veinte": 20, + "veinti": 20, + "treinta": 30, + "cuarenta": 40, + "cincuenta": 50, + "sesenta": 60, + "setenta": 70, + "ochenta": 80, + "noventa": 90 + } + }, + { + "categories": [ + "number", + "hundred", + "compound_word_piece" + ], + "values": { + "cien": 100, + "ciento": 100, + "doscientos": 200, + "trescientos": 300, + "cuatrocientos": 400, + "quinientos": 500, + "seiscientos": 600, + "setecientos": 700, + "ochocientos": 800, + "novecientos": 900 + } + }, + { + "categories": [ + "number", + "multiplier", + "compound_word_piece" + ], + "values": { + "mil": 1000, + "miles": 1000, + "millón": 1000000, + "millones": 1000000, + "millardo": 1000000000, + "millardos": 1000000000, + "billón": 1000000000000, + "billones": 1000000000000, + "trillón": 1000000000000000000, + "trillones": 1000000000000000000 + } + }, + { + "categories": [ + "number", + "ordinal", + "digit", + "compound_word_piece" + ], + "values": { + "primero": 1, + "primera": 1, + "primer": 1, + "segundo": 2, + "segunda": 2, + "tercero": 3, + "tercera": 3, + "tercer": 3, + "cuarto": 4, + "cuarta": 4, + "quinto": 5, + "quinta": 5, + "sexto": 6, + "sexta": 6, + "séptimo": 7, + "séptima": 7, + "octavo": 8, + "octava": 8, + "noveno": 9, + "novena": 9 + } + }, + { + "categories": [ + "number", + "ordinal", + "teen", + "compound_word_piece" + ], + "values": { + "décimo": 10, + "décima": 10, + "undécimo": 11, + "undécima": 11, + "duodécimo": 12, + "duodécima": 12, + "decimotercero": 13, + "decimotercera": 13, + "decimocuarto": 14, + "decimocuarta": 14, + "decimoquinto": 15, + "decimoquinta": 15, + "decimosexto": 16, + "decimosexta": 16, + "decimoséptimo": 17, + "decimoséptima": 17, + "decimooctavo": 18, + "decimooctava": 18, + "decimonoveno": 19, + "decimonovena": 19 + } + }, + { + "categories": [ + "number", + "ordinal", + "tens", + "compound_word_piece" 
+ ], + "values": { + "vigésimo": 20, + "vigésima": 20, + "trigésimo": 30, + "trigésima": 30, + "cuadragésimo": 40, + "cuadragésima": 40, + "quincuagésimo": 50, + "quincuagésima": 50, + "sexagésimo": 60, + "sexagésima": 60, + "septuagésimo": 70, + "septuagésima": 70, + "octogésimo": 80, + "octogésima": 80, + "nonagésimo": 90, + "nonagésima": 90 + } + }, + { + "categories": [ + "number", + "ordinal", + "hundred", + "compound_word_piece" + ], + "values": { + "centésimo": 100, + "centésima": 100 + } + }, + { + "categories": [ + "number", + "ordinal", + "multiplier", + "compound_word_piece" + ], + "values": { + "milésimo": 1000, + "milésima": 1000, + "millonésimo": 1000000, + "millonésima": 1000000, + "milmillonésimo": 1000000000, + "milmillonésima": 1000000000, + "billonésimo": 1000000000000, + "billonésima": 1000000000000, + "trillonésimo": 1000000000000000000, + "trillonésima": 1000000000000000000 + } + }, + { + "categories": [ + "number", + "suffix_multiplier" + ], + "values": { + "media": 0.5, + "medio": 0.5, + "mitad": 0.5, + "cuarto": 0.25, + "quinto": 0.2, + "octavo": 0.125, + "par": 2, + "dupla": 2, + "trío": 3, + "cuarteto": 4, + "docena": 12, + "decena": 10, + "quincena": 15, + "veintena": 20, + "treintena": 30, + "cuarentena": 40, + "centena": 100, + "porciento": 0.01, + "centésima": 0.01, + "pc": 0.01, + "%": 0.01, + "por ciento": 0.01, + "pormil": 0.001, + "milésima": 0.001, + "‰": 0.001 + } + }, + { + "categories": [ + "month_name" + ], + "values": { + "enero": 1, + "ene": 1, + "febrero": 2, + "feb": 2, + "marzo": 3, + "mar": 3, + "abril": 4, + "abr": 4, + "mayo": 5, + "may": 5, + "junio": 6, + "jun": 6, + "julio": 7, + "jul": 7, + "agosto": 8, + "ago": 8, + "septiembre": 9, + "sep": 9, + "octubre": 10, + "oct": 10, + "noviembre": 11, + "nov": 11, + "diciembre": 12, + "dic": 12 + } + }, + { + "categories": [ + "day_of_week" + ], + "values": { + "lunes": 0, + "lun": 0, + "martes": 1, + "mar": 1, + "miércoles": 2, + "mie": 2, + "jueves": 3, + "jue": 3, + 
"viernes": 4, + "vie": 4, + "sábado": 5, + "sab": 5, + "domingo": 6, + "dom": 6 + } + }, + { + "categories": [ + "noon_midnight_like", + "moment_of_day" + ], + "values": { + "mediodía": 12, + "medianoche": 0 + } + }, + { + "categories": [ + "moment_of_day" + ], + "values": { + "madrugada": 3, + "amanecer": 6, + "amaneciendo": 6, + "desayuno": 7, + "desayunos": 7, + "mañana": 9, + "mañanas": 9, + "almuerzo": 12, + "almuerzos": 12, + "cena": 20, + "cenas": 20, + "tarde": 15, + "tardes": 15, + "atardecer": 18, + "atardeceres": 18, + "noche": 21, + "noches": 21 + } + } + ], + "duration_words": { + "1 NANOS": [ + "nanosegundo", + "nanosegundos", + "ns" + ], + "1 MICROS": [ + "microsegundo", + "microsegundos", + "μs" + ], + "1 MILLIS": [ + "milisegundo", + "milisegundos", + "ms" + ], + "1 SECONDS": [ + "segundo", + "segundos", + "s", + "seg", + "segs" + ], + "1 MINUTES": [ + "minuto", + "minutos", + "m", + "min", + "mins" + ], + "1 HOURS": [ + "hora", + "horas", + "h", + "hr", + "hrs" + ], + "1 DAYS": [ + "día", + "días", + "d" + ], + "1 WEEKS": [ + "semana", + "semanas", + "sem" + ], + "1 MONTHS": [ + "mes", + "meses" + ], + "1 YEARS": [ + "año", + "años", + "a" + ], + "1 DECADES": [ + "década", + "décadas" + ], + "1 CENTURIES": [ + "siglo", + "siglos" + ], + "1 MILLENNIA": [ + "milenio", + "milenios" + ] + }, + "duration_restrict_after_number": [ + "ns", + "μs", + "ms", + "s", + "m", + "h", + "d", + "sem", + "mes", + "año" + ] +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeConfigTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeConfigTest.java new file mode 100644 index 00000000..6092f381 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeConfigTest.java @@ -0,0 +1,10 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.test.DateTimeConfigTestBase; + +public class DateTimeConfigTest extends DateTimeConfigTestBase { + @Override + public String configFolder() { + 
return "config/es-es"; + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java new file mode 100644 index 00000000..9dc0f9ea --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeExtractorUtilsTest.java @@ -0,0 +1,194 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.t; +import static org.dicio.numbers.util.NumberExtractorUtils.signBeforeNumber; +import static java.time.temporal.ChronoUnit.DAYS; +import static java.time.temporal.ChronoUnit.MONTHS; + +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.DateTimeExtractorUtilsTestBase; +import org.dicio.numbers.util.DateTimeExtractorUtils; +import org.dicio.numbers.util.NumberExtractorUtils; +import org.junit.Test; + +import java.time.LocalDateTime; + +public class DateTimeExtractorUtilsTest extends DateTimeExtractorUtilsTestBase { + + // NOTE (ES): Reference date is a Saturday. + // Saturday, 4th of February, 2023, 22:03:47 + private static final LocalDateTime NOW = LocalDateTime.of(2023, 2, 4, 22, 3, 47, 482175927); + + @Override + public String configFolder() { + return "config/es-es"; + } + + @Override + public DateTimeExtractorUtils build(final TokenStream ts) { + // Use the SpanishNumberExtractor. The boolean for shortScale is not needed in the Spanish constructor. + final SpanishNumberExtractor numberExtractor = new SpanishNumberExtractor(ts); + return new DateTimeExtractorUtils(ts, NOW, (fromInclusive, toInclusive) -> + NumberExtractorUtils.extractOneIntegerInRange(ts, fromInclusive, toInclusive, + () -> signBeforeNumber(ts, () -> numberExtractor.numberInteger(false))) + ); + } + + @Test + public void testRelativeMonthDuration() { + // NOTE (ES): All values recalculated from NOW (Feb 4th, 2023). + assertRelativeMonthDuration("septiembre que viene", t(7, MONTHS), 2); // Feb -> Sep is +7 months. 
+ assertRelativeMonthDuration("próximo abril y de", t(2, MONTHS), 3); // Feb -> Apr is +2 months. + assertRelativeMonthDuration("último abril y de", t(-10, MONTHS), 3); // "last April" was in 2022, so it's -10 months from Feb 2023. + assertRelativeMonthDuration("febrero que vendrá", t(12, MONTHS), 2); // "upcoming February" is next year's. + assertRelativeMonthDuration("febrero que pasó", t(-12, MONTHS), 2); // "past February" is last year's. + assertRelativeMonthDuration("enero pasado", t(-1, MONTHS), 2); // "last January" was in the current year. + } + + @Test + public void testRelativeMonthDurationNull() { + assertRelativeMonthDurationNull("hola cómo estás"); + assertRelativeMonthDurationNull("en noviembre ocurrirá"); // "en" is not at the start of the duration indicator. + assertRelativeMonthDurationNull("octubre"); // A month name alone is not a relative duration. + assertRelativeMonthDurationNull("en dos octubres"); // Not a supported format for this util. + assertRelativeMonthDurationNull("en dos meses"); + } + + @Test + public void testRelativeToday() { + assertRelativeToday("hoy"); + assertRelativeToday("hoy ahora mismo"); + assertRelativeToday("hoy prueba"); + assertRelativeToday("hoy y"); + } + + @Test + public void testRelativeTodayNull() { + assertRelativeTodayNull("hola cómo estás"); + assertRelativeTodayNull("el mismo hoy"); + assertRelativeTodayNull("el día de hoy"); + assertRelativeTodayNull("ayer"); + assertRelativeTodayNull("mañana"); + } + + @Test + public void testRelativeDayOfWeekDuration() { + // NOTE (ES): All values recalculated from NOW (Saturday, day 5). + assertRelativeDayOfWeekDuration("próximo jueves", 5, 2); // Sat(5) -> next Thu(3) is 5 days. + assertRelativeDayOfWeekDuration("el jueves pasado", -2, 3); // Sat(5) -> last Thu(3) was 2 days ago. + assertRelativeDayOfWeekDuration("hace dos domingos", -13, 3); // Sat(5) -> the most recent Sun(6) was 6 days ago (-6); the one before that was 13 days ago (-13). 
+ assertRelativeDayOfWeekDuration("tres jueves siguientes", 19, 3); // Next Thu is +5, then +12, then +19. + assertRelativeDayOfWeekDuration("cuatro martes antes", -25, 3); // Last Tue was -4, then -11, -18, -25. + assertRelativeDayOfWeekDuration("próximo sábado", 7, 2); // "upcoming Saturday" is next week's. + assertRelativeDayOfWeekDuration("el sábado pasado", -7, 3); // "saturday ago" was last week. + } + + @Test + public void testRelativeDayOfWeekDurationNull() { + assertRelativeDayOfWeekDurationNull("hola cómo estás"); + assertRelativeDayOfWeekDurationNull("lunes"); // A day name alone is not a relative duration. + assertRelativeDayOfWeekDurationNull("pasado lunes"); // "pasado" is a post-indicator. + assertRelativeDayOfWeekDurationNull("dos viernes"); + assertRelativeDayOfWeekDurationNull("en dos días"); + assertRelativeDayOfWeekDurationNull("y en dos domingos"); + assertRelativeDayOfWeekDurationNull("un último lunes"); + assertRelativeDayOfWeekDurationNull("ayer y mañana"); + } + + @Test + public void testMinute() { + assertMinute("cero a b c", 0, 1); + assertMinute("cincuenta y nueve horas", 59, 4); // "cincuenta y nueve" are 3 tokens + "horas" + assertMinute("quince y", 15, 2); + assertMinute("veintiocho s", 28, 2); + assertMinute("seis mins prueba", 6, 2); + assertMinute("treinta y seis de min", 36, 5); + assertMinute("44m de", 44, 2); + } + + @Test + public void testMinuteNull() { + assertMinuteNull("hola cómo estás"); + assertMinuteNull("sesenta minutos"); // 60 is an invalid minute value. 
+ assertMinuteNull("ciento veinte"); + assertMinuteNull("menos dieciséis"); + assertMinuteNull("12000 minutos"); + assertMinuteNull("y dos de"); + } + + @Test + public void testSecond() { + assertSecond("cero a b c", 0, 1); + assertSecond("cincuenta y nueve horas", 59, 4); + assertSecond("quince y", 15, 2); + assertSecond("veintiocho h", 28, 2); + assertSecond("seis segs prueba", 6, 2); + assertSecond("treinta y seis de seg", 36, 5); + assertSecond("44s de", 44, 2); + } + + @Test + public void testSecondNull() { + assertSecondNull("hola cómo estás"); + assertSecondNull("sesenta segundos"); // 60 is an invalid second value. + assertSecondNull("ciento veinte"); + assertSecondNull("menos dieciséis"); + assertSecondNull("12000 segundos"); + assertSecondNull("y dos de"); + } + + @Test + public void testBcad() { + assertBcad("a.C. prueba", false, 3); + assertBcad("d.C. y", true, 3); + assertBcad("dc prueba y", true, 1); + assertBcad("antes de Cristo", false, 3); + assertBcad("después de Cristo", true, 3); + } + + @Test + public void testBcadNull() { + assertBcadNull("a.m."); + assertBcadNull("año Domini"); + assertBcadNull("y antes común"); + assertBcadNull("prueba c"); + assertBcadNull("m"); + assertBcadNull("c prueba"); + } + + @Test + public void testAmpm() { + assertAmpm("a.m. prueba", false, 3); + assertAmpm("p.m. 
y", true, 3); + assertAmpm("am y prueba", false, 1); + assertAmpm("post meridiano", true, 2); + assertAmpm("p y meridiem", true, 3); + } + + @Test + public void testAmpmNull() { + assertAmpmNull("d.C."); + assertAmpmNull("ante prueba meridiem"); + assertAmpmNull("y post m"); + assertAmpmNull("prueba m"); + assertAmpmNull("c"); + assertAmpmNull("aym"); + assertAmpmNull("meridiano prueba"); + } + + @Test + public void testMonthName() { + assertMonthName("enero", 1); + assertMonthName("dic e", 12); + assertMonthName("septiembre", 9); + assertMonthName("mar", 3); + } + + @Test + public void testMonthNameNull() { + assertMonthNameNull("lunes"); + assertMonthNameNull("jaguar"); + assertMonthNameNull("hola feb"); + assertMonthNameNull("y dic de"); + } +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java new file mode 100644 index 00000000..1caff63c --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DateTimeTest.java @@ -0,0 +1,54 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.formatter.Formatter; +import org.dicio.numbers.test.DateTimeTestBase; +import org.junit.Test; + +import java.time.LocalDate; +import java.time.LocalDateTime; + +import static org.junit.Assert.assertEquals; + +public class DateTimeTest extends DateTimeTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } + + @Override + public Formatter buildNumberFormatter() { + return new SpanishFormatter(); + } + + @Test + public void testNiceDate() { + // NOTE (ES): Test that the Formatter correctly generates full dates in Spanish. + // The expected format is "{weekday}, {day} de {month} de {year}". + assertEquals("miércoles, veintiocho de abril de dos mil veintiuno", + pf.niceDate(LocalDate.of(2021, 4, 28)).get()); + + // Test for a BC date, ensuring the correct output. 
+ assertEquals("domingo, trece de agosto de ochenta y cuatro a.C.", + pf.niceDate(LocalDate.of(-83, 8, 13)).get()); // -83 is 84 BC + } + + @Test + public void testNiceYear() { + // NOTE (ES): Test that the Formatter correctly pronounces years in Spanish. + assertEquals("mil novecientos ochenta y cuatro", pf.niceYear(LocalDate.of(1984, 4, 28)).get()); + assertEquals("ochocientos diez a.C.", pf.niceYear(LocalDate.of(-809, 8, 13)).get()); // -809 is 810 BC + } + + @Test + public void testNiceDateTime() { + // NOTE (ES): Test that the Formatter correctly generates full date-time strings. + // The expected format is "{date} a las {time}". + assertEquals("miércoles, doce de septiembre de mil setecientos sesenta y cuatro al mediodía", + pf.niceDateTime(LocalDateTime.of(1764, 9, 12, 12, 0)).get()); + + // Test for a BC date with a specific time. + assertEquals("jueves, tres de noviembre de trescientos veintiocho a.C. a las cinco y siete de la mañana", + pf.niceDateTime(LocalDateTime.of(-327, 11, 3, 5, 7)).get()); + } +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java new file mode 100644 index 00000000..185e506c --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/DurationExtractorUtilsTest.java @@ -0,0 +1,118 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.DAY; +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.HOUR; +import static org.dicio.numbers.test.TestUtils.MICROS; +import static org.dicio.numbers.test.TestUtils.MILLIS; +import static org.dicio.numbers.test.TestUtils.MINUTE; +import static org.dicio.numbers.test.TestUtils.MONTH; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.WEEK; +import static org.dicio.numbers.test.TestUtils.YEAR; +import static 
org.dicio.numbers.test.TestUtils.t; +import static org.junit.Assert.assertTrue; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.DurationExtractorUtilsTestBase; +import org.dicio.numbers.unit.Duration; +import org.dicio.numbers.util.DurationExtractorUtils; +import org.junit.Test; + + +public class DurationExtractorUtilsTest extends DurationExtractorUtilsTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } + + @Override + public Duration extractDuration(final TokenStream ts, final boolean shortScale) { + // NOTE (ES): The SpanishNumberExtractor constructor does not take a shortScale parameter, + // as Spanish exclusively uses the long scale for numbers. + final SpanishNumberExtractor numberExtractor = new SpanishNumberExtractor(ts); + return new DurationExtractorUtils(ts, numberExtractor::numberNoOrdinal).duration(); + } + + + @Test + public void testDurationNumberAndUnit() { + assertDuration("mil millones de nanosegundos", T, t(1_000_000_000L)); // 10^9 nanos = 1 second + assertDuration("mil setecientos veintiocho μs", T, t(0, 1728 * MICROS)); + assertDuration("cien milisegundos", T, t(0, 100 * MILLIS)); + assertDuration("18s", F, t(18)); + assertDuration("un seg", F, t(1)); + assertDuration("cincuenta y nueve minutos", T, t(59 * MINUTE)); + assertDuration("veintitrés horas", F, t(23 * HOUR)); + assertDuration("media hora", T, t(HOUR / 2)); + assertDuration("uno coma dos días", T, t(1.2 * DAY)); + assertDuration("medio día", F, t(DAY / 2)); + assertDuration("diez semanas", F, t(10 * WEEK)); + assertDuration("6 meses", T, t(6 * MONTH)); + assertDuration("tres mil millones de años", T, t(3e9 * YEAR)); + assertDuration("quince décadas", T, t(150 * YEAR)); + // NOTE (ES): Spanish uses long scale, so a billionth is 10^-12 + assertDuration("un siglo billonésimo", T, t(1e-12 * 100 * YEAR)); + assertDuration("1 milenio", F, t(1000 * YEAR)); + 
assertNoDuration("cuarenta y tres milenios cuatro", T); + assertNoDuration("y diez semanas y", F); + assertNoDuration("cien pruebas", F); + assertNoDuration("coma treinta y cuatro gramos", T); + } + + @Test + public void testDurationOnlyUnit() { + assertDuration("hora minuto milenio", T, t(1000 * YEAR + HOUR + MINUTE)); + assertDuration("milisegundo y segundo, microsegundo", F, t(1, MILLIS + MICROS)); + assertDuration("segundos segundo s", T, t(2)); + assertDuration("minuto horas años", F, t(MINUTE + HOUR + YEAR)); // Corrected to include year + assertNoDuration("hola milisegundo", F); + assertNoDuration("está bien", T); + assertNoDuration("ns μs ms s m h d sem mes a", F); + } + + @Test + public void testDurationOf() { + assertDuration("dos décimas de segundo", F, t(0, 200 * MILLIS)); + assertDuration("un par de horas", F, t(2 * HOUR)); + assertNoDuration("muchos segundos", F); + assertNoDuration("decenas de líneas de prueba", T); + assertNoDuration("hola dos cientos de hola", F); + assertNoDuration("hola de semana", F); + } + + @Test + public void testMultipleDurationGroups() { + assertDuration("veinte minutos y treinta y seis segundos porque", T, t(20 * MINUTE + 36)); + assertDuration("siete días, veintiuna horas y doce minutos para llegar", F, t(7 * DAY + 21 * HOUR + 12 * MINUTE)); + assertDuration("minuto, segundo y milisegundo, microsegundo y nanosegundo", T, t(MINUTE + 1, MILLIS + MICROS + 1)); + assertDuration("5 ns ns", F, t(0, 5+1)); // 5 nanos + 1 nano + assertNoDuration("ms 5 ns ns", F); // Number cannot be in the middle + } + + @Test(timeout = 4000) + public void testPerformanceWithFormatter() { + final java.time.Duration[] alternatives = { + t(1), t(5 * MINUTE), t(2 * HOUR), t(16 * DAY), t(WEEK), t(3 * MONTH), t(5 * YEAR), + t(1e8 * YEAR), t(17 * WEEK), t(45) + }; + + final ParserFormatter npf = new ParserFormatter(new SpanishFormatter(), null); + for (int i = 0; i < (1 << alternatives.length); ++i) { + java.time.Duration durationToTest = 
java.time.Duration.ZERO; + for (int j = 0; j < alternatives.length; ++j) { + if ((i & (1 << j)) != 0) { + durationToTest = durationToTest.plus(alternatives[j]); + } + } + + // The Spanish formatter correctly handles the long scale numbers. + final String formatted = npf.niceDuration(new Duration(durationToTest)).get(); + final TokenStream ts = new TokenStream(tokenizer.tokenize(formatted)); + assertDuration(formatted, ts, T, durationToTest); + assertTrue(ts.finished()); + } + } +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java new file mode 100644 index 00000000..31884176 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDateTimeTest.java @@ -0,0 +1,211 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.t; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static java.time.temporal.ChronoUnit.DAYS; +import static java.time.temporal.ChronoUnit.MONTHS; +import static java.time.temporal.ChronoUnit.SECONDS; +import static java.time.temporal.ChronoUnit.WEEKS; +import static java.time.temporal.ChronoUnit.YEARS; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.lang.es.SpanishParser; +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.WithTokenizerTestBase; +import org.dicio.numbers.unit.Duration; +import org.junit.Test; + +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.util.function.Function; + +public class ExtractDateTimeTest extends WithTokenizerTestBase { + + // NOTE (ES): Reference date is a Sunday. 
+ // Sunday, 5th of February, 2023, 9:41:12 + private static final LocalDateTime NOW = LocalDateTime.of(2023, 2, 5, 9, 41, 12, 759274821); + + @Override + public String configFolder() { + return "config/es-es"; + } + + // --- Helper assertion methods --- + + private void assertFunction(final String s, + final boolean preferMonthBeforeDay, + final T expectedResult, + int finalTokenStreamPosition, + final Function function) { + final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); + assertEquals("Wrong result for string \"" + s + "\"", + expectedResult, function.apply(new SpanishDateTimeExtractor(ts, preferMonthBeforeDay, NOW))); + assertEquals("Wrong final token position for string \"" + s + "\"", + finalTokenStreamPosition, ts.position); + } + + private void assertFunctionNull(final String s, + final boolean preferMonthBeforeDay, + final Function numberFunction) { + assertFunction(s, preferMonthBeforeDay, null, 0, numberFunction); + } + + // Overloads for cleaner test code + private void assertRelativeDuration(final String s, final Duration expectedDuration, int finalTokenStreamPosition) { + assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeDuration); + } + private void assertRelativeDurationNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeDuration); } + private void assertRelativeTomorrow(final String s, final int expectedDuration, int finalTokenStreamPosition) { assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeTomorrow); } + private void assertRelativeTomorrowNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeTomorrow); } + private void assertRelativeYesterday(final String s, final int expectedDuration, int finalTokenStreamPosition) { assertFunction(s, false, expectedDuration, finalTokenStreamPosition, SpanishDateTimeExtractor::relativeYesterday); } + private void 
assertRelativeYesterdayNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::relativeYesterday); } + private void assertHour(final String s, final int expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::hour); } + private void assertHourNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::hour); } + private void assertMomentOfDay(final String s, final int expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::momentOfDay); } + private void assertMomentOfDayNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::momentOfDay); } + private void assertNoonMidnightLike(final String s, final int expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::noonMidnightLike); } + private void assertNoonMidnightLikeNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::noonMidnightLike); } + private void assertDate(final String s, final boolean preferMonthBeforeDay, final LocalDate expected, int finalTokenStreamPosition) { assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::date); } + private void assertDateNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::date); } + private void assertTime(final String s, final LocalTime expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::time); } + private void assertTimeNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::time); } + private void assertTimeWithAmpm(final String s, final LocalTime expected, int finalTokenStreamPosition) { assertFunction(s, false, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::timeWithAmpm); } + private void 
assertTimeWithAmpmNull(final String s) { assertFunctionNull(s, false, SpanishDateTimeExtractor::timeWithAmpm); } + private void assertDateTime(final String s, final boolean preferMonthBeforeDay, final LocalDateTime expected, int finalTokenStreamPosition) { assertFunction(s, preferMonthBeforeDay, expected, finalTokenStreamPosition, SpanishDateTimeExtractor::dateTime); } + private void assertDateTimeNull(final String s, final boolean preferMonthBeforeDay) { assertFunctionNull(s, preferMonthBeforeDay, SpanishDateTimeExtractor::dateTime); } + + // --- Spanish-specific tests --- + + @Test + public void testRelativeDuration() { + assertRelativeDuration("en dos semanas llegaré", t(2, WEEKS), 3); + assertRelativeDuration("hace cuatro meses", t(-4, MONTHS), 3); + assertRelativeDuration("un segundo después se cayó", t(1, SECONDS), 3); + assertRelativeDuration("dentro de un par de décadas", t(20, YEARS), 6); + assertRelativeDuration("nueve días antes", t(-9, DAYS), 3); + assertRelativeDuration("setenta años pasados", t(-70, YEARS), 3); + assertRelativeDuration("tres meses y dos días después",t(3, MONTHS).plus(t(2, DAYS)), 6); + } + + @Test + public void testRelativeDurationNull() { + assertRelativeDurationNull("hola cómo estás"); + assertRelativeDurationNull("cuatro semestres"); // "semestre" is not a defined duration word + assertRelativeDurationNull("sabes que en una semana"); // duration must be at the start + assertRelativeDurationNull("y pasaron dos meses"); // same + assertRelativeDurationNull("el día anterior"); // not a calculable duration + } + + @Test + public void testRelativeTomorrow() { + assertRelativeTomorrow("mañana iremos", 1, 1); + assertRelativeTomorrow("pasado mañana y", 2, 1); // "pasado mañana" is a single token + } + + @Test + public void testRelativeYesterday() { + assertRelativeYesterday("ayer yo estuve", -1, 1); + assertRelativeYesterday("anteayer prueba",-2, 1); // "anteayer" is a single word + } + + @Test + public void testHour() { + assertHour("a 
las ocho y treinta y seis", 8, 3); + assertHour("veintiuna y dos", 21, 1); + assertHour("a la una y veintiséis", 1, 3); + assertHour("las diecisiete el", 17, 2); + assertHour("hora trece", 13, 2); + } + + @Test + public void testNoonMidnightLike() { + assertNoonMidnightLike("al mediodía", 12, 2); + assertNoonMidnightLike("medianoche", 0, 1); + } + + @Test + public void testMomentOfDay() { + assertMomentOfDay("a la medianoche", 0, 3); + assertMomentOfDay("mediodía", 12, 1); + assertMomentOfDay("esta tarde y", 15, 2); + assertMomentOfDay("por la noche prueba", 21, 3); + assertMomentOfDay("la cena", 20, 2); + } + + @Test + public void testDate() { + // NOTE (ES): Default Spanish format is DD/MM/YYYY. preferMonthBeforeDay=T will test for MM/DD/YYYY. + assertDate("04/09/4096", F, LocalDate.of(4096, 9, 4), 5); + assertDate("04/09/4096", T, LocalDate.of(4096, 4, 9), 5); + assertDate("13 4 2023", F, LocalDate.of(2023, 4, 13), 3); + assertDate("seis de julio de mil novecientos noventa y cinco", T, LocalDate.of(1995, 7, 6), 9); + assertDate("jueves 26 de mayo de 2022", T, LocalDate.of(2022, 5, 26), 6); + assertDate("2 de enero del 2 a.C.", T, LocalDate.of(-1, 1, 2), 7); // 2 BC is year -1 + assertDate("doce de junio de dos mil doce a.C.", T, LocalDate.of(-2011, 6, 12), 9); + assertDate("cuatrocientos setenta y seis d.C.", T, LocalDate.of(476, 2, 5), 6); + assertDate("martes veintisiete", T, LocalDate.of(2023, 2, 28), 2); // NOW is Sun 5th, next Tue is 7th, so Tue 27th must be Feb 28th + assertDate("lunes de noviembre", T, LocalDate.of(2023, 11, 6), 3); + } + + @Test + public void testTime() { + assertTime("13:28:33 prueba", LocalTime.of(13, 28, 33), 3); + assertTime("mediodía y cuarto", LocalTime.of(12, 15, 0), 3); + assertTime("a las catorce", LocalTime.of(14, 0, 0), 3); + assertTime("medianoche y doce", LocalTime.of(0, 12, 0), 3); + assertTime("las veintitrés y cincuenta y un minutos", LocalTime.of(23, 51, 0), 7); + assertTime("las cinco y media", LocalTime.of(5, 30, 0), 
4); + assertTime("las seis menos cuarto", LocalTime.of(5, 45, 0), 4); + } + + @Test + public void testTimeWithAmpm() { + assertTimeWithAmpm("11:28:33 pm test", LocalTime.of(23, 28, 33), 4); + assertTimeWithAmpm("a las dos de la mañana", LocalTime.of(2, 0, 0), 6); + assertTimeWithAmpm("tres y treinta y ocho de la tarde", LocalTime.of(15, 38, 0), 8); + assertTimeWithAmpm("noche", LocalTime.of(21, 0, 0), 1); + assertTimeWithAmpm("tarde a las cuatro y tres", LocalTime.of(16, 3, 0), 6); + assertTimeWithAmpm("12 am", LocalTime.of(0, 0, 0), 2); // 12 AM is midnight + assertTimeWithAmpm("12 pm", LocalTime.of(12, 0, 0), 2); // 12 PM is noon + } + + @Test + public void testDateTime() { + // NOTE (ES): All expected values are calculated from NOW (Sun, Feb 5, 2023 09:41:12). + assertDateTime("mañana a las 12:45", F, LocalDateTime.of(2023, 2, 6, 12, 45, 0), 5); + assertDateTime("26/12/2003 19:18:59", F, LocalDateTime.of(2003, 12, 26, 19, 18, 59), 4); + assertDateTime("19:18:59 26/12/2003 test", F, LocalDateTime.of(2003, 12, 26, 19, 18, 59), 4); + assertDateTime("05/07/2003 1:2:3", F, LocalDateTime.of(2003, 7, 5, 1, 2, 3), 4); // Standard Spanish DD/MM + assertDateTime("05/07/2003 1:2:3", T, LocalDateTime.of(2003, 5, 7, 1, 2, 3), 4); // preferMonthBeforeDay MM/DD + assertDateTime("próximo viernes a las veintidós en punto", F, LocalDateTime.of(2023, 2, 10, 22, 0, 0), 7); + assertDateTime("ayer por la tarde a las cinco menos cuarto", F, LocalDateTime.of(2023, 2, 4, 16, 45, 0), 9); + assertDateTime("dentro de tres días por la noche a las once", F, LocalDateTime.of(2023, 2, 8, 23, 0, 0), 9); + assertDateTime("pasado mañana por la mañana", F, LocalDateTime.of(2023, 2, 7, 9, 0, 0), 4); + assertDateTime("domingo a las 2:45 p.m.", F, LocalDateTime.of(2023, 2, 5, 14, 45, 0), 6); + assertDateTime("hace dos días al atardecer", F, LocalDateTime.of(2023, 2, 3, 18, 0, 0), 5); + assertDateTime("siete de noviembre de 193 a.C.", T, LocalDateTime.of(-192, 11, 7, 9, 41, 12), 8); // 193 BC is year 
-192 + } + + @Test + public void testDateTimeNull() { + assertDateTimeNull("hola cómo estás", F); + assertDateTimeNull("prueba veintiuno de enero después de cenar", F); + assertDateTimeNull("menos un milisegundo", F); + } + + @Test + public void testNumberParserExtractDateTime() { + // NOTE (ES): This tests the top-level ParserFormatter class. + final ParserFormatter npf = new ParserFormatter(null, new SpanishParser()); + assertNull(npf.extractDateTime("hola cómo estás").getFirst()); + assertEquals(NOW.minusDays(30).withHour(14).withMinute(39).withSecond(0).withNano(0), + npf.extractDateTime("2:39 p.m. hace treinta días").now(NOW).getFirst()); + assertEquals(NOW.plusMinutes(3).plusSeconds(46), + npf.extractDateTime("dentro de tres minutos y cuarenta y seis segundos").now(NOW).getFirst()); + } +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDurationTest.java new file mode 100644 index 00000000..850db7c4 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractDurationTest.java @@ -0,0 +1,27 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.DAY; +import static org.dicio.numbers.test.TestUtils.t; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.test.WithTokenizerTestBase; +import org.junit.Test; + +public class ExtractDurationTest extends WithTokenizerTestBase { + @Override + public String configFolder() { + return "config/es-es"; + } + + @Test + public void testNumberParserExtractDuration() { + final ParserFormatter npf = new ParserFormatter(null, new SpanishParser()); + assertNull(npf.extractDuration("hola cómo estás").getFirst()); + assertNull(npf.extractDuration("mil millones de euros").shortScale(true).getFirst()); + assertNull(npf.extractDuration("un 
millón").shortScale(false).getFirst()); + assertEquals(t(DAY), npf.extractDuration("veinticuatro horas no son dos días").getFirst().toJavaDuration()); + assertEquals(t(2 * DAY), npf.extractDuration("dos días n son veinticuatro horas").getFirst().toJavaDuration()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java new file mode 100644 index 00000000..b6171587 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ExtractNumbersTest.java @@ -0,0 +1,226 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.WithTokenizerTestBase; +import org.dicio.numbers.unit.Number; +import org.junit.Test; + +import java.util.function.BiFunction; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.n; +import static org.dicio.numbers.test.TestUtils.numberDeduceType; +import static org.junit.Assert.*; + +public class ExtractNumbersTest extends WithTokenizerTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } + + private void assertNumberFunction(final String s, + final Number value, + final int finalTokenStreamPosition, + final BiFunction numberFunction) { + final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); + // NOTE (ES): SpanishNumberExtractor does not use the shortScale parameter. + final Number number = numberFunction.apply(new SpanishNumberExtractor(ts), ts); + assertEquals("wrong value for string \"" + s + "\"", value, number); + assertEquals("wrong final token position for number " + (value != null ? 
value.toString() : "null"), finalTokenStreamPosition, + ts.position); + } + + private void assertNumberFunctionNull(final String s, + final BiFunction numberFunction) { + assertNumberFunction(s, null, 0, numberFunction); + } + + private void assertNumberInteger(final String s, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, numberDeduceType(value).withOrdinal(isOrdinal), finalTokenStreamPosition, + (enp, ts) -> enp.numberInteger(allowOrdinal)); + } + + private void assertNumberIntegerNull(final String s, final boolean allowOrdinal) { + assertNumberFunctionNull(s, (enp, ts) -> enp.numberInteger(allowOrdinal)); + } + + private void assertNumberPoint(final String s, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, numberDeduceType(value).withOrdinal(isOrdinal), + finalTokenStreamPosition, (enp, ts) -> enp.numberPoint(allowOrdinal)); + } + + private void assertNumberPointNull(final String s, final boolean allowOrdinal) { + assertNumberFunctionNull(s, (enp, ts) -> enp.numberPoint(allowOrdinal)); + } + + private void assertNumberSignPoint(final String s, final boolean allowOrdinal, final double value, final boolean isOrdinal, final int finalTokenStreamPosition) { + assertNumberFunction(s, numberDeduceType(value).withOrdinal(isOrdinal), + finalTokenStreamPosition, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); + } + + private void assertNumberSignPointNull(final String s, final boolean allowOrdinal) { + assertNumberFunctionNull(s, (enp, ts) -> enp.numberSignPoint(allowOrdinal)); + } + + private void assertDivideByDenominatorIfPossible(final String s, final Number startingNumber, final Number value, final int finalTokenStreamPosition) { + assertNumberFunction(s, value, finalTokenStreamPosition, + (enp, ts) -> enp.divideByDenominatorIfPossible(startingNumber)); + } + + // --- Spanish-specific 
number tests --- + + @Test + public void testNumberInteger() { + // NOTE (ES): Spanish uses long scale. Billón = 10^12, Trillón = 10^18. + assertNumberInteger("veinticinco billones, ciento sesenta y cuatro mil millones, siete mil diecinueve", F, 25164000007019L, F, 11); + assertNumberInteger("dos mil ciento noventa y uno", F, 2191, F, 5); + assertNumberInteger("novecientos diez", T, 910, F, 2); + assertNumberInteger("dos millones", F, 2000000, F, 2); + assertNumberInteger("un millón", F, 1000000, F, 2); + assertNumberInteger("mil diez", T, 1010, F, 2); + assertNumberInteger("1234567890123", T, 1234567890123L, F, 1); + assertNumberInteger("seiscientos cincuenta y cuatro y", F, 654, F, 4); + assertNumberInteger("ciento cuatro,", F, 104, F, 2); + assertNumberInteger("nueve mil, tres millones", T, 9003, F, 4); // "mil" acts as a separator here + } + + @Test + public void testNumberIntegerOrdinal() { + assertNumberInteger("vigésimo quinto", T, 25, T, 2); + assertNumberInteger("milésimo", T, 1000, T, 1); + assertNumberInteger("ciento cuatro mil, seis billonésimo", T, 104000e12, T, 5); + assertNumberInteger("543789º", T, 543789, T, 2); + assertNumberInteger("75.483.543ro", T, 75483543, T, 6); + assertNumberIntegerNull("2938ro", F); // Ordinal suffix only works on single token raw numbers + } + + @Test + public void testNumberIntegerThousandSeparator() { + // NOTE (ES): Spanish uses a dot (.) as a thousand separator. + assertNumberInteger("23.001", T, 23001, F, 3); + assertNumberInteger("19.123", T, 19123, F, 3); + assertNumberInteger("un 167.42", F, 167, F, 2); + assertNumberInteger("1.234.023.054, hola", F, 1234023054, F, 7); + } + + @Test + public void testNumberIntegerComposition() { + // NOTE (ES): These tests validate the `compound_word_piece` logic. 
+ assertNumberInteger("veinte y uno mil", F, 21000, F, 4); + assertNumberInteger("doscientos mil", F, 200000, F, 2); + assertNumberInteger("trescientos treinta y tres mil trescientos treinta y tres", F, 333333, F, 8); + assertNumberInteger("un millón un", F, 1000001, F, 3); + } + + private int tokensInFormattedString(final String formatted) { + int tokensInFormatted = 0; + if (!formatted.isEmpty()) { + tokensInFormatted = 1; + for (char c : formatted.toCharArray()) { + if (c == ' ' || c == ',') { + tokensInFormatted++; + } + } + } + return tokensInFormatted; + } + + @Test + public void testNumberIntegerWithFormatter() { + final ParserFormatter npf = new ParserFormatter(new SpanishFormatter(), null); + for (int i = 0; i < 2000000;) { + if (i < 2200) i++; + else if (i < 100000) i += 1207; + else i += 299527; + + String formatted = npf.pronounceNumber(i).places(0).get(); + int tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, T, i, F, tokensInFormatted); + + formatted = npf.pronounceNumber(i).places(0).ordinal(T).get(); + tokensInFormatted = tokensInFormattedString(formatted); + assertNumberInteger(formatted, T, i, T, tokensInFormatted); + } + } + + @Test + public void testNumberIntegerNull() { + assertNumberIntegerNull("", T); + assertNumberIntegerNull("un hola cómo estás", F); + assertNumberIntegerNull(", y", T); + assertNumberIntegerNull("cero dos", F); + assertNumberIntegerNull(", 123.485 y", T); + assertNumberIntegerNull("y 123", F); + assertNumberIntegerNull(" un mil ", T); + } + + @Test + public void testNumberPoint() { + // NOTE (ES): Uses "coma" as a decimal point. 
+ assertNumberPoint("mil quinientos setenta y cuatro coma nueve uno dos cero", T, 1574.9120, F, 9); + assertNumberPoint("veintitrés coma cero uno cero dos tres", T, 23.01023, F, 7); + assertNumberPoint("3645,7183", T, 3645.7183, F, 3); + assertNumberPoint("ochenta coma 6745", T, 80.6745, F, 3); + assertNumberPoint("cuatro coma sesenta y siete", T, 4.67, F, 4); + assertNumberPoint("coma ochocientos", T, 0.8, F, 2); + } + + @Test + public void testNumberPointFraction() { + assertNumberPoint("veintitrés millones cien mil sesenta y cuatro sobre dieciséis", F, 1443754, F, 9); + assertNumberPoint("ocho mil ciento noventa y dos dividido por cuatro mil noventa y seis", T, 2, F, 11); + assertNumberPoint("noventa y ocho sobre cien", T, 0.98, F, 5); + assertNumberPoint("veinticuatro sobre sesenta y cinco", T, 24.0 / 65.0, F, 5); + } + + @Test + public void testNumberPointOrdinal() { + assertNumberPoint("quinto coma seis", T, 5, T, 1); + assertNumberPoint("ocho coma un segundo", F, 8.1, F, 4); + assertNumberPoint("nueve sobre trigésimo noveno", T, 9.0/39.0, F, 4); + } + + @Test + public void testNumberPointNull() { + assertNumberPointNull("", F); + assertNumberPointNull("hola mundo", T); + assertNumberPointNull("coma", F); + assertNumberPointNull("coma veinte", T); + assertNumberPointNull("sobre dos", F); + assertNumberPointNull(" uno dividido por cinco", T); + } + + @Test + public void testNumberSignPoint() { + assertNumberSignPoint("menos setenta y seis mil sobre 23", T, -76000.0 / 23.0, F, 6); + assertNumberSignPoint("menos doce", T, -12, F, 2); + assertNumberSignPoint("más un millón", T, 1000000, F, 3); + assertNumberSignPoint("-1843", F, -1843, F, 2); + assertNumberSignPoint("+573.976", T, 573976, F, 4); + assertNumberSignPoint("menos 42903,5", T, -42903.5, F, 4); + assertNumberSignPoint("menos coma cero cuatro", T, -0.04, F, 4); + } + + @Test + public void testNumberSignPointOrdinal() { + assertNumberSignPoint("menos duodécimo", T, -12, T, 2); + 
assertNumberSignPoint("-centésimo", F, -100, T, 2); + assertNumberSignPointNull("menos primero", F); + } + + @Test + public void testDivideByDenominatorIfPossible() { + assertDivideByDenominatorIfPossible("quintos", n(5, F), n(1, F), 1); + assertDivideByDenominatorIfPossible("docena dos", n(3, F), n(36, F), 2); + assertDivideByDenominatorIfPossible("media y", n(19, F), n(9.5, F), 2); + assertDivideByDenominatorIfPossible("%", n(50, F), n(0.5, F), 1); + assertDivideByDenominatorIfPossible("‰", n(1000, F), n(1, F), 1); + assertDivideByDenominatorIfPossible("cuarto", n(16, F), n(4, F), 1); + assertDivideByDenominatorIfPossible("gente", n(98, F), n(98, F), 0); + assertDivideByDenominatorIfPossible("un décimo", null, n(0.1, F), 2); + assertDivideByDenominatorIfPossible("una decena", null, null, 0); + } +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java new file mode 100644 index 00000000..f6e3eb9f --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceDurationTest.java @@ -0,0 +1,74 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; + +import org.dicio.numbers.formatter.Formatter; +import org.dicio.numbers.test.NiceDurationTestBase; +import org.junit.Test; + +public class NiceDurationTest extends NiceDurationTestBase { + + @Override + public Formatter buildNumberFormatter() { + return new SpanishFormatter(); + } + + @Test + public void zero() { + assertDuration("cero segundos", T, 0, 0, 0, 0); + assertDuration("0:00:00", F, 0, 0, 0, 0); + } + + @Test + public void speechOne() { + // NOTE (ES): Testing singular units, paying attention to gender. + // "segundo" and "minuto" are masculine ("un"), but "hora" is feminine ("una"). 
+        assertDuration("un segundo", T, 0, 0, 0, 1);
+        assertDuration("un minuto", T, 0, 0, 1, 0);
+        assertDuration("una hora", T, 0, 1, 0, 0);
+        assertDuration("un día", T, 1, 0, 0, 0);
+    }
+
+    @Test
+    public void speechMany() {
+        assertDuration("cinco segundos", T, 0, 0, 0, 5);
+        assertDuration("dos minutos", T, 0, 0, 2, 0);
+        assertDuration("diecisiete horas", T, 0, 17, 0, 0);
+        assertDuration("tres días y doce horas", T, 3, 12, 0, 0); // 3 days + 12 hours, i.e. 84 hours in total
+    }
+
+    @Test
+    public void speech() {
+        assertDuration("seis días, veintitrés horas, cincuenta y nueve minutos y treinta y dos segundos", T, 6, 23, 59, 32);
+        assertDuration("diecinueve días y cincuenta y dos minutos", T, 19, 0, 52, 0);
+        assertDuration("una hora y seis segundos", T, 0, 1, 0, 6);
+        assertDuration("sesenta y tres días y cuarenta y cuatro segundos", T, 63, 0, 0, 44);
+        assertDuration("un día, una hora, un minuto y un segundo", T, 1, 1, 1, 1);
+    }
+
+    @Test
+    public void noSpeechOne() {
+        assertDuration("0:00:01", F, 0, 0, 0, 1);
+        assertDuration("0:01:00", F, 0, 0, 1, 0);
+        assertDuration("1:00:00", F, 0, 1, 0, 0);
+        assertDuration("1d 0:00:00", F, 1, 0, 0, 0);
+    }
+
+    @Test
+    public void noSpeechMany() {
+        assertDuration("0:00:39", F, 0, 0, 0, 39);
+        assertDuration("0:24:00", F, 0, 0, 24, 0);
+        assertDuration("3:00:00", F, 0, 3, 0, 0);
+        assertDuration("76d 0:00:00", F, 76, 0, 0, 0);
+    }
+
+    @Test
+    public void noSpeech() {
+        assertDuration("6d 23:59:32", F, 6, 23, 59, 32);
+        assertDuration("19d 0:52:00", F, 19, 0, 52, 0);
+        assertDuration("1:00:06", F, 0, 1, 0, 6);
+        assertDuration("63d 0:00:44", F, 63, 0, 0, 44);
+        assertDuration("1d 1:01:01", F, 1, 1, 1, 1);
+    }
+}
\ No newline at end of file
diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java
new file mode 100644
index 00000000..6950c45f
--- /dev/null
+++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceNumberTest.java
@@ -0,0 +1,65 @@
+package org.dicio.numbers.lang.es;
+
+import org.dicio.numbers.ParserFormatter;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import static org.dicio.numbers.test.TestUtils.F;
+import static org.junit.Assert.assertEquals;
+
+public class NiceNumberTest {
+
+    private static ParserFormatter pf;
+
+    @BeforeClass
+    public static void setup() {
+        pf = new ParserFormatter(new SpanishFormatter(), null);
+    }
+
+    @Test
+    public void speech() {
+        assertEquals("treinta y cuatro y medio", pf.niceNumber(34.5).get());
+        assertEquals("menos dieciocho y tres quintos", pf.niceNumber(-18.6).get());
+        assertEquals("noventa y ocho y dieciocho diecinueve", pf.niceNumber(98.947368421).get());
+        assertEquals("menos cinco y seis undécimas", pf.niceNumber(-5.5454545).get()); // NOTE(review): feminine "undécimas" here vs masculine "quintos"/"novenos" nearby; confirm against the formatter
+        assertEquals("siete novenos", pf.niceNumber(7.0 / 9).get());
+        assertEquals("menos dos decimoséptimos", pf.niceNumber(-2.0 / 17).get());
+        assertEquals("cuatrocientos sesenta y cinco", pf.niceNumber(465).get());
+        assertEquals("menos noventa y uno", pf.niceNumber(-91).get());
+        assertEquals("cero", pf.niceNumber(0).get());
+    }
+
+    @Test
+    public void noSpeech() {
+        assertEquals("34 1/2", pf.niceNumber(34.5).speech(F).get());
+        assertEquals("-18 3/5", pf.niceNumber(-18.6).speech(F).get());
+        assertEquals("98 18/19", pf.niceNumber(98.947368421).speech(F).get());
+        assertEquals("-5 6/11", pf.niceNumber(-5.5454545).speech(F).get());
+        assertEquals("7/9", pf.niceNumber(7.0 / 9).speech(F).get());
+        assertEquals("-2/17", pf.niceNumber(-2.0 / 17).speech(F).get());
+        assertEquals("465", pf.niceNumber(465).speech(F).get());
+        assertEquals("-91", pf.niceNumber(-91).speech(F).get());
+        assertEquals("0", pf.niceNumber(0).speech(F).get());
+    }
+
+    @Test
+    public void customDenominators() {
+        assertEquals("menos cuatro y cuatro décimas", pf.niceNumber(-4.4).denominators(Arrays.asList(2, 3, 4, 6, 7, 8, 9, 10, 11)).get());
+        assertEquals("-64 6/12", pf.niceNumber(-64.5).speech(F).denominators(Collections.singletonList(12)).get());
+        assertEquals("menos trescientas quinientas mil millonésimas", pf.niceNumber(-3.5).denominators(Arrays.asList(1000000, 2000000)).get()); // NOTE(review): "trescientas" reads as 300 while the input is -3.5; confirm against the formatter output
+        assertEquals("9 1000000/2000000", pf.niceNumber(9.5).speech(F).denominators(Arrays.asList(2000000, 1000000)).get());
+        assertEquals("cero punto ocho", pf.niceNumber(4.0 / 5).denominators(Arrays.asList(2, 3, 4)).get()); // NOTE(review): "punto" here, but invalidFraction also expects "coma"; confirm the decimal word
+    }
+
+    @Test
+    public void invalidFraction() {
+        assertEquals("un punto ochenta y cuatro", pf.niceNumber(1.837).get()); // NOTE(review): uses "punto" and a grouped "ochenta y cuatro", unlike the next assertion
+        assertEquals("menos treinta y ocho coma uno nueve", pf.niceNumber(-38.192).get()); // NOTE(review): uses "coma" and digit-by-digit decimals; confirm which style the formatter emits
+        assertEquals("3829.48", pf.niceNumber(3829.47832).speech(F).get());
+        assertEquals("-7.19", pf.niceNumber(-7.1928).speech(F).get());
+        assertEquals("-9322.38", pf.niceNumber(-9322 - 8.0 / 21).speech(F).get());
+    }
+}
diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java
new file mode 100644
index 00000000..8423106c
--- /dev/null
+++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NiceTimeTest.java
@@ -0,0 +1,113 @@
+package org.dicio.numbers.lang.es;
+
+import org.dicio.numbers.ParserFormatter;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.time.LocalTime;
+
+import static org.dicio.numbers.test.TestUtils.F;
+import static org.dicio.numbers.test.TestUtils.T;
+import static org.junit.Assert.assertEquals;
+
+public class NiceTimeTest {
+
+    private static ParserFormatter pf;
+
+    @BeforeClass
+    public static void setup() {
+        pf = new ParserFormatter(new SpanishFormatter(), null);
+    }
+
+
+    @Test
+    public void random() {
+        final LocalTime dt = LocalTime.of(13, 22, 3);
+        assertEquals("una veintidós", pf.niceTime(dt).get());
+        assertEquals("una y veintidós p.m.", pf.niceTime(dt).showAmPm(T).get()); // NOTE(review): has "y" while the line above does not; confirm which form the formatter emits
+        assertEquals("trece veintidós", pf.niceTime(dt).use24Hour(T).get());
+        assertEquals("trece veintidós", 
pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("1:22", pf.niceTime(dt).speech(F).get()); + assertEquals("1:22 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("13:22", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("13:22", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void oClock() { + final LocalTime dt = LocalTime.of(15, 0, 32); + assertEquals("tres en punto", pf.niceTime(dt).get()); + assertEquals("tres p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("mil quinientos", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("mil quinientos", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("3:00", pf.niceTime(dt).speech(F).get()); + assertEquals("3:00 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("15:00", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("15:00", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void afterMidnight() { + final LocalTime dt = LocalTime.of(0, 2, 9); + assertEquals("doce cero dos", pf.niceTime(dt).get()); + assertEquals("doce cero dos a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("cero cero cero dos", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("cero cero cero dos", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("12:02", pf.niceTime(dt).speech(F).get()); + assertEquals("12:02 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("00:02", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("00:02", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void quarterPast() { + final LocalTime dt = LocalTime.of(1, 15, 33); + assertEquals("una y cuarto", pf.niceTime(dt).get()); + assertEquals("una y cuarto a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("cero uno quince", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("cero uno quince", 
pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("1:15", pf.niceTime(dt).speech(F).get()); + assertEquals("1:15 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("01:15", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("01:15", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void half() { + final LocalTime dt = LocalTime.of(12, 30, 59); + assertEquals("doce y media", pf.niceTime(dt).get()); + assertEquals("doce y media p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("doce treinta", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("doce treinta", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("12:30", pf.niceTime(dt).speech(F).get()); + assertEquals("12:30 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("12:30", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("12:30", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void quarterTo() { + final LocalTime dt = LocalTime.of(23, 45, 7); + assertEquals("cuarto para las doce", pf.niceTime(dt).get()); + assertEquals("cuarto para las doce p.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("veintitrés cuarenta y cinco", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("veintitrés cuarenta y cinco", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("11:45", pf.niceTime(dt).speech(F).get()); + assertEquals("11:45 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("23:45", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("23:45", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void tenAm() { + final LocalTime dt = LocalTime.of(10, 3, 44); + assertEquals("diez cero tres", pf.niceTime(dt).get()); + assertEquals("diez cero tres a.m.", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("diez cero tres", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("diez cero tres", 
pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("10:03", pf.niceTime(dt).speech(F).get()); + assertEquals("10:03 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("10:03", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("10:03", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java new file mode 100644 index 00000000..f4341c55 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/NumberExtractorUtilsTest.java @@ -0,0 +1,121 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; + +import org.dicio.numbers.test.NumberExtractorUtilsTestBase; +import org.junit.Test; + +public class NumberExtractorUtilsTest extends NumberExtractorUtilsTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } + + @Test + public void testNumberLessThan1000() { + assertNumberLessThan1000("cero", T, 0, F, 1); + assertNumberLessThan1000("uno", F, 1, F, 1); + assertNumberLessThan1000("un", F, 1, F, 1); + assertNumberLessThan1000("cinco", T, 5, F, 1); + assertNumberLessThan1000("diecinueve", F, 19, F, 1); + assertNumberLessThan1000("cien", T, 100, F, 1); + assertNumberLessThan1000("trescientos", T, 300, F, 1); + assertNumberLessThan1000("veintiséis", F, 26, F, 1); + // NOTE (ES): Spanish uses "y" to connect tens and units (e.g., treinta y siete). 
+ assertNumberLessThan1000("treinta y siete", T, 37, F, 3); + assertNumberLessThan1000("setecientos seis", F, 706, F, 2); + assertNumberLessThan1000("ochocientos dieciocho", T, 818, F, 2); + } + + @Test + public void testNumberLessThan1000Digits() { + assertNumberLessThan1000("0", F, 0, F, 1); + assertNumberLessThan1000("1", T, 1, F, 1); + assertNumberLessThan1000("15", T, 15, F, 1); + assertNumberLessThan1000("100 diecinueve", F, 100, F, 1); + assertNumberLessThan1000("3 cientos 8", T, 300, F, 2); // NOTE(review): expects 300 at position 2, i.e. "cientos" acts as a multiplier and the trailing 8 is left unparsed (not 3 at position 1) - confirm + assertNumberLessThan1000("72", F, 72, F, 1); + assertNumberLessThan1000("912", T, 912, F, 1); + assertNumberLessThan1000("8 ciento 18", F, 818, F, 3); + assertNumberLessThan1000("ciento 47", F, 147, F, 2); + assertNumberLessThan1000("sesenta y 7", F, 67, F, 3); + } + + @Test + public void testNumberLessThan1000EdgeCases() { + assertNumberLessThan1000("cuatro cinco", T, 4, F, 1); + assertNumberLessThan1000("un dos y", F, 1, F, 1); + assertNumberLessThan1000("uno trece", T, 1, F, 1); + assertNumberLessThan1000("dieciséis ocho", F, 16, F, 1); + assertNumberLessThan1000("dieciocho cien", T, 18, F, 1); + assertNumberLessThan1000("cero cien", F, 0, F, 1); + assertNumberLessThan1000("sesenta cero", T, 60, F, 1); + assertNumberLessThan1000("un ciento", F, 100, F, 2); + assertNumberLessThan1000("uno y ciento", T, 100, F, 3); + assertNumberLessThan1000("setecientos y seis", F, 706, F, 3); + assertNumberLessThan1000("ciento noventa y uno", T, 191, F, 4); + } + + @Test + public void testNumberLessThan1000Ordinal() { + assertNumberLessThan1000("quinto", T, 5, T, 1); + assertNumberLessThan1000("vigésimo sexto", T, 26, T, 2); + assertNumberLessThan1000("septuagésimo octavo", F, 70, F, 1); + assertNumberLessThan1000("quincuagésimo noveno", T, 50, T, 1); + assertNumberLessThan1000("centésimo decimotercero", T, 113, T, 2); + assertNumberLessThan1000("primer ciento", T, 1, T, 1); + assertNumberLessThan1000("septingentésimo décimo", T, 700, T, 1); 
+ assertNumberLessThan1000("987º", T, 987, T, 2); + assertNumberLessThan1000("23ro", T, 23, T, 2); + assertNumberLessThan1000("8vo primero", T, 8, F, 1); + assertNumberLessThan1000("1ro ciento", T, 1, T, 2); + assertNumberLessThan1000Null("septuagésima", F); + assertNumberLessThan1000Null("101ro", F); + } + + @Test + public void testNumberLessThan1000Null() { + assertNumberLessThan1000Null("", F); + assertNumberLessThan1000Null("hola", T); + assertNumberLessThan1000Null("hola como estas", F); + assertNumberLessThan1000Null("hola dos y", T); + assertNumberLessThan1000Null("un millón", T); + assertNumberLessThan1000Null(" veinte", F); + } + + @Test + public void testNumberGroupShortScale() { + // NOTE (ES): Spanish uses long scale, but this method tests number group composition before multipliers are applied. + // It tests if "ciento veinte" is parsed as 120 before it's multiplied by "millones". + assertNumberGroupShortScale("ciento veinte millones", F, 1000000000, 120, F, 2); + assertNumberGroupShortScale("mil seis", T, 1000000000, 1006, F, 2); + assertNumberGroupShortScale("seiscientos mil", F, 1000000, 600, F, 1); + assertNumberGroupShortScale("ciento setenta mil", T, 1000000, 170, F, 2); + assertNumberGroupShortScale("572 millones", F, 1000000000, 572, F, 1); + assertNumberGroupShortScale("un millón", T, 1000000000, 1, F, 1); + assertNumberGroupShortScale(", ciento noventa y uno", F, 1000, 191, F, 4); + } + + @Test + public void testNumberGroupShortScaleOrdinal() { + assertNumberGroupShortScale("setecientos sesenta y cuatro millonésimo", T, 1000000000, 764, T, 4); + assertNumberGroupShortScale("quinto milmillonésimo", T, 1000000000, 5, T, 1); + assertNumberGroupShortScale("decimonoveno centésimo", T, 1000000000, 19, F, 1); // "centésimo" is not a multiplier here + assertNumberGroupShortScaleNull("duodécimo milésimo", F, 1000000000); + } + + @Test + public void testNumberGroupShortScaleNull() { + assertNumberGroupShortScaleNull("", T, 1000000000); + 
assertNumberGroupShortScaleNull("hola", F, 1000000); + assertNumberGroupShortScaleNull("129000", F, 1000000000); + assertNumberGroupShortScaleNull("ciento seis", F, 999); + assertNumberGroupShortScaleNull("doce", T, 0); + assertNumberGroupShortScaleNull("siete mil millones", F, 1000); + assertNumberGroupShortScaleNull("nueve mil uno", T, 1000); // NOTE(review): "nueve mil uno" is already standard Spanish (no "y" after "mil"); null is presumably expected because "mil" is not below the 1000 limit - confirm + assertNumberGroupShortScaleNull("ocho millones de personas", F, 1000000); + assertNumberGroupShortScaleNull(" diez ", T, 1000000); + } +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java new file mode 100644 index 00000000..db3d34d8 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/ParserParamsTest.java @@ -0,0 +1,65 @@ +package org.dicio.numbers.lang.es; + +import static org.dicio.numbers.test.TestUtils.DAY; +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.HOUR; +import static org.dicio.numbers.test.TestUtils.MILLIS; +import static org.dicio.numbers.test.TestUtils.MINUTE; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.YEAR; +import static org.dicio.numbers.test.TestUtils.n; +import static org.dicio.numbers.test.TestUtils.t; + +import org.dicio.numbers.parser.Parser; +import org.dicio.numbers.parser.param.NumberParserParamsTestBase; +import org.junit.Test; + +public class ParserParamsTest extends NumberParserParamsTestBase { + + @Override + protected Parser numberParser() { + return new SpanishParser(); + } + + @Test + public void testNumberFirst() { + // NOTE (ES): Spanish uses long scale, so "trillonésima" is 10^-18. 
+ assertNumberFirst("es mil novecientos sesenta y cuatro trillonésimas", T, F, n(1964e-18, F)); + assertNumberFirst("treinta y seis doceavos de manzana", F, T, n(3, F)); + assertNumberFirst("soy realmente el ciento ocho", F, F, n(100, F)); + assertNumberFirst("soy realmente el ciento ocho", T, T, n(108, T)); + } + + @Test + public void testNumberMixedWithText() { + assertNumberMixedWithText(" hola qué tal!, 3/5 o cuatro séptimos?", T, F, " hola qué tal!, ", n(3.0 / 5.0, F), " o ", n(4.0 / 7.0, F), "?"); + assertNumberMixedWithText(" hola qué tal!, cuatro séptimos o 3/5?", T, T, " hola qué tal!, ", n(4.0 / 7.0, F), " o ", n(3.0 / 5.0, F), "?"); + // NOTE (ES): "tres milmillonésimo" (three billionth in short scale) is not standard. Using long scale. + // "tres billonésimo" -> 3 * 10^-12. + assertNumberMixedWithText("tres billonésimo más dos", T, T, n(3e-12, T), " más ", n(2, F)); + // NOTE (ES): "un billón" is 10^12. + assertNumberMixedWithText("un billón y mil seiscientos sesenta y cuatro", F, F, n(1e12, F), " y ", n(1664, F)); + assertNumberMixedWithText("dos billonésimas menos cincuenta y ocho", F, T, n(2e-12, T), " menos ", n(-58, F)); + assertNumberMixedWithText("nueve milmillonésimas por once", F, F, n(9e-9, F), " por ", n(11, F)); + assertNumberMixedWithText("tres mitades, no once cuartos", F, T, n(1.5, F), ", no ", n(2.75, F)); + assertNumberMixedWithText("seis pares es igual a una docena ", F, T, n(12, F), " es igual a ", n(12, F), " "); + assertNumberMixedWithText("una docena de veintenas no es una centena", F, T, n(240, F), " no es ", n(100, F)); + assertNumberMixedWithText("tengo veintitrés años.", T, F, "tengo ", n(23, F), " años."); + // NOTE (ES): "quintillionth" (short scale) translates to "trillonésimo" (long scale). + assertNumberMixedWithText("El trillonésimo", F, F, "El ", n(1e18, T)); + assertNumberMixedWithText("Un trillonésimo", T, F, n(1e-18, F)); + } + + @Test + public void testDurationFirst() { + // NOTE (ES): "mil millones" is 10^9. 
+ assertDurationFirst("Pon un temporizador de dos minutos y mil millones de nanosegundos", F, t(2 * MINUTE + 1)); // 10^9 ns = 1s + assertDurationFirst("sabes que hace dos años no son mil millones de días", T, t(2 * YEAR)); + } + + @Test + public void testDurationMixedWithText() { + assertDurationMixedWithText("2ns y cuatro horas mientras seis milisegundos.", F, t(4 * HOUR, 2), " mientras ", t(0, 6 * MILLIS), "."); + assertDurationMixedWithText("sabes que hace dos años no son mil millones de días", T, "sabes que ", t(-2 * YEAR), " no son ", t(1000000000L * DAY)); + } +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java new file mode 100644 index 00000000..207a35a7 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/PronounceNumberTest.java @@ -0,0 +1,153 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.junit.Assert.assertEquals; + +public class PronounceNumberTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new SpanishFormatter(), null); + } + + @Test + public void smallIntegers() { + assertEquals("cero", pf.pronounceNumber(0).get()); + assertEquals("uno", pf.pronounceNumber(1).get()); + assertEquals("diez", pf.pronounceNumber(10).get()); + assertEquals("quince", pf.pronounceNumber(15).get()); + assertEquals("veinte", pf.pronounceNumber(20).get()); + // NOTE (ES): Numbers from 21-29 are single words in Spanish. + assertEquals("veintisiete", pf.pronounceNumber(27).get()); + assertEquals("treinta", pf.pronounceNumber(30).get()); + // NOTE (ES): Spanish uses "y" to connect tens and units above 30. 
+ assertEquals("treinta y tres", pf.pronounceNumber(33).get()); + } + + @Test + public void negativeSmallIntegers() { + assertEquals("menos uno", pf.pronounceNumber(-1).get()); + assertEquals("menos diez", pf.pronounceNumber(-10).get()); + assertEquals("menos quince", pf.pronounceNumber(-15).get()); + assertEquals("menos veinte", pf.pronounceNumber(-20).get()); + assertEquals("menos veintisiete", pf.pronounceNumber(-27).get()); + assertEquals("menos treinta", pf.pronounceNumber(-30).get()); + assertEquals("menos treinta y tres", pf.pronounceNumber(-33).get()); + } + + @Test + public void decimals() { + // NOTE (ES): Decimal separator is "coma". + assertEquals("cero coma cero cinco", pf.pronounceNumber(0.05).get()); + assertEquals("menos cero coma cero cinco", pf.pronounceNumber(-0.05).get()); + assertEquals("uno coma dos tres cuatro", pf.pronounceNumber(1.234).get()); + assertEquals("veintiuno coma dos seis cuatro", pf.pronounceNumber(21.264).places(5).get()); + assertEquals("veintiuno coma dos seis cuatro", pf.pronounceNumber(21.264).places(4).get()); + assertEquals("veintiuno coma dos seis cuatro", pf.pronounceNumber(21.264).places(3).get()); + assertEquals("veintiuno coma dos seis", pf.pronounceNumber(21.264).places(2).get()); + assertEquals("veintiuno coma tres", pf.pronounceNumber(21.264).places(1).get()); + assertEquals("veintiuno", pf.pronounceNumber(21.264).places(0).get()); + assertEquals("menos veintiuno coma dos seis cuatro", pf.pronounceNumber(-21.264).places(3).get()); + assertEquals("menos veintiuno coma tres", pf.pronounceNumber(-21.264).places(1).get()); + } + + @Test + public void roundingDecimals() { + assertEquals("cero", pf.pronounceNumber(0.05).places(0).get()); + assertEquals("cero", pf.pronounceNumber(-0.4).places(0).get()); + assertEquals("menos veintidós", pf.pronounceNumber(-21.7).places(0).get()); + assertEquals("ochenta y nueve", pf.pronounceNumber(89.2).places(0).get()); + assertEquals("noventa", 
pf.pronounceNumber(89.9).places(0).get()); + assertEquals("menos uno", pf.pronounceNumber(-0.5).places(0).get()); + assertEquals("seis coma tres", pf.pronounceNumber(6.28).places(1).get()); + assertEquals("tres coma dos", pf.pronounceNumber(3.150001).places(1).get()); + assertEquals("cero coma tres", pf.pronounceNumber(0.25).places(1).get()); + assertEquals("diecinueve", pf.pronounceNumber(19.004).get()); + } + + @Test + public void hundred() { + // NOTE (ES): "cien" is used for exactly 100, "ciento" for compounds (e.g., 101 -> "ciento uno"). + assertEquals("cien", pf.pronounceNumber(100).get()); + assertEquals("seiscientos setenta y ocho", pf.pronounceNumber(678).get()); + assertEquals("ciento tres millones doscientos cincuenta y cuatro mil seiscientos cincuenta y cuatro", + pf.pronounceNumber(103254654).get()); + assertEquals("un millón quinientos doce mil cuatrocientos cincuenta y siete", + pf.pronounceNumber(1512457).get()); + assertEquals("doscientos nueve mil novecientos noventa y seis", + pf.pronounceNumber(209996).get()); + } + + @Test + public void year() { + // NOTE (ES): Years are typically pronounced fully in Spanish. "nineteen eighty four" is not used. 
+ assertEquals("mil cuatrocientos cincuenta y seis", pf.pronounceNumber(1456).get()); + assertEquals("mil novecientos ochenta y cuatro", pf.pronounceNumber(1984).get()); + assertEquals("mil ochocientos uno", pf.pronounceNumber(1801).get()); + assertEquals("mil cien", pf.pronounceNumber(1100).get()); + assertEquals("mil doscientos uno", pf.pronounceNumber(1201).get()); + assertEquals("mil quinientos diez", pf.pronounceNumber(1510).get()); + assertEquals("mil seis", pf.pronounceNumber(1006).get()); + assertEquals("mil", pf.pronounceNumber(1000).get()); + assertEquals("dos mil", pf.pronounceNumber(2000).get()); + assertEquals("dos mil quince", pf.pronounceNumber(2015).get()); + } + + @Test + public void scientificNotation() { + assertEquals("cero", pf.pronounceNumber(0.0).scientific(T).get()); + assertEquals("tres coma tres por diez a la uno", + pf.pronounceNumber(33).scientific(T).get()); + assertEquals("dos coma nueve nueve por diez a la ocho", + pf.pronounceNumber(299492458).scientific(T).get()); + assertEquals("uno coma seis siete dos por diez a la menos veintisiete", + pf.pronounceNumber(1.672e-27).scientific(T).places(3).get()); + } + + @Test + public void largeNumbers() { + // NOTE (ES): Spanish uses the long scale exclusively. Short scale tests are not applicable. + // millardo = 10^9, billón = 10^12, trillón = 10^18. 
+ assertEquals("un millón mil ochocientos noventa y dos", pf.pronounceNumber(1001892).get()); + assertEquals("doscientos noventa y nueve millones setecientos noventa y dos mil cuatrocientos cincuenta y ocho", pf.pronounceNumber(299792458).get()); + assertEquals("menos cien mil doscientos dos millones ciento treinta y tres mil cuatrocientos cuarenta", pf.pronounceNumber(-100202133440.0).get()); + assertEquals("veinte billones ciento dos mil millones novecientos ochenta y siete mil", pf.pronounceNumber(20102000987000.0).get()); + assertEquals("siete trillones", pf.pronounceNumber(7000000000000000000.0).get()); + assertEquals("un millón uno", pf.pronounceNumber(1000001).get()); + } + + @Test + public void ordinal() { + assertEquals("primero", pf.pronounceNumber(1).ordinal(T).get()); + assertEquals("décimo", pf.pronounceNumber(10).ordinal(T).get()); + assertEquals("decimoquinto", pf.pronounceNumber(15).ordinal(T).get()); + assertEquals("vigésimo", pf.pronounceNumber(20).ordinal(T).get()); + assertEquals("vigésimo séptimo", pf.pronounceNumber(27).ordinal(T).get()); + assertEquals("trigésimo", pf.pronounceNumber(30).ordinal(T).get()); + assertEquals("trigésimo tercero", pf.pronounceNumber(33).ordinal(T).get()); + assertEquals("centésimo", pf.pronounceNumber(100).ordinal(T).get()); + assertEquals("centésimo décimo", pf.pronounceNumber(110).ordinal(T).get()); + assertEquals("milésimo", pf.pronounceNumber(1000).ordinal(T).get()); + assertEquals("diezmilésimo", pf.pronounceNumber(10000).ordinal(T).get()); + assertEquals("millonésimo", pf.pronounceNumber(1000000).ordinal(T).get()); + // NOTE (ES): Decimal numbers are not pronounced as ordinals. The base number is made ordinal. 
+ assertEquals("tercero", pf.pronounceNumber(2.78).places(0).ordinal(T).get()); + assertEquals("decimonoveno", pf.pronounceNumber(19.004).ordinal(T).get()); + } + + @Test + public void edgeCases() { + assertEquals("cero", pf.pronounceNumber(0.0).get()); + assertEquals("cero", pf.pronounceNumber(-0.0).get()); + assertEquals("infinito", pf.pronounceNumber(Double.POSITIVE_INFINITY).get()); + assertEquals("menos infinito", pf.pronounceNumber(Double.NEGATIVE_INFINITY).get()); + assertEquals("no es un número", pf.pronounceNumber(Double.NaN).get()); + } +} \ No newline at end of file diff --git a/numbers/src/test/java/org/dicio/numbers/lang/es/TokenizerConfigTest.java b/numbers/src/test/java/org/dicio/numbers/lang/es/TokenizerConfigTest.java new file mode 100644 index 00000000..c9c50c33 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/es/TokenizerConfigTest.java @@ -0,0 +1,11 @@ +package org.dicio.numbers.lang.es; + +import org.dicio.numbers.test.TokenizerConfigTestBase; + +public class TokenizerConfigTest extends TokenizerConfigTestBase { + + @Override + public String configFolder() { + return "config/es-es"; + } +} diff --git a/numbers/src/test/resources/config/es-es/date_time_test.json b/numbers/src/test/resources/config/es-es/date_time_test.json new file mode 100644 index 00000000..43be466b --- /dev/null +++ b/numbers/src/test/resources/config/es-es/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "uno a.C." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "diez a.C." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "noventa y dos a.C." 
}, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ochocientos tres" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ochocientos once" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "cuatrocientos cincuenta y cuatro" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cinco" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil doce" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cuarenta y seis" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil ochocientos siete" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil setecientos diecisiete" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil novecientos ochenta y ocho"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil nueve"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil dieciocho"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil veintiuno"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil treinta"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cien" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil ciento veinte a.C." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil doscientos cuarenta y uno a.C." 
}, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "cinco mil doscientos" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cien" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cien" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "martes, treinta y uno de enero de dos mil diecisiete"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "domingo, cuatro de febrero de dos mil dieciocho"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "domingo, cuatro"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "domingo, cuatro"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "mañana"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "hoy"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "ayer"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "domingo, cuatro de febrero"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "domingo, cuatro de febrero de dos mil dieciocho"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "martes, treinta y uno de enero de dos mil diecisiete a las una y veintidós p.m."}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "martes, treinta y uno de enero de dos mil diecisiete a las trece horas y veintidós minutos"} + } +}