Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 54 additions & 24 deletions src/main/java/org/mtransit/commons/CleanUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import kotlin.text.Regex;

@SuppressWarnings({"unused", "WeakerAccess"})
public final class CleanUtils {

Expand Down Expand Up @@ -101,17 +103,19 @@ public static String cleanLabel(@NotNull Locale locale, @NotNull String label) {
return label.trim();
}

private static final String PLACE_CHAR_DE_L = "de l'";
private static final String PLACE_CHAR_DE_LA = "de la ";
private static final String PLACE_CHAR_D = "d'";
private static final String PLACE_CHAR_DE = "de ";
private static final String PLACE_CHAR_DES = "des ";
private static final String PLACE_CHAR_DU = "du ";
private static final String PLACE_CHAR_LA = "la ";
private static final String PLACE_CHAR_LE = "le ";
private static final String PLACE_CHAR_LES = "les ";
private static final String PLACE_CHAR_L = "l'";

static final String PLACE_CHAR_DE_L = "de l'";
static final String PLACE_CHAR_DE_LA = "de la ";
static final String PLACE_CHAR_D = "d'";
static final String PLACE_CHAR_DE = "de ";
static final String PLACE_CHAR_DES = "des ";
static final String PLACE_CHAR_DU = "du ";
static final String PLACE_CHAR_LA = "la ";
static final String PLACE_CHAR_LE = "le ";
static final String PLACE_CHAR_LES = "les ";
static final String PLACE_CHAR_L = "l'";

@SuppressWarnings("DeprecatedIsStillUsed")
@Deprecated
private static final Pattern[] START_WITH_CHARS = new Pattern[]{ //
Pattern.compile("^(" + PLACE_CHAR_DE_L + ")", Pattern.CASE_INSENSITIVE), //
Pattern.compile("^(" + PLACE_CHAR_DE_LA + ")", Pattern.CASE_INSENSITIVE), //
Expand All @@ -125,6 +129,8 @@ public static String cleanLabel(@NotNull Locale locale, @NotNull String label) {
Pattern.compile("^(" + PLACE_CHAR_L + ")", Pattern.CASE_INSENSITIVE) //
};

@SuppressWarnings("unused")
@Deprecated
public static final Pattern[] SPACE_CHARS = new Pattern[]{ //
Pattern.compile("( " + PLACE_CHAR_DE_L + ")", Pattern.CASE_INSENSITIVE), //
Pattern.compile("( " + PLACE_CHAR_DE_LA + ")", Pattern.CASE_INSENSITIVE), //
Expand All @@ -138,6 +144,8 @@ public static String cleanLabel(@NotNull Locale locale, @NotNull String label) {
Pattern.compile("( " + PLACE_CHAR_L + ")", Pattern.CASE_INSENSITIVE) //
};

@SuppressWarnings("DeprecatedIsStillUsed")
@Deprecated
private static final Pattern[] SLASH_CHARS = new Pattern[]{//
Pattern.compile("(/ " + PLACE_CHAR_DE_L + ")", Pattern.CASE_INSENSITIVE), //
Pattern.compile("(/ " + PLACE_CHAR_DE_LA + ")", Pattern.CASE_INSENSITIVE), //
Expand All @@ -151,19 +159,24 @@ public static String cleanLabel(@NotNull Locale locale, @NotNull String label) {
Pattern.compile("(/ " + PLACE_CHAR_L + ")", Pattern.CASE_INSENSITIVE) //
};

private static final String PLACE_CHAR_ARRONDISSEMENT = "arrondissement ";
private static final String PLACE_CHAR_AV = "av ";
private static final String PLACE_CHAR_AVENUE = "avenue ";
private static final String PLACE_CHAR_BOUL = "boul ";
private static final String PLACE_CHAR_BOULEVARD = "boulevard ";
private static final String PLACE_CHAR_CH = "ch ";
private static final String PLACE_CHAR_CIVIQUE = "civique ";
private static final String PLACE_CHAR_CROISS = "croiss ";
private static final String PLACE_CHAR_QUARTIER = "quartier ";
private static final String PLACE_CHAR_RTE = "rte ";
private static final String PLACE_CHAR_RUE = "rue ";
private static final String PLACE_CHAR_TSSE = "tsse ";

/**
 * Single combined regex covering the {@code PLACE_CHAR_DE_L} .. {@code PLACE_CHAR_L} prefixes,
 * built once by {@code CleanUtilsExtKt.makeALL_CHARS_REGEX()}.
 * This is a Kotlin {@link Regex}, not a {@code java.util.regex.Pattern}.
 */
public static final Regex ALL_CHARS_REGEX = CleanUtilsExtKt.makeALL_CHARS_REGEX();
/**
 * Replacement string to pass alongside {@link #ALL_CHARS_REGEX},
 * as returned by {@code CleanUtilsExtKt.makeALL_CHARS_REGEX_REPLACEMENT()}.
 */
public static final String ALL_CHARS_REGEX_REPLACEMENT = CleanUtilsExtKt.makeALL_CHARS_REGEX_REPLACEMENT();

static final String PLACE_CHAR_ARRONDISSEMENT = "arrondissement ";
static final String PLACE_CHAR_AV = "av ";
static final String PLACE_CHAR_AVENUE = "avenue ";
static final String PLACE_CHAR_BOUL = "boul ";
static final String PLACE_CHAR_BOULEVARD = "boulevard ";
static final String PLACE_CHAR_CH = "ch ";
static final String PLACE_CHAR_CIVIQUE = "civique ";
static final String PLACE_CHAR_CROISS = "croiss ";
static final String PLACE_CHAR_QUARTIER = "quartier ";
static final String PLACE_CHAR_RTE = "rte ";
static final String PLACE_CHAR_RUE = "rue ";
static final String PLACE_CHAR_TSSE = "tsse ";

@SuppressWarnings("DeprecatedIsStillUsed")
@Deprecated
private static final Pattern[] START_WITH_ST = new Pattern[]{ //
Pattern.compile("^(" + PLACE_CHAR_ARRONDISSEMENT + ")", Pattern.CASE_INSENSITIVE), //
Pattern.compile("^(" + PLACE_CHAR_AV + ")", Pattern.CASE_INSENSITIVE), //
Expand All @@ -179,6 +192,8 @@ public static String cleanLabel(@NotNull Locale locale, @NotNull String label) {
Pattern.compile("^(" + PLACE_CHAR_TSSE + ")", Pattern.CASE_INSENSITIVE) //
};

@SuppressWarnings("unused")
@Deprecated
public static final Pattern[] SPACE_ST = new Pattern[]{ //
Pattern.compile("( " + PLACE_CHAR_ARRONDISSEMENT + ")", Pattern.CASE_INSENSITIVE), //
Pattern.compile("( " + PLACE_CHAR_AV + ")", Pattern.CASE_INSENSITIVE), //
Expand All @@ -194,6 +209,8 @@ public static String cleanLabel(@NotNull Locale locale, @NotNull String label) {
Pattern.compile("( " + PLACE_CHAR_TSSE + ")", Pattern.CASE_INSENSITIVE) //
};

@SuppressWarnings("DeprecatedIsStillUsed")
@Deprecated
private static final Pattern[] SLASH_ST = new Pattern[]{ //
Pattern.compile("(/ " + PLACE_CHAR_ARRONDISSEMENT + ")", Pattern.CASE_INSENSITIVE), //
Pattern.compile("(/ " + PLACE_CHAR_AV + ")", Pattern.CASE_INSENSITIVE), //
Expand All @@ -209,6 +226,9 @@ public static String cleanLabel(@NotNull Locale locale, @NotNull String label) {
Pattern.compile("(/ " + PLACE_CHAR_TSSE + ")", Pattern.CASE_INSENSITIVE) //
};

/**
 * Single combined regex covering the {@code PLACE_CHAR_ARRONDISSEMENT} .. {@code PLACE_CHAR_TSSE} prefixes,
 * built once by {@code CleanUtilsExtKt.makeALL_ST_REGEX()}.
 * This is a Kotlin {@link Regex}, not a {@code java.util.regex.Pattern}.
 */
public static final Regex ALL_ST_REGEX = CleanUtilsExtKt.makeALL_ST_REGEX();
/**
 * Replacement string to pass alongside {@link #ALL_ST_REGEX},
 * as returned by {@code CleanUtilsExtKt.makeALL_ST_REGEX_REPLACEMENT()}.
 */
public static final String ALL_ST_REGEX_REPLACEMENT = CleanUtilsExtKt.makeALL_ST_REGEX_REPLACEMENT();

@NotNull
public static Pattern cleanWord(@NotNull String word) {
return cleanWords(word);
Expand Down Expand Up @@ -355,9 +375,13 @@ public static String cleanLabelFR(@NotNull String label) {
label = CLEAN_PARENTHESIS2.matcher(label).replaceAll(CLEAN_PARENTHESIS2_REPLACEMENT);
label = SAINT.matcher(label).replaceAll(SAINT_REPLACEMENT);
label = removePointsI(label); // after capitalize
//noinspection deprecation
label = RegexUtils.replaceAllNN(label.trim(), START_WITH_ST, SPACE); // Constants.EMPTY); // SPACE);
//noinspection deprecation
label = RegexUtils.replaceAllNN(label, SLASH_ST, SLASH_SPACE);
//noinspection deprecation
label = RegexUtils.replaceAllNN(label.trim(), START_WITH_CHARS, SPACE); // , Constants.EMPTY); //
//noinspection deprecation
label = RegexUtils.replaceAllNN(label, SLASH_CHARS, SLASH_SPACE);
return cleanLabel(Locale.FRENCH, label);
}
Expand Down Expand Up @@ -595,6 +619,9 @@ private static boolean containsIgnoreCase(@Nullable String string, @NotNull Stri
return false;
}

/**
 * Regex for the "face à " / "face au " / "face " prefixes,
 * built once by {@code CleanUtilsExtKt.makeALL_FACE_A_REGEX()}.
 * This is a Kotlin {@link Regex}, not a {@code java.util.regex.Pattern}.
 */
public static final Regex ALL_FACE_A_REGEX = CleanUtilsExtKt.makeALL_FACE_A_REGEX();
/**
 * Replacement string to pass alongside {@link #ALL_FACE_A_REGEX},
 * as returned by {@code CleanUtilsExtKt.makeALL_FACE_A_REGEX_REPLACEMENT()}.
 */
public static final String ALL_FACE_A_REGEX_REPLACEMENT = CleanUtilsExtKt.makeALL_FACE_A_REGEX_REPLACEMENT();

// TODO white-space VS non-word?
private static final Pattern FIRST = cleanWords("first");
private static final String FIRST_REPLACEMENT = cleanWordsReplacement("1st");
Expand Down Expand Up @@ -960,6 +987,8 @@ public static String cleanStreetTypes(@NotNull String string) {
private static final String FR_CA_BOULEVARD_REPLACEMENT = cleanWordsReplacement("Boul");
private static final Pattern FR_CA_CARREFOUR = cleanWordsFR("carrefour");
private static final String FR_CA_CARREFOUR_REPLACEMENT = cleanWordsReplacement("Carref");
// "carré" / "carre" -> "Carr".
// Inside a character class '|' is a literal pipe, not an alternation, so the class is written
// as [ée]; the previous [é|e] form also accepted the bogus spelling "carr|".
private static final Pattern FR_CA_CARRE = cleanWordsFR("carr[ée]");
private static final String FR_CA_CARRE_REPLACEMENT = cleanWordsReplacement("Carr");
private static final Pattern FR_CA_MONTAGNE = cleanWordsFR("montagne");
private static final String FR_CA_MONTAGNE_REPLACEMENT = cleanWordsReplacement("Mgne");
// "montée" / "montee". Inside a character class '|' is a literal pipe, not an alternation,
// so the class is written as [ée]; the previous [é|e] form also accepted "mont|e".
private static final Pattern FR_CA_MONTEE = cleanWordsFR("mont[ée]e");
Expand Down Expand Up @@ -1017,6 +1046,7 @@ public static String cleanStreetTypesFRCA(@NotNull String string) {
string = FR_CA_AUTOROUTE.matcher(string).replaceAll(FR_CA_AUTOROUTE_REPLACEMENT);
string = FR_CA_BOULEVARD.matcher(string).replaceAll(FR_CA_BOULEVARD_REPLACEMENT);
string = FR_CA_CARREFOUR.matcher(string).replaceAll(FR_CA_CARREFOUR_REPLACEMENT);
string = FR_CA_CARRE.matcher(string).replaceAll(FR_CA_CARRE_REPLACEMENT);
string = FR_CA_MONTAGNE.matcher(string).replaceAll(FR_CA_MONTAGNE_REPLACEMENT);
string = FR_CA_MONTEE.matcher(string).replaceAll(FR_CA_MONTEE_REPLACEMENT);
string = FR_CA_PARC_INDUSTRIEL.matcher(string).replaceAll(FR_CA_PARC_INDUSTRIEL_REPLACEMENT);
Expand Down
87 changes: 87 additions & 0 deletions src/main/java/org/mtransit/commons/CleanUtilsExt.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
@file:Suppress("FunctionName")

package org.mtransit.commons

import org.mtransit.commons.CleanUtils.PLACE_CHAR_ARRONDISSEMENT
import org.mtransit.commons.CleanUtils.PLACE_CHAR_AV
import org.mtransit.commons.CleanUtils.PLACE_CHAR_AVENUE
import org.mtransit.commons.CleanUtils.PLACE_CHAR_BOUL
import org.mtransit.commons.CleanUtils.PLACE_CHAR_BOULEVARD
import org.mtransit.commons.CleanUtils.PLACE_CHAR_CH
import org.mtransit.commons.CleanUtils.PLACE_CHAR_CIVIQUE
import org.mtransit.commons.CleanUtils.PLACE_CHAR_CROISS
import org.mtransit.commons.CleanUtils.PLACE_CHAR_D
import org.mtransit.commons.CleanUtils.PLACE_CHAR_DE
import org.mtransit.commons.CleanUtils.PLACE_CHAR_DES
import org.mtransit.commons.CleanUtils.PLACE_CHAR_DE_L
import org.mtransit.commons.CleanUtils.PLACE_CHAR_DE_LA
import org.mtransit.commons.CleanUtils.PLACE_CHAR_DU
import org.mtransit.commons.CleanUtils.PLACE_CHAR_L
import org.mtransit.commons.CleanUtils.PLACE_CHAR_LA
import org.mtransit.commons.CleanUtils.PLACE_CHAR_LE
import org.mtransit.commons.CleanUtils.PLACE_CHAR_LES
import org.mtransit.commons.CleanUtils.PLACE_CHAR_QUARTIER
import org.mtransit.commons.CleanUtils.PLACE_CHAR_RTE
import org.mtransit.commons.CleanUtils.PLACE_CHAR_RUE
import org.mtransit.commons.CleanUtils.PLACE_CHAR_TSSE

/**
 * Builds the case-insensitive regex matching any street-type prefix
 * (`PLACE_CHAR_ARRONDISSEMENT` .. `PLACE_CHAR_TSSE`) located either at the start of the
 * string or right after a `/`, in both cases with optional whitespace before the prefix.
 *
 * Capture group 2 holds the anchor part (start / slash plus whitespace),
 * group 3 holds the matched prefix.
 */
fun makeALL_ST_REGEX(): Regex {
    // Order is kept as-is: it is the order in which the alternatives are tried.
    val streetTypePrefixes = arrayOf(
        PLACE_CHAR_ARRONDISSEMENT,
        PLACE_CHAR_AV,
        PLACE_CHAR_AVENUE,
        PLACE_CHAR_BOUL,
        PLACE_CHAR_BOULEVARD,
        PLACE_CHAR_CH,
        PLACE_CHAR_CIVIQUE,
        PLACE_CHAR_CROISS,
        PLACE_CHAR_QUARTIER,
        PLACE_CHAR_RTE,
        PLACE_CHAR_RUE,
        PLACE_CHAR_TSSE,
    )
    val alternatives = streetTypePrefixes.joinToString(separator = "|")
    return Regex("((^\\s*|/\\s*)($alternatives))", RegexOption.IGNORE_CASE)
}

/**
 * Replacement template for the street-type regex: a back-reference to capture group 2,
 * so the matched prefix itself is dropped and only the anchor part is kept.
 */
fun makeALL_ST_REGEX_REPLACEMENT(): String {
    return "$2"
}

/**
 * Builds the case-insensitive regex matching any article / preposition prefix
 * (`PLACE_CHAR_DE_L` .. `PLACE_CHAR_L`) located either at the start of the string
 * or right after a `/`, in both cases with optional whitespace before the prefix.
 *
 * Capture group 2 holds the anchor part (start / slash plus whitespace),
 * group 3 holds the matched prefix.
 */
fun makeALL_CHARS_REGEX(): Regex =
    listOf(
        // Order is kept as-is: it is the order in which the alternatives are tried.
        PLACE_CHAR_DE_L,
        PLACE_CHAR_DE_LA,
        PLACE_CHAR_D,
        PLACE_CHAR_DE,
        PLACE_CHAR_DES,
        PLACE_CHAR_DU,
        PLACE_CHAR_LA,
        PLACE_CHAR_LE,
        PLACE_CHAR_LES,
        PLACE_CHAR_L,
    ).joinToString(
        separator = "|",
        prefix = "((^\\s*|/\\s*)(",
        postfix = "))",
    ).toRegex(RegexOption.IGNORE_CASE)

/**
 * Replacement template for the article / preposition regex: a back-reference to capture
 * group 2, so the matched prefix itself is dropped and only the anchor part is kept.
 */
fun makeALL_CHARS_REGEX_REPLACEMENT(): String {
    return "$2"
}

/**
 * Builds the case-insensitive regex matching "face à ", "face au " or "face "
 * when it sits at the start of the string or directly after a whitespace character.
 *
 * Capture group 2 holds the anchor (empty at string start, otherwise the whitespace
 * character), group 3 holds the matched "face ..." prefix.
 */
fun makeALL_FACE_A_REGEX(): Regex {
    // Longer forms first so that the bare "face " only applies when the others do not.
    val facingPrefixes = arrayOf("face à ", "face au ", "face ")
    val pattern = "((^|\\s)(" + facingPrefixes.joinToString("|") + "))"
    return pattern.toRegex(RegexOption.IGNORE_CASE)
}

/**
 * Replacement template for the "face ..." regex: a back-reference to capture group 2,
 * so the matched prefix itself is dropped and only the anchor part is kept.
 */
fun makeALL_FACE_A_REGEX_REPLACEMENT(): String {
    return "$2"
}
4 changes: 4 additions & 0 deletions src/main/java/org/mtransit/commons/StringsCleaner.kt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ object StringsCleaner {
string = CleanUtils.CLEAN_ET.matcher(string).replaceAll(CleanUtils.CLEAN_ET_REPLACEMENT)
string = CleanUtils.SAINT.matcher(string).replaceAll(CleanUtils.SAINT_REPLACEMENT)
string = CleanUtils.cleanStreetTypesFRCA(string)
string = CleanUtils.removePointsI(string) // BEFORE next regexes
string = CleanUtils.ALL_FACE_A_REGEX.replace(string, CleanUtils.ALL_FACE_A_REGEX_REPLACEMENT)
string = CleanUtils.ALL_ST_REGEX.replace(string, CleanUtils.ALL_ST_REGEX_REPLACEMENT)
string = CleanUtils.ALL_CHARS_REGEX.replace(string, CleanUtils.ALL_CHARS_REGEX_REPLACEMENT)
}
}
languages?.forEach { language ->
Expand Down