Skip to content

Commit 1da3d68

Browse files
committed
Strings cleaner improvements
1 parent 5129882 commit 1da3d68

4 files changed

Lines changed: 58 additions & 15 deletions

File tree

src/main/java/org/mtransit/commons/CharUtils.kt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,12 @@ object CharUtils {
1414
@JvmStatic
1515
// Returns how many characters of [charArray] are upper case; a null array counts as 0.
fun countUpperCase(charArray: CharArray?): Int {
    val chars = charArray ?: return 0
    return chars.count { ch -> ch.isUpperCase() }
}
1616

17+
@JvmStatic
18+
// Returns how many characters of [string] are lower case; a null string counts as 0.
// Counts directly on the string so no intermediate CharArray copy is allocated.
fun countLowerCase(string: String?): Int = string?.count { it.isLowerCase() } ?: 0
19+
20+
@JvmStatic
21+
// Returns how many characters of [charArray] are lower case; a null array counts as 0.
fun countLowerCase(charArray: CharArray?): Int {
    val chars = charArray ?: return 0
    return chars.count { ch -> ch.isLowerCase() }
}
22+
1723
@JvmStatic
1824
fun isDigitsOnly(str: CharSequence, notEmpty: Boolean): Boolean {
1925
if (str.isEmpty()) {

src/main/java/org/mtransit/commons/CleanUtils.java

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -548,10 +548,17 @@ public static String fixMcXCase(@NotNull String string) { // Mccowan -> McCowan
548548
Pattern.CASE_INSENSITIVE | RegexUtils.fUNICODE_CHARACTER_CLASS() | RegexUtils.fCANON_EQ());
549549

550550
@NotNull
551-
public static String toLowerCaseUpperCaseWords(@NotNull Locale locale, @NotNull String string, @NotNull String... ignoreWords) {
552-
if (string.isEmpty()) {
553-
return string;
551+
// Lower-cases the whole string with the given locale when CharUtils.isUppercaseOnly(string, true, true)
// reports true; otherwise returns the string unchanged. An empty string is returned immediately.
// NOTE(review): the meaning of the two boolean flags passed to isUppercaseOnly is not visible here; confirm against CharUtils.
public static String toLowerCaseUpperCaseStrings(@NotNull Locale locale, @NotNull String string) {
552+
if (string.isEmpty()) return string;
553+
if (CharUtils.isUppercaseOnly(string, true, true)) {
554+
return string.toLowerCase(locale);
554555
}
556+
return string;
557+
}
558+
559+
@NotNull
560+
public static String toLowerCaseUpperCaseWords(@NotNull Locale locale, @NotNull String string, @NotNull String... ignoreWords) {
561+
if (string.isEmpty()) return string;
555562
final float charCount = string.length();
556563
final float upperCaseCount = CharUtils.countUpperCase(string);
557564
final float percent = upperCaseCount / charCount;

src/main/java/org/mtransit/commons/StringsCleaner.kt

Lines changed: 38 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,17 @@ import java.util.Locale
44

55
object StringsCleaner {
66

7+
@JvmOverloads
78
@JvmStatic
8-
fun cleanRouteLongName(originalRouteLongName: String, languages: List<Locale>?): String {
9+
fun cleanRouteLongName(
10+
originalRouteLongName: String,
11+
languages: List<Locale>?,
12+
lowerUCStrings: Boolean = false,
13+
lowerUCWords: Boolean = false,
14+
vararg ignoredUCWords: String = emptyArray(),
15+
): String {
916
var routeLongName = originalRouteLongName
10-
routeLongName = cleanString(routeLongName, languages, short = false)
17+
routeLongName = cleanString(routeLongName, languages, lowerUCStrings, lowerUCWords, *ignoredUCWords, short = false)
1118
return routeLongName
1219
}
1320

@@ -16,6 +23,9 @@ object StringsCleaner {
1623
fun cleanTripHeadsign(
1724
originalTripHeadsign: String,
1825
languages: List<Locale>?,
26+
lowerUCStrings: Boolean = false,
27+
lowerUCWords: Boolean = false,
28+
vararg ignoredUCWords: String = emptyArray(),
1929
removeVia: Boolean = false,
2030
): String {
2131
var tripHeadsign = originalTripHeadsign
@@ -29,19 +39,40 @@ object StringsCleaner {
2939
if (languages?.contains(Locale.FRENCH) == true) {
3040
tripHeadsign = CleanUtils.keepToFR(tripHeadsign)
3141
}
32-
tripHeadsign = cleanString(tripHeadsign, languages, short = true)
42+
tripHeadsign = cleanString(tripHeadsign, languages, lowerUCStrings, lowerUCWords, *ignoredUCWords, short = true)
3343
return tripHeadsign
3444
}
3545

46+
@JvmOverloads
3647
@JvmStatic
37-
fun cleanStopName(originalStopName: String, languages: List<Locale>?): String {
48+
fun cleanStopName(
49+
originalStopName: String,
50+
languages: List<Locale>?,
51+
lowerUCStrings: Boolean = false,
52+
lowerUCWords: Boolean = false,
53+
vararg ignoredUCWords: String = emptyArray(),
54+
): String {
3855
var stopName = originalStopName
39-
stopName = cleanString(stopName, languages, short = true)
56+
stopName = cleanString(stopName, languages, lowerUCStrings, lowerUCWords, *ignoredUCWords, short = true)
4057
return stopName
4158
}
4259

43-
private fun cleanString(originalString: String, languages: List<Locale>?, short: Boolean): String {
60+
private fun cleanString(
61+
originalString: String,
62+
languages: List<Locale>?,
63+
lowerUCStrings: Boolean = false,
64+
lowerUCWords: Boolean = false,
65+
vararg ignoredUCWords: String = emptyArray(),
66+
short: Boolean
67+
): String {
4468
var string = originalString
69+
languages?.forEach { language ->
70+
if (lowerUCWords) {
71+
string = CleanUtils.toLowerCaseUpperCaseWords(language, string, *ignoredUCWords)
72+
} else if (lowerUCStrings) {
73+
string = CleanUtils.toLowerCaseUpperCaseStrings(language, string)
74+
}
75+
}
4576
if (!short) {
4677
string = CleanUtils.cleanSlashes(string)
4778
}
@@ -68,4 +99,4 @@ object StringsCleaner {
6899
}
69100
return string
70101
}
71-
}
102+
}

src/main/java/org/mtransit/scratch/RegexScratch.kt

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,20 +14,19 @@ internal object RegexScratch {
1414
var string: String
1515
string = ""
1616
println("string: '$string'.")
17-
println("-------")
17+
println("=======")
1818

1919
val matcher = pattern.matcher(string)
2020
while (matcher.find()) {
2121
println("-------")
22-
println("group: '" + matcher.group() + "'.")
23-
println("groupCount: '" + matcher.groupCount() + "'.")
22+
println("- Found group: '" + matcher.group() + "' (count: '" + matcher.groupCount() + "'):")
2423
for (g in 0..matcher.groupCount()) {
25-
println("group[" + g + "]: '" + matcher.group(g) + "'.")
24+
println(" - group[" + g + "]: '" + matcher.group(g) + "'.")
2625
}
2726
println("-------")
2827
}
2928

30-
println("-------")
29+
println("=======")
3130
var replaceAll: String
3231
replaceAll = ""
3332
println("replaceAll: '$replaceAll'.")

0 commit comments

Comments
 (0)