Skip to content

Commit 6c12c4a

Browse files
committed
TEDEFO-4995 Implement count-duplicates and get-duplicates functions
1 parent e98544a commit 6c12c4a

4 files changed

Lines changed: 164 additions & 0 deletions

File tree

src/main/java/eu/europa/ted/efx/interfaces/ScriptGenerator.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,25 @@ public DurationExpression composeSubtraction(final DurationExpression left,
10361036
public <T extends SequenceExpression> T composeDistinctValuesFunction(
10371037
T list, Class<T> listType);
10381038

1039+
/**
1040+
* Returns the target language script that counts the number of duplicate values in a sequence.
1041+
*
1042+
* @param sequence The sequence to count duplicates in.
1043+
* @return A numeric expression representing the number of duplicate values.
1044+
*/
1045+
public NumericExpression composeCountDuplicatesFunction(SequenceExpression sequence);
1046+
1047+
/**
1048+
* Returns the target language script that extracts the duplicate values from a sequence.
1049+
*
1050+
* @param <T> The type of the sequence expression.
1051+
* @param list The sequence to extract duplicates from.
1052+
* @param listType The class of the sequence expression type.
1053+
* @return A sequence containing only the values that appear more than once.
1054+
*/
1055+
public <T extends SequenceExpression> T composeGetDuplicatesFunction(
1056+
T list, Class<T> listType);
1057+
10391058
/**
10401059
* Returns the target language script that computes the union of two sequences
10411060
* (all values from both, with duplicates removed).

src/main/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2.java

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2816,6 +2816,42 @@ public void exitCountDurationsFunction(CountDurationsFunctionContext ctx) {
28162816
this.stack.push(this.script.composeCountOperation(this.stack.pop(DurationSequenceExpression.class)));
28172817
}
28182818

2819+
@Override
2820+
public void exitLateBoundCountDuplicatesFunction(LateBoundCountDuplicatesFunctionContext ctx) {
2821+
this.stack.push(this.script.composeCountDuplicatesFunction(
2822+
this.stack.pop(this.resolveSequenceType(this.stack.peekType()))));
2823+
}
2824+
2825+
@Override
2826+
public void exitCountDuplicatesStringsFunction(CountDuplicatesStringsFunctionContext ctx) {
2827+
this.stack.push(this.script.composeCountDuplicatesFunction(this.stack.pop(StringSequenceExpression.class)));
2828+
}
2829+
2830+
@Override
2831+
public void exitCountDuplicatesBooleansFunction(CountDuplicatesBooleansFunctionContext ctx) {
2832+
this.stack.push(this.script.composeCountDuplicatesFunction(this.stack.pop(BooleanSequenceExpression.class)));
2833+
}
2834+
2835+
@Override
2836+
public void exitCountDuplicatesNumbersFunction(CountDuplicatesNumbersFunctionContext ctx) {
2837+
this.stack.push(this.script.composeCountDuplicatesFunction(this.stack.pop(NumericSequenceExpression.class)));
2838+
}
2839+
2840+
@Override
2841+
public void exitCountDuplicatesDatesFunction(CountDuplicatesDatesFunctionContext ctx) {
2842+
this.stack.push(this.script.composeCountDuplicatesFunction(this.stack.pop(DateSequenceExpression.class)));
2843+
}
2844+
2845+
@Override
2846+
public void exitCountDuplicatesTimesFunction(CountDuplicatesTimesFunctionContext ctx) {
2847+
this.stack.push(this.script.composeCountDuplicatesFunction(this.stack.pop(TimeSequenceExpression.class)));
2848+
}
2849+
2850+
@Override
2851+
public void exitCountDuplicatesDurationsFunction(CountDuplicatesDurationsFunctionContext ctx) {
2852+
this.stack.push(this.script.composeCountDuplicatesFunction(this.stack.pop(DurationSequenceExpression.class)));
2853+
}
2854+
28192855
@Override
28202856
public void exitNumberFromStringFunction(NumberFromStringFunctionContext ctx) {
28212857
this.stack.push(this.script.composeToNumberConversion(this.stack.pop(StringExpression.class)));
@@ -3326,6 +3362,50 @@ private <T extends SequenceExpression> void exitDistinctValuesFunction(Class<T>
33263362

33273363
// #endregion Distinct-values ------------------------------------------------
33283364

3365+
// #region Get-duplicates ---------------------------------------------------
3366+
3367+
@Override
3368+
public void exitStringGetDuplicatesFunction(StringGetDuplicatesFunctionContext ctx) {
3369+
this.exitGetDuplicatesFunction(StringSequenceExpression.class);
3370+
}
3371+
3372+
@Override
3373+
public void exitBooleanGetDuplicatesFunction(BooleanGetDuplicatesFunctionContext ctx) {
3374+
this.exitGetDuplicatesFunction(BooleanSequenceExpression.class);
3375+
}
3376+
3377+
@Override
3378+
public void exitNumericGetDuplicatesFunction(NumericGetDuplicatesFunctionContext ctx) {
3379+
this.exitGetDuplicatesFunction(NumericSequenceExpression.class);
3380+
}
3381+
3382+
@Override
3383+
public void exitDateGetDuplicatesFunction(DateGetDuplicatesFunctionContext ctx) {
3384+
this.exitGetDuplicatesFunction(DateSequenceExpression.class);
3385+
}
3386+
3387+
@Override
3388+
public void exitTimeGetDuplicatesFunction(TimeGetDuplicatesFunctionContext ctx) {
3389+
this.exitGetDuplicatesFunction(TimeSequenceExpression.class);
3390+
}
3391+
3392+
@Override
3393+
public void exitDurationGetDuplicatesFunction(DurationGetDuplicatesFunctionContext ctx) {
3394+
this.exitGetDuplicatesFunction(DurationSequenceExpression.class);
3395+
}
3396+
3397+
@Override
3398+
public void exitLateBoundGetDuplicatesFunction(LateBoundGetDuplicatesFunctionContext ctx) {
3399+
this.exitGetDuplicatesFunction(this.resolveSequenceType(this.stack.peekType()));
3400+
}
3401+
3402+
private <T extends SequenceExpression> void exitGetDuplicatesFunction(final Class<T> listType) {
3403+
final T list = this.stack.pop(listType);
3404+
this.stack.push(this.script.composeGetDuplicatesFunction(list, listType));
3405+
}
3406+
3407+
// #endregion Get-duplicates ------------------------------------------------
3408+
33293409
// #region Union ------------------------------------------------------------
33303410

33313411
@Override

src/main/java/eu/europa/ted/efx/xpath/XPathScriptGenerator.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,12 @@ public BooleanExpression composeIsDistinctCondition(SequenceExpression sequence)
458458

459459
//#region Numeric functions -------------------------------------------------
460460

461+
@Override
462+
public NumericExpression composeCountDuplicatesFunction(final SequenceExpression sequence) {
463+
return new NumericExpression(
464+
"count(" + sequence.getScript() + ") - count(distinct-values(" + sequence.getScript() + "))");
465+
}
466+
461467
@Override
462468
public NumericExpression composeCountOperation(SequenceExpression list) {
463469
return new NumericExpression("count(" + list.getScript() + ")");
@@ -869,6 +875,15 @@ public <T extends SequenceExpression> T composeDistinctValuesFunction(
869875
return Expression.instantiate("distinct-values(" + list.getScript() + ")", listType);
870876
}
871877

878+
@Override
879+
public <T extends SequenceExpression> T composeGetDuplicatesFunction(
880+
final T list, final Class<T> listType) {
881+
final String seq = list.getScript();
882+
return Expression.instantiate(
883+
"for $v in distinct-values(" + seq + ") return if (count(" + seq + "[. = $v]) > 1) then $v else ()",
884+
listType);
885+
}
886+
872887
@Override
873888
public <T extends SequenceExpression> T composeUnionFunction(T listOne,
874889
T listTwo, Class<T> listType) {

src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2599,6 +2599,56 @@ void testDistinctValuesFunction_WithFieldReferences() {
25992599
"distinct-values(BT-00-Text)");
26002600
}
26012601

2602+
// #region: Count-duplicates
2603+
2604+
@Test
2605+
void testCountDuplicatesFunction_WithStringSequences() {
2606+
testExpressionTranslationWithContext(
2607+
"count(('one','two','one')) - count(distinct-values(('one','two','one')))", "ND-Root",
2608+
"count-duplicates(['one', 'two', 'one'])");
2609+
}
2610+
2611+
@Test
2612+
void testCountDuplicatesFunction_WithNumberSequences() {
2613+
testExpressionTranslationWithContext(
2614+
"count((1,2,3,2,3,4)) - count(distinct-values((1,2,3,2,3,4)))", "ND-Root",
2615+
"count-duplicates([1, 2, 3, 2, 3, 4])");
2616+
}
2617+
2618+
@Test
2619+
void testCountDuplicatesFunction_WithFieldReferences() {
2620+
testExpressionTranslationWithContext(
2621+
"count(PathNode/TextField/normalize-space(text())) - count(distinct-values(PathNode/TextField/normalize-space(text())))",
2622+
"ND-Root", "count-duplicates(BT-00-Text)");
2623+
}
2624+
2625+
// #endregion: Count-duplicates
2626+
2627+
// #region: Get-duplicates
2628+
2629+
@Test
2630+
void testGetDuplicatesFunction_WithStringSequences() {
2631+
testExpressionTranslationWithContext(
2632+
"for $v in distinct-values(('one','two','one')) return if (count(('one','two','one')[. = $v]) > 1) then $v else ()",
2633+
"ND-Root", "get-duplicates(['one', 'two', 'one'])");
2634+
}
2635+
2636+
@Test
2637+
void testGetDuplicatesFunction_WithNumberSequences() {
2638+
testExpressionTranslationWithContext(
2639+
"for $v in distinct-values((1,2,3,2,3,4)) return if (count((1,2,3,2,3,4)[. = $v]) > 1) then $v else ()",
2640+
"ND-Root", "get-duplicates([1, 2, 3, 2, 3, 4])");
2641+
}
2642+
2643+
@Test
2644+
void testGetDuplicatesFunction_WithFieldReferences() {
2645+
testExpressionTranslationWithContext(
2646+
"for $v in distinct-values(PathNode/TextField/normalize-space(text())) return if (count(PathNode/TextField/normalize-space(text())[. = $v]) > 1) then $v else ()",
2647+
"ND-Root", "get-duplicates(BT-00-Text)");
2648+
}
2649+
2650+
// #endregion: Get-duplicates
2651+
26022652
// #region: Union
26032653

26042654
@Test

0 commit comments

Comments
 (0)