From da559a01f9361fb4160d5f976c10c557b59acbc6 Mon Sep 17 00:00:00 2001
From: Marek Horst
Date: Tue, 4 Dec 2018 17:11:46 +0100
Subject: [PATCH] Closes #430: removal_least_used parameter is improperly used in document-similarity-s1-rank_filter

---
 .../src/main/pig/document-similarity-s1-rank_filter.pig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/document-similarity/document-similarity-logic/src/main/pig/document-similarity-s1-rank_filter.pig b/document-similarity/document-similarity-logic/src/main/pig/document-similarity-s1-rank_filter.pig
index d6a7449c..9cd069bf 100644
--- a/document-similarity/document-similarity-logic/src/main/pig/document-similarity-s1-rank_filter.pig
+++ b/document-similarity/document-similarity-logic/src/main/pig/document-similarity-s1-rank_filter.pig
@@ -128,7 +128,7 @@ wc = foreach group_by_terms generate COUNT(terms) as count, group as term;
 wc_ranked = rank wc by count asc;
 store wc_ranked into '$outputPath$WORD_RANK_HR';
 
-term_condition_accepted_tmp = filter wc_ranked by ($0 <= (double)tc.val*$removal_rate and $0 >= $removal_least_used);
+term_condition_accepted_tmp = filter wc_ranked by ($0 <= (double)tc.val*$removal_rate and $1 >= $removal_least_used);
 term_condition_accepted_tmp_joined_with_docs = join term_condition_accepted_tmp by term, doc_all by term;
 doc_selected_termsX = foreach term_condition_accepted_tmp_joined_with_docs generate doc_all::docId, doc_all::term;
 store doc_selected_termsX into '$outputPath$WORD_COUNT';