Patch note: in the filter that builds term_condition_accepted_tmp, the
lower-bound test against $removal_least_used now reads positional field $1
instead of $0; the upper-bound test on $0 is unchanged.
NOTE(review): presumably $0 is the rank column added by `rank wc by count asc`
and $1 is `count` -- confirm against the RANK output schema.

diff --git a/document-similarity/document-similarity-logic/src/main/pig/document-similarity-s1-rank_filter.pig b/document-similarity/document-similarity-logic/src/main/pig/document-similarity-s1-rank_filter.pig
index d6a7449c..9cd069bf 100644
--- a/document-similarity/document-similarity-logic/src/main/pig/document-similarity-s1-rank_filter.pig
+++ b/document-similarity/document-similarity-logic/src/main/pig/document-similarity-s1-rank_filter.pig
@@ -128,7 +128,7 @@
 wc = foreach group_by_terms generate COUNT(terms) as count, group as term;
 wc_ranked = rank wc by count asc;
 store wc_ranked into '$outputPath$WORD_RANK_HR';
-term_condition_accepted_tmp = filter wc_ranked by ($0 <= (double)tc.val*$removal_rate and $0 >= $removal_least_used);
+term_condition_accepted_tmp = filter wc_ranked by ($0 <= (double)tc.val*$removal_rate and $1 >= $removal_least_used);
 term_condition_accepted_tmp_joined_with_docs = join term_condition_accepted_tmp by term, doc_all by term;
 doc_selected_termsX = foreach term_condition_accepted_tmp_joined_with_docs generate doc_all::docId, doc_all::term;
 store doc_selected_termsX into '$outputPath$WORD_COUNT';