From 0c6b7f1c2925ed79350279acce90041d92f7c027 Mon Sep 17 00:00:00 2001 From: kaldan007 Date: Sat, 22 Nov 2025 17:01:33 +0530 Subject: [PATCH] fix: tokenizing sentence by verb is failing. --- botok/tokenizers/sentencetokenizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/botok/tokenizers/sentencetokenizer.py b/botok/tokenizers/sentencetokenizer.py index 7ba9ed0..619cb82 100644 --- a/botok/tokenizers/sentencetokenizer.py +++ b/botok/tokenizers/sentencetokenizer.py @@ -107,9 +107,9 @@ def get_sentence_indices(tokens): sentence_idx = piped_sentencify(sentence_idx, tokens, is_verb_n_punct) # 4. find verbs followed by clause boundaries - sentence_idx = piped_sentencify( - sentence_idx, tokens, is_verb_n_clause_boundary, threshold=30 - ) # max size to check + # sentence_idx = piped_sentencify( + # sentence_idx, tokens, is_verb_n_clause_boundary, threshold=30 + # ) # max size to check # joining the sentences without verbs to either the one preceding them or following them sentence_idx = join_no_verb_sentences(sentence_idx, tokens)