diff --git a/lib/pericope.rb b/lib/pericope.rb index 50368cb..4e63ca3 100644 --- a/lib/pericope.rb +++ b/lib/pericope.rb @@ -245,12 +245,14 @@ def set_book(value) def parse_reference(reference) reference = normalize_reference(reference) - (reference.nil? || reference.empty?) ? [] : parse_ranges(reference.split(/[,;]/)) + (reference.nil? || reference.empty?) ? [] : parse_ranges(reference.split(/[,;]/).delete_if{|s| s.length==0}) end def normalize_reference(reference) - [ [%r{(\d+)[".](\d+)},'\1:\2'], # 12"5 and 12.5 -> 12:5 - [%r{(–|—)},'-'], # convert em dash and en dash to - + [ [%r{[".]},':'], # 12"5 and 12.5 -> 12:5 + [%r{:\s*\(},':'], # replace any ( after a : with a : only + [%r{(\(|\))},','], # replace any remaining () with a , + [%r{(–|—)},'-'], # convert em dash and en dash to - [%r{[^0-9,:;\-–—]},''] # remove everything but [0-9,;:-] ].each { |pattern, replacement| reference.gsub!(pattern, replacement) } reference @@ -348,8 +350,28 @@ def self.match_one(text) nil end - - + def self.get_unmatched_ending(match) + i = 0 + str = match.to_s + stack = [] + + str.each_char { |c| + if c == '(' + stack << c + elsif c == ')' + if stack.size() > 0 + stack.pop() + else + return str[i..str.length-1] + end + end + i = i + 1 + } + + return "" + end + + # matches all valid Bible references in the supplied string # ! will not necessarily return references in order ! def self.match_all(text, &block) @@ -359,8 +381,17 @@ def self.match_all(text, &block) for book_regex in book_name_regexes rx = book_regex[1] while (match = unmatched.match rx) # find all occurrences of pericopes in this book - length = match.end(0) - match.begin(0) - + + # calculate the unnecessary parens at the end of the statement + unmatchedEnding = Pericope.get_unmatched_ending(match) + length = match.end(0) - match.begin(0) - unmatchedEnding.length + lengthFromBegin = match.end(0) - unmatchedEnding.length + + # recalculate the matchdata based on the shortened expression + if unmatchedEnding.length > 0 + match = unmatched[0..lengthFromBegin - 1].match(rx) + end + # after matching "2 Peter" don't match "Peter" again as "1 Peter" # but keep the same number of characters in the string so indices work unmatched = match.pre_match + ("*" * length) + match.post_match @@ -375,13 +406,15 @@ def self.match_all(text, &block) block_given? ? text : matches end - - def parse_ranges(ranges) + return if ranges == nil + recent_chapter = nil # e.g. in 12:1-8, remember that 12 is the chapter when we parse the 8 recent_chapter = 1 if !self.book_has_chapters? ranges.map do |range| + range = range.split('-') # parse the low end of a verse range and the high end separately + range << range[0] if (range.length < 2) # treat 12:4 as 12:4-12:4 lower_chapter_verse = range[0].split(':').map {|n| n.to_i} # parse "3:28" to [3,28] upper_chapter_verse = range[1].split(':').map {|n| n.to_i} # parse "3:28" to [3,28] @@ -409,6 +442,7 @@ def parse_ranges(ranges) Range.new( Pericope.get_id(book, lower_chapter_verse[0], lower_chapter_verse[1]), Pericope.get_id(book, upper_chapter_verse[0], upper_chapter_verse[1])) + end end @@ -528,7 +562,8 @@ def self.book_chapter_counts end ValidReference = begin - reference = '((\s*\d{1,3})(\s*[:\"\.]\s*\d{1,3}(a|b)?(\s*(,|;)\s*(\d{1,3}[:\"\.])?\s*\d{1,3}(a|b)?)*)?(\s*(-|–|—)\s*(\d{1,3}\s*[:\"\.])?(\d{1,3}(a|b)?)(\s*(,|;)\s*(\d{1,3}\s*[:\"\.])?\s*\d{1,3}(a|b)?)*)*)' + #note: this regular expression will include "optional" verses enclosed in parentheses by default + reference = '(\(?(\s*\d{1,3})(\s*[:\"\.]\s*\(?\s*\d{1,3}(a|b)?(\s*\))?(\s*(,|;| )\s*(\d{1,3}[:\"\.])?\s*\(?\s*\(?\s*\d{1,3}(a|b)?(\s*\))?)*)?(\s*(-|–|—)\s*(\s*\(?\s*\d{1,3}\s*[:\"\.])?(\d{1,3}(a|b)?)(\s*\))?(\s*(,|;| )\s*\(?\s*(\d{1,3}\s*[:\"\.])?\s*\(?\d{1,3}(a|b)?(\s*\))?)*)*)' end diff --git a/lib/pericope/version.rb b/lib/pericope/version.rb index 07f4fd3..d2a9cf4 100644 --- a/lib/pericope/version.rb +++ b/lib/pericope/version.rb @@ -1,3 +1,3 @@ class Pericope - VERSION = "0.5.3" unless defined?(::Pericope::Version) + VERSION = "0.5.4" unless defined?(::Pericope::Version) end diff --git a/test/pericope_test.rb b/test/pericope_test.rb index 10edc69..21f1d09 100644 --- a/test/pericope_test.rb +++ b/test/pericope_test.rb @@ -92,6 +92,17 @@ class PericopeTest < ActiveSupport::TestCase ["phil 1:1-17,2:3-5,17"] => "Philippians 1:1-17, 2:3-5, 17", # test comma-separated ranges # test the values embedded in the pericope extraction + ["Leviticus (18:1–5) 19:9–18"] => "Leviticus 18:1-5, 19:9-18", + ["Matt 1:1-2, 2:1-10"] => "Matthew 1:1-2, 2:1-10", + ["Matt 1:1-2, (1-10)"] => "Matthew 1:1-2, 1-10", + ["Matt 1:1-2, (2:1-10)"] => "Matthew 1:1-2, 2:1-10", + ["Matt 1:1-2 (2:1-10)"] => "Matthew 1:1-2, 2:1-10", + ["Matt 1:1-2 (2:1-10)"] => "Matthew 1:1-2, 2:1-10", + ["Matt 1:(1-10) 5:1-12"] => "Matthew 1:1-10, 5:1-12", + ["Matt 1\"(1-10) 5:(1-12)"] => "Matthew 1:1-10, 5:1-12", + ["Matt 1\" (1-10) 5:(1-12)"] => "Matthew 1:1-10, 5:1-12", + ["Mark 2:23-28 (3:1-6"] => "Mark 2:23-28, 3:1-6", + ["Psalm 29 (2)"] => "Psalm 29", ["Psalm 37:3–7a, 23–24, 39–40"] => "Psalm 37:3-7, 23-24, 39-40", ["John 20:19–23"] => "John 20:19-23", ["2 Peter 4.1 "] => "2 Peter 3:1", # nb: chapter coercion