diff --git a/lib/marcel/magic.rb b/lib/marcel/magic.rb index 9d4ea86..0a6f3cc 100644 --- a/lib/marcel/magic.rb +++ b/lib/marcel/magic.rb @@ -123,34 +123,33 @@ def self.magic_match(io, method) def self.magic_match_io(io, matches, buffer) matches.any? do |offset, value, children| - match = - if value - if value.is_a?(Regexp) - match_regex(io, offset, value, buffer) - elsif Range === offset - io_seek(io, offset.begin, buffer) - x = io.read(offset.end - offset.begin + value.bytesize, buffer) - x && x.include?(value) - else - io_seek(io, offset, buffer) - io.read(value.bytesize, buffer) == value - end + match = if value + is_range = Range === offset + is_regexp = Regexp === value + sample_size = is_regexp ? 256 : value.bytesize + + x = if is_range + io_seek(io, offset.begin, buffer) + io.read(offset.end - offset.begin + sample_size, buffer) + else + io_seek(io, offset, buffer) + io.read(sample_size, buffer) end + if is_regexp + x&.match?(value) + elsif is_range + x&.include?(value) + else + x == value + end + end + io.rewind match && (!children || magic_match_io(io, children, buffer)) end end - def self.match_regex(io, offset, regexp, buffer) - start = offset.is_a?(Range) ? offset.begin : offset - io.read(start, buffer) if start > 0 - data = io.read(256, buffer) - return false unless data - - data.match?(regexp) - end - def self.io_seek(io, offset, buffer) return if offset == 0 @@ -162,6 +161,6 @@ def self.io_seek(io, offset, buffer) end end - private_class_method :magic_match, :magic_match_io, :match_regex, :io_seek + private_class_method :magic_match, :magic_match_io, :io_seek end end diff --git a/test/magic_test.rb b/test/magic_test.rb index 6950923..a0a6e5c 100644 --- a/test/magic_test.rb +++ b/test/magic_test.rb @@ -50,7 +50,7 @@ class Marcel::MimeType::MagicTest < Marcel::TestCase test "none of the regex patterns should match random test data" do ignore_list = %w( application/x-dbf ) - + extract_regexes = lambda do |matching_rules, collected = []| matching_rules.each do |offset, value, children| collected << [offset, value] if value.is_a?(Regexp) @@ -58,22 +58,17 @@ class Marcel::MimeType::MagicTest < Marcel::TestCase end collected end - + # Use a test string that's very unlikely to match any file format regex # Using only high Unicode characters and very specific patterns test_data = "🇨🇭 \xFF\xFE\x03\x05\x06🧀 cheese\x06\x07\x03" - + Marcel::MAGIC.each do |type, matching_rules| next if ignore_list.include?(type) regexes = extract_regexes.call(matching_rules) - - regexes.each do |offset, regex| - buffer = (+"").encode(Encoding::BINARY) - - result = Marcel::Magic.send(:match_regex, StringIO.new(test_data), offset, regex, buffer) - - assert_equal false, result, "Test data unexpectedly matched a file format regexp (#{type}, #{regex.inspect})" - end + + result = Marcel::Magic.send(:magic_match_io, StringIO.new(test_data), regexes, "".b) + assert_equal false, result, "Test data unexpectedly matched a file format regexp (#{type}, #{regexes.inspect})" end end