From ceda9f905d2f8d34c6d88d104b257a703521b89a Mon Sep 17 00:00:00 2001 From: Brian White Date: Mon, 17 Sep 2012 18:05:17 -0400 Subject: [PATCH 1/4] Simple lastIndexOf --- buffertools.cc | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/buffertools.cc b/buffertools.cc index ddfbf88..bc6b9d4 100644 --- a/buffertools.cc +++ b/buffertools.cc @@ -177,6 +177,34 @@ struct IndexOfAction: BinaryAction { } }; +struct LastIndexOfAction: BinaryAction { + Handle apply(Handle& buffer, const uint8_t* data2, size_t size2, const Arguments& args, HandleScope& scope) { + const uint8_t* data = (const uint8_t*) Buffer::Data(buffer); + const size_t size = Buffer::Length(buffer); + + int32_t start = args[1]->Int32Value(); + + if (start < 0) + start = size - std::min(size, -start); + else if (static_cast(start) > size) + start = size; + + const uint8_t* p; + const uint8_t* prev; + while (true) { + p = boyermoore_search(data + start, size - start, data2, size2); + if (p) { + prev = p; + start = (prev - data) + 1; + } else + break; + } + + const ptrdiff_t offset = prev ? (prev - data) : -1; + return scope.Close(Integer::New(offset)); + } +}; + static char toHexTable[] = "0123456789abcdef"; // CHECKME is this cache efficient? @@ -276,6 +304,10 @@ Handle IndexOf(const Arguments& args) { return IndexOfAction()(args); } +Handle LastIndexOf(const Arguments& args) { + return LastIndexOfAction()(args); +} + Handle FromHex(const Arguments& args) { return FromHexAction()(args); } @@ -341,6 +373,7 @@ void RegisterModule(Handle target) { target->Set(String::NewSymbol("equals"), FunctionTemplate::New(Equals)->GetFunction()); target->Set(String::NewSymbol("compare"), FunctionTemplate::New(Compare)->GetFunction()); target->Set(String::NewSymbol("indexOf"), FunctionTemplate::New(IndexOf)->GetFunction()); + target->Set(String::NewSymbol("lastIndexOf"), FunctionTemplate::New(LastIndexOf)->GetFunction()); target->Set(String::NewSymbol("fromHex"), FunctionTemplate::New(FromHex)->GetFunction()); target->Set(String::NewSymbol("toHex"), FunctionTemplate::New(ToHex)->GetFunction()); } From 2a9817c35993892b48c9da93aa6a0027c1953d6f Mon Sep 17 00:00:00 2001 From: Brian White Date: Mon, 17 Sep 2012 21:04:25 -0400 Subject: [PATCH 2/4] Use reverse Boyer-Moore search for lastIndexOf --- BoyerMoore.h | 72 ++++++++++++++++++++++++++++++++++---------------- buffertools.cc | 16 +++-------- 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/BoyerMoore.h b/BoyerMoore.h index 830f4eb..074edaf 100644 --- a/BoyerMoore.h +++ b/BoyerMoore.h @@ -73,7 +73,7 @@ void prepare_goodsuffix_heuristic(const uint8_t *normal, const size_t size, int /* * Boyer-Moore search algorithm */ -const uint8_t *boyermoore_search(const uint8_t *haystack, size_t haystack_len, const uint8_t *needle, size_t needle_len) { +const uint8_t *boyermoore_search(const uint8_t *haystack, size_t haystack_len, const uint8_t *needle, size_t needle_len, bool reverse = false) { /* * Simple checks */ @@ -96,28 +96,54 @@ const uint8_t *boyermoore_search(const uint8_t *haystack, size_t haystack_len, c /* * Boyer-Moore search */ - size_t s = 0; - while(s <= (haystack_len - needle_len)) - { - size_t j = needle_len; - while(j > 0 && needle[j-1] == haystack[s+j-1]) - j--; - - if(j > 0) - { - int k = badcharacter[haystack[s+j-1]]; - int m; - if(k < (int)j && (m = j-k-1) > goodsuffix[j]) - s+= m; - else - s+= goodsuffix[j]; - } - else - { - delete[] goodsuffix; - return haystack + s; - } - } + size_t len = haystack_len - needle_len; + if (!reverse) { + size_t s = 0; + while(s <= len) + { + size_t j = needle_len; + while(j > 0 && needle[j-1] == haystack[s+j-1]) + j--; + + if(j > 0) + { + int k = badcharacter[haystack[s+j-1]]; + int m; + if(k < (int)j && (m = j-k-1) > goodsuffix[j]) + s+= m; + else + s+= goodsuffix[j]; + } + else + { + delete[] goodsuffix; + return haystack + s; + } + } + } else { + ptrdiff_t s = haystack_len; + while(s >= 0) + { + size_t j = needle_len; + while(j > 0 && needle[j-1] == haystack[s+(j-needle_len)-(s == haystack_len ? 1 : 0)]) + --j; + + if(j > 0) + { + int k = badcharacter[haystack[s+(j-needle_len)-1]]; + int m; + if(k < (int)j && (m = j-k-1) > goodsuffix[j]) + s-= m; + else + s-= goodsuffix[j]; + } + else + { + delete[] goodsuffix; + return haystack + (s - needle_len) + (s == haystack_len ? 0 : 1); + } + } + } delete[] goodsuffix; /* not found */ diff --git a/buffertools.cc b/buffertools.cc index bc6b9d4..3580cea 100644 --- a/buffertools.cc +++ b/buffertools.cc @@ -189,18 +189,10 @@ struct LastIndexOfAction: BinaryAction { else if (static_cast(start) > size) start = size; - const uint8_t* p; - const uint8_t* prev; - while (true) { - p = boyermoore_search(data + start, size - start, data2, size2); - if (p) { - prev = p; - start = (prev - data) + 1; - } else - break; - } - - const ptrdiff_t offset = prev ? (prev - data) : -1; + const uint8_t* p = boyermoore_search( + data, (start == 0 ? start : size - start), data2, size2, true); + + const ptrdiff_t offset = p ? (p - data) : -1; return scope.Close(Integer::New(offset)); } }; From 6b6354b536698db3b79bba84b9762f32ac4fa29e Mon Sep 17 00:00:00 2001 From: Brian White Date: Mon, 17 Sep 2012 21:10:54 -0400 Subject: [PATCH 3/4] Distinguish between 0 and undefined for "start index" to more closely align with str.lastIndexOf behavior --- buffertools.cc | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/buffertools.cc b/buffertools.cc index 3580cea..061c243 100644 --- a/buffertools.cc +++ b/buffertools.cc @@ -181,16 +181,24 @@ struct LastIndexOfAction: BinaryAction { Handle apply(Handle& buffer, const uint8_t* data2, size_t size2, const Arguments& args, HandleScope& scope) { const uint8_t* data = (const uint8_t*) Buffer::Data(buffer); const size_t size = Buffer::Length(buffer); + size_t len = 0; + int32_t start; - int32_t start = args[1]->Int32Value(); + if (args[1]->IsInt32()) { + start = args[1]->Int32Value(); - if (start < 0) - start = size - std::min(size, -start); - else if (static_cast(start) > size) - start = size; + if (start < 0) + start = size - std::min(size, -start); + else if (static_cast(start) > size) + start = size; + + if (start > 0) + len = size - start; + } else + len = size; const uint8_t* p = boyermoore_search( - data, (start == 0 ? start : size - start), data2, size2, true); + data, len, data2, size2, true); const ptrdiff_t offset = p ? (p - data) : -1; return scope.Close(Integer::New(offset)); From 93592f5533499044dbf451eff19087ac92c303c5 Mon Sep 17 00:00:00 2001 From: Brian White Date: Tue, 18 Sep 2012 15:56:49 -0400 Subject: [PATCH 4/4] Fix missing offset adjustments --- BoyerMoore.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BoyerMoore.h b/BoyerMoore.h index 074edaf..0e16902 100644 --- a/BoyerMoore.h +++ b/BoyerMoore.h @@ -130,10 +130,10 @@ const uint8_t *boyermoore_search(const uint8_t *haystack, size_t haystack_len, c if(j > 0) { - int k = badcharacter[haystack[s+(j-needle_len)-1]]; + int k = badcharacter[haystack[s+(j-needle_len)-(s == haystack_len ? 1 : 0)]]; int m; if(k < (int)j && (m = j-k-1) > goodsuffix[j]) - s-= m; + s-= m - (s == haystack_len ? 1 : 0); else s-= goodsuffix[j]; }