Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ rocm_configure(name = "local_config_rocm")

http_archive(
name = "com_google_sentencepiece",
build_file = "@//patches:sentencepiece.BUILD",
build_file = "@//third_party:sentencepiece.BUILD",
patch_args = ["-p1"],
patches = ["@//patches:com_google_sentencepiece.diff"],
sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754",
Expand All @@ -111,7 +111,7 @@ http_archive(

http_archive(
name = "darts_clone",
build_file = "@//patches:darts_clone.BUILD",
build_file = "@//third_party:darts_clone.BUILD",
patch_args = ["-p0"],
patches = ["//patches:darts_no_exceptions.diff"],
sha256 = "c97f55d05c98da6fcaf7f9ecc6a6dc6bc5b18b8564465f77abff8879d446491c",
Expand Down Expand Up @@ -161,6 +161,14 @@ http_archive(
urls = ["https://github.com/MediaTek-NeuroPilot/tflite-neuron-delegate/archive/refs/heads/update_for_leroy.zip"],
)

# Oleander stemming library: downloaded as a zip archive of one pinned commit
# (the commit hash appears in both `strip_prefix` and `urls`), verified against
# `sha256`, and built with the repo-local BUILD file under //third_party.
http_archive(
name = "oleander_stemming_library",
build_file = "@//third_party:oleander_stemming_library.BUILD",
sha256 = "d4390e82590d67c73ac32629ddd4fc3ba0b6b293a2757612a2e76726c3752e0b",
strip_prefix = "OleanderStemmingLibrary-45eb3485f67b94d67bb883601ed65459975b3960",
urls = ["https://github.com/Blake-Madden/OleanderStemmingLibrary/archive/45eb3485f67b94d67bb883601ed65459975b3960.zip"],
)

new_git_repository(
name = "org_mlperf_inference",
build_file = "@//flutter/android/third_party:loadgen.BUILD",
Expand Down
2 changes: 2 additions & 0 deletions flutter/cpp/datasets/ifeval_utils/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ cc_library(
name = "ifeval_utils",
hdrs = [
"common.h",
"irregular-plurals.h",
"json.h",
"types.h",
],
Expand All @@ -36,5 +37,6 @@ cc_library(
}),
deps = [
"@cld2",
"@oleander_stemming_library",
],
)
76 changes: 59 additions & 17 deletions flutter/cpp/datasets/ifeval_utils/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@ inline std::string tolower(std::string s) {
return s;
}

inline bool ends_with(const std::string& s, const std::string& suf) {
if (s.size() < suf.size()) return false;
std::string a = tolower(s.substr(s.size() - suf.size()));
std::string b = tolower(suf);
return a == b;
// Returns a copy of `s` in which every char has been passed through
// std::tolower. Each char is widened via unsigned char first, because
// std::tolower must not be handed a negative value.
inline std::string to_lower_ascii(std::string s) {
  for (std::size_t i = 0; i < s.size(); ++i) {
    const auto byte = static_cast<unsigned char>(s[i]);
    s[i] = static_cast<char>(std::tolower(byte));
  }
  return s;
}

// True when `c` is an underscore or is classified as alphanumeric by
// std::isalnum; false for everything else.
inline bool is_word_char(unsigned char c) {
  if (std::isalnum(c)) return true;
  return c == '_';
}

inline bool contains_string(const std::string& text,
Expand All @@ -48,17 +51,25 @@ inline bool contains_string(const std::string& text,
return h.find(n) != std::string::npos;
}

// Case-folded suffix test with an optional tolerance.
//
// Looks at the last `suf.size() + threshold` characters of `s` (fewer when `s`
// is shorter than that), lower-cases both that tail and `suf` with the file's
// `tolower` helper, and then:
//   * threshold == 0: returns whether the tail equals `suf` exactly;
//   * threshold  > 0: returns whether `contains_string` finds `suf` anywhere
//     inside the tail.
// Returns false when `s` is shorter than `suf`.
inline bool ends_with(const std::string& s, const std::string& suf,
                      unsigned threshold) {
  if (s.size() < suf.size()) return false;

  // Clamp the tolerance to the characters actually available in front of the
  // suffix. Without this, `s.size() - (suf.size() + threshold)` wraps around
  // for short inputs and substr() throws std::out_of_range.
  const std::size_t slack = s.size() - suf.size();
  const std::size_t extra = threshold < slack ? threshold : slack;
  const std::size_t window = suf.size() + extra;

  std::string a = tolower(s.substr(s.size() - window));
  std::string b = tolower(suf);
  return threshold == 0 ? a == b : contains_string(a, b);
}

// Case-folded prefix test with an optional tolerance.
//
// Takes the first `prf.size() + threshold` characters of `s` (substr clamps
// to the end of the string), lower-cases that head and `prf` with the file's
// `tolower` helper, and then requires exact equality when `threshold` is 0 or
// a `contains_string` hit inside the head otherwise. Returns false when `s`
// is shorter than `prf`.
inline bool starts_with(const std::string& s, const std::string& prf,
                        unsigned threshold) {
  if (prf.size() > s.size()) return false;

  const std::string head = tolower(s.substr(0, prf.size() + threshold));
  const std::string wanted = tolower(prf);

  if (threshold == 0) return head == wanted;
  return contains_string(head, wanted);
}

inline bool contains_word(const std::string& text, const std::string& word) {
if (word.empty()) return false;

auto to_lower_ascii = [](std::string s) {
for (char& c : s) c = std::tolower(static_cast<unsigned char>(c));
return s;
};
auto is_word_char = [](unsigned char c) {
return std::isalnum(c) || c == '_'; // match std::regex \b notion of "word"
};

std::string t = to_lower_ascii(text);
std::string w = to_lower_ascii(word);

Expand All @@ -83,6 +94,39 @@ inline bool contains_none(const std::string& text,
return true;
}

// Finds the first occurrence of `keyword` in `text` at or after `pos`,
// comparing lower-cased copies of both, and reports the whole word around it.
//
// The match is widened left and right for as long as the neighbouring
// characters satisfy is_word_char(). On success `containing_word` receives
// that span taken from the original (not lower-cased) `text`, and the return
// value is the index where the span starts. Returns std::string::npos, leaving
// `containing_word` untouched, when `keyword` is empty, `pos` is not inside
// `text`, or no match exists.
inline size_t find_containing_word(const std::string& text,
                                   const std::string& keyword,
                                   std::string& containing_word, size_t pos) {
  if (keyword.empty()) return std::string::npos;
  if (pos >= text.size()) return std::string::npos;

  const std::string haystack = to_lower_ascii(text);
  const std::string needle = to_lower_ascii(keyword);

  const size_t hit = haystack.find(needle, pos);
  if (hit == std::string::npos) return std::string::npos;

  // Walk backwards until the character before `first` is not a word char.
  size_t first = hit;
  for (; first > 0; --first) {
    if (!is_word_char(static_cast<unsigned char>(haystack[first - 1]))) break;
  }

  // Walk forwards until `last` sits on a non-word char or the end of text.
  size_t last = hit + needle.size();
  for (; last < haystack.size(); ++last) {
    if (!is_word_char(static_cast<unsigned char>(haystack[last]))) break;
  }

  // Indices are taken from the lower-cased copy but applied to the original
  // text so that the caller gets the word in its original casing.
  containing_word = text.substr(first, last - first);
  return first;
}

// Convenience overload: same search as the four-argument
// find_containing_word, starting from the beginning of `text`.
inline size_t find_containing_word(const std::string& text,
                                   const std::string& keyword,
                                   std::string& out_word) {
  const size_t search_from = 0;
  return find_containing_word(text, keyword, out_word, search_from);
}

inline std::string remove_font_modifiers(const std::string& s) {
std::string out;
out.reserve(s.size());
Expand Down Expand Up @@ -115,14 +159,12 @@ inline std::string remove_font_modifiers(const std::string& s) {

// Returns `s` without its first line, i.e. everything after the first '\n'.
inline std::string remove_first_line(const std::string& s) {
std::size_t pos = s.find('\n');
return (pos == std::string::npos) ? std::string{} : s.substr(pos + 1);
// If there is no newline, removing the first line yields empty.
// NOTE(review): the return below can never execute because the function has
// already returned above. The two statements disagree on the no-newline case
// (empty string vs. a copy of `s`) and look like the old and new lines of a
// diff shown together — confirm which one is intended and delete the other.
return (pos == std::string::npos) ? std::string(s) : s.substr(pos + 1);
}

// Returns `s` without its last line, i.e. everything before the last '\n'.
inline std::string remove_last_line(const std::string& s) {
std::size_t pos = s.rfind('\n');
return (pos == std::string::npos) ? std::string{} : s.substr(0, pos);
// If there is no newline, removing the last line yields empty.
// NOTE(review): the return below can never execute because the function has
// already returned above. The two statements disagree on the no-newline case
// (empty string vs. a copy of `s`) and look like the old and new lines of a
// diff shown together — confirm which one is intended and delete the other.
return (pos == std::string::npos) ? std::string(s) : s.substr(0, pos);
}

// Returns the 8 transformations as an array of strings.
Expand Down
Loading
Loading