Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions keyvi/bin/keyvi_c/c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,19 @@

#include "keyvi/c_api/c_api.h"

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <string>
#include <utility>

#include "keyvi/dictionary/completion/multiword_completion.h"
#include "keyvi/dictionary/completion/prefix_completion.h"
#include "keyvi/dictionary/dictionary.h"
#include "keyvi/dictionary/match.h"
#include "keyvi/dictionary/match_iterator.h"

using keyvi::dictionary::Dictionary;
using keyvi::dictionary::dictionary_t;
Expand All @@ -41,7 +48,7 @@ using keyvi::dictionary::completion::PrefixCompletion;
namespace {
/**
 * Copies a std::string into a freshly malloc'ed, NUL-terminated buffer.
 *
 * @param str string to copy.
 * @return pointer to the copy, or nullptr if the allocation failed. The buffer
 *         comes from malloc, so it has to be released with free().
 */
char* std_2_c_string(const std::string& str) {
  // +1 for the terminating NUL that c_str() guarantees.
  const size_t c_str_length = str.size() + 1;
  auto* result = static_cast<char*>(std::malloc(c_str_length));
  if (result == nullptr) {
    // Out of memory: report it to the caller instead of writing through a null pointer.
    return nullptr;
  }
  std::memcpy(result, str.c_str(), c_str_length);
  return result;
}
Expand All @@ -54,7 +61,7 @@ struct keyvi_dictionary {
};

// Handle type that wraps a match_t so it can be passed around by pointer.
struct keyvi_match {
  // Sink constructor: takes the match by value and moves it into place, so
  // callers passing a temporary avoid an extra copy. Keeping a second
  // `const match_t&` overload next to it would make every call ambiguous.
  explicit keyvi_match(match_t obj) : obj_(std::move(obj)) {}

  // The wrapped match.
  match_t obj_;
};
Expand Down Expand Up @@ -91,7 +98,7 @@ keyvi_dictionary* keyvi_create_dictionary(const char* filename) {
try {
return new keyvi_dictionary(Dictionary(filename));
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
std::cerr << e.what() << '\n';
return nullptr;
}
}
Expand Down Expand Up @@ -129,7 +136,7 @@ keyvi_match_iterator* keyvi_dictionary_get_fuzzy(const keyvi_dictionary* dict, c

/**
 * Runs a multi-word completion lookup on a dictionary.
 *
 * @param dict    dictionary handle; dereferenced without a null check.
 * @param key     pointer to the first byte of the query.
 * @param key_len number of bytes of `key` to use (the key need not be NUL-terminated).
 * @param cutoff  forwarded unchanged to MultiWordCompletion::GetCompletions.
 * @return a keyvi_match_iterator allocated with `new`, wrapping the completion results.
 */
keyvi_match_iterator* keyvi_dictionary_get_multi_word_completions(const keyvi_dictionary* dict, const char* key,
                                                                  const size_t key_len, const size_t cutoff) {
  // The completer is only queried, never modified, so a single const instance is enough.
  MultiWordCompletion const multiWordCompletion(dict->obj_);
  return new keyvi_match_iterator(multiWordCompletion.GetCompletions(std::string(key, key_len), cutoff));
}

Expand Down Expand Up @@ -166,7 +173,7 @@ keyvi_bytes keyvi_match_get_msgpacked_value(const struct keyvi_match* match) {
if (0 == data_size) {
return empty_keyvi_bytes;
}
auto data_ptr = malloc(data_size);
auto* data_ptr = malloc(data_size);
if (nullptr == data_ptr) {
return empty_keyvi_bytes;
}
Expand Down
115 changes: 69 additions & 46 deletions keyvi/bin/keyvicompiler/keyvicompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,44 +23,61 @@
* Author: hendrik
*/

#include <cstddef>
#include <cstdint>
#include <exception>
#include <fstream>
#include <functional>

#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/join.hpp>
#include <boost/filesystem.hpp>
#include <ios>
#include <iostream>
#include <ostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <boost/algorithm/string/split.hpp>
#include <boost/filesystem/directory.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/lexical_cast/bad_lexical_cast.hpp>
#include <boost/program_options.hpp> //NOLINT
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/positional_options.hpp>
#include <boost/program_options/value_semantic.hpp>
#include <boost/program_options/variables_map.hpp>
#include <boost/range/adaptor/map.hpp> //NOLINT
#include <boost/range/iterator_range_core.hpp>

#include "keyvi/dictionary/dictionary_compiler.h"
#include "keyvi/dictionary/dictionary_types.h"
#include "keyvi/dictionary/fsa/internal/constants.h"
#include "keyvi/util/configuration.h"

/**
 * Progress callback handed to the compiler's Compile(): prints how many items
 * have been processed so far, with a percentage, to stdout.
 *
 * @param added   number of items processed so far.
 * @param overall total number of items.
 * @param unused  opaque user pointer; ignored.
 */
void callback(size_t added, size_t overall, void* /*unused*/) {
  constexpr size_t kFullPercent = 100;
  // With nothing to process there is no ratio to compute; report it as
  // complete instead of dividing by zero.
  const size_t percent = (overall == 0) ? kFullPercent : (kFullPercent * added) / overall;
  std::cout << "Processed " << added << "/" << overall << "(" << percent << "%)." << '\n';
}

template <typename CompilerType, typename ValueType>
void compile_multiple(CompilerType* compiler, std::function<std::pair<std::string, ValueType>(std::string)> parser,

Check warning on line 65 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

function 'compile_multiple' has cognitive complexity of 26 (threshold 25) [readability-function-cognitive-complexity]

Check warning on line 65 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

function 'compile_multiple' has cognitive complexity of 26 (threshold 25) [readability-function-cognitive-complexity]
const std::vector<std::string>& inputs) {
boost::iostreams::filtering_istream input_stream;
std::string line;

for (auto input_as_string : inputs) {
for (const auto& input_as_string : inputs) {
auto input = boost::filesystem::path(input_as_string);

if (boost::filesystem::is_directory(input)) {
int files_added = 0;

Check warning on line 74 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Release on macos-latest

variable 'files_added' set but not used [-Wunused-but-set-variable]

Check warning on line 74 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Debug on macos-latest

variable 'files_added' set but not used [-Wunused-but-set-variable]
for (auto& entry : boost::make_iterator_range(boost::filesystem::directory_iterator(input), {})) {
if (entry.path().extension() == ".gz") {
input_stream.push(boost::iostreams::gzip_decompressor());
}

boost::iostreams::file_source file(entry.path().string(), std::ios_base::in | std::ios_base::binary);
boost::iostreams::file_source const file(entry.path().string(), std::ios_base::in | std::ios_base::binary);
input_stream.push(file);
++files_added;
while (std::getline(input_stream, line)) {
Expand All @@ -79,7 +96,7 @@
input_stream.push(boost::iostreams::gzip_decompressor());
}

boost::iostreams::file_source file(input.string(), std::ios_base::in | std::ios_base::binary);
boost::iostreams::file_source const file(input.string(), std::ios_base::in | std::ios_base::binary);

input_stream.push(file);
while (std::getline(input_stream, line)) {
Expand All @@ -92,7 +109,7 @@
}

template <typename CompilerType>
void finalize_compile(CompilerType* compiler, const std::string& output, const std::string& manifest = {}) {

Check warning on line 112 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

2 adjacent parameters of 'finalize_compile' of similar type ('const std::string &') are easily swapped by mistake [bugprone-easily-swappable-parameters]

Check warning on line 112 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

2 adjacent parameters of 'finalize_compile' of similar type ('const std::string &') are easily swapped by mistake [bugprone-easily-swappable-parameters]
std::ofstream out_stream(output, std::ios::binary);
compiler->Compile(callback);
compiler->SetManifest(manifest);
Expand All @@ -107,19 +124,21 @@
const keyvi::util::parameters_t& value_store_params = keyvi::util::parameters_t()) {
keyvi::dictionary::CompletionDictionaryCompiler compiler(value_store_params);

std::function<std::pair<std::string, uint32_t>(std::string)> parser = [](std::string line) {
size_t tab = line.find('\t');
std::function<std::pair<std::string, uint32_t>(std::string)> const parser = [](const std::string& line) {
size_t const tab = line.find('\t');

if (tab == std::string::npos) return std::pair<std::string, uint32_t>();
if (tab == std::string::npos) {
return std::pair<std::string, uint32_t>();
}

std::string key = line.substr(0, tab);
std::string value_as_string = line.substr(tab + 1);
uint32_t value;
std::string const key = line.substr(0, tab);
std::string const value_as_string = line.substr(tab + 1);
uint32_t value = 0;

try {
value = boost::lexical_cast<uint32_t>(value_as_string);
} catch (boost::bad_lexical_cast const&) {
std::cout << "Error: value was not valid: " << line << std::endl;
std::cout << "Error: value was not valid: " << line << '\n';
return std::pair<std::string, uint32_t>();
}
return std::pair<std::string, uint32_t>(key, value);
Expand All @@ -133,19 +152,21 @@
const keyvi::util::parameters_t& value_store_params = keyvi::util::parameters_t()) {
keyvi::dictionary::IntDictionaryCompiler compiler(value_store_params);

std::function<std::pair<std::string, uint32_t>(std::string)> parser = [](std::string line) {
size_t tab = line.find('\t');
std::function<std::pair<std::string, uint32_t>(std::string)> const parser = [](const std::string& line) {
size_t const tab = line.find('\t');

if (tab == std::string::npos) return std::pair<std::string, uint32_t>();
if (tab == std::string::npos) {
return std::pair<std::string, uint32_t>();
}

std::string key = line.substr(0, tab);
std::string value_as_string = line.substr(tab + 1);
uint32_t value;
std::string const key = line.substr(0, tab);
std::string const value_as_string = line.substr(tab + 1);
uint32_t value = 0;

try {
value = boost::lexical_cast<uint32_t>(value_as_string);
} catch (boost::bad_lexical_cast const&) {
std::cout << "Error: value was not valid: " << line << std::endl;
std::cout << "Error: value was not valid: " << line << '\n';
return std::pair<std::string, uint32_t>();
}
return std::pair<std::string, uint32_t>(key, value);
Expand All @@ -158,11 +179,13 @@
template <class Compiler>
void compile_strings_inner(Compiler* compiler, const std::vector<std::string>& input, const std::string& output,
const std::string& manifest = {}) {
std::function<std::pair<std::string, std::string>(std::string)> parser = [](std::string line) {
size_t tab = line.find('\t');
if (tab == std::string::npos) return std::pair<std::string, std::string>();
std::string key = line.substr(0, tab);
std::string value = line.substr(tab + 1);
std::function<std::pair<std::string, std::string>(std::string)> const parser = [](const std::string& line) {
size_t const tab = line.find('\t');
if (tab == std::string::npos) {
return std::pair<std::string, std::string>();
}
std::string const key = line.substr(0, tab);
std::string const value = line.substr(tab + 1);

return std::pair<std::string, std::string>(key, value);
};
Expand All @@ -183,9 +206,9 @@
const keyvi::util::parameters_t& value_store_params = keyvi::util::parameters_t()) {
keyvi::dictionary::KeyOnlyDictionaryCompiler compiler(value_store_params);

std::function<std::pair<std::string, uint32_t>(std::string)> parser = [](std::string line) {
std::function<std::pair<std::string, uint32_t>(std::string)> const parser = [](const std::string& line) {
std::string key = line;
size_t tab = line.find('\t');
size_t const tab = line.find('\t');

if (tab != std::string::npos) {
key = line.substr(0, tab);
Expand All @@ -206,11 +229,11 @@
}

/** Extracts the parameters. */
keyvi::util::parameters_t extract_parameters(const boost::program_options::variables_map& vm) {

Check warning on line 232 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

parameter name 'vm' is too short, expected at least 3 characters [readability-identifier-length]

Check warning on line 232 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

parameter name 'vm' is too short, expected at least 3 characters [readability-identifier-length]
keyvi::util::parameters_t ret;
for (auto& v : vm["parameter"].as<std::vector<std::string>>()) {
for (const auto& v : vm["parameter"].as<std::vector<std::string>>()) {

Check warning on line 234 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

variable name 'v' is too short, expected at least 3 characters [readability-identifier-length]

Check warning on line 234 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

variable name 'v' is too short, expected at least 3 characters [readability-identifier-length]
std::vector<std::string> key_value;
boost::split(key_value, v, std::bind(std::equal_to<char>(), std::placeholders::_1, '='));
boost::split(key_value, v, [](auto&& PH1) { return std::equal_to<char>()(std::forward<decltype(PH1)>(PH1), '='); });
if (key_value.size() == 2) {
ret[key_value[0]] = key_value[1];
} else {
Expand Down Expand Up @@ -244,10 +267,10 @@
"manifest to be embedded");

// Declare which options are positional
boost::program_options::positional_options_description p;

Check warning on line 270 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

variable name 'p' is too short, expected at least 3 characters [readability-identifier-length]

Check warning on line 270 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

variable name 'p' is too short, expected at least 3 characters [readability-identifier-length]
p.add("input-file", -1);

boost::program_options::variables_map vm;

Check warning on line 273 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

variable name 'vm' is too short, expected at least 3 characters [readability-identifier-length]

Check warning on line 273 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

variable name 'vm' is too short, expected at least 3 characters [readability-identifier-length]

try {
boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(description).run(),
Expand All @@ -260,22 +283,22 @@
boost::program_options::command_line_parser(argc, argv).options(description).positional(p).run(), vm);
boost::program_options::notify(vm);

if (vm.count("help")) {
if (vm.count("help") != 0U) {
std::cout << description;
return 0;
}

std::string manifest = vm["manifest"].as<std::string>();
std::cout << manifest << std::endl;
std::string const manifest = vm["manifest"].as<std::string>();
std::cout << manifest << '\n';

std::string dictionary_type = vm["dictionary-type"].as<std::string>();
std::string const dictionary_type = vm["dictionary-type"].as<std::string>();
keyvi::util::parameters_t value_store_params = extract_parameters(vm);

if (vm.count("memory-limit")) {
if (vm.count("memory-limit") != 0U) {
value_store_params[MEMORY_LIMIT_KEY] = vm["memory-limit"].as<std::string>();

Check warning on line 298 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

do not implicitly decay an array into a pointer; consider using gsl::array_view or an explicit cast instead [cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay]

Check warning on line 298 in keyvi/bin/keyvicompiler/keyvicompiler.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

do not implicitly decay an array into a pointer; consider using gsl::array_view or an explicit cast instead [cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay]
}

if (vm.count("input-file") && vm.count("output-file")) {
if ((vm.count("input-file") != 0U) && (vm.count("output-file") != 0U)) {
input_files = vm["input-file"].as<std::vector<std::string>>();
output_file = vm["output-file"].as<std::string>();

Expand All @@ -290,19 +313,19 @@
} else if (dictionary_type == "completion") {
compile_completion(input_files, output_file, manifest, value_store_params);
} else {
std::cout << "ERROR: unknown dictionary type." << std::endl << std::endl;
std::cout << "ERROR: unknown dictionary type." << '\n' << '\n';
std::cout << description;
return 1;
}
} else {
std::cout << "ERROR: arguments wrong or missing." << std::endl << std::endl;
std::cout << "ERROR: arguments wrong or missing." << '\n' << '\n';
std::cout << description;
return 1;
}
} catch (std::exception& e) {
std::cout << "ERROR: arguments wrong or missing." << std::endl << std::endl;
std::cout << "ERROR: arguments wrong or missing." << '\n' << '\n';

std::cout << e.what() << std::endl << std::endl;
std::cout << e.what() << '\n' << '\n';
std::cout << description;

return 1;
Expand Down
32 changes: 19 additions & 13 deletions keyvi/bin/keyviinspector/keyviinspector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,33 @@
* Created on: May 13, 2014
* Author: hendrik
*/
#include <fstream>
#include <iostream>
#include <string>

#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/positional_options.hpp>
#include <boost/program_options/value_semantic.hpp>
#include <boost/program_options/variables_map.hpp>

#include "keyvi/dictionary/fsa/automata.h"
#include "keyvi/dictionary/fsa/entry_iterator.h"

void dump(const std::string& input, const std::string& output, bool keys_only = false) {

Check warning on line 39 in keyvi/bin/keyviinspector/keyviinspector.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

2 adjacent parameters of 'dump' of similar type ('const std::string &') are easily swapped by mistake [bugprone-easily-swappable-parameters]

Check warning on line 39 in keyvi/bin/keyviinspector/keyviinspector.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

2 adjacent parameters of 'dump' of similar type ('const std::string &') are easily swapped by mistake [bugprone-easily-swappable-parameters]
keyvi::dictionary::fsa::automata_t automata(new keyvi::dictionary::fsa::Automata(input.c_str()));
keyvi::dictionary::fsa::automata_t const automata(new keyvi::dictionary::fsa::Automata(input));
keyvi::dictionary::fsa::EntryIterator it(automata);

Check warning on line 41 in keyvi/bin/keyviinspector/keyviinspector.cpp

View workflow job for this annotation

GitHub Actions / Release on ubuntu-22.04

variable name 'it' is too short, expected at least 3 characters [readability-identifier-length]

Check warning on line 41 in keyvi/bin/keyviinspector/keyviinspector.cpp

View workflow job for this annotation

GitHub Actions / Debug on ubuntu-22.04

variable name 'it' is too short, expected at least 3 characters [readability-identifier-length]
keyvi::dictionary::fsa::EntryIterator end_it = keyvi::dictionary::fsa::EntryIterator();
keyvi::dictionary::fsa::EntryIterator const end_it = keyvi::dictionary::fsa::EntryIterator();

std::ofstream out_stream(output);

while (it != end_it) {
it.WriteKey(out_stream);

if (!keys_only) {
std::string value = it.GetValueAsString();
if (value.size()) {
std::string const value = it.GetValueAsString();
if (!value.empty()) {
out_stream << "\t";
out_stream << value;
}
Expand All @@ -54,9 +60,9 @@
}

void dump_with_attributes(const std::string& input, const std::string& output) {
keyvi::dictionary::fsa::automata_t automata(new keyvi::dictionary::fsa::Automata(input.c_str()));
keyvi::dictionary::fsa::automata_t const automata(new keyvi::dictionary::fsa::Automata(input));
keyvi::dictionary::fsa::EntryIterator it(automata);
keyvi::dictionary::fsa::EntryIterator end_it = keyvi::dictionary::fsa::EntryIterator();
keyvi::dictionary::fsa::EntryIterator const end_it = keyvi::dictionary::fsa::EntryIterator();

std::ofstream out_stream(output);

Expand All @@ -73,8 +79,8 @@
}

/**
 * Loads the automaton stored at `input` and prints its statistics to stdout.
 *
 * @param input path passed to the Automata constructor.
 */
void print_statistics(const std::string& input) {
  keyvi::dictionary::fsa::automata_t const automata(new keyvi::dictionary::fsa::Automata(input));
  // '\n' rather than std::endl: no explicit flush is needed for a single line of output.
  std::cout << automata->GetStatistics() << '\n';
}

int main(int argc, char** argv) {
Expand All @@ -100,17 +106,17 @@
boost::program_options::store(
boost::program_options::command_line_parser(argc, argv).options(description).positional(p).run(), vm);
boost::program_options::notify(vm);
if (vm.count("help")) {
if (vm.count("help") != 0U) {
std::cout << description;
return 0;
}

bool key_only = false;
if (vm.count("keys-only")) {
if (vm.count("keys-only") != 0U) {
key_only = true;
}

if (vm.count("input-file") && vm.count("output-file")) {
if ((vm.count("input-file") != 0U) && (vm.count("output-file") != 0U)) {
input_file = vm["input-file"].as<std::string>();
output_file = vm["output-file"].as<std::string>();

Expand All @@ -119,13 +125,13 @@
return 0;
}

if (vm.count("input-file") && vm.count("statistics")) {
if ((vm.count("input-file") != 0U) && (vm.count("statistics") != 0U)) {
input_file = vm["input-file"].as<std::string>();
print_statistics(input_file);
return 0;
}

std::cout << "ERROR: arguments wrong or missing." << std::endl << std::endl;
std::cout << "ERROR: arguments wrong or missing." << '\n' << '\n';
std::cout << description;
return 1;
}
Loading
Loading