From fb2f4fc46b20c338db001505c352a5f79dee72d2 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 21 Oct 2025 05:13:50 +0200 Subject: [PATCH 1/5] restore backwards compatibility in string value store --- .../keyvi/dictionary/fsa/internal/string_value_store.h | 3 ++- python/tests/match_object_test.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/keyvi/include/keyvi/dictionary/fsa/internal/string_value_store.h b/keyvi/include/keyvi/dictionary/fsa/internal/string_value_store.h index c6011a671..dd70bc8c0 100644 --- a/keyvi/include/keyvi/dictionary/fsa/internal/string_value_store.h +++ b/keyvi/include/keyvi/dictionary/fsa/internal/string_value_store.h @@ -293,7 +293,8 @@ class StringValueStoreReader final : public IValueStoreReader { std::string GetMsgPackedValueAsString(uint64_t fsa_value, const compression::CompressionAlgorithm compression_algorithm = compression::CompressionAlgorithm::NO_COMPRESSION) const override { - std::string msgpacked_value = keyvi::util::ValueToMsgPack(std::string(strings_ + fsa_value)); + // GH#333: if string is valid json, parse it as msgpack for backwards-compatibility + std::string msgpacked_value = keyvi::util::JsonStringToMsgPack(std::string(strings_ + fsa_value)); if (compression_algorithm == compression::CompressionAlgorithm::NO_COMPRESSION) { return msgpacked_value; diff --git a/python/tests/match_object_test.py b/python/tests/match_object_test.py index a49292e4a..4dabd0886 100644 --- a/python/tests/match_object_test.py +++ b/python/tests/match_object_test.py @@ -206,6 +206,7 @@ def test_get_value_string(): c.add("abc", "aaaaa") c.add("abd", "bbbbb") c.add("abe", "{}") + c.add("abf", "{'xyz': 42}") with tmp_dictionary(c, "match_object_string.kv") as d: m = d["abc"] assert m.value == "aaaaa" @@ -223,9 +224,11 @@ def test_get_value_string(): == "bbbbb" ) m = d["abe"] - # gh#333: keyvi < 0.6.4 returned a dictionary instead of a string - assert m.value == "{}" - assert isinstance(m.value, str) + # gh#333: return a dictionary instead of a string + assert m.value == {} + assert isinstance(m.value, dict) + m = d["abf"] + assert m.value == {"xyz": 42} def test_matched_string(): From 2c552a616f731ca87f14e3c749dba4e41108dc73 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 21 Oct 2025 05:34:09 +0200 Subject: [PATCH 2/5] fix tests --- python/tests/match_object_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/tests/match_object_test.py b/python/tests/match_object_test.py index 4dabd0886..9688b2f54 100644 --- a/python/tests/match_object_test.py +++ b/python/tests/match_object_test.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Usage: py.test tests +import json import keyvi import msgpack from test_tools import tmp_dictionary @@ -86,7 +87,7 @@ def test_boolean_attributes(): m = keyvi.Match() bytes_key = bytes("def".encode("utf-8")) m[bytes_key] = True - assert m[bytes_key] == True + assert m[bytes_key] def test_start(): @@ -206,7 +207,7 @@ def test_get_value_string(): c.add("abc", "aaaaa") c.add("abd", "bbbbb") c.add("abe", "{}") - c.add("abf", "{'xyz': 42}") + c.add("abf", json.dumps({"xyz": 42})) with tmp_dictionary(c, "match_object_string.kv") as d: m = d["abc"] assert m.value == "aaaaa" @@ -251,5 +252,5 @@ def test_bool_operator(): assert issubclass(w[-1].category, DeprecationWarning) assert not m m.end = 42 - assert not m is False + assert m is not False assert m From e1d9968a623d4711bf6373fc1550d652fd3abef4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 08:25:24 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- LICENSE | 1 - README.md | 6 +- doc/RELEASE_PROCESS.md | 4 +- doc/algorithm/Construction-Basics.md | 79 +++++----- doc/algorithm/Extensibility.md | 6 +- doc/algorithm/Minimization.md | 20 +-- doc/algorithm/Persistence-Basics.md | 9 +- doc/algorithm/Scaling.md | 20 +-- .../bbuzz2016/keyvi-presentation.svg | 66 ++++---- .../keyvi-presentation-progscon2017.svg | 66 ++++---- .../keyvi-presentation-search-meetup-2018.svg | 66 ++++---- ...Building keyvi dictionaries with python.md | 9 +- doc/usage/Building keyvi dictionaries.md | 10 +- doc/usage/Crashcourse.md | 52 +++---- doc/usage/Keyvi Index with python.md | 2 +- doc/usage/Using pykeyvi in EMR.md | 6 +- keyvi/include/keyvi/dictionary/fsa/automata.h | 16 +- .../fsa/comparable_state_traverser.h | 38 ++--- .../fsa/internal/lru_generation_cache.h | 16 +- .../fsa/internal/minimization_hash.h | 12 +- .../fsa/internal/sparse_array_persistence.h | 4 +- .../keyvi/dictionary/fsa/state_traverser.h | 16 +- .../dictionary/fsa/zip_state_traverser.h | 58 +++---- keyvi/include/keyvi/dictionary/util/trace.h | 40 ++--- .../include/keyvi/dictionary/util/transform.h | 5 +- .../keyvi/dictionary/util/utf8_utils.h | 41 ++--- .../fsa/internal/minimization_hash_test.cpp | 4 +- .../fsa/zip_state_traverser_test.cpp | 2 +- keyvi/tests/keyvi/index/index_limits_test.cpp | 4 +- python/LICENSE.txt | 1 - python/autowrap_includes/autowrap_tools.hpp | 40 ++--- python/build_macosx_wheels.sh | 2 +- .../completion/multiword_completion_tester.py | 9 +- .../completion/multiword_completion_writer.py | 17 ++- .../prefix_completion_fuzzy_tester.py | 8 +- .../completion/prefix_completion_tester.py | 8 +- python/examples/lookup/compile_json.py | 45 +++--- python/examples/lookup/text_lookup_tester.py | 6 +- python/examples/lookup/value_lookup_tester.py | 6 +- python/examples/normalization/normalize.py | 4 +- .../var_length_short_calculation_test.py | 10 +- .../dictionary/dictionary_leak_test.py | 18 ++- python/setup.py | 46 ++++-- .../addons/CompletionDictionaryCompiler.pyx | 2 +- .../src/addons/CompletionDictionaryMerger.pyx | 2 +- python/src/addons/Dictionary.pyx | 5 +- python/src/addons/IntDictionaryCompiler.pyx | 1 - python/src/addons/IntDictionaryMerger.pyx | 2 +- python/src/addons/JsonDictionaryCompiler.pyx | 3 +- python/src/addons/JsonDictionaryMerger.pyx | 2 +- python/src/addons/KeyOnlyDictionaryMerger.pyx | 2 +- python/src/addons/Match.pyx | 2 +- ...condaryKeyCompletionDictionaryCompiler.pyx | 1 - ...ondaryKeyFloatVectorDictionaryCompiler.pyx | 1 - .../SecondaryKeyIntDictionaryCompiler.pyx | 1 - .../SecondaryKeyJsonDictionaryCompiler.pyx | 1 - .../SecondaryKeyKeyOnlyDictionaryCompiler.pyx | 1 - .../SecondaryKeyStringDictionaryCompiler.pyx | 1 - python/src/addons/match_iterator.pyx | 2 +- python/src/extra/attributes_converter.h | 25 ++- python/src/pxds/dictionary.pxd | 8 +- python/src/pxds/dictionary_compiler.pxd | 2 +- python/src/pxds/dictionary_merger.pxd | 2 +- python/src/pxds/match.pxd | 1 - python/src/pxds/multi_word_completion.pxd | 2 - python/src/pxds/prefix_completion.pxd | 2 - python/src/pxds/secondary_key_dictionary.pxd | 4 +- python/src/py/keyvi/__init__.py | 11 +- python/src/py/keyvi/_pycore/keyvimerger.py | 34 +++-- python/src/py/keyvi/cli/cli.py | 143 ++++++++++-------- python/src/py/keyvi/compiler/__init__.py | 41 ++++- python/src/py/keyvi/completion/__init__.py | 4 +- python/src/py/keyvi/dictionary/__init__.py | 4 +- python/src/py/keyvi/index/__init__.py | 4 +- python/src/py/keyvi/util/__init__.py | 4 +- python/src/py/keyvi/vector/__init__.py | 4 +- .../forward_backward_completion_test.py | 22 ++- .../tests/completion/fuzzy_completion_test.py | 38 +++-- .../completion/multiword_completion_test.py | 75 +++++---- .../dictionary/dictionary_merger_test.py | 61 ++++---- python/tests/dictionary/dictionary_test.py | 5 +- .../floatvector_dictionary_test.py | 9 +- python/tests/dictionary/get_fuzzy_test.py | 13 +- .../dictionary/int_dictionary_merger_test.py | 52 +++---- python/tests/dictionary/iterators_test.py | 36 +++-- .../key_only_dictionary_merger_test.py | 46 ++---- python/tests/dictionary/loading_test.py | 49 +++--- python/tests/dictionary/near_test.py | 48 +++--- .../secondary_key_dictionary_test.py | 24 +-- .../string_dictionary_merger_test.py | 52 +++---- python/tests/dictionary/unicode_test.py | 10 +- python/tests/dictionary/zerobyte_test.py | 5 +- python/tests/dictionary_compiler_test.py | 26 ++-- python/tests/index/index_test.py | 52 +++---- python/tests/index/merger_binary_test.py | 7 +- python/tests/int/int_dictionary_test.py | 6 +- python/tests/statistics_test.py | 46 +++--- .../tests/utils/jump_consistent_hash_test.py | 5 +- python/tests/vector/basic_test.py | 24 +-- sphinx-docs/_static/custom.css | 2 +- sphinx-docs/conf_extra.py | 22 +-- sphinx-docs/cpp/dictionary_compiler.rst | 2 +- sphinx-docs/index.rst | 16 +- 104 files changed, 1025 insertions(+), 952 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 7569be5b0..eaa17b244 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,7 +1,7 @@