From 121ead6e3090c5b2994f8809a03b8d2e849ce67a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 12:19:04 +0000
Subject: [PATCH 1/2] Initial plan


From 2ba11de66ff01fe12232c5198727e3dfdb1d9f6d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Mar 2026 12:22:12 +0000
Subject: [PATCH 2/2] Fix FullSegment nullptr word length handling

Co-authored-by: yanyiwu <2162645+yanyiwu@users.noreply.github.com>
---
 include/cppjieba/FullSegment.hpp | 1 +
 test/unittest/segments_test.cpp  | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/include/cppjieba/FullSegment.hpp b/include/cppjieba/FullSegment.hpp
index 79d5211e..a1dab316 100644
--- a/include/cppjieba/FullSegment.hpp
+++ b/include/cppjieba/FullSegment.hpp
@@ -68,6 +68,7 @@ class FullSegment: public SegmentBase {
         assert(nextoffset < dags.size());
         const DictUnit* du = dags[i].nexts[j].second;
         if (du == NULL) {
+          wordLen = 1;
           if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
             WordRange wr(begin + i, begin + nextoffset);
             res.push_back(wr);
diff --git a/test/unittest/segments_test.cpp b/test/unittest/segments_test.cpp
index dbf0a507..93612024 100644
--- a/test/unittest/segments_test.cpp
+++ b/test/unittest/segments_test.cpp
@@ -200,6 +200,14 @@ TEST(FullSegment, Test1) {
   ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]");
 }
 
+TEST(FullSegment, NullDictUnitDoesNotSkipFollowingRune) {
+  FullSegment segment(DICT_DIR "/jieba.dict.utf8");
+  vector<string> words;
+
+  segment.Cut("崎岖的牙齿", words);
+  ASSERT_EQ("崎岖/的/牙齿", Join(words.begin(), words.end(), "/"));
+}
+
 TEST(QuerySegment, Test1) {
   QuerySegment segment(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", "");
   vector<string> words;