From f3e81ae8a104426a70d430f2262b04172a1128e4 Mon Sep 17 00:00:00 2001 From: Weng Xuetian Date: Tue, 30 Sep 2025 21:28:21 -0700 Subject: [PATCH] Support multiple entries in inner fuzzy Previous commit added a duplicate entry in inner fuzzy, which would require some additional changes to support. It seems there's not much benefit in exposing inner fuzzy, so the new version will make use of a transparent map and be a hidden API. --- src/libime/core/utils_p.h | 13 +++++++++++ src/libime/pinyin/pinyindata.cpp | 34 +++++++++++++++++++++++++++++ src/libime/pinyin/pinyindata_p.h | 29 ++++++++++++++++++++++++ src/libime/pinyin/pinyinencoder.cpp | 15 ++++++++----- test/testpinyinencoder.cpp | 4 ++++ 5 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 src/libime/pinyin/pinyindata_p.h diff --git a/src/libime/core/utils_p.h b/src/libime/core/utils_p.h index ebdf72e..15d7ca0 100644 --- a/src/libime/core/utils_p.h +++ b/src/libime/core/utils_p.h @@ -194,6 +194,19 @@ inline int millisecondsTill(T t0) { .count(); } +struct StringHash { + using hash_type = std::hash; + using is_transparent = void; + + std::size_t operator()(const char *str) const { return hash_type{}(str); } + std::size_t operator()(std::string_view str) const { + return hash_type{}(str); + } + std::size_t operator()(const std::string &str) const { + return hash_type{}(str); + } +}; + } // namespace libime #endif // _LIBIME_LIBIME_CORE_UTILS_P_H_ diff --git a/src/libime/pinyin/pinyindata.cpp b/src/libime/pinyin/pinyindata.cpp index d08d128..5316b77 100644 --- a/src/libime/pinyin/pinyindata.cpp +++ b/src/libime/pinyin/pinyindata.cpp @@ -16,6 +16,7 @@ #include #include #include +#include "pinyindata_p.h" #include "pinyinencoder.h" namespace libime { @@ -152,6 +153,39 @@ getInnerSegment() { return innerSegment; } +const InnerSegmentMap &getInnerSegmentV2() { + static const InnerSegmentMap innerSegment = []() { + InnerSegmentMap innerSegmentV2; + for (const auto &[key, value] : getInnerSegment()) { + 
innerSegmentV2[key].push_back(value); + } + + for (const auto &newItem : std::vector< + std::pair>>{ + {"qiao", {"qia", "o"}}, + {"niao", {"nia", "o"}}, + {"liao", {"lia", "o"}}, + {"zhuo", {"zhu", "o"}}, + {"diao", {"dia", "o"}}, + {"shao", {"sha", "o"}}, + {"xiao", {"xia", "o"}}, + {"zhua", {"zhu", "a"}}, + {"shuo", {"shu", "o"}}, + {"shua", {"shu", "a"}}, + {"zhao", {"zha", "o"}}, + {"jiao", {"jia", "o"}}, + {"chuo", {"chu", "o"}}, + {"chua", {"chu", "a"}}, + {"chao", {"cha", "o"}}, + }) { + innerSegmentV2[newItem.first].push_back(newItem.second); + } + return innerSegmentV2; + }(); + + return innerSegment; +} + inline bool operator==(const PinyinEntry &a, const PinyinEntry &b) { return a.pinyin() == b.pinyin() && a.initial() == b.initial() && a.final() == b.final() && a.flags() == b.flags(); diff --git a/src/libime/pinyin/pinyindata_p.h b/src/libime/pinyin/pinyindata_p.h new file mode 100644 index 0000000..b472c88 --- /dev/null +++ b/src/libime/pinyin/pinyindata_p.h @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: 2017-2017 CSSlayer + * + * SPDX-License-Identifier: LGPL-2.1-or-later + */ +#ifndef _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_ +#define _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_ + +#include +#include +#include +#include +#include +#include "libime/core/utils_p.h" + +namespace libime { + +using InnerSegmentMap = + std::unordered_map>, + StringHash, std::equal_to<>>; + +const std::unordered_map>, + StringHash, std::equal_to<>> & +getInnerSegmentV2(); +} // namespace libime + +#endif // _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_ diff --git a/src/libime/pinyin/pinyinencoder.cpp b/src/libime/pinyin/pinyinencoder.cpp index 4727b6d..ee2712f 100644 --- a/src/libime/pinyin/pinyinencoder.cpp +++ b/src/libime/pinyin/pinyinencoder.cpp @@ -29,6 +29,7 @@ #include "libime/core/segmentgraph.h" #include "pinyincorrectionprofile.h" #include "pinyindata.h" +#include "pinyindata_p.h" #include "shuangpinprofile.h" namespace libime { @@ -295,13 +296,15 @@ 
PinyinEncoder::parseUserPinyin(std::string userPinyin, fuzzyFlags.test(PinyinFuzzyFlag::Inner)) || (nextPinyin.size() == 3 && flags.test(PinyinFuzzyFlag::InnerShort))) { - const auto &innerSegments = getInnerSegment(); - auto iter = innerSegments.find(std::string(nextPinyin)); + const auto &innerSegments = getInnerSegmentV2(); + auto iter = innerSegments.find(nextPinyin); if (iter != innerSegments.end()) { - result.addNext(top, - top + iter->second.first.size()); - result.addNext(top + iter->second.first.size(), - top + nextSize[i]); + for (const auto &innerSeg : iter->second) { + result.addNext(top, + top + innerSeg.first.size()); + result.addNext(top + innerSeg.first.size(), + top + nextSize[i]); + } } } else if (nextPinyin.size() == 2 && flags.test(PinyinFuzzyFlag::InnerShort) && diff --git a/test/testpinyinencoder.cpp b/test/testpinyinencoder.cpp index 3ba5d42..cb3200a 100644 --- a/test/testpinyinencoder.cpp +++ b/test/testpinyinencoder.cpp @@ -251,6 +251,10 @@ int main() { check("sangeren", PinyinFuzzyFlag::Inner, {"san", "ge", "ren"}); + check("jiao", PinyinFuzzyFlag::Inner, {"jiao"}); + check("jiao", PinyinFuzzyFlag::Inner, {"ji", "ao"}); + check("jiao", PinyinFuzzyFlag::Inner, {"jia", "o"}); + { PinyinCorrectionProfile profile(BuiltinPinyinCorrectionProfile::Qwerty); auto graph = PinyinEncoder::parseUserPinyin(