You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
113 lines
3.3 KiB
113 lines
3.3 KiB
4 years ago
|
commit 4ae420734990ab9f5ccc038262368256b9323f4a
|
||
|
Merge: b66799f 994c1c7
|
||
|
Author: Evgeniy Dushistov <dushistov@mail.ru>
|
||
|
Date: Wed Dec 23 04:30:13 2020 +0300
|
||
|
|
||
|
Merge pull request #67 from doozan/master
|
||
|
|
||
|
Use binary search for synonyms, fixes #31
|
||
|
|
||
|
diff --git a/src/stardict_lib.cpp b/src/stardict_lib.cpp
|
||
|
index 0af4304..6b1f92b 100644
|
||
|
--- a/src/stardict_lib.cpp
|
||
|
+++ b/src/stardict_lib.cpp
|
||
|
@@ -833,21 +833,23 @@ bool SynFile::load(const std::string &url, gulong wc)
|
||
|
{
|
||
|
struct stat stat_buf;
|
||
|
if (!stat(url.c_str(), &stat_buf)) {
|
||
|
- MapFile syn;
|
||
|
- if (!syn.open(url.c_str(), stat_buf.st_size))
|
||
|
+
|
||
|
+ if (!synfile.open(url.c_str(), stat_buf.st_size))
|
||
|
return false;
|
||
|
- const gchar *current = syn.begin();
|
||
|
+
|
||
|
+ synlist.resize(wc + 1);
|
||
|
+ gchar *p1 = synfile.begin();
|
||
|
+
|
||
|
for (unsigned long i = 0; i < wc; i++) {
|
||
|
// each entry in a syn-file is:
|
||
|
// - 0-terminated string
|
||
|
// 4-byte index into .dict file in network byte order
|
||
|
- glib::CharStr lower_string{ g_utf8_casefold(current, -1) };
|
||
|
- std::string synonym{ get_impl(lower_string) };
|
||
|
- current += synonym.length() + 1;
|
||
|
- const guint32 idx = g_ntohl(get_uint32(current));
|
||
|
- current += sizeof(idx);
|
||
|
- synonyms[synonym] = idx;
|
||
|
+
|
||
|
+ synlist[i] = p1;
|
||
|
+ p1 += strlen(p1) + 1 + 4;
|
||
|
}
|
||
|
+ synlist[wc] = p1;
|
||
|
+
|
||
|
return true;
|
||
|
} else {
|
||
|
return false;
|
||
|
@@ -856,13 +858,38 @@ bool SynFile::load(const std::string &url, gulong wc)
|
||
|
|
||
|
bool SynFile::lookup(const char *str, glong &idx)
|
||
|
{
|
||
|
- glib::CharStr lower_string{ g_utf8_casefold(str, -1) };
|
||
|
- auto it = synonyms.find(get_impl(lower_string));
|
||
|
- if (it != synonyms.end()) {
|
||
|
- idx = it->second;
|
||
|
- return true;
|
||
|
+ bool bFound = false;
|
||
|
+ glong iTo = synlist.size() - 2;
|
||
|
+ if (iTo <0) return false;
|
||
|
+
|
||
|
+ if (stardict_strcmp(str, get_key(0)) < 0) {
|
||
|
+ idx = 0;
|
||
|
+ } else if (stardict_strcmp(str, get_key(iTo)) > 0) {
|
||
|
+ idx = INVALID_INDEX;
|
||
|
+ } else {
|
||
|
+ glong iThisIndex = 0;
|
||
|
+ glong iFrom = 0;
|
||
|
+ gint cmpint;
|
||
|
+ while (iFrom <= iTo) {
|
||
|
+ iThisIndex = (iFrom + iTo) / 2;
|
||
|
+ cmpint = stardict_strcmp(str, get_key(iThisIndex));
|
||
|
+ if (cmpint > 0)
|
||
|
+ iFrom = iThisIndex + 1;
|
||
|
+ else if (cmpint < 0)
|
||
|
+ iTo = iThisIndex - 1;
|
||
|
+ else {
|
||
|
+ bFound = true;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ if (!bFound)
|
||
|
+ idx = iFrom; //next
|
||
|
+ else {
|
||
|
+ const gchar *key = get_key(iThisIndex);
|
||
|
+ idx = g_ntohl(get_uint32(key+strlen(key)+1));
|
||
|
+ }
|
||
|
}
|
||
|
- return false;
|
||
|
+ return bFound;
|
||
|
}
|
||
|
|
||
|
bool Dict::Lookup(const char *str, glong &idx)
|
||
|
diff --git a/src/stardict_lib.hpp b/src/stardict_lib.hpp
|
||
|
index a629cbe..38f76f4 100644
|
||
|
--- a/src/stardict_lib.hpp
|
||
|
+++ b/src/stardict_lib.hpp
|
||
|
@@ -102,11 +102,15 @@ public:
|
||
|
class SynFile
|
||
|
{
|
||
|
public:
|
||
|
+ SynFile() {}
|
||
|
+ ~SynFile() {}
|
||
|
bool load(const std::string &url, gulong wc);
|
||
|
bool lookup(const char *str, glong &idx);
|
||
|
+ const gchar *get_key(glong idx) { return synlist[idx]; }
|
||
|
|
||
|
private:
|
||
|
- std::map<std::string, gulong> synonyms;
|
||
|
+ MapFile synfile;
|
||
|
+ std::vector<gchar *> synlist;
|
||
|
};
|
||
|
|
||
|
class Dict : public DictBase
|