xref: /OK3568_Linux_fs/app/forlinx/flapp/src/keyboard/pinyin/share/spellingtrie.cpp (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <stdio.h>
18 #include <string.h>
19 #include <assert.h>
20 #include "../include/dictdef.h"
21 
22 #ifdef _WIN32
23 #define snprintf _snprintf
24 #endif
25 
26 #ifdef ___BUILD_MODEL___
27 #include "../include/spellingtable.h"
28 #endif
29 
30 #include "../include/spellingtrie.h"
31 
32 namespace ime_pinyin {
33 
34 SpellingTrie* SpellingTrie::instance_ = NULL;
35 
36 // z/c/s is for Zh/Ch/Sh
37 const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
38     "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
39 
40 // Bit 0 : is it a Shengmu char?
41 // Bit 1 : is it a Yunmu char? (one char is a Yunmu)
42 // Bit 2 : is it enabled in ShouZiMu(first char) mode?
43 unsigned char SpellingTrie::char_flags_[] = {
44   // a    b      c     d     e     f     g
45   0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
46   // h    i     j      k     l     m    n
47   0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
48   // o    p     q      r     s     t
49   0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
50   // u    v     w      x     y     z
51   0x00, 0x00, 0x01, 0x01, 0x01, 0x01
52 };
53 
compare_spl(const void * p1,const void * p2)54 int compare_spl(const void* p1, const void* p2) {
55   return strcmp((const char*)(p1), (const char*)(p2));
56 }
57 
SpellingTrie()58 SpellingTrie::SpellingTrie() {
59   spelling_buf_ = NULL;
60   spelling_size_ = 0;
61   spelling_num_ = 0;
62   spl_ym_ids_ = NULL;
63   splstr_queried_ = NULL;
64   splstr16_queried_ = NULL;
65   root_ = NULL;
66   dumb_node_ = NULL;
67   splitter_node_ = NULL;
68   instance_ = NULL;
69   ym_buf_ = NULL;
70   f2h_ = NULL;
71 
72   szm_enable_shm(true);
73   szm_enable_ym(true);
74 
75 #ifdef ___BUILD_MODEL___
76   node_num_ = 0;
77 #endif
78 }
79 
~SpellingTrie()80 SpellingTrie::~SpellingTrie() {
81   if (NULL != spelling_buf_)
82     delete [] spelling_buf_;
83 
84   if (NULL != splstr_queried_)
85     delete [] splstr_queried_;
86 
87   if (NULL != splstr16_queried_)
88     delete [] splstr16_queried_;
89 
90   if (NULL != spl_ym_ids_)
91     delete [] spl_ym_ids_;
92 
93   if (NULL != root_) {
94     free_son_trie(root_);
95     delete root_;
96   }
97 
98   if (NULL != dumb_node_) {
99     delete [] dumb_node_;
100   }
101 
102   if (NULL != splitter_node_) {
103     delete [] splitter_node_;
104   }
105 
106   if (NULL != instance_) {
107     delete instance_;
108     instance_ = NULL;
109   }
110 
111   if (NULL != ym_buf_)
112     delete [] ym_buf_;
113 
114   if (NULL != f2h_)
115     delete [] f2h_;
116 }
117 
if_valid_id_update(uint16 * splid) const118 bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
119   if (NULL == splid || 0 == *splid)
120     return false;
121 
122   if (*splid >= kFullSplIdStart)
123     return true;
124   if (*splid < kFullSplIdStart) {
125     char ch = kHalfId2Sc_[*splid];
126     if (ch > 'Z') {
127       return true;
128     } else {
129       if (szm_is_enabled(ch)) {
130         return true;
131       } else if (is_yunmu_char(ch)) {
132         assert(h2f_num_[*splid] > 0);
133         *splid = h2f_start_[*splid];
134         return true;
135       }
136     }
137   }
138   return false;
139 }
140 
is_half_id(uint16 splid) const141 bool SpellingTrie::is_half_id(uint16 splid) const {
142   if (0 == splid || splid >= kFullSplIdStart)
143     return false;
144 
145   return true;
146 }
147 
is_full_id(uint16 splid) const148 bool SpellingTrie::is_full_id(uint16 splid) const {
149   if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_)
150     return false;
151   return true;
152 }
153 
half_full_compatible(uint16 half_id,uint16 full_id) const154 bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
155   uint16 half_fr_full = full_to_half(full_id);
156 
157   if (half_fr_full == half_id)
158     return true;
159 
160   // &~0x20 is used to conver the char to upper case.
161   // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
162   char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
163   char ch_h = kHalfId2Sc_[half_id];
164   if (ch_f == ch_h)
165     return true;
166 
167   return false;
168 }
169 
is_half_id_yunmu(uint16 splid) const170 bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
171   if (0 == splid || splid >= kFullSplIdStart)
172     return false;
173 
174   char ch = kHalfId2Sc_[splid];
175   // If ch >= 'a', that means the half id is one of Zh/Ch/Sh
176   if (ch >= 'a') {
177     return false;
178   }
179 
180   return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
181 }
182 
is_shengmu_char(char ch) const183 bool SpellingTrie::is_shengmu_char(char ch) const {
184   return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
185 }
186 
is_yunmu_char(char ch) const187 bool SpellingTrie::is_yunmu_char(char ch) const {
188   return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
189 }
190 
is_szm_char(char ch) const191 bool SpellingTrie::is_szm_char(char ch) const {
192   return is_shengmu_char(ch) || is_yunmu_char(ch);
193 }
194 
szm_is_enabled(char ch) const195 bool SpellingTrie::szm_is_enabled(char ch) const {
196   return char_flags_[ch - 'A'] & kHalfIdSzmMask;
197 }
198 
szm_enable_shm(bool enable)199 void SpellingTrie::szm_enable_shm(bool enable) {
200   if (enable) {
201     for (char ch = 'A'; ch <= 'Z'; ch++) {
202       if (is_shengmu_char(ch))
203         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
204     }
205   } else {
206     for (char ch = 'A'; ch <= 'Z'; ch++) {
207       if (is_shengmu_char(ch))
208         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
209     }
210   }
211 }
212 
szm_enable_ym(bool enable)213 void SpellingTrie::szm_enable_ym(bool enable) {
214   if (enable) {
215     for (char ch = 'A'; ch <= 'Z'; ch++) {
216       if (is_yunmu_char(ch))
217         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
218     }
219   } else {
220     for (char ch = 'A'; ch <= 'Z'; ch++) {
221       if (is_yunmu_char(ch))
222         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
223     }
224   }
225 }
226 
is_szm_enabled(char ch) const227 bool SpellingTrie::is_szm_enabled(char ch) const {
228   return char_flags_[ch - 'A'] & kHalfIdSzmMask;
229 }
230 
get_cpinstance()231 const SpellingTrie* SpellingTrie::get_cpinstance() {
232   return &get_instance();
233 }
234 
get_instance()235 SpellingTrie& SpellingTrie::get_instance() {
236   if (NULL == instance_)
237     instance_ = new SpellingTrie();
238 
239   return *instance_;
240 }
241 
half2full_num(uint16 half_id) const242 uint16 SpellingTrie::half2full_num(uint16 half_id) const {
243   if (NULL == root_ || half_id >= kFullSplIdStart)
244     return 0;
245   return h2f_num_[half_id];
246 }
247 
half_to_full(uint16 half_id,uint16 * spl_id_start) const248 uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
249   if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart)
250     return 0;
251 
252   *spl_id_start = h2f_start_[half_id];
253   return h2f_num_[half_id];
254 }
255 
full_to_half(uint16 full_id) const256 uint16 SpellingTrie::full_to_half(uint16 full_id) const {
257   if (NULL == root_ || full_id < kFullSplIdStart ||
258       full_id > spelling_num_ + kFullSplIdStart)
259     return 0;
260 
261   return f2h_[full_id - kFullSplIdStart];
262 }
263 
free_son_trie(SpellingNode * node)264 void SpellingTrie::free_son_trie(SpellingNode* node) {
265   if (NULL == node)
266     return;
267 
268   for (size_t pos = 0; pos < node->num_of_son; pos++) {
269     free_son_trie(node->first_son + pos);
270   }
271 
272   if (NULL != node->first_son)
273     delete [] node->first_son;
274 }
275 
construct(const char * spelling_arr,size_t item_size,size_t item_num,float score_amplifier,unsigned char average_score)276 bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
277                              size_t item_num, float score_amplifier,
278                              unsigned char average_score) {
279   if (spelling_arr == NULL)
280     return false;
281 
282   memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
283   memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
284 
285   // If the arr is the same as the buf, means this function is called by
286   // load_table(), the table data are ready; otherwise the array should be
287   // saved.
288   if (spelling_arr != spelling_buf_) {
289     if (NULL != spelling_buf_)
290       delete [] spelling_buf_;
291     spelling_buf_ = new char[item_size * item_num];
292     if (NULL == spelling_buf_)
293       return false;
294     memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
295   }
296 
297   spelling_size_ = item_size;
298   spelling_num_ = item_num;
299 
300   score_amplifier_ = score_amplifier;
301   average_score_ = average_score;
302 
303   if (NULL != splstr_queried_)
304     delete [] splstr_queried_;
305   splstr_queried_ = new char[spelling_size_];
306   if (NULL == splstr_queried_)
307     return false;
308 
309   if (NULL != splstr16_queried_)
310     delete [] splstr16_queried_;
311   splstr16_queried_ = new char16[spelling_size_];
312   if (NULL == splstr16_queried_)
313     return false;
314 
315   // First, sort the buf to ensure they are in ascendant order
316   qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
317 
318 #ifdef ___BUILD_MODEL___
319   node_num_ = 1;
320 #endif
321 
322   root_ = new SpellingNode();
323   memset(root_, 0, sizeof(SpellingNode));
324 
325   dumb_node_ = new SpellingNode();
326   memset(dumb_node_, 0, sizeof(SpellingNode));
327   dumb_node_->score = average_score_;
328 
329   splitter_node_ = new SpellingNode();
330   memset(splitter_node_, 0, sizeof(SpellingNode));
331   splitter_node_->score = average_score_;
332 
333   memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
334 
335   root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
336 
337   // Root's score should be cleared.
338   root_->score = 0;
339 
340   if (NULL == root_->first_son)
341     return false;
342 
343   h2f_start_[0] = h2f_num_[0] = 0;
344 
345   if (!build_f2h())
346     return false;
347 
348 #ifdef ___BUILD_MODEL___
349   if (kPrintDebug0) {
350     printf("---SpellingTrie Nodes: %d\n", (int)node_num_);
351   }
352   return build_ym_info();
353 #else
354   return true;
355 #endif
356 }
357 
358 #ifdef ___BUILD_MODEL___
get_ym_str(const char * spl_str)359 const char* SpellingTrie::get_ym_str(const char *spl_str) {
360   bool start_ZCS = false;
361   if (is_shengmu_char(*spl_str)) {
362     if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
363       start_ZCS = true;
364     spl_str += 1;
365     if (start_ZCS && 'h' == *spl_str)
366       spl_str += 1;
367   }
368   return spl_str;
369 }
370 
build_ym_info()371 bool SpellingTrie::build_ym_info() {
372   bool sucess;
373   SpellingTable *spl_table = new SpellingTable();
374 
375   sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
376   assert(sucess);
377 
378   for (uint16 pos = 0; pos < spelling_num_; pos++) {
379     const char *spl_str = spelling_buf_ + spelling_size_ * pos;
380     spl_str = get_ym_str(spl_str);
381     if ('\0' != spl_str[0]) {
382       sucess = spl_table->put_spelling(spl_str, 0);
383       assert(sucess);
384     }
385   }
386 
387   size_t ym_item_size;  // '\0' is included
388   size_t ym_num;
389   const char* ym_buf;
390   ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
391 
392   if (NULL != ym_buf_)
393     delete [] ym_buf_;
394   ym_buf_ = new char[ym_item_size * ym_num];
395   if (NULL == ym_buf_) {
396     delete spl_table;
397     return false;
398   }
399 
400   memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
401   ym_size_ = ym_item_size;
402   ym_num_ = ym_num;
403 
404   delete spl_table;
405 
406   // Generate the maping from the spelling ids to the Yunmu ids.
407   if (spl_ym_ids_)
408     delete spl_ym_ids_;
409   spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
410   if (NULL == spl_ym_ids_)
411     return false;
412 
413   memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
414 
415   for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
416     const char *str = get_spelling_str(id);
417 
418     str = get_ym_str(str);
419     if ('\0' != str[0]) {
420       uint8 ym_id = get_ym_id(str);
421       spl_ym_ids_[id] = ym_id;
422       assert(ym_id > 0);
423     } else {
424       spl_ym_ids_[id] = 0;
425     }
426   }
427   return true;
428 }
429 #endif
430 
construct_spellings_subset(size_t item_start,size_t item_end,size_t level,SpellingNode * parent)431 SpellingNode* SpellingTrie::construct_spellings_subset(
432     size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
433   if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
434     return NULL;
435 
436   SpellingNode *first_son = NULL;
437   uint16 num_of_son = 0;
438   unsigned char min_son_score = 255;
439 
440   const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
441   char char_for_node = spelling_last_start[level];
442   assert((char_for_node >= 'A' && char_for_node <= 'Z') ||
443          'h' == char_for_node);
444 
445   // Scan the array to find how many sons
446   for (size_t i = item_start + 1; i < item_end; i++) {
447     const char *spelling_current = spelling_buf_ + spelling_size_ * i;
448     char char_current = spelling_current[level];
449     if (char_current != char_for_node) {
450       num_of_son++;
451       char_for_node = char_current;
452     }
453   }
454   num_of_son++;
455 
456   // Allocate memory
457 #ifdef ___BUILD_MODEL___
458   node_num_ += num_of_son;
459 #endif
460   first_son = new SpellingNode[num_of_son];
461   memset(first_son, 0, sizeof(SpellingNode)*num_of_son);
462 
463   // Now begin construct tree
464   size_t son_pos = 0;
465 
466   spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
467   char_for_node = spelling_last_start[level];
468 
469   bool spelling_endable = true;
470   if (spelling_last_start[level + 1] != '\0')
471     spelling_endable = false;
472 
473   size_t item_start_next = item_start;
474 
475   for (size_t i = item_start + 1; i < item_end; i++) {
476     const char *spelling_current = spelling_buf_ + spelling_size_ * i;
477     char char_current = spelling_current[level];
478     assert(is_valid_spl_char(char_current));
479 
480     if (char_current != char_for_node) {
481       // Construct a node
482       SpellingNode *node_current = first_son + son_pos;
483       node_current->char_this_node = char_for_node;
484 
485       // For quick search in the first level
486       if (0 == level)
487         level1_sons_[char_for_node - 'A'] = node_current;
488 
489       if (spelling_endable) {
490         node_current->spelling_idx = kFullSplIdStart + item_start_next;
491       }
492 
493       if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
494         size_t real_start = item_start_next;
495         if (spelling_last_start[level + 1] == '\0')
496           real_start++;
497 
498         node_current->first_son =
499             construct_spellings_subset(real_start, i, level + 1,
500                                        node_current);
501 
502         if (real_start == item_start_next + 1) {
503           uint16 score_this = static_cast<unsigned char>(
504               spelling_last_start[spelling_size_ - 1]);
505           if (score_this < node_current->score)
506             node_current->score = score_this;
507         }
508       } else {
509         node_current->first_son = NULL;
510         node_current->score = static_cast<unsigned char>(
511             spelling_last_start[spelling_size_ - 1]);
512       }
513 
514       if (node_current->score < min_son_score)
515         min_son_score = node_current->score;
516 
517       bool is_half = false;
518       if (level == 0 && is_szm_char(char_for_node)) {
519         node_current->spelling_idx =
520           static_cast<uint16>(char_for_node - 'A' + 1);
521 
522         if (char_for_node > 'C')
523           node_current->spelling_idx++;
524         if (char_for_node > 'S')
525           node_current->spelling_idx++;
526 
527         h2f_num_[node_current->spelling_idx] = i - item_start_next;
528         is_half = true;
529       } else if (level == 1 && char_for_node == 'h') {
530         char ch_level0 = spelling_last_start[0];
531         uint16 part_id = 0;
532         if (ch_level0 == 'C')
533           part_id = 'C' - 'A' + 1 + 1;
534         else if (ch_level0 == 'S')
535           part_id = 'S' - 'A' + 1 + 2;
536         else if (ch_level0 == 'Z')
537           part_id = 'Z' - 'A' + 1 + 3;
538         if (0 != part_id) {
539           node_current->spelling_idx = part_id;
540           h2f_num_[node_current->spelling_idx] = i - item_start_next;
541           is_half = true;
542         }
543       }
544 
545       if (is_half) {
546         if (h2f_num_[node_current->spelling_idx] > 0)
547           h2f_start_[node_current->spelling_idx] =
548             item_start_next + kFullSplIdStart;
549         else
550           h2f_start_[node_current->spelling_idx] = 0;
551       }
552 
553       // for next sibling
554       spelling_last_start = spelling_current;
555       char_for_node = char_current;
556       item_start_next = i;
557       spelling_endable = true;
558       if (spelling_current[level + 1] != '\0')
559         spelling_endable = false;
560 
561       son_pos++;
562     }
563   }
564 
565   // the last one
566   SpellingNode *node_current = first_son + son_pos;
567   node_current->char_this_node = char_for_node;
568 
569   // For quick search in the first level
570   if (0 == level)
571     level1_sons_[char_for_node - 'A'] = node_current;
572 
573   if (spelling_endable) {
574     node_current->spelling_idx = kFullSplIdStart + item_start_next;
575   }
576 
577   if (spelling_last_start[level + 1] != '\0' ||
578       item_end - item_start_next > 1) {
579     size_t real_start = item_start_next;
580     if (spelling_last_start[level + 1] == '\0')
581       real_start++;
582 
583     node_current->first_son =
584         construct_spellings_subset(real_start, item_end, level + 1,
585                                    node_current);
586 
587     if (real_start == item_start_next + 1) {
588       uint16 score_this = static_cast<unsigned char>(
589           spelling_last_start[spelling_size_ - 1]);
590       if (score_this < node_current->score)
591         node_current->score = score_this;
592     }
593   } else {
594     node_current->first_son = NULL;
595     node_current->score = static_cast<unsigned char>(
596         spelling_last_start[spelling_size_ - 1]);
597   }
598 
599   if (node_current->score < min_son_score)
600     min_son_score = node_current->score;
601 
602   assert(son_pos + 1 == num_of_son);
603 
604   bool is_half = false;
605   if (level == 0 && szm_is_enabled(char_for_node)) {
606     node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
607 
608     if (char_for_node > 'C')
609       node_current->spelling_idx++;
610     if (char_for_node > 'S')
611       node_current->spelling_idx++;
612 
613     h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
614     is_half = true;
615   } else if (level == 1 && char_for_node == 'h') {
616     char ch_level0 = spelling_last_start[0];
617     uint16 part_id = 0;
618     if (ch_level0 == 'C')
619       part_id = 'C' - 'A' + 1 + 1;
620     else if (ch_level0 == 'S')
621       part_id = 'S' - 'A' + 1 + 2;
622     else if (ch_level0 == 'Z')
623       part_id = 'Z' - 'A' + 1 + 3;
624     if (0 != part_id) {
625       node_current->spelling_idx = part_id;
626       h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
627       is_half = true;
628     }
629   }
630   if (is_half) {
631     if (h2f_num_[node_current->spelling_idx] > 0)
632       h2f_start_[node_current->spelling_idx] =
633         item_start_next + kFullSplIdStart;
634     else
635       h2f_start_[node_current->spelling_idx] = 0;
636   }
637 
638   parent->num_of_son = num_of_son;
639   parent->score = min_son_score;
640   return first_son;
641 }
642 
save_spl_trie(FILE * fp)643 bool SpellingTrie::save_spl_trie(FILE *fp) {
644   if (NULL == fp || NULL == spelling_buf_)
645     return false;
646 
647   if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1)
648     return false;
649 
650   if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1)
651     return false;
652 
653   if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
654     return false;
655 
656   if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
657     return false;
658 
659   if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
660              spelling_num_, fp) != spelling_num_)
661     return false;
662 
663   return true;
664 }
665 
load_spl_trie(FILE * fp)666 bool SpellingTrie::load_spl_trie(FILE *fp) {
667   if (NULL == fp)
668     return false;
669 
670   if (fread(&spelling_size_, sizeof(uint32), 1, fp) != 1)
671     return false;
672 
673   if (fread(&spelling_num_, sizeof(uint32), 1, fp) != 1)
674     return false;
675 
676   if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
677     return false;
678 
679   if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
680     return false;
681 
682   if (NULL != spelling_buf_)
683     delete [] spelling_buf_;
684 
685   spelling_buf_ = new char[spelling_size_ * spelling_num_];
686   if (NULL == spelling_buf_)
687     return false;
688 
689   if (fread(spelling_buf_, sizeof(char) * spelling_size_,
690             spelling_num_, fp) != spelling_num_)
691     return false;
692 
693   return construct(spelling_buf_, spelling_size_, spelling_num_,
694                    score_amplifier_, average_score_);
695 }
696 
build_f2h()697 bool SpellingTrie::build_f2h() {
698   if (NULL != f2h_)
699     delete [] f2h_;
700   f2h_ = new uint16[spelling_num_];
701   if (NULL == f2h_)
702     return false;
703 
704   for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
705     for (uint16 fid = h2f_start_[hid];
706          fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
707       f2h_[fid - kFullSplIdStart] = hid;
708   }
709 
710   return true;
711 }
712 
get_spelling_num()713 size_t SpellingTrie::get_spelling_num() {
714   return spelling_num_;
715 }
716 
get_ym_id(const char * ym_str)717 uint8 SpellingTrie::get_ym_id(const char *ym_str) {
718   if (NULL == ym_str || NULL == ym_buf_)
719     return 0;
720 
721   for (uint8 pos = 0; pos < ym_num_; pos++)
722     if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0)
723       return pos + 1;
724 
725   return 0;
726 }
727 
get_spelling_str(uint16 splid)728 const char* SpellingTrie::get_spelling_str(uint16 splid) {
729   splstr_queried_[0] = '\0';
730 
731   if (splid >= kFullSplIdStart) {
732     splid -= kFullSplIdStart;
733     snprintf(splstr_queried_, spelling_size_, "%s",
734              spelling_buf_ + splid * spelling_size_);
735   } else {
736     if (splid == 'C' - 'A' + 1 + 1) {
737       snprintf(splstr_queried_, spelling_size_, "%s", "Ch");
738     } else if (splid == 'S' - 'A' + 1 + 2) {
739       snprintf(splstr_queried_, spelling_size_, "%s", "Sh");
740     } else if (splid == 'Z' - 'A' + 1 + 3) {
741       snprintf(splstr_queried_, spelling_size_, "%s", "Zh");
742     } else {
743       if (splid > 'C' - 'A' + 1)
744         splid--;
745       if (splid > 'S' - 'A' + 1)
746         splid--;
747       splstr_queried_[0] = 'A' + splid - 1;
748       splstr_queried_[1] = '\0';
749     }
750   }
751   return splstr_queried_;
752 }
753 
get_spelling_str16(uint16 splid)754 const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
755   splstr16_queried_[0] = '\0';
756 
757   if (splid >= kFullSplIdStart) {
758     splid -= kFullSplIdStart;
759     for (size_t pos = 0; pos < spelling_size_; pos++) {
760       splstr16_queried_[pos] = static_cast<char16>
761           (spelling_buf_[splid * spelling_size_ + pos]);
762     }
763   } else {
764     if (splid == 'C' - 'A' + 1 + 1) {
765       splstr16_queried_[0] = static_cast<char16>('C');
766       splstr16_queried_[1] = static_cast<char16>('h');
767       splstr16_queried_[2] = static_cast<char16>('\0');
768     } else if (splid == 'S' - 'A' + 1 + 2) {
769       splstr16_queried_[0] = static_cast<char16>('S');
770       splstr16_queried_[1] = static_cast<char16>('h');
771       splstr16_queried_[2] = static_cast<char16>('\0');
772     } else if (splid == 'Z' - 'A' + 1 + 3) {
773       splstr16_queried_[0] = static_cast<char16>('Z');
774       splstr16_queried_[1] = static_cast<char16>('h');
775       splstr16_queried_[2] = static_cast<char16>('\0');
776     } else {
777       if (splid > 'C' - 'A' + 1)
778         splid--;
779       if (splid > 'S' - 'A' + 1)
780         splid--;
781       splstr16_queried_[0] = 'A' + splid - 1;
782       splstr16_queried_[1] = '\0';
783     }
784   }
785   return splstr16_queried_;
786 }
787 
get_spelling_str16(uint16 splid,char16 * splstr16,size_t splstr16_len)788 size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
789                                         size_t splstr16_len) {
790   if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;
791 
792   if (splid >= kFullSplIdStart) {
793     splid -= kFullSplIdStart;
794     for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
795       splstr16[pos] = static_cast<char16>
796           (spelling_buf_[splid * spelling_size_ + pos]);
797       if (static_cast<char16>('\0') == splstr16[pos]) {
798         return pos;
799       }
800     }
801   } else {
802     if (splid == 'C' - 'A' + 1 + 1) {
803       splstr16[0] = static_cast<char16>('C');
804       splstr16[1] = static_cast<char16>('h');
805       splstr16[2] = static_cast<char16>('\0');
806       return 2;
807     } else if (splid == 'S' - 'A' + 1 + 2) {
808       splstr16[0] = static_cast<char16>('S');
809       splstr16[1] = static_cast<char16>('h');
810       splstr16[2] = static_cast<char16>('\0');
811       return 2;
812     } else if (splid == 'Z' - 'A' + 1 + 3) {
813       splstr16[0] = static_cast<char16>('Z');
814       splstr16[1] = static_cast<char16>('h');
815       splstr16[2] = static_cast<char16>('\0');
816       return 2;
817     } else {
818       if (splid > 'C' - 'A' + 1)
819         splid--;
820       if (splid > 'S' - 'A' + 1)
821         splid--;
822       splstr16[0] = 'A' + splid - 1;
823       splstr16[1] = '\0';
824       return 1;
825     }
826   }
827 
828   // Not reachable.
829   return 0;
830 }
831 
832 }  // namespace ime_pinyin
833