1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <stdio.h>
18 #include <string.h>
19 #include <assert.h>
20 #include "../include/dictdef.h"
21
22 #ifdef _WIN32
23 #define snprintf _snprintf
24 #endif
25
26 #ifdef ___BUILD_MODEL___
27 #include "../include/spellingtable.h"
28 #endif
29
30 #include "../include/spellingtrie.h"
31
32 namespace ime_pinyin {
33
34 SpellingTrie* SpellingTrie::instance_ = NULL;
35
36 // z/c/s is for Zh/Ch/Sh
37 const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
38 "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
39
40 // Bit 0 : is it a Shengmu char?
41 // Bit 1 : is it a Yunmu char? (one char is a Yunmu)
42 // Bit 2 : is it enabled in ShouZiMu(first char) mode?
43 unsigned char SpellingTrie::char_flags_[] = {
44 // a b c d e f g
45 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
46 // h i j k l m n
47 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
48 // o p q r s t
49 0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
50 // u v w x y z
51 0x00, 0x00, 0x01, 0x01, 0x01, 0x01
52 };
53
compare_spl(const void * p1,const void * p2)54 int compare_spl(const void* p1, const void* p2) {
55 return strcmp((const char*)(p1), (const char*)(p2));
56 }
57
SpellingTrie()58 SpellingTrie::SpellingTrie() {
59 spelling_buf_ = NULL;
60 spelling_size_ = 0;
61 spelling_num_ = 0;
62 spl_ym_ids_ = NULL;
63 splstr_queried_ = NULL;
64 splstr16_queried_ = NULL;
65 root_ = NULL;
66 dumb_node_ = NULL;
67 splitter_node_ = NULL;
68 instance_ = NULL;
69 ym_buf_ = NULL;
70 f2h_ = NULL;
71
72 szm_enable_shm(true);
73 szm_enable_ym(true);
74
75 #ifdef ___BUILD_MODEL___
76 node_num_ = 0;
77 #endif
78 }
79
~SpellingTrie()80 SpellingTrie::~SpellingTrie() {
81 if (NULL != spelling_buf_)
82 delete [] spelling_buf_;
83
84 if (NULL != splstr_queried_)
85 delete [] splstr_queried_;
86
87 if (NULL != splstr16_queried_)
88 delete [] splstr16_queried_;
89
90 if (NULL != spl_ym_ids_)
91 delete [] spl_ym_ids_;
92
93 if (NULL != root_) {
94 free_son_trie(root_);
95 delete root_;
96 }
97
98 if (NULL != dumb_node_) {
99 delete [] dumb_node_;
100 }
101
102 if (NULL != splitter_node_) {
103 delete [] splitter_node_;
104 }
105
106 if (NULL != instance_) {
107 delete instance_;
108 instance_ = NULL;
109 }
110
111 if (NULL != ym_buf_)
112 delete [] ym_buf_;
113
114 if (NULL != f2h_)
115 delete [] f2h_;
116 }
117
if_valid_id_update(uint16 * splid) const118 bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
119 if (NULL == splid || 0 == *splid)
120 return false;
121
122 if (*splid >= kFullSplIdStart)
123 return true;
124 if (*splid < kFullSplIdStart) {
125 char ch = kHalfId2Sc_[*splid];
126 if (ch > 'Z') {
127 return true;
128 } else {
129 if (szm_is_enabled(ch)) {
130 return true;
131 } else if (is_yunmu_char(ch)) {
132 assert(h2f_num_[*splid] > 0);
133 *splid = h2f_start_[*splid];
134 return true;
135 }
136 }
137 }
138 return false;
139 }
140
is_half_id(uint16 splid) const141 bool SpellingTrie::is_half_id(uint16 splid) const {
142 if (0 == splid || splid >= kFullSplIdStart)
143 return false;
144
145 return true;
146 }
147
is_full_id(uint16 splid) const148 bool SpellingTrie::is_full_id(uint16 splid) const {
149 if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_)
150 return false;
151 return true;
152 }
153
half_full_compatible(uint16 half_id,uint16 full_id) const154 bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
155 uint16 half_fr_full = full_to_half(full_id);
156
157 if (half_fr_full == half_id)
158 return true;
159
160 // &~0x20 is used to conver the char to upper case.
161 // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
162 char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
163 char ch_h = kHalfId2Sc_[half_id];
164 if (ch_f == ch_h)
165 return true;
166
167 return false;
168 }
169
is_half_id_yunmu(uint16 splid) const170 bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
171 if (0 == splid || splid >= kFullSplIdStart)
172 return false;
173
174 char ch = kHalfId2Sc_[splid];
175 // If ch >= 'a', that means the half id is one of Zh/Ch/Sh
176 if (ch >= 'a') {
177 return false;
178 }
179
180 return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
181 }
182
is_shengmu_char(char ch) const183 bool SpellingTrie::is_shengmu_char(char ch) const {
184 return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
185 }
186
is_yunmu_char(char ch) const187 bool SpellingTrie::is_yunmu_char(char ch) const {
188 return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
189 }
190
is_szm_char(char ch) const191 bool SpellingTrie::is_szm_char(char ch) const {
192 return is_shengmu_char(ch) || is_yunmu_char(ch);
193 }
194
szm_is_enabled(char ch) const195 bool SpellingTrie::szm_is_enabled(char ch) const {
196 return char_flags_[ch - 'A'] & kHalfIdSzmMask;
197 }
198
szm_enable_shm(bool enable)199 void SpellingTrie::szm_enable_shm(bool enable) {
200 if (enable) {
201 for (char ch = 'A'; ch <= 'Z'; ch++) {
202 if (is_shengmu_char(ch))
203 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
204 }
205 } else {
206 for (char ch = 'A'; ch <= 'Z'; ch++) {
207 if (is_shengmu_char(ch))
208 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
209 }
210 }
211 }
212
szm_enable_ym(bool enable)213 void SpellingTrie::szm_enable_ym(bool enable) {
214 if (enable) {
215 for (char ch = 'A'; ch <= 'Z'; ch++) {
216 if (is_yunmu_char(ch))
217 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
218 }
219 } else {
220 for (char ch = 'A'; ch <= 'Z'; ch++) {
221 if (is_yunmu_char(ch))
222 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
223 }
224 }
225 }
226
is_szm_enabled(char ch) const227 bool SpellingTrie::is_szm_enabled(char ch) const {
228 return char_flags_[ch - 'A'] & kHalfIdSzmMask;
229 }
230
get_cpinstance()231 const SpellingTrie* SpellingTrie::get_cpinstance() {
232 return &get_instance();
233 }
234
get_instance()235 SpellingTrie& SpellingTrie::get_instance() {
236 if (NULL == instance_)
237 instance_ = new SpellingTrie();
238
239 return *instance_;
240 }
241
half2full_num(uint16 half_id) const242 uint16 SpellingTrie::half2full_num(uint16 half_id) const {
243 if (NULL == root_ || half_id >= kFullSplIdStart)
244 return 0;
245 return h2f_num_[half_id];
246 }
247
half_to_full(uint16 half_id,uint16 * spl_id_start) const248 uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
249 if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart)
250 return 0;
251
252 *spl_id_start = h2f_start_[half_id];
253 return h2f_num_[half_id];
254 }
255
full_to_half(uint16 full_id) const256 uint16 SpellingTrie::full_to_half(uint16 full_id) const {
257 if (NULL == root_ || full_id < kFullSplIdStart ||
258 full_id > spelling_num_ + kFullSplIdStart)
259 return 0;
260
261 return f2h_[full_id - kFullSplIdStart];
262 }
263
free_son_trie(SpellingNode * node)264 void SpellingTrie::free_son_trie(SpellingNode* node) {
265 if (NULL == node)
266 return;
267
268 for (size_t pos = 0; pos < node->num_of_son; pos++) {
269 free_son_trie(node->first_son + pos);
270 }
271
272 if (NULL != node->first_son)
273 delete [] node->first_son;
274 }
275
construct(const char * spelling_arr,size_t item_size,size_t item_num,float score_amplifier,unsigned char average_score)276 bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
277 size_t item_num, float score_amplifier,
278 unsigned char average_score) {
279 if (spelling_arr == NULL)
280 return false;
281
282 memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
283 memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
284
285 // If the arr is the same as the buf, means this function is called by
286 // load_table(), the table data are ready; otherwise the array should be
287 // saved.
288 if (spelling_arr != spelling_buf_) {
289 if (NULL != spelling_buf_)
290 delete [] spelling_buf_;
291 spelling_buf_ = new char[item_size * item_num];
292 if (NULL == spelling_buf_)
293 return false;
294 memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
295 }
296
297 spelling_size_ = item_size;
298 spelling_num_ = item_num;
299
300 score_amplifier_ = score_amplifier;
301 average_score_ = average_score;
302
303 if (NULL != splstr_queried_)
304 delete [] splstr_queried_;
305 splstr_queried_ = new char[spelling_size_];
306 if (NULL == splstr_queried_)
307 return false;
308
309 if (NULL != splstr16_queried_)
310 delete [] splstr16_queried_;
311 splstr16_queried_ = new char16[spelling_size_];
312 if (NULL == splstr16_queried_)
313 return false;
314
315 // First, sort the buf to ensure they are in ascendant order
316 qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
317
318 #ifdef ___BUILD_MODEL___
319 node_num_ = 1;
320 #endif
321
322 root_ = new SpellingNode();
323 memset(root_, 0, sizeof(SpellingNode));
324
325 dumb_node_ = new SpellingNode();
326 memset(dumb_node_, 0, sizeof(SpellingNode));
327 dumb_node_->score = average_score_;
328
329 splitter_node_ = new SpellingNode();
330 memset(splitter_node_, 0, sizeof(SpellingNode));
331 splitter_node_->score = average_score_;
332
333 memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
334
335 root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
336
337 // Root's score should be cleared.
338 root_->score = 0;
339
340 if (NULL == root_->first_son)
341 return false;
342
343 h2f_start_[0] = h2f_num_[0] = 0;
344
345 if (!build_f2h())
346 return false;
347
348 #ifdef ___BUILD_MODEL___
349 if (kPrintDebug0) {
350 printf("---SpellingTrie Nodes: %d\n", (int)node_num_);
351 }
352 return build_ym_info();
353 #else
354 return true;
355 #endif
356 }
357
358 #ifdef ___BUILD_MODEL___
get_ym_str(const char * spl_str)359 const char* SpellingTrie::get_ym_str(const char *spl_str) {
360 bool start_ZCS = false;
361 if (is_shengmu_char(*spl_str)) {
362 if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
363 start_ZCS = true;
364 spl_str += 1;
365 if (start_ZCS && 'h' == *spl_str)
366 spl_str += 1;
367 }
368 return spl_str;
369 }
370
build_ym_info()371 bool SpellingTrie::build_ym_info() {
372 bool sucess;
373 SpellingTable *spl_table = new SpellingTable();
374
375 sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
376 assert(sucess);
377
378 for (uint16 pos = 0; pos < spelling_num_; pos++) {
379 const char *spl_str = spelling_buf_ + spelling_size_ * pos;
380 spl_str = get_ym_str(spl_str);
381 if ('\0' != spl_str[0]) {
382 sucess = spl_table->put_spelling(spl_str, 0);
383 assert(sucess);
384 }
385 }
386
387 size_t ym_item_size; // '\0' is included
388 size_t ym_num;
389 const char* ym_buf;
390 ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
391
392 if (NULL != ym_buf_)
393 delete [] ym_buf_;
394 ym_buf_ = new char[ym_item_size * ym_num];
395 if (NULL == ym_buf_) {
396 delete spl_table;
397 return false;
398 }
399
400 memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
401 ym_size_ = ym_item_size;
402 ym_num_ = ym_num;
403
404 delete spl_table;
405
406 // Generate the maping from the spelling ids to the Yunmu ids.
407 if (spl_ym_ids_)
408 delete spl_ym_ids_;
409 spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
410 if (NULL == spl_ym_ids_)
411 return false;
412
413 memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
414
415 for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
416 const char *str = get_spelling_str(id);
417
418 str = get_ym_str(str);
419 if ('\0' != str[0]) {
420 uint8 ym_id = get_ym_id(str);
421 spl_ym_ids_[id] = ym_id;
422 assert(ym_id > 0);
423 } else {
424 spl_ym_ids_[id] = 0;
425 }
426 }
427 return true;
428 }
429 #endif
430
construct_spellings_subset(size_t item_start,size_t item_end,size_t level,SpellingNode * parent)431 SpellingNode* SpellingTrie::construct_spellings_subset(
432 size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
433 if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
434 return NULL;
435
436 SpellingNode *first_son = NULL;
437 uint16 num_of_son = 0;
438 unsigned char min_son_score = 255;
439
440 const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
441 char char_for_node = spelling_last_start[level];
442 assert((char_for_node >= 'A' && char_for_node <= 'Z') ||
443 'h' == char_for_node);
444
445 // Scan the array to find how many sons
446 for (size_t i = item_start + 1; i < item_end; i++) {
447 const char *spelling_current = spelling_buf_ + spelling_size_ * i;
448 char char_current = spelling_current[level];
449 if (char_current != char_for_node) {
450 num_of_son++;
451 char_for_node = char_current;
452 }
453 }
454 num_of_son++;
455
456 // Allocate memory
457 #ifdef ___BUILD_MODEL___
458 node_num_ += num_of_son;
459 #endif
460 first_son = new SpellingNode[num_of_son];
461 memset(first_son, 0, sizeof(SpellingNode)*num_of_son);
462
463 // Now begin construct tree
464 size_t son_pos = 0;
465
466 spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
467 char_for_node = spelling_last_start[level];
468
469 bool spelling_endable = true;
470 if (spelling_last_start[level + 1] != '\0')
471 spelling_endable = false;
472
473 size_t item_start_next = item_start;
474
475 for (size_t i = item_start + 1; i < item_end; i++) {
476 const char *spelling_current = spelling_buf_ + spelling_size_ * i;
477 char char_current = spelling_current[level];
478 assert(is_valid_spl_char(char_current));
479
480 if (char_current != char_for_node) {
481 // Construct a node
482 SpellingNode *node_current = first_son + son_pos;
483 node_current->char_this_node = char_for_node;
484
485 // For quick search in the first level
486 if (0 == level)
487 level1_sons_[char_for_node - 'A'] = node_current;
488
489 if (spelling_endable) {
490 node_current->spelling_idx = kFullSplIdStart + item_start_next;
491 }
492
493 if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
494 size_t real_start = item_start_next;
495 if (spelling_last_start[level + 1] == '\0')
496 real_start++;
497
498 node_current->first_son =
499 construct_spellings_subset(real_start, i, level + 1,
500 node_current);
501
502 if (real_start == item_start_next + 1) {
503 uint16 score_this = static_cast<unsigned char>(
504 spelling_last_start[spelling_size_ - 1]);
505 if (score_this < node_current->score)
506 node_current->score = score_this;
507 }
508 } else {
509 node_current->first_son = NULL;
510 node_current->score = static_cast<unsigned char>(
511 spelling_last_start[spelling_size_ - 1]);
512 }
513
514 if (node_current->score < min_son_score)
515 min_son_score = node_current->score;
516
517 bool is_half = false;
518 if (level == 0 && is_szm_char(char_for_node)) {
519 node_current->spelling_idx =
520 static_cast<uint16>(char_for_node - 'A' + 1);
521
522 if (char_for_node > 'C')
523 node_current->spelling_idx++;
524 if (char_for_node > 'S')
525 node_current->spelling_idx++;
526
527 h2f_num_[node_current->spelling_idx] = i - item_start_next;
528 is_half = true;
529 } else if (level == 1 && char_for_node == 'h') {
530 char ch_level0 = spelling_last_start[0];
531 uint16 part_id = 0;
532 if (ch_level0 == 'C')
533 part_id = 'C' - 'A' + 1 + 1;
534 else if (ch_level0 == 'S')
535 part_id = 'S' - 'A' + 1 + 2;
536 else if (ch_level0 == 'Z')
537 part_id = 'Z' - 'A' + 1 + 3;
538 if (0 != part_id) {
539 node_current->spelling_idx = part_id;
540 h2f_num_[node_current->spelling_idx] = i - item_start_next;
541 is_half = true;
542 }
543 }
544
545 if (is_half) {
546 if (h2f_num_[node_current->spelling_idx] > 0)
547 h2f_start_[node_current->spelling_idx] =
548 item_start_next + kFullSplIdStart;
549 else
550 h2f_start_[node_current->spelling_idx] = 0;
551 }
552
553 // for next sibling
554 spelling_last_start = spelling_current;
555 char_for_node = char_current;
556 item_start_next = i;
557 spelling_endable = true;
558 if (spelling_current[level + 1] != '\0')
559 spelling_endable = false;
560
561 son_pos++;
562 }
563 }
564
565 // the last one
566 SpellingNode *node_current = first_son + son_pos;
567 node_current->char_this_node = char_for_node;
568
569 // For quick search in the first level
570 if (0 == level)
571 level1_sons_[char_for_node - 'A'] = node_current;
572
573 if (spelling_endable) {
574 node_current->spelling_idx = kFullSplIdStart + item_start_next;
575 }
576
577 if (spelling_last_start[level + 1] != '\0' ||
578 item_end - item_start_next > 1) {
579 size_t real_start = item_start_next;
580 if (spelling_last_start[level + 1] == '\0')
581 real_start++;
582
583 node_current->first_son =
584 construct_spellings_subset(real_start, item_end, level + 1,
585 node_current);
586
587 if (real_start == item_start_next + 1) {
588 uint16 score_this = static_cast<unsigned char>(
589 spelling_last_start[spelling_size_ - 1]);
590 if (score_this < node_current->score)
591 node_current->score = score_this;
592 }
593 } else {
594 node_current->first_son = NULL;
595 node_current->score = static_cast<unsigned char>(
596 spelling_last_start[spelling_size_ - 1]);
597 }
598
599 if (node_current->score < min_son_score)
600 min_son_score = node_current->score;
601
602 assert(son_pos + 1 == num_of_son);
603
604 bool is_half = false;
605 if (level == 0 && szm_is_enabled(char_for_node)) {
606 node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
607
608 if (char_for_node > 'C')
609 node_current->spelling_idx++;
610 if (char_for_node > 'S')
611 node_current->spelling_idx++;
612
613 h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
614 is_half = true;
615 } else if (level == 1 && char_for_node == 'h') {
616 char ch_level0 = spelling_last_start[0];
617 uint16 part_id = 0;
618 if (ch_level0 == 'C')
619 part_id = 'C' - 'A' + 1 + 1;
620 else if (ch_level0 == 'S')
621 part_id = 'S' - 'A' + 1 + 2;
622 else if (ch_level0 == 'Z')
623 part_id = 'Z' - 'A' + 1 + 3;
624 if (0 != part_id) {
625 node_current->spelling_idx = part_id;
626 h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
627 is_half = true;
628 }
629 }
630 if (is_half) {
631 if (h2f_num_[node_current->spelling_idx] > 0)
632 h2f_start_[node_current->spelling_idx] =
633 item_start_next + kFullSplIdStart;
634 else
635 h2f_start_[node_current->spelling_idx] = 0;
636 }
637
638 parent->num_of_son = num_of_son;
639 parent->score = min_son_score;
640 return first_son;
641 }
642
save_spl_trie(FILE * fp)643 bool SpellingTrie::save_spl_trie(FILE *fp) {
644 if (NULL == fp || NULL == spelling_buf_)
645 return false;
646
647 if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1)
648 return false;
649
650 if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1)
651 return false;
652
653 if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
654 return false;
655
656 if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
657 return false;
658
659 if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
660 spelling_num_, fp) != spelling_num_)
661 return false;
662
663 return true;
664 }
665
load_spl_trie(FILE * fp)666 bool SpellingTrie::load_spl_trie(FILE *fp) {
667 if (NULL == fp)
668 return false;
669
670 if (fread(&spelling_size_, sizeof(uint32), 1, fp) != 1)
671 return false;
672
673 if (fread(&spelling_num_, sizeof(uint32), 1, fp) != 1)
674 return false;
675
676 if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
677 return false;
678
679 if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
680 return false;
681
682 if (NULL != spelling_buf_)
683 delete [] spelling_buf_;
684
685 spelling_buf_ = new char[spelling_size_ * spelling_num_];
686 if (NULL == spelling_buf_)
687 return false;
688
689 if (fread(spelling_buf_, sizeof(char) * spelling_size_,
690 spelling_num_, fp) != spelling_num_)
691 return false;
692
693 return construct(spelling_buf_, spelling_size_, spelling_num_,
694 score_amplifier_, average_score_);
695 }
696
build_f2h()697 bool SpellingTrie::build_f2h() {
698 if (NULL != f2h_)
699 delete [] f2h_;
700 f2h_ = new uint16[spelling_num_];
701 if (NULL == f2h_)
702 return false;
703
704 for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
705 for (uint16 fid = h2f_start_[hid];
706 fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
707 f2h_[fid - kFullSplIdStart] = hid;
708 }
709
710 return true;
711 }
712
get_spelling_num()713 size_t SpellingTrie::get_spelling_num() {
714 return spelling_num_;
715 }
716
get_ym_id(const char * ym_str)717 uint8 SpellingTrie::get_ym_id(const char *ym_str) {
718 if (NULL == ym_str || NULL == ym_buf_)
719 return 0;
720
721 for (uint8 pos = 0; pos < ym_num_; pos++)
722 if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0)
723 return pos + 1;
724
725 return 0;
726 }
727
get_spelling_str(uint16 splid)728 const char* SpellingTrie::get_spelling_str(uint16 splid) {
729 splstr_queried_[0] = '\0';
730
731 if (splid >= kFullSplIdStart) {
732 splid -= kFullSplIdStart;
733 snprintf(splstr_queried_, spelling_size_, "%s",
734 spelling_buf_ + splid * spelling_size_);
735 } else {
736 if (splid == 'C' - 'A' + 1 + 1) {
737 snprintf(splstr_queried_, spelling_size_, "%s", "Ch");
738 } else if (splid == 'S' - 'A' + 1 + 2) {
739 snprintf(splstr_queried_, spelling_size_, "%s", "Sh");
740 } else if (splid == 'Z' - 'A' + 1 + 3) {
741 snprintf(splstr_queried_, spelling_size_, "%s", "Zh");
742 } else {
743 if (splid > 'C' - 'A' + 1)
744 splid--;
745 if (splid > 'S' - 'A' + 1)
746 splid--;
747 splstr_queried_[0] = 'A' + splid - 1;
748 splstr_queried_[1] = '\0';
749 }
750 }
751 return splstr_queried_;
752 }
753
get_spelling_str16(uint16 splid)754 const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
755 splstr16_queried_[0] = '\0';
756
757 if (splid >= kFullSplIdStart) {
758 splid -= kFullSplIdStart;
759 for (size_t pos = 0; pos < spelling_size_; pos++) {
760 splstr16_queried_[pos] = static_cast<char16>
761 (spelling_buf_[splid * spelling_size_ + pos]);
762 }
763 } else {
764 if (splid == 'C' - 'A' + 1 + 1) {
765 splstr16_queried_[0] = static_cast<char16>('C');
766 splstr16_queried_[1] = static_cast<char16>('h');
767 splstr16_queried_[2] = static_cast<char16>('\0');
768 } else if (splid == 'S' - 'A' + 1 + 2) {
769 splstr16_queried_[0] = static_cast<char16>('S');
770 splstr16_queried_[1] = static_cast<char16>('h');
771 splstr16_queried_[2] = static_cast<char16>('\0');
772 } else if (splid == 'Z' - 'A' + 1 + 3) {
773 splstr16_queried_[0] = static_cast<char16>('Z');
774 splstr16_queried_[1] = static_cast<char16>('h');
775 splstr16_queried_[2] = static_cast<char16>('\0');
776 } else {
777 if (splid > 'C' - 'A' + 1)
778 splid--;
779 if (splid > 'S' - 'A' + 1)
780 splid--;
781 splstr16_queried_[0] = 'A' + splid - 1;
782 splstr16_queried_[1] = '\0';
783 }
784 }
785 return splstr16_queried_;
786 }
787
get_spelling_str16(uint16 splid,char16 * splstr16,size_t splstr16_len)788 size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
789 size_t splstr16_len) {
790 if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;
791
792 if (splid >= kFullSplIdStart) {
793 splid -= kFullSplIdStart;
794 for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
795 splstr16[pos] = static_cast<char16>
796 (spelling_buf_[splid * spelling_size_ + pos]);
797 if (static_cast<char16>('\0') == splstr16[pos]) {
798 return pos;
799 }
800 }
801 } else {
802 if (splid == 'C' - 'A' + 1 + 1) {
803 splstr16[0] = static_cast<char16>('C');
804 splstr16[1] = static_cast<char16>('h');
805 splstr16[2] = static_cast<char16>('\0');
806 return 2;
807 } else if (splid == 'S' - 'A' + 1 + 2) {
808 splstr16[0] = static_cast<char16>('S');
809 splstr16[1] = static_cast<char16>('h');
810 splstr16[2] = static_cast<char16>('\0');
811 return 2;
812 } else if (splid == 'Z' - 'A' + 1 + 3) {
813 splstr16[0] = static_cast<char16>('Z');
814 splstr16[1] = static_cast<char16>('h');
815 splstr16[2] = static_cast<char16>('\0');
816 return 2;
817 } else {
818 if (splid > 'C' - 'A' + 1)
819 splid--;
820 if (splid > 'S' - 'A' + 1)
821 splid--;
822 splstr16[0] = 'A' + splid - 1;
823 splstr16[1] = '\0';
824 return 1;
825 }
826 }
827
828 // Not reachable.
829 return 0;
830 }
831
832 } // namespace ime_pinyin
833