1 #include <assert.h>
2
3 #include <libnu/casemap.h>
4
5 #ifdef NU_WITH_UNACCENT
6
7 #include <libnu/casemap_internal.h>
8 #include "gen/_tounaccent.c"
9
nu_tounaccent(uint32_t codepoint)10 const char* nu_tounaccent(uint32_t codepoint) {
11 typedef struct {
12 uint32_t block_start;
13 uint32_t block_end;
14 } block_t;
15
16 static const block_t blocks[] = {
17 { 0x0300, 0x036F }, /* Combining Diacritical Marks */
18 { 0x1AB0, 0x1AFF }, /* Combining Diacritical Marks Extended */
19 { 0x20D0, 0x20FF }, /* Combining Diacritical Marks for Symbols */
20 { 0x1DC0, 0x1DFF }, /* Combining Diacritical Marks Supplement */
21 };
22 static const size_t blocks_count = sizeof(blocks) / sizeof(*blocks);
23
24 /* check if codepoint itself is a diacritic,
25 * return empty string in that case
26 * (transform into empty string */
27 assert(nu_casemap_read == nu_utf8_read);
28 for (size_t i = 0; i < blocks_count; ++i) {
29 if (codepoint >= blocks[i].block_start && codepoint <= blocks[i].block_end) {
30 return ""; /* return zero-terminated empty string in nu_casemap_read (utf-8) */
31 }
32 }
33
34 return _nu_to_something(codepoint, NU_TOUNACCENT_G, NU_TOUNACCENT_G_SIZE,
35 NU_TOUNACCENT_VALUES_C, NU_TOUNACCENT_VALUES_I, NU_TOUNACCENT_COMBINED);
36 }
37
/* Iterator-style wrapper around nu_tounaccent().
 *
 * Decodes one codepoint from `encoded` using `read`, stores the unaccent
 * transformation of that codepoint in `*transform` and, when `u` is not
 * null, stores the decoded codepoint in `*u`.
 *
 * `limit` and `context` are accepted but not used.
 *
 * Returns whatever `read` returned (presumably the position following
 * the decoded codepoint -- confirm against the read iterator contract). */
const char* _nu_tounaccent(const char *encoded, const char *limit, nu_read_iterator_t read,
	uint32_t *u, const char **transform,
	void *context) {

	/* silence unused-parameter warnings */
	(void)(limit);
	(void)(context);

	uint32_t decoded = 0;
	const char *next = read(encoded, &decoded);

	*transform = nu_tounaccent(decoded);

	/* reporting the decoded codepoint is optional for the caller */
	if (u) {
		*u = decoded;
	}

	return next;
}
56
57 #endif /* NU_WITH_UNACCENT */
58