1 #include <assert.h>
2 
3 #include <libnu/casemap.h>
4 
5 #ifdef NU_WITH_UNACCENT
6 
7 #include <libnu/casemap_internal.h>
8 #include "gen/_tounaccent.c"
9 
nu_tounaccent(uint32_t codepoint)10 const char* nu_tounaccent(uint32_t codepoint) {
11 	typedef struct {
12 		uint32_t block_start;
13 		uint32_t block_end;
14 	} block_t;
15 
16 	static const block_t blocks[] = {
17 		{ 0x0300, 0x036F },  /* Combining Diacritical Marks */
18 		{ 0x1AB0, 0x1AFF },  /* Combining Diacritical Marks Extended */
19 		{ 0x20D0, 0x20FF },  /* Combining Diacritical Marks for Symbols */
20 		{ 0x1DC0, 0x1DFF },  /* Combining Diacritical Marks Supplement */
21 	};
22 	static const size_t blocks_count = sizeof(blocks) / sizeof(*blocks);
23 
24 	/* check if codepoint itself is a diacritic,
25 	 * return empty string in that case
26 	 * (transform into empty string */
27 	assert(nu_casemap_read == nu_utf8_read);
28 	for (size_t i = 0; i < blocks_count; ++i) {
29 		if (codepoint >= blocks[i].block_start && codepoint <= blocks[i].block_end) {
30 			return ""; /* return zero-terminated empty string in nu_casemap_read (utf-8) */
31 		}
32 	}
33 
34 	return _nu_to_something(codepoint, NU_TOUNACCENT_G, NU_TOUNACCENT_G_SIZE,
35 		NU_TOUNACCENT_VALUES_C, NU_TOUNACCENT_VALUES_I, NU_TOUNACCENT_COMBINED);
36 }
37 
/**
 * Decode one codepoint from encoded using read and store its unaccented
 * replacement, as produced by nu_tounaccent(), in *transform.
 *
 * @param encoded   string to decode from; passed straight to read
 * @param limit     unused
 * @param read      read function used to decode a single codepoint
 * @param u         optional; when non-null receives the decoded codepoint
 * @param transform receives the result of nu_tounaccent() for the codepoint
 * @param context   unused
 * @return the pointer returned by read
 */
const char* _nu_tounaccent(const char *encoded, const char *limit, nu_read_iterator_t read,
	uint32_t *u, const char **transform,
	void *context) {

	uint32_t decoded = 0;
	const char *next = read(encoded, &decoded);

	/* these parameters are not used by this function */
	(void)(limit);
	(void)(context);

	*transform = nu_tounaccent(decoded);

	/* the caller may pass a null u when it does not need the codepoint */
	if (u) {
		*u = decoded;
	}

	return next;
}
56 
57 #endif /* NU_WITH_UNACCENT */
58