1 #ifndef NU_STRCOLL_INTERNAL_H
2 #define NU_STRCOLL_INTERNAL_H
3 
4 /** @defgroup collation_internal Internal collation functions
5  *
6  * Functions in this group are mostly for the internal use. PLease use them
7  * with care.
8  */
9 
10 #include <libnu/config.h>
11 #include <libnu/casemap.h>
12 #include <libnu/defines.h>
13 #include <libnu/strings.h>
14 
15 #if defined (__cplusplus) || defined (c_plusplus)
16 extern "C" {
17 #endif
18 
19 /** Read (decode) iterator with transformation applied inside of it
20  *
21  * @ingroup collation_internal
22  * @see nu_default_compound_read
23  * @see nu_nocase_compound_read
24  */
25 typedef const char* (*nu_compound_read_t)(
26 	const char *encoded, const char *encoded_limit, nu_read_iterator_t encoded_read,
27 	uint32_t *unicode, const char **tail);
28 
/** Weight unicode codepoint (or several codepoints)
 *
 * 0 should always be weighted to 0. If your weight function needs more
 * than one codepoint - return a negative value, which will be passed back to
 * this function along with the next codepoint.
 *
 * When the function has decided on a weight and returned a non-negative
 * result, it has to fill \*weight with how many (Unicode) codepoints
 * nunicode should roll back.
 * E.g. the function consumed "ZZS" and decided on a weight (in Hungarian
 * collation): it fills \*weight with 0 because no rollback is needed. Then
 * the function consumed "ZZZ" and no weight is available for such a
 * contraction: it returns the weight for "Z" and fills \*weight with 2, to
 * roll back the redundant "ZZ".
 *
 * If the string suddenly ends before the weight function can decide (string
 * limit reached), 0 will be passed in addition to the previous string to
 * signal the end of the string.
 *
 * @ingroup collation_internal
 * @param u unicode codepoint to weight
 * @param weight 0 at first call or (on sequential calls) pointer to negative
 * weight previously returned by this function
 * @param context pointer passed to _nu_strcoll() or _nu_strstr()
 * @return non-negative codepoint weight, or negative value if the function
 * needs more codepoints
 */
typedef int32_t (*nu_codepoint_weight_t)(uint32_t u, int32_t *weight, void *context);

57 #if (defined NU_WITH_Z_COLLATION) || (defined NU_WITH_N_COLLATION)
58 
59 /** Default compound read, equal to simply calling encoded_read(encoded, &unicode)
60  *
61  * @ingroup collation_internal
62  * @param encoded encoded string
63  * @param encoded_limit upper limit for encoded. NU_UNLIMITED for 0-terminated
64  * strings
65  * @param encoded_read read (decode) function
66  * @param unicode output unicode codepoint
67  * @param tail output pointer to compound tail, should never be 0
68  * @return pointer to next encoded codepoint
69  */
70 static inline
nu_default_compound_read(const char * encoded,const char * encoded_limit,nu_read_iterator_t encoded_read,uint32_t * unicode,const char ** tail)71 const char* nu_default_compound_read(const char *encoded, const char *encoded_limit,
72 	nu_read_iterator_t encoded_read, uint32_t *unicode,
73 	const char **tail) {
74 	(void)(encoded_limit);
75 	(void)(tail);
76 
77 	return encoded_read(encoded, unicode);
78 }
79 
80 /** Case-ignoring compound read, equal to calling
81  * encoded_read(encoded, &unicode) with nu_toupper() applied internally
82  *
83  * @ingroup collation_internal
84  * @param encoded encoded string
85  * @param encoded_limit upper limit for encoded. NU_UNLIMITED for 0-terminated
86  * strings
87  * @param encoded_read read (decode) function
88  * @param unicode output unicode codepoint
89  * @param tail output pointer to compound tail, should never be 0
90  * @return pointer to next encoded codepoint
91  */
92 static inline
nu_nocase_compound_read(const char * encoded,const char * encoded_limit,nu_read_iterator_t encoded_read,uint32_t * unicode,const char ** tail)93 const char* nu_nocase_compound_read(const char *encoded, const char *encoded_limit,
94 	nu_read_iterator_t encoded_read, uint32_t *unicode,
95 	const char **tail) {
96 
97 	/* re-entry with tail != 0 */
98 	if (*tail != 0) {
99 		*tail = nu_casemap_read(*tail, unicode);
100 
101 		if (*unicode != 0) {
102 			return encoded;
103 		}
104 
105 		*tail = 0; // fall thru
106 	}
107 
108 	if (encoded >= encoded_limit) {
109 		*unicode = 0;
110 		return encoded;
111 	}
112 
113 	const char *p = encoded_read(encoded, unicode);
114 
115 	if (*unicode == 0) {
116 		return p;
117 	}
118 
119 	const char *map = NU_FOLDING_FUNCTION(*unicode);
120 	if (map != 0) {
121 		*tail = nu_casemap_read(map, unicode);
122 	}
123 
124 	return p;
125 }
126 
127 /** Internal interface for nu_strcoll
128  *
129  * @ingroup collation_internal
130  * @param lhs left-hand side encoded string
131  * @param lhs_limit upper limit for lhs, use NU_UNLIMITED for 0-terminated
132  * strings
133  * @param rhs right-hand side encoded string
134  * @param rhs_limit upper limit for rhs, use NU_UNLIMITED for 0-terminated
135  * strings
136  * @param it1 lhs read (decoding) function
137  * @param it2 rhs read (decoding) function
138  * @param com1 lhs compound read function
139  * @param com2 rhs compound read function
140  * @param weight codepoint weighting function
141  * @param context pointer which will be passed to weight
142  * @param collated_left (optional) number of codepoints collated in lhs
143  * @param collated_right (optional) number of codepoints collated in rhs
144  *
145  * @see nu_strcoll
146  * @see nu_default_compound_read
147  * @see nu_nocase_compound_read
148  * @see nu_ducet_weight
149  */
150 NU_EXPORT
151 int _nu_strcoll(const char *lhs, const char *lhs_limit,
152 	const char *rhs, const char *rhs_limit,
153 	nu_read_iterator_t it1, nu_read_iterator_t it2,
154 	nu_compound_read_t com1, nu_compound_read_t com2,
155 	nu_codepoint_weight_t weight, void *context,
156 	ssize_t *collated_left, ssize_t *collated_right);
157 
158 /** Internal interface for nu_strchr
159  *
160  * @ingroup collation_internal
161  * @param lhs left-hand side encoded string
162  * @param lhs_limit upper limit for lhs, use NU_UNLIMITED for 0-terminated
163  * strings
164  * @param c unicode codepoint to look for
165  * @param read lhs read (decoding) function
166  * @param com lhs compound read function
167  * @param casemap casemapping function
168  * @param casemap_read casemapping result decoding function
169  *
170  * @see nu_strchr
171  * @see nu_default_compound_read
172  * @see nu_nocase_compound_read
173  * @see nu_toupper
174  * @see nu_tolower
175  */
176 NU_EXPORT
177 const char* _nu_strchr(const char *lhs, const char *lhs_limit,
178 	uint32_t c, nu_read_iterator_t read,
179 	nu_compound_read_t com,
180 	nu_casemapping_t casemap, nu_read_iterator_t casemap_read);
181 
182 /** Internal interface for nu_strchr
183  *
184  * @ingroup collation_internal
185  * @see _nu_strchr
186  */
187 NU_EXPORT
188 const char* _nu_strrchr(const char *encoded, const char *limit,
189 	uint32_t c, nu_read_iterator_t read,
190 	nu_compound_read_t com,
191 	nu_casemapping_t casemap, nu_read_iterator_t casemap_read);
192 
193 /** Internal interface for nu_strcoll
194  *
195  * @ingroup collation_internal
196  * @param haystack encoded haystack
197  * @param haystack_limit upper limit for haystack, use NU_UNLIMITED for
198  * 0-terminated strings
199  * @param needle encoded needle string
200  * @param needle_limit upper limit for needle, use NU_UNLIMITED for
201  * 0-terminated strings
202  * @param it1 haystack read (decoding) function
203  * @param it2 needle read (decoding) function
204  * @param com1 haystack compound read function
205  * @param com2 needle compound read function
206  * @param casemap casemapping function
207  * @param casemap_read casemapping result decoding function
208  * @param weight codepoint weighting function
209  * @param context pointer which will be passed to weight
210  *
211  * @see nu_strstr
212  * @see nu_default_compound_read
213  * @see nu_nocase_compound_read
214  * @see nu_toupper
215  * @see nu_tolower
216  * @see nu_ducet_weight
217  */
218 NU_EXPORT
219 const char* _nu_strstr(const char *haystack, const char *haystack_limit,
220 	const char *needle, const char *needle_limit,
221 	nu_read_iterator_t it1, nu_read_iterator_t it2,
222 	nu_compound_read_t com1, nu_compound_read_t com2,
223 	nu_casemapping_t casemap, nu_read_iterator_t casemap_read,
224 	nu_codepoint_weight_t weight, void *context);
225 
226 #endif /* (defined NU_WITH_Z_COLLATION) || (defined NU_WITH_N_COLLATION) */
227 
228 #if defined (__cplusplus) || defined (c_plusplus)
229 }
230 #endif
231 
232 #endif /* NU_STRCOLL_INTERNAL_H */
233