1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * lib/textsearch.c Generic text search interface
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Authors: Thomas Graf <tgraf@suug.ch>
6*4882a593Smuzhiyun * Pablo Neira Ayuso <pablo@netfilter.org>
7*4882a593Smuzhiyun *
8*4882a593Smuzhiyun * ==========================================================================
9*4882a593Smuzhiyun */
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun /**
12*4882a593Smuzhiyun * DOC: ts_intro
13*4882a593Smuzhiyun * INTRODUCTION
14*4882a593Smuzhiyun *
15*4882a593Smuzhiyun * The textsearch infrastructure provides text searching facilities for
16*4882a593Smuzhiyun * both linear and non-linear data. Individual search algorithms are
17*4882a593Smuzhiyun * implemented in modules and chosen by the user.
18*4882a593Smuzhiyun *
19*4882a593Smuzhiyun * ARCHITECTURE
20*4882a593Smuzhiyun *
21*4882a593Smuzhiyun * .. code-block:: none
22*4882a593Smuzhiyun *
23*4882a593Smuzhiyun * User
24*4882a593Smuzhiyun * +----------------+
25*4882a593Smuzhiyun * | finish()|<--------------(6)-----------------+
26*4882a593Smuzhiyun * |get_next_block()|<--------------(5)---------------+ |
27*4882a593Smuzhiyun * | | Algorithm | |
28*4882a593Smuzhiyun * | | +------------------------------+
29*4882a593Smuzhiyun * | | | init() find() destroy() |
30*4882a593Smuzhiyun * | | +------------------------------+
31*4882a593Smuzhiyun * | | Core API ^ ^ ^
32*4882a593Smuzhiyun * | | +---------------+ (2) (4) (8)
33*4882a593Smuzhiyun * | (1)|----->| prepare() |---+ | |
34*4882a593Smuzhiyun * | (3)|----->| find()/next() |-----------+ |
35*4882a593Smuzhiyun * | (7)|----->| destroy() |----------------------+
36*4882a593Smuzhiyun * +----------------+ +---------------+
37*4882a593Smuzhiyun *
38*4882a593Smuzhiyun * (1) User configures a search by calling textsearch_prepare() specifying
39*4882a593Smuzhiyun * the search parameters such as the pattern and algorithm name.
40*4882a593Smuzhiyun * (2) Core requests the algorithm to allocate and initialize a search
41*4882a593Smuzhiyun * configuration according to the specified parameters.
42*4882a593Smuzhiyun * (3) User starts the search(es) by calling textsearch_find() or
43*4882a593Smuzhiyun * textsearch_next() to fetch subsequent occurrences. A state variable
44*4882a593Smuzhiyun * is provided to the algorithm to store persistent variables.
45*4882a593Smuzhiyun * (4) Core eventually resets the search offset and forwards the find()
46*4882a593Smuzhiyun * request to the algorithm.
47*4882a593Smuzhiyun * (5) Algorithm calls get_next_block() provided by the user continuously
48*4882a593Smuzhiyun * to fetch the data to be searched in block by block.
49*4882a593Smuzhiyun * (6) Algorithm invokes finish() after the last call to get_next_block
50*4882a593Smuzhiyun * to clean up any leftovers from get_next_block. (Optional)
51*4882a593Smuzhiyun * (7) User destroys the configuration by calling textsearch_destroy().
52*4882a593Smuzhiyun * (8) Core notifies the algorithm to destroy algorithm specific
53*4882a593Smuzhiyun * allocations. (Optional)
54*4882a593Smuzhiyun *
55*4882a593Smuzhiyun * USAGE
56*4882a593Smuzhiyun *
57*4882a593Smuzhiyun * Before a search can be performed, a configuration must be created
58*4882a593Smuzhiyun * by calling textsearch_prepare() specifying the searching algorithm,
59*4882a593Smuzhiyun * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE
60*4882a593Smuzhiyun * to perform case insensitive matching. But it might slow down
61*4882a593Smuzhiyun * performance of algorithm, so you should use it at own your risk.
62*4882a593Smuzhiyun * The returned configuration may then be used for an arbitrary
63*4882a593Smuzhiyun * amount of times and even in parallel as long as a separate struct
64*4882a593Smuzhiyun * ts_state variable is provided to every instance.
65*4882a593Smuzhiyun *
66*4882a593Smuzhiyun * The actual search is performed by either calling
67*4882a593Smuzhiyun * textsearch_find_continuous() for linear data or by providing
68*4882a593Smuzhiyun * an own get_next_block() implementation and
69*4882a593Smuzhiyun * calling textsearch_find(). Both functions return
70*4882a593Smuzhiyun * the position of the first occurrence of the pattern or UINT_MAX if
71*4882a593Smuzhiyun * no match was found. Subsequent occurrences can be found by calling
72*4882a593Smuzhiyun * textsearch_next() regardless of the linearity of the data.
73*4882a593Smuzhiyun *
74*4882a593Smuzhiyun * Once you're done using a configuration it must be given back via
75*4882a593Smuzhiyun * textsearch_destroy.
76*4882a593Smuzhiyun *
77*4882a593Smuzhiyun * EXAMPLE::
78*4882a593Smuzhiyun *
79*4882a593Smuzhiyun * int pos;
80*4882a593Smuzhiyun * struct ts_config *conf;
81*4882a593Smuzhiyun * struct ts_state state;
82*4882a593Smuzhiyun * const char *pattern = "chicken";
83*4882a593Smuzhiyun * const char *example = "We dance the funky chicken";
84*4882a593Smuzhiyun *
85*4882a593Smuzhiyun * conf = textsearch_prepare("kmp", pattern, strlen(pattern),
86*4882a593Smuzhiyun * GFP_KERNEL, TS_AUTOLOAD);
87*4882a593Smuzhiyun * if (IS_ERR(conf)) {
88*4882a593Smuzhiyun * err = PTR_ERR(conf);
89*4882a593Smuzhiyun * goto errout;
90*4882a593Smuzhiyun * }
91*4882a593Smuzhiyun *
92*4882a593Smuzhiyun * pos = textsearch_find_continuous(conf, &state, example, strlen(example));
93*4882a593Smuzhiyun * if (pos != UINT_MAX)
94*4882a593Smuzhiyun * panic("Oh my god, dancing chickens at %d\n", pos);
95*4882a593Smuzhiyun *
96*4882a593Smuzhiyun * textsearch_destroy(conf);
97*4882a593Smuzhiyun */
98*4882a593Smuzhiyun /* ========================================================================== */
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun #include <linux/module.h>
101*4882a593Smuzhiyun #include <linux/types.h>
102*4882a593Smuzhiyun #include <linux/string.h>
103*4882a593Smuzhiyun #include <linux/init.h>
104*4882a593Smuzhiyun #include <linux/rculist.h>
105*4882a593Smuzhiyun #include <linux/rcupdate.h>
106*4882a593Smuzhiyun #include <linux/err.h>
107*4882a593Smuzhiyun #include <linux/textsearch.h>
108*4882a593Smuzhiyun #include <linux/slab.h>
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun static LIST_HEAD(ts_ops);
111*4882a593Smuzhiyun static DEFINE_SPINLOCK(ts_mod_lock);
112*4882a593Smuzhiyun
lookup_ts_algo(const char * name)113*4882a593Smuzhiyun static inline struct ts_ops *lookup_ts_algo(const char *name)
114*4882a593Smuzhiyun {
115*4882a593Smuzhiyun struct ts_ops *o;
116*4882a593Smuzhiyun
117*4882a593Smuzhiyun rcu_read_lock();
118*4882a593Smuzhiyun list_for_each_entry_rcu(o, &ts_ops, list) {
119*4882a593Smuzhiyun if (!strcmp(name, o->name)) {
120*4882a593Smuzhiyun if (!try_module_get(o->owner))
121*4882a593Smuzhiyun o = NULL;
122*4882a593Smuzhiyun rcu_read_unlock();
123*4882a593Smuzhiyun return o;
124*4882a593Smuzhiyun }
125*4882a593Smuzhiyun }
126*4882a593Smuzhiyun rcu_read_unlock();
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun return NULL;
129*4882a593Smuzhiyun }
130*4882a593Smuzhiyun
131*4882a593Smuzhiyun /**
132*4882a593Smuzhiyun * textsearch_register - register a textsearch module
133*4882a593Smuzhiyun * @ops: operations lookup table
134*4882a593Smuzhiyun *
135*4882a593Smuzhiyun * This function must be called by textsearch modules to announce
136*4882a593Smuzhiyun * their presence. The specified &@ops must have %name set to a
137*4882a593Smuzhiyun * unique identifier and the callbacks find(), init(), get_pattern(),
138*4882a593Smuzhiyun * and get_pattern_len() must be implemented.
139*4882a593Smuzhiyun *
140*4882a593Smuzhiyun * Returns 0 or -EEXISTS if another module has already registered
141*4882a593Smuzhiyun * with same name.
142*4882a593Smuzhiyun */
textsearch_register(struct ts_ops * ops)143*4882a593Smuzhiyun int textsearch_register(struct ts_ops *ops)
144*4882a593Smuzhiyun {
145*4882a593Smuzhiyun int err = -EEXIST;
146*4882a593Smuzhiyun struct ts_ops *o;
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun if (ops->name == NULL || ops->find == NULL || ops->init == NULL ||
149*4882a593Smuzhiyun ops->get_pattern == NULL || ops->get_pattern_len == NULL)
150*4882a593Smuzhiyun return -EINVAL;
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun spin_lock(&ts_mod_lock);
153*4882a593Smuzhiyun list_for_each_entry(o, &ts_ops, list) {
154*4882a593Smuzhiyun if (!strcmp(ops->name, o->name))
155*4882a593Smuzhiyun goto errout;
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun list_add_tail_rcu(&ops->list, &ts_ops);
159*4882a593Smuzhiyun err = 0;
160*4882a593Smuzhiyun errout:
161*4882a593Smuzhiyun spin_unlock(&ts_mod_lock);
162*4882a593Smuzhiyun return err;
163*4882a593Smuzhiyun }
164*4882a593Smuzhiyun EXPORT_SYMBOL(textsearch_register);
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun /**
167*4882a593Smuzhiyun * textsearch_unregister - unregister a textsearch module
168*4882a593Smuzhiyun * @ops: operations lookup table
169*4882a593Smuzhiyun *
170*4882a593Smuzhiyun * This function must be called by textsearch modules to announce
171*4882a593Smuzhiyun * their disappearance for examples when the module gets unloaded.
172*4882a593Smuzhiyun * The &ops parameter must be the same as the one during the
173*4882a593Smuzhiyun * registration.
174*4882a593Smuzhiyun *
175*4882a593Smuzhiyun * Returns 0 on success or -ENOENT if no matching textsearch
176*4882a593Smuzhiyun * registration was found.
177*4882a593Smuzhiyun */
textsearch_unregister(struct ts_ops * ops)178*4882a593Smuzhiyun int textsearch_unregister(struct ts_ops *ops)
179*4882a593Smuzhiyun {
180*4882a593Smuzhiyun int err = 0;
181*4882a593Smuzhiyun struct ts_ops *o;
182*4882a593Smuzhiyun
183*4882a593Smuzhiyun spin_lock(&ts_mod_lock);
184*4882a593Smuzhiyun list_for_each_entry(o, &ts_ops, list) {
185*4882a593Smuzhiyun if (o == ops) {
186*4882a593Smuzhiyun list_del_rcu(&o->list);
187*4882a593Smuzhiyun goto out;
188*4882a593Smuzhiyun }
189*4882a593Smuzhiyun }
190*4882a593Smuzhiyun
191*4882a593Smuzhiyun err = -ENOENT;
192*4882a593Smuzhiyun out:
193*4882a593Smuzhiyun spin_unlock(&ts_mod_lock);
194*4882a593Smuzhiyun return err;
195*4882a593Smuzhiyun }
196*4882a593Smuzhiyun EXPORT_SYMBOL(textsearch_unregister);
197*4882a593Smuzhiyun
198*4882a593Smuzhiyun struct ts_linear_state
199*4882a593Smuzhiyun {
200*4882a593Smuzhiyun unsigned int len;
201*4882a593Smuzhiyun const void *data;
202*4882a593Smuzhiyun };
203*4882a593Smuzhiyun
get_linear_data(unsigned int consumed,const u8 ** dst,struct ts_config * conf,struct ts_state * state)204*4882a593Smuzhiyun static unsigned int get_linear_data(unsigned int consumed, const u8 **dst,
205*4882a593Smuzhiyun struct ts_config *conf,
206*4882a593Smuzhiyun struct ts_state *state)
207*4882a593Smuzhiyun {
208*4882a593Smuzhiyun struct ts_linear_state *st = (struct ts_linear_state *) state->cb;
209*4882a593Smuzhiyun
210*4882a593Smuzhiyun if (likely(consumed < st->len)) {
211*4882a593Smuzhiyun *dst = st->data + consumed;
212*4882a593Smuzhiyun return st->len - consumed;
213*4882a593Smuzhiyun }
214*4882a593Smuzhiyun
215*4882a593Smuzhiyun return 0;
216*4882a593Smuzhiyun }
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun /**
219*4882a593Smuzhiyun * textsearch_find_continuous - search a pattern in continuous/linear data
220*4882a593Smuzhiyun * @conf: search configuration
221*4882a593Smuzhiyun * @state: search state
222*4882a593Smuzhiyun * @data: data to search in
223*4882a593Smuzhiyun * @len: length of data
224*4882a593Smuzhiyun *
225*4882a593Smuzhiyun * A simplified version of textsearch_find() for continuous/linear data.
226*4882a593Smuzhiyun * Call textsearch_next() to retrieve subsequent matches.
227*4882a593Smuzhiyun *
228*4882a593Smuzhiyun * Returns the position of first occurrence of the pattern or
229*4882a593Smuzhiyun * %UINT_MAX if no occurrence was found.
230*4882a593Smuzhiyun */
textsearch_find_continuous(struct ts_config * conf,struct ts_state * state,const void * data,unsigned int len)231*4882a593Smuzhiyun unsigned int textsearch_find_continuous(struct ts_config *conf,
232*4882a593Smuzhiyun struct ts_state *state,
233*4882a593Smuzhiyun const void *data, unsigned int len)
234*4882a593Smuzhiyun {
235*4882a593Smuzhiyun struct ts_linear_state *st = (struct ts_linear_state *) state->cb;
236*4882a593Smuzhiyun
237*4882a593Smuzhiyun conf->get_next_block = get_linear_data;
238*4882a593Smuzhiyun st->data = data;
239*4882a593Smuzhiyun st->len = len;
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun return textsearch_find(conf, state);
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun EXPORT_SYMBOL(textsearch_find_continuous);
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun /**
246*4882a593Smuzhiyun * textsearch_prepare - Prepare a search
247*4882a593Smuzhiyun * @algo: name of search algorithm
248*4882a593Smuzhiyun * @pattern: pattern data
249*4882a593Smuzhiyun * @len: length of pattern
250*4882a593Smuzhiyun * @gfp_mask: allocation mask
251*4882a593Smuzhiyun * @flags: search flags
252*4882a593Smuzhiyun *
253*4882a593Smuzhiyun * Looks up the search algorithm module and creates a new textsearch
254*4882a593Smuzhiyun * configuration for the specified pattern.
255*4882a593Smuzhiyun *
256*4882a593Smuzhiyun * Note: The format of the pattern may not be compatible between
257*4882a593Smuzhiyun * the various search algorithms.
258*4882a593Smuzhiyun *
259*4882a593Smuzhiyun * Returns a new textsearch configuration according to the specified
260*4882a593Smuzhiyun * parameters or a ERR_PTR(). If a zero length pattern is passed, this
261*4882a593Smuzhiyun * function returns EINVAL.
262*4882a593Smuzhiyun */
textsearch_prepare(const char * algo,const void * pattern,unsigned int len,gfp_t gfp_mask,int flags)263*4882a593Smuzhiyun struct ts_config *textsearch_prepare(const char *algo, const void *pattern,
264*4882a593Smuzhiyun unsigned int len, gfp_t gfp_mask, int flags)
265*4882a593Smuzhiyun {
266*4882a593Smuzhiyun int err = -ENOENT;
267*4882a593Smuzhiyun struct ts_config *conf;
268*4882a593Smuzhiyun struct ts_ops *ops;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun if (len == 0)
271*4882a593Smuzhiyun return ERR_PTR(-EINVAL);
272*4882a593Smuzhiyun
273*4882a593Smuzhiyun ops = lookup_ts_algo(algo);
274*4882a593Smuzhiyun #ifdef CONFIG_MODULES
275*4882a593Smuzhiyun /*
276*4882a593Smuzhiyun * Why not always autoload you may ask. Some users are
277*4882a593Smuzhiyun * in a situation where requesting a module may deadlock,
278*4882a593Smuzhiyun * especially when the module is located on a NFS mount.
279*4882a593Smuzhiyun */
280*4882a593Smuzhiyun if (ops == NULL && flags & TS_AUTOLOAD) {
281*4882a593Smuzhiyun request_module("ts_%s", algo);
282*4882a593Smuzhiyun ops = lookup_ts_algo(algo);
283*4882a593Smuzhiyun }
284*4882a593Smuzhiyun #endif
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun if (ops == NULL)
287*4882a593Smuzhiyun goto errout;
288*4882a593Smuzhiyun
289*4882a593Smuzhiyun conf = ops->init(pattern, len, gfp_mask, flags);
290*4882a593Smuzhiyun if (IS_ERR(conf)) {
291*4882a593Smuzhiyun err = PTR_ERR(conf);
292*4882a593Smuzhiyun goto errout;
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun
295*4882a593Smuzhiyun conf->ops = ops;
296*4882a593Smuzhiyun return conf;
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun errout:
299*4882a593Smuzhiyun if (ops)
300*4882a593Smuzhiyun module_put(ops->owner);
301*4882a593Smuzhiyun
302*4882a593Smuzhiyun return ERR_PTR(err);
303*4882a593Smuzhiyun }
304*4882a593Smuzhiyun EXPORT_SYMBOL(textsearch_prepare);
305*4882a593Smuzhiyun
306*4882a593Smuzhiyun /**
307*4882a593Smuzhiyun * textsearch_destroy - destroy a search configuration
308*4882a593Smuzhiyun * @conf: search configuration
309*4882a593Smuzhiyun *
310*4882a593Smuzhiyun * Releases all references of the configuration and frees
311*4882a593Smuzhiyun * up the memory.
312*4882a593Smuzhiyun */
textsearch_destroy(struct ts_config * conf)313*4882a593Smuzhiyun void textsearch_destroy(struct ts_config *conf)
314*4882a593Smuzhiyun {
315*4882a593Smuzhiyun if (conf->ops) {
316*4882a593Smuzhiyun if (conf->ops->destroy)
317*4882a593Smuzhiyun conf->ops->destroy(conf);
318*4882a593Smuzhiyun module_put(conf->ops->owner);
319*4882a593Smuzhiyun }
320*4882a593Smuzhiyun
321*4882a593Smuzhiyun kfree(conf);
322*4882a593Smuzhiyun }
323*4882a593Smuzhiyun EXPORT_SYMBOL(textsearch_destroy);
324