1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun #ifndef _FS_CEPH_LIBCEPH_H
3*4882a593Smuzhiyun #define _FS_CEPH_LIBCEPH_H
4*4882a593Smuzhiyun
5*4882a593Smuzhiyun #include <linux/ceph/ceph_debug.h>
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun #include <asm/unaligned.h>
8*4882a593Smuzhiyun #include <linux/backing-dev.h>
9*4882a593Smuzhiyun #include <linux/completion.h>
10*4882a593Smuzhiyun #include <linux/exportfs.h>
11*4882a593Smuzhiyun #include <linux/bug.h>
12*4882a593Smuzhiyun #include <linux/fs.h>
13*4882a593Smuzhiyun #include <linux/mempool.h>
14*4882a593Smuzhiyun #include <linux/pagemap.h>
15*4882a593Smuzhiyun #include <linux/wait.h>
16*4882a593Smuzhiyun #include <linux/writeback.h>
17*4882a593Smuzhiyun #include <linux/slab.h>
18*4882a593Smuzhiyun #include <linux/refcount.h>
19*4882a593Smuzhiyun
20*4882a593Smuzhiyun #include <linux/ceph/types.h>
21*4882a593Smuzhiyun #include <linux/ceph/messenger.h>
22*4882a593Smuzhiyun #include <linux/ceph/msgpool.h>
23*4882a593Smuzhiyun #include <linux/ceph/mon_client.h>
24*4882a593Smuzhiyun #include <linux/ceph/osd_client.h>
25*4882a593Smuzhiyun #include <linux/ceph/ceph_fs.h>
26*4882a593Smuzhiyun #include <linux/ceph/string_table.h>
27*4882a593Smuzhiyun
/*
 * Mount option bits.  They are OR-ed together in ceph_options::flags and
 * accessed through ceph_set_opt() / ceph_test_opt().
 */
#define CEPH_OPT_FSID		(1 << 0)
#define CEPH_OPT_NOSHARE	(1 << 1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP		(1 << 2) /* specified my ip */
#define CEPH_OPT_NOCRC		(1 << 3) /* no data crc on writes */
#define CEPH_OPT_NOMSGAUTH	(1 << 4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY	(1 << 5) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN	(1 << 6) /* don't sign msgs */
#define CEPH_OPT_ABORT_ON_FULL	(1 << 7) /* abort w/ ENOSPC when full */

/* Flags that are set unless overridden. */
#define CEPH_OPT_DEFAULT	(CEPH_OPT_TCP_NODELAY)
41*4882a593Smuzhiyun
/*
 * Set / test a CEPH_OPT_* flag on a client.
 *
 * @client: pointer to an object whose ->options->flags holds the flag bits
 * @opt:    flag name without the CEPH_OPT_ prefix (it is token-pasted on),
 *          e.g. ceph_test_opt(client, NOCRC)
 *
 * ceph_set_opt() expands to one parenthesized expression with no trailing
 * semicolon, so the caller supplies the ';' and constructs such as
 * "if (x) ceph_set_opt(c, FOO); else ..." parse as expected.
 * ceph_test_opt() evaluates to 1 if the flag is set and 0 otherwise.
 */
#define ceph_set_opt(client, opt) \
	((client)->options->flags |= CEPH_OPT_##opt)
#define ceph_test_opt(client, opt) \
	(!!((client)->options->flags & CEPH_OPT_##opt))
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun struct ceph_options {
48*4882a593Smuzhiyun int flags;
49*4882a593Smuzhiyun struct ceph_fsid fsid;
50*4882a593Smuzhiyun struct ceph_entity_addr my_addr;
51*4882a593Smuzhiyun unsigned long mount_timeout; /* jiffies */
52*4882a593Smuzhiyun unsigned long osd_idle_ttl; /* jiffies */
53*4882a593Smuzhiyun unsigned long osd_keepalive_timeout; /* jiffies */
54*4882a593Smuzhiyun unsigned long osd_request_timeout; /* jiffies */
55*4882a593Smuzhiyun u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun /*
58*4882a593Smuzhiyun * any type that can't be simply compared or doesn't need
59*4882a593Smuzhiyun * to be compared should go beyond this point,
60*4882a593Smuzhiyun * ceph_compare_options() should be updated accordingly
61*4882a593Smuzhiyun */
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun struct ceph_entity_addr *mon_addr; /* should be the first
64*4882a593Smuzhiyun pointer type of args */
65*4882a593Smuzhiyun int num_mon;
66*4882a593Smuzhiyun char *name;
67*4882a593Smuzhiyun struct ceph_crypto_key *key;
68*4882a593Smuzhiyun struct rb_root crush_locs;
69*4882a593Smuzhiyun };
70*4882a593Smuzhiyun
/*
 * Default option values.  Timeouts are given in milliseconds and converted
 * to jiffies.
 */
#define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
#define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0	/* no timeout */
#define CEPH_READ_FROM_REPLICA_DEFAULT	0	/* read from primary */

#define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
#define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
#define CEPH_MONC_PING_TIMEOUT		msecs_to_jiffies(30 * 1000)
#define CEPH_MONC_HUNT_BACKOFF		2
#define CEPH_MONC_HUNT_MAX_MULT		10

/* Upper bounds on the front and middle sections of a message: 16M each. */
#define CEPH_MSG_MAX_FRONT_LEN		(16 << 20)
#define CEPH_MSG_MAX_MIDDLE_LEN		(16 << 20)

/*
 * Upper bound on the data section of a message: 64M.
 *
 * The largest possible rbd data object is 32M and the largest possible
 * rbd object map object is 64M.
 *
 * cephfs objects have no size limit of their own, but transfers have to
 * obey the rsize and wsize mount options anyway.
 */
#define CEPH_MSG_MAX_DATA_LEN		(64 << 20)

#define CEPH_AUTH_NAME_DEFAULT		"guest"
99*4882a593Smuzhiyun
/*
 * mount state
 *
 * The numeric values are spelled out; they are the same ones the compiler
 * assigns implicitly.
 */
enum {
	CEPH_MOUNT_MOUNTING	= 0,
	CEPH_MOUNT_MOUNTED	= 1,
	CEPH_MOUNT_UNMOUNTING	= 2,
	CEPH_MOUNT_UNMOUNTED	= 3,
	CEPH_MOUNT_SHUTDOWN	= 4,
};
108*4882a593Smuzhiyun
ceph_timeout_jiffies(unsigned long timeout)109*4882a593Smuzhiyun static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
110*4882a593Smuzhiyun {
111*4882a593Smuzhiyun return timeout ?: MAX_SCHEDULE_TIMEOUT;
112*4882a593Smuzhiyun }
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun struct ceph_mds_client;
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun /*
117*4882a593Smuzhiyun * per client state
118*4882a593Smuzhiyun *
119*4882a593Smuzhiyun * possibly shared by multiple mount points, if they are
120*4882a593Smuzhiyun * mounting the same ceph filesystem/cluster.
121*4882a593Smuzhiyun */
122*4882a593Smuzhiyun struct ceph_client {
123*4882a593Smuzhiyun struct ceph_fsid fsid;
124*4882a593Smuzhiyun bool have_fsid;
125*4882a593Smuzhiyun
126*4882a593Smuzhiyun void *private;
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun struct ceph_options *options;
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun struct mutex mount_mutex; /* serialize mount attempts */
131*4882a593Smuzhiyun wait_queue_head_t auth_wq;
132*4882a593Smuzhiyun int auth_err;
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
135*4882a593Smuzhiyun
136*4882a593Smuzhiyun u64 supported_features;
137*4882a593Smuzhiyun u64 required_features;
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun struct ceph_messenger msgr; /* messenger instance */
140*4882a593Smuzhiyun struct ceph_mon_client monc;
141*4882a593Smuzhiyun struct ceph_osd_client osdc;
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun #ifdef CONFIG_DEBUG_FS
144*4882a593Smuzhiyun struct dentry *debugfs_dir;
145*4882a593Smuzhiyun struct dentry *debugfs_monmap;
146*4882a593Smuzhiyun struct dentry *debugfs_osdmap;
147*4882a593Smuzhiyun struct dentry *debugfs_options;
148*4882a593Smuzhiyun #endif
149*4882a593Smuzhiyun };
150*4882a593Smuzhiyun
/* Map a pointer to an embedded ceph_client::msgr back to its ceph_client. */
#define from_msgr(messenger) \
	container_of(messenger, struct ceph_client, msgr)
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun /*
155*4882a593Smuzhiyun * snapshots
156*4882a593Smuzhiyun */
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun /*
159*4882a593Smuzhiyun * A "snap context" is the set of existing snapshots when we
160*4882a593Smuzhiyun * write data. It is used by the OSD to guide its COW behavior.
161*4882a593Smuzhiyun *
162*4882a593Smuzhiyun * The ceph_snap_context is refcounted, and attached to each dirty
163*4882a593Smuzhiyun * page, indicating which context the dirty data belonged when it was
164*4882a593Smuzhiyun * dirtied.
165*4882a593Smuzhiyun */
166*4882a593Smuzhiyun struct ceph_snap_context {
167*4882a593Smuzhiyun refcount_t nref;
168*4882a593Smuzhiyun u64 seq;
169*4882a593Smuzhiyun u32 num_snaps;
170*4882a593Smuzhiyun u64 snaps[];
171*4882a593Smuzhiyun };
172*4882a593Smuzhiyun
173*4882a593Smuzhiyun extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
174*4882a593Smuzhiyun gfp_t gfp_flags);
175*4882a593Smuzhiyun extern struct ceph_snap_context *ceph_get_snap_context(
176*4882a593Smuzhiyun struct ceph_snap_context *sc);
177*4882a593Smuzhiyun extern void ceph_put_snap_context(struct ceph_snap_context *sc);
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun /*
180*4882a593Smuzhiyun * calculate the number of pages a given length and offset map onto,
181*4882a593Smuzhiyun * if we align the data.
182*4882a593Smuzhiyun */
calc_pages_for(u64 off,u64 len)183*4882a593Smuzhiyun static inline int calc_pages_for(u64 off, u64 len)
184*4882a593Smuzhiyun {
185*4882a593Smuzhiyun return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) -
186*4882a593Smuzhiyun (off >> PAGE_SHIFT);
187*4882a593Smuzhiyun }
188*4882a593Smuzhiyun
/*
 * Building blocks for the DEFINE_RB_* generators below.
 *
 * RB_BYVAL()   - use the key as is
 * RB_BYPTR()   - use the address of the key
 * RB_CMP3WAY() - three-way compare: -1 if x < y, 1 if x > y, 0 otherwise.
 *                Arguments may be evaluated twice.
 */
#define RB_BYVAL(x)		(x)
#define RB_BYPTR(x)		(&(x))
#define RB_CMP3WAY(x, y)	((x) < (y) ? -1 : ((x) > (y) ? 1 : 0))
192*4882a593Smuzhiyun
/*
 * Generate rbtree insert/erase helpers for @type, which embeds a struct
 * rb_node as member @nodefld and is keyed by member @keyfld:
 *
 *   __insert_<name>() - link @t into @root; returns false and leaves the
 *                       tree alone if an equal key is already present
 *   insert_<name>()   - same, but BUG()s on an equal key
 *   erase_<name>()    - unlink @t from @root and mark its node as empty
 *
 * Two keys are ordered with cmpexp(keyexp(a), keyexp(b)); the result is
 * interpreted as <0, 0, >0.  @t must have an empty node when inserted and
 * a non-empty node when erased; both are checked with BUG_ON().
 */
#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \
static bool __insert_##name(struct rb_root *root, type *t)		\
{									\
	struct rb_node **link = &root->rb_node;				\
	struct rb_node *parent = NULL;					\
									\
	BUG_ON(!RB_EMPTY_NODE(&t->nodefld));				\
									\
	while (*link) {							\
		type *entry = rb_entry(*link, type, nodefld);		\
		int order = cmpexp(keyexp(t->keyfld),			\
				   keyexp(entry->keyfld));		\
									\
		if (!order)						\
			return false;					\
									\
		parent = *link;						\
		link = order < 0 ? &parent->rb_left : &parent->rb_right; \
	}								\
									\
	rb_link_node(&t->nodefld, parent, link);			\
	rb_insert_color(&t->nodefld, root);				\
	return true;							\
}									\
static void __maybe_unused insert_##name(struct rb_root *root, type *t)	\
{									\
	bool inserted = __insert_##name(root, t);			\
									\
	if (!inserted)							\
		BUG();							\
}									\
static void erase_##name(struct rb_root *root, type *t)			\
{									\
	struct rb_node *victim = &t->nodefld;				\
									\
	BUG_ON(RB_EMPTY_NODE(victim));					\
	rb_erase(victim, root);						\
	RB_CLEAR_NODE(victim);						\
}
230*4882a593Smuzhiyun
/*
 * Generate lookup_<name>(), which searches @root for the entry whose key
 * compares equal to @key and returns it, or NULL if there is none.  The
 * comparison is cmpexp(key, keyexp(entry->keyfld)).
 *
 * @lookup_param_type is passed in explicitly rather than derived from
 * (@type, @keyfld) with typeof() because adding const that way is too
 * unwieldy.
 */
#define DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,	\
			       lookup_param_type, nodefld)		\
static type *lookup_##name(struct rb_root *root, lookup_param_type key)	\
{									\
	struct rb_node *pos = root->rb_node;				\
									\
	while (pos) {							\
		type *entry = rb_entry(pos, type, nodefld);		\
		int order = cmpexp(key, keyexp(entry->keyfld));		\
									\
		if (!order)						\
			return entry;					\
									\
		pos = order < 0 ? pos->rb_left : pos->rb_right;		\
	}								\
									\
	return NULL;							\
}
256*4882a593Smuzhiyun
/* Insert, erase and lookup helpers in one go. */
#define DEFINE_RB_FUNCS2(name, type, keyfld, cmpexp, keyexp,		\
			 lookup_param_type, nodefld)			\
DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld)	\
DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,		\
		       lookup_param_type, nodefld)

/*
 * Shorthands for integer keys: keys are taken by value and ordered with
 * RB_CMP3WAY().
 */
#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, nodefld)

/*
 * The extern declaration only serves as an operand for typeof(), which
 * yields the type of @keyfld for the lookup parameter.
 */
#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
extern type __lookup_##name##_key;					\
DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL,	\
		       typeof(__lookup_##name##_key.keyfld), nodefld)

#define DEFINE_RB_FUNCS(name, type, keyfld, nodefld)			\
DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)			\
DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)
277*4882a593Smuzhiyun
278*4882a593Smuzhiyun extern struct kmem_cache *ceph_inode_cachep;
279*4882a593Smuzhiyun extern struct kmem_cache *ceph_cap_cachep;
280*4882a593Smuzhiyun extern struct kmem_cache *ceph_cap_flush_cachep;
281*4882a593Smuzhiyun extern struct kmem_cache *ceph_dentry_cachep;
282*4882a593Smuzhiyun extern struct kmem_cache *ceph_file_cachep;
283*4882a593Smuzhiyun extern struct kmem_cache *ceph_dir_file_cachep;
284*4882a593Smuzhiyun extern struct kmem_cache *ceph_mds_request_cachep;
285*4882a593Smuzhiyun extern mempool_t *ceph_wb_pagevec_pool;
286*4882a593Smuzhiyun
287*4882a593Smuzhiyun /* ceph_common.c */
288*4882a593Smuzhiyun extern bool libceph_compatible(void *data);
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun extern const char *ceph_msg_type_name(int type);
291*4882a593Smuzhiyun extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
292*4882a593Smuzhiyun extern void *ceph_kvmalloc(size_t size, gfp_t flags);
293*4882a593Smuzhiyun
294*4882a593Smuzhiyun struct fs_parameter;
295*4882a593Smuzhiyun struct fc_log;
296*4882a593Smuzhiyun struct ceph_options *ceph_alloc_options(void);
297*4882a593Smuzhiyun int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
298*4882a593Smuzhiyun struct fc_log *l);
299*4882a593Smuzhiyun int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
300*4882a593Smuzhiyun struct fc_log *l);
301*4882a593Smuzhiyun int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
302*4882a593Smuzhiyun bool show_all);
303*4882a593Smuzhiyun extern void ceph_destroy_options(struct ceph_options *opt);
304*4882a593Smuzhiyun extern int ceph_compare_options(struct ceph_options *new_opt,
305*4882a593Smuzhiyun struct ceph_client *client);
306*4882a593Smuzhiyun struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
307*4882a593Smuzhiyun struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
308*4882a593Smuzhiyun u64 ceph_client_gid(struct ceph_client *client);
309*4882a593Smuzhiyun extern void ceph_destroy_client(struct ceph_client *client);
310*4882a593Smuzhiyun extern void ceph_reset_client_addr(struct ceph_client *client);
311*4882a593Smuzhiyun extern int __ceph_open_session(struct ceph_client *client,
312*4882a593Smuzhiyun unsigned long started);
313*4882a593Smuzhiyun extern int ceph_open_session(struct ceph_client *client);
314*4882a593Smuzhiyun int ceph_wait_for_latest_osdmap(struct ceph_client *client,
315*4882a593Smuzhiyun unsigned long timeout);
316*4882a593Smuzhiyun
317*4882a593Smuzhiyun /* pagevec.c */
318*4882a593Smuzhiyun extern void ceph_release_page_vector(struct page **pages, int num_pages);
319*4882a593Smuzhiyun extern void ceph_put_page_vector(struct page **pages, int num_pages,
320*4882a593Smuzhiyun bool dirty);
321*4882a593Smuzhiyun extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
322*4882a593Smuzhiyun extern int ceph_copy_user_to_page_vector(struct page **pages,
323*4882a593Smuzhiyun const void __user *data,
324*4882a593Smuzhiyun loff_t off, size_t len);
325*4882a593Smuzhiyun extern void ceph_copy_to_page_vector(struct page **pages,
326*4882a593Smuzhiyun const void *data,
327*4882a593Smuzhiyun loff_t off, size_t len);
328*4882a593Smuzhiyun extern void ceph_copy_from_page_vector(struct page **pages,
329*4882a593Smuzhiyun void *data,
330*4882a593Smuzhiyun loff_t off, size_t len);
331*4882a593Smuzhiyun extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
332*4882a593Smuzhiyun
333*4882a593Smuzhiyun
#endif /* _FS_CEPH_LIBCEPH_H */
335