xref: /OK3568_Linux_fs/kernel/drivers/dax/super.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright(c) 2017 Intel Corporation. All rights reserved.
4*4882a593Smuzhiyun  */
5*4882a593Smuzhiyun #include <linux/pagemap.h>
6*4882a593Smuzhiyun #include <linux/module.h>
7*4882a593Smuzhiyun #include <linux/mount.h>
8*4882a593Smuzhiyun #include <linux/pseudo_fs.h>
9*4882a593Smuzhiyun #include <linux/magic.h>
10*4882a593Smuzhiyun #include <linux/genhd.h>
11*4882a593Smuzhiyun #include <linux/pfn_t.h>
12*4882a593Smuzhiyun #include <linux/cdev.h>
13*4882a593Smuzhiyun #include <linux/hash.h>
14*4882a593Smuzhiyun #include <linux/slab.h>
15*4882a593Smuzhiyun #include <linux/uio.h>
16*4882a593Smuzhiyun #include <linux/dax.h>
17*4882a593Smuzhiyun #include <linux/fs.h>
18*4882a593Smuzhiyun #include "dax-private.h"
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun static dev_t dax_devt;
21*4882a593Smuzhiyun DEFINE_STATIC_SRCU(dax_srcu);
22*4882a593Smuzhiyun static struct vfsmount *dax_mnt;
23*4882a593Smuzhiyun static DEFINE_IDA(dax_minor_ida);
24*4882a593Smuzhiyun static struct kmem_cache *dax_cache __read_mostly;
25*4882a593Smuzhiyun static struct super_block *dax_superblock __read_mostly;
26*4882a593Smuzhiyun 
27*4882a593Smuzhiyun #define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
28*4882a593Smuzhiyun static struct hlist_head dax_host_list[DAX_HASH_SIZE];
29*4882a593Smuzhiyun static DEFINE_SPINLOCK(dax_host_lock);
30*4882a593Smuzhiyun 
dax_read_lock(void)31*4882a593Smuzhiyun int dax_read_lock(void)
32*4882a593Smuzhiyun {
33*4882a593Smuzhiyun 	return srcu_read_lock(&dax_srcu);
34*4882a593Smuzhiyun }
35*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_read_lock);
36*4882a593Smuzhiyun 
dax_read_unlock(int id)37*4882a593Smuzhiyun void dax_read_unlock(int id)
38*4882a593Smuzhiyun {
39*4882a593Smuzhiyun 	srcu_read_unlock(&dax_srcu, id);
40*4882a593Smuzhiyun }
41*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_read_unlock);
42*4882a593Smuzhiyun 
43*4882a593Smuzhiyun #ifdef CONFIG_BLOCK
44*4882a593Smuzhiyun #include <linux/blkdev.h>
45*4882a593Smuzhiyun 
/*
 * Translate @sector (relative to @bdev) into a page offset.
 *
 * The start sector of @bdev is added when @bdev is non-NULL.  The page
 * offset is stored through @pgoff (when non-NULL) even if the alignment
 * check below fails.
 *
 * Returns 0 when both the resulting byte offset and @size are multiples
 * of PAGE_SIZE, -EINVAL otherwise.
 */
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
		pgoff_t *pgoff)
{
	sector_t part_start = 0;
	phys_addr_t byte_off;

	if (bdev)
		part_start = get_start_sect(bdev);
	byte_off = (part_start + sector) * 512;

	if (pgoff)
		*pgoff = PHYS_PFN(byte_off);

	if ((byte_off % PAGE_SIZE) == 0 && (size % PAGE_SIZE) == 0)
		return 0;
	return -EINVAL;
}
EXPORT_SYMBOL(bdev_dax_pgoff);
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_FS_DAX)
fs_dax_get_by_bdev(struct block_device * bdev)61*4882a593Smuzhiyun struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
62*4882a593Smuzhiyun {
63*4882a593Smuzhiyun 	if (!blk_queue_dax(bdev->bd_disk->queue))
64*4882a593Smuzhiyun 		return NULL;
65*4882a593Smuzhiyun 	return dax_get_by_host(bdev->bd_disk->disk_name);
66*4882a593Smuzhiyun }
67*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
68*4882a593Smuzhiyun #endif
69*4882a593Smuzhiyun 
__generic_fsdax_supported(struct dax_device * dax_dev,struct block_device * bdev,int blocksize,sector_t start,sector_t sectors)70*4882a593Smuzhiyun bool __generic_fsdax_supported(struct dax_device *dax_dev,
71*4882a593Smuzhiyun 		struct block_device *bdev, int blocksize, sector_t start,
72*4882a593Smuzhiyun 		sector_t sectors)
73*4882a593Smuzhiyun {
74*4882a593Smuzhiyun 	bool dax_enabled = false;
75*4882a593Smuzhiyun 	pgoff_t pgoff, pgoff_end;
76*4882a593Smuzhiyun 	char buf[BDEVNAME_SIZE];
77*4882a593Smuzhiyun 	void *kaddr, *end_kaddr;
78*4882a593Smuzhiyun 	pfn_t pfn, end_pfn;
79*4882a593Smuzhiyun 	sector_t last_page;
80*4882a593Smuzhiyun 	long len, len2;
81*4882a593Smuzhiyun 	int err, id;
82*4882a593Smuzhiyun 
83*4882a593Smuzhiyun 	if (blocksize != PAGE_SIZE) {
84*4882a593Smuzhiyun 		pr_info("%s: error: unsupported blocksize for dax\n",
85*4882a593Smuzhiyun 				bdevname(bdev, buf));
86*4882a593Smuzhiyun 		return false;
87*4882a593Smuzhiyun 	}
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun 	if (!dax_dev) {
90*4882a593Smuzhiyun 		pr_debug("%s: error: dax unsupported by block device\n",
91*4882a593Smuzhiyun 				bdevname(bdev, buf));
92*4882a593Smuzhiyun 		return false;
93*4882a593Smuzhiyun 	}
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
96*4882a593Smuzhiyun 	if (err) {
97*4882a593Smuzhiyun 		pr_info("%s: error: unaligned partition for dax\n",
98*4882a593Smuzhiyun 				bdevname(bdev, buf));
99*4882a593Smuzhiyun 		return false;
100*4882a593Smuzhiyun 	}
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun 	last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
103*4882a593Smuzhiyun 	err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
104*4882a593Smuzhiyun 	if (err) {
105*4882a593Smuzhiyun 		pr_info("%s: error: unaligned partition for dax\n",
106*4882a593Smuzhiyun 				bdevname(bdev, buf));
107*4882a593Smuzhiyun 		return false;
108*4882a593Smuzhiyun 	}
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 	id = dax_read_lock();
111*4882a593Smuzhiyun 	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
112*4882a593Smuzhiyun 	len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
113*4882a593Smuzhiyun 
114*4882a593Smuzhiyun 	if (len < 1 || len2 < 1) {
115*4882a593Smuzhiyun 		pr_info("%s: error: dax access failed (%ld)\n",
116*4882a593Smuzhiyun 				bdevname(bdev, buf), len < 1 ? len : len2);
117*4882a593Smuzhiyun 		dax_read_unlock(id);
118*4882a593Smuzhiyun 		return false;
119*4882a593Smuzhiyun 	}
120*4882a593Smuzhiyun 
121*4882a593Smuzhiyun 	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
122*4882a593Smuzhiyun 		/*
123*4882a593Smuzhiyun 		 * An arch that has enabled the pmem api should also
124*4882a593Smuzhiyun 		 * have its drivers support pfn_t_devmap()
125*4882a593Smuzhiyun 		 *
126*4882a593Smuzhiyun 		 * This is a developer warning and should not trigger in
127*4882a593Smuzhiyun 		 * production. dax_flush() will crash since it depends
128*4882a593Smuzhiyun 		 * on being able to do (page_address(pfn_to_page())).
129*4882a593Smuzhiyun 		 */
130*4882a593Smuzhiyun 		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
131*4882a593Smuzhiyun 		dax_enabled = true;
132*4882a593Smuzhiyun 	} else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
133*4882a593Smuzhiyun 		struct dev_pagemap *pgmap, *end_pgmap;
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun 		pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
136*4882a593Smuzhiyun 		end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
137*4882a593Smuzhiyun 		if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
138*4882a593Smuzhiyun 				&& pfn_t_to_page(pfn)->pgmap == pgmap
139*4882a593Smuzhiyun 				&& pfn_t_to_page(end_pfn)->pgmap == pgmap
140*4882a593Smuzhiyun 				&& pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
141*4882a593Smuzhiyun 				&& pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
142*4882a593Smuzhiyun 			dax_enabled = true;
143*4882a593Smuzhiyun 		put_dev_pagemap(pgmap);
144*4882a593Smuzhiyun 		put_dev_pagemap(end_pgmap);
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 	}
147*4882a593Smuzhiyun 	dax_read_unlock(id);
148*4882a593Smuzhiyun 
149*4882a593Smuzhiyun 	if (!dax_enabled) {
150*4882a593Smuzhiyun 		pr_info("%s: error: dax support not enabled\n",
151*4882a593Smuzhiyun 				bdevname(bdev, buf));
152*4882a593Smuzhiyun 		return false;
153*4882a593Smuzhiyun 	}
154*4882a593Smuzhiyun 	return true;
155*4882a593Smuzhiyun }
156*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
157*4882a593Smuzhiyun 
158*4882a593Smuzhiyun /**
159*4882a593Smuzhiyun  * __bdev_dax_supported() - Check if the device supports dax for filesystem
160*4882a593Smuzhiyun  * @bdev: block device to check
161*4882a593Smuzhiyun  * @blocksize: The block size of the device
162*4882a593Smuzhiyun  *
163*4882a593Smuzhiyun  * This is a library function for filesystems to check if the block device
164*4882a593Smuzhiyun  * can be mounted with dax option.
165*4882a593Smuzhiyun  *
166*4882a593Smuzhiyun  * Return: true if supported, false if unsupported
167*4882a593Smuzhiyun  */
__bdev_dax_supported(struct block_device * bdev,int blocksize)168*4882a593Smuzhiyun bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
169*4882a593Smuzhiyun {
170*4882a593Smuzhiyun 	struct dax_device *dax_dev;
171*4882a593Smuzhiyun 	struct request_queue *q;
172*4882a593Smuzhiyun 	char buf[BDEVNAME_SIZE];
173*4882a593Smuzhiyun 	bool ret;
174*4882a593Smuzhiyun 	int id;
175*4882a593Smuzhiyun 
176*4882a593Smuzhiyun 	q = bdev_get_queue(bdev);
177*4882a593Smuzhiyun 	if (!q || !blk_queue_dax(q)) {
178*4882a593Smuzhiyun 		pr_debug("%s: error: request queue doesn't support dax\n",
179*4882a593Smuzhiyun 				bdevname(bdev, buf));
180*4882a593Smuzhiyun 		return false;
181*4882a593Smuzhiyun 	}
182*4882a593Smuzhiyun 
183*4882a593Smuzhiyun 	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
184*4882a593Smuzhiyun 	if (!dax_dev) {
185*4882a593Smuzhiyun 		pr_debug("%s: error: device does not support dax\n",
186*4882a593Smuzhiyun 				bdevname(bdev, buf));
187*4882a593Smuzhiyun 		return false;
188*4882a593Smuzhiyun 	}
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	id = dax_read_lock();
191*4882a593Smuzhiyun 	ret = dax_supported(dax_dev, bdev, blocksize, 0,
192*4882a593Smuzhiyun 			i_size_read(bdev->bd_inode) / 512);
193*4882a593Smuzhiyun 	dax_read_unlock(id);
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	put_dax(dax_dev);
196*4882a593Smuzhiyun 
197*4882a593Smuzhiyun 	return ret;
198*4882a593Smuzhiyun }
199*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(__bdev_dax_supported);
200*4882a593Smuzhiyun #endif
201*4882a593Smuzhiyun 
/* Bit numbers used with set_bit()/test_bit() on dax_device.flags. */
enum dax_device_flags {
	/* !alive + rcu grace period == no new operations / mappings */
	DAXDEV_ALIVE = 0,
	/* gate whether dax_flush() calls the low level flush routine */
	DAXDEV_WRITE_CACHE = 1,
	/* flag to check if device supports synchronous flush */
	DAXDEV_SYNC = 2,
};
210*4882a593Smuzhiyun 
211*4882a593Smuzhiyun /**
212*4882a593Smuzhiyun  * struct dax_device - anchor object for dax services
213*4882a593Smuzhiyun  * @inode: core vfs
214*4882a593Smuzhiyun  * @cdev: optional character interface for "device dax"
215*4882a593Smuzhiyun  * @host: optional name for lookups where the device path is not available
216*4882a593Smuzhiyun  * @private: dax driver private data
217*4882a593Smuzhiyun  * @flags: state and boolean properties
218*4882a593Smuzhiyun  */
219*4882a593Smuzhiyun struct dax_device {
220*4882a593Smuzhiyun 	struct hlist_node list;
221*4882a593Smuzhiyun 	struct inode inode;
222*4882a593Smuzhiyun 	struct cdev cdev;
223*4882a593Smuzhiyun 	const char *host;
224*4882a593Smuzhiyun 	void *private;
225*4882a593Smuzhiyun 	unsigned long flags;
226*4882a593Smuzhiyun 	const struct dax_operations *ops;
227*4882a593Smuzhiyun };
228*4882a593Smuzhiyun 
write_cache_show(struct device * dev,struct device_attribute * attr,char * buf)229*4882a593Smuzhiyun static ssize_t write_cache_show(struct device *dev,
230*4882a593Smuzhiyun 		struct device_attribute *attr, char *buf)
231*4882a593Smuzhiyun {
232*4882a593Smuzhiyun 	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
233*4882a593Smuzhiyun 	ssize_t rc;
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun 	WARN_ON_ONCE(!dax_dev);
236*4882a593Smuzhiyun 	if (!dax_dev)
237*4882a593Smuzhiyun 		return -ENXIO;
238*4882a593Smuzhiyun 
239*4882a593Smuzhiyun 	rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev));
240*4882a593Smuzhiyun 	put_dax(dax_dev);
241*4882a593Smuzhiyun 	return rc;
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun 
write_cache_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)244*4882a593Smuzhiyun static ssize_t write_cache_store(struct device *dev,
245*4882a593Smuzhiyun 		struct device_attribute *attr, const char *buf, size_t len)
246*4882a593Smuzhiyun {
247*4882a593Smuzhiyun 	bool write_cache;
248*4882a593Smuzhiyun 	int rc = strtobool(buf, &write_cache);
249*4882a593Smuzhiyun 	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun 	WARN_ON_ONCE(!dax_dev);
252*4882a593Smuzhiyun 	if (!dax_dev)
253*4882a593Smuzhiyun 		return -ENXIO;
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun 	if (rc)
256*4882a593Smuzhiyun 		len = rc;
257*4882a593Smuzhiyun 	else
258*4882a593Smuzhiyun 		dax_write_cache(dax_dev, write_cache);
259*4882a593Smuzhiyun 
260*4882a593Smuzhiyun 	put_dax(dax_dev);
261*4882a593Smuzhiyun 	return len;
262*4882a593Smuzhiyun }
263*4882a593Smuzhiyun static DEVICE_ATTR_RW(write_cache);
264*4882a593Smuzhiyun 
dax_visible(struct kobject * kobj,struct attribute * a,int n)265*4882a593Smuzhiyun static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
266*4882a593Smuzhiyun {
267*4882a593Smuzhiyun 	struct device *dev = container_of(kobj, typeof(*dev), kobj);
268*4882a593Smuzhiyun 	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
269*4882a593Smuzhiyun 
270*4882a593Smuzhiyun 	WARN_ON_ONCE(!dax_dev);
271*4882a593Smuzhiyun 	if (!dax_dev)
272*4882a593Smuzhiyun 		return 0;
273*4882a593Smuzhiyun 
274*4882a593Smuzhiyun #ifndef CONFIG_ARCH_HAS_PMEM_API
275*4882a593Smuzhiyun 	if (a == &dev_attr_write_cache.attr)
276*4882a593Smuzhiyun 		return 0;
277*4882a593Smuzhiyun #endif
278*4882a593Smuzhiyun 	return a->mode;
279*4882a593Smuzhiyun }
280*4882a593Smuzhiyun 
281*4882a593Smuzhiyun static struct attribute *dax_attributes[] = {
282*4882a593Smuzhiyun 	&dev_attr_write_cache.attr,
283*4882a593Smuzhiyun 	NULL,
284*4882a593Smuzhiyun };
285*4882a593Smuzhiyun 
286*4882a593Smuzhiyun struct attribute_group dax_attribute_group = {
287*4882a593Smuzhiyun 	.name = "dax",
288*4882a593Smuzhiyun 	.attrs = dax_attributes,
289*4882a593Smuzhiyun 	.is_visible = dax_visible,
290*4882a593Smuzhiyun };
291*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_attribute_group);
292*4882a593Smuzhiyun 
293*4882a593Smuzhiyun /**
294*4882a593Smuzhiyun  * dax_direct_access() - translate a device pgoff to an absolute pfn
295*4882a593Smuzhiyun  * @dax_dev: a dax_device instance representing the logical memory range
296*4882a593Smuzhiyun  * @pgoff: offset in pages from the start of the device to translate
297*4882a593Smuzhiyun  * @nr_pages: number of consecutive pages caller can handle relative to @pfn
298*4882a593Smuzhiyun  * @kaddr: output parameter that returns a virtual address mapping of pfn
299*4882a593Smuzhiyun  * @pfn: output parameter that returns an absolute pfn translation of @pgoff
300*4882a593Smuzhiyun  *
301*4882a593Smuzhiyun  * Return: negative errno if an error occurs, otherwise the number of
302*4882a593Smuzhiyun  * pages accessible at the device relative @pgoff.
303*4882a593Smuzhiyun  */
dax_direct_access(struct dax_device * dax_dev,pgoff_t pgoff,long nr_pages,void ** kaddr,pfn_t * pfn)304*4882a593Smuzhiyun long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
305*4882a593Smuzhiyun 		void **kaddr, pfn_t *pfn)
306*4882a593Smuzhiyun {
307*4882a593Smuzhiyun 	long avail;
308*4882a593Smuzhiyun 
309*4882a593Smuzhiyun 	if (!dax_dev)
310*4882a593Smuzhiyun 		return -EOPNOTSUPP;
311*4882a593Smuzhiyun 
312*4882a593Smuzhiyun 	if (!dax_alive(dax_dev))
313*4882a593Smuzhiyun 		return -ENXIO;
314*4882a593Smuzhiyun 
315*4882a593Smuzhiyun 	if (nr_pages < 0)
316*4882a593Smuzhiyun 		return nr_pages;
317*4882a593Smuzhiyun 
318*4882a593Smuzhiyun 	avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
319*4882a593Smuzhiyun 			kaddr, pfn);
320*4882a593Smuzhiyun 	if (!avail)
321*4882a593Smuzhiyun 		return -ERANGE;
322*4882a593Smuzhiyun 	return min(avail, nr_pages);
323*4882a593Smuzhiyun }
324*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_direct_access);
325*4882a593Smuzhiyun 
dax_supported(struct dax_device * dax_dev,struct block_device * bdev,int blocksize,sector_t start,sector_t len)326*4882a593Smuzhiyun bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
327*4882a593Smuzhiyun 		int blocksize, sector_t start, sector_t len)
328*4882a593Smuzhiyun {
329*4882a593Smuzhiyun 	if (!dax_dev)
330*4882a593Smuzhiyun 		return false;
331*4882a593Smuzhiyun 
332*4882a593Smuzhiyun 	if (!dax_alive(dax_dev))
333*4882a593Smuzhiyun 		return false;
334*4882a593Smuzhiyun 
335*4882a593Smuzhiyun 	return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
336*4882a593Smuzhiyun }
337*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_supported);
338*4882a593Smuzhiyun 
/*
 * Forward a copy-from-iterator request to the driver's ->copy_from_iter().
 *
 * Returns 0 without calling the driver when @dax_dev is no longer alive,
 * otherwise the driver's return value.
 */
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i)
{
	if (dax_alive(dax_dev))
		return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr,
				bytes, i);
	return 0;
}
EXPORT_SYMBOL_GPL(dax_copy_from_iter);
348*4882a593Smuzhiyun 
/*
 * Forward a copy-to-iterator request to the driver's ->copy_to_iter().
 *
 * Returns 0 without calling the driver when @dax_dev is no longer alive,
 * otherwise the driver's return value.
 */
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i)
{
	if (dax_alive(dax_dev))
		return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr,
				bytes, i);
	return 0;
}
EXPORT_SYMBOL_GPL(dax_copy_to_iter);
358*4882a593Smuzhiyun 
/*
 * Zero @nr_pages pages starting at @pgoff through the driver's
 * ->zero_page_range() operation.
 *
 * Returns -ENXIO when @dax_dev is not alive, -EIO when @nr_pages is not
 * exactly 1, otherwise the driver's return value.
 */
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
			size_t nr_pages)
{
	int rc;

	if (!dax_alive(dax_dev))
		rc = -ENXIO;
	/*
	 * There are no callers that want to zero more than one page as of now.
	 * Once users are there, this check can be removed after the
	 * device mapper code has been updated to split ranges across targets.
	 */
	else if (nr_pages != 1)
		rc = -EIO;
	else
		rc = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);

	return rc;
}
EXPORT_SYMBOL_GPL(dax_zero_page_range);
375*4882a593Smuzhiyun 
/*
 * dax_flush() - write back @size bytes at @addr via arch_wb_cache_pmem().
 *
 * Nothing is done unless DAXDEV_WRITE_CACHE is set on @dax_dev.  Without
 * CONFIG_ARCH_HAS_PMEM_API the function is an empty stub.
 */
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
	if (likely(dax_write_cache_enabled(dax_dev)))
		arch_wb_cache_pmem(addr, size);
}
#else
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
}
#endif
EXPORT_SYMBOL_GPL(dax_flush);
391*4882a593Smuzhiyun 
/* Set (@wc true) or clear (@wc false) DAXDEV_WRITE_CACHE on @dax_dev. */
void dax_write_cache(struct dax_device *dax_dev, bool wc)
{
	if (!wc) {
		clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
		return;
	}

	set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_write_cache);
400*4882a593Smuzhiyun 
dax_write_cache_enabled(struct dax_device * dax_dev)401*4882a593Smuzhiyun bool dax_write_cache_enabled(struct dax_device *dax_dev)
402*4882a593Smuzhiyun {
403*4882a593Smuzhiyun 	return test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
404*4882a593Smuzhiyun }
405*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
406*4882a593Smuzhiyun 
__dax_synchronous(struct dax_device * dax_dev)407*4882a593Smuzhiyun bool __dax_synchronous(struct dax_device *dax_dev)
408*4882a593Smuzhiyun {
409*4882a593Smuzhiyun 	return test_bit(DAXDEV_SYNC, &dax_dev->flags);
410*4882a593Smuzhiyun }
411*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(__dax_synchronous);
412*4882a593Smuzhiyun 
__set_dax_synchronous(struct dax_device * dax_dev)413*4882a593Smuzhiyun void __set_dax_synchronous(struct dax_device *dax_dev)
414*4882a593Smuzhiyun {
415*4882a593Smuzhiyun 	set_bit(DAXDEV_SYNC, &dax_dev->flags);
416*4882a593Smuzhiyun }
417*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(__set_dax_synchronous);
418*4882a593Smuzhiyun 
dax_alive(struct dax_device * dax_dev)419*4882a593Smuzhiyun bool dax_alive(struct dax_device *dax_dev)
420*4882a593Smuzhiyun {
421*4882a593Smuzhiyun 	lockdep_assert_held(&dax_srcu);
422*4882a593Smuzhiyun 	return test_bit(DAXDEV_ALIVE, &dax_dev->flags);
423*4882a593Smuzhiyun }
424*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_alive);
425*4882a593Smuzhiyun 
dax_host_hash(const char * host)426*4882a593Smuzhiyun static int dax_host_hash(const char *host)
427*4882a593Smuzhiyun {
428*4882a593Smuzhiyun 	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
429*4882a593Smuzhiyun }
430*4882a593Smuzhiyun 
431*4882a593Smuzhiyun /*
432*4882a593Smuzhiyun  * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
433*4882a593Smuzhiyun  * that any fault handlers or operations that might have seen
434*4882a593Smuzhiyun  * dax_alive(), have completed.  Any operations that start after
435*4882a593Smuzhiyun  * synchronize_srcu() has run will abort upon seeing !dax_alive().
436*4882a593Smuzhiyun  */
kill_dax(struct dax_device * dax_dev)437*4882a593Smuzhiyun void kill_dax(struct dax_device *dax_dev)
438*4882a593Smuzhiyun {
439*4882a593Smuzhiyun 	if (!dax_dev)
440*4882a593Smuzhiyun 		return;
441*4882a593Smuzhiyun 
442*4882a593Smuzhiyun 	clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
443*4882a593Smuzhiyun 
444*4882a593Smuzhiyun 	synchronize_srcu(&dax_srcu);
445*4882a593Smuzhiyun 
446*4882a593Smuzhiyun 	spin_lock(&dax_host_lock);
447*4882a593Smuzhiyun 	hlist_del_init(&dax_dev->list);
448*4882a593Smuzhiyun 	spin_unlock(&dax_host_lock);
449*4882a593Smuzhiyun }
450*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(kill_dax);
451*4882a593Smuzhiyun 
run_dax(struct dax_device * dax_dev)452*4882a593Smuzhiyun void run_dax(struct dax_device *dax_dev)
453*4882a593Smuzhiyun {
454*4882a593Smuzhiyun 	set_bit(DAXDEV_ALIVE, &dax_dev->flags);
455*4882a593Smuzhiyun }
456*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(run_dax);
457*4882a593Smuzhiyun 
dax_alloc_inode(struct super_block * sb)458*4882a593Smuzhiyun static struct inode *dax_alloc_inode(struct super_block *sb)
459*4882a593Smuzhiyun {
460*4882a593Smuzhiyun 	struct dax_device *dax_dev;
461*4882a593Smuzhiyun 	struct inode *inode;
462*4882a593Smuzhiyun 
463*4882a593Smuzhiyun 	dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
464*4882a593Smuzhiyun 	if (!dax_dev)
465*4882a593Smuzhiyun 		return NULL;
466*4882a593Smuzhiyun 
467*4882a593Smuzhiyun 	inode = &dax_dev->inode;
468*4882a593Smuzhiyun 	inode->i_rdev = 0;
469*4882a593Smuzhiyun 	return inode;
470*4882a593Smuzhiyun }
471*4882a593Smuzhiyun 
to_dax_dev(struct inode * inode)472*4882a593Smuzhiyun static struct dax_device *to_dax_dev(struct inode *inode)
473*4882a593Smuzhiyun {
474*4882a593Smuzhiyun 	return container_of(inode, struct dax_device, inode);
475*4882a593Smuzhiyun }
476*4882a593Smuzhiyun 
dax_free_inode(struct inode * inode)477*4882a593Smuzhiyun static void dax_free_inode(struct inode *inode)
478*4882a593Smuzhiyun {
479*4882a593Smuzhiyun 	struct dax_device *dax_dev = to_dax_dev(inode);
480*4882a593Smuzhiyun 	kfree(dax_dev->host);
481*4882a593Smuzhiyun 	dax_dev->host = NULL;
482*4882a593Smuzhiyun 	if (inode->i_rdev)
483*4882a593Smuzhiyun 		ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
484*4882a593Smuzhiyun 	kmem_cache_free(dax_cache, dax_dev);
485*4882a593Smuzhiyun }
486*4882a593Smuzhiyun 
dax_destroy_inode(struct inode * inode)487*4882a593Smuzhiyun static void dax_destroy_inode(struct inode *inode)
488*4882a593Smuzhiyun {
489*4882a593Smuzhiyun 	struct dax_device *dax_dev = to_dax_dev(inode);
490*4882a593Smuzhiyun 	WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags),
491*4882a593Smuzhiyun 			"kill_dax() must be called before final iput()\n");
492*4882a593Smuzhiyun }
493*4882a593Smuzhiyun 
494*4882a593Smuzhiyun static const struct super_operations dax_sops = {
495*4882a593Smuzhiyun 	.statfs = simple_statfs,
496*4882a593Smuzhiyun 	.alloc_inode = dax_alloc_inode,
497*4882a593Smuzhiyun 	.destroy_inode = dax_destroy_inode,
498*4882a593Smuzhiyun 	.free_inode = dax_free_inode,
499*4882a593Smuzhiyun 	.drop_inode = generic_delete_inode,
500*4882a593Smuzhiyun };
501*4882a593Smuzhiyun 
dax_init_fs_context(struct fs_context * fc)502*4882a593Smuzhiyun static int dax_init_fs_context(struct fs_context *fc)
503*4882a593Smuzhiyun {
504*4882a593Smuzhiyun 	struct pseudo_fs_context *ctx = init_pseudo(fc, DAXFS_MAGIC);
505*4882a593Smuzhiyun 	if (!ctx)
506*4882a593Smuzhiyun 		return -ENOMEM;
507*4882a593Smuzhiyun 	ctx->ops = &dax_sops;
508*4882a593Smuzhiyun 	return 0;
509*4882a593Smuzhiyun }
510*4882a593Smuzhiyun 
511*4882a593Smuzhiyun static struct file_system_type dax_fs_type = {
512*4882a593Smuzhiyun 	.name		= "dax",
513*4882a593Smuzhiyun 	.init_fs_context = dax_init_fs_context,
514*4882a593Smuzhiyun 	.kill_sb	= kill_anon_super,
515*4882a593Smuzhiyun };
516*4882a593Smuzhiyun 
dax_test(struct inode * inode,void * data)517*4882a593Smuzhiyun static int dax_test(struct inode *inode, void *data)
518*4882a593Smuzhiyun {
519*4882a593Smuzhiyun 	dev_t devt = *(dev_t *) data;
520*4882a593Smuzhiyun 
521*4882a593Smuzhiyun 	return inode->i_rdev == devt;
522*4882a593Smuzhiyun }
523*4882a593Smuzhiyun 
dax_set(struct inode * inode,void * data)524*4882a593Smuzhiyun static int dax_set(struct inode *inode, void *data)
525*4882a593Smuzhiyun {
526*4882a593Smuzhiyun 	dev_t devt = *(dev_t *) data;
527*4882a593Smuzhiyun 
528*4882a593Smuzhiyun 	inode->i_rdev = devt;
529*4882a593Smuzhiyun 	return 0;
530*4882a593Smuzhiyun }
531*4882a593Smuzhiyun 
/*
 * Find or create the dax_device whose inode carries @devt.
 *
 * A newly created inode is marked alive, set up as an S_DAX character
 * inode pointing at the embedded cdev, and then unlocked.  Returns NULL
 * when iget5_locked() fails.
 */
static struct dax_device *dax_dev_get(dev_t devt)
{
	unsigned long hashval = hash_32(devt + DAXFS_MAGIC, 31);
	struct dax_device *dax_dev;
	struct inode *inode;

	inode = iget5_locked(dax_superblock, hashval, dax_test, dax_set, &devt);
	if (!inode)
		return NULL;

	dax_dev = to_dax_dev(inode);
	if (!(inode->i_state & I_NEW))
		return dax_dev;

	/* First lookup for this devt: finish initialising the new inode. */
	set_bit(DAXDEV_ALIVE, &dax_dev->flags);
	inode->i_cdev = &dax_dev->cdev;
	inode->i_mode = S_IFCHR;
	inode->i_flags = S_DAX;
	mapping_set_gfp_mask(&inode->i_data, GFP_USER);
	unlock_new_inode(inode);

	return dax_dev;
}
555*4882a593Smuzhiyun 
dax_add_host(struct dax_device * dax_dev,const char * host)556*4882a593Smuzhiyun static void dax_add_host(struct dax_device *dax_dev, const char *host)
557*4882a593Smuzhiyun {
558*4882a593Smuzhiyun 	int hash;
559*4882a593Smuzhiyun 
560*4882a593Smuzhiyun 	/*
561*4882a593Smuzhiyun 	 * Unconditionally init dax_dev since it's coming from a
562*4882a593Smuzhiyun 	 * non-zeroed slab cache
563*4882a593Smuzhiyun 	 */
564*4882a593Smuzhiyun 	INIT_HLIST_NODE(&dax_dev->list);
565*4882a593Smuzhiyun 	dax_dev->host = host;
566*4882a593Smuzhiyun 	if (!host)
567*4882a593Smuzhiyun 		return;
568*4882a593Smuzhiyun 
569*4882a593Smuzhiyun 	hash = dax_host_hash(host);
570*4882a593Smuzhiyun 	spin_lock(&dax_host_lock);
571*4882a593Smuzhiyun 	hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
572*4882a593Smuzhiyun 	spin_unlock(&dax_host_lock);
573*4882a593Smuzhiyun }
574*4882a593Smuzhiyun 
alloc_dax(void * private,const char * __host,const struct dax_operations * ops,unsigned long flags)575*4882a593Smuzhiyun struct dax_device *alloc_dax(void *private, const char *__host,
576*4882a593Smuzhiyun 		const struct dax_operations *ops, unsigned long flags)
577*4882a593Smuzhiyun {
578*4882a593Smuzhiyun 	struct dax_device *dax_dev;
579*4882a593Smuzhiyun 	const char *host;
580*4882a593Smuzhiyun 	dev_t devt;
581*4882a593Smuzhiyun 	int minor;
582*4882a593Smuzhiyun 
583*4882a593Smuzhiyun 	if (ops && !ops->zero_page_range) {
584*4882a593Smuzhiyun 		pr_debug("%s: error: device does not provide dax"
585*4882a593Smuzhiyun 			 " operation zero_page_range()\n",
586*4882a593Smuzhiyun 			 __host ? __host : "Unknown");
587*4882a593Smuzhiyun 		return ERR_PTR(-EINVAL);
588*4882a593Smuzhiyun 	}
589*4882a593Smuzhiyun 
590*4882a593Smuzhiyun 	host = kstrdup(__host, GFP_KERNEL);
591*4882a593Smuzhiyun 	if (__host && !host)
592*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
593*4882a593Smuzhiyun 
594*4882a593Smuzhiyun 	minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
595*4882a593Smuzhiyun 	if (minor < 0)
596*4882a593Smuzhiyun 		goto err_minor;
597*4882a593Smuzhiyun 
598*4882a593Smuzhiyun 	devt = MKDEV(MAJOR(dax_devt), minor);
599*4882a593Smuzhiyun 	dax_dev = dax_dev_get(devt);
600*4882a593Smuzhiyun 	if (!dax_dev)
601*4882a593Smuzhiyun 		goto err_dev;
602*4882a593Smuzhiyun 
603*4882a593Smuzhiyun 	dax_add_host(dax_dev, host);
604*4882a593Smuzhiyun 	dax_dev->ops = ops;
605*4882a593Smuzhiyun 	dax_dev->private = private;
606*4882a593Smuzhiyun 	if (flags & DAXDEV_F_SYNC)
607*4882a593Smuzhiyun 		set_dax_synchronous(dax_dev);
608*4882a593Smuzhiyun 
609*4882a593Smuzhiyun 	return dax_dev;
610*4882a593Smuzhiyun 
611*4882a593Smuzhiyun  err_dev:
612*4882a593Smuzhiyun 	ida_simple_remove(&dax_minor_ida, minor);
613*4882a593Smuzhiyun  err_minor:
614*4882a593Smuzhiyun 	kfree(host);
615*4882a593Smuzhiyun 	return ERR_PTR(-ENOMEM);
616*4882a593Smuzhiyun }
617*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(alloc_dax);
618*4882a593Smuzhiyun 
put_dax(struct dax_device * dax_dev)619*4882a593Smuzhiyun void put_dax(struct dax_device *dax_dev)
620*4882a593Smuzhiyun {
621*4882a593Smuzhiyun 	if (!dax_dev)
622*4882a593Smuzhiyun 		return;
623*4882a593Smuzhiyun 	iput(&dax_dev->inode);
624*4882a593Smuzhiyun }
625*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(put_dax);
626*4882a593Smuzhiyun 
627*4882a593Smuzhiyun /**
628*4882a593Smuzhiyun  * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
629*4882a593Smuzhiyun  * @host: alternate name for the device registered by a dax driver
630*4882a593Smuzhiyun  */
dax_get_by_host(const char * host)631*4882a593Smuzhiyun struct dax_device *dax_get_by_host(const char *host)
632*4882a593Smuzhiyun {
633*4882a593Smuzhiyun 	struct dax_device *dax_dev, *found = NULL;
634*4882a593Smuzhiyun 	int hash, id;
635*4882a593Smuzhiyun 
636*4882a593Smuzhiyun 	if (!host)
637*4882a593Smuzhiyun 		return NULL;
638*4882a593Smuzhiyun 
639*4882a593Smuzhiyun 	hash = dax_host_hash(host);
640*4882a593Smuzhiyun 
641*4882a593Smuzhiyun 	id = dax_read_lock();
642*4882a593Smuzhiyun 	spin_lock(&dax_host_lock);
643*4882a593Smuzhiyun 	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
644*4882a593Smuzhiyun 		if (!dax_alive(dax_dev)
645*4882a593Smuzhiyun 				|| strcmp(host, dax_dev->host) != 0)
646*4882a593Smuzhiyun 			continue;
647*4882a593Smuzhiyun 
648*4882a593Smuzhiyun 		if (igrab(&dax_dev->inode))
649*4882a593Smuzhiyun 			found = dax_dev;
650*4882a593Smuzhiyun 		break;
651*4882a593Smuzhiyun 	}
652*4882a593Smuzhiyun 	spin_unlock(&dax_host_lock);
653*4882a593Smuzhiyun 	dax_read_unlock(id);
654*4882a593Smuzhiyun 
655*4882a593Smuzhiyun 	return found;
656*4882a593Smuzhiyun }
657*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_get_by_host);
658*4882a593Smuzhiyun 
659*4882a593Smuzhiyun /**
660*4882a593Smuzhiyun  * inode_dax: convert a public inode into its dax_dev
661*4882a593Smuzhiyun  * @inode: An inode with i_cdev pointing to a dax_dev
662*4882a593Smuzhiyun  *
663*4882a593Smuzhiyun  * Note this is not equivalent to to_dax_dev() which is for private
664*4882a593Smuzhiyun  * internal use where we know the inode filesystem type == dax_fs_type.
665*4882a593Smuzhiyun  */
inode_dax(struct inode * inode)666*4882a593Smuzhiyun struct dax_device *inode_dax(struct inode *inode)
667*4882a593Smuzhiyun {
668*4882a593Smuzhiyun 	struct cdev *cdev = inode->i_cdev;
669*4882a593Smuzhiyun 
670*4882a593Smuzhiyun 	return container_of(cdev, struct dax_device, cdev);
671*4882a593Smuzhiyun }
672*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(inode_dax);
673*4882a593Smuzhiyun 
dax_inode(struct dax_device * dax_dev)674*4882a593Smuzhiyun struct inode *dax_inode(struct dax_device *dax_dev)
675*4882a593Smuzhiyun {
676*4882a593Smuzhiyun 	return &dax_dev->inode;
677*4882a593Smuzhiyun }
678*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_inode);
679*4882a593Smuzhiyun 
dax_get_private(struct dax_device * dax_dev)680*4882a593Smuzhiyun void *dax_get_private(struct dax_device *dax_dev)
681*4882a593Smuzhiyun {
682*4882a593Smuzhiyun 	if (!test_bit(DAXDEV_ALIVE, &dax_dev->flags))
683*4882a593Smuzhiyun 		return NULL;
684*4882a593Smuzhiyun 	return dax_dev->private;
685*4882a593Smuzhiyun }
686*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dax_get_private);
687*4882a593Smuzhiyun 
init_once(void * _dax_dev)688*4882a593Smuzhiyun static void init_once(void *_dax_dev)
689*4882a593Smuzhiyun {
690*4882a593Smuzhiyun 	struct dax_device *dax_dev = _dax_dev;
691*4882a593Smuzhiyun 	struct inode *inode = &dax_dev->inode;
692*4882a593Smuzhiyun 
693*4882a593Smuzhiyun 	memset(dax_dev, 0, sizeof(*dax_dev));
694*4882a593Smuzhiyun 	inode_init_once(inode);
695*4882a593Smuzhiyun }
696*4882a593Smuzhiyun 
dax_fs_init(void)697*4882a593Smuzhiyun static int dax_fs_init(void)
698*4882a593Smuzhiyun {
699*4882a593Smuzhiyun 	int rc;
700*4882a593Smuzhiyun 
701*4882a593Smuzhiyun 	dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
702*4882a593Smuzhiyun 			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
703*4882a593Smuzhiyun 			 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
704*4882a593Smuzhiyun 			init_once);
705*4882a593Smuzhiyun 	if (!dax_cache)
706*4882a593Smuzhiyun 		return -ENOMEM;
707*4882a593Smuzhiyun 
708*4882a593Smuzhiyun 	dax_mnt = kern_mount(&dax_fs_type);
709*4882a593Smuzhiyun 	if (IS_ERR(dax_mnt)) {
710*4882a593Smuzhiyun 		rc = PTR_ERR(dax_mnt);
711*4882a593Smuzhiyun 		goto err_mount;
712*4882a593Smuzhiyun 	}
713*4882a593Smuzhiyun 	dax_superblock = dax_mnt->mnt_sb;
714*4882a593Smuzhiyun 
715*4882a593Smuzhiyun 	return 0;
716*4882a593Smuzhiyun 
717*4882a593Smuzhiyun  err_mount:
718*4882a593Smuzhiyun 	kmem_cache_destroy(dax_cache);
719*4882a593Smuzhiyun 
720*4882a593Smuzhiyun 	return rc;
721*4882a593Smuzhiyun }
722*4882a593Smuzhiyun 
dax_fs_exit(void)723*4882a593Smuzhiyun static void dax_fs_exit(void)
724*4882a593Smuzhiyun {
725*4882a593Smuzhiyun 	kern_unmount(dax_mnt);
726*4882a593Smuzhiyun 	rcu_barrier();
727*4882a593Smuzhiyun 	kmem_cache_destroy(dax_cache);
728*4882a593Smuzhiyun }
729*4882a593Smuzhiyun 
dax_core_init(void)730*4882a593Smuzhiyun static int __init dax_core_init(void)
731*4882a593Smuzhiyun {
732*4882a593Smuzhiyun 	int rc;
733*4882a593Smuzhiyun 
734*4882a593Smuzhiyun 	rc = dax_fs_init();
735*4882a593Smuzhiyun 	if (rc)
736*4882a593Smuzhiyun 		return rc;
737*4882a593Smuzhiyun 
738*4882a593Smuzhiyun 	rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
739*4882a593Smuzhiyun 	if (rc)
740*4882a593Smuzhiyun 		goto err_chrdev;
741*4882a593Smuzhiyun 
742*4882a593Smuzhiyun 	rc = dax_bus_init();
743*4882a593Smuzhiyun 	if (rc)
744*4882a593Smuzhiyun 		goto err_bus;
745*4882a593Smuzhiyun 	return 0;
746*4882a593Smuzhiyun 
747*4882a593Smuzhiyun err_bus:
748*4882a593Smuzhiyun 	unregister_chrdev_region(dax_devt, MINORMASK+1);
749*4882a593Smuzhiyun err_chrdev:
750*4882a593Smuzhiyun 	dax_fs_exit();
751*4882a593Smuzhiyun 	return 0;
752*4882a593Smuzhiyun }
753*4882a593Smuzhiyun 
dax_core_exit(void)754*4882a593Smuzhiyun static void __exit dax_core_exit(void)
755*4882a593Smuzhiyun {
756*4882a593Smuzhiyun 	dax_bus_exit();
757*4882a593Smuzhiyun 	unregister_chrdev_region(dax_devt, MINORMASK+1);
758*4882a593Smuzhiyun 	ida_destroy(&dax_minor_ida);
759*4882a593Smuzhiyun 	dax_fs_exit();
760*4882a593Smuzhiyun }
761*4882a593Smuzhiyun 
762*4882a593Smuzhiyun MODULE_AUTHOR("Intel Corporation");
763*4882a593Smuzhiyun MODULE_LICENSE("GPL v2");
764*4882a593Smuzhiyun subsys_initcall(dax_core_init);
765*4882a593Smuzhiyun module_exit(dax_core_exit);
766