xref: /OK3568_Linux_fs/kernel/drivers/block/brd.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Ram backed block device driver.
 *
 * Copyright (C) 2007 Nick Piggin
 * Copyright (C) 2007 Novell Inc.
 *
 * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright
 * of their respective owners.
 */

#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/radix-tree.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>

#include <linux/uaccess.h>

#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)

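/*
 * For reference (illustrative values, not asserted by the code): with the
 * common PAGE_SHIFT=12 (4 KiB pages) and SECTOR_SHIFT=9 (512-byte sectors),
 * PAGE_SECTORS_SHIFT is 12 - 9 = 3 and PAGE_SECTORS is 1 << 3 = 8, i.e.
 * eight sectors per backing page.
 */
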
/*
 * Each block ramdisk device has a radix_tree, brd_pages, that stores the
 * pages containing the block device's contents. A brd page's ->index is its
 * offset in PAGE_SIZE units. This is similar to, but in no way connected
 * with, the kernel's pagecache or buffer cache (which sit above our block
 * device).
 */
struct brd_device {
	int		brd_number;

	struct request_queue	*brd_queue;
	struct gendisk		*brd_disk;
	struct list_head	brd_list;

	/*
	 * Backing store of pages and lock to protect it. This is the contents
	 * of the block device.
	 */
	spinlock_t		brd_lock;
	struct radix_tree_root	brd_pages;
};

/*
 * Look up and return a brd's page for a given sector.
 */
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
{
	pgoff_t idx;
	struct page *page;

	/*
	 * The page lifetime is protected by the fact that we have opened the
	 * device node -- brd pages will never be deleted under us, so we
	 * don't need any further locking or refcounting.
	 *
	 * This is strictly true for the radix-tree nodes as well (i.e. we
	 * don't actually need the rcu_read_lock()), however that is not a
	 * documented feature of the radix-tree API so it is better to be
	 * safe here (we don't have total exclusion from radix tree updates
	 * here, only deletes).
	 */
	rcu_read_lock();
	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
	page = radix_tree_lookup(&brd->brd_pages, idx);
	rcu_read_unlock();

	BUG_ON(page && page->index != idx);

	return page;
}
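
/*
 * A worked example of the index math (illustrative only): with
 * PAGE_SECTORS_SHIFT == 3, sector 12345 maps to page index
 * 12345 >> 3 == 1543, i.e. the backing page covering sectors 12344..12351.
 */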

/*
 * Look up and return a brd's page for a given sector.
 * If one does not exist, allocate an empty page, insert it, and return it.
 */
static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
{
	pgoff_t idx;
	struct page *page;
	gfp_t gfp_flags;

	page = brd_lookup_page(brd, sector);
	if (page)
		return page;

	/*
	 * Must use NOIO because we don't want to recurse back into the
	 * block or filesystem layers from page reclaim.
	 */
	gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
	page = alloc_page(gfp_flags);
	if (!page)
		return NULL;

	if (radix_tree_preload(GFP_NOIO)) {
		__free_page(page);
		return NULL;
	}

	spin_lock(&brd->brd_lock);
	idx = sector >> PAGE_SECTORS_SHIFT;
	page->index = idx;
	if (radix_tree_insert(&brd->brd_pages, idx, page)) {
		/* Lost the race: another context inserted first; use its page. */
		__free_page(page);
		page = radix_tree_lookup(&brd->brd_pages, idx);
		BUG_ON(!page);
		BUG_ON(page->index != idx);
	}
	spin_unlock(&brd->brd_lock);

	radix_tree_preload_end();

	return page;
}

/*
 * Free all backing store pages and radix tree. This must only be called when
 * there are no other users of the device.
 */
#define FREE_BATCH 16
static void brd_free_pages(struct brd_device *brd)
{
	unsigned long pos = 0;
	struct page *pages[FREE_BATCH];
	int nr_pages;

	do {
		int i;

		nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
				(void **)pages, pos, FREE_BATCH);

		for (i = 0; i < nr_pages; i++) {
			void *ret;

			BUG_ON(pages[i]->index < pos);
			pos = pages[i]->index;
			ret = radix_tree_delete(&brd->brd_pages, pos);
			BUG_ON(!ret || ret != pages[i]);
			__free_page(pages[i]);
		}

		pos++;

		/*
		 * Removing an 80 GiB ramdisk takes about 3.4 seconds, so
		 * cond_resched() is needed to avoid stalling the CPU.
		 */
		cond_resched();

		/*
		 * This assumes radix_tree_gang_lookup always returns as
		 * many pages as possible. If the radix-tree code changes,
		 * so will this code have to.
		 */
	} while (nr_pages == FREE_BATCH);
}

/*
 * copy_to_brd_setup must be called before copy_to_brd. It may sleep.
 */
static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
{
	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
	size_t copy;

	copy = min_t(size_t, n, PAGE_SIZE - offset);
	if (!brd_insert_page(brd, sector))
		return -ENOSPC;
	if (copy < n) {
		sector += copy >> SECTOR_SHIFT;
		if (!brd_insert_page(brd, sector))
			return -ENOSPC;
	}
	return 0;
}
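
/*
 * Note on the two-page logic above (illustrative arithmetic, assuming
 * 4 KiB pages): callers here never pass more than one page's worth of
 * data, so a copy spans at most two backing pages. E.g. a 1024-byte
 * write starting at byte offset 3584 within a page fits
 * 4096 - 3584 = 512 bytes in the first page and the remaining 512 bytes
 * at the start of the next one.
 */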

/*
 * Copy n bytes from src to the brd starting at sector. Does not sleep.
 */
static void copy_to_brd(struct brd_device *brd, const void *src,
			sector_t sector, size_t n)
{
	struct page *page;
	void *dst;
	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
	size_t copy;

	copy = min_t(size_t, n, PAGE_SIZE - offset);
	page = brd_lookup_page(brd, sector);
	BUG_ON(!page);

	dst = kmap_atomic(page);
	memcpy(dst + offset, src, copy);
	kunmap_atomic(dst);

	if (copy < n) {
		src += copy;
		sector += copy >> SECTOR_SHIFT;
		copy = n - copy;
		page = brd_lookup_page(brd, sector);
		BUG_ON(!page);

		dst = kmap_atomic(page);
		memcpy(dst, src, copy);
		kunmap_atomic(dst);
	}
}

/*
 * Copy n bytes to dst from the brd starting at sector. Does not sleep.
 */
static void copy_from_brd(void *dst, struct brd_device *brd,
			sector_t sector, size_t n)
{
	struct page *page;
	void *src;
	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
	size_t copy;

	copy = min_t(size_t, n, PAGE_SIZE - offset);
	page = brd_lookup_page(brd, sector);
	if (page) {
		src = kmap_atomic(page);
		memcpy(dst, src + offset, copy);
		kunmap_atomic(src);
	} else
		memset(dst, 0, copy);

	if (copy < n) {
		dst += copy;
		sector += copy >> SECTOR_SHIFT;
		copy = n - copy;
		page = brd_lookup_page(brd, sector);
		if (page) {
			src = kmap_atomic(page);
			memcpy(dst, src, copy);
			kunmap_atomic(src);
		} else
			memset(dst, 0, copy);
	}
}

/*
 * Process a single bvec of a bio.
 */
static int brd_do_bvec(struct brd_device *brd, struct page *page,
			unsigned int len, unsigned int off, unsigned int op,
			sector_t sector)
{
	void *mem;
	int err = 0;

	if (op_is_write(op)) {
		err = copy_to_brd_setup(brd, sector, len);
		if (err)
			goto out;
	}

	mem = kmap_atomic(page);
	if (!op_is_write(op)) {
		copy_from_brd(mem + off, brd, sector, len);
		flush_dcache_page(page);
	} else {
		flush_dcache_page(page);
		copy_to_brd(brd, mem + off, sector, len);
	}
	kunmap_atomic(mem);

out:
	return err;
}
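
/*
 * Ordering note (descriptive, not enforced elsewhere): copy_to_brd_setup()
 * may sleep while allocating backing pages, so it runs before kmap_atomic()
 * puts us in atomic context; copy_to_brd()/copy_from_brd() never sleep and
 * are safe under the atomic mapping.
 */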
284*4882a593Smuzhiyun 
brd_submit_bio(struct bio * bio)285*4882a593Smuzhiyun static blk_qc_t brd_submit_bio(struct bio *bio)
286*4882a593Smuzhiyun {
287*4882a593Smuzhiyun 	struct brd_device *brd = bio->bi_disk->private_data;
288*4882a593Smuzhiyun 	struct bio_vec bvec;
289*4882a593Smuzhiyun 	sector_t sector;
290*4882a593Smuzhiyun 	struct bvec_iter iter;
291*4882a593Smuzhiyun 
292*4882a593Smuzhiyun 	sector = bio->bi_iter.bi_sector;
293*4882a593Smuzhiyun 	if (bio_end_sector(bio) > get_capacity(bio->bi_disk))
294*4882a593Smuzhiyun 		goto io_error;
295*4882a593Smuzhiyun 
296*4882a593Smuzhiyun 	bio_for_each_segment(bvec, bio, iter) {
297*4882a593Smuzhiyun 		unsigned int len = bvec.bv_len;
298*4882a593Smuzhiyun 		int err;
299*4882a593Smuzhiyun 
300*4882a593Smuzhiyun 		/* Don't support un-aligned buffer */
301*4882a593Smuzhiyun 		WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
302*4882a593Smuzhiyun 				(len & (SECTOR_SIZE - 1)));
303*4882a593Smuzhiyun 
304*4882a593Smuzhiyun 		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
305*4882a593Smuzhiyun 				  bio_op(bio), sector);
306*4882a593Smuzhiyun 		if (err)
307*4882a593Smuzhiyun 			goto io_error;
308*4882a593Smuzhiyun 		sector += len >> SECTOR_SHIFT;
309*4882a593Smuzhiyun 	}
310*4882a593Smuzhiyun 
311*4882a593Smuzhiyun 	bio_endio(bio);
312*4882a593Smuzhiyun 	return BLK_QC_T_NONE;
313*4882a593Smuzhiyun io_error:
314*4882a593Smuzhiyun 	bio_io_error(bio);
315*4882a593Smuzhiyun 	return BLK_QC_T_NONE;
316*4882a593Smuzhiyun }
317*4882a593Smuzhiyun 
brd_rw_page(struct block_device * bdev,sector_t sector,struct page * page,unsigned int op)318*4882a593Smuzhiyun static int brd_rw_page(struct block_device *bdev, sector_t sector,
319*4882a593Smuzhiyun 		       struct page *page, unsigned int op)
320*4882a593Smuzhiyun {
321*4882a593Smuzhiyun 	struct brd_device *brd = bdev->bd_disk->private_data;
322*4882a593Smuzhiyun 	int err;
323*4882a593Smuzhiyun 
324*4882a593Smuzhiyun 	if (PageTransHuge(page))
325*4882a593Smuzhiyun 		return -ENOTSUPP;
326*4882a593Smuzhiyun 	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
327*4882a593Smuzhiyun 	page_endio(page, op_is_write(op), err);
328*4882a593Smuzhiyun 	return err;
329*4882a593Smuzhiyun }
330*4882a593Smuzhiyun 
331*4882a593Smuzhiyun static const struct block_device_operations brd_fops = {
332*4882a593Smuzhiyun 	.owner =		THIS_MODULE,
333*4882a593Smuzhiyun 	.submit_bio =		brd_submit_bio,
334*4882a593Smuzhiyun 	.rw_page =		brd_rw_page,
335*4882a593Smuzhiyun };

/*
 * And now the module code and kernel interface.
 */
static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
module_param(rd_nr, int, 0444);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");

unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
module_param(rd_size, ulong, 0444);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");

static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Number of minors to reserve between devices");

MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
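
/*
 * Usage sketch (not part of the driver; values are examples only):
 * loading the module with
 *
 *	modprobe brd rd_nr=2 rd_size=524288 max_part=4
 *
 * pre-creates /dev/ram0 and /dev/ram1, each 512 MiB (rd_size is in KiB),
 * with minor numbers spaced 4 apart to leave room for partitions.
 */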

#ifndef MODULE
/* Legacy boot options - nonmodular */
static int __init ramdisk_size(char *str)
{
	rd_size = simple_strtol(str, NULL, 0);
	return 1;
}
__setup("ramdisk_size=", ramdisk_size);
#endif
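
/*
 * For a built-in driver, the boot-time equivalent of rd_size is this
 * command-line parameter, e.g. "ramdisk_size=65536" for 64 MiB ramdisks
 * (again in KiB; an illustrative example, not a recommended size).
 */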

/*
 * The device scheme is derived from loop.c. Keep them in sync where possible
 * (should share code eventually).
 */
static LIST_HEAD(brd_devices);
static DEFINE_MUTEX(brd_devices_mutex);
372*4882a593Smuzhiyun 
brd_alloc(int i)373*4882a593Smuzhiyun static struct brd_device *brd_alloc(int i)
374*4882a593Smuzhiyun {
375*4882a593Smuzhiyun 	struct brd_device *brd;
376*4882a593Smuzhiyun 	struct gendisk *disk;
377*4882a593Smuzhiyun 
378*4882a593Smuzhiyun 	brd = kzalloc(sizeof(*brd), GFP_KERNEL);
379*4882a593Smuzhiyun 	if (!brd)
380*4882a593Smuzhiyun 		goto out;
381*4882a593Smuzhiyun 	brd->brd_number		= i;
382*4882a593Smuzhiyun 	spin_lock_init(&brd->brd_lock);
383*4882a593Smuzhiyun 	INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
384*4882a593Smuzhiyun 
385*4882a593Smuzhiyun 	brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE);
386*4882a593Smuzhiyun 	if (!brd->brd_queue)
387*4882a593Smuzhiyun 		goto out_free_dev;
388*4882a593Smuzhiyun 
389*4882a593Smuzhiyun 	/* This is so fdisk will align partitions on 4k, because of
390*4882a593Smuzhiyun 	 * direct_access API needing 4k alignment, returning a PFN
391*4882a593Smuzhiyun 	 * (This is only a problem on very small devices <= 4M,
392*4882a593Smuzhiyun 	 *  otherwise fdisk will align on 1M. Regardless this call
393*4882a593Smuzhiyun 	 *  is harmless)
394*4882a593Smuzhiyun 	 */
395*4882a593Smuzhiyun 	blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
396*4882a593Smuzhiyun 	disk = brd->brd_disk = alloc_disk(max_part);
397*4882a593Smuzhiyun 	if (!disk)
398*4882a593Smuzhiyun 		goto out_free_queue;
399*4882a593Smuzhiyun 	disk->major		= RAMDISK_MAJOR;
400*4882a593Smuzhiyun 	disk->first_minor	= i * max_part;
401*4882a593Smuzhiyun 	disk->fops		= &brd_fops;
402*4882a593Smuzhiyun 	disk->private_data	= brd;
403*4882a593Smuzhiyun 	disk->flags		= GENHD_FL_EXT_DEVT;
404*4882a593Smuzhiyun 	sprintf(disk->disk_name, "ram%d", i);
405*4882a593Smuzhiyun 	set_capacity(disk, rd_size * 2);
406*4882a593Smuzhiyun 
407*4882a593Smuzhiyun 	/* Tell the block layer that this is not a rotational device */
408*4882a593Smuzhiyun 	blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
409*4882a593Smuzhiyun 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
410*4882a593Smuzhiyun 
411*4882a593Smuzhiyun 	return brd;
412*4882a593Smuzhiyun 
413*4882a593Smuzhiyun out_free_queue:
414*4882a593Smuzhiyun 	blk_cleanup_queue(brd->brd_queue);
415*4882a593Smuzhiyun out_free_dev:
416*4882a593Smuzhiyun 	kfree(brd);
417*4882a593Smuzhiyun out:
418*4882a593Smuzhiyun 	return NULL;
419*4882a593Smuzhiyun }
420*4882a593Smuzhiyun 
brd_free(struct brd_device * brd)421*4882a593Smuzhiyun static void brd_free(struct brd_device *brd)
422*4882a593Smuzhiyun {
423*4882a593Smuzhiyun 	put_disk(brd->brd_disk);
424*4882a593Smuzhiyun 	blk_cleanup_queue(brd->brd_queue);
425*4882a593Smuzhiyun 	brd_free_pages(brd);
426*4882a593Smuzhiyun 	kfree(brd);
427*4882a593Smuzhiyun }
428*4882a593Smuzhiyun 
brd_init_one(int i,bool * new)429*4882a593Smuzhiyun static struct brd_device *brd_init_one(int i, bool *new)
430*4882a593Smuzhiyun {
431*4882a593Smuzhiyun 	struct brd_device *brd;
432*4882a593Smuzhiyun 
433*4882a593Smuzhiyun 	*new = false;
434*4882a593Smuzhiyun 	list_for_each_entry(brd, &brd_devices, brd_list) {
435*4882a593Smuzhiyun 		if (brd->brd_number == i)
436*4882a593Smuzhiyun 			goto out;
437*4882a593Smuzhiyun 	}
438*4882a593Smuzhiyun 
439*4882a593Smuzhiyun 	brd = brd_alloc(i);
440*4882a593Smuzhiyun 	if (brd) {
441*4882a593Smuzhiyun 		brd->brd_disk->queue = brd->brd_queue;
442*4882a593Smuzhiyun 		add_disk(brd->brd_disk);
443*4882a593Smuzhiyun 		list_add_tail(&brd->brd_list, &brd_devices);
444*4882a593Smuzhiyun 	}
445*4882a593Smuzhiyun 	*new = true;
446*4882a593Smuzhiyun out:
447*4882a593Smuzhiyun 	return brd;
448*4882a593Smuzhiyun }
449*4882a593Smuzhiyun 
brd_del_one(struct brd_device * brd)450*4882a593Smuzhiyun static void brd_del_one(struct brd_device *brd)
451*4882a593Smuzhiyun {
452*4882a593Smuzhiyun 	list_del(&brd->brd_list);
453*4882a593Smuzhiyun 	del_gendisk(brd->brd_disk);
454*4882a593Smuzhiyun 	brd_free(brd);
455*4882a593Smuzhiyun }
456*4882a593Smuzhiyun 
brd_probe(dev_t dev,int * part,void * data)457*4882a593Smuzhiyun static struct kobject *brd_probe(dev_t dev, int *part, void *data)
458*4882a593Smuzhiyun {
459*4882a593Smuzhiyun 	struct brd_device *brd;
460*4882a593Smuzhiyun 	struct kobject *kobj;
461*4882a593Smuzhiyun 	bool new;
462*4882a593Smuzhiyun 
463*4882a593Smuzhiyun 	mutex_lock(&brd_devices_mutex);
464*4882a593Smuzhiyun 	brd = brd_init_one(MINOR(dev) / max_part, &new);
465*4882a593Smuzhiyun 	kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL;
466*4882a593Smuzhiyun 	mutex_unlock(&brd_devices_mutex);
467*4882a593Smuzhiyun 
468*4882a593Smuzhiyun 	if (new)
469*4882a593Smuzhiyun 		*part = 0;
470*4882a593Smuzhiyun 
471*4882a593Smuzhiyun 	return kobj;
472*4882a593Smuzhiyun }
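
/*
 * brd_probe() is the hook passed to blk_register_region() in brd_init()
 * below: opening a not-yet-instantiated /dev/ram minor lands here, and the
 * device is created on demand via brd_init_one().
 */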
473*4882a593Smuzhiyun 
brd_check_and_reset_par(void)474*4882a593Smuzhiyun static inline void brd_check_and_reset_par(void)
475*4882a593Smuzhiyun {
476*4882a593Smuzhiyun 	if (unlikely(!max_part))
477*4882a593Smuzhiyun 		max_part = 1;
478*4882a593Smuzhiyun 
479*4882a593Smuzhiyun 	/*
480*4882a593Smuzhiyun 	 * make sure 'max_part' can be divided exactly by (1U << MINORBITS),
481*4882a593Smuzhiyun 	 * otherwise, it is possiable to get same dev_t when adding partitions.
482*4882a593Smuzhiyun 	 */
483*4882a593Smuzhiyun 	if ((1U << MINORBITS) % max_part != 0)
484*4882a593Smuzhiyun 		max_part = 1UL << fls(max_part);
485*4882a593Smuzhiyun 
486*4882a593Smuzhiyun 	if (max_part > DISK_MAX_PARTS) {
487*4882a593Smuzhiyun 		pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
488*4882a593Smuzhiyun 			DISK_MAX_PARTS, DISK_MAX_PARTS);
489*4882a593Smuzhiyun 		max_part = DISK_MAX_PARTS;
490*4882a593Smuzhiyun 	}
491*4882a593Smuzhiyun }
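
/*
 * Worked example of the rounding above (illustrative only): max_part=3
 * does not divide 1 << MINORBITS (1048576 with MINORBITS == 20) evenly,
 * so it is rounded up to a power of two: 1 << fls(3) = 1 << 2 = 4.
 */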
492*4882a593Smuzhiyun 
brd_init(void)493*4882a593Smuzhiyun static int __init brd_init(void)
494*4882a593Smuzhiyun {
495*4882a593Smuzhiyun 	struct brd_device *brd, *next;
496*4882a593Smuzhiyun 	int i;
497*4882a593Smuzhiyun 
498*4882a593Smuzhiyun 	/*
499*4882a593Smuzhiyun 	 * brd module now has a feature to instantiate underlying device
500*4882a593Smuzhiyun 	 * structure on-demand, provided that there is an access dev node.
501*4882a593Smuzhiyun 	 *
502*4882a593Smuzhiyun 	 * (1) if rd_nr is specified, create that many upfront. else
503*4882a593Smuzhiyun 	 *     it defaults to CONFIG_BLK_DEV_RAM_COUNT
504*4882a593Smuzhiyun 	 * (2) User can further extend brd devices by create dev node themselves
505*4882a593Smuzhiyun 	 *     and have kernel automatically instantiate actual device
506*4882a593Smuzhiyun 	 *     on-demand. Example:
507*4882a593Smuzhiyun 	 *		mknod /path/devnod_name b 1 X	# 1 is the rd major
508*4882a593Smuzhiyun 	 *		fdisk -l /path/devnod_name
509*4882a593Smuzhiyun 	 *	If (X / max_part) was not already created it will be created
510*4882a593Smuzhiyun 	 *	dynamically.
511*4882a593Smuzhiyun 	 */
512*4882a593Smuzhiyun 
513*4882a593Smuzhiyun 	if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
514*4882a593Smuzhiyun 		return -EIO;
515*4882a593Smuzhiyun 
516*4882a593Smuzhiyun 	brd_check_and_reset_par();
517*4882a593Smuzhiyun 
518*4882a593Smuzhiyun 	for (i = 0; i < rd_nr; i++) {
519*4882a593Smuzhiyun 		brd = brd_alloc(i);
520*4882a593Smuzhiyun 		if (!brd)
521*4882a593Smuzhiyun 			goto out_free;
522*4882a593Smuzhiyun 		list_add_tail(&brd->brd_list, &brd_devices);
523*4882a593Smuzhiyun 	}
524*4882a593Smuzhiyun 
525*4882a593Smuzhiyun 	/* point of no return */
526*4882a593Smuzhiyun 
527*4882a593Smuzhiyun 	list_for_each_entry(brd, &brd_devices, brd_list) {
528*4882a593Smuzhiyun 		/*
529*4882a593Smuzhiyun 		 * associate with queue just before adding disk for
530*4882a593Smuzhiyun 		 * avoiding to mess up failure path
531*4882a593Smuzhiyun 		 */
532*4882a593Smuzhiyun 		brd->brd_disk->queue = brd->brd_queue;
533*4882a593Smuzhiyun 		add_disk(brd->brd_disk);
534*4882a593Smuzhiyun 	}
535*4882a593Smuzhiyun 
536*4882a593Smuzhiyun 	blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
537*4882a593Smuzhiyun 				  THIS_MODULE, brd_probe, NULL, NULL);
538*4882a593Smuzhiyun 
539*4882a593Smuzhiyun 	pr_info("brd: module loaded\n");
540*4882a593Smuzhiyun 	return 0;
541*4882a593Smuzhiyun 
542*4882a593Smuzhiyun out_free:
543*4882a593Smuzhiyun 	list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
544*4882a593Smuzhiyun 		list_del(&brd->brd_list);
545*4882a593Smuzhiyun 		brd_free(brd);
546*4882a593Smuzhiyun 	}
547*4882a593Smuzhiyun 	unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 	pr_info("brd: module NOT loaded !!!\n");
550*4882a593Smuzhiyun 	return -ENOMEM;
551*4882a593Smuzhiyun }
552*4882a593Smuzhiyun 
brd_exit(void)553*4882a593Smuzhiyun static void __exit brd_exit(void)
554*4882a593Smuzhiyun {
555*4882a593Smuzhiyun 	struct brd_device *brd, *next;
556*4882a593Smuzhiyun 
557*4882a593Smuzhiyun 	list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
558*4882a593Smuzhiyun 		brd_del_one(brd);
559*4882a593Smuzhiyun 
560*4882a593Smuzhiyun 	blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS);
561*4882a593Smuzhiyun 	unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
562*4882a593Smuzhiyun 
563*4882a593Smuzhiyun 	pr_info("brd: module unloaded\n");
564*4882a593Smuzhiyun }
565*4882a593Smuzhiyun 
566*4882a593Smuzhiyun module_init(brd_init);
567*4882a593Smuzhiyun module_exit(brd_exit);