xref: /OK3568_Linux_fs/kernel/fs/mpage.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*
 * fs/mpage.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains functions related to preparing and submitting BIOs which contain
 * multiple pagecache pages.
 *
 * 15May2002	Andrew Morton
 *		Initial version
 * 27Jun2002	axboe@suse.de
 *		use bio_add_page() to build bio's just the right size
 */
15*4882a593Smuzhiyun 
16*4882a593Smuzhiyun #include <linux/kernel.h>
17*4882a593Smuzhiyun #include <linux/export.h>
18*4882a593Smuzhiyun #include <linux/mm.h>
19*4882a593Smuzhiyun #include <linux/kdev_t.h>
20*4882a593Smuzhiyun #include <linux/gfp.h>
21*4882a593Smuzhiyun #include <linux/bio.h>
22*4882a593Smuzhiyun #include <linux/fs.h>
23*4882a593Smuzhiyun #include <linux/buffer_head.h>
24*4882a593Smuzhiyun #include <linux/blkdev.h>
25*4882a593Smuzhiyun #include <linux/highmem.h>
26*4882a593Smuzhiyun #include <linux/prefetch.h>
27*4882a593Smuzhiyun #include <linux/mpage.h>
28*4882a593Smuzhiyun #include <linux/mm_inline.h>
29*4882a593Smuzhiyun #include <linux/writeback.h>
30*4882a593Smuzhiyun #include <linux/backing-dev.h>
31*4882a593Smuzhiyun #include <linux/pagevec.h>
32*4882a593Smuzhiyun #include <linux/cleancache.h>
33*4882a593Smuzhiyun #include "internal.h"
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun #define CREATE_TRACE_POINTS
36*4882a593Smuzhiyun #include <trace/events/android_fs.h>
37*4882a593Smuzhiyun 
38*4882a593Smuzhiyun EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start);
39*4882a593Smuzhiyun EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end);
40*4882a593Smuzhiyun EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start);
41*4882a593Smuzhiyun EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end);
42*4882a593Smuzhiyun 
43*4882a593Smuzhiyun /*
44*4882a593Smuzhiyun  * I/O completion handler for multipage BIOs.
45*4882a593Smuzhiyun  *
46*4882a593Smuzhiyun  * The mpage code never puts partial pages into a BIO (except for end-of-file).
47*4882a593Smuzhiyun  * If a page does not map to a contiguous run of blocks then it simply falls
48*4882a593Smuzhiyun  * back to block_read_full_page().
49*4882a593Smuzhiyun  *
50*4882a593Smuzhiyun  * Why is this?  If a page's completion depends on a number of different BIOs
51*4882a593Smuzhiyun  * which can complete in any order (or at the same time) then determining the
52*4882a593Smuzhiyun  * status of that page is hard.  See end_buffer_async_read() for the details.
53*4882a593Smuzhiyun  * There is no point in duplicating all that complexity.
54*4882a593Smuzhiyun  */
mpage_end_io(struct bio * bio)55*4882a593Smuzhiyun static void mpage_end_io(struct bio *bio)
56*4882a593Smuzhiyun {
57*4882a593Smuzhiyun 	struct bio_vec *bv;
58*4882a593Smuzhiyun 	struct bvec_iter_all iter_all;
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun 	if (trace_android_fs_dataread_end_enabled() &&
61*4882a593Smuzhiyun 	    (bio_data_dir(bio) == READ)) {
62*4882a593Smuzhiyun 		struct page *first_page = bio->bi_io_vec[0].bv_page;
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun 		if (first_page != NULL)
65*4882a593Smuzhiyun 			trace_android_fs_dataread_end(first_page->mapping->host,
66*4882a593Smuzhiyun 						      page_offset(first_page),
67*4882a593Smuzhiyun 						      bio->bi_iter.bi_size);
68*4882a593Smuzhiyun 	}
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun 	bio_for_each_segment_all(bv, bio, iter_all) {
71*4882a593Smuzhiyun 		struct page *page = bv->bv_page;
72*4882a593Smuzhiyun 		page_endio(page, bio_op(bio),
73*4882a593Smuzhiyun 			   blk_status_to_errno(bio->bi_status));
74*4882a593Smuzhiyun 	}
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 	bio_put(bio);
77*4882a593Smuzhiyun }
78*4882a593Smuzhiyun 
mpage_bio_submit(int op,int op_flags,struct bio * bio)79*4882a593Smuzhiyun static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
80*4882a593Smuzhiyun {
81*4882a593Smuzhiyun 	if (trace_android_fs_dataread_start_enabled() && (op == REQ_OP_READ)) {
82*4882a593Smuzhiyun 		struct page *first_page = bio->bi_io_vec[0].bv_page;
83*4882a593Smuzhiyun 
84*4882a593Smuzhiyun 		if (first_page != NULL) {
85*4882a593Smuzhiyun 			char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
86*4882a593Smuzhiyun 
87*4882a593Smuzhiyun 			path = android_fstrace_get_pathname(pathbuf,
88*4882a593Smuzhiyun 						    MAX_TRACE_PATHBUF_LEN,
89*4882a593Smuzhiyun 						    first_page->mapping->host);
90*4882a593Smuzhiyun 			trace_android_fs_dataread_start(
91*4882a593Smuzhiyun 				first_page->mapping->host,
92*4882a593Smuzhiyun 				page_offset(first_page),
93*4882a593Smuzhiyun 				bio->bi_iter.bi_size,
94*4882a593Smuzhiyun 				current->pid,
95*4882a593Smuzhiyun 				path,
96*4882a593Smuzhiyun 				current->comm);
97*4882a593Smuzhiyun 		}
98*4882a593Smuzhiyun 	}
99*4882a593Smuzhiyun 	bio->bi_end_io = mpage_end_io;
100*4882a593Smuzhiyun 	bio_set_op_attrs(bio, op, op_flags);
101*4882a593Smuzhiyun 	guard_bio_eod(bio);
102*4882a593Smuzhiyun 	submit_bio(bio);
103*4882a593Smuzhiyun 	return NULL;
104*4882a593Smuzhiyun }
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun static struct bio *
mpage_alloc(struct block_device * bdev,sector_t first_sector,int nr_vecs,gfp_t gfp_flags)107*4882a593Smuzhiyun mpage_alloc(struct block_device *bdev,
108*4882a593Smuzhiyun 		sector_t first_sector, int nr_vecs,
109*4882a593Smuzhiyun 		gfp_t gfp_flags)
110*4882a593Smuzhiyun {
111*4882a593Smuzhiyun 	struct bio *bio;
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 	/* Restrict the given (page cache) mask for slab allocations */
114*4882a593Smuzhiyun 	gfp_flags &= GFP_KERNEL;
115*4882a593Smuzhiyun 	bio = bio_alloc(gfp_flags, nr_vecs);
116*4882a593Smuzhiyun 
117*4882a593Smuzhiyun 	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
118*4882a593Smuzhiyun 		while (!bio && (nr_vecs /= 2))
119*4882a593Smuzhiyun 			bio = bio_alloc(gfp_flags, nr_vecs);
120*4882a593Smuzhiyun 	}
121*4882a593Smuzhiyun 
122*4882a593Smuzhiyun 	if (bio) {
123*4882a593Smuzhiyun 		bio_set_dev(bio, bdev);
124*4882a593Smuzhiyun 		bio->bi_iter.bi_sector = first_sector;
125*4882a593Smuzhiyun 	}
126*4882a593Smuzhiyun 	return bio;
127*4882a593Smuzhiyun }
128*4882a593Smuzhiyun 
129*4882a593Smuzhiyun /*
130*4882a593Smuzhiyun  * support function for mpage_readahead.  The fs supplied get_block might
131*4882a593Smuzhiyun  * return an up to date buffer.  This is used to map that buffer into
132*4882a593Smuzhiyun  * the page, which allows readpage to avoid triggering a duplicate call
133*4882a593Smuzhiyun  * to get_block.
134*4882a593Smuzhiyun  *
135*4882a593Smuzhiyun  * The idea is to avoid adding buffers to pages that don't already have
136*4882a593Smuzhiyun  * them.  So when the buffer is up to date and the page size == block size,
137*4882a593Smuzhiyun  * this marks the page up to date instead of adding new buffers.
138*4882a593Smuzhiyun  */
139*4882a593Smuzhiyun static void
map_buffer_to_page(struct page * page,struct buffer_head * bh,int page_block)140*4882a593Smuzhiyun map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
141*4882a593Smuzhiyun {
142*4882a593Smuzhiyun 	struct inode *inode = page->mapping->host;
143*4882a593Smuzhiyun 	struct buffer_head *page_bh, *head;
144*4882a593Smuzhiyun 	int block = 0;
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 	if (!page_has_buffers(page)) {
147*4882a593Smuzhiyun 		/*
148*4882a593Smuzhiyun 		 * don't make any buffers if there is only one buffer on
149*4882a593Smuzhiyun 		 * the page and the page just needs to be set up to date
150*4882a593Smuzhiyun 		 */
151*4882a593Smuzhiyun 		if (inode->i_blkbits == PAGE_SHIFT &&
152*4882a593Smuzhiyun 		    buffer_uptodate(bh)) {
153*4882a593Smuzhiyun 			SetPageUptodate(page);
154*4882a593Smuzhiyun 			return;
155*4882a593Smuzhiyun 		}
156*4882a593Smuzhiyun 		create_empty_buffers(page, i_blocksize(inode), 0);
157*4882a593Smuzhiyun 	}
158*4882a593Smuzhiyun 	head = page_buffers(page);
159*4882a593Smuzhiyun 	page_bh = head;
160*4882a593Smuzhiyun 	do {
161*4882a593Smuzhiyun 		if (block == page_block) {
162*4882a593Smuzhiyun 			page_bh->b_state = bh->b_state;
163*4882a593Smuzhiyun 			page_bh->b_bdev = bh->b_bdev;
164*4882a593Smuzhiyun 			page_bh->b_blocknr = bh->b_blocknr;
165*4882a593Smuzhiyun 			break;
166*4882a593Smuzhiyun 		}
167*4882a593Smuzhiyun 		page_bh = page_bh->b_this_page;
168*4882a593Smuzhiyun 		block++;
169*4882a593Smuzhiyun 	} while (page_bh != head);
170*4882a593Smuzhiyun }
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun struct mpage_readpage_args {
173*4882a593Smuzhiyun 	struct bio *bio;
174*4882a593Smuzhiyun 	struct page *page;
175*4882a593Smuzhiyun 	unsigned int nr_pages;
176*4882a593Smuzhiyun 	bool is_readahead;
177*4882a593Smuzhiyun 	sector_t last_block_in_bio;
178*4882a593Smuzhiyun 	struct buffer_head map_bh;
179*4882a593Smuzhiyun 	unsigned long first_logical_block;
180*4882a593Smuzhiyun 	get_block_t *get_block;
181*4882a593Smuzhiyun };
182*4882a593Smuzhiyun 
183*4882a593Smuzhiyun /*
184*4882a593Smuzhiyun  * This is the worker routine which does all the work of mapping the disk
185*4882a593Smuzhiyun  * blocks and constructs largest possible bios, submits them for IO if the
186*4882a593Smuzhiyun  * blocks are not contiguous on the disk.
187*4882a593Smuzhiyun  *
188*4882a593Smuzhiyun  * We pass a buffer_head back and forth and use its buffer_mapped() flag to
189*4882a593Smuzhiyun  * represent the validity of its disk mapping and to decide when to do the next
190*4882a593Smuzhiyun  * get_block() call.
191*4882a593Smuzhiyun  */
do_mpage_readpage(struct mpage_readpage_args * args)192*4882a593Smuzhiyun static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
193*4882a593Smuzhiyun {
194*4882a593Smuzhiyun 	struct page *page = args->page;
195*4882a593Smuzhiyun 	struct inode *inode = page->mapping->host;
196*4882a593Smuzhiyun 	const unsigned blkbits = inode->i_blkbits;
197*4882a593Smuzhiyun 	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
198*4882a593Smuzhiyun 	const unsigned blocksize = 1 << blkbits;
199*4882a593Smuzhiyun 	struct buffer_head *map_bh = &args->map_bh;
200*4882a593Smuzhiyun 	sector_t block_in_file;
201*4882a593Smuzhiyun 	sector_t last_block;
202*4882a593Smuzhiyun 	sector_t last_block_in_file;
203*4882a593Smuzhiyun 	sector_t blocks[MAX_BUF_PER_PAGE];
204*4882a593Smuzhiyun 	unsigned page_block;
205*4882a593Smuzhiyun 	unsigned first_hole = blocks_per_page;
206*4882a593Smuzhiyun 	struct block_device *bdev = NULL;
207*4882a593Smuzhiyun 	int length;
208*4882a593Smuzhiyun 	int fully_mapped = 1;
209*4882a593Smuzhiyun 	int op_flags;
210*4882a593Smuzhiyun 	unsigned nblocks;
211*4882a593Smuzhiyun 	unsigned relative_block;
212*4882a593Smuzhiyun 	gfp_t gfp;
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	if (args->is_readahead) {
215*4882a593Smuzhiyun 		op_flags = REQ_RAHEAD;
216*4882a593Smuzhiyun 		gfp = readahead_gfp_mask(page->mapping);
217*4882a593Smuzhiyun 	} else {
218*4882a593Smuzhiyun 		op_flags = 0;
219*4882a593Smuzhiyun 		gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
220*4882a593Smuzhiyun 	}
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 	if (page_has_buffers(page))
223*4882a593Smuzhiyun 		goto confused;
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
226*4882a593Smuzhiyun 	last_block = block_in_file + args->nr_pages * blocks_per_page;
227*4882a593Smuzhiyun 	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
228*4882a593Smuzhiyun 	if (last_block > last_block_in_file)
229*4882a593Smuzhiyun 		last_block = last_block_in_file;
230*4882a593Smuzhiyun 	page_block = 0;
231*4882a593Smuzhiyun 
232*4882a593Smuzhiyun 	/*
233*4882a593Smuzhiyun 	 * Map blocks using the result from the previous get_blocks call first.
234*4882a593Smuzhiyun 	 */
235*4882a593Smuzhiyun 	nblocks = map_bh->b_size >> blkbits;
236*4882a593Smuzhiyun 	if (buffer_mapped(map_bh) &&
237*4882a593Smuzhiyun 			block_in_file > args->first_logical_block &&
238*4882a593Smuzhiyun 			block_in_file < (args->first_logical_block + nblocks)) {
239*4882a593Smuzhiyun 		unsigned map_offset = block_in_file - args->first_logical_block;
240*4882a593Smuzhiyun 		unsigned last = nblocks - map_offset;
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun 		for (relative_block = 0; ; relative_block++) {
243*4882a593Smuzhiyun 			if (relative_block == last) {
244*4882a593Smuzhiyun 				clear_buffer_mapped(map_bh);
245*4882a593Smuzhiyun 				break;
246*4882a593Smuzhiyun 			}
247*4882a593Smuzhiyun 			if (page_block == blocks_per_page)
248*4882a593Smuzhiyun 				break;
249*4882a593Smuzhiyun 			blocks[page_block] = map_bh->b_blocknr + map_offset +
250*4882a593Smuzhiyun 						relative_block;
251*4882a593Smuzhiyun 			page_block++;
252*4882a593Smuzhiyun 			block_in_file++;
253*4882a593Smuzhiyun 		}
254*4882a593Smuzhiyun 		bdev = map_bh->b_bdev;
255*4882a593Smuzhiyun 	}
256*4882a593Smuzhiyun 
257*4882a593Smuzhiyun 	/*
258*4882a593Smuzhiyun 	 * Then do more get_blocks calls until we are done with this page.
259*4882a593Smuzhiyun 	 */
260*4882a593Smuzhiyun 	map_bh->b_page = page;
261*4882a593Smuzhiyun 	while (page_block < blocks_per_page) {
262*4882a593Smuzhiyun 		map_bh->b_state = 0;
263*4882a593Smuzhiyun 		map_bh->b_size = 0;
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 		if (block_in_file < last_block) {
266*4882a593Smuzhiyun 			map_bh->b_size = (last_block-block_in_file) << blkbits;
267*4882a593Smuzhiyun 			if (args->get_block(inode, block_in_file, map_bh, 0))
268*4882a593Smuzhiyun 				goto confused;
269*4882a593Smuzhiyun 			args->first_logical_block = block_in_file;
270*4882a593Smuzhiyun 		}
271*4882a593Smuzhiyun 
272*4882a593Smuzhiyun 		if (!buffer_mapped(map_bh)) {
273*4882a593Smuzhiyun 			fully_mapped = 0;
274*4882a593Smuzhiyun 			if (first_hole == blocks_per_page)
275*4882a593Smuzhiyun 				first_hole = page_block;
276*4882a593Smuzhiyun 			page_block++;
277*4882a593Smuzhiyun 			block_in_file++;
278*4882a593Smuzhiyun 			continue;
279*4882a593Smuzhiyun 		}
280*4882a593Smuzhiyun 
281*4882a593Smuzhiyun 		/* some filesystems will copy data into the page during
282*4882a593Smuzhiyun 		 * the get_block call, in which case we don't want to
283*4882a593Smuzhiyun 		 * read it again.  map_buffer_to_page copies the data
284*4882a593Smuzhiyun 		 * we just collected from get_block into the page's buffers
285*4882a593Smuzhiyun 		 * so readpage doesn't have to repeat the get_block call
286*4882a593Smuzhiyun 		 */
287*4882a593Smuzhiyun 		if (buffer_uptodate(map_bh)) {
288*4882a593Smuzhiyun 			map_buffer_to_page(page, map_bh, page_block);
289*4882a593Smuzhiyun 			goto confused;
290*4882a593Smuzhiyun 		}
291*4882a593Smuzhiyun 
292*4882a593Smuzhiyun 		if (first_hole != blocks_per_page)
293*4882a593Smuzhiyun 			goto confused;		/* hole -> non-hole */
294*4882a593Smuzhiyun 
295*4882a593Smuzhiyun 		/* Contiguous blocks? */
296*4882a593Smuzhiyun 		if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
297*4882a593Smuzhiyun 			goto confused;
298*4882a593Smuzhiyun 		nblocks = map_bh->b_size >> blkbits;
299*4882a593Smuzhiyun 		for (relative_block = 0; ; relative_block++) {
300*4882a593Smuzhiyun 			if (relative_block == nblocks) {
301*4882a593Smuzhiyun 				clear_buffer_mapped(map_bh);
302*4882a593Smuzhiyun 				break;
303*4882a593Smuzhiyun 			} else if (page_block == blocks_per_page)
304*4882a593Smuzhiyun 				break;
305*4882a593Smuzhiyun 			blocks[page_block] = map_bh->b_blocknr+relative_block;
306*4882a593Smuzhiyun 			page_block++;
307*4882a593Smuzhiyun 			block_in_file++;
308*4882a593Smuzhiyun 		}
309*4882a593Smuzhiyun 		bdev = map_bh->b_bdev;
310*4882a593Smuzhiyun 	}
311*4882a593Smuzhiyun 
312*4882a593Smuzhiyun 	if (first_hole != blocks_per_page) {
313*4882a593Smuzhiyun 		zero_user_segment(page, first_hole << blkbits, PAGE_SIZE);
314*4882a593Smuzhiyun 		if (first_hole == 0) {
315*4882a593Smuzhiyun 			SetPageUptodate(page);
316*4882a593Smuzhiyun 			unlock_page(page);
317*4882a593Smuzhiyun 			goto out;
318*4882a593Smuzhiyun 		}
319*4882a593Smuzhiyun 	} else if (fully_mapped) {
320*4882a593Smuzhiyun 		SetPageMappedToDisk(page);
321*4882a593Smuzhiyun 	}
322*4882a593Smuzhiyun 
323*4882a593Smuzhiyun 	if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
324*4882a593Smuzhiyun 	    cleancache_get_page(page) == 0) {
325*4882a593Smuzhiyun 		SetPageUptodate(page);
326*4882a593Smuzhiyun 		goto confused;
327*4882a593Smuzhiyun 	}
328*4882a593Smuzhiyun 
329*4882a593Smuzhiyun 	/*
330*4882a593Smuzhiyun 	 * This page will go to BIO.  Do we need to send this BIO off first?
331*4882a593Smuzhiyun 	 */
332*4882a593Smuzhiyun 	if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
333*4882a593Smuzhiyun 		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
334*4882a593Smuzhiyun 
335*4882a593Smuzhiyun alloc_new:
336*4882a593Smuzhiyun 	if (args->bio == NULL) {
337*4882a593Smuzhiyun 		if (first_hole == blocks_per_page) {
338*4882a593Smuzhiyun 			if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
339*4882a593Smuzhiyun 								page))
340*4882a593Smuzhiyun 				goto out;
341*4882a593Smuzhiyun 		}
342*4882a593Smuzhiyun 		args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
343*4882a593Smuzhiyun 					min_t(int, args->nr_pages,
344*4882a593Smuzhiyun 					      BIO_MAX_PAGES),
345*4882a593Smuzhiyun 					gfp);
346*4882a593Smuzhiyun 		if (args->bio == NULL)
347*4882a593Smuzhiyun 			goto confused;
348*4882a593Smuzhiyun 	}
349*4882a593Smuzhiyun 
350*4882a593Smuzhiyun 	length = first_hole << blkbits;
351*4882a593Smuzhiyun 	if (bio_add_page(args->bio, page, length, 0) < length) {
352*4882a593Smuzhiyun 		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
353*4882a593Smuzhiyun 		goto alloc_new;
354*4882a593Smuzhiyun 	}
355*4882a593Smuzhiyun 
356*4882a593Smuzhiyun 	relative_block = block_in_file - args->first_logical_block;
357*4882a593Smuzhiyun 	nblocks = map_bh->b_size >> blkbits;
358*4882a593Smuzhiyun 	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
359*4882a593Smuzhiyun 	    (first_hole != blocks_per_page))
360*4882a593Smuzhiyun 		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
361*4882a593Smuzhiyun 	else
362*4882a593Smuzhiyun 		args->last_block_in_bio = blocks[blocks_per_page - 1];
363*4882a593Smuzhiyun out:
364*4882a593Smuzhiyun 	return args->bio;
365*4882a593Smuzhiyun 
366*4882a593Smuzhiyun confused:
367*4882a593Smuzhiyun 	if (args->bio)
368*4882a593Smuzhiyun 		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
369*4882a593Smuzhiyun 	if (!PageUptodate(page))
370*4882a593Smuzhiyun 		block_read_full_page(page, args->get_block);
371*4882a593Smuzhiyun 	else
372*4882a593Smuzhiyun 		unlock_page(page);
373*4882a593Smuzhiyun 	goto out;
374*4882a593Smuzhiyun }
375*4882a593Smuzhiyun 
376*4882a593Smuzhiyun /**
377*4882a593Smuzhiyun  * mpage_readahead - start reads against pages
378*4882a593Smuzhiyun  * @rac: Describes which pages to read.
379*4882a593Smuzhiyun  * @get_block: The filesystem's block mapper function.
380*4882a593Smuzhiyun  *
381*4882a593Smuzhiyun  * This function walks the pages and the blocks within each page, building and
382*4882a593Smuzhiyun  * emitting large BIOs.
383*4882a593Smuzhiyun  *
384*4882a593Smuzhiyun  * If anything unusual happens, such as:
385*4882a593Smuzhiyun  *
386*4882a593Smuzhiyun  * - encountering a page which has buffers
387*4882a593Smuzhiyun  * - encountering a page which has a non-hole after a hole
388*4882a593Smuzhiyun  * - encountering a page with non-contiguous blocks
389*4882a593Smuzhiyun  *
390*4882a593Smuzhiyun  * then this code just gives up and calls the buffer_head-based read function.
391*4882a593Smuzhiyun  * It does handle a page which has holes at the end - that is a common case:
392*4882a593Smuzhiyun  * the end-of-file on blocksize < PAGE_SIZE setups.
393*4882a593Smuzhiyun  *
394*4882a593Smuzhiyun  * BH_Boundary explanation:
395*4882a593Smuzhiyun  *
396*4882a593Smuzhiyun  * There is a problem.  The mpage read code assembles several pages, gets all
397*4882a593Smuzhiyun  * their disk mappings, and then submits them all.  That's fine, but obtaining
398*4882a593Smuzhiyun  * the disk mappings may require I/O.  Reads of indirect blocks, for example.
399*4882a593Smuzhiyun  *
400*4882a593Smuzhiyun  * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
401*4882a593Smuzhiyun  * submitted in the following order:
402*4882a593Smuzhiyun  *
403*4882a593Smuzhiyun  * 	12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
404*4882a593Smuzhiyun  *
405*4882a593Smuzhiyun  * because the indirect block has to be read to get the mappings of blocks
406*4882a593Smuzhiyun  * 13,14,15,16.  Obviously, this impacts performance.
407*4882a593Smuzhiyun  *
408*4882a593Smuzhiyun  * So what we do it to allow the filesystem's get_block() function to set
409*4882a593Smuzhiyun  * BH_Boundary when it maps block 11.  BH_Boundary says: mapping of the block
410*4882a593Smuzhiyun  * after this one will require I/O against a block which is probably close to
411*4882a593Smuzhiyun  * this one.  So you should push what I/O you have currently accumulated.
412*4882a593Smuzhiyun  *
413*4882a593Smuzhiyun  * This all causes the disk requests to be issued in the correct order.
414*4882a593Smuzhiyun  */
mpage_readahead(struct readahead_control * rac,get_block_t get_block)415*4882a593Smuzhiyun void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
416*4882a593Smuzhiyun {
417*4882a593Smuzhiyun 	struct page *page;
418*4882a593Smuzhiyun 	struct mpage_readpage_args args = {
419*4882a593Smuzhiyun 		.get_block = get_block,
420*4882a593Smuzhiyun 		.is_readahead = true,
421*4882a593Smuzhiyun 	};
422*4882a593Smuzhiyun 
423*4882a593Smuzhiyun 	while ((page = readahead_page(rac))) {
424*4882a593Smuzhiyun 		prefetchw(&page->flags);
425*4882a593Smuzhiyun 		args.page = page;
426*4882a593Smuzhiyun 		args.nr_pages = readahead_count(rac);
427*4882a593Smuzhiyun 		args.bio = do_mpage_readpage(&args);
428*4882a593Smuzhiyun 		put_page(page);
429*4882a593Smuzhiyun 	}
430*4882a593Smuzhiyun 	if (args.bio)
431*4882a593Smuzhiyun 		mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
432*4882a593Smuzhiyun }
433*4882a593Smuzhiyun EXPORT_SYMBOL_NS(mpage_readahead, ANDROID_GKI_VFS_EXPORT_ONLY);
434*4882a593Smuzhiyun 
435*4882a593Smuzhiyun /*
436*4882a593Smuzhiyun  * This isn't called much at all
437*4882a593Smuzhiyun  */
mpage_readpage(struct page * page,get_block_t get_block)438*4882a593Smuzhiyun int mpage_readpage(struct page *page, get_block_t get_block)
439*4882a593Smuzhiyun {
440*4882a593Smuzhiyun 	struct mpage_readpage_args args = {
441*4882a593Smuzhiyun 		.page = page,
442*4882a593Smuzhiyun 		.nr_pages = 1,
443*4882a593Smuzhiyun 		.get_block = get_block,
444*4882a593Smuzhiyun 	};
445*4882a593Smuzhiyun 
446*4882a593Smuzhiyun 	args.bio = do_mpage_readpage(&args);
447*4882a593Smuzhiyun 	if (args.bio)
448*4882a593Smuzhiyun 		mpage_bio_submit(REQ_OP_READ, 0, args.bio);
449*4882a593Smuzhiyun 	return 0;
450*4882a593Smuzhiyun }
451*4882a593Smuzhiyun EXPORT_SYMBOL_NS(mpage_readpage, ANDROID_GKI_VFS_EXPORT_ONLY);
452*4882a593Smuzhiyun 
/*
 * Writing is not so simple.
 *
 * If the page has buffers then they will be used for obtaining the disk
 * mapping.  We only support pages which are fully mapped-and-dirty, with a
 * special case for pages which are unmapped at the end: end-of-file.
 *
 * If the page has no buffers (preferred) then the page is mapped here.
 *
 * If all blocks are found to be contiguous then the page can go into the
 * BIO.  Otherwise fall back to the mapping's writepage().
 *
 * FIXME: This code wants an estimate of how many pages are still to be
 * written, so it can intelligently allocate a suitably-sized BIO.  For now,
 * just allocate full-size (16-page) BIOs.
 */
469*4882a593Smuzhiyun 
470*4882a593Smuzhiyun struct mpage_data {
471*4882a593Smuzhiyun 	struct bio *bio;
472*4882a593Smuzhiyun 	sector_t last_block_in_bio;
473*4882a593Smuzhiyun 	get_block_t *get_block;
474*4882a593Smuzhiyun 	unsigned use_writepage;
475*4882a593Smuzhiyun };
476*4882a593Smuzhiyun 
477*4882a593Smuzhiyun /*
478*4882a593Smuzhiyun  * We have our BIO, so we can now mark the buffers clean.  Make
479*4882a593Smuzhiyun  * sure to only clean buffers which we know we'll be writing.
480*4882a593Smuzhiyun  */
clean_buffers(struct page * page,unsigned first_unmapped)481*4882a593Smuzhiyun static void clean_buffers(struct page *page, unsigned first_unmapped)
482*4882a593Smuzhiyun {
483*4882a593Smuzhiyun 	unsigned buffer_counter = 0;
484*4882a593Smuzhiyun 	struct buffer_head *bh, *head;
485*4882a593Smuzhiyun 	if (!page_has_buffers(page))
486*4882a593Smuzhiyun 		return;
487*4882a593Smuzhiyun 	head = page_buffers(page);
488*4882a593Smuzhiyun 	bh = head;
489*4882a593Smuzhiyun 
490*4882a593Smuzhiyun 	do {
491*4882a593Smuzhiyun 		if (buffer_counter++ == first_unmapped)
492*4882a593Smuzhiyun 			break;
493*4882a593Smuzhiyun 		clear_buffer_dirty(bh);
494*4882a593Smuzhiyun 		bh = bh->b_this_page;
495*4882a593Smuzhiyun 	} while (bh != head);
496*4882a593Smuzhiyun 
497*4882a593Smuzhiyun 	/*
498*4882a593Smuzhiyun 	 * we cannot drop the bh if the page is not uptodate or a concurrent
499*4882a593Smuzhiyun 	 * readpage would fail to serialize with the bh and it would read from
500*4882a593Smuzhiyun 	 * disk before we reach the platter.
501*4882a593Smuzhiyun 	 */
502*4882a593Smuzhiyun 	if (buffer_heads_over_limit && PageUptodate(page))
503*4882a593Smuzhiyun 		try_to_free_buffers(page);
504*4882a593Smuzhiyun }
505*4882a593Smuzhiyun 
/*
 * For situations where we want to clean all buffers attached to a page.
 * There is no need to work out how many buffers the page has; passing a
 * count larger than the maximum number of buffers is enough.
 */
void clean_page_buffers(struct page *page)
{
	const unsigned every_buffer = ~0U;

	clean_buffers(page, every_buffer);
}
515*4882a593Smuzhiyun 
__mpage_writepage(struct page * page,struct writeback_control * wbc,void * data)516*4882a593Smuzhiyun static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
517*4882a593Smuzhiyun 		      void *data)
518*4882a593Smuzhiyun {
519*4882a593Smuzhiyun 	struct mpage_data *mpd = data;
520*4882a593Smuzhiyun 	struct bio *bio = mpd->bio;
521*4882a593Smuzhiyun 	struct address_space *mapping = page->mapping;
522*4882a593Smuzhiyun 	struct inode *inode = page->mapping->host;
523*4882a593Smuzhiyun 	const unsigned blkbits = inode->i_blkbits;
524*4882a593Smuzhiyun 	unsigned long end_index;
525*4882a593Smuzhiyun 	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
526*4882a593Smuzhiyun 	sector_t last_block;
527*4882a593Smuzhiyun 	sector_t block_in_file;
528*4882a593Smuzhiyun 	sector_t blocks[MAX_BUF_PER_PAGE];
529*4882a593Smuzhiyun 	unsigned page_block;
530*4882a593Smuzhiyun 	unsigned first_unmapped = blocks_per_page;
531*4882a593Smuzhiyun 	struct block_device *bdev = NULL;
532*4882a593Smuzhiyun 	int boundary = 0;
533*4882a593Smuzhiyun 	sector_t boundary_block = 0;
534*4882a593Smuzhiyun 	struct block_device *boundary_bdev = NULL;
535*4882a593Smuzhiyun 	int length;
536*4882a593Smuzhiyun 	struct buffer_head map_bh;
537*4882a593Smuzhiyun 	loff_t i_size = i_size_read(inode);
538*4882a593Smuzhiyun 	int ret = 0;
539*4882a593Smuzhiyun 	int op_flags = wbc_to_write_flags(wbc);
540*4882a593Smuzhiyun 
541*4882a593Smuzhiyun 	if (page_has_buffers(page)) {
542*4882a593Smuzhiyun 		struct buffer_head *head = page_buffers(page);
543*4882a593Smuzhiyun 		struct buffer_head *bh = head;
544*4882a593Smuzhiyun 
545*4882a593Smuzhiyun 		/* If they're all mapped and dirty, do it */
546*4882a593Smuzhiyun 		page_block = 0;
547*4882a593Smuzhiyun 		do {
548*4882a593Smuzhiyun 			BUG_ON(buffer_locked(bh));
549*4882a593Smuzhiyun 			if (!buffer_mapped(bh)) {
550*4882a593Smuzhiyun 				/*
551*4882a593Smuzhiyun 				 * unmapped dirty buffers are created by
552*4882a593Smuzhiyun 				 * __set_page_dirty_buffers -> mmapped data
553*4882a593Smuzhiyun 				 */
554*4882a593Smuzhiyun 				if (buffer_dirty(bh))
555*4882a593Smuzhiyun 					goto confused;
556*4882a593Smuzhiyun 				if (first_unmapped == blocks_per_page)
557*4882a593Smuzhiyun 					first_unmapped = page_block;
558*4882a593Smuzhiyun 				continue;
559*4882a593Smuzhiyun 			}
560*4882a593Smuzhiyun 
561*4882a593Smuzhiyun 			if (first_unmapped != blocks_per_page)
562*4882a593Smuzhiyun 				goto confused;	/* hole -> non-hole */
563*4882a593Smuzhiyun 
564*4882a593Smuzhiyun 			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
565*4882a593Smuzhiyun 				goto confused;
566*4882a593Smuzhiyun 			if (page_block) {
567*4882a593Smuzhiyun 				if (bh->b_blocknr != blocks[page_block-1] + 1)
568*4882a593Smuzhiyun 					goto confused;
569*4882a593Smuzhiyun 			}
570*4882a593Smuzhiyun 			blocks[page_block++] = bh->b_blocknr;
571*4882a593Smuzhiyun 			boundary = buffer_boundary(bh);
572*4882a593Smuzhiyun 			if (boundary) {
573*4882a593Smuzhiyun 				boundary_block = bh->b_blocknr;
574*4882a593Smuzhiyun 				boundary_bdev = bh->b_bdev;
575*4882a593Smuzhiyun 			}
576*4882a593Smuzhiyun 			bdev = bh->b_bdev;
577*4882a593Smuzhiyun 		} while ((bh = bh->b_this_page) != head);
578*4882a593Smuzhiyun 
579*4882a593Smuzhiyun 		if (first_unmapped)
580*4882a593Smuzhiyun 			goto page_is_mapped;
581*4882a593Smuzhiyun 
582*4882a593Smuzhiyun 		/*
583*4882a593Smuzhiyun 		 * Page has buffers, but they are all unmapped. The page was
584*4882a593Smuzhiyun 		 * created by pagein or read over a hole which was handled by
585*4882a593Smuzhiyun 		 * block_read_full_page().  If this address_space is also
586*4882a593Smuzhiyun 		 * using mpage_readahead then this can rarely happen.
587*4882a593Smuzhiyun 		 */
588*4882a593Smuzhiyun 		goto confused;
589*4882a593Smuzhiyun 	}
590*4882a593Smuzhiyun 
591*4882a593Smuzhiyun 	/*
592*4882a593Smuzhiyun 	 * The page has no buffers: map it to disk
593*4882a593Smuzhiyun 	 */
594*4882a593Smuzhiyun 	BUG_ON(!PageUptodate(page));
595*4882a593Smuzhiyun 	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
596*4882a593Smuzhiyun 	last_block = (i_size - 1) >> blkbits;
597*4882a593Smuzhiyun 	map_bh.b_page = page;
598*4882a593Smuzhiyun 	for (page_block = 0; page_block < blocks_per_page; ) {
599*4882a593Smuzhiyun 
600*4882a593Smuzhiyun 		map_bh.b_state = 0;
601*4882a593Smuzhiyun 		map_bh.b_size = 1 << blkbits;
602*4882a593Smuzhiyun 		if (mpd->get_block(inode, block_in_file, &map_bh, 1))
603*4882a593Smuzhiyun 			goto confused;
604*4882a593Smuzhiyun 		if (buffer_new(&map_bh))
605*4882a593Smuzhiyun 			clean_bdev_bh_alias(&map_bh);
606*4882a593Smuzhiyun 		if (buffer_boundary(&map_bh)) {
607*4882a593Smuzhiyun 			boundary_block = map_bh.b_blocknr;
608*4882a593Smuzhiyun 			boundary_bdev = map_bh.b_bdev;
609*4882a593Smuzhiyun 		}
610*4882a593Smuzhiyun 		if (page_block) {
611*4882a593Smuzhiyun 			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
612*4882a593Smuzhiyun 				goto confused;
613*4882a593Smuzhiyun 		}
614*4882a593Smuzhiyun 		blocks[page_block++] = map_bh.b_blocknr;
615*4882a593Smuzhiyun 		boundary = buffer_boundary(&map_bh);
616*4882a593Smuzhiyun 		bdev = map_bh.b_bdev;
617*4882a593Smuzhiyun 		if (block_in_file == last_block)
618*4882a593Smuzhiyun 			break;
619*4882a593Smuzhiyun 		block_in_file++;
620*4882a593Smuzhiyun 	}
621*4882a593Smuzhiyun 	BUG_ON(page_block == 0);
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun 	first_unmapped = page_block;
624*4882a593Smuzhiyun 
625*4882a593Smuzhiyun page_is_mapped:
626*4882a593Smuzhiyun 	end_index = i_size >> PAGE_SHIFT;
627*4882a593Smuzhiyun 	if (page->index >= end_index) {
628*4882a593Smuzhiyun 		/*
629*4882a593Smuzhiyun 		 * The page straddles i_size.  It must be zeroed out on each
630*4882a593Smuzhiyun 		 * and every writepage invocation because it may be mmapped.
631*4882a593Smuzhiyun 		 * "A file is mapped in multiples of the page size.  For a file
632*4882a593Smuzhiyun 		 * that is not a multiple of the page size, the remaining memory
633*4882a593Smuzhiyun 		 * is zeroed when mapped, and writes to that region are not
634*4882a593Smuzhiyun 		 * written out to the file."
635*4882a593Smuzhiyun 		 */
636*4882a593Smuzhiyun 		unsigned offset = i_size & (PAGE_SIZE - 1);
637*4882a593Smuzhiyun 
638*4882a593Smuzhiyun 		if (page->index > end_index || !offset)
639*4882a593Smuzhiyun 			goto confused;
640*4882a593Smuzhiyun 		zero_user_segment(page, offset, PAGE_SIZE);
641*4882a593Smuzhiyun 	}
642*4882a593Smuzhiyun 
643*4882a593Smuzhiyun 	/*
644*4882a593Smuzhiyun 	 * This page will go to BIO.  Do we need to send this BIO off first?
645*4882a593Smuzhiyun 	 */
646*4882a593Smuzhiyun 	if (bio && mpd->last_block_in_bio != blocks[0] - 1)
647*4882a593Smuzhiyun 		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
648*4882a593Smuzhiyun 
649*4882a593Smuzhiyun alloc_new:
650*4882a593Smuzhiyun 	if (bio == NULL) {
651*4882a593Smuzhiyun 		if (first_unmapped == blocks_per_page) {
652*4882a593Smuzhiyun 			if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
653*4882a593Smuzhiyun 								page, wbc))
654*4882a593Smuzhiyun 				goto out;
655*4882a593Smuzhiyun 		}
656*4882a593Smuzhiyun 		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
657*4882a593Smuzhiyun 				BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
658*4882a593Smuzhiyun 		if (bio == NULL)
659*4882a593Smuzhiyun 			goto confused;
660*4882a593Smuzhiyun 
661*4882a593Smuzhiyun 		wbc_init_bio(wbc, bio);
662*4882a593Smuzhiyun 		bio->bi_write_hint = inode->i_write_hint;
663*4882a593Smuzhiyun 	}
664*4882a593Smuzhiyun 
665*4882a593Smuzhiyun 	/*
666*4882a593Smuzhiyun 	 * Must try to add the page before marking the buffer clean or
667*4882a593Smuzhiyun 	 * the confused fail path above (OOM) will be very confused when
668*4882a593Smuzhiyun 	 * it finds all bh marked clean (i.e. it will not write anything)
669*4882a593Smuzhiyun 	 */
670*4882a593Smuzhiyun 	wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
671*4882a593Smuzhiyun 	length = first_unmapped << blkbits;
672*4882a593Smuzhiyun 	if (bio_add_page(bio, page, length, 0) < length) {
673*4882a593Smuzhiyun 		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
674*4882a593Smuzhiyun 		goto alloc_new;
675*4882a593Smuzhiyun 	}
676*4882a593Smuzhiyun 
677*4882a593Smuzhiyun 	clean_buffers(page, first_unmapped);
678*4882a593Smuzhiyun 
679*4882a593Smuzhiyun 	BUG_ON(PageWriteback(page));
680*4882a593Smuzhiyun 	set_page_writeback(page);
681*4882a593Smuzhiyun 	unlock_page(page);
682*4882a593Smuzhiyun 	if (boundary || (first_unmapped != blocks_per_page)) {
683*4882a593Smuzhiyun 		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
684*4882a593Smuzhiyun 		if (boundary_block) {
685*4882a593Smuzhiyun 			write_boundary_block(boundary_bdev,
686*4882a593Smuzhiyun 					boundary_block, 1 << blkbits);
687*4882a593Smuzhiyun 		}
688*4882a593Smuzhiyun 	} else {
689*4882a593Smuzhiyun 		mpd->last_block_in_bio = blocks[blocks_per_page - 1];
690*4882a593Smuzhiyun 	}
691*4882a593Smuzhiyun 	goto out;
692*4882a593Smuzhiyun 
693*4882a593Smuzhiyun confused:
694*4882a593Smuzhiyun 	if (bio)
695*4882a593Smuzhiyun 		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
696*4882a593Smuzhiyun 
697*4882a593Smuzhiyun 	if (mpd->use_writepage) {
698*4882a593Smuzhiyun 		ret = mapping->a_ops->writepage(page, wbc);
699*4882a593Smuzhiyun 	} else {
700*4882a593Smuzhiyun 		ret = -EAGAIN;
701*4882a593Smuzhiyun 		goto out;
702*4882a593Smuzhiyun 	}
703*4882a593Smuzhiyun 	/*
704*4882a593Smuzhiyun 	 * The caller has a ref on the inode, so *mapping is stable
705*4882a593Smuzhiyun 	 */
706*4882a593Smuzhiyun 	mapping_set_error(mapping, ret);
707*4882a593Smuzhiyun out:
708*4882a593Smuzhiyun 	mpd->bio = bio;
709*4882a593Smuzhiyun 	return ret;
710*4882a593Smuzhiyun }
711*4882a593Smuzhiyun 
712*4882a593Smuzhiyun /**
713*4882a593Smuzhiyun  * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
714*4882a593Smuzhiyun  * @mapping: address space structure to write
715*4882a593Smuzhiyun  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
716*4882a593Smuzhiyun  * @get_block: the filesystem's block mapper function.
717*4882a593Smuzhiyun  *             If this is NULL then use a_ops->writepage.  Otherwise, go
718*4882a593Smuzhiyun  *             direct-to-BIO.
719*4882a593Smuzhiyun  *
720*4882a593Smuzhiyun  * This is a library function, which implements the writepages()
721*4882a593Smuzhiyun  * address_space_operation.
722*4882a593Smuzhiyun  *
723*4882a593Smuzhiyun  * If a page is already under I/O, generic_writepages() skips it, even
724*4882a593Smuzhiyun  * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
725*4882a593Smuzhiyun  * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
726*4882a593Smuzhiyun  * and msync() need to guarantee that all the data which was dirty at the time
727*4882a593Smuzhiyun  * the call was made get new I/O started against them.  If wbc->sync_mode is
728*4882a593Smuzhiyun  * WB_SYNC_ALL then we were called for data integrity and we must wait for
729*4882a593Smuzhiyun  * existing IO to complete.
730*4882a593Smuzhiyun  */
731*4882a593Smuzhiyun int
mpage_writepages(struct address_space * mapping,struct writeback_control * wbc,get_block_t get_block)732*4882a593Smuzhiyun mpage_writepages(struct address_space *mapping,
733*4882a593Smuzhiyun 		struct writeback_control *wbc, get_block_t get_block)
734*4882a593Smuzhiyun {
735*4882a593Smuzhiyun 	struct blk_plug plug;
736*4882a593Smuzhiyun 	int ret;
737*4882a593Smuzhiyun 
738*4882a593Smuzhiyun 	blk_start_plug(&plug);
739*4882a593Smuzhiyun 
740*4882a593Smuzhiyun 	if (!get_block)
741*4882a593Smuzhiyun 		ret = generic_writepages(mapping, wbc);
742*4882a593Smuzhiyun 	else {
743*4882a593Smuzhiyun 		struct mpage_data mpd = {
744*4882a593Smuzhiyun 			.bio = NULL,
745*4882a593Smuzhiyun 			.last_block_in_bio = 0,
746*4882a593Smuzhiyun 			.get_block = get_block,
747*4882a593Smuzhiyun 			.use_writepage = 1,
748*4882a593Smuzhiyun 		};
749*4882a593Smuzhiyun 
750*4882a593Smuzhiyun 		ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
751*4882a593Smuzhiyun 		if (mpd.bio) {
752*4882a593Smuzhiyun 			int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
753*4882a593Smuzhiyun 				  REQ_SYNC : 0);
754*4882a593Smuzhiyun 			mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
755*4882a593Smuzhiyun 		}
756*4882a593Smuzhiyun 	}
757*4882a593Smuzhiyun 	blk_finish_plug(&plug);
758*4882a593Smuzhiyun 	return ret;
759*4882a593Smuzhiyun }
760*4882a593Smuzhiyun EXPORT_SYMBOL(mpage_writepages);
761*4882a593Smuzhiyun 
/**
 * mpage_writepage - write out a single page via __mpage_writepage()
 * @page: the page to write
 * @get_block: the filesystem's block mapper function
 * @wbc: writeback control; wbc->sync_mode selects REQ_SYNC for the final
 *       bio submission when it is WB_SYNC_ALL
 *
 * Sets up a private mpage_data with use_writepage cleared, so when
 * __mpage_writepage() takes its "confused" path it returns -EAGAIN rather
 * than calling a_ops->writepage.  Any bio still pending afterwards is
 * submitted before returning.
 *
 * Return: the value returned by __mpage_writepage().
 */
int mpage_writepage(struct page *page, get_block_t get_block,
	struct writeback_control *wbc)
{
	struct mpage_data mpd = {
		.bio = NULL,
		.last_block_in_bio = 0,
		.get_block = get_block,
		.use_writepage = 0,
	};
	int op_flags = 0;
	int ret;

	ret = __mpage_writepage(page, wbc, &mpd);

	/* Nothing left pending: the result can be returned as-is. */
	if (!mpd.bio)
		return ret;

	if (wbc->sync_mode == WB_SYNC_ALL)
		op_flags = REQ_SYNC;
	mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);

	return ret;
}
EXPORT_SYMBOL(mpage_writepage);
780