xref: /OK3568_Linux_fs/kernel/fs/ocfs2/mmap.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * mmap.c
 *
 * Code to deal with the mess that is clustered mmap.
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 */
11*4882a593Smuzhiyun 
12*4882a593Smuzhiyun #include <linux/fs.h>
13*4882a593Smuzhiyun #include <linux/types.h>
14*4882a593Smuzhiyun #include <linux/highmem.h>
15*4882a593Smuzhiyun #include <linux/pagemap.h>
16*4882a593Smuzhiyun #include <linux/uio.h>
17*4882a593Smuzhiyun #include <linux/signal.h>
18*4882a593Smuzhiyun #include <linux/rbtree.h>
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #include <cluster/masklog.h>
21*4882a593Smuzhiyun 
22*4882a593Smuzhiyun #include "ocfs2.h"
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun #include "aops.h"
25*4882a593Smuzhiyun #include "dlmglue.h"
26*4882a593Smuzhiyun #include "file.h"
27*4882a593Smuzhiyun #include "inode.h"
28*4882a593Smuzhiyun #include "mmap.h"
29*4882a593Smuzhiyun #include "super.h"
30*4882a593Smuzhiyun #include "ocfs2_trace.h"
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun 
ocfs2_fault(struct vm_fault * vmf)33*4882a593Smuzhiyun static vm_fault_t ocfs2_fault(struct vm_fault *vmf)
34*4882a593Smuzhiyun {
35*4882a593Smuzhiyun 	struct vm_area_struct *vma = vmf->vma;
36*4882a593Smuzhiyun 	sigset_t oldset;
37*4882a593Smuzhiyun 	vm_fault_t ret;
38*4882a593Smuzhiyun 
39*4882a593Smuzhiyun 	ocfs2_block_signals(&oldset);
40*4882a593Smuzhiyun 	ret = filemap_fault(vmf);
41*4882a593Smuzhiyun 	ocfs2_unblock_signals(&oldset);
42*4882a593Smuzhiyun 
43*4882a593Smuzhiyun 	trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno,
44*4882a593Smuzhiyun 			  vma, vmf->page, vmf->pgoff);
45*4882a593Smuzhiyun 	return ret;
46*4882a593Smuzhiyun }
47*4882a593Smuzhiyun 
__ocfs2_page_mkwrite(struct file * file,struct buffer_head * di_bh,struct page * page)48*4882a593Smuzhiyun static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
49*4882a593Smuzhiyun 			struct buffer_head *di_bh, struct page *page)
50*4882a593Smuzhiyun {
51*4882a593Smuzhiyun 	int err;
52*4882a593Smuzhiyun 	vm_fault_t ret = VM_FAULT_NOPAGE;
53*4882a593Smuzhiyun 	struct inode *inode = file_inode(file);
54*4882a593Smuzhiyun 	struct address_space *mapping = inode->i_mapping;
55*4882a593Smuzhiyun 	loff_t pos = page_offset(page);
56*4882a593Smuzhiyun 	unsigned int len = PAGE_SIZE;
57*4882a593Smuzhiyun 	pgoff_t last_index;
58*4882a593Smuzhiyun 	struct page *locked_page = NULL;
59*4882a593Smuzhiyun 	void *fsdata;
60*4882a593Smuzhiyun 	loff_t size = i_size_read(inode);
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun 	last_index = (size - 1) >> PAGE_SHIFT;
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun 	/*
65*4882a593Smuzhiyun 	 * There are cases that lead to the page no longer belonging to the
66*4882a593Smuzhiyun 	 * mapping.
67*4882a593Smuzhiyun 	 * 1) pagecache truncates locally due to memory pressure.
68*4882a593Smuzhiyun 	 * 2) pagecache truncates when another is taking EX lock against
69*4882a593Smuzhiyun 	 * inode lock. see ocfs2_data_convert_worker.
70*4882a593Smuzhiyun 	 *
71*4882a593Smuzhiyun 	 * The i_size check doesn't catch the case where nodes truncated and
72*4882a593Smuzhiyun 	 * then re-extended the file. We'll re-check the page mapping after
73*4882a593Smuzhiyun 	 * taking the page lock inside of ocfs2_write_begin_nolock().
74*4882a593Smuzhiyun 	 *
75*4882a593Smuzhiyun 	 * Let VM retry with these cases.
76*4882a593Smuzhiyun 	 */
77*4882a593Smuzhiyun 	if ((page->mapping != inode->i_mapping) ||
78*4882a593Smuzhiyun 	    (!PageUptodate(page)) ||
79*4882a593Smuzhiyun 	    (page_offset(page) >= size))
80*4882a593Smuzhiyun 		goto out;
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun 	/*
83*4882a593Smuzhiyun 	 * Call ocfs2_write_begin() and ocfs2_write_end() to take
84*4882a593Smuzhiyun 	 * advantage of the allocation code there. We pass a write
85*4882a593Smuzhiyun 	 * length of the whole page (chopped to i_size) to make sure
86*4882a593Smuzhiyun 	 * the whole thing is allocated.
87*4882a593Smuzhiyun 	 *
88*4882a593Smuzhiyun 	 * Since we know the page is up to date, we don't have to
89*4882a593Smuzhiyun 	 * worry about ocfs2_write_begin() skipping some buffer reads
90*4882a593Smuzhiyun 	 * because the "write" would invalidate their data.
91*4882a593Smuzhiyun 	 */
92*4882a593Smuzhiyun 	if (page->index == last_index)
93*4882a593Smuzhiyun 		len = ((size - 1) & ~PAGE_MASK) + 1;
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
96*4882a593Smuzhiyun 				       &locked_page, &fsdata, di_bh, page);
97*4882a593Smuzhiyun 	if (err) {
98*4882a593Smuzhiyun 		if (err != -ENOSPC)
99*4882a593Smuzhiyun 			mlog_errno(err);
100*4882a593Smuzhiyun 		ret = vmf_error(err);
101*4882a593Smuzhiyun 		goto out;
102*4882a593Smuzhiyun 	}
103*4882a593Smuzhiyun 
104*4882a593Smuzhiyun 	if (!locked_page) {
105*4882a593Smuzhiyun 		ret = VM_FAULT_NOPAGE;
106*4882a593Smuzhiyun 		goto out;
107*4882a593Smuzhiyun 	}
108*4882a593Smuzhiyun 	err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
109*4882a593Smuzhiyun 	BUG_ON(err != len);
110*4882a593Smuzhiyun 	ret = VM_FAULT_LOCKED;
111*4882a593Smuzhiyun out:
112*4882a593Smuzhiyun 	return ret;
113*4882a593Smuzhiyun }
114*4882a593Smuzhiyun 
ocfs2_page_mkwrite(struct vm_fault * vmf)115*4882a593Smuzhiyun static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
116*4882a593Smuzhiyun {
117*4882a593Smuzhiyun 	struct page *page = vmf->page;
118*4882a593Smuzhiyun 	struct inode *inode = file_inode(vmf->vma->vm_file);
119*4882a593Smuzhiyun 	struct buffer_head *di_bh = NULL;
120*4882a593Smuzhiyun 	sigset_t oldset;
121*4882a593Smuzhiyun 	int err;
122*4882a593Smuzhiyun 	vm_fault_t ret;
123*4882a593Smuzhiyun 
124*4882a593Smuzhiyun 	sb_start_pagefault(inode->i_sb);
125*4882a593Smuzhiyun 	ocfs2_block_signals(&oldset);
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 	/*
128*4882a593Smuzhiyun 	 * The cluster locks taken will block a truncate from another
129*4882a593Smuzhiyun 	 * node. Taking the data lock will also ensure that we don't
130*4882a593Smuzhiyun 	 * attempt page truncation as part of a downconvert.
131*4882a593Smuzhiyun 	 */
132*4882a593Smuzhiyun 	err = ocfs2_inode_lock(inode, &di_bh, 1);
133*4882a593Smuzhiyun 	if (err < 0) {
134*4882a593Smuzhiyun 		mlog_errno(err);
135*4882a593Smuzhiyun 		ret = vmf_error(err);
136*4882a593Smuzhiyun 		goto out;
137*4882a593Smuzhiyun 	}
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 	/*
140*4882a593Smuzhiyun 	 * The alloc sem should be enough to serialize with
141*4882a593Smuzhiyun 	 * ocfs2_truncate_file() changing i_size as well as any thread
142*4882a593Smuzhiyun 	 * modifying the inode btree.
143*4882a593Smuzhiyun 	 */
144*4882a593Smuzhiyun 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 	ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
149*4882a593Smuzhiyun 
150*4882a593Smuzhiyun 	brelse(di_bh);
151*4882a593Smuzhiyun 	ocfs2_inode_unlock(inode, 1);
152*4882a593Smuzhiyun 
153*4882a593Smuzhiyun out:
154*4882a593Smuzhiyun 	ocfs2_unblock_signals(&oldset);
155*4882a593Smuzhiyun 	sb_end_pagefault(inode->i_sb);
156*4882a593Smuzhiyun 	return ret;
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun static const struct vm_operations_struct ocfs2_file_vm_ops = {
160*4882a593Smuzhiyun 	.fault		= ocfs2_fault,
161*4882a593Smuzhiyun 	.page_mkwrite	= ocfs2_page_mkwrite,
162*4882a593Smuzhiyun };
163*4882a593Smuzhiyun 
ocfs2_mmap(struct file * file,struct vm_area_struct * vma)164*4882a593Smuzhiyun int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
165*4882a593Smuzhiyun {
166*4882a593Smuzhiyun 	int ret = 0, lock_level = 0;
167*4882a593Smuzhiyun 
168*4882a593Smuzhiyun 	ret = ocfs2_inode_lock_atime(file_inode(file),
169*4882a593Smuzhiyun 				    file->f_path.mnt, &lock_level, 1);
170*4882a593Smuzhiyun 	if (ret < 0) {
171*4882a593Smuzhiyun 		mlog_errno(ret);
172*4882a593Smuzhiyun 		goto out;
173*4882a593Smuzhiyun 	}
174*4882a593Smuzhiyun 	ocfs2_inode_unlock(file_inode(file), lock_level);
175*4882a593Smuzhiyun out:
176*4882a593Smuzhiyun 	vma->vm_ops = &ocfs2_file_vm_ops;
177*4882a593Smuzhiyun 	return 0;
178*4882a593Smuzhiyun }
179*4882a593Smuzhiyun 
180