// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
 */

#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched/signal.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>

#include <asm/page.h>
#include <linux/uaccess.h>

#include "attrib.h"
#include "bitmap.h"
#include "inode.h"
#include "debug.h"
#include "lcnalloc.h"
#include "malloc.h"
#include "mft.h"
#include "ntfs.h"

/**
 * ntfs_file_open - called when an inode is about to be opened
 * @vi:		inode to be opened
 * @filp:	file structure describing the inode
 *
 * Limit file size to the page cache limit on architectures where unsigned long
 * is 32 bits. This is the most we can do for now without overflowing the page
 * cache page index. Doing it this way means we don't run into problems because
 * of files that are already too large. It would be better to allow the user to
 * read the beginning of the file but I doubt very much anyone is going to hit
 * this check on a 32-bit architecture, so there is no point in adding the
 * extra complexity required to support this.
 *
 * On 64-bit architectures, the check is hopefully optimized away by the
 * compiler.
 *
 * After the check passes, just call generic_file_open() to do its work.
 */
static int ntfs_file_open(struct inode *vi, struct file *filp)
{
	if (sizeof(unsigned long) < 8) {
		if (i_size_read(vi) > MAX_LFS_FILESIZE)
			return -EOVERFLOW;
	}
	return generic_file_open(vi, filp);
}

#ifdef NTFS_RW

/**
 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
 * @ni:			ntfs inode of the attribute to extend
 * @new_init_size:	requested new initialized size in bytes
 *
 * Extend the initialized size of an attribute described by the ntfs inode @ni
 * to @new_init_size bytes. This involves zeroing any non-sparse space between
 * the old initialized size and @new_init_size both in the page cache and on
 * disk (if relevant complete pages are already uptodate in the page cache then
 * these are simply marked dirty).
 *
 * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
 * in the resident attribute case, it is tied to the initialized size and, in
 * the non-resident attribute case, it may not fall below the initialized size.
 *
 * Note that if the attribute is resident, we do not need to touch the page
 * cache at all. This is because if the page cache page is not uptodate we
 * bring it uptodate later, when doing the write to the mft record since we
 * then already have the page mapped. And if the page is uptodate, the
 * non-initialized region will already have been zeroed when the page was
 * brought uptodate and the region may in fact already have been overwritten
 * with new data via mmap() based writes, so we cannot just zero it. And since
 * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
 * is unspecified, we choose not to do zeroing and thus we do not need to touch
 * the page at all. For a more detailed explanation see ntfs_truncate() in
 * fs/ntfs/inode.c.
 *
 * Return 0 on success and -errno on error. In the case that an error is
 * encountered it is possible that the initialized size will already have been
 * incremented some way towards @new_init_size but it is guaranteed that if
 * this is the case, the necessary zeroing will also have happened and that all
 * metadata is self-consistent.
 *
 * Locking: i_mutex on the vfs inode corresponding to the ntfs inode @ni must
 * be held by the caller.
 */
static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size)
{
	s64 old_init_size;
	loff_t old_i_size;
	pgoff_t index, end_index;
	unsigned long flags;
	struct inode *vi = VFS_I(ni);
	ntfs_inode *base_ni;
	MFT_RECORD *m = NULL;
	ATTR_RECORD *a;
	ntfs_attr_search_ctx *ctx = NULL;
	struct address_space *mapping;
	struct page *page = NULL;
	u8 *kattr;
	int err;
	u32 attr_len;

	read_lock_irqsave(&ni->size_lock, flags);
	old_init_size = ni->initialized_size;
	old_i_size = i_size_read(vi);
	BUG_ON(new_init_size > ni->allocated_size);
	read_unlock_irqrestore(&ni->size_lock, flags);
	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
			"old_initialized_size 0x%llx, "
			"new_initialized_size 0x%llx, i_size 0x%llx.",
			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
			(unsigned long long)old_init_size,
			(unsigned long long)new_init_size, old_i_size);
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Use goto to reduce indentation and we need the label below anyway. */
	if (NInoNonResident(ni))
		goto do_non_resident_extend;
	BUG_ON(old_init_size != old_i_size);
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err)) {
		if (err == -ENOENT)
			err = -EIO;
		goto err_out;
	}
	m = ctx->mrec;
	a = ctx->attr;
	BUG_ON(a->non_resident);
	/* The total length of the attribute value. */
	attr_len = le32_to_cpu(a->data.resident.value_length);
	BUG_ON(old_i_size != (loff_t)attr_len);
	/*
	 * Do the zeroing in the mft record and update the attribute size in
	 * the mft record.
	 */
	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
	memset(kattr + attr_len, 0, new_init_size - attr_len);
	a->data.resident.value_length = cpu_to_le32((u32)new_init_size);
	/* Finally, update the sizes in the vfs and ntfs inodes. */
	write_lock_irqsave(&ni->size_lock, flags);
	i_size_write(vi, new_init_size);
	ni->initialized_size = new_init_size;
	write_unlock_irqrestore(&ni->size_lock, flags);
	goto done;
do_non_resident_extend:
	/*
	 * If the new initialized size @new_init_size exceeds the current file
	 * size (vfs inode->i_size), we need to extend the file size to the
	 * new initialized size.
	 */
	if (new_init_size > old_i_size) {
		m = map_mft_record(base_ni);
		if (IS_ERR(m)) {
			err = PTR_ERR(m);
			m = NULL;
			goto err_out;
		}
		ctx = ntfs_attr_get_search_ctx(base_ni, m);
		if (unlikely(!ctx)) {
			err = -ENOMEM;
			goto err_out;
		}
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				CASE_SENSITIVE, 0, NULL, 0, ctx);
		if (unlikely(err)) {
			if (err == -ENOENT)
				err = -EIO;
			goto err_out;
		}
		m = ctx->mrec;
		a = ctx->attr;
		BUG_ON(!a->non_resident);
		BUG_ON(old_i_size != (loff_t)
				sle64_to_cpu(a->data.non_resident.data_size));
		a->data.non_resident.data_size = cpu_to_sle64(new_init_size);
		flush_dcache_mft_record_page(ctx->ntfs_ino);
		mark_mft_record_dirty(ctx->ntfs_ino);
		/* Update the file size in the vfs inode. */
		i_size_write(vi, new_init_size);
		ntfs_attr_put_search_ctx(ctx);
		ctx = NULL;
		unmap_mft_record(base_ni);
		m = NULL;
	}
	mapping = vi->i_mapping;
	index = old_init_size >> PAGE_SHIFT;
	end_index = (new_init_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
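	/*
	 * Example of the index arithmetic above, assuming 4KiB pages: with
	 * old_init_size = 0x1800 (6 KiB) and new_init_size = 0x2800 (10 KiB),
	 * index = 0x1800 >> 12 = 1 and end_index = (0x2800 + 0xfff) >> 12 = 3,
	 * so the loop below touches page indices 1 and 2, i.e. every page that
	 * overlaps the newly initialized region, with the end rounded up to a
	 * full page.
	 */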
	do {
		/*
		 * Read the page. If the page is not present, this will zero
		 * the uninitialized regions for us.
		 */
		page = read_mapping_page(mapping, index, NULL);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto init_err_out;
		}
		if (unlikely(PageError(page))) {
			put_page(page);
			err = -EIO;
			goto init_err_out;
		}
		/*
		 * Update the initialized size in the ntfs inode. This is
		 * enough to make ntfs_writepage() work.
		 */
		write_lock_irqsave(&ni->size_lock, flags);
		ni->initialized_size = (s64)(index + 1) << PAGE_SHIFT;
		if (ni->initialized_size > new_init_size)
			ni->initialized_size = new_init_size;
		write_unlock_irqrestore(&ni->size_lock, flags);
		/* Set the page dirty so it gets written out. */
		set_page_dirty(page);
		put_page(page);
		/*
		 * Play nice with the vm and the rest of the system. This is
		 * very much needed as we can potentially be modifying the
		 * initialised size from a very small value to a really huge
		 * value, e.g.
		 *	f = open(somefile, O_TRUNC);
		 *	truncate(f, 10GiB);
		 *	seek(f, 10GiB);
		 *	write(f, 1);
		 * And this would mean we would be marking dirty hundreds of
		 * thousands of pages or as in the above example more than
		 * two and a half million pages!
		 *
		 * TODO: For sparse pages could optimize this workload by using
		 * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This
		 * would be set in readpage for sparse pages and here we would
		 * not need to mark dirty any pages which have this bit set.
		 * The only caveat is that we have to clear the bit everywhere
		 * where we allocate any clusters that lie in the page or that
		 * contain the page.
		 *
		 * TODO: An even greater optimization would be for us to only
		 * call readpage() on pages which are not in sparse regions as
		 * determined from the runlist. This would greatly reduce the
		 * number of pages we read and make dirty in the case of sparse
		 * files.
		 */
		balance_dirty_pages_ratelimited(mapping);
		cond_resched();
	} while (++index < end_index);
	read_lock_irqsave(&ni->size_lock, flags);
	BUG_ON(ni->initialized_size != new_init_size);
	read_unlock_irqrestore(&ni->size_lock, flags);
	/* Now bring in sync the initialized_size in the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		goto init_err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto init_err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err)) {
		if (err == -ENOENT)
			err = -EIO;
		goto init_err_out;
	}
	m = ctx->mrec;
	a = ctx->attr;
	BUG_ON(!a->non_resident);
	a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);
done:
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.",
			(unsigned long long)new_init_size, i_size_read(vi));
	return 0;
init_err_out:
	write_lock_irqsave(&ni->size_lock, flags);
	ni->initialized_size = old_init_size;
	write_unlock_irqrestore(&ni->size_lock, flags);
err_out:
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	ntfs_debug("Failed. Returning error code %i.", err);
	return err;
}

static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb,
		struct iov_iter *from)
{
	loff_t pos;
	s64 end, ll;
	ssize_t err;
	unsigned long flags;
	struct file *file = iocb->ki_filp;
	struct inode *vi = file_inode(file);
	ntfs_inode *base_ni, *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;

	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
			"0x%llx, count 0x%zx.", vi->i_ino,
			(unsigned)le32_to_cpu(ni->type),
			(unsigned long long)iocb->ki_pos,
			iov_iter_count(from));
	err = generic_write_checks(iocb, from);
	if (unlikely(err <= 0))
		goto out;
	/*
	 * All checks have passed. Before we start doing any writing we want
	 * to abort any totally illegal writes.
	 */
	BUG_ON(NInoMstProtected(ni));
	BUG_ON(ni->type != AT_DATA);
	/* If file is encrypted, deny access, just like NT4. */
	if (NInoEncrypted(ni)) {
		/* Only $DATA attributes can be encrypted. */
		/*
		 * Reminder for later: Encrypted files are _always_
		 * non-resident so that the content can always be encrypted.
		 */
		ntfs_debug("Denying write access to encrypted file.");
		err = -EACCES;
		goto out;
	}
	if (NInoCompressed(ni)) {
		/* Only unnamed $DATA attribute can be compressed. */
		BUG_ON(ni->name_len);
		/*
		 * Reminder for later: If resident, the data is not actually
		 * compressed. Only on the switch to non-resident does
		 * compression kick in. This is in contrast to encrypted files
		 * (see above).
		 */
		ntfs_error(vi->i_sb, "Writing to compressed files is not "
				"implemented yet. Sorry.");
		err = -EOPNOTSUPP;
		goto out;
	}
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->ext.base_ntfs_ino;
	err = file_remove_privs(file);
	if (unlikely(err))
		goto out;
	/*
	 * Our ->update_time method always succeeds thus file_update_time()
	 * cannot fail either so there is no need to check the return code.
	 */
	file_update_time(file);
	pos = iocb->ki_pos;
	/* The first byte after the last cluster being written to. */
	end = (pos + iov_iter_count(from) + vol->cluster_size_mask) &
			~(u64)vol->cluster_size_mask;
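	/*
	 * Example of the rounding above, assuming a 4KiB cluster size
	 * (cluster_size_mask = 0xfff): pos = 0x1100 and a count of 0x200
	 * bytes give pos + count = 0x1300, so
	 * end = (0x1300 + 0xfff) & ~0xfff = 0x2000, i.e. the end of the
	 * write rounded up to the next cluster boundary.
	 */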
	/*
	 * If the write goes beyond the allocated size, extend the allocation
	 * to cover the whole of the write, rounded up to the nearest cluster.
	 */
	read_lock_irqsave(&ni->size_lock, flags);
	ll = ni->allocated_size;
	read_unlock_irqrestore(&ni->size_lock, flags);
	if (end > ll) {
		/*
		 * Extend the allocation without changing the data size.
		 *
		 * Note we ensure the allocation is big enough to at least
		 * write some data but we do not require the allocation to be
		 * complete, i.e. it may be partial.
		 */
		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
		if (likely(ll >= 0)) {
			BUG_ON(pos >= ll);
			/* If the extension was partial truncate the write. */
			if (end > ll) {
				ntfs_debug("Truncating write to inode 0x%lx, "
						"attribute type 0x%x, because "
						"the allocation was only "
						"partially extended.",
						vi->i_ino, (unsigned)
						le32_to_cpu(ni->type));
				iov_iter_truncate(from, ll - pos);
			}
		} else {
			err = ll;
			read_lock_irqsave(&ni->size_lock, flags);
			ll = ni->allocated_size;
			read_unlock_irqrestore(&ni->size_lock, flags);
			/* Perform a partial write if possible or fail. */
			if (pos < ll) {
				ntfs_debug("Truncating write to inode 0x%lx "
						"attribute type 0x%x, because "
						"extending the allocation "
						"failed (error %d).",
						vi->i_ino, (unsigned)
						le32_to_cpu(ni->type),
						(int)-err);
				iov_iter_truncate(from, ll - pos);
			} else {
				if (err != -ENOSPC)
					ntfs_error(vi->i_sb, "Cannot perform "
							"write to inode "
							"0x%lx, attribute "
							"type 0x%x, because "
							"extending the "
							"allocation failed "
							"(error %ld).",
							vi->i_ino, (unsigned)
							le32_to_cpu(ni->type),
							(long)-err);
				else
					ntfs_debug("Cannot perform write to "
							"inode 0x%lx, "
							"attribute type 0x%x, "
							"because there is no "
							"space left.",
							vi->i_ino, (unsigned)
							le32_to_cpu(ni->type));
				goto out;
			}
		}
	}
	/*
	 * If the write starts beyond the initialized size, extend it up to the
	 * beginning of the write and initialize all non-sparse space between
	 * the old initialized size and the new one. This automatically also
	 * increments the vfs inode->i_size to keep it above or equal to the
	 * initialized_size.
	 */
	read_lock_irqsave(&ni->size_lock, flags);
	ll = ni->initialized_size;
	read_unlock_irqrestore(&ni->size_lock, flags);
	if (pos > ll) {
		/*
		 * Wait for ongoing direct i/o to complete before proceeding.
		 * New direct i/o cannot start as we hold i_mutex.
		 */
		inode_dio_wait(vi);
		err = ntfs_attr_extend_initialized(ni, pos);
		if (unlikely(err < 0))
			ntfs_error(vi->i_sb, "Cannot perform write to inode "
					"0x%lx, attribute type 0x%x, because "
					"extending the initialized size "
					"failed (error %d).", vi->i_ino,
					(unsigned)le32_to_cpu(ni->type),
					(int)-err);
	}
out:
	return err;
}

/**
 * __ntfs_grab_cache_pages - obtain a number of locked pages
 * @mapping:	address space mapping from which to obtain page cache pages
 * @index:	starting index in @mapping at which to begin obtaining pages
 * @nr_pages:	number of page cache pages to obtain
 * @pages:	array of pages in which to return the obtained page cache pages
 * @cached_page: allocated but as yet unused page
 *
 * Obtain @nr_pages locked page cache pages from the mapping @mapping and
 * starting at index @index.
 *
 * If a page is newly created, it is added to the LRU list.
 *
 * Note, the page locks are obtained in ascending page index order.
 */
static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
		pgoff_t index, const unsigned nr_pages, struct page **pages,
		struct page **cached_page)
{
	int err, nr;

	BUG_ON(!nr_pages);
	err = nr = 0;
	do {
		pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK |
				FGP_ACCESSED);
		if (!pages[nr]) {
			if (!*cached_page) {
				*cached_page = page_cache_alloc(mapping);
				if (unlikely(!*cached_page)) {
					err = -ENOMEM;
					goto err_out;
				}
			}
			err = add_to_page_cache_lru(*cached_page, mapping,
					index,
					mapping_gfp_constraint(mapping, GFP_KERNEL));
			if (unlikely(err)) {
				if (err == -EEXIST)
					continue;
				goto err_out;
			}
			pages[nr] = *cached_page;
			*cached_page = NULL;
		}
		index++;
		nr++;
	} while (nr < nr_pages);
out:
	return err;
err_out:
	while (nr > 0) {
		unlock_page(pages[--nr]);
		put_page(pages[nr]);
	}
	goto out;
}

static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
{
	lock_buffer(bh);
	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	return submit_bh(REQ_OP_READ, 0, bh);
}
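
/*
 * Note on the helper above: get_bh() takes an extra reference so the buffer
 * head cannot go away while the read is in flight, and end_buffer_read_sync()
 * unlocks the buffer and drops that reference on i/o completion. Callers
 * therefore collect the submitted buffer heads (see the wait[] array in
 * ntfs_prepare_pages_for_non_resident_write() below) and later wait on each
 * one with wait_on_buffer(), checking buffer_uptodate() to detect read errors.
 */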

/**
 * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data
 * @pages:	array of destination pages
 * @nr_pages:	number of pages in @pages
 * @pos:	byte position in file at which the write begins
 * @bytes:	number of bytes to be written
 *
 * This is called for non-resident attributes from ntfs_file_buffered_write()
 * with i_mutex held on the inode (@pages[0]->mapping->host). There are
 * @nr_pages pages in @pages which are locked but not kmap()ped. The source
 * data has not yet been copied into the @pages.
 *
 * Need to fill any holes with actual clusters, allocate buffers if necessary,
 * ensure all the buffers are mapped, and bring uptodate any buffers that are
 * only partially being written to.
 *
 * If @nr_pages is greater than one, we are guaranteed that the cluster size is
 * greater than PAGE_SIZE, that all pages in @pages are entirely inside
 * the same cluster and that they are the entirety of that cluster, and that
 * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
 *
 * i_size is not to be modified yet.
 *
 * Return 0 on success or -errno on error.
 */
static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
		unsigned nr_pages, s64 pos, size_t bytes)
{
	VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend;
	LCN lcn;
	s64 bh_pos, vcn_len, end, initialized_size;
	sector_t lcn_block;
	struct page *page;
	struct inode *vi;
	ntfs_inode *ni, *base_ni = NULL;
	ntfs_volume *vol;
	runlist_element *rl, *rl2;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
	ntfs_attr_search_ctx *ctx = NULL;
	MFT_RECORD *m = NULL;
	ATTR_RECORD *a = NULL;
	unsigned long flags;
	u32 attr_rec_len = 0;
	unsigned blocksize, u;
	int err, mp_size;
	bool rl_write_locked, was_hole, is_retry;
	unsigned char blocksize_bits;
	struct {
		u8 runlist_merged:1;
		u8 mft_attr_mapped:1;
		u8 mp_rebuilt:1;
		u8 attr_switched:1;
	} status = { 0, 0, 0, 0 };

	BUG_ON(!nr_pages);
	BUG_ON(!pages);
	BUG_ON(!*pages);
	vi = pages[0]->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;
	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
			vi->i_ino, ni->type, pages[0]->index, nr_pages,
			(long long)pos, bytes);
	blocksize = vol->sb->s_blocksize;
	blocksize_bits = vol->sb->s_blocksize_bits;
	u = 0;
	do {
		page = pages[u];
		BUG_ON(!page);
		/*
		 * create_empty_buffers() will create uptodate/dirty buffers if
		 * the page is uptodate/dirty.
		 */
		if (!page_has_buffers(page)) {
			create_empty_buffers(page, blocksize, 0);
			if (unlikely(!page_has_buffers(page)))
				return -ENOMEM;
		}
	} while (++u < nr_pages);
	rl_write_locked = false;
	rl = NULL;
	err = 0;
	vcn = lcn = -1;
	vcn_len = 0;
	lcn_block = -1;
	was_hole = false;
	cpos = pos >> vol->cluster_size_bits;
	end = pos + bytes;
	cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits;
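	/*
	 * Example of the cluster arithmetic above, assuming a 4KiB cluster
	 * size (cluster_size_bits = 12): pos = 0x1100 and bytes = 0x200 give
	 * cpos = 0x1100 >> 12 = 1, end = 0x1300, and
	 * cend = (0x1300 + 0xfff) >> 12 = 2, i.e. the write touches only
	 * cluster 1 and @cend is the first cluster beyond the write.
	 */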
	/*
	 * Loop over each page and for each page over each buffer. Use goto to
	 * reduce indentation.
	 */
	u = 0;
do_next_page:
	page = pages[u];
	bh_pos = (s64)page->index << PAGE_SHIFT;
	bh = head = page_buffers(page);
	do {
		VCN cdelta;
		s64 bh_end;
		unsigned bh_cofs;

		/* Clear buffer_new on all buffers to reinitialise state. */
		if (buffer_new(bh))
			clear_buffer_new(bh);
		bh_end = bh_pos + blocksize;
		bh_cpos = bh_pos >> vol->cluster_size_bits;
		bh_cofs = bh_pos & vol->cluster_size_mask;
		if (buffer_mapped(bh)) {
			/*
			 * The buffer is already mapped. If it is uptodate,
			 * ignore it.
			 */
			if (buffer_uptodate(bh))
				continue;
			/*
			 * The buffer is not uptodate. If the page is uptodate
			 * set the buffer uptodate and otherwise ignore it.
			 */
			if (PageUptodate(page)) {
				set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * Neither the page nor the buffer are uptodate. If
			 * the buffer is only partially being written to, we
			 * need to read it in before the write, i.e. now.
			 */
			if ((bh_pos < pos && bh_end > pos) ||
					(bh_pos < end && bh_end > end)) {
				/*
				 * If the buffer is fully or partially within
				 * the initialized size, do an actual read.
				 * Otherwise, simply zero the buffer.
				 */
				read_lock_irqsave(&ni->size_lock, flags);
				initialized_size = ni->initialized_size;
				read_unlock_irqrestore(&ni->size_lock, flags);
				if (bh_pos < initialized_size) {
					ntfs_submit_bh_for_read(bh);
					*wait_bh++ = bh;
				} else {
					zero_user(page, bh_offset(bh),
							blocksize);
					set_buffer_uptodate(bh);
				}
			}
			continue;
		}
		/* Unmapped buffer. Need to map it. */
		bh->b_bdev = vol->sb->s_bdev;
		/*
		 * If the current buffer is in the same clusters as the map
		 * cache, there is no need to check the runlist again. The
		 * map cache is made up of @vcn, which is the first cached file
		 * cluster, @vcn_len which is the number of cached file
		 * clusters, @lcn is the device cluster corresponding to @vcn,
		 * and @lcn_block is the block number corresponding to @lcn.
		 */
		cdelta = bh_cpos - vcn;
		if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) {
map_buffer_cached:
			BUG_ON(lcn < 0);
			bh->b_blocknr = lcn_block +
					(cdelta << (vol->cluster_size_bits -
					blocksize_bits)) +
					(bh_cofs >> blocksize_bits);
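			/*
			 * Example of the block number arithmetic above,
			 * assuming 4KiB clusters and 512-byte blocks
			 * (cluster_size_bits = 12, blocksize_bits = 9): a
			 * cached run starting at lcn 0x100 gives
			 * lcn_block = 0x100 << 3 = 0x800; for a buffer two
			 * clusters into the run (cdelta = 2) at cluster
			 * offset bh_cofs = 0x600, b_blocknr = 0x800 +
			 * (2 << 3) + (0x600 >> 9) = 0x813.
			 */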
			set_buffer_mapped(bh);
			/*
			 * If the page is uptodate so is the buffer. If the
			 * buffer is fully outside the write, we ignore it if
			 * it was already allocated and we mark it dirty so it
			 * gets written out if we allocated it. On the other
			 * hand, if we allocated the buffer but we are not
			 * marking it dirty we set buffer_new so we can do
			 * error recovery.
			 */
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
				if (unlikely(was_hole)) {
					/* We allocated the buffer. */
					clean_bdev_bh_alias(bh);
					if (bh_end <= pos || bh_pos >= end)
						mark_buffer_dirty(bh);
					else
						set_buffer_new(bh);
				}
				continue;
			}
			/* Page is _not_ uptodate. */
			if (likely(!was_hole)) {
				/*
				 * Buffer was already allocated. If it is not
				 * uptodate and is only partially being written
				 * to, we need to read it in before the write,
				 * i.e. now.
				 */
				if (!buffer_uptodate(bh) && bh_pos < end &&
						bh_end > pos &&
						(bh_pos < pos ||
						bh_end > end)) {
					/*
					 * If the buffer is fully or partially
					 * within the initialized size, do an
					 * actual read. Otherwise, simply zero
					 * the buffer.
					 */
					read_lock_irqsave(&ni->size_lock,
							flags);
					initialized_size = ni->initialized_size;
					read_unlock_irqrestore(&ni->size_lock,
							flags);
					if (bh_pos < initialized_size) {
						ntfs_submit_bh_for_read(bh);
						*wait_bh++ = bh;
					} else {
						zero_user(page, bh_offset(bh),
								blocksize);
						set_buffer_uptodate(bh);
					}
				}
				continue;
			}
			/* We allocated the buffer. */
			clean_bdev_bh_alias(bh);
			/*
			 * If the buffer is fully outside the write, zero it,
			 * set it uptodate, and mark it dirty so it gets
			 * written out. If it is partially being written to,
			 * zero region surrounding the write but leave it to
			 * commit write to do anything else. Finally, if the
			 * buffer is fully being overwritten, do nothing.
			 */
			if (bh_end <= pos || bh_pos >= end) {
				if (!buffer_uptodate(bh)) {
					zero_user(page, bh_offset(bh),
							blocksize);
					set_buffer_uptodate(bh);
				}
				mark_buffer_dirty(bh);
				continue;
			}
			set_buffer_new(bh);
			if (!buffer_uptodate(bh) &&
					(bh_pos < pos || bh_end > end)) {
				u8 *kaddr;
				unsigned pofs;

				kaddr = kmap_atomic(page);
				if (bh_pos < pos) {
					pofs = bh_pos & ~PAGE_MASK;
					memset(kaddr + pofs, 0, pos - bh_pos);
				}
				if (bh_end > end) {
					pofs = end & ~PAGE_MASK;
					memset(kaddr + pofs, 0, bh_end - end);
				}
				kunmap_atomic(kaddr);
				flush_dcache_page(page);
			}
			continue;
		}
		/*
		 * Slow path: this is the first buffer in the cluster. If it
		 * is outside allocated size and is not uptodate, zero it and
		 * set it uptodate.
		 */
		read_lock_irqsave(&ni->size_lock, flags);
		initialized_size = ni->allocated_size;
		read_unlock_irqrestore(&ni->size_lock, flags);
		if (bh_pos > initialized_size) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			} else if (!buffer_uptodate(bh)) {
				zero_user(page, bh_offset(bh), blocksize);
				set_buffer_uptodate(bh);
			}
			continue;
		}
		is_retry = false;
		if (!rl) {
			down_read(&ni->runlist.lock);
retry_remap:
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target cluster. */
			while (rl->length && rl[1].vcn <= bh_cpos)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos);
			if (likely(lcn >= 0)) {
				/*
				 * Successful remap, setup the map cache and
				 * use that to deal with the buffer.
				 */
				was_hole = false;
				vcn = bh_cpos;
				vcn_len = rl[1].vcn - vcn;
				lcn_block = lcn << (vol->cluster_size_bits -
						blocksize_bits);
				cdelta = 0;
				/*
				 * If the number of remaining clusters touched
				 * by the write is smaller or equal to the
				 * number of cached clusters, unlock the
				 * runlist as the map cache will be used from
				 * now on.
				 */
				if (likely(vcn + vcn_len >= cend)) {
					if (rl_write_locked) {
						up_write(&ni->runlist.lock);
						rl_write_locked = false;
					} else
						up_read(&ni->runlist.lock);
					rl = NULL;
				}
				goto map_buffer_cached;
			}
		} else
			lcn = LCN_RL_NOT_MAPPED;
		/*
		 * If it is not a hole and not out of bounds, the runlist is
		 * probably unmapped so try to map it now.
		 */
		if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) {
			if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) {
				/* Attempt to map runlist. */
				if (!rl_write_locked) {
					/*
					 * We need the runlist locked for
					 * writing, so if it is locked for
					 * reading relock it now and retry in
					 * case it changed whilst we dropped
					 * the lock.
					 */
					up_read(&ni->runlist.lock);
					down_write(&ni->runlist.lock);
					rl_write_locked = true;
					goto retry_remap;
				}
				err = ntfs_map_runlist_nolock(ni, bh_cpos,
						NULL);
				if (likely(!err)) {
					is_retry = true;
					goto retry_remap;
				}
				/*
				 * If @vcn is out of bounds, pretend @lcn is
				 * LCN_ENOENT. As long as the buffer is out
				 * of bounds this will work fine.
				 */
				if (err == -ENOENT) {
					lcn = LCN_ENOENT;
					err = 0;
					goto rl_not_mapped_enoent;
				}
			} else
				err = -EIO;
			/* Failed to map the buffer, even after retrying. */
			bh->b_blocknr = -1;
			ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
					"attribute type 0x%x, vcn 0x%llx, "
					"vcn offset 0x%x, because its "
					"location on disk could not be "
					"determined%s (error code %i).",
					ni->mft_no, ni->type,
					(unsigned long long)bh_cpos,
					(unsigned)bh_pos &
					vol->cluster_size_mask,
					is_retry ? " even after retrying" : "",
					err);
			break;
		}
rl_not_mapped_enoent:
		/*
		 * The buffer is in a hole or out of bounds. We need to fill
		 * the hole, unless the buffer is in a cluster which is not
		 * touched by the write, in which case we just leave the buffer
		 * unmapped. This can only happen when the cluster size is
		 * less than the page cache size.
		 */
		if (unlikely(vol->cluster_size < PAGE_SIZE)) {
			bh_cend = (bh_end + vol->cluster_size - 1) >>
					vol->cluster_size_bits;
			if ((bh_cend <= cpos || bh_cpos >= cend)) {
				bh->b_blocknr = -1;
				/*
				 * If the buffer is uptodate we skip it. If it
				 * is not but the page is uptodate, we can set
				 * the buffer uptodate. If the page is not
				 * uptodate, we can clear the buffer and set it
				 * uptodate. Whether this is worthwhile is
				 * debatable and this could be removed.
				 */
				if (PageUptodate(page)) {
					if (!buffer_uptodate(bh))
						set_buffer_uptodate(bh);
				} else if (!buffer_uptodate(bh)) {
					zero_user(page, bh_offset(bh),
							blocksize);
					set_buffer_uptodate(bh);
				}
				continue;
			}
		}
		/*
		 * Out of bounds buffer is invalid if it was not really out of
		 * bounds.
		 */
		BUG_ON(lcn != LCN_HOLE);
		/*
		 * We need the runlist locked for writing, so if it is locked
		 * for reading relock it now and retry in case it changed
		 * whilst we dropped the lock.
		 */
		BUG_ON(!rl);
		if (!rl_write_locked) {
			up_read(&ni->runlist.lock);
			down_write(&ni->runlist.lock);
			rl_write_locked = true;
			goto retry_remap;
		}
		/* Find the previous last allocated cluster. */
		BUG_ON(rl->lcn != LCN_HOLE);
		lcn = -1;
		rl2 = rl;
		while (--rl2 >= ni->runlist.rl) {
			if (rl2->lcn >= 0) {
				lcn = rl2->lcn + rl2->length;
				break;
			}
		}
		rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE,
				false);
		if (IS_ERR(rl2)) {
			err = PTR_ERR(rl2);
			ntfs_debug("Failed to allocate cluster, error code %i.",
					err);
			break;
		}
		lcn = rl2->lcn;
		rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
		if (IS_ERR(rl)) {
			err = PTR_ERR(rl);
			if (err != -ENOMEM)
				err = -EIO;
			if (ntfs_cluster_free_from_rl(vol, rl2)) {
				ntfs_error(vol->sb, "Failed to release "
						"allocated cluster in error "
						"code path. Run chkdsk to "
						"recover the lost cluster.");
				NVolSetErrors(vol);
			}
			ntfs_free(rl2);
			break;
		}
		ni->runlist.rl = rl;
		status.runlist_merged = 1;
		ntfs_debug("Allocated cluster, lcn 0x%llx.",
				(unsigned long long)lcn);
		/* Map and lock the mft record and get the attribute record. */
		if (!NInoAttr(ni))
			base_ni = ni;
		else
			base_ni = ni->ext.base_ntfs_ino;
		m = map_mft_record(base_ni);
		if (IS_ERR(m)) {
			err = PTR_ERR(m);
			break;
		}
		ctx = ntfs_attr_get_search_ctx(base_ni, m);
		if (unlikely(!ctx)) {
			err = -ENOMEM;
			unmap_mft_record(base_ni);
			break;
		}
		status.mft_attr_mapped = 1;
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				CASE_SENSITIVE, bh_cpos, NULL, 0, ctx);
		if (unlikely(err)) {
			if (err == -ENOENT)
				err = -EIO;
			break;
		}
		m = ctx->mrec;
		a = ctx->attr;
		/*
		 * Find the runlist element with which the attribute extent
		 * starts. Note, we cannot use the _attr_ version because we
		 * have mapped the mft record. That is ok because we know the
		 * runlist fragment must be mapped already to have ever gotten
		 * here, so we can just use the _rl_ version.
		 */
		vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn);
		rl2 = ntfs_rl_find_vcn_nolock(rl, vcn);
		BUG_ON(!rl2);
		BUG_ON(!rl2->length);
		BUG_ON(rl2->lcn < LCN_HOLE);
		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
		/*
		 * If @highest_vcn is zero, calculate the real highest_vcn
		 * (which can really be zero).
		 */
		if (!highest_vcn)
			highest_vcn = (sle64_to_cpu(
					a->data.non_resident.allocated_size) >>
					vol->cluster_size_bits) - 1;
		/*
		 * Determine the size of the mapping pairs array for the new
		 * extent, i.e. the old extent with the hole filled.
		 */
		mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn,
				highest_vcn);
		if (unlikely(mp_size <= 0)) {
			if (!(err = mp_size))
				err = -EIO;
			ntfs_debug("Failed to get size for mapping pairs "
					"array, error code %i.", err);
			break;
		}
		/*
		 * Resize the attribute record to fit the new mapping pairs
		 * array.
		 */
		attr_rec_len = le32_to_cpu(a->length);
		err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu(
				a->data.non_resident.mapping_pairs_offset));
		if (unlikely(err)) {
			BUG_ON(err != -ENOSPC);
			// TODO: Deal with this by using the current attribute
			// and filling it with as much of the mapping pairs
			// array as possible. Then loop over each attribute
			// extent rewriting the mapping pairs arrays as we go
			// along and, if we reach the end without enough
			// space, try to resize the last attribute extent and
			// if even that fails, add a new attribute extent.
			// We could also try to resize at each step in the hope
			// that we will not need to rewrite every single extent.
			// Note, we may need to decompress some extents to fill
			// the runlist as we are walking the extents...
			ntfs_error(vol->sb, "Not enough space in the mft "
					"record for the extended attribute "
					"record. This case is not "
					"implemented yet.");
			err = -EOPNOTSUPP;
			break;
		}
		status.mp_rebuilt = 1;
		/*
		 * Generate the mapping pairs array directly into the attribute
		 * record.
		 */
		err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
				a->data.non_resident.mapping_pairs_offset),
				mp_size, rl2, vcn, highest_vcn, NULL);
		if (unlikely(err)) {
			ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, "
					"attribute type 0x%x, because building "
					"the mapping pairs failed with error "
					"code %i.", vi->i_ino,
					(unsigned)le32_to_cpu(ni->type), err);
			err = -EIO;
			break;
		}
1114*4882a593Smuzhiyun /* Update the highest_vcn but only if it was not set. */
1115*4882a593Smuzhiyun if (unlikely(!a->data.non_resident.highest_vcn))
1116*4882a593Smuzhiyun a->data.non_resident.highest_vcn =
1117*4882a593Smuzhiyun cpu_to_sle64(highest_vcn);
1118*4882a593Smuzhiyun /*
1119*4882a593Smuzhiyun * If the attribute is sparse/compressed, update the compressed
1120*4882a593Smuzhiyun * size in the ntfs_inode structure and the attribute record.
1121*4882a593Smuzhiyun */
1122*4882a593Smuzhiyun if (likely(NInoSparse(ni) || NInoCompressed(ni))) {
1123*4882a593Smuzhiyun /*
1124*4882a593Smuzhiyun * If we are not in the first attribute extent, switch
1125*4882a593Smuzhiyun * to it, but first ensure the changes will make it to
1126*4882a593Smuzhiyun * disk later.
1127*4882a593Smuzhiyun */
1128*4882a593Smuzhiyun if (a->data.non_resident.lowest_vcn) {
1129*4882a593Smuzhiyun flush_dcache_mft_record_page(ctx->ntfs_ino);
1130*4882a593Smuzhiyun mark_mft_record_dirty(ctx->ntfs_ino);
1131*4882a593Smuzhiyun ntfs_attr_reinit_search_ctx(ctx);
1132*4882a593Smuzhiyun err = ntfs_attr_lookup(ni->type, ni->name,
1133*4882a593Smuzhiyun ni->name_len, CASE_SENSITIVE,
1134*4882a593Smuzhiyun 0, NULL, 0, ctx);
1135*4882a593Smuzhiyun if (unlikely(err)) {
1136*4882a593Smuzhiyun status.attr_switched = 1;
1137*4882a593Smuzhiyun break;
1138*4882a593Smuzhiyun }
1139*4882a593Smuzhiyun /* @m is not used any more so do not set it. */
1140*4882a593Smuzhiyun a = ctx->attr;
1141*4882a593Smuzhiyun }
1142*4882a593Smuzhiyun write_lock_irqsave(&ni->size_lock, flags);
1143*4882a593Smuzhiyun ni->itype.compressed.size += vol->cluster_size;
1144*4882a593Smuzhiyun a->data.non_resident.compressed_size =
1145*4882a593Smuzhiyun cpu_to_sle64(ni->itype.compressed.size);
1146*4882a593Smuzhiyun write_unlock_irqrestore(&ni->size_lock, flags);
1147*4882a593Smuzhiyun }
1148*4882a593Smuzhiyun /* Ensure the changes make it to disk. */
1149*4882a593Smuzhiyun flush_dcache_mft_record_page(ctx->ntfs_ino);
1150*4882a593Smuzhiyun mark_mft_record_dirty(ctx->ntfs_ino);
1151*4882a593Smuzhiyun ntfs_attr_put_search_ctx(ctx);
1152*4882a593Smuzhiyun unmap_mft_record(base_ni);
1153*4882a593Smuzhiyun /* Successfully filled the hole. */
1154*4882a593Smuzhiyun status.runlist_merged = 0;
1155*4882a593Smuzhiyun status.mft_attr_mapped = 0;
1156*4882a593Smuzhiyun status.mp_rebuilt = 0;
1157*4882a593Smuzhiyun /* Setup the map cache and use that to deal with the buffer. */
1158*4882a593Smuzhiyun was_hole = true;
1159*4882a593Smuzhiyun vcn = bh_cpos;
1160*4882a593Smuzhiyun vcn_len = 1;
1161*4882a593Smuzhiyun lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits);
1162*4882a593Smuzhiyun cdelta = 0;
1163*4882a593Smuzhiyun /*
1164*4882a593Smuzhiyun * If the number of remaining clusters in @pages is smaller
1165*4882a593Smuzhiyun * than or equal to the number of cached clusters, unlock the
1166*4882a593Smuzhiyun * runlist as the map cache will be used from now on.
1167*4882a593Smuzhiyun */
1168*4882a593Smuzhiyun if (likely(vcn + vcn_len >= cend)) {
1169*4882a593Smuzhiyun up_write(&ni->runlist.lock);
1170*4882a593Smuzhiyun rl_write_locked = false;
1171*4882a593Smuzhiyun rl = NULL;
1172*4882a593Smuzhiyun }
1173*4882a593Smuzhiyun goto map_buffer_cached;
1174*4882a593Smuzhiyun } while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
1175*4882a593Smuzhiyun /* If there are no errors, do the next page. */
1176*4882a593Smuzhiyun if (likely(!err && ++u < nr_pages))
1177*4882a593Smuzhiyun goto do_next_page;
1178*4882a593Smuzhiyun /* If there are no errors, release the runlist lock if we took it. */
1179*4882a593Smuzhiyun if (likely(!err)) {
1180*4882a593Smuzhiyun if (unlikely(rl_write_locked)) {
1181*4882a593Smuzhiyun up_write(&ni->runlist.lock);
1182*4882a593Smuzhiyun rl_write_locked = false;
1183*4882a593Smuzhiyun } else if (unlikely(rl))
1184*4882a593Smuzhiyun up_read(&ni->runlist.lock);
1185*4882a593Smuzhiyun rl = NULL;
1186*4882a593Smuzhiyun }
1187*4882a593Smuzhiyun /* If we issued read requests, let them complete. */
1188*4882a593Smuzhiyun read_lock_irqsave(&ni->size_lock, flags);
1189*4882a593Smuzhiyun initialized_size = ni->initialized_size;
1190*4882a593Smuzhiyun read_unlock_irqrestore(&ni->size_lock, flags);
1191*4882a593Smuzhiyun while (wait_bh > wait) {
1192*4882a593Smuzhiyun bh = *--wait_bh;
1193*4882a593Smuzhiyun wait_on_buffer(bh);
1194*4882a593Smuzhiyun if (likely(buffer_uptodate(bh))) {
1195*4882a593Smuzhiyun page = bh->b_page;
1196*4882a593Smuzhiyun bh_pos = ((s64)page->index << PAGE_SHIFT) +
1197*4882a593Smuzhiyun bh_offset(bh);
1198*4882a593Smuzhiyun /*
1199*4882a593Smuzhiyun * If the buffer overflows the initialized size, need
1200*4882a593Smuzhiyun * to zero the overflowing region.
1201*4882a593Smuzhiyun */
1202*4882a593Smuzhiyun if (unlikely(bh_pos + blocksize > initialized_size)) {
1203*4882a593Smuzhiyun int ofs = 0;
1204*4882a593Smuzhiyun
1205*4882a593Smuzhiyun if (likely(bh_pos < initialized_size))
1206*4882a593Smuzhiyun ofs = initialized_size - bh_pos;
1207*4882a593Smuzhiyun zero_user_segment(page, bh_offset(bh) + ofs,
1208*4882a593Smuzhiyun blocksize);
1209*4882a593Smuzhiyun }
1210*4882a593Smuzhiyun } else /* if (unlikely(!buffer_uptodate(bh))) */
1211*4882a593Smuzhiyun err = -EIO;
1212*4882a593Smuzhiyun }
1213*4882a593Smuzhiyun if (likely(!err)) {
1214*4882a593Smuzhiyun /* Clear buffer_new on all buffers. */
1215*4882a593Smuzhiyun u = 0;
1216*4882a593Smuzhiyun do {
1217*4882a593Smuzhiyun bh = head = page_buffers(pages[u]);
1218*4882a593Smuzhiyun do {
1219*4882a593Smuzhiyun if (buffer_new(bh))
1220*4882a593Smuzhiyun clear_buffer_new(bh);
1221*4882a593Smuzhiyun } while ((bh = bh->b_this_page) != head);
1222*4882a593Smuzhiyun } while (++u < nr_pages);
1223*4882a593Smuzhiyun ntfs_debug("Done.");
1224*4882a593Smuzhiyun return err;
1225*4882a593Smuzhiyun }
1226*4882a593Smuzhiyun if (status.attr_switched) {
1227*4882a593Smuzhiyun /* Get back to the attribute extent we modified. */
1228*4882a593Smuzhiyun ntfs_attr_reinit_search_ctx(ctx);
1229*4882a593Smuzhiyun if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1230*4882a593Smuzhiyun CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) {
1231*4882a593Smuzhiyun ntfs_error(vol->sb, "Failed to find required "
1232*4882a593Smuzhiyun "attribute extent of attribute in "
1233*4882a593Smuzhiyun "error code path. Run chkdsk to "
1234*4882a593Smuzhiyun "recover.");
1235*4882a593Smuzhiyun write_lock_irqsave(&ni->size_lock, flags);
1236*4882a593Smuzhiyun ni->itype.compressed.size += vol->cluster_size;
1237*4882a593Smuzhiyun write_unlock_irqrestore(&ni->size_lock, flags);
1238*4882a593Smuzhiyun flush_dcache_mft_record_page(ctx->ntfs_ino);
1239*4882a593Smuzhiyun mark_mft_record_dirty(ctx->ntfs_ino);
1240*4882a593Smuzhiyun /*
1241*4882a593Smuzhiyun * The only thing that is now wrong is the compressed
1242*4882a593Smuzhiyun * size of the base attribute extent which chkdsk
1243*4882a593Smuzhiyun * should be able to fix.
1244*4882a593Smuzhiyun */
1245*4882a593Smuzhiyun NVolSetErrors(vol);
1246*4882a593Smuzhiyun } else {
1247*4882a593Smuzhiyun m = ctx->mrec;
1248*4882a593Smuzhiyun a = ctx->attr;
1249*4882a593Smuzhiyun status.attr_switched = 0;
1250*4882a593Smuzhiyun }
1251*4882a593Smuzhiyun }
1252*4882a593Smuzhiyun /*
1253*4882a593Smuzhiyun * If the runlist has been modified, need to restore it by punching a
1254*4882a593Smuzhiyun * hole into it and we then need to deallocate the on-disk cluster as
1255*4882a593Smuzhiyun * well. Note, we only modify the runlist if we are able to generate a
1256*4882a593Smuzhiyun * new mapping pairs array, i.e. only when the mapped attribute extent
1257*4882a593Smuzhiyun * is not switched.
1258*4882a593Smuzhiyun */
1259*4882a593Smuzhiyun if (status.runlist_merged && !status.attr_switched) {
1260*4882a593Smuzhiyun BUG_ON(!rl_write_locked);
1261*4882a593Smuzhiyun /* Make the file cluster we allocated sparse in the runlist. */
1262*4882a593Smuzhiyun if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) {
1263*4882a593Smuzhiyun ntfs_error(vol->sb, "Failed to punch hole into "
1264*4882a593Smuzhiyun "attribute runlist in error code "
1265*4882a593Smuzhiyun "path. Run chkdsk to recover the "
1266*4882a593Smuzhiyun "lost cluster.");
1267*4882a593Smuzhiyun NVolSetErrors(vol);
1268*4882a593Smuzhiyun } else /* if (success) */ {
1269*4882a593Smuzhiyun status.runlist_merged = 0;
1270*4882a593Smuzhiyun /*
1271*4882a593Smuzhiyun * Deallocate the on-disk cluster we allocated but only
1272*4882a593Smuzhiyun * if we succeeded in punching its vcn out of the
1273*4882a593Smuzhiyun * runlist.
1274*4882a593Smuzhiyun */
1275*4882a593Smuzhiyun down_write(&vol->lcnbmp_lock);
1276*4882a593Smuzhiyun if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
1277*4882a593Smuzhiyun ntfs_error(vol->sb, "Failed to release "
1278*4882a593Smuzhiyun "allocated cluster in error "
1279*4882a593Smuzhiyun "code path. Run chkdsk to "
1280*4882a593Smuzhiyun "recover the lost cluster.");
1281*4882a593Smuzhiyun NVolSetErrors(vol);
1282*4882a593Smuzhiyun }
1283*4882a593Smuzhiyun up_write(&vol->lcnbmp_lock);
1284*4882a593Smuzhiyun }
1285*4882a593Smuzhiyun }
1286*4882a593Smuzhiyun /*
1287*4882a593Smuzhiyun * Resize the attribute record to its old size and rebuild the mapping
1288*4882a593Smuzhiyun * pairs array. Note, we can only do this if the runlist has been
1289*4882a593Smuzhiyun * restored to its old state which also implies that the mapped
1290*4882a593Smuzhiyun * attribute extent is not switched.
1291*4882a593Smuzhiyun */
1292*4882a593Smuzhiyun if (status.mp_rebuilt && !status.runlist_merged) {
1293*4882a593Smuzhiyun if (ntfs_attr_record_resize(m, a, attr_rec_len)) {
1294*4882a593Smuzhiyun ntfs_error(vol->sb, "Failed to restore attribute "
1295*4882a593Smuzhiyun "record in error code path. Run "
1296*4882a593Smuzhiyun "chkdsk to recover.");
1297*4882a593Smuzhiyun NVolSetErrors(vol);
1298*4882a593Smuzhiyun } else /* if (success) */ {
1299*4882a593Smuzhiyun if (ntfs_mapping_pairs_build(vol, (u8*)a +
1300*4882a593Smuzhiyun le16_to_cpu(a->data.non_resident.
1301*4882a593Smuzhiyun mapping_pairs_offset), attr_rec_len -
1302*4882a593Smuzhiyun le16_to_cpu(a->data.non_resident.
1303*4882a593Smuzhiyun mapping_pairs_offset), ni->runlist.rl,
1304*4882a593Smuzhiyun vcn, highest_vcn, NULL)) {
1305*4882a593Smuzhiyun ntfs_error(vol->sb, "Failed to restore "
1306*4882a593Smuzhiyun "mapping pairs array in error "
1307*4882a593Smuzhiyun "code path. Run chkdsk to "
1308*4882a593Smuzhiyun "recover.");
1309*4882a593Smuzhiyun NVolSetErrors(vol);
1310*4882a593Smuzhiyun }
1311*4882a593Smuzhiyun flush_dcache_mft_record_page(ctx->ntfs_ino);
1312*4882a593Smuzhiyun mark_mft_record_dirty(ctx->ntfs_ino);
1313*4882a593Smuzhiyun }
1314*4882a593Smuzhiyun }
1315*4882a593Smuzhiyun /* Release the mft record and the attribute. */
1316*4882a593Smuzhiyun if (status.mft_attr_mapped) {
1317*4882a593Smuzhiyun ntfs_attr_put_search_ctx(ctx);
1318*4882a593Smuzhiyun unmap_mft_record(base_ni);
1319*4882a593Smuzhiyun }
1320*4882a593Smuzhiyun /* Release the runlist lock. */
1321*4882a593Smuzhiyun if (rl_write_locked)
1322*4882a593Smuzhiyun up_write(&ni->runlist.lock);
1323*4882a593Smuzhiyun else if (rl)
1324*4882a593Smuzhiyun up_read(&ni->runlist.lock);
1325*4882a593Smuzhiyun /*
1326*4882a593Smuzhiyun * Zero out any newly allocated blocks to avoid exposing stale data.
1327*4882a593Smuzhiyun * If BH_New is set, we know that the block was newly allocated above
1328*4882a593Smuzhiyun * and that it has not been fully zeroed and marked dirty yet.
1329*4882a593Smuzhiyun */
1330*4882a593Smuzhiyun nr_pages = u;
1331*4882a593Smuzhiyun u = 0;
1332*4882a593Smuzhiyun end = bh_cpos << vol->cluster_size_bits;
1333*4882a593Smuzhiyun do {
1334*4882a593Smuzhiyun page = pages[u];
1335*4882a593Smuzhiyun bh = head = page_buffers(page);
1336*4882a593Smuzhiyun do {
1337*4882a593Smuzhiyun if (u == nr_pages &&
1338*4882a593Smuzhiyun ((s64)page->index << PAGE_SHIFT) +
1339*4882a593Smuzhiyun bh_offset(bh) >= end)
1340*4882a593Smuzhiyun break;
1341*4882a593Smuzhiyun if (!buffer_new(bh))
1342*4882a593Smuzhiyun continue;
1343*4882a593Smuzhiyun clear_buffer_new(bh);
1344*4882a593Smuzhiyun if (!buffer_uptodate(bh)) {
1345*4882a593Smuzhiyun if (PageUptodate(page))
1346*4882a593Smuzhiyun set_buffer_uptodate(bh);
1347*4882a593Smuzhiyun else {
1348*4882a593Smuzhiyun zero_user(page, bh_offset(bh),
1349*4882a593Smuzhiyun blocksize);
1350*4882a593Smuzhiyun set_buffer_uptodate(bh);
1351*4882a593Smuzhiyun }
1352*4882a593Smuzhiyun }
1353*4882a593Smuzhiyun mark_buffer_dirty(bh);
1354*4882a593Smuzhiyun } while ((bh = bh->b_this_page) != head);
1355*4882a593Smuzhiyun } while (++u <= nr_pages);
1356*4882a593Smuzhiyun ntfs_error(vol->sb, "Failed. Returning error code %i.", err);
1357*4882a593Smuzhiyun return err;
1358*4882a593Smuzhiyun }
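
/*
 * A minimal sketch of the unwind pattern used in the error path above; the
 * helper names (take_lock, alloc_buf, ...) are invented for illustration
 * only and do not exist in this driver:
 *
 *	struct { unsigned lock_taken:1, buf_allocated:1; } done = {};
 *
 *	err = take_lock();
 *	if (err)
 *		goto out;
 *	done.lock_taken = 1;
 *	err = alloc_buf();
 *	if (err)
 *		goto undo;
 *	done.buf_allocated = 1;
 *	...
 *	done.buf_allocated = done.lock_taken = 0;
 *	return 0;
 * undo:
 *	if (done.buf_allocated)
 *		free_buf();
 *	if (done.lock_taken)
 *		drop_lock();
 * out:
 *	return err;
 *
 * The status.runlist_merged, status.mp_rebuilt, status.mft_attr_mapped and
 * status.attr_switched bits above play the role of the "done" flags: each is
 * set once the corresponding work is in place and cleared again once the
 * change has been committed, so a failure in a later iteration does not undo
 * work that is already permanent.
 */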
1359*4882a593Smuzhiyun
1360*4882a593Smuzhiyun static inline void ntfs_flush_dcache_pages(struct page **pages,
1361*4882a593Smuzhiyun unsigned nr_pages)
1362*4882a593Smuzhiyun {
1363*4882a593Smuzhiyun BUG_ON(!nr_pages);
1364*4882a593Smuzhiyun /*
1365*4882a593Smuzhiyun * Warning: Do not do the decrement at the same time as the call to
1366*4882a593Smuzhiyun * flush_dcache_page() because it is a NULL macro on i386 and hence the
1367*4882a593Smuzhiyun * decrement never happens so the loop never terminates.
1368*4882a593Smuzhiyun */
1369*4882a593Smuzhiyun do {
1370*4882a593Smuzhiyun --nr_pages;
1371*4882a593Smuzhiyun flush_dcache_page(pages[nr_pages]);
1372*4882a593Smuzhiyun } while (nr_pages > 0);
1373*4882a593Smuzhiyun }
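
/*
 * Illustration of the pitfall the warning above describes, assuming a
 * hypothetical no-op definition like the historical i386 one:
 *
 *	#define flush_dcache_page(page) do { } while (0)
 *
 * With such a definition the seemingly equivalent loop
 *
 *	do {
 *		flush_dcache_page(pages[--nr_pages]);
 *	} while (nr_pages > 0);
 *
 * never terminates: the macro discards its argument, so the decrement is
 * never evaluated.  Doing the decrement in a separate statement, as above,
 * does not depend on the argument being evaluated.
 */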
1374*4882a593Smuzhiyun
1375*4882a593Smuzhiyun /**
1376*4882a593Smuzhiyun * ntfs_commit_pages_after_non_resident_write - commit the received data
1377*4882a593Smuzhiyun * @pages: array of destination pages
1378*4882a593Smuzhiyun * @nr_pages: number of pages in @pages
1379*4882a593Smuzhiyun * @pos: byte position in file at which the write begins
1380*4882a593Smuzhiyun * @bytes: number of bytes to be written
1381*4882a593Smuzhiyun *
1382*4882a593Smuzhiyun * See description of ntfs_commit_pages_after_write(), below.
1383*4882a593Smuzhiyun */
1384*4882a593Smuzhiyun static inline int ntfs_commit_pages_after_non_resident_write(
1385*4882a593Smuzhiyun struct page **pages, const unsigned nr_pages,
1386*4882a593Smuzhiyun s64 pos, size_t bytes)
1387*4882a593Smuzhiyun {
1388*4882a593Smuzhiyun s64 end, initialized_size;
1389*4882a593Smuzhiyun struct inode *vi;
1390*4882a593Smuzhiyun ntfs_inode *ni, *base_ni;
1391*4882a593Smuzhiyun struct buffer_head *bh, *head;
1392*4882a593Smuzhiyun ntfs_attr_search_ctx *ctx;
1393*4882a593Smuzhiyun MFT_RECORD *m;
1394*4882a593Smuzhiyun ATTR_RECORD *a;
1395*4882a593Smuzhiyun unsigned long flags;
1396*4882a593Smuzhiyun unsigned blocksize, u;
1397*4882a593Smuzhiyun int err;
1398*4882a593Smuzhiyun
1399*4882a593Smuzhiyun vi = pages[0]->mapping->host;
1400*4882a593Smuzhiyun ni = NTFS_I(vi);
1401*4882a593Smuzhiyun blocksize = vi->i_sb->s_blocksize;
1402*4882a593Smuzhiyun end = pos + bytes;
1403*4882a593Smuzhiyun u = 0;
1404*4882a593Smuzhiyun do {
1405*4882a593Smuzhiyun s64 bh_pos;
1406*4882a593Smuzhiyun struct page *page;
1407*4882a593Smuzhiyun bool partial;
1408*4882a593Smuzhiyun
1409*4882a593Smuzhiyun page = pages[u];
1410*4882a593Smuzhiyun bh_pos = (s64)page->index << PAGE_SHIFT;
1411*4882a593Smuzhiyun bh = head = page_buffers(page);
1412*4882a593Smuzhiyun partial = false;
1413*4882a593Smuzhiyun do {
1414*4882a593Smuzhiyun s64 bh_end;
1415*4882a593Smuzhiyun
1416*4882a593Smuzhiyun bh_end = bh_pos + blocksize;
1417*4882a593Smuzhiyun if (bh_end <= pos || bh_pos >= end) {
1418*4882a593Smuzhiyun if (!buffer_uptodate(bh))
1419*4882a593Smuzhiyun partial = true;
1420*4882a593Smuzhiyun } else {
1421*4882a593Smuzhiyun set_buffer_uptodate(bh);
1422*4882a593Smuzhiyun mark_buffer_dirty(bh);
1423*4882a593Smuzhiyun }
1424*4882a593Smuzhiyun } while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
1425*4882a593Smuzhiyun /*
1426*4882a593Smuzhiyun * If all buffers are now uptodate but the page is not, set the
1427*4882a593Smuzhiyun * page uptodate.
1428*4882a593Smuzhiyun */
1429*4882a593Smuzhiyun if (!partial && !PageUptodate(page))
1430*4882a593Smuzhiyun SetPageUptodate(page);
1431*4882a593Smuzhiyun } while (++u < nr_pages);
1432*4882a593Smuzhiyun /*
1433*4882a593Smuzhiyun * Finally, if we do not need to update initialized_size or i_size we
1434*4882a593Smuzhiyun * are finished.
1435*4882a593Smuzhiyun */
1436*4882a593Smuzhiyun read_lock_irqsave(&ni->size_lock, flags);
1437*4882a593Smuzhiyun initialized_size = ni->initialized_size;
1438*4882a593Smuzhiyun read_unlock_irqrestore(&ni->size_lock, flags);
1439*4882a593Smuzhiyun if (end <= initialized_size) {
1440*4882a593Smuzhiyun ntfs_debug("Done.");
1441*4882a593Smuzhiyun return 0;
1442*4882a593Smuzhiyun }
1443*4882a593Smuzhiyun /*
1444*4882a593Smuzhiyun * Update initialized_size/i_size as appropriate, both in the inode and
1445*4882a593Smuzhiyun * the mft record.
1446*4882a593Smuzhiyun */
1447*4882a593Smuzhiyun if (!NInoAttr(ni))
1448*4882a593Smuzhiyun base_ni = ni;
1449*4882a593Smuzhiyun else
1450*4882a593Smuzhiyun base_ni = ni->ext.base_ntfs_ino;
1451*4882a593Smuzhiyun /* Map, pin, and lock the mft record. */
1452*4882a593Smuzhiyun m = map_mft_record(base_ni);
1453*4882a593Smuzhiyun if (IS_ERR(m)) {
1454*4882a593Smuzhiyun err = PTR_ERR(m);
1455*4882a593Smuzhiyun m = NULL;
1456*4882a593Smuzhiyun ctx = NULL;
1457*4882a593Smuzhiyun goto err_out;
1458*4882a593Smuzhiyun }
1459*4882a593Smuzhiyun BUG_ON(!NInoNonResident(ni));
1460*4882a593Smuzhiyun ctx = ntfs_attr_get_search_ctx(base_ni, m);
1461*4882a593Smuzhiyun if (unlikely(!ctx)) {
1462*4882a593Smuzhiyun err = -ENOMEM;
1463*4882a593Smuzhiyun goto err_out;
1464*4882a593Smuzhiyun }
1465*4882a593Smuzhiyun err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1466*4882a593Smuzhiyun CASE_SENSITIVE, 0, NULL, 0, ctx);
1467*4882a593Smuzhiyun if (unlikely(err)) {
1468*4882a593Smuzhiyun if (err == -ENOENT)
1469*4882a593Smuzhiyun err = -EIO;
1470*4882a593Smuzhiyun goto err_out;
1471*4882a593Smuzhiyun }
1472*4882a593Smuzhiyun a = ctx->attr;
1473*4882a593Smuzhiyun BUG_ON(!a->non_resident);
1474*4882a593Smuzhiyun write_lock_irqsave(&ni->size_lock, flags);
1475*4882a593Smuzhiyun BUG_ON(end > ni->allocated_size);
1476*4882a593Smuzhiyun ni->initialized_size = end;
1477*4882a593Smuzhiyun a->data.non_resident.initialized_size = cpu_to_sle64(end);
1478*4882a593Smuzhiyun if (end > i_size_read(vi)) {
1479*4882a593Smuzhiyun i_size_write(vi, end);
1480*4882a593Smuzhiyun a->data.non_resident.data_size =
1481*4882a593Smuzhiyun a->data.non_resident.initialized_size;
1482*4882a593Smuzhiyun }
1483*4882a593Smuzhiyun write_unlock_irqrestore(&ni->size_lock, flags);
1484*4882a593Smuzhiyun /* Mark the mft record dirty, so it gets written back. */
1485*4882a593Smuzhiyun flush_dcache_mft_record_page(ctx->ntfs_ino);
1486*4882a593Smuzhiyun mark_mft_record_dirty(ctx->ntfs_ino);
1487*4882a593Smuzhiyun ntfs_attr_put_search_ctx(ctx);
1488*4882a593Smuzhiyun unmap_mft_record(base_ni);
1489*4882a593Smuzhiyun ntfs_debug("Done.");
1490*4882a593Smuzhiyun return 0;
1491*4882a593Smuzhiyun err_out:
1492*4882a593Smuzhiyun if (ctx)
1493*4882a593Smuzhiyun ntfs_attr_put_search_ctx(ctx);
1494*4882a593Smuzhiyun if (m)
1495*4882a593Smuzhiyun unmap_mft_record(base_ni);
1496*4882a593Smuzhiyun ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error "
1497*4882a593Smuzhiyun "code %i).", err);
1498*4882a593Smuzhiyun if (err != -ENOMEM)
1499*4882a593Smuzhiyun NVolSetErrors(ni->vol);
1500*4882a593Smuzhiyun return err;
1501*4882a593Smuzhiyun }
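
/*
 * The per-buffer test above reduces to a half-open interval overlap check.
 * A minimal sketch with an invented helper name and example numbers
 * (blocksize 4096, write range [6144, 9216)):
 *
 *	static bool bh_overlaps_write(s64 bh_pos, unsigned blocksize,
 *			s64 pos, s64 end)
 *	{
 *		s64 bh_end = bh_pos + blocksize;
 *
 *		return bh_end > pos && bh_pos < end;
 *	}
 *
 * The buffers starting at 4096 and 8192 overlap the write and are marked
 * uptodate and dirty; the ones at 0 and 12288 do not, and if any of those is
 * not uptodate the page as a whole is left not uptodate (partial is true).
 */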
1502*4882a593Smuzhiyun
1503*4882a593Smuzhiyun /**
1504*4882a593Smuzhiyun * ntfs_commit_pages_after_write - commit the received data
1505*4882a593Smuzhiyun * @pages: array of destination pages
1506*4882a593Smuzhiyun * @nr_pages: number of pages in @pages
1507*4882a593Smuzhiyun * @pos: byte position in file at which the write begins
1508*4882a593Smuzhiyun * @bytes: number of bytes to be written
1509*4882a593Smuzhiyun *
1510*4882a593Smuzhiyun * This is called from ntfs_perform_write() with i_mutex held on the inode
1511*4882a593Smuzhiyun * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are
1512*4882a593Smuzhiyun * locked but not kmap()ped. The source data has already been copied into
1513*4882a593Smuzhiyun * @pages. ntfs_prepare_pages_for_non_resident_write() has been called before
1514*4882a593Smuzhiyun * the data was copied (for non-resident attributes only) and it returned
1515*4882a593Smuzhiyun * success.
1516*4882a593Smuzhiyun *
1517*4882a593Smuzhiyun * Need to set uptodate and mark dirty all buffers within the boundary of the
1518*4882a593Smuzhiyun * write. If all buffers in a page are uptodate we set the page uptodate, too.
1519*4882a593Smuzhiyun *
1520*4882a593Smuzhiyun * Setting the buffers dirty ensures that they get written out later when
1521*4882a593Smuzhiyun * ntfs_writepage() is invoked by the VM.
1522*4882a593Smuzhiyun *
1523*4882a593Smuzhiyun * Finally, we need to update i_size and initialized_size as appropriate both
1524*4882a593Smuzhiyun * in the inode and the mft record.
1525*4882a593Smuzhiyun *
1526*4882a593Smuzhiyun * This is modelled after fs/buffer.c::generic_commit_write(), which marks
1527*4882a593Smuzhiyun * buffers uptodate and dirty, sets the page uptodate if all buffers in the
1528*4882a593Smuzhiyun * page are uptodate, and updates i_size if the end of io is beyond i_size. In
1529*4882a593Smuzhiyun * that case, it also marks the inode dirty.
1530*4882a593Smuzhiyun *
1531*4882a593Smuzhiyun * If things have gone as outlined in
1532*4882a593Smuzhiyun * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page
1533*4882a593Smuzhiyun * content modifications here for non-resident attributes. For resident
1534*4882a593Smuzhiyun * attributes we need to do the uptodate bringing here which we combine with
1535*4882a593Smuzhiyun * the copying into the mft record which means we save one atomic kmap.
1536*4882a593Smuzhiyun *
1537*4882a593Smuzhiyun * Return 0 on success or -errno on error.
1538*4882a593Smuzhiyun */
1539*4882a593Smuzhiyun static int ntfs_commit_pages_after_write(struct page **pages,
1540*4882a593Smuzhiyun const unsigned nr_pages, s64 pos, size_t bytes)
1541*4882a593Smuzhiyun {
1542*4882a593Smuzhiyun s64 end, initialized_size;
1543*4882a593Smuzhiyun loff_t i_size;
1544*4882a593Smuzhiyun struct inode *vi;
1545*4882a593Smuzhiyun ntfs_inode *ni, *base_ni;
1546*4882a593Smuzhiyun struct page *page;
1547*4882a593Smuzhiyun ntfs_attr_search_ctx *ctx;
1548*4882a593Smuzhiyun MFT_RECORD *m;
1549*4882a593Smuzhiyun ATTR_RECORD *a;
1550*4882a593Smuzhiyun char *kattr, *kaddr;
1551*4882a593Smuzhiyun unsigned long flags;
1552*4882a593Smuzhiyun u32 attr_len;
1553*4882a593Smuzhiyun int err;
1554*4882a593Smuzhiyun
1555*4882a593Smuzhiyun BUG_ON(!nr_pages);
1556*4882a593Smuzhiyun BUG_ON(!pages);
1557*4882a593Smuzhiyun page = pages[0];
1558*4882a593Smuzhiyun BUG_ON(!page);
1559*4882a593Smuzhiyun vi = page->mapping->host;
1560*4882a593Smuzhiyun ni = NTFS_I(vi);
1561*4882a593Smuzhiyun ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
1562*4882a593Smuzhiyun "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
1563*4882a593Smuzhiyun vi->i_ino, ni->type, page->index, nr_pages,
1564*4882a593Smuzhiyun (long long)pos, bytes);
1565*4882a593Smuzhiyun if (NInoNonResident(ni))
1566*4882a593Smuzhiyun return ntfs_commit_pages_after_non_resident_write(pages,
1567*4882a593Smuzhiyun nr_pages, pos, bytes);
1568*4882a593Smuzhiyun BUG_ON(nr_pages > 1);
1569*4882a593Smuzhiyun /*
1570*4882a593Smuzhiyun * Attribute is resident, implying it is not compressed, encrypted, or
1571*4882a593Smuzhiyun * sparse.
1572*4882a593Smuzhiyun */
1573*4882a593Smuzhiyun if (!NInoAttr(ni))
1574*4882a593Smuzhiyun base_ni = ni;
1575*4882a593Smuzhiyun else
1576*4882a593Smuzhiyun base_ni = ni->ext.base_ntfs_ino;
1577*4882a593Smuzhiyun BUG_ON(NInoNonResident(ni));
1578*4882a593Smuzhiyun /* Map, pin, and lock the mft record. */
1579*4882a593Smuzhiyun m = map_mft_record(base_ni);
1580*4882a593Smuzhiyun if (IS_ERR(m)) {
1581*4882a593Smuzhiyun err = PTR_ERR(m);
1582*4882a593Smuzhiyun m = NULL;
1583*4882a593Smuzhiyun ctx = NULL;
1584*4882a593Smuzhiyun goto err_out;
1585*4882a593Smuzhiyun }
1586*4882a593Smuzhiyun ctx = ntfs_attr_get_search_ctx(base_ni, m);
1587*4882a593Smuzhiyun if (unlikely(!ctx)) {
1588*4882a593Smuzhiyun err = -ENOMEM;
1589*4882a593Smuzhiyun goto err_out;
1590*4882a593Smuzhiyun }
1591*4882a593Smuzhiyun err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1592*4882a593Smuzhiyun CASE_SENSITIVE, 0, NULL, 0, ctx);
1593*4882a593Smuzhiyun if (unlikely(err)) {
1594*4882a593Smuzhiyun if (err == -ENOENT)
1595*4882a593Smuzhiyun err = -EIO;
1596*4882a593Smuzhiyun goto err_out;
1597*4882a593Smuzhiyun }
1598*4882a593Smuzhiyun a = ctx->attr;
1599*4882a593Smuzhiyun BUG_ON(a->non_resident);
1600*4882a593Smuzhiyun /* The total length of the attribute value. */
1601*4882a593Smuzhiyun attr_len = le32_to_cpu(a->data.resident.value_length);
1602*4882a593Smuzhiyun i_size = i_size_read(vi);
1603*4882a593Smuzhiyun BUG_ON(attr_len != i_size);
1604*4882a593Smuzhiyun BUG_ON(pos > attr_len);
1605*4882a593Smuzhiyun end = pos + bytes;
1606*4882a593Smuzhiyun BUG_ON(end > le32_to_cpu(a->length) -
1607*4882a593Smuzhiyun le16_to_cpu(a->data.resident.value_offset));
1608*4882a593Smuzhiyun kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
1609*4882a593Smuzhiyun kaddr = kmap_atomic(page);
1610*4882a593Smuzhiyun /* Copy the received data from the page to the mft record. */
1611*4882a593Smuzhiyun memcpy(kattr + pos, kaddr + pos, bytes);
1612*4882a593Smuzhiyun /* Update the attribute length if necessary. */
1613*4882a593Smuzhiyun if (end > attr_len) {
1614*4882a593Smuzhiyun attr_len = end;
1615*4882a593Smuzhiyun a->data.resident.value_length = cpu_to_le32(attr_len);
1616*4882a593Smuzhiyun }
1617*4882a593Smuzhiyun /*
1618*4882a593Smuzhiyun * If the page is not uptodate, bring the out of bounds area(s)
1619*4882a593Smuzhiyun * uptodate by copying data from the mft record to the page.
1620*4882a593Smuzhiyun */
1621*4882a593Smuzhiyun if (!PageUptodate(page)) {
1622*4882a593Smuzhiyun if (pos > 0)
1623*4882a593Smuzhiyun memcpy(kaddr, kattr, pos);
1624*4882a593Smuzhiyun if (end < attr_len)
1625*4882a593Smuzhiyun memcpy(kaddr + end, kattr + end, attr_len - end);
1626*4882a593Smuzhiyun /* Zero the region outside the end of the attribute value. */
1627*4882a593Smuzhiyun memset(kaddr + attr_len, 0, PAGE_SIZE - attr_len);
1628*4882a593Smuzhiyun flush_dcache_page(page);
1629*4882a593Smuzhiyun SetPageUptodate(page);
1630*4882a593Smuzhiyun }
1631*4882a593Smuzhiyun kunmap_atomic(kaddr);
1632*4882a593Smuzhiyun /* Update initialized_size/i_size if necessary. */
1633*4882a593Smuzhiyun read_lock_irqsave(&ni->size_lock, flags);
1634*4882a593Smuzhiyun initialized_size = ni->initialized_size;
1635*4882a593Smuzhiyun BUG_ON(end > ni->allocated_size);
1636*4882a593Smuzhiyun read_unlock_irqrestore(&ni->size_lock, flags);
1637*4882a593Smuzhiyun BUG_ON(initialized_size != i_size);
1638*4882a593Smuzhiyun if (end > initialized_size) {
1639*4882a593Smuzhiyun write_lock_irqsave(&ni->size_lock, flags);
1640*4882a593Smuzhiyun ni->initialized_size = end;
1641*4882a593Smuzhiyun i_size_write(vi, end);
1642*4882a593Smuzhiyun write_unlock_irqrestore(&ni->size_lock, flags);
1643*4882a593Smuzhiyun }
1644*4882a593Smuzhiyun /* Mark the mft record dirty, so it gets written back. */
1645*4882a593Smuzhiyun flush_dcache_mft_record_page(ctx->ntfs_ino);
1646*4882a593Smuzhiyun mark_mft_record_dirty(ctx->ntfs_ino);
1647*4882a593Smuzhiyun ntfs_attr_put_search_ctx(ctx);
1648*4882a593Smuzhiyun unmap_mft_record(base_ni);
1649*4882a593Smuzhiyun ntfs_debug("Done.");
1650*4882a593Smuzhiyun return 0;
1651*4882a593Smuzhiyun err_out:
1652*4882a593Smuzhiyun if (err == -ENOMEM) {
1653*4882a593Smuzhiyun ntfs_warning(vi->i_sb, "Error allocating memory required to "
1654*4882a593Smuzhiyun "commit the write.");
1655*4882a593Smuzhiyun if (PageUptodate(page)) {
1656*4882a593Smuzhiyun ntfs_warning(vi->i_sb, "Page is uptodate, setting "
1657*4882a593Smuzhiyun "dirty so the write will be retried "
1658*4882a593Smuzhiyun "later on by the VM.");
1659*4882a593Smuzhiyun /*
1660*4882a593Smuzhiyun * Put the page on mapping->dirty_pages, but leave its
1661*4882a593Smuzhiyun * buffers' dirty state as-is.
1662*4882a593Smuzhiyun */
1663*4882a593Smuzhiyun __set_page_dirty_nobuffers(page);
1664*4882a593Smuzhiyun err = 0;
1665*4882a593Smuzhiyun } else
1666*4882a593Smuzhiyun ntfs_error(vi->i_sb, "Page is not uptodate. Written "
1667*4882a593Smuzhiyun "data has been lost.");
1668*4882a593Smuzhiyun } else {
1669*4882a593Smuzhiyun ntfs_error(vi->i_sb, "Resident attribute commit write failed "
1670*4882a593Smuzhiyun "with error %i.", err);
1671*4882a593Smuzhiyun NVolSetErrors(ni->vol);
1672*4882a593Smuzhiyun }
1673*4882a593Smuzhiyun if (ctx)
1674*4882a593Smuzhiyun ntfs_attr_put_search_ctx(ctx);
1675*4882a593Smuzhiyun if (m)
1676*4882a593Smuzhiyun unmap_mft_record(base_ni);
1677*4882a593Smuzhiyun return err;
1678*4882a593Smuzhiyun }
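
/*
 * A worked example for the resident path above, with invented numbers:
 * value_length 300, pos 100, bytes 100 (so end = 200 <= attr_len).  The
 * commit then amounts to
 *
 *	memcpy(kattr + 100, kaddr + 100, 100);
 *
 * and, only if the page was not already uptodate,
 *
 *	memcpy(kaddr, kattr, 100);
 *	memcpy(kaddr + 200, kattr + 200, 100);
 *	memset(kaddr + 300, 0, PAGE_SIZE - 300);
 *
 * after which the page cache copy and the attribute value in the mft record
 * agree byte for byte, and the tail of the page beyond the attribute value
 * is zeroed.
 */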
1679*4882a593Smuzhiyun
1680*4882a593Smuzhiyun /*
1681*4882a593Smuzhiyun * Copy as much as we can into the pages and return the number of bytes which
1682*4882a593Smuzhiyun * were successfully copied. If a fault is encountered then clear the pages
1683*4882a593Smuzhiyun * out to (ofs + bytes) and return the number of bytes which were copied.
1684*4882a593Smuzhiyun */
1685*4882a593Smuzhiyun static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
1686*4882a593Smuzhiyun unsigned ofs, struct iov_iter *i, size_t bytes)
1687*4882a593Smuzhiyun {
1688*4882a593Smuzhiyun struct page **last_page = pages + nr_pages;
1689*4882a593Smuzhiyun size_t total = 0;
1690*4882a593Smuzhiyun struct iov_iter data = *i;
1691*4882a593Smuzhiyun unsigned len, copied;
1692*4882a593Smuzhiyun
1693*4882a593Smuzhiyun do {
1694*4882a593Smuzhiyun len = PAGE_SIZE - ofs;
1695*4882a593Smuzhiyun if (len > bytes)
1696*4882a593Smuzhiyun len = bytes;
1697*4882a593Smuzhiyun copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
1698*4882a593Smuzhiyun len);
1699*4882a593Smuzhiyun total += copied;
1700*4882a593Smuzhiyun bytes -= copied;
1701*4882a593Smuzhiyun if (!bytes)
1702*4882a593Smuzhiyun break;
1703*4882a593Smuzhiyun iov_iter_advance(&data, copied);
1704*4882a593Smuzhiyun if (copied < len)
1705*4882a593Smuzhiyun goto err;
1706*4882a593Smuzhiyun ofs = 0;
1707*4882a593Smuzhiyun } while (++pages < last_page);
1708*4882a593Smuzhiyun out:
1709*4882a593Smuzhiyun return total;
1710*4882a593Smuzhiyun err:
1711*4882a593Smuzhiyun /* Zero the rest of the target like __copy_from_user(). */
1712*4882a593Smuzhiyun len = PAGE_SIZE - copied;
1713*4882a593Smuzhiyun do {
1714*4882a593Smuzhiyun if (len > bytes)
1715*4882a593Smuzhiyun len = bytes;
1716*4882a593Smuzhiyun zero_user(*pages, copied, len);
1717*4882a593Smuzhiyun bytes -= len;
1718*4882a593Smuzhiyun copied = 0;
1719*4882a593Smuzhiyun len = PAGE_SIZE;
1720*4882a593Smuzhiyun } while (++pages < last_page);
1721*4882a593Smuzhiyun goto out;
1722*4882a593Smuzhiyun }
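
/*
 * Behavioural summary of the helper above (pseudocode of the contract, not
 * additional driver code):
 *
 *	copied = copy up to @bytes from @i into @pages starting at @ofs;
 *	if a fault cut the copy short:
 *		zero the destination from the fault point out to ofs + bytes;
 *	return copied;
 *
 * The caller, ntfs_perform_write(), treats a short copy as no progress: the
 * iterator is not advanced and the request is retried, shrunk to a single
 * iovec segment to avoid a livelock (see the comment in that function).
 */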
1723*4882a593Smuzhiyun
1724*4882a593Smuzhiyun /**
1725*4882a593Smuzhiyun * ntfs_perform_write - perform buffered write to a file
1726*4882a593Smuzhiyun * @file: file to write to
1727*4882a593Smuzhiyun * @i: iov_iter with data to write
1728*4882a593Smuzhiyun * @pos: byte offset in file at which to begin writing to
1729*4882a593Smuzhiyun */
1730*4882a593Smuzhiyun static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
1731*4882a593Smuzhiyun loff_t pos)
1732*4882a593Smuzhiyun {
1733*4882a593Smuzhiyun struct address_space *mapping = file->f_mapping;
1734*4882a593Smuzhiyun struct inode *vi = mapping->host;
1735*4882a593Smuzhiyun ntfs_inode *ni = NTFS_I(vi);
1736*4882a593Smuzhiyun ntfs_volume *vol = ni->vol;
1737*4882a593Smuzhiyun struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
1738*4882a593Smuzhiyun struct page *cached_page = NULL;
1739*4882a593Smuzhiyun VCN last_vcn;
1740*4882a593Smuzhiyun LCN lcn;
1741*4882a593Smuzhiyun size_t bytes;
1742*4882a593Smuzhiyun ssize_t status, written = 0;
1743*4882a593Smuzhiyun unsigned nr_pages;
1744*4882a593Smuzhiyun
1745*4882a593Smuzhiyun ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
1746*4882a593Smuzhiyun "0x%llx, count 0x%lx.", vi->i_ino,
1747*4882a593Smuzhiyun (unsigned)le32_to_cpu(ni->type),
1748*4882a593Smuzhiyun (unsigned long long)pos,
1749*4882a593Smuzhiyun (unsigned long)iov_iter_count(i));
1750*4882a593Smuzhiyun /*
1751*4882a593Smuzhiyun * If a previous ntfs_truncate() failed, repeat it and abort if it
1752*4882a593Smuzhiyun * fails again.
1753*4882a593Smuzhiyun */
1754*4882a593Smuzhiyun if (unlikely(NInoTruncateFailed(ni))) {
1755*4882a593Smuzhiyun int err;
1756*4882a593Smuzhiyun
1757*4882a593Smuzhiyun inode_dio_wait(vi);
1758*4882a593Smuzhiyun err = ntfs_truncate(vi);
1759*4882a593Smuzhiyun if (err || NInoTruncateFailed(ni)) {
1760*4882a593Smuzhiyun if (!err)
1761*4882a593Smuzhiyun err = -EIO;
1762*4882a593Smuzhiyun ntfs_error(vol->sb, "Cannot perform write to inode "
1763*4882a593Smuzhiyun "0x%lx, attribute type 0x%x, because "
1764*4882a593Smuzhiyun "ntfs_truncate() failed (error code "
1765*4882a593Smuzhiyun "%i).", vi->i_ino,
1766*4882a593Smuzhiyun (unsigned)le32_to_cpu(ni->type), err);
1767*4882a593Smuzhiyun return err;
1768*4882a593Smuzhiyun }
1769*4882a593Smuzhiyun }
1770*4882a593Smuzhiyun /*
1771*4882a593Smuzhiyun * Determine the number of pages per cluster for non-resident
1772*4882a593Smuzhiyun * attributes.
1773*4882a593Smuzhiyun */
1774*4882a593Smuzhiyun nr_pages = 1;
1775*4882a593Smuzhiyun if (vol->cluster_size > PAGE_SIZE && NInoNonResident(ni))
1776*4882a593Smuzhiyun nr_pages = vol->cluster_size >> PAGE_SHIFT;
1777*4882a593Smuzhiyun last_vcn = -1;
1778*4882a593Smuzhiyun do {
1779*4882a593Smuzhiyun VCN vcn;
1780*4882a593Smuzhiyun pgoff_t idx, start_idx;
1781*4882a593Smuzhiyun unsigned ofs, do_pages, u;
1782*4882a593Smuzhiyun size_t copied;
1783*4882a593Smuzhiyun
1784*4882a593Smuzhiyun start_idx = idx = pos >> PAGE_SHIFT;
1785*4882a593Smuzhiyun ofs = pos & ~PAGE_MASK;
1786*4882a593Smuzhiyun bytes = PAGE_SIZE - ofs;
1787*4882a593Smuzhiyun do_pages = 1;
1788*4882a593Smuzhiyun if (nr_pages > 1) {
1789*4882a593Smuzhiyun vcn = pos >> vol->cluster_size_bits;
1790*4882a593Smuzhiyun if (vcn != last_vcn) {
1791*4882a593Smuzhiyun last_vcn = vcn;
1792*4882a593Smuzhiyun /*
1793*4882a593Smuzhiyun * Get the lcn of the vcn the write is in. If
1794*4882a593Smuzhiyun * it is a hole, need to lock down all pages in
1795*4882a593Smuzhiyun * the cluster.
1796*4882a593Smuzhiyun */
1797*4882a593Smuzhiyun down_read(&ni->runlist.lock);
1798*4882a593Smuzhiyun lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >>
1799*4882a593Smuzhiyun vol->cluster_size_bits, false);
1800*4882a593Smuzhiyun up_read(&ni->runlist.lock);
1801*4882a593Smuzhiyun if (unlikely(lcn < LCN_HOLE)) {
1802*4882a593Smuzhiyun if (lcn == LCN_ENOMEM)
1803*4882a593Smuzhiyun status = -ENOMEM;
1804*4882a593Smuzhiyun else {
1805*4882a593Smuzhiyun status = -EIO;
1806*4882a593Smuzhiyun ntfs_error(vol->sb, "Cannot "
1807*4882a593Smuzhiyun "perform write to "
1808*4882a593Smuzhiyun "inode 0x%lx, "
1809*4882a593Smuzhiyun "attribute type 0x%x, "
1810*4882a593Smuzhiyun "because the attribute "
1811*4882a593Smuzhiyun "is corrupt.",
1812*4882a593Smuzhiyun vi->i_ino, (unsigned)
1813*4882a593Smuzhiyun le32_to_cpu(ni->type));
1814*4882a593Smuzhiyun }
1815*4882a593Smuzhiyun break;
1816*4882a593Smuzhiyun }
1817*4882a593Smuzhiyun if (lcn == LCN_HOLE) {
1818*4882a593Smuzhiyun start_idx = (pos & ~(s64)
1819*4882a593Smuzhiyun vol->cluster_size_mask)
1820*4882a593Smuzhiyun >> PAGE_SHIFT;
1821*4882a593Smuzhiyun bytes = vol->cluster_size - (pos &
1822*4882a593Smuzhiyun vol->cluster_size_mask);
1823*4882a593Smuzhiyun do_pages = nr_pages;
1824*4882a593Smuzhiyun }
1825*4882a593Smuzhiyun }
1826*4882a593Smuzhiyun }
1827*4882a593Smuzhiyun if (bytes > iov_iter_count(i))
1828*4882a593Smuzhiyun bytes = iov_iter_count(i);
1829*4882a593Smuzhiyun again:
1830*4882a593Smuzhiyun /*
1831*4882a593Smuzhiyun * Bring in the user page(s) that we will copy from _first_.
1832*4882a593Smuzhiyun * Otherwise there is a nasty deadlock on copying from the same
1833*4882a593Smuzhiyun * page(s) as we are writing to, without it/them being marked
1834*4882a593Smuzhiyun * up-to-date. Note, at present there is nothing to stop the
1835*4882a593Smuzhiyun * pages being swapped out between us bringing them into memory
1836*4882a593Smuzhiyun * and doing the actual copying.
1837*4882a593Smuzhiyun */
1838*4882a593Smuzhiyun if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
1839*4882a593Smuzhiyun status = -EFAULT;
1840*4882a593Smuzhiyun break;
1841*4882a593Smuzhiyun }
1842*4882a593Smuzhiyun /* Get and lock @do_pages starting at index @start_idx. */
1843*4882a593Smuzhiyun status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
1844*4882a593Smuzhiyun pages, &cached_page);
1845*4882a593Smuzhiyun if (unlikely(status))
1846*4882a593Smuzhiyun break;
1847*4882a593Smuzhiyun /*
1848*4882a593Smuzhiyun * For non-resident attributes, we need to fill any holes with
1849*4882a593Smuzhiyun * actual clusters and ensure all buffers are mapped. We also
1850*4882a593Smuzhiyun * need to bring uptodate any buffers that are only partially
1851*4882a593Smuzhiyun * being written to.
1852*4882a593Smuzhiyun */
1853*4882a593Smuzhiyun if (NInoNonResident(ni)) {
1854*4882a593Smuzhiyun status = ntfs_prepare_pages_for_non_resident_write(
1855*4882a593Smuzhiyun pages, do_pages, pos, bytes);
1856*4882a593Smuzhiyun if (unlikely(status)) {
1857*4882a593Smuzhiyun do {
1858*4882a593Smuzhiyun unlock_page(pages[--do_pages]);
1859*4882a593Smuzhiyun put_page(pages[do_pages]);
1860*4882a593Smuzhiyun } while (do_pages);
1861*4882a593Smuzhiyun break;
1862*4882a593Smuzhiyun }
1863*4882a593Smuzhiyun }
1864*4882a593Smuzhiyun u = (pos >> PAGE_SHIFT) - pages[0]->index;
1865*4882a593Smuzhiyun copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
1866*4882a593Smuzhiyun i, bytes);
1867*4882a593Smuzhiyun ntfs_flush_dcache_pages(pages + u, do_pages - u);
1868*4882a593Smuzhiyun status = 0;
1869*4882a593Smuzhiyun if (likely(copied == bytes)) {
1870*4882a593Smuzhiyun status = ntfs_commit_pages_after_write(pages, do_pages,
1871*4882a593Smuzhiyun pos, bytes);
1872*4882a593Smuzhiyun if (!status)
1873*4882a593Smuzhiyun status = bytes;
1874*4882a593Smuzhiyun }
1875*4882a593Smuzhiyun do {
1876*4882a593Smuzhiyun unlock_page(pages[--do_pages]);
1877*4882a593Smuzhiyun put_page(pages[do_pages]);
1878*4882a593Smuzhiyun } while (do_pages);
1879*4882a593Smuzhiyun if (unlikely(status < 0))
1880*4882a593Smuzhiyun break;
1881*4882a593Smuzhiyun copied = status;
1882*4882a593Smuzhiyun cond_resched();
1883*4882a593Smuzhiyun if (unlikely(!copied)) {
1884*4882a593Smuzhiyun size_t sc;
1885*4882a593Smuzhiyun
1886*4882a593Smuzhiyun /*
1887*4882a593Smuzhiyun * We failed to copy anything. Fall back to single
1888*4882a593Smuzhiyun * segment length write.
1889*4882a593Smuzhiyun *
1890*4882a593Smuzhiyun * This is needed to avoid possible livelock in the
1891*4882a593Smuzhiyun * case that all segments in the iov cannot be copied
1892*4882a593Smuzhiyun * at once without a pagefault.
1893*4882a593Smuzhiyun */
1894*4882a593Smuzhiyun sc = iov_iter_single_seg_count(i);
1895*4882a593Smuzhiyun if (bytes > sc)
1896*4882a593Smuzhiyun bytes = sc;
1897*4882a593Smuzhiyun goto again;
1898*4882a593Smuzhiyun }
1899*4882a593Smuzhiyun iov_iter_advance(i, copied);
1900*4882a593Smuzhiyun pos += copied;
1901*4882a593Smuzhiyun written += copied;
1902*4882a593Smuzhiyun balance_dirty_pages_ratelimited(mapping);
1903*4882a593Smuzhiyun if (fatal_signal_pending(current)) {
1904*4882a593Smuzhiyun status = -EINTR;
1905*4882a593Smuzhiyun break;
1906*4882a593Smuzhiyun }
1907*4882a593Smuzhiyun } while (iov_iter_count(i));
1908*4882a593Smuzhiyun if (cached_page)
1909*4882a593Smuzhiyun put_page(cached_page);
1910*4882a593Smuzhiyun ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
1911*4882a593Smuzhiyun written ? "written" : "status", (unsigned long)written,
1912*4882a593Smuzhiyun (long)status);
1913*4882a593Smuzhiyun return written ? written : status;
1914*4882a593Smuzhiyun }
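
/*
 * A worked example of the hole path above, assuming a 64KiB cluster size
 * (cluster_size_bits 16) and 4KiB pages (PAGE_SHIFT 12), with pos 0x23456:
 *
 *	idx       = pos >> PAGE_SHIFT              = 0x23  (page 35)
 *	ofs       = pos & ~PAGE_MASK               = 0x456
 *	vcn       = pos >> cluster_size_bits       = 2
 *
 * If vcn 2 turns out to be a hole, the write is widened to cluster
 * granularity:
 *
 *	start_idx = (pos & ~(s64)0xffff) >> 12     = 0x20  (page 32)
 *	bytes     = 0x10000 - (pos & 0xffff)       = 0xcbaa
 *	do_pages  = 0x10000 >> 12                  = 16    (pages 32-47)
 *
 * i.e. all sixteen pages backing the cluster are grabbed and prepared so the
 * whole cluster can be allocated and zeroed consistently, and @bytes is
 * still clamped to iov_iter_count(i) afterwards.
 */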
1915*4882a593Smuzhiyun
1916*4882a593Smuzhiyun /**
1917*4882a593Smuzhiyun * ntfs_file_write_iter - simple wrapper for ntfs_perform_write()
1918*4882a593Smuzhiyun * @iocb: IO state structure
1919*4882a593Smuzhiyun * @from: iov_iter with data to write
1920*4882a593Smuzhiyun *
1921*4882a593Smuzhiyun * Basically the same as generic_file_write_iter() except that it ends up
1922*4882a593Smuzhiyun * calling ntfs_perform_write() instead of generic_perform_write() and that
1923*4882a593Smuzhiyun * O_DIRECT is not implemented.
1924*4882a593Smuzhiyun */
1925*4882a593Smuzhiyun static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1926*4882a593Smuzhiyun {
1927*4882a593Smuzhiyun struct file *file = iocb->ki_filp;
1928*4882a593Smuzhiyun struct inode *vi = file_inode(file);
1929*4882a593Smuzhiyun ssize_t written = 0;
1930*4882a593Smuzhiyun ssize_t err;
1931*4882a593Smuzhiyun
1932*4882a593Smuzhiyun inode_lock(vi);
1933*4882a593Smuzhiyun /* We can write back this queue in page reclaim. */
1934*4882a593Smuzhiyun current->backing_dev_info = inode_to_bdi(vi);
1935*4882a593Smuzhiyun err = ntfs_prepare_file_for_write(iocb, from);
1936*4882a593Smuzhiyun if (iov_iter_count(from) && !err)
1937*4882a593Smuzhiyun written = ntfs_perform_write(file, from, iocb->ki_pos);
1938*4882a593Smuzhiyun current->backing_dev_info = NULL;
1939*4882a593Smuzhiyun inode_unlock(vi);
1940*4882a593Smuzhiyun iocb->ki_pos += written;
1941*4882a593Smuzhiyun if (likely(written > 0))
1942*4882a593Smuzhiyun written = generic_write_sync(iocb, written);
1943*4882a593Smuzhiyun return written ? written : err;
1944*4882a593Smuzhiyun }
1945*4882a593Smuzhiyun
1946*4882a593Smuzhiyun /**
1947*4882a593Smuzhiyun * ntfs_file_fsync - sync a file to disk
1948*4882a593Smuzhiyun * @filp: file to be synced
1949*4882a593Smuzhiyun * @datasync: if non-zero only flush user data and not metadata
1950*4882a593Smuzhiyun *
1951*4882a593Smuzhiyun * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync
1952*4882a593Smuzhiyun * system calls. This function is inspired by fs/buffer.c::file_fsync().
1953*4882a593Smuzhiyun *
1954*4882a593Smuzhiyun * If @datasync is false, write the mft record and all associated extent mft
1955*4882a593Smuzhiyun * records as well as the $DATA attribute and then sync the block device.
1956*4882a593Smuzhiyun *
1957*4882a593Smuzhiyun * If @datasync is true and the attribute is non-resident, we skip the writing
1958*4882a593Smuzhiyun * of the mft record and all associated extent mft records (this might still
1959*4882a593Smuzhiyun * happen due to the write_inode_now() call).
1960*4882a593Smuzhiyun *
1961*4882a593Smuzhiyun * Also, if @datasync is true, we do not wait on the inode to be written out
1962*4882a593Smuzhiyun * but we always wait on the page cache pages to be written out.
1963*4882a593Smuzhiyun *
1964*4882a593Smuzhiyun * Locking: The inode lock (i_mutex) is taken and released inside this function.
1965*4882a593Smuzhiyun *
1966*4882a593Smuzhiyun * TODO: We should probably also write all attribute/index inodes associated
1967*4882a593Smuzhiyun * with this inode but since we have no simple way of getting to them we ignore
1968*4882a593Smuzhiyun * this problem for now.
1969*4882a593Smuzhiyun */
1970*4882a593Smuzhiyun static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
1971*4882a593Smuzhiyun int datasync)
1972*4882a593Smuzhiyun {
1973*4882a593Smuzhiyun struct inode *vi = filp->f_mapping->host;
1974*4882a593Smuzhiyun int err, ret = 0;
1975*4882a593Smuzhiyun
1976*4882a593Smuzhiyun ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
1977*4882a593Smuzhiyun
1978*4882a593Smuzhiyun err = file_write_and_wait_range(filp, start, end);
1979*4882a593Smuzhiyun if (err)
1980*4882a593Smuzhiyun return err;
1981*4882a593Smuzhiyun inode_lock(vi);
1982*4882a593Smuzhiyun
1983*4882a593Smuzhiyun BUG_ON(S_ISDIR(vi->i_mode));
1984*4882a593Smuzhiyun if (!datasync || !NInoNonResident(NTFS_I(vi)))
1985*4882a593Smuzhiyun ret = __ntfs_write_inode(vi, 1);
1986*4882a593Smuzhiyun write_inode_now(vi, !datasync);
1987*4882a593Smuzhiyun /*
1988*4882a593Smuzhiyun * NOTE: If we were to use mapping->private_list (see ext2 and
1989*4882a593Smuzhiyun * fs/buffer.c) for dirty blocks then we could optimize the below to be
1990*4882a593Smuzhiyun * sync_mapping_buffers(vi->i_mapping).
1991*4882a593Smuzhiyun */
1992*4882a593Smuzhiyun err = sync_blockdev(vi->i_sb->s_bdev);
1993*4882a593Smuzhiyun if (unlikely(err && !ret))
1994*4882a593Smuzhiyun ret = err;
1995*4882a593Smuzhiyun if (likely(!ret))
1996*4882a593Smuzhiyun ntfs_debug("Done.");
1997*4882a593Smuzhiyun else
1998*4882a593Smuzhiyun ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
1999*4882a593Smuzhiyun "%u.", datasync ? "data" : "", vi->i_ino, -ret);
2000*4882a593Smuzhiyun inode_unlock(vi);
2001*4882a593Smuzhiyun return ret;
2002*4882a593Smuzhiyun }
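
/*
 * Illustrative user space view of the function above (path and buffer are
 * made up, error handling omitted):
 *
 *	fd = open("/mnt/ntfs/foo", O_WRONLY);
 *	write(fd, buf, len);
 *	fsync(fd);	- arrives here with datasync == 0
 *	fdatasync(fd);	- arrives here with datasync == 1
 *
 * With datasync == 1 and a non-resident $DATA attribute the explicit
 * __ntfs_write_inode() call is skipped; in both cases the data pages are
 * written and waited upon by file_write_and_wait_range() and the block
 * device is then synced.
 */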
2003*4882a593Smuzhiyun
2004*4882a593Smuzhiyun #endif /* NTFS_RW */
2005*4882a593Smuzhiyun
2006*4882a593Smuzhiyun const struct file_operations ntfs_file_ops = {
2007*4882a593Smuzhiyun .llseek = generic_file_llseek,
2008*4882a593Smuzhiyun .read_iter = generic_file_read_iter,
2009*4882a593Smuzhiyun #ifdef NTFS_RW
2010*4882a593Smuzhiyun .write_iter = ntfs_file_write_iter,
2011*4882a593Smuzhiyun .fsync = ntfs_file_fsync,
2012*4882a593Smuzhiyun #endif /* NTFS_RW */
2013*4882a593Smuzhiyun .mmap = generic_file_mmap,
2014*4882a593Smuzhiyun .open = ntfs_file_open,
2015*4882a593Smuzhiyun .splice_read = generic_file_splice_read,
2016*4882a593Smuzhiyun };
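
/*
 * Sketch of how this table is wired up (done elsewhere in the driver, in
 * fs/ntfs/inode.c, for regular file inodes; shown here only as a rough
 * illustration):
 *
 *	vi->i_op = &ntfs_file_inode_ops;
 *	vi->i_fop = &ntfs_file_ops;
 *
 * After that, a write(2) on the file reaches ntfs_file_write_iter() through
 * the ->write_iter slot and fsync(2)/fdatasync(2) reach ntfs_file_fsync()
 * through ->fsync.
 */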
2017*4882a593Smuzhiyun
2018*4882a593Smuzhiyun const struct inode_operations ntfs_file_inode_ops = {
2019*4882a593Smuzhiyun #ifdef NTFS_RW
2020*4882a593Smuzhiyun .setattr = ntfs_setattr,
2021*4882a593Smuzhiyun #endif /* NTFS_RW */
2022*4882a593Smuzhiyun };
2023*4882a593Smuzhiyun
2024*4882a593Smuzhiyun const struct file_operations ntfs_empty_file_ops = {};
2025*4882a593Smuzhiyun
2026*4882a593Smuzhiyun const struct inode_operations ntfs_empty_inode_ops = {};