xref: /OK3568_Linux_fs/yocto/poky/scripts/lib/wic/filemap.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1#
2# Copyright (c) 2012 Intel, Inc.
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7"""
This module implements a way to get the block mapping of a file. Two methods
are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of
the file seek syscall. The former is implemented by the 'FilemapFiemap' class,
the latter is implemented by the 'FilemapSeek' class. Both classes provide the
same API. The 'filemap' function automatically selects which class can be used
and returns an instance of the class. When neither method is supported,
'filemap' falls back to the 'FilemapNobmap' class, which reports every block
as mapped.
14"""
15
16# Disable the following pylint recommendations:
17#   * Too many instance attributes (R0902)
18# pylint: disable=R0902
19
20import errno
21import os
22import struct
23import array
24import fcntl
25import tempfile
26import logging
27
def get_block_size(file_obj):
    """
    Returns block size for file object 'file_obj'. Errors are indicated by the
    'IOError' exception.

    The FIGETBSZ ioctl is tried first. If it fails with 'OSError' or reports a
    zero block size, the 'st_blksize' field returned by 'os.fstat()' is used
    instead.
    """
    # Request number of the FIGETBSZ ioctl, which reports the block size of
    # the host file-system for the image file.
    figetbsz = 2

    try:
        binary_data = fcntl.ioctl(file_obj, figetbsz, struct.pack('I', 0))
        bsize = struct.unpack('I', binary_data)[0]
    except OSError:
        bsize = None

    if bsize:
        return bsize

    # The ioctl failed or returned zero, so fall back to os.fstat()
    stat = os.fstat(file_obj.fileno())
    if not hasattr(stat, 'st_blksize'):
        raise IOError("Unable to determine block size")
    return stat.st_blksize
50
class ErrorNotSupp(Exception):
    """
    Raised when the 'FIEMAP' or 'SEEK_HOLE' feature is not supported, either
    by the kernel or by the file-system.
    """
57
class Error(Exception):
    """The exception type used for all the other errors raised by this module."""
61
62
class _FilemapBase(object):
    """
    This is a base class for a couple of other classes in this module. This
    class simply performs the common parts of the initialization process: opens
    the image file, gets its size, etc. The 'log' parameter is the logger object
    to use for printing messages.

    After initialization the following public attributes are available:
      * image_size - size of the image file in bytes
      * block_size - block size as returned by 'get_block_size()'
      * blocks_cnt - number of blocks in the image, rounded up
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is full path to the
        file or file object to operate on. Anything with a 'fileno' attribute
        is treated as a file object and is not closed by this class; a path
        is opened here and closed by the destructor.

        Raises 'Error' if the file cannot be opened, examined, flushed or
        synchronized.
        """

        self._log = log
        if self._log is None:
            self._log = logging.getLogger(__name__)

        self._f_image_needs_close = False

        if hasattr(image, "fileno"):
            self._f_image = image
            self._image_path = image.name
        else:
            self._image_path = image
            self._open_image_file()

        try:
            self.image_size = os.fstat(self._f_image.fileno()).st_size
        except IOError as err:
            raise Error("cannot get information about file '%s': %s"
                        % (self._f_image.name, err)) from err

        try:
            self.block_size = get_block_size(self._f_image)
        except IOError as err:
            raise Error("cannot get block size for '%s': %s"
                        % (self._image_path, err)) from err

        # Round the block count up so that a trailing partial block is counted
        self.blocks_cnt = self.image_size + self.block_size - 1
        self.blocks_cnt //= self.block_size

        try:
            self._f_image.flush()
        except IOError as err:
            raise Error("cannot flush image file '%s': %s"
                        % (self._image_path, err)) from err

        try:
            os.fsync(self._f_image.fileno())
        except OSError as err:
            raise Error("cannot synchronize image file '%s': %s "
                        % (self._image_path, err.strerror)) from err

        self._log.debug("opened image \"%s\"" % self._image_path)
        self._log.debug("block size %d, blocks count %d, image size %d"
                        % (self.block_size, self.blocks_cnt, self.image_size))

    def __del__(self):
        """The class destructor which just closes the image file."""
        # The attribute may be missing if the instance was never initialized,
        # so do not assume it exists.
        if getattr(self, "_f_image_needs_close", False):
            self._f_image.close()

    def _open_image_file(self):
        """Open the image file. Raises 'Error' if it cannot be opened."""
        try:
            self._f_image = open(self._image_path, 'rb')
        except IOError as err:
            raise Error("cannot open image file '%s': %s"
                        % (self._image_path, err)) from err

        # The file was opened here, so the destructor has to close it
        self._f_image_needs_close = True

    def block_is_mapped(self, block): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is mapped and 'False'
        otherwise.
        """

        raise Error("the method is not implemented")

    def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. This is a
        generator which yields ranges of mapped blocks in the file. The ranges
        are tuples of 2 elements: [first, last], where 'first' is the first
        mapped block and 'last' is the last mapped block.

        The ranges are yielded for the area of the file of size 'count' blocks,
        starting from block 'start'.
        """

        raise Error("the method is not implemented")
157
158
# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call. The
# values exported by the 'os' module are used when available, because they
# match the platform we run on. 3 and 4 are the Linux values and are used only
# when the 'os' module does not provide the constants.
_SEEK_DATA = getattr(os, "SEEK_DATA", 3)
_SEEK_HOLE = getattr(os, "SEEK_HOLE", 4)
162
def _lseek(file_obj, offset, whence):
    """Helper which calls 'os.lseek' on the descriptor of file object
    'file_obj' with the given 'offset' and 'whence'. The 'whence' argument
    is expected to be either '_SEEK_DATA' or '_SEEK_HOLE'. The position
    found is returned, or '-1' when there is no more data or hole starting
    from 'offset'. 'ErrorNotSupp' is raised if the seek fails with EINVAL;
    any other 'OSError' is passed through unchanged."""

    try:
        position = os.lseek(file_obj.fileno(), offset, whence)
    except OSError as err:
        code = err.errno
        if code == errno.EINVAL:
            raise ErrorNotSupp("the kernel or file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\"")
        if code != errno.ENXIO:
            raise
        # The 'lseek' system call fails with ENXIO if there is no data or
        # hole starting from the specified offset.
        position = -1

    return position
182
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation requires the caller to be able
    to create a temporary file in the directory of the image file, because
    this is how a stub 'SEEK_HOLE' implementation is detected.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Raises
        'ErrorNotSupp' if 'SEEK_HOLE' turns out to be unusable.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if only a stub implementation is detected.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except IOError as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s" \
                              % (directory, err))

        # The 'with' statement closes the temporary file on every exit path,
        # including the error ones.
        with tmp_obj:
            # Make the file one block long without writing any data, so that
            # the whole file is a hole.
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s"
                                   % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)

        if offs != 0:
            # We are dealing with the stub 'SEEK_HOLE' implementation which
            # always returns EOF.
            self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
            raise ErrorNotSupp("the file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                               "provides a stub implementation")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            result = False
        else:
            # The block is mapped only if the nearest data starts inside it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' depending
        on what is passed in the 'whence1' and 'whence2' arguments.

        'whence1' is used to find the beginning of each range and 'whence2'
        to find its end. Yields '(first_block, last_block)' tuples for the
        area of 'count' blocks starting from block 'start'.
        """

        assert whence1 != whence2
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range extends to the end of the file: round up to the
                # block boundary.
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)"
                            % (start_blk, end_blk))
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)
282
283
# Below goes the FIEMAP ioctl implementation, which is not very readable
# because it deals with the rather complex FIEMAP ioctl. To understand the
# code, you need to know the FIEMAP interface, which is documented in the
# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.

# Format string for 'struct fiemap'. The leading '=' selects native byte order
# with standard sizes and no alignment padding. The fields are packed by
# '_invoke_fiemap()' in this order: start offset in bytes, length in bytes,
# flags, a zero, the number of extents the buffer can hold, and another zero.
# The field at index 3 is read back as 'fm_mapped_extents'.
_FIEMAP_FORMAT = "=QQLLLL"
# sizeof(struct fiemap)
_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
# Format string for 'struct fiemap_extent'. Only the fields at index 0 (start
# of the extent in bytes) and index 2 (length of the extent in bytes) are used
# by this module.
_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
# sizeof(struct fiemap_extent)
_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
# The FIEMAP ioctl number
_FIEMAP_IOCTL = 0xC020660B
# The FIEMAP ioctl flag which instructs the kernel to sync the file before
# reading the block map
_FIEMAP_FLAG_SYNC = 0x00000001
# Size of the buffer for 'struct fiemap_extent' elements which will be used
# when invoking the FIEMAP ioctl. The larger the buffer, the fewer times the
# FIEMAP ioctl has to be invoked.
_FIEMAP_BUFFER_SIZE = 256 * 1024
306
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
    over all mapped blocks and over all holes.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is the full path to
        the file or the file object to operate on. Raises 'ErrorNotSupp' if
        the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable, zero-filled buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0]) * self._buf_size

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the kernel or
        the file-system, and 'Error' for a bad block number or any other
        ioctl failure.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            # The last argument asks for the buffer to be modified in place
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err)) from err

        # Unpack straight from the buffer, no intermediate copy is needed
        return struct.unpack_from(_FIEMAP_FORMAT, self._buf, 0)

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # The element at index 3 of 'struct_fiemap' is the 'fm_mapped_extents'
        # field. If it contains zero, the block is not mapped, otherwise it is
        # mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack_from(_FIEMAP_EXTENT_FORMAT, self._buf, offset)

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the  'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be consecutive
        (e.g., (1, 100), (100, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            extent = 0
            while extent < mapped_extents:
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

                extent += 1

            # Continue from the block following the last extent seen
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        iterator = self._do_get_mapped_ranges(start, count)
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            # There are no mapped blocks in the requested area. Letting
            # 'StopIteration' escape from a generator would turn it into a
            # 'RuntimeError', so finish cleanly without yielding anything.
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # The range continues the previous one: merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (first_prev, last_prev))
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                        % (first_prev, last_prev))
        yield (first_prev, last_prev)
471
class FilemapNobmap(_FilemapBase):
    """
    The implementation to use when neither 'SEEK_DATA/HOLE' nor FIEMAP is
    supported by the filesystem or kernel. Every block is reported as mapped.
    """

    def __init__(self, image, log=None):
        """Refer the '_FilemapBase' class for the documentation."""

        # The base class does all the initialization work
        super(FilemapNobmap, self).__init__(image, log)
        self._log.debug("FilemapNobmap: initializing")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return True

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        last = start + count - 1
        self._log.debug("FilemapNobmap: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, last))
        # The whole requested area is a single mapped range
        yield (start, last)
494
def filemap(image, log=None):
    """
    Create and return an instance of a Filemap class for 'image', depending
    on what the system we run on supports. 'FilemapFiemap' is tried first and
    'FilemapSeek' second. If both raise 'ErrorNotSupp', an instance of the
    'FilemapNobmap' class is returned. The 'log' argument is passed to the
    class constructor as is. Exceptions other than 'ErrorNotSupp' are not
    handled here.
    """

    for candidate in (FilemapFiemap, FilemapSeek):
        try:
            return candidate(image, log)
        except ErrorNotSupp:
            # This method is not available, try the next one
            pass

    return FilemapNobmap(image, log)
512
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file. Only the mapped
    ranges of the source are read and written.

    src_fname: path to source file
    dst_fname: path to destination file; if it cannot be opened for update it
               is created and sized to hold the copied data
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst; 0 means copy up to
            the end of src
    api: callable taking the source path and returning a filemap object, e.g.
         the FilemapFiemap or FilemapSeek class; defaults to 'filemap'
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    try:
        dst_file = open(dst_fname, 'r+b')
        created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        created = True

    # The 'with' statement closes the destination file on every exit path,
    # including errors raised while copying.
    with dst_file:
        if created:
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies within the skipped area
            if skip >= end:
                continue

            # The range is only partially skipped
            if start < skip < end:
                start = skip

            fmap._f_image.seek(start, os.SEEK_SET)

            # Everything up to the beginning of this range, holes included,
            # counts as already copied.
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                if read + chunk_size > to_read:
                    chunk_size = to_read - read
                size = chunk_size
                # Do not copy more than 'length' bytes in total
                if length and written + size > length:
                    size = length - written
                chunk = fmap._f_image.read(size)
                dst_file.write(chunk)
                read += size
                written += size
                if written == length:
                    return
577