drivers/md/raid10.h

/* SPDX-License-Identifier: GPL-2.0 */
*4882a593Smuzhiyun#ifndef _RAID10_H
*4882a593Smuzhiyun#define _RAID10_H
*4882a593Smuzhiyun
/* Note: raid10_info.rdev can be set to NULL asynchronously by
 * raid10_remove_disk.
 * There are three safe ways to access raid10_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
 *    that is called as part of performing resync/recovery/reshape.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
 *    and if it is non-NULL, increment rdev->nr_pending before dropping the
 *    RCU lock.
 * When .rdev is set to NULL, the nr_pending count is checked again and if it
 * has been incremented, the pointer is put back in .rdev.
 */

*4882a593Smuzhiyunstruct raid10_info {
*4882a593Smuzhiyun	struct md_rdev	*rdev, *replacement;
*4882a593Smuzhiyun	sector_t	head_position;
*4882a593Smuzhiyun	int		recovery_disabled;	/* matches
*4882a593Smuzhiyun						 * mddev->recovery_disabled
*4882a593Smuzhiyun						 * when we shouldn't try
*4882a593Smuzhiyun						 * recovering this device.
*4882a593Smuzhiyun						 */
*4882a593Smuzhiyun};
*4882a593Smuzhiyun
*4882a593Smuzhiyunstruct r10conf {
*4882a593Smuzhiyun	struct mddev		*mddev;
*4882a593Smuzhiyun	struct raid10_info	*mirrors;
*4882a593Smuzhiyun	struct raid10_info	*mirrors_new, *mirrors_old;
*4882a593Smuzhiyun	spinlock_t		device_lock;
*4882a593Smuzhiyun
*4882a593Smuzhiyun	/* geometry */
*4882a593Smuzhiyun	struct geom {
*4882a593Smuzhiyun		int		raid_disks;
*4882a593Smuzhiyun		int		near_copies;  /* number of copies laid out
*4882a593Smuzhiyun					       * raid0 style */
*4882a593Smuzhiyun		int		far_copies;   /* number of copies laid out
*4882a593Smuzhiyun					       * at large strides across drives
*4882a593Smuzhiyun					       */
*4882a593Smuzhiyun		int		far_offset;   /* far_copies are offset by 1
*4882a593Smuzhiyun					       * stripe instead of many
*4882a593Smuzhiyun					       */
*4882a593Smuzhiyun		sector_t	stride;	      /* distance between far copies.
*4882a593Smuzhiyun					       * This is size / far_copies unless
*4882a593Smuzhiyun					       * far_offset, in which case it is
*4882a593Smuzhiyun					       * 1 stripe.
*4882a593Smuzhiyun					       */
*4882a593Smuzhiyun		int             far_set_size; /* The number of devices in a set,
*4882a593Smuzhiyun					       * where a 'set' are devices that
*4882a593Smuzhiyun					       * contain far/offset copies of
*4882a593Smuzhiyun					       * each other.
*4882a593Smuzhiyun					       */
*4882a593Smuzhiyun		int		chunk_shift; /* shift from chunks to sectors */
*4882a593Smuzhiyun		sector_t	chunk_mask;
*4882a593Smuzhiyun	} prev, geo;
*4882a593Smuzhiyun	int			copies;	      /* near_copies * far_copies.
*4882a593Smuzhiyun					       * must be <= raid_disks
*4882a593Smuzhiyun					       */
*4882a593Smuzhiyun
*4882a593Smuzhiyun	sector_t		dev_sectors;  /* temp copy of
*4882a593Smuzhiyun					       * mddev->dev_sectors */
*4882a593Smuzhiyun	sector_t		reshape_progress;
*4882a593Smuzhiyun	sector_t		reshape_safe;
*4882a593Smuzhiyun	unsigned long		reshape_checkpoint;
*4882a593Smuzhiyun	sector_t		offset_diff;
*4882a593Smuzhiyun
*4882a593Smuzhiyun	struct list_head	retry_list;
*4882a593Smuzhiyun	/* A separate list of r1bio which just need raid_end_bio_io called.
*4882a593Smuzhiyun	 * This mustn't happen for writes which had any errors if the superblock
*4882a593Smuzhiyun	 * needs to be written.
*4882a593Smuzhiyun	 */
*4882a593Smuzhiyun	struct list_head	bio_end_io_list;
*4882a593Smuzhiyun
*4882a593Smuzhiyun	/* queue pending writes and submit them on unplug */
*4882a593Smuzhiyun	struct bio_list		pending_bio_list;
*4882a593Smuzhiyun	int			pending_count;
*4882a593Smuzhiyun
*4882a593Smuzhiyun	spinlock_t		resync_lock;
*4882a593Smuzhiyun	atomic_t		nr_pending;
*4882a593Smuzhiyun	int			nr_waiting;
*4882a593Smuzhiyun	int			nr_queued;
*4882a593Smuzhiyun	int			barrier;
*4882a593Smuzhiyun	int			array_freeze_pending;
*4882a593Smuzhiyun	sector_t		next_resync;
*4882a593Smuzhiyun	int			fullsync;  /* set to 1 if a full sync is needed,
*4882a593Smuzhiyun					    * (fresh device added).
*4882a593Smuzhiyun					    * Cleared when a sync completes.
*4882a593Smuzhiyun					    */
*4882a593Smuzhiyun	int			have_replacement; /* There is at least one
*4882a593Smuzhiyun						   * replacement device.
*4882a593Smuzhiyun						   */
*4882a593Smuzhiyun	wait_queue_head_t	wait_barrier;
*4882a593Smuzhiyun
*4882a593Smuzhiyun	mempool_t		r10bio_pool;
*4882a593Smuzhiyun	mempool_t		r10buf_pool;
*4882a593Smuzhiyun	struct page		*tmppage;
*4882a593Smuzhiyun	struct bio_set		bio_split;
*4882a593Smuzhiyun
*4882a593Smuzhiyun	/* When taking over an array from a different personality, we store
*4882a593Smuzhiyun	 * the new thread here until we fully activate the array.
*4882a593Smuzhiyun	 */
*4882a593Smuzhiyun	struct md_thread	*thread;
*4882a593Smuzhiyun
*4882a593Smuzhiyun	/*
*4882a593Smuzhiyun	 * Keep track of cluster resync window to send to other nodes.
*4882a593Smuzhiyun	 */
*4882a593Smuzhiyun	sector_t		cluster_sync_low;
*4882a593Smuzhiyun	sector_t		cluster_sync_high;
*4882a593Smuzhiyun};
*4882a593Smuzhiyun
/*
 * This is our 'private' RAID10 bio.
 *
 * It contains information about what kind of IO operations were started
 * for this RAID10 operation, and about their status.
 */

*4882a593Smuzhiyunstruct r10bio {
*4882a593Smuzhiyun	atomic_t		remaining; /* 'have we finished' count,
*4882a593Smuzhiyun					    * used from IRQ handlers
*4882a593Smuzhiyun					    */
*4882a593Smuzhiyun	sector_t		sector;	/* virtual sector number */
*4882a593Smuzhiyun	int			sectors;
*4882a593Smuzhiyun	unsigned long		state;
*4882a593Smuzhiyun	struct mddev		*mddev;
*4882a593Smuzhiyun	/*
*4882a593Smuzhiyun	 * original bio going to /dev/mdx
*4882a593Smuzhiyun	 */
*4882a593Smuzhiyun	struct bio		*master_bio;
*4882a593Smuzhiyun	/*
*4882a593Smuzhiyun	 * if the IO is in READ direction, then this is where we read
*4882a593Smuzhiyun	 */
*4882a593Smuzhiyun	int			read_slot;
*4882a593Smuzhiyun
*4882a593Smuzhiyun	struct list_head	retry_list;
*4882a593Smuzhiyun	/*
*4882a593Smuzhiyun	 * if the IO is in WRITE direction, then multiple bios are used,
*4882a593Smuzhiyun	 * one for each copy.
*4882a593Smuzhiyun	 * When resyncing we also use one for each copy.
*4882a593Smuzhiyun	 * When reconstructing, we use 2 bios, one for read, one for write.
*4882a593Smuzhiyun	 * We choose the number when they are allocated.
*4882a593Smuzhiyun	 * We sometimes need an extra bio to write to the replacement.
*4882a593Smuzhiyun	 */
*4882a593Smuzhiyun	struct r10dev {
*4882a593Smuzhiyun		struct bio	*bio;
*4882a593Smuzhiyun		union {
*4882a593Smuzhiyun			struct bio	*repl_bio; /* used for resync and
*4882a593Smuzhiyun						    * writes */
*4882a593Smuzhiyun			struct md_rdev	*rdev;	   /* used for reads
*4882a593Smuzhiyun						    * (read_slot >= 0) */
*4882a593Smuzhiyun		};
*4882a593Smuzhiyun		sector_t	addr;
*4882a593Smuzhiyun		int		devnum;
*4882a593Smuzhiyun	} devs[];
*4882a593Smuzhiyun};
*4882a593Smuzhiyun
/*
 * Bits for r10bio.state.  The enumerators are consecutive bit numbers
 * starting at 0; their order must not change.
 */
enum r10bio_state {
	R10BIO_Uptodate,
	R10BIO_IsSync,
	R10BIO_IsRecover,
	R10BIO_IsReshape,
	R10BIO_Degraded,
	/* Set ReadError on bios that experience a read error
	 * so that raid10d knows what to do with them.
	 */
	R10BIO_ReadError,
	/* If a write for this request means we can clear some
	 * known-bad-block records, we set this flag.
	 */
	R10BIO_MadeGood,
	R10BIO_WriteError,
	/* During a reshape we might be performing IO on the
	 * 'previous' part of the array, in which case this
	 * flag is set
	 */
	R10BIO_Previous,
	/* failfast devices did receive failfast requests. */
	R10BIO_FailFast,
};
*4882a593Smuzhiyun#endif