1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */ 2*4882a593Smuzhiyun #ifndef _RAID10_H 3*4882a593Smuzhiyun #define _RAID10_H 4*4882a593Smuzhiyun 5*4882a593Smuzhiyun /* Note: raid10_info.rdev can be set to NULL asynchronously by 6*4882a593Smuzhiyun * raid10_remove_disk. 7*4882a593Smuzhiyun * There are three safe ways to access raid10_info.rdev. 8*4882a593Smuzhiyun * 1/ when holding mddev->reconfig_mutex 9*4882a593Smuzhiyun * 2/ when resync/recovery/reshape is known to be happening - i.e. in code 10*4882a593Smuzhiyun * that is called as part of performing resync/recovery/reshape. 11*4882a593Smuzhiyun * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer 12*4882a593Smuzhiyun * and if it is non-NULL, increment rdev->nr_pending before dropping the 13*4882a593Smuzhiyun * RCU lock. 14*4882a593Smuzhiyun * When .rdev is set to NULL, the nr_pending count checked again and if it has 15*4882a593Smuzhiyun * been incremented, the pointer is put back in .rdev. 16*4882a593Smuzhiyun */ 17*4882a593Smuzhiyun 18*4882a593Smuzhiyun struct raid10_info { 19*4882a593Smuzhiyun struct md_rdev *rdev, *replacement; 20*4882a593Smuzhiyun sector_t head_position; 21*4882a593Smuzhiyun int recovery_disabled; /* matches 22*4882a593Smuzhiyun * mddev->recovery_disabled 23*4882a593Smuzhiyun * when we shouldn't try 24*4882a593Smuzhiyun * recovering this device. 25*4882a593Smuzhiyun */ 26*4882a593Smuzhiyun }; 27*4882a593Smuzhiyun 28*4882a593Smuzhiyun struct r10conf { 29*4882a593Smuzhiyun struct mddev *mddev; 30*4882a593Smuzhiyun struct raid10_info *mirrors; 31*4882a593Smuzhiyun struct raid10_info *mirrors_new, *mirrors_old; 32*4882a593Smuzhiyun spinlock_t device_lock; 33*4882a593Smuzhiyun 34*4882a593Smuzhiyun /* geometry */ 35*4882a593Smuzhiyun struct geom { 36*4882a593Smuzhiyun int raid_disks; 37*4882a593Smuzhiyun int near_copies; /* number of copies laid out 38*4882a593Smuzhiyun * raid0 style */ 39*4882a593Smuzhiyun int far_copies; /* number of copies laid out 40*4882a593Smuzhiyun * at large strides across drives 41*4882a593Smuzhiyun */ 42*4882a593Smuzhiyun int far_offset; /* far_copies are offset by 1 43*4882a593Smuzhiyun * stripe instead of many 44*4882a593Smuzhiyun */ 45*4882a593Smuzhiyun sector_t stride; /* distance between far copies. 46*4882a593Smuzhiyun * This is size / far_copies unless 47*4882a593Smuzhiyun * far_offset, in which case it is 48*4882a593Smuzhiyun * 1 stripe. 49*4882a593Smuzhiyun */ 50*4882a593Smuzhiyun int far_set_size; /* The number of devices in a set, 51*4882a593Smuzhiyun * where a 'set' are devices that 52*4882a593Smuzhiyun * contain far/offset copies of 53*4882a593Smuzhiyun * each other. 54*4882a593Smuzhiyun */ 55*4882a593Smuzhiyun int chunk_shift; /* shift from chunks to sectors */ 56*4882a593Smuzhiyun sector_t chunk_mask; 57*4882a593Smuzhiyun } prev, geo; 58*4882a593Smuzhiyun int copies; /* near_copies * far_copies. 59*4882a593Smuzhiyun * must be <= raid_disks 60*4882a593Smuzhiyun */ 61*4882a593Smuzhiyun 62*4882a593Smuzhiyun sector_t dev_sectors; /* temp copy of 63*4882a593Smuzhiyun * mddev->dev_sectors */ 64*4882a593Smuzhiyun sector_t reshape_progress; 65*4882a593Smuzhiyun sector_t reshape_safe; 66*4882a593Smuzhiyun unsigned long reshape_checkpoint; 67*4882a593Smuzhiyun sector_t offset_diff; 68*4882a593Smuzhiyun 69*4882a593Smuzhiyun struct list_head retry_list; 70*4882a593Smuzhiyun /* A separate list of r1bio which just need raid_end_bio_io called. 71*4882a593Smuzhiyun * This mustn't happen for writes which had any errors if the superblock 72*4882a593Smuzhiyun * needs to be written. 73*4882a593Smuzhiyun */ 74*4882a593Smuzhiyun struct list_head bio_end_io_list; 75*4882a593Smuzhiyun 76*4882a593Smuzhiyun /* queue pending writes and submit them on unplug */ 77*4882a593Smuzhiyun struct bio_list pending_bio_list; 78*4882a593Smuzhiyun int pending_count; 79*4882a593Smuzhiyun 80*4882a593Smuzhiyun spinlock_t resync_lock; 81*4882a593Smuzhiyun atomic_t nr_pending; 82*4882a593Smuzhiyun int nr_waiting; 83*4882a593Smuzhiyun int nr_queued; 84*4882a593Smuzhiyun int barrier; 85*4882a593Smuzhiyun int array_freeze_pending; 86*4882a593Smuzhiyun sector_t next_resync; 87*4882a593Smuzhiyun int fullsync; /* set to 1 if a full sync is needed, 88*4882a593Smuzhiyun * (fresh device added). 89*4882a593Smuzhiyun * Cleared when a sync completes. 90*4882a593Smuzhiyun */ 91*4882a593Smuzhiyun int have_replacement; /* There is at least one 92*4882a593Smuzhiyun * replacement device. 93*4882a593Smuzhiyun */ 94*4882a593Smuzhiyun wait_queue_head_t wait_barrier; 95*4882a593Smuzhiyun 96*4882a593Smuzhiyun mempool_t r10bio_pool; 97*4882a593Smuzhiyun mempool_t r10buf_pool; 98*4882a593Smuzhiyun struct page *tmppage; 99*4882a593Smuzhiyun struct bio_set bio_split; 100*4882a593Smuzhiyun 101*4882a593Smuzhiyun /* When taking over an array from a different personality, we store 102*4882a593Smuzhiyun * the new thread here until we fully activate the array. 103*4882a593Smuzhiyun */ 104*4882a593Smuzhiyun struct md_thread *thread; 105*4882a593Smuzhiyun 106*4882a593Smuzhiyun /* 107*4882a593Smuzhiyun * Keep track of cluster resync window to send to other nodes. 108*4882a593Smuzhiyun */ 109*4882a593Smuzhiyun sector_t cluster_sync_low; 110*4882a593Smuzhiyun sector_t cluster_sync_high; 111*4882a593Smuzhiyun }; 112*4882a593Smuzhiyun 113*4882a593Smuzhiyun /* 114*4882a593Smuzhiyun * this is our 'private' RAID10 bio. 115*4882a593Smuzhiyun * 116*4882a593Smuzhiyun * it contains information about what kind of IO operations were started 117*4882a593Smuzhiyun * for this RAID10 operation, and about their status: 118*4882a593Smuzhiyun */ 119*4882a593Smuzhiyun 120*4882a593Smuzhiyun struct r10bio { 121*4882a593Smuzhiyun atomic_t remaining; /* 'have we finished' count, 122*4882a593Smuzhiyun * used from IRQ handlers 123*4882a593Smuzhiyun */ 124*4882a593Smuzhiyun sector_t sector; /* virtual sector number */ 125*4882a593Smuzhiyun int sectors; 126*4882a593Smuzhiyun unsigned long state; 127*4882a593Smuzhiyun struct mddev *mddev; 128*4882a593Smuzhiyun /* 129*4882a593Smuzhiyun * original bio going to /dev/mdx 130*4882a593Smuzhiyun */ 131*4882a593Smuzhiyun struct bio *master_bio; 132*4882a593Smuzhiyun /* 133*4882a593Smuzhiyun * if the IO is in READ direction, then this is where we read 134*4882a593Smuzhiyun */ 135*4882a593Smuzhiyun int read_slot; 136*4882a593Smuzhiyun 137*4882a593Smuzhiyun struct list_head retry_list; 138*4882a593Smuzhiyun /* 139*4882a593Smuzhiyun * if the IO is in WRITE direction, then multiple bios are used, 140*4882a593Smuzhiyun * one for each copy. 141*4882a593Smuzhiyun * When resyncing we also use one for each copy. 142*4882a593Smuzhiyun * When reconstructing, we use 2 bios, one for read, one for write. 143*4882a593Smuzhiyun * We choose the number when they are allocated. 144*4882a593Smuzhiyun * We sometimes need an extra bio to write to the replacement. 145*4882a593Smuzhiyun */ 146*4882a593Smuzhiyun struct r10dev { 147*4882a593Smuzhiyun struct bio *bio; 148*4882a593Smuzhiyun union { 149*4882a593Smuzhiyun struct bio *repl_bio; /* used for resync and 150*4882a593Smuzhiyun * writes */ 151*4882a593Smuzhiyun struct md_rdev *rdev; /* used for reads 152*4882a593Smuzhiyun * (read_slot >= 0) */ 153*4882a593Smuzhiyun }; 154*4882a593Smuzhiyun sector_t addr; 155*4882a593Smuzhiyun int devnum; 156*4882a593Smuzhiyun } devs[]; 157*4882a593Smuzhiyun }; 158*4882a593Smuzhiyun 159*4882a593Smuzhiyun /* bits for r10bio.state */ 160*4882a593Smuzhiyun enum r10bio_state { 161*4882a593Smuzhiyun R10BIO_Uptodate, 162*4882a593Smuzhiyun R10BIO_IsSync, 163*4882a593Smuzhiyun R10BIO_IsRecover, 164*4882a593Smuzhiyun R10BIO_IsReshape, 165*4882a593Smuzhiyun R10BIO_Degraded, 166*4882a593Smuzhiyun /* Set ReadError on bios that experience a read error 167*4882a593Smuzhiyun * so that raid10d knows what to do with them. 168*4882a593Smuzhiyun */ 169*4882a593Smuzhiyun R10BIO_ReadError, 170*4882a593Smuzhiyun /* If a write for this request means we can clear some 171*4882a593Smuzhiyun * known-bad-block records, we set this flag. 172*4882a593Smuzhiyun */ 173*4882a593Smuzhiyun R10BIO_MadeGood, 174*4882a593Smuzhiyun R10BIO_WriteError, 175*4882a593Smuzhiyun /* During a reshape we might be performing IO on the 176*4882a593Smuzhiyun * 'previous' part of the array, in which case this 177*4882a593Smuzhiyun * flag is set 178*4882a593Smuzhiyun */ 179*4882a593Smuzhiyun R10BIO_Previous, 180*4882a593Smuzhiyun /* failfast devices did receive failfast requests. */ 181*4882a593Smuzhiyun R10BIO_FailFast, 182*4882a593Smuzhiyun }; 183*4882a593Smuzhiyun #endif 184