1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0-only */ 2*4882a593Smuzhiyun /* 3*4882a593Smuzhiyun * Block Translation Table library 4*4882a593Smuzhiyun * Copyright (c) 2014-2015, Intel Corporation. 5*4882a593Smuzhiyun */ 6*4882a593Smuzhiyun 7*4882a593Smuzhiyun #ifndef _LINUX_BTT_H 8*4882a593Smuzhiyun #define _LINUX_BTT_H 9*4882a593Smuzhiyun 10*4882a593Smuzhiyun #include <linux/badblocks.h> 11*4882a593Smuzhiyun #include <linux/types.h> 12*4882a593Smuzhiyun 13*4882a593Smuzhiyun #define BTT_SIG_LEN 16 14*4882a593Smuzhiyun #define BTT_SIG "BTT_ARENA_INFO\0" 15*4882a593Smuzhiyun #define MAP_ENT_SIZE 4 16*4882a593Smuzhiyun #define MAP_TRIM_SHIFT 31 17*4882a593Smuzhiyun #define MAP_TRIM_MASK (1 << MAP_TRIM_SHIFT) 18*4882a593Smuzhiyun #define MAP_ERR_SHIFT 30 19*4882a593Smuzhiyun #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) 20*4882a593Smuzhiyun #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) 21*4882a593Smuzhiyun #define MAP_ENT_NORMAL 0xC0000000 22*4882a593Smuzhiyun #define LOG_GRP_SIZE sizeof(struct log_group) 23*4882a593Smuzhiyun #define LOG_ENT_SIZE sizeof(struct log_entry) 24*4882a593Smuzhiyun #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ 25*4882a593Smuzhiyun #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ 26*4882a593Smuzhiyun #define RTT_VALID (1UL << 31) 27*4882a593Smuzhiyun #define RTT_INVALID 0 28*4882a593Smuzhiyun #define BTT_PG_SIZE 4096 29*4882a593Smuzhiyun #define BTT_DEFAULT_NFREE ND_MAX_LANES 30*4882a593Smuzhiyun #define LOG_SEQ_INIT 1 31*4882a593Smuzhiyun 32*4882a593Smuzhiyun #define IB_FLAG_ERROR 0x00000001 33*4882a593Smuzhiyun #define IB_FLAG_ERROR_MASK 0x00000001 34*4882a593Smuzhiyun 35*4882a593Smuzhiyun #define ent_lba(ent) (ent & MAP_LBA_MASK) 36*4882a593Smuzhiyun #define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK)) 37*4882a593Smuzhiyun #define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK)) 38*4882a593Smuzhiyun #define set_e_flag(ent) (ent |= MAP_ERR_MASK) 39*4882a593Smuzhiyun /* 'normal' is both e and z flags set */ 40*4882a593Smuzhiyun #define ent_normal(ent) (ent_e_flag(ent) && ent_z_flag(ent)) 41*4882a593Smuzhiyun 42*4882a593Smuzhiyun enum btt_init_state { 43*4882a593Smuzhiyun INIT_UNCHECKED = 0, 44*4882a593Smuzhiyun INIT_NOTFOUND, 45*4882a593Smuzhiyun INIT_READY 46*4882a593Smuzhiyun }; 47*4882a593Smuzhiyun 48*4882a593Smuzhiyun /* 49*4882a593Smuzhiyun * A log group represents one log 'lane', and consists of four log entries. 50*4882a593Smuzhiyun * Two of the four entries are valid entries, and the remaining two are 51*4882a593Smuzhiyun * padding. Due to an old bug in the padding location, we need to perform a 52*4882a593Smuzhiyun * test to determine the padding scheme being used, and use that scheme 53*4882a593Smuzhiyun * thereafter. 54*4882a593Smuzhiyun * 55*4882a593Smuzhiyun * In kernels prior to 4.15, 'log group' would have actual log entries at 56*4882a593Smuzhiyun * indices (0, 2) and padding at indices (1, 3), where as the correct/updated 57*4882a593Smuzhiyun * format has log entries at indices (0, 1) and padding at indices (2, 3). 58*4882a593Smuzhiyun * 59*4882a593Smuzhiyun * Old (pre 4.15) format: 60*4882a593Smuzhiyun * +-----------------+-----------------+ 61*4882a593Smuzhiyun * | ent[0] | ent[1] | 62*4882a593Smuzhiyun * | 16B | 16B | 63*4882a593Smuzhiyun * | lba/old/new/seq | pad | 64*4882a593Smuzhiyun * +-----------------------------------+ 65*4882a593Smuzhiyun * | ent[2] | ent[3] | 66*4882a593Smuzhiyun * | 16B | 16B | 67*4882a593Smuzhiyun * | lba/old/new/seq | pad | 68*4882a593Smuzhiyun * +-----------------+-----------------+ 69*4882a593Smuzhiyun * 70*4882a593Smuzhiyun * New format: 71*4882a593Smuzhiyun * +-----------------+-----------------+ 72*4882a593Smuzhiyun * | ent[0] | ent[1] | 73*4882a593Smuzhiyun * | 16B | 16B | 74*4882a593Smuzhiyun * | lba/old/new/seq | lba/old/new/seq | 75*4882a593Smuzhiyun * +-----------------------------------+ 76*4882a593Smuzhiyun * | ent[2] | ent[3] | 77*4882a593Smuzhiyun * | 16B | 16B | 78*4882a593Smuzhiyun * | pad | pad | 79*4882a593Smuzhiyun * +-----------------+-----------------+ 80*4882a593Smuzhiyun * 81*4882a593Smuzhiyun * We detect during start-up which format is in use, and set 82*4882a593Smuzhiyun * arena->log_index[(0, 1)] with the detected format. 83*4882a593Smuzhiyun */ 84*4882a593Smuzhiyun 85*4882a593Smuzhiyun struct log_entry { 86*4882a593Smuzhiyun __le32 lba; 87*4882a593Smuzhiyun __le32 old_map; 88*4882a593Smuzhiyun __le32 new_map; 89*4882a593Smuzhiyun __le32 seq; 90*4882a593Smuzhiyun }; 91*4882a593Smuzhiyun 92*4882a593Smuzhiyun struct log_group { 93*4882a593Smuzhiyun struct log_entry ent[4]; 94*4882a593Smuzhiyun }; 95*4882a593Smuzhiyun 96*4882a593Smuzhiyun struct btt_sb { 97*4882a593Smuzhiyun u8 signature[BTT_SIG_LEN]; 98*4882a593Smuzhiyun u8 uuid[16]; 99*4882a593Smuzhiyun u8 parent_uuid[16]; 100*4882a593Smuzhiyun __le32 flags; 101*4882a593Smuzhiyun __le16 version_major; 102*4882a593Smuzhiyun __le16 version_minor; 103*4882a593Smuzhiyun __le32 external_lbasize; 104*4882a593Smuzhiyun __le32 external_nlba; 105*4882a593Smuzhiyun __le32 internal_lbasize; 106*4882a593Smuzhiyun __le32 internal_nlba; 107*4882a593Smuzhiyun __le32 nfree; 108*4882a593Smuzhiyun __le32 infosize; 109*4882a593Smuzhiyun __le64 nextoff; 110*4882a593Smuzhiyun __le64 dataoff; 111*4882a593Smuzhiyun __le64 mapoff; 112*4882a593Smuzhiyun __le64 logoff; 113*4882a593Smuzhiyun __le64 info2off; 114*4882a593Smuzhiyun u8 padding[3968]; 115*4882a593Smuzhiyun __le64 checksum; 116*4882a593Smuzhiyun }; 117*4882a593Smuzhiyun 118*4882a593Smuzhiyun struct free_entry { 119*4882a593Smuzhiyun u32 block; 120*4882a593Smuzhiyun u8 sub; 121*4882a593Smuzhiyun u8 seq; 122*4882a593Smuzhiyun u8 has_err; 123*4882a593Smuzhiyun }; 124*4882a593Smuzhiyun 125*4882a593Smuzhiyun struct aligned_lock { 126*4882a593Smuzhiyun union { 127*4882a593Smuzhiyun spinlock_t lock; 128*4882a593Smuzhiyun u8 cacheline_padding[L1_CACHE_BYTES]; 129*4882a593Smuzhiyun }; 130*4882a593Smuzhiyun }; 131*4882a593Smuzhiyun 132*4882a593Smuzhiyun /** 133*4882a593Smuzhiyun * struct arena_info - handle for an arena 134*4882a593Smuzhiyun * @size: Size in bytes this arena occupies on the raw device. 135*4882a593Smuzhiyun * This includes arena metadata. 136*4882a593Smuzhiyun * @external_lba_start: The first external LBA in this arena. 137*4882a593Smuzhiyun * @internal_nlba: Number of internal blocks available in the arena 138*4882a593Smuzhiyun * including nfree reserved blocks 139*4882a593Smuzhiyun * @internal_lbasize: Internal and external lba sizes may be different as 140*4882a593Smuzhiyun * we can round up 'odd' external lbasizes such as 520B 141*4882a593Smuzhiyun * to be aligned. 142*4882a593Smuzhiyun * @external_nlba: Number of blocks contributed by the arena to the number 143*4882a593Smuzhiyun * reported to upper layers. (internal_nlba - nfree) 144*4882a593Smuzhiyun * @external_lbasize: LBA size as exposed to upper layers. 145*4882a593Smuzhiyun * @nfree: A reserve number of 'free' blocks that is used to 146*4882a593Smuzhiyun * handle incoming writes. 147*4882a593Smuzhiyun * @version_major: Metadata layout version major. 148*4882a593Smuzhiyun * @version_minor: Metadata layout version minor. 149*4882a593Smuzhiyun * @sector_size: The Linux sector size - 512 or 4096 150*4882a593Smuzhiyun * @nextoff: Offset in bytes to the start of the next arena. 151*4882a593Smuzhiyun * @infooff: Offset in bytes to the info block of this arena. 152*4882a593Smuzhiyun * @dataoff: Offset in bytes to the data area of this arena. 153*4882a593Smuzhiyun * @mapoff: Offset in bytes to the map area of this arena. 154*4882a593Smuzhiyun * @logoff: Offset in bytes to the log area of this arena. 155*4882a593Smuzhiyun * @info2off: Offset in bytes to the backup info block of this arena. 156*4882a593Smuzhiyun * @freelist: Pointer to in-memory list of free blocks 157*4882a593Smuzhiyun * @rtt: Pointer to in-memory "Read Tracking Table" 158*4882a593Smuzhiyun * @map_locks: Spinlocks protecting concurrent map writes 159*4882a593Smuzhiyun * @nd_btt: Pointer to parent nd_btt structure. 160*4882a593Smuzhiyun * @list: List head for list of arenas 161*4882a593Smuzhiyun * @debugfs_dir: Debugfs dentry 162*4882a593Smuzhiyun * @flags: Arena flags - may signify error states. 163*4882a593Smuzhiyun * @err_lock: Mutex for synchronizing error clearing. 164*4882a593Smuzhiyun * @log_index: Indices of the valid log entries in a log_group 165*4882a593Smuzhiyun * 166*4882a593Smuzhiyun * arena_info is a per-arena handle. Once an arena is narrowed down for an 167*4882a593Smuzhiyun * IO, this struct is passed around for the duration of the IO. 168*4882a593Smuzhiyun */ 169*4882a593Smuzhiyun struct arena_info { 170*4882a593Smuzhiyun u64 size; /* Total bytes for this arena */ 171*4882a593Smuzhiyun u64 external_lba_start; 172*4882a593Smuzhiyun u32 internal_nlba; 173*4882a593Smuzhiyun u32 internal_lbasize; 174*4882a593Smuzhiyun u32 external_nlba; 175*4882a593Smuzhiyun u32 external_lbasize; 176*4882a593Smuzhiyun u32 nfree; 177*4882a593Smuzhiyun u16 version_major; 178*4882a593Smuzhiyun u16 version_minor; 179*4882a593Smuzhiyun u32 sector_size; 180*4882a593Smuzhiyun /* Byte offsets to the different on-media structures */ 181*4882a593Smuzhiyun u64 nextoff; 182*4882a593Smuzhiyun u64 infooff; 183*4882a593Smuzhiyun u64 dataoff; 184*4882a593Smuzhiyun u64 mapoff; 185*4882a593Smuzhiyun u64 logoff; 186*4882a593Smuzhiyun u64 info2off; 187*4882a593Smuzhiyun /* Pointers to other in-memory structures for this arena */ 188*4882a593Smuzhiyun struct free_entry *freelist; 189*4882a593Smuzhiyun u32 *rtt; 190*4882a593Smuzhiyun struct aligned_lock *map_locks; 191*4882a593Smuzhiyun struct nd_btt *nd_btt; 192*4882a593Smuzhiyun struct list_head list; 193*4882a593Smuzhiyun struct dentry *debugfs_dir; 194*4882a593Smuzhiyun /* Arena flags */ 195*4882a593Smuzhiyun u32 flags; 196*4882a593Smuzhiyun struct mutex err_lock; 197*4882a593Smuzhiyun int log_index[2]; 198*4882a593Smuzhiyun }; 199*4882a593Smuzhiyun 200*4882a593Smuzhiyun /** 201*4882a593Smuzhiyun * struct btt - handle for a BTT instance 202*4882a593Smuzhiyun * @btt_disk: Pointer to the gendisk for BTT device 203*4882a593Smuzhiyun * @btt_queue: Pointer to the request queue for the BTT device 204*4882a593Smuzhiyun * @arena_list: Head of the list of arenas 205*4882a593Smuzhiyun * @debugfs_dir: Debugfs dentry 206*4882a593Smuzhiyun * @nd_btt: Parent nd_btt struct 207*4882a593Smuzhiyun * @nlba: Number of logical blocks exposed to the upper layers 208*4882a593Smuzhiyun * after removing the amount of space needed by metadata 209*4882a593Smuzhiyun * @rawsize: Total size in bytes of the available backing device 210*4882a593Smuzhiyun * @lbasize: LBA size as requested and presented to upper layers. 211*4882a593Smuzhiyun * This is sector_size + size of any metadata. 212*4882a593Smuzhiyun * @sector_size: The Linux sector size - 512 or 4096 213*4882a593Smuzhiyun * @lanes: Per-lane spinlocks 214*4882a593Smuzhiyun * @init_lock: Mutex used for the BTT initialization 215*4882a593Smuzhiyun * @init_state: Flag describing the initialization state for the BTT 216*4882a593Smuzhiyun * @num_arenas: Number of arenas in the BTT instance 217*4882a593Smuzhiyun * @phys_bb: Pointer to the namespace's badblocks structure 218*4882a593Smuzhiyun */ 219*4882a593Smuzhiyun struct btt { 220*4882a593Smuzhiyun struct gendisk *btt_disk; 221*4882a593Smuzhiyun struct request_queue *btt_queue; 222*4882a593Smuzhiyun struct list_head arena_list; 223*4882a593Smuzhiyun struct dentry *debugfs_dir; 224*4882a593Smuzhiyun struct nd_btt *nd_btt; 225*4882a593Smuzhiyun u64 nlba; 226*4882a593Smuzhiyun unsigned long long rawsize; 227*4882a593Smuzhiyun u32 lbasize; 228*4882a593Smuzhiyun u32 sector_size; 229*4882a593Smuzhiyun struct nd_region *nd_region; 230*4882a593Smuzhiyun struct mutex init_lock; 231*4882a593Smuzhiyun int init_state; 232*4882a593Smuzhiyun int num_arenas; 233*4882a593Smuzhiyun struct badblocks *phys_bb; 234*4882a593Smuzhiyun }; 235*4882a593Smuzhiyun 236*4882a593Smuzhiyun bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); 237*4882a593Smuzhiyun int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, 238*4882a593Smuzhiyun struct btt_sb *btt_sb); 239*4882a593Smuzhiyun 240*4882a593Smuzhiyun #endif 241