// SPDX-License-Identifier: GPL-2.0
/*
 * Some low level IO code, and hacks for various block layer limitations
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "bset.h"
#include "debug.h"

#include <linux/blkdev.h>

/* Bios with headers */

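/*
 * A struct bbio (declared in bcache.h) bundles a struct bio with the
 * bkey describing which cache device and offset the IO targets, plus a
 * submit timestamp used for the congestion accounting further down.
 * These are allocated from the c->bio_meta mempool so metadata IO can
 * make forward progress even under memory pressure.
 */
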
void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	mempool_free(b, &c->bio_meta);
}

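/*
 * Allocate a metadata bio from the bio_meta mempool; GFP_NOIO because
 * this runs on the IO path. bio_init() sizes the inline bio_vecs for
 * one metadata bucket.
 */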
struct bio *bch_bbio_alloc(struct cache_set *c)
{
	struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
	struct bio *bio = &b->bio;

	bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb));

	return bio;
}

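/*
 * Submit a bbio whose key is already filled in: target sector and
 * device come from the key's first pointer, and the submit time is
 * recorded for the latency tracking in bch_bbio_count_io_errors().
 */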
void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
	bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);

	b->submit_time_us = local_clock_us();
	closure_bio_submit(c, bio, bio->bi_private);
}

void bch_submit_bbio(struct bio *bio, struct cache_set *c,
		     struct bkey *k, unsigned int ptr)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	bch_bkey_copy_single_ptr(&b->key, k, ptr);
	__bch_submit_bbio(bio, c);
}

/* IO errors */
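/*
 * Backing-device error accounting: each failed (non read-ahead) bio
 * bumps dc->io_errors; once dc->error_limit is reached the cached
 * device is put into its error handling path via bch_cached_dev_error().
 */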
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
{
	unsigned int errors;

	WARN_ONCE(!dc, "NULL pointer of struct cached_dev");

	/*
	 * Read-ahead requests on a degraded and recovering md raid
	 * (e.g. raid6) device might fail immediately in the md raid
	 * code; that is not a real hardware media failure, so we
	 * shouldn't count a failed REQ_RAHEAD bio towards dc->io_errors.
	 */
	if (bio->bi_opf & REQ_RAHEAD) {
		pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n",
				    dc->backing_dev_name);
		return;
	}

	errors = atomic_add_return(1, &dc->io_errors);
	if (errors < dc->error_limit)
		pr_err("%s: IO error on backing device, unrecoverable\n",
			dc->backing_dev_name);
	else
		bch_cached_dev_error(dc);
}

void bch_count_io_errors(struct cache *ca,
			 blk_status_t error,
			 int is_read,
			 const char *m)
{
	/*
	 * The halflife of an error is:
	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
	 */
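	/*
	 * (Each decay pass below multiplies the error count by 127/128,
	 * so halving it takes n passes where (127/128)^n = 1/2, i.e.
	 * n = log(2) / log(128/127) ~= 88.4 -- hence the ~= 88 above.)
	 */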

	if (ca->set->error_decay) {
		unsigned int count = atomic_inc_return(&ca->io_count);

		while (count > ca->set->error_decay) {
			unsigned int errors;
			unsigned int old = count;
			unsigned int new = count - ca->set->error_decay;

			/*
			 * First we subtract refresh from count; each time we
			 * successfully do so, we rescale the errors once:
			 */

			count = atomic_cmpxchg(&ca->io_count, old, new);

			if (count == old) {
				count = new;

				errors = atomic_read(&ca->io_errors);
				do {
					old = errors;
					new = ((uint64_t) errors * 127) / 128;
					errors = atomic_cmpxchg(&ca->io_errors,
								old, new);
				} while (old != errors);
			}
		}
	}

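	/*
	 * Error counts are kept shifted left by IO_ERROR_SHIFT so the
	 * 127/128 decay above retains some fractional precision; the
	 * shifted-down value is what gets compared against error_limit.
	 */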
	if (error) {
		unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT,
							&ca->io_errors);
		errors >>= IO_ERROR_SHIFT;

		if (errors < ca->set->error_limit)
			pr_err("%s: IO error on %s%s\n",
			       ca->cache_dev_name, m,
			       is_read ? ", recovering." : ".");
		else
			bch_cache_set_error(ca->set,
					    "%s: too many IO errors %s\n",
					    ca->cache_dev_name, m);
	}
}

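/*
 * Per-IO accounting for bios submitted via bch_submit_bbio(): latency
 * above the configured threshold pushes c->congested down (bcache uses
 * this counter elsewhere when deciding whether to bypass the cache),
 * and any error is charged to the cache device the key pointed at.
 */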
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
			      blk_status_t error, const char *m)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct cache *ca = PTR_CACHE(c, &b->key, 0);
	int is_read = (bio_data_dir(bio) == READ ? 1 : 0);

	unsigned int threshold = op_is_write(bio_op(bio))
		? c->congested_write_threshold_us
		: c->congested_read_threshold_us;

	if (threshold) {
		unsigned int t = local_clock_us();
		int us = t - b->submit_time_us;
		int congested = atomic_read(&c->congested);

		if (us > (int) threshold) {
			int ms = us / 1024;

			c->congested_last_us = t;

			ms = min(ms, CONGESTED_MAX + congested);
			atomic_sub(ms, &c->congested);
		} else if (congested < 0)
			atomic_inc(&c->congested);
	}

	bch_count_io_errors(ca, error, is_read, m);
}

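/*
 * Common completion path for bbios: account latency/errors, drop the
 * bio reference, and release the closure the submitter stored in
 * bi_private.
 */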
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
		    blk_status_t error, const char *m)
{
	struct closure *cl = bio->bi_private;

	bch_bbio_count_io_errors(c, bio, error, m);
	bio_put(bio);
	closure_put(cl);
}