Lines Matching +full:entry +full:- +full:latency
1 // SPDX-License-Identifier: GPL-2.0
3 * The Kyber I/O scheduler. Controls latency by throttling queue depths using
11 #include <linux/blk-mq.h>
17 #include "blk-mq.h"
18 #include "blk-mq-debugfs.h"
19 #include "blk-mq-sched.h"
20 #include "blk-mq-tag.h"
54 * Maximum device-wide depth for each scheduling domain.
68 * Default latency targets for each scheduling domain.
89 * to the target latency:
91 * <= 1/4 * target latency
92 * <= 1/2 * target latency
93 * <= 3/4 * target latency
94 * <= target latency
95 * <= 1 1/4 * target latency
96 * <= 1 1/2 * target latency
97 * <= 1 3/4 * target latency
98 * > 1 3/4 * target latency
102 * The width of the latency histogram buckets is
103 * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
107 * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
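To make the bucket list above concrete, here is a minimal standalone sketch (assuming KYBER_LATENCY_SHIFT == 2, i.e. eight buckets of width target/4) that prints the upper bound of each bucket as a fraction of the target latency:

/* Illustration only: prints the bucket boundaries listed in the comment above. */
#include <stdio.h>

#define KYBER_LATENCY_SHIFT	2	/* assumed value: bucket width is target / 4 */
#define KYBER_LATENCY_BUCKETS	(1 << (KYBER_LATENCY_SHIFT + 1))

int main(void)
{
	int bucket;

	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++)
		printf("bucket %d: <= %d/%d * target latency\n",
		       bucket, bucket + 1, 1 << KYBER_LATENCY_SHIFT);
	printf("bucket %d:  > %d/%d * target latency\n",
	       KYBER_LATENCY_BUCKETS - 1, KYBER_LATENCY_BUCKETS - 1,
	       1 << KYBER_LATENCY_SHIFT);
	return 0;
}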
116 * We measure both the total latency and the I/O latency (i.e., latency after
130 * Per-cpu latency histograms: total latency and I/O latency for each scheduling
139 * we use request->mq_ctx->index_hw to index the kcq in khd.
154 * Each scheduling domain has a limited number of in-flight requests
155 * device-wide, limited by these tokens.
160 * Async request percentage, converted to per-word depth for
213 unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; in flush_latency_buckets()
214 atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type]; in flush_latency_buckets()
222 * Calculate the histogram bucket with the given percentile rank, or -1 if there
229 unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; in calculate_percentile()
236 return -1; in calculate_percentile()
242 if (!kqd->latency_timeout[sched_domain]) in calculate_percentile()
243 kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL); in calculate_percentile()
245 time_is_after_jiffies(kqd->latency_timeout[sched_domain])) { in calculate_percentile()
246 return -1; in calculate_percentile()
248 kqd->latency_timeout[sched_domain] = 0; in calculate_percentile()
251 for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) { in calculate_percentile()
254 percentile_samples -= buckets[bucket]; in calculate_percentile()
256 memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type])); in calculate_percentile()
258 trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain], in calculate_percentile()
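As a rough userspace model of the walk in calculate_percentile() above, the hypothetical percentile_bucket() below sums the histogram, converts the requested percentile into a sample count, and returns the first bucket where that count is exhausted (the kernel's minimum-sample and timeout handling shown above is elided here):

/* Sketch, not kernel code: find the bucket holding the requested percentile. */
#include <stdio.h>

#define NUM_BUCKETS 8	/* assumed: KYBER_LATENCY_BUCKETS with KYBER_LATENCY_SHIFT == 2 */

static int percentile_bucket(const unsigned int *buckets, unsigned int percentile)
{
	unsigned int samples = 0, wanted;
	int bucket;

	for (bucket = 0; bucket < NUM_BUCKETS; bucket++)
		samples += buckets[bucket];
	if (!samples)
		return -1;	/* nothing to report yet */

	/* Number of samples that may lie at or below the answer. */
	wanted = samples * percentile / 100;
	for (bucket = 0; bucket < NUM_BUCKETS - 1; bucket++) {
		if (buckets[bucket] >= wanted)
			break;
		wanted -= buckets[bucket];
	}
	return bucket;
}

int main(void)
{
	/* 100 samples, most of them fast: the 90th percentile falls in bucket 3. */
	unsigned int hist[NUM_BUCKETS] = { 40, 30, 15, 10, 3, 1, 1, 0 };

	printf("p90 bucket = %d\n", percentile_bucket(hist, 90));
	return 0;
}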
269 if (depth != kqd->domain_tokens[sched_domain].sb.depth) { in kyber_resize_domain()
270 sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); in kyber_resize_domain()
271 trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain], in kyber_resize_domain()
283 /* Sum all of the per-cpu latency histograms. */ in kyber_timer_fn()
287 cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu); in kyber_timer_fn()
297 * Check if any domains have a high I/O latency, which might indicate in kyber_timer_fn()
323 * necessarily have enough samples to calculate the latency in kyber_timer_fn()
327 * reset it to -1. in kyber_timer_fn()
331 p99 = kqd->domain_p99[sched_domain]; in kyber_timer_fn()
332 kqd->domain_p99[sched_domain] = -1; in kyber_timer_fn()
334 kqd->domain_p99[sched_domain] = p99; in kyber_timer_fn()
340 * If this domain has bad latency, throttle less. Otherwise, in kyber_timer_fn()
343 * The new depth is scaled linearly with the p99 latency vs the in kyber_timer_fn()
344 * latency target. E.g., if the p99 is 3/4 of the target, then in kyber_timer_fn()
349 orig_depth = kqd->domain_tokens[sched_domain].sb.depth; in kyber_timer_fn()
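The rescale described in the comment above is linear in the p99 bucket. Below is a sketch under the assumption that the new depth is orig_depth * (p99_bucket + 1) / 4, which matches the 3/4 example the comment starts to give (a p99 in the "<= 3/4 * target" bucket shrinks the depth to 3/4 of its current value):

/* Illustration of the linear depth rescale described in the comment above. */
#include <stdio.h>

#define LATENCY_SHIFT	2	/* assumed: bucket width is target / 4 */

/* p99_bucket is the histogram bucket index of the 99th percentile latency. */
static unsigned int rescale_depth(unsigned int orig_depth, unsigned int p99_bucket)
{
	unsigned int depth = (orig_depth * (p99_bucket + 1)) >> LATENCY_SHIFT;

	return depth ? depth : 1;	/* never throttle all the way to zero */
}

int main(void)
{
	/* p99 <= 3/4 of target (bucket 2): 64 -> 48.  p99 > 1 3/4 of target (bucket 7): 64 -> 128. */
	printf("%u %u\n", rescale_depth(64, 2), rescale_depth(64, 7));
	return 0;
}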
362 return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift; in kyber_sched_tags_shift()
369 int ret = -ENOMEM; in kyber_queue_data_alloc()
372 kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); in kyber_queue_data_alloc()
376 kqd->q = q; in kyber_queue_data_alloc()
378 kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency, in kyber_queue_data_alloc()
380 if (!kqd->cpu_latency) in kyber_queue_data_alloc()
383 timer_setup(&kqd->timer, kyber_timer_fn, 0); in kyber_queue_data_alloc()
388 ret = sbitmap_queue_init_node(&kqd->domain_tokens[i], in kyber_queue_data_alloc()
389 kyber_depth[i], -1, false, in kyber_queue_data_alloc()
390 GFP_KERNEL, q->node); in kyber_queue_data_alloc()
392 while (--i >= 0) in kyber_queue_data_alloc()
393 sbitmap_queue_free(&kqd->domain_tokens[i]); in kyber_queue_data_alloc()
399 kqd->domain_p99[i] = -1; in kyber_queue_data_alloc()
400 kqd->latency_targets[i] = kyber_latency_targets[i]; in kyber_queue_data_alloc()
404 kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; in kyber_queue_data_alloc()
409 free_percpu(kqd->cpu_latency); in kyber_queue_data_alloc()
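A numeric illustration of the async_depth calculation above; the 75% value for KYBER_ASYNC_PERCENT and the sbitmap word shift of 6 (64 bits per word) are assumptions for the example, while the real shift is taken from the first hardware queue's scheduler tags as shown in kyber_sched_tags_shift():

/* Illustration of the async_depth calculation shown above (values assumed). */
#include <stdio.h>

#define KYBER_ASYNC_PERCENT 75	/* assumed percentage reserved for async requests */

int main(void)
{
	unsigned int shift = 6;		/* assumed sbitmap word shift: 64 bits per word */
	unsigned int async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;

	/* With 64 tags per sbitmap word, async requests may use at most 48 of them. */
	printf("async_depth = %u\n", async_depth);
	return 0;
}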
423 return -ENOMEM; in kyber_init_sched()
427 kobject_put(&eq->kobj); in kyber_init_sched()
433 eq->elevator_data = kqd; in kyber_init_sched()
434 q->elevator = eq; in kyber_init_sched()
441 struct kyber_queue_data *kqd = e->elevator_data; in kyber_exit_sched()
444 del_timer_sync(&kqd->timer); in kyber_exit_sched()
447 sbitmap_queue_free(&kqd->domain_tokens[i]); in kyber_exit_sched()
448 free_percpu(kqd->cpu_latency); in kyber_exit_sched()
456 spin_lock_init(&kcq->lock); in kyber_ctx_queue_init()
458 INIT_LIST_HEAD(&kcq->rq_list[i]); in kyber_ctx_queue_init()
463 struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; in kyber_init_hctx()
467 khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node); in kyber_init_hctx()
469 return -ENOMEM; in kyber_init_hctx()
471 khd->kcqs = kmalloc_array_node(hctx->nr_ctx, in kyber_init_hctx()
473 GFP_KERNEL, hctx->numa_node); in kyber_init_hctx()
474 if (!khd->kcqs) in kyber_init_hctx()
477 for (i = 0; i < hctx->nr_ctx; i++) in kyber_init_hctx()
478 kyber_ctx_queue_init(&khd->kcqs[i]); in kyber_init_hctx()
481 if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx, in kyber_init_hctx()
482 ilog2(8), GFP_KERNEL, hctx->numa_node)) { in kyber_init_hctx()
483 while (--i >= 0) in kyber_init_hctx()
484 sbitmap_free(&khd->kcq_map[i]); in kyber_init_hctx()
489 spin_lock_init(&khd->lock); in kyber_init_hctx()
492 INIT_LIST_HEAD(&khd->rqs[i]); in kyber_init_hctx()
493 khd->domain_wait[i].sbq = NULL; in kyber_init_hctx()
494 init_waitqueue_func_entry(&khd->domain_wait[i].wait, in kyber_init_hctx()
496 khd->domain_wait[i].wait.private = hctx; in kyber_init_hctx()
497 INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry); in kyber_init_hctx()
498 atomic_set(&khd->wait_index[i], 0); in kyber_init_hctx()
501 khd->cur_domain = 0; in kyber_init_hctx()
502 khd->batching = 0; in kyber_init_hctx()
504 hctx->sched_data = khd; in kyber_init_hctx()
505 sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags, in kyber_init_hctx()
506 kqd->async_depth); in kyber_init_hctx()
511 kfree(khd->kcqs); in kyber_init_hctx()
514 return -ENOMEM; in kyber_init_hctx()
519 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_exit_hctx()
523 sbitmap_free(&khd->kcq_map[i]); in kyber_exit_hctx()
524 kfree(khd->kcqs); in kyber_exit_hctx()
525 kfree(hctx->sched_data); in kyber_exit_hctx()
530 return (long)rq->elv.priv[0]; in rq_get_domain_token()
535 rq->elv.priv[0] = (void *)(long)token; in rq_set_domain_token()
545 if (nr != -1) { in rq_clear_domain_token()
546 sched_domain = kyber_sched_domain(rq->cmd_flags); in rq_clear_domain_token()
547 sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr, in rq_clear_domain_token()
548 rq->mq_ctx->cpu); in rq_clear_domain_token()
555 * We use the scheduler tags as per-hardware queue queueing tokens. in kyber_limit_depth()
559 struct kyber_queue_data *kqd = data->q->elevator->elevator_data; in kyber_limit_depth()
561 data->shallow_depth = kqd->async_depth; in kyber_limit_depth()
569 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); in kyber_bio_merge()
570 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_bio_merge()
571 struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]]; in kyber_bio_merge()
572 unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); in kyber_bio_merge()
573 struct list_head *rq_list = &kcq->rq_list[sched_domain]; in kyber_bio_merge()
576 spin_lock(&kcq->lock); in kyber_bio_merge()
577 merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); in kyber_bio_merge()
578 spin_unlock(&kcq->lock); in kyber_bio_merge()
585 rq_set_domain_token(rq, -1); in kyber_prepare_request()
591 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_insert_requests()
595 unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags); in kyber_insert_requests()
596 struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]]; in kyber_insert_requests()
597 struct list_head *head = &kcq->rq_list[sched_domain]; in kyber_insert_requests()
599 spin_lock(&kcq->lock); in kyber_insert_requests()
601 list_move(&rq->queuelist, head); in kyber_insert_requests()
603 list_move_tail(&rq->queuelist, head); in kyber_insert_requests()
604 sbitmap_set_bit(&khd->kcq_map[sched_domain], in kyber_insert_requests()
605 rq->mq_ctx->index_hw[hctx->type]); in kyber_insert_requests()
607 spin_unlock(&kcq->lock); in kyber_insert_requests()
613 struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; in kyber_finish_request()
620 u64 target, u64 latency) in add_latency_sample() argument
625 if (latency > 0) { in add_latency_sample()
627 bucket = min_t(unsigned int, div64_u64(latency - 1, divisor), in add_latency_sample()
628 KYBER_LATENCY_BUCKETS - 1); in add_latency_sample()
633 atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]); in add_latency_sample()
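A standalone model of the bucket computation above, with div64_u64() and min_t() replaced by plain C. The 2 ms target is only an example value; the divisor follows the earlier comment that each bucket is 1/(1 << KYBER_LATENCY_SHIFT) of the target wide:

/* Userspace model of the bucket index computation shown above. */
#include <stdio.h>
#include <stdint.h>

#define LATENCY_SHIFT	2			/* assumed KYBER_LATENCY_SHIFT */
#define LATENCY_BUCKETS	(1 << (LATENCY_SHIFT + 1))

static unsigned int latency_to_bucket(uint64_t target, uint64_t latency)
{
	uint64_t divisor, bucket;

	if (!latency)
		return 0;			/* zero latency lands in the first bucket */
	divisor = target >> LATENCY_SHIFT;	/* bucket width: 1/4 of the target */
	if (!divisor)
		divisor = 1;
	bucket = (latency - 1) / divisor;
	if (bucket > LATENCY_BUCKETS - 1)
		bucket = LATENCY_BUCKETS - 1;	/* clamp slower samples into the last bucket */
	return (unsigned int)bucket;
}

int main(void)
{
	uint64_t target = 2000000;		/* example target: 2 ms, in nanoseconds */

	/* 1.2 ms is between 1/2 and 3/4 of the target, so it falls in bucket 2. */
	printf("bucket = %u\n", latency_to_bucket(target, 1200000));
	return 0;
}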
638 struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; in kyber_completed_request()
643 sched_domain = kyber_sched_domain(rq->cmd_flags); in kyber_completed_request()
647 cpu_latency = get_cpu_ptr(kqd->cpu_latency); in kyber_completed_request()
648 target = kqd->latency_targets[sched_domain]; in kyber_completed_request()
650 target, now - rq->start_time_ns); in kyber_completed_request()
652 now - rq->io_start_time_ns); in kyber_completed_request()
653 put_cpu_ptr(kqd->cpu_latency); in kyber_completed_request()
655 timer_reduce(&kqd->timer, jiffies + HZ / 10); in kyber_completed_request()
667 struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr]; in flush_busy_kcq()
669 spin_lock(&kcq->lock); in flush_busy_kcq()
670 list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain], in flush_busy_kcq()
671 flush_data->list); in flush_busy_kcq()
673 spin_unlock(&kcq->lock); in flush_busy_kcq()
688 sbitmap_for_each_set(&khd->kcq_map[sched_domain], in kyber_flush_busy_kcqs()
695 struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private); in kyber_domain_wake()
707 unsigned int sched_domain = khd->cur_domain; in kyber_get_domain_token()
708 struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain]; in kyber_get_domain_token()
709 struct sbq_wait *wait = &khd->domain_wait[sched_domain]; in kyber_get_domain_token()
718 * khd->lock, but we still need to be careful about the waker. in kyber_get_domain_token()
720 if (nr < 0 && list_empty_careful(&wait->wait.entry)) { in kyber_get_domain_token()
722 &khd->wait_index[sched_domain]); in kyber_get_domain_token()
723 khd->domain_ws[sched_domain] = ws; in kyber_get_domain_token()
736 * progress. It's possible that the waker already deleted the entry in kyber_get_domain_token()
740 if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) { in kyber_get_domain_token()
741 ws = khd->domain_ws[sched_domain]; in kyber_get_domain_token()
742 spin_lock_irq(&ws->wait.lock); in kyber_get_domain_token()
744 spin_unlock_irq(&ws->wait.lock); in kyber_get_domain_token()
759 rqs = &khd->rqs[khd->cur_domain]; in kyber_dispatch_cur_domain()
766 * khd->lock serializes the flushes, so if we observed any bit set in in kyber_dispatch_cur_domain()
773 khd->batching++; in kyber_dispatch_cur_domain()
775 list_del_init(&rq->queuelist); in kyber_dispatch_cur_domain()
778 trace_kyber_throttled(kqd->q, in kyber_dispatch_cur_domain()
779 kyber_domain_names[khd->cur_domain]); in kyber_dispatch_cur_domain()
781 } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) { in kyber_dispatch_cur_domain()
784 kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs); in kyber_dispatch_cur_domain()
786 khd->batching++; in kyber_dispatch_cur_domain()
788 list_del_init(&rq->queuelist); in kyber_dispatch_cur_domain()
791 trace_kyber_throttled(kqd->q, in kyber_dispatch_cur_domain()
792 kyber_domain_names[khd->cur_domain]); in kyber_dispatch_cur_domain()
802 struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; in kyber_dispatch_request()
803 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_dispatch_request()
807 spin_lock(&khd->lock); in kyber_dispatch_request()
813 if (khd->batching < kyber_batch_size[khd->cur_domain]) { in kyber_dispatch_request()
828 khd->batching = 0; in kyber_dispatch_request()
830 if (khd->cur_domain == KYBER_NUM_DOMAINS - 1) in kyber_dispatch_request()
831 khd->cur_domain = 0; in kyber_dispatch_request()
833 khd->cur_domain++; in kyber_dispatch_request()
842 spin_unlock(&khd->lock); in kyber_dispatch_request()
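A toy model of the batching and domain rotation shown above: dispatch at most a per-domain batch, then advance cur_domain round-robin. The domain count and batch sizes are placeholders, and the real kyber_dispatch_request() also flushes the per-ctx lists and handles running out of domain tokens:

/* Toy model of Kyber's per-domain round-robin dispatch with batching. */
#include <stdio.h>

#define NUM_DOMAINS 4	/* assumed: one domain each for reads, writes, discards, other */

struct toy_khd {
	unsigned int cur_domain;
	unsigned int batching;
};

/* Placeholder batch sizes; the kernel uses its own kyber_batch_size[] table. */
static const unsigned int batch_size[NUM_DOMAINS] = { 16, 8, 1, 1 };

/* Account one dispatched request; rotate to the next domain when the batch is full. */
static void toy_dispatched(struct toy_khd *khd)
{
	if (++khd->batching < batch_size[khd->cur_domain])
		return;
	khd->batching = 0;
	if (khd->cur_domain == NUM_DOMAINS - 1)
		khd->cur_domain = 0;
	else
		khd->cur_domain++;
}

int main(void)
{
	struct toy_khd khd = { 0, 0 };
	int i;

	/* After a full batch of 16 reads, dispatch moves on to the write domain. */
	for (i = 0; i < 16; i++)
		toy_dispatched(&khd);
	printf("cur_domain = %u, batching = %u\n", khd.cur_domain, khd.batching);
	return 0;
}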
848 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_has_work()
852 if (!list_empty_careful(&khd->rqs[i]) || in kyber_has_work()
853 sbitmap_any_bit_set(&khd->kcq_map[i])) in kyber_has_work()
864 struct kyber_queue_data *kqd = e->elevator_data; \
866 return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
872 struct kyber_queue_data *kqd = e->elevator_data; \
880 kqd->latency_targets[domain] = nsec; \
901 struct kyber_queue_data *kqd = q->elevator->elevator_data; \
903 sbitmap_queue_show(&kqd->domain_tokens[domain], m); \
908 __acquires(&khd->lock) \
910 struct blk_mq_hw_ctx *hctx = m->private; \
911 struct kyber_hctx_data *khd = hctx->sched_data; \
913 spin_lock(&khd->lock); \
914 return seq_list_start(&khd->rqs[domain], *pos); \
920 struct blk_mq_hw_ctx *hctx = m->private; \
921 struct kyber_hctx_data *khd = hctx->sched_data; \
923 return seq_list_next(v, &khd->rqs[domain], pos); \
927 __releases(&khd->lock) \
929 struct blk_mq_hw_ctx *hctx = m->private; \
930 struct kyber_hctx_data *khd = hctx->sched_data; \
932 spin_unlock(&khd->lock); \
945 struct kyber_hctx_data *khd = hctx->sched_data; \
946 wait_queue_entry_t *wait = &khd->domain_wait[domain].wait; \
948 seq_printf(m, "%d\n", !list_empty_careful(&wait->entry)); \
960 struct kyber_queue_data *kqd = q->elevator->elevator_data; in KYBER_DEBUGFS_DOMAIN_ATTRS()
962 seq_printf(m, "%u\n", kqd->async_depth); in KYBER_DEBUGFS_DOMAIN_ATTRS()
969 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_cur_domain_show()
971 seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]); in kyber_cur_domain_show()
978 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_batching_show()
980 seq_printf(m, "%u\n", khd->batching); in kyber_batching_show()