
TOMOYO Linux Cross Reference
Linux/block/blk-mq-tag.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
 * fairer distribution of tags between multiple submitters when a shared tag map
 * is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/blk-mq.h>
#include <linux/delay.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"

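/*
 * For reference, a condensed and possibly inexact sketch of struct
 * blk_mq_tags (defined in blk-mq-tag.h) as used throughout this file: the
 * tag map pairs two sbitmap_queues, one for normal tags and one for
 * reserved tags, plus the request pointer arrays and the active_queues
 * counter used for fair sharing.
 *
 *	struct blk_mq_tags {
 *		unsigned int nr_tags;
 *		unsigned int nr_reserved_tags;
 *		atomic_t active_queues;
 *		struct sbitmap_queue bitmap_tags;
 *		struct sbitmap_queue breserved_tags;
 *		struct request **rqs;
 *		struct request **static_rqs;
 *		...
 *	};
 */
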
/*
 * If a previously inactive queue goes active, bump the active user count.
 * We need to do this before trying to allocate a driver tag, so that even
 * if the first allocation attempt fails to get a tag, the other shared-tag
 * users can still reserve budget for it.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
            !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                atomic_inc(&hctx->tags->active_queues);

        return true;
}
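
/*
 * Illustrative sketch (not part of this file): roughly how the
 * active_queues count bumped above feeds the fair-share check performed by
 * hctx_may_queue() in blk-mq-tag.h. The details are approximate and the
 * helper name example_fair_depth() is hypothetical.
 */
static inline unsigned int example_fair_depth(struct sbitmap_queue *bt,
                                              struct blk_mq_tags *tags)
{
        unsigned int users = atomic_read(&tags->active_queues);

        if (!users)
                return bt->sb.depth;
        /* Give each active shared-tag user an equal slice of the depth. */
        return max((bt->sb.depth + users - 1) / users, 4U);
}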

/*
 * Wake up all waiters potentially sleeping on tags
 */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
        sbitmap_queue_wake_all(&tags->bitmap_tags);
        if (include_reserve)
                sbitmap_queue_wake_all(&tags->breserved_tags);
}

/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
        struct blk_mq_tags *tags = hctx->tags;

        if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                return;

        atomic_dec(&tags->active_queues);

        blk_mq_tag_wakeup_all(tags, false);
}

static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
                            struct sbitmap_queue *bt)
{
        if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
                        !hctx_may_queue(data->hctx, bt))
                return BLK_MQ_NO_TAG;

        if (data->shallow_depth)
                return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
        else
                return __sbitmap_queue_get(bt);
}

unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
        struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
        struct sbitmap_queue *bt;
        struct sbq_wait_state *ws;
        DEFINE_SBQ_WAIT(wait);
        unsigned int tag_offset;
        int tag;

        if (data->flags & BLK_MQ_REQ_RESERVED) {
                if (unlikely(!tags->nr_reserved_tags)) {
                        WARN_ON_ONCE(1);
                        return BLK_MQ_NO_TAG;
                }
                bt = &tags->breserved_tags;
                tag_offset = 0;
        } else {
                bt = &tags->bitmap_tags;
                tag_offset = tags->nr_reserved_tags;
        }

        tag = __blk_mq_get_tag(data, bt);
        if (tag != BLK_MQ_NO_TAG)
                goto found_tag;

        if (data->flags & BLK_MQ_REQ_NOWAIT)
                return BLK_MQ_NO_TAG;

        ws = bt_wait_ptr(bt, data->hctx);
        do {
                struct sbitmap_queue *bt_prev;

                /*
                 * We're out of tags on this hardware queue, kick any
                 * pending IO submits before going to sleep waiting for
                 * some to complete.
                 */
                blk_mq_run_hw_queue(data->hctx, false);

                /*
                 * Retry tag allocation after running the hardware queue,
                 * as running the queue may also have found completions.
                 */
                tag = __blk_mq_get_tag(data, bt);
                if (tag != BLK_MQ_NO_TAG)
                        break;

                sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);

                tag = __blk_mq_get_tag(data, bt);
                if (tag != BLK_MQ_NO_TAG)
                        break;

                bt_prev = bt;
                io_schedule();

                sbitmap_finish_wait(bt, ws, &wait);

                data->ctx = blk_mq_get_ctx(data->q);
                data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
                                                data->ctx);
                tags = blk_mq_tags_from_data(data);
                if (data->flags & BLK_MQ_REQ_RESERVED)
                        bt = &tags->breserved_tags;
                else
                        bt = &tags->bitmap_tags;

                /*
                 * If the destination hw queue has changed, issue a fake
                 * wakeup on the previous queue to compensate for the missed
                 * wakeup, so other allocations on the previous queue won't
                 * be starved.
                 */
                if (bt != bt_prev)
                        sbitmap_queue_wake_up(bt_prev);

                ws = bt_wait_ptr(bt, data->hctx);
        } while (1);

        sbitmap_finish_wait(bt, ws, &wait);

found_tag:
        /*
         * Give up this allocation if the hctx is inactive.  The caller will
         * retry on an active hctx.
         */
        if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
                blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
                return BLK_MQ_NO_TAG;
        }
        return tag + tag_offset;
}

void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
                    unsigned int tag)
{
        if (!blk_mq_tag_is_reserved(tags, tag)) {
                const int real_tag = tag - tags->nr_reserved_tags;

                BUG_ON(real_tag >= tags->nr_tags);
                sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
        } else {
                BUG_ON(tag >= tags->nr_reserved_tags);
                sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
        }
}
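
/*
 * Worked example (illustrative, assuming nr_reserved_tags == 1 and
 * nr_tags == 64): blk_mq_get_tag() returns a reserved tag as 0 (bit 0 of
 * breserved_tags) and a normal tag as its bitmap_tags bit plus 1, so
 * driver tag 5 corresponds to bit 4 of bitmap_tags. blk_mq_put_tag()
 * above undoes exactly that offset before clearing the bit.
 */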

struct bt_iter_data {
        struct blk_mq_hw_ctx *hctx;
        busy_iter_fn *fn;
        void *data;
        bool reserved;
};

static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
        struct bt_iter_data *iter_data = data;
        struct blk_mq_hw_ctx *hctx = iter_data->hctx;
        struct blk_mq_tags *tags = hctx->tags;
        bool reserved = iter_data->reserved;
        struct request *rq;

        if (!reserved)
                bitnr += tags->nr_reserved_tags;
        rq = tags->rqs[bitnr];

        /*
         * We can hit rq == NULL here, because the tagging functions
         * test and set the bit before assigning ->rqs[].
         */
        if (rq && rq->q == hctx->queue)
                return iter_data->fn(hctx, rq, iter_data->data, reserved);
        return true;
}

/**
 * bt_for_each - iterate over the requests associated with a hardware queue
 * @hctx:       Hardware queue to examine.
 * @bt:         sbitmap to examine. This is either the breserved_tags member
 *              or the bitmap_tags member of struct blk_mq_tags.
 * @fn:         Pointer to the function that will be called for each request
 *              associated with @hctx that has been assigned a driver tag.
 *              @fn will be called as follows: @fn(@hctx, rq, @data, @reserved)
 *              where rq is a pointer to a request. Return true to continue
 *              iterating tags, false to stop.
 * @data:       Will be passed as third argument to @fn.
 * @reserved:   Indicates whether @bt is the breserved_tags member or the
 *              bitmap_tags member of struct blk_mq_tags.
 */
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
                        busy_iter_fn *fn, void *data, bool reserved)
{
        struct bt_iter_data iter_data = {
                .hctx = hctx,
                .fn = fn,
                .data = data,
                .reserved = reserved,
        };

        sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
}

struct bt_tags_iter_data {
        struct blk_mq_tags *tags;
        busy_tag_iter_fn *fn;
        void *data;
        unsigned int flags;
};

#define BT_TAG_ITER_RESERVED            (1 << 0)
#define BT_TAG_ITER_STARTED             (1 << 1)
#define BT_TAG_ITER_STATIC_RQS          (1 << 2)

static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
        struct bt_tags_iter_data *iter_data = data;
        struct blk_mq_tags *tags = iter_data->tags;
        bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
        struct request *rq;

        if (!reserved)
                bitnr += tags->nr_reserved_tags;

        /*
         * We can hit rq == NULL here, because the tagging functions
         * test and set the bit before assigning ->rqs[].
         */
        if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
                rq = tags->static_rqs[bitnr];
        else
                rq = tags->rqs[bitnr];
        if (!rq)
                return true;
        if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
            !blk_mq_request_started(rq))
                return true;
        return iter_data->fn(rq, iter_data->data, reserved);
}

/**
 * bt_tags_for_each - iterate over the requests in a tag map
 * @tags:       Tag map to iterate over.
 * @bt:         sbitmap to examine. This is either the breserved_tags member
 *              or the bitmap_tags member of struct blk_mq_tags.
 * @fn:         Pointer to the function that will be called for each started
 *              request. @fn will be called as follows: @fn(rq, @data,
 *              @reserved) where rq is a pointer to a request. Return true
 *              to continue iterating tags, false to stop.
 * @data:       Will be passed as second argument to @fn.
 * @flags:      BT_TAG_ITER_*
 */
static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
                             busy_tag_iter_fn *fn, void *data, unsigned int flags)
{
        struct bt_tags_iter_data iter_data = {
                .tags = tags,
                .fn = fn,
                .data = data,
                .flags = flags,
        };

        if (tags->rqs)
                sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}

static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
                busy_tag_iter_fn *fn, void *priv, unsigned int flags)
{
        WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);

        if (tags->nr_reserved_tags)
                bt_tags_for_each(tags, &tags->breserved_tags, fn, priv,
                                 flags | BT_TAG_ITER_RESERVED);
        bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags);
}

/**
 * blk_mq_all_tag_iter - iterate over all requests in a tag map
 * @tags:       Tag map to iterate over.
 * @fn:         Pointer to the function that will be called for each
 *              request. @fn will be called as follows: @fn(rq, @priv,
 *              reserved) where rq is a pointer to a request. 'reserved'
 *              indicates whether or not @rq is a reserved request. Return
 *              true to continue iterating tags, false to stop.
 * @priv:       Will be passed as second argument to @fn.
 *
 * Caller has to pass the tag map from which requests are allocated.
 */
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
                void *priv)
{
        __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
}

/**
 * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
 * @tagset:     Tag set to iterate over.
 * @fn:         Pointer to the function that will be called for each started
 *              request. @fn will be called as follows: @fn(rq, @priv,
 *              reserved) where rq is a pointer to a request. 'reserved'
 *              indicates whether or not @rq is a reserved request. Return
 *              true to continue iterating tags, false to stop.
 * @priv:       Will be passed as second argument to @fn.
 */
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
                busy_tag_iter_fn *fn, void *priv)
{
        int i;

        for (i = 0; i < tagset->nr_hw_queues; i++) {
                if (tagset->tags && tagset->tags[i])
                        __blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
                                              BT_TAG_ITER_STARTED);
        }
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
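
/*
 * Illustrative sketch (not part of this file): a driver-side
 * busy_tag_iter_fn that counts in-flight requests via
 * blk_mq_tagset_busy_iter(). The names example_count_inflight and
 * example_inflight_requests are hypothetical.
 */
static bool example_count_inflight(struct request *rq, void *data,
                                   bool reserved)
{
        unsigned int *count = data;

        (*count)++;
        return true;    /* keep iterating */
}

static unsigned int example_inflight_requests(struct blk_mq_tag_set *set)
{
        unsigned int count = 0;

        blk_mq_tagset_busy_iter(set, example_count_inflight, &count);
        return count;
}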

static bool blk_mq_tagset_count_completed_rqs(struct request *rq,
                void *data, bool reserved)
{
        unsigned *count = data;

        if (blk_mq_request_completed(rq))
                (*count)++;
        return true;
}

/**
 * blk_mq_tagset_wait_completed_request - wait until the completion function
 * of all completed requests has run
 * @tagset:     Tag set to drain completed requests from
 *
 * Note: This function has to be run after all IO queues have been shut down.
 */
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
{
        while (true) {
                unsigned count = 0;

                blk_mq_tagset_busy_iter(tagset,
                                blk_mq_tagset_count_completed_rqs, &count);
                if (!count)
                        break;
                msleep(5);
        }
}
EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
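
/*
 * Illustrative usage sketch (not part of this file): a driver teardown path
 * typically cancels outstanding requests first and then waits for their
 * completion functions to finish. example_cancel_request and
 * example_shutdown are hypothetical driver helpers.
 */
static bool example_cancel_request(struct request *rq, void *data,
                                   bool reserved)
{
        /*
         * Driver-specific: complete @rq through the driver's normal error
         * path so that its completion function eventually runs.
         */
        return true;
}

static void example_shutdown(struct blk_mq_tag_set *set)
{
        /* Fail everything still outstanding... */
        blk_mq_tagset_busy_iter(set, example_cancel_request, NULL);
        /* ...then wait until every completion function has actually run. */
        blk_mq_tagset_wait_completed_request(set);
}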

/**
 * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
 * @q:          Request queue to examine.
 * @fn:         Pointer to the function that will be called for each request
 *              on @q. @fn will be called as follows: @fn(hctx, rq, @priv,
 *              reserved) where rq is a pointer to a request and hctx points
 *              to the hardware queue associated with the request. 'reserved'
 *              indicates whether or not @rq is a reserved request.
 * @priv:       Will be passed as third argument to @fn.
 *
 * Note: if @q->tag_set is shared with other request queues then @fn will be
 * called for all requests on all queues that share that tag set and not only
 * for requests associated with @q.
 */
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
                void *priv)
{
        struct blk_mq_hw_ctx *hctx;
        int i;

        /*
         * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
         * while the queue is frozen. So we can use q_usage_counter to avoid
         * racing with it. __blk_mq_update_nr_hw_queues() uses
         * synchronize_rcu() to ensure this function left the critical section
         * below.
         */
        if (!percpu_ref_tryget(&q->q_usage_counter))
                return;

        queue_for_each_hw_ctx(q, hctx, i) {
                struct blk_mq_tags *tags = hctx->tags;

                /*
                 * If no software queues are currently mapped to this
                 * hardware queue, there's nothing to check
                 */
                if (!blk_mq_hw_queue_mapped(hctx))
                        continue;

                if (tags->nr_reserved_tags)
                        bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
                bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
        }
        blk_queue_exit(q);
}
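
/*
 * Illustrative sketch (not part of this file): a busy_iter_fn differs from
 * a busy_tag_iter_fn in that it also receives the hardware queue. The name
 * example_show_busy is hypothetical; usage would be
 * blk_mq_queue_tag_busy_iter(q, example_show_busy, NULL).
 */
static bool example_show_busy(struct blk_mq_hw_ctx *hctx, struct request *rq,
                              void *priv, bool reserved)
{
        pr_info("hwq %u: tag %d busy%s\n", hctx->queue_num, rq->tag,
                reserved ? " (reserved)" : "");
        return true;    /* keep iterating */
}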

static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
                    bool round_robin, int node)
{
        return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
                                       node);
}

static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
                                                   int node, int alloc_policy)
{
        unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
        bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;

        if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
                goto free_tags;
        if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
                     node))
                goto free_bitmap_tags;

        return tags;
free_bitmap_tags:
        sbitmap_queue_free(&tags->bitmap_tags);
free_tags:
        kfree(tags);
        return NULL;
}

struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
                                     unsigned int reserved_tags,
                                     int node, int alloc_policy)
{
        struct blk_mq_tags *tags;

        if (total_tags > BLK_MQ_TAG_MAX) {
                pr_err("blk-mq: tag depth too large\n");
                return NULL;
        }

        tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
        if (!tags)
                return NULL;

        tags->nr_tags = total_tags;
        tags->nr_reserved_tags = reserved_tags;

        return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
        sbitmap_queue_free(&tags->bitmap_tags);
        sbitmap_queue_free(&tags->breserved_tags);
        kfree(tags);
}

int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
                            struct blk_mq_tags **tagsptr, unsigned int tdepth,
                            bool can_grow)
{
        struct blk_mq_tags *tags = *tagsptr;

        if (tdepth <= tags->nr_reserved_tags)
                return -EINVAL;

        /*
         * If we are allowed to grow beyond the original size, allocate
         * a new set of tags before freeing the old one.
         */
        if (tdepth > tags->nr_tags) {
                struct blk_mq_tag_set *set = hctx->queue->tag_set;
                struct blk_mq_tags *new;
                bool ret;

                if (!can_grow)
                        return -EINVAL;

                /*
                 * We need some sort of upper limit, set it high enough that
                 * no valid use cases should require more.
                 */
                if (tdepth > 16 * BLKDEV_MAX_RQ)
                        return -EINVAL;

                new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
                                tags->nr_reserved_tags);
                if (!new)
                        return -ENOMEM;
                ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
                if (ret) {
                        blk_mq_free_rq_map(new);
                        return -ENOMEM;
                }

                blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
                blk_mq_free_rq_map(*tagsptr);
                *tagsptr = new;
        } else {
                /*
                 * We don't need (and can't) update reserved tags here; they
                 * remain static and should never need resizing.
                 */
                sbitmap_queue_resize(&tags->bitmap_tags,
                                tdepth - tags->nr_reserved_tags);
        }

        return 0;
}

/**
 * blk_mq_unique_tag() - return a tag that is unique queue-wide
 * @rq: request for which to compute a unique tag
 *
 * The tag field in struct request is unique per hardware queue but not over
 * all hardware queues. Hence this function, which returns a tag with the
 * hardware context index in the upper bits and the per hardware queue tag in
 * the lower bits.
 *
 * Note: When called for a request that is queued on a non-multiqueue request
 * queue, the hardware context index is set to zero.
 */
u32 blk_mq_unique_tag(struct request *rq)
{
        return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) |
                (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
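
/*
 * Illustrative usage sketch (not part of this file): a consumer such as a
 * SCSI LLD can split the unique tag back into its two halves with the
 * helpers from include/linux/blk-mq.h. The function name
 * example_decode_unique_tag is hypothetical.
 */
static void example_decode_unique_tag(struct request *rq)
{
        u32 unique = blk_mq_unique_tag(rq);
        u16 hwq = blk_mq_unique_tag_to_hwq(unique);     /* upper bits */
        u16 tag = blk_mq_unique_tag_to_tag(unique);     /* lower bits */

        pr_debug("hwq %u, per-queue tag %u\n", hwq, tag);
}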
