~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/rds/ib_rdma.c

Version: ~ [ linux-5.9.1 ] ~ [ linux-5.8.16 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.72 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.152 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.202 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.240 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.240 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.85 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Copyright (c) 2006 Oracle.  All rights reserved.
  3  *
  4  * This software is available to you under a choice of one of two
  5  * licenses.  You may choose to be licensed under the terms of the GNU
  6  * General Public License (GPL) Version 2, available from the file
  7  * COPYING in the main directory of this source tree, or the
  8  * OpenIB.org BSD license below:
  9  *
 10  *     Redistribution and use in source and binary forms, with or
 11  *     without modification, are permitted provided that the following
 12  *     conditions are met:
 13  *
 14  *      - Redistributions of source code must retain the above
 15  *        copyright notice, this list of conditions and the following
 16  *        disclaimer.
 17  *
 18  *      - Redistributions in binary form must reproduce the above
 19  *        copyright notice, this list of conditions and the following
 20  *        disclaimer in the documentation and/or other materials
 21  *        provided with the distribution.
 22  *
 23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 30  * SOFTWARE.
 31  *
 32  */
 33 #include <linux/kernel.h>
 34 #include <linux/slab.h>
 35 #include <linux/rculist.h>
 36 #include <linux/llist.h>
 37 
 38 #include "ib_mr.h"
 39 
/*
 * Workqueue on which the per-pool flush_worker delayed work is queued.
 * Created in rds_ib_mr_init() and destroyed in rds_ib_mr_exit().
 */
struct workqueue_struct *rds_ib_mr_wq;

/*
 * Per-CPU flag word.  Bit CLEAN_LIST_BUSY_BIT is set by rds_ib_reuse_mr()
 * around its llist_del_first() on a pool's clean_list and cleared right
 * after; wait_clean_list_grace() spins until the bit is clear on every
 * online CPU.
 */
static DEFINE_PER_CPU(unsigned long, clean_list_grace);
#define CLEAN_LIST_BUSY_BIT 0
 44 
 45 static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
 46 {
 47         struct rds_ib_device *rds_ibdev;
 48         struct rds_ib_ipaddr *i_ipaddr;
 49 
 50         rcu_read_lock();
 51         list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
 52                 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
 53                         if (i_ipaddr->ipaddr == ipaddr) {
 54                                 atomic_inc(&rds_ibdev->refcount);
 55                                 rcu_read_unlock();
 56                                 return rds_ibdev;
 57                         }
 58                 }
 59         }
 60         rcu_read_unlock();
 61 
 62         return NULL;
 63 }
 64 
 65 static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
 66 {
 67         struct rds_ib_ipaddr *i_ipaddr;
 68 
 69         i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL);
 70         if (!i_ipaddr)
 71                 return -ENOMEM;
 72 
 73         i_ipaddr->ipaddr = ipaddr;
 74 
 75         spin_lock_irq(&rds_ibdev->spinlock);
 76         list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
 77         spin_unlock_irq(&rds_ibdev->spinlock);
 78 
 79         return 0;
 80 }
 81 
 82 static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
 83 {
 84         struct rds_ib_ipaddr *i_ipaddr;
 85         struct rds_ib_ipaddr *to_free = NULL;
 86 
 87 
 88         spin_lock_irq(&rds_ibdev->spinlock);
 89         list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
 90                 if (i_ipaddr->ipaddr == ipaddr) {
 91                         list_del_rcu(&i_ipaddr->list);
 92                         to_free = i_ipaddr;
 93                         break;
 94                 }
 95         }
 96         spin_unlock_irq(&rds_ibdev->spinlock);
 97 
 98         if (to_free)
 99                 kfree_rcu(to_free, rcu);
100 }
101 
102 int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
103 {
104         struct rds_ib_device *rds_ibdev_old;
105 
106         rds_ibdev_old = rds_ib_get_device(ipaddr);
107         if (!rds_ibdev_old)
108                 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
109 
110         if (rds_ibdev_old != rds_ibdev) {
111                 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
112                 rds_ib_dev_put(rds_ibdev_old);
113                 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
114         }
115         rds_ib_dev_put(rds_ibdev_old);
116 
117         return 0;
118 }
119 
120 void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
121 {
122         struct rds_ib_connection *ic = conn->c_transport_data;
123 
124         /* conn was previously on the nodev_conns_list */
125         spin_lock_irq(&ib_nodev_conns_lock);
126         BUG_ON(list_empty(&ib_nodev_conns));
127         BUG_ON(list_empty(&ic->ib_node));
128         list_del(&ic->ib_node);
129 
130         spin_lock(&rds_ibdev->spinlock);
131         list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
132         spin_unlock(&rds_ibdev->spinlock);
133         spin_unlock_irq(&ib_nodev_conns_lock);
134 
135         ic->rds_ibdev = rds_ibdev;
136         atomic_inc(&rds_ibdev->refcount);
137 }
138 
139 void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
140 {
141         struct rds_ib_connection *ic = conn->c_transport_data;
142 
143         /* place conn on nodev_conns_list */
144         spin_lock(&ib_nodev_conns_lock);
145 
146         spin_lock_irq(&rds_ibdev->spinlock);
147         BUG_ON(list_empty(&ic->ib_node));
148         list_del(&ic->ib_node);
149         spin_unlock_irq(&rds_ibdev->spinlock);
150 
151         list_add_tail(&ic->ib_node, &ib_nodev_conns);
152 
153         spin_unlock(&ib_nodev_conns_lock);
154 
155         ic->rds_ibdev = NULL;
156         rds_ib_dev_put(rds_ibdev);
157 }
158 
159 void rds_ib_destroy_nodev_conns(void)
160 {
161         struct rds_ib_connection *ic, *_ic;
162         LIST_HEAD(tmp_list);
163 
164         /* avoid calling conn_destroy with irqs off */
165         spin_lock_irq(&ib_nodev_conns_lock);
166         list_splice(&ib_nodev_conns, &tmp_list);
167         spin_unlock_irq(&ib_nodev_conns_lock);
168 
169         list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
170                 rds_conn_destroy(ic->conn);
171 }
172 
173 void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
174 {
175         struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
176 
177         iinfo->rdma_mr_max = pool_1m->max_items;
178         iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
179 }
180 
181 struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
182 {
183         struct rds_ib_mr *ibmr = NULL;
184         struct llist_node *ret;
185         unsigned long *flag;
186 
187         preempt_disable();
188         flag = this_cpu_ptr(&clean_list_grace);
189         set_bit(CLEAN_LIST_BUSY_BIT, flag);
190         ret = llist_del_first(&pool->clean_list);
191         if (ret) {
192                 ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
193                 if (pool->pool_type == RDS_IB_MR_8K_POOL)
194                         rds_ib_stats_inc(s_ib_rdma_mr_8k_reused);
195                 else
196                         rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
197         }
198 
199         clear_bit(CLEAN_LIST_BUSY_BIT, flag);
200         preempt_enable();
201         return ibmr;
202 }
203 
204 static inline void wait_clean_list_grace(void)
205 {
206         int cpu;
207         unsigned long *flag;
208 
209         for_each_online_cpu(cpu) {
210                 flag = &per_cpu(clean_list_grace, cpu);
211                 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
212                         cpu_relax();
213         }
214 }
215 
216 void rds_ib_sync_mr(void *trans_private, int direction)
217 {
218         struct rds_ib_mr *ibmr = trans_private;
219         struct rds_ib_device *rds_ibdev = ibmr->device;
220 
221         switch (direction) {
222         case DMA_FROM_DEVICE:
223                 ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg,
224                         ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
225                 break;
226         case DMA_TO_DEVICE:
227                 ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg,
228                         ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
229                 break;
230         }
231 }
232 
233 void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
234 {
235         struct rds_ib_device *rds_ibdev = ibmr->device;
236 
237         if (ibmr->sg_dma_len) {
238                 ib_dma_unmap_sg(rds_ibdev->dev,
239                                 ibmr->sg, ibmr->sg_len,
240                                 DMA_BIDIRECTIONAL);
241                 ibmr->sg_dma_len = 0;
242         }
243 
244         /* Release the s/g list */
245         if (ibmr->sg_len) {
246                 unsigned int i;
247 
248                 for (i = 0; i < ibmr->sg_len; ++i) {
249                         struct page *page = sg_page(&ibmr->sg[i]);
250 
251                         /* FIXME we need a way to tell a r/w MR
252                          * from a r/o MR */
253                         WARN_ON(!page->mapping && irqs_disabled());
254                         set_page_dirty(page);
255                         put_page(page);
256                 }
257                 kfree(ibmr->sg);
258 
259                 ibmr->sg = NULL;
260                 ibmr->sg_len = 0;
261         }
262 }
263 
264 void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
265 {
266         unsigned int pinned = ibmr->sg_len;
267 
268         __rds_ib_teardown_mr(ibmr);
269         if (pinned) {
270                 struct rds_ib_mr_pool *pool = ibmr->pool;
271 
272                 atomic_sub(pinned, &pool->free_pinned);
273         }
274 }
275 
276 static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all)
277 {
278         unsigned int item_count;
279 
280         item_count = atomic_read(&pool->item_count);
281         if (free_all)
282                 return item_count;
283 
284         return 0;
285 }
286 
287 /*
288  * given an llist of mrs, put them all into the list_head for more processing
289  */
290 static unsigned int llist_append_to_list(struct llist_head *llist,
291                                          struct list_head *list)
292 {
293         struct rds_ib_mr *ibmr;
294         struct llist_node *node;
295         struct llist_node *next;
296         unsigned int count = 0;
297 
298         node = llist_del_all(llist);
299         while (node) {
300                 next = node->next;
301                 ibmr = llist_entry(node, struct rds_ib_mr, llnode);
302                 list_add_tail(&ibmr->unmap_list, list);
303                 node = next;
304                 count++;
305         }
306         return count;
307 }
308 
309 /*
310  * this takes a list head of mrs and turns it into linked llist nodes
311  * of clusters.  Each cluster has linked llist nodes of
312  * MR_CLUSTER_SIZE mrs that are ready for reuse.
313  */
314 static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
315                                 struct list_head *list,
316                                 struct llist_node **nodes_head,
317                                 struct llist_node **nodes_tail)
318 {
319         struct rds_ib_mr *ibmr;
320         struct llist_node *cur = NULL;
321         struct llist_node **next = nodes_head;
322 
323         list_for_each_entry(ibmr, list, unmap_list) {
324                 cur = &ibmr->llnode;
325                 *next = cur;
326                 next = &cur->next;
327         }
328         *next = NULL;
329         *nodes_tail = cur;
330 }
331 
332 /*
333  * Flush our pool of MRs.
334  * At a minimum, all currently unused MRs are unmapped.
335  * If the number of MRs allocated exceeds the limit, we also try
336  * to free as many MRs as needed to get back to this limit.
337  */
338 int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
339                          int free_all, struct rds_ib_mr **ibmr_ret)
340 {
341         struct rds_ib_mr *ibmr;
342         struct llist_node *clean_nodes;
343         struct llist_node *clean_tail;
344         LIST_HEAD(unmap_list);
345         unsigned long unpinned = 0;
346         unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
347 
348         if (pool->pool_type == RDS_IB_MR_8K_POOL)
349                 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
350         else
351                 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush);
352 
353         if (ibmr_ret) {
354                 DEFINE_WAIT(wait);
355                 while (!mutex_trylock(&pool->flush_lock)) {
356                         ibmr = rds_ib_reuse_mr(pool);
357                         if (ibmr) {
358                                 *ibmr_ret = ibmr;
359                                 finish_wait(&pool->flush_wait, &wait);
360                                 goto out_nolock;
361                         }
362 
363                         prepare_to_wait(&pool->flush_wait, &wait,
364                                         TASK_UNINTERRUPTIBLE);
365                         if (llist_empty(&pool->clean_list))
366                                 schedule();
367 
368                         ibmr = rds_ib_reuse_mr(pool);
369                         if (ibmr) {
370                                 *ibmr_ret = ibmr;
371                                 finish_wait(&pool->flush_wait, &wait);
372                                 goto out_nolock;
373                         }
374                 }
375                 finish_wait(&pool->flush_wait, &wait);
376         } else
377                 mutex_lock(&pool->flush_lock);
378 
379         if (ibmr_ret) {
380                 ibmr = rds_ib_reuse_mr(pool);
381                 if (ibmr) {
382                         *ibmr_ret = ibmr;
383                         goto out;
384                 }
385         }
386 
387         /* Get the list of all MRs to be dropped. Ordering matters -
388          * we want to put drop_list ahead of free_list.
389          */
390         dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
391         dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
392         if (free_all)
393                 llist_append_to_list(&pool->clean_list, &unmap_list);
394 
395         free_goal = rds_ib_flush_goal(pool, free_all);
396 
397         if (list_empty(&unmap_list))
398                 goto out;
399 
400         if (pool->use_fastreg)
401                 rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal);
402         else
403                 rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
404 
405         if (!list_empty(&unmap_list)) {
406                 /* we have to make sure that none of the things we're about
407                  * to put on the clean list would race with other cpus trying
408                  * to pull items off.  The llist would explode if we managed to
409                  * remove something from the clean list and then add it back again
410                  * while another CPU was spinning on that same item in llist_del_first.
411                  *
412                  * This is pretty unlikely, but just in case  wait for an llist grace period
413                  * here before adding anything back into the clean list.
414                  */
415                 wait_clean_list_grace();
416 
417                 list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
418                 if (ibmr_ret)
419                         *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
420 
421                 /* more than one entry in llist nodes */
422                 if (clean_nodes->next)
423                         llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list);
424 
425         }
426 
427         atomic_sub(unpinned, &pool->free_pinned);
428         atomic_sub(dirty_to_clean, &pool->dirty_count);
429         atomic_sub(nfreed, &pool->item_count);
430 
431 out:
432         mutex_unlock(&pool->flush_lock);
433         if (waitqueue_active(&pool->flush_wait))
434                 wake_up(&pool->flush_wait);
435 out_nolock:
436         return 0;
437 }
438 
439 struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
440 {
441         struct rds_ib_mr *ibmr = NULL;
442         int iter = 0;
443 
444         if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
445                 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
446 
447         while (1) {
448                 ibmr = rds_ib_reuse_mr(pool);
449                 if (ibmr)
450                         return ibmr;
451 
452                 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
453                         break;
454 
455                 atomic_dec(&pool->item_count);
456 
457                 if (++iter > 2) {
458                         if (pool->pool_type == RDS_IB_MR_8K_POOL)
459                                 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
460                         else
461                                 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
462                         return ERR_PTR(-EAGAIN);
463                 }
464 
465                 /* We do have some empty MRs. Flush them out. */
466                 if (pool->pool_type == RDS_IB_MR_8K_POOL)
467                         rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
468                 else
469                         rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
470 
471                 rds_ib_flush_mr_pool(pool, 0, &ibmr);
472                 if (ibmr)
473                         return ibmr;
474         }
475 
476         return ibmr;
477 }
478 
479 static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
480 {
481         struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
482 
483         rds_ib_flush_mr_pool(pool, 0, NULL);
484 }
485 
486 void rds_ib_free_mr(void *trans_private, int invalidate)
487 {
488         struct rds_ib_mr *ibmr = trans_private;
489         struct rds_ib_mr_pool *pool = ibmr->pool;
490         struct rds_ib_device *rds_ibdev = ibmr->device;
491 
492         rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
493 
494         /* Return it to the pool's free list */
495         if (rds_ibdev->use_fastreg)
496                 rds_ib_free_frmr_list(ibmr);
497         else
498                 rds_ib_free_fmr_list(ibmr);
499 
500         atomic_add(ibmr->sg_len, &pool->free_pinned);
501         atomic_inc(&pool->dirty_count);
502 
503         /* If we've pinned too many pages, request a flush */
504         if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
505             atomic_read(&pool->dirty_count) >= pool->max_items / 5)
506                 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
507 
508         if (invalidate) {
509                 if (likely(!in_interrupt())) {
510                         rds_ib_flush_mr_pool(pool, 0, NULL);
511                 } else {
512                         /* We get here if the user created a MR marked
513                          * as use_once and invalidate at the same time.
514                          */
515                         queue_delayed_work(rds_ib_mr_wq,
516                                            &pool->flush_worker, 10);
517                 }
518         }
519 
520         rds_ib_dev_put(rds_ibdev);
521 }
522 
523 void rds_ib_flush_mrs(void)
524 {
525         struct rds_ib_device *rds_ibdev;
526 
527         down_read(&rds_ib_devices_lock);
528         list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
529                 if (rds_ibdev->mr_8k_pool)
530                         rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL);
531 
532                 if (rds_ibdev->mr_1m_pool)
533                         rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL);
534         }
535         up_read(&rds_ib_devices_lock);
536 }
537 
538 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
539                     struct rds_sock *rs, u32 *key_ret)
540 {
541         struct rds_ib_device *rds_ibdev;
542         struct rds_ib_mr *ibmr = NULL;
543         struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
544         int ret;
545 
546         rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
547         if (!rds_ibdev) {
548                 ret = -ENODEV;
549                 goto out;
550         }
551 
552         if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
553                 ret = -ENODEV;
554                 goto out;
555         }
556 
557         if (rds_ibdev->use_fastreg)
558                 ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
559         else
560                 ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
561         if (ibmr)
562                 rds_ibdev = NULL;
563 
564  out:
565         if (!ibmr)
566                 pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
567 
568         if (rds_ibdev)
569                 rds_ib_dev_put(rds_ibdev);
570 
571         return ibmr;
572 }
573 
574 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
575 {
576         cancel_delayed_work_sync(&pool->flush_worker);
577         rds_ib_flush_mr_pool(pool, 1, NULL);
578         WARN_ON(atomic_read(&pool->item_count));
579         WARN_ON(atomic_read(&pool->free_pinned));
580         kfree(pool);
581 }
582 
583 struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
584                                              int pool_type)
585 {
586         struct rds_ib_mr_pool *pool;
587 
588         pool = kzalloc(sizeof(*pool), GFP_KERNEL);
589         if (!pool)
590                 return ERR_PTR(-ENOMEM);
591 
592         pool->pool_type = pool_type;
593         init_llist_head(&pool->free_list);
594         init_llist_head(&pool->drop_list);
595         init_llist_head(&pool->clean_list);
596         mutex_init(&pool->flush_lock);
597         init_waitqueue_head(&pool->flush_wait);
598         INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
599 
600         if (pool_type == RDS_IB_MR_1M_POOL) {
601                 /* +1 allows for unaligned MRs */
602                 pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
603                 pool->max_items = RDS_MR_1M_POOL_SIZE;
604         } else {
605                 /* pool_type == RDS_IB_MR_8K_POOL */
606                 pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
607                 pool->max_items = RDS_MR_8K_POOL_SIZE;
608         }
609 
610         pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
611         pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
612         pool->fmr_attr.page_shift = PAGE_SHIFT;
613         pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4;
614         pool->use_fastreg = rds_ibdev->use_fastreg;
615 
616         return pool;
617 }
618 
619 int rds_ib_mr_init(void)
620 {
621         rds_ib_mr_wq = create_workqueue("rds_mr_flushd");
622         if (!rds_ib_mr_wq)
623                 return -ENOMEM;
624         return 0;
625 }
626 
627 /* By the time this is called all the IB devices should have been torn down and
628  * had their pools freed.  As each pool is freed its work struct is waited on,
629  * so the pool flushing work queue should be idle by the time we get here.
630  */
631 void rds_ib_mr_exit(void)
632 {
633         destroy_workqueue(rds_ib_mr_wq);
634 }
635 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp