TOMOYO Linux Cross Reference
Linux/net/sunrpc/xprtrdma/frwr_ops.c


  1 /*
  2  * Copyright (c) 2015 Oracle.  All rights reserved.
  3  * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  4  */
  5 
  6 /* Lightweight memory registration using Fast Registration Work
  7  * Requests (FRWR). Also sometimes referred to as FRMR mode.
  8  *
  9  * FRWR features ordered asynchronous registration and deregistration
 10  * of arbitrarily sized memory regions. This is the fastest and safest
 11  * but most complex memory registration mode.
 12  */
 13 
 14 /* Normal operation
 15  *
 16  * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
 17  * Work Request (frwr_op_map). When the RDMA operation is finished, this
 18  * Memory Region is invalidated using a LOCAL_INV Work Request
 19  * (frwr_op_unmap).
 20  *
 21  * Typically these Work Requests are not signaled, and neither are RDMA
 22  * SEND Work Requests (with the exception of signaling occasionally to
 23  * prevent provider work queue overflows). This greatly reduces HCA
 24  * interrupt workload.
 25  *
 26  * As an optimization, frwr_op_unmap marks MRs INVALID before the
 27  * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
 28  * rb_mws immediately so that no work (like managing a linked list
 29  * under a spinlock) is needed in the completion upcall.
 30  *
 31  * But this means that frwr_op_map() can occasionally encounter an MR
 32  * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
 33  * ordering prevents a subsequent FAST_REG WR from executing against
 34  * that MR while it is still being invalidated.
 35  */
 36 
 37 /* Transport recovery
 38  *
 39  * ->op_map and the transport connect worker cannot run at the same
 40  * time, but ->op_unmap can fire while the transport connect worker
 41  * is running. Thus MR recovery is handled in ->op_map, to guarantee
 42  * that recovered MRs are owned by a sending RPC, and not by one
 43  * whose ->op_unmap could fire while a transport reconnect is in
 44  * progress.
 45  *
 46  * When the underlying transport disconnects, MRs are left in one of
 47  * three states:
 48  *
 49  * INVALID:     The MR was not in use before the QP entered ERROR state.
 50  *              (Or, the LOCAL_INV WR has not completed or flushed yet).
 51  *
 52  * STALE:       The MR was being registered or unregistered when the QP
 53  *              entered ERROR state, and the pending WR was flushed.
 54  *
 55  * VALID:       The MR was registered before the QP entered ERROR state.
 56  *
 57  * When frwr_op_map encounters STALE and VALID MRs, they are recovered
 58  * with ib_dereg_mr and then re-initialized. Because MR recovery
 59  * allocates fresh resources, it is deferred to a workqueue, and the
 60  * recovered MRs are placed back on the rb_mws list when recovery is
 61  * complete. frwr_op_map allocates another MR for the current RPC while
 62  * the broken MR is reset.
 63  *
 64  * To ensure that frwr_op_map doesn't encounter an MR that is marked
 65  * INVALID but that is about to be flushed due to a previous transport
 66  * disconnect, the transport connect worker attempts to drain all
 67  * pending send queue WRs before the transport is reconnected.
 68  */
 69 
 70 #include "xprt_rdma.h"
 71 
 72 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 73 # define RPCDBG_FACILITY        RPCDBG_TRANS
 74 #endif
 75 
 76 static struct workqueue_struct *frwr_recovery_wq;
 77 
 78 #define FRWR_RECOVERY_WQ_FLAGS          (WQ_UNBOUND | WQ_MEM_RECLAIM)
 79 
 80 int
 81 frwr_alloc_recovery_wq(void)
 82 {
 83         frwr_recovery_wq = alloc_workqueue("frwr_recovery",
 84                                            FRWR_RECOVERY_WQ_FLAGS, 0);
 85         return !frwr_recovery_wq ? -ENOMEM : 0;
 86 }
 87 
 88 void
 89 frwr_destroy_recovery_wq(void)
 90 {
 91         struct workqueue_struct *wq;
 92 
 93         if (!frwr_recovery_wq)
 94                 return;
 95 
 96         wq = frwr_recovery_wq;
 97         frwr_recovery_wq = NULL;
 98         destroy_workqueue(wq);
 99 }
100 
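    /* Reset a broken MR: deregister the existing ib_mr and allocate a
     * replacement (which also yields a fresh rkey), then mark the frmr
     * INVALID so it can be reused. If either verb fails, the frwr is
     * left orphaned.
     */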
101 static int
102 __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
103 {
104         struct rpcrdma_frmr *f = &r->frmr;
105         int rc;
106 
107         rc = ib_dereg_mr(f->fr_mr);
108         if (rc) {
109                 pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
110                         rc, r);
111                 return rc;
112         }
113 
114         f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG,
115                                ia->ri_max_frmr_depth);
116         if (IS_ERR(f->fr_mr)) {
117                 pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
118                         PTR_ERR(f->fr_mr), r);
119                 return PTR_ERR(f->fr_mr);
120         }
121 
122         dprintk("RPC:       %s: recovered FRMR %p\n", __func__, r);
123         f->fr_state = FRMR_IS_INVALID;
124         return 0;
125 }
126 
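    /* Reset the MR, DMA unmap its scatterlist, and, if the reset
     * succeeded, return the MW to the transport's free list.
     */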
127 static void
128 __frwr_reset_and_unmap(struct rpcrdma_mw *mw)
129 {
130         struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
131         struct rpcrdma_ia *ia = &r_xprt->rx_ia;
132         int rc;
133 
134         rc = __frwr_reset_mr(ia, mw);
135         ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
136         if (rc)
137                 return;
138         rpcrdma_put_mw(r_xprt, mw);
139 }
140 
141 /* Deferred reset of a single FRMR. Generate a fresh rkey by
142  * replacing the MR.
143  *
144  * There's no recovery if this fails. The FRMR is abandoned, but
145  * remains in rb_all. It will be cleaned up when the transport is
146  * destroyed.
147  */
148 static void
149 __frwr_recovery_worker(struct work_struct *work)
150 {
151         struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
152                                             mw_work);
153 
154         __frwr_reset_and_unmap(r);
155 }
156 
157 /* A broken MR was discovered in a context that can't sleep.
158  * Defer recovery to the recovery worker.
159  */
160 static void
161 __frwr_queue_recovery(struct rpcrdma_mw *r)
162 {
163         INIT_WORK(&r->mw_work, __frwr_recovery_worker);
164         queue_work(frwr_recovery_wq, &r->mw_work);
165 }
166 
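    /* Allocate the ib_mr and scatterlist backing one rpcrdma_mw,
     * sized for "depth" segments.
     */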
167 static int
168 __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, unsigned int depth)
169 {
170         struct rpcrdma_frmr *f = &r->frmr;
171         int rc;
172 
173         f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
174         if (IS_ERR(f->fr_mr))
175                 goto out_mr_err;
176 
177         r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
178         if (!r->mw_sg)
179                 goto out_list_err;
180 
181         sg_init_table(r->mw_sg, depth);
182 
183         init_completion(&f->fr_linv_done);
184 
185         return 0;
186 
187 out_mr_err:
188         rc = PTR_ERR(f->fr_mr);
189         dprintk("RPC:       %s: ib_alloc_mr status %i\n",
190                 __func__, rc);
191         return rc;
192 
193 out_list_err:
194         rc = -ENOMEM;
195         dprintk("RPC:       %s: sg allocation failure\n",
196                 __func__);
197         ib_dereg_mr(f->fr_mr);
198         return rc;
199 }
200 
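    /* Free the resources allocated by __frwr_init: deregister the
     * ib_mr and release the scatterlist.
     */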
201 static void
202 __frwr_release(struct rpcrdma_mw *r)
203 {
204         int rc;
205 
206         rc = ib_dereg_mr(r->frmr.fr_mr);
207         if (rc)
208                 dprintk("RPC:       %s: ib_dereg_mr status %i\n",
209                         __func__, rc);
210         kfree(r->mw_sg);
211 }
212 
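    /* Prepare an endpoint for FRWR: clamp the FRMR depth to what the
     * device supports, then scale the send queue so that each RPC has
     * room for its FastReg and LocalInv WRs as well as its Send WR,
     * reducing the request count if the device's max_qp_wr would be
     * exceeded.
     */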
213 static int
214 frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
215              struct rpcrdma_create_data_internal *cdata)
216 {
217         int depth, delta;
218 
219         ia->ri_max_frmr_depth =
220                         min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
221                               ia->ri_device->attrs.max_fast_reg_page_list_len);
222         dprintk("RPC:       %s: device's max FR page list len = %u\n",
223                 __func__, ia->ri_max_frmr_depth);
224 
225         /* Add room for frmr register and invalidate WRs.
226          * 1. FRMR reg WR for head
227          * 2. FRMR invalidate WR for head
228          * 3. N FRMR reg WRs for pagelist
229          * 4. N FRMR invalidate WRs for pagelist
230          * 5. FRMR reg WR for tail
231          * 6. FRMR invalidate WR for tail
232          * 7. The RDMA_SEND WR
233          */
234         depth = 7;
235 
236         /* Calculate N if the device max FRMR depth is smaller than
237          * RPCRDMA_MAX_DATA_SEGS.
238          */
239         if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
240                 delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
241                 do {
242                         depth += 2; /* FRMR reg + invalidate */
243                         delta -= ia->ri_max_frmr_depth;
244                 } while (delta > 0);
245         }
246 
247         ep->rep_attr.cap.max_send_wr *= depth;
248         if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
249                 cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
250                 if (!cdata->max_requests)
251                         return -EINVAL;
252                 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
253                                                depth;
254         }
255 
256         rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
257                                                       RPCRDMA_MAX_DATA_SEGS /
258                                                       ia->ri_max_frmr_depth));
259         return 0;
260 }
261 
262 /* FRWR mode conveys a list of pages per chunk segment. The
263  * maximum length of that list is the FRWR page list depth.
264  */
265 static size_t
266 frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
267 {
268         struct rpcrdma_ia *ia = &r_xprt->rx_ia;
269 
270         return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
271                      RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
272 }
273 
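    /* Handle a flushed or failed FastReg/LocalInv completion: mark the
     * MR STALE, and report any error other than a flush.
     */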
274 static void
275 __frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
276                             const char *wr)
277 {
278         frmr->fr_state = FRMR_IS_STALE;
279         if (wc->status != IB_WC_WR_FLUSH_ERR)
280                 pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
281                        wr, ib_wc_status_msg(wc->status),
282                        wc->status, wc->vendor_err);
283 }
284 
285 /**
286  * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
287  * @cq: completion queue (ignored)
288  * @wc: completed WR
289  *
290  */
291 static void
292 frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
293 {
294         struct rpcrdma_frmr *frmr;
295         struct ib_cqe *cqe;
296 
297         /* WARNING: Only wr_cqe and status are reliable at this point */
298         if (wc->status != IB_WC_SUCCESS) {
299                 cqe = wc->wr_cqe;
300                 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
301                 __frwr_sendcompletion_flush(wc, frmr, "fastreg");
302         }
303 }
304 
305 /**
306  * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
307  * @cq: completion queue (ignored)
308  * @wc: completed WR
309  *
310  */
311 static void
312 frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
313 {
314         struct rpcrdma_frmr *frmr;
315         struct ib_cqe *cqe;
316 
317         /* WARNING: Only wr_cqe and status are reliable at this point */
318         if (wc->status != IB_WC_SUCCESS) {
319                 cqe = wc->wr_cqe;
320                 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
321                 __frwr_sendcompletion_flush(wc, frmr, "localinv");
322         }
323 }
324 
325 /**
326  * frwr_wc_localinv_wake - Invoked by RDMA provider for each polled LocalInv WC
327  * @cq: completion queue (ignored)
328  * @wc: completed WR
329  *
330  * Awaken anyone waiting for an MR to finish being fenced.
331  */
332 static void
333 frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
334 {
335         struct rpcrdma_frmr *frmr;
336         struct ib_cqe *cqe;
337 
338         /* WARNING: Only wr_cqe and status are reliable at this point */
339         cqe = wc->wr_cqe;
340         frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
341         if (wc->status != IB_WC_SUCCESS)
342                 __frwr_sendcompletion_flush(wc, frmr, "localinv");
343         complete_all(&frmr->fr_linv_done);
344 }
345 
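    /* Populate the transport's MW pool: allocate enough FRMRs for each
     * RPC slot to register its head, page list chunks, and tail, adding
     * each MW to both the free list (rb_mws) and the all-MWs list
     * (rb_all).
     */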
346 static int
347 frwr_op_init(struct rpcrdma_xprt *r_xprt)
348 {
349         struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
350         unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
351         struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
352         int i;
353 
354         spin_lock_init(&buf->rb_mwlock);
355         INIT_LIST_HEAD(&buf->rb_mws);
356         INIT_LIST_HEAD(&buf->rb_all);
357 
358         i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1);
359         i += 2;                         /* head + tail */
360         i *= buf->rb_max_requests;      /* one set for each RPC slot */
361         dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
362 
363         while (i--) {
364                 struct rpcrdma_mw *r;
365                 int rc;
366 
367                 r = kzalloc(sizeof(*r), GFP_KERNEL);
368                 if (!r)
369                         return -ENOMEM;
370 
371                 rc = __frwr_init(r, pd, depth);
372                 if (rc) {
373                         kfree(r);
374                         return rc;
375                 }
376 
377                 r->mw_xprt = r_xprt;
378                 list_add(&r->mw_list, &buf->rb_mws);
379                 list_add(&r->mw_all, &buf->rb_all);
380         }
381 
382         return 0;
383 }
384 
385 /* Post a REG_MR Work Request to register a memory region
386  * for remote access via RDMA READ or RDMA WRITE.
387  */
388 static int
389 frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
390             int nsegs, bool writing)
391 {
392         struct rpcrdma_ia *ia = &r_xprt->rx_ia;
393         struct rpcrdma_mr_seg *seg1 = seg;
394         struct rpcrdma_mw *mw;
395         struct rpcrdma_frmr *frmr;
396         struct ib_mr *mr;
397         struct ib_reg_wr *reg_wr;
398         struct ib_send_wr *bad_wr;
399         int rc, i, n, dma_nents;
400         u8 key;
401 
402         mw = seg1->rl_mw;
403         seg1->rl_mw = NULL;
404         do {
405                 if (mw)
406                         __frwr_queue_recovery(mw);
407                 mw = rpcrdma_get_mw(r_xprt);
408                 if (!mw)
409                         return -ENOMEM;
410         } while (mw->frmr.fr_state != FRMR_IS_INVALID);
411         frmr = &mw->frmr;
412         frmr->fr_state = FRMR_IS_VALID;
413         mr = frmr->fr_mr;
414         reg_wr = &frmr->fr_regwr;
415 
416         if (nsegs > ia->ri_max_frmr_depth)
417                 nsegs = ia->ri_max_frmr_depth;
418         for (i = 0; i < nsegs;) {
419                 if (seg->mr_page)
420                         sg_set_page(&mw->mw_sg[i],
421                                     seg->mr_page,
422                                     seg->mr_len,
423                                     offset_in_page(seg->mr_offset));
424                 else
425                         sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
426                                    seg->mr_len);
427 
428                 ++seg;
429                 ++i;
430 
431                 /* Check for holes */
432                 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
433                     offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
434                         break;
435         }
436         mw->mw_nents = i;
437         mw->mw_dir = rpcrdma_data_dir(writing);
438 
439         dma_nents = ib_dma_map_sg(ia->ri_device,
440                                   mw->mw_sg, mw->mw_nents, mw->mw_dir);
441         if (!dma_nents)
442                 goto out_dmamap_err;
443 
444         n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
445         if (unlikely(n != mw->mw_nents))
446                 goto out_mapmr_err;
447 
448         dprintk("RPC:       %s: Using frmr %p to map %u segments (%u bytes)\n",
449                 __func__, mw, mw->mw_nents, mr->length);
450 
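            /* Refresh the low-order byte of the rkey so that each
             * registration of this MR presents a new rkey to the peer.
             */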
451         key = (u8)(mr->rkey & 0x000000FF);
452         ib_update_fast_reg_key(mr, ++key);
453 
454         reg_wr->wr.next = NULL;
455         reg_wr->wr.opcode = IB_WR_REG_MR;
456         frmr->fr_cqe.done = frwr_wc_fastreg;
457         reg_wr->wr.wr_cqe = &frmr->fr_cqe;
458         reg_wr->wr.num_sge = 0;
459         reg_wr->wr.send_flags = 0;
460         reg_wr->mr = mr;
461         reg_wr->key = mr->rkey;
462         reg_wr->access = writing ?
463                          IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
464                          IB_ACCESS_REMOTE_READ;
465 
466         DECR_CQCOUNT(&r_xprt->rx_ep);
467         rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
468         if (rc)
469                 goto out_senderr;
470 
471         seg1->rl_mw = mw;
472         seg1->mr_rkey = mr->rkey;
473         seg1->mr_base = mr->iova;
474         seg1->mr_nsegs = mw->mw_nents;
475         seg1->mr_len = mr->length;
476 
477         return mw->mw_nents;
478 
479 out_dmamap_err:
480         pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
481                mw->mw_sg, mw->mw_nents);
482         return -ENOMEM;
483 
484 out_mapmr_err:
485         pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
486                frmr->fr_mr, n, mw->mw_nents);
487         rc = n < 0 ? n : -EIO;
488         __frwr_queue_recovery(mw);
489         return rc;
490 
491 out_senderr:
492         pr_err("rpcrdma: ib_post_send status %i\n", rc);
493         __frwr_queue_recovery(mw);
494         return rc;
495 }
496 
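    /* Build a LOCAL_INV Work Request for the MW registered for "seg",
     * and mark that MR INVALID.
     */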
497 static struct ib_send_wr *
498 __frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
499 {
500         struct rpcrdma_mw *mw = seg->rl_mw;
501         struct rpcrdma_frmr *f = &mw->frmr;
502         struct ib_send_wr *invalidate_wr;
503 
504         f->fr_state = FRMR_IS_INVALID;
505         invalidate_wr = &f->fr_invwr;
506 
507         memset(invalidate_wr, 0, sizeof(*invalidate_wr));
508         f->fr_cqe.done = frwr_wc_localinv;
509         invalidate_wr->wr_cqe = &f->fr_cqe;
510         invalidate_wr->opcode = IB_WR_LOCAL_INV;
511         invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
512 
513         return invalidate_wr;
514 }
515 
516 /* Invalidate all memory regions that were registered for "req".
517  *
518  * Sleeps until it is safe for the host CPU to access the
519  * previously mapped memory regions.
520  */
521 static void
522 frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
523 {
524         struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
525         struct rpcrdma_ia *ia = &r_xprt->rx_ia;
526         struct rpcrdma_mr_seg *seg;
527         unsigned int i, nchunks;
528         struct rpcrdma_frmr *f;
529         struct rpcrdma_mw *mw;
530         int rc;
531 
532         dprintk("RPC:       %s: req %p\n", __func__, req);
533 
534         /* ORDER: Invalidate all of the req's MRs first
535          *
536          * Chain the LOCAL_INV Work Requests and post them with
537          * a single ib_post_send() call.
538          */
539         invalidate_wrs = pos = prev = NULL;
540         seg = NULL;
541         for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
542                 seg = &req->rl_segments[i];
543 
544                 pos = __frwr_prepare_linv_wr(seg);
545 
546                 if (!invalidate_wrs)
547                         invalidate_wrs = pos;
548                 else
549                         prev->next = pos;
550                 prev = pos;
551 
552                 i += seg->mr_nsegs;
553         }
554         f = &seg->rl_mw->frmr;
555 
556         /* Strong send queue ordering guarantees that when the
557          * last WR in the chain completes, all WRs in the chain
558          * are complete.
559          */
560         f->fr_invwr.send_flags = IB_SEND_SIGNALED;
561         f->fr_cqe.done = frwr_wc_localinv_wake;
562         reinit_completion(&f->fr_linv_done);
563         INIT_CQCOUNT(&r_xprt->rx_ep);
564 
565         /* Transport disconnect drains the receive CQ before it
566          * replaces the QP. The RPC reply handler won't call us
567          * unless ri_id->qp is a valid pointer.
568          */
569         rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
570         if (rc)
571                 goto reset_mrs;
572 
573         wait_for_completion(&f->fr_linv_done);
574 
575         /* ORDER: Now DMA unmap all of the req's MRs, and return
576          * them to the free MW list.
577          */
578 unmap:
579         for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
580                 seg = &req->rl_segments[i];
581                 mw = seg->rl_mw;
582                 seg->rl_mw = NULL;
583 
584                 ib_dma_unmap_sg(ia->ri_device,
585                                 mw->mw_sg, mw->mw_nents, mw->mw_dir);
586                 rpcrdma_put_mw(r_xprt, mw);
587 
588                 i += seg->mr_nsegs;
589                 seg->mr_nsegs = 0;
590         }
591 
592         req->rl_nchunks = 0;
593         return;
594 
595 reset_mrs:
596         pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
597 
598         /* Find and reset the MRs in the LOCAL_INV WRs that did not
599          * get posted. This is synchronous, and slow.
600          */
601         for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
602                 seg = &req->rl_segments[i];
603                 mw = seg->rl_mw;
604                 f = &mw->frmr;
605 
606                 if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) {
607                         __frwr_reset_mr(ia, mw);
608                         bad_wr = bad_wr->next;
609                 }
610 
611                 i += seg->mr_nsegs;
612         }
613         goto unmap;
614 }
615 
616 /* Use a slow, safe mechanism to invalidate all memory regions
617  * that were registered for "req".
618  */
619 static void
620 frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
621                    bool sync)
622 {
623         struct rpcrdma_mr_seg *seg;
624         struct rpcrdma_mw *mw;
625         unsigned int i;
626 
627         for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
628                 seg = &req->rl_segments[i];
629                 mw = seg->rl_mw;
630 
631                 if (sync)
632                         __frwr_reset_and_unmap(mw);
633                 else
634                         __frwr_queue_recovery(mw);
635 
636                 i += seg->mr_nsegs;
637                 seg->mr_nsegs = 0;
638                 seg->rl_mw = NULL;
639         }
640 }
641 
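    /* Tear down the MW pool built by frwr_op_init: wait for deferred
     * recovery work to finish, then deregister and free every MW on
     * rb_all.
     */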
642 static void
643 frwr_op_destroy(struct rpcrdma_buffer *buf)
644 {
645         struct rpcrdma_mw *r;
646 
647         /* Ensure stale MWs for "buf" are no longer in flight */
648         flush_workqueue(frwr_recovery_wq);
649 
650         while (!list_empty(&buf->rb_all)) {
651                 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
652                 list_del(&r->mw_all);
653                 __frwr_release(r);
654                 kfree(r);
655         }
656 }
657 
658 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
659         .ro_map                         = frwr_op_map,
660         .ro_unmap_sync                  = frwr_op_unmap_sync,
661         .ro_unmap_safe                  = frwr_op_unmap_safe,
662         .ro_open                        = frwr_op_open,
663         .ro_maxpages                    = frwr_op_maxpages,
664         .ro_init                        = frwr_op_init,
665         .ro_destroy                     = frwr_op_destroy,
666         .ro_displayname                 = "frwr",
667 };
668 
