TOMOYO Linux Cross Reference
Linux/net/sunrpc/xprtrdma/fmr_ops.c

  1 /*
  2  * Copyright (c) 2015 Oracle.  All rights reserved.
  3  * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  4  */
  5 
  6 /* Lightweight memory registration using Fast Memory Regions (FMR).
  7  * Sometimes referred to as MTHCAFMR mode.
  8  *
  9  * FMR uses synchronous memory registration and deregistration.
 10  * FMR registration is known to be fast, but FMR deregistration
 11  * can take tens of usecs to complete.
 12  */
 13 
 14 /* Normal operation
 15  *
 16  * A Memory Region is prepared for RDMA READ or WRITE using the
 17  * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 18  * finished, the Memory Region is unmapped using the ib_unmap_fmr
 19  * verb (fmr_op_unmap).
 20  */
 21 
 22 /* Transport recovery
 23  *
 24  * After a transport reconnect, fmr_op_map re-uses the MR already
 25  * allocated for the RPC, but generates a fresh rkey and then maps
 26  * the MR again. This process is synchronous.
 27  */
 28 
 29 #include "xprt_rdma.h"
 30 
 31 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 32 # define RPCDBG_FACILITY        RPCDBG_TRANS
 33 #endif
 34 
 35 /* Maximum scatter/gather per FMR */
 36 #define RPCRDMA_MAX_FMR_SGES    (64)
 37 
 38 static struct workqueue_struct *fmr_recovery_wq;
 39 
 40 #define FMR_RECOVERY_WQ_FLAGS           (WQ_UNBOUND)
 41 
 42 int
 43 fmr_alloc_recovery_wq(void)
 44 {
 45         fmr_recovery_wq = alloc_workqueue("fmr_recovery", FMR_RECOVERY_WQ_FLAGS, 0);
 46         return !fmr_recovery_wq ? -ENOMEM : 0;
 47 }
 48 
 49 void
 50 fmr_destroy_recovery_wq(void)
 51 {
 52         struct workqueue_struct *wq;
 53 
 54         if (!fmr_recovery_wq)
 55                 return;
 56 
 57         wq = fmr_recovery_wq;
 58         fmr_recovery_wq = NULL;
 59         destroy_workqueue(wq);
 60 }
 61 
 62 static int
 63 __fmr_unmap(struct rpcrdma_mw *mw)
 64 {
 65         LIST_HEAD(l);
 66         int rc;
 67 
 68         list_add(&mw->fmr.fmr->list, &l);
 69         rc = ib_unmap_fmr(&l);
 70         list_del_init(&mw->fmr.fmr->list);
 71         return rc;
 72 }
 73 
 74 /* Deferred reset of a single FMR: unmap it, invalidating its rkey,
 75  * then return it to the free list. There's no recovery if this fails.
 76  */
 77 static void
 78 __fmr_recovery_worker(struct work_struct *work)
 79 {
 80         struct rpcrdma_mw *mw = container_of(work, struct rpcrdma_mw,
 81                                             mw_work);
 82         struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
 83 
 84         __fmr_unmap(mw);
 85         rpcrdma_put_mw(r_xprt, mw);
 86         return;
 87 }
 88 
 89 /* A broken MR was discovered in a context that can't sleep.
 90  * Defer recovery to the recovery worker.
 91  */
 92 static void
 93 __fmr_queue_recovery(struct rpcrdma_mw *mw)
 94 {
 95         INIT_WORK(&mw->mw_work, __fmr_recovery_worker);
 96         queue_work(fmr_recovery_wq, &mw->mw_work);
 97 }
 98 
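    /* Size the transport's RPC-over-RDMA headers for the largest chunk
     * list FMR mode can produce: one chunk segment for every group of
     * RPCRDMA_MAX_FMR_SGES pages of payload.
     */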
 99 static int
100 fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
101             struct rpcrdma_create_data_internal *cdata)
102 {
103         rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
104                                                       RPCRDMA_MAX_DATA_SEGS /
105                                                       RPCRDMA_MAX_FMR_SGES));
106         return 0;
107 }
108 
109 /* FMR mode conveys up to 64 pages of payload per chunk segment.
110  */
111 static size_t
112 fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
113 {
114         return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
115                      RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
116 }
117 
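    /* Populate the transport's MW pool with pre-allocated FMRs: each
     * RPC slot may need enough MRs to cover a maximum-length payload,
     * plus one each for the RPC's head and tail buffers.
     */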
118 static int
119 fmr_op_init(struct rpcrdma_xprt *r_xprt)
120 {
121         struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
122         int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
123         struct ib_fmr_attr fmr_attr = {
124                 .max_pages      = RPCRDMA_MAX_FMR_SGES,
125                 .max_maps       = 1,
126                 .page_shift     = PAGE_SHIFT
127         };
128         struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
129         struct rpcrdma_mw *r;
130         int i, rc;
131 
132         spin_lock_init(&buf->rb_mwlock);
133         INIT_LIST_HEAD(&buf->rb_mws);
134         INIT_LIST_HEAD(&buf->rb_all);
135 
136         i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
137         i += 2;                         /* head + tail */
138         i *= buf->rb_max_requests;      /* one set for each RPC slot */
139         dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
140 
141         rc = -ENOMEM;
142         while (i--) {
143                 r = kzalloc(sizeof(*r), GFP_KERNEL);
144                 if (!r)
145                         goto out;
146 
147                 r->fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
148                                            sizeof(u64), GFP_KERNEL);
149                 if (!r->fmr.physaddrs)
150                         goto out_free;
151 
152                 r->fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
153                 if (IS_ERR(r->fmr.fmr))
154                         goto out_fmr_err;
155 
156                 r->mw_xprt = r_xprt;
157                 list_add(&r->mw_list, &buf->rb_mws);
158                 list_add(&r->mw_all, &buf->rb_all);
159         }
160         return 0;
161 
162 out_fmr_err:
163         rc = PTR_ERR(r->fmr.fmr);
164         dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
165         kfree(r->fmr.physaddrs);
166 out_free:
167         kfree(r);
168 out:
169         return rc;
170 }
171 
172 /* Use the ib_map_phys_fmr() verb to register a memory region
173  * for remote access via RDMA READ or RDMA WRITE.
174  */
175 static int
176 fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
177            int nsegs, bool writing)
178 {
179         struct rpcrdma_ia *ia = &r_xprt->rx_ia;
180         struct ib_device *device = ia->ri_device;
181         enum dma_data_direction direction = rpcrdma_data_dir(writing);
182         struct rpcrdma_mr_seg *seg1 = seg;
183         int len, pageoff, i, rc;
184         struct rpcrdma_mw *mw;
185 
186         mw = seg1->rl_mw;
187         seg1->rl_mw = NULL;
188         if (!mw) {
189                 mw = rpcrdma_get_mw(r_xprt);
190                 if (!mw)
191                         return -ENOMEM;
192         } else {
193                 /* this is a retransmit; generate a fresh rkey */
194                 rc = __fmr_unmap(mw);
195                 if (rc)
196                         return rc;
197         }
198 
199         pageoff = offset_in_page(seg1->mr_offset);
200         seg1->mr_offset -= pageoff;     /* start of page */
201         seg1->mr_len += pageoff;
202         len = -pageoff;
203         if (nsegs > RPCRDMA_MAX_FMR_SGES)
204                 nsegs = RPCRDMA_MAX_FMR_SGES;
205         for (i = 0; i < nsegs;) {
206                 rpcrdma_map_one(device, seg, direction);
207                 mw->fmr.physaddrs[i] = seg->mr_dma;
208                 len += seg->mr_len;
209                 ++seg;
210                 ++i;
211                 /* Stop at a hole: the FMR's pages must be one contiguous region */
212                 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
213                     offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
214                         break;
215         }
216 
217         rc = ib_map_phys_fmr(mw->fmr.fmr, mw->fmr.physaddrs,
218                              i, seg1->mr_dma);
219         if (rc)
220                 goto out_maperr;
221 
222         seg1->rl_mw = mw;
223         seg1->mr_rkey = mw->fmr.fmr->rkey;
224         seg1->mr_base = seg1->mr_dma + pageoff;
225         seg1->mr_nsegs = i;
226         seg1->mr_len = len;
227         return i;
228 
229 out_maperr:
230         dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
231                 __func__, len, (unsigned long long)seg1->mr_dma,
232                 pageoff, i, rc);
233         while (i--)
234                 rpcrdma_unmap_one(device, --seg);
235         return rc;
236 }
237 
238 static void
239 __fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
240 {
241         struct ib_device *device = r_xprt->rx_ia.ri_device;
242         int nsegs = seg->mr_nsegs;
243 
244         while (nsegs--)
245                 rpcrdma_unmap_one(device, seg++);
246 }
247 
248 /* Invalidate all memory regions that were registered for "req".
249  *
250  * Sleeps until it is safe for the host CPU to access the
251  * previously mapped memory regions.
252  */
253 static void
254 fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
255 {
256         struct rpcrdma_mr_seg *seg;
257         unsigned int i, nchunks;
258         struct rpcrdma_mw *mw;
259         LIST_HEAD(unmap_list);
260         int rc;
261 
262         dprintk("RPC:       %s: req %p\n", __func__, req);
263 
264         /* ORDER: Invalidate all of the req's MRs first
265          *
266          * ib_unmap_fmr() is slow, so use a single call instead
267          * of one call per mapped MR.
268          */
269         for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
270                 seg = &req->rl_segments[i];
271                 mw = seg->rl_mw;
272 
273                 list_add_tail(&mw->fmr.fmr->list, &unmap_list);
274 
275                 i += seg->mr_nsegs;
276         }
277         rc = ib_unmap_fmr(&unmap_list);
278         if (rc)
279                 pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
280 
281         /* ORDER: Now DMA unmap all of the req's MRs, and return
282          * them to the free MW list.
283          */
284         for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
285                 seg = &req->rl_segments[i];
286                 mw = seg->rl_mw;
287 
288                 list_del_init(&mw->fmr.fmr->list);
289                 __fmr_dma_unmap(r_xprt, seg);
290                 rpcrdma_put_mw(r_xprt, seg->rl_mw);
291 
292                 i += seg->mr_nsegs;
293                 seg->mr_nsegs = 0;
294                 seg->rl_mw = NULL;
295         }
296 
297         req->rl_nchunks = 0;
298 }
299 
300 /* Use a slow, safe mechanism to invalidate all memory regions
301  * that were registered for "req".
302  *
303  * In the asynchronous case, DMA unmapping occurs first here
304  * because the rpcrdma_mr_seg is released immediately after this
305  * call. Its contents won't be available in __fmr_dma_unmap later.
306  * FIXME.
307  */
308 static void
309 fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
310                   bool sync)
311 {
312         struct rpcrdma_mr_seg *seg;
313         struct rpcrdma_mw *mw;
314         unsigned int i;
315 
316         for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
317                 seg = &req->rl_segments[i];
318                 mw = seg->rl_mw;
319 
320                 if (sync) {
321                         /* ORDER */
322                         __fmr_unmap(mw);
323                         __fmr_dma_unmap(r_xprt, seg);
324                         rpcrdma_put_mw(r_xprt, mw);
325                 } else {
326                         __fmr_dma_unmap(r_xprt, seg);
327                         __fmr_queue_recovery(mw);
328                 }
329 
330                 i += seg->mr_nsegs;
331                 seg->mr_nsegs = 0;
332                 seg->rl_mw = NULL;
333         }
334 }
335 
336 static void
337 fmr_op_destroy(struct rpcrdma_buffer *buf)
338 {
339         struct rpcrdma_mw *r;
340         int rc;
341 
342         while (!list_empty(&buf->rb_all)) {
343                 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
344                 list_del(&r->mw_all);
345                 kfree(r->fmr.physaddrs);
346 
347                 rc = ib_dealloc_fmr(r->fmr.fmr);
348                 if (rc)
349                         dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
350                                 __func__, rc);
351 
352                 kfree(r);
353         }
354 }
355 
356 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
357         .ro_map                         = fmr_op_map,
358         .ro_unmap_sync                  = fmr_op_unmap_sync,
359         .ro_unmap_safe                  = fmr_op_unmap_safe,
360         .ro_open                        = fmr_op_open,
361         .ro_maxpages                    = fmr_op_maxpages,
362         .ro_init                        = fmr_op_init,
363         .ro_destroy                     = fmr_op_destroy,
364         .ro_displayname                 = "fmr",
365 };
366 
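For readers following the comments at the top of the file, the sketch below condenses the FMR verb life cycle that fmr_op_init, fmr_op_map, fmr_op_unmap_sync, and fmr_op_destroy drive: allocate an FMR once, map a page list before an RDMA READ or WRITE, then invalidate it with the slow, synchronous ib_unmap_fmr. It is illustrative only (not part of fmr_ops.c), assumes the caller has already DMA-mapped the payload into the pages[] array, and trims error handling.

/* Illustrative sketch only -- not part of fmr_ops.c. */
static int
example_fmr_cycle(struct ib_pd *pd, u64 *pages, int npages, u64 iova)
{
        struct ib_fmr_attr attr = {
                .max_pages      = RPCRDMA_MAX_FMR_SGES,
                .max_maps       = 1,
                .page_shift     = PAGE_SHIFT
        };
        struct ib_fmr *fmr;
        LIST_HEAD(fmr_list);
        int rc;

        /* Allocate once; the FMR is then mapped and unmapped repeatedly. */
        fmr = ib_alloc_fmr(pd, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
                           &attr);
        if (IS_ERR(fmr))
                return PTR_ERR(fmr);

        /* Fast: make the DMA-mapped pages remotely accessible at iova. */
        rc = ib_map_phys_fmr(fmr, pages, npages, iova);
        if (rc)
                goto out_dealloc;

        /* ... advertise fmr->rkey to the peer and wait for the RDMA ... */

        /* Slow: invalidate the mapping (can take tens of usecs). */
        list_add(&fmr->list, &fmr_list);
        rc = ib_unmap_fmr(&fmr_list);
        list_del_init(&fmr->list);

out_dealloc:
        ib_dealloc_fmr(fmr);
        return rc;
}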

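The rpcrdma_fmr_memreg_ops table is how the rest of xprtrdma reaches this file: the generic transport dispatches through whichever rpcrdma_memreg_ops table is in effect rather than calling fmr_op_map directly. The fragment below is a minimal sketch of such a call site; the ri_ops field and the example_* helper name are assumptions for illustration, not code quoted from this tree.

/* Illustrative sketch only -- not part of fmr_ops.c. */
static int
example_register_write_chunk(struct rpcrdma_xprt *r_xprt,
                             struct rpcrdma_mr_seg *seg, int nsegs)
{
        /* ri_ops is assumed to hold the active rpcrdma_memreg_ops
         * (rpcrdma_fmr_memreg_ops when FMR is in use).
         */
        const struct rpcrdma_memreg_ops *ops = r_xprt->rx_ia.ri_ops;
        int n;

        /* ro_map coalesces as many of the nsegs segments as it can
         * into one MR and returns that count, or a negative errno.
         */
        n = ops->ro_map(r_xprt, seg, nsegs, true);
        if (n < 0)
                return n;

        /* seg[0] now carries mr_rkey, mr_base, and mr_len for the
         * write chunk advertised in the RPC-over-RDMA header.
         */
        return n;
}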