/*
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <asm/unaligned.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

/*
 * Decodes a read chunk list. The expected format is as follows:
 *    discrim  : xdr_one
 *    position : u32 offset into XDR stream
 *    handle   : u32 RKEY
 *    . . .
 *  end-of-list: xdr_zero
 */
static u32 *decode_read_list(u32 *va, u32 *vaend)
{
        struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;

        while (ch->rc_discrim != xdr_zero) {
                u64 ch_offset;

                if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
                    (unsigned long)vaend) {
                        dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
                        return NULL;
                }

                /* Byte-swap each field of the chunk in place */
                ch->rc_discrim = ntohl(ch->rc_discrim);
                ch->rc_position = ntohl(ch->rc_position);
                ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle);
                ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length);
                va = (u32 *)&ch->rc_target.rs_offset;
                xdr_decode_hyper(va, &ch_offset);
                put_unaligned(ch_offset, (u64 *)va);
                ch++;
        }
        /* The terminator is a single xdr_zero word; the header resumes
         * just past rc_discrim.
         */
        return (u32 *)&ch->rc_position;
}
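/*
 * Usage sketch (hypothetical helper, not in the original source): once
 * decode_read_list() has byte-swapped the list in place, a caller can
 * walk it until the zero discriminator, exactly as
 * svc_rdma_rcl_chunk_counts() below does:
 *
 *	for (ch = first_chunk; ch->rc_discrim != 0; ch++)
 *		consume(ch->rc_position,	   // offset into XDR stream
 *			ch->rc_target.rs_handle,   // RKEY
 *			ch->rc_target.rs_length);  // segment byte count
 */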
/*
 * Determine number of chunks and total bytes in chunk list. The chunk
 * list has already been verified to fit within the RPCRDMA header.
 */
void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
                               int *ch_count, int *byte_count)
{
        /* compute the number of bytes represented by read chunks */
        *byte_count = 0;
        *ch_count = 0;
        for (; ch->rc_discrim != 0; ch++) {
                *byte_count = *byte_count + ch->rc_target.rs_length;
                *ch_count = *ch_count + 1;
        }
}

/*
 * Decodes a write chunk list. The expected format is as follows:
 *  discrim  : xdr_one
 *  nchunks  : <count>
 *       handle   : u32 RKEY              ---+
 *       length   : u32 <len of segment>     |
 *       offset   : remote va + <count>      |
 *  . . .                                 ---+
 */
static u32 *decode_write_list(u32 *va, u32 *vaend)
{
        int ch_no;
        struct rpcrdma_write_array *ary =
                (struct rpcrdma_write_array *)va;

        /* Check for no write-array */
        if (ary->wc_discrim == xdr_zero)
                return (u32 *)&ary->wc_nchunks;

        if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
            (unsigned long)vaend) {
                dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
                return NULL;
        }
        ary->wc_discrim = ntohl(ary->wc_discrim);
        ary->wc_nchunks = ntohl(ary->wc_nchunks);
        if (((unsigned long)&ary->wc_array[0] +
             (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
            (unsigned long)vaend) {
                dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
                        ary, ary->wc_nchunks, vaend);
                return NULL;
        }
        for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
                u64 ch_offset;

                ary->wc_array[ch_no].wc_target.rs_handle =
                        ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
                ary->wc_array[ch_no].wc_target.rs_length =
                        ntohl(ary->wc_array[ch_no].wc_target.rs_length);
                va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
                xdr_decode_hyper(va, &ch_offset);
                put_unaligned(ch_offset, (u64 *)va);
        }

        /*
         * rs_length is the 2nd 4B field in wc_target and taking its
         * address skips the list terminator
         */
        return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length;
}

static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
        int ch_no;
        struct rpcrdma_write_array *ary =
                (struct rpcrdma_write_array *)va;

        /* Check for no reply-array */
        if (ary->wc_discrim == xdr_zero)
                return (u32 *)&ary->wc_nchunks;

        if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
            (unsigned long)vaend) {
                dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
                return NULL;
        }
        ary->wc_discrim = ntohl(ary->wc_discrim);
        ary->wc_nchunks = ntohl(ary->wc_nchunks);
        if (((unsigned long)&ary->wc_array[0] +
             (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
            (unsigned long)vaend) {
                dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
                        ary, ary->wc_nchunks, vaend);
                return NULL;
        }
        for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
                u64 ch_offset;

                ary->wc_array[ch_no].wc_target.rs_handle =
                        ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
                ary->wc_array[ch_no].wc_target.rs_length =
                        ntohl(ary->wc_array[ch_no].wc_target.rs_length);
                va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
                xdr_decode_hyper(va, &ch_offset);
                put_unaligned(ch_offset, (u64 *)va);
        }

        return (u32 *)&ary->wc_array[ch_no];
}
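/*
 * Illustrative sketch (not part of the original source): the XDR words
 * that decode_write_list() walks for a write list carrying a single
 * chunk. A reply array has the same per-chunk layout but no xdr_zero
 * terminator, which is why decode_reply_array() returns
 * &ary->wc_array[ch_no] rather than skipping a trailing word.
 *
 *	__be32 wire[] = {
 *		xdr_one,	// wc_discrim: a write list is present
 *		htonl(1),	// wc_nchunks
 *		htonl(rkey),	// wc_target.rs_handle
 *		htonl(len),	// wc_target.rs_length
 *		off_hi, off_lo,	// wc_target.rs_offset as an XDR hyper
 *		xdr_zero,	// write-list terminator
 *	};
 */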
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
                            struct svc_rqst *rqstp)
{
        struct rpcrdma_msg *rmsgp = NULL;
        u32 *va;
        u32 *vaend;
        u32 hdr_len;

        rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;

        /* Verify that there are enough bytes for the header plus
         * something
         */
        if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
                dprintk("svcrdma: header too short = %d\n",
                        rqstp->rq_arg.len);
                return -EINVAL;
        }

        /* Decode the header */
        rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
        rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
        rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
        rmsgp->rm_type = ntohl(rmsgp->rm_type);

        if (rmsgp->rm_vers != RPCRDMA_VERSION)
                return -ENOSYS;

        /* Pull in the extra for the padded case and bump our pointer */
        if (rmsgp->rm_type == RDMA_MSGP) {
                int hdrlen;

                rmsgp->rm_body.rm_padded.rm_align =
                        ntohl(rmsgp->rm_body.rm_padded.rm_align);
                rmsgp->rm_body.rm_padded.rm_thresh =
                        ntohl(rmsgp->rm_body.rm_padded.rm_thresh);

                va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
                rqstp->rq_arg.head[0].iov_base = va;
                hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
                rqstp->rq_arg.head[0].iov_len -= hdrlen;
                if (hdrlen > rqstp->rq_arg.len)
                        return -EINVAL;
                return hdrlen;
        }

        /* The chunk list may contain either a read chunk list or a write
         * chunk list and a reply chunk list.
         */
        va = &rmsgp->rm_body.rm_chunks[0];
        vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
        va = decode_read_list(va, vaend);
        if (!va)
                return -EINVAL;
        va = decode_write_list(va, vaend);
        if (!va)
                return -EINVAL;
        va = decode_reply_array(va, vaend);
        if (!va)
                return -EINVAL;

        rqstp->rq_arg.head[0].iov_base = va;
        hdr_len = (unsigned long)va - (unsigned long)rmsgp;
        rqstp->rq_arg.head[0].iov_len -= hdr_len;

        *rdma_req = rmsgp;
        return hdr_len;
}
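/*
 * Usage sketch (assumption about the caller, which lives in the
 * svcrdma receive path; close_out is a hypothetical label): a negative
 * return is an error, a positive return is the RPC/RDMA header length,
 * already subtracted from the head iovec.
 *
 *	struct rpcrdma_msg *rmsgp;
 *	int len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
 *	if (len < 0)
 *		goto close_out;	// malformed or unsupported header
 *	// rqstp->rq_arg now describes just the RPC call body
 */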
int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
{
        struct rpcrdma_msg *rmsgp = NULL;
        struct rpcrdma_read_chunk *ch;
        struct rpcrdma_write_array *ary;
        u32 *va;
        u32 hdrlen;

        dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
                rqstp);
        rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;

        /* Pull in the extra for the padded case and bump our pointer */
        if (rmsgp->rm_type == RDMA_MSGP) {
                va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
                rqstp->rq_arg.head[0].iov_base = va;
                hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
                rqstp->rq_arg.head[0].iov_len -= hdrlen;
                return hdrlen;
        }

        /*
         * Skip all chunks to find the RPC msg; these were previously
         * processed.
         */
        va = &rmsgp->rm_body.rm_chunks[0];

        /* Skip read-list */
        for (ch = (struct rpcrdma_read_chunk *)va;
             ch->rc_discrim != xdr_zero; ch++)
                ;
        va = (u32 *)&ch->rc_position;

        /* Skip write-list */
        ary = (struct rpcrdma_write_array *)va;
        if (ary->wc_discrim == xdr_zero)
                va = (u32 *)&ary->wc_nchunks;
        else
                /*
                 * rs_length is the 2nd 4B field in wc_target and taking
                 * its address skips the list terminator
                 */
                va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;

        /* Skip reply-array */
        ary = (struct rpcrdma_write_array *)va;
        if (ary->wc_discrim == xdr_zero)
                va = (u32 *)&ary->wc_nchunks;
        else
                va = (u32 *)&ary->wc_array[ary->wc_nchunks];

        rqstp->rq_arg.head[0].iov_base = va;
        hdrlen = (unsigned long)va - (unsigned long)rmsgp;
        rqstp->rq_arg.head[0].iov_len -= hdrlen;

        return hdrlen;
}

int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
                              struct rpcrdma_msg *rmsgp,
                              enum rpcrdma_errcode err, u32 *va)
{
        u32 *startp = va;

        *va++ = htonl(rmsgp->rm_xid);
        *va++ = htonl(rmsgp->rm_vers);
        *va++ = htonl(xprt->sc_max_requests);
        *va++ = htonl(RDMA_ERROR);
        *va++ = htonl(err);
        if (err == ERR_VERS) {
                /* Report the supported version range */
                *va++ = htonl(RPCRDMA_VERSION);
                *va++ = htonl(RPCRDMA_VERSION);
        }

        return (int)((unsigned long)va - (unsigned long)startp);
}
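/*
 * Wire-layout note (derived from the code above, not original text):
 * svc_rdma_xdr_encode_error() emits five XDR words -- xid, vers,
 * credits, RDMA_ERROR, errcode -- and returns 20 bytes; for ERR_VERS
 * it appends rdma_vers_low and rdma_vers_high (both RPCRDMA_VERSION
 * here) and returns 28 bytes.
 */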
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
{
        struct rpcrdma_write_array *wr_ary;

        /* There is no read-list in a reply */

        /* skip write list */
        wr_ary = (struct rpcrdma_write_array *)
                &rmsgp->rm_body.rm_chunks[1];
        if (wr_ary->wc_discrim)
                wr_ary = (struct rpcrdma_write_array *)
                        &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
                        wc_target.rs_length;
        else
                wr_ary = (struct rpcrdma_write_array *)
                        &wr_ary->wc_nchunks;

        /* skip reply array */
        if (wr_ary->wc_discrim)
                wr_ary = (struct rpcrdma_write_array *)
                        &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
        else
                wr_ary = (struct rpcrdma_write_array *)
                        &wr_ary->wc_nchunks;

        return (unsigned long) wr_ary - (unsigned long) rmsgp;
}

void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
{
        struct rpcrdma_write_array *ary;

        /* no read-list */
        rmsgp->rm_body.rm_chunks[0] = xdr_zero;

        /* write-array discrim */
        ary = (struct rpcrdma_write_array *)
                &rmsgp->rm_body.rm_chunks[1];
        ary->wc_discrim = xdr_one;
        ary->wc_nchunks = htonl(chunks);

        /* write-list terminator */
        ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;

        /* reply-array discriminator */
        ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
}

void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
                                     int chunks)
{
        ary->wc_discrim = xdr_one;
        ary->wc_nchunks = htonl(chunks);
}

void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
                                     int chunk_no,
                                     u32 rs_handle, u64 rs_offset,
                                     u32 write_len)
{
        struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;

        seg->rs_handle = htonl(rs_handle);
        seg->rs_length = htonl(write_len);
        xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset);
}

void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
                                      struct rpcrdma_msg *rdma_argp,
                                      struct rpcrdma_msg *rdma_resp,
                                      enum rpcrdma_proc rdma_type)
{
        rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
        rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
        rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
        rdma_resp->rm_type = htonl(rdma_type);

        /* Encode <nul> chunk lists */
        rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
        rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
        rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
}
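/*
 * Usage sketch (assumption, not taken from the original source; rkey,
 * offset, and len are hypothetical values): composing the encode
 * helpers above to build a reply header carrying a one-chunk write
 * list.
 *
 *	struct rpcrdma_write_array *ary;
 *
 *	svc_rdma_xdr_encode_reply_header(xprt, rdma_argp, rdma_resp,
 *					 RDMA_MSG);
 *	svc_rdma_xdr_encode_write_list(rdma_resp, 1);
 *	ary = (struct rpcrdma_write_array *)
 *		&rdma_resp->rm_body.rm_chunks[1];
 *	svc_rdma_xdr_encode_array_chunk(ary, 0, rkey, offset, len);
 */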