/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
                                __be64 *iv);

static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
        return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
                              NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
        return glue_fpu_end(fpu_enabled);
}

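/*
 * Shared ECB helper for ecb_encrypt()/ecb_decrypt(): 16-block batches go
 * through the AVX assembler routines while the FPU is usable; any remaining
 * blocks fall back to the scalar __cast5_encrypt()/__cast5_decrypt().
 */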
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
                     bool enc)
{
        bool fpu_enabled = false;
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes;
        void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
        int err;

        err = blkcipher_walk_virt(desc, walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk->nbytes)) {
                u8 *wsrc = walk->src.virt.addr;
                u8 *wdst = walk->dst.virt.addr;

                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

                /* Process multi-block batch */
                if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                        fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
                        do {
                                fn(ctx, wdst, wsrc);

                                wsrc += bsize * CAST5_PARALLEL_BLOCKS;
                                wdst += bsize * CAST5_PARALLEL_BLOCKS;
                                nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                        } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                        if (nbytes < bsize)
                                goto done;
                }

                fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

                /* Handle leftovers */
                do {
                        fn(ctx, wdst, wsrc);

                        wsrc += bsize;
                        wdst += bsize;
                        nbytes -= bsize;
                } while (nbytes >= bsize);

done:
                err = blkcipher_walk_done(desc, walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, false);
}

static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 *iv = (u64 *)walk->iv;

        do {
                *dst = *src ^ *iv;
                __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
                iv = dst;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

        *(u64 *)walk->iv = *iv;
        return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);

        while ((nbytes = walk.nbytes)) {
                nbytes = __cbc_encrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        return err;
}

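/*
 * CBC decryption works backwards from the last block of the walk so each
 * decrypted block can be XORed in place with the preceding ciphertext block;
 * last_iv carries the final ciphertext block forward as the next IV.
 */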
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 last_iv;

        /* Start of the last block. */
        src += nbytes / bsize - 1;
        dst += nbytes / bsize - 1;

        last_iv = *src;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
                        src -= CAST5_PARALLEL_BLOCKS - 1;
                        dst -= CAST5_PARALLEL_BLOCKS - 1;

                        cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

                        nbytes -= bsize;
                        if (nbytes < bsize)
                                goto done;

                        *dst ^= *(src - 1);
                        src -= 1;
                        dst -= 1;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
        }

        /* Handle leftovers */
        for (;;) {
                __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

                nbytes -= bsize;
                if (nbytes < bsize)
                        break;

                *dst ^= *(src - 1);
                src -= 1;
                dst -= 1;
        }

done:
        *dst ^= *(u64 *)walk->iv;
        *(u64 *)walk->iv = last_iv;

        return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk.nbytes)) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __cbc_decrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

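/*
 * Handle the final partial block in CTR mode: encrypt the counter block into
 * a keystream buffer and XOR only the remaining nbytes into dst.
 */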
static void ctr_crypt_final(struct blkcipher_desc *desc,
                            struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        u8 *ctrblk = walk->iv;
        u8 keystream[CAST5_BLOCK_SIZE];
        u8 *src = walk->src.virt.addr;
        u8 *dst = walk->dst.virt.addr;
        unsigned int nbytes = walk->nbytes;

        __cast5_encrypt(ctx, keystream, ctrblk);
        crypto_xor_cpy(dst, keystream, src, nbytes);

        crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
                                struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
                                        (__be64 *)walk->iv);

                        src += CAST5_PARALLEL_BLOCKS;
                        dst += CAST5_PARALLEL_BLOCKS;
                        nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                if (nbytes < bsize)
                        goto done;
        }

        /* Handle leftovers */
        do {
                u64 ctrblk;

                if (dst != src)
                        *dst = *src;

                ctrblk = *(u64 *)walk->iv;
                be64_add_cpu((__be64 *)walk->iv, 1);

                __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
                *dst ^= ctrblk;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

done:
        return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                     struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __ctr_crypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);

        if (walk.nbytes) {
                ctr_crypt_final(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, 0);
        }

        return err;
}

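/*
 * Algorithm registrations: the first three entries are the internal,
 * synchronous blkcipher implementations ("__ecb-cast5-avx" etc.); the last
 * three are the user-visible async wrappers built on ablk_helper.
 */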
static struct crypto_alg cast5_algs[6] = { {
        .cra_name = "__ecb-cast5-avx",
        .cra_driver_name = "__driver-ecb-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = ecb_encrypt,
                        .decrypt = ecb_decrypt,
                },
        },
}, {
        .cra_name = "__cbc-cast5-avx",
        .cra_driver_name = "__driver-cbc-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = cbc_encrypt,
                        .decrypt = cbc_decrypt,
                },
        },
}, {
        .cra_name = "__ctr-cast5-avx",
        .cra_driver_name = "__driver-ctr-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = 1,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = ctr_crypt,
                        .decrypt = ctr_crypt,
                },
        },
}, {
        .cra_name = "ecb(cast5)",
        .cra_driver_name = "ecb-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = ablk_encrypt,
                        .decrypt = ablk_decrypt,
                },
        },
}, {
        .cra_name = "cbc(cast5)",
        .cra_driver_name = "cbc-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = __ablk_encrypt,
                        .decrypt = ablk_decrypt,
                },
        },
}, {
        .cra_name = "ctr(cast5)",
        .cra_driver_name = "ctr-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = 1,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = ablk_encrypt,
                        .decrypt = ablk_encrypt,
                        .geniv = "chainiv",
                },
        },
} };

static int __init cast5_init(void)
{
        const char *feature_name;

        if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
                               &feature_name)) {
                pr_info("CPU feature '%s' is not supported.\n", feature_name);
                return -ENODEV;
        }

        return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
        crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");