~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/include/asm/xor.h

Version: ~ [ linux-5.4-rc7 ] ~ [ linux-5.3.11 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.84 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.154 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.201 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.201 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.77 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 #ifdef CONFIG_KMEMCHECK
  2 /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
  3 # include <asm-generic/xor.h>
  4 #elif !defined(_ASM_X86_XOR_H)
  5 #define _ASM_X86_XOR_H
  6 
  7 /*
  8  * Optimized RAID-5 checksumming functions for SSE.
  9  *
 10  * This program is free software; you can redistribute it and/or modify
 11  * it under the terms of the GNU General Public License as published by
 12  * the Free Software Foundation; either version 2, or (at your option)
 13  * any later version.
 14  *
 15  * You should have received a copy of the GNU General Public License
 16  * (for example /usr/src/linux/COPYING); if not, write to the Free
 17  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 18  */
 19 
 20 /*
 21  * Cache avoiding checksumming functions utilizing KNI instructions
 22  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
 23  */
 24 
 25 /*
 26  * Based on
 27  * High-speed RAID5 checksumming functions utilizing SSE instructions.
 28  * Copyright (C) 1998 Ingo Molnar.
 29  */
 30 
 31 /*
 32  * x86-64 changes / gcc fixes from Andi Kleen.
 33  * Copyright 2002 Andi Kleen, SuSE Labs.
 34  *
 35  * This hasn't been optimized for the hammer yet, but there are likely
 36  * no advantages to be gotten from x86-64 here anyways.
 37  */
 38 
 39 #include <asm/i387.h>
 40 
 41 #ifdef CONFIG_X86_32
 42 /* reduce register pressure */
 43 # define XOR_CONSTANT_CONSTRAINT "i"
 44 #else
    /* "re": any register or a sign-extended 32-bit immediate (x86-64). */
 45 # define XOR_CONSTANT_CONSTRAINT "re"
 46 #endif
 47 
    /* Byte offset of 16-byte word x within the current 256-byte line. */
 48 #define OFFS(x)         "16*("#x")"
    /* Same offset one full 256-byte line ahead, used for prefetching. */
 49 #define PF_OFFS(x)      "256+16*("#x")"
    /* PF0..PF4: non-temporal prefetch ahead in source p1..p5 respectively. */
 50 #define PF0(x)          "       prefetchnta "PF_OFFS(x)"(%[p1])         ;\n"
    /* LD/ST: aligned 16-byte move between p1 and register %xmm<y>. */
 51 #define LD(x, y)        "       movaps "OFFS(x)"(%[p1]), %%xmm"#y"      ;\n"
 52 #define ST(x, y)        "       movaps %%xmm"#y", "OFFS(x)"(%[p1])      ;\n"
 53 #define PF1(x)          "       prefetchnta "PF_OFFS(x)"(%[p2])         ;\n"
 54 #define PF2(x)          "       prefetchnta "PF_OFFS(x)"(%[p3])         ;\n"
 55 #define PF3(x)          "       prefetchnta "PF_OFFS(x)"(%[p4])         ;\n"
 56 #define PF4(x)          "       prefetchnta "PF_OFFS(x)"(%[p5])         ;\n"
    /* XO1..XO4: %xmm<y> ^= 16 bytes at offset x of source p2..p5. */
 57 #define XO1(x, y)       "       xorps "OFFS(x)"(%[p2]), %%xmm"#y"       ;\n"
 58 #define XO2(x, y)       "       xorps "OFFS(x)"(%[p3]), %%xmm"#y"       ;\n"
 59 #define XO3(x, y)       "       xorps "OFFS(x)"(%[p4]), %%xmm"#y"       ;\n"
 60 #define XO4(x, y)       "       xorps "OFFS(x)"(%[p5]), %%xmm"#y"       ;\n"
    /* Empty prefetch slot for BLK64 (used by the store pass). */
 61 #define NOP(x)
 62 
    /* One 64-byte chunk: a single prefetch plus 'op' applied to four
     * consecutive 16-byte words, held in registers %xmm0..%xmm3. */
 63 #define BLK64(pf, op, i)                                \
 64                 pf(i)                                   \
 65                 op(i, 0)                                \
 66                         op(i + 1, 1)                    \
 67                                 op(i + 2, 2)            \
 68                                         op(i + 3, 3)
    /*
     * p1[] ^= p2[] over 'bytes' bytes; 'bytes' is assumed to be a
     * multiple of 256 (lines = bytes >> 8, one 256-byte line per loop
     * iteration).  Uses SSE movaps/xorps on %xmm0..%xmm3 and prefetchnta
     * to fetch the next line without polluting the caches; all XMM use
     * is bracketed by kernel_fpu_begin()/kernel_fpu_end().
     */
 70 static void
 71 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 72 {
 73         unsigned long lines = bytes >> 8;
 74 
 75         kernel_fpu_begin();
 76 
 77         asm volatile(
 78 #undef BLOCK
 79 #define BLOCK(i)                                        \
 80                 LD(i, 0)                                \
 81                         LD(i + 1, 1)                    \
 82                 PF1(i)                                  \
 83                                 PF1(i + 2)              \
 84                                 LD(i + 2, 2)            \
 85                                         LD(i + 3, 3)    \
 86                 PF0(i + 4)                              \
 87                                 PF0(i + 6)              \
 88                 XO1(i, 0)                               \
 89                         XO1(i + 1, 1)                   \
 90                                 XO1(i + 2, 2)           \
 91                                         XO1(i + 3, 3)   \
 92                 ST(i, 0)                                \
 93                         ST(i + 1, 1)                    \
 94                                 ST(i + 2, 2)            \
 95                                         ST(i + 3, 3)    \
 96 
 97 
 98                 PF0(0)
 99                                 PF0(2)
100 
101         " .align 32                     ;\n"
102         " 1:                            ;\n"
103 
104                 BLOCK(0)
105                 BLOCK(4)
106                 BLOCK(8)
107                 BLOCK(12)
108 
109         "       add %[inc], %[p1]       ;\n"
110         "       add %[inc], %[p2]       ;\n"
111         "       dec %[cnt]              ;\n"
112         "       jnz 1b                  ;\n"
113         : [cnt] "+r" (lines),
114           [p1] "+r" (p1), [p2] "+r" (p2)
115         : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
116         : "memory");
117 
118         kernel_fpu_end();
119 }
120 
    /*
     * Same operation as xor_sse_2 (p1[] ^= p2[], 'bytes' a multiple of
     * 256), but with a simpler prefetch schedule: BLK64 issues one
     * prefetchnta per 64-byte chunk of each source, interleaved with
     * the load/xor passes; the store pass uses the empty NOP slot.
     */
121 static void
122 xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
123 {
124         unsigned long lines = bytes >> 8;
125 
126         kernel_fpu_begin();
127 
128         asm volatile(
129 #undef BLOCK
130 #define BLOCK(i)                        \
131                 BLK64(PF0, LD, i)       \
132                 BLK64(PF1, XO1, i)      \
133                 BLK64(NOP, ST, i)       \
134 
135         " .align 32                     ;\n"
136         " 1:                            ;\n"
137 
138                 BLOCK(0)
139                 BLOCK(4)
140                 BLOCK(8)
141                 BLOCK(12)
142 
143         "       add %[inc], %[p1]       ;\n"
144         "       add %[inc], %[p2]       ;\n"
145         "       dec %[cnt]              ;\n"
146         "       jnz 1b                  ;\n"
147         : [cnt] "+r" (lines),
148           [p1] "+r" (p1), [p2] "+r" (p2)
149         : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
150         : "memory");
151 
152         kernel_fpu_end();
153 }
154 
    /*
     * p1[] ^= p2[] ^ p3[] over 'bytes' bytes; 'bytes' is assumed to be
     * a multiple of 256 (one 256-byte line per loop iteration).  The
     * hand-scheduled BLOCK interleaves prefetchnta for all three
     * sources with the load/xor/store stream through %xmm0..%xmm3.
     */
155 static void
156 xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
157           unsigned long *p3)
158 {
159         unsigned long lines = bytes >> 8;
160 
161         kernel_fpu_begin();
162 
163         asm volatile(
164 #undef BLOCK
165 #define BLOCK(i) \
166                 PF1(i)                                  \
167                                 PF1(i + 2)              \
168                 LD(i, 0)                                \
169                         LD(i + 1, 1)                    \
170                                 LD(i + 2, 2)            \
171                                         LD(i + 3, 3)    \
172                 PF2(i)                                  \
173                                 PF2(i + 2)              \
174                 PF0(i + 4)                              \
175                                 PF0(i + 6)              \
176                 XO1(i, 0)                               \
177                         XO1(i + 1, 1)                   \
178                                 XO1(i + 2, 2)           \
179                                         XO1(i + 3, 3)   \
180                 XO2(i, 0)                               \
181                         XO2(i + 1, 1)                   \
182                                 XO2(i + 2, 2)           \
183                                         XO2(i + 3, 3)   \
184                 ST(i, 0)                                \
185                         ST(i + 1, 1)                    \
186                                 ST(i + 2, 2)            \
187                                         ST(i + 3, 3)    \
188 
189 
190                 PF0(0)
191                                 PF0(2)
192 
193         " .align 32                     ;\n"
194         " 1:                            ;\n"
195 
196                 BLOCK(0)
197                 BLOCK(4)
198                 BLOCK(8)
199                 BLOCK(12)
200 
201         "       add %[inc], %[p1]       ;\n"
202         "       add %[inc], %[p2]       ;\n"
203         "       add %[inc], %[p3]       ;\n"
204         "       dec %[cnt]              ;\n"
205         "       jnz 1b                  ;\n"
206         : [cnt] "+r" (lines),
207           [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
208         : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
209         : "memory");
210 
211         kernel_fpu_end();
212 }
213 
    /*
     * Same operation as xor_sse_3 (p1[] ^= p2[] ^ p3[]), but with the
     * 64-byte-granular prefetch schedule: one prefetchnta per BLK64
     * pass per source, store pass unprefetched (NOP).
     */
214 static void
215 xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
216                unsigned long *p3)
217 {
218         unsigned long lines = bytes >> 8;
219 
220         kernel_fpu_begin();
221 
222         asm volatile(
223 #undef BLOCK
224 #define BLOCK(i)                        \
225                 BLK64(PF0, LD, i)       \
226                 BLK64(PF1, XO1, i)      \
227                 BLK64(PF2, XO2, i)      \
228                 BLK64(NOP, ST, i)       \
229 
230         " .align 32                     ;\n"
231         " 1:                            ;\n"
232 
233                 BLOCK(0)
234                 BLOCK(4)
235                 BLOCK(8)
236                 BLOCK(12)
237 
238         "       add %[inc], %[p1]       ;\n"
239         "       add %[inc], %[p2]       ;\n"
240         "       add %[inc], %[p3]       ;\n"
241         "       dec %[cnt]              ;\n"
242         "       jnz 1b                  ;\n"
243         : [cnt] "+r" (lines),
244           [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
245         : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
246         : "memory");
247 
248         kernel_fpu_end();
249 }
250 
    /*
     * p1[] ^= p2[] ^ p3[] ^ p4[] over 'bytes' bytes; 'bytes' is assumed
     * to be a multiple of 256 (one 256-byte line per loop iteration).
     * Prefetches for all four sources are interleaved with the
     * load/xor/store stream through %xmm0..%xmm3.
     */
251 static void
252 xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
253           unsigned long *p3, unsigned long *p4)
254 {
255         unsigned long lines = bytes >> 8;
256 
257         kernel_fpu_begin();
258 
259         asm volatile(
260 #undef BLOCK
261 #define BLOCK(i) \
262                 PF1(i)                                  \
263                                 PF1(i + 2)              \
264                 LD(i, 0)                                \
265                         LD(i + 1, 1)                    \
266                                 LD(i + 2, 2)            \
267                                         LD(i + 3, 3)    \
268                 PF2(i)                                  \
269                                 PF2(i + 2)              \
270                 XO1(i, 0)                               \
271                         XO1(i + 1, 1)                   \
272                                 XO1(i + 2, 2)           \
273                                         XO1(i + 3, 3)   \
274                 PF3(i)                                  \
275                                 PF3(i + 2)              \
276                 PF0(i + 4)                              \
277                                 PF0(i + 6)              \
278                 XO2(i, 0)                               \
279                         XO2(i + 1, 1)                   \
280                                 XO2(i + 2, 2)           \
281                                         XO2(i + 3, 3)   \
282                 XO3(i, 0)                               \
283                         XO3(i + 1, 1)                   \
284                                 XO3(i + 2, 2)           \
285                                         XO3(i + 3, 3)   \
286                 ST(i, 0)                                \
287                         ST(i + 1, 1)                    \
288                                 ST(i + 2, 2)            \
289                                         ST(i + 3, 3)    \
290 
291 
292                 PF0(0)
293                                 PF0(2)
294 
295         " .align 32                     ;\n"
296         " 1:                            ;\n"
297 
298                 BLOCK(0)
299                 BLOCK(4)
300                 BLOCK(8)
301                 BLOCK(12)
302 
303         "       add %[inc], %[p1]       ;\n"
304         "       add %[inc], %[p2]       ;\n"
305         "       add %[inc], %[p3]       ;\n"
306         "       add %[inc], %[p4]       ;\n"
307         "       dec %[cnt]              ;\n"
308         "       jnz 1b                  ;\n"
309         : [cnt] "+r" (lines), [p1] "+r" (p1),
310           [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
311         : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
312         : "memory");
313 
314         kernel_fpu_end();
315 }
316 
    /*
     * Same operation as xor_sse_4 (p1[] ^= p2[] ^ p3[] ^ p4[]), using
     * the 64-byte-granular BLK64 prefetch schedule.
     */
317 static void
318 xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
319                unsigned long *p3, unsigned long *p4)
320 {
321         unsigned long lines = bytes >> 8;
322 
323         kernel_fpu_begin();
324 
325         asm volatile(
326 #undef BLOCK
327 #define BLOCK(i)                        \
328                 BLK64(PF0, LD, i)       \
329                 BLK64(PF1, XO1, i)      \
330                 BLK64(PF2, XO2, i)      \
331                 BLK64(PF3, XO3, i)      \
332                 BLK64(NOP, ST, i)       \
333 
334         " .align 32                     ;\n"
335         " 1:                            ;\n"
336 
337                 BLOCK(0)
338                 BLOCK(4)
339                 BLOCK(8)
340                 BLOCK(12)
341 
342         "       add %[inc], %[p1]       ;\n"
343         "       add %[inc], %[p2]       ;\n"
344         "       add %[inc], %[p3]       ;\n"
345         "       add %[inc], %[p4]       ;\n"
346         "       dec %[cnt]              ;\n"
347         "       jnz 1b                  ;\n"
348         : [cnt] "+r" (lines), [p1] "+r" (p1),
349           [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
350         : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
351         : "memory");
352 
353         kernel_fpu_end();
354 }
355 
    /*
     * p1[] ^= p2[] ^ p3[] ^ p4[] ^ p5[] over 'bytes' bytes; 'bytes' is
     * assumed to be a multiple of 256 (one 256-byte line per loop
     * iteration).  Prefetches for all five sources are interleaved with
     * the load/xor/store stream through %xmm0..%xmm3.
     */
356 static void
357 xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
358           unsigned long *p3, unsigned long *p4, unsigned long *p5)
359 {
360         unsigned long lines = bytes >> 8;
361 
362         kernel_fpu_begin();
363 
364         asm volatile(
365 #undef BLOCK
366 #define BLOCK(i) \
367                 PF1(i)                                  \
368                                 PF1(i + 2)              \
369                 LD(i, 0)                                \
370                         LD(i + 1, 1)                    \
371                                 LD(i + 2, 2)            \
372                                         LD(i + 3, 3)    \
373                 PF2(i)                                  \
374                                 PF2(i + 2)              \
375                 XO1(i, 0)                               \
376                         XO1(i + 1, 1)                   \
377                                 XO1(i + 2, 2)           \
378                                         XO1(i + 3, 3)   \
379                 PF3(i)                                  \
380                                 PF3(i + 2)              \
381                 XO2(i, 0)                               \
382                         XO2(i + 1, 1)                   \
383                                 XO2(i + 2, 2)           \
384                                         XO2(i + 3, 3)   \
385                 PF4(i)                                  \
386                                 PF4(i + 2)              \
387                 PF0(i + 4)                              \
388                                 PF0(i + 6)              \
389                 XO3(i, 0)                               \
390                         XO3(i + 1, 1)                   \
391                                 XO3(i + 2, 2)           \
392                                         XO3(i + 3, 3)   \
393                 XO4(i, 0)                               \
394                         XO4(i + 1, 1)                   \
395                                 XO4(i + 2, 2)           \
396                                         XO4(i + 3, 3)   \
397                 ST(i, 0)                                \
398                         ST(i + 1, 1)                    \
399                                 ST(i + 2, 2)            \
400                                         ST(i + 3, 3)    \
401 
402 
403                 PF0(0)
404                                 PF0(2)
405 
406         " .align 32                     ;\n"
407         " 1:                            ;\n"
408 
409                 BLOCK(0)
410                 BLOCK(4)
411                 BLOCK(8)
412                 BLOCK(12)
413 
414         "       add %[inc], %[p1]       ;\n"
415         "       add %[inc], %[p2]       ;\n"
416         "       add %[inc], %[p3]       ;\n"
417         "       add %[inc], %[p4]       ;\n"
418         "       add %[inc], %[p5]       ;\n"
419         "       dec %[cnt]              ;\n"
420         "       jnz 1b                  ;\n"
421         : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
422           [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
423         : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
424         : "memory");
425 
426         kernel_fpu_end();
427 }
428 
    /*
     * Same operation as xor_sse_5 (p1[] ^= p2[] ^ p3[] ^ p4[] ^ p5[]),
     * using the 64-byte-granular BLK64 prefetch schedule.
     */
429 static void
430 xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
431                unsigned long *p3, unsigned long *p4, unsigned long *p5)
432 {
433         unsigned long lines = bytes >> 8;
434 
435         kernel_fpu_begin();
436 
437         asm volatile(
438 #undef BLOCK
439 #define BLOCK(i)                        \
440                 BLK64(PF0, LD, i)       \
441                 BLK64(PF1, XO1, i)      \
442                 BLK64(PF2, XO2, i)      \
443                 BLK64(PF3, XO3, i)      \
444                 BLK64(PF4, XO4, i)      \
445                 BLK64(NOP, ST, i)       \
446 
447         " .align 32                     ;\n"
448         " 1:                            ;\n"
449 
450                 BLOCK(0)
451                 BLOCK(4)
452                 BLOCK(8)
453                 BLOCK(12)
454 
455         "       add %[inc], %[p1]       ;\n"
456         "       add %[inc], %[p2]       ;\n"
457         "       add %[inc], %[p3]       ;\n"
458         "       add %[inc], %[p4]       ;\n"
459         "       add %[inc], %[p5]       ;\n"
460         "       dec %[cnt]              ;\n"
461         "       jnz 1b                  ;\n"
462         : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
463           [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
464         : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
465         : "memory");
466 
467         kernel_fpu_end();
468 }
469 
    /*
     * Dispatch table bundling the 64-byte-prefetch SSE routines under
     * the name "prefetch64-sse".  NOTE(review): presumably benchmarked
     * against the other templates by the generic xor calibration code —
     * confirm against crypto/xor.c in this tree.
     */
470 static struct xor_block_template xor_block_sse_pf64 = {
471         .name = "prefetch64-sse",
472         .do_2 = xor_sse_2_pf64,
473         .do_3 = xor_sse_3_pf64,
474         .do_4 = xor_sse_4_pf64,
475         .do_5 = xor_sse_5_pf64,
476 };
477 
    /* Drop the asm helper macros so they cannot leak into other headers
     * included after this one. */
478 #undef LD
479 #undef XO1
480 #undef XO2
481 #undef XO3
482 #undef XO4
483 #undef ST
484 #undef NOP
485 #undef BLK64
486 #undef BLOCK
487 
488 #undef XOR_CONSTANT_CONSTRAINT
489 
    /* Pull in the arch-width-specific templates (MMX/AVX etc.). */
490 #ifdef CONFIG_X86_32
491 # include <asm/xor_32.h>
492 #else
493 # include <asm/xor_64.h>
494 #endif
495 
    /* NOTE(review): AVX_SELECT comes from the headers included just
     * above; it appears to let an AVX template override FASTEST —
     * confirm its definition before relying on this. */
496 #define XOR_SELECT_TEMPLATE(FASTEST) \
497         AVX_SELECT(FASTEST)
498 
499 #endif /* _ASM_X86_XOR_H */
500 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp