TOMOYO Linux Cross Reference
Linux/arch/x86/lib/mmx_32.c

/*
 *      MMX 3DNow! library helper functions
 *
 *      To do:
 *      We can use MMX just for prefetch in IRQs. This may be a win.
 *              (reported so on K6-III)
 *      We should use a better code neutral filler for the short jump
 *              leal ebx,[ebx] is apparently best for K6-2, but Cyrix ??
 *      We also want to clobber the filler register so we don't get any
 *              register forwarding stalls on the filler.
 *
 *      Add *user handling. Checksums are not a win with MMX on any CPU
 *      tested so far for any MMX solution figured.
 *
 *      22/09/2000 - Arjan van de Ven
 *              Improved for non-engineering-sample Athlons
 *
 */
#include <linux/hardirq.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/types.h>

#include <asm/i387.h>
#include <asm/asm.h>

void *_mmx_memcpy(void *to, const void *from, size_t len)
{
        void *p;
        int i;

        if (unlikely(in_interrupt()))
                return __memcpy(to, from, len);

        p = to;
        i = len >> 6; /* len/64 */

        kernel_fpu_begin();

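        /*
         * Warm up the cache with the first five lines of the source.
         * If "prefetch" faults (it is a 3DNow! instruction and can
         * trap on some parts, e.g. due to CPU errata), the fixup at
         * label 3 rewrites the first prefetch into a short jump
         * (0x1AEB, little-endian EB 1A = jmp +26) that skips the
         * remaining 26 bytes of the 28-byte prefetch block, so later
         * passes branch straight to label 2.
         */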
        __asm__ __volatile__ (
                "1: prefetch (%0)\n"            /* This set is 28 bytes */
                "   prefetch 64(%0)\n"
                "   prefetch 128(%0)\n"
                "   prefetch 192(%0)\n"
                "   prefetch 256(%0)\n"
                "2:  \n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
                "   jmp 2b\n"
                ".previous\n"
                        _ASM_EXTABLE(1b, 3b)
                        : : "r" (from));

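        /*
         * Main loop: copy 64 bytes per iteration through mm0-mm3,
         * prefetching 320 bytes ahead of the current source pointer.
         * The loop stops with five 64-byte blocks still to go, so the
         * prefetch never reaches past the end of the source buffer.
         */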
        for ( ; i > 5; i--) {
                __asm__ __volatile__ (
                "1:  prefetch 320(%0)\n"
                "2:  movq (%0), %%mm0\n"
                "  movq 8(%0), %%mm1\n"
                "  movq 16(%0), %%mm2\n"
                "  movq 24(%0), %%mm3\n"
                "  movq %%mm0, (%1)\n"
                "  movq %%mm1, 8(%1)\n"
                "  movq %%mm2, 16(%1)\n"
                "  movq %%mm3, 24(%1)\n"
                "  movq 32(%0), %%mm0\n"
                "  movq 40(%0), %%mm1\n"
                "  movq 48(%0), %%mm2\n"
                "  movq 56(%0), %%mm3\n"
                "  movq %%mm0, 32(%1)\n"
                "  movq %%mm1, 40(%1)\n"
                "  movq %%mm2, 48(%1)\n"
                "  movq %%mm3, 56(%1)\n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
                "   jmp 2b\n"
                ".previous\n"
                        _ASM_EXTABLE(1b, 3b)
                        : : "r" (from), "r" (to) : "memory");

                from += 64;
                to += 64;
        }

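        /*
         * Copy the remaining full 64-byte blocks (at most five)
         * without prefetching, so nothing past the end of the source
         * is ever touched.
         */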
        for ( ; i > 0; i--) {
                __asm__ __volatile__ (
                "  movq (%0), %%mm0\n"
                "  movq 8(%0), %%mm1\n"
                "  movq 16(%0), %%mm2\n"
                "  movq 24(%0), %%mm3\n"
                "  movq %%mm0, (%1)\n"
                "  movq %%mm1, 8(%1)\n"
                "  movq %%mm2, 16(%1)\n"
                "  movq %%mm3, 24(%1)\n"
                "  movq 32(%0), %%mm0\n"
                "  movq 40(%0), %%mm1\n"
                "  movq 48(%0), %%mm2\n"
                "  movq 56(%0), %%mm3\n"
                "  movq %%mm0, 32(%1)\n"
                "  movq %%mm1, 40(%1)\n"
                "  movq %%mm2, 48(%1)\n"
                "  movq %%mm3, 56(%1)\n"
                        : : "r" (from), "r" (to) : "memory");

                from += 64;
                to += 64;
        }
        /*
         * Now do the tail of the block:
         */
        __memcpy(to, from, len & 63);
        kernel_fpu_end();

        return p;
}
EXPORT_SYMBOL(_mmx_memcpy);

#ifdef CONFIG_MK7

/*
 *      The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
 *      other MMX-using processors do not.
 */

static void fast_clear_page(void *page)
{
        int i;

        kernel_fpu_begin();

        __asm__ __volatile__ (
                "  pxor %%mm0, %%mm0\n" : :
        );

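        /*
         * Clear the page 64 bytes per iteration.  movntq is a
         * non-temporal store that bypasses the cache, so zeroing a
         * page does not evict useful data.
         */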
        for (i = 0; i < 4096/64; i++) {
                __asm__ __volatile__ (
                "  movntq %%mm0, (%0)\n"
                "  movntq %%mm0, 8(%0)\n"
                "  movntq %%mm0, 16(%0)\n"
                "  movntq %%mm0, 24(%0)\n"
                "  movntq %%mm0, 32(%0)\n"
                "  movntq %%mm0, 40(%0)\n"
                "  movntq %%mm0, 48(%0)\n"
                "  movntq %%mm0, 56(%0)\n"
                : : "r" (page) : "memory");
                page += 64;
        }

        /*
         * Since movntq stores are weakly ordered, an "sfence" is needed to
         * make them globally visible before any later stores:
         */
        __asm__ __volatile__("sfence\n"::);

        kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
        int i;

        kernel_fpu_begin();

        /*
         * maybe the prefetch stuff can go before the expensive fnsave...
         * but that is for later. -AV
         */
        __asm__ __volatile__(
                "1: prefetch (%0)\n"
                "   prefetch 64(%0)\n"
                "   prefetch 128(%0)\n"
                "   prefetch 192(%0)\n"
                "   prefetch 256(%0)\n"
                "2:  \n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
                "   jmp 2b\n"
                ".previous\n"
                        _ASM_EXTABLE(1b, 3b) : : "r" (from));

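        /*
         * Bulk of the page: read through the cache with movq, write
         * with non-temporal movntq so the destination does not
         * pollute the cache.  As in _mmx_memcpy(), stop 320 bytes
         * before the end so the prefetch stays within the source
         * page.
         */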
        for (i = 0; i < (4096-320)/64; i++) {
                __asm__ __volatile__ (
                "1: prefetch 320(%0)\n"
                "2: movq (%0), %%mm0\n"
                "   movntq %%mm0, (%1)\n"
                "   movq 8(%0), %%mm1\n"
                "   movntq %%mm1, 8(%1)\n"
                "   movq 16(%0), %%mm2\n"
                "   movntq %%mm2, 16(%1)\n"
                "   movq 24(%0), %%mm3\n"
                "   movntq %%mm3, 24(%1)\n"
                "   movq 32(%0), %%mm4\n"
                "   movntq %%mm4, 32(%1)\n"
                "   movq 40(%0), %%mm5\n"
                "   movntq %%mm5, 40(%1)\n"
                "   movq 48(%0), %%mm6\n"
                "   movntq %%mm6, 48(%1)\n"
                "   movq 56(%0), %%mm7\n"
                "   movntq %%mm7, 56(%1)\n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
                "   jmp 2b\n"
                ".previous\n"
                _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");

                from += 64;
                to += 64;
        }

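        /*
         * Copy the final 320 bytes without prefetching, to avoid
         * prefetching beyond the end of the source page.
         */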
        for (i = (4096-320)/64; i < 4096/64; i++) {
                __asm__ __volatile__ (
                "2: movq (%0), %%mm0\n"
                "   movntq %%mm0, (%1)\n"
                "   movq 8(%0), %%mm1\n"
                "   movntq %%mm1, 8(%1)\n"
                "   movq 16(%0), %%mm2\n"
                "   movntq %%mm2, 16(%1)\n"
                "   movq 24(%0), %%mm3\n"
                "   movntq %%mm3, 24(%1)\n"
                "   movq 32(%0), %%mm4\n"
                "   movntq %%mm4, 32(%1)\n"
                "   movq 40(%0), %%mm5\n"
                "   movntq %%mm5, 40(%1)\n"
                "   movq 48(%0), %%mm6\n"
                "   movntq %%mm6, 48(%1)\n"
                "   movq 56(%0), %%mm7\n"
                "   movntq %%mm7, 56(%1)\n"
                        : : "r" (from), "r" (to) : "memory");
                from += 64;
                to += 64;
        }
        /*
         * Since movntq stores are weakly ordered, an "sfence" is needed to
         * make them globally visible before any later stores:
         */
        __asm__ __volatile__("sfence \n"::);
        kernel_fpu_end();
}

#else /* CONFIG_MK7 */

/*
 *      Generic MMX implementation without K7-specific streaming
 */
static void fast_clear_page(void *page)
{
        int i;

        kernel_fpu_begin();

        __asm__ __volatile__ (
                "  pxor %%mm0, %%mm0\n" : :
        );

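        /*
         * Without non-temporal stores, simply clear the page 128
         * bytes per iteration with ordinary MMX stores.
         */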
        for (i = 0; i < 4096/128; i++) {
                __asm__ __volatile__ (
                "  movq %%mm0, (%0)\n"
                "  movq %%mm0, 8(%0)\n"
                "  movq %%mm0, 16(%0)\n"
                "  movq %%mm0, 24(%0)\n"
                "  movq %%mm0, 32(%0)\n"
                "  movq %%mm0, 40(%0)\n"
                "  movq %%mm0, 48(%0)\n"
                "  movq %%mm0, 56(%0)\n"
                "  movq %%mm0, 64(%0)\n"
                "  movq %%mm0, 72(%0)\n"
                "  movq %%mm0, 80(%0)\n"
                "  movq %%mm0, 88(%0)\n"
                "  movq %%mm0, 96(%0)\n"
                "  movq %%mm0, 104(%0)\n"
                "  movq %%mm0, 112(%0)\n"
                "  movq %%mm0, 120(%0)\n"
                        : : "r" (page) : "memory");
                page += 128;
        }

        kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
        int i;

        kernel_fpu_begin();

        __asm__ __volatile__ (
                "1: prefetch (%0)\n"
                "   prefetch 64(%0)\n"
                "   prefetch 128(%0)\n"
                "   prefetch 192(%0)\n"
                "   prefetch 256(%0)\n"
                "2:  \n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
                "   jmp 2b\n"
                ".previous\n"
                        _ASM_EXTABLE(1b, 3b) : : "r" (from));

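        /*
         * Copy the whole page 64 bytes at a time through mm0-mm3
         * using ordinary (cached) MMX loads and stores.
         */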
        for (i = 0; i < 4096/64; i++) {
                __asm__ __volatile__ (
                "1: prefetch 320(%0)\n"
                "2: movq (%0), %%mm0\n"
                "   movq 8(%0), %%mm1\n"
                "   movq 16(%0), %%mm2\n"
                "   movq 24(%0), %%mm3\n"
                "   movq %%mm0, (%1)\n"
                "   movq %%mm1, 8(%1)\n"
                "   movq %%mm2, 16(%1)\n"
                "   movq %%mm3, 24(%1)\n"
                "   movq 32(%0), %%mm0\n"
                "   movq 40(%0), %%mm1\n"
                "   movq 48(%0), %%mm2\n"
                "   movq 56(%0), %%mm3\n"
                "   movq %%mm0, 32(%1)\n"
                "   movq %%mm1, 40(%1)\n"
                "   movq %%mm2, 48(%1)\n"
                "   movq %%mm3, 56(%1)\n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
                "   jmp 2b\n"
                ".previous\n"
                        _ASM_EXTABLE(1b, 3b)
                        : : "r" (from), "r" (to) : "memory");

                from += 64;
                to += 64;
        }
        kernel_fpu_end();
}

#endif /* !CONFIG_MK7 */

/*
 * Favour MMX for page clear and copy:
 */
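/*
 * Fallback versions using plain string instructions (rep stosl and
 * rep movsl).  They are used from interrupt context, where the fast
 * MMX versions (which need kernel_fpu_begin()) cannot be used.
 */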
static void slow_zero_page(void *page)
{
        int d0, d1;

        __asm__ __volatile__(
                "cld\n\t"
                "rep ; stosl"

                        : "=&c" (d0), "=&D" (d1)
                        : "a" (0), "1" (page), "0" (1024)
                        : "memory");
}

void mmx_clear_page(void *page)
{
        if (unlikely(in_interrupt()))
                slow_zero_page(page);
        else
                fast_clear_page(page);
}
EXPORT_SYMBOL(mmx_clear_page);

static void slow_copy_page(void *to, void *from)
{
        int d0, d1, d2;

        __asm__ __volatile__(
                "cld\n\t"
                "rep ; movsl"
                : "=&c" (d0), "=&D" (d1), "=&S" (d2)
                : "0" (1024), "1" ((long) to), "2" ((long) from)
                : "memory");
}

void mmx_copy_page(void *to, void *from)
{
        if (unlikely(in_interrupt()))
                slow_copy_page(to, from);
        else
                fast_copy_page(to, from);
}
EXPORT_SYMBOL(mmx_copy_page);
