~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/x86/sigreturn.c

Version: ~ [ linux-5.16-rc3 ] ~ [ linux-5.15.5 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.82 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.162 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.218 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.256 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.291 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.293 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
  4  * Copyright (c) 2014-2015 Andrew Lutomirski
  5  *
  6  * This is a series of tests that exercises the sigreturn(2) syscall and
  7  * the IRET / SYSRET paths in the kernel.
  8  *
  9  * For now, this focuses on the effects of unusual CS and SS values,
 10  * and it has a bunch of tests to make sure that ESP/RSP is restored
 11  * properly.
 12  *
 13  * The basic idea behind these tests is to raise(SIGUSR1) to create a
 14  * sigcontext frame, plug in the values to be tested, and then return,
 15  * which implicitly invokes sigreturn(2) and programs the user context
 16  * as desired.
 17  *
 18  * For tests for which we expect sigreturn and the subsequent return to
 19  * user mode to succeed, we return to a short trampoline that generates
 20  * SIGTRAP so that the meat of the tests can be ordinary C code in a
 21  * SIGTRAP handler.
 22  *
 23  * The inner workings of each test is documented below.
 24  *
 25  * Do not run on outdated, unpatched kernels at risk of nasty crashes.
 26  */
 27 
 28 #define _GNU_SOURCE
 29 
 30 #include <sys/time.h>
 31 #include <time.h>
 32 #include <stdlib.h>
 33 #include <sys/syscall.h>
 34 #include <unistd.h>
 35 #include <stdio.h>
 36 #include <string.h>
 37 #include <inttypes.h>
 38 #include <sys/mman.h>
 39 #include <sys/signal.h>
 40 #include <sys/ucontext.h>
 41 #include <asm/ldt.h>
 42 #include <err.h>
 43 #include <setjmp.h>
 44 #include <stddef.h>
 45 #include <stdbool.h>
 46 #include <sys/ptrace.h>
 47 #include <sys/user.h>
 48 
 49 /* Pull in AR_xyz defines. */
 50 typedef unsigned int u32;
 51 typedef unsigned short u16;
 52 #include "../../../../arch/x86/include/asm/desc_defs.h"
 53 
 54 /*
 55  * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
 56  * headers.
 57  */
 58 #ifdef __x86_64__
 59 /*
 60  * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
 61  * kernels that save SS in the sigcontext.  All kernels that set
 62  * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
 63  * regardless of SS (i.e. they implement espfix).
 64  *
 65  * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
 66  * when delivering a signal that came from 64-bit code.
 67  *
 68  * Sigreturn restores SS as follows:
 69  *
 70  * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
 71  *     saved CS is not 64-bit)
 72  *         new SS = saved SS  (will fail IRET and signal if invalid)
 73  * else
 74  *         new SS = a flat 32-bit data segment
 75  */
 76 #define UC_SIGCONTEXT_SS       0x2
 77 #define UC_STRICT_RESTORE_SS   0x4
 78 #endif
 79 
 80 /*
 81  * In principle, this test can run on Linux emulation layers (e.g.
 82  * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
 83  * entries 0-5 for their own internal purposes, so start our LDT
 84  * allocations above that reservation.  (The tests don't pass on LX
 85  * branded zones, but at least this lets them run.)
 86  */
 87 #define LDT_OFFSET 6
 88 
 89 /* An aligned stack accessible through some of our segments. */
 90 static unsigned char stack16[65536] __attribute__((aligned(4096)));
 91 
 92 /*
  93  * An aligned int3 instruction used as a trampoline.  Some of the tests
  94  * want to fish out their ss values, so this trampoline copies ss to ecx
  95  * before the int3.
 96  */
 97 asm (".pushsection .text\n\t"
 98      ".type int3, @function\n\t"
 99      ".align 4096\n\t"
100      "int3:\n\t"
101      "mov %ss,%ecx\n\t"
102      "int3\n\t"
103      ".size int3, . - int3\n\t"
104      ".align 4096, 0xcc\n\t"
105      ".popsection");
106 extern char int3[4096];
107 
108 /*
 109  * At startup, we prepare:
110  *
111  * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
112  *   descriptor or out of bounds).
113  * - code16_sel: A 16-bit LDT code segment pointing to int3.
114  * - data16_sel: A 16-bit LDT data segment pointing to stack16.
115  * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
116  * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
117  * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
118  * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
119  *   stack16.
120  *
121  * For no particularly good reason, xyz_sel is a selector value with the
122  * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
123  * descriptor table.  These variables will be zero if their respective
124  * segments could not be allocated.
125  */
126 static unsigned short ldt_nonexistent_sel;
127 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
128 
129 static unsigned short gdt_data16_idx, gdt_npdata32_idx;
130 
/* Builds a selector for GDT entry @idx: index in bits 15:3, low bits = 3. */
static unsigned short GDT3(int idx)
{
        const int low_bits = 3;

        return (idx << 3) | low_bits;
}
135 
/*
 * Builds a selector for LDT entry @idx: index in bits 15:3, low bits = 7
 * (i.e. the GDT3() encoding with bit 2 additionally set).
 */
static unsigned short LDT3(int idx)
{
        const int table_bit = 1 << 2;
        const int low_bits = 3;

        return (idx << 3) | table_bit | low_bits;
}
140 
141 /* Our sigaltstack scratch space. */
142 static char altstack_data[SIGSTKSZ];
143 
144 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
145                        int flags)
146 {
147         struct sigaction sa;
148         memset(&sa, 0, sizeof(sa));
149         sa.sa_sigaction = handler;
150         sa.sa_flags = SA_SIGINFO | flags;
151         sigemptyset(&sa.sa_mask);
152         if (sigaction(sig, &sa, 0))
153                 err(1, "sigaction");
154 }
155 
156 static void clearhandler(int sig)
157 {
158         struct sigaction sa;
159         memset(&sa, 0, sizeof(sa));
160         sa.sa_handler = SIG_DFL;
161         sigemptyset(&sa.sa_mask);
162         if (sigaction(sig, &sa, 0))
163                 err(1, "sigaction");
164 }
165 
166 static void add_ldt(const struct user_desc *desc, unsigned short *var,
167                     const char *name)
168 {
169         if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
170                 *var = LDT3(desc->entry_number);
171         } else {
172                 printf("[NOTE]\tFailed to create %s segment\n", name);
173                 *var = 0;
174         }
175 }
176 
177 static void setup_ldt(void)
178 {
179         if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
180                 errx(1, "stack16 is too high\n");
181         if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
182                 errx(1, "int3 is too high\n");
183 
184         ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
185 
186         const struct user_desc code16_desc = {
187                 .entry_number    = LDT_OFFSET + 0,
188                 .base_addr       = (unsigned long)int3,
189                 .limit           = 4095,
190                 .seg_32bit       = 0,
191                 .contents        = 2, /* Code, not conforming */
192                 .read_exec_only  = 0,
193                 .limit_in_pages  = 0,
194                 .seg_not_present = 0,
195                 .useable         = 0
196         };
197         add_ldt(&code16_desc, &code16_sel, "code16");
198 
199         const struct user_desc data16_desc = {
200                 .entry_number    = LDT_OFFSET + 1,
201                 .base_addr       = (unsigned long)stack16,
202                 .limit           = 0xffff,
203                 .seg_32bit       = 0,
204                 .contents        = 0, /* Data, grow-up */
205                 .read_exec_only  = 0,
206                 .limit_in_pages  = 0,
207                 .seg_not_present = 0,
208                 .useable         = 0
209         };
210         add_ldt(&data16_desc, &data16_sel, "data16");
211 
212         const struct user_desc npcode32_desc = {
213                 .entry_number    = LDT_OFFSET + 3,
214                 .base_addr       = (unsigned long)int3,
215                 .limit           = 4095,
216                 .seg_32bit       = 1,
217                 .contents        = 2, /* Code, not conforming */
218                 .read_exec_only  = 0,
219                 .limit_in_pages  = 0,
220                 .seg_not_present = 1,
221                 .useable         = 0
222         };
223         add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
224 
225         const struct user_desc npdata32_desc = {
226                 .entry_number    = LDT_OFFSET + 4,
227                 .base_addr       = (unsigned long)stack16,
228                 .limit           = 0xffff,
229                 .seg_32bit       = 1,
230                 .contents        = 0, /* Data, grow-up */
231                 .read_exec_only  = 0,
232                 .limit_in_pages  = 0,
233                 .seg_not_present = 1,
234                 .useable         = 0
235         };
236         add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
237 
238         struct user_desc gdt_data16_desc = {
239                 .entry_number    = -1,
240                 .base_addr       = (unsigned long)stack16,
241                 .limit           = 0xffff,
242                 .seg_32bit       = 0,
243                 .contents        = 0, /* Data, grow-up */
244                 .read_exec_only  = 0,
245                 .limit_in_pages  = 0,
246                 .seg_not_present = 0,
247                 .useable         = 0
248         };
249 
250         if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
251                 /*
252                  * This probably indicates vulnerability to CVE-2014-8133.
253                  * Merely getting here isn't definitive, though, and we'll
254                  * diagnose the problem for real later on.
255                  */
256                 printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
257                        gdt_data16_desc.entry_number);
258                 gdt_data16_idx = gdt_data16_desc.entry_number;
259         } else {
260                 printf("[OK]\tset_thread_area refused 16-bit data\n");
261         }
262 
263         struct user_desc gdt_npdata32_desc = {
264                 .entry_number    = -1,
265                 .base_addr       = (unsigned long)stack16,
266                 .limit           = 0xffff,
267                 .seg_32bit       = 1,
268                 .contents        = 0, /* Data, grow-up */
269                 .read_exec_only  = 0,
270                 .limit_in_pages  = 0,
271                 .seg_not_present = 1,
272                 .useable         = 0
273         };
274 
275         if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
276                 /*
277                  * As a hardening measure, newer kernels don't allow this.
278                  */
279                 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
280                        gdt_npdata32_desc.entry_number);
281                 gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
282         } else {
283                 printf("[OK]\tset_thread_area refused 16-bit data\n");
284         }
285 }
286 
287 /* State used by our signal handlers. */
288 static gregset_t initial_regs, requested_regs, resulting_regs;
289 
290 /* Instructions for the SIGUSR1 handler. */
291 static volatile unsigned short sig_cs, sig_ss;
292 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
293 #ifdef __x86_64__
294 static volatile sig_atomic_t sig_corrupt_final_ss;
295 #endif
296 
297 /* Abstractions for some 32-bit vs 64-bit differences. */
298 #ifdef __x86_64__
299 # define REG_IP REG_RIP
300 # define REG_SP REG_RSP
301 # define REG_CX REG_RCX
302 
303 struct selectors {
304         unsigned short cs, gs, fs, ss;
305 };
306 
307 static unsigned short *ssptr(ucontext_t *ctx)
308 {
309         struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
310         return &sels->ss;
311 }
312 
313 static unsigned short *csptr(ucontext_t *ctx)
314 {
315         struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
316         return &sels->cs;
317 }
318 #else
319 # define REG_IP REG_EIP
320 # define REG_SP REG_ESP
321 # define REG_CX REG_ECX
322 
323 static greg_t *ssptr(ucontext_t *ctx)
324 {
325         return &ctx->uc_mcontext.gregs[REG_SS];
326 }
327 
328 static greg_t *csptr(ucontext_t *ctx)
329 {
330         return &ctx->uc_mcontext.gregs[REG_CS];
331 }
332 #endif
333 
334 /*
335  * Checks a given selector for its code bitness or returns -1 if it's not
336  * a usable code segment selector.
337  */
int cs_bitness(unsigned short cs)
{
        uint32_t valid = 0, ar;

        /*
         * LAR reports success through ZF, so 'valid' is only set to 1 when
         * the jnz is not taken; 'ar' is only meaningful in that case.
         *
         * The store uses an explicit 'movl' because %[valid] may be placed
         * in memory ("rm"), where an unsuffixed 'mov $imm' has no
         * determinable operand size.  __asm__ is used so the function also
         * builds in strict ISO C modes.
         */
        __asm__ ("lar %[cs], %[ar]\n\t"
                 "jnz 1f\n\t"
                 "movl $1, %[valid]\n\t"
                 "1:"
                 : [ar] "=r" (ar), [valid] "+rm" (valid)
                 : [cs] "r" (cs)
                 : "cc");

        if (!valid)
                return -1;

        if (!(ar & (1 << 11)))
                return -1;      /* Not code. */

        bool db = (ar & (1 << 22));
        bool l = (ar & (1 << 21));

        if (l)
                return db ? -1 : 64;    /* L and D both set: unknown bitness. */

        return db ? 32 : 16;
}
366 
367 /*
368  * Checks a given selector for its code bitness or returns -1 if it's not
369  * a usable code segment selector.
370  */
371 bool is_valid_ss(unsigned short cs)
372 {
373         uint32_t valid = 0, ar;
374         asm ("lar %[cs], %[ar]\n\t"
375              "jnz 1f\n\t"
376              "mov $1, %[valid]\n\t"
377              "1:"
378              : [ar] "=r" (ar), [valid] "+rm" (valid)
379              : [cs] "r" (cs));
380 
381         if (!valid)
382                 return false;
383 
384         if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
385             (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
386                 return false;
387 
388         return (ar & AR_P);
389 }
390 
391 /* Number of errors in the current test case. */
392 static volatile sig_atomic_t nerrs;
393 
394 static void validate_signal_ss(int sig, ucontext_t *ctx)
395 {
396 #ifdef __x86_64__
397         bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
398 
399         if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
400                 printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
401                 nerrs++;
402 
403                 /*
404                  * This happens on Linux 4.1.  The rest will fail, too, so
405                  * return now to reduce the noise.
406                  */
407                 return;
408         }
409 
410         /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
411         if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
412                 printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
413                        sig);
414                 nerrs++;
415         }
416 
417         if (is_valid_ss(*ssptr(ctx))) {
418                 /*
419                  * DOSEMU was written before 64-bit sigcontext had SS, and
420                  * it tries to figure out the signal source SS by looking at
421                  * the physical register.  Make sure that keeps working.
422                  */
423                 unsigned short hw_ss;
424                 asm ("mov %%ss, %0" : "=rm" (hw_ss));
425                 if (hw_ss != *ssptr(ctx)) {
426                         printf("[FAIL]\tHW SS didn't match saved SS\n");
427                         nerrs++;
428                 }
429         }
430 #endif
431 }
432 
433 /*
434  * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
435  * int3 trampoline.  Sets SP to a large known value so that we can see
436  * whether the value round-trips back to user mode correctly.
437  */
438 static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
439 {
440         ucontext_t *ctx = (ucontext_t*)ctx_void;
441 
442         validate_signal_ss(sig, ctx);
443 
444         memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
445 
446         *csptr(ctx) = sig_cs;
447         *ssptr(ctx) = sig_ss;
448 
449         ctx->uc_mcontext.gregs[REG_IP] =
450                 sig_cs == code16_sel ? 0 : (unsigned long)&int3;
451         ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
452         ctx->uc_mcontext.gregs[REG_CX] = 0;
453 
454 #ifdef __i386__
455         /*
456          * Make sure the kernel doesn't inadvertently use DS or ES-relative
457          * accesses in a region where user DS or ES is loaded.
458          *
459          * Skip this for 64-bit builds because long mode doesn't care about
460          * DS and ES and skipping it increases test coverage a little bit,
461          * since 64-bit kernels can still run the 32-bit build.
462          */
463         ctx->uc_mcontext.gregs[REG_DS] = 0;
464         ctx->uc_mcontext.gregs[REG_ES] = 0;
465 #endif
466 
467         memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
468         requested_regs[REG_CX] = *ssptr(ctx);   /* The asm code does this. */
469 
470         return;
471 }
472 
473 /*
474  * Called after a successful sigreturn (via int3) or from a failed
475  * sigreturn (directly by kernel).  Restores our state so that the
476  * original raise(SIGUSR1) returns.
477  */
478 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
479 {
480         ucontext_t *ctx = (ucontext_t*)ctx_void;
481 
482         validate_signal_ss(sig, ctx);
483 
484         sig_err = ctx->uc_mcontext.gregs[REG_ERR];
485         sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
486 
487         unsigned short ss;
488         asm ("mov %%ss,%0" : "=r" (ss));
489 
490         greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
491         if (asm_ss != sig_ss && sig == SIGTRAP) {
492                 /* Sanity check failure. */
493                 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
494                        ss, *ssptr(ctx), (unsigned long long)asm_ss);
495                 nerrs++;
496         }
497 
498         memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
499         memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
500 
501 #ifdef __x86_64__
502         if (sig_corrupt_final_ss) {
503                 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
504                         printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
505                         nerrs++;
506                 } else {
507                         /*
508                          * DOSEMU transitions from 32-bit to 64-bit mode by
509                          * adjusting sigcontext, and it requires that this work
510                          * even if the saved SS is bogus.
511                          */
512                         printf("\tCorrupting SS on return to 64-bit mode\n");
513                         *ssptr(ctx) = 0;
514                 }
515         }
516 #endif
517 
518         sig_trapped = sig;
519 }
520 
521 #ifdef __x86_64__
522 /* Tests recovery if !UC_STRICT_RESTORE_SS */
523 static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
524 {
525         ucontext_t *ctx = (ucontext_t*)ctx_void;
526 
527         if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
528                 printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
529                 nerrs++;
530                 return;  /* We can't do the rest. */
531         }
532 
533         ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
534         *ssptr(ctx) = 0;
535 
536         /* Return.  The kernel should recover without sending another signal. */
537 }
538 
539 static int test_nonstrict_ss(void)
540 {
541         clearhandler(SIGUSR1);
542         clearhandler(SIGTRAP);
543         clearhandler(SIGSEGV);
544         clearhandler(SIGILL);
545         sethandler(SIGUSR2, sigusr2, 0);
546 
547         nerrs = 0;
548 
549         printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
550         raise(SIGUSR2);
551         if (!nerrs)
552                 printf("[OK]\tIt worked\n");
553 
554         return nerrs;
555 }
556 #endif
557 
558 /* Finds a usable code segment of the requested bitness. */
559 int find_cs(int bitness)
560 {
561         unsigned short my_cs;
562 
563         asm ("mov %%cs,%0" :  "=r" (my_cs));
564 
565         if (cs_bitness(my_cs) == bitness)
566                 return my_cs;
567         if (cs_bitness(my_cs + (2 << 3)) == bitness)
568                 return my_cs + (2 << 3);
569         if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
570             return my_cs - (2 << 3);
571         if (cs_bitness(code16_sel) == bitness)
572                 return code16_sel;
573 
574         printf("[WARN]\tCould not find %d-bit CS\n", bitness);
575         return -1;
576 }
577 
578 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
579 {
580         int cs = find_cs(cs_bits);
581         if (cs == -1) {
582                 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
583                        cs_bits, use_16bit_ss ? 16 : 32);
584                 return 0;
585         }
586 
587         if (force_ss != -1) {
588                 sig_ss = force_ss;
589         } else {
590                 if (use_16bit_ss) {
591                         if (!data16_sel) {
592                                 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
593                                        cs_bits);
594                                 return 0;
595                         }
596                         sig_ss = data16_sel;
597                 } else {
598                         asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
599                 }
600         }
601 
602         sig_cs = cs;
603 
604         printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
605                cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
606                (sig_ss & 4) ? "" : ", GDT");
607 
608         raise(SIGUSR1);
609 
610         nerrs = 0;
611 
612         /*
613          * Check that each register had an acceptable value when the
614          * int3 trampoline was invoked.
615          */
616         for (int i = 0; i < NGREG; i++) {
617                 greg_t req = requested_regs[i], res = resulting_regs[i];
618 
619                 if (i == REG_TRAPNO || i == REG_IP)
620                         continue;       /* don't care */
621 
622                 if (i == REG_SP) {
623                         /*
624                          * If we were using a 16-bit stack segment, then
625                          * the kernel is a bit stuck: IRET only restores
626                          * the low 16 bits of ESP/RSP if SS is 16-bit.
627                          * The kernel uses a hack to restore bits 31:16,
628                          * but that hack doesn't help with bits 63:32.
629                          * On Intel CPUs, bits 63:32 end up zeroed, and, on
630                          * AMD CPUs, they leak the high bits of the kernel
631                          * espfix64 stack pointer.  There's very little that
632                          * the kernel can do about it.
633                          *
634                          * Similarly, if we are returning to a 32-bit context,
635                          * the CPU will often lose the high 32 bits of RSP.
636                          */
637 
638                         if (res == req)
639                                 continue;
640 
641                         if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
642                                 printf("[NOTE]\tSP: %llx -> %llx\n",
643                                        (unsigned long long)req,
644                                        (unsigned long long)res);
645                                 continue;
646                         }
647 
648                         printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
649                                (unsigned long long)requested_regs[i],
650                                (unsigned long long)resulting_regs[i]);
651                         nerrs++;
652                         continue;
653                 }
654 
655                 bool ignore_reg = false;
656 #if __i386__
657                 if (i == REG_UESP)
658                         ignore_reg = true;
659 #else
660                 if (i == REG_CSGSFS) {
661                         struct selectors *req_sels =
662                                 (void *)&requested_regs[REG_CSGSFS];
663                         struct selectors *res_sels =
664                                 (void *)&resulting_regs[REG_CSGSFS];
665                         if (req_sels->cs != res_sels->cs) {
666                                 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
667                                        req_sels->cs, res_sels->cs);
668                                 nerrs++;
669                         }
670 
671                         if (req_sels->ss != res_sels->ss) {
672                                 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
673                                        req_sels->ss, res_sels->ss);
674                                 nerrs++;
675                         }
676 
677                         continue;
678                 }
679 #endif
680 
681                 /* Sanity check on the kernel */
682                 if (i == REG_CX && req != res) {
683                         printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
684                                (unsigned long long)req,
685                                (unsigned long long)res);
686                         nerrs++;
687                         continue;
688                 }
689 
690                 if (req != res && !ignore_reg) {
691                         printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
692                                i, (unsigned long long)req,
693                                (unsigned long long)res);
694                         nerrs++;
695                 }
696         }
697 
698         if (nerrs == 0)
699                 printf("[OK]\tall registers okay\n");
700 
701         return nerrs;
702 }
703 
704 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
705 {
706         int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
707         if (cs == -1)
708                 return 0;
709 
710         sig_cs = cs;
711         sig_ss = ss;
712 
713         printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
714                cs_bits, sig_cs, sig_ss);
715 
716         sig_trapped = 0;
717         raise(SIGUSR1);
718         if (sig_trapped) {
719                 char errdesc[32] = "";
720                 if (sig_err) {
721                         const char *src = (sig_err & 1) ? " EXT" : "";
722                         const char *table;
723                         if ((sig_err & 0x6) == 0x0)
724                                 table = "GDT";
725                         else if ((sig_err & 0x6) == 0x4)
726                                 table = "LDT";
727                         else if ((sig_err & 0x6) == 0x2)
728                                 table = "IDT";
729                         else
730                                 table = "???";
731 
732                         sprintf(errdesc, "%s%s index %d, ",
733                                 table, src, sig_err >> 3);
734                 }
735 
736                 char trapname[32];
737                 if (sig_trapno == 13)
738                         strcpy(trapname, "GP");
739                 else if (sig_trapno == 11)
740                         strcpy(trapname, "NP");
741                 else if (sig_trapno == 12)
742                         strcpy(trapname, "SS");
743                 else if (sig_trapno == 32)
744                         strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
745                 else
746                         sprintf(trapname, "%d", sig_trapno);
747 
748                 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
749                        trapname, (unsigned long)sig_err,
750                        errdesc, strsignal(sig_trapped));
751                 return 0;
752         } else {
753                 /*
754                  * This also implicitly tests UC_STRICT_RESTORE_SS:
755                  * We check that these signals set UC_STRICT_RESTORE_SS and,
756                  * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
757                  * then we won't get SIGSEGV.
758                  */
759                 printf("[FAIL]\tDid not get SIGSEGV\n");
760                 return 1;
761         }
762 }
763 
764 int main()
765 {
766         int total_nerrs = 0;
767         unsigned short my_cs, my_ss;
768 
769         asm volatile ("mov %%cs,%0" : "=r" (my_cs));
770         asm volatile ("mov %%ss,%0" : "=r" (my_ss));
771         setup_ldt();
772 
773         stack_t stack = {
774                 .ss_sp = altstack_data,
775                 .ss_size = SIGSTKSZ,
776         };
777         if (sigaltstack(&stack, NULL) != 0)
778                 err(1, "sigaltstack");
779 
780         sethandler(SIGUSR1, sigusr1, 0);
781         sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
782 
783         /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
784         total_nerrs += test_valid_sigreturn(64, false, -1);
785         total_nerrs += test_valid_sigreturn(32, false, -1);
786         total_nerrs += test_valid_sigreturn(16, false, -1);
787 
788         /*
789          * Test easy espfix cases: return to a 16-bit LDT SS in each possible
790          * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
791          *
792          * This catches the original missing-espfix-on-64-bit-kernels issue
793          * as well as CVE-2014-8134.
794          */
795         total_nerrs += test_valid_sigreturn(64, true, -1);
796         total_nerrs += test_valid_sigreturn(32, true, -1);
797         total_nerrs += test_valid_sigreturn(16, true, -1);
798 
799         if (gdt_data16_idx) {
800                 /*
801                  * For performance reasons, Linux skips espfix if SS points
802                  * to the GDT.  If we were able to allocate a 16-bit SS in
803                  * the GDT, see if it leaks parts of the kernel stack pointer.
804                  *
805                  * This tests for CVE-2014-8133.
806                  */
807                 total_nerrs += test_valid_sigreturn(64, true,
808                                                     GDT3(gdt_data16_idx));
809                 total_nerrs += test_valid_sigreturn(32, true,
810                                                     GDT3(gdt_data16_idx));
811                 total_nerrs += test_valid_sigreturn(16, true,
812                                                     GDT3(gdt_data16_idx));
813         }
814 
815 #ifdef __x86_64__
816         /* Nasty ABI case: check SS corruption handling. */
817         sig_corrupt_final_ss = 1;
818         total_nerrs += test_valid_sigreturn(32, false, -1);
819         total_nerrs += test_valid_sigreturn(32, true, -1);
820         sig_corrupt_final_ss = 0;
821 #endif
822 
823         /*
824          * We're done testing valid sigreturn cases.  Now we test states
825          * for which sigreturn itself will succeed but the subsequent
826          * entry to user mode will fail.
827          *
828          * Depending on the failure mode and the kernel bitness, these
829          * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
830          */
831         clearhandler(SIGTRAP);
832         sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
833         sethandler(SIGBUS, sigtrap, SA_ONSTACK);
834         sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
835 
836         /* Easy failures: invalid SS, resulting in #GP(0) */
837         test_bad_iret(64, ldt_nonexistent_sel, -1);
838         test_bad_iret(32, ldt_nonexistent_sel, -1);
839         test_bad_iret(16, ldt_nonexistent_sel, -1);
840 
841         /* These fail because SS isn't a data segment, resulting in #GP(SS) */
842         test_bad_iret(64, my_cs, -1);
843         test_bad_iret(32, my_cs, -1);
844         test_bad_iret(16, my_cs, -1);
845 
846         /* Try to return to a not-present code segment, triggering #NP(SS). */
847         test_bad_iret(32, my_ss, npcode32_sel);
848 
849         /*
850          * Try to return to a not-present but otherwise valid data segment.
851          * This will cause IRET to fail with #SS on the espfix stack.  This
852          * exercises CVE-2014-9322.
853          *
854          * Note that, if espfix is enabled, 64-bit Linux will lose track
855          * of the actual cause of failure and report #GP(0) instead.
856          * This would be very difficult for Linux to avoid, because
857          * espfix64 causes IRET failures to be promoted to #DF, so the
858          * original exception frame is never pushed onto the stack.
859          */
860         test_bad_iret(32, npdata32_sel, -1);
861 
862         /*
863          * Try to return to a not-present but otherwise valid data
864          * segment without invoking espfix.  Newer kernels don't allow
865          * this to happen in the first place.  On older kernels, though,
866          * this can trigger CVE-2014-9322.
867          */
868         if (gdt_npdata32_idx)
869                 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
870 
871 #ifdef __x86_64__
872         total_nerrs += test_nonstrict_ss();
873 #endif
874 
875         return total_nerrs ? 1 : 0;
876 }
877 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp