// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/extable.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

extern void die(const char *str, struct pt_regs *regs, long err);

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	if (!mm)
		mm = &init_mm;

	pr_alert("pgd = %p\n", mm->pgd);
	pgd = pgd_offset(mm, addr);
	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));

	do {
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			pr_alert("(bad)");
			break;
		}

		p4d = p4d_offset(pgd, addr);
		pud = pud_offset(p4d, addr);
		pmd = pmd_offset(pud, addr);
#if PTRS_PER_PMD != 1
		pr_alert(", *pmd=%08lx", pmd_val(*pmd));
#endif

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			pr_alert("(bad)");
			break;
		}

		if (IS_ENABLED(CONFIG_HIGHMEM)) {
			pte_t *pte;

			/* We must not map this if we have highmem enabled */
			pte = pte_offset_map(pmd, addr);
			pr_alert(", *pte=%08lx", pte_val(*pte));
			pte_unmap(pte);
		}
	} while (0);

	pr_alert("\n");
}

void do_page_fault(unsigned long entry, unsigned long addr,
		   unsigned int error_code, struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int si_code;
	vm_fault_t fault;
	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
	tsk = current;
	mm = tsk->mm;
	si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (addr >= TASK_SIZE) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;

		if (addr >= TASK_SIZE && addr < VMALLOC_END
		    && (entry == ENTRY_PTE_NOT_PRESENT))
			goto vmalloc_fault;
		else
			goto no_context;
	}

	/* Send a signal to the task for handling the unaligned access. */
	if (entry == ENTRY_GENERAL_EXCPETION
	    && error_code == ETYPE_ALIGNMENT_CHECK) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;
		else
			goto no_context;
	}

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
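	/*
	 * Concretely: a kernel-mode fault whose faulting instruction has no
	 * entry in the exception tables cannot be a legitimate user-space
	 * access, so bail out to no_context instead of waiting on mmap_sem.
	 */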
	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
		if (!user_mode(regs) &&
		    !search_exception_tables(instruction_pointer(regs)))
			goto no_context;
retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in which
		 * case, we'll have missed the might_sleep() from down_read().
		 */
		might_sleep();
		if (IS_ENABLED(CONFIG_DEBUG_VM)) {
			if (!user_mode(regs) &&
			    !search_exception_tables(instruction_pointer(regs)))
				goto no_context;
		}
	}

	vma = find_vma(mm, addr);

	if (unlikely(!vma))
		goto bad_area;

	if (vma->vm_start <= addr)
		goto good_area;

	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;

	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

good_area:
	si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */
	if (entry == ENTRY_PTE_NOT_PRESENT) {
		if (error_code & ITYPE_mskINST)
			mask = VM_EXEC;
		else {
			mask = VM_READ | VM_WRITE;
		}
	} else if (entry == ENTRY_TLB_MISC) {
		switch (error_code & ITYPE_mskETYPE) {
		case RD_PROT:
			mask = VM_READ;
			break;
		case WRT_PROT:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case NOEXEC:
			mask = VM_EXEC;
			break;
		case PAGE_MODIFY:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case ACC_BIT:
			BUG();
		default:
			break;
		}
	}
	if (!(vma->vm_flags & mask))
		goto bad_area;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
		if (!user_mode(regs))
			goto no_context;
		return;
	}

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		else
			goto bad_area;
	}

	/*
	 * Major/minor page fault accounting is only done on the initial
	 * attempt. If we go through a retry, it is extremely likely that the
	 * page will be found in page cache at that point.
	 */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			/* No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;
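	/*
	 * Everything below is error handling: bad_area and
	 * bad_area_nosemaphore deliver SIGSEGV, no_context resolves kernel
	 * faults through the exception tables (or dies trying), out_of_memory
	 * and do_sigbus report resource and bus errors, and vmalloc_fault
	 * fills in a missing kernel mapping from init_mm.
	 */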
	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		tsk->thread.address = addr;
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = entry;
		force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
		return;
	}

no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 * when it accesses user-memory. When it fails in one
	 * of those points, we find it in a table and do a jump
	 * to some fixup code that loads an appropriate error
	 * code)
	 */

	{
		const struct exception_table_entry *entry;

		if ((entry =
		     search_exception_tables(instruction_pointer(regs))) !=
		    NULL) {
			/* Adjust the instruction pointer in the stackframe */
			instruction_pointer(regs) = entry->fixup;
			return;
		}
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
		 "paging request", addr);

	show_pte(mm, addr);
	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);

	return;

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	/*
	 * Send a sigbus
	 */
	tsk->thread.address = addr;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = entry;
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);

	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed in an unluckily timed irq
		 * (like inside schedule() between switch_mm and
		 * switch_to...).
		 */

		unsigned int index = pgd_index(addr);
		pgd_t *pgd, *pgd_k;
		p4d_t *p4d, *p4d_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;

		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
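		/*
		 * Copy the missing pmd entry from the kernel reference
		 * page table; if our entry already exists it is expected
		 * to match the reference entry for the shared vmalloc
		 * region, which the BUG_ON below asserts.
		 */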
		if (!pmd_present(*pmd))
			set_pmd(pmd, *pmd_k);
		else
			BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));

		/*
		 * Since the vmalloc area is global, we don't
		 * need to copy individual PTE's, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}