1 /* 2 * Debug helper to dump the current kernel pagetables of the system 3 * so that we can see what the various memory ranges are set to. 4 * 5 * (C) Copyright 2008 Intel Corporation 6 * 7 * Author: Arjan van de Ven <arjan@linux.intel.com> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; version 2 12 * of the License. 13 */ 14 15 #include <linux/debugfs.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/seq_file.h> 19 20 #include <asm/pgtable.h> 21 22 /* 23 * The dumper groups pagetable entries of the same type into one, and for 24 * that it needs to keep some state when walking, and flush this state 25 * when a "break" in the continuity is found. 26 */ 27 struct pg_state { 28 int level; 29 pgprot_t current_prot; 30 unsigned long start_address; 31 unsigned long current_address; 32 const struct addr_marker *marker; 33 unsigned long lines; 34 }; 35 36 struct addr_marker { 37 unsigned long start_address; 38 const char *name; 39 unsigned long max_lines; 40 }; 41 42 /* Address space markers hints */ 43 static struct addr_marker address_markers[] = { 44 { 0, "User Space" }, 45 #ifdef CONFIG_X86_64 46 { 0x8000000000000000UL, "Kernel Space" }, 47 { PAGE_OFFSET, "Low Kernel Mapping" }, 48 { VMALLOC_START, "vmalloc() Area" }, 49 { VMEMMAP_START, "Vmemmap" }, 50 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, 51 { __START_KERNEL_map, "High Kernel Mapping" }, 52 { MODULES_VADDR, "Modules" }, 53 { MODULES_END, "End Modules" }, 54 #else 55 { PAGE_OFFSET, "Kernel Mapping" }, 56 { 0/* VMALLOC_START */, "vmalloc() Area" }, 57 { 0/*VMALLOC_END*/, "vmalloc() End" }, 58 # ifdef CONFIG_HIGHMEM 59 { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, 60 # endif 61 { 0/*FIXADDR_START*/, "Fixmap Area" }, 62 #endif 63 { -1, NULL } /* End of list */ 64 }; 65 66 /* Multipliers for offsets within the PTEs */ 67 #define PTE_LEVEL_MULT (PAGE_SIZE) 68 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) 69 #define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) 70 #define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) 71 72 /* 73 * Print a readable form of a pgprot_t to the seq_file 74 */ 75 static void printk_prot(struct seq_file *m, pgprot_t prot, int level) 76 { 77 pgprotval_t pr = pgprot_val(prot); 78 static const char * const level_name[] = 79 { "cr3", "pgd", "pud", "pmd", "pte" }; 80 81 if (!pgprot_val(prot)) { 82 /* Not present */ 83 seq_printf(m, " "); 84 } else { 85 if (pr & _PAGE_USER) 86 seq_printf(m, "USR "); 87 else 88 seq_printf(m, " "); 89 if (pr & _PAGE_RW) 90 seq_printf(m, "RW "); 91 else 92 seq_printf(m, "ro "); 93 if (pr & _PAGE_PWT) 94 seq_printf(m, "PWT "); 95 else 96 seq_printf(m, " "); 97 if (pr & _PAGE_PCD) 98 seq_printf(m, "PCD "); 99 else 100 seq_printf(m, " "); 101 102 /* Bit 9 has a different meaning on level 3 vs 4 */ 103 if (level <= 3) { 104 if (pr & _PAGE_PSE) 105 seq_printf(m, "PSE "); 106 else 107 seq_printf(m, " "); 108 } else { 109 if (pr & _PAGE_PAT) 110 seq_printf(m, "pat "); 111 else 112 seq_printf(m, " "); 113 } 114 if (pr & _PAGE_GLOBAL) 115 seq_printf(m, "GLB "); 116 else 117 seq_printf(m, " "); 118 if (pr & _PAGE_NX) 119 seq_printf(m, "NX "); 120 else 121 seq_printf(m, "x "); 122 } 123 seq_printf(m, "%s\n", level_name[level]); 124 } 125 126 /* 127 * On 64 bits, sign-extend the 48 bit address to 64 bit 128 */ 129 static unsigned long normalize_addr(unsigned long u) 130 { 131 #ifdef CONFIG_X86_64 132 return (signed long)(u << 16) >> 16; 133 #else 134 return u; 135 #endif 136 } 137 138 /* 139 * This function gets called on a break in a continuous series 140 * of PTE entries; the next one is different so we need to 141 * print what we collected so far. 142 */ 143 static void note_page(struct seq_file *m, struct pg_state *st, 144 pgprot_t new_prot, int level) 145 { 146 pgprotval_t prot, cur; 147 static const char units[] = "BKMGTPE"; 148 149 /* 150 * If we have a "break" in the series, we need to flush the state that 151 * we have now. "break" is either changing perms, levels or 152 * address space marker. 153 */ 154 prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; 155 cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; 156 157 if (!st->level) { 158 /* First entry */ 159 st->current_prot = new_prot; 160 st->level = level; 161 st->marker = address_markers; 162 st->lines = 0; 163 seq_printf(m, "---[ %s ]---\n", st->marker->name); 164 } else if (prot != cur || level != st->level || 165 st->current_address >= st->marker[1].start_address) { 166 const char *unit = units; 167 unsigned long delta; 168 int width = sizeof(unsigned long) * 2; 169 170 /* 171 * Now print the actual finished series 172 */ 173 if (!st->marker->max_lines || 174 st->lines < st->marker->max_lines) { 175 seq_printf(m, "0x%0*lx-0x%0*lx ", 176 width, st->start_address, 177 width, st->current_address); 178 179 delta = (st->current_address - st->start_address); 180 while (!(delta & 1023) && unit[1]) { 181 delta >>= 10; 182 unit++; 183 } 184 seq_printf(m, "%9lu%c ", delta, *unit); 185 printk_prot(m, st->current_prot, st->level); 186 } 187 st->lines++; 188 189 /* 190 * We print markers for special areas of address space, 191 * such as the start of vmalloc space etc. 192 * This helps in the interpretation. 193 */ 194 if (st->current_address >= st->marker[1].start_address) { 195 if (st->marker->max_lines && 196 st->lines > st->marker->max_lines) { 197 unsigned long nskip = 198 st->lines - st->marker->max_lines; 199 seq_printf(m, "... %lu entr%s skipped ... \n", 200 nskip, nskip == 1 ? "y" : "ies"); 201 } 202 st->marker++; 203 st->lines = 0; 204 seq_printf(m, "---[ %s ]---\n", st->marker->name); 205 } 206 207 st->start_address = st->current_address; 208 st->current_prot = new_prot; 209 st->level = level; 210 } 211 } 212 213 static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, 214 unsigned long P) 215 { 216 int i; 217 pte_t *start; 218 219 start = (pte_t *) pmd_page_vaddr(addr); 220 for (i = 0; i < PTRS_PER_PTE; i++) { 221 pgprot_t prot = pte_pgprot(*start); 222 223 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); 224 note_page(m, st, prot, 4); 225 start++; 226 } 227 } 228 229 #if PTRS_PER_PMD > 1 230 231 static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, 232 unsigned long P) 233 { 234 int i; 235 pmd_t *start; 236 237 start = (pmd_t *) pud_page_vaddr(addr); 238 for (i = 0; i < PTRS_PER_PMD; i++) { 239 st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); 240 if (!pmd_none(*start)) { 241 pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK; 242 243 if (pmd_large(*start) || !pmd_present(*start)) 244 note_page(m, st, __pgprot(prot), 3); 245 else 246 walk_pte_level(m, st, *start, 247 P + i * PMD_LEVEL_MULT); 248 } else 249 note_page(m, st, __pgprot(0), 3); 250 start++; 251 } 252 } 253 254 #else 255 #define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p) 256 #define pud_large(a) pmd_large(__pmd(pud_val(a))) 257 #define pud_none(a) pmd_none(__pmd(pud_val(a))) 258 #endif 259 260 #if PTRS_PER_PUD > 1 261 262 static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, 263 unsigned long P) 264 { 265 int i; 266 pud_t *start; 267 268 start = (pud_t *) pgd_page_vaddr(addr); 269 270 for (i = 0; i < PTRS_PER_PUD; i++) { 271 st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); 272 if (!pud_none(*start)) { 273 pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK; 274 275 if (pud_large(*start) || !pud_present(*start)) 276 note_page(m, st, __pgprot(prot), 2); 277 else 278 walk_pmd_level(m, st, *start, 279 P + i * PUD_LEVEL_MULT); 280 } else 281 note_page(m, st, __pgprot(0), 2); 282 283 start++; 284 } 285 } 286 287 #else 288 #define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p) 289 #define pgd_large(a) pud_large(__pud(pgd_val(a))) 290 #define pgd_none(a) pud_none(__pud(pgd_val(a))) 291 #endif 292 293 static void walk_pgd_level(struct seq_file *m) 294 { 295 #ifdef CONFIG_X86_64 296 pgd_t *start = (pgd_t *) &init_level4_pgt; 297 #else 298 pgd_t *start = swapper_pg_dir; 299 #endif 300 int i; 301 struct pg_state st; 302 303 memset(&st, 0, sizeof(st)); 304 305 for (i = 0; i < PTRS_PER_PGD; i++) { 306 st.current_address = normalize_addr(i * PGD_LEVEL_MULT); 307 if (!pgd_none(*start)) { 308 pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; 309 310 if (pgd_large(*start) || !pgd_present(*start)) 311 note_page(m, &st, __pgprot(prot), 1); 312 else 313 walk_pud_level(m, &st, *start, 314 i * PGD_LEVEL_MULT); 315 } else 316 note_page(m, &st, __pgprot(0), 1); 317 318 start++; 319 } 320 321 /* Flush out the last page */ 322 st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); 323 note_page(m, &st, __pgprot(0), 0); 324 } 325 326 static int ptdump_show(struct seq_file *m, void *v) 327 { 328 walk_pgd_level(m); 329 return 0; 330 } 331 332 static int ptdump_open(struct inode *inode, struct file *filp) 333 { 334 return single_open(filp, ptdump_show, NULL); 335 } 336 337 static const struct file_operations ptdump_fops = { 338 .open = ptdump_open, 339 .read = seq_read, 340 .llseek = seq_lseek, 341 .release = single_release, 342 }; 343 344 static int pt_dump_init(void) 345 { 346 struct dentry *pe; 347 348 #ifdef CONFIG_X86_32 349 /* Not a compile-time constant on x86-32 */ 350 address_markers[2].start_address = VMALLOC_START; 351 address_markers[3].start_address = VMALLOC_END; 352 # ifdef CONFIG_HIGHMEM 353 address_markers[4].start_address = PKMAP_BASE; 354 address_markers[5].start_address = FIXADDR_START; 355 # else 356 address_markers[4].start_address = FIXADDR_START; 357 # endif 358 #endif 359 360 pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, 361 &ptdump_fops); 362 if (!pe) 363 return -ENOMEM; 364 365 return 0; 366 } 367 368 __initcall(pt_dump_init); 369 MODULE_LICENSE("GPL"); 370 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); 371 MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); 372
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.