~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/mm/init_64.c

Version: ~ [ linux-5.14-rc3 ] ~ [ linux-5.13.5 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.53 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.135 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.198 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.240 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.276 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.276 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *  linux/arch/x86_64/mm/init.c
  3  *
  4  *  Copyright (C) 1995  Linus Torvalds
  5  *  Copyright (C) 2000  Pavel Machek <pavel@ucw.cz>
  6  *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
  7  */
  8 
  9 #include <linux/signal.h>
 10 #include <linux/sched.h>
 11 #include <linux/kernel.h>
 12 #include <linux/errno.h>
 13 #include <linux/string.h>
 14 #include <linux/types.h>
 15 #include <linux/ptrace.h>
 16 #include <linux/mman.h>
 17 #include <linux/mm.h>
 18 #include <linux/swap.h>
 19 #include <linux/smp.h>
 20 #include <linux/init.h>
 21 #include <linux/initrd.h>
 22 #include <linux/pagemap.h>
 23 #include <linux/bootmem.h>
 24 #include <linux/memblock.h>
 25 #include <linux/proc_fs.h>
 26 #include <linux/pci.h>
 27 #include <linux/pfn.h>
 28 #include <linux/poison.h>
 29 #include <linux/dma-mapping.h>
 30 #include <linux/module.h>
 31 #include <linux/memory.h>
 32 #include <linux/memory_hotplug.h>
 33 #include <linux/nmi.h>
 34 #include <linux/gfp.h>
 35 #include <linux/kcore.h>
 36 
 37 #include <asm/processor.h>
 38 #include <asm/bios_ebda.h>
 39 #include <asm/uaccess.h>
 40 #include <asm/pgtable.h>
 41 #include <asm/pgalloc.h>
 42 #include <asm/dma.h>
 43 #include <asm/fixmap.h>
 44 #include <asm/e820.h>
 45 #include <asm/apic.h>
 46 #include <asm/tlb.h>
 47 #include <asm/mmu_context.h>
 48 #include <asm/proto.h>
 49 #include <asm/smp.h>
 50 #include <asm/sections.h>
 51 #include <asm/kdebug.h>
 52 #include <asm/numa.h>
 53 #include <asm/cacheflush.h>
 54 #include <asm/init.h>
 55 #include <asm/uv/uv.h>
 56 #include <asm/setup.h>
 57 
 58 #include "mm_internal.h"
 59 
 60 static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
 61                            unsigned long addr, unsigned long end)
 62 {
 63         addr &= PMD_MASK;
 64         for (; addr < end; addr += PMD_SIZE) {
 65                 pmd_t *pmd = pmd_page + pmd_index(addr);
 66 
 67                 if (!pmd_present(*pmd))
 68                         set_pmd(pmd, __pmd(addr | pmd_flag));
 69         }
 70 }
 71 static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 72                           unsigned long addr, unsigned long end)
 73 {
 74         unsigned long next;
 75 
 76         for (; addr < end; addr = next) {
 77                 pud_t *pud = pud_page + pud_index(addr);
 78                 pmd_t *pmd;
 79 
 80                 next = (addr & PUD_MASK) + PUD_SIZE;
 81                 if (next > end)
 82                         next = end;
 83 
 84                 if (pud_present(*pud)) {
 85                         pmd = pmd_offset(pud, 0);
 86                         ident_pmd_init(info->pmd_flag, pmd, addr, next);
 87                         continue;
 88                 }
 89                 pmd = (pmd_t *)info->alloc_pgt_page(info->context);
 90                 if (!pmd)
 91                         return -ENOMEM;
 92                 ident_pmd_init(info->pmd_flag, pmd, addr, next);
 93                 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
 94         }
 95 
 96         return 0;
 97 }
 98 
 99 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
100                               unsigned long addr, unsigned long end)
101 {
102         unsigned long next;
103         int result;
104         int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
105 
106         for (; addr < end; addr = next) {
107                 pgd_t *pgd = pgd_page + pgd_index(addr) + off;
108                 pud_t *pud;
109 
110                 next = (addr & PGDIR_MASK) + PGDIR_SIZE;
111                 if (next > end)
112                         next = end;
113 
114                 if (pgd_present(*pgd)) {
115                         pud = pud_offset(pgd, 0);
116                         result = ident_pud_init(info, pud, addr, next);
117                         if (result)
118                                 return result;
119                         continue;
120                 }
121 
122                 pud = (pud_t *)info->alloc_pgt_page(info->context);
123                 if (!pud)
124                         return -ENOMEM;
125                 result = ident_pud_init(info, pud, addr, next);
126                 if (result)
127                         return result;
128                 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
129         }
130 
131         return 0;
132 }
133 
134 static int __init parse_direct_gbpages_off(char *arg)
135 {
136         direct_gbpages = 0;
137         return 0;
138 }
139 early_param("nogbpages", parse_direct_gbpages_off);
140 
141 static int __init parse_direct_gbpages_on(char *arg)
142 {
143         direct_gbpages = 1;
144         return 0;
145 }
146 early_param("gbpages", parse_direct_gbpages_on);
147 
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical memory, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */
153 
154 pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
155 EXPORT_SYMBOL_GPL(__supported_pte_mask);
156 
157 int force_personality32;
158 
159 /*
160  * noexec32=on|off
161  * Control non executable heap for 32bit processes.
162  * To control the stack too use noexec=off
163  *
164  * on   PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
165  * off  PROT_READ implies PROT_EXEC
166  */
167 static int __init nonx32_setup(char *str)
168 {
169         if (!strcmp(str, "on"))
170                 force_personality32 &= ~READ_IMPLIES_EXEC;
171         else if (!strcmp(str, "off"))
172                 force_personality32 |= READ_IMPLIES_EXEC;
173         return 1;
174 }
175 __setup("noexec32=", nonx32_setup);
176 
177 /*
178  * When memory was added/removed make sure all the processes MM have
179  * suitable PGD entries in the local PGD level page.
180  */
181 void sync_global_pgds(unsigned long start, unsigned long end, int removed)
182 {
183         unsigned long address;
184 
185         for (address = start; address <= end; address += PGDIR_SIZE) {
186                 const pgd_t *pgd_ref = pgd_offset_k(address);
187                 struct page *page;
188 
189                 /*
190                  * When it is called after memory hot remove, pgd_none()
191                  * returns true. In this case (removed == 1), we must clear
192                  * the PGD entries in the local PGD level page.
193                  */
194                 if (pgd_none(*pgd_ref) && !removed)
195                         continue;
196 
197                 spin_lock(&pgd_lock);
198                 list_for_each_entry(page, &pgd_list, lru) {
199                         pgd_t *pgd;
200                         spinlock_t *pgt_lock;
201 
202                         pgd = (pgd_t *)page_address(page) + pgd_index(address);
203                         /* the pgt_lock only for Xen */
204                         pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
205                         spin_lock(pgt_lock);
206 
207                         if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
208                                 BUG_ON(pgd_page_vaddr(*pgd)
209                                        != pgd_page_vaddr(*pgd_ref));
210 
211                         if (removed) {
212                                 if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
213                                         pgd_clear(pgd);
214                         } else {
215                                 if (pgd_none(*pgd))
216                                         set_pgd(pgd, *pgd_ref);
217                         }
218 
219                         spin_unlock(pgt_lock);
220                 }
221                 spin_unlock(&pgd_lock);
222         }
223 }
224 
225 /*
226  * NOTE: This function is marked __ref because it calls __init function
227  * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
228  */
229 static __ref void *spp_getpage(void)
230 {
231         void *ptr;
232 
233         if (after_bootmem)
234                 ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
235         else
236                 ptr = alloc_bootmem_pages(PAGE_SIZE);
237 
238         if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) {
239                 panic("set_pte_phys: cannot allocate page data %s\n",
240                         after_bootmem ? "after bootmem" : "");
241         }
242 
243         pr_debug("spp_getpage %p\n", ptr);
244 
245         return ptr;
246 }
247 
248 static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
249 {
250         if (pgd_none(*pgd)) {
251                 pud_t *pud = (pud_t *)spp_getpage();
252                 pgd_populate(&init_mm, pgd, pud);
253                 if (pud != pud_offset(pgd, 0))
254                         printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
255                                pud, pud_offset(pgd, 0));
256         }
257         return pud_offset(pgd, vaddr);
258 }
259 
260 static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
261 {
262         if (pud_none(*pud)) {
263                 pmd_t *pmd = (pmd_t *) spp_getpage();
264                 pud_populate(&init_mm, pud, pmd);
265                 if (pmd != pmd_offset(pud, 0))
266                         printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
267                                pmd, pmd_offset(pud, 0));
268         }
269         return pmd_offset(pud, vaddr);
270 }
271 
272 static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
273 {
274         if (pmd_none(*pmd)) {
275                 pte_t *pte = (pte_t *) spp_getpage();
276                 pmd_populate_kernel(&init_mm, pmd, pte);
277                 if (pte != pte_offset_kernel(pmd, 0))
278                         printk(KERN_ERR "PAGETABLE BUG #02!\n");
279         }
280         return pte_offset_kernel(pmd, vaddr);
281 }
282 
283 void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
284 {
285         pud_t *pud;
286         pmd_t *pmd;
287         pte_t *pte;
288 
289         pud = pud_page + pud_index(vaddr);
290         pmd = fill_pmd(pud, vaddr);
291         pte = fill_pte(pmd, vaddr);
292 
293         set_pte(pte, new_pte);
294 
295         /*
296          * It's enough to flush this one mapping.
297          * (PGE mappings get flushed as well)
298          */
299         __flush_tlb_one(vaddr);
300 }
301 
302 void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
303 {
304         pgd_t *pgd;
305         pud_t *pud_page;
306 
307         pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
308 
309         pgd = pgd_offset_k(vaddr);
310         if (pgd_none(*pgd)) {
311                 printk(KERN_ERR
312                         "PGD FIXMAP MISSING, it should be setup in head.S!\n");
313                 return;
314         }
315         pud_page = (pud_t*)pgd_page_vaddr(*pgd);
316         set_pte_vaddr_pud(pud_page, vaddr, pteval);
317 }
318 
319 pmd_t * __init populate_extra_pmd(unsigned long vaddr)
320 {
321         pgd_t *pgd;
322         pud_t *pud;
323 
324         pgd = pgd_offset_k(vaddr);
325         pud = fill_pud(pgd, vaddr);
326         return fill_pmd(pud, vaddr);
327 }
328 
329 pte_t * __init populate_extra_pte(unsigned long vaddr)
330 {
331         pmd_t *pmd;
332 
333         pmd = populate_extra_pmd(vaddr);
334         return fill_pte(pmd, vaddr);
335 }
336 
337 /*
338  * Create large page table mappings for a range of physical addresses.
339  */
340 static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
341                                                 pgprot_t prot)
342 {
343         pgd_t *pgd;
344         pud_t *pud;
345         pmd_t *pmd;
346 
347         BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
348         for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
349                 pgd = pgd_offset_k((unsigned long)__va(phys));
350                 if (pgd_none(*pgd)) {
351                         pud = (pud_t *) spp_getpage();
352                         set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
353                                                 _PAGE_USER));
354                 }
355                 pud = pud_offset(pgd, (unsigned long)__va(phys));
356                 if (pud_none(*pud)) {
357                         pmd = (pmd_t *) spp_getpage();
358                         set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
359                                                 _PAGE_USER));
360                 }
361                 pmd = pmd_offset(pud, phys);
362                 BUG_ON(!pmd_none(*pmd));
363                 set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
364         }
365 }
366 
367 void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
368 {
369         __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
370 }
371 
372 void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
373 {
374         __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
375 }
376 
377 /*
378  * The head.S code sets up the kernel high mapping:
379  *
380  *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
381  *
382  * phys_base holds the negative offset to the kernel, which is added
383  * to the compile time generated pmds. This results in invalid pmds up
384  * to the point where we hit the physaddr 0 mapping.
385  *
386  * We limit the mappings to the region from _text to _brk_end.  _brk_end
387  * is rounded up to the 2MB boundary. This catches the invalid pmds as
388  * well, as they are located before _text:
389  */
390 void __init cleanup_highmap(void)
391 {
392         unsigned long vaddr = __START_KERNEL_map;
393         unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
394         unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
395         pmd_t *pmd = level2_kernel_pgt;
396 
397         /*
398          * Native path, max_pfn_mapped is not set yet.
399          * Xen has valid max_pfn_mapped set in
400          *      arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
401          */
402         if (max_pfn_mapped)
403                 vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
404 
405         for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
406                 if (pmd_none(*pmd))
407                         continue;
408                 if (vaddr < (unsigned long) _text || vaddr > end)
409                         set_pmd(pmd, __pmd(0));
410         }
411 }
412 
413 static unsigned long __meminit
414 phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
415               pgprot_t prot)
416 {
417         unsigned long pages = 0, next;
418         unsigned long last_map_addr = end;
419         int i;
420 
421         pte_t *pte = pte_page + pte_index(addr);
422 
423         for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
424                 next = (addr & PAGE_MASK) + PAGE_SIZE;
425                 if (addr >= end) {
426                         if (!after_bootmem &&
427                             !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
428                             !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
429                                 set_pte(pte, __pte(0));
430                         continue;
431                 }
432 
433                 /*
434                  * We will re-use the existing mapping.
435                  * Xen for example has some special requirements, like mapping
436                  * pagetable pages as RO. So assume someone who pre-setup
437                  * these mappings are more intelligent.
438                  */
439                 if (pte_val(*pte)) {
440                         if (!after_bootmem)
441                                 pages++;
442                         continue;
443                 }
444 
445                 if (0)
446                         printk("   pte=%p addr=%lx pte=%016lx\n",
447                                pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
448                 pages++;
449                 set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
450                 last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
451         }
452 
453         update_page_count(PG_LEVEL_4K, pages);
454 
455         return last_map_addr;
456 }
457 
458 static unsigned long __meminit
459 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
460               unsigned long page_size_mask, pgprot_t prot)
461 {
462         unsigned long pages = 0, next;
463         unsigned long last_map_addr = end;
464 
465         int i = pmd_index(address);
466 
467         for (; i < PTRS_PER_PMD; i++, address = next) {
468                 pmd_t *pmd = pmd_page + pmd_index(address);
469                 pte_t *pte;
470                 pgprot_t new_prot = prot;
471 
472                 next = (address & PMD_MASK) + PMD_SIZE;
473                 if (address >= end) {
474                         if (!after_bootmem &&
475                             !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
476                             !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
477                                 set_pmd(pmd, __pmd(0));
478                         continue;
479                 }
480 
481                 if (pmd_val(*pmd)) {
482                         if (!pmd_large(*pmd)) {
483                                 spin_lock(&init_mm.page_table_lock);
484                                 pte = (pte_t *)pmd_page_vaddr(*pmd);
485                                 last_map_addr = phys_pte_init(pte, address,
486                                                                 end, prot);
487                                 spin_unlock(&init_mm.page_table_lock);
488                                 continue;
489                         }
490                         /*
491                          * If we are ok with PG_LEVEL_2M mapping, then we will
492                          * use the existing mapping,
493                          *
494                          * Otherwise, we will split the large page mapping but
495                          * use the same existing protection bits except for
496                          * large page, so that we don't violate Intel's TLB
497                          * Application note (317080) which says, while changing
498                          * the page sizes, new and old translations should
499                          * not differ with respect to page frame and
500                          * attributes.
501                          */
502                         if (page_size_mask & (1 << PG_LEVEL_2M)) {
503                                 if (!after_bootmem)
504                                         pages++;
505                                 last_map_addr = next;
506                                 continue;
507                         }
508                         new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
509                 }
510 
511                 if (page_size_mask & (1<<PG_LEVEL_2M)) {
512                         pages++;
513                         spin_lock(&init_mm.page_table_lock);
514                         set_pte((pte_t *)pmd,
515                                 pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
516                                         __pgprot(pgprot_val(prot) | _PAGE_PSE)));
517                         spin_unlock(&init_mm.page_table_lock);
518                         last_map_addr = next;
519                         continue;
520                 }
521 
522                 pte = alloc_low_page();
523                 last_map_addr = phys_pte_init(pte, address, end, new_prot);
524 
525                 spin_lock(&init_mm.page_table_lock);
526                 pmd_populate_kernel(&init_mm, pmd, pte);
527                 spin_unlock(&init_mm.page_table_lock);
528         }
529         update_page_count(PG_LEVEL_2M, pages);
530         return last_map_addr;
531 }
532 
533 static unsigned long __meminit
534 phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
535                          unsigned long page_size_mask)
536 {
537         unsigned long pages = 0, next;
538         unsigned long last_map_addr = end;
539         int i = pud_index(addr);
540 
541         for (; i < PTRS_PER_PUD; i++, addr = next) {
542                 pud_t *pud = pud_page + pud_index(addr);
543                 pmd_t *pmd;
544                 pgprot_t prot = PAGE_KERNEL;
545 
546                 next = (addr & PUD_MASK) + PUD_SIZE;
547                 if (addr >= end) {
548                         if (!after_bootmem &&
549                             !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
550                             !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
551                                 set_pud(pud, __pud(0));
552                         continue;
553                 }
554 
555                 if (pud_val(*pud)) {
556                         if (!pud_large(*pud)) {
557                                 pmd = pmd_offset(pud, 0);
558                                 last_map_addr = phys_pmd_init(pmd, addr, end,
559                                                          page_size_mask, prot);
560                                 __flush_tlb_all();
561                                 continue;
562                         }
563                         /*
564                          * If we are ok with PG_LEVEL_1G mapping, then we will
565                          * use the existing mapping.
566                          *
567                          * Otherwise, we will split the gbpage mapping but use
568                          * the same existing protection  bits except for large
569                          * page, so that we don't violate Intel's TLB
570                          * Application note (317080) which says, while changing
571                          * the page sizes, new and old translations should
572                          * not differ with respect to page frame and
573                          * attributes.
574                          */
575                         if (page_size_mask & (1 << PG_LEVEL_1G)) {
576                                 if (!after_bootmem)
577                                         pages++;
578                                 last_map_addr = next;
579                                 continue;
580                         }
581                         prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
582                 }
583 
584                 if (page_size_mask & (1<<PG_LEVEL_1G)) {
585                         pages++;
586                         spin_lock(&init_mm.page_table_lock);
587                         set_pte((pte_t *)pud,
588                                 pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
589                                         PAGE_KERNEL_LARGE));
590                         spin_unlock(&init_mm.page_table_lock);
591                         last_map_addr = next;
592                         continue;
593                 }
594 
595                 pmd = alloc_low_page();
596                 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
597                                               prot);
598 
599                 spin_lock(&init_mm.page_table_lock);
600                 pud_populate(&init_mm, pud, pmd);
601                 spin_unlock(&init_mm.page_table_lock);
602         }
603         __flush_tlb_all();
604 
605         update_page_count(PG_LEVEL_1G, pages);
606 
607         return last_map_addr;
608 }
609 
610 unsigned long __meminit
611 kernel_physical_mapping_init(unsigned long start,
612                              unsigned long end,
613                              unsigned long page_size_mask)
614 {
615         bool pgd_changed = false;
616         unsigned long next, last_map_addr = end;
617         unsigned long addr;
618 
619         start = (unsigned long)__va(start);
620         end = (unsigned long)__va(end);
621         addr = start;
622 
623         for (; start < end; start = next) {
624                 pgd_t *pgd = pgd_offset_k(start);
625                 pud_t *pud;
626 
627                 next = (start & PGDIR_MASK) + PGDIR_SIZE;
628 
629                 if (pgd_val(*pgd)) {
630                         pud = (pud_t *)pgd_page_vaddr(*pgd);
631                         last_map_addr = phys_pud_init(pud, __pa(start),
632                                                  __pa(end), page_size_mask);
633                         continue;
634                 }
635 
636                 pud = alloc_low_page();
637                 last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
638                                                  page_size_mask);
639 
640                 spin_lock(&init_mm.page_table_lock);
641                 pgd_populate(&init_mm, pgd, pud);
642                 spin_unlock(&init_mm.page_table_lock);
643                 pgd_changed = true;
644         }
645 
646         if (pgd_changed)
647                 sync_global_pgds(addr, end - 1, 0);
648 
649         __flush_tlb_all();
650 
651         return last_map_addr;
652 }
653 
654 #ifndef CONFIG_NUMA
655 void __init initmem_init(void)
656 {
657         memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
658 }
659 #endif
660 
661 void __init paging_init(void)
662 {
663         sparse_memory_present_with_active_regions(MAX_NUMNODES);
664         sparse_init();
665 
666         /*
667          * clear the default setting with node 0
668          * note: don't use nodes_clear here, that is really clearing when
669          *       numa support is not compiled in, and later node_set_state
670          *       will not set it back.
671          */
672         node_clear_state(0, N_MEMORY);
673         if (N_MEMORY != N_NORMAL_MEMORY)
674                 node_clear_state(0, N_NORMAL_MEMORY);
675 
676         zone_sizes_init();
677 }
678 
679 /*
680  * Memory hotplug specific functions
681  */
682 #ifdef CONFIG_MEMORY_HOTPLUG
683 /*
684  * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
685  * updating.
686  */
687 static void  update_end_of_memory_vars(u64 start, u64 size)
688 {
689         unsigned long end_pfn = PFN_UP(start + size);
690 
691         if (end_pfn > max_pfn) {
692                 max_pfn = end_pfn;
693                 max_low_pfn = end_pfn;
694                 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
695         }
696 }
697 
698 /*
699  * Memory is added always to NORMAL zone. This means you will never get
700  * additional DMA/DMA32 memory.
701  */
702 int arch_add_memory(int nid, u64 start, u64 size)
703 {
704         struct pglist_data *pgdat = NODE_DATA(nid);
705         struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
706         unsigned long start_pfn = start >> PAGE_SHIFT;
707         unsigned long nr_pages = size >> PAGE_SHIFT;
708         int ret;
709 
710         init_memory_mapping(start, start + size);
711 
712         ret = __add_pages(nid, zone, start_pfn, nr_pages);
713         WARN_ON_ONCE(ret);
714 
715         /* update max_pfn, max_low_pfn and high_memory */
716         update_end_of_memory_vars(start, size);
717 
718         return ret;
719 }
720 EXPORT_SYMBOL_GPL(arch_add_memory);
721 
722 #define PAGE_INUSE 0xFD
723 
724 static void __meminit free_pagetable(struct page *page, int order)
725 {
726         unsigned long magic;
727         unsigned int nr_pages = 1 << order;
728 
729         /* bootmem page has reserved flag */
730         if (PageReserved(page)) {
731                 __ClearPageReserved(page);
732 
733                 magic = (unsigned long)page->lru.next;
734                 if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
735                         while (nr_pages--)
736                                 put_page_bootmem(page++);
737                 } else
738                         while (nr_pages--)
739                                 free_reserved_page(page++);
740         } else
741                 free_pages((unsigned long)page_address(page), order);
742 }
743 
744 static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
745 {
746         pte_t *pte;
747         int i;
748 
749         for (i = 0; i < PTRS_PER_PTE; i++) {
750                 pte = pte_start + i;
751                 if (pte_val(*pte))
752                         return;
753         }
754 
755         /* free a pte talbe */
756         free_pagetable(pmd_page(*pmd), 0);
757         spin_lock(&init_mm.page_table_lock);
758         pmd_clear(pmd);
759         spin_unlock(&init_mm.page_table_lock);
760 }
761 
762 static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
763 {
764         pmd_t *pmd;
765         int i;
766 
767         for (i = 0; i < PTRS_PER_PMD; i++) {
768                 pmd = pmd_start + i;
769                 if (pmd_val(*pmd))
770                         return;
771         }
772 
773         /* free a pmd talbe */
774         free_pagetable(pud_page(*pud), 0);
775         spin_lock(&init_mm.page_table_lock);
776         pud_clear(pud);
777         spin_unlock(&init_mm.page_table_lock);
778 }
779 
780 /* Return true if pgd is changed, otherwise return false. */
781 static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
782 {
783         pud_t *pud;
784         int i;
785 
786         for (i = 0; i < PTRS_PER_PUD; i++) {
787                 pud = pud_start + i;
788                 if (pud_val(*pud))
789                         return false;
790         }
791 
792         /* free a pud table */
793         free_pagetable(pgd_page(*pgd), 0);
794         spin_lock(&init_mm.page_table_lock);
795         pgd_clear(pgd);
796         spin_unlock(&init_mm.page_table_lock);
797 
798         return true;
799 }
800 
801 static void __meminit
802 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
803                  bool direct)
804 {
805         unsigned long next, pages = 0;
806         pte_t *pte;
807         void *page_addr;
808         phys_addr_t phys_addr;
809 
810         pte = pte_start + pte_index(addr);
811         for (; addr < end; addr = next, pte++) {
812                 next = (addr + PAGE_SIZE) & PAGE_MASK;
813                 if (next > end)
814                         next = end;
815 
816                 if (!pte_present(*pte))
817                         continue;
818 
819                 /*
820                  * We mapped [0,1G) memory as identity mapping when
821                  * initializing, in arch/x86/kernel/head_64.S. These
822                  * pagetables cannot be removed.
823                  */
824                 phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
825                 if (phys_addr < (phys_addr_t)0x40000000)
826                         return;
827 
828                 if (IS_ALIGNED(addr, PAGE_SIZE) &&
829                     IS_ALIGNED(next, PAGE_SIZE)) {
830                         /*
831                          * Do not free direct mapping pages since they were
832                          * freed when offlining, or simplely not in use.
833                          */
834                         if (!direct)
835                                 free_pagetable(pte_page(*pte), 0);
836 
837                         spin_lock(&init_mm.page_table_lock);
838                         pte_clear(&init_mm, addr, pte);
839                         spin_unlock(&init_mm.page_table_lock);
840 
841                         /* For non-direct mapping, pages means nothing. */
842                         pages++;
843                 } else {
844                         /*
845                          * If we are here, we are freeing vmemmap pages since
846                          * direct mapped memory ranges to be freed are aligned.
847                          *
848                          * If we are not removing the whole page, it means
849                          * other page structs in this page are being used and
850                          * we canot remove them. So fill the unused page_structs
851                          * with 0xFD, and remove the page when it is wholly
852                          * filled with 0xFD.
853                          */
854                         memset((void *)addr, PAGE_INUSE, next - addr);
855 
856                         page_addr = page_address(pte_page(*pte));
857                         if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
858                                 free_pagetable(pte_page(*pte), 0);
859 
860                                 spin_lock(&init_mm.page_table_lock);
861                                 pte_clear(&init_mm, addr, pte);
862                                 spin_unlock(&init_mm.page_table_lock);
863                         }
864                 }
865         }
866 
867         /* Call free_pte_table() in remove_pmd_table(). */
868         flush_tlb_all();
869         if (direct)
870                 update_page_count(PG_LEVEL_4K, -pages);
871 }
872 
873 static void __meminit
874 remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
875                  bool direct)
876 {
877         unsigned long next, pages = 0;
878         pte_t *pte_base;
879         pmd_t *pmd;
880         void *page_addr;
881 
882         pmd = pmd_start + pmd_index(addr);
883         for (; addr < end; addr = next, pmd++) {
884                 next = pmd_addr_end(addr, end);
885 
886                 if (!pmd_present(*pmd))
887                         continue;
888 
889                 if (pmd_large(*pmd)) {
890                         if (IS_ALIGNED(addr, PMD_SIZE) &&
891                             IS_ALIGNED(next, PMD_SIZE)) {
892                                 if (!direct)
893                                         free_pagetable(pmd_page(*pmd),
894                                                        get_order(PMD_SIZE));
895 
896                                 spin_lock(&init_mm.page_table_lock);
897                                 pmd_clear(pmd);
898                                 spin_unlock(&init_mm.page_table_lock);
899                                 pages++;
900                         } else {
901                                 /* If here, we are freeing vmemmap pages. */
902                                 memset((void *)addr, PAGE_INUSE, next - addr);
903 
904                                 page_addr = page_address(pmd_page(*pmd));
905                                 if (!memchr_inv(page_addr, PAGE_INUSE,
906                                                 PMD_SIZE)) {
907                                         free_pagetable(pmd_page(*pmd),
908                                                        get_order(PMD_SIZE));
909 
910                                         spin_lock(&init_mm.page_table_lock);
911                                         pmd_clear(pmd);
912                                         spin_unlock(&init_mm.page_table_lock);
913                                 }
914                         }
915 
916                         continue;
917                 }
918 
919                 pte_base = (pte_t *)pmd_page_vaddr(*pmd);
920                 remove_pte_table(pte_base, addr, next, direct);
921                 free_pte_table(pte_base, pmd);
922         }
923 
924         /* Call free_pmd_table() in remove_pud_table(). */
925         if (direct)
926                 update_page_count(PG_LEVEL_2M, -pages);
927 }
928 
929 static void __meminit
930 remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
931                  bool direct)
932 {
933         unsigned long next, pages = 0;
934         pmd_t *pmd_base;
935         pud_t *pud;
936         void *page_addr;
937 
938         pud = pud_start + pud_index(addr);
939         for (; addr < end; addr = next, pud++) {
940                 next = pud_addr_end(addr, end);
941 
942                 if (!pud_present(*pud))
943                         continue;
944 
945                 if (pud_large(*pud)) {
946                         if (IS_ALIGNED(addr, PUD_SIZE) &&
947                             IS_ALIGNED(next, PUD_SIZE)) {
948                                 if (!direct)
949                                         free_pagetable(pud_page(*pud),
950                                                        get_order(PUD_SIZE));
951 
952                                 spin_lock(&init_mm.page_table_lock);
953                                 pud_clear(pud);
954                                 spin_unlock(&init_mm.page_table_lock);
955                                 pages++;
956                         } else {
957                                 /* If here, we are freeing vmemmap pages. */
958                                 memset((void *)addr, PAGE_INUSE, next - addr);
959 
960                                 page_addr = page_address(pud_page(*pud));
961                                 if (!memchr_inv(page_addr, PAGE_INUSE,
962                                                 PUD_SIZE)) {
963                                         free_pagetable(pud_page(*pud),
964                                                        get_order(PUD_SIZE));
965 
966                                         spin_lock(&init_mm.page_table_lock);
967                                         pud_clear(pud);
968                                         spin_unlock(&init_mm.page_table_lock);
969                                 }
970                         }
971 
972                         continue;
973                 }
974 
975                 pmd_base = (pmd_t *)pud_page_vaddr(*pud);
976                 remove_pmd_table(pmd_base, addr, next, direct);
977                 free_pmd_table(pmd_base, pud);
978         }
979 
980         if (direct)
981                 update_page_count(PG_LEVEL_1G, -pages);
982 }
983 
984 /* start and end are both virtual address. */
985 static void __meminit
986 remove_pagetable(unsigned long start, unsigned long end, bool direct)
987 {
988         unsigned long next;
989         unsigned long addr;
990         pgd_t *pgd;
991         pud_t *pud;
992         bool pgd_changed = false;
993 
994         for (addr = start; addr < end; addr = next) {
995                 next = pgd_addr_end(addr, end);
996 
997                 pgd = pgd_offset_k(addr);
998                 if (!pgd_present(*pgd))
999                         continue;
1000 
1001                 pud = (pud_t *)pgd_page_vaddr(*pgd);
1002                 remove_pud_table(pud, addr, next, direct);
1003                 if (free_pud_table(pud, pgd))
1004                         pgd_changed = true;
1005         }
1006 
1007         if (pgd_changed)
1008                 sync_global_pgds(start, end - 1, 1);
1009 
1010         flush_tlb_all();
1011 }
1012 
1013 void __ref vmemmap_free(unsigned long start, unsigned long end)
1014 {
1015         remove_pagetable(start, end, false);
1016 }
1017 
1018 #ifdef CONFIG_MEMORY_HOTREMOVE
1019 static void __meminit
1020 kernel_physical_mapping_remove(unsigned long start, unsigned long end)
1021 {
1022         start = (unsigned long)__va(start);
1023         end = (unsigned long)__va(end);
1024 
1025         remove_pagetable(start, end, true);
1026 }
1027 
1028 int __ref arch_remove_memory(u64 start, u64 size)
1029 {
1030         unsigned long start_pfn = start >> PAGE_SHIFT;
1031         unsigned long nr_pages = size >> PAGE_SHIFT;
1032         struct zone *zone;
1033         int ret;
1034 
1035         zone = page_zone(pfn_to_page(start_pfn));
1036         kernel_physical_mapping_remove(start, start + size);
1037         ret = __remove_pages(zone, start_pfn, nr_pages);
1038         WARN_ON_ONCE(ret);
1039 
1040         return ret;
1041 }
1042 #endif
1043 #endif /* CONFIG_MEMORY_HOTPLUG */
1044 
/* kcore list entry for the vsyscall range; mem_init() registers it via kclist_add(). */
1045 static struct kcore_list kcore_vsyscall;
1046 
1047 static void __init register_page_bootmem_info(void)
1048 {
1049 #ifdef CONFIG_NUMA
1050         int i;
1051 
1052         for_each_online_node(i)
1053                 register_page_bootmem_info_node(NODE_DATA(i));
1054 #endif
1055 }
1056 
1057 void __init mem_init(void)
1058 {
1059         pci_iommu_alloc();
1060 
1061         /* clear_bss() already clear the empty_zero_page */
1062 
1063         register_page_bootmem_info();
1064 
1065         /* this will put all memory onto the freelists */
1066         free_all_bootmem();
1067         after_bootmem = 1;
1068 
1069         /* Register memory areas for /proc/kcore */
1070         kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
1071                          VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
1072 
1073         mem_init_print_info(NULL);
1074 }
1075 
1076 #ifdef CONFIG_DEBUG_RODATA
/*
 * Exported constant with the value 0xC3.
 * NOTE(review): presumably the probe target of rodata_test(), which
 * mark_rodata_ro() calls; confirm against the rodata test code.
 */
1077 const int rodata_test_data = 0xC3;
1078 EXPORT_SYMBOL_GPL(rodata_test_data);
1079 
/*
 * Set to 1 by mark_rodata_ro(); set_kernel_text_rw() and
 * set_kernel_text_ro() do nothing while it is still 0.
 */
1080 int kernel_set_to_readonly;
1081 
1082 void set_kernel_text_rw(void)
1083 {
1084         unsigned long start = PFN_ALIGN(_text);
1085         unsigned long end = PFN_ALIGN(__stop___ex_table);
1086 
1087         if (!kernel_set_to_readonly)
1088                 return;
1089 
1090         pr_debug("Set kernel text: %lx - %lx for read write\n",
1091                  start, end);
1092 
1093         /*
1094          * Make the kernel identity mapping for text RW. Kernel text
1095          * mapping will always be RO. Refer to the comment in
1096          * static_protections() in pageattr.c
1097          */
1098         set_memory_rw(start, (end - start) >> PAGE_SHIFT);
1099 }
1100 
1101 void set_kernel_text_ro(void)
1102 {
1103         unsigned long start = PFN_ALIGN(_text);
1104         unsigned long end = PFN_ALIGN(__stop___ex_table);
1105 
1106         if (!kernel_set_to_readonly)
1107                 return;
1108 
1109         pr_debug("Set kernel text: %lx - %lx for read only\n",
1110                  start, end);
1111 
1112         /*
1113          * Set the kernel identity mapping for text RO.
1114          */
1115         set_memory_ro(start, (end - start) >> PAGE_SHIFT);
1116 }
1117 
1118 void mark_rodata_ro(void)
1119 {
1120         unsigned long start = PFN_ALIGN(_text);
1121         unsigned long rodata_start = PFN_ALIGN(__start_rodata);
1122         unsigned long end = (unsigned long) &__end_rodata_hpage_align;
1123         unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
1124         unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
1125         unsigned long all_end;
1126 
1127         printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
1128                (end - start) >> 10);
1129         set_memory_ro(start, (end - start) >> PAGE_SHIFT);
1130 
1131         kernel_set_to_readonly = 1;
1132 
1133         /*
1134          * The rodata/data/bss/brk section (but not the kernel text!)
1135          * should also be not-executable.
1136          *
1137          * We align all_end to PMD_SIZE because the existing mapping
1138          * is a full PMD. If we would align _brk_end to PAGE_SIZE we
1139          * split the PMD and the reminder between _brk_end and the end
1140          * of the PMD will remain mapped executable.
1141          *
1142          * Any PMD which was setup after the one which covers _brk_end
1143          * has been zapped already via cleanup_highmem().
1144          */
1145         all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
1146         set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
1147 
1148         rodata_test();
1149 
1150 #ifdef CONFIG_CPA_DEBUG
1151         printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
1152         set_memory_rw(start, (end-start) >> PAGE_SHIFT);
1153 
1154         printk(KERN_INFO "Testing CPA: again\n");
1155         set_memory_ro(start, (end-start) >> PAGE_SHIFT);
1156 #endif
1157 
1158         free_init_pages("unused kernel",
1159                         (unsigned long) __va(__pa_symbol(text_end)),
1160                         (unsigned long) __va(__pa_symbol(rodata_start)));
1161         free_init_pages("unused kernel",
1162                         (unsigned long) __va(__pa_symbol(rodata_end)),
1163                         (unsigned long) __va(__pa_symbol(_sdata)));
1164 }
1165 
1166 #endif
1167 
1168 int kern_addr_valid(unsigned long addr)
1169 {
1170         unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
1171         pgd_t *pgd;
1172         pud_t *pud;
1173         pmd_t *pmd;
1174         pte_t *pte;
1175 
1176         if (above != 0 && above != -1UL)
1177                 return 0;
1178 
1179         pgd = pgd_offset_k(addr);
1180         if (pgd_none(*pgd))
1181                 return 0;
1182 
1183         pud = pud_offset(pgd, addr);
1184         if (pud_none(*pud))
1185                 return 0;
1186 
1187         if (pud_large(*pud))
1188                 return pfn_valid(pud_pfn(*pud));
1189 
1190         pmd = pmd_offset(pud, addr);
1191         if (pmd_none(*pmd))
1192                 return 0;
1193 
1194         if (pmd_large(*pmd))
1195                 return pfn_valid(pmd_pfn(*pmd));
1196 
1197         pte = pte_offset_kernel(pmd, addr);
1198         if (pte_none(*pte))
1199                 return 0;
1200 
1201         return pfn_valid(pte_pfn(*pte));
1202 }
1203 
1204 /*
1205  * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
1206  * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
1207  * not need special handling anymore:
1208  */
1209 static struct vm_area_struct gate_vma = {
1210         .vm_start       = VSYSCALL_START,
1211         .vm_end         = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE),
1212         .vm_page_prot   = PAGE_READONLY_EXEC,
1213         .vm_flags       = VM_READ | VM_EXEC
1214 };
1215 
1216 struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
1217 {
1218 #ifdef CONFIG_IA32_EMULATION
1219         if (!mm || mm->context.ia32_compat)
1220                 return NULL;
1221 #endif
1222         return &gate_vma;
1223 }
1224 
1225 int in_gate_area(struct mm_struct *mm, unsigned long addr)
1226 {
1227         struct vm_area_struct *vma = get_gate_vma(mm);
1228 
1229         if (!vma)
1230                 return 0;
1231 
1232         return (addr >= vma->vm_start) && (addr < vma->vm_end);
1233 }
1234 
1235 /*
1236  * Use this when you have no reliable mm, typically from interrupt
1237  * context. It is less reliable than using a task's mm and may give
1238  * false positives.
1239  */
1240 int in_gate_area_no_mm(unsigned long addr)
1241 {
1242         return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
1243 }
1244 
1245 const char *arch_vma_name(struct vm_area_struct *vma)
1246 {
1247         if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
1248                 return "[vdso]";
1249         if (vma == &gate_vma)
1250                 return "[vsyscall]";
1251         return NULL;
1252 }
1253 
1254 #ifdef CONFIG_X86_UV
1255 unsigned long memory_block_size_bytes(void)
1256 {
1257         if (is_uv_system()) {
1258                 printk(KERN_INFO "UV: memory block size 2GB\n");
1259                 return 2UL * 1024 * 1024 * 1024;
1260         }
1261         return MIN_MEMORY_BLOCK_SIZE;
1262 }
1263 #endif
1264 
1265 #ifdef CONFIG_SPARSEMEM_VMEMMAP
1266 /*
1267  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
1268  */
1269 static long __meminitdata addr_start, addr_end;
1270 static void __meminitdata *p_start, *p_end;
1271 static int __meminitdata node_start;
1272 
1273 static int __meminit vmemmap_populate_hugepages(unsigned long start,
1274                                                 unsigned long end, int node)
1275 {
1276         unsigned long addr;
1277         unsigned long next;
1278         pgd_t *pgd;
1279         pud_t *pud;
1280         pmd_t *pmd;
1281 
1282         for (addr = start; addr < end; addr = next) {
1283                 next = pmd_addr_end(addr, end);
1284 
1285                 pgd = vmemmap_pgd_populate(addr, node);
1286                 if (!pgd)
1287                         return -ENOMEM;
1288 
1289                 pud = vmemmap_pud_populate(pgd, addr, node);
1290                 if (!pud)
1291                         return -ENOMEM;
1292 
1293                 pmd = pmd_offset(pud, addr);
1294                 if (pmd_none(*pmd)) {
1295                         void *p;
1296 
1297                         p = vmemmap_alloc_block_buf(PMD_SIZE, node);
1298                         if (p) {
1299                                 pte_t entry;
1300 
1301                                 entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
1302                                                 PAGE_KERNEL_LARGE);
1303                                 set_pmd(pmd, __pmd(pte_val(entry)));
1304 
1305                                 /* check to see if we have contiguous blocks */
1306                                 if (p_end != p || node_start != node) {
1307                                         if (p_start)
1308                                                 printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
1309                                                        addr_start, addr_end-1, p_start, p_end-1, node_start);
1310                                         addr_start = addr;
1311                                         node_start = node;
1312                                         p_start = p;
1313                                 }
1314 
1315                                 addr_end = addr + PMD_SIZE;
1316                                 p_end = p + PMD_SIZE;
1317                                 continue;
1318                         }
1319                 } else if (pmd_large(*pmd)) {
1320                         vmemmap_verify((pte_t *)pmd, node, addr, next);
1321                         continue;
1322                 }
1323                 pr_warn_once("vmemmap: falling back to regular page backing\n");
1324                 if (vmemmap_populate_basepages(addr, next, node))
1325                         return -ENOMEM;
1326         }
1327         return 0;
1328 }
1329 
1330 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
1331 {
1332         int err;
1333 
1334         if (cpu_has_pse)
1335                 err = vmemmap_populate_hugepages(start, end, node);
1336         else
1337                 err = vmemmap_populate_basepages(start, end, node);
1338         if (!err)
1339                 sync_global_pgds(start, end - 1, 0);
1340         return err;
1341 }
1342 
1343 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
1344 void register_page_bootmem_memmap(unsigned long section_nr,
1345                                   struct page *start_page, unsigned long size)
1346 {
1347         unsigned long addr = (unsigned long)start_page;
1348         unsigned long end = (unsigned long)(start_page + size);
1349         unsigned long next;
1350         pgd_t *pgd;
1351         pud_t *pud;
1352         pmd_t *pmd;
1353         unsigned int nr_pages;
1354         struct page *page;
1355 
1356         for (; addr < end; addr = next) {
1357                 pte_t *pte = NULL;
1358 
1359                 pgd = pgd_offset_k(addr);
1360                 if (pgd_none(*pgd)) {
1361                         next = (addr + PAGE_SIZE) & PAGE_MASK;
1362                         continue;
1363                 }
1364                 get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
1365 
1366                 pud = pud_offset(pgd, addr);
1367                 if (pud_none(*pud)) {
1368                         next = (addr + PAGE_SIZE) & PAGE_MASK;
1369                         continue;
1370                 }
1371                 get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
1372 
1373                 if (!cpu_has_pse) {
1374                         next = (addr + PAGE_SIZE) & PAGE_MASK;
1375                         pmd = pmd_offset(pud, addr);
1376                         if (pmd_none(*pmd))
1377                                 continue;
1378                         get_page_bootmem(section_nr, pmd_page(*pmd),
1379                                          MIX_SECTION_INFO);
1380 
1381                         pte = pte_offset_kernel(pmd, addr);
1382                         if (pte_none(*pte))
1383                                 continue;
1384                         get_page_bootmem(section_nr, pte_page(*pte),
1385                                          SECTION_INFO);
1386                 } else {
1387                         next = pmd_addr_end(addr, end);
1388 
1389                         pmd = pmd_offset(pud, addr);
1390                         if (pmd_none(*pmd))
1391                                 continue;
1392 
1393                         nr_pages = 1 << (get_order(PMD_SIZE));
1394                         page = pmd_page(*pmd);
1395                         while (nr_pages--)
1396                                 get_page_bootmem(section_nr, page++,
1397                                                  SECTION_INFO);
1398                 }
1399         }
1400 }
1401 #endif
1402 
1403 void __meminit vmemmap_populate_print_last(void)
1404 {
1405         if (p_start) {
1406                 printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
1407                         addr_start, addr_end-1, p_start, p_end-1, node_start);
1408                 p_start = NULL;
1409                 p_end = NULL;
1410                 node_start = 0;
1411         }
1412 }
1413 #endif
1414 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp