TOMOYO Linux Cross Reference
Linux/arch/ia64/mm/discontig.c

/*
 * Copyright (c) 2000, 2003 Silicon Graphics, Inc.  All rights reserved.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
 * Copyright (c) 2002 NEC Corp.
 * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
 */

/*
 * Platform initialization for Discontig Memory
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/meminit.h>
#include <asm/numa.h>
#include <asm/sections.h>

/*
 * Track per-node information needed to setup the boot memory allocator, the
 * per-node areas, and the real VM.
 */
struct early_node_data {
        struct ia64_node_data *node_data;
        pg_data_t *pgdat;
        unsigned long pernode_addr;
        unsigned long pernode_size;
        struct bootmem_data bootmem_data;
        unsigned long num_physpages;
        unsigned long num_dma_physpages;
        unsigned long min_pfn;
        unsigned long max_pfn;
};

static struct early_node_data mem_data[NR_NODES] __initdata;

/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 */
#define NODEDATA_ALIGN(addr, node)                                              \
        ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE)
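/*
 * Example: NODEDATA_ALIGN(addr, node) rounds addr up to the next 1MB
 * boundary and then offsets the result by node * PERCPU_PAGE_SIZE, so
 * consecutive nodes' per-node areas start at different strides.
 * Assuming the usual 64KB ia64 PERCPU_PAGE_SIZE, an addr of 0x4080000
 * on node 2 becomes 0x4100000 + 2*0x10000 = 0x4120000.
 */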

/**
 * build_node_maps - callback to setup bootmem structs for each node
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * We allocate a struct bootmem_data for each piece of memory that we wish to
 * treat as a virtually contiguous block (i.e. each node). Each such block
 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
 * if necessary.  Any non-existent pages will simply be part of the virtual
 * memmap.  We also update min_low_pfn and max_low_pfn here as we receive
 * memory ranges from the caller.
 */
static int __init build_node_maps(unsigned long start, unsigned long len,
                                  int node)
{
        unsigned long cstart, epfn, end = start + len;
        struct bootmem_data *bdp = &mem_data[node].bootmem_data;

        epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
        cstart = GRANULEROUNDDOWN(start);

        if (!bdp->node_low_pfn) {
                bdp->node_boot_start = cstart;
                bdp->node_low_pfn = epfn;
        } else {
                bdp->node_boot_start = min(cstart, bdp->node_boot_start);
                bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
        }

        min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
        max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);

        return 0;
}

/**
 * early_nr_cpus_node - return number of cpus on a given node
 * @node: node to check
 *
 * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.
 */
static int early_nr_cpus_node(int node)
{
        int cpu, n = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (node == node_cpuid[cpu].nid)
                        n++;

        return n;
}

/**
 * find_pernode_space - allocate memory for memory map and per-node structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * This routine reserves space for the per-cpu data struct, the list of
 * pg_data_ts and the per-node data struct.  Each node will have something like
 * the following in the first chunk of addr. space large enough to hold it.
 *
 *    ________________________
 *   |                        |
 *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
 *   |    PERCPU_PAGE_SIZE *  |     start and length big enough
 *   |        NR_CPUS         |
 *   |------------------------|
 *   |   local pg_data_t *    |
 *   |------------------------|
 *   |  local ia64_node_data  |
 *   |------------------------|
 *   |          ???           |
 *   |________________________|
 *
 * Once this space has been set aside, the bootmem maps are initialized.  We
 * could probably move the allocation of the per-cpu and ia64_node_data space
 * outside of this function and use alloc_bootmem_node(), but doing it here
 * is straightforward and we get the alignments we want so...
 */
static int __init find_pernode_space(unsigned long start, unsigned long len,
                                     int node)
{
        unsigned long epfn, cpu, cpus;
        unsigned long pernodesize = 0, pernode;
        void *cpu_data;
        struct bootmem_data *bdp = &mem_data[node].bootmem_data;

        epfn = (start + len) >> PAGE_SHIFT;

        /*
         * Make sure this memory falls within this node's usable memory
         * since we may have thrown some away in build_node_maps().
         */
        if (start < bdp->node_boot_start ||
            epfn > bdp->node_low_pfn)
                return 0;

        /* Don't setup this node's local space twice... */
        if (!mem_data[node].pernode_addr) {
                /*
                 * Calculate total size needed, incl. what's necessary
                 * for good alignment and alias prevention.
                 */
                cpus = early_nr_cpus_node(node);
                pernodesize += PERCPU_PAGE_SIZE * cpus;
                pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
                pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
                pernodesize = PAGE_ALIGN(pernodesize);
                pernode = NODEDATA_ALIGN(start, node);

                /* Is this range big enough for what we want to store here? */
                if (start + len > (pernode + pernodesize)) {
                        mem_data[node].pernode_addr = pernode;
                        mem_data[node].pernode_size = pernodesize;
                        memset(__va(pernode), 0, pernodesize);

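                        /*
                         * Carve the zeroed chunk up in order: the per-cpu
                         * pages for this node's cpus first, then the node's
                         * pg_data_t, then its ia64_node_data.
                         */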
                        cpu_data = (void *)pernode;
                        pernode += PERCPU_PAGE_SIZE * cpus;

                        mem_data[node].pgdat = __va(pernode);
                        pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

                        mem_data[node].node_data = __va(pernode);
                        pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));

                        mem_data[node].pgdat->bdata = bdp;
                        pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

                        /*
                         * Copy the static per-cpu data into the region we
                         * just set aside and then setup __per_cpu_offset
                         * for each CPU on this node.
                         */
                        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                                if (node == node_cpuid[cpu].nid) {
                                        memcpy(__va(cpu_data), __phys_per_cpu_start,
                                               __per_cpu_end-__per_cpu_start);
                                        __per_cpu_offset[cpu] =
                                                (char*)__va(cpu_data) -
                                                __per_cpu_start;
                                        cpu_data += PERCPU_PAGE_SIZE;
                                }
                        }
                }
        }

        pernode = mem_data[node].pernode_addr;
        pernodesize = mem_data[node].pernode_size;
        if (pernode && !bdp->node_bootmem_map) {
                unsigned long pages, mapsize, map = 0;

                pages = bdp->node_low_pfn -
                        (bdp->node_boot_start >> PAGE_SHIFT);
                mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;

                /*
                 * The map will either contain the pernode area or begin
                 * after it.
                 */
                if (pernode - start > mapsize)
                        map = start;
                else if (start + len - pernode - pernodesize > mapsize)
                        map = pernode + pernodesize;

                if (map) {
                        init_bootmem_node(mem_data[node].pgdat,
                                          map>>PAGE_SHIFT,
                                          bdp->node_boot_start>>PAGE_SHIFT,
                                          bdp->node_low_pfn);
                }

        }

        return 0;
}

/**
 * free_node_bootmem - free bootmem allocator memory for use
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Simply calls the bootmem allocator to free the specified range from
 * the given pg_data_t's bdata struct.  After this function has been called
 * for all the entries in the EFI memory map, the bootmem allocator will
 * be ready to service allocation requests.
 */
static int __init free_node_bootmem(unsigned long start, unsigned long len,
                                    int node)
{
        free_bootmem_node(mem_data[node].pgdat, start, len);

        return 0;
}

/**
 * reserve_pernode_space - reserve memory for per-node space
 *
 * Reserve the space used by the bootmem maps & per-node space in the boot
 * allocator so that when we actually create the real mem maps we don't
 * use their memory.
 */
static void __init reserve_pernode_space(void)
{
        unsigned long base, size, pages;
        struct bootmem_data *bdp;
        int node;

        for (node = 0; node < numnodes; node++) {
                pg_data_t *pdp = mem_data[node].pgdat;

                bdp = pdp->bdata;

                /* First the bootmem_map itself */
                pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
                size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
                base = __pa(bdp->node_bootmem_map);
                reserve_bootmem_node(pdp, base, size);

                /* Now the per-node space */
                size = mem_data[node].pernode_size;
                base = __pa(mem_data[node].pernode_addr);
                reserve_bootmem_node(pdp, base, size);
        }
}

/**
 * initialize_pernode_data - fixup per-cpu & per-node pointers
 *
 * Each node's per-node area has a copy of the global pg_data_t list, so
 * we copy that to each node here, as well as setting the per-cpu pointer
 * to the local node data structure.  The active_cpus field of the per-node
 * structure is set up by the platform_cpu_init() function later.
 */
static void __init initialize_pernode_data(void)
{
        int cpu, node;
        pg_data_t *pgdat_list[NR_NODES];

        for (node = 0; node < numnodes; node++)
                pgdat_list[node] = mem_data[node].pgdat;

        /* Copy the pg_data_t list to each node and init the node field */
        for (node = 0; node < numnodes; node++) {
                memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
                       sizeof(pgdat_list));
        }

        /* Set the node_data pointer for each per-cpu struct */
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                node = node_cpuid[cpu].nid;
                per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
        }
}

/**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
 * Called early in boot to setup the bootmem allocator, and to
 * allocate the per-cpu and per-node structures.
 */
void __init find_memory(void)
{
        reserve_memory();

        if (numnodes == 0) {
                printk(KERN_ERR "node info missing!\n");
                numnodes = 1;
        }

        min_low_pfn = -1;
        max_low_pfn = 0;

        /* These actually end up getting called by call_pernode_memory() */
        efi_memmap_walk(filter_rsvd_memory, build_node_maps);
        efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
        efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);

        reserve_pernode_space();
        initialize_pernode_data();

        max_pfn = max_low_pfn;

        find_initrd();
}

/**
 * per_cpu_init - setup per-cpu variables
 *
 * find_pernode_space() does most of this already, we just need to set
 * local_per_cpu_offset
 */
void *per_cpu_init(void)
{
        int cpu;

        if (smp_processor_id() == 0) {
                for (cpu = 0; cpu < NR_CPUS; cpu++) {
                        per_cpu(local_per_cpu_offset, cpu) =
                                __per_cpu_offset[cpu];
                }
        }

        return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}

/**
 * show_mem - give short summary of memory stats
 *
 * Shows a simple page count of reserved and used pages in the system.
 * For discontig machines, it does this on a per-pgdat basis.
 */
void show_mem(void)
{
        int i, reserved = 0;
        int shared = 0, cached = 0;
        pg_data_t *pgdat;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap:       %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
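        /*
         * Note: the reserved/shared/cached counters are not reset for each
         * node, so every node's printout includes the totals accumulated
         * from the nodes already walked.
         */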
        for_each_pgdat(pgdat) {
                printk("Node ID: %d\n", pgdat->node_id);
                for(i = 0; i < pgdat->node_spanned_pages; i++) {
                        if (PageReserved(pgdat->node_mem_map+i))
                                reserved++;
                        else if (PageSwapCache(pgdat->node_mem_map+i))
                                cached++;
                        else if (page_count(pgdat->node_mem_map+i))
                                shared += page_count(pgdat->node_mem_map+i)-1;
                }
                printk("\t%ld pages of RAM\n", pgdat->node_present_pages);
                printk("\t%d reserved pages\n", reserved);
                printk("\t%d pages shared\n", shared);
                printk("\t%d pages swap cached\n", cached);
        }
        printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
        printk("%d free buffer pages\n", nr_free_buffer_pages());
}

/**
 * call_pernode_memory - use SRAT to call callback functions with node info
 * @start: physical start of range
 * @len: length of range
 * @arg: function to call for each range
 *
 * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
 * out to which node a block of memory belongs.  Ignore memory that we cannot
 * identify, and split blocks that run across multiple nodes.
 *
 * Take this opportunity to round the start address up and the end address
 * down to page boundaries.
 */
void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
{
        unsigned long rs, re, end = start + len;
        void (*func)(unsigned long, unsigned long, int);
        int i;

        start = PAGE_ALIGN(start);
        end &= PAGE_MASK;
        if (start >= end)
                return;

        func = arg;

        if (!num_memblks) {
                /* No SRAT table, so assume one node (node 0) */
                if (start < end)
                        (*func)(start, end - start, 0);
                return;
        }

        for (i = 0; i < num_memblks; i++) {
                rs = max(start, node_memblk[i].start_paddr);
                re = min(end, node_memblk[i].start_paddr +
                         node_memblk[i].size);

                if (rs < re)
                        (*func)(rs, re - rs, node_memblk[i].nid);

                if (re == end)
                        break;
        }
}

/**
 * count_node_pages - callback to build per-node memory info structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Each node has its own number of physical pages, DMAable pages, start, and
 * end page frame number.  This routine will be called by call_pernode_memory()
 * for each piece of usable memory and will setup these values for each node.
 * Very similar to build_node_maps().
 */
static int count_node_pages(unsigned long start, unsigned long len, int node)
{
        unsigned long end = start + len;

        mem_data[node].num_physpages += len >> PAGE_SHIFT;
        if (start <= __pa(MAX_DMA_ADDRESS))
                mem_data[node].num_dma_physpages +=
                        (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
        start = GRANULEROUNDDOWN(start);
        start = ORDERROUNDDOWN(start);
        end = GRANULEROUNDUP(end);
        mem_data[node].max_pfn = max(mem_data[node].max_pfn,
                                     end >> PAGE_SHIFT);
        mem_data[node].min_pfn = min(mem_data[node].min_pfn,
                                     start >> PAGE_SHIFT);

        return 0;
}

/**
 * paging_init - setup page tables
 *
 * paging_init() sets up the page tables for each node of the system and frees
 * the bootmem allocator memory for general use.
 */
void paging_init(void)
{
        unsigned long max_dma;
        unsigned long zones_size[MAX_NR_ZONES];
        unsigned long zholes_size[MAX_NR_ZONES];
        unsigned long max_gap, pfn_offset = 0;
        int node;

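        /* Page frame number of the ZONE_DMA/ZONE_NORMAL boundary (MAX_DMA_ADDRESS) */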
        max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
        max_gap = 0;
        efi_memmap_walk(find_largest_hole, &max_gap);

        /* so min() will work in count_node_pages */
        for (node = 0; node < numnodes; node++)
                mem_data[node].min_pfn = ~0UL;

        efi_memmap_walk(filter_rsvd_memory, count_node_pages);

        for (node = 0; node < numnodes; node++) {
                memset(zones_size, 0, sizeof(zones_size));
                memset(zholes_size, 0, sizeof(zholes_size));

                num_physpages += mem_data[node].num_physpages;

                if (mem_data[node].min_pfn >= max_dma) {
                        /* All of this node's memory is above ZONE_DMA */
                        zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
                                mem_data[node].min_pfn;
                        zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn -
                                mem_data[node].min_pfn -
                                mem_data[node].num_physpages;
                } else if (mem_data[node].max_pfn < max_dma) {
                        /* All of this node's memory is in ZONE_DMA */
                        zones_size[ZONE_DMA] = mem_data[node].max_pfn -
                                mem_data[node].min_pfn;
                        zholes_size[ZONE_DMA] = mem_data[node].max_pfn -
                                mem_data[node].min_pfn -
                                mem_data[node].num_dma_physpages;
                } else {
                        /* This node has memory in both zones */
                        zones_size[ZONE_DMA] = max_dma -
                                mem_data[node].min_pfn;
                        zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
                                mem_data[node].num_dma_physpages;
                        zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
                                max_dma;
                        zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] -
                                (mem_data[node].num_physpages -
                                 mem_data[node].num_dma_physpages);
                }

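                /*
                 * On the first iteration, carve space for the virtual
                 * mem_map out of the top of the vmalloc area (one struct
                 * page per pfn up to max_low_pfn) and build the page
                 * tables that back it.
                 */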
                if (node == 0) {
                        vmalloc_end -=
                                PAGE_ALIGN(max_low_pfn * sizeof(struct page));
                        vmem_map = (struct page *) vmalloc_end;

                        efi_memmap_walk(create_mem_map_page_table, 0);
                        printk("Virtual mem_map starts at 0x%p\n", vmem_map);
                }

                pfn_offset = mem_data[node].min_pfn;

                free_area_init_node(node, NODE_DATA(node),
                                    vmem_map + pfn_offset, zones_size,
                                    pfn_offset, zholes_size);
        }

        zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
