1 #include <linux/mm.h> 2 #include <linux/highmem.h> 3 #include <linux/sched.h> 4 #include <linux/hugetlb.h> 5 6 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 7 struct mm_walk *walk) 8 { 9 pte_t *pte; 10 int err = 0; 11 12 pte = pte_offset_map(pmd, addr); 13 for (;;) { 14 err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk); 15 if (err) 16 break; 17 addr += PAGE_SIZE; 18 if (addr == end) 19 break; 20 pte++; 21 } 22 23 pte_unmap(pte); 24 return err; 25 } 26 27 static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, 28 struct mm_walk *walk) 29 { 30 pmd_t *pmd; 31 unsigned long next; 32 int err = 0; 33 34 pmd = pmd_offset(pud, addr); 35 do { 36 again: 37 next = pmd_addr_end(addr, end); 38 if (pmd_none(*pmd) || !walk->vma) { 39 if (walk->pte_hole) 40 err = walk->pte_hole(addr, next, walk); 41 if (err) 42 break; 43 continue; 44 } 45 /* 46 * This implies that each ->pmd_entry() handler 47 * needs to know about pmd_trans_huge() pmds 48 */ 49 if (walk->pmd_entry) 50 err = walk->pmd_entry(pmd, addr, next, walk); 51 if (err) 52 break; 53 54 /* 55 * Check this here so we only break down trans_huge 56 * pages when we _need_ to 57 */ 58 if (!walk->pte_entry) 59 continue; 60 61 split_huge_page_pmd_mm(walk->mm, addr, pmd); 62 if (pmd_trans_unstable(pmd)) 63 goto again; 64 err = walk_pte_range(pmd, addr, next, walk); 65 if (err) 66 break; 67 } while (pmd++, addr = next, addr != end); 68 69 return err; 70 } 71 72 static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, 73 struct mm_walk *walk) 74 { 75 pud_t *pud; 76 unsigned long next; 77 int err = 0; 78 79 pud = pud_offset(pgd, addr); 80 do { 81 next = pud_addr_end(addr, end); 82 if (pud_none_or_clear_bad(pud)) { 83 if (walk->pte_hole) 84 err = walk->pte_hole(addr, next, walk); 85 if (err) 86 break; 87 continue; 88 } 89 if (walk->pmd_entry || walk->pte_entry) 90 err = walk_pmd_range(pud, addr, next, walk); 91 if (err) 92 break; 93 } while (pud++, addr = next, addr != end); 94 95 return err; 96 } 97 98 static int walk_pgd_range(unsigned long addr, unsigned long end, 99 struct mm_walk *walk) 100 { 101 pgd_t *pgd; 102 unsigned long next; 103 int err = 0; 104 105 pgd = pgd_offset(walk->mm, addr); 106 do { 107 next = pgd_addr_end(addr, end); 108 if (pgd_none_or_clear_bad(pgd)) { 109 if (walk->pte_hole) 110 err = walk->pte_hole(addr, next, walk); 111 if (err) 112 break; 113 continue; 114 } 115 if (walk->pmd_entry || walk->pte_entry) 116 err = walk_pud_range(pgd, addr, next, walk); 117 if (err) 118 break; 119 } while (pgd++, addr = next, addr != end); 120 121 return err; 122 } 123 124 #ifdef CONFIG_HUGETLB_PAGE 125 static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr, 126 unsigned long end) 127 { 128 unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h); 129 return boundary < end ? boundary : end; 130 } 131 132 static int walk_hugetlb_range(unsigned long addr, unsigned long end, 133 struct mm_walk *walk) 134 { 135 struct vm_area_struct *vma = walk->vma; 136 struct hstate *h = hstate_vma(vma); 137 unsigned long next; 138 unsigned long hmask = huge_page_mask(h); 139 pte_t *pte; 140 int err = 0; 141 142 do { 143 next = hugetlb_entry_end(h, addr, end); 144 pte = huge_pte_offset(walk->mm, addr & hmask); 145 146 if (pte) 147 err = walk->hugetlb_entry(pte, hmask, addr, next, walk); 148 else if (walk->pte_hole) 149 err = walk->pte_hole(addr, next, walk); 150 151 if (err) 152 break; 153 } while (addr = next, addr != end); 154 155 return err; 156 } 157 158 #else /* CONFIG_HUGETLB_PAGE */ 159 static int walk_hugetlb_range(unsigned long addr, unsigned long end, 160 struct mm_walk *walk) 161 { 162 return 0; 163 } 164 165 #endif /* CONFIG_HUGETLB_PAGE */ 166 167 /* 168 * Decide whether we really walk over the current vma on [@start, @end) 169 * or skip it via the returned value. Return 0 if we do walk over the 170 * current vma, and return 1 if we skip the vma. Negative values means 171 * error, where we abort the current walk. 172 */ 173 static int walk_page_test(unsigned long start, unsigned long end, 174 struct mm_walk *walk) 175 { 176 struct vm_area_struct *vma = walk->vma; 177 178 if (walk->test_walk) 179 return walk->test_walk(start, end, walk); 180 181 /* 182 * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP 183 * range, so we don't walk over it as we do for normal vmas. However, 184 * Some callers are interested in handling hole range and they don't 185 * want to just ignore any single address range. Such users certainly 186 * define their ->pte_hole() callbacks, so let's delegate them to handle 187 * vma(VM_PFNMAP). 188 */ 189 if (vma->vm_flags & VM_PFNMAP) { 190 int err = 1; 191 if (walk->pte_hole) 192 err = walk->pte_hole(start, end, walk); 193 return err ? err : 1; 194 } 195 return 0; 196 } 197 198 static int __walk_page_range(unsigned long start, unsigned long end, 199 struct mm_walk *walk) 200 { 201 int err = 0; 202 struct vm_area_struct *vma = walk->vma; 203 204 if (vma && is_vm_hugetlb_page(vma)) { 205 if (walk->hugetlb_entry) 206 err = walk_hugetlb_range(start, end, walk); 207 } else 208 err = walk_pgd_range(start, end, walk); 209 210 return err; 211 } 212 213 /** 214 * walk_page_range - walk page table with caller specific callbacks 215 * 216 * Recursively walk the page table tree of the process represented by @walk->mm 217 * within the virtual address range [@start, @end). During walking, we can do 218 * some caller-specific works for each entry, by setting up pmd_entry(), 219 * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these 220 * callbacks, the associated entries/pages are just ignored. 221 * The return values of these callbacks are commonly defined like below: 222 * - 0 : succeeded to handle the current entry, and if you don't reach the 223 * end address yet, continue to walk. 224 * - >0 : succeeded to handle the current entry, and return to the caller 225 * with caller specific value. 226 * - <0 : failed to handle the current entry, and return to the caller 227 * with error code. 228 * 229 * Before starting to walk page table, some callers want to check whether 230 * they really want to walk over the current vma, typically by checking 231 * its vm_flags. walk_page_test() and @walk->test_walk() are used for this 232 * purpose. 233 * 234 * struct mm_walk keeps current values of some common data like vma and pmd, 235 * which are useful for the access from callbacks. If you want to pass some 236 * caller-specific data to callbacks, @walk->private should be helpful. 237 * 238 * Locking: 239 * Callers of walk_page_range() and walk_page_vma() should hold 240 * @walk->mm->mmap_sem, because these function traverse vma list and/or 241 * access to vma's data. 242 */ 243 int walk_page_range(unsigned long start, unsigned long end, 244 struct mm_walk *walk) 245 { 246 int err = 0; 247 unsigned long next; 248 struct vm_area_struct *vma; 249 250 if (start >= end) 251 return -EINVAL; 252 253 if (!walk->mm) 254 return -EINVAL; 255 256 VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm); 257 258 vma = find_vma(walk->mm, start); 259 do { 260 if (!vma) { /* after the last vma */ 261 walk->vma = NULL; 262 next = end; 263 } else if (start < vma->vm_start) { /* outside vma */ 264 walk->vma = NULL; 265 next = min(end, vma->vm_start); 266 } else { /* inside vma */ 267 walk->vma = vma; 268 next = min(end, vma->vm_end); 269 vma = vma->vm_next; 270 271 err = walk_page_test(start, next, walk); 272 if (err > 0) { 273 /* 274 * positive return values are purely for 275 * controlling the pagewalk, so should never 276 * be passed to the callers. 277 */ 278 err = 0; 279 continue; 280 } 281 if (err < 0) 282 break; 283 } 284 if (walk->vma || walk->pte_hole) 285 err = __walk_page_range(start, next, walk); 286 if (err) 287 break; 288 } while (start = next, start < end); 289 return err; 290 } 291 292 int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk) 293 { 294 int err; 295 296 if (!walk->mm) 297 return -EINVAL; 298 299 VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); 300 VM_BUG_ON(!vma); 301 walk->vma = vma; 302 err = walk_page_test(vma->vm_start, vma->vm_end, walk); 303 if (err > 0) 304 return 0; 305 if (err < 0) 306 return err; 307 return __walk_page_range(vma->vm_start, vma->vm_end, walk); 308 } 309
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.