/*
 * Meta version derived from arch/powerpc/lib/dma-noncoherent.c
 * Copyright (C) 2008 Imagination Technologies Ltd.
 *
 * PowerPC version derived from arch/arm/mm/consistent.c
 * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
 *
 * Copyright (C) 2000 Russell King
 *
 * Consistent memory allocators. Used for DMA devices that want to
 * share uncached memory with the processor core. The function return
 * is the virtual address and 'dma_handle' is the physical address.
 * Mostly stolen from the ARM port, with some changes for PowerPC.
 * -- Dan
 *
 * Reorganized to get rid of the arch-specific consistent_* functions
 * and provide non-coherent implementations for the DMA API. -Matt
 *
 * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
 * implementation. This is pulled straight from ARM and barely
 * modified. -Matt
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>

#include <asm/tlbflush.h>
#include <asm/mmu.h>

#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_START) \
				 >> PAGE_SHIFT)

static u64 get_coherent_dma_mask(struct device *dev)
{
	u64 mask = ~0ULL;

	if (dev) {
		mask = dev->coherent_dma_mask;

		/*
		 * Sanity check the DMA mask - it must be non-zero, and
		 * must be able to be satisfied by a DMA allocation.
		 */
		if (mask == 0) {
			dev_warn(dev, "coherent DMA mask is unset\n");
			return 0;
		}
	}

	return mask;
}
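
/*
 * Note: the coherent mask checked above is normally declared by the driver
 * through the generic DMA API before it allocates anything, e.g. (sketch
 * only; 'pdev' is a hypothetical platform device, not part of this file):
 *
 *	if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)))
 *		return -EIO;
 *
 * get_coherent_dma_mask() then picks that value up when the driver calls
 * dma_alloc_coherent() below.
 */
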
/*
 * This is the page table (2MB) covering uncached, DMA consistent allocations
 */
static pte_t *consistent_pte;
static DEFINE_SPINLOCK(consistent_lock);

/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *  struct vm_struct {
 *    struct metag_vm_region	region;
 *    unsigned long		flags;
 *    struct page		**pages;
 *    unsigned int		nr_pages;
 *    unsigned long		phys_addr;
 *  };
 *
 * get_vm_area() would then call metag_vm_region_alloc with an appropriate
 * struct metag_vm_region head (eg):
 *
 *  struct metag_vm_region vmalloc_head = {
 *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
 *	.vm_start	= VMALLOC_START,
 *	.vm_end		= VMALLOC_END,
 *  };
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.) I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling
 * metag_vm_region_alloc().
 */
struct metag_vm_region {
	struct list_head	vm_list;
	unsigned long		vm_start;
	unsigned long		vm_end;
	struct page		*vm_pages;
	int			vm_active;
};

static struct metag_vm_region consistent_head = {
	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_start	= CONSISTENT_START,
	.vm_end		= CONSISTENT_END,
};

static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region
						     *head, size_t size,
						     gfp_t gfp)
{
	unsigned long addr = head->vm_start, end = head->vm_end - size;
	unsigned long flags;
	struct metag_vm_region *c, *new;

	new = kmalloc(sizeof(struct metag_vm_region), gfp);
	if (!new)
		goto out;

	spin_lock_irqsave(&consistent_lock, flags);

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if ((addr + size) < addr)
			goto nospc;
		if ((addr + size) <= c->vm_start)
			goto found;
		addr = c->vm_end;
		if (addr > end)
			goto nospc;
	}

found:
	/*
	 * Insert this entry _before_ the one we found.
	 */
	list_add_tail(&new->vm_list, &c->vm_list);
	new->vm_start = addr;
	new->vm_end = addr + size;
	new->vm_active = 1;

	spin_unlock_irqrestore(&consistent_lock, flags);
	return new;

nospc:
	spin_unlock_irqrestore(&consistent_lock, flags);
	kfree(new);
out:
	return NULL;
}

static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region
						    *head, unsigned long addr)
{
	struct metag_vm_region *c;

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if (c->vm_active && c->vm_start == addr)
			goto out;
	}
	c = NULL;
out:
	return c;
}

/*
 * Allocate DMA-coherent memory space and return both the kernel remapped
 * virtual and bus address for that space.
 */
void *dma_alloc_coherent(struct device *dev, size_t size,
			 dma_addr_t *handle, gfp_t gfp)
{
	struct page *page;
	struct metag_vm_region *c;
	unsigned long order;
	u64 mask = get_coherent_dma_mask(dev);
	u64 limit;

	if (!consistent_pte) {
		pr_err("%s: not initialised\n", __func__);
		dump_stack();
		return NULL;
	}

	if (!mask)
		goto no_page;

	size = PAGE_ALIGN(size);
	limit = (mask + 1) & ~mask;
	if ((limit && size >= limit)
	    || size >= (CONSISTENT_END - CONSISTENT_START)) {
		pr_warn("coherent allocation too big (requested %#x mask %#Lx)\n",
			size, mask);
		return NULL;
	}

	order = get_order(size);

	if (mask != 0xffffffff)
		gfp |= GFP_DMA;

	page = alloc_pages(gfp, order);
	if (!page)
		goto no_page;

	/*
	 * Invalidate any data that might be lurking in the
	 * kernel direct-mapped region for device DMA.
	 */
	{
		void *kaddr = page_address(page);
		memset(kaddr, 0, size);
		flush_dcache_region(kaddr, size);
	}

	/*
	 * Allocate a virtual address in the consistent mapping region.
	 */
	c = metag_vm_region_alloc(&consistent_head, size,
				  gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
	if (c) {
		unsigned long vaddr = c->vm_start;
		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
		struct page *end = page + (1 << order);

		c->vm_pages = page;
		split_page(page, order);

		/*
		 * Set the "dma handle"
		 */
		*handle = page_to_bus(page);

		do {
			BUG_ON(!pte_none(*pte));

			SetPageReserved(page);
			set_pte_at(&init_mm, vaddr,
				   pte, mk_pte(page,
					       pgprot_writecombine
					       (PAGE_KERNEL)));
			page++;
			pte++;
			vaddr += PAGE_SIZE;
		} while (size -= PAGE_SIZE);

		/*
		 * Free the otherwise unused pages.
		 */
		while (page < end) {
			__free_page(page);
			page++;
		}

		return (void *)c->vm_start;
	}

	if (page)
		__free_pages(page, order);
no_page:
	return NULL;
}
EXPORT_SYMBOL(dma_alloc_coherent);
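
/*
 * Illustrative use from a driver (a sketch only; 'pdev' and 'RING_BYTES'
 * are hypothetical names, not part of this file):
 *
 *	dma_addr_t ring_dma;
 *	void *ring;
 *
 *	ring = dma_alloc_coherent(&pdev->dev, RING_BYTES, &ring_dma,
 *				  GFP_KERNEL);
 *	if (!ring)
 *		return -ENOMEM;
 *	(program ring_dma into the device; the CPU uses 'ring' directly)
 *
 * The buffer is returned with dma_free_coherent() below, passing back the
 * same size, virtual address and DMA handle.
 */
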
/*
 * free a page as defined by the above mapping.
 */
void dma_free_coherent(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle)
{
	struct metag_vm_region *c;
	unsigned long flags, addr;
	pte_t *ptep;

	size = PAGE_ALIGN(size);

	spin_lock_irqsave(&consistent_lock, flags);

	c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr);
	if (!c)
		goto no_area;

	c->vm_active = 0;
	if ((c->vm_end - c->vm_start) != size) {
		pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
		       __func__, c->vm_end - c->vm_start, size);
		dump_stack();
		size = c->vm_end - c->vm_start;
	}

	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
	addr = c->vm_start;
	do {
		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
		unsigned long pfn;

		ptep++;
		addr += PAGE_SIZE;

		if (!pte_none(pte) && pte_present(pte)) {
			pfn = pte_pfn(pte);

			if (pfn_valid(pfn)) {
				struct page *page = pfn_to_page(pfn);
				__free_reserved_page(page);
				continue;
			}
		}

		pr_crit("%s: bad page in kernel page table\n",
			__func__);
	} while (size -= PAGE_SIZE);

	flush_tlb_kernel_range(c->vm_start, c->vm_end);

	list_del(&c->vm_list);

	spin_unlock_irqrestore(&consistent_lock, flags);

	kfree(c);
	return;

no_area:
	spin_unlock_irqrestore(&consistent_lock, flags);
	pr_err("%s: trying to free invalid coherent area: %p\n",
	       __func__, vaddr);
	dump_stack();
}
EXPORT_SYMBOL(dma_free_coherent);

static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
		    void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int ret = -ENXIO;

	unsigned long flags, user_size, kern_size;
	struct metag_vm_region *c;

	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

	spin_lock_irqsave(&consistent_lock, flags);
	c = metag_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
	spin_unlock_irqrestore(&consistent_lock, flags);

	if (c) {
		unsigned long off = vma->vm_pgoff;

		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;

		if (off < kern_size &&
		    user_size <= (kern_size - off)) {
			ret = remap_pfn_range(vma, vma->vm_start,
					      page_to_pfn(c->vm_pages) + off,
					      user_size << PAGE_SHIFT,
					      vma->vm_page_prot);
		}
	}

	return ret;
}

int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
		      void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_coherent);

int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
}
EXPORT_SYMBOL(dma_mmap_writecombine);
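
/*
 * Illustrative use: a driver exposing such a buffer to userspace would
 * typically call one of the helpers above from its mmap file operation
 * (a sketch only; 'my_mmap', 'struct my_priv' and its fields are
 * hypothetical names, not part of this file):
 *
 *	static int my_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct my_priv *priv = file->private_data;
 *
 *		return dma_mmap_coherent(priv->dev, vma, priv->cpu_addr,
 *					 priv->dma_handle, priv->size);
 *	}
 *
 * dma_mmap_coherent() makes the user mapping uncached, while
 * dma_mmap_writecombine() allows write combining, matching the kernel-side
 * mapping set up in dma_alloc_coherent().
 */
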
/*
 * Initialise the consistent memory allocation.
 */
static int __init dma_alloc_init(void)
{
	pgd_t *pgd, *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;
	pte_t *pte;
	int ret = 0;

	do {
		int offset = pgd_index(CONSISTENT_START);
		pgd = pgd_offset(&init_mm, CONSISTENT_START);
		pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
		WARN_ON(!pmd_none(*pmd));

		pte = pte_alloc_kernel(pmd, CONSISTENT_START);
		if (!pte) {
			pr_err("%s: no pte tables\n", __func__);
			ret = -ENOMEM;
			break;
		}

		pgd_k = ((pgd_t *) mmu_get_base()) + offset;
		pud_k = pud_offset(pgd_k, CONSISTENT_START);
		pmd_k = pmd_offset(pud_k, CONSISTENT_START);
		set_pmd(pmd_k, *pmd);

		consistent_pte = pte;
	} while (0);

	return ret;
}
early_initcall(dma_alloc_init);

/*
 * make an area consistent to devices.
 */
void dma_sync_for_device(void *vaddr, size_t size, int dma_direction)
{
	/*
	 * Ensure any writes get through the write combiner. This is necessary
	 * even with DMA_FROM_DEVICE, or the write may dirty the cache after
	 * we've invalidated it and get written back during the DMA.
	 */

	barrier();

	switch (dma_direction) {
	case DMA_BIDIRECTIONAL:
		/*
		 * Writeback to ensure the device can see our latest changes
		 * and so that we have no dirty lines, and invalidate the
		 * cache lines too in preparation for receiving the buffer
		 * back (dma_sync_for_cpu) later.
		 */
		flush_dcache_region(vaddr, size);
		break;
	case DMA_TO_DEVICE:
		/*
		 * Writeback to ensure the device can see our latest changes.
		 * There's no need to invalidate as the device shouldn't write
		 * to the buffer.
		 */
		writeback_dcache_region(vaddr, size);
		break;
	case DMA_FROM_DEVICE:
		/*
		 * Invalidate to ensure we have no dirty lines that could get
		 * written back during the DMA. It's also safe to flush
		 * (writeback) here if necessary.
		 */
		invalidate_dcache_region(vaddr, size);
		break;
	case DMA_NONE:
		BUG();
	}

	wmb();
}
EXPORT_SYMBOL(dma_sync_for_device);

/*
 * make an area consistent to the core.
 */
void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
{
	/*
	 * Hardware L2 cache prefetch doesn't occur across 4K physical
	 * boundaries, however according to Documentation/DMA-API-HOWTO.txt
	 * kmalloc'd memory is DMA'able, so accesses in nearby memory could
	 * trigger a cache fill in the DMA buffer.
	 *
	 * This should never cause dirty lines, so a flush or invalidate
	 * should be safe to allow us to see data from the device.
	 */
	if (_meta_l2c_pf_is_enabled()) {
		switch (dma_direction) {
		case DMA_BIDIRECTIONAL:
		case DMA_FROM_DEVICE:
			invalidate_dcache_region(vaddr, size);
			break;
		case DMA_TO_DEVICE:
			/* The device shouldn't have written to the buffer */
			break;
		case DMA_NONE:
			BUG();
		}
	}

	rmb();
}
EXPORT_SYMBOL(dma_sync_for_cpu);
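
/*
 * Illustrative streaming-style use of the two sync helpers above (a sketch
 * only; 'buf', 'len' and the device start/wait steps are hypothetical):
 *
 *	dma_sync_for_device(buf, len, DMA_TO_DEVICE);
 *	(start the DMA transfer and wait for it to complete)
 *	dma_sync_for_cpu(buf, len, DMA_TO_DEVICE);
 *	(the CPU may now safely access 'buf' again)
 *
 * The same ordering applies for DMA_FROM_DEVICE: sync for the device before
 * starting the transfer, then sync for the CPU before reading the received
 * data.
 */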