TOMOYO Linux Cross Reference
Linux/arch/x86/kernel/amd_iommu.c

  1 /*
  2  * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
  3  * Author: Joerg Roedel <joerg.roedel@amd.com>
  4  *         Leo Duran <leo.duran@amd.com>
  5  *
  6  * This program is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 as published
  8  * by the Free Software Foundation.
  9  *
 10  * This program is distributed in the hope that it will be useful,
 11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13  * GNU General Public License for more details.
 14  *
 15  * You should have received a copy of the GNU General Public License
 16  * along with this program; if not, write to the Free Software
 17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 18  */
 19 
 20 #include <linux/pci.h>
 21 #include <linux/pci-ats.h>
 22 #include <linux/bitmap.h>
 23 #include <linux/slab.h>
 24 #include <linux/debugfs.h>
 25 #include <linux/scatterlist.h>
 26 #include <linux/dma-mapping.h>
 27 #include <linux/iommu-helper.h>
 28 #include <linux/iommu.h>
 29 #include <linux/delay.h>
 30 #include <asm/proto.h>
 31 #include <asm/iommu.h>
 32 #include <asm/gart.h>
 33 #include <asm/dma.h>
 34 #include <asm/amd_iommu_proto.h>
 35 #include <asm/amd_iommu_types.h>
 36 #include <asm/amd_iommu.h>
 37 
 38 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 39 
 40 #define LOOP_TIMEOUT    100000
 41 
 42 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 43 
 44 /* A list of preallocated protection domains */
 45 static LIST_HEAD(iommu_pd_list);
 46 static DEFINE_SPINLOCK(iommu_pd_list_lock);
 47 
 48 /*
 49  * Domain for untranslated devices - only allocated
 50  * if iommu=pt is passed on the kernel command line.
 51  */
 52 static struct protection_domain *pt_domain;
 53 
 54 static struct iommu_ops amd_iommu_ops;
 55 
 56 static struct dma_map_ops amd_iommu_dma_ops;
 57 
 58 /*
 59  * general struct to manage commands sent to an IOMMU
 60  */
 61 struct iommu_cmd {
 62         u32 data[4];
 63 };
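/*
 * Illustrative example only (devid and iommu are placeholders; it mirrors
 * build_inv_dte() and iommu_queue_command() further down in this file):
 * a command consists of four 32-bit words, and CMD_SET_TYPE() above puts
 * the opcode into bits 31:28 of data[1].
 *
 *	struct iommu_cmd cmd;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.data[0] = devid;
 *	CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
 *	iommu_queue_command(iommu, &cmd);
 */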
 64 
 65 static void update_domain(struct protection_domain *domain);
 66 
 67 /****************************************************************************
 68  *
 69  * Helper functions
 70  *
 71  ****************************************************************************/
 72 
 73 static inline u16 get_device_id(struct device *dev)
 74 {
 75         struct pci_dev *pdev = to_pci_dev(dev);
 76 
 77         return calc_devid(pdev->bus->number, pdev->devfn);
 78 }
 79 
 80 static struct iommu_dev_data *get_dev_data(struct device *dev)
 81 {
 82         return dev->archdata.iommu;
 83 }
 84 
 85 /*
 86  * Traverse the list of preallocated protection domains to find the
 87  * domain for a specific device.
 88  */
 89 static struct dma_ops_domain *find_protection_domain(u16 devid)
 90 {
 91         struct dma_ops_domain *entry, *ret = NULL;
 92         unsigned long flags;
 93         u16 alias = amd_iommu_alias_table[devid];
 94 
 95         if (list_empty(&iommu_pd_list))
 96                 return NULL;
 97 
 98         spin_lock_irqsave(&iommu_pd_list_lock, flags);
 99 
100         list_for_each_entry(entry, &iommu_pd_list, list) {
101                 if (entry->target_dev == devid ||
102                     entry->target_dev == alias) {
103                         ret = entry;
104                         break;
105                 }
106         }
107 
108         spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
109 
110         return ret;
111 }
112 
113 /*
114  * This function checks if the driver got a valid device from the caller to
115  * avoid dereferencing invalid pointers.
116  */
117 static bool check_device(struct device *dev)
118 {
119         u16 devid;
120 
121         if (!dev || !dev->dma_mask)
122                 return false;
123 
124         /* Not a PCI device */
125         if (dev->bus != &pci_bus_type)
126                 return false;
127 
128         devid = get_device_id(dev);
129 
130         /* Out of our scope? */
131         if (devid > amd_iommu_last_bdf)
132                 return false;
133 
134         if (amd_iommu_rlookup_table[devid] == NULL)
135                 return false;
136 
137         return true;
138 }
139 
140 static int iommu_init_device(struct device *dev)
141 {
142         struct iommu_dev_data *dev_data;
143         struct pci_dev *pdev;
144         u16 devid, alias;
145 
146         if (dev->archdata.iommu)
147                 return 0;
148 
149         dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
150         if (!dev_data)
151                 return -ENOMEM;
152 
153         dev_data->dev = dev;
154 
155         devid = get_device_id(dev);
156         alias = amd_iommu_alias_table[devid];
157         pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff);
158         if (pdev)
159                 dev_data->alias = &pdev->dev;
160         else {
161                 kfree(dev_data);
162                 return -ENOTSUPP;
163         }
164 
165         atomic_set(&dev_data->bind, 0);
166 
167         dev->archdata.iommu = dev_data;
168 
169 
170         return 0;
171 }
172 
173 static void iommu_ignore_device(struct device *dev)
174 {
175         u16 devid, alias;
176 
177         devid = get_device_id(dev);
178         alias = amd_iommu_alias_table[devid];
179 
180         memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
181         memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
182 
183         amd_iommu_rlookup_table[devid] = NULL;
184         amd_iommu_rlookup_table[alias] = NULL;
185 }
186 
187 static void iommu_uninit_device(struct device *dev)
188 {
189         kfree(dev->archdata.iommu);
190 }
191 
192 void __init amd_iommu_uninit_devices(void)
193 {
194         struct pci_dev *pdev = NULL;
195 
196         for_each_pci_dev(pdev) {
197 
198                 if (!check_device(&pdev->dev))
199                         continue;
200 
201                 iommu_uninit_device(&pdev->dev);
202         }
203 }
204 
205 int __init amd_iommu_init_devices(void)
206 {
207         struct pci_dev *pdev = NULL;
208         int ret = 0;
209 
210         for_each_pci_dev(pdev) {
211 
212                 if (!check_device(&pdev->dev))
213                         continue;
214 
215                 ret = iommu_init_device(&pdev->dev);
216                 if (ret == -ENOTSUPP)
217                         iommu_ignore_device(&pdev->dev);
218                 else if (ret)
219                         goto out_free;
220         }
221 
222         return 0;
223 
224 out_free:
225 
226         amd_iommu_uninit_devices();
227 
228         return ret;
229 }
230 #ifdef CONFIG_AMD_IOMMU_STATS
231 
232 /*
233  * Initialization code for statistics collection
234  */
235 
236 DECLARE_STATS_COUNTER(compl_wait);
237 DECLARE_STATS_COUNTER(cnt_map_single);
238 DECLARE_STATS_COUNTER(cnt_unmap_single);
239 DECLARE_STATS_COUNTER(cnt_map_sg);
240 DECLARE_STATS_COUNTER(cnt_unmap_sg);
241 DECLARE_STATS_COUNTER(cnt_alloc_coherent);
242 DECLARE_STATS_COUNTER(cnt_free_coherent);
243 DECLARE_STATS_COUNTER(cross_page);
244 DECLARE_STATS_COUNTER(domain_flush_single);
245 DECLARE_STATS_COUNTER(domain_flush_all);
246 DECLARE_STATS_COUNTER(alloced_io_mem);
247 DECLARE_STATS_COUNTER(total_map_requests);
248 
249 static struct dentry *stats_dir;
250 static struct dentry *de_fflush;
251 
252 static void amd_iommu_stats_add(struct __iommu_counter *cnt)
253 {
254         if (stats_dir == NULL)
255                 return;
256 
257         cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
258                                        &cnt->value);
259 }
260 
261 static void amd_iommu_stats_init(void)
262 {
263         stats_dir = debugfs_create_dir("amd-iommu", NULL);
264         if (stats_dir == NULL)
265                 return;
266 
267         de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
268                                          (u32 *)&amd_iommu_unmap_flush);
269 
270         amd_iommu_stats_add(&compl_wait);
271         amd_iommu_stats_add(&cnt_map_single);
272         amd_iommu_stats_add(&cnt_unmap_single);
273         amd_iommu_stats_add(&cnt_map_sg);
274         amd_iommu_stats_add(&cnt_unmap_sg);
275         amd_iommu_stats_add(&cnt_alloc_coherent);
276         amd_iommu_stats_add(&cnt_free_coherent);
277         amd_iommu_stats_add(&cross_page);
278         amd_iommu_stats_add(&domain_flush_single);
279         amd_iommu_stats_add(&domain_flush_all);
280         amd_iommu_stats_add(&alloced_io_mem);
281         amd_iommu_stats_add(&total_map_requests);
282 }
283 
284 #endif
285 
286 /****************************************************************************
287  *
288  * Interrupt handling functions
289  *
290  ****************************************************************************/
291 
292 static void dump_dte_entry(u16 devid)
293 {
294         int i;
295 
296         for (i = 0; i < 8; ++i)
297                 pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
298                         amd_iommu_dev_table[devid].data[i]);
299 }
300 
301 static void dump_command(unsigned long phys_addr)
302 {
303         struct iommu_cmd *cmd = phys_to_virt(phys_addr);
304         int i;
305 
306         for (i = 0; i < 4; ++i)
307                 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
308 }
309 
310 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
311 {
312         u32 *event = __evt;
313         int type  = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
314         int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
315         int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
316         int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
317         u64 address = (u64)(((u64)event[3]) << 32) | event[2];
318 
319         printk(KERN_ERR "AMD-Vi: Event logged [");
320 
321         switch (type) {
322         case EVENT_TYPE_ILL_DEV:
323                 printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
324                        "address=0x%016llx flags=0x%04x]\n",
325                        PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
326                        address, flags);
327                 dump_dte_entry(devid);
328                 break;
329         case EVENT_TYPE_IO_FAULT:
330                 printk("IO_PAGE_FAULT device=%02x:%02x.%x "
331                        "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
332                        PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
333                        domid, address, flags);
334                 break;
335         case EVENT_TYPE_DEV_TAB_ERR:
336                 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
337                        "address=0x%016llx flags=0x%04x]\n",
338                        PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
339                        address, flags);
340                 break;
341         case EVENT_TYPE_PAGE_TAB_ERR:
342                 printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
343                        "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
344                        PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
345                        domid, address, flags);
346                 break;
347         case EVENT_TYPE_ILL_CMD:
348                 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
349                 dump_command(address);
350                 break;
351         case EVENT_TYPE_CMD_HARD_ERR:
352                 printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
353                        "flags=0x%04x]\n", address, flags);
354                 break;
355         case EVENT_TYPE_IOTLB_INV_TO:
356                 printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
357                        "address=0x%016llx]\n",
358                        PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
359                        address);
360                 break;
361         case EVENT_TYPE_INV_DEV_REQ:
362                 printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
363                        "address=0x%016llx flags=0x%04x]\n",
364                        PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
365                        address, flags);
366                 break;
367         default:
368                 printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
369         }
370 }
371 
372 static void iommu_poll_events(struct amd_iommu *iommu)
373 {
374         u32 head, tail;
375         unsigned long flags;
376 
377         spin_lock_irqsave(&iommu->lock, flags);
378 
379         head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
380         tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
381 
382         while (head != tail) {
383                 iommu_print_event(iommu, iommu->evt_buf + head);
384                 head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
385         }
386 
387         writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
388 
389         spin_unlock_irqrestore(&iommu->lock, flags);
390 }
391 
392 irqreturn_t amd_iommu_int_thread(int irq, void *data)
393 {
394         struct amd_iommu *iommu;
395 
396         for_each_iommu(iommu)
397                 iommu_poll_events(iommu);
398 
399         return IRQ_HANDLED;
400 }
401 
402 irqreturn_t amd_iommu_int_handler(int irq, void *data)
403 {
404         return IRQ_WAKE_THREAD;
405 }
406 
407 /****************************************************************************
408  *
409  * IOMMU command queuing functions
410  *
411  ****************************************************************************/
412 
413 static int wait_on_sem(volatile u64 *sem)
414 {
415         int i = 0;
416 
417         while (*sem == 0 && i < LOOP_TIMEOUT) {
418                 udelay(1);
419                 i += 1;
420         }
421 
422         if (i == LOOP_TIMEOUT) {
423                 pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
424                 return -EIO;
425         }
426 
427         return 0;
428 }
429 
430 static void copy_cmd_to_buffer(struct amd_iommu *iommu,
431                                struct iommu_cmd *cmd,
432                                u32 tail)
433 {
434         u8 *target;
435 
436         target = iommu->cmd_buf + tail;
437         tail   = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
438 
439         /* Copy command to buffer */
440         memcpy(target, cmd, sizeof(*cmd));
441 
442         /* Tell the IOMMU about it */
443         writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
444 }
445 
446 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
447 {
448         WARN_ON(address & 0x7ULL);
449 
450         memset(cmd, 0, sizeof(*cmd));
451         cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
452         cmd->data[1] = upper_32_bits(__pa(address));
453         cmd->data[2] = 1;
454         CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
455 }
456 
457 static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
458 {
459         memset(cmd, 0, sizeof(*cmd));
460         cmd->data[0] = devid;
461         CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
462 }
463 
464 static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
465                                   size_t size, u16 domid, int pde)
466 {
467         u64 pages;
468         int s;
469 
470         pages = iommu_num_pages(address, size, PAGE_SIZE);
471         s     = 0;
472 
473         if (pages > 1) {
474                 /*
475                  * If we have to flush more than one page, flush all
476                  * TLB entries for this domain
477                  */
478                 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
479                 s = 1;
480         }
481 
482         address &= PAGE_MASK;
483 
484         memset(cmd, 0, sizeof(*cmd));
485         cmd->data[1] |= domid;
486         cmd->data[2]  = lower_32_bits(address);
487         cmd->data[3]  = upper_32_bits(address);
488         CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
489         if (s) /* size bit - we flush more than one 4kb page */
490                 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
491         if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
492                 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
493 }
494 
495 static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
496                                   u64 address, size_t size)
497 {
498         u64 pages;
499         int s;
500 
501         pages = iommu_num_pages(address, size, PAGE_SIZE);
502         s     = 0;
503 
504         if (pages > 1) {
505                 /*
506                  * If we have to flush more than one page, flush all
507                  * TLB entries for this domain
508                  */
509                 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
510                 s = 1;
511         }
512 
513         address &= PAGE_MASK;
514 
515         memset(cmd, 0, sizeof(*cmd));
516         cmd->data[0]  = devid;
517         cmd->data[0] |= (qdep & 0xff) << 24;
518         cmd->data[1]  = devid;
519         cmd->data[2]  = lower_32_bits(address);
520         cmd->data[3]  = upper_32_bits(address);
521         CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
522         if (s)
523                 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
524 }
525 
526 static void build_inv_all(struct iommu_cmd *cmd)
527 {
528         memset(cmd, 0, sizeof(*cmd));
529         CMD_SET_TYPE(cmd, CMD_INV_ALL);
530 }
531 
532 /*
533  * Writes the command to the IOMMU's command buffer and informs the
534  * hardware about the new command.
535  */
536 static int iommu_queue_command_sync(struct amd_iommu *iommu,
537                                     struct iommu_cmd *cmd,
538                                     bool sync)
539 {
540         u32 left, tail, head, next_tail;
541         unsigned long flags;
542 
543         WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
544 
545 again:
546         spin_lock_irqsave(&iommu->lock, flags);
547 
548         head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
549         tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
550         next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
551         left      = (head - next_tail) % iommu->cmd_buf_size;
552 
553         if (left <= 2) {
554                 struct iommu_cmd sync_cmd;
555                 volatile u64 sem = 0;
556                 int ret;
557 
558                 build_completion_wait(&sync_cmd, (u64)&sem);
559                 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
560 
561                 spin_unlock_irqrestore(&iommu->lock, flags);
562 
563                 if ((ret = wait_on_sem(&sem)) != 0)
564                         return ret;
565 
566                 goto again;
567         }
568 
569         copy_cmd_to_buffer(iommu, cmd, tail);
570 
571         /* We need to sync now to make sure all commands are processed */
572         iommu->need_sync = sync;
573 
574         spin_unlock_irqrestore(&iommu->lock, flags);
575 
576         return 0;
577 }
578 
579 static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
580 {
581         return iommu_queue_command_sync(iommu, cmd, true);
582 }
583 
584 /*
585  * This function queues a completion wait command into the command
586  * buffer of an IOMMU
587  */
588 static int iommu_completion_wait(struct amd_iommu *iommu)
589 {
590         struct iommu_cmd cmd;
591         volatile u64 sem = 0;
592         int ret;
593 
594         if (!iommu->need_sync)
595                 return 0;
596 
597         build_completion_wait(&cmd, (u64)&sem);
598 
599         ret = iommu_queue_command_sync(iommu, &cmd, false);
600         if (ret)
601                 return ret;
602 
603         return wait_on_sem(&sem);
604 }
605 
606 static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
607 {
608         struct iommu_cmd cmd;
609 
610         build_inv_dte(&cmd, devid);
611 
612         return iommu_queue_command(iommu, &cmd);
613 }
614 
615 static void iommu_flush_dte_all(struct amd_iommu *iommu)
616 {
617         u32 devid;
618 
619         for (devid = 0; devid <= 0xffff; ++devid)
620                 iommu_flush_dte(iommu, devid);
621 
622         iommu_completion_wait(iommu);
623 }
624 
625 /*
626  * This function uses heavy locking and may disable irqs for some time. But
627  * this is not an issue because it is only called during resume.
628  */
629 static void iommu_flush_tlb_all(struct amd_iommu *iommu)
630 {
631         u32 dom_id;
632 
633         for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
634                 struct iommu_cmd cmd;
635                 build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
636                                       dom_id, 1);
637                 iommu_queue_command(iommu, &cmd);
638         }
639 
640         iommu_completion_wait(iommu);
641 }
642 
643 static void iommu_flush_all(struct amd_iommu *iommu)
644 {
645         struct iommu_cmd cmd;
646 
647         build_inv_all(&cmd);
648 
649         iommu_queue_command(iommu, &cmd);
650         iommu_completion_wait(iommu);
651 }
652 
653 void iommu_flush_all_caches(struct amd_iommu *iommu)
654 {
655         if (iommu_feature(iommu, FEATURE_IA)) {
656                 iommu_flush_all(iommu);
657         } else {
658                 iommu_flush_dte_all(iommu);
659                 iommu_flush_tlb_all(iommu);
660         }
661 }
662 
663 /*
664  * Sends the command to flush the on-device TLB
665  */
666 static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
667 {
668         struct pci_dev *pdev = to_pci_dev(dev);
669         struct amd_iommu *iommu;
670         struct iommu_cmd cmd;
671         u16 devid;
672         int qdep;
673 
674         qdep  = pci_ats_queue_depth(pdev);
675         devid = get_device_id(dev);
676         iommu = amd_iommu_rlookup_table[devid];
677 
678         build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
679 
680         return iommu_queue_command(iommu, &cmd);
681 }
682 
683 /*
684  * Sends the command to invalidate a device table entry
685  */
686 static int device_flush_dte(struct device *dev)
687 {
688         struct amd_iommu *iommu;
689         struct pci_dev *pdev;
690         u16 devid;
691         int ret;
692 
693         pdev  = to_pci_dev(dev);
694         devid = get_device_id(dev);
695         iommu = amd_iommu_rlookup_table[devid];
696 
697         ret = iommu_flush_dte(iommu, devid);
698         if (ret)
699                 return ret;
700 
701         if (pci_ats_enabled(pdev))
702                 ret = device_flush_iotlb(dev, 0, ~0UL);
703 
704         return ret;
705 }
706 
707 /*
708  * TLB invalidation function which is called from the mapping functions.
709  * It invalidates a single PTE if the range to flush is within a single
710  * page. Otherwise it flushes the whole TLB of the IOMMU.
711  */
712 static void __domain_flush_pages(struct protection_domain *domain,
713                                  u64 address, size_t size, int pde)
714 {
715         struct iommu_dev_data *dev_data;
716         struct iommu_cmd cmd;
717         int ret = 0, i;
718 
719         build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
720 
721         for (i = 0; i < amd_iommus_present; ++i) {
722                 if (!domain->dev_iommu[i])
723                         continue;
724 
725                 /*
726                  * Devices of this domain are behind this IOMMU;
727                  * we need a TLB flush.
728                  */
729                 ret |= iommu_queue_command(amd_iommus[i], &cmd);
730         }
731 
732         list_for_each_entry(dev_data, &domain->dev_list, list) {
733                 struct pci_dev *pdev = to_pci_dev(dev_data->dev);
734 
735                 if (!pci_ats_enabled(pdev))
736                         continue;
737 
738                 ret |= device_flush_iotlb(dev_data->dev, address, size);
739         }
740 
741         WARN_ON(ret);
742 }
743 
744 static void domain_flush_pages(struct protection_domain *domain,
745                                u64 address, size_t size)
746 {
747         __domain_flush_pages(domain, address, size, 0);
748 }
749 
750 /* Flush the whole IO/TLB for a given protection domain */
751 static void domain_flush_tlb(struct protection_domain *domain)
752 {
753         __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
754 }
755 
756 /* Flush the whole IO/TLB for a given protection domain - including PDE */
757 static void domain_flush_tlb_pde(struct protection_domain *domain)
758 {
759         __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
760 }
761 
762 static void domain_flush_complete(struct protection_domain *domain)
763 {
764         int i;
765 
766         for (i = 0; i < amd_iommus_present; ++i) {
767                 if (!domain->dev_iommu[i])
768                         continue;
769 
770                 /*
771                  * Devices of this domain are behind this IOMMU;
772                  * we need to wait for completion of all commands.
773                  */
774                 iommu_completion_wait(amd_iommus[i]);
775         }
776 }
777 
778 
779 /*
780  * This function flushes the DTEs for all devices in the domain
781  */
782 static void domain_flush_devices(struct protection_domain *domain)
783 {
784         struct iommu_dev_data *dev_data;
785 
786         list_for_each_entry(dev_data, &domain->dev_list, list)
787                 device_flush_dte(dev_data->dev);
788 }
789 
790 /****************************************************************************
791  *
792  * The functions below are used to create the page table mappings for
793  * unity-mapped regions.
794  *
795  ****************************************************************************/
796 
797 /*
798  * This function is used to add another level to an IO page table. Adding
799  * another level increases the size of the address space by 9 bits, up to a
800  * maximum of 64 bits.
801  */
802 static bool increase_address_space(struct protection_domain *domain,
803                                    gfp_t gfp)
804 {
805         u64 *pte;
806 
807         if (domain->mode == PAGE_MODE_6_LEVEL)
808                 /* address space is already 64 bits wide */
809                 return false;
810 
811         pte = (void *)get_zeroed_page(gfp);
812         if (!pte)
813                 return false;
814 
815         *pte             = PM_LEVEL_PDE(domain->mode,
816                                         virt_to_phys(domain->pt_root));
817         domain->pt_root  = pte;
818         domain->mode    += 1;
819         domain->updated  = true;
820 
821         return true;
822 }
823 
824 static u64 *alloc_pte(struct protection_domain *domain,
825                       unsigned long address,
826                       unsigned long page_size,
827                       u64 **pte_page,
828                       gfp_t gfp)
829 {
830         int level, end_lvl;
831         u64 *pte, *page;
832 
833         BUG_ON(!is_power_of_2(page_size));
834 
835         while (address > PM_LEVEL_SIZE(domain->mode))
836                 increase_address_space(domain, gfp);
837 
838         level   = domain->mode - 1;
839         pte     = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
840         address = PAGE_SIZE_ALIGN(address, page_size);
841         end_lvl = PAGE_SIZE_LEVEL(page_size);
842 
843         while (level > end_lvl) {
844                 if (!IOMMU_PTE_PRESENT(*pte)) {
845                         page = (u64 *)get_zeroed_page(gfp);
846                         if (!page)
847                                 return NULL;
848                         *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
849                 }
850 
851                 /* No level skipping support yet */
852                 if (PM_PTE_LEVEL(*pte) != level)
853                         return NULL;
854 
855                 level -= 1;
856 
857                 pte = IOMMU_PTE_PAGE(*pte);
858 
859                 if (pte_page && level == end_lvl)
860                         *pte_page = pte;
861 
862                 pte = &pte[PM_LEVEL_INDEX(level, address)];
863         }
864 
865         return pte;
866 }
867 
868 /*
869  * This function checks if there is a PTE for a given dma address. If
870  * there is one, it returns a pointer to it.
871  */
872 static u64 *fetch_pte(struct protection_domain *domain, unsigned long address)
873 {
874         int level;
875         u64 *pte;
876 
877         if (address > PM_LEVEL_SIZE(domain->mode))
878                 return NULL;
879 
880         level   =  domain->mode - 1;
881         pte     = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
882 
883         while (level > 0) {
884 
885                 /* Not Present */
886                 if (!IOMMU_PTE_PRESENT(*pte))
887                         return NULL;
888 
889                 /* Large PTE */
890                 if (PM_PTE_LEVEL(*pte) == 0x07) {
891                         unsigned long pte_mask, __pte;
892 
893                         /*
894                          * If we have a series of large PTEs, make
895                          * sure to return a pointer to the first one.
896                          */
897                         pte_mask = PTE_PAGE_SIZE(*pte);
898                         pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
899                         __pte    = ((unsigned long)pte) & pte_mask;
900 
901                         return (u64 *)__pte;
902                 }
903 
904                 /* No level skipping support yet */
905                 if (PM_PTE_LEVEL(*pte) != level)
906                         return NULL;
907 
908                 level -= 1;
909 
910                 /* Walk to the next level */
911                 pte = IOMMU_PTE_PAGE(*pte);
912                 pte = &pte[PM_LEVEL_INDEX(level, address)];
913         }
914 
915         return pte;
916 }
917 
918 /*
919  * Generic mapping function. It maps a physical address into a DMA
920  * address space. It allocates the page table pages if necessary.
921  * In the future it can be extended to a generic mapping function
922  * supporting all features of AMD IOMMU page tables like level skipping
923  * and full 64 bit address spaces.
924  */
925 static int iommu_map_page(struct protection_domain *dom,
926                           unsigned long bus_addr,
927                           unsigned long phys_addr,
928                           int prot,
929                           unsigned long page_size)
930 {
931         u64 __pte, *pte;
932         int i, count;
933 
934         if (!(prot & IOMMU_PROT_MASK))
935                 return -EINVAL;
936 
937         bus_addr  = PAGE_ALIGN(bus_addr);
938         phys_addr = PAGE_ALIGN(phys_addr);
939         count     = PAGE_SIZE_PTE_COUNT(page_size);
940         pte       = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
941 
942         for (i = 0; i < count; ++i)
943                 if (IOMMU_PTE_PRESENT(pte[i]))
944                         return -EBUSY;
945 
946         if (page_size > PAGE_SIZE) {
947                 __pte = PAGE_SIZE_PTE(phys_addr, page_size);
948                 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
949         } else
950                 __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
951 
952         if (prot & IOMMU_PROT_IR)
953                 __pte |= IOMMU_PTE_IR;
954         if (prot & IOMMU_PROT_IW)
955                 __pte |= IOMMU_PTE_IW;
956 
957         for (i = 0; i < count; ++i)
958                 pte[i] = __pte;
959 
960         update_domain(dom);
961 
962         return 0;
963 }
964 
965 static unsigned long iommu_unmap_page(struct protection_domain *dom,
966                                       unsigned long bus_addr,
967                                       unsigned long page_size)
968 {
969         unsigned long long unmap_size, unmapped;
970         u64 *pte;
971 
972         BUG_ON(!is_power_of_2(page_size));
973 
974         unmapped = 0;
975 
976         while (unmapped < page_size) {
977 
978                 pte = fetch_pte(dom, bus_addr);
979 
980                 if (!pte) {
981                         /*
982                          * No PTE for this address;
983                          * move forward in 4kb steps.
984                          */
985                         unmap_size = PAGE_SIZE;
986                 } else if (PM_PTE_LEVEL(*pte) == 0) {
987                         /* 4kb PTE found for this address */
988                         unmap_size = PAGE_SIZE;
989                         *pte       = 0ULL;
990                 } else {
991                         int count, i;
992 
993                         /* Large PTE found which maps this address */
994                         unmap_size = PTE_PAGE_SIZE(*pte);
995                         count      = PAGE_SIZE_PTE_COUNT(unmap_size);
996                         for (i = 0; i < count; i++)
997                                 pte[i] = 0ULL;
998                 }
999 
1000                 bus_addr  = (bus_addr & ~(unmap_size - 1)) + unmap_size;
1001                 unmapped += unmap_size;
1002         }
1003 
1004         BUG_ON(!is_power_of_2(unmapped));
1005 
1006         return unmapped;
1007 }
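/*
 * Illustrative example only (iova, paddr and dma_dom are placeholders):
 * map a single read/write 4kb page with iommu_map_page() above and tear
 * the mapping down again with iommu_unmap_page().
 *
 *	int ret;
 *
 *	ret = iommu_map_page(&dma_dom->domain, iova, paddr,
 *			     IOMMU_PROT_IR | IOMMU_PROT_IW, PAGE_SIZE);
 *	if (!ret)
 *		iommu_unmap_page(&dma_dom->domain, iova, PAGE_SIZE);
 */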
1008 
1009 /*
1010  * This function checks if a specific unity mapping entry is needed for
1011  * this specific IOMMU.
1012  */
1013 static int iommu_for_unity_map(struct amd_iommu *iommu,
1014                                struct unity_map_entry *entry)
1015 {
1016         u16 bdf, i;
1017 
1018         for (i = entry->devid_start; i <= entry->devid_end; ++i) {
1019                 bdf = amd_iommu_alias_table[i];
1020                 if (amd_iommu_rlookup_table[bdf] == iommu)
1021                         return 1;
1022         }
1023 
1024         return 0;
1025 }
1026 
1027 /*
1028  * This function actually applies the mapping to the page table of the
1029  * dma_ops domain.
1030  */
1031 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
1032                              struct unity_map_entry *e)
1033 {
1034         u64 addr;
1035         int ret;
1036 
1037         for (addr = e->address_start; addr < e->address_end;
1038              addr += PAGE_SIZE) {
1039                 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
1040                                      PAGE_SIZE);
1041                 if (ret)
1042                         return ret;
1043                 /*
1044                  * If the unity mapping is in the aperture range, mark the
1045                  * page as allocated in the aperture.
1046                  */
1047                 if (addr < dma_dom->aperture_size)
1048                         __set_bit(addr >> PAGE_SHIFT,
1049                                   dma_dom->aperture[0]->bitmap);
1050         }
1051 
1052         return 0;
1053 }
1054 
1055 /*
1056  * Init the unity mappings for a specific IOMMU in the system
1057  *
1058  * Basically iterates over all unity mapping entries and applies them to
1059  * the default DMA domain of that IOMMU if necessary.
1060  */
1061 static int iommu_init_unity_mappings(struct amd_iommu *iommu)
1062 {
1063         struct unity_map_entry *entry;
1064         int ret;
1065 
1066         list_for_each_entry(entry, &amd_iommu_unity_map, list) {
1067                 if (!iommu_for_unity_map(iommu, entry))
1068                         continue;
1069                 ret = dma_ops_unity_map(iommu->default_dom, entry);
1070                 if (ret)
1071                         return ret;
1072         }
1073 
1074         return 0;
1075 }
1076 
1077 /*
1078  * Inits the unity mappings required for a specific device
1079  */
1080 static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
1081                                           u16 devid)
1082 {
1083         struct unity_map_entry *e;
1084         int ret;
1085 
1086         list_for_each_entry(e, &amd_iommu_unity_map, list) {
1087                 if (!(devid >= e->devid_start && devid <= e->devid_end))
1088                         continue;
1089                 ret = dma_ops_unity_map(dma_dom, e);
1090                 if (ret)
1091                         return ret;
1092         }
1093 
1094         return 0;
1095 }
1096 
1097 /****************************************************************************
1098  *
1099  * The next functions belong to the address allocator for the dma_ops
1100  * interface functions. They work like the allocators in the other IOMMU
1101  * drivers. Its basically a bitmap which marks the allocated pages in
1102  * the aperture. Maybe it could be enhanced in the future to a more
1103  * efficient allocator.
1104  *
1105  ****************************************************************************/
1106 
1107 /*
1108  * The address allocator core functions.
1109  *
1110  * called with domain->lock held
1111  */
1112 
1113 /*
1114  * Used to reserve address ranges in the aperture (e.g. for exclusion
1115  * ranges).
1116  */
1117 static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1118                                       unsigned long start_page,
1119                                       unsigned int pages)
1120 {
1121         unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1122 
1123         if (start_page + pages > last_page)
1124                 pages = last_page - start_page;
1125 
1126         for (i = start_page; i < start_page + pages; ++i) {
1127                 int index = i / APERTURE_RANGE_PAGES;
1128                 int page  = i % APERTURE_RANGE_PAGES;
1129                 __set_bit(page, dom->aperture[index]->bitmap);
1130         }
1131 }
1132 
1133 /*
1134  * This function is used to add a new aperture range to an existing
1135  * aperture in case of dma_ops domain allocation or address allocation
1136  * failure.
1137  */
1138 static int alloc_new_range(struct dma_ops_domain *dma_dom,
1139                            bool populate, gfp_t gfp)
1140 {
1141         int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
1142         struct amd_iommu *iommu;
1143         unsigned long i;
1144 
1145 #ifdef CONFIG_IOMMU_STRESS
1146         populate = false;
1147 #endif
1148 
1149         if (index >= APERTURE_MAX_RANGES)
1150                 return -ENOMEM;
1151 
1152         dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
1153         if (!dma_dom->aperture[index])
1154                 return -ENOMEM;
1155 
1156         dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
1157         if (!dma_dom->aperture[index]->bitmap)
1158                 goto out_free;
1159 
1160         dma_dom->aperture[index]->offset = dma_dom->aperture_size;
1161 
1162         if (populate) {
1163                 unsigned long address = dma_dom->aperture_size;
1164                 int i, num_ptes = APERTURE_RANGE_PAGES / 512;
1165                 u64 *pte, *pte_page;
1166 
1167                 for (i = 0; i < num_ptes; ++i) {
1168                         pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
1169                                         &pte_page, gfp);
1170                         if (!pte)
1171                                 goto out_free;
1172 
1173                         dma_dom->aperture[index]->pte_pages[i] = pte_page;
1174 
1175                         address += APERTURE_RANGE_SIZE / 64;
1176                 }
1177         }
1178 
1179         dma_dom->aperture_size += APERTURE_RANGE_SIZE;
1180 
1181         /* Initialize the exclusion range if necessary */
1182         for_each_iommu(iommu) {
1183                 if (iommu->exclusion_start &&
1184                     iommu->exclusion_start >= dma_dom->aperture[index]->offset
1185                     && iommu->exclusion_start < dma_dom->aperture_size) {
1186                         unsigned long startpage;
1187                         int pages = iommu_num_pages(iommu->exclusion_start,
1188                                                     iommu->exclusion_length,
1189                                                     PAGE_SIZE);
1190                         startpage = iommu->exclusion_start >> PAGE_SHIFT;
1191                         dma_ops_reserve_addresses(dma_dom, startpage, pages);
1192                 }
1193         }
1194 
1195         /*
1196          * Check for areas already mapped as present in the new aperture
1197          * range and mark those pages as reserved in the allocator. Such
1198          * mappings may already exist as a result of requested unity
1199          * mappings for devices.
1200          */
1201         for (i = dma_dom->aperture[index]->offset;
1202              i < dma_dom->aperture_size;
1203              i += PAGE_SIZE) {
1204                 u64 *pte = fetch_pte(&dma_dom->domain, i);
1205                 if (!pte || !IOMMU_PTE_PRESENT(*pte))
1206                         continue;
1207 
1208                 dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1);
1209         }
1210 
1211         update_domain(&dma_dom->domain);
1212 
1213         return 0;
1214 
1215 out_free:
1216         update_domain(&dma_dom->domain);
1217 
1218         free_page((unsigned long)dma_dom->aperture[index]->bitmap);
1219 
1220         kfree(dma_dom->aperture[index]);
1221         dma_dom->aperture[index] = NULL;
1222 
1223         return -ENOMEM;
1224 }
1225 
1226 static unsigned long dma_ops_area_alloc(struct device *dev,
1227                                         struct dma_ops_domain *dom,
1228                                         unsigned int pages,
1229                                         unsigned long align_mask,
1230                                         u64 dma_mask,
1231                                         unsigned long start)
1232 {
1233         unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
1234         int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
1235         int i = start >> APERTURE_RANGE_SHIFT;
1236         unsigned long boundary_size;
1237         unsigned long address = -1;
1238         unsigned long limit;
1239 
1240         next_bit >>= PAGE_SHIFT;
1241 
1242         boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1243                         PAGE_SIZE) >> PAGE_SHIFT;
1244 
1245         for (;i < max_index; ++i) {
1246                 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
1247 
1248                 if (dom->aperture[i]->offset >= dma_mask)
1249                         break;
1250 
1251                 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
1252                                                dma_mask >> PAGE_SHIFT);
1253 
1254                 address = iommu_area_alloc(dom->aperture[i]->bitmap,
1255                                            limit, next_bit, pages, 0,
1256                                             boundary_size, align_mask);
1257                 if (address != -1) {
1258                         address = dom->aperture[i]->offset +
1259                                   (address << PAGE_SHIFT);
1260                         dom->next_address = address + (pages << PAGE_SHIFT);
1261                         break;
1262                 }
1263 
1264                 next_bit = 0;
1265         }
1266 
1267         return address;
1268 }
1269 
1270 static unsigned long dma_ops_alloc_addresses(struct device *dev,
1271                                              struct dma_ops_domain *dom,
1272                                              unsigned int pages,
1273                                              unsigned long align_mask,
1274                                              u64 dma_mask)
1275 {
1276         unsigned long address;
1277 
1278 #ifdef CONFIG_IOMMU_STRESS
1279         dom->next_address = 0;
1280         dom->need_flush = true;
1281 #endif
1282 
1283         address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1284                                      dma_mask, dom->next_address);
1285 
1286         if (address == -1) {
1287                 dom->next_address = 0;
1288                 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1289                                              dma_mask, 0);
1290                 dom->need_flush = true;
1291         }
1292 
1293         if (unlikely(address == -1))
1294                 address = DMA_ERROR_CODE;
1295 
1296         WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
1297 
1298         return address;
1299 }
1300 
1301 /*
1302  * The address free function.
1303  *
1304  * called with domain->lock held
1305  */
1306 static void dma_ops_free_addresses(struct dma_ops_domain *dom,
1307                                    unsigned long address,
1308                                    unsigned int pages)
1309 {
1310         unsigned i = address >> APERTURE_RANGE_SHIFT;
1311         struct aperture_range *range = dom->aperture[i];
1312 
1313         BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
1314 
1315 #ifdef CONFIG_IOMMU_STRESS
1316         if (i < 4)
1317                 return;
1318 #endif
1319 
1320         if (address >= dom->next_address)
1321                 dom->need_flush = true;
1322 
1323         address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
1324 
1325         bitmap_clear(range->bitmap, address, pages);
1326 
1327 }
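/*
 * Illustrative example only (dev, dma_dom and pages are placeholders, and
 * domain->lock is assumed to be held as noted above): how the allocate
 * and free halves of the address allocator pair up.
 *
 *	unsigned long addr;
 *
 *	addr = dma_ops_alloc_addresses(dev, dma_dom, pages, 0, *dev->dma_mask);
 *	if (addr != DMA_ERROR_CODE) {
 *		... use the range, then release it again ...
 *		dma_ops_free_addresses(dma_dom, addr, pages);
 *	}
 */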
1328 
1329 /****************************************************************************
1330  *
1331  * The next functions belong to the domain allocation. A domain is
1332  * allocated for every IOMMU as the default domain. If device isolation
1333  * is enabled, every device gets its own domain. The most important thing
1334  * about domains is the page table mapping the DMA address space they
1335  * contain.
1336  *
1337  ****************************************************************************/
1338 
1339 /*
1340  * This function adds a protection domain to the global protection domain list
1341  */
1342 static void add_domain_to_list(struct protection_domain *domain)
1343 {
1344         unsigned long flags;
1345 
1346         spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1347         list_add(&domain->list, &amd_iommu_pd_list);
1348         spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1349 }
1350 
1351 /*
1352  * This function removes a protection domain from the global
1353  * protection domain list
1354  */
1355 static void del_domain_from_list(struct protection_domain *domain)
1356 {
1357         unsigned long flags;
1358 
1359         spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1360         list_del(&domain->list);
1361         spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1362 }
1363 
1364 static u16 domain_id_alloc(void)
1365 {
1366         unsigned long flags;
1367         int id;
1368 
1369         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1370         id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
1371         BUG_ON(id == 0);
1372         if (id > 0 && id < MAX_DOMAIN_ID)
1373                 __set_bit(id, amd_iommu_pd_alloc_bitmap);
1374         else
1375                 id = 0;
1376         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1377 
1378         return id;
1379 }
1380 
1381 static void domain_id_free(int id)
1382 {
1383         unsigned long flags;
1384 
1385         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1386         if (id > 0 && id < MAX_DOMAIN_ID)
1387                 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
1388         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1389 }
1390 
1391 static void free_pagetable(struct protection_domain *domain)
1392 {
1393         int i, j;
1394         u64 *p1, *p2, *p3;
1395 
1396         p1 = domain->pt_root;
1397 
1398         if (!p1)
1399                 return;
1400 
1401         for (i = 0; i < 512; ++i) {
1402                 if (!IOMMU_PTE_PRESENT(p1[i]))
1403                         continue;
1404 
1405                 p2 = IOMMU_PTE_PAGE(p1[i]);
1406                 for (j = 0; j < 512; ++j) {
1407                         if (!IOMMU_PTE_PRESENT(p2[j]))
1408                                 continue;
1409                         p3 = IOMMU_PTE_PAGE(p2[j]);
1410                         free_page((unsigned long)p3);
1411                 }
1412 
1413                 free_page((unsigned long)p2);
1414         }
1415 
1416         free_page((unsigned long)p1);
1417 
1418         domain->pt_root = NULL;
1419 }
1420 
1421 /*
1422  * Free a domain, only used if something went wrong in the
1423  * allocation path and we need to free an already allocated page table
1424  */
1425 static void dma_ops_domain_free(struct dma_ops_domain *dom)
1426 {
1427         int i;
1428 
1429         if (!dom)
1430                 return;
1431 
1432         del_domain_from_list(&dom->domain);
1433 
1434         free_pagetable(&dom->domain);
1435 
1436         for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
1437                 if (!dom->aperture[i])
1438                         continue;
1439                 free_page((unsigned long)dom->aperture[i]->bitmap);
1440                 kfree(dom->aperture[i]);
1441         }
1442 
1443         kfree(dom);
1444 }
1445 
1446 /*
1447  * Allocates a new protection domain usable for the dma_ops functions.
1448  * It also initializes the page table and the address allocator data
1449  * structures required for the dma_ops interface
1450  */
1451 static struct dma_ops_domain *dma_ops_domain_alloc(void)
1452 {
1453         struct dma_ops_domain *dma_dom;
1454 
1455         dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
1456         if (!dma_dom)
1457                 return NULL;
1458 
1459         spin_lock_init(&dma_dom->domain.lock);
1460 
1461         dma_dom->domain.id = domain_id_alloc();
1462         if (dma_dom->domain.id == 0)
1463                 goto free_dma_dom;
1464         INIT_LIST_HEAD(&dma_dom->domain.dev_list);
1465         dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1466         dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1467         dma_dom->domain.flags = PD_DMA_OPS_MASK;
1468         dma_dom->domain.priv = dma_dom;
1469         if (!dma_dom->domain.pt_root)
1470                 goto free_dma_dom;
1471 
1472         dma_dom->need_flush = false;
1473         dma_dom->target_dev = 0xffff;
1474 
1475         add_domain_to_list(&dma_dom->domain);
1476 
1477         if (alloc_new_range(dma_dom, true, GFP_KERNEL))
1478                 goto free_dma_dom;
1479 
1480         /*
1481          * Mark the first page as allocated so we never return 0 as a
1482          * valid dma-address and can use 0 as the error value.
1483          */
1484         dma_dom->aperture[0]->bitmap[0] = 1;
1485         dma_dom->next_address = 0;
1486 
1487 
1488         return dma_dom;
1489 
1490 free_dma_dom:
1491         dma_ops_domain_free(dma_dom);
1492 
1493         return NULL;
1494 }
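/*
 * Illustrative example only (the error handling shown is a placeholder):
 * dma_ops_domain_alloc() and dma_ops_domain_free() above are used as a
 * pair.
 *
 *	struct dma_ops_domain *dma_dom;
 *
 *	dma_dom = dma_ops_domain_alloc();
 *	if (!dma_dom)
 *		return -ENOMEM;
 *	...
 *	dma_ops_domain_free(dma_dom);
 */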
1495 
1496 /*
1497  * little helper function to check whether a given protection domain is a
1498  * dma_ops domain
1499  */
1500 static bool dma_ops_domain(struct protection_domain *domain)
1501 {
1502         return domain->flags & PD_DMA_OPS_MASK;
1503 }
1504 
1505 static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
1506 {
1507         u64 pte_root = virt_to_phys(domain->pt_root);
1508         u32 flags = 0;
1509 
1510         pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1511                     << DEV_ENTRY_MODE_SHIFT;
1512         pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1513 
1514         if (ats)
1515                 flags |= DTE_FLAG_IOTLB;
1516 
1517         amd_iommu_dev_table[devid].data[3] |= flags;
1518         amd_iommu_dev_table[devid].data[2]  = domain->id;
1519         amd_iommu_dev_table[devid].data[1]  = upper_32_bits(pte_root);
1520         amd_iommu_dev_table[devid].data[0]  = lower_32_bits(pte_root);
1521 }
1522 
1523 static void clear_dte_entry(u16 devid)
1524 {
1525         /* remove entry from the device table seen by the hardware */
1526         amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1527         amd_iommu_dev_table[devid].data[1] = 0;
1528         amd_iommu_dev_table[devid].data[2] = 0;
1529 
1530         amd_iommu_apply_erratum_63(devid);
1531 }
1532 
1533 static void do_attach(struct device *dev, struct protection_domain *domain)
1534 {
1535         struct iommu_dev_data *dev_data;
1536         struct amd_iommu *iommu;
1537         struct pci_dev *pdev;
1538         bool ats = false;
1539         u16 devid;
1540 
1541         devid    = get_device_id(dev);
1542         iommu    = amd_iommu_rlookup_table[devid];
1543         dev_data = get_dev_data(dev);
1544         pdev     = to_pci_dev(dev);
1545 
1546         if (amd_iommu_iotlb_sup)
1547                 ats = pci_ats_enabled(pdev);
1548 
1549         /* Update data structures */
1550         dev_data->domain = domain;
1551         list_add(&dev_data->list, &domain->dev_list);
1552         set_dte_entry(devid, domain, ats);
1553 
1554         /* Do reference counting */
1555         domain->dev_iommu[iommu->index] += 1;
1556         domain->dev_cnt                 += 1;
1557 
1558         /* Flush the DTE entry */
1559         device_flush_dte(dev);
1560 }
1561 
1562 static void do_detach(struct device *dev)
1563 {
1564         struct iommu_dev_data *dev_data;
1565         struct amd_iommu *iommu;
1566         u16 devid;
1567 
1568         devid    = get_device_id(dev);
1569         iommu    = amd_iommu_rlookup_table[devid];
1570         dev_data = get_dev_data(dev);
1571 
1572         /* decrease reference counters */
1573         dev_data->domain->dev_iommu[iommu->index] -= 1;
1574         dev_data->domain->dev_cnt                 -= 1;
1575 
1576         /* Update data structures */
1577         dev_data->domain = NULL;
1578         list_del(&dev_data->list);
1579         clear_dte_entry(devid);
1580 
1581         /* Flush the DTE entry */
1582         device_flush_dte(dev);
1583 }
1584 
1585 /*
1586  * If a device is not yet associated with a domain, this function
1587  * associates it with the domain and makes it visible to the hardware
1588  */
1589 static int __attach_device(struct device *dev,
1590                            struct protection_domain *domain)
1591 {
1592         struct iommu_dev_data *dev_data, *alias_data;
1593         int ret;
1594 
1595         dev_data   = get_dev_data(dev);
1596         alias_data = get_dev_data(dev_data->alias);
1597 
1598         if (!alias_data)
1599                 return -EINVAL;
1600 
1601         /* lock domain */
1602         spin_lock(&domain->lock);
1603 
1604         /* Some sanity checks */
1605         ret = -EBUSY;
1606         if (alias_data->domain != NULL &&
1607             alias_data->domain != domain)
1608                 goto out_unlock;
1609 
1610         if (dev_data->domain != NULL &&
1611             dev_data->domain != domain)
1612                 goto out_unlock;
1613 
1614         /* Do real assignment */
1615         if (dev_data->alias != dev) {
1616                 alias_data = get_dev_data(dev_data->alias);
1617                 if (alias_data->domain == NULL)
1618                         do_attach(dev_data->alias, domain);
1619 
1620                 atomic_inc(&alias_data->bind);
1621         }
1622 
1623         if (dev_data->domain == NULL)
1624                 do_attach(dev, domain);
1625 
1626         atomic_inc(&dev_data->bind);
1627 
1628         ret = 0;
1629 
1630 out_unlock:
1631 
1632         /* ready */
1633         spin_unlock(&domain->lock);
1634 
1635         return ret;
1636 }
1637 
1638 /*
1639  * If a device is not yet associated with a domain, this function
1640  * associates it with the domain and makes that visible to the hardware
1641  */
1642 static int attach_device(struct device *dev,
1643                          struct protection_domain *domain)
1644 {
1645         struct pci_dev *pdev = to_pci_dev(dev);
1646         unsigned long flags;
1647         int ret;
1648 
1649         if (amd_iommu_iotlb_sup)
1650                 pci_enable_ats(pdev, PAGE_SHIFT);
1651 
1652         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1653         ret = __attach_device(dev, domain);
1654         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1655 
1656         /*
1657          * We might boot into a crash-kernel here. The crashed kernel
1658          * left the caches in the IOMMU dirty. So we have to flush
1659          * here to evict all dirty stuff.
1660          */
1661         domain_flush_tlb_pde(domain);
1662 
1663         return ret;
1664 }
1665 
1666 /*
1667  * Removes a device from a protection domain (unlocked)
1668  */
1669 static void __detach_device(struct device *dev)
1670 {
1671         struct iommu_dev_data *dev_data = get_dev_data(dev);
1672         struct iommu_dev_data *alias_data;
1673         struct protection_domain *domain;
1674         unsigned long flags;
1675 
1676         BUG_ON(!dev_data->domain);
1677 
1678         domain = dev_data->domain;
1679 
1680         spin_lock_irqsave(&domain->lock, flags);
1681 
1682         if (dev_data->alias != dev) {
1683                 alias_data = get_dev_data(dev_data->alias);
1684                 if (atomic_dec_and_test(&alias_data->bind))
1685                         do_detach(dev_data->alias);
1686         }
1687 
1688         if (atomic_dec_and_test(&dev_data->bind))
1689                 do_detach(dev);
1690 
1691         spin_unlock_irqrestore(&domain->lock, flags);
1692 
1693         /*
1694          * If we run in passthrough mode the device must be assigned to the
1695          * passthrough domain if it is detached from any other domain.
1696          * Make sure we can deassign from the pt_domain itself.
1697          */
1698         if (iommu_pass_through &&
1699             (dev_data->domain == NULL && domain != pt_domain))
1700                 __attach_device(dev, pt_domain);
1701 }
1702 
1703 /*
1704  * Removes a device from a protection domain (with devtable_lock held)
1705  */
1706 static void detach_device(struct device *dev)
1707 {
1708         struct pci_dev *pdev = to_pci_dev(dev);
1709         unsigned long flags;
1710 
1711         /* lock device table */
1712         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1713         __detach_device(dev);
1714         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1715 
1716         if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
1717                 pci_disable_ats(pdev);
1718 }
1719 
1720 /*
1721  * Find out the protection domain structure for a given PCI device. This
1722  * will give us the pointer to the page table root for example.
1723  */
1724 static struct protection_domain *domain_for_device(struct device *dev)
1725 {
1726         struct protection_domain *dom;
1727         struct iommu_dev_data *dev_data, *alias_data;
1728         unsigned long flags;
1729         u16 devid;
1730 
1731         devid      = get_device_id(dev);
1732         dev_data   = get_dev_data(dev);
1733         alias_data = get_dev_data(dev_data->alias);
1734         if (!alias_data)
1735                 return NULL;
1736 
1737         read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1738         dom = dev_data->domain;
1739         if (dom == NULL &&
1740             alias_data->domain != NULL) {
1741                 __attach_device(dev, alias_data->domain);
1742                 dom = alias_data->domain;
1743         }
1744 
1745         read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1746 
1747         return dom;
1748 }
1749 
1750 static int device_change_notifier(struct notifier_block *nb,
1751                                   unsigned long action, void *data)
1752 {
1753         struct device *dev = data;
1754         u16 devid;
1755         struct protection_domain *domain;
1756         struct dma_ops_domain *dma_domain;
1757         struct amd_iommu *iommu;
1758         unsigned long flags;
1759 
1760         if (!check_device(dev))
1761                 return 0;
1762 
1763         devid  = get_device_id(dev);
1764         iommu  = amd_iommu_rlookup_table[devid];
1765 
1766         switch (action) {
1767         case BUS_NOTIFY_UNBOUND_DRIVER:
1768 
1769                 domain = domain_for_device(dev);
1770 
1771                 if (!domain)
1772                         goto out;
1773                 if (iommu_pass_through)
1774                         break;
1775                 detach_device(dev);
1776                 break;
1777         case BUS_NOTIFY_ADD_DEVICE:
1778 
1779                 iommu_init_device(dev);
1780 
1781                 domain = domain_for_device(dev);
1782 
1783                 dma_domain = find_protection_domain(devid);
1784                 if (!dma_domain) {
1785                         /* allocate a protection domain if a device is added */
1786                         dma_domain = dma_ops_domain_alloc();
1787                         if (!dma_domain)
1788                                 goto out;
1789                         dma_domain->target_dev = devid;
1790 
1791                         spin_lock_irqsave(&iommu_pd_list_lock, flags);
1792                         list_add_tail(&dma_domain->list, &iommu_pd_list);
1793                         spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1794                 }
1795 
1796                 dev->archdata.dma_ops = &amd_iommu_dma_ops;
1797 
1798                 break;
1799         case BUS_NOTIFY_DEL_DEVICE:
1800 
1801                 iommu_uninit_device(dev);
1802 
1803         default:
1804                 goto out;
1805         }
1806 
1807         device_flush_dte(dev);
1808         iommu_completion_wait(iommu);
1809 
1810 out:
1811         return 0;
1812 }
1813 
1814 static struct notifier_block device_nb = {
1815         .notifier_call = device_change_notifier,
1816 };
1817 
1818 void amd_iommu_init_notifier(void)
1819 {
1820         bus_register_notifier(&pci_bus_type, &device_nb);
1821 }
1822 
1823 /*****************************************************************************
1824  *
1825  * The next functions belong to the dma_ops mapping/unmapping code.
1826  *
1827  *****************************************************************************/
1828 
1829 /*
1830  * In the dma_ops path we only have the struct device. This function
1831  * finds the corresponding IOMMU, the protection domain and the
1832  * requestor id for a given device.
1833  * If the device is not yet associated with a domain this is also done
1834  * in this function.
1835  */
1836 static struct protection_domain *get_domain(struct device *dev)
1837 {
1838         struct protection_domain *domain;
1839         struct dma_ops_domain *dma_dom;
1840         u16 devid = get_device_id(dev);
1841 
1842         if (!check_device(dev))
1843                 return ERR_PTR(-EINVAL);
1844 
1845         domain = domain_for_device(dev);
1846         if (domain != NULL && !dma_ops_domain(domain))
1847                 return ERR_PTR(-EBUSY);
1848 
1849         if (domain != NULL)
1850                 return domain;
1851 
1852         /* Device not bound yet - bind it */
1853         dma_dom = find_protection_domain(devid);
1854         if (!dma_dom)
1855                 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
1856         attach_device(dev, &dma_dom->domain);
1857         DUMP_printk("Using protection domain %d for device %s\n",
1858                     dma_dom->domain.id, dev_name(dev));
1859 
1860         return &dma_dom->domain;
1861 }
1862 
1863 static void update_device_table(struct protection_domain *domain)
1864 {
1865         struct iommu_dev_data *dev_data;
1866 
1867         list_for_each_entry(dev_data, &domain->dev_list, list) {
1868                 struct pci_dev *pdev = to_pci_dev(dev_data->dev);
1869                 u16 devid = get_device_id(dev_data->dev);
1870                 set_dte_entry(devid, domain, pci_ats_enabled(pdev));
1871         }
1872 }
1873 
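/*
 * The updated flag tested below is raised elsewhere in this file when the
 * page-table layout of a domain changes (for example when alloc_pte() has
 * to grow the address space), so update_domain() only rewrites the device
 * table entries and flushes if there is actually something to propagate.
 */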
1874 static void update_domain(struct protection_domain *domain)
1875 {
1876         if (!domain->updated)
1877                 return;
1878 
1879         update_device_table(domain);
1880 
1881         domain_flush_devices(domain);
1882         domain_flush_tlb_pde(domain);
1883 
1884         domain->updated = false;
1885 }
1886 
1887 /*
1888  * This function fetches the PTE for a given address in the aperture
1889  */
1890 static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1891                             unsigned long address)
1892 {
1893         struct aperture_range *aperture;
1894         u64 *pte, *pte_page;
1895 
1896         aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1897         if (!aperture)
1898                 return NULL;
1899 
1900         pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1901         if (!pte) {
1902                 pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
1903                                 GFP_ATOMIC);
1904                 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1905         } else
1906                 pte += PM_LEVEL_INDEX(0, address);
1907 
1908         update_domain(&dom->domain);
1909 
1910         return pte;
1911 }
1912 
1913 /*
1914  * This is the generic map function. It maps one 4kb page at paddr to
1915  * the given address in the DMA address space for the domain.
1916  */
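/*
 * The permission bits below are from the device's point of view:
 * DMA_TO_DEVICE means the device only reads the buffer (IOMMU_PTE_IR),
 * DMA_FROM_DEVICE means the device writes into it (IOMMU_PTE_IW), and
 * DMA_BIDIRECTIONAL sets both.
 */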
1917 static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
1918                                      unsigned long address,
1919                                      phys_addr_t paddr,
1920                                      int direction)
1921 {
1922         u64 *pte, __pte;
1923 
1924         WARN_ON(address > dom->aperture_size);
1925 
1926         paddr &= PAGE_MASK;
1927 
1928         pte  = dma_ops_get_pte(dom, address);
1929         if (!pte)
1930                 return DMA_ERROR_CODE;
1931 
1932         __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1933 
1934         if (direction == DMA_TO_DEVICE)
1935                 __pte |= IOMMU_PTE_IR;
1936         else if (direction == DMA_FROM_DEVICE)
1937                 __pte |= IOMMU_PTE_IW;
1938         else if (direction == DMA_BIDIRECTIONAL)
1939                 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
1940 
1941         WARN_ON(*pte);
1942 
1943         *pte = __pte;
1944 
1945         return (dma_addr_t)address;
1946 }
1947 
1948 /*
1949  * The generic unmapping function for one page in the DMA address space.
1950  */
1951 static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
1952                                  unsigned long address)
1953 {
1954         struct aperture_range *aperture;
1955         u64 *pte;
1956 
1957         if (address >= dom->aperture_size)
1958                 return;
1959 
1960         aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1961         if (!aperture)
1962                 return;
1963 
1964         pte  = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1965         if (!pte)
1966                 return;
1967 
1968         pte += PM_LEVEL_INDEX(0, address);
1969 
1970         WARN_ON(!*pte);
1971 
1972         *pte = 0ULL;
1973 }
1974 
1975 /*
1976  * This function contains common code for mapping of a physically
1977  * contiguous memory region into DMA address space. It is used by all
1978  * mapping functions provided with this IOMMU driver.
1979  * Must be called with the domain lock held.
1980  */
1981 static dma_addr_t __map_single(struct device *dev,
1982                                struct dma_ops_domain *dma_dom,
1983                                phys_addr_t paddr,
1984                                size_t size,
1985                                int dir,
1986                                bool align,
1987                                u64 dma_mask)
1988 {
1989         dma_addr_t offset = paddr & ~PAGE_MASK;
1990         dma_addr_t address, start, ret;
1991         unsigned int pages;
1992         unsigned long align_mask = 0;
1993         int i;
1994 
1995         pages = iommu_num_pages(paddr, size, PAGE_SIZE);
1996         paddr &= PAGE_MASK;
1997 
1998         INC_STATS_COUNTER(total_map_requests);
1999 
2000         if (pages > 1)
2001                 INC_STATS_COUNTER(cross_page);
2002 
2003         if (align)
2004                 align_mask = (1UL << get_order(size)) - 1;
2005 
2006 retry:
2007         address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
2008                                           dma_mask);
2009         if (unlikely(address == DMA_ERROR_CODE)) {
2010                 /*
2011                  * setting next_address here will let the address
2012                  * allocator only scan the newly allocated range in the
2013                  * first run. This is a small optimization.
2014                  */
2015                 dma_dom->next_address = dma_dom->aperture_size;
2016 
2017                 if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
2018                         goto out;
2019 
2020                 /*
2021                  * aperture was successfully enlarged by 128 MB, try
2022                  * allocation again
2023                  */
2024                 goto retry;
2025         }
2026 
2027         start = address;
2028         for (i = 0; i < pages; ++i) {
2029                 ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
2030                 if (ret == DMA_ERROR_CODE)
2031                         goto out_unmap;
2032 
2033                 paddr += PAGE_SIZE;
2034                 start += PAGE_SIZE;
2035         }
2036         address += offset;
2037 
2038         ADD_STATS_COUNTER(alloced_io_mem, size);
2039 
2040         if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
2041                 domain_flush_tlb(&dma_dom->domain);
2042                 dma_dom->need_flush = false;
2043         } else if (unlikely(amd_iommu_np_cache))
2044                 domain_flush_pages(&dma_dom->domain, address, size);
2045 
2046 out:
2047         return address;
2048 
2049 out_unmap:
2050 
2051         for (--i; i >= 0; --i) {
2052                 start -= PAGE_SIZE;
2053                 dma_ops_domain_unmap(dma_dom, start);
2054         }
2055 
2056         dma_ops_free_addresses(dma_dom, address, pages);
2057 
2058         return DMA_ERROR_CODE;
2059 }
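
/*
 * Worked example for the align handling above, assuming a 4 KiB PAGE_SIZE:
 * for an aligned 16 KiB request get_order(16384) == 2, so align_mask
 * becomes (1UL << 2) - 1 == 3 and dma_ops_alloc_addresses() returns a
 * range whose low two page-index bits are zero, i.e. a 16 KiB aligned
 * DMA address.
 */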
2060 
2061 /*
2062  * Does the reverse of the __map_single function. Must be called with
2063  * the domain lock held too
2064  */
2065 static void __unmap_single(struct dma_ops_domain *dma_dom,
2066                            dma_addr_t dma_addr,
2067                            size_t size,
2068                            int dir)
2069 {
2070         dma_addr_t flush_addr;
2071         dma_addr_t i, start;
2072         unsigned int pages;
2073 
2074         if ((dma_addr == DMA_ERROR_CODE) ||
2075             (dma_addr + size > dma_dom->aperture_size))
2076                 return;
2077 
2078         flush_addr = dma_addr;
2079         pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
2080         dma_addr &= PAGE_MASK;
2081         start = dma_addr;
2082 
2083         for (i = 0; i < pages; ++i) {
2084                 dma_ops_domain_unmap(dma_dom, start);
2085                 start += PAGE_SIZE;
2086         }
2087 
2088         SUB_STATS_COUNTER(alloced_io_mem, size);
2089 
2090         dma_ops_free_addresses(dma_dom, dma_addr, pages);
2091 
2092         if (amd_iommu_unmap_flush || dma_dom->need_flush) {
2093                 domain_flush_pages(&dma_dom->domain, flush_addr, size);
2094                 dma_dom->need_flush = false;
2095         }
2096 }
2097 
2098 /*
2099  * The exported map_page function for dma_ops.
2100  */
2101 static dma_addr_t map_page(struct device *dev, struct page *page,
2102                            unsigned long offset, size_t size,
2103                            enum dma_data_direction dir,
2104                            struct dma_attrs *attrs)
2105 {
2106         unsigned long flags;
2107         struct protection_domain *domain;
2108         dma_addr_t addr;
2109         u64 dma_mask;
2110         phys_addr_t paddr = page_to_phys(page) + offset;
2111 
2112         INC_STATS_COUNTER(cnt_map_single);
2113 
2114         domain = get_domain(dev);
2115         if (PTR_ERR(domain) == -EINVAL)
2116                 return (dma_addr_t)paddr;
2117         else if (IS_ERR(domain))
2118                 return DMA_ERROR_CODE;
2119 
2120         dma_mask = *dev->dma_mask;
2121 
2122         spin_lock_irqsave(&domain->lock, flags);
2123 
2124         addr = __map_single(dev, domain->priv, paddr, size, dir, false,
2125                             dma_mask);
2126         if (addr == DMA_ERROR_CODE)
2127                 goto out;
2128 
2129         domain_flush_complete(domain);
2130 
2131 out:
2132         spin_unlock_irqrestore(&domain->lock, flags);
2133 
2134         return addr;
2135 }
2136 
2137 /*
2138  * The exported unmap_page function for dma_ops.
2139  */
2140 static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
2141                        enum dma_data_direction dir, struct dma_attrs *attrs)
2142 {
2143         unsigned long flags;
2144         struct protection_domain *domain;
2145 
2146         INC_STATS_COUNTER(cnt_unmap_single);
2147 
2148         domain = get_domain(dev);
2149         if (IS_ERR(domain))
2150                 return;
2151 
2152         spin_lock_irqsave(&domain->lock, flags);
2153 
2154         __unmap_single(domain->priv, dma_addr, size, dir);
2155 
2156         domain_flush_complete(domain);
2157 
2158         spin_unlock_irqrestore(&domain->lock, flags);
2159 }
2160 
2161 /*
2162  * This is a special map_sg function which is used when we have to map a
2163  * device that is not handled by an AMD IOMMU in the system.
2164  */
2165 static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
2166                            int nelems, int dir)
2167 {
2168         struct scatterlist *s;
2169         int i;
2170 
2171         for_each_sg(sglist, s, nelems, i) {
2172                 s->dma_address = (dma_addr_t)sg_phys(s);
2173                 s->dma_length  = s->length;
2174         }
2175 
2176         return nelems;
2177 }
2178 
2179 /*
2180  * The exported map_sg function for dma_ops (handles scatter-gather
2181  * lists).
2182  */
2183 static int map_sg(struct device *dev, struct scatterlist *sglist,
2184                   int nelems, enum dma_data_direction dir,
2185                   struct dma_attrs *attrs)
2186 {
2187         unsigned long flags;
2188         struct protection_domain *domain;
2189         int i;
2190         struct scatterlist *s;
2191         phys_addr_t paddr;
2192         int mapped_elems = 0;
2193         u64 dma_mask;
2194 
2195         INC_STATS_COUNTER(cnt_map_sg);
2196 
2197         domain = get_domain(dev);
2198         if (PTR_ERR(domain) == -EINVAL)
2199                 return map_sg_no_iommu(dev, sglist, nelems, dir);
2200         else if (IS_ERR(domain))
2201                 return 0;
2202 
2203         dma_mask = *dev->dma_mask;
2204 
2205         spin_lock_irqsave(&domain->lock, flags);
2206 
2207         for_each_sg(sglist, s, nelems, i) {
2208                 paddr = sg_phys(s);
2209 
2210                 s->dma_address = __map_single(dev, domain->priv,
2211                                               paddr, s->length, dir, false,
2212                                               dma_mask);
2213 
2214                 if (s->dma_address) {
2215                         s->dma_length = s->length;
2216                         mapped_elems++;
2217                 } else
2218                         goto unmap;
2219         }
2220 
2221         domain_flush_complete(domain);
2222 
2223 out:
2224         spin_unlock_irqrestore(&domain->lock, flags);
2225 
2226         return mapped_elems;
2227 unmap:
2228         for_each_sg(sglist, s, mapped_elems, i) {
2229                 if (s->dma_address)
2230                         __unmap_single(domain->priv, s->dma_address,
2231                                        s->dma_length, dir);
2232                 s->dma_address = s->dma_length = 0;
2233         }
2234 
2235         mapped_elems = 0;
2236 
2237         goto out;
2238 }
2239 
2240 /*
2241  * The exported unmap_sg function for dma_ops (handles scatter-gather
2242  * lists).
2243  */
2244 static void unmap_sg(struct device *dev, struct scatterlist *sglist,
2245                      int nelems, enum dma_data_direction dir,
2246                      struct dma_attrs *attrs)
2247 {
2248         unsigned long flags;
2249         struct protection_domain *domain;
2250         struct scatterlist *s;
2251         int i;
2252 
2253         INC_STATS_COUNTER(cnt_unmap_sg);
2254 
2255         domain = get_domain(dev);
2256         if (IS_ERR(domain))
2257                 return;
2258 
2259         spin_lock_irqsave(&domain->lock, flags);
2260 
2261         for_each_sg(sglist, s, nelems, i) {
2262                 __unmap_single(domain->priv, s->dma_address,
2263                                s->dma_length, dir);
2264                 s->dma_address = s->dma_length = 0;
2265         }
2266 
2267         domain_flush_complete(domain);
2268 
2269         spin_unlock_irqrestore(&domain->lock, flags);
2270 }
2271 
2272 /*
2273  * The exported alloc_coherent function for dma_ops.
2274  */
2275 static void *alloc_coherent(struct device *dev, size_t size,
2276                             dma_addr_t *dma_addr, gfp_t flag)
2277 {
2278         unsigned long flags;
2279         void *virt_addr;
2280         struct protection_domain *domain;
2281         phys_addr_t paddr;
2282         u64 dma_mask = dev->coherent_dma_mask;
2283 
2284         INC_STATS_COUNTER(cnt_alloc_coherent);
2285 
2286         domain = get_domain(dev);
2287         if (PTR_ERR(domain) == -EINVAL) {
2288                 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2289                 *dma_addr = __pa(virt_addr);
2290                 return virt_addr;
2291         } else if (IS_ERR(domain))
2292                 return NULL;
2293 
2294         dma_mask  = dev->coherent_dma_mask;
2295         flag     &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2296         flag     |= __GFP_ZERO;
2297 
2298         virt_addr = (void *)__get_free_pages(flag, get_order(size));
2299         if (!virt_addr)
2300                 return NULL;
2301 
2302         paddr = virt_to_phys(virt_addr);
2303 
2304         if (!dma_mask)
2305                 dma_mask = *dev->dma_mask;
2306 
2307         spin_lock_irqsave(&domain->lock, flags);
2308 
2309         *dma_addr = __map_single(dev, domain->priv, paddr,
2310                                  size, DMA_BIDIRECTIONAL, true, dma_mask);
2311 
2312         if (*dma_addr == DMA_ERROR_CODE) {
2313                 spin_unlock_irqrestore(&domain->lock, flags);
2314                 goto out_free;
2315         }
2316 
2317         domain_flush_complete(domain);
2318 
2319         spin_unlock_irqrestore(&domain->lock, flags);
2320 
2321         return virt_addr;
2322 
2323 out_free:
2324 
2325         free_pages((unsigned long)virt_addr, get_order(size));
2326 
2327         return NULL;
2328 }
2329 
2330 /*
2331  * The exported free_coherent function for dma_ops.
2332  */
2333 static void free_coherent(struct device *dev, size_t size,
2334                           void *virt_addr, dma_addr_t dma_addr)
2335 {
2336         unsigned long flags;
2337         struct protection_domain *domain;
2338 
2339         INC_STATS_COUNTER(cnt_free_coherent);
2340 
2341         domain = get_domain(dev);
2342         if (IS_ERR(domain))
2343                 goto free_mem;
2344 
2345         spin_lock_irqsave(&domain->lock, flags);
2346 
2347         __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2348 
2349         domain_flush_complete(domain);
2350 
2351         spin_unlock_irqrestore(&domain->lock, flags);
2352 
2353 free_mem:
2354         free_pages((unsigned long)virt_addr, get_order(size));
2355 }
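
/*
 * Illustrative sketch of the coherent-DMA path, assuming amd_iommu_dma_ops
 * has been installed for pdev's device; pdev and the buffer size are
 * hypothetical.
 */
#if 0
static int example_coherent_usage(struct pci_dev *pdev)
{
        dma_addr_t handle;
        void *buf;

        /* ends up in alloc_coherent() via dev->archdata.dma_ops */
        buf = dma_alloc_coherent(&pdev->dev, 4096, &handle, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* ... let the device DMA to/from buf using handle ... */

        dma_free_coherent(&pdev->dev, 4096, buf, handle); /* -> free_coherent() */
        return 0;
}
#endif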
2356 
2357 /*
2358  * This function is called by the DMA layer to find out if we can handle a
2359  * particular device. It is part of the dma_ops.
2360  */
2361 static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2362 {
2363         return check_device(dev);
2364 }
2365 
2366 /*
2367  * The function for pre-allocating protection domains.
2368  *
2369  * Once the driver core informs the DMA layer when a driver grabs a
2370  * device we won't need to preallocate the protection domains anymore.
2371  * For now we have to.
2372  */
2373 static void prealloc_protection_domains(void)
2374 {
2375         struct pci_dev *dev = NULL;
2376         struct dma_ops_domain *dma_dom;
2377         u16 devid;
2378 
2379         for_each_pci_dev(dev) {
2380 
2381                 /* Do we handle this device? */
2382                 if (!check_device(&dev->dev))
2383                         continue;
2384 
2385                 /* Is there already any domain for it? */
2386                 if (domain_for_device(&dev->dev))
2387                         continue;
2388 
2389                 devid = get_device_id(&dev->dev);
2390 
2391                 dma_dom = dma_ops_domain_alloc();
2392                 if (!dma_dom)
2393                         continue;
2394                 init_unity_mappings_for_device(dma_dom, devid);
2395                 dma_dom->target_dev = devid;
2396 
2397                 attach_device(&dev->dev, &dma_dom->domain);
2398 
2399                 list_add_tail(&dma_dom->list, &iommu_pd_list);
2400         }
2401 }
2402 
2403 static struct dma_map_ops amd_iommu_dma_ops = {
2404         .alloc_coherent = alloc_coherent,
2405         .free_coherent = free_coherent,
2406         .map_page = map_page,
2407         .unmap_page = unmap_page,
2408         .map_sg = map_sg,
2409         .unmap_sg = unmap_sg,
2410         .dma_supported = amd_iommu_dma_supported,
2411 };
2412 
2413 static unsigned device_dma_ops_init(void)
2414 {
2415         struct pci_dev *pdev = NULL;
2416         unsigned unhandled = 0;
2417 
2418         for_each_pci_dev(pdev) {
2419                 if (!check_device(&pdev->dev)) {
2420                         unhandled += 1;
2421                         continue;
2422                 }
2423 
2424                 pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
2425         }
2426 
2427         return unhandled;
2428 }
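
/*
 * Illustrative sketch of the streaming-DMA path, assuming
 * device_dma_ops_init() above has pointed dev.archdata.dma_ops at
 * amd_iommu_dma_ops for this device; pdev and page are hypothetical.
 */
#if 0
static int example_streaming_dma(struct pci_dev *pdev, struct page *page)
{
        dma_addr_t handle;

        /* dma_map_page() resolves to map_page() above for this device */
        handle = dma_map_page(&pdev->dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
        if (dma_mapping_error(&pdev->dev, handle))
                return -EIO;

        /* ... program the device with handle and let it DMA ... */

        dma_unmap_page(&pdev->dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
        return 0;
}
#endif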
2429 
2430 /*
2431  * The function which glues the AMD IOMMU driver into dma_ops.
2432  */
2433 
2434 void __init amd_iommu_init_api(void)
2435 {
2436         register_iommu(&amd_iommu_ops);
2437 }
2438 
2439 int __init amd_iommu_init_dma_ops(void)
2440 {
2441         struct amd_iommu *iommu;
2442         int ret, unhandled;
2443 
2444         /*
2445          * first allocate a default protection domain for every IOMMU we
2446          * found in the system. Devices not assigned to any other
2447          * protection domain will be assigned to the default one.
2448          */
2449         for_each_iommu(iommu) {
2450                 iommu->default_dom = dma_ops_domain_alloc();
2451                 if (iommu->default_dom == NULL)
2452                         return -ENOMEM;
2453                 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
2454                 ret = iommu_init_unity_mappings(iommu);
2455                 if (ret)
2456                         goto free_domains;
2457         }
2458 
2459         /*
2460          * Pre-allocate the protection domains for each device.
2461          */
2462         prealloc_protection_domains();
2463 
2464         iommu_detected = 1;
2465         swiotlb = 0;
2466 
2467         /* Make the IOMMU dma_ops finally visible to the drivers */
2468         unhandled = device_dma_ops_init();
2469         if (unhandled && max_pfn > MAX_DMA32_PFN) {
2470                 /* There are unhandled devices - initialize swiotlb for them */
2471                 swiotlb = 1;
2472         }
2473 
2474         amd_iommu_stats_init();
2475 
2476         return 0;
2477 
2478 free_domains:
2479 
2480         for_each_iommu(iommu) {
2481                 if (iommu->default_dom)
2482                         dma_ops_domain_free(iommu->default_dom);
2483         }
2484 
2485         return ret;
2486 }
2487 
2488 /*****************************************************************************
2489  *
2490  * The following functions belong to the exported interface of AMD IOMMU
2491  *
2492  * This interface allows access to lower level functions of the IOMMU
2493  * like protection domain handling and assignment of devices to domains
2494  * which is not possible with the dma_ops interface.
2495  *
2496  *****************************************************************************/
2497 
2498 static void cleanup_domain(struct protection_domain *domain)
2499 {
2500         struct iommu_dev_data *dev_data, *next;
2501         unsigned long flags;
2502 
2503         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2504 
2505         list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2506                 struct device *dev = dev_data->dev;
2507 
2508                 __detach_device(dev);
2509                 atomic_set(&dev_data->bind, 0);
2510         }
2511 
2512         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2513 }
2514 
2515 static void protection_domain_free(struct protection_domain *domain)
2516 {
2517         if (!domain)
2518                 return;
2519 
2520         del_domain_from_list(domain);
2521 
2522         if (domain->id)
2523                 domain_id_free(domain->id);
2524 
2525         kfree(domain);
2526 }
2527 
2528 static struct protection_domain *protection_domain_alloc(void)
2529 {
2530         struct protection_domain *domain;
2531 
2532         domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2533         if (!domain)
2534                 return NULL;
2535 
2536         spin_lock_init(&domain->lock);
2537         mutex_init(&domain->api_lock);
2538         domain->id = domain_id_alloc();
2539         if (!domain->id)
2540                 goto out_err;
2541         INIT_LIST_HEAD(&domain->dev_list);
2542 
2543         add_domain_to_list(domain);
2544 
2545         return domain;
2546 
2547 out_err:
2548         kfree(domain);
2549 
2550         return NULL;
2551 }
2552 
2553 static int amd_iommu_domain_init(struct iommu_domain *dom)
2554 {
2555         struct protection_domain *domain;
2556 
2557         domain = protection_domain_alloc();
2558         if (!domain)
2559                 goto out_free;
2560 
2561         domain->mode    = PAGE_MODE_3_LEVEL;
2562         domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
2563         if (!domain->pt_root)
2564                 goto out_free;
2565 
2566         dom->priv = domain;
2567 
2568         return 0;
2569 
2570 out_free:
2571         protection_domain_free(domain);
2572 
2573         return -ENOMEM;
2574 }
2575 
2576 static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2577 {
2578         struct protection_domain *domain = dom->priv;
2579 
2580         if (!domain)
2581                 return;
2582 
2583         if (domain->dev_cnt > 0)
2584                 cleanup_domain(domain);
2585 
2586         BUG_ON(domain->dev_cnt != 0);
2587 
2588         free_pagetable(domain);
2589 
2590         protection_domain_free(domain);
2591 
2592         dom->priv = NULL;
2593 }
2594 
2595 static void amd_iommu_detach_device(struct iommu_domain *dom,
2596                                     struct device *dev)
2597 {
2598         struct iommu_dev_data *dev_data = dev->archdata.iommu;
2599         struct amd_iommu *iommu;
2600         u16 devid;
2601 
2602         if (!check_device(dev))
2603                 return;
2604 
2605         devid = get_device_id(dev);
2606 
2607         if (dev_data->domain != NULL)
2608                 detach_device(dev);
2609 
2610         iommu = amd_iommu_rlookup_table[devid];
2611         if (!iommu)
2612                 return;
2613 
2614         device_flush_dte(dev);
2615         iommu_completion_wait(iommu);
2616 }
2617 
2618 static int amd_iommu_attach_device(struct iommu_domain *dom,
2619                                    struct device *dev)
2620 {
2621         struct protection_domain *domain = dom->priv;
2622         struct iommu_dev_data *dev_data;
2623         struct amd_iommu *iommu;
2624         int ret;
2625         u16 devid;
2626 
2627         if (!check_device(dev))
2628                 return -EINVAL;
2629 
2630         dev_data = dev->archdata.iommu;
2631 
2632         devid = get_device_id(dev);
2633 
2634         iommu = amd_iommu_rlookup_table[devid];
2635         if (!iommu)
2636                 return -EINVAL;
2637 
2638         if (dev_data->domain)
2639                 detach_device(dev);
2640 
2641         ret = attach_device(dev, domain);
2642 
2643         iommu_completion_wait(iommu);
2644 
2645         return ret;
2646 }
2647 
2648 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2649                          phys_addr_t paddr, int gfp_order, int iommu_prot)
2650 {
2651         unsigned long page_size = 0x1000UL << gfp_order;
2652         struct protection_domain *domain = dom->priv;
2653         int prot = 0;
2654         int ret;
2655 
2656         if (iommu_prot & IOMMU_READ)
2657                 prot |= IOMMU_PROT_IR;
2658         if (iommu_prot & IOMMU_WRITE)
2659                 prot |= IOMMU_PROT_IW;
2660 
2661         mutex_lock(&domain->api_lock);
2662         ret = iommu_map_page(domain, iova, paddr, prot, page_size);
2663         mutex_unlock(&domain->api_lock);
2664 
2665         return ret;
2666 }
2667 
2668 static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2669                            int gfp_order)
2670 {
2671         struct protection_domain *domain = dom->priv;
2672         unsigned long page_size, unmap_size;
2673 
2674         page_size  = 0x1000UL << gfp_order;
2675 
2676         mutex_lock(&domain->api_lock);
2677         unmap_size = iommu_unmap_page(domain, iova, page_size);
2678         mutex_unlock(&domain->api_lock);
2679 
2680         domain_flush_tlb_pde(domain);
2681 
2682         return get_order(unmap_size);
2683 }
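
/*
 * Worked example for the order-based interface above: gfp_order == 0 maps
 * or unmaps a single 4 KiB page (0x1000UL << 0), while gfp_order == 9
 * covers 0x1000UL << 9 == 2 MiB, i.e. one large page.
 */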
2684 
2685 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2686                                           unsigned long iova)
2687 {
2688         struct protection_domain *domain = dom->priv;
2689         unsigned long offset_mask;
2690         phys_addr_t paddr;
2691         u64 *pte, __pte;
2692 
2693         pte = fetch_pte(domain, iova);
2694 
2695         if (!pte || !IOMMU_PTE_PRESENT(*pte))
2696                 return 0;
2697 
2698         if (PM_PTE_LEVEL(*pte) == 0)
2699                 offset_mask = PAGE_SIZE - 1;
2700         else
2701                 offset_mask = PTE_PAGE_SIZE(*pte) - 1;
2702 
2703         __pte = *pte & PM_ADDR_MASK;
2704         paddr = (__pte & ~offset_mask) | (iova & offset_mask);
2705 
2706         return paddr;
2707 }
2708 
2709 static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2710                                     unsigned long cap)
2711 {
2712         switch (cap) {
2713         case IOMMU_CAP_CACHE_COHERENCY:
2714                 return 1;
2715         }
2716 
2717         return 0;
2718 }
2719 
2720 static struct iommu_ops amd_iommu_ops = {
2721         .domain_init = amd_iommu_domain_init,
2722         .domain_destroy = amd_iommu_domain_destroy,
2723         .attach_dev = amd_iommu_attach_device,
2724         .detach_dev = amd_iommu_detach_device,
2725         .map = amd_iommu_map,
2726         .unmap = amd_iommu_unmap,
2727         .iova_to_phys = amd_iommu_iova_to_phys,
2728         .domain_has_cap = amd_iommu_domain_has_cap,
2729 };
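
/*
 * Illustrative sketch of how the callbacks above are reached through the
 * generic IOMMU API (linux/iommu.h) once amd_iommu_init_api() has called
 * register_iommu(); signatures as used by this kernel generation, and the
 * dev/iova/paddr values are hypothetical.
 */
#if 0
static int example_iommu_api_usage(struct device *dev, phys_addr_t paddr)
{
        struct iommu_domain *dom;
        int ret;

        dom = iommu_domain_alloc();             /* -> amd_iommu_domain_init() */
        if (!dom)
                return -ENOMEM;

        ret = iommu_attach_device(dom, dev);    /* -> amd_iommu_attach_device() */
        if (ret)
                goto out_free;

        /* map one 4kb page, readable and writable for the device */
        ret = iommu_map(dom, 0x100000, paddr, 0, IOMMU_READ | IOMMU_WRITE);
        if (ret)
                goto out_detach;

        WARN_ON(iommu_iova_to_phys(dom, 0x100000) != paddr);

        iommu_unmap(dom, 0x100000, 0);          /* -> amd_iommu_unmap() */

out_detach:
        iommu_detach_device(dom, dev);          /* -> amd_iommu_detach_device() */
out_free:
        iommu_domain_free(dom);                 /* -> amd_iommu_domain_destroy() */
        return ret;
}
#endif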
2730 
2731 /*****************************************************************************
2732  *
2733  * The next functions do a basic initialization of IOMMU for pass through
2734  * mode
2735  *
2736  * In passthrough mode the IOMMU is initialized and enabled but not used for
2737  * DMA-API translation.
2738  *
2739  *****************************************************************************/
2740 
2741 int __init amd_iommu_init_passthrough(void)
2742 {
2743         struct amd_iommu *iommu;
2744         struct pci_dev *dev = NULL;
2745         u16 devid;
2746 
2747         /* allocate passthrough domain */
2748         pt_domain = protection_domain_alloc();
2749         if (!pt_domain)
2750                 return -ENOMEM;
2751 
2752         pt_domain->mode |= PAGE_MODE_NONE;
2753 
2754         for_each_pci_dev(dev) {
2755                 if (!check_device(&dev->dev))
2756                         continue;
2757 
2758                 devid = get_device_id(&dev->dev);
2759 
2760                 iommu = amd_iommu_rlookup_table[devid];
2761                 if (!iommu)
2762                         continue;
2763 
2764                 attach_device(&dev->dev, pt_domain);
2765         }
2766 
2767         pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
2768 
2769         return 0;
2770 }
2771 
